@@ -383,12 +383,12 @@ static HFileBlock createFromBuff(ByteBuff buf, boolean usesHBaseChecksum, final
383383
384384 /**
385385 * Parse total on disk size including header and checksum.
386- * @param headerBuf Header ByteBuffer. Presumed exact size of header.
387- * @param verifyChecksum true if checksum verification is in use.
386+ * @param headerBuf Header ByteBuffer. Presumed exact size of header.
387+ * @param checksumSupport true if checksum verification is in use.
388388 * @return Size of the block with header included.
389389 */
390- private static int getOnDiskSizeWithHeader (final ByteBuff headerBuf , boolean verifyChecksum ) {
391- return headerBuf .getInt (Header .ON_DISK_SIZE_WITHOUT_HEADER_INDEX ) + headerSize (verifyChecksum );
390+ private static int getOnDiskSizeWithHeader (final ByteBuff headerBuf , boolean checksumSupport ) {
391+ return headerBuf .getInt (Header .ON_DISK_SIZE_WITHOUT_HEADER_INDEX ) + headerSize (checksumSupport );
392392 }
393393
394394 /**
@@ -1582,33 +1582,48 @@ public HFileBlock readBlockData(long offset, long onDiskSizeWithHeaderL, boolean
15821582 }
15831583
15841584 /**
1585- * Returns Check <code>onDiskSizeWithHeaderL</code> size is healthy and then return it as an int
1585+ * Check that {@code value} read from a block header seems reasonable, within a large margin of
1586+ * error.
1587+ * @return {@code true} if the value is safe to proceed, {@code false} otherwise.
15861588 */
1587- private static int checkAndGetSizeAsInt (final long onDiskSizeWithHeaderL , final int hdrSize )
1588- throws IOException {
1589- if (
1590- (onDiskSizeWithHeaderL < hdrSize && onDiskSizeWithHeaderL != -1 )
1591- || onDiskSizeWithHeaderL >= Integer .MAX_VALUE
1592- ) {
1593- throw new IOException (
1594- "Invalid onDisksize=" + onDiskSizeWithHeaderL + ": expected to be at least " + hdrSize
1595- + " and at most " + Integer .MAX_VALUE + ", or -1" );
1589+ private boolean checkOnDiskSizeWithHeader (int value ) {
1590+ if (value < 0 ) {
1591+ if (LOG .isTraceEnabled ()) {
1592+ LOG .trace (
1593+ "onDiskSizeWithHeader={}; value represents a size, so it should never be negative." ,
1594+ value );
1595+ }
1596+ return false ;
15961597 }
1597- return (int ) onDiskSizeWithHeaderL ;
1598+ if (value - hdrSize < 0 ) {
1599+ if (LOG .isTraceEnabled ()) {
1600+ LOG .trace ("onDiskSizeWithHeader={}, hdrSize={}; don't accept a value that is negative"
1601+ + " after the header size is excluded." , value , hdrSize );
1602+ }
1603+ return false ;
1604+ }
1605+ return true ;
15981606 }
15991607
16001608 /**
1601- * Verify the passed in onDiskSizeWithHeader aligns with what is in the header else something is
1602- * not right.
1609+ * Check that {@code value} provided by the calling context seems reasonable, within a large
1610+ * margin of error.
1611+ * @return {@code true} if the value is safe to proceed, {@code false} otherwise.
16031612 */
1604- private void verifyOnDiskSizeMatchesHeader (final int passedIn , final ByteBuff headerBuf ,
1605- final long offset , boolean verifyChecksum ) throws IOException {
1606- // Assert size provided aligns with what is in the header
1607- int fromHeader = getOnDiskSizeWithHeader (headerBuf , verifyChecksum );
1608- if (passedIn != fromHeader ) {
1609- throw new IOException ("Passed in onDiskSizeWithHeader=" + passedIn + " != " + fromHeader
1610- + ", offset=" + offset + ", fileContext=" + this .fileContext );
1613+ private boolean checkCallerProvidedOnDiskSizeWithHeader (long value ) {
1614+ // same validation logic as is used by Math.toIntExact(long)
1615+ int intValue = (int ) value ;
1616+ if (intValue != value ) {
1617+ if (LOG .isTraceEnabled ()) {
1618+ LOG .trace ("onDiskSizeWithHeaderL={}; value exceeds int size limits." , value );
1619+ }
1620+ return false ;
16111621 }
1622+ if (intValue == -1 ) {
1623+ // a magic value we expect to see.
1624+ return true ;
1625+ }
1626+ return checkOnDiskSizeWithHeader (intValue );
16121627 }
16131628
16141629 /**
@@ -1639,14 +1654,16 @@ private void cacheNextBlockHeader(final long offset, ByteBuff onDiskBlock,
16391654 this .prefetchedHeader .set (ph );
16401655 }
16411656
1642- private int getNextBlockOnDiskSize (boolean readNextHeader , ByteBuff onDiskBlock ,
1643- int onDiskSizeWithHeader ) {
1644- int nextBlockOnDiskSize = -1 ;
1645- if (readNextHeader ) {
1646- nextBlockOnDiskSize =
1647- onDiskBlock .getIntAfterPosition (onDiskSizeWithHeader + BlockType .MAGIC_LENGTH ) + hdrSize ;
1648- }
1649- return nextBlockOnDiskSize ;
1657+ /**
1658+ * Clear the cached value when its integrity is suspect.
1659+ */
1660+ private void invalidateNextBlockHeader () {
1661+ prefetchedHeader .set (null );
1662+ }
1663+
1664+ private int getNextBlockOnDiskSize (ByteBuff onDiskBlock , int onDiskSizeWithHeader ) {
1665+ return onDiskBlock .getIntAfterPosition (onDiskSizeWithHeader + BlockType .MAGIC_LENGTH )
1666+ + hdrSize ;
16501667 }
16511668
16521669 private ByteBuff allocate (int size , boolean intoHeap ) {
@@ -1676,8 +1693,13 @@ protected HFileBlock readBlockDataInternal(FSDataInputStream is, long offset,
16761693 throw new IOException ("Invalid offset=" + offset + " trying to read " + "block (onDiskSize="
16771694 + onDiskSizeWithHeaderL + ")" );
16781695 }
1679- int onDiskSizeWithHeader = checkAndGetSizeAsInt (onDiskSizeWithHeaderL , hdrSize );
1680- // Try and get cached header. Will serve us in rare case where onDiskSizeWithHeaderL is -1
1696+ if (!checkCallerProvidedOnDiskSizeWithHeader (onDiskSizeWithHeaderL )) {
1697+ LOG .trace ("Caller provided invalid onDiskSizeWithHeaderL={}" , onDiskSizeWithHeaderL );
1698+ onDiskSizeWithHeaderL = -1 ;
1699+ }
1700+ int onDiskSizeWithHeader = (int ) onDiskSizeWithHeaderL ;
1701+
1702+ // Try to use the cached header. Will serve us in rare case where onDiskSizeWithHeaderL==-1
16811703 // and will save us having to seek the stream backwards to reread the header we
16821704 // read the last time through here.
16831705 ByteBuff headerBuf = getCachedHeader (offset );
@@ -1691,8 +1713,8 @@ protected HFileBlock readBlockDataInternal(FSDataInputStream is, long offset,
16911713 // file has support for checksums (version 2+).
16921714 boolean checksumSupport = this .fileContext .isUseHBaseChecksum ();
16931715 long startTime = System .currentTimeMillis ();
1694- if (onDiskSizeWithHeader <= 0 ) {
1695- // We were not passed the block size. Need to get it from the header. If header was
1716+ if (onDiskSizeWithHeader == - 1 ) {
1717+ // The caller does not know the block size. Need to get it from the header. If header was
16961718 // not cached (see getCachedHeader above), need to seek to pull it in. This is costly
16971719 // and should happen very rarely. Currently happens on open of a hfile reader where we
16981720 // read the trailer blocks to pull in the indices. Otherwise, we are reading block sizes
@@ -1708,6 +1730,18 @@ protected HFileBlock readBlockDataInternal(FSDataInputStream is, long offset,
17081730 }
17091731 onDiskSizeWithHeader = getOnDiskSizeWithHeader (headerBuf , checksumSupport );
17101732 }
1733+
1734+ // The common case is that onDiskSizeWithHeader was produced by a read without checksum
1735+ // validation, so give it a sanity check before trying to use it.
1736+ if (!checkOnDiskSizeWithHeader (onDiskSizeWithHeader )) {
1737+ if (verifyChecksum ) {
1738+ invalidateNextBlockHeader ();
1739+ return null ;
1740+ } else {
1741+ throw new IOException ("Invalid onDiskSizeWithHeader=" + onDiskSizeWithHeader );
1742+ }
1743+ }
1744+
17111745 int preReadHeaderSize = headerBuf == null ? 0 : hdrSize ;
17121746 // Allocate enough space to fit the next block's header too; saves a seek next time through.
17131747 // onDiskBlock is whole block + header + checksums then extra hdrSize to read next header;
@@ -1724,19 +1758,47 @@ protected HFileBlock readBlockDataInternal(FSDataInputStream is, long offset,
17241758 boolean readNextHeader = readAtOffset (is , onDiskBlock ,
17251759 onDiskSizeWithHeader - preReadHeaderSize , true , offset + preReadHeaderSize , pread );
17261760 onDiskBlock .rewind (); // in case of moving position when copying a cached header
1727- int nextBlockOnDiskSize =
1728- getNextBlockOnDiskSize (readNextHeader , onDiskBlock , onDiskSizeWithHeader );
1761+
1762+ // the call to validateChecksum for this block excludes the next block header over-read, so
1763+ // no reason to delay extracting this value.
1764+ int nextBlockOnDiskSize = -1 ;
1765+ if (readNextHeader ) {
1766+ int parsedVal = getNextBlockOnDiskSize (onDiskBlock , onDiskSizeWithHeader );
1767+ if (checkOnDiskSizeWithHeader (parsedVal )) {
1768+ nextBlockOnDiskSize = parsedVal ;
1769+ }
1770+ }
17291771 if (headerBuf == null ) {
17301772 headerBuf = onDiskBlock .duplicate ().position (0 ).limit (hdrSize );
17311773 }
1732- // Do a few checks before we go instantiate HFileBlock.
1733- assert onDiskSizeWithHeader > this .hdrSize ;
1734- verifyOnDiskSizeMatchesHeader (onDiskSizeWithHeader , headerBuf , offset , checksumSupport );
1774+
17351775 ByteBuff curBlock = onDiskBlock .duplicate ().position (0 ).limit (onDiskSizeWithHeader );
17361776 // Verify checksum of the data before using it for building HFileBlock.
17371777 if (verifyChecksum && !validateChecksum (offset , curBlock , hdrSize )) {
1778+ invalidateNextBlockHeader ();
17381779 return null ;
17391780 }
1781+
1782+ // TODO: is this check necessary or can we proceed with a provided value regardless of
1783+ // what is in the header?
1784+ int fromHeader = getOnDiskSizeWithHeader (headerBuf , checksumSupport );
1785+ if (onDiskSizeWithHeader != fromHeader ) {
1786+ if (LOG .isTraceEnabled ()) {
1787+ LOG .trace ("Passed in onDiskSizeWithHeader={} != {}, offset={}, fileContext={}" ,
1788+ onDiskSizeWithHeader , fromHeader , offset , this .fileContext );
1789+ }
1790+ if (checksumSupport && verifyChecksum ) {
1791+ // This file supports HBase checksums and verification of those checksums was
1792+ // requested. The block size provided by the caller (presumably from the block index)
1793+ // does not match the block size written to the block header. treat this as
1794+ // HBase-checksum failure.
1795+ invalidateNextBlockHeader ();
1796+ return null ;
1797+ }
1798+ throw new IOException ("Passed in onDiskSizeWithHeader=" + onDiskSizeWithHeader + " != "
1799+ + fromHeader + ", offset=" + offset + ", fileContext=" + this .fileContext );
1800+ }
1801+
17401802 // remove checksum from buffer now that it's verified
17411803 int sizeWithoutChecksum = curBlock .getInt (Header .ON_DISK_DATA_SIZE_WITH_HEADER_INDEX );
17421804 curBlock .limit (sizeWithoutChecksum );
0 commit comments