diff --git a/java/common/src/main/java/org/apache/tsfile/block/column/Column.java b/java/common/src/main/java/org/apache/tsfile/block/column/Column.java index e45664634..b5105ed6c 100644 --- a/java/common/src/main/java/org/apache/tsfile/block/column/Column.java +++ b/java/common/src/main/java/org/apache/tsfile/block/column/Column.java @@ -143,6 +143,13 @@ default TsPrimitiveType getTsPrimitiveType(int position) { */ long getRetainedSizeInBytes(); + /** + * Returns the size of this Column as if it was compacted, ignoring any over-allocations and any + * unloaded nested Columns. For example, in dictionary blocks, this only counts each dictionary + * entry once, rather than each time a value is referenced. + */ + long getSizeInBytes(); + /** * Returns a column starting at the specified position and extends for the specified length. The * specified region must be entirely contained within this column. diff --git a/java/common/src/main/java/org/apache/tsfile/utils/RamUsageEstimator.java b/java/common/src/main/java/org/apache/tsfile/utils/RamUsageEstimator.java index 3a35d2269..d357a42ea 100644 --- a/java/common/src/main/java/org/apache/tsfile/utils/RamUsageEstimator.java +++ b/java/common/src/main/java/org/apache/tsfile/utils/RamUsageEstimator.java @@ -271,6 +271,18 @@ public static long sizeOf(double[] arr) { : alignObjectSize(NUM_BYTES_ARRAY_HEADER + (long) Double.BYTES * arr.length); } + public static long sizeOf(Accountable[] arr) { + if (arr == null) { + return 0; + } else { + long size = shallowSizeOf(arr); + for (Accountable obj : arr) { + size += obj != null ? obj.ramBytesUsed() : 0; + } + return size; + } + } + /** Returns the size in bytes of the String[] object. */ public static long sizeOf(String[] arr) { long size = shallowSizeOf(arr); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/file/header/ChunkGroupHeader.java b/java/tsfile/src/main/java/org/apache/tsfile/file/header/ChunkGroupHeader.java index b46f0e0c3..ba3371e9d 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/file/header/ChunkGroupHeader.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/file/header/ChunkGroupHeader.java @@ -53,7 +53,6 @@ public int getSerializedSize() { } private int getSerializedSize(IDeviceID deviceID) { - // TODO: add an interface in IDeviceID int length = deviceID.serializedSize(); return Byte.BYTES + ReadWriteForEncodingUtils.varIntSize(length) + length; } @@ -73,7 +72,6 @@ public static ChunkGroupHeader deserializeFrom( } } - // TODO: add an interface in IDeviceID final IDeviceID deviceID = deserializeDeviceID(inputStream, versionNumber); return new ChunkGroupHeader(deviceID); } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlock.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlock.java index 5f5edc6bb..b5224580e 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlock.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlock.java @@ -69,6 +69,8 @@ public static TsBlock wrapBlocksWithoutCopy( private volatile long retainedSizeInBytes = -1; + private volatile long sizeInBytes = -1; + public TsBlock(int positionCount) { this(false, positionCount, null, EMPTY_COLUMNS); } @@ -122,6 +124,18 @@ public long getRetainedSizeInBytes() { return retainedSizeInBytes; } + /** + * Returns the size of this block as if it was compacted, ignoring any over-allocations and any + * unloaded nested blocks. For example, in dictionary blocks, this only counts each dictionary + * entry once, rather than each time a value is referenced. + */ + public long getSizeInBytes() { + if (sizeInBytes < 0) { + return updateSize(); + } + return sizeInBytes; + } + /** * @param positionOffset start offset * @param length slice length @@ -508,6 +522,16 @@ private long updateRetainedSize() { return newRetainedSizeInBytes; } + private long updateSize() { + long newSizeInBytes = INSTANCE_SIZE; + newSizeInBytes += timeColumn.getSizeInBytes(); + for (Column column : valueColumns) { + newSizeInBytes += column.getSizeInBytes(); + } + this.sizeInBytes = newSizeInBytes; + return newSizeInBytes; + } + public int getTotalInstanceSize() { int totalInstanceSize = INSTANCE_SIZE; totalInstanceSize += timeColumn.getInstanceSize(); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlockBuilder.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlockBuilder.java index 918551d9b..028bffc99 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlockBuilder.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlockBuilder.java @@ -104,8 +104,6 @@ private TsBlockBuilder(int initialExpectedEntries, int maxTsBlockBytes, List types) { valueColumnBuilders = new ColumnBuilder[types.size()]; int initialExpectedEntries = timeColumnBuilder.getPositionCount(); for (int i = 0; i < valueColumnBuilders.length; i++) { - // TODO use Type interface to encapsulate createColumnBuilder to each concrete type class - // instead of switch-case switch (types.get(i)) { case BOOLEAN: valueColumnBuilders[i] = diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java index ec36fc4f1..8a794508a 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java @@ -32,8 +32,8 @@ import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkArrayRange; import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkReadablePosition; import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkValidRegion; +import static org.apache.tsfile.utils.RamUsageEstimator.sizeOf; import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfBooleanArray; -import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfObjectArray; public class BinaryColumn implements Column { @@ -46,6 +46,7 @@ public class BinaryColumn implements Column { private final Binary[] values; private final long retainedSizeInBytes; + private final long sizeInBytes; public BinaryColumn(int initialCapacity) { this(0, 0, null, new Binary[initialCapacity]); @@ -75,9 +76,37 @@ public BinaryColumn(int positionCount, Optional valueIsNull, Binary[] } this.valueIsNull = valueIsNull; - // TODO we need to sum up all the Binary's retainedSize here - retainedSizeInBytes = - INSTANCE_SIZE + sizeOfBooleanArray(positionCount) + sizeOfObjectArray(positionCount); + retainedSizeInBytes = INSTANCE_SIZE + sizeOfBooleanArray(positionCount) + sizeOf(values); + sizeInBytes = values.length > 0 ? retainedSizeInBytes * positionCount / values.length : 0L; + } + + // called by getRegion which already knows the underlying retainedSizeInBytes + private BinaryColumn( + int arrayOffset, + int positionCount, + boolean[] valueIsNull, + Binary[] values, + long retainedSizeInBytes) { + if (arrayOffset < 0) { + throw new IllegalArgumentException("arrayOffset is negative"); + } + this.arrayOffset = arrayOffset; + if (positionCount < 0) { + throw new IllegalArgumentException("positionCount is negative"); + } + this.positionCount = positionCount; + + if (values.length - arrayOffset < positionCount) { + throw new IllegalArgumentException("values length is less than positionCount"); + } + this.values = values; + + if (valueIsNull != null && valueIsNull.length - arrayOffset < positionCount) { + throw new IllegalArgumentException("isNull length is less than positionCount"); + } + this.valueIsNull = valueIsNull; + this.retainedSizeInBytes = retainedSizeInBytes; + this.sizeInBytes = values.length > 0 ? retainedSizeInBytes * positionCount / values.length : 0L; } @Override @@ -140,10 +169,16 @@ public long getRetainedSizeInBytes() { return retainedSizeInBytes; } + @Override + public long getSizeInBytes() { + return sizeInBytes; + } + @Override public Column getRegion(int positionOffset, int length) { checkValidRegion(getPositionCount(), positionOffset, length); - return new BinaryColumn(positionOffset + arrayOffset, length, valueIsNull, values); + return new BinaryColumn( + positionOffset + arrayOffset, length, valueIsNull, values, getRetainedSizeInBytes()); } @Override diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumnBuilder.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumnBuilder.java index a82d82fc0..d9c560dc6 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumnBuilder.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumnBuilder.java @@ -32,7 +32,6 @@ import static java.lang.Math.max; import static org.apache.tsfile.read.common.block.column.ColumnUtil.calculateBlockResetSize; -import static org.apache.tsfile.utils.RamUsageEstimator.shallowSizeOf; import static org.apache.tsfile.utils.RamUsageEstimator.sizeOf; public class BinaryColumnBuilder implements ColumnBuilder { @@ -129,7 +128,6 @@ public TSDataType getDataType() { @Override public long getRetainedSizeInBytes() { - // TODO we need to sum up all the Binary's retainedSize here long size = INSTANCE_SIZE + arraysRetainedSizeInBytes; if (columnBuilderStatus != null) { size += ColumnBuilderStatus.INSTANCE_SIZE; @@ -139,7 +137,6 @@ public long getRetainedSizeInBytes() { @Override public ColumnBuilder newColumnBuilderLike(ColumnBuilderStatus columnBuilderStatus) { - // TODO we should take retain size into account here return new BinaryColumnBuilder(columnBuilderStatus, calculateBlockResetSize(positionCount)); } @@ -158,6 +155,6 @@ private void growCapacity() { } private void updateArraysDataSize() { - arraysRetainedSizeInBytes = sizeOf(valueIsNull) + shallowSizeOf(values); + arraysRetainedSizeInBytes = sizeOf(valueIsNull) + sizeOf(values); } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BooleanColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BooleanColumn.java index 76b3fb6d7..7b9aca747 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BooleanColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BooleanColumn.java @@ -138,6 +138,11 @@ public long getRetainedSizeInBytes() { return retainedSizeInBytes; } + @Override + public long getSizeInBytes() { + return (long) positionCount * SIZE_IN_BYTES_PER_POSITION; + } + @Override public Column getRegion(int positionOffset, int length) { checkValidRegion(getPositionCount(), positionOffset, length); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DictionaryColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DictionaryColumn.java index 13dbd2261..50a2dd1ef 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DictionaryColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DictionaryColumn.java @@ -187,6 +187,11 @@ public long getRetainedSizeInBytes() { return retainedSizeInBytes + dictionary.getRetainedSizeInBytes(); } + @Override + public long getSizeInBytes() { + return ids.length > 0 ? getRetainedSizeInBytes() * positionCount / ids.length : 0L; + } + @Override public Column getRegion(int positionOffset, int length) { checkValidRegion(positionCount, positionOffset, length); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DoubleColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DoubleColumn.java index afc78208e..e0aff8f7a 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DoubleColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DoubleColumn.java @@ -139,6 +139,11 @@ public long getRetainedSizeInBytes() { return retainedSizeInBytes; } + @Override + public long getSizeInBytes() { + return (long) positionCount * SIZE_IN_BYTES_PER_POSITION; + } + @Override public Column getRegion(int positionOffset, int length) { checkValidRegion(getPositionCount(), positionOffset, length); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/FloatColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/FloatColumn.java index c008d9353..8a576c0ce 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/FloatColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/FloatColumn.java @@ -154,6 +154,11 @@ public long getRetainedSizeInBytes() { return retainedSizeInBytes; } + @Override + public long getSizeInBytes() { + return (long) positionCount * SIZE_IN_BYTES_PER_POSITION; + } + @Override public Column getRegion(int positionOffset, int length) { checkValidRegion(getPositionCount(), positionOffset, length); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/IntColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/IntColumn.java index c2065ee32..6820a83eb 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/IntColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/IntColumn.java @@ -182,6 +182,11 @@ public long getRetainedSizeInBytes() { return retainedSizeInBytes; } + @Override + public long getSizeInBytes() { + return (long) positionCount * SIZE_IN_BYTES_PER_POSITION; + } + @Override public Column getRegion(int positionOffset, int length) { checkValidRegion(getPositionCount(), positionOffset, length); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/LongColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/LongColumn.java index ecba9013d..03d8af0e6 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/LongColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/LongColumn.java @@ -154,6 +154,11 @@ public long getRetainedSizeInBytes() { return retainedSizeInBytes; } + @Override + public long getSizeInBytes() { + return (long) positionCount * SIZE_IN_BYTES_PER_POSITION; + } + @Override public Column getRegion(int positionOffset, int length) { checkValidRegion(getPositionCount(), positionOffset, length); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/NullColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/NullColumn.java index 9b999c412..d91359cae 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/NullColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/NullColumn.java @@ -85,6 +85,11 @@ public long getRetainedSizeInBytes() { return retainedSizeInBytes; } + @Override + public long getSizeInBytes() { + return retainedSizeInBytes; + } + @Override public Column getRegion(int positionOffset, int length) { checkValidRegion(getPositionCount(), positionOffset, length); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/RunLengthEncodedColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/RunLengthEncodedColumn.java index 620378182..148407295 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/RunLengthEncodedColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/RunLengthEncodedColumn.java @@ -193,6 +193,11 @@ public long getRetainedSizeInBytes() { return INSTANCE_SIZE + value.getRetainedSizeInBytes(); } + @Override + public long getSizeInBytes() { + return value.getSizeInBytes(); + } + @Override public Column getRegion(int positionOffset, int length) { checkValidRegion(positionCount, positionOffset, length); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java index 3b1880a96..e7108fc85 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java @@ -100,8 +100,7 @@ public boolean isNull(int position) { @Override public boolean[] isNull() { - // todo - return null; + throw new UnsupportedOperationException("isNull is not supported for TimeColumn"); } @Override @@ -114,6 +113,11 @@ public long getRetainedSizeInBytes() { return retainedSizeInBytes; } + @Override + public long getSizeInBytes() { + return (long) positionCount * SIZE_IN_BYTES_PER_POSITION; + } + @Override public Column getRegion(int positionOffset, int length) { ColumnUtil.checkValidRegion(getPositionCount(), positionOffset, length); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TsBlockSerde.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TsBlockSerde.java index e887a5773..a891553b5 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TsBlockSerde.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TsBlockSerde.java @@ -67,7 +67,6 @@ public TsBlock deserialize(ByteBuffer byteBuffer) { } // Time column. - // TODO: a TimeColumn will be deserialized as a LongColumn Column timeColumn = ColumnEncoderFactory.get(columnEncodings.get(0)) .readColumn(byteBuffer, TSDataType.INT64, positionCount); @@ -91,12 +90,12 @@ public TsBlock deserialize(ByteBuffer byteBuffer) { * @return Serialized tsblock. */ public ByteBuffer serialize(TsBlock tsBlock) throws IOException { - if (tsBlock.getRetainedSizeInBytes() > Integer.MAX_VALUE) { + if (tsBlock.getSizeInBytes() > Integer.MAX_VALUE) { throw new IllegalStateException( - "TsBlock should not be that large: " + tsBlock.getRetainedSizeInBytes()); + "TsBlock should not be that large: " + tsBlock.getSizeInBytes()); } ByteArrayOutputStream byteArrayOutputStream = - new ByteArrayOutputStream((int) tsBlock.getRetainedSizeInBytes()); + new ByteArrayOutputStream((int) tsBlock.getSizeInBytes()); DataOutputStream dataOutputStream = new DataOutputStream(byteArrayOutputStream); // Value column count.