diff --git a/java/common/src/main/java/org/apache/tsfile/block/column/Column.java b/java/common/src/main/java/org/apache/tsfile/block/column/Column.java index 161a2c8eb..e9093693b 100644 --- a/java/common/src/main/java/org/apache/tsfile/block/column/Column.java +++ b/java/common/src/main/java/org/apache/tsfile/block/column/Column.java @@ -133,6 +133,13 @@ default TsPrimitiveType getTsPrimitiveType(int position) { */ long getRetainedSizeInBytes(); + /** + * Returns the size of this Column as if it was compacted, ignoring any over-allocations and any + * unloaded nested Columns. For example, in dictionary blocks, this only counts each dictionary + * entry once, rather than each time a value is referenced. + */ + long getSizeInBytes(); + /** * Returns a column starting at the specified position and extends for the specified length. The * specified region must be entirely contained within this column. diff --git a/java/common/src/main/java/org/apache/tsfile/utils/RamUsageEstimator.java b/java/common/src/main/java/org/apache/tsfile/utils/RamUsageEstimator.java index 7c66e56d2..2f1cf3db6 100644 --- a/java/common/src/main/java/org/apache/tsfile/utils/RamUsageEstimator.java +++ b/java/common/src/main/java/org/apache/tsfile/utils/RamUsageEstimator.java @@ -271,6 +271,18 @@ public static long sizeOf(double[] arr) { : alignObjectSize(NUM_BYTES_ARRAY_HEADER + (long) Double.BYTES * arr.length); } + public static long sizeOf(Accountable[] arr) { + if (arr == null) { + return 0; + } else { + long size = shallowSizeOf(arr); + for (Accountable obj : arr) { + size += obj != null ? obj.ramBytesUsed() : 0; + } + return size; + } + } + /** Returns the size in bytes of the String[] object. */ public static long sizeOf(String[] arr) { if (arr == null) { diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlock.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlock.java index 14072b7a8..0c4571588 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlock.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlock.java @@ -65,6 +65,8 @@ public static TsBlock wrapBlocksWithoutCopy( private volatile long retainedSizeInBytes = -1; + private volatile long sizeInBytes = -1; + public TsBlock(int positionCount) { this(false, positionCount, null, EMPTY_COLUMNS); } @@ -117,6 +119,18 @@ public long getRetainedSizeInBytes() { return retainedSizeInBytes; } + /** + * Returns the size of this block as if it was compacted, ignoring any over-allocations and any + * unloaded nested blocks. For example, in dictionary blocks, this only counts each dictionary + * entry once, rather than each time a value is referenced. + */ + public long getSizeInBytes() { + if (sizeInBytes < 0) { + return updateSize(); + } + return sizeInBytes; + } + /** * @param positionOffset start offset * @param length slice length @@ -504,6 +518,16 @@ private long updateRetainedSize() { return newRetainedSizeInBytes; } + private long updateSize() { + long newSizeInBytes = INSTANCE_SIZE; + newSizeInBytes += timeColumn.getSizeInBytes(); + for (Column column : valueColumns) { + newSizeInBytes += column.getSizeInBytes(); + } + this.sizeInBytes = newSizeInBytes; + return newSizeInBytes; + } + public int getTotalInstanceSize() { int totalInstanceSize = INSTANCE_SIZE; totalInstanceSize += timeColumn.getInstanceSize(); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlockBuilder.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlockBuilder.java index 568c222c7..389d59e61 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlockBuilder.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlockBuilder.java @@ -104,8 +104,6 @@ private TsBlockBuilder(int initialExpectedEntries, int maxTsBlockBytes, List types) { valueColumnBuilders = new ColumnBuilder[types.size()]; int initialExpectedEntries = timeColumnBuilder.getPositionCount(); for (int i = 0; i < valueColumnBuilders.length; i++) { - // TODO use Type interface to encapsulate createColumnBuilder to each concrete type class - // instead of switch-case switch (types.get(i)) { case BOOLEAN: valueColumnBuilders[i] = diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java index beb48c28b..ab244311e 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java @@ -30,8 +30,8 @@ import java.util.Optional; import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkValidRegion; +import static org.apache.tsfile.utils.RamUsageEstimator.sizeOf; import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfBooleanArray; -import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfObjectArray; public class BinaryColumn implements Column { @@ -44,6 +44,7 @@ public class BinaryColumn implements Column { private final Binary[] values; private final long retainedSizeInBytes; + private final long sizeInBytes; public BinaryColumn(int positionCount, Optional valueIsNull, Binary[] values) { this(0, positionCount, valueIsNull.orElse(null), values); @@ -69,9 +70,37 @@ public BinaryColumn(int positionCount, Optional valueIsNull, Binary[] } this.valueIsNull = valueIsNull; - // TODO we need to sum up all the Binary's retainedSize here - retainedSizeInBytes = - INSTANCE_SIZE + sizeOfBooleanArray(positionCount) + sizeOfObjectArray(positionCount); + retainedSizeInBytes = INSTANCE_SIZE + sizeOfBooleanArray(positionCount) + sizeOf(values); + sizeInBytes = values.length > 0 ? retainedSizeInBytes * positionCount / values.length : 0L; + } + + // called by getRegion which already knows the underlying retainedSizeInBytes + private BinaryColumn( + int arrayOffset, + int positionCount, + boolean[] valueIsNull, + Binary[] values, + long retainedSizeInBytes) { + if (arrayOffset < 0) { + throw new IllegalArgumentException("arrayOffset is negative"); + } + this.arrayOffset = arrayOffset; + if (positionCount < 0) { + throw new IllegalArgumentException("positionCount is negative"); + } + this.positionCount = positionCount; + + if (values.length - arrayOffset < positionCount) { + throw new IllegalArgumentException("values length is less than positionCount"); + } + this.values = values; + + if (valueIsNull != null && valueIsNull.length - arrayOffset < positionCount) { + throw new IllegalArgumentException("isNull length is less than positionCount"); + } + this.valueIsNull = valueIsNull; + this.retainedSizeInBytes = retainedSizeInBytes; + this.sizeInBytes = values.length > 0 ? retainedSizeInBytes * positionCount / values.length : 0L; } @Override @@ -134,10 +163,16 @@ public long getRetainedSizeInBytes() { return retainedSizeInBytes; } + @Override + public long getSizeInBytes() { + return sizeInBytes; + } + @Override public Column getRegion(int positionOffset, int length) { checkValidRegion(getPositionCount(), positionOffset, length); - return new BinaryColumn(positionOffset + arrayOffset, length, valueIsNull, values); + return new BinaryColumn( + positionOffset + arrayOffset, length, valueIsNull, values, getRetainedSizeInBytes()); } @Override diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumnBuilder.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumnBuilder.java index 917c6e5f3..fa5d9cdf8 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumnBuilder.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumnBuilder.java @@ -32,7 +32,6 @@ import static java.lang.Math.max; import static org.apache.tsfile.read.common.block.column.ColumnUtil.calculateBlockResetSize; -import static org.apache.tsfile.utils.RamUsageEstimator.shallowSizeOf; import static org.apache.tsfile.utils.RamUsageEstimator.sizeOf; public class BinaryColumnBuilder implements ColumnBuilder { @@ -124,7 +123,6 @@ public TSDataType getDataType() { @Override public long getRetainedSizeInBytes() { - // TODO we need to sum up all the Binary's retainedSize here long size = INSTANCE_SIZE + arraysRetainedSizeInBytes; if (columnBuilderStatus != null) { size += ColumnBuilderStatus.INSTANCE_SIZE; @@ -134,7 +132,6 @@ public long getRetainedSizeInBytes() { @Override public ColumnBuilder newColumnBuilderLike(ColumnBuilderStatus columnBuilderStatus) { - // TODO we should take retain size into account here return new BinaryColumnBuilder(columnBuilderStatus, calculateBlockResetSize(positionCount)); } @@ -153,6 +150,6 @@ private void growCapacity() { } private void updateArraysDataSize() { - arraysRetainedSizeInBytes = sizeOf(valueIsNull) + shallowSizeOf(values); + arraysRetainedSizeInBytes = sizeOf(valueIsNull) + sizeOf(values); } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BooleanColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BooleanColumn.java index f74bcd8ee..f502b74e3 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BooleanColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BooleanColumn.java @@ -132,6 +132,11 @@ public long getRetainedSizeInBytes() { return retainedSizeInBytes; } + @Override + public long getSizeInBytes() { + return (long) positionCount * SIZE_IN_BYTES_PER_POSITION; + } + @Override public Column getRegion(int positionOffset, int length) { checkValidRegion(getPositionCount(), positionOffset, length); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DoubleColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DoubleColumn.java index 0311830b0..ee2caa4ca 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DoubleColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DoubleColumn.java @@ -133,6 +133,11 @@ public long getRetainedSizeInBytes() { return retainedSizeInBytes; } + @Override + public long getSizeInBytes() { + return (long) positionCount * SIZE_IN_BYTES_PER_POSITION; + } + @Override public Column getRegion(int positionOffset, int length) { checkValidRegion(getPositionCount(), positionOffset, length); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/FloatColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/FloatColumn.java index 449212ebc..ad55c15c3 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/FloatColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/FloatColumn.java @@ -133,6 +133,11 @@ public long getRetainedSizeInBytes() { return retainedSizeInBytes; } + @Override + public long getSizeInBytes() { + return (long) positionCount * SIZE_IN_BYTES_PER_POSITION; + } + @Override public Column getRegion(int positionOffset, int length) { checkValidRegion(getPositionCount(), positionOffset, length); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/IntColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/IntColumn.java index 0254690b6..9c7a0f739 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/IntColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/IntColumn.java @@ -133,6 +133,11 @@ public long getRetainedSizeInBytes() { return retainedSizeInBytes; } + @Override + public long getSizeInBytes() { + return (long) positionCount * SIZE_IN_BYTES_PER_POSITION; + } + @Override public Column getRegion(int positionOffset, int length) { checkValidRegion(getPositionCount(), positionOffset, length); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/LongColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/LongColumn.java index 34cc7dc4e..b42c6fa5c 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/LongColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/LongColumn.java @@ -133,6 +133,11 @@ public long getRetainedSizeInBytes() { return retainedSizeInBytes; } + @Override + public long getSizeInBytes() { + return (long) positionCount * SIZE_IN_BYTES_PER_POSITION; + } + @Override public Column getRegion(int positionOffset, int length) { checkValidRegion(getPositionCount(), positionOffset, length); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/NullColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/NullColumn.java index 8b7e2152a..8e56b5abf 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/NullColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/NullColumn.java @@ -83,6 +83,11 @@ public long getRetainedSizeInBytes() { return retainedSizeInBytes; } + @Override + public long getSizeInBytes() { + return retainedSizeInBytes; + } + @Override public Column getRegion(int positionOffset, int length) { checkValidRegion(getPositionCount(), positionOffset, length); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/RunLengthEncodedColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/RunLengthEncodedColumn.java index a78e2daae..649bc70e0 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/RunLengthEncodedColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/RunLengthEncodedColumn.java @@ -191,6 +191,11 @@ public long getRetainedSizeInBytes() { return INSTANCE_SIZE + value.getRetainedSizeInBytes(); } + @Override + public long getSizeInBytes() { + return value.getSizeInBytes(); + } + @Override public Column getRegion(int positionOffset, int length) { checkValidRegion(positionCount, positionOffset, length); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java index 1783f6a34..a936a3e7a 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java @@ -98,8 +98,7 @@ public boolean isNull(int position) { @Override public boolean[] isNull() { - // todo - return null; + throw new UnsupportedOperationException("isNull is not supported for TimeColumn"); } @Override @@ -112,6 +111,11 @@ public long getRetainedSizeInBytes() { return retainedSizeInBytes; } + @Override + public long getSizeInBytes() { + return (long) positionCount * SIZE_IN_BYTES_PER_POSITION; + } + @Override public Column getRegion(int positionOffset, int length) { ColumnUtil.checkValidRegion(getPositionCount(), positionOffset, length); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TsBlockSerde.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TsBlockSerde.java index 0ef002567..a50745048 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TsBlockSerde.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TsBlockSerde.java @@ -89,12 +89,12 @@ public TsBlock deserialize(ByteBuffer byteBuffer) { * @return Serialized tsblock. */ public ByteBuffer serialize(TsBlock tsBlock) throws IOException { - if (tsBlock.getRetainedSizeInBytes() > Integer.MAX_VALUE) { + if (tsBlock.getSizeInBytes() > Integer.MAX_VALUE) { throw new IllegalStateException( - "TsBlock should not be that large: " + tsBlock.getRetainedSizeInBytes()); + "TsBlock should not be that large: " + tsBlock.getSizeInBytes()); } ByteArrayOutputStream byteArrayOutputStream = - new ByteArrayOutputStream((int) tsBlock.getRetainedSizeInBytes()); + new ByteArrayOutputStream((int) tsBlock.getSizeInBytes()); DataOutputStream dataOutputStream = new DataOutputStream(byteArrayOutputStream); // Value column count.