diff --git a/java/common/src/main/java/org/apache/tsfile/utils/BitMap.java b/java/common/src/main/java/org/apache/tsfile/utils/BitMap.java index 8a4c10b4e..6f0d09748 100644 --- a/java/common/src/main/java/org/apache/tsfile/utils/BitMap.java +++ b/java/common/src/main/java/org/apache/tsfile/utils/BitMap.java @@ -36,8 +36,8 @@ public class BitMap { (byte) 0X7F // 01111111 }; - private final byte[] bits; - private final int size; + private byte[] bits; + private int size; /** Initialize a BitMap with given size. */ public BitMap(int size) { @@ -258,4 +258,22 @@ public static int getSizeOfBytes(int size) { public byte[] getTruncatedByteArray(int size) { return Arrays.copyOf(this.bits, getSizeOfBytes(size)); } + + public void append(BitMap another, int position, int length) { + for (int i = 0; i < length; i++) { + if (another.isMarked(i)) { + mark(position + i); + } else { + unmark(position + i); + } + } + } + + public void extend(int newSize) { + if (size >= newSize) { + return; + } + bits = Arrays.copyOf(bits, getSizeOfBytes(newSize)); + size = newSize; + } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/write/record/Tablet.java b/java/tsfile/src/main/java/org/apache/tsfile/write/record/Tablet.java index ac03949a4..c984b4223 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/write/record/Tablet.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/write/record/Tablet.java @@ -42,6 +42,7 @@ import java.nio.ByteBuffer; import java.time.LocalDate; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -57,6 +58,7 @@ * *

Notice: The tablet should not have empty cell, please use BitMap to denote null value */ +@SuppressWarnings("SuspiciousSystemArraycopy") public class Tablet { private static final int DEFAULT_SIZE = 1024; @@ -95,7 +97,7 @@ public class Tablet { private int rowSize; /** The maximum number of rows for this {@link Tablet} */ - private final int maxRowNumber; + private int maxRowNumber; /** * Return a {@link Tablet} with default specified row number. This is the standard constructor @@ -632,32 +634,36 @@ private void createColumns() { } private Object createValueColumnOfDataType(TSDataType dataType) { + return createValueColumnOfDataType(dataType, maxRowNumber); + } + + private Object createValueColumnOfDataType(TSDataType dataType, int capacity) { Object valueColumn; switch (dataType) { case INT32: - valueColumn = new int[maxRowNumber]; + valueColumn = new int[capacity]; break; case INT64: case TIMESTAMP: - valueColumn = new long[maxRowNumber]; + valueColumn = new long[capacity]; break; case FLOAT: - valueColumn = new float[maxRowNumber]; + valueColumn = new float[capacity]; break; case DOUBLE: - valueColumn = new double[maxRowNumber]; + valueColumn = new double[capacity]; break; case BOOLEAN: - valueColumn = new boolean[maxRowNumber]; + valueColumn = new boolean[capacity]; break; case TEXT: case STRING: case BLOB: - valueColumn = new Binary[maxRowNumber]; + valueColumn = new Binary[capacity]; break; case DATE: - valueColumn = new LocalDate[maxRowNumber]; + valueColumn = new LocalDate[capacity]; break; default: throw new UnSupportedDataTypeException(String.format(NOT_SUPPORT_DATATYPE, dataType)); @@ -1320,4 +1326,156 @@ public boolean isSorted() { } return true; } + + /** + * Append `another` to the tail of this tablet. + * + * @return true if append successfully, false if the tablets have inconsistent insertTarget of + * schemas. + */ + public boolean append(Tablet another) { + return append(another, 0); + } + + /** + * Append `another` to the tail of this tablet, with a preferred capacity after appending. To + * avoid frequent memory copy, it is highly recommended to use this method instead of + * `append(Tablet)` when multiple appending could be involved. + * + * @param preferredCapacity if the total size of the two tablets is below this value, this tablet + * will extend to the capacity. + * @return true if append successfully, false if the tablets have inconsistent insertTarget of * + * schemas. + */ + public boolean append(Tablet another, int preferredCapacity) { + if (!Objects.equals(insertTargetName, another.insertTargetName)) { + return false; + } + + if (!Objects.equals(schemas, another.schemas)) { + return false; + } + + if (!Objects.equals(columnCategories, another.columnCategories)) { + return false; + } + + int prevCapacity = timestamps.length; + appendTimestamps(another, preferredCapacity); + appendValues(another, prevCapacity, preferredCapacity); + appendBitMaps(another, prevCapacity, preferredCapacity); + + maxRowNumber = Math.max(preferredCapacity, Math.max(maxRowNumber, rowSize + another.rowSize)); + rowSize = rowSize + another.rowSize; + return true; + } + + private void appendTimestamps(Tablet another, int preferredCapacity) { + int capacity = timestamps.length; + int thisSize = rowSize; + int thatSize = another.rowSize; + int totalSize = Math.max(thisSize + thatSize, preferredCapacity); + + if (thisSize + thatSize <= capacity && capacity >= preferredCapacity) { + System.arraycopy(another.timestamps, 0, timestamps, thisSize, thatSize); + } else { + timestamps = Arrays.copyOf(timestamps, totalSize); + System.arraycopy(another.timestamps, 0, timestamps, thisSize, thatSize); + } + } + + private void appendValues(Tablet another, int prevCapacity, int preferredCapacity) { + for (int i = 0; i < schemas.size(); i++) { + appendValue(another, prevCapacity, i, schemas.get(i).getType(), preferredCapacity); + } + } + + private void appendValue( + Tablet another, + int prevCapacity, + int columnIndex, + TSDataType dataType, + int preferredCapacity) { + Object thisCol = values[columnIndex]; + Object anotherCol = another.values[columnIndex]; + + int thisSize = rowSize; + int thatSize = another.rowSize; + int totalSize = Math.max(thisSize + thatSize, preferredCapacity); + + if (thisSize + thatSize <= prevCapacity && prevCapacity >= preferredCapacity) { + System.arraycopy(anotherCol, 0, thisCol, thisSize, thatSize); + } else { + Object newCol = createValueColumnOfDataType(dataType, totalSize); + System.arraycopy(thisCol, 0, newCol, 0, thisSize); + System.arraycopy(anotherCol, 0, newCol, thisSize, thatSize); + values[columnIndex] = newCol; + } + } + + private void appendBitMaps(Tablet another, int prevCapacity, int preferredCapacity) { + if (bitMaps == null && another.bitMaps == null) { + return; + } + + if (bitMaps == null) { + appendBitMapsWhenThisNull(another, prevCapacity, preferredCapacity); + } else if (another.bitMaps == null) { + appendBitMapsWhenThatNull(another, prevCapacity, preferredCapacity); + } else { + appendBitMapsWhenNoNull(another, prevCapacity, preferredCapacity); + } + } + + private void appendBitMapsWhenThisNull(Tablet another, int prevCapacity, int preferredCapacity) { + int thisSize = rowSize; + int thatSize = another.rowSize; + bitMaps = new BitMap[schemas.size()]; + int totalSize = Math.max(prevCapacity, Math.max(thisSize + thatSize, preferredCapacity)); + for (int i = 0; i < bitMaps.length; i++) { + if (another.bitMaps[i] != null) { + bitMaps[i] = new BitMap(totalSize); + bitMaps[i].append(another.bitMaps[i], thisSize, thatSize); + } + } + } + + private void appendBitMapsWhenThatNull(Tablet another, int prevCapacity, int preferredCapacity) { + int thisSize = rowSize; + int thatSize = another.rowSize; + int totalSize = Math.max(prevCapacity, Math.max(thisSize + thatSize, preferredCapacity)); + for (BitMap bitMap : bitMaps) { + if (bitMap != null) { + bitMap.extend(totalSize); + for (int j = 0; j < thatSize; j++) { + bitMap.unmark(j + thisSize); + } + } + } + } + + private void appendBitMapsWhenNoNull(Tablet another, int prevCapacity, int preferredCapacity) { + int thisSize = rowSize; + int thatSize = another.rowSize; + int totalSize = Math.max(prevCapacity, Math.max(thisSize + thatSize, preferredCapacity)); + + for (int i = 0; i < bitMaps.length; i++) { + if (bitMaps[i] == null && another.bitMaps[i] == null) { + continue; + } + + if (bitMaps[i] == null && another.bitMaps[i] != null) { + bitMaps[i] = new BitMap(totalSize); + bitMaps[i].append(another.bitMaps[i], thisSize, thatSize); + } else if (bitMaps[i] != null && another.bitMaps[i] == null) { + bitMaps[i].extend(totalSize); + for (int j = 0; j < thatSize; j++) { + bitMaps[i].unmark(j + thisSize); + } + } else if (bitMaps[i] != null && another.bitMaps[i] != null) { + bitMaps[i].extend(totalSize); + bitMaps[i].append(another.bitMaps[i], thisSize, thatSize); + } + } + } } diff --git a/java/tsfile/src/test/java/org/apache/tsfile/write/record/TabletTest.java b/java/tsfile/src/test/java/org/apache/tsfile/write/record/TabletTest.java index 07aedad4f..258284000 100644 --- a/java/tsfile/src/test/java/org/apache/tsfile/write/record/TabletTest.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/write/record/TabletTest.java @@ -20,10 +20,12 @@ package org.apache.tsfile.write.record; import org.apache.tsfile.common.conf.TSFileConfig; +import org.apache.tsfile.enums.ColumnCategory; import org.apache.tsfile.enums.TSDataType; import org.apache.tsfile.file.metadata.enums.TSEncoding; import org.apache.tsfile.utils.Binary; import org.apache.tsfile.utils.BitMap; +import org.apache.tsfile.utils.Pair; import org.apache.tsfile.write.schema.IMeasurementSchema; import org.apache.tsfile.write.schema.MeasurementSchema; @@ -32,13 +34,19 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; import java.time.LocalDate; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashSet; import java.util.List; +import java.util.Random; +import java.util.Set; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; public class TabletTest { @@ -297,4 +305,310 @@ public void testSerializeDateColumnWithNullValue() throws IOException { Assert.assertEquals(tablet.getValue(1, 1), deserializeTablet.getValue(1, 1)); Assert.assertTrue(deserializeTablet.isNull(1, 0)); } + + @Test + public void testAppendInconsistent() { + Tablet t1 = + new Tablet( + "table1", + Arrays.asList("tag1", "s1"), + Arrays.asList(TSDataType.STRING, TSDataType.INT32), + Arrays.asList(ColumnCategory.TAG, ColumnCategory.FIELD)); + + Tablet tWrongTable = + new Tablet( + "table2", + Arrays.asList("tag1", "s1"), + Arrays.asList(TSDataType.STRING, TSDataType.INT32), + Arrays.asList(ColumnCategory.TAG, ColumnCategory.FIELD)); + assertFalse(t1.append(tWrongTable)); + + Tablet tWrongColName = + new Tablet( + "table1", + Arrays.asList("tag2", "s1"), + Arrays.asList(TSDataType.STRING, TSDataType.INT32), + Arrays.asList(ColumnCategory.TAG, ColumnCategory.FIELD)); + assertFalse(t1.append(tWrongColName)); + + Tablet tWrongColType = + new Tablet( + "table1", + Arrays.asList("tag1", "s1"), + Arrays.asList(TSDataType.STRING, TSDataType.INT64), + Arrays.asList(ColumnCategory.TAG, ColumnCategory.FIELD)); + assertFalse(t1.append(tWrongColType)); + + Tablet tWrongColCategory = + new Tablet( + "table1", + Arrays.asList("tag1", "s1"), + Arrays.asList(TSDataType.STRING, TSDataType.INT32), + Arrays.asList(ColumnCategory.TAG, ColumnCategory.TAG)); + assertFalse(t1.append(tWrongColCategory)); + } + + private void fillTablet(Tablet t, int valueOffset, int length) { + for (int i = 0; i < length; i++) { + t.addTimestamp(i, i + valueOffset); + for (int j = 0; j < t.getSchemas().size(); j++) { + switch (t.getSchemas().get(j).getType()) { + case INT32: + t.addValue(i, j, i + valueOffset); + break; + case TIMESTAMP: + case INT64: + t.addValue(i, j, (long) (i + valueOffset)); + break; + case FLOAT: + t.addValue(i, j, (i + valueOffset) * 1.0f); + break; + case DOUBLE: + t.addValue(i, j, (i + valueOffset) * 1.0); + break; + case BOOLEAN: + t.addValue(i, j, (i + valueOffset) % 2 == 0); + break; + case TEXT: + case STRING: + case BLOB: + t.addValue(i, j, String.valueOf(i + valueOffset)); + break; + case DATE: + t.addValue(i, j, LocalDate.of(i + valueOffset, 1, 1)); + break; + } + } + } + } + + private final List colNamesForAppendTest = + Arrays.asList( + "tag1", + TSDataType.INT32.name(), + TSDataType.INT64.name(), + TSDataType.FLOAT.name(), + TSDataType.DOUBLE.name(), + TSDataType.BOOLEAN.name(), + TSDataType.TEXT.name(), + TSDataType.STRING.name(), + TSDataType.BLOB.name(), + TSDataType.TIMESTAMP.name(), + TSDataType.DATE.name()); + private final List dataTypesForAppendTest = + Arrays.asList( + TSDataType.STRING, + TSDataType.INT32, + TSDataType.INT64, + TSDataType.FLOAT, + TSDataType.DOUBLE, + TSDataType.BOOLEAN, + TSDataType.TEXT, + TSDataType.STRING, + TSDataType.BLOB, + TSDataType.TIMESTAMP, + TSDataType.DATE); + private final List categoriesForAppendTest = + Arrays.asList( + ColumnCategory.TAG, + ColumnCategory.FIELD, + ColumnCategory.FIELD, + ColumnCategory.FIELD, + ColumnCategory.FIELD, + ColumnCategory.FIELD, + ColumnCategory.FIELD, + ColumnCategory.FIELD, + ColumnCategory.FIELD, + ColumnCategory.FIELD, + ColumnCategory.FIELD); + + @Test + public void testAppendNoNull() { + Tablet t1 = + new Tablet( + "table1", colNamesForAppendTest, dataTypesForAppendTest, categoriesForAppendTest); + + int t1Size = 100; + fillTablet(t1, 0, t1Size); + + int t2Size = 100; + Tablet t2 = + new Tablet( + "table1", colNamesForAppendTest, dataTypesForAppendTest, categoriesForAppendTest); + fillTablet(t2, t1Size, t2Size); + + assertTrue(t1.append(t2)); + checkAppendedTablet(t1, t1Size + t2Size, null); + } + + @Test + public void testPreferredCapacity() { + Tablet t1 = + new Tablet( + "table1", colNamesForAppendTest, dataTypesForAppendTest, categoriesForAppendTest); + + int t1Size = 100; + fillTablet(t1, 0, t1Size); + + int t2Size = 100; + Tablet t2 = + new Tablet( + "table1", colNamesForAppendTest, dataTypesForAppendTest, categoriesForAppendTest); + fillTablet(t2, t1Size, t2Size); + + assertTrue(t1.append(t2, 10000)); + checkAppendedTablet(t1, t1Size + t2Size, null); + assertEquals(10000, t1.getMaxRowNumber()); + } + + @Test + public void testAppendNullPoints() { + Set> nullPositions = new HashSet<>(); + int nullPointNum = 10; + Random random = new Random(); + + Tablet t1 = + new Tablet( + "table1", colNamesForAppendTest, dataTypesForAppendTest, categoriesForAppendTest); + + int t1Size = 100; + fillTablet(t1, 0, t1Size); + for (int i = 0; i < nullPointNum; i++) { + int rowIndex = random.nextInt(t1Size); + int columnIndex = random.nextInt(colNamesForAppendTest.size()); + nullPositions.add(new Pair<>(rowIndex, columnIndex)); + t1.getBitMaps()[columnIndex].mark(rowIndex); + } + + int t2Size = 100; + Tablet t2 = + new Tablet( + "table1", colNamesForAppendTest, dataTypesForAppendTest, categoriesForAppendTest); + fillTablet(t2, t1Size, t2Size); + for (int i = 0; i < nullPointNum; i++) { + int rowIndex = random.nextInt(t1Size); + int columnIndex = random.nextInt(colNamesForAppendTest.size()); + nullPositions.add(new Pair<>(rowIndex + t1Size, columnIndex)); + t2.getBitMaps()[columnIndex].mark(rowIndex); + } + + assertTrue(t1.append(t2)); + checkAppendedTablet(t1, t1Size + t2Size, nullPositions); + } + + @Test + public void testAppendNullBitMapColumn() { + int nullBitMapNum = 5; + Random random = new Random(); + + Tablet t1 = + new Tablet( + "table1", colNamesForAppendTest, dataTypesForAppendTest, categoriesForAppendTest); + + int t1Size = 100; + fillTablet(t1, 0, t1Size); + for (int i = 0; i < nullBitMapNum; i++) { + int columnIndex = random.nextInt(colNamesForAppendTest.size()); + t1.getBitMaps()[columnIndex] = null; + } + + int t2Size = 100; + Tablet t2 = + new Tablet( + "table1", colNamesForAppendTest, dataTypesForAppendTest, categoriesForAppendTest); + fillTablet(t2, t1Size, t2Size); + for (int i = 0; i < nullBitMapNum; i++) { + int columnIndex = random.nextInt(colNamesForAppendTest.size()); + t2.getBitMaps()[columnIndex] = null; + } + + assertTrue(t1.append(t2)); + assertEquals(t1Size + t2Size, t1.getRowSize()); + + checkAppendedTablet(t1, t1Size + t2Size, null); + } + + @Test + public void testAppendThisNullBitMap() { + + Tablet t1 = + new Tablet( + "table1", colNamesForAppendTest, dataTypesForAppendTest, categoriesForAppendTest); + + int t1Size = 100; + fillTablet(t1, 0, t1Size); + t1.setBitMaps(null); + + int t2Size = 100; + Tablet t2 = + new Tablet( + "table1", colNamesForAppendTest, dataTypesForAppendTest, categoriesForAppendTest); + fillTablet(t2, t1Size, t2Size); + + assertTrue(t1.append(t2)); + checkAppendedTablet(t1, t1Size + t2Size, null); + } + + @Test + public void testMultipleAppend() { + List tablets = new ArrayList<>(); + int tabletNum = 10; + int singleTabletSize = 100; + for (int i = 0; i < tabletNum; i++) { + Tablet tablet = + new Tablet( + "table1", colNamesForAppendTest, dataTypesForAppendTest, categoriesForAppendTest); + fillTablet(tablet, i * singleTabletSize, singleTabletSize); + tablets.add(tablet); + } + for (int i = 1; i < tabletNum; i++) { + assertTrue(tablets.get(0).append(tablets.get(i))); + } + checkAppendedTablet(tablets.get(0), singleTabletSize * tabletNum, null); + } + + private void checkAppendedTablet( + Tablet result, int totalSize, Set> nullPositions) { + assertEquals(totalSize, result.getRowSize()); + + for (int i = 0; i < totalSize; i++) { + assertEquals(i, result.getTimestamp(i)); + for (int j = 0; j < result.getSchemas().size(); j++) { + if (nullPositions != null && nullPositions.contains(new Pair<>(i, j))) { + assertTrue(result.isNull(i, j)); + continue; + } + + assertFalse(result.isNull(i, j)); + switch (result.getSchemas().get(j).getType()) { + case INT32: + assertEquals(i, result.getValue(i, j)); + break; + case TIMESTAMP: + case INT64: + assertEquals((long) i, result.getValue(i, j)); + break; + case FLOAT: + assertEquals(i * 1.0f, (float) result.getValue(i, j), 0.0001f); + break; + case DOUBLE: + assertEquals(i * 1.0, (double) result.getValue(i, j), 0.0001); + break; + case BOOLEAN: + assertEquals(i % 2 == 0, result.getValue(i, j)); + break; + case TEXT: + case BLOB: + case STRING: + assertEquals( + new Binary(String.valueOf(i).getBytes(StandardCharsets.UTF_8)), + result.getValue(i, j)); + break; + case DATE: + assertEquals(LocalDate.of(i, 1, 1), result.getValue(i, j)); + break; + } + } + } + } }