diff --git a/java/common/src/main/java/org/apache/tsfile/block/column/Column.java b/java/common/src/main/java/org/apache/tsfile/block/column/Column.java index f92d3b5c0..e45664634 100644 --- a/java/common/src/main/java/org/apache/tsfile/block/column/Column.java +++ b/java/common/src/main/java/org/apache/tsfile/block/column/Column.java @@ -170,6 +170,23 @@ default TsPrimitiveType getTsPrimitiveType(int position) { /** This method will create a copy of origin column with different array offset. */ Column subColumnCopy(int fromIndex); + /** + * Create a new colum from the current colum by keeping the same elements only with respect to + * {@code positions} that starts at {@code offset} and has length of {@code length}. The + * implementation may return a view over the data in this colum or may return a copy, and the + * implementation is allowed to retain the positions array for use in the view. + */ + Column getPositions(int[] positions, int offset, int length); + + /** + * Returns a column containing the specified positions. Positions to copy are stored in a subarray + * within {@code positions} array that starts at {@code offset} and has length of {@code length}. + * All specified positions must be valid for this block. + * + *

The returned column must be a compact representation of the original column. + */ + Column copyPositions(int[] positions, int offset, int length); + /** reverse the column */ void reverse(); diff --git a/java/common/src/main/java/org/apache/tsfile/block/column/ColumnEncoding.java b/java/common/src/main/java/org/apache/tsfile/block/column/ColumnEncoding.java index 273836836..fbf0b350f 100644 --- a/java/common/src/main/java/org/apache/tsfile/block/column/ColumnEncoding.java +++ b/java/common/src/main/java/org/apache/tsfile/block/column/ColumnEncoding.java @@ -33,7 +33,9 @@ public enum ColumnEncoding { /** TEXT. */ BINARY_ARRAY((byte) 3), /** All data types. */ - RLE((byte) 4); + RLE((byte) 4), + /** All data types. */ + DICTIONARY((byte) 5); private final byte value; @@ -61,6 +63,8 @@ private static ColumnEncoding getColumnEncoding(byte value) { return BINARY_ARRAY; case 4: return RLE; + case 5: + return DICTIONARY; default: throw new IllegalArgumentException("Invalid value: " + value); } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java index 0be88efe2..ec36fc4f1 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java @@ -29,6 +29,8 @@ import java.util.Arrays; import java.util.Optional; +import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkArrayRange; +import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkReadablePosition; import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkValidRegion; import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfBooleanArray; import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfObjectArray; @@ -197,6 +199,34 @@ public void reverse() { } } + @Override + public Column getPositions(int[] positions, int offset, int length) { + checkArrayRange(positions, offset, length); + + return DictionaryColumn.createInternal( + offset, length, this, positions, DictionaryId.randomDictionaryId()); + } + + @Override + public Column copyPositions(int[] positions, int offset, int length) { + checkArrayRange(positions, offset, length); + + boolean[] newValueIsNull = null; + if (valueIsNull != null) { + newValueIsNull = new boolean[length]; + } + Binary[] newValues = new Binary[length]; + for (int i = 0; i < length; i++) { + int position = positions[offset + i]; + checkReadablePosition(this, position); + if (newValueIsNull != null) { + newValueIsNull[i] = valueIsNull[position + arrayOffset]; + } + newValues[i] = values[position + arrayOffset]; + } + return new BinaryColumn(0, length, newValueIsNull, newValues); + } + @Override public int getInstanceSize() { return INSTANCE_SIZE; diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BooleanColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BooleanColumn.java index d534a845a..76b3fb6d7 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BooleanColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BooleanColumn.java @@ -28,6 +28,8 @@ import java.util.Arrays; import java.util.Optional; +import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkArrayRange; +import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkReadablePosition; import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkValidRegion; import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfBooleanArray; @@ -195,6 +197,34 @@ public void reverse() { } } + @Override + public Column getPositions(int[] positions, int offset, int length) { + checkArrayRange(positions, offset, length); + + return DictionaryColumn.createInternal( + offset, length, this, positions, DictionaryId.randomDictionaryId()); + } + + @Override + public Column copyPositions(int[] positions, int offset, int length) { + checkArrayRange(positions, offset, length); + + boolean[] newValueIsNull = null; + if (valueIsNull != null) { + newValueIsNull = new boolean[length]; + } + boolean[] newValues = new boolean[length]; + for (int i = 0; i < length; i++) { + int position = positions[offset + i]; + checkReadablePosition(this, position); + if (newValueIsNull != null) { + newValueIsNull[i] = valueIsNull[position + arrayOffset]; + } + newValues[i] = values[position + arrayOffset]; + } + return new BooleanColumn(0, length, newValueIsNull, newValues); + } + @Override public int getInstanceSize() { return INSTANCE_SIZE; diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/ColumnEncoderFactory.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/ColumnEncoderFactory.java index 71442e8b6..160864b59 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/ColumnEncoderFactory.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/ColumnEncoderFactory.java @@ -38,6 +38,7 @@ private ColumnEncoderFactory() { encodingToEncoder.put(ColumnEncoding.BYTE_ARRAY, new ByteArrayColumnEncoder()); encodingToEncoder.put(ColumnEncoding.BINARY_ARRAY, new BinaryArrayColumnEncoder()); encodingToEncoder.put(ColumnEncoding.RLE, new RunLengthColumnEncoder()); + encodingToEncoder.put(ColumnEncoding.DICTIONARY, new DictionaryColumnEncoder()); } public static ColumnEncoder get(ColumnEncoding columnEncoding) { diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/ColumnUtil.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/ColumnUtil.java index 1549b0dc1..fd5905fa9 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/ColumnUtil.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/ColumnUtil.java @@ -19,6 +19,10 @@ package org.apache.tsfile.read.common.block.column; +import org.apache.tsfile.block.column.Column; + +import java.util.Arrays; + import static java.lang.Math.ceil; import static java.lang.String.format; import static java.util.Objects.requireNonNull; @@ -68,6 +72,17 @@ static void checkValidPosition(int position, int positionCount) { } } + static void checkReadablePosition(Column column, int position) { + checkValidPosition(position, column.getPositionCount()); + } + + static int[] compactArray(int[] array, int index, int length) { + if (index == 0 && length == array.length) { + return array; + } + return Arrays.copyOfRange(array, index, index + length); + } + static int calculateNewArraySize(int currentSize) { // grow array by 50% long newSize = (long) currentSize + (currentSize >> 1); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DictionaryColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DictionaryColumn.java new file mode 100644 index 000000000..13dbd2261 --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DictionaryColumn.java @@ -0,0 +1,600 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.tsfile.read.common.block.column; + +import org.apache.tsfile.block.column.Column; +import org.apache.tsfile.block.column.ColumnEncoding; +import org.apache.tsfile.enums.TSDataType; +import org.apache.tsfile.utils.Binary; +import org.apache.tsfile.utils.RamUsageEstimator; +import org.apache.tsfile.utils.TsPrimitiveType; + +import org.apache.commons.lang3.ArrayUtils; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; + +import static java.lang.Math.min; +import static java.util.Objects.requireNonNull; +import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkArrayRange; +import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkValidPosition; +import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkValidRegion; +import static org.apache.tsfile.read.common.block.column.ColumnUtil.compactArray; +import static org.apache.tsfile.read.common.block.column.DictionaryId.randomDictionaryId; +import static org.apache.tsfile.utils.RamUsageEstimator.sizeOf; + +public final class DictionaryColumn implements Column { + private static final int INSTANCE_SIZE = + (int) RamUsageEstimator.shallowSizeOfInstance(DictionaryColumn.class) + + (int) RamUsageEstimator.shallowSizeOfInstance(DictionaryId.class); + + private int positionCount; + private final Column dictionary; + private final int idsOffset; + private final int[] ids; + private final long retainedSizeInBytes; + private volatile int uniqueIds = -1; + // isSequentialIds is only valid when uniqueIds is computed + private volatile boolean isSequentialIds; + private final DictionaryId dictionarySourceId; + private final boolean mayHaveNull; + + public static Column create(int positionCount, Column dictionary, int[] ids) { + return createInternal(0, positionCount, dictionary, ids, randomDictionaryId()); + } + + /** This should not only be used when creating a projection of another dictionary Column. */ + public static Column createProjectedDictionaryColumn( + int positionCount, Column dictionary, int[] ids, DictionaryId dictionarySourceId) { + return createInternal(0, positionCount, dictionary, ids, dictionarySourceId); + } + + static Column createInternal( + int idsOffset, + int positionCount, + Column dictionary, + int[] ids, + DictionaryId dictionarySourceId) { + if (positionCount == 0) { + return dictionary.getRegionCopy(0, 0); + } + if (positionCount == 1) { + return dictionary.getRegion(ids[idsOffset], 1); + } + + // if dictionary is an RLE then this can just be a new RLE + if (dictionary instanceof RunLengthEncodedColumn) { + RunLengthEncodedColumn rle = (RunLengthEncodedColumn) dictionary; + return new RunLengthEncodedColumn(rle.getValue(), positionCount); + } + + if (dictionary instanceof DictionaryColumn) { + DictionaryColumn dictionaryColumn = (DictionaryColumn) dictionary; + // unwrap dictionary in dictionary + int[] newIds = new int[positionCount]; + for (int position = 0; position < positionCount; position++) { + newIds[position] = dictionaryColumn.getId(ids[idsOffset + position]); + } + return new DictionaryColumn( + 0, + positionCount, + dictionaryColumn.getDictionary(), + newIds, + false, + false, + randomDictionaryId()); + } + + return new DictionaryColumn( + idsOffset, positionCount, dictionary, ids, false, false, dictionarySourceId); + } + + DictionaryColumn( + int idsOffset, + int positionCount, + Column dictionary, + int[] ids, + boolean dictionaryIsCompacted, + boolean isSequentialIds, + DictionaryId dictionarySourceId) { + requireNonNull(dictionary, "dictionary is null"); + requireNonNull(ids, "ids is null"); + + if (positionCount < 0) { + throw new IllegalArgumentException("positionCount is negative"); + } + + this.idsOffset = idsOffset; + if (ids.length - idsOffset < positionCount) { + throw new IllegalArgumentException("ids length is less than positionCount"); + } + + this.positionCount = positionCount; + this.dictionary = dictionary; + this.ids = ids; + this.dictionarySourceId = requireNonNull(dictionarySourceId, "dictionarySourceId is null"); + this.retainedSizeInBytes = INSTANCE_SIZE + sizeOf(ids); + // avoid eager loading of lazy dictionaries + this.mayHaveNull = positionCount > 0 && dictionary.mayHaveNull(); + + if (dictionaryIsCompacted) { + this.uniqueIds = dictionary.getPositionCount(); + } + + if (isSequentialIds && !dictionaryIsCompacted) { + throw new IllegalArgumentException( + "sequential ids flag is only valid for compacted dictionary"); + } + this.isSequentialIds = isSequentialIds; + } + + public int[] getRawIds() { + return ids; + } + + public int getRawIdsOffset() { + return idsOffset; + } + + @Override + public int getPositionCount() { + return positionCount; + } + + private void calculateCompactSize() { + int uniqueIds = 0; + boolean[] used = new boolean[dictionary.getPositionCount()]; + // nested dictionaries are assumed not to have sequential ids + boolean isSequentialIds = true; + int previousPosition = -1; + for (int i = 0; i < positionCount; i++) { + int position = ids[idsOffset + i]; + // Avoid branching + uniqueIds += used[position] ? 0 : 1; + used[position] = true; + if (isSequentialIds) { + // this branch is predictable and will switch paths at most once while looping + isSequentialIds = previousPosition < position; + previousPosition = position; + } + } + + this.uniqueIds = uniqueIds; + this.isSequentialIds = isSequentialIds; + } + + @Override + public long getRetainedSizeInBytes() { + return retainedSizeInBytes + dictionary.getRetainedSizeInBytes(); + } + + @Override + public Column getRegion(int positionOffset, int length) { + checkValidRegion(positionCount, positionOffset, length); + + if (length == positionCount) { + return this; + } + + return new DictionaryColumn( + idsOffset + positionOffset, length, dictionary, ids, false, false, dictionarySourceId); + } + + @Override + public Column getRegionCopy(int position, int length) { + checkValidRegion(positionCount, position, length); + if (length == 0) { + // explicit support for case when length == 0 which might otherwise fail + // on getId(position) if position == positionCount + return dictionary.getRegionCopy(0, 0); + } + // Avoid repeated volatile reads to the uniqueIds field + int uniqueIds = this.uniqueIds; + if (length <= 1 || (uniqueIds == dictionary.getPositionCount() && isSequentialIds)) { + // copy the contiguous range directly via copyRegion + return dictionary.getRegionCopy(getId(position), length); + } + if (uniqueIds == positionCount) { + // each Column position is unique or the dictionary is a nested dictionary Column, + // therefore it makes sense to unwrap this outer dictionary layer directly + return dictionary.copyPositions(ids, idsOffset + position, length); + } + int[] newIds = compactArray(ids, idsOffset + position, length); + if (newIds == ids) { + return this; + } + return new DictionaryColumn(0, length, dictionary, newIds, false, false, randomDictionaryId()) + .compact(); + } + + @Override + public Column subColumn(int fromIndex) { + return getRegion(fromIndex, positionCount - fromIndex); + } + + @Override + public Column subColumnCopy(int fromIndex) { + return getRegionCopy(fromIndex, positionCount - fromIndex); + } + + @Override + public TSDataType getDataType() { + return dictionary.getDataType(); + } + + @Override + public ColumnEncoding getEncoding() { + return ColumnEncoding.DICTIONARY; + } + + @Override + public boolean mayHaveNull() { + return mayHaveNull && dictionary.mayHaveNull(); + } + + @Override + public boolean isNull(int position) { + if (!mayHaveNull) { + return false; + } + checkValidPosition(position, positionCount); + return dictionary.isNull(getIdUnchecked(position)); + } + + @Override + public boolean[] isNull() { + boolean[] original = dictionary.isNull(); + boolean[] res = new boolean[positionCount]; + + if (original == null) { + Arrays.fill(res, false); + return res; + } + + for (int i = 0; i < positionCount; i++) { + int position = ids[idsOffset + i]; + res[i] = dictionary.isNull(position); + } + return res; + } + + @Override + public void setNull(int start, int end) { + throw new UnsupportedOperationException(getClass().getSimpleName()); + } + + @Override + public Column getPositions(int[] positions, int offset, int length) { + checkArrayRange(positions, offset, length); + + int[] newIds = new int[length]; + boolean isCompact = length >= dictionary.getPositionCount() && isCompact(); + boolean[] usedIds = isCompact ? new boolean[dictionary.getPositionCount()] : null; + int uniqueIds = 0; + for (int i = 0; i < length; i++) { + int id = getId(positions[offset + i]); + newIds[i] = id; + if (usedIds != null) { + uniqueIds += usedIds[id] ? 0 : 1; + usedIds[id] = true; + } + } + // All positions must have been referenced in order to be compact + isCompact &= (usedIds != null && usedIds.length == uniqueIds); + DictionaryColumn result = + new DictionaryColumn( + 0, newIds.length, dictionary, newIds, isCompact, false, getDictionarySourceId()); + if (usedIds != null && !isCompact) { + // resulting dictionary is not compact, but we know the number of unique ids and which + // positions are used + result.uniqueIds = uniqueIds; + } + return result; + } + + @Override + public Column copyPositions(int[] positions, int offset, int length) { + checkArrayRange(positions, offset, length); + + if (length <= 1 || uniqueIds == positionCount) { + // each block position is unique or the dictionary is a nested dictionary block, + // therefore it makes sense to unwrap this outer dictionary layer directly + int[] positionsToCopy = new int[length]; + for (int i = 0; i < length; i++) { + positionsToCopy[i] = getId(positions[offset + i]); + } + return dictionary.copyPositions(positionsToCopy, 0, length); + } + + List positionsToCopy = new ArrayList<>(); + HashMap oldIndexToNewIndex = + new HashMap<>(min(length, dictionary.getPositionCount())); + int[] newIds = new int[length]; + + for (int i = 0; i < length; i++) { + int position = positions[offset + i]; + int oldIndex = getId(position); + Integer newId = oldIndexToNewIndex.putIfAbsent(oldIndex, positionsToCopy.size()); + if (newId == null) { + newId = positionsToCopy.size(); + positionsToCopy.add(oldIndex); + } + newIds[i] = newId; + } + Column compactDictionary = + dictionary.copyPositions( + ArrayUtils.toPrimitive(positionsToCopy.toArray(new Integer[0])), + 0, + positionsToCopy.size()); + if (positionsToCopy.size() == length) { + // discovered that all positions are unique, so return the unwrapped underlying dictionary + // directly + return compactDictionary; + } + return new DictionaryColumn( + 0, + length, + compactDictionary, + newIds, + true, // new dictionary is compact + false, + randomDictionaryId()); + } + + @Override + public void reverse() { + int currIndex = idsOffset; + int endIndex = ids.length - 1; + while (currIndex < endIndex) { + int temp = ids[currIndex]; + ids[currIndex] = ids[endIndex]; + ids[endIndex] = temp; + + currIndex++; + endIndex--; + } + } + + @Override + public int getInstanceSize() { + return INSTANCE_SIZE; + } + + @Override + public void setPositionCount(int count) { + positionCount = count; + } + + @Override + public boolean getBoolean(int position) { + return dictionary.getBoolean(getId(position)); + } + + @Override + public int getInt(int position) { + return dictionary.getInt(getId(position)); + } + + @Override + public long getLong(int position) { + return dictionary.getLong(getId(position)); + } + + @Override + public float getFloat(int position) { + return dictionary.getFloat(getId(position)); + } + + @Override + public double getDouble(int position) { + return dictionary.getDouble(getId(position)); + } + + @Override + public Binary getBinary(int position) { + return dictionary.getBinary(getId(position)); + } + + @Override + public Object getObject(int position) { + return dictionary.getObject(getId(position)); + } + + @Override + public TsPrimitiveType getTsPrimitiveType(int position) { + return dictionary.getTsPrimitiveType(position); + } + + @Override + public void reset() { + throw new UnsupportedOperationException(getClass().getSimpleName()); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder("DictionaryColumn{"); + sb.append("positionCount=").append(getPositionCount()); + sb.append('}'); + return sb.toString(); + } + + private Column getUnderlyingColumn() { + return dictionary; + } + + private int getUnderlyingValuePosition(int position) { + return getId(position); + } + + public Column getDictionary() { + return dictionary; + } + + public int getId(int position) { + checkValidPosition(position, positionCount); + return getIdUnchecked(position); + } + + private int getIdUnchecked(int position) { + return ids[position + idsOffset]; + } + + public DictionaryId getDictionarySourceId() { + return dictionarySourceId; + } + + public boolean isCompact() { + if (uniqueIds == -1) { + calculateCompactSize(); + } + return uniqueIds == dictionary.getPositionCount(); + } + + public DictionaryColumn compact() { + if (isCompact()) { + return this; + } + + // determine which dictionary entries are referenced and build a reindex for them + int dictionarySize = dictionary.getPositionCount(); + List dictionaryPositionsToCopy = new ArrayList<>(min(dictionarySize, positionCount)); + int[] remapIndex = new int[dictionarySize]; + Arrays.fill(remapIndex, -1); + + int newIndex = 0; + for (int i = 0; i < positionCount; i++) { + int dictionaryIndex = getId(i); + if (remapIndex[dictionaryIndex] == -1) { + dictionaryPositionsToCopy.add(dictionaryIndex); + remapIndex[dictionaryIndex] = newIndex; + newIndex++; + } + } + + // entire dictionary is referenced + if (dictionaryPositionsToCopy.size() == dictionarySize) { + return this; + } + + // compact the dictionary + int[] newIds = new int[positionCount]; + for (int i = 0; i < positionCount; i++) { + int newId = remapIndex[getId(i)]; + if (newId == -1) { + throw new IllegalStateException("reference to a non-existent key"); + } + newIds[i] = newId; + } + try { + Column compactDictionary = + dictionary.copyPositions( + ArrayUtils.toPrimitive(dictionaryPositionsToCopy.toArray(new Integer[0])), + 0, + dictionaryPositionsToCopy.size()); + return new DictionaryColumn( + 0, + positionCount, + compactDictionary, + newIds, + true, + // Copied dictionary positions match ids sequence. Therefore new + // compact dictionary block has sequential ids only if single position + // is not used more than once. + uniqueIds == positionCount, + randomDictionaryId()); + } catch (UnsupportedOperationException e) { + // ignore if copy positions is not supported for the dictionary block + return this; + } + } + + /** + * Compact the dictionary down to only the used positions for a set of Columns that have been + * projected from the same dictionary. + */ + public static List compactRelatedColumns(List Columns) { + DictionaryColumn firstDictionaryColumn = Columns.get(0); + Column dictionary = firstDictionaryColumn.getDictionary(); + + int positionCount = firstDictionaryColumn.getPositionCount(); + int dictionarySize = dictionary.getPositionCount(); + + // determine which dictionary entries are referenced and build a reindex for them + int[] dictionaryPositionsToCopy = new int[min(dictionarySize, positionCount)]; + int[] remapIndex = new int[dictionarySize]; + Arrays.fill(remapIndex, -1); + + int numberOfIndexes = 0; + for (int i = 0; i < positionCount; i++) { + int position = firstDictionaryColumn.getId(i); + if (remapIndex[position] == -1) { + dictionaryPositionsToCopy[numberOfIndexes] = position; + remapIndex[position] = numberOfIndexes; + numberOfIndexes++; + } + } + + // entire dictionary is referenced + if (numberOfIndexes == dictionarySize) { + return Columns; + } + + // compact the dictionaries + int[] newIds = getNewIds(positionCount, firstDictionaryColumn, remapIndex); + List outputDictionaryColumns = new ArrayList<>(Columns.size()); + DictionaryId newDictionaryId = randomDictionaryId(); + for (DictionaryColumn dictionaryColumn : Columns) { + if (!firstDictionaryColumn + .getDictionarySourceId() + .equals(dictionaryColumn.getDictionarySourceId())) { + throw new IllegalArgumentException("dictionarySourceIds must be the same"); + } + + try { + Column compactDictionary = + dictionaryColumn + .getDictionary() + .copyPositions(dictionaryPositionsToCopy, 0, numberOfIndexes); + outputDictionaryColumns.add( + new DictionaryColumn( + 0, positionCount, compactDictionary, newIds, true, false, newDictionaryId)); + } catch (UnsupportedOperationException e) { + // ignore if copy positions is not supported for the dictionary + outputDictionaryColumns.add(dictionaryColumn); + } + } + return outputDictionaryColumns; + } + + private static int[] getNewIds( + int positionCount, DictionaryColumn dictionaryColumn, int[] remapIndex) { + int[] newIds = new int[positionCount]; + for (int i = 0; i < positionCount; i++) { + int newId = remapIndex[dictionaryColumn.getId(i)]; + if (newId == -1) { + throw new IllegalStateException("reference to a non-existent key"); + } + newIds[i] = newId; + } + return newIds; + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DictionaryColumnEncoder.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DictionaryColumnEncoder.java new file mode 100644 index 000000000..4c55311ec --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DictionaryColumnEncoder.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.tsfile.read.common.block.column; + +import org.apache.tsfile.block.column.Column; +import org.apache.tsfile.block.column.ColumnEncoding; +import org.apache.tsfile.enums.TSDataType; +import org.apache.tsfile.utils.ReadWriteIOUtils; + +import java.io.DataOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; + +public class DictionaryColumnEncoder implements ColumnEncoder { + + @Override + public Column readColumn(ByteBuffer input, TSDataType dataType, int positionCount) { + // dictionary + ColumnEncoder columnEncoder = ColumnEncoderFactory.get(ColumnEncoding.deserializeFrom(input)); + Column dictionary = columnEncoder.readColumn(input, dataType, ReadWriteIOUtils.readInt(input)); + + // ids + int[] ids = ReadWriteIOUtils.readInts(input); + + // flatten the dictionary + return dictionary.copyPositions(ids, 0, positionCount); + } + + @Override + public void writeColumn(DataOutputStream output, Column column) throws IOException { + DictionaryColumn dictionaryColumn = (DictionaryColumn) column; + // compact before serialize + dictionaryColumn = dictionaryColumn.compact(); + + Column dictionary = dictionaryColumn.getDictionary(); + + // dictionary + dictionary.getEncoding().serializeTo(output); + int positionCount = dictionary.getPositionCount(); + ReadWriteIOUtils.write(positionCount, output); + ColumnEncoder columnEncoder = ColumnEncoderFactory.get(dictionary.getEncoding()); + columnEncoder.writeColumn(output, dictionary); + + // ids + ReadWriteIOUtils.writeInts( + dictionaryColumn.getRawIds(), + dictionaryColumn.getRawIdsOffset(), + dictionaryColumn.getPositionCount(), + output); + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DictionaryId.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DictionaryId.java new file mode 100644 index 000000000..405d8a788 --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DictionaryId.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.tsfile.read.common.block.column; + +import java.util.UUID; +import java.util.concurrent.atomic.AtomicLong; + +public class DictionaryId { + private static final UUID nodeId = UUID.randomUUID(); + private static final AtomicLong sequenceGenerator = new AtomicLong(); + + private final long mostSignificantBits; + private final long leastSignificantBits; + private final long sequenceId; + + public static DictionaryId randomDictionaryId() { + return new DictionaryId( + nodeId.getMostSignificantBits(), + nodeId.getLeastSignificantBits(), + sequenceGenerator.getAndIncrement()); + } + + public DictionaryId(long mostSignificantBits, long leastSignificantBits, long sequenceId) { + this.mostSignificantBits = mostSignificantBits; + this.leastSignificantBits = leastSignificantBits; + this.sequenceId = sequenceId; + } + + public long getMostSignificantBits() { + return mostSignificantBits; + } + + public long getLeastSignificantBits() { + return leastSignificantBits; + } + + public long getSequenceId() { + return sequenceId; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + DictionaryId that = (DictionaryId) o; + return mostSignificantBits == that.mostSignificantBits + && leastSignificantBits == that.leastSignificantBits + && sequenceId == that.sequenceId; + } + + @Override + public int hashCode() { + int hashCode = 31 + Long.hashCode(mostSignificantBits); + hashCode = (hashCode * 31) + Long.hashCode(leastSignificantBits); + hashCode = (hashCode * 31) + Long.hashCode(sequenceId); + return hashCode; + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DoubleColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DoubleColumn.java index c065d98d5..afc78208e 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DoubleColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DoubleColumn.java @@ -28,6 +28,8 @@ import java.util.Arrays; import java.util.Optional; +import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkArrayRange; +import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkReadablePosition; import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkValidRegion; import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfBooleanArray; import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfDoubleArray; @@ -180,6 +182,34 @@ public Column subColumnCopy(int fromIndex) { return new DoubleColumn(0, length, valueIsNullCopy, valuesCopy); } + @Override + public Column getPositions(int[] positions, int offset, int length) { + checkArrayRange(positions, offset, length); + + return DictionaryColumn.createInternal( + offset, length, this, positions, DictionaryId.randomDictionaryId()); + } + + @Override + public Column copyPositions(int[] positions, int offset, int length) { + checkArrayRange(positions, offset, length); + + boolean[] newValueIsNull = null; + if (valueIsNull != null) { + newValueIsNull = new boolean[length]; + } + double[] newValues = new double[length]; + for (int i = 0; i < length; i++) { + int position = positions[offset + i]; + checkReadablePosition(this, position); + if (newValueIsNull != null) { + newValueIsNull[i] = valueIsNull[position + arrayOffset]; + } + newValues[i] = values[position + arrayOffset]; + } + return new DoubleColumn(0, length, newValueIsNull, newValues); + } + @Override public void reverse() { for (int i = arrayOffset, j = arrayOffset + positionCount - 1; i < j; i++, j--) { diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/FloatColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/FloatColumn.java index 563067368..98bcd90e5 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/FloatColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/FloatColumn.java @@ -28,6 +28,8 @@ import java.util.Arrays; import java.util.Optional; +import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkArrayRange; +import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkReadablePosition; import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkValidRegion; import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfBooleanArray; import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfFloatArray; @@ -195,6 +197,34 @@ public void reverse() { } } + @Override + public Column getPositions(int[] positions, int offset, int length) { + checkArrayRange(positions, offset, length); + + return DictionaryColumn.createInternal( + offset, length, this, positions, DictionaryId.randomDictionaryId()); + } + + @Override + public Column copyPositions(int[] positions, int offset, int length) { + checkArrayRange(positions, offset, length); + + boolean[] newValueIsNull = null; + if (valueIsNull != null) { + newValueIsNull = new boolean[length]; + } + float[] newValues = new float[length]; + for (int i = 0; i < length; i++) { + int position = positions[offset + i]; + checkReadablePosition(this, position); + if (newValueIsNull != null) { + newValueIsNull[i] = valueIsNull[position + arrayOffset]; + } + newValues[i] = values[position + arrayOffset]; + } + return new FloatColumn(0, length, newValueIsNull, newValues); + } + @Override public int getInstanceSize() { return INSTANCE_SIZE; diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/IntColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/IntColumn.java index 2c3c35c12..3e0caa91c 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/IntColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/IntColumn.java @@ -28,6 +28,8 @@ import java.util.Arrays; import java.util.Optional; +import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkArrayRange; +import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkReadablePosition; import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkValidRegion; import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfBooleanArray; import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfIntArray; @@ -195,6 +197,34 @@ public void reverse() { } } + @Override + public Column getPositions(int[] positions, int offset, int length) { + checkArrayRange(positions, offset, length); + + return DictionaryColumn.createInternal( + offset, length, this, positions, DictionaryId.randomDictionaryId()); + } + + @Override + public Column copyPositions(int[] positions, int offset, int length) { + checkArrayRange(positions, offset, length); + + boolean[] newValueIsNull = null; + if (valueIsNull != null) { + newValueIsNull = new boolean[length]; + } + int[] newValues = new int[length]; + for (int i = 0; i < length; i++) { + int position = positions[offset + i]; + checkReadablePosition(this, position); + if (newValueIsNull != null) { + newValueIsNull[i] = valueIsNull[position + arrayOffset]; + } + newValues[i] = values[position + arrayOffset]; + } + return new IntColumn(0, length, newValueIsNull, newValues); + } + @Override public int getInstanceSize() { return INSTANCE_SIZE; diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/LongColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/LongColumn.java index 1c022369d..bbbb06631 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/LongColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/LongColumn.java @@ -28,6 +28,8 @@ import java.util.Arrays; import java.util.Optional; +import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkArrayRange; +import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkReadablePosition; import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkValidRegion; import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfBooleanArray; import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfLongArray; @@ -195,6 +197,34 @@ public void reverse() { } } + @Override + public Column getPositions(int[] positions, int offset, int length) { + checkArrayRange(positions, offset, length); + + return DictionaryColumn.createInternal( + offset, length, this, positions, DictionaryId.randomDictionaryId()); + } + + @Override + public Column copyPositions(int[] positions, int offset, int length) { + checkArrayRange(positions, offset, length); + + boolean[] newValueIsNull = null; + if (valueIsNull != null) { + newValueIsNull = new boolean[length]; + } + long[] newValues = new long[length]; + for (int i = 0; i < length; i++) { + int position = positions[offset + i]; + checkReadablePosition(this, position); + if (newValueIsNull != null) { + newValueIsNull[i] = valueIsNull[position + arrayOffset]; + } + newValues[i] = values[position + arrayOffset]; + } + return new LongColumn(0, length, newValueIsNull, newValues); + } + @Override public int getInstanceSize() { return INSTANCE_SIZE; diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/NullColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/NullColumn.java index 9b2a7bccd..9b999c412 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/NullColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/NullColumn.java @@ -25,6 +25,8 @@ import org.apache.tsfile.utils.RamUsageEstimator; import static java.util.Objects.requireNonNull; +import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkArrayRange; +import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkReadablePosition; import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkValidRegion; /** @@ -107,6 +109,23 @@ public Column subColumnCopy(int fromIndex) { return subColumn(fromIndex); } + @Override + public Column getPositions(int[] positions, int offset, int length) { + // cost of copyPositions is small, no need to transform to DictionaryColumn + return copyPositions(positions, offset, length); + } + + @Override + public Column copyPositions(int[] positions, int offset, int length) { + checkArrayRange(positions, offset, length); + + for (int position : positions) { + checkReadablePosition(this, position); + } + + return new NullColumn(length); + } + @Override public void reverse() { // do nothing diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/RunLengthEncodedColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/RunLengthEncodedColumn.java index d20cc4436..6a5465864 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/RunLengthEncodedColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/RunLengthEncodedColumn.java @@ -29,6 +29,8 @@ import java.util.Arrays; import static java.util.Objects.requireNonNull; +import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkArrayRange; +import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkValidPosition; import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkValidRegion; public class RunLengthEncodedColumn implements Column { @@ -117,48 +119,6 @@ public boolean[] getBooleans() { return res; } - @Override - public int[] getInts() { - int[] res = new int[positionCount]; - Arrays.fill(res, value.getInt(0)); - return res; - } - - @Override - public long[] getLongs() { - long[] res = new long[positionCount]; - Arrays.fill(res, value.getLong(0)); - return res; - } - - @Override - public float[] getFloats() { - float[] res = new float[positionCount]; - Arrays.fill(res, value.getFloat(0)); - return res; - } - - @Override - public double[] getDoubles() { - double[] res = new double[positionCount]; - Arrays.fill(res, value.getDouble(0)); - return res; - } - - @Override - public Binary[] getBinaries() { - Binary[] res = new Binary[positionCount]; - Arrays.fill(res, value.getBinary(0)); - return res; - } - - @Override - public Object[] getObjects() { - Object[] res = new Object[positionCount]; - Arrays.fill(res, value.getObject(0)); - return res; - } - @Override public TsPrimitiveType getTsPrimitiveType(int position) { return value.getTsPrimitiveType(0); @@ -221,6 +181,26 @@ public Column subColumnCopy(int fromIndex) { return new RunLengthEncodedColumn(valueCopy, positionCount - fromIndex); } + @Override + public Column getPositions(int[] positions, int offset, int length) { + checkArrayRange(positions, offset, length); + + for (int i = offset; i < offset + length; i++) { + checkValidPosition(positions[i], positionCount); + } + return new RunLengthEncodedColumn(value, length); + } + + @Override + public Column copyPositions(int[] positions, int offset, int length) { + checkArrayRange(positions, offset, length); + + for (int i = offset; i < offset + length; i++) { + checkValidPosition(positions[i], positionCount); + } + return new RunLengthEncodedColumn(value.subColumnCopy(0), length); + } + @Override public void reverse() { // do nothing because the underlying column has only one value @@ -238,6 +218,8 @@ public void setPositionCount(int count) { @Override public void setNull(int start, int end) { - value.setNull(start, end); + throw new UnsupportedOperationException( + String.format( + "set null of %s is not supported !", RunLengthEncodedColumn.class.getSimpleName())); } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java index d681b079f..cce059d71 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java @@ -26,6 +26,8 @@ import java.util.Arrays; +import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkArrayRange; +import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkReadablePosition; import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfLongArray; public class TimeColumn implements Column { @@ -159,6 +161,27 @@ public void reverse() { } } + @Override + public Column getPositions(int[] positions, int offset, int length) { + checkArrayRange(positions, offset, length); + + return DictionaryColumn.createInternal( + offset, length, this, positions, DictionaryId.randomDictionaryId()); + } + + @Override + public Column copyPositions(int[] positions, int offset, int length) { + checkArrayRange(positions, offset, length); + + long[] newValues = new long[length]; + for (int i = 0; i < length; i++) { + int position = positions[offset + i]; + checkReadablePosition(this, position); + newValues[i] = values[position + arrayOffset]; + } + return new TimeColumn(0, length, newValues); + } + public long getStartTime() { return values[arrayOffset]; } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/utils/ReadWriteIOUtils.java b/java/tsfile/src/main/java/org/apache/tsfile/utils/ReadWriteIOUtils.java index 50aed1179..12728830e 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/utils/ReadWriteIOUtils.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/utils/ReadWriteIOUtils.java @@ -1241,6 +1241,23 @@ public static Object readObject(ByteBuffer buffer) { } } + public static void writeInts(int[] ints, int offset, int length, OutputStream outputStream) + throws IOException { + write(length, outputStream); + for (int i = 0; i < length; i++) { + write(ints[offset + i], outputStream); + } + } + + public static int[] readInts(ByteBuffer buffer) { + int length = readInt(buffer); + int[] ints = new int[length]; + for (int i = 0; i < length; i++) { + ints[i] = readInt(buffer); + } + return ints; + } + public static ByteBuffer clone(ByteBuffer original) { ByteBuffer clone = ByteBuffer.allocate(original.remaining()); while (original.hasRemaining()) { diff --git a/java/tsfile/src/test/java/org/apache/tsfile/common/block/DictionaryColumnEncodingTest.java b/java/tsfile/src/test/java/org/apache/tsfile/common/block/DictionaryColumnEncodingTest.java new file mode 100644 index 000000000..3fd03a11c --- /dev/null +++ b/java/tsfile/src/test/java/org/apache/tsfile/common/block/DictionaryColumnEncodingTest.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.tsfile.common.block; + +import org.apache.tsfile.block.column.Column; +import org.apache.tsfile.block.column.ColumnBuilder; +import org.apache.tsfile.block.column.ColumnEncoding; +import org.apache.tsfile.enums.TSDataType; +import org.apache.tsfile.read.common.block.column.ColumnEncoder; +import org.apache.tsfile.read.common.block.column.ColumnEncoderFactory; +import org.apache.tsfile.read.common.block.column.DictionaryColumn; +import org.apache.tsfile.read.common.block.column.LongColumn; +import org.apache.tsfile.read.common.block.column.LongColumnBuilder; + +import org.junit.Assert; +import org.junit.Test; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; + +public class DictionaryColumnEncodingTest { + @Test + public void testIntColumn() { + ColumnBuilder columnBuilder = new LongColumnBuilder(null, 10); + for (int i = 0; i < 10; i++) { + if (i == 5) { + columnBuilder.appendNull(); + } else { + columnBuilder.writeLong(i % 5); + } + } + Column originalColumn = columnBuilder.build(); + DictionaryColumn input = + (DictionaryColumn) originalColumn.getPositions(new int[] {1, 3, 5, 8, 9}, 1, 4); + + ColumnEncoder encoder = ColumnEncoderFactory.get(ColumnEncoding.DICTIONARY); + + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(byteArrayOutputStream); + try { + encoder.writeColumn(dos, input); + } catch (IOException e) { + e.printStackTrace(); + Assert.fail(); + } + + ByteBuffer buffer = ByteBuffer.wrap(byteArrayOutputStream.toByteArray()); + Column output = encoder.readColumn(buffer, TSDataType.INT64, input.getPositionCount()); + Assert.assertTrue(output instanceof LongColumn); + + Assert.assertEquals(input.getPositionCount(), output.getPositionCount()); + Assert.assertTrue(output.mayHaveNull()); + Assert.assertEquals(3, output.getLong(0)); + Assert.assertTrue(output.isNull(1)); + Assert.assertEquals(3, output.getLong(2)); + Assert.assertEquals(4, output.getLong(3)); + } +} diff --git a/java/tsfile/src/test/java/org/apache/tsfile/read/common/ColumnTest.java b/java/tsfile/src/test/java/org/apache/tsfile/read/common/ColumnTest.java index 80ac44828..0cb3f2e78 100644 --- a/java/tsfile/src/test/java/org/apache/tsfile/read/common/ColumnTest.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/read/common/ColumnTest.java @@ -20,10 +20,12 @@ package org.apache.tsfile.read.common; import org.apache.tsfile.block.column.Column; +import org.apache.tsfile.block.column.ColumnBuilder; import org.apache.tsfile.read.common.block.column.BinaryColumn; import org.apache.tsfile.read.common.block.column.BinaryColumnBuilder; import org.apache.tsfile.read.common.block.column.BooleanColumn; import org.apache.tsfile.read.common.block.column.BooleanColumnBuilder; +import org.apache.tsfile.read.common.block.column.DictionaryColumn; import org.apache.tsfile.read.common.block.column.DoubleColumn; import org.apache.tsfile.read.common.block.column.DoubleColumnBuilder; import org.apache.tsfile.read.common.block.column.FloatColumn; @@ -41,6 +43,8 @@ import org.junit.Assert; import org.junit.Test; +import java.util.Optional; + public class ColumnTest { @Test @@ -83,6 +87,27 @@ public void timeColumnSubColumnCopyTest() { Assert.assertNotSame(timeColumn1.getLongs(), timeColumn2.getLongs()); } + @Test + public void timeColumnFilterPositionsTest() { + ColumnBuilder columnBuilder = new TimeColumnBuilder(null, 10); + for (int i = 0; i < 10; i++) { + columnBuilder.writeLong(i); + } + Column originalColumn = columnBuilder.build(); + + int[] selectedPositions = new int[] {1, 3, 5}; + Column columnByGetPositions = originalColumn.getPositions(selectedPositions, 1, 2); + Assert.assertTrue(columnByGetPositions instanceof DictionaryColumn); + Assert.assertEquals(2, columnByGetPositions.getPositionCount()); + Assert.assertEquals(3, columnByGetPositions.getLong(0)); + Assert.assertEquals(5, columnByGetPositions.getLong(1)); + + Column columnByCopyPositions = originalColumn.copyPositions(selectedPositions, 1, 2); + Assert.assertEquals(2, columnByCopyPositions.getPositionCount()); + Assert.assertEquals(3, columnByCopyPositions.getLong(0)); + Assert.assertEquals(5, columnByCopyPositions.getLong(1)); + } + @Test public void binaryColumnSubColumnTest() { BinaryColumnBuilder columnBuilder = new BinaryColumnBuilder(null, 10); @@ -123,6 +148,27 @@ public void binaryColumnSubColumnCopyTest() { Assert.assertNotSame(binaryColumn1.getBinaries(), binaryColumn2.getBinaries()); } + @Test + public void binaryColumnFilterPositionsTest() { + ColumnBuilder columnBuilder = new BinaryColumnBuilder(null, 10); + for (int i = 0; i < 10; i++) { + columnBuilder.writeBinary(BytesUtils.valueOf(String.valueOf(i))); + } + Column originalColumn = columnBuilder.build(); + + int[] selectedPositions = new int[] {1, 3, 5}; + Column columnByGetPositions = originalColumn.getPositions(selectedPositions, 1, 2); + Assert.assertTrue(columnByGetPositions instanceof DictionaryColumn); + Assert.assertEquals(2, columnByGetPositions.getPositionCount()); + Assert.assertEquals("3", columnByGetPositions.getBinary(0).toString()); + Assert.assertEquals("5", columnByGetPositions.getBinary(1).toString()); + + Column columnByCopyPositions = originalColumn.copyPositions(selectedPositions, 1, 2); + Assert.assertEquals(2, columnByCopyPositions.getPositionCount()); + Assert.assertEquals("3", columnByCopyPositions.getBinary(0).toString()); + Assert.assertEquals("5", columnByCopyPositions.getBinary(1).toString()); + } + @Test public void booleanColumnSubColumnTest() { BooleanColumnBuilder columnBuilder = new BooleanColumnBuilder(null, 10); @@ -165,6 +211,27 @@ public void booleanColumnSubColumnCopyTest() { Assert.assertNotSame(booleanColumn1.getBooleans(), booleanColumn2.getBooleans()); } + @Test + public void booleanColumnFilterPositionsTest() { + ColumnBuilder columnBuilder = new BooleanColumnBuilder(null, 10); + for (int i = 0; i < 10; i++) { + columnBuilder.writeBoolean(i % 2 == 0); + } + Column originalColumn = columnBuilder.build(); + + int[] selectedPositions = new int[] {1, 3, 6}; + Column columnByGetPositions = originalColumn.getPositions(selectedPositions, 1, 2); + Assert.assertTrue(columnByGetPositions instanceof DictionaryColumn); + Assert.assertEquals(2, columnByGetPositions.getPositionCount()); + Assert.assertFalse(columnByGetPositions.getBoolean(0)); + Assert.assertTrue(columnByGetPositions.getBoolean(1)); + + Column columnByCopyPositions = originalColumn.copyPositions(selectedPositions, 1, 2); + Assert.assertEquals(2, columnByCopyPositions.getPositionCount()); + Assert.assertFalse(columnByCopyPositions.getBoolean(0)); + Assert.assertTrue(columnByCopyPositions.getBoolean(1)); + } + @Test public void doubleColumnSubColumnTest() { DoubleColumnBuilder columnBuilder = new DoubleColumnBuilder(null, 10); @@ -205,6 +272,27 @@ public void doubleColumnSubColumnCopyTest() { Assert.assertNotSame(doubleColumn1.getDoubles(), doubleColumn2.getDoubles()); } + @Test + public void doubleColumnFilterPositionsTest() { + ColumnBuilder columnBuilder = new DoubleColumnBuilder(null, 10); + for (int i = 0; i < 10; i++) { + columnBuilder.writeDouble(i); + } + Column originalColumn = columnBuilder.build(); + + int[] selectedPositions = new int[] {1, 3, 5}; + Column columnByGetPositions = originalColumn.getPositions(selectedPositions, 1, 2); + Assert.assertTrue(columnByGetPositions instanceof DictionaryColumn); + Assert.assertEquals(2, columnByGetPositions.getPositionCount()); + Assert.assertEquals(3, columnByGetPositions.getDouble(0), 0.01); + Assert.assertEquals(5, columnByGetPositions.getDouble(1), 0.01); + + Column columnByCopyPositions = originalColumn.copyPositions(selectedPositions, 1, 2); + Assert.assertEquals(2, columnByCopyPositions.getPositionCount()); + Assert.assertEquals(3, columnByCopyPositions.getDouble(0), 0.01); + Assert.assertEquals(5, columnByCopyPositions.getDouble(1), 0.01); + } + @Test public void floatColumnSubColumnTest() { FloatColumnBuilder columnBuilder = new FloatColumnBuilder(null, 10); @@ -245,6 +333,28 @@ public void floatColumnSubColumnCopyTest() { Assert.assertNotSame(floatColumn1.getFloats(), floatColumn2.getFloats()); } + @Test + public void floatColumnFilterPositionsTest() { + ColumnBuilder columnBuilder = new FloatColumnBuilder(null, 10); + for (int i = 0; i < 10; i++) { + columnBuilder.writeFloat(i); + } + Column originalColumn = columnBuilder.build(); + + int[] selectedPositions = new int[] {1, 3, 5}; + Column columnByGetPositions = originalColumn.getPositions(selectedPositions, 1, 2); + Assert.assertTrue(columnByGetPositions instanceof DictionaryColumn); + Assert.assertEquals(2, columnByGetPositions.getPositionCount()); + Assert.assertEquals(3, columnByGetPositions.getFloat(0), 0.01); + Assert.assertEquals(5, columnByGetPositions.getFloat(1), 0.01); + + Column columnByCopyPositions = originalColumn.copyPositions(selectedPositions, 1, 2); + Assert.assertNotSame(originalColumn.getFloats(), columnByCopyPositions.getFloats()); + Assert.assertEquals(2, columnByCopyPositions.getPositionCount()); + Assert.assertEquals(3, columnByCopyPositions.getFloat(0), 0.01); + Assert.assertEquals(5, columnByCopyPositions.getFloat(1), 0.01); + } + @Test public void intColumnSubColumnTest() { IntColumnBuilder columnBuilder = new IntColumnBuilder(null, 10); @@ -285,6 +395,27 @@ public void intColumnSubColumnCopyTest() { Assert.assertNotSame(intColumn1.getInts(), intColumn2.getInts()); } + @Test + public void intColumnFilterPositionsTest() { + ColumnBuilder columnBuilder = new IntColumnBuilder(null, 10); + for (int i = 0; i < 10; i++) { + columnBuilder.writeInt(i); + } + Column originalColumn = columnBuilder.build(); + + int[] selectedPositions = new int[] {1, 3, 5}; + Column columnByGetPositions = originalColumn.getPositions(selectedPositions, 1, 2); + Assert.assertTrue(columnByGetPositions instanceof DictionaryColumn); + Assert.assertEquals(2, columnByGetPositions.getPositionCount()); + Assert.assertEquals(3, columnByGetPositions.getInt(0)); + Assert.assertEquals(5, columnByGetPositions.getInt(1)); + + Column columnByCopyPositions = originalColumn.copyPositions(selectedPositions, 1, 2); + Assert.assertEquals(2, columnByCopyPositions.getPositionCount()); + Assert.assertEquals(3, columnByCopyPositions.getInt(0)); + Assert.assertEquals(5, columnByCopyPositions.getInt(1)); + } + @Test public void longColumnSubColumnTest() { LongColumnBuilder columnBuilder = new LongColumnBuilder(null, 10); @@ -325,6 +456,27 @@ public void longColumnSubColumnCopyTest() { Assert.assertNotSame(longColumn1.getLongs(), longColumn2.getLongs()); } + @Test + public void longColumnFilterPositionsTest() { + ColumnBuilder columnBuilder = new LongColumnBuilder(null, 10); + for (int i = 0; i < 10; i++) { + columnBuilder.writeLong(i); + } + Column originalColumn = columnBuilder.build(); + + int[] selectedPositions = new int[] {1, 3, 5}; + Column columnByGetPositions = originalColumn.getPositions(selectedPositions, 1, 2); + Assert.assertTrue(columnByGetPositions instanceof DictionaryColumn); + Assert.assertEquals(2, columnByGetPositions.getPositionCount()); + Assert.assertEquals(3, columnByGetPositions.getLong(0)); + Assert.assertEquals(5, columnByGetPositions.getLong(1)); + + Column columnByCopyPositions = originalColumn.copyPositions(selectedPositions, 1, 2); + Assert.assertEquals(2, columnByCopyPositions.getPositionCount()); + Assert.assertEquals(3, columnByCopyPositions.getLong(0)); + Assert.assertEquals(5, columnByCopyPositions.getLong(1)); + } + @Test public void nullColumnTest() { NullColumn nullColumn = new NullColumn(10); @@ -335,6 +487,19 @@ public void nullColumnTest() { Assert.assertEquals(1, subColumn.getPositionCount()); } + @Test + public void nullColumnFilterPositionsTest() { + Column originalColumn = new NullColumn(10); + + int[] selectedPositions = new int[] {1, 3, 5}; + Column columnByGetPositions = originalColumn.getPositions(selectedPositions, 1, 2); + Assert.assertTrue(columnByGetPositions instanceof NullColumn); + Assert.assertEquals(2, columnByGetPositions.getPositionCount()); + + Column columnByCopyPositions = originalColumn.copyPositions(selectedPositions, 1, 2); + Assert.assertEquals(2, columnByCopyPositions.getPositionCount()); + } + @Test public void runLengthEncodedColumnSubColumnTest() { LongColumnBuilder longColumnBuilder = new LongColumnBuilder(null, 1); @@ -366,4 +531,78 @@ public void runLengthEncodedColumnSubColumnCopyTest() { Assert.assertEquals(1, column.getLong(0)); Assert.assertEquals(1, column.getLong(1)); } + + @Test + public void runLengthEncodedColumnFilterPositionsTest() { + Column originalColumn = + new RunLengthEncodedColumn(new LongColumn(1, Optional.empty(), new long[] {1L}), 10); + + int[] selectedPositions = new int[] {1, 3, 5}; + Column columnByGetPositions = originalColumn.getPositions(selectedPositions, 1, 2); + Assert.assertTrue(columnByGetPositions instanceof RunLengthEncodedColumn); + Assert.assertEquals(2, columnByGetPositions.getPositionCount()); + Assert.assertEquals(1, columnByGetPositions.getLong(0)); + Assert.assertEquals(1, columnByGetPositions.getLong(1)); + + Column columnByCopyPositions = originalColumn.copyPositions(selectedPositions, 1, 2); + Assert.assertEquals(2, columnByCopyPositions.getPositionCount()); + Assert.assertEquals(1, columnByCopyPositions.getLong(0)); + Assert.assertEquals(1, columnByCopyPositions.getLong(1)); + } + + @Test + public void dictionaryColumnTest() { + ColumnBuilder columnBuilder = new LongColumnBuilder(null, 10); + for (int i = 0; i < 10; i++) { + columnBuilder.writeLong(i % 5); + } + Column originalColumn = columnBuilder.build(); + DictionaryColumn dictionaryColumn = + (DictionaryColumn) originalColumn.getPositions(new int[] {1, 3, 5, 8, 9}, 1, 4); + Assert.assertEquals(3, dictionaryColumn.getLong(0)); + Assert.assertEquals(0, dictionaryColumn.getLong(1)); + Assert.assertEquals(3, dictionaryColumn.getLong(2)); + Assert.assertEquals(4, dictionaryColumn.getLong(3)); + + int[] selectedPositions = new int[] {0, 2}; + Column columnByGetPositions = dictionaryColumn.getPositions(selectedPositions, 0, 2); + Assert.assertEquals(2, columnByGetPositions.getPositionCount()); + Assert.assertEquals(3, columnByGetPositions.getLong(0)); + Assert.assertEquals(3, columnByGetPositions.getLong(1)); + + Column columnByCopyPositions = dictionaryColumn.copyPositions(selectedPositions, 0, 2); + Assert.assertEquals(2, columnByCopyPositions.getPositionCount()); + Assert.assertEquals(3, columnByCopyPositions.getLong(0)); + Assert.assertEquals(3, columnByCopyPositions.getLong(1)); + + Column columnByGetRegion = dictionaryColumn.getRegion(1, 2); + Assert.assertEquals(2, columnByGetRegion.getPositionCount()); + Assert.assertEquals(0, columnByGetRegion.getLong(0)); + Assert.assertEquals(3, columnByGetRegion.getLong(1)); + + Column columnByGetRegionCopy = dictionaryColumn.getRegionCopy(1, 2); + Assert.assertEquals(2, columnByGetRegionCopy.getPositionCount()); + Assert.assertEquals(0, columnByGetRegionCopy.getLong(0)); + Assert.assertEquals(3, columnByGetRegionCopy.getLong(1)); + + dictionaryColumn.reverse(); + Assert.assertEquals(4, dictionaryColumn.getLong(0)); + Assert.assertEquals(3, dictionaryColumn.getLong(1)); + Assert.assertEquals(0, dictionaryColumn.getLong(2)); + Assert.assertEquals(3, dictionaryColumn.getLong(3)); + + try { + dictionaryColumn.setNull(0, 1); + Assert.fail(); + } catch (Exception e) { + // Ignore + } + + try { + dictionaryColumn.getLongs(); + Assert.fail(); + } catch (Exception e) { + // Ignore + } + } }