diff --git a/java/common/src/main/java/org/apache/tsfile/block/column/Column.java b/java/common/src/main/java/org/apache/tsfile/block/column/Column.java
index f92d3b5c0..e45664634 100644
--- a/java/common/src/main/java/org/apache/tsfile/block/column/Column.java
+++ b/java/common/src/main/java/org/apache/tsfile/block/column/Column.java
@@ -170,6 +170,23 @@ default TsPrimitiveType getTsPrimitiveType(int position) {
/** This method will create a copy of origin column with different array offset. */
Column subColumnCopy(int fromIndex);
+ /**
+ * Create a new colum from the current colum by keeping the same elements only with respect to
+ * {@code positions} that starts at {@code offset} and has length of {@code length}. The
+ * implementation may return a view over the data in this colum or may return a copy, and the
+ * implementation is allowed to retain the positions array for use in the view.
+ */
+ Column getPositions(int[] positions, int offset, int length);
+
+ /**
+ * Returns a column containing the specified positions. Positions to copy are stored in a subarray
+ * within {@code positions} array that starts at {@code offset} and has length of {@code length}.
+ * All specified positions must be valid for this block.
+ *
+ *
The returned column must be a compact representation of the original column.
+ */
+ Column copyPositions(int[] positions, int offset, int length);
+
/** reverse the column */
void reverse();
diff --git a/java/common/src/main/java/org/apache/tsfile/block/column/ColumnEncoding.java b/java/common/src/main/java/org/apache/tsfile/block/column/ColumnEncoding.java
index 273836836..fbf0b350f 100644
--- a/java/common/src/main/java/org/apache/tsfile/block/column/ColumnEncoding.java
+++ b/java/common/src/main/java/org/apache/tsfile/block/column/ColumnEncoding.java
@@ -33,7 +33,9 @@ public enum ColumnEncoding {
/** TEXT. */
BINARY_ARRAY((byte) 3),
/** All data types. */
- RLE((byte) 4);
+ RLE((byte) 4),
+ /** All data types. */
+ DICTIONARY((byte) 5);
private final byte value;
@@ -61,6 +63,8 @@ private static ColumnEncoding getColumnEncoding(byte value) {
return BINARY_ARRAY;
case 4:
return RLE;
+ case 5:
+ return DICTIONARY;
default:
throw new IllegalArgumentException("Invalid value: " + value);
}
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java
index 0be88efe2..ec36fc4f1 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java
@@ -29,6 +29,8 @@
import java.util.Arrays;
import java.util.Optional;
+import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkArrayRange;
+import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkReadablePosition;
import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkValidRegion;
import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfBooleanArray;
import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfObjectArray;
@@ -197,6 +199,34 @@ public void reverse() {
}
}
+ @Override
+ public Column getPositions(int[] positions, int offset, int length) {
+ checkArrayRange(positions, offset, length);
+
+ return DictionaryColumn.createInternal(
+ offset, length, this, positions, DictionaryId.randomDictionaryId());
+ }
+
+ @Override
+ public Column copyPositions(int[] positions, int offset, int length) {
+ checkArrayRange(positions, offset, length);
+
+ boolean[] newValueIsNull = null;
+ if (valueIsNull != null) {
+ newValueIsNull = new boolean[length];
+ }
+ Binary[] newValues = new Binary[length];
+ for (int i = 0; i < length; i++) {
+ int position = positions[offset + i];
+ checkReadablePosition(this, position);
+ if (newValueIsNull != null) {
+ newValueIsNull[i] = valueIsNull[position + arrayOffset];
+ }
+ newValues[i] = values[position + arrayOffset];
+ }
+ return new BinaryColumn(0, length, newValueIsNull, newValues);
+ }
+
@Override
public int getInstanceSize() {
return INSTANCE_SIZE;
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BooleanColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BooleanColumn.java
index d534a845a..76b3fb6d7 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BooleanColumn.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BooleanColumn.java
@@ -28,6 +28,8 @@
import java.util.Arrays;
import java.util.Optional;
+import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkArrayRange;
+import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkReadablePosition;
import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkValidRegion;
import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfBooleanArray;
@@ -195,6 +197,34 @@ public void reverse() {
}
}
+ @Override
+ public Column getPositions(int[] positions, int offset, int length) {
+ checkArrayRange(positions, offset, length);
+
+ return DictionaryColumn.createInternal(
+ offset, length, this, positions, DictionaryId.randomDictionaryId());
+ }
+
+ @Override
+ public Column copyPositions(int[] positions, int offset, int length) {
+ checkArrayRange(positions, offset, length);
+
+ boolean[] newValueIsNull = null;
+ if (valueIsNull != null) {
+ newValueIsNull = new boolean[length];
+ }
+ boolean[] newValues = new boolean[length];
+ for (int i = 0; i < length; i++) {
+ int position = positions[offset + i];
+ checkReadablePosition(this, position);
+ if (newValueIsNull != null) {
+ newValueIsNull[i] = valueIsNull[position + arrayOffset];
+ }
+ newValues[i] = values[position + arrayOffset];
+ }
+ return new BooleanColumn(0, length, newValueIsNull, newValues);
+ }
+
@Override
public int getInstanceSize() {
return INSTANCE_SIZE;
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/ColumnEncoderFactory.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/ColumnEncoderFactory.java
index 71442e8b6..160864b59 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/ColumnEncoderFactory.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/ColumnEncoderFactory.java
@@ -38,6 +38,7 @@ private ColumnEncoderFactory() {
encodingToEncoder.put(ColumnEncoding.BYTE_ARRAY, new ByteArrayColumnEncoder());
encodingToEncoder.put(ColumnEncoding.BINARY_ARRAY, new BinaryArrayColumnEncoder());
encodingToEncoder.put(ColumnEncoding.RLE, new RunLengthColumnEncoder());
+ encodingToEncoder.put(ColumnEncoding.DICTIONARY, new DictionaryColumnEncoder());
}
public static ColumnEncoder get(ColumnEncoding columnEncoding) {
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/ColumnUtil.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/ColumnUtil.java
index 1549b0dc1..fd5905fa9 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/ColumnUtil.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/ColumnUtil.java
@@ -19,6 +19,10 @@
package org.apache.tsfile.read.common.block.column;
+import org.apache.tsfile.block.column.Column;
+
+import java.util.Arrays;
+
import static java.lang.Math.ceil;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
@@ -68,6 +72,17 @@ static void checkValidPosition(int position, int positionCount) {
}
}
+ static void checkReadablePosition(Column column, int position) {
+ checkValidPosition(position, column.getPositionCount());
+ }
+
+ static int[] compactArray(int[] array, int index, int length) {
+ if (index == 0 && length == array.length) {
+ return array;
+ }
+ return Arrays.copyOfRange(array, index, index + length);
+ }
+
static int calculateNewArraySize(int currentSize) {
// grow array by 50%
long newSize = (long) currentSize + (currentSize >> 1);
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DictionaryColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DictionaryColumn.java
new file mode 100644
index 000000000..13dbd2261
--- /dev/null
+++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DictionaryColumn.java
@@ -0,0 +1,600 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.tsfile.read.common.block.column;
+
+import org.apache.tsfile.block.column.Column;
+import org.apache.tsfile.block.column.ColumnEncoding;
+import org.apache.tsfile.enums.TSDataType;
+import org.apache.tsfile.utils.Binary;
+import org.apache.tsfile.utils.RamUsageEstimator;
+import org.apache.tsfile.utils.TsPrimitiveType;
+
+import org.apache.commons.lang3.ArrayUtils;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+
+import static java.lang.Math.min;
+import static java.util.Objects.requireNonNull;
+import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkArrayRange;
+import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkValidPosition;
+import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkValidRegion;
+import static org.apache.tsfile.read.common.block.column.ColumnUtil.compactArray;
+import static org.apache.tsfile.read.common.block.column.DictionaryId.randomDictionaryId;
+import static org.apache.tsfile.utils.RamUsageEstimator.sizeOf;
+
+public final class DictionaryColumn implements Column {
+ private static final int INSTANCE_SIZE =
+ (int) RamUsageEstimator.shallowSizeOfInstance(DictionaryColumn.class)
+ + (int) RamUsageEstimator.shallowSizeOfInstance(DictionaryId.class);
+
+ private int positionCount;
+ private final Column dictionary;
+ private final int idsOffset;
+ private final int[] ids;
+ private final long retainedSizeInBytes;
+ private volatile int uniqueIds = -1;
+ // isSequentialIds is only valid when uniqueIds is computed
+ private volatile boolean isSequentialIds;
+ private final DictionaryId dictionarySourceId;
+ private final boolean mayHaveNull;
+
+ public static Column create(int positionCount, Column dictionary, int[] ids) {
+ return createInternal(0, positionCount, dictionary, ids, randomDictionaryId());
+ }
+
+ /** This should not only be used when creating a projection of another dictionary Column. */
+ public static Column createProjectedDictionaryColumn(
+ int positionCount, Column dictionary, int[] ids, DictionaryId dictionarySourceId) {
+ return createInternal(0, positionCount, dictionary, ids, dictionarySourceId);
+ }
+
+ static Column createInternal(
+ int idsOffset,
+ int positionCount,
+ Column dictionary,
+ int[] ids,
+ DictionaryId dictionarySourceId) {
+ if (positionCount == 0) {
+ return dictionary.getRegionCopy(0, 0);
+ }
+ if (positionCount == 1) {
+ return dictionary.getRegion(ids[idsOffset], 1);
+ }
+
+ // if dictionary is an RLE then this can just be a new RLE
+ if (dictionary instanceof RunLengthEncodedColumn) {
+ RunLengthEncodedColumn rle = (RunLengthEncodedColumn) dictionary;
+ return new RunLengthEncodedColumn(rle.getValue(), positionCount);
+ }
+
+ if (dictionary instanceof DictionaryColumn) {
+ DictionaryColumn dictionaryColumn = (DictionaryColumn) dictionary;
+ // unwrap dictionary in dictionary
+ int[] newIds = new int[positionCount];
+ for (int position = 0; position < positionCount; position++) {
+ newIds[position] = dictionaryColumn.getId(ids[idsOffset + position]);
+ }
+ return new DictionaryColumn(
+ 0,
+ positionCount,
+ dictionaryColumn.getDictionary(),
+ newIds,
+ false,
+ false,
+ randomDictionaryId());
+ }
+
+ return new DictionaryColumn(
+ idsOffset, positionCount, dictionary, ids, false, false, dictionarySourceId);
+ }
+
+ DictionaryColumn(
+ int idsOffset,
+ int positionCount,
+ Column dictionary,
+ int[] ids,
+ boolean dictionaryIsCompacted,
+ boolean isSequentialIds,
+ DictionaryId dictionarySourceId) {
+ requireNonNull(dictionary, "dictionary is null");
+ requireNonNull(ids, "ids is null");
+
+ if (positionCount < 0) {
+ throw new IllegalArgumentException("positionCount is negative");
+ }
+
+ this.idsOffset = idsOffset;
+ if (ids.length - idsOffset < positionCount) {
+ throw new IllegalArgumentException("ids length is less than positionCount");
+ }
+
+ this.positionCount = positionCount;
+ this.dictionary = dictionary;
+ this.ids = ids;
+ this.dictionarySourceId = requireNonNull(dictionarySourceId, "dictionarySourceId is null");
+ this.retainedSizeInBytes = INSTANCE_SIZE + sizeOf(ids);
+ // avoid eager loading of lazy dictionaries
+ this.mayHaveNull = positionCount > 0 && dictionary.mayHaveNull();
+
+ if (dictionaryIsCompacted) {
+ this.uniqueIds = dictionary.getPositionCount();
+ }
+
+ if (isSequentialIds && !dictionaryIsCompacted) {
+ throw new IllegalArgumentException(
+ "sequential ids flag is only valid for compacted dictionary");
+ }
+ this.isSequentialIds = isSequentialIds;
+ }
+
+ public int[] getRawIds() {
+ return ids;
+ }
+
+ public int getRawIdsOffset() {
+ return idsOffset;
+ }
+
+ @Override
+ public int getPositionCount() {
+ return positionCount;
+ }
+
+ private void calculateCompactSize() {
+ int uniqueIds = 0;
+ boolean[] used = new boolean[dictionary.getPositionCount()];
+ // nested dictionaries are assumed not to have sequential ids
+ boolean isSequentialIds = true;
+ int previousPosition = -1;
+ for (int i = 0; i < positionCount; i++) {
+ int position = ids[idsOffset + i];
+ // Avoid branching
+ uniqueIds += used[position] ? 0 : 1;
+ used[position] = true;
+ if (isSequentialIds) {
+ // this branch is predictable and will switch paths at most once while looping
+ isSequentialIds = previousPosition < position;
+ previousPosition = position;
+ }
+ }
+
+ this.uniqueIds = uniqueIds;
+ this.isSequentialIds = isSequentialIds;
+ }
+
+ @Override
+ public long getRetainedSizeInBytes() {
+ return retainedSizeInBytes + dictionary.getRetainedSizeInBytes();
+ }
+
+ @Override
+ public Column getRegion(int positionOffset, int length) {
+ checkValidRegion(positionCount, positionOffset, length);
+
+ if (length == positionCount) {
+ return this;
+ }
+
+ return new DictionaryColumn(
+ idsOffset + positionOffset, length, dictionary, ids, false, false, dictionarySourceId);
+ }
+
+ @Override
+ public Column getRegionCopy(int position, int length) {
+ checkValidRegion(positionCount, position, length);
+ if (length == 0) {
+ // explicit support for case when length == 0 which might otherwise fail
+ // on getId(position) if position == positionCount
+ return dictionary.getRegionCopy(0, 0);
+ }
+ // Avoid repeated volatile reads to the uniqueIds field
+ int uniqueIds = this.uniqueIds;
+ if (length <= 1 || (uniqueIds == dictionary.getPositionCount() && isSequentialIds)) {
+ // copy the contiguous range directly via copyRegion
+ return dictionary.getRegionCopy(getId(position), length);
+ }
+ if (uniqueIds == positionCount) {
+ // each Column position is unique or the dictionary is a nested dictionary Column,
+ // therefore it makes sense to unwrap this outer dictionary layer directly
+ return dictionary.copyPositions(ids, idsOffset + position, length);
+ }
+ int[] newIds = compactArray(ids, idsOffset + position, length);
+ if (newIds == ids) {
+ return this;
+ }
+ return new DictionaryColumn(0, length, dictionary, newIds, false, false, randomDictionaryId())
+ .compact();
+ }
+
+ @Override
+ public Column subColumn(int fromIndex) {
+ return getRegion(fromIndex, positionCount - fromIndex);
+ }
+
+ @Override
+ public Column subColumnCopy(int fromIndex) {
+ return getRegionCopy(fromIndex, positionCount - fromIndex);
+ }
+
+ @Override
+ public TSDataType getDataType() {
+ return dictionary.getDataType();
+ }
+
+ @Override
+ public ColumnEncoding getEncoding() {
+ return ColumnEncoding.DICTIONARY;
+ }
+
+ @Override
+ public boolean mayHaveNull() {
+ return mayHaveNull && dictionary.mayHaveNull();
+ }
+
+ @Override
+ public boolean isNull(int position) {
+ if (!mayHaveNull) {
+ return false;
+ }
+ checkValidPosition(position, positionCount);
+ return dictionary.isNull(getIdUnchecked(position));
+ }
+
+ @Override
+ public boolean[] isNull() {
+ boolean[] original = dictionary.isNull();
+ boolean[] res = new boolean[positionCount];
+
+ if (original == null) {
+ Arrays.fill(res, false);
+ return res;
+ }
+
+ for (int i = 0; i < positionCount; i++) {
+ int position = ids[idsOffset + i];
+ res[i] = dictionary.isNull(position);
+ }
+ return res;
+ }
+
+ @Override
+ public void setNull(int start, int end) {
+ throw new UnsupportedOperationException(getClass().getSimpleName());
+ }
+
+ @Override
+ public Column getPositions(int[] positions, int offset, int length) {
+ checkArrayRange(positions, offset, length);
+
+ int[] newIds = new int[length];
+ boolean isCompact = length >= dictionary.getPositionCount() && isCompact();
+ boolean[] usedIds = isCompact ? new boolean[dictionary.getPositionCount()] : null;
+ int uniqueIds = 0;
+ for (int i = 0; i < length; i++) {
+ int id = getId(positions[offset + i]);
+ newIds[i] = id;
+ if (usedIds != null) {
+ uniqueIds += usedIds[id] ? 0 : 1;
+ usedIds[id] = true;
+ }
+ }
+ // All positions must have been referenced in order to be compact
+ isCompact &= (usedIds != null && usedIds.length == uniqueIds);
+ DictionaryColumn result =
+ new DictionaryColumn(
+ 0, newIds.length, dictionary, newIds, isCompact, false, getDictionarySourceId());
+ if (usedIds != null && !isCompact) {
+ // resulting dictionary is not compact, but we know the number of unique ids and which
+ // positions are used
+ result.uniqueIds = uniqueIds;
+ }
+ return result;
+ }
+
+ @Override
+ public Column copyPositions(int[] positions, int offset, int length) {
+ checkArrayRange(positions, offset, length);
+
+ if (length <= 1 || uniqueIds == positionCount) {
+ // each block position is unique or the dictionary is a nested dictionary block,
+ // therefore it makes sense to unwrap this outer dictionary layer directly
+ int[] positionsToCopy = new int[length];
+ for (int i = 0; i < length; i++) {
+ positionsToCopy[i] = getId(positions[offset + i]);
+ }
+ return dictionary.copyPositions(positionsToCopy, 0, length);
+ }
+
+ List positionsToCopy = new ArrayList<>();
+ HashMap oldIndexToNewIndex =
+ new HashMap<>(min(length, dictionary.getPositionCount()));
+ int[] newIds = new int[length];
+
+ for (int i = 0; i < length; i++) {
+ int position = positions[offset + i];
+ int oldIndex = getId(position);
+ Integer newId = oldIndexToNewIndex.putIfAbsent(oldIndex, positionsToCopy.size());
+ if (newId == null) {
+ newId = positionsToCopy.size();
+ positionsToCopy.add(oldIndex);
+ }
+ newIds[i] = newId;
+ }
+ Column compactDictionary =
+ dictionary.copyPositions(
+ ArrayUtils.toPrimitive(positionsToCopy.toArray(new Integer[0])),
+ 0,
+ positionsToCopy.size());
+ if (positionsToCopy.size() == length) {
+ // discovered that all positions are unique, so return the unwrapped underlying dictionary
+ // directly
+ return compactDictionary;
+ }
+ return new DictionaryColumn(
+ 0,
+ length,
+ compactDictionary,
+ newIds,
+ true, // new dictionary is compact
+ false,
+ randomDictionaryId());
+ }
+
+ @Override
+ public void reverse() {
+ int currIndex = idsOffset;
+ int endIndex = ids.length - 1;
+ while (currIndex < endIndex) {
+ int temp = ids[currIndex];
+ ids[currIndex] = ids[endIndex];
+ ids[endIndex] = temp;
+
+ currIndex++;
+ endIndex--;
+ }
+ }
+
+ @Override
+ public int getInstanceSize() {
+ return INSTANCE_SIZE;
+ }
+
+ @Override
+ public void setPositionCount(int count) {
+ positionCount = count;
+ }
+
+ @Override
+ public boolean getBoolean(int position) {
+ return dictionary.getBoolean(getId(position));
+ }
+
+ @Override
+ public int getInt(int position) {
+ return dictionary.getInt(getId(position));
+ }
+
+ @Override
+ public long getLong(int position) {
+ return dictionary.getLong(getId(position));
+ }
+
+ @Override
+ public float getFloat(int position) {
+ return dictionary.getFloat(getId(position));
+ }
+
+ @Override
+ public double getDouble(int position) {
+ return dictionary.getDouble(getId(position));
+ }
+
+ @Override
+ public Binary getBinary(int position) {
+ return dictionary.getBinary(getId(position));
+ }
+
+ @Override
+ public Object getObject(int position) {
+ return dictionary.getObject(getId(position));
+ }
+
+ @Override
+ public TsPrimitiveType getTsPrimitiveType(int position) {
+ return dictionary.getTsPrimitiveType(position);
+ }
+
+ @Override
+ public void reset() {
+ throw new UnsupportedOperationException(getClass().getSimpleName());
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder("DictionaryColumn{");
+ sb.append("positionCount=").append(getPositionCount());
+ sb.append('}');
+ return sb.toString();
+ }
+
+ private Column getUnderlyingColumn() {
+ return dictionary;
+ }
+
+ private int getUnderlyingValuePosition(int position) {
+ return getId(position);
+ }
+
+ public Column getDictionary() {
+ return dictionary;
+ }
+
+ public int getId(int position) {
+ checkValidPosition(position, positionCount);
+ return getIdUnchecked(position);
+ }
+
+ private int getIdUnchecked(int position) {
+ return ids[position + idsOffset];
+ }
+
+ public DictionaryId getDictionarySourceId() {
+ return dictionarySourceId;
+ }
+
+ public boolean isCompact() {
+ if (uniqueIds == -1) {
+ calculateCompactSize();
+ }
+ return uniqueIds == dictionary.getPositionCount();
+ }
+
+ public DictionaryColumn compact() {
+ if (isCompact()) {
+ return this;
+ }
+
+ // determine which dictionary entries are referenced and build a reindex for them
+ int dictionarySize = dictionary.getPositionCount();
+ List dictionaryPositionsToCopy = new ArrayList<>(min(dictionarySize, positionCount));
+ int[] remapIndex = new int[dictionarySize];
+ Arrays.fill(remapIndex, -1);
+
+ int newIndex = 0;
+ for (int i = 0; i < positionCount; i++) {
+ int dictionaryIndex = getId(i);
+ if (remapIndex[dictionaryIndex] == -1) {
+ dictionaryPositionsToCopy.add(dictionaryIndex);
+ remapIndex[dictionaryIndex] = newIndex;
+ newIndex++;
+ }
+ }
+
+ // entire dictionary is referenced
+ if (dictionaryPositionsToCopy.size() == dictionarySize) {
+ return this;
+ }
+
+ // compact the dictionary
+ int[] newIds = new int[positionCount];
+ for (int i = 0; i < positionCount; i++) {
+ int newId = remapIndex[getId(i)];
+ if (newId == -1) {
+ throw new IllegalStateException("reference to a non-existent key");
+ }
+ newIds[i] = newId;
+ }
+ try {
+ Column compactDictionary =
+ dictionary.copyPositions(
+ ArrayUtils.toPrimitive(dictionaryPositionsToCopy.toArray(new Integer[0])),
+ 0,
+ dictionaryPositionsToCopy.size());
+ return new DictionaryColumn(
+ 0,
+ positionCount,
+ compactDictionary,
+ newIds,
+ true,
+ // Copied dictionary positions match ids sequence. Therefore new
+ // compact dictionary block has sequential ids only if single position
+ // is not used more than once.
+ uniqueIds == positionCount,
+ randomDictionaryId());
+ } catch (UnsupportedOperationException e) {
+ // ignore if copy positions is not supported for the dictionary block
+ return this;
+ }
+ }
+
+ /**
+ * Compact the dictionary down to only the used positions for a set of Columns that have been
+ * projected from the same dictionary.
+ */
+ public static List compactRelatedColumns(List Columns) {
+ DictionaryColumn firstDictionaryColumn = Columns.get(0);
+ Column dictionary = firstDictionaryColumn.getDictionary();
+
+ int positionCount = firstDictionaryColumn.getPositionCount();
+ int dictionarySize = dictionary.getPositionCount();
+
+ // determine which dictionary entries are referenced and build a reindex for them
+ int[] dictionaryPositionsToCopy = new int[min(dictionarySize, positionCount)];
+ int[] remapIndex = new int[dictionarySize];
+ Arrays.fill(remapIndex, -1);
+
+ int numberOfIndexes = 0;
+ for (int i = 0; i < positionCount; i++) {
+ int position = firstDictionaryColumn.getId(i);
+ if (remapIndex[position] == -1) {
+ dictionaryPositionsToCopy[numberOfIndexes] = position;
+ remapIndex[position] = numberOfIndexes;
+ numberOfIndexes++;
+ }
+ }
+
+ // entire dictionary is referenced
+ if (numberOfIndexes == dictionarySize) {
+ return Columns;
+ }
+
+ // compact the dictionaries
+ int[] newIds = getNewIds(positionCount, firstDictionaryColumn, remapIndex);
+ List outputDictionaryColumns = new ArrayList<>(Columns.size());
+ DictionaryId newDictionaryId = randomDictionaryId();
+ for (DictionaryColumn dictionaryColumn : Columns) {
+ if (!firstDictionaryColumn
+ .getDictionarySourceId()
+ .equals(dictionaryColumn.getDictionarySourceId())) {
+ throw new IllegalArgumentException("dictionarySourceIds must be the same");
+ }
+
+ try {
+ Column compactDictionary =
+ dictionaryColumn
+ .getDictionary()
+ .copyPositions(dictionaryPositionsToCopy, 0, numberOfIndexes);
+ outputDictionaryColumns.add(
+ new DictionaryColumn(
+ 0, positionCount, compactDictionary, newIds, true, false, newDictionaryId));
+ } catch (UnsupportedOperationException e) {
+ // ignore if copy positions is not supported for the dictionary
+ outputDictionaryColumns.add(dictionaryColumn);
+ }
+ }
+ return outputDictionaryColumns;
+ }
+
+ private static int[] getNewIds(
+ int positionCount, DictionaryColumn dictionaryColumn, int[] remapIndex) {
+ int[] newIds = new int[positionCount];
+ for (int i = 0; i < positionCount; i++) {
+ int newId = remapIndex[dictionaryColumn.getId(i)];
+ if (newId == -1) {
+ throw new IllegalStateException("reference to a non-existent key");
+ }
+ newIds[i] = newId;
+ }
+ return newIds;
+ }
+}
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DictionaryColumnEncoder.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DictionaryColumnEncoder.java
new file mode 100644
index 000000000..4c55311ec
--- /dev/null
+++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DictionaryColumnEncoder.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.tsfile.read.common.block.column;
+
+import org.apache.tsfile.block.column.Column;
+import org.apache.tsfile.block.column.ColumnEncoding;
+import org.apache.tsfile.enums.TSDataType;
+import org.apache.tsfile.utils.ReadWriteIOUtils;
+
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+public class DictionaryColumnEncoder implements ColumnEncoder {
+
+ @Override
+ public Column readColumn(ByteBuffer input, TSDataType dataType, int positionCount) {
+ // dictionary
+ ColumnEncoder columnEncoder = ColumnEncoderFactory.get(ColumnEncoding.deserializeFrom(input));
+ Column dictionary = columnEncoder.readColumn(input, dataType, ReadWriteIOUtils.readInt(input));
+
+ // ids
+ int[] ids = ReadWriteIOUtils.readInts(input);
+
+ // flatten the dictionary
+ return dictionary.copyPositions(ids, 0, positionCount);
+ }
+
+ @Override
+ public void writeColumn(DataOutputStream output, Column column) throws IOException {
+ DictionaryColumn dictionaryColumn = (DictionaryColumn) column;
+ // compact before serialize
+ dictionaryColumn = dictionaryColumn.compact();
+
+ Column dictionary = dictionaryColumn.getDictionary();
+
+ // dictionary
+ dictionary.getEncoding().serializeTo(output);
+ int positionCount = dictionary.getPositionCount();
+ ReadWriteIOUtils.write(positionCount, output);
+ ColumnEncoder columnEncoder = ColumnEncoderFactory.get(dictionary.getEncoding());
+ columnEncoder.writeColumn(output, dictionary);
+
+ // ids
+ ReadWriteIOUtils.writeInts(
+ dictionaryColumn.getRawIds(),
+ dictionaryColumn.getRawIdsOffset(),
+ dictionaryColumn.getPositionCount(),
+ output);
+ }
+}
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DictionaryId.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DictionaryId.java
new file mode 100644
index 000000000..405d8a788
--- /dev/null
+++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DictionaryId.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.tsfile.read.common.block.column;
+
+import java.util.UUID;
+import java.util.concurrent.atomic.AtomicLong;
+
+public class DictionaryId {
+ private static final UUID nodeId = UUID.randomUUID();
+ private static final AtomicLong sequenceGenerator = new AtomicLong();
+
+ private final long mostSignificantBits;
+ private final long leastSignificantBits;
+ private final long sequenceId;
+
+ public static DictionaryId randomDictionaryId() {
+ return new DictionaryId(
+ nodeId.getMostSignificantBits(),
+ nodeId.getLeastSignificantBits(),
+ sequenceGenerator.getAndIncrement());
+ }
+
+ public DictionaryId(long mostSignificantBits, long leastSignificantBits, long sequenceId) {
+ this.mostSignificantBits = mostSignificantBits;
+ this.leastSignificantBits = leastSignificantBits;
+ this.sequenceId = sequenceId;
+ }
+
+ public long getMostSignificantBits() {
+ return mostSignificantBits;
+ }
+
+ public long getLeastSignificantBits() {
+ return leastSignificantBits;
+ }
+
+ public long getSequenceId() {
+ return sequenceId;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ DictionaryId that = (DictionaryId) o;
+ return mostSignificantBits == that.mostSignificantBits
+ && leastSignificantBits == that.leastSignificantBits
+ && sequenceId == that.sequenceId;
+ }
+
+ @Override
+ public int hashCode() {
+ int hashCode = 31 + Long.hashCode(mostSignificantBits);
+ hashCode = (hashCode * 31) + Long.hashCode(leastSignificantBits);
+ hashCode = (hashCode * 31) + Long.hashCode(sequenceId);
+ return hashCode;
+ }
+}
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DoubleColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DoubleColumn.java
index c065d98d5..afc78208e 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DoubleColumn.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DoubleColumn.java
@@ -28,6 +28,8 @@
import java.util.Arrays;
import java.util.Optional;
+import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkArrayRange;
+import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkReadablePosition;
import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkValidRegion;
import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfBooleanArray;
import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfDoubleArray;
@@ -180,6 +182,34 @@ public Column subColumnCopy(int fromIndex) {
return new DoubleColumn(0, length, valueIsNullCopy, valuesCopy);
}
+ @Override
+ public Column getPositions(int[] positions, int offset, int length) {
+ checkArrayRange(positions, offset, length);
+
+ return DictionaryColumn.createInternal(
+ offset, length, this, positions, DictionaryId.randomDictionaryId());
+ }
+
+ @Override
+ public Column copyPositions(int[] positions, int offset, int length) {
+ checkArrayRange(positions, offset, length);
+
+ boolean[] newValueIsNull = null;
+ if (valueIsNull != null) {
+ newValueIsNull = new boolean[length];
+ }
+ double[] newValues = new double[length];
+ for (int i = 0; i < length; i++) {
+ int position = positions[offset + i];
+ checkReadablePosition(this, position);
+ if (newValueIsNull != null) {
+ newValueIsNull[i] = valueIsNull[position + arrayOffset];
+ }
+ newValues[i] = values[position + arrayOffset];
+ }
+ return new DoubleColumn(0, length, newValueIsNull, newValues);
+ }
+
@Override
public void reverse() {
for (int i = arrayOffset, j = arrayOffset + positionCount - 1; i < j; i++, j--) {
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/FloatColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/FloatColumn.java
index 563067368..98bcd90e5 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/FloatColumn.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/FloatColumn.java
@@ -28,6 +28,8 @@
import java.util.Arrays;
import java.util.Optional;
+import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkArrayRange;
+import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkReadablePosition;
import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkValidRegion;
import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfBooleanArray;
import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfFloatArray;
@@ -195,6 +197,34 @@ public void reverse() {
}
}
+ @Override
+ public Column getPositions(int[] positions, int offset, int length) {
+ checkArrayRange(positions, offset, length);
+
+ return DictionaryColumn.createInternal(
+ offset, length, this, positions, DictionaryId.randomDictionaryId());
+ }
+
+ @Override
+ public Column copyPositions(int[] positions, int offset, int length) {
+ checkArrayRange(positions, offset, length);
+
+ boolean[] newValueIsNull = null;
+ if (valueIsNull != null) {
+ newValueIsNull = new boolean[length];
+ }
+ float[] newValues = new float[length];
+ for (int i = 0; i < length; i++) {
+ int position = positions[offset + i];
+ checkReadablePosition(this, position);
+ if (newValueIsNull != null) {
+ newValueIsNull[i] = valueIsNull[position + arrayOffset];
+ }
+ newValues[i] = values[position + arrayOffset];
+ }
+ return new FloatColumn(0, length, newValueIsNull, newValues);
+ }
+
@Override
public int getInstanceSize() {
return INSTANCE_SIZE;
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/IntColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/IntColumn.java
index 2c3c35c12..3e0caa91c 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/IntColumn.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/IntColumn.java
@@ -28,6 +28,8 @@
import java.util.Arrays;
import java.util.Optional;
+import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkArrayRange;
+import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkReadablePosition;
import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkValidRegion;
import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfBooleanArray;
import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfIntArray;
@@ -195,6 +197,34 @@ public void reverse() {
}
}
+ @Override
+ public Column getPositions(int[] positions, int offset, int length) {
+ checkArrayRange(positions, offset, length);
+
+ return DictionaryColumn.createInternal(
+ offset, length, this, positions, DictionaryId.randomDictionaryId());
+ }
+
+ @Override
+ public Column copyPositions(int[] positions, int offset, int length) {
+ checkArrayRange(positions, offset, length);
+
+ boolean[] newValueIsNull = null;
+ if (valueIsNull != null) {
+ newValueIsNull = new boolean[length];
+ }
+ int[] newValues = new int[length];
+ for (int i = 0; i < length; i++) {
+ int position = positions[offset + i];
+ checkReadablePosition(this, position);
+ if (newValueIsNull != null) {
+ newValueIsNull[i] = valueIsNull[position + arrayOffset];
+ }
+ newValues[i] = values[position + arrayOffset];
+ }
+ return new IntColumn(0, length, newValueIsNull, newValues);
+ }
+
@Override
public int getInstanceSize() {
return INSTANCE_SIZE;
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/LongColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/LongColumn.java
index 1c022369d..bbbb06631 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/LongColumn.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/LongColumn.java
@@ -28,6 +28,8 @@
import java.util.Arrays;
import java.util.Optional;
+import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkArrayRange;
+import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkReadablePosition;
import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkValidRegion;
import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfBooleanArray;
import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfLongArray;
@@ -195,6 +197,34 @@ public void reverse() {
}
}
+ @Override
+ public Column getPositions(int[] positions, int offset, int length) {
+ checkArrayRange(positions, offset, length);
+
+ return DictionaryColumn.createInternal(
+ offset, length, this, positions, DictionaryId.randomDictionaryId());
+ }
+
+ @Override
+ public Column copyPositions(int[] positions, int offset, int length) {
+ checkArrayRange(positions, offset, length);
+
+ boolean[] newValueIsNull = null;
+ if (valueIsNull != null) {
+ newValueIsNull = new boolean[length];
+ }
+ long[] newValues = new long[length];
+ for (int i = 0; i < length; i++) {
+ int position = positions[offset + i];
+ checkReadablePosition(this, position);
+ if (newValueIsNull != null) {
+ newValueIsNull[i] = valueIsNull[position + arrayOffset];
+ }
+ newValues[i] = values[position + arrayOffset];
+ }
+ return new LongColumn(0, length, newValueIsNull, newValues);
+ }
+
@Override
public int getInstanceSize() {
return INSTANCE_SIZE;
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/NullColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/NullColumn.java
index 9b2a7bccd..9b999c412 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/NullColumn.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/NullColumn.java
@@ -25,6 +25,8 @@
import org.apache.tsfile.utils.RamUsageEstimator;
import static java.util.Objects.requireNonNull;
+import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkArrayRange;
+import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkReadablePosition;
import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkValidRegion;
/**
@@ -107,6 +109,23 @@ public Column subColumnCopy(int fromIndex) {
return subColumn(fromIndex);
}
+ @Override
+ public Column getPositions(int[] positions, int offset, int length) {
+ // cost of copyPositions is small, no need to transform to DictionaryColumn
+ return copyPositions(positions, offset, length);
+ }
+
+ @Override
+ public Column copyPositions(int[] positions, int offset, int length) {
+ checkArrayRange(positions, offset, length);
+
+ for (int position : positions) {
+ checkReadablePosition(this, position);
+ }
+
+ return new NullColumn(length);
+ }
+
@Override
public void reverse() {
// do nothing
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/RunLengthEncodedColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/RunLengthEncodedColumn.java
index d20cc4436..6a5465864 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/RunLengthEncodedColumn.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/RunLengthEncodedColumn.java
@@ -29,6 +29,8 @@
import java.util.Arrays;
import static java.util.Objects.requireNonNull;
+import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkArrayRange;
+import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkValidPosition;
import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkValidRegion;
public class RunLengthEncodedColumn implements Column {
@@ -117,48 +119,6 @@ public boolean[] getBooleans() {
return res;
}
- @Override
- public int[] getInts() {
- int[] res = new int[positionCount];
- Arrays.fill(res, value.getInt(0));
- return res;
- }
-
- @Override
- public long[] getLongs() {
- long[] res = new long[positionCount];
- Arrays.fill(res, value.getLong(0));
- return res;
- }
-
- @Override
- public float[] getFloats() {
- float[] res = new float[positionCount];
- Arrays.fill(res, value.getFloat(0));
- return res;
- }
-
- @Override
- public double[] getDoubles() {
- double[] res = new double[positionCount];
- Arrays.fill(res, value.getDouble(0));
- return res;
- }
-
- @Override
- public Binary[] getBinaries() {
- Binary[] res = new Binary[positionCount];
- Arrays.fill(res, value.getBinary(0));
- return res;
- }
-
- @Override
- public Object[] getObjects() {
- Object[] res = new Object[positionCount];
- Arrays.fill(res, value.getObject(0));
- return res;
- }
-
@Override
public TsPrimitiveType getTsPrimitiveType(int position) {
return value.getTsPrimitiveType(0);
@@ -221,6 +181,26 @@ public Column subColumnCopy(int fromIndex) {
return new RunLengthEncodedColumn(valueCopy, positionCount - fromIndex);
}
+ @Override
+ public Column getPositions(int[] positions, int offset, int length) {
+ checkArrayRange(positions, offset, length);
+
+ for (int i = offset; i < offset + length; i++) {
+ checkValidPosition(positions[i], positionCount);
+ }
+ return new RunLengthEncodedColumn(value, length);
+ }
+
+ @Override
+ public Column copyPositions(int[] positions, int offset, int length) {
+ checkArrayRange(positions, offset, length);
+
+ for (int i = offset; i < offset + length; i++) {
+ checkValidPosition(positions[i], positionCount);
+ }
+ return new RunLengthEncodedColumn(value.subColumnCopy(0), length);
+ }
+
@Override
public void reverse() {
// do nothing because the underlying column has only one value
@@ -238,6 +218,8 @@ public void setPositionCount(int count) {
@Override
public void setNull(int start, int end) {
- value.setNull(start, end);
+ throw new UnsupportedOperationException(
+ String.format(
+ "set null of %s is not supported !", RunLengthEncodedColumn.class.getSimpleName()));
}
}
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java
index d681b079f..cce059d71 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java
@@ -26,6 +26,8 @@
import java.util.Arrays;
+import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkArrayRange;
+import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkReadablePosition;
import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfLongArray;
public class TimeColumn implements Column {
@@ -159,6 +161,27 @@ public void reverse() {
}
}
+ @Override
+ public Column getPositions(int[] positions, int offset, int length) {
+ checkArrayRange(positions, offset, length);
+
+ return DictionaryColumn.createInternal(
+ offset, length, this, positions, DictionaryId.randomDictionaryId());
+ }
+
+ @Override
+ public Column copyPositions(int[] positions, int offset, int length) {
+ checkArrayRange(positions, offset, length);
+
+ long[] newValues = new long[length];
+ for (int i = 0; i < length; i++) {
+ int position = positions[offset + i];
+ checkReadablePosition(this, position);
+ newValues[i] = values[position + arrayOffset];
+ }
+ return new TimeColumn(0, length, newValues);
+ }
+
public long getStartTime() {
return values[arrayOffset];
}
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/utils/ReadWriteIOUtils.java b/java/tsfile/src/main/java/org/apache/tsfile/utils/ReadWriteIOUtils.java
index 50aed1179..12728830e 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/utils/ReadWriteIOUtils.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/utils/ReadWriteIOUtils.java
@@ -1241,6 +1241,23 @@ public static Object readObject(ByteBuffer buffer) {
}
}
+ public static void writeInts(int[] ints, int offset, int length, OutputStream outputStream)
+ throws IOException {
+ write(length, outputStream);
+ for (int i = 0; i < length; i++) {
+ write(ints[offset + i], outputStream);
+ }
+ }
+
+ public static int[] readInts(ByteBuffer buffer) {
+ int length = readInt(buffer);
+ int[] ints = new int[length];
+ for (int i = 0; i < length; i++) {
+ ints[i] = readInt(buffer);
+ }
+ return ints;
+ }
+
public static ByteBuffer clone(ByteBuffer original) {
ByteBuffer clone = ByteBuffer.allocate(original.remaining());
while (original.hasRemaining()) {
diff --git a/java/tsfile/src/test/java/org/apache/tsfile/common/block/DictionaryColumnEncodingTest.java b/java/tsfile/src/test/java/org/apache/tsfile/common/block/DictionaryColumnEncodingTest.java
new file mode 100644
index 000000000..3fd03a11c
--- /dev/null
+++ b/java/tsfile/src/test/java/org/apache/tsfile/common/block/DictionaryColumnEncodingTest.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.tsfile.common.block;
+
+import org.apache.tsfile.block.column.Column;
+import org.apache.tsfile.block.column.ColumnBuilder;
+import org.apache.tsfile.block.column.ColumnEncoding;
+import org.apache.tsfile.enums.TSDataType;
+import org.apache.tsfile.read.common.block.column.ColumnEncoder;
+import org.apache.tsfile.read.common.block.column.ColumnEncoderFactory;
+import org.apache.tsfile.read.common.block.column.DictionaryColumn;
+import org.apache.tsfile.read.common.block.column.LongColumn;
+import org.apache.tsfile.read.common.block.column.LongColumnBuilder;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+public class DictionaryColumnEncodingTest {
+ @Test
+ public void testIntColumn() {
+ ColumnBuilder columnBuilder = new LongColumnBuilder(null, 10);
+ for (int i = 0; i < 10; i++) {
+ if (i == 5) {
+ columnBuilder.appendNull();
+ } else {
+ columnBuilder.writeLong(i % 5);
+ }
+ }
+ Column originalColumn = columnBuilder.build();
+ DictionaryColumn input =
+ (DictionaryColumn) originalColumn.getPositions(new int[] {1, 3, 5, 8, 9}, 1, 4);
+
+ ColumnEncoder encoder = ColumnEncoderFactory.get(ColumnEncoding.DICTIONARY);
+
+ ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
+ DataOutputStream dos = new DataOutputStream(byteArrayOutputStream);
+ try {
+ encoder.writeColumn(dos, input);
+ } catch (IOException e) {
+ e.printStackTrace();
+ Assert.fail();
+ }
+
+ ByteBuffer buffer = ByteBuffer.wrap(byteArrayOutputStream.toByteArray());
+ Column output = encoder.readColumn(buffer, TSDataType.INT64, input.getPositionCount());
+ Assert.assertTrue(output instanceof LongColumn);
+
+ Assert.assertEquals(input.getPositionCount(), output.getPositionCount());
+ Assert.assertTrue(output.mayHaveNull());
+ Assert.assertEquals(3, output.getLong(0));
+ Assert.assertTrue(output.isNull(1));
+ Assert.assertEquals(3, output.getLong(2));
+ Assert.assertEquals(4, output.getLong(3));
+ }
+}
diff --git a/java/tsfile/src/test/java/org/apache/tsfile/read/common/ColumnTest.java b/java/tsfile/src/test/java/org/apache/tsfile/read/common/ColumnTest.java
index 80ac44828..0cb3f2e78 100644
--- a/java/tsfile/src/test/java/org/apache/tsfile/read/common/ColumnTest.java
+++ b/java/tsfile/src/test/java/org/apache/tsfile/read/common/ColumnTest.java
@@ -20,10 +20,12 @@
package org.apache.tsfile.read.common;
import org.apache.tsfile.block.column.Column;
+import org.apache.tsfile.block.column.ColumnBuilder;
import org.apache.tsfile.read.common.block.column.BinaryColumn;
import org.apache.tsfile.read.common.block.column.BinaryColumnBuilder;
import org.apache.tsfile.read.common.block.column.BooleanColumn;
import org.apache.tsfile.read.common.block.column.BooleanColumnBuilder;
+import org.apache.tsfile.read.common.block.column.DictionaryColumn;
import org.apache.tsfile.read.common.block.column.DoubleColumn;
import org.apache.tsfile.read.common.block.column.DoubleColumnBuilder;
import org.apache.tsfile.read.common.block.column.FloatColumn;
@@ -41,6 +43,8 @@
import org.junit.Assert;
import org.junit.Test;
+import java.util.Optional;
+
public class ColumnTest {
@Test
@@ -83,6 +87,27 @@ public void timeColumnSubColumnCopyTest() {
Assert.assertNotSame(timeColumn1.getLongs(), timeColumn2.getLongs());
}
+ @Test
+ public void timeColumnFilterPositionsTest() {
+ ColumnBuilder columnBuilder = new TimeColumnBuilder(null, 10);
+ for (int i = 0; i < 10; i++) {
+ columnBuilder.writeLong(i);
+ }
+ Column originalColumn = columnBuilder.build();
+
+ int[] selectedPositions = new int[] {1, 3, 5};
+ Column columnByGetPositions = originalColumn.getPositions(selectedPositions, 1, 2);
+ Assert.assertTrue(columnByGetPositions instanceof DictionaryColumn);
+ Assert.assertEquals(2, columnByGetPositions.getPositionCount());
+ Assert.assertEquals(3, columnByGetPositions.getLong(0));
+ Assert.assertEquals(5, columnByGetPositions.getLong(1));
+
+ Column columnByCopyPositions = originalColumn.copyPositions(selectedPositions, 1, 2);
+ Assert.assertEquals(2, columnByCopyPositions.getPositionCount());
+ Assert.assertEquals(3, columnByCopyPositions.getLong(0));
+ Assert.assertEquals(5, columnByCopyPositions.getLong(1));
+ }
+
@Test
public void binaryColumnSubColumnTest() {
BinaryColumnBuilder columnBuilder = new BinaryColumnBuilder(null, 10);
@@ -123,6 +148,27 @@ public void binaryColumnSubColumnCopyTest() {
Assert.assertNotSame(binaryColumn1.getBinaries(), binaryColumn2.getBinaries());
}
+ @Test
+ public void binaryColumnFilterPositionsTest() {
+ ColumnBuilder columnBuilder = new BinaryColumnBuilder(null, 10);
+ for (int i = 0; i < 10; i++) {
+ columnBuilder.writeBinary(BytesUtils.valueOf(String.valueOf(i)));
+ }
+ Column originalColumn = columnBuilder.build();
+
+ int[] selectedPositions = new int[] {1, 3, 5};
+ Column columnByGetPositions = originalColumn.getPositions(selectedPositions, 1, 2);
+ Assert.assertTrue(columnByGetPositions instanceof DictionaryColumn);
+ Assert.assertEquals(2, columnByGetPositions.getPositionCount());
+ Assert.assertEquals("3", columnByGetPositions.getBinary(0).toString());
+ Assert.assertEquals("5", columnByGetPositions.getBinary(1).toString());
+
+ Column columnByCopyPositions = originalColumn.copyPositions(selectedPositions, 1, 2);
+ Assert.assertEquals(2, columnByCopyPositions.getPositionCount());
+ Assert.assertEquals("3", columnByCopyPositions.getBinary(0).toString());
+ Assert.assertEquals("5", columnByCopyPositions.getBinary(1).toString());
+ }
+
@Test
public void booleanColumnSubColumnTest() {
BooleanColumnBuilder columnBuilder = new BooleanColumnBuilder(null, 10);
@@ -165,6 +211,27 @@ public void booleanColumnSubColumnCopyTest() {
Assert.assertNotSame(booleanColumn1.getBooleans(), booleanColumn2.getBooleans());
}
+ @Test
+ public void booleanColumnFilterPositionsTest() {
+ ColumnBuilder columnBuilder = new BooleanColumnBuilder(null, 10);
+ for (int i = 0; i < 10; i++) {
+ columnBuilder.writeBoolean(i % 2 == 0);
+ }
+ Column originalColumn = columnBuilder.build();
+
+ int[] selectedPositions = new int[] {1, 3, 6};
+ Column columnByGetPositions = originalColumn.getPositions(selectedPositions, 1, 2);
+ Assert.assertTrue(columnByGetPositions instanceof DictionaryColumn);
+ Assert.assertEquals(2, columnByGetPositions.getPositionCount());
+ Assert.assertFalse(columnByGetPositions.getBoolean(0));
+ Assert.assertTrue(columnByGetPositions.getBoolean(1));
+
+ Column columnByCopyPositions = originalColumn.copyPositions(selectedPositions, 1, 2);
+ Assert.assertEquals(2, columnByCopyPositions.getPositionCount());
+ Assert.assertFalse(columnByCopyPositions.getBoolean(0));
+ Assert.assertTrue(columnByCopyPositions.getBoolean(1));
+ }
+
@Test
public void doubleColumnSubColumnTest() {
DoubleColumnBuilder columnBuilder = new DoubleColumnBuilder(null, 10);
@@ -205,6 +272,27 @@ public void doubleColumnSubColumnCopyTest() {
Assert.assertNotSame(doubleColumn1.getDoubles(), doubleColumn2.getDoubles());
}
+ @Test
+ public void doubleColumnFilterPositionsTest() {
+ ColumnBuilder columnBuilder = new DoubleColumnBuilder(null, 10);
+ for (int i = 0; i < 10; i++) {
+ columnBuilder.writeDouble(i);
+ }
+ Column originalColumn = columnBuilder.build();
+
+ int[] selectedPositions = new int[] {1, 3, 5};
+ Column columnByGetPositions = originalColumn.getPositions(selectedPositions, 1, 2);
+ Assert.assertTrue(columnByGetPositions instanceof DictionaryColumn);
+ Assert.assertEquals(2, columnByGetPositions.getPositionCount());
+ Assert.assertEquals(3, columnByGetPositions.getDouble(0), 0.01);
+ Assert.assertEquals(5, columnByGetPositions.getDouble(1), 0.01);
+
+ Column columnByCopyPositions = originalColumn.copyPositions(selectedPositions, 1, 2);
+ Assert.assertEquals(2, columnByCopyPositions.getPositionCount());
+ Assert.assertEquals(3, columnByCopyPositions.getDouble(0), 0.01);
+ Assert.assertEquals(5, columnByCopyPositions.getDouble(1), 0.01);
+ }
+
@Test
public void floatColumnSubColumnTest() {
FloatColumnBuilder columnBuilder = new FloatColumnBuilder(null, 10);
@@ -245,6 +333,28 @@ public void floatColumnSubColumnCopyTest() {
Assert.assertNotSame(floatColumn1.getFloats(), floatColumn2.getFloats());
}
+ @Test
+ public void floatColumnFilterPositionsTest() {
+ ColumnBuilder columnBuilder = new FloatColumnBuilder(null, 10);
+ for (int i = 0; i < 10; i++) {
+ columnBuilder.writeFloat(i);
+ }
+ Column originalColumn = columnBuilder.build();
+
+ int[] selectedPositions = new int[] {1, 3, 5};
+ Column columnByGetPositions = originalColumn.getPositions(selectedPositions, 1, 2);
+ Assert.assertTrue(columnByGetPositions instanceof DictionaryColumn);
+ Assert.assertEquals(2, columnByGetPositions.getPositionCount());
+ Assert.assertEquals(3, columnByGetPositions.getFloat(0), 0.01);
+ Assert.assertEquals(5, columnByGetPositions.getFloat(1), 0.01);
+
+ Column columnByCopyPositions = originalColumn.copyPositions(selectedPositions, 1, 2);
+ Assert.assertNotSame(originalColumn.getFloats(), columnByCopyPositions.getFloats());
+ Assert.assertEquals(2, columnByCopyPositions.getPositionCount());
+ Assert.assertEquals(3, columnByCopyPositions.getFloat(0), 0.01);
+ Assert.assertEquals(5, columnByCopyPositions.getFloat(1), 0.01);
+ }
+
@Test
public void intColumnSubColumnTest() {
IntColumnBuilder columnBuilder = new IntColumnBuilder(null, 10);
@@ -285,6 +395,27 @@ public void intColumnSubColumnCopyTest() {
Assert.assertNotSame(intColumn1.getInts(), intColumn2.getInts());
}
+ @Test
+ public void intColumnFilterPositionsTest() {
+ ColumnBuilder columnBuilder = new IntColumnBuilder(null, 10);
+ for (int i = 0; i < 10; i++) {
+ columnBuilder.writeInt(i);
+ }
+ Column originalColumn = columnBuilder.build();
+
+ int[] selectedPositions = new int[] {1, 3, 5};
+ Column columnByGetPositions = originalColumn.getPositions(selectedPositions, 1, 2);
+ Assert.assertTrue(columnByGetPositions instanceof DictionaryColumn);
+ Assert.assertEquals(2, columnByGetPositions.getPositionCount());
+ Assert.assertEquals(3, columnByGetPositions.getInt(0));
+ Assert.assertEquals(5, columnByGetPositions.getInt(1));
+
+ Column columnByCopyPositions = originalColumn.copyPositions(selectedPositions, 1, 2);
+ Assert.assertEquals(2, columnByCopyPositions.getPositionCount());
+ Assert.assertEquals(3, columnByCopyPositions.getInt(0));
+ Assert.assertEquals(5, columnByCopyPositions.getInt(1));
+ }
+
@Test
public void longColumnSubColumnTest() {
LongColumnBuilder columnBuilder = new LongColumnBuilder(null, 10);
@@ -325,6 +456,27 @@ public void longColumnSubColumnCopyTest() {
Assert.assertNotSame(longColumn1.getLongs(), longColumn2.getLongs());
}
+ @Test
+ public void longColumnFilterPositionsTest() {
+ ColumnBuilder columnBuilder = new LongColumnBuilder(null, 10);
+ for (int i = 0; i < 10; i++) {
+ columnBuilder.writeLong(i);
+ }
+ Column originalColumn = columnBuilder.build();
+
+ int[] selectedPositions = new int[] {1, 3, 5};
+ Column columnByGetPositions = originalColumn.getPositions(selectedPositions, 1, 2);
+ Assert.assertTrue(columnByGetPositions instanceof DictionaryColumn);
+ Assert.assertEquals(2, columnByGetPositions.getPositionCount());
+ Assert.assertEquals(3, columnByGetPositions.getLong(0));
+ Assert.assertEquals(5, columnByGetPositions.getLong(1));
+
+ Column columnByCopyPositions = originalColumn.copyPositions(selectedPositions, 1, 2);
+ Assert.assertEquals(2, columnByCopyPositions.getPositionCount());
+ Assert.assertEquals(3, columnByCopyPositions.getLong(0));
+ Assert.assertEquals(5, columnByCopyPositions.getLong(1));
+ }
+
@Test
public void nullColumnTest() {
NullColumn nullColumn = new NullColumn(10);
@@ -335,6 +487,19 @@ public void nullColumnTest() {
Assert.assertEquals(1, subColumn.getPositionCount());
}
+ @Test
+ public void nullColumnFilterPositionsTest() {
+ Column originalColumn = new NullColumn(10);
+
+ int[] selectedPositions = new int[] {1, 3, 5};
+ Column columnByGetPositions = originalColumn.getPositions(selectedPositions, 1, 2);
+ Assert.assertTrue(columnByGetPositions instanceof NullColumn);
+ Assert.assertEquals(2, columnByGetPositions.getPositionCount());
+
+ Column columnByCopyPositions = originalColumn.copyPositions(selectedPositions, 1, 2);
+ Assert.assertEquals(2, columnByCopyPositions.getPositionCount());
+ }
+
@Test
public void runLengthEncodedColumnSubColumnTest() {
LongColumnBuilder longColumnBuilder = new LongColumnBuilder(null, 1);
@@ -366,4 +531,78 @@ public void runLengthEncodedColumnSubColumnCopyTest() {
Assert.assertEquals(1, column.getLong(0));
Assert.assertEquals(1, column.getLong(1));
}
+
+ @Test
+ public void runLengthEncodedColumnFilterPositionsTest() {
+ Column originalColumn =
+ new RunLengthEncodedColumn(new LongColumn(1, Optional.empty(), new long[] {1L}), 10);
+
+ int[] selectedPositions = new int[] {1, 3, 5};
+ Column columnByGetPositions = originalColumn.getPositions(selectedPositions, 1, 2);
+ Assert.assertTrue(columnByGetPositions instanceof RunLengthEncodedColumn);
+ Assert.assertEquals(2, columnByGetPositions.getPositionCount());
+ Assert.assertEquals(1, columnByGetPositions.getLong(0));
+ Assert.assertEquals(1, columnByGetPositions.getLong(1));
+
+ Column columnByCopyPositions = originalColumn.copyPositions(selectedPositions, 1, 2);
+ Assert.assertEquals(2, columnByCopyPositions.getPositionCount());
+ Assert.assertEquals(1, columnByCopyPositions.getLong(0));
+ Assert.assertEquals(1, columnByCopyPositions.getLong(1));
+ }
+
+ @Test
+ public void dictionaryColumnTest() {
+ ColumnBuilder columnBuilder = new LongColumnBuilder(null, 10);
+ for (int i = 0; i < 10; i++) {
+ columnBuilder.writeLong(i % 5);
+ }
+ Column originalColumn = columnBuilder.build();
+ DictionaryColumn dictionaryColumn =
+ (DictionaryColumn) originalColumn.getPositions(new int[] {1, 3, 5, 8, 9}, 1, 4);
+ Assert.assertEquals(3, dictionaryColumn.getLong(0));
+ Assert.assertEquals(0, dictionaryColumn.getLong(1));
+ Assert.assertEquals(3, dictionaryColumn.getLong(2));
+ Assert.assertEquals(4, dictionaryColumn.getLong(3));
+
+ int[] selectedPositions = new int[] {0, 2};
+ Column columnByGetPositions = dictionaryColumn.getPositions(selectedPositions, 0, 2);
+ Assert.assertEquals(2, columnByGetPositions.getPositionCount());
+ Assert.assertEquals(3, columnByGetPositions.getLong(0));
+ Assert.assertEquals(3, columnByGetPositions.getLong(1));
+
+ Column columnByCopyPositions = dictionaryColumn.copyPositions(selectedPositions, 0, 2);
+ Assert.assertEquals(2, columnByCopyPositions.getPositionCount());
+ Assert.assertEquals(3, columnByCopyPositions.getLong(0));
+ Assert.assertEquals(3, columnByCopyPositions.getLong(1));
+
+ Column columnByGetRegion = dictionaryColumn.getRegion(1, 2);
+ Assert.assertEquals(2, columnByGetRegion.getPositionCount());
+ Assert.assertEquals(0, columnByGetRegion.getLong(0));
+ Assert.assertEquals(3, columnByGetRegion.getLong(1));
+
+ Column columnByGetRegionCopy = dictionaryColumn.getRegionCopy(1, 2);
+ Assert.assertEquals(2, columnByGetRegionCopy.getPositionCount());
+ Assert.assertEquals(0, columnByGetRegionCopy.getLong(0));
+ Assert.assertEquals(3, columnByGetRegionCopy.getLong(1));
+
+ dictionaryColumn.reverse();
+ Assert.assertEquals(4, dictionaryColumn.getLong(0));
+ Assert.assertEquals(3, dictionaryColumn.getLong(1));
+ Assert.assertEquals(0, dictionaryColumn.getLong(2));
+ Assert.assertEquals(3, dictionaryColumn.getLong(3));
+
+ try {
+ dictionaryColumn.setNull(0, 1);
+ Assert.fail();
+ } catch (Exception e) {
+ // Ignore
+ }
+
+ try {
+ dictionaryColumn.getLongs();
+ Assert.fail();
+ } catch (Exception e) {
+ // Ignore
+ }
+ }
}