From fc0e9ecfdadc24d63b76aa22b705a8b60bd2ede7 Mon Sep 17 00:00:00 2001 From: HTHou Date: Thu, 19 Dec 2024 17:04:00 +0800 Subject: [PATCH 1/4] try one try --- .../tsfile/encoding/decoder/FloatDecoder.java | 32 +++++++++++++++-- .../tsfile/encoding/encoder/FloatEncoder.java | 36 +++++++++++++++++-- .../encoding/decoder/FloatDecoderTest.java | 20 +++++++++++ 3 files changed, 83 insertions(+), 5 deletions(-) diff --git a/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java b/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java index 6b019930e..f307a4c4a 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java @@ -31,6 +31,8 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; /** * Decoder for float or double value using rle or two diff. For more info about encoding pattern, @@ -47,6 +49,9 @@ public class FloatDecoder extends Decoder { /** flag that indicates whether we have read maxPointNumber and calculated maxPointValue. */ private boolean isMaxPointNumberRead; + private List useMaxPointNumber; + private int position = 0; + public FloatDecoder(TSEncoding encodingType, TSDataType dataType) { super(encodingType); if (encodingType == TSEncoding.RLE) { @@ -93,7 +98,8 @@ public FloatDecoder(TSEncoding encodingType, TSDataType dataType) { public float readFloat(ByteBuffer buffer) { readMaxPointValue(buffer); int value = decoder.readInt(buffer); - double result = value / maxPointValue; + double result = value / getMaxPointValue(); + position++; return (float) result; } @@ -104,10 +110,31 @@ public double readDouble(ByteBuffer buffer) { return value / maxPointValue; } + private double getMaxPointValue() { + if (useMaxPointNumber == null) { + return maxPointValue; + } else { + return useMaxPointNumber.get(position) ? maxPointValue : 1; + } + } + private void readMaxPointValue(ByteBuffer buffer) { if (!isMaxPointNumberRead) { int maxPointNumber = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer); - if (maxPointNumber <= 0) { + if (maxPointNumber == Integer.MAX_VALUE) { + useMaxPointNumber = new ArrayList<>(); + while (true) { + maxPointNumber = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer); + if (maxPointNumber == 1) { + useMaxPointNumber.add(true); + } else if (maxPointNumber == 0) { + useMaxPointNumber.add(false); + } else { + maxPointValue = Math.pow(10, maxPointNumber); + break; + } + } + } else if (maxPointNumber <= 0) { maxPointValue = 1; } else { maxPointValue = Math.pow(10, maxPointNumber); @@ -153,5 +180,6 @@ public long readLong(ByteBuffer buffer) { public void reset() { this.decoder.reset(); this.isMaxPointNumberRead = false; + this.position = 0; } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java b/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java index adf328e1a..3f254278b 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java @@ -26,6 +26,8 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; /** * Encoder for float or double value using rle or two-diff according to following grammar. @@ -49,11 +51,14 @@ public class FloatEncoder extends Encoder { /** flag to check whether maxPointNumber is saved in the stream. */ private boolean isMaxPointNumberSaved; + private final List useMaxPointNumber; + public FloatEncoder(TSEncoding encodingType, TSDataType dataType, int maxPointNumber) { super(encodingType); this.maxPointNumber = maxPointNumber; - calculateMaxPonitNum(); + calculateMaxPointNum(); isMaxPointNumberSaved = false; + useMaxPointNumber = new ArrayList<>(); if (encodingType == TSEncoding.RLE) { if (dataType == TSDataType.FLOAT) { encoder = new IntRleEncoder(); @@ -101,7 +106,7 @@ public void encode(double value, ByteArrayOutputStream out) { encoder.encode(valueLong, out); } - private void calculateMaxPonitNum() { + private void calculateMaxPointNum() { if (maxPointNumber <= 0) { maxPointNumber = 0; maxPointValue = 1; @@ -111,7 +116,13 @@ private void calculateMaxPonitNum() { } private int convertFloatToInt(float value) { - return (int) Math.round(value * maxPointValue); + if (value * maxPointValue > Integer.MAX_VALUE || value * maxPointValue < Integer.MIN_VALUE) { + useMaxPointNumber.add(false); + return Math.round(value); + } else { + useMaxPointNumber.add(true); + return (int) Math.round(value * maxPointValue); + } } private long convertDoubleToLong(double value) { @@ -121,11 +132,30 @@ private long convertDoubleToLong(double value) { @Override public void flush(ByteArrayOutputStream out) throws IOException { encoder.flush(out); + if (pointsNotUseMaxPointNumber()) { + byte[] ba = out.toByteArray(); + out.reset(); + ReadWriteForEncodingUtils.writeUnsignedVarInt(Integer.MAX_VALUE, out); + for (boolean b : useMaxPointNumber) { + ReadWriteForEncodingUtils.writeUnsignedVarInt(b ? 1 : 0, out); + } + out.write(ba); + } reset(); } private void reset() { isMaxPointNumberSaved = false; + useMaxPointNumber.clear(); + } + + private boolean pointsNotUseMaxPointNumber() { + for (boolean info : useMaxPointNumber) { + if (!info) { + return true; + } + } + return false; } private void saveMaxPointNumber(ByteArrayOutputStream out) { diff --git a/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java b/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java index bdc1db9ff..cf631cadb 100644 --- a/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java @@ -30,6 +30,7 @@ import org.slf4j.LoggerFactory; import java.io.ByteArrayOutputStream; +import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; @@ -84,6 +85,25 @@ public void setUp() { @After public void tearDown() {} + public static void main(String[] args) throws IOException { + float value1 = 0.333F; + System.out.println(value1); + float value = 6.5536403E8F; + System.out.println(value); + Encoder encoder = new FloatEncoder(TSEncoding.TS_2DIFF, TSDataType.FLOAT, 2); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + encoder.encode(value1, baos); + encoder.encode(value, baos); + encoder.flush(baos); + + ByteBuffer buffer = ByteBuffer.wrap(baos.toByteArray()); + Decoder decoder = new FloatDecoder(TSEncoding.TS_2DIFF, TSDataType.FLOAT); + float value_ = decoder.readFloat(buffer); + System.out.println(value_); + value_ = decoder.readFloat(buffer); + System.out.println(value_); + } + @Test public void testRLEFloat() throws Exception { for (int i = 1; i <= 10; i++) { From 2a239761320cf671c0757aeaea1584ec2d237d36 Mon Sep 17 00:00:00 2001 From: HTHou Date: Thu, 19 Dec 2024 19:13:49 +0800 Subject: [PATCH 2/4] Fix FloatEncoder overflow problem --- .../tsfile/encoding/decoder/FloatDecoder.java | 27 +++++------- .../tsfile/encoding/encoder/FloatEncoder.java | 18 ++++++-- .../encoding/decoder/FloatDecoderTest.java | 44 ++++++++++--------- .../tsfile/write/TsFileReadWriteTest.java | 9 +++- 4 files changed, 56 insertions(+), 42 deletions(-) diff --git a/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java b/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java index f307a4c4a..2f6fd4282 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java @@ -24,6 +24,7 @@ import org.apache.tsfile.exception.encoding.TsFileDecodingException; import org.apache.tsfile.file.metadata.enums.TSEncoding; import org.apache.tsfile.utils.Binary; +import org.apache.tsfile.utils.BitMap; import org.apache.tsfile.utils.ReadWriteForEncodingUtils; import org.slf4j.Logger; @@ -31,8 +32,6 @@ import java.io.IOException; import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.List; /** * Decoder for float or double value using rle or two diff. For more info about encoding pattern, @@ -49,7 +48,7 @@ public class FloatDecoder extends Decoder { /** flag that indicates whether we have read maxPointNumber and calculated maxPointValue. */ private boolean isMaxPointNumberRead; - private List useMaxPointNumber; + private BitMap useMaxPointNumber; private int position = 0; public FloatDecoder(TSEncoding encodingType, TSDataType dataType) { @@ -107,14 +106,14 @@ public float readFloat(ByteBuffer buffer) { public double readDouble(ByteBuffer buffer) { readMaxPointValue(buffer); long value = decoder.readLong(buffer); - return value / maxPointValue; + return value / getMaxPointValue(); } private double getMaxPointValue() { if (useMaxPointNumber == null) { return maxPointValue; } else { - return useMaxPointNumber.get(position) ? maxPointValue : 1; + return useMaxPointNumber.isMarked(position) ? maxPointValue : 1; } } @@ -122,18 +121,12 @@ private void readMaxPointValue(ByteBuffer buffer) { if (!isMaxPointNumberRead) { int maxPointNumber = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer); if (maxPointNumber == Integer.MAX_VALUE) { - useMaxPointNumber = new ArrayList<>(); - while (true) { - maxPointNumber = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer); - if (maxPointNumber == 1) { - useMaxPointNumber.add(true); - } else if (maxPointNumber == 0) { - useMaxPointNumber.add(false); - } else { - maxPointValue = Math.pow(10, maxPointNumber); - break; - } - } + int size = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer); + byte[] tmp = new byte[size / 8 + 1]; + buffer.get(tmp, 0, size / 8 + 1); + useMaxPointNumber = new BitMap(size, tmp); + maxPointNumber = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer); + maxPointValue = Math.pow(10, maxPointNumber); } else if (maxPointNumber <= 0) { maxPointValue = 1; } else { diff --git a/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java b/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java index 3f254278b..dccb2a93a 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/FloatEncoder.java @@ -22,6 +22,7 @@ import org.apache.tsfile.enums.TSDataType; import org.apache.tsfile.exception.encoding.TsFileEncodingException; import org.apache.tsfile.file.metadata.enums.TSEncoding; +import org.apache.tsfile.utils.BitMap; import org.apache.tsfile.utils.ReadWriteForEncodingUtils; import java.io.ByteArrayOutputStream; @@ -126,7 +127,13 @@ private int convertFloatToInt(float value) { } private long convertDoubleToLong(double value) { - return Math.round(value * maxPointValue); + if (value * maxPointValue > Long.MAX_VALUE || value * maxPointValue < Long.MIN_VALUE) { + useMaxPointNumber.add(false); + return Math.round(value); + } else { + useMaxPointNumber.add(true); + return Math.round(value * maxPointValue); + } } @Override @@ -136,9 +143,14 @@ public void flush(ByteArrayOutputStream out) throws IOException { byte[] ba = out.toByteArray(); out.reset(); ReadWriteForEncodingUtils.writeUnsignedVarInt(Integer.MAX_VALUE, out); - for (boolean b : useMaxPointNumber) { - ReadWriteForEncodingUtils.writeUnsignedVarInt(b ? 1 : 0, out); + BitMap bitMap = new BitMap(useMaxPointNumber.size()); + for (int i = 0; i < useMaxPointNumber.size(); i++) { + if (useMaxPointNumber.get(i)) { + bitMap.mark(i); + } } + ReadWriteForEncodingUtils.writeUnsignedVarInt(useMaxPointNumber.size(), out); + out.write(bitMap.getByteArray()); out.write(ba); } reset(); diff --git a/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java b/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java index cf631cadb..7cf1cd7df 100644 --- a/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java @@ -30,7 +30,6 @@ import org.slf4j.LoggerFactory; import java.io.ByteArrayOutputStream; -import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; @@ -85,25 +84,6 @@ public void setUp() { @After public void tearDown() {} - public static void main(String[] args) throws IOException { - float value1 = 0.333F; - System.out.println(value1); - float value = 6.5536403E8F; - System.out.println(value); - Encoder encoder = new FloatEncoder(TSEncoding.TS_2DIFF, TSDataType.FLOAT, 2); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - encoder.encode(value1, baos); - encoder.encode(value, baos); - encoder.flush(baos); - - ByteBuffer buffer = ByteBuffer.wrap(baos.toByteArray()); - Decoder decoder = new FloatDecoder(TSEncoding.TS_2DIFF, TSDataType.FLOAT); - float value_ = decoder.readFloat(buffer); - System.out.println(value_); - value_ = decoder.readFloat(buffer); - System.out.println(value_); - } - @Test public void testRLEFloat() throws Exception { for (int i = 1; i <= 10; i++) { @@ -223,6 +203,30 @@ private void testDoubleLength( } } + @Test + public void testBigFloat() throws Exception { + float a = 0.333F; + float b = 6.5536403E8F; + Encoder encoder = new FloatEncoder(TSEncoding.TS_2DIFF, TSDataType.FLOAT, 2); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + encoder.encode(a, baos); + encoder.encode(b, baos); + encoder.flush(baos); + + ByteBuffer buffer = ByteBuffer.wrap(baos.toByteArray()); + Decoder decoder = new FloatDecoder(TSEncoding.TS_2DIFF, TSDataType.FLOAT); + assertEquals(roundWithGivenPrecision(a, 2), decoder.readFloat(buffer), delta); + assertEquals(roundWithGivenPrecision(b, 2), decoder.readFloat(buffer), delta); + } + + public static float roundWithGivenPrecision(float data, int size) { + if (size == 0) { + return Math.round(data); + } + return Math.round(data) + + Math.round(((data - Math.round(data)) * Math.pow(10, size))) / (float) Math.pow(10, size); + } + // private void testDecimalLenght(TSEncoding encoding, List valueList, // int maxPointValue, // boolean isDebug, int repeatCount) throws Exception { diff --git a/java/tsfile/src/test/java/org/apache/tsfile/write/TsFileReadWriteTest.java b/java/tsfile/src/test/java/org/apache/tsfile/write/TsFileReadWriteTest.java index d9add8de9..7f74d1625 100644 --- a/java/tsfile/src/test/java/org/apache/tsfile/write/TsFileReadWriteTest.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/write/TsFileReadWriteTest.java @@ -134,8 +134,13 @@ public void floatTest() throws IOException, WriteProcessException { public void floatTest(TSEncoding encoding) throws IOException, WriteProcessException { writeDataByTSRecord( - TSDataType.FLOAT, (i) -> new FloatDataPoint("sensor_1", (float) i), encoding); - readData((i, field, delta) -> assertEquals(i, field.getFloatV(), delta)); + TSDataType.FLOAT, + (i) -> new FloatDataPoint("sensor_1", i % 2 == 0 ? 6.55364032E8F : i), + encoding); + readData( + (i, field, delta) -> + assertEquals( + encoding.toString(), i % 2 == 0 ? 6.55364032E8F : i, field.getFloatV(), delta)); } @Test From 3f5ece74663d8c0ae2889f3c944a019c440c353c Mon Sep 17 00:00:00 2001 From: HTHou Date: Wed, 25 Dec 2024 11:20:54 +0800 Subject: [PATCH 3/4] add more UT --- .../tsfile/encoding/decoder/FloatDecoder.java | 4 +++- .../encoding/decoder/FloatDecoderTest.java | 21 +++++++++++++------ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java b/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java index 2f6fd4282..8f2510a26 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/encoding/decoder/FloatDecoder.java @@ -106,7 +106,9 @@ public float readFloat(ByteBuffer buffer) { public double readDouble(ByteBuffer buffer) { readMaxPointValue(buffer); long value = decoder.readLong(buffer); - return value / getMaxPointValue(); + double result = value / getMaxPointValue(); + position++; + return result; } private double getMaxPointValue() { diff --git a/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java b/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java index 7cf1cd7df..417a4e0b1 100644 --- a/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/encoding/decoder/FloatDecoderTest.java @@ -34,6 +34,7 @@ import java.util.ArrayList; import java.util.List; +import static org.apache.tsfile.utils.EncodingUtils.roundWithGivenPrecision; import static org.junit.Assert.assertEquals; public class FloatDecoderTest { @@ -219,12 +220,20 @@ public void testBigFloat() throws Exception { assertEquals(roundWithGivenPrecision(b, 2), decoder.readFloat(buffer), delta); } - public static float roundWithGivenPrecision(float data, int size) { - if (size == 0) { - return Math.round(data); - } - return Math.round(data) - + Math.round(((data - Math.round(data)) * Math.pow(10, size))) / (float) Math.pow(10, size); + @Test + public void testBigDouble() throws Exception { + double a = 0.333; + double b = 9.223372036854E18; + Encoder encoder = new FloatEncoder(TSEncoding.RLE, TSDataType.DOUBLE, 2); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + encoder.encode(a, baos); + encoder.encode(b, baos); + encoder.flush(baos); + + ByteBuffer buffer = ByteBuffer.wrap(baos.toByteArray()); + Decoder decoder = new FloatDecoder(TSEncoding.RLE, TSDataType.DOUBLE); + assertEquals(roundWithGivenPrecision(a, 2), decoder.readDouble(buffer), delta); + assertEquals(roundWithGivenPrecision(b, 2), decoder.readDouble(buffer), delta); } // private void testDecimalLenght(TSEncoding encoding, List valueList, From 50ad190a396680571de3cc662cc9b6f1bb0904bf Mon Sep 17 00:00:00 2001 From: HTHou Date: Wed, 25 Dec 2024 11:23:51 +0800 Subject: [PATCH 4/4] add more UT --- .../apache/tsfile/utils/EncodingUtils.java | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 java/tsfile/src/test/java/org/apache/tsfile/utils/EncodingUtils.java diff --git a/java/tsfile/src/test/java/org/apache/tsfile/utils/EncodingUtils.java b/java/tsfile/src/test/java/org/apache/tsfile/utils/EncodingUtils.java new file mode 100644 index 000000000..32dd29e44 --- /dev/null +++ b/java/tsfile/src/test/java/org/apache/tsfile/utils/EncodingUtils.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.tsfile.utils; + +public class EncodingUtils { + + // Copied from org.apache.iotdb.db.utils.MathUtils + public static float roundWithGivenPrecision(float data, int size) { + if (size == 0) { + return Math.round(data); + } + return Math.round(data) + + Math.round(((data - Math.round(data)) * Math.pow(10, size))) / (float) Math.pow(10, size); + } + + // Copied from org.apache.iotdb.db.utils.MathUtils + public static double roundWithGivenPrecision(double data, int size) { + if (size == 0) { + return Math.round(data); + } + return Math.round(data) + + Math.round(((data - Math.round(data)) * Math.pow(10, size))) / Math.pow(10, size); + } +}