Skip to content

Commit 486c407

Browse files
Hanzhi Wangthe-other-tim-brown
authored andcommitted
Fix the edge case when handling non numeric values of double type in delta stats
When reading a Delta snapshot and loading its information into the Delta AddFile object, non-numeric values of float or double type (for example, "NaN" or "-Infinity") in the column stats are deserialized as strings. These special values need dedicated handling; see how Delta handles them: https://github.com/delta-io/delta/blob/master/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/DefaultJsonRow.java#L210
1 parent a184a36 commit 486c407

File tree

2 files changed

+59
-0
lines changed

2 files changed

+59
-0
lines changed

xtable-core/src/main/java/org/apache/xtable/delta/DeltaValueConverter.java

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,12 @@
4444
public class DeltaValueConverter {
4545
private static final String DATE_FORMAT_STR = "yyyy-MM-dd HH:mm:ss";
4646
private static final TimeZone TIME_ZONE = TimeZone.getTimeZone("UTC");
47+
protected static final String NAN_VALUE = "NaN";
48+
protected static final String INFINITY_VALUE = "Infinity";
49+
protected static final String POSITIVE_INFINITY_VALUE = "+Infinity";
50+
protected static final String POSITIVE_INF_VALUE = "+INF";
51+
protected static final String NEGATIVE_INFINITY_VALUE = "-Infinity";
52+
protected static final String NEGATIVE_INF_VALUE = "-INF";
4753

4854
static DateFormat getDateFormat(String dataFormatString) {
4955
DateFormat dateFormat = new SimpleDateFormat(dataFormatString);
@@ -194,9 +200,35 @@ public static Object convertFromDeltaPartitionValue(
194200

195201
private static Object castObjectToInternalType(Object value, InternalType valueType) {
196202
switch (valueType) {
203+
case DOUBLE:
204+
if (value instanceof String)
205+
switch (value.toString()) {
206+
case NAN_VALUE:
207+
return Double.NaN;
208+
case POSITIVE_INF_VALUE:
209+
case POSITIVE_INFINITY_VALUE:
210+
case INFINITY_VALUE:
211+
return Double.POSITIVE_INFINITY;
212+
case NEGATIVE_INF_VALUE:
213+
case NEGATIVE_INFINITY_VALUE:
214+
return Double.NEGATIVE_INFINITY;
215+
}
216+
break;
197217
case FLOAT:
198218
if (value instanceof Double) {
199219
return ((Double) value).floatValue();
220+
} else if (value instanceof String) {
221+
switch (value.toString()) {
222+
case NAN_VALUE:
223+
return Float.NaN;
224+
case POSITIVE_INF_VALUE:
225+
case POSITIVE_INFINITY_VALUE:
226+
case INFINITY_VALUE:
227+
return Float.POSITIVE_INFINITY;
228+
case NEGATIVE_INF_VALUE:
229+
case NEGATIVE_INFINITY_VALUE:
230+
return Float.NEGATIVE_INFINITY;
231+
}
200232
}
201233
break;
202234
case DECIMAL:

xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaValueConverter.java

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,15 @@ void parseWrongDateTime() throws ParseException {
8282
assertThrows(ParseException.class, () -> strictDateFormat.parse(wrongDateTime));
8383
}
8484

85+
@ParameterizedTest
86+
@MethodSource("nonNumericValuesForColStats")
87+
void formattedDifferentNonNumericValuesFromDeltaColumnStat(
88+
Object fieldValue, InternalSchema fieldSchema, Object expectedDeltaValue) {
89+
Object internalRepresentation =
90+
DeltaValueConverter.convertFromDeltaColumnStatValue(fieldValue, fieldSchema);
91+
assertEquals(internalRepresentation, expectedDeltaValue);
92+
}
93+
8594
private static Stream<Arguments> valuesWithSchemaProviderForColStats() {
8695
return Stream.of(
8796
Arguments.of(
@@ -187,4 +196,22 @@ private static Stream<Arguments> valuesWithSchemaProviderForPartitions() {
187196
"yyyy",
188197
"2022"));
189198
}
199+
200+
private static Stream<Arguments> nonNumericValuesForColStats() {
201+
InternalSchema doubleSchema =
202+
InternalSchema.builder().name("double").dataType(InternalType.DOUBLE).build();
203+
InternalSchema floatSchema =
204+
InternalSchema.builder().name("float").dataType(InternalType.FLOAT).build();
205+
return Stream.of(
206+
Arguments.of("NaN", doubleSchema, Double.NaN),
207+
Arguments.of("Infinity", doubleSchema, Double.POSITIVE_INFINITY),
208+
Arguments.of("-Infinity", doubleSchema, Double.NEGATIVE_INFINITY),
209+
Arguments.of("+Infinity", doubleSchema, Double.POSITIVE_INFINITY),
210+
Arguments.of("NaN", floatSchema, Float.NaN),
211+
Arguments.of("Infinity", floatSchema, Float.POSITIVE_INFINITY),
212+
Arguments.of("-Infinity", floatSchema, Float.NEGATIVE_INFINITY),
213+
Arguments.of("+Infinity", floatSchema, Float.POSITIVE_INFINITY),
214+
Arguments.of(Double.NaN, doubleSchema, Double.NaN),
215+
Arguments.of(Double.POSITIVE_INFINITY, doubleSchema, Double.POSITIVE_INFINITY));
216+
}
190217
}

0 commit comments

Comments
 (0)