Skip to content

Commit 695b3ed

Browse files
authored
Fix ARRAY NOT NULL JDBC issue (#189)
1 parent 6c578eb commit 695b3ed

File tree

3 files changed

+58
-23
lines changed

3 files changed

+58
-23
lines changed

hoptimator-avro/src/main/java/com/linkedin/hoptimator/avro/AvroConverter.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,9 @@ public static RelDataType rel(Schema schema, RelDataTypeFactory typeFactory) {
157157

158158
/** Converts Avro Schema to RelDataType.
159159
* Nullability is preserved except for array element types: JDBC is incapable of interpreting e.g. "FLOAT NOT NULL ARRAY",
160-
* causing "NOT NULL" arrays to get demoted to "ANY ARRAY" which is not desired.
160+
* causing "NOT NULL" arrays to get demoted to "ANY ARRAY" which is not desired. See HoptimatorArraySqlType for
161+
* more details.
162+
*
161163
* TODO: default field values are lost when converting from Avro to RelDataType
162164
*/
163165
public static RelDataType rel(Schema schema, RelDataTypeFactory typeFactory, boolean nullable) {
@@ -187,8 +189,7 @@ public static RelDataType rel(Schema schema, RelDataTypeFactory typeFactory, boo
187189
case BOOLEAN:
188190
return createRelType(typeFactory, SqlTypeName.BOOLEAN, nullable);
189191
case ARRAY:
190-
return typeFactory.createTypeWithNullability(
191-
typeFactory.createArrayType(rel(schema.getElementType(), typeFactory, true), -1), nullable);
192+
return new HoptimatorArraySqlType(rel(schema.getElementType(), typeFactory, true), nullable);
192193
case MAP:
193194
return typeFactory.createTypeWithNullability(
194195
typeFactory.createMapType(typeFactory.createSqlType(SqlTypeName.VARCHAR), rel(schema.getValueType(), typeFactory, nullable)), nullable);
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
package com.linkedin.hoptimator.avro;
2+
3+
import org.apache.calcite.rel.type.RelDataType;
4+
import org.apache.calcite.sql.type.ArraySqlType;
5+
import org.checkerframework.checker.initialization.qual.UnknownInitialization;
6+
7+
8+
// Custom ArraySqlType to fix digest compatibility issue with Calcite JdbcSchema.
9+
// JdbcSchema expects the row type's digest format to match something like "INTEGER ARRAY". JdbcSchema is incapable of parsing
10+
// the array type if the digest format is like "INTEGER NOT NULL ARRAY" or "INTEGER ARRAY NOT NULL".
11+
// Nullability for the inner element is not supported by Calcite at all, but nullability for the array itself is supported
12+
// and is obtained via the isNullable() method instead.
13+
// Ideally we would do this for all SqlTypes, but Calcite's JdbcSchema currently only checks the digest format of ArraySqlType.
14+
public class HoptimatorArraySqlType extends ArraySqlType { // ArraySqlType variant that replaces only the digest computation
15+
16+
public HoptimatorArraySqlType(RelDataType elementType, boolean isNullable) { // both arguments are handed unchanged to the ArraySqlType constructor
17+
super(elementType, isNullable);
18+
}
19+
20+
@Override
21+
@SuppressWarnings("method.invocation.invalid") // NOTE(review): presumably silences the checker complaint about calling generateTypeString on a receiver marked @UnknownInitialization; confirm
22+
protected void computeDigest(@UnknownInitialization HoptimatorArraySqlType this) { // builds the digest from the type string only
23+
StringBuilder sb = new StringBuilder();
24+
generateTypeString(sb, true); // NOTE(review): the second argument presumably requests the detailed type string; confirm against Calcite
25+
digest = sb.toString(); // nothing else (e.g. a nullability suffix) is appended to the digest here
26+
}
27+
}

hoptimator-avro/src/test/java/com/linkedin/hoptimator/avro/AvroConverterTest.java

Lines changed: 27 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -178,22 +178,18 @@ public void testAvroKeyPayloadSchemaPrimitiveKey() {
178178

179179
@Test
180180
public void convertsNestedArray() {
181-
// Create a RelDataType with an array of structs
182-
RelDataTypeFactory typeFactory = new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT);
183-
184-
// Create a struct type for array elements
185-
RelDataTypeFactory.Builder elementBuilder = new RelDataTypeFactory.Builder(typeFactory);
186-
elementBuilder.add("field1", typeFactory.createSqlType(SqlTypeName.VARCHAR));
187-
elementBuilder.add("field2", typeFactory.createSqlType(SqlTypeName.INTEGER));
188-
RelDataType structType = elementBuilder.build();
189-
190-
// Create array of structs type
191-
RelDataType arrayOfStructsType = typeFactory.createArrayType(structType, -1);
192-
193-
// Test with a struct containing an array of structs field
194-
RelDataTypeFactory.Builder containerBuilder = new RelDataTypeFactory.Builder(typeFactory);
195-
containerBuilder.add("arrayOfStructsField", arrayOfStructsType);
196-
RelDataType containerType = containerBuilder.build();
181+
String schemaString =
182+
"{\"type\":\"record\",\"name\":\"record\",\"namespace\":\"ns\",\"fields\":["
183+
+ "{\"name\":\"arrayOfStructsField\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"record_event1\",\"fields\":["
184+
+ "{\"name\":\"field1\",\"type\":\"string\"},{\"name\":\"field2\",\"type\":\"int\"}]}}}]}";
185+
Schema avroSchema = (new Schema.Parser()).parse(schemaString);
186+
RelDataType containerType = AvroConverter.rel(avroSchema); // Ensure parsing works
187+
// Calcite does not support NOT NULL for types nested inside an array, so the array itself stays NOT NULL while the element's fields are nullable.
188+
assertEquals(1, containerType.getFieldList().size());
189+
assertFalse(containerType.getFieldList().get(0).getType().isNullable());
190+
assertEquals(2, containerType.getFieldList().get(0).getType().getComponentType().getFieldList().size());
191+
assertTrue(containerType.getFieldList().get(0).getType().getComponentType().getFieldList().get(0).getType().isNullable());
192+
assertEquals("RecordType(RecordType(VARCHAR field1, INTEGER field2) ARRAY arrayOfStructsField) NOT NULL", containerType.getFullTypeString());
197193

198194
Schema containerSchema = AvroConverter.avro("test", "Record", containerType);
199195
assertNotNull(containerSchema);
@@ -204,10 +200,21 @@ public void convertsNestedArray() {
204200
assertEquals(Schema.Type.ARRAY, arrayFieldSchema.getType());
205201

206202
Schema structElementSchema = arrayFieldSchema.getElementType();
207-
assertEquals(Schema.Type.RECORD, structElementSchema.getType());
208-
assertEquals(2, structElementSchema.getFields().size());
209-
assertEquals("field1", structElementSchema.getFields().get(0).name());
210-
assertEquals("field2", structElementSchema.getFields().get(1).name());
203+
assertEquals(Schema.Type.UNION, structElementSchema.getType());
204+
assertEquals(2, structElementSchema.getTypes().size());
205+
assertEquals(Schema.Type.NULL, structElementSchema.getTypes().get(0).getType());
206+
assertEquals(Schema.Type.RECORD, structElementSchema.getTypes().get(1).getType());
207+
208+
Schema innerRecord = structElementSchema.getTypes().get(1);
209+
assertEquals(2, innerRecord.getFields().size());
210+
assertEquals("field1", innerRecord.getFields().get(0).name());
211+
assertEquals("field2", innerRecord.getFields().get(1).name());
212+
assertEquals(2, innerRecord.getFields().size());
213+
214+
Schema innermostField = innerRecord.getFields().get(0).schema();
215+
assertEquals(2, innermostField.getTypes().size());
216+
assertEquals(Schema.Type.NULL, innermostField.getTypes().get(0).getType());
217+
assertEquals(Schema.Type.STRING, innermostField.getTypes().get(1).getType());
211218
}
212219

213220
@Test

0 commit comments

Comments
 (0)