From f92515d854c260978c7099c189a7913fa68a1199 Mon Sep 17 00:00:00 2001 From: Elliott Brossard Date: Mon, 25 Jun 2018 10:42:05 -0700 Subject: [PATCH] Update BigQueryAvroUtils to support BigQuery's NUMERIC type. For reference, this is the equivalent change in google-cloud-java: https://github.com/GoogleCloudPlatform/google-cloud-java/pull/3110/files --- .../sdk/io/gcp/bigquery/BigQueryAvroUtils.java | 9 +++++++++ .../beam/sdk/io/gcp/bigquery/BigQueryUtils.java | 2 +- .../sdk/io/gcp/bigquery/StandardSQLTypeName.java | 2 ++ .../sdk/io/gcp/bigquery/BigQueryAvroUtilsTest.java | 14 ++++++++++++-- 4 files changed, 24 insertions(+), 3 deletions(-) diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryAvroUtils.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryAvroUtils.java index f8bf472bb972..bf2cb63e55cb 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryAvroUtils.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryAvroUtils.java @@ -28,6 +28,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.io.BaseEncoding; +import java.math.BigDecimal; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; @@ -53,6 +54,7 @@ class BigQueryAvroUtils { .put("BYTES", Type.BYTES) .put("INTEGER", Type.LONG) .put("FLOAT", Type.DOUBLE) + .put("NUMERIC", Type.STRING) .put("BOOLEAN", Type.BOOLEAN) .put("TIMESTAMP", Type.LONG) .put("RECORD", Type.RECORD) @@ -60,6 +62,7 @@ class BigQueryAvroUtils { .put("DATETIME", Type.STRING) .put("TIME", Type.STRING) .build(); + /** * Formats BigQuery seconds-since-epoch into String matching JSON export. Thread-safe and * immutable. @@ -194,6 +197,12 @@ private static Object convertRequiredField( case "FLOAT": verify(v instanceof Double, "Expected Double, got %s", v.getClass()); return v; + case "NUMERIC": + verify( + v instanceof CharSequence || v instanceof BigDecimal, + "Expected CharSequence (String) or BigDecimal, got %s", + v.getClass()); + return v.toString(); case "BOOLEAN": verify(v instanceof Boolean, "Expected Boolean, got %s", v.getClass()); return v; diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtils.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtils.java index 218e9c272f5c..00b0a7d97b05 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtils.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtils.java @@ -70,7 +70,7 @@ public class BigQueryUtils { .put(TypeName.INT64, StandardSQLTypeName.INT64) .put(TypeName.FLOAT, StandardSQLTypeName.FLOAT64) .put(TypeName.DOUBLE, StandardSQLTypeName.FLOAT64) - .put(TypeName.DECIMAL, StandardSQLTypeName.FLOAT64) + .put(TypeName.DECIMAL, StandardSQLTypeName.NUMERIC) .put(TypeName.BOOLEAN, StandardSQLTypeName.BOOL) .put(TypeName.ARRAY, StandardSQLTypeName.ARRAY) .put(TypeName.ROW, StandardSQLTypeName.STRUCT) diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StandardSQLTypeName.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StandardSQLTypeName.java index 632b8aeb1a68..7ffa85c8b66f 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StandardSQLTypeName.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StandardSQLTypeName.java @@ -31,6 +31,8 @@ enum StandardSQLTypeName { INT64, /** A 64-bit IEEE binary floating-point value. */ FLOAT64, + /** A decimal value with 38 digits of precision and 9 digits of scale. */ + NUMERIC, /** Variable-length character (Unicode) data. */ STRING, /** Variable-length binary data. */ diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryAvroUtilsTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryAvroUtilsTest.java index 42f9167a0096..e000b561b1b0 100644 --- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryAvroUtilsTest.java +++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryAvroUtilsTest.java @@ -27,6 +27,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; import com.google.common.io.BaseEncoding; +import java.math.BigDecimal; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.util.ArrayList; @@ -65,6 +66,7 @@ public class BigQueryAvroUtilsTest { new TableFieldSchema().setName("quality").setType("FLOAT") /* default to NULLABLE */, new TableFieldSchema().setName("quantity").setType("INTEGER") /* default to NULLABLE */, new TableFieldSchema().setName("birthday").setType("TIMESTAMP").setMode("NULLABLE"), + new TableFieldSchema().setName("birthdayMoney").setType("NUMERIC").setMode("NULLABLE"), new TableFieldSchema().setName("flighted").setType("BOOLEAN").setMode("NULLABLE"), new TableFieldSchema().setName("sound").setType("BYTES").setMode("NULLABLE"), new TableFieldSchema().setName("anniversaryDate").setType("DATE").setMode("NULLABLE"), @@ -100,7 +102,7 @@ public void testConvertGenericRecordToTableRow() throws Exception { } { // Test type conversion for: - // INTEGER, FLOAT, TIMESTAMP, BOOLEAN, BYTES, DATE, DATETIME, TIME. + // INTEGER, FLOAT, NUMERIC, TIMESTAMP, BOOLEAN, BYTES, DATE, DATETIME, TIME. GenericRecord record = new GenericData.Record(avroSchema); byte[] soundBytes = "chirp,chirp".getBytes(StandardCharsets.UTF_8); ByteBuffer soundByteBuffer = ByteBuffer.wrap(soundBytes); @@ -108,6 +110,7 @@ public void testConvertGenericRecordToTableRow() throws Exception { record.put("number", 5L); record.put("quality", 5.0); record.put("birthday", 5L); + record.put("birthdayMoney", new String("123456789.123456789")); record.put("flighted", Boolean.TRUE); record.put("sound", soundByteBuffer); record.put("anniversaryDate", new Utf8("2000-01-01")); @@ -118,6 +121,7 @@ public void testConvertGenericRecordToTableRow() throws Exception { new TableRow() .set("number", "5") .set("birthday", "1970-01-01 00:00:00.000005 UTC") + .set("birthdayMoney", "123456789.123456789") .set("quality", 5.0) .set("associates", new ArrayList()) .set("flighted", Boolean.TRUE) @@ -135,11 +139,13 @@ public void testConvertGenericRecordToTableRow() throws Exception { GenericRecord record = new GenericData.Record(avroSchema); record.put("number", 5L); record.put("associates", Lists.newArrayList(nestedRecord)); + record.put("birthdayMoney", new BigDecimal("987654321.987654321")); TableRow convertedRow = BigQueryAvroUtils.convertGenericRecordToTableRow(record, tableSchema); TableRow row = new TableRow() .set("associates", Lists.newArrayList(new TableRow().set("species", "other"))) - .set("number", "5"); + .set("number", "5") + .set("birthdayMoney", "987654321.987654321"); assertEquals(row, convertedRow); } } @@ -164,6 +170,9 @@ public void testConvertBigQuerySchemaToAvroSchema() { assertThat( avroSchema.getField("birthday").schema(), equalTo(Schema.createUnion(Schema.create(Type.NULL), Schema.create(Type.LONG)))); + assertThat( + avroSchema.getField("birthdayMoney").schema(), + equalTo(Schema.createUnion(Schema.create(Type.NULL), Schema.create(Type.STRING)))); assertThat( avroSchema.getField("flighted").schema(), equalTo(Schema.createUnion(Schema.create(Type.NULL), Schema.create(Type.BOOLEAN)))); @@ -224,6 +233,7 @@ static class Bird { @Nullable Double quality; @Nullable Long quantity; @Nullable Long birthday; // Exercises TIMESTAMP. + @Nullable String birthdayMoney; // Exercises NUMERIC. @Nullable Boolean flighted; @Nullable ByteBuffer sound; @Nullable Utf8 anniversaryDate;