From 7cb94b219c381bf2a19aa42a6885c2d147996f6f Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Fri, 21 Jan 2022 16:46:46 -0600 Subject: [PATCH 01/30] Core: Adds Utility Class for Implementing ZOrdering --- build.gradle | 1 + .../apache/iceberg/util/ZOrderByteUtils.java | 128 +++++++++ .../iceberg/util/TestZOrderByteUtil.java | 244 ++++++++++++++++++ 3 files changed, 373 insertions(+) create mode 100644 core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java create mode 100644 core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java diff --git a/build.gradle b/build.gradle index 25bf761b242c..8609dbc959d3 100644 --- a/build.gradle +++ b/build.gradle @@ -230,6 +230,7 @@ project(':iceberg-core') { testImplementation 'org.mock-server:mockserver-netty' testImplementation 'org.mock-server:mockserver-client-java' testImplementation "org.xerial:sqlite-jdbc" + testImplementation "org.apache.commons:commons-lang3" testImplementation project(path: ':iceberg-api', configuration: 'testArtifacts') } } diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java new file mode 100644 index 000000000000..4ef3120a2217 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.util; + +import java.util.Arrays; + +/** + * Within Z-Ordering the byte representations of objects being compared must be ordered, + * this requires several types to be transformed when converted to bytes. The goal is to + * map object's whose byte representation are not lexicographically ordered into representations + * that are lexicographically ordered. + * Most of these techniques are derived from + * https://aws.amazon.com/blogs/database/z-order-indexing-for-multifaceted-queries-in-amazon-dynamodb-part-2/ + */ +public class ZOrderByteUtils { + + private ZOrderByteUtils() { + + } + + /** + * Signed ints do not have their bytes in magnitude order because of the sign bit. + * To fix this, flip the sign bit so that all negatives are ordered before positives. This essentially + * shifts the 0 value so that we don't break our ordering when we cross the new 0 value. 
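+ *
+ * For example (illustrative values, not part of the original patch): after the flip,
+ * -1 (0xFFFFFFFF) becomes 0x7FFFFFFF and 0 (0x00000000) becomes 0x80000000, so -1
+ * still sorts before 0 when the bytes are compared as unsigned values.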
+ */ + public static byte[] orderIntLikeBytes(byte[] intBytes, int size) { + if (intBytes == null) { + return new byte[size]; + } + intBytes[0] = (byte) (intBytes[0] ^ (1 << 7)); + return intBytes; + } + + /** + * IEEE 754 : + * “If two floating-point numbers in the same format are ordered (say, x < y), + * they are ordered the same way when their bits are reinterpreted as sign-magnitude integers.” + * + * Which means floats can be treated as sign magnitude integers which can then be converted into lexicographically + * comparable bytes + */ + public static byte[] orderFloatLikeBytes(byte[] floatBytes, int size) { + if (floatBytes == null) { + return new byte[size]; + } + if ((floatBytes[0] & (1 << 7)) == 0) { + // The signed magnitude is positive set the first bit (reversing the sign so positives order after negatives) + floatBytes[0] = (byte) (floatBytes[0] | (1 << 7)); + } else { + // The signed magnitude is negative so flip the first bit (reversing the sign so positives order after negatives) + // Then flip all remaining bits so numbers with greater negative magnitude come before those + // with less magnitude (reverse the order) + for (int i = 0; i < floatBytes.length; i++) { + floatBytes[i] = (byte) ~floatBytes[i]; + } + } + return floatBytes; + } + + /** + * Strings are lexicographically sortable BUT if different byte array lengths will + * ruin the Z-Ordering. (ZOrder requires that a given column contribute the same number of bytes every time). + * This implementation just uses a set size to for all output byte representations. Truncating longer strings + * and right padding 0 for shorter strings. + */ + public static byte[] orderUTF8LikeBytes(byte[] stringBytes, int size) { + if (stringBytes == null) { + return new byte[size]; + } + return Arrays.copyOf(stringBytes, size); + } + + /** + * Interleave bits using a naive loop. + * @param columnsBinary an array of byte arrays, none of which are empty + * @return their bits interleaved + */ + public static byte[] interleaveBits(byte[][] columnsBinary) { + int interleavedSize = Arrays.stream(columnsBinary).mapToInt(a -> a.length).sum(); + byte[] interleavedBytes = new byte[interleavedSize]; + int sourceBit = 7; + int sourceByte = 0; + int sourceColumn = 0; + int interleaveBit = 7; + int interleaveByte = 0; + while (interleaveByte < interleavedSize) { + // Take what we have, Get the source Bit of the source Byte, move it to the interleaveBit position + interleavedBytes[interleaveByte] = + (byte) (interleavedBytes[interleaveByte] | + (columnsBinary[sourceColumn][sourceByte] & 1 << sourceBit) >> sourceBit << interleaveBit); + + if (--interleaveBit == -1) { + // Finished a byte in our interleave byte array start a new byte + interleaveByte++; + interleaveBit = 7; + } + + // Find next column with a byte we can use + do { + if (++sourceColumn == columnsBinary.length) { + sourceColumn = 0; + if (--sourceBit == -1) { + sourceByte++; + sourceBit = 7; + } + } + } while (columnsBinary[sourceColumn].length <= sourceByte && interleaveByte < interleavedSize); + } + return interleavedBytes; + } +} diff --git a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java new file mode 100644 index 000000000000..87d69dc99182 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java @@ -0,0 +1,244 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + + +package org.apache.iceberg.util; + +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Random; +import org.apache.commons.lang3.RandomStringUtils; +import org.apache.iceberg.relocated.com.google.common.primitives.UnsignedBytes; +import org.junit.Assert; +import org.junit.Test; + +public class TestZOrderByteUtil { + private static final byte IIIIIIII = (byte) 255; + private static final byte IOIOIOIO = (byte) 170; + private static final byte OIOIOIOI = (byte) 85; + private static final byte OOOOIIII = (byte) 15; + private static final byte OOOOOOOI = (byte) 1; + private static final byte OOOOOOOO = (byte) 0; + + private static final int NUM_TESTS = 100000; + + private final Random random = new Random(42); + + private String bytesToString(byte[] bytes) { + StringBuilder result = new StringBuilder(); + for (byte b : bytes) { + result.append(String.format("%8s", Integer.toBinaryString(b & 0xFF)).replace(' ', '0')); + } + return result.toString(); + } + + /** + * Returns a non-0 length byte array + */ + private byte[] generateRandomBytes() { + int length = Math.abs(random.nextInt(100) + 1); + byte[] result = new byte[length]; + random.nextBytes(result); + return result; + } + + /** + * Test method to ensure correctness of byte interleaving code + */ + private String interleaveStrings(String[] strings) { + StringBuilder result = new StringBuilder(); + int totalLength = Arrays.stream(strings).mapToInt(String::length).sum(); + int substringIndex = 0; + int characterIndex = 0; + while (characterIndex < totalLength) { + for (String str : strings) { + if (substringIndex < str.length()) { + result.append(str.charAt(substringIndex)); + characterIndex++; + } + } + substringIndex++; + } + return result.toString(); + } + + /** + * Compares the result of a string based interleaving algorithm implemented above + * versus the binary bit-shifting algorithm used in ZOrderByteUtils. Either both + * algorithms are identically wrong or are both identically correct. 
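+ *
+ * For example (illustrative): interleaving the single-byte columns 11111111 and
+ * 00000000 must yield 10101010 10101010 under both implementations.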
+ */ + @Test + public void testInterleaveRandomExamples() { + for (int test = 0; test < NUM_TESTS; test++) { + int numByteArrays = Math.abs(random.nextInt(6)) + 1; + byte[][] testBytes = new byte[numByteArrays][]; + String[] testStrings = new String[numByteArrays]; + for (int byteIndex = 0; byteIndex < numByteArrays; byteIndex++) { + testBytes[byteIndex] = generateRandomBytes(); + testStrings[byteIndex] = bytesToString(testBytes[byteIndex]); + } + byte[] byteResult = ZOrderByteUtils.interleaveBits(testBytes); + String byteResultAsString = bytesToString(byteResult); + + String stringResult = interleaveStrings(testStrings); + + Assert.assertEquals("String interleave didn't match byte interleave", stringResult, byteResultAsString); + } + } + + @Test + public void testInterleaveEmptyBits() { + byte[][] test = new byte[4][10]; + byte[] expected = new byte[40]; + + Assert.assertArrayEquals("Should combine empty arrays", + expected, ZOrderByteUtils.interleaveBits(test)); + } + + @Test + public void testInterleaveFullBits() { + byte[][] test = new byte[4][]; + test[0] = new byte[]{IIIIIIII, IIIIIIII}; + test[1] = new byte[]{IIIIIIII}; + test[2] = new byte[0]; + test[3] = new byte[]{IIIIIIII, IIIIIIII, IIIIIIII}; + byte[] expected = new byte[]{IIIIIIII, IIIIIIII, IIIIIIII, IIIIIIII, IIIIIIII, IIIIIIII}; + + Assert.assertArrayEquals("Should combine full arrays", + expected, ZOrderByteUtils.interleaveBits(test)); + } + + @Test + public void testInterleaveMixedBits() { + byte[][] test = new byte[4][]; + test[0] = new byte[]{OOOOOOOI, IIIIIIII, OOOOOOOO, OOOOIIII}; + test[1] = new byte[]{OOOOOOOI, OOOOOOOO, IIIIIIII}; + test[2] = new byte[]{OOOOOOOI}; + test[3] = new byte[]{OOOOOOOI}; + byte[] expected = new byte[]{ + OOOOOOOO, OOOOOOOO, OOOOOOOO, OOOOIIII, + IOIOIOIO, IOIOIOIO, + OIOIOIOI, OIOIOIOI, + OOOOIIII}; + Assert.assertArrayEquals("Should combine mixed byte arrays", + expected, ZOrderByteUtils.interleaveBits(test)); + } + + @Test + public void testIntOrdering() { + for (int i = 0; i < NUM_TESTS; i++) { + int aInt = random.nextInt(); + int bInt = random.nextInt(); + int intCompare = Integer.compare(aInt, bInt); + byte[] aBytes = ZOrderByteUtils.orderIntLikeBytes(bytesOf(aInt), 4); + byte[] bBytes = ZOrderByteUtils.orderIntLikeBytes(bytesOf(bInt), 4); + int byteCompare = UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes); + + Assert.assertTrue(String.format( + "Ordering of ints should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", + aInt, bInt, intCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare), + (intCompare ^ byteCompare) >= 0); + } + } + + @Test + public void testLongOrdering() { + for (int i = 0; i < NUM_TESTS; i++) { + long aLong = random.nextInt(); + long bLong = random.nextInt(); + int longCompare = Long.compare(aLong, bLong); + byte[] aBytes = ZOrderByteUtils.orderIntLikeBytes(bytesOf(aLong), 8); + byte[] bBytes = ZOrderByteUtils.orderIntLikeBytes(bytesOf(bLong), 8); + int byteCompare = UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes); + + Assert.assertTrue(String.format( + "Ordering of ints should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", + aLong, bLong, longCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare), + (longCompare ^ byteCompare) >= 0); + } + } + + @Test + public void testFloatOrdering() { + for (int i = 0; i < NUM_TESTS; i++) { + float aFloat = random.nextFloat(); + float bFloat = random.nextFloat(); + int floatCompare = Float.compare(aFloat, bFloat); + byte[] aBytes = 
ZOrderByteUtils.orderFloatLikeBytes(bytesOf(aFloat), 4); + byte[] bBytes = ZOrderByteUtils.orderFloatLikeBytes(bytesOf(bFloat), 4); + int byteCompare = UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes); + + Assert.assertTrue(String.format( + "Ordering of ints should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", + aFloat, bFloat, floatCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare), + (floatCompare ^ byteCompare) >= 0); + } + } + + @Test + public void testDoubleOrdering() { + for (int i = 0; i < NUM_TESTS; i++) { + double aDouble = random.nextDouble(); + double bDouble = random.nextDouble(); + int doubleCompare = Double.compare(aDouble, bDouble); + byte[] aBytes = ZOrderByteUtils.orderFloatLikeBytes(bytesOf(aDouble), 8); + byte[] bBytes = ZOrderByteUtils.orderFloatLikeBytes(bytesOf(bDouble), 8); + int byteCompare = UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes); + + Assert.assertTrue(String.format( + "Ordering of ints should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", + aDouble, bDouble, doubleCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare), + (doubleCompare ^ byteCompare) >= 0); + } + } + + @Test + public void testStringOrdering() { + for (int i = 0; i < NUM_TESTS; i++) { + String aString = RandomStringUtils.random(random.nextInt(35), true, true); + String bString = RandomStringUtils.random(random.nextInt(35), true, true); + int stringCompare = aString.compareTo(bString); + byte[] aBytes = ZOrderByteUtils.orderUTF8LikeBytes(aString.getBytes(StandardCharsets.UTF_8), 128); + byte[] bBytes = ZOrderByteUtils.orderUTF8LikeBytes(bString.getBytes(StandardCharsets.UTF_8), 128); + int byteCompare = UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes); + + Assert.assertTrue(String.format( + "Ordering of ints should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", + aString, bString, stringCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare), + (stringCompare ^ byteCompare) >= 0); + } + } + + private byte[] bytesOf(int num) { + return ByteBuffer.allocate(4).putInt(num).array(); + } + + private byte[] bytesOf(long num) { + return ByteBuffer.allocate(8).putLong(num).array(); + } + + private byte[] bytesOf(float num) { + return ByteBuffer.allocate(4).putFloat(num).array(); + } + + private byte[] bytesOf(double num) { + return ByteBuffer.allocate(8).putDouble(num).array(); + } +} From 92516f702ed398ae3d287d1ab81dd43d8a9a1cce Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Tue, 25 Jan 2022 15:45:02 -0600 Subject: [PATCH 02/30] Fix JavaDoc --- core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java index 4ef3120a2217..759f101b0cc5 100644 --- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java +++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java @@ -50,7 +50,7 @@ public static byte[] orderIntLikeBytes(byte[] intBytes, int size) { /** * IEEE 754 : - * “If two floating-point numbers in the same format are ordered (say, x < y), + * “If two floating-point numbers in the same format are ordered (say, x \< y), * they are ordered the same way when their bits are reinterpreted as sign-magnitude integers.” * * Which means floats can be treated as sign magnitude integers which can then be converted into lexicographically From 
ef1d214c7408d284c5cd7283225da6bb02f350ee Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Mon, 31 Jan 2022 12:31:49 -0600 Subject: [PATCH 03/30] Switch Implementations to work on Primitives instead of ByteArrays --- .../apache/iceberg/util/ZOrderByteUtils.java | 66 ++++++++------ .../iceberg/util/TestZOrderByteUtil.java | 86 ++++++++----------- 2 files changed, 74 insertions(+), 78 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java index 759f101b0cc5..571ea24d5039 100644 --- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java +++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java @@ -19,6 +19,7 @@ package org.apache.iceberg.util; +import java.nio.ByteBuffer; import java.util.Arrays; /** @@ -28,6 +29,9 @@ * that are lexicographically ordered. * Most of these techniques are derived from * https://aws.amazon.com/blogs/database/z-order-indexing-for-multifaceted-queries-in-amazon-dynamodb-part-2/ + * + * Some implementation is taken from + * https://github.com/apache/hbase/blob/master/hbase-common/src/main/java/org/apache/hadoop/hbase/util/OrderedBytes.java */ public class ZOrderByteUtils { @@ -40,12 +44,19 @@ private ZOrderByteUtils() { * To fix this, flip the sign bit so that all negatives are ordered before positives. This essentially * shifts the 0 value so that we don't break our ordering when we cross the new 0 value. */ - public static byte[] orderIntLikeBytes(byte[] intBytes, int size) { - if (intBytes == null) { - return new byte[size]; - } - intBytes[0] = (byte) (intBytes[0] ^ (1 << 7)); - return intBytes; + public static byte[] intToOrderedBytes(int val) { + ByteBuffer bytes = ByteBuffer.allocate(Integer.BYTES); + bytes.putInt(val ^ 0x80000000); + return bytes.array(); + } + + /** + * Signed longs are treated the same as the signed ints + */ + public static byte[] longToOrderBytes(long val) { + ByteBuffer bytes = ByteBuffer.allocate(Long.BYTES); + bytes.putLong(val ^ 0x8000000000000000L); + return bytes.array(); } /** @@ -56,22 +67,23 @@ public static byte[] orderIntLikeBytes(byte[] intBytes, int size) { * Which means floats can be treated as sign magnitude integers which can then be converted into lexicographically * comparable bytes */ - public static byte[] orderFloatLikeBytes(byte[] floatBytes, int size) { - if (floatBytes == null) { - return new byte[size]; - } - if ((floatBytes[0] & (1 << 7)) == 0) { - // The signed magnitude is positive set the first bit (reversing the sign so positives order after negatives) - floatBytes[0] = (byte) (floatBytes[0] | (1 << 7)); - } else { - // The signed magnitude is negative so flip the first bit (reversing the sign so positives order after negatives) - // Then flip all remaining bits so numbers with greater negative magnitude come before those - // with less magnitude (reverse the order) - for (int i = 0; i < floatBytes.length; i++) { - floatBytes[i] = (byte) ~floatBytes[i]; - } - } - return floatBytes; + public static byte[] floatToOrderedBytes(float val) { + ByteBuffer bytes = ByteBuffer.allocate(Integer.BYTES); + int ival = Float.floatToIntBits(val); + ival ^= ((ival >> (Integer.SIZE - 1)) | Integer.MIN_VALUE); + bytes.putInt(ival); + return bytes.array(); + } + + /** + * Doubles are treated the same as floats + */ + public static byte[] doubleToOrderedBytes(double val) { + ByteBuffer bytes = ByteBuffer.allocate(Long.BYTES); + long lng = Double.doubleToLongBits(val); + lng ^= ((lng >> 
(Long.SIZE - 1)) | Long.MIN_VALUE); + bytes.putLong(lng); + return bytes.array(); } /** @@ -80,11 +92,13 @@ public static byte[] orderFloatLikeBytes(byte[] floatBytes, int size) { * This implementation just uses a set size to for all output byte representations. Truncating longer strings * and right padding 0 for shorter strings. */ - public static byte[] orderUTF8LikeBytes(byte[] stringBytes, int size) { - if (stringBytes == null) { - return new byte[size]; + public static byte[] stringToOrderedBytes(String val, int length) { + ByteBuffer bytes = ByteBuffer.allocate(length); + if (val != null) { + int maxLength = Math.min(length, val.length()); + bytes.put(val.getBytes(), 0, maxLength); } - return Arrays.copyOf(stringBytes, size); + return bytes.array(); } /** diff --git a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java index 87d69dc99182..b34f950f90c8 100644 --- a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java +++ b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java @@ -20,8 +20,6 @@ package org.apache.iceberg.util; -import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Random; import org.apache.commons.lang3.RandomStringUtils; @@ -146,15 +144,15 @@ public void testIntOrdering() { for (int i = 0; i < NUM_TESTS; i++) { int aInt = random.nextInt(); int bInt = random.nextInt(); - int intCompare = Integer.compare(aInt, bInt); - byte[] aBytes = ZOrderByteUtils.orderIntLikeBytes(bytesOf(aInt), 4); - byte[] bBytes = ZOrderByteUtils.orderIntLikeBytes(bytesOf(bInt), 4); - int byteCompare = UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes); + int intCompare = Integer.signum(Integer.compare(aInt, bInt)); + byte[] aBytes = ZOrderByteUtils.intToOrderedBytes(aInt); + byte[] bBytes = ZOrderByteUtils.intToOrderedBytes(bInt); + int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); - Assert.assertTrue(String.format( + Assert.assertEquals(String.format( "Ordering of ints should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", aInt, bInt, intCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare), - (intCompare ^ byteCompare) >= 0); + intCompare, byteCompare); } } @@ -163,15 +161,15 @@ public void testLongOrdering() { for (int i = 0; i < NUM_TESTS; i++) { long aLong = random.nextInt(); long bLong = random.nextInt(); - int longCompare = Long.compare(aLong, bLong); - byte[] aBytes = ZOrderByteUtils.orderIntLikeBytes(bytesOf(aLong), 8); - byte[] bBytes = ZOrderByteUtils.orderIntLikeBytes(bytesOf(bLong), 8); - int byteCompare = UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes); + int longCompare = Integer.signum(Long.compare(aLong, bLong)); + byte[] aBytes = ZOrderByteUtils.longToOrderBytes(aLong); + byte[] bBytes = ZOrderByteUtils.longToOrderBytes(bLong); + int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); - Assert.assertTrue(String.format( - "Ordering of ints should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", + Assert.assertEquals(String.format( + "Ordering of longs should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", aLong, bLong, longCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare), - (longCompare ^ byteCompare) >= 0); + longCompare, byteCompare); } } @@ -180,15 +178,15 @@ public void testFloatOrdering() { for (int i = 0; i 
< NUM_TESTS; i++) { float aFloat = random.nextFloat(); float bFloat = random.nextFloat(); - int floatCompare = Float.compare(aFloat, bFloat); - byte[] aBytes = ZOrderByteUtils.orderFloatLikeBytes(bytesOf(aFloat), 4); - byte[] bBytes = ZOrderByteUtils.orderFloatLikeBytes(bytesOf(bFloat), 4); - int byteCompare = UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes); + int floatCompare = Integer.signum(Float.compare(aFloat, bFloat)); + byte[] aBytes = ZOrderByteUtils.floatToOrderedBytes(aFloat); + byte[] bBytes = ZOrderByteUtils.floatToOrderedBytes(bFloat); + int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); - Assert.assertTrue(String.format( - "Ordering of ints should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", + Assert.assertEquals(String.format( + "Ordering of floats should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", aFloat, bFloat, floatCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare), - (floatCompare ^ byteCompare) >= 0); + floatCompare, byteCompare); } } @@ -197,15 +195,15 @@ public void testDoubleOrdering() { for (int i = 0; i < NUM_TESTS; i++) { double aDouble = random.nextDouble(); double bDouble = random.nextDouble(); - int doubleCompare = Double.compare(aDouble, bDouble); - byte[] aBytes = ZOrderByteUtils.orderFloatLikeBytes(bytesOf(aDouble), 8); - byte[] bBytes = ZOrderByteUtils.orderFloatLikeBytes(bytesOf(bDouble), 8); - int byteCompare = UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes); + int doubleCompare = Integer.signum(Double.compare(aDouble, bDouble)); + byte[] aBytes = ZOrderByteUtils.doubleToOrderedBytes(aDouble); + byte[] bBytes = ZOrderByteUtils.doubleToOrderedBytes(bDouble); + int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); - Assert.assertTrue(String.format( - "Ordering of ints should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", + Assert.assertEquals(String.format( + "Ordering of doubles should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", aDouble, bDouble, doubleCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare), - (doubleCompare ^ byteCompare) >= 0); + doubleCompare, byteCompare); } } @@ -214,31 +212,15 @@ public void testStringOrdering() { for (int i = 0; i < NUM_TESTS; i++) { String aString = RandomStringUtils.random(random.nextInt(35), true, true); String bString = RandomStringUtils.random(random.nextInt(35), true, true); - int stringCompare = aString.compareTo(bString); - byte[] aBytes = ZOrderByteUtils.orderUTF8LikeBytes(aString.getBytes(StandardCharsets.UTF_8), 128); - byte[] bBytes = ZOrderByteUtils.orderUTF8LikeBytes(bString.getBytes(StandardCharsets.UTF_8), 128); - int byteCompare = UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes); + int stringCompare = Integer.signum(aString.compareTo(bString)); + byte[] aBytes = ZOrderByteUtils.stringToOrderedBytes(aString, 128); + byte[] bBytes = ZOrderByteUtils.stringToOrderedBytes(bString, 128); + int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); - Assert.assertTrue(String.format( - "Ordering of ints should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", + Assert.assertEquals(String.format( + "Ordering of strings should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", aString, bString, stringCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare), - (stringCompare ^ byteCompare) >= 0); + 
stringCompare, byteCompare); } } - - private byte[] bytesOf(int num) { - return ByteBuffer.allocate(4).putInt(num).array(); - } - - private byte[] bytesOf(long num) { - return ByteBuffer.allocate(8).putLong(num).array(); - } - - private byte[] bytesOf(float num) { - return ByteBuffer.allocate(4).putFloat(num).array(); - } - - private byte[] bytesOf(double num) { - return ByteBuffer.allocate(8).putDouble(num).array(); - } } From 545e373e055ffdc71b2d0c683675032cc1566af0 Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Mon, 31 Jan 2022 16:47:36 -0600 Subject: [PATCH 04/30] Clean up RandomStringUtilUsage --- build.gradle | 1 - .../main/java/org/apache/iceberg/util/ZOrderByteUtils.java | 6 ++++-- .../java/org/apache/iceberg/util/TestZOrderByteUtil.java | 6 +++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/build.gradle b/build.gradle index 8609dbc959d3..25bf761b242c 100644 --- a/build.gradle +++ b/build.gradle @@ -230,7 +230,6 @@ project(':iceberg-core') { testImplementation 'org.mock-server:mockserver-netty' testImplementation 'org.mock-server:mockserver-client-java' testImplementation "org.xerial:sqlite-jdbc" - testImplementation "org.apache.commons:commons-lang3" testImplementation project(path: ':iceberg-api', configuration: 'testArtifacts') } } diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java index 571ea24d5039..750831a9a5e7 100644 --- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java +++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java @@ -120,7 +120,8 @@ public static byte[] interleaveBits(byte[][] columnsBinary) { (byte) (interleavedBytes[interleaveByte] | (columnsBinary[sourceColumn][sourceByte] & 1 << sourceBit) >> sourceBit << interleaveBit); - if (--interleaveBit == -1) { + --interleaveBit; + if (interleaveBit == -1) { // Finished a byte in our interleave byte array start a new byte interleaveByte++; interleaveBit = 7; @@ -128,7 +129,8 @@ public static byte[] interleaveBits(byte[][] columnsBinary) { // Find next column with a byte we can use do { - if (++sourceColumn == columnsBinary.length) { + ++sourceColumn; + if (sourceColumn == columnsBinary.length) { sourceColumn = 0; if (--sourceBit == -1) { sourceByte++; diff --git a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java index b34f950f90c8..17f19ec01af7 100644 --- a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java +++ b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java @@ -22,8 +22,8 @@ import java.util.Arrays; import java.util.Random; -import org.apache.commons.lang3.RandomStringUtils; import org.apache.iceberg.relocated.com.google.common.primitives.UnsignedBytes; +import org.apache.iceberg.types.Types; import org.junit.Assert; import org.junit.Test; @@ -210,8 +210,8 @@ public void testDoubleOrdering() { @Test public void testStringOrdering() { for (int i = 0; i < NUM_TESTS; i++) { - String aString = RandomStringUtils.random(random.nextInt(35), true, true); - String bString = RandomStringUtils.random(random.nextInt(35), true, true); + String aString = (String) RandomUtil.generatePrimitive(Types.StringType.get(), random); + String bString = (String) RandomUtil.generatePrimitive(Types.StringType.get(), random); int stringCompare = Integer.signum(aString.compareTo(bString)); byte[] aBytes = ZOrderByteUtils.stringToOrderedBytes(aString, 128); byte[] bBytes = 
ZOrderByteUtils.stringToOrderedBytes(bString, 128); From 1374247cb298bc5ff6a0d6bc1517ed5f830b49e0 Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Mon, 31 Jan 2022 16:56:07 -0600 Subject: [PATCH 05/30] Fix JavaDoc --- core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java index 750831a9a5e7..e41a5b3be763 100644 --- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java +++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java @@ -61,7 +61,7 @@ public static byte[] longToOrderBytes(long val) { /** * IEEE 754 : - * “If two floating-point numbers in the same format are ordered (say, x \< y), + * “If two floating-point numbers in the same format are ordered (say, x {@literal <} y), * they are ordered the same way when their bits are reinterpreted as sign-magnitude integers.” * * Which means floats can be treated as sign magnitude integers which can then be converted into lexicographically From 2c48f0cbb5431e088ff06af7adb2a70e76109eea Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Mon, 31 Jan 2022 17:41:05 -0600 Subject: [PATCH 06/30] Add Functions for Smaller Types --- .../apache/iceberg/util/ZOrderByteUtils.java | 18 ++++++++++ .../iceberg/util/TestZOrderByteUtil.java | 34 +++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java index e41a5b3be763..ae68b69b745e 100644 --- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java +++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java @@ -59,6 +59,24 @@ public static byte[] longToOrderBytes(long val) { return bytes.array(); } + /** + * Signed shorts are treated the same as the signed ints + */ + public static byte[] shortToOrderBytes(short val) { + ByteBuffer bytes = ByteBuffer.allocate(Short.BYTES); + bytes.putShort((short) (val ^ (0x8000))); + return bytes.array(); + } + + /** + * Signed tiny ints are treated the same as the signed ints + */ + public static byte[] tinyintToOrderedBytes(byte val) { + ByteBuffer bytes = ByteBuffer.allocate(Byte.BYTES); + bytes.put((byte) (val ^ (0x80))); + return bytes.array(); + } + /** * IEEE 754 : * “If two floating-point numbers in the same format are ordered (say, x {@literal <} y), diff --git a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java index 17f19ec01af7..81caf0ad0fb3 100644 --- a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java +++ b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java @@ -173,6 +173,40 @@ public void testLongOrdering() { } } + @Test + public void testShortOrdering() { + for (int i = 0; i < NUM_TESTS; i++) { + short aShort = (short) (random.nextInt() % (Short.MAX_VALUE + 1)); + short bShort = (short) (random.nextInt() % (Short.MAX_VALUE + 1)); + int longCompare = Integer.signum(Long.compare(aShort, bShort)); + byte[] aBytes = ZOrderByteUtils.longToOrderBytes(aShort); + byte[] bBytes = ZOrderByteUtils.longToOrderBytes(bShort); + int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); + + Assert.assertEquals(String.format( + "Ordering of longs should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", + aShort, bShort, 
longCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare), + longCompare, byteCompare); + } + } + + @Test + public void testTinyOrdering() { + for (int i = 0; i < NUM_TESTS; i++) { + long aByte = (byte) (random.nextInt() % (Byte.MAX_VALUE + 1)); + long bByte = (byte) (random.nextInt() % (Byte.MAX_VALUE + 1)); + int longCompare = Integer.signum(Long.compare(aByte, bByte)); + byte[] aBytes = ZOrderByteUtils.longToOrderBytes(aByte); + byte[] bBytes = ZOrderByteUtils.longToOrderBytes(bByte); + int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); + + Assert.assertEquals(String.format( + "Ordering of longs should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", + aByte, bByte, longCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare), + longCompare, byteCompare); + } + } + @Test public void testFloatOrdering() { for (int i = 0; i < NUM_TESTS; i++) { From 55fa4c8b5e57dd482a4b944770e2ba28bd7c55ea Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Mon, 7 Feb 2022 15:00:45 -0600 Subject: [PATCH 07/30] Updates for reviewer comments --- .../org/apache/iceberg/util/ByteBuffers.java | 10 +++ .../apache/iceberg/util/ZOrderByteUtils.java | 84 +++++++++++-------- .../iceberg/util/TestZOrderByteUtil.java | 50 +++++++---- 3 files changed, 93 insertions(+), 51 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/util/ByteBuffers.java b/api/src/main/java/org/apache/iceberg/util/ByteBuffers.java index 213b222dc507..efc05f179f82 100644 --- a/api/src/main/java/org/apache/iceberg/util/ByteBuffers.java +++ b/api/src/main/java/org/apache/iceberg/util/ByteBuffers.java @@ -21,6 +21,7 @@ import java.nio.ByteBuffer; import java.util.Arrays; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; public class ByteBuffers { @@ -46,6 +47,15 @@ public static byte[] toByteArray(ByteBuffer buffer) { } } + public static ByteBuffer reuse(ByteBuffer reuse, int length) { + Preconditions.checkArgument(reuse.hasArray() && reuse.arrayOffset() == 0 && reuse.capacity() == length, + "Cannot reuse buffer: Should be an array %s, should have an offset of 0 %s, should be of size %s was %s", + reuse.hasArray(), reuse.arrayOffset(), length, reuse.capacity()); + reuse.position(0); + reuse.limit(length); + return reuse; + } + public static ByteBuffer copy(ByteBuffer buffer) { if (buffer == null) { return null; diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java index ae68b69b745e..deab4450a61d 100644 --- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java +++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java @@ -44,35 +44,35 @@ private ZOrderByteUtils() { * To fix this, flip the sign bit so that all negatives are ordered before positives. This essentially * shifts the 0 value so that we don't break our ordering when we cross the new 0 value. 
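+ *
+ * The {@code reuse} buffer must be exactly {@link Integer#BYTES} long, e.g.
+ * (illustrative, mirroring the tests)
+ * {@code intToOrderedBytes(val, ByteBuffer.allocate(Integer.BYTES))}.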
*/ - public static byte[] intToOrderedBytes(int val) { - ByteBuffer bytes = ByteBuffer.allocate(Integer.BYTES); + public static byte[] intToOrderedBytes(int val, ByteBuffer reuse) { + ByteBuffer bytes = ByteBuffers.reuse(reuse, Integer.BYTES); bytes.putInt(val ^ 0x80000000); return bytes.array(); } /** - * Signed longs are treated the same as the signed ints + * Signed longs are treated the same as the signed ints in {@link #intToOrderedBytes(int, ByteBuffer)} */ - public static byte[] longToOrderBytes(long val) { - ByteBuffer bytes = ByteBuffer.allocate(Long.BYTES); + public static byte[] longToOrderedBytes(long val, ByteBuffer reuse) { + ByteBuffer bytes = ByteBuffers.reuse(reuse, Long.BYTES); bytes.putLong(val ^ 0x8000000000000000L); return bytes.array(); } /** - * Signed shorts are treated the same as the signed ints + * Signed shorts are treated the same as the signed ints in {@link #intToOrderedBytes(int, ByteBuffer)} */ - public static byte[] shortToOrderBytes(short val) { - ByteBuffer bytes = ByteBuffer.allocate(Short.BYTES); + public static byte[] shortToOrderedBytes(short val, ByteBuffer reuse) { + ByteBuffer bytes = ByteBuffers.reuse(reuse, Short.BYTES); bytes.putShort((short) (val ^ (0x8000))); return bytes.array(); } /** - * Signed tiny ints are treated the same as the signed ints + * Signed tiny ints are treated the same as the signed ints in {@link #intToOrderedBytes(int, ByteBuffer)} */ - public static byte[] tinyintToOrderedBytes(byte val) { - ByteBuffer bytes = ByteBuffer.allocate(Byte.BYTES); + public static byte[] tinyintToOrderedBytes(byte val, ByteBuffer reuse) { + ByteBuffer bytes = ByteBuffers.reuse(reuse, Byte.BYTES); bytes.put((byte) (val ^ (0x80))); return bytes.array(); } @@ -85,8 +85,8 @@ public static byte[] tinyintToOrderedBytes(byte val) { * Which means floats can be treated as sign magnitude integers which can then be converted into lexicographically * comparable bytes */ - public static byte[] floatToOrderedBytes(float val) { - ByteBuffer bytes = ByteBuffer.allocate(Integer.BYTES); + public static byte[] floatToOrderedBytes(float val, ByteBuffer reuse) { + ByteBuffer bytes = ByteBuffers.reuse(reuse, Float.BYTES); int ival = Float.floatToIntBits(val); ival ^= ((ival >> (Integer.SIZE - 1)) | Integer.MIN_VALUE); bytes.putInt(ival); @@ -94,10 +94,10 @@ public static byte[] floatToOrderedBytes(float val) { } /** - * Doubles are treated the same as floats + * Doubles are treated the same as floats in {@link #floatToOrderedBytes(float, ByteBuffer)} */ - public static byte[] doubleToOrderedBytes(double val) { - ByteBuffer bytes = ByteBuffer.allocate(Long.BYTES); + public static byte[] doubleToOrderedBytes(double val, ByteBuffer reuse) { + ByteBuffer bytes = ByteBuffers.reuse(reuse, Double.BYTES); long lng = Double.doubleToLongBits(val); lng ^= ((lng >> (Long.SIZE - 1)) | Long.MIN_VALUE); bytes.putLong(lng); @@ -108,54 +108,70 @@ public static byte[] doubleToOrderedBytes(double val) { * Strings are lexicographically sortable BUT if different byte array lengths will * ruin the Z-Ordering. (ZOrder requires that a given column contribute the same number of bytes every time). * This implementation just uses a set size to for all output byte representations. Truncating longer strings - * and right padding 0 for shorter strings. + * and right padding 0 for shorter strings. Requires UTF8 (or ASCII) encoding for ordering guarantees to hold. 
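+ *
+ * For example (illustrative): with a length of 4, "abcdef" contributes the bytes of
+ * "abcd", while "ab" contributes 'a', 'b', 0x00, 0x00.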
*/ - public static byte[] stringToOrderedBytes(String val, int length) { - ByteBuffer bytes = ByteBuffer.allocate(length); + public static byte[] stringToOrderedBytes(String val, int length, ByteBuffer reuse) { + ByteBuffer bytes = ByteBuffers.reuse(reuse, length); + Arrays.fill(bytes.array(), 0, length, (byte) 0x00); if (val != null) { int maxLength = Math.min(length, val.length()); + // We may truncate mid-character bytes.put(val.getBytes(), 0, maxLength); } return bytes.array(); } /** - * Interleave bits using a naive loop. - * @param columnsBinary an array of byte arrays, none of which are empty - * @return their bits interleaved + * Interleave bits using a naive loop. Variable length inputs are allowed but to get a consistent ordering it is + * required that every column contribute the same number of bytes in each invocation. Bits are interleaved from all + * columns that have a bit available at that position. Once a Column has no more bits to produce it is skipped in the + * interleaving. + * @param columnsBinary an array of ordered byte representations of the columns being ZOrdered + * @return the columnbytes interleaved */ public static byte[] interleaveBits(byte[][] columnsBinary) { int interleavedSize = Arrays.stream(columnsBinary).mapToInt(a -> a.length).sum(); byte[] interleavedBytes = new byte[interleavedSize]; - int sourceBit = 7; - int sourceByte = 0; int sourceColumn = 0; - int interleaveBit = 7; + int sourceByte = 0; + int sourceBit = 7; int interleaveByte = 0; - while (interleaveByte < interleavedSize) { - // Take what we have, Get the source Bit of the source Byte, move it to the interleaveBit position - interleavedBytes[interleaveByte] = - (byte) (interleavedBytes[interleaveByte] | - (columnsBinary[sourceColumn][sourceByte] & 1 << sourceBit) >> sourceBit << interleaveBit); + int interleaveBit = 7; + while (interleaveByte < interleavedSize) { + // Take the source bit from source byte and move it to the output bit position + interleavedBytes[interleaveByte] |= + (columnsBinary[sourceColumn][sourceByte] & 1 << sourceBit) >>> sourceBit << interleaveBit; --interleaveBit; + + // Check if an output byte has been completed if (interleaveBit == -1) { - // Finished a byte in our interleave byte array start a new byte + // Move to the next output byte interleaveByte++; + // Move to the highest order bit of the new output byte interleaveBit = 7; } - // Find next column with a byte we can use + // Check if the last output byte has been completed + if (interleaveByte == interleavedSize) { + break; + } + + // Find the next source bit to interleave do { + // Move to next column ++sourceColumn; if (sourceColumn == columnsBinary.length) { + // If the last source column was used, reset to next bit of first column sourceColumn = 0; - if (--sourceBit == -1) { + --sourceBit; + if (sourceBit == -1) { + // If the last bit of the source byte was used, reset to the highest bit of the next byte sourceByte++; sourceBit = 7; } } - } while (columnsBinary[sourceColumn].length <= sourceByte && interleaveByte < interleavedSize); + } while (columnsBinary[sourceColumn].length <= sourceByte); } return interleavedBytes; } diff --git a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java index 81caf0ad0fb3..e2ff29d76c3a 100644 --- a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java +++ b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java @@ -20,6 +20,7 @@ package org.apache.iceberg.util; 
+import java.nio.ByteBuffer; import java.util.Arrays; import java.util.Random; import org.apache.iceberg.relocated.com.google.common.primitives.UnsignedBytes; @@ -36,6 +37,7 @@ public class TestZOrderByteUtil { private static final byte OOOOOOOO = (byte) 0; private static final int NUM_TESTS = 100000; + private static final int NUM_INTERLEAVE_TESTS = 1000; private final Random random = new Random(42); @@ -84,7 +86,7 @@ private String interleaveStrings(String[] strings) { */ @Test public void testInterleaveRandomExamples() { - for (int test = 0; test < NUM_TESTS; test++) { + for (int test = 0; test < NUM_INTERLEAVE_TESTS; test++) { int numByteArrays = Math.abs(random.nextInt(6)) + 1; byte[][] testBytes = new byte[numByteArrays][]; String[] testStrings = new String[numByteArrays]; @@ -141,12 +143,14 @@ public void testInterleaveMixedBits() { @Test public void testIntOrdering() { + ByteBuffer aBuffer = ByteBuffer.allocate(Integer.BYTES); + ByteBuffer bBuffer = ByteBuffer.allocate(Integer.BYTES); for (int i = 0; i < NUM_TESTS; i++) { int aInt = random.nextInt(); int bInt = random.nextInt(); int intCompare = Integer.signum(Integer.compare(aInt, bInt)); - byte[] aBytes = ZOrderByteUtils.intToOrderedBytes(aInt); - byte[] bBytes = ZOrderByteUtils.intToOrderedBytes(bInt); + byte[] aBytes = ZOrderByteUtils.intToOrderedBytes(aInt, aBuffer); + byte[] bBytes = ZOrderByteUtils.intToOrderedBytes(bInt, bBuffer); int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); Assert.assertEquals(String.format( @@ -158,12 +162,14 @@ public void testIntOrdering() { @Test public void testLongOrdering() { + ByteBuffer aBuffer = ByteBuffer.allocate(Long.BYTES); + ByteBuffer bBuffer = ByteBuffer.allocate(Long.BYTES); for (int i = 0; i < NUM_TESTS; i++) { long aLong = random.nextInt(); long bLong = random.nextInt(); int longCompare = Integer.signum(Long.compare(aLong, bLong)); - byte[] aBytes = ZOrderByteUtils.longToOrderBytes(aLong); - byte[] bBytes = ZOrderByteUtils.longToOrderBytes(bLong); + byte[] aBytes = ZOrderByteUtils.longToOrderedBytes(aLong, aBuffer); + byte[] bBytes = ZOrderByteUtils.longToOrderedBytes(bLong, bBuffer); int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); Assert.assertEquals(String.format( @@ -175,12 +181,14 @@ public void testLongOrdering() { @Test public void testShortOrdering() { + ByteBuffer aBuffer = ByteBuffer.allocate(Short.BYTES); + ByteBuffer bBuffer = ByteBuffer.allocate(Short.BYTES); for (int i = 0; i < NUM_TESTS; i++) { short aShort = (short) (random.nextInt() % (Short.MAX_VALUE + 1)); short bShort = (short) (random.nextInt() % (Short.MAX_VALUE + 1)); int longCompare = Integer.signum(Long.compare(aShort, bShort)); - byte[] aBytes = ZOrderByteUtils.longToOrderBytes(aShort); - byte[] bBytes = ZOrderByteUtils.longToOrderBytes(bShort); + byte[] aBytes = ZOrderByteUtils.shortToOrderedBytes(aShort, aBuffer); + byte[] bBytes = ZOrderByteUtils.shortToOrderedBytes(bShort, bBuffer); int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); Assert.assertEquals(String.format( @@ -192,12 +200,14 @@ public void testShortOrdering() { @Test public void testTinyOrdering() { + ByteBuffer aBuffer = ByteBuffer.allocate(Byte.BYTES); + ByteBuffer bBuffer = ByteBuffer.allocate(Byte.BYTES); for (int i = 0; i < NUM_TESTS; i++) { - long aByte = (byte) (random.nextInt() % (Byte.MAX_VALUE + 1)); - long bByte = (byte) (random.nextInt() % (Byte.MAX_VALUE + 1)); + byte aByte = 
(byte) (random.nextInt() % (Byte.MAX_VALUE + 1)); + byte bByte = (byte) (random.nextInt() % (Byte.MAX_VALUE + 1)); int longCompare = Integer.signum(Long.compare(aByte, bByte)); - byte[] aBytes = ZOrderByteUtils.longToOrderBytes(aByte); - byte[] bBytes = ZOrderByteUtils.longToOrderBytes(bByte); + byte[] aBytes = ZOrderByteUtils.tinyintToOrderedBytes(aByte, aBuffer); + byte[] bBytes = ZOrderByteUtils.tinyintToOrderedBytes(bByte, bBuffer); int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); Assert.assertEquals(String.format( @@ -209,12 +219,14 @@ public void testTinyOrdering() { @Test public void testFloatOrdering() { + ByteBuffer aBuffer = ByteBuffer.allocate(Float.BYTES); + ByteBuffer bBuffer = ByteBuffer.allocate(Float.BYTES); for (int i = 0; i < NUM_TESTS; i++) { float aFloat = random.nextFloat(); float bFloat = random.nextFloat(); int floatCompare = Integer.signum(Float.compare(aFloat, bFloat)); - byte[] aBytes = ZOrderByteUtils.floatToOrderedBytes(aFloat); - byte[] bBytes = ZOrderByteUtils.floatToOrderedBytes(bFloat); + byte[] aBytes = ZOrderByteUtils.floatToOrderedBytes(aFloat, aBuffer); + byte[] bBytes = ZOrderByteUtils.floatToOrderedBytes(bFloat, bBuffer); int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); Assert.assertEquals(String.format( @@ -226,12 +238,14 @@ public void testFloatOrdering() { @Test public void testDoubleOrdering() { + ByteBuffer aBuffer = ByteBuffer.allocate(Double.BYTES); + ByteBuffer bBuffer = ByteBuffer.allocate(Double.BYTES); for (int i = 0; i < NUM_TESTS; i++) { double aDouble = random.nextDouble(); double bDouble = random.nextDouble(); int doubleCompare = Integer.signum(Double.compare(aDouble, bDouble)); - byte[] aBytes = ZOrderByteUtils.doubleToOrderedBytes(aDouble); - byte[] bBytes = ZOrderByteUtils.doubleToOrderedBytes(bDouble); + byte[] aBytes = ZOrderByteUtils.doubleToOrderedBytes(aDouble, aBuffer); + byte[] bBytes = ZOrderByteUtils.doubleToOrderedBytes(bDouble, bBuffer); int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); Assert.assertEquals(String.format( @@ -243,12 +257,14 @@ public void testDoubleOrdering() { @Test public void testStringOrdering() { + ByteBuffer aBuffer = ByteBuffer.allocate(128); + ByteBuffer bBuffer = ByteBuffer.allocate(128); for (int i = 0; i < NUM_TESTS; i++) { String aString = (String) RandomUtil.generatePrimitive(Types.StringType.get(), random); String bString = (String) RandomUtil.generatePrimitive(Types.StringType.get(), random); int stringCompare = Integer.signum(aString.compareTo(bString)); - byte[] aBytes = ZOrderByteUtils.stringToOrderedBytes(aString, 128); - byte[] bBytes = ZOrderByteUtils.stringToOrderedBytes(bString, 128); + byte[] aBytes = ZOrderByteUtils.stringToOrderedBytes(aString, 128, aBuffer); + byte[] bBytes = ZOrderByteUtils.stringToOrderedBytes(bString, 128, bBuffer); int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); Assert.assertEquals(String.format( From 8c7eef7ce7c50dcb447141d39d5ca08713b4a1d4 Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Mon, 7 Feb 2022 19:45:49 -0600 Subject: [PATCH 08/30] Specify Output Size --- .../org/apache/iceberg/util/ZOrderByteUtils.java | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java index deab4450a61d..f4d28572be84 
100644 --- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java +++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java @@ -121,16 +121,24 @@ public static byte[] stringToOrderedBytes(String val, int length, ByteBuffer reu return bytes.array(); } + /** + * For Testing interleave all available bytes + */ + static byte[] interleaveBits(byte[][] columnsBinary) { + return interleaveBits(columnsBinary, + Arrays.stream(columnsBinary).mapToInt(column -> column.length).max().getAsInt()); + } + /** * Interleave bits using a naive loop. Variable length inputs are allowed but to get a consistent ordering it is * required that every column contribute the same number of bytes in each invocation. Bits are interleaved from all * columns that have a bit available at that position. Once a Column has no more bits to produce it is skipped in the * interleaving. * @param columnsBinary an array of ordered byte representations of the columns being ZOrdered + * @param interleavedSize the number of bytes to use in the output * @return the columnbytes interleaved */ - public static byte[] interleaveBits(byte[][] columnsBinary) { - int interleavedSize = Arrays.stream(columnsBinary).mapToInt(a -> a.length).sum(); + public static byte[] interleaveBits(byte[][] columnsBinary, int interleavedSize) { byte[] interleavedBytes = new byte[interleavedSize]; int sourceColumn = 0; int sourceByte = 0; From 62a74b9f39afac90c629d261d956a3e7a7d5db24 Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Tue, 8 Feb 2022 08:48:29 -0600 Subject: [PATCH 09/30] Fix Encoding Also a patch for the test interleave method length calculation --- .../main/java/org/apache/iceberg/util/ZOrderByteUtils.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java index f4d28572be84..52180cac7a2d 100644 --- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java +++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java @@ -20,6 +20,7 @@ package org.apache.iceberg.util; import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; import java.util.Arrays; /** @@ -108,7 +109,7 @@ public static byte[] doubleToOrderedBytes(double val, ByteBuffer reuse) { * Strings are lexicographically sortable BUT if different byte array lengths will * ruin the Z-Ordering. (ZOrder requires that a given column contribute the same number of bytes every time). * This implementation just uses a set size to for all output byte representations. Truncating longer strings - * and right padding 0 for shorter strings. Requires UTF8 (or ASCII) encoding for ordering guarantees to hold. + * and right padding 0 for shorter strings. 
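+ * Note that the cut-off is applied to the encoded bytes, so (illustrative) a
+ * multi-byte UTF-8 character that straddles the cut-off point loses its trailing
+ * bytes.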
*/ public static byte[] stringToOrderedBytes(String val, int length, ByteBuffer reuse) { ByteBuffer bytes = ByteBuffers.reuse(reuse, length); @@ -116,7 +117,7 @@ public static byte[] stringToOrderedBytes(String val, int length, ByteBuffer reu if (val != null) { int maxLength = Math.min(length, val.length()); // We may truncate mid-character - bytes.put(val.getBytes(), 0, maxLength); + bytes.put(val.getBytes(StandardCharsets.UTF_8), 0, maxLength); } return bytes.array(); } @@ -126,7 +127,7 @@ public static byte[] stringToOrderedBytes(String val, int length, ByteBuffer reu */ static byte[] interleaveBits(byte[][] columnsBinary) { return interleaveBits(columnsBinary, - Arrays.stream(columnsBinary).mapToInt(column -> column.length).max().getAsInt()); + Arrays.stream(columnsBinary).mapToInt(column -> column.length).sum()); } /** From 0bdabea0bb5fa4addec9c8e72c8662ef8f1e79a6 Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Tue, 8 Feb 2022 11:58:08 -0600 Subject: [PATCH 10/30] Methods return ByteBuffers, Strings are efit into our buffer using CharsetEncoder.encode --- .../apache/iceberg/util/ZOrderByteUtils.java | 32 ++++++++++--------- .../iceberg/util/TestZOrderByteUtil.java | 31 ++++++++++-------- 2 files changed, 34 insertions(+), 29 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java index 52180cac7a2d..967aa0bf7c5c 100644 --- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java +++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java @@ -20,6 +20,8 @@ package org.apache.iceberg.util; import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharsetEncoder; import java.nio.charset.StandardCharsets; import java.util.Arrays; @@ -45,37 +47,37 @@ private ZOrderByteUtils() { * To fix this, flip the sign bit so that all negatives are ordered before positives. This essentially * shifts the 0 value so that we don't break our ordering when we cross the new 0 value. 
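+ *
+ * The returned buffer is the {@code reuse} buffer itself; callers needing a byte
+ * array can call {@code array()} on the result, e.g. (mirroring the updated tests)
+ * {@code intToOrderedBytes(val, buffer).array()}.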
*/ - public static byte[] intToOrderedBytes(int val, ByteBuffer reuse) { + public static ByteBuffer intToOrderedBytes(int val, ByteBuffer reuse) { ByteBuffer bytes = ByteBuffers.reuse(reuse, Integer.BYTES); bytes.putInt(val ^ 0x80000000); - return bytes.array(); + return bytes; } /** * Signed longs are treated the same as the signed ints in {@link #intToOrderedBytes(int, ByteBuffer)} */ - public static byte[] longToOrderedBytes(long val, ByteBuffer reuse) { + public static ByteBuffer longToOrderedBytes(long val, ByteBuffer reuse) { ByteBuffer bytes = ByteBuffers.reuse(reuse, Long.BYTES); bytes.putLong(val ^ 0x8000000000000000L); - return bytes.array(); + return bytes; } /** * Signed shorts are treated the same as the signed ints in {@link #intToOrderedBytes(int, ByteBuffer)} */ - public static byte[] shortToOrderedBytes(short val, ByteBuffer reuse) { + public static ByteBuffer shortToOrderedBytes(short val, ByteBuffer reuse) { ByteBuffer bytes = ByteBuffers.reuse(reuse, Short.BYTES); bytes.putShort((short) (val ^ (0x8000))); - return bytes.array(); + return bytes; } /** * Signed tiny ints are treated the same as the signed ints in {@link #intToOrderedBytes(int, ByteBuffer)} */ - public static byte[] tinyintToOrderedBytes(byte val, ByteBuffer reuse) { + public static ByteBuffer tinyintToOrderedBytes(byte val, ByteBuffer reuse) { ByteBuffer bytes = ByteBuffers.reuse(reuse, Byte.BYTES); bytes.put((byte) (val ^ (0x80))); - return bytes.array(); + return bytes; } /** @@ -86,23 +88,23 @@ public static byte[] tinyintToOrderedBytes(byte val, ByteBuffer reuse) { * Which means floats can be treated as sign magnitude integers which can then be converted into lexicographically * comparable bytes */ - public static byte[] floatToOrderedBytes(float val, ByteBuffer reuse) { + public static ByteBuffer floatToOrderedBytes(float val, ByteBuffer reuse) { ByteBuffer bytes = ByteBuffers.reuse(reuse, Float.BYTES); int ival = Float.floatToIntBits(val); ival ^= ((ival >> (Integer.SIZE - 1)) | Integer.MIN_VALUE); bytes.putInt(ival); - return bytes.array(); + return bytes; } /** * Doubles are treated the same as floats in {@link #floatToOrderedBytes(float, ByteBuffer)} */ - public static byte[] doubleToOrderedBytes(double val, ByteBuffer reuse) { + public static ByteBuffer doubleToOrderedBytes(double val, ByteBuffer reuse) { ByteBuffer bytes = ByteBuffers.reuse(reuse, Double.BYTES); long lng = Double.doubleToLongBits(val); lng ^= ((lng >> (Long.SIZE - 1)) | Long.MIN_VALUE); bytes.putLong(lng); - return bytes.array(); + return bytes; } /** @@ -111,15 +113,15 @@ public static byte[] doubleToOrderedBytes(double val, ByteBuffer reuse) { * This implementation just uses a set size to for all output byte representations. Truncating longer strings * and right padding 0 for shorter strings. 
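+ *
+ * The caller supplies the {@link CharsetEncoder}; the tests use (illustrative)
+ * {@code stringToOrderedBytes(val, 128, buffer, StandardCharsets.UTF_8.newEncoder())}.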
*/ - public static byte[] stringToOrderedBytes(String val, int length, ByteBuffer reuse) { + public static ByteBuffer stringToOrderedBytes(String val, int length, ByteBuffer reuse, CharsetEncoder encoder) { ByteBuffer bytes = ByteBuffers.reuse(reuse, length); Arrays.fill(bytes.array(), 0, length, (byte) 0x00); if (val != null) { int maxLength = Math.min(length, val.length()); // We may truncate mid-character - bytes.put(val.getBytes(StandardCharsets.UTF_8), 0, maxLength); + encoder.encode(CharBuffer.wrap(val), bytes, true); } - return bytes.array(); + return bytes; } /** diff --git a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java index e2ff29d76c3a..bf84319d0d45 100644 --- a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java +++ b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java @@ -21,6 +21,8 @@ package org.apache.iceberg.util; import java.nio.ByteBuffer; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Random; import org.apache.iceberg.relocated.com.google.common.primitives.UnsignedBytes; @@ -149,8 +151,8 @@ public void testIntOrdering() { int aInt = random.nextInt(); int bInt = random.nextInt(); int intCompare = Integer.signum(Integer.compare(aInt, bInt)); - byte[] aBytes = ZOrderByteUtils.intToOrderedBytes(aInt, aBuffer); - byte[] bBytes = ZOrderByteUtils.intToOrderedBytes(bInt, bBuffer); + byte[] aBytes = ZOrderByteUtils.intToOrderedBytes(aInt, aBuffer).array(); + byte[] bBytes = ZOrderByteUtils.intToOrderedBytes(bInt, bBuffer).array(); int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); Assert.assertEquals(String.format( @@ -168,8 +170,8 @@ public void testLongOrdering() { long aLong = random.nextInt(); long bLong = random.nextInt(); int longCompare = Integer.signum(Long.compare(aLong, bLong)); - byte[] aBytes = ZOrderByteUtils.longToOrderedBytes(aLong, aBuffer); - byte[] bBytes = ZOrderByteUtils.longToOrderedBytes(bLong, bBuffer); + byte[] aBytes = ZOrderByteUtils.longToOrderedBytes(aLong, aBuffer).array(); + byte[] bBytes = ZOrderByteUtils.longToOrderedBytes(bLong, bBuffer).array(); int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); Assert.assertEquals(String.format( @@ -187,8 +189,8 @@ public void testShortOrdering() { short aShort = (short) (random.nextInt() % (Short.MAX_VALUE + 1)); short bShort = (short) (random.nextInt() % (Short.MAX_VALUE + 1)); int longCompare = Integer.signum(Long.compare(aShort, bShort)); - byte[] aBytes = ZOrderByteUtils.shortToOrderedBytes(aShort, aBuffer); - byte[] bBytes = ZOrderByteUtils.shortToOrderedBytes(bShort, bBuffer); + byte[] aBytes = ZOrderByteUtils.shortToOrderedBytes(aShort, aBuffer).array(); + byte[] bBytes = ZOrderByteUtils.shortToOrderedBytes(bShort, bBuffer).array(); int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); Assert.assertEquals(String.format( @@ -206,8 +208,8 @@ public void testTinyOrdering() { byte aByte = (byte) (random.nextInt() % (Byte.MAX_VALUE + 1)); byte bByte = (byte) (random.nextInt() % (Byte.MAX_VALUE + 1)); int longCompare = Integer.signum(Long.compare(aByte, bByte)); - byte[] aBytes = ZOrderByteUtils.tinyintToOrderedBytes(aByte, aBuffer); - byte[] bBytes = ZOrderByteUtils.tinyintToOrderedBytes(bByte, bBuffer); + byte[] aBytes = ZOrderByteUtils.tinyintToOrderedBytes(aByte, 
aBuffer).array(); + byte[] bBytes = ZOrderByteUtils.tinyintToOrderedBytes(bByte, bBuffer).array(); int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); Assert.assertEquals(String.format( @@ -225,8 +227,8 @@ public void testFloatOrdering() { float aFloat = random.nextFloat(); float bFloat = random.nextFloat(); int floatCompare = Integer.signum(Float.compare(aFloat, bFloat)); - byte[] aBytes = ZOrderByteUtils.floatToOrderedBytes(aFloat, aBuffer); - byte[] bBytes = ZOrderByteUtils.floatToOrderedBytes(bFloat, bBuffer); + byte[] aBytes = ZOrderByteUtils.floatToOrderedBytes(aFloat, aBuffer).array(); + byte[] bBytes = ZOrderByteUtils.floatToOrderedBytes(bFloat, bBuffer).array(); int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); Assert.assertEquals(String.format( @@ -244,8 +246,8 @@ public void testDoubleOrdering() { double aDouble = random.nextDouble(); double bDouble = random.nextDouble(); int doubleCompare = Integer.signum(Double.compare(aDouble, bDouble)); - byte[] aBytes = ZOrderByteUtils.doubleToOrderedBytes(aDouble, aBuffer); - byte[] bBytes = ZOrderByteUtils.doubleToOrderedBytes(bDouble, bBuffer); + byte[] aBytes = ZOrderByteUtils.doubleToOrderedBytes(aDouble, aBuffer).array(); + byte[] bBytes = ZOrderByteUtils.doubleToOrderedBytes(bDouble, bBuffer).array(); int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); Assert.assertEquals(String.format( @@ -257,14 +259,15 @@ public void testDoubleOrdering() { @Test public void testStringOrdering() { + CharsetEncoder encoder = StandardCharsets.UTF_8.newEncoder(); ByteBuffer aBuffer = ByteBuffer.allocate(128); ByteBuffer bBuffer = ByteBuffer.allocate(128); for (int i = 0; i < NUM_TESTS; i++) { String aString = (String) RandomUtil.generatePrimitive(Types.StringType.get(), random); String bString = (String) RandomUtil.generatePrimitive(Types.StringType.get(), random); int stringCompare = Integer.signum(aString.compareTo(bString)); - byte[] aBytes = ZOrderByteUtils.stringToOrderedBytes(aString, 128, aBuffer); - byte[] bBytes = ZOrderByteUtils.stringToOrderedBytes(bString, 128, bBuffer); + byte[] aBytes = ZOrderByteUtils.stringToOrderedBytes(aString, 128, aBuffer, encoder).array(); + byte[] bBytes = ZOrderByteUtils.stringToOrderedBytes(bString, 128, bBuffer, encoder).array(); int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); Assert.assertEquals(String.format( From 1e7e660c671d38fac18dffaf3a8fbd614c7c2605 Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Tue, 8 Feb 2022 21:49:55 -0600 Subject: [PATCH 11/30] Remove unused string length --- .../java/org/apache/iceberg/util/ZOrderByteUtils.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java index 967aa0bf7c5c..3ec4c0f430f4 100644 --- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java +++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java @@ -24,6 +24,7 @@ import java.nio.charset.CharsetEncoder; import java.nio.charset.StandardCharsets; import java.util.Arrays; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; /** * Within Z-Ordering the byte representations of objects being compared must be ordered, @@ -114,12 +115,14 @@ public static ByteBuffer doubleToOrderedBytes(double val, ByteBuffer reuse) { * 
and right padding 0 for shorter strings. */ public static ByteBuffer stringToOrderedBytes(String val, int length, ByteBuffer reuse, CharsetEncoder encoder) { + Preconditions.checkArgument(encoder.charset().equals(StandardCharsets.UTF_8), + "Cannot use an encoder not using UTF_8 as its Charset"); + ByteBuffer bytes = ByteBuffers.reuse(reuse, length); Arrays.fill(bytes.array(), 0, length, (byte) 0x00); if (val != null) { - int maxLength = Math.min(length, val.length()); - // We may truncate mid-character - encoder.encode(CharBuffer.wrap(val), bytes, true); + CharBuffer inputBuffer = CharBuffer.wrap(val); + encoder.encode(inputBuffer, bytes, true); } return bytes; } From 41d855c46db008fe2b1cab92c33f818d9cf6bf8d Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Tue, 8 Feb 2022 22:05:29 -0600 Subject: [PATCH 12/30] Update docs --- .../main/java/org/apache/iceberg/util/ZOrderByteUtils.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java index 3ec4c0f430f4..b008461ea8ca 100644 --- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java +++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java @@ -30,10 +30,12 @@ * Within Z-Ordering the byte representations of objects being compared must be ordered, * this requires several types to be transformed when converted to bytes. The goal is to * map objects whose byte representations are not lexicographically ordered into representations - * that are lexicographically ordered. + * that are lexicographically ordered. Bytes produced should be compared lexicographically as + * unsigned bytes, big-endian. + *

* Most of these techniques are derived from * https://aws.amazon.com/blogs/database/z-order-indexing-for-multifaceted-queries-in-amazon-dynamodb-part-2/ - * + *
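 * (that post builds a Z-address by interleaving the bits of each key's ordered, fixed-width byte representation)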

* Some implementation is taken from * https://github.com/apache/hbase/blob/master/hbase-common/src/main/java/org/apache/hadoop/hbase/util/OrderedBytes.java */ From 2dfad579939f1638616485d3320758b62a55d15c Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Mon, 24 Jan 2022 17:09:34 -0600 Subject: [PATCH 13/30] Spark: Rewrite Datafiles Implementation Using ZOrder Use Spark UDFs to create a Z-Value column and then invoke a Spark Sort on it. The resultant data is then saved without the Z-Value Column. --- .../iceberg/actions/RewriteDataFiles.java | 9 + .../BaseRewriteDataFilesSparkAction.java | 11 + .../spark/actions/Spark3ZOrderStrategy.java | 249 ++++++++++++++++++ .../spark/actions/SparkSortStrategy.java | 4 + .../actions/TestRewriteDataFilesAction.java | 126 +++++++++ 5 files changed, 399 insertions(+) create mode 100644 spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java diff --git a/api/src/main/java/org/apache/iceberg/actions/RewriteDataFiles.java b/api/src/main/java/org/apache/iceberg/actions/RewriteDataFiles.java index f00596fa46e8..4ed57716603a 100644 --- a/api/src/main/java/org/apache/iceberg/actions/RewriteDataFiles.java +++ b/api/src/main/java/org/apache/iceberg/actions/RewriteDataFiles.java @@ -129,6 +129,15 @@ default RewriteDataFiles sort(SortOrder sortOrder) { throw new UnsupportedOperationException("SORT Rewrite Strategy not implemented for this framework"); } + /** + * Choose Z-ORDER as a strategy for this rewrite operation with a specified list of columns to use + * @param columns Columns to be used to generate Z-Values + * @return this for method chaining + */ + default RewriteDataFiles zOrder(String... columns) { + throw new UnsupportedOperationException("Z-ORDER Rewrite Strategy not implemented for this framework"); + } + /** * A user provided filter for determining which files will be considered by the rewrite strategy. This will be used * in addition to whatever rules the rewrite strategy generates. For example this would be used for providing a diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/BaseRewriteDataFilesSparkAction.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/BaseRewriteDataFilesSparkAction.java index 5350e729c8ea..62cb5b174d43 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/BaseRewriteDataFilesSparkAction.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/BaseRewriteDataFilesSparkAction.java @@ -106,6 +106,11 @@ protected RewriteDataFiles self() { return this; } + /** + * The framework specific ZOrder Strategy + */ + protected abstract SortStrategy zOrderStrategy(String... columnNames); + @Override public RewriteDataFiles binPack() { Preconditions.checkArgument(this.strategy == null, @@ -130,6 +135,12 @@ public RewriteDataFiles sort() { return this; } + @Override + public RewriteDataFiles zOrder(String... 
columnNames) { + this.strategy = zOrderStrategy(columnNames); + return this; + } + @Override public RewriteDataFiles filter(Expression expression) { filter = Expressions.and(filter, expression); diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java new file mode 100644 index 000000000000..6e9354cf605c --- /dev/null +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java @@ -0,0 +1,249 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.spark.actions; + +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.List; +import java.util.Set; +import java.util.UUID; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.iceberg.DataFile; +import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.NullOrder; +import org.apache.iceberg.PartitionField; +import org.apache.iceberg.Schema; +import org.apache.iceberg.SortDirection; +import org.apache.iceberg.Table; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.spark.FileRewriteCoordinator; +import org.apache.iceberg.spark.FileScanTaskSetManager; +import org.apache.iceberg.spark.SparkDistributionAndOrderingUtil; +import org.apache.iceberg.spark.SparkReadOptions; +import org.apache.iceberg.spark.SparkWriteOptions; +import org.apache.iceberg.types.Types; +import org.apache.iceberg.types.Types.NestedField; +import org.apache.iceberg.util.SortOrderUtil; +import org.apache.iceberg.util.ZOrderByteUtils; +import org.apache.spark.sql.Column; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan; +import org.apache.spark.sql.connector.distributions.Distribution; +import org.apache.spark.sql.connector.distributions.Distributions; +import org.apache.spark.sql.connector.expressions.SortOrder; +import org.apache.spark.sql.expressions.UserDefinedFunction; +import org.apache.spark.sql.functions; +import org.apache.spark.sql.internal.SQLConf; +import org.apache.spark.sql.types.BinaryType; +import org.apache.spark.sql.types.BooleanType; +import org.apache.spark.sql.types.ByteType; +import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.DateType; +import org.apache.spark.sql.types.DoubleType; +import org.apache.spark.sql.types.FloatType; +import org.apache.spark.sql.types.IntegerType; +import org.apache.spark.sql.types.LongType; +import org.apache.spark.sql.types.ShortType; +import 
org.apache.spark.sql.types.StringType; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.TimestampType; +import scala.collection.Seq; + +public class Spark3ZOrderStrategy extends Spark3SortStrategy { + private static final String Z_COLUMN = "ICEZVALUE"; + private static final Schema Z_SCHEMA = new Schema(NestedField.required(0, Z_COLUMN, Types.BinaryType.get())); + private static final org.apache.iceberg.SortOrder Z_SORT_ORDER = org.apache.iceberg.SortOrder.builderFor(Z_SCHEMA) + .sortBy(Z_COLUMN, SortDirection.ASC, NullOrder.NULLS_LAST) + .build(); + private static final int STRING_KEY_LENGTH = 60; + + private final List zOrderColNames; + private final FileScanTaskSetManager manager = FileScanTaskSetManager.get(); + private final FileRewriteCoordinator rewriteCoordinator = FileRewriteCoordinator.get(); + + public Spark3ZOrderStrategy(Table table, SparkSession spark, List zOrderColNames) { + super(table, spark); + + Stream identityPartitionColumns = table.spec().fields().stream() + .filter(f -> f.transform().isIdentity()) + .map(PartitionField::name); + List partZOrderCols = identityPartitionColumns + .filter(zOrderColNames::contains) + .collect(Collectors.toList()); + Preconditions.checkArgument(partZOrderCols.isEmpty(), + "Cannot ZOrder on an Identity partition column as these values are constant within a partition, " + + "ZOrdering requested on %s", + partZOrderCols); + + this.zOrderColNames = zOrderColNames; + } + + @Override + public String name() { + return "Z-ORDER"; + } + + @Override + protected void validateOptions() { + // TODO implement Zorder Strategy in API Module + return; + } + + @Override + public Set rewriteFiles(List filesToRewrite) { + String groupID = UUID.randomUUID().toString(); + boolean requiresRepartition = !filesToRewrite.get(0).spec().equals(table().spec()); + + SortOrder[] ordering; + if (requiresRepartition) { + ordering = SparkDistributionAndOrderingUtil.convert(SortOrderUtil.buildSortOrder(table(), sortOrder())); + } else { + ordering = SparkDistributionAndOrderingUtil.convert(sortOrder()); + } + + Distribution distribution = Distributions.ordered(ordering); + + try { + manager.stageTasks(table(), groupID, filesToRewrite); + + // Disable Adaptive Query Execution as this may change the output partitioning of our write + SparkSession cloneSession = spark().cloneSession(); + cloneSession.conf().set(SQLConf.ADAPTIVE_EXECUTION_ENABLED().key(), false); + + // Reset Shuffle Partitions for our sort + long numOutputFiles = numOutputFiles((long) (inputFileSize(filesToRewrite) * sizeEstimateMultiple())); + cloneSession.conf().set(SQLConf.SHUFFLE_PARTITIONS().key(), Math.max(1, numOutputFiles)); + + Dataset scanDF = cloneSession.read().format("iceberg") + .option(SparkReadOptions.FILE_SCAN_TASK_SET_ID, groupID) + .load(table().name()); + + Column[] originalColumns = Arrays.stream(scanDF.schema().names()) + .map(n -> functions.col(n)) + .toArray(Column[]::new); + + List zOrderColumns = zOrderColNames.stream() + .map(scanDF.schema()::apply) + .collect(Collectors.toList()); + + Column zvalueArray = functions.array(zOrderColumns.stream().map(colStruct -> + SparkZOrder.sortedLexicographically(functions.col(colStruct.name()), colStruct.dataType()) + ).toArray(Column[]::new)); + + Dataset zvalueDF = scanDF.withColumn(Z_COLUMN, SparkZOrder.interleaveBytes(zvalueArray)); + + SQLConf sqlConf = cloneSession.sessionState().conf(); + LogicalPlan sortPlan = sortPlan(distribution, ordering, zvalueDF.logicalPlan(), sqlConf); + Dataset sortedDf = new 
Dataset<>(cloneSession, sortPlan, zvalueDF.encoder()); + sortedDf + .select(originalColumns) + .write() + .format("iceberg") + .option(SparkWriteOptions.REWRITTEN_FILE_SCAN_TASK_SET_ID, groupID) + .option(SparkWriteOptions.TARGET_FILE_SIZE_BYTES, writeMaxFileSize()) + .option(SparkWriteOptions.USE_TABLE_DISTRIBUTION_AND_ORDERING, "false") + .mode("append") + .save(table().name()); + + return rewriteCoordinator.fetchNewDataFiles(table(), groupID); + } finally { + manager.removeTasks(table(), groupID); + rewriteCoordinator.clearRewrite(table(), groupID); + } + } + + @Override + protected org.apache.iceberg.SortOrder sortOrder() { + return Z_SORT_ORDER; + } + + static class SparkZOrder { + + static byte[] interleaveBits(Seq scalaBinary) { + byte[][] columnsBinary = scala.collection.JavaConverters.seqAsJavaList(scalaBinary) + .toArray(new byte[scalaBinary.size()][]); + return ZOrderByteUtils.interleaveBits(columnsBinary); + } + + private static final UserDefinedFunction FLOAT_TO_BYTES = + functions.udf((Float f) -> ByteBuffer.allocate(4).putFloat(f).array(), DataTypes.BinaryType); + + private static final UserDefinedFunction DOUBLE_TO_BYTES = + functions.udf((Double d) -> ByteBuffer.allocate(8).putDouble(d).array(), DataTypes.BinaryType); + + private static UserDefinedFunction getLexicalBytesIntLike(int size) { + return functions.udf((byte[] binary) -> ZOrderByteUtils.orderIntLikeBytes(binary, size), DataTypes.BinaryType) + .withName("INT-LIKE-LEXICAL-BYTES"); + } + + private static UserDefinedFunction getLexicalBytesFloatLike(int size) { + return functions.udf((byte[] binary) -> ZOrderByteUtils.orderFloatLikeBytes(binary, size), DataTypes.BinaryType) + .withName("FLOAT-LIKE-LEXICAL-BYTES"); + } + + private static UserDefinedFunction getLexicalBytesUTF8Like(int size) { + return functions.udf((byte[] binary) -> ZOrderByteUtils.orderUTF8LikeBytes(binary, size), DataTypes.BinaryType) + .withName("UTF8-LIKE-LEXICAL-BYTES"); + } + + private static final UserDefinedFunction INTERLEAVE_UDF = + functions.udf((Seq arrayBinary) -> interleaveBits(arrayBinary), DataTypes.BinaryType) + .withName("INTERLEAVE_BYTES"); + + static Column interleaveBytes(Column arrayBinary) { + return INTERLEAVE_UDF.apply(arrayBinary); + } + + @SuppressWarnings("checkstyle:CyclomaticComplexity") + static Column sortedLexicographically(Column column, DataType type) { + if (type instanceof ByteType) { + return column.cast(DataTypes.BinaryType); + } else if (type instanceof ShortType) { + return getLexicalBytesIntLike(2).apply(column.cast(DataTypes.BinaryType)); + } else if (type instanceof IntegerType) { + return getLexicalBytesIntLike(4).apply(column.cast(DataTypes.BinaryType)); + } else if (type instanceof LongType) { + return getLexicalBytesIntLike(8).apply(column.cast(DataTypes.BinaryType)); + } else if (type instanceof FloatType) { + return getLexicalBytesFloatLike(4).apply(FLOAT_TO_BYTES.apply(column)); + } else if (type instanceof DoubleType) { + return getLexicalBytesFloatLike(8).apply(DOUBLE_TO_BYTES.apply(column)); + } else if (type instanceof StringType) { + return getLexicalBytesUTF8Like(STRING_KEY_LENGTH).apply(column.cast(DataTypes.BinaryType)); + } else if (type instanceof BinaryType) { + return getLexicalBytesUTF8Like(STRING_KEY_LENGTH).apply(column); + } else if (type instanceof BooleanType) { + return getLexicalBytesUTF8Like(1).apply(column.cast(DataTypes.BinaryType)); + } else if (type instanceof TimestampType) { + return 
getLexicalBytesIntLike(8).apply(column.cast(DataTypes.LongType).cast(DataTypes.BinaryType)); + } else if (type instanceof DateType) { + return getLexicalBytesIntLike(8).apply(column.cast(DataTypes.LongType).cast(DataTypes.BinaryType)); + } else { + throw new IllegalArgumentException( + String.format("Cannot use column %s of type %s in ZOrdering, the type is unsupported", + column, type)); + } + } + } +} diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSortStrategy.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSortStrategy.java index 832ff255579c..d4823560bf17 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSortStrategy.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSortStrategy.java @@ -155,4 +155,8 @@ protected SparkSession spark() { protected LogicalPlan sortPlan(Distribution distribution, SortOrder[] ordering, LogicalPlan plan, SQLConf conf) { return DistributionAndOrderingUtils$.MODULE$.prepareQuery(distribution, ordering, plan, conf); } + + protected double sizeEstimateMultiple() { + return this.sizeEstimateMultiple; + } } diff --git a/spark/v3.2/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java b/spark/v3.2/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java index 1d8695053123..f6f3004aecd0 100644 --- a/spark/v3.2/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java +++ b/spark/v3.2/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java @@ -62,6 +62,7 @@ import org.apache.iceberg.encryption.EncryptedOutputFile; import org.apache.iceberg.encryption.EncryptionKeyMetadata; import org.apache.iceberg.exceptions.CommitStateUnknownException; +import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.hadoop.HadoopTables; import org.apache.iceberg.io.CloseableIterable; @@ -95,6 +96,10 @@ import org.mockito.Mockito; import static org.apache.iceberg.types.Types.NestedField.optional; +import static org.apache.iceberg.types.Types.NestedField.required; +import static org.apache.spark.sql.functions.current_date; +import static org.apache.spark.sql.functions.date_add; +import static org.apache.spark.sql.functions.expr; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.argThat; import static org.mockito.Mockito.doAnswer; @@ -1032,6 +1037,83 @@ public void testCommitStateUnknownException() { shouldHaveSnapshots(table, 2); // Commit actually Succeeded } + @Test + public void testZOrderSort() { + int originalFiles = 20; + Table table = createTable(originalFiles); + shouldHaveLastCommitUnsorted(table, "c2"); + shouldHaveFiles(table, originalFiles); + + List originalData = currentData(); + double originalFilesC2 = percentFilesRequired(table, "c2", "foo23"); + double originalFilesC3 = percentFilesRequired(table, "c3", "bar21"); + double originalFilesC2C3 = percentFilesRequired(table, new String[]{"c2", "c3"}, new String[]{"foo23", "bar23"}); + + Assert.assertTrue("Should require all files to scan c2", originalFilesC2 > 0.99); + Assert.assertTrue("Should require all files to scan c3", originalFilesC3 > 0.99); + + RewriteDataFiles.Result result = + basicRewrite(table) + .zOrder("c2", "c3") + .option(SortStrategy.MAX_FILE_SIZE_BYTES, Integer.toString((averageFileSize(table) / 2) + 2)) + // Divide files in 2 + 
.option(RewriteDataFiles.TARGET_FILE_SIZE_BYTES, Integer.toString(averageFileSize(table) / 2)) + .option(SortStrategy.MIN_INPUT_FILES, "1") + .execute(); + + Assert.assertEquals("Should have 1 fileGroups", 1, result.rewriteResults().size()); + int zOrderedFilesTotal = Iterables.size(table.currentSnapshot().addedFiles()); + Assert.assertTrue("Should have written 40+ files", zOrderedFilesTotal >= 40); + + table.refresh(); + + List postRewriteData = currentData(); + assertEquals("We shouldn't have changed the data", originalData, postRewriteData); + + shouldHaveSnapshots(table, 2); + shouldHaveACleanCache(table); + + double filesScannedC2 = percentFilesRequired(table, "c2", "foo23"); + double filesScannedC3 = percentFilesRequired(table, "c3", "bar21"); + double filesScannedC2C3 = percentFilesRequired(table, new String[]{"c2", "c3"}, new String[]{"foo23", "bar23"}); + + Assert.assertTrue("Should have reduced the number of files required for c2", + filesScannedC2 < originalFilesC2); + Assert.assertTrue("Should have reduced the number of files required for c3", + filesScannedC3 < originalFilesC3); + Assert.assertTrue("Should have reduced the number of files required for a c2,c3 predicate", + filesScannedC2C3 < originalFilesC2C3); + } + + @Test + public void testZOrderAllTypesSort() { + Table table = createTypeTestTable(); + shouldHaveFiles(table, 10); + + List originalRaw = spark.read().format("iceberg").load(tableLocation).sort("longCol").collectAsList(); + List originalData = rowsToJava(originalRaw); + + RewriteDataFiles.Result result = + basicRewrite(table) + .zOrder("longCol", "intCol", "floatCol", "doubleCol", "dateCol", "timestampCol", "stringCol") + .option(SortStrategy.MIN_INPUT_FILES, "1") + .option(SortStrategy.REWRITE_ALL, "true") + .execute(); + + Assert.assertEquals("Should have 1 fileGroups", 1, result.rewriteResults().size()); + int zOrderedFilesTotal = Iterables.size(table.currentSnapshot().addedFiles()); + Assert.assertEquals("Should have written 1 file", 1, zOrderedFilesTotal); + + table.refresh(); + + List postRaw = spark.read().format("iceberg").load(tableLocation).sort("longCol").collectAsList(); + List postRewriteData = rowsToJava(postRaw); + assertEquals("We shouldn't have changed the data", originalData, postRewriteData); + + shouldHaveSnapshots(table, 2); + shouldHaveACleanCache(table); + } + @Test public void testInvalidAPIUsage() { Table table = createTable(1); @@ -1327,6 +1409,35 @@ protected Table createTablePartitioned(int partitions, int files) { return createTablePartitioned(partitions, files, SCALE, Maps.newHashMap()); } + private Table createTypeTestTable() { + Schema schema = new Schema( + required(1, "longCol", Types.LongType.get()), + required(2, "intCol", Types.IntegerType.get()), + required(3, "floatCol", Types.FloatType.get()), + optional(4, "doubleCol", Types.DoubleType.get()), + optional(6, "dateCol", Types.DateType.get()), + optional(7, "timestampCol", Types.TimestampType.withZone()), + optional(8, "stringCol", Types.StringType.get())); + + Map options = Maps.newHashMap(); + Table table = TABLES.create(schema, PartitionSpec.unpartitioned(), options, tableLocation); + + spark.range(0, 10, 1, 10) + .withColumnRenamed("id", "longCol") + .withColumn("intCol", expr("CAST(longCol AS INT)")) + .withColumn("floatCol", expr("CAST(longCol AS FLOAT)")) + .withColumn("doubleCol", expr("CAST(longCol AS DOUBLE)")) + .withColumn("dateCol", date_add(current_date(), 1)) + .withColumn("timestampCol", expr("TO_TIMESTAMP(dateCol)")) + .withColumn("stringCol", 
expr("CAST(dateCol AS STRING)")) + .write() + .format("iceberg") + .mode("append") + .save(tableLocation); + + return table; + } + protected int averageFileSize(Table table) { table.refresh(); return (int) Streams.stream(table.newScan().planFiles()).mapToLong(FileScanTask::length).average().getAsDouble(); @@ -1412,6 +1523,21 @@ private Set cacheContents(Table table) { .build(); } + private double percentFilesRequired(Table table, String col, String value) { + return percentFilesRequired(table, new String[]{col}, new String[]{value}); + } + + private double percentFilesRequired(Table table, String[] cols, String[] values) { + Preconditions.checkArgument(cols.length == values.length); + Expression restriction = Expressions.alwaysTrue(); + for (int i = 0; i < cols.length; i++) { + restriction = Expressions.and(restriction, Expressions.equal(cols[i], values[i])); + } + int totalFiles = Iterables.size(table.newScan().planFiles()); + int filteredFiles = Iterables.size(table.newScan().filter(restriction).planFiles()); + return (double) filteredFiles / (double) totalFiles; + } + class GroupInfoMatcher implements ArgumentMatcher { private final Set groupIDs; From a3e854341eed24c862dc1b2aa81839afea625b64 Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Tue, 25 Jan 2022 10:08:37 -0600 Subject: [PATCH 14/30] Spark: Adds perf benchmarks for ZOrdering vs Sort Rewrite --- .../IcebergSortCompactionBenchmark.java | 272 ++++++++++++++++++ 1 file changed, 272 insertions(+) create mode 100644 spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java diff --git a/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java b/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java new file mode 100644 index 000000000000..3347ebef5017 --- /dev/null +++ b/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java @@ -0,0 +1,272 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + + +package org.apache.iceberg.spark.action; + +import java.io.IOException; +import java.util.Collections; +import java.util.UUID; +import java.util.concurrent.TimeUnit; +import org.apache.hadoop.conf.Configuration; +import org.apache.iceberg.NullOrder; +import org.apache.iceberg.Schema; +import org.apache.iceberg.SortDirection; +import org.apache.iceberg.SortOrder; +import org.apache.iceberg.Table; +import org.apache.iceberg.relocated.com.google.common.io.Files; +import org.apache.iceberg.spark.Spark3Util; +import org.apache.iceberg.spark.SparkSchemaUtil; +import org.apache.iceberg.spark.SparkSessionCatalog; +import org.apache.iceberg.spark.actions.SparkActions; +import org.apache.iceberg.types.Types; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.connector.catalog.Identifier; +import org.apache.spark.sql.connector.expressions.Transform; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Timeout; + +import static org.apache.iceberg.types.Types.NestedField.optional; +import static org.apache.iceberg.types.Types.NestedField.required; +import static org.apache.spark.sql.functions.col; +import static org.apache.spark.sql.functions.current_date; +import static org.apache.spark.sql.functions.date_add; +import static org.apache.spark.sql.functions.expr; + +@Fork(1) +@State(Scope.Benchmark) +@Measurement(iterations = 3) +@BenchmarkMode(Mode.SingleShotTime) +@Timeout(time = 1000, timeUnit = TimeUnit.HOURS) +public class IcebergSortCompactionBenchmark { + + private static final String[] NAMESPACE = new String[] {"default"}; + private static final String NAME = "sortbench"; + private static final Identifier IDENT = Identifier.of(NAMESPACE, NAME); + private static final int NUM_FILES = 8; + private static final long NUM_ROWS = 10000000L; + + + private final Configuration hadoopConf = initHadoopConf(); + private SparkSession spark; + + @Setup + public void setupBench() { + setupSpark(); + } + + @TearDown + public void teardownBench() { + tearDownSpark(); + } + + @Setup(Level.Iteration) + public void setupIteration() { + initTable(); + appendData(); + } + + @TearDown(Level.Iteration) + public void cleanUpIteration() throws IOException { + cleanupFiles(); + } + + @Benchmark + @Threads(1) + public void sortInt() { + SparkActions.get() + .rewriteDataFiles(table()) + .sort(SortOrder + .builderFor(table().schema()) + .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) + .build()) + .execute(); + } + + @Benchmark + @Threads(1) + public void sortString() { + SparkActions.get() + .rewriteDataFiles(table()) + .sort(SortOrder + .builderFor(table().schema()) + .sortBy("stringCol", SortDirection.ASC, NullOrder.NULLS_FIRST) + .build()) + .execute(); + } + + @Benchmark + @Threads(1) + public void sortFourColumns() { + SparkActions.get() + .rewriteDataFiles(table()) + .sort(SortOrder + .builderFor(table().schema()) + .sortBy("stringCol", SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("intCol", 
SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("dateCol", SortDirection.DESC, NullOrder.NULLS_FIRST) + .sortBy("doubleCol", SortDirection.DESC, NullOrder.NULLS_FIRST) + .build()) + .execute(); + } + + @Benchmark + @Threads(1) + public void sortSixColumns() { + SparkActions.get() + .rewriteDataFiles(table()) + .sort(SortOrder + .builderFor(table().schema()) + .sortBy("stringCol", SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("dateCol", SortDirection.DESC, NullOrder.NULLS_FIRST) + .sortBy("timestampCol", SortDirection.DESC, NullOrder.NULLS_FIRST) + .sortBy("doubleCol", SortDirection.DESC, NullOrder.NULLS_FIRST) + .sortBy("longCol", SortDirection.DESC, NullOrder.NULLS_FIRST) + .build()) + .execute(); + } + + @Benchmark + @Threads(1) + public void zSortInt() { + SparkActions.get() + .rewriteDataFiles(table()) + .zOrder("intCol") + .execute(); + } + + @Benchmark + @Threads(1) + public void zSortString() { + SparkActions.get() + .rewriteDataFiles(table()) + .zOrder("stringCol") + .execute(); + } + + @Benchmark + @Threads(1) + public void zSortFourColumns() { + SparkActions.get() + .rewriteDataFiles(table()) + .zOrder("stringCol", "intCol", "dateCol", "doubleCol") + .execute(); + } + + @Benchmark + @Threads(1) + public void zSortSixColumns() { + SparkActions.get() + .rewriteDataFiles(table()) + .zOrder("stringCol", "intCol", "dateCol", "timestampCol", "doubleCol", "longCol") + .execute(); + } + + protected Configuration initHadoopConf() { + return new Configuration(); + } + + protected final void initTable() { + Schema schema = new Schema( + required(1, "longCol", Types.LongType.get()), + required(2, "intCol", Types.IntegerType.get()), + required(3, "floatCol", Types.FloatType.get()), + optional(4, "doubleCol", Types.DoubleType.get()), + optional(6, "dateCol", Types.DateType.get()), + optional(7, "timestampCol", Types.TimestampType.withZone()), + optional(8, "stringCol", Types.StringType.get())); + + SparkSessionCatalog catalog = null; + try { + catalog = (SparkSessionCatalog) + Spark3Util.catalogAndIdentifier(spark(), "spark_catalog").catalog(); + catalog.dropTable(IDENT); + catalog.createTable(IDENT, SparkSchemaUtil.convert(schema), new Transform[0], Collections.emptyMap()); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + private void appendData() { + Dataset df = spark().range(0, NUM_ROWS * NUM_FILES, 1, NUM_FILES) + .withColumnRenamed("id", "longCol") + .withColumn("intCol", expr("CAST(longCol AS INT)")) + .withColumn("floatCol", expr("CAST(longCol AS FLOAT)")) + .withColumn("doubleCol", expr("CAST(longCol AS DOUBLE)")) + .withColumn("dateCol", date_add(current_date(), col("intCol").mod(NUM_FILES))) + .withColumn("timestampCol", expr("TO_TIMESTAMP(dateCol)")) + .withColumn("stringCol", expr("CAST(dateCol AS STRING)")); + writeData(df); + } + + private void writeData(Dataset df) { + df.write().format("iceberg").mode(SaveMode.Append).save(NAME); + } + + protected final Table table() { + try { + return Spark3Util.loadIcebergTable(spark(), NAME); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + protected final SparkSession spark() { + return spark; + } + + protected String getCatalogWarehouse() { + String location = Files.createTempDir().getAbsolutePath() + "/" + UUID.randomUUID() + "/"; + return location; + } + + protected void cleanupFiles() throws IOException { + spark.sql("DROP TABLE IF EXISTS " + NAME); + } + + protected void setupSpark() { + SparkSession.Builder builder = + 
SparkSession.builder() .config("spark.sql.catalog.spark_catalog", "org.apache.iceberg.spark.SparkSessionCatalog") .config("spark.sql.catalog.spark_catalog.type", "hadoop") .config("spark.sql.catalog.spark_catalog.warehouse", getCatalogWarehouse()) .master("local[*]"); spark = builder.getOrCreate(); Configuration sparkHadoopConf = spark.sessionState().newHadoopConf(); hadoopConf.forEach(entry -> sparkHadoopConf.set(entry.getKey(), entry.getValue())); } protected void tearDownSpark() { spark.stop(); } } From fa2add8511aaa00383607bfdab1562f28761c5c7 Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Mon, 31 Jan 2022 17:15:05 -0600 Subject: [PATCH 15/30] WIP --- .../spark/actions/Spark3ZOrderStrategy.java | 55 ++++++++++++++----- 1 file changed, 42 insertions(+), 13 deletions(-) diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java index 6e9354cf605c..62c543833361 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java @@ -186,25 +186,54 @@ static byte[] interleaveBits(Seq scalaBinary) { return ZOrderByteUtils.interleaveBits(columnsBinary); } - private static final UserDefinedFunction FLOAT_TO_BYTES = - functions.udf((Float f) -> ByteBuffer.allocate(4).putFloat(f).array(), DataTypes.BinaryType); + private static UserDefinedFunction intToOrderedBytesUDF() { + return functions.udf((Integer value) -> { + if (value == null) { + return null; + } + return ZOrderByteUtils.intToOrderedBytes(value); + }, DataTypes.BinaryType) + .withName("INT-LEXICAL-BYTES"); + } - private static final UserDefinedFunction DOUBLE_TO_BYTES = - functions.udf((Double d) -> ByteBuffer.allocate(8).putDouble(d).array(), DataTypes.BinaryType); + private static UserDefinedFunction longToOrderedBytesUDF() { + return functions.udf((Long value) -> { + if (value == null) { + return null; + } + return ZOrderByteUtils.longToOrderedBytes(value); + }, DataTypes.BinaryType) + .withName("LONG-LEXICAL-BYTES"); + } - private static UserDefinedFunction getLexicalBytesIntLike(int size) { - return functions.udf((byte[] binary) -> ZOrderByteUtils.orderIntLikeBytes(binary, size), DataTypes.BinaryType) - .withName("INT-LIKE-LEXICAL-BYTES"); + private static UserDefinedFunction floatToOrderedBytesUDF() { + return functions.udf((Float value) -> { + if (value == null) { + return null; + } + return ZOrderByteUtils.floatToOrderedBytes(value); + }, DataTypes.BinaryType) + .withName("FLOAT-LEXICAL-BYTES"); } - private static UserDefinedFunction getLexicalBytesFloatLike(int size) { - return functions.udf((byte[] binary) -> ZOrderByteUtils.orderFloatLikeBytes(binary, size), DataTypes.BinaryType) - .withName("FLOAT-LIKE-LEXICAL-BYTES"); + private static UserDefinedFunction doubleToOrderedBytesUDF() { + return functions.udf((Double value) -> { + if (value == null) { + return null; + } + return ZOrderByteUtils.doubleToOrderedBytes(value); + }, DataTypes.BinaryType) + .withName("DOUBLE-LEXICAL-BYTES"); + } - private static UserDefinedFunction getLexicalBytesUTF8Like(int size) { - return functions.udf((byte[] binary) -> ZOrderByteUtils.orderUTF8LikeBytes(binary, size), DataTypes.BinaryType) - .withName("UTF8-LIKE-LEXICAL-BYTES"); + private static UserDefinedFunction stringToOrderedBytesUDF() { + return functions.udf((String value) -> { + if (value == null) {
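+          // A null value has no natural lexical bytes, so this WIP maps it to null; the next patch in the series substitutes fixed-size empty byte arrays so every column always contributes the same number of bytes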
+ return null; + } + return ZOrderByteUtils.stringToOrderedBytes(value); + }, DataTypes.BinaryType) + .withName("STRING-LEXICAL-BYTES"); } private static final UserDefinedFunction INTERLEAVE_UDF = From 6974f45ca400fc5237f49843079de64ff38c1fef Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Tue, 1 Feb 2022 15:21:38 -0600 Subject: [PATCH 16/30] Update to Match new UtilityCode --- .../spark/actions/Spark3ZOrderStrategy.java | 110 +++++++++++------- 1 file changed, 67 insertions(+), 43 deletions(-) diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java index 62c543833361..449caa039aec 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java @@ -19,7 +19,6 @@ package org.apache.iceberg.spark.actions; -import java.nio.ByteBuffer; import java.util.Arrays; import java.util.List; import java.util.Set; @@ -71,6 +70,7 @@ import scala.collection.Seq; public class Spark3ZOrderStrategy extends Spark3SortStrategy { + private static final String Z_COLUMN = "ICEZVALUE"; private static final Schema Z_SCHEMA = new Schema(NestedField.required(0, Z_COLUMN, Types.BinaryType.get())); private static final org.apache.iceberg.SortOrder Z_SORT_ORDER = org.apache.iceberg.SortOrder.builderFor(Z_SCHEMA) @@ -91,7 +91,8 @@ public Spark3ZOrderStrategy(Table table, SparkSession spark, List zOrder List partZOrderCols = identityPartitionColumns .filter(zOrderColNames::contains) .collect(Collectors.toList()); - Preconditions.checkArgument(partZOrderCols.isEmpty(), + Preconditions.checkArgument( + partZOrderCols.isEmpty(), "Cannot ZOrder on an Identity partition column as these values are constant within a partition, " + "ZOrdering requested on %s", partZOrderCols); @@ -143,13 +144,13 @@ public Set rewriteFiles(List filesToRewrite) { .map(n -> functions.col(n)) .toArray(Column[]::new); - List zOrderColumns = zOrderColNames.stream() + List zOrderColumns = zOrderColNames.stream() .map(scanDF.schema()::apply) .collect(Collectors.toList()); Column zvalueArray = functions.array(zOrderColumns.stream().map(colStruct -> - SparkZOrder.sortedLexicographically(functions.col(colStruct.name()), colStruct.dataType()) - ).toArray(Column[]::new)); + SparkZOrder.sortedLexicographically(functions.col(colStruct.name()), colStruct.dataType()) + ).toArray(Column[]::new)); Dataset zvalueDF = scanDF.withColumn(Z_COLUMN, SparkZOrder.interleaveBytes(zvalueArray)); @@ -180,60 +181,83 @@ protected org.apache.iceberg.SortOrder sortOrder() { static class SparkZOrder { + private static final byte[] TINY_EMPTY = new byte[Byte.BYTES]; + private static final byte[] SHORT_EMPTY = new byte[Short.BYTES]; + private static final byte[] INT_EMPTY = new byte[Integer.BYTES]; + private static final byte[] LONG_EMPTY = new byte[Long.BYTES]; + private static final byte[] FLOAT_EMPTY = new byte[Float.BYTES]; + private static final byte[] DOUBLE_EMPTY = new byte[Double.BYTES]; + static byte[] interleaveBits(Seq scalaBinary) { byte[][] columnsBinary = scala.collection.JavaConverters.seqAsJavaList(scalaBinary) .toArray(new byte[scalaBinary.size()][]); return ZOrderByteUtils.interleaveBits(columnsBinary); } + private static UserDefinedFunction tinyToOrderedBytesUDF() { + return functions.udf((Byte value) -> { + if (value == null) { + return TINY_EMPTY; + } + return 
ZOrderByteUtils.tinyintToOrderedBytes(value); + }, DataTypes.BinaryType) + .withName("TINY_ORDERED_BYTES"); + } + + private static UserDefinedFunction shortToOrderedBytesUDF() { + return functions.udf((Short value) -> { + if (value == null) { + return SHORT_EMPTY; + } + return ZOrderByteUtils.shortToOrderedBytes(value); + }, DataTypes.BinaryType) + .withName("SHORT_ORDERED_BYTES"); + } + private static UserDefinedFunction intToOrderedBytesUDF() { return functions.udf((Integer value) -> { if (value == null) { - return null; + return INT_EMPTY; } return ZOrderByteUtils.intToOrderedBytes(value); }, DataTypes.BinaryType) - .withName("INT-LEXICAL-BYTES"); + .withName("INT_ORDERED_BYTES"); } private static UserDefinedFunction longToOrderedBytesUDF() { return functions.udf((Long value) -> { - if (value == null) { - return null; - } - return ZOrderByteUtils.longToOrderedBytes(value); - }, DataTypes.BinaryType) - .withName("LONG-LEXICAL-BYTES"); + if (value == null) { + return LONG_EMPTY; + } + return ZOrderByteUtils.longToOrderedBytes(value); + }, DataTypes.BinaryType) + .withName("LONG_ORDERED_BYTES"); } private static UserDefinedFunction floatToOrderedBytesUDF() { return functions.udf((Float value) -> { - if (value == null) { - return null; - } - return ZOrderByteUtils.floatToOrderedBytes(value); - }, DataTypes.BinaryType) - .withName("FLOAT-LEXICAL-BYTES"); + if (value == null) { + return FLOAT_EMPTY; + } + return ZOrderByteUtils.floatToOrderedBytes(value); + }, DataTypes.BinaryType) + .withName("FLOAT_ORDERED_BYTES"); } private static UserDefinedFunction doubleToOrderedBytesUDF() { return functions.udf((Double value) -> { - if (value == null) { - return null; - } - return ZOrderByteUtils.doubleToOrderedBytes(value); - }, DataTypes.BinaryType) - .withName("DOUBLE-LEXICAL-BYTES"); + if (value == null) { + return DOUBLE_EMPTY; + } + return ZOrderByteUtils.doubleToOrderedBytes(value); + }, DataTypes.BinaryType) + .withName("DOUBLE_ORDERED_BYTES"); } private static UserDefinedFunction stringToOrderedBytesUDF() { - return functions.udf((String value) -> { - if (value == null) { - return null; - } - return ZOrderByteUtils.stringToOrderedBytes(value); - }, DataTypes.BinaryType) - .withName("STRING-LEXICAL-BYTES"); + return functions.udf((String value) -> ZOrderByteUtils.stringToOrderedBytes(value, STRING_KEY_LENGTH), + DataTypes.BinaryType) + .withName("STRING-LEXICAL-BYTES"); } private static final UserDefinedFunction INTERLEAVE_UDF = @@ -247,27 +271,27 @@ static Column interleaveBytes(Column arrayBinary) { @SuppressWarnings("checkstyle:CyclomaticComplexity") static Column sortedLexicographically(Column column, DataType type) { if (type instanceof ByteType) { - return column.cast(DataTypes.BinaryType); + return tinyToOrderedBytesUDF().apply(column); } else if (type instanceof ShortType) { - return getLexicalBytesIntLike(2).apply(column.cast(DataTypes.BinaryType)); + return shortToOrderedBytesUDF().apply(column); } else if (type instanceof IntegerType) { - return getLexicalBytesIntLike(4).apply(column.cast(DataTypes.BinaryType)); + return intToOrderedBytesUDF().apply(column); } else if (type instanceof LongType) { - return getLexicalBytesIntLike(8).apply(column.cast(DataTypes.BinaryType)); + return longToOrderedBytesUDF().apply(column); } else if (type instanceof FloatType) { - return getLexicalBytesFloatLike(4).apply(FLOAT_TO_BYTES.apply(column)); + return floatToOrderedBytesUDF().apply(column); } else if (type instanceof DoubleType) { - return
getLexicalBytesFloatLike(8).apply(DOUBLE_TO_BYTES.apply(column)); + return doubleToOrderedBytesUDF().apply(column); } else if (type instanceof StringType) { - return getLexicalBytesUTF8Like(STRING_KEY_LENGTH).apply(column.cast(DataTypes.BinaryType)); + return stringToOrderedBytesUDF().apply(column); } else if (type instanceof BinaryType) { - return getLexicalBytesUTF8Like(STRING_KEY_LENGTH).apply(column); + return stringToOrderedBytesUDF().apply(column); } else if (type instanceof BooleanType) { - return getLexicalBytesUTF8Like(1).apply(column.cast(DataTypes.BinaryType)); + return column.cast(DataTypes.BinaryType); } else if (type instanceof TimestampType) { - return getLexicalBytesIntLike(8).apply(column.cast(DataTypes.LongType).cast(DataTypes.BinaryType)); + return longToOrderedBytesUDF().apply(column.cast(DataTypes.LongType)); } else if (type instanceof DateType) { - return getLexicalBytesIntLike(8).apply(column.cast(DataTypes.LongType).cast(DataTypes.BinaryType)); + return longToOrderedBytesUDF().apply(column.cast(DataTypes.LongType)); } else { throw new IllegalArgumentException( String.format("Cannot use column %s of type %s in ZOrdering, the type is unsupported", From ba43cae1a76c1a9fd30c946ae3899ab139169137 Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Fri, 18 Feb 2022 07:44:08 -0600 Subject: [PATCH 17/30] Checkpoint for Perf Benchmark --- .../apache/iceberg/util/ZOrderByteUtils.java | 10 +- .../IcebergSortCompactionBenchmark.java | 86 ++++++- .../spark/actions/Spark3ZOrderStrategy.java | 143 ++--------- .../spark/actions/Spark3ZOrderUDF.java | 241 ++++++++++++++++++ 4 files changed, 347 insertions(+), 133 deletions(-) create mode 100644 spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java index b008461ea8ca..39ef0dcc14d3 100644 --- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java +++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java @@ -19,6 +19,7 @@ package org.apache.iceberg.util; +import java.io.Serializable; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.CharsetEncoder; @@ -137,6 +138,10 @@ static byte[] interleaveBits(byte[][] columnsBinary) { Arrays.stream(columnsBinary).mapToInt(column -> column.length).sum()); } + public static byte[] interleaveBits(byte[][] columnsBinary, int interleavedSize) { + return interleaveBits(columnsBinary, interleavedSize, ByteBuffer.allocate(interleavedSize)); + } + /** * Interleave bits using a naive loop. Variable length inputs are allowed but to get a consistent ordering it is * required that every column contribute the same number of bytes in each invocation. 
Bits are interleaved from all * @param columnsBinary an array of ordered byte representations of the columns being ZOrdered * @param interleavedSize the number of bytes to use in the output * @return the column bytes interleaved */ - public static byte[] interleaveBits(byte[][] columnsBinary, int interleavedSize) { - byte[] interleavedBytes = new byte[interleavedSize]; + public static byte[] interleaveBits(byte[][] columnsBinary, int interleavedSize, ByteBuffer reuse) { + byte[] interleavedBytes = reuse.array(); int sourceColumn = 0; int sourceByte = 0; int sourceBit = 7; @@ -191,4 +196,5 @@ public static byte[] interleaveBits(byte[][] columnsBinary, int interleavedSize) } return interleavedBytes; } + } diff --git a/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java b/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java index 3347ebef5017..69b21a5a89ef 100644 --- a/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java +++ b/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java @@ -112,6 +112,49 @@ public void sortInt() { .execute(); } + @Benchmark + @Threads(1) + public void sortInt2() { + SparkActions.get() + .rewriteDataFiles(table()) + .sort(SortOrder + .builderFor(table().schema()) + .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("intCol2", SortDirection.ASC, NullOrder.NULLS_FIRST) + .build()) + .execute(); + } + + @Benchmark + @Threads(1) + public void sortInt3() { + SparkActions.get() + .rewriteDataFiles(table()) + .sort(SortOrder + .builderFor(table().schema()) + .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("intCol2", SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("intCol3", SortDirection.ASC, NullOrder.NULLS_FIRST) + .build()) + .execute(); + } + + @Benchmark + @Threads(1) + public void sortInt4() { + SparkActions.get() + .rewriteDataFiles(table()) + .sort(SortOrder + .builderFor(table().schema()) + .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("intCol2", SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("intCol3", SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("intCol4", SortDirection.ASC, NullOrder.NULLS_FIRST) + .build()) + .execute(); + } + @Benchmark @Threads(1) public void sortString() { @@ -165,6 +208,33 @@ public void zSortInt() { .execute(); } + @Benchmark + @Threads(1) + public void zSortInt2() { + SparkActions.get() + .rewriteDataFiles(table()) + .zOrder("intCol", "intCol2") + .execute(); + } + + @Benchmark + @Threads(1) + public void zSortInt3() { + SparkActions.get() + .rewriteDataFiles(table()) + .zOrder("intCol", "intCol2", "intCol3") + .execute(); + } + + @Benchmark + @Threads(1) + public void zSortInt4() { + SparkActions.get() + .rewriteDataFiles(table()) + .zOrder("intCol", "intCol2", "intCol3", "intCol4") + .execute(); + } + @Benchmark @Threads(1) public void zSortString() { @@ -200,11 +270,14 @@ protected final void initTable() { Schema schema = new Schema( required(1, "longCol", Types.LongType.get()), required(2, "intCol", Types.IntegerType.get()), - required(3, "floatCol", Types.FloatType.get()), - optional(4, "doubleCol", Types.DoubleType.get()), - optional(6, "dateCol", Types.DateType.get()), - optional(7, "timestampCol", Types.TimestampType.withZone()), - optional(8, "stringCol", Types.StringType.get())); + required(3, "intCol2", Types.IntegerType.get()),
+ required(4, "intCol3", Types.IntegerType.get()), + required(5, "intCol4", Types.IntegerType.get()), + required(6, "floatCol", Types.FloatType.get()), + optional(7, "doubleCol", Types.DoubleType.get()), + optional(8, "dateCol", Types.DateType.get()), + optional(9, "timestampCol", Types.TimestampType.withZone()), + optional(10, "stringCol", Types.StringType.get())); SparkSessionCatalog catalog = null; try { @@ -221,6 +294,9 @@ private void appendData() { Dataset df = spark().range(0, NUM_ROWS * NUM_FILES, 1, NUM_FILES) .withColumnRenamed("id", "longCol") .withColumn("intCol", expr("CAST(longCol AS INT)")) + .withColumn("intCol2", expr("CAST(longCol AS INT)")) + .withColumn("intCol3", expr("CAST(longCol AS INT)")) + .withColumn("intCol4", expr("CAST(longCol AS INT)")) .withColumn("floatCol", expr("CAST(longCol AS FLOAT)")) .withColumn("doubleCol", expr("CAST(longCol AS DOUBLE)")) .withColumn("dateCol", date_add(current_date(), col("intCol").mod(NUM_FILES))) diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java index 449caa039aec..4c16349ea9dc 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java @@ -19,7 +19,13 @@ package org.apache.iceberg.spark.actions; +import java.io.IOException; +import java.io.Serializable; +import java.nio.ByteBuffer; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.StandardCharsets; import java.util.Arrays; +import java.util.Iterator; import java.util.List; import java.util.Set; import java.util.UUID; @@ -42,6 +48,7 @@ import org.apache.iceberg.types.Types.NestedField; import org.apache.iceberg.util.SortOrderUtil; import org.apache.iceberg.util.ZOrderByteUtils; +import org.apache.spark.api.java.function.MapPartitionsFunction; import org.apache.spark.sql.Column; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; @@ -67,6 +74,7 @@ import org.apache.spark.sql.types.StringType; import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.TimestampType; +import org.sparkproject.jetty.server.Authentication; import scala.collection.Seq; public class Spark3ZOrderStrategy extends Spark3SortStrategy { @@ -76,11 +84,13 @@ public class Spark3ZOrderStrategy extends Spark3SortStrategy { private static final org.apache.iceberg.SortOrder Z_SORT_ORDER = org.apache.iceberg.SortOrder.builderFor(Z_SCHEMA) .sortBy(Z_COLUMN, SortDirection.ASC, NullOrder.NULLS_LAST) .build(); - private static final int STRING_KEY_LENGTH = 60; + private static final int STRING_KEY_LENGTH = 128; private final List zOrderColNames; - private final FileScanTaskSetManager manager = FileScanTaskSetManager.get(); - private final FileRewriteCoordinator rewriteCoordinator = FileRewriteCoordinator.get(); + private transient FileScanTaskSetManager manager = FileScanTaskSetManager.get(); + private transient FileRewriteCoordinator rewriteCoordinator = FileRewriteCoordinator.get(); + + private final SparkZOrder orderHelper; public Spark3ZOrderStrategy(Table table, SparkSession spark, List zOrderColNames) { super(table, spark); @@ -97,6 +107,8 @@ public Spark3ZOrderStrategy(Table table, SparkSession spark, List zOrder "ZOrdering requested on %s", partZOrderCols); + this.orderHelper = new SparkZOrder(zOrderColNames.size()); + this.zOrderColNames = zOrderColNames; } @@ -149,10 +161,10 @@ 
public Set rewriteFiles(List filesToRewrite) { .collect(Collectors.toList()); Column zvalueArray = functions.array(zOrderColumns.stream().map(colStruct -> - SparkZOrder.sortedLexicographically(functions.col(colStruct.name()), colStruct.dataType()) + orderHelper.sortedLexicographically(functions.col(colStruct.name()), colStruct.dataType()) ).toArray(Column[]::new)); - Dataset zvalueDF = scanDF.withColumn(Z_COLUMN, SparkZOrder.interleaveBytes(zvalueArray)); + Dataset zvalueDF = scanDF.withColumn(Z_COLUMN, orderHelper.interleaveBytes(zvalueArray)); SQLConf sqlConf = cloneSession.sessionState().conf(); LogicalPlan sortPlan = sortPlan(distribution, ordering, zvalueDF.logicalPlan(), sqlConf); @@ -178,125 +190,4 @@ public Set rewriteFiles(List filesToRewrite) { protected org.apache.iceberg.SortOrder sortOrder() { return Z_SORT_ORDER; } - - static class SparkZOrder { - - private static final byte[] TINY_EMPTY = new byte[Byte.BYTES]; - private static final byte[] SHORT_EMPTY = new byte[Short.BYTES]; - private static final byte[] INT_EMPTY = new byte[Integer.BYTES]; - private static final byte[] LONG_EMPTY = new byte[Long.BYTES]; - private static final byte[] FLOAT_EMPTY = new byte[Float.BYTES]; - private static final byte[] DOUBLE_EMPTY = new byte[Double.BYTES]; - - static byte[] interleaveBits(Seq scalaBinary) { - byte[][] columnsBinary = scala.collection.JavaConverters.seqAsJavaList(scalaBinary) - .toArray(new byte[scalaBinary.size()][]); - return ZOrderByteUtils.interleaveBits(columnsBinary); - } - - private static UserDefinedFunction tinyToOrderedBytesUDF() { - return functions.udf((Byte value) -> { - if (value == null) { - return TINY_EMPTY; - } - return ZOrderByteUtils.tinyintToOrderedBytes(value); - }, DataTypes.BinaryType) - .withName("TINY_ORDERED_BYTES"); - } - - private static UserDefinedFunction shortToOrderedBytesUDF() { - return functions.udf((Short value) -> { - if (value == null) { - return SHORT_EMPTY; - } - return ZOrderByteUtils.shortToOrderBytes(value); - }, DataTypes.BinaryType) - .withName("SHORT_ORDERED_BYTES"); - } - - private static UserDefinedFunction intToOrderedBytesUDF() { - return functions.udf((Integer value) -> { - if (value == null) { - return INT_EMPTY; - } - return ZOrderByteUtils.intToOrderedBytes(value); - }, DataTypes.BinaryType) - .withName("INT_ORDERED_BYTES"); - } - - private static UserDefinedFunction longToOrderedBytesUDF() { - return functions.udf((Long value) -> { - if (value == null) { - return LONG_EMPTY; - } - return ZOrderByteUtils.longToOrderBytes(value); - }, DataTypes.BinaryType) - .withName("LONG_ORDERED_BYTES"); - } - - private static UserDefinedFunction floatToOrderedBytesUDF() { - return functions.udf((Float value) -> { - if (value == null) { - return FLOAT_EMPTY; - } - return ZOrderByteUtils.floatToOrderedBytes(value); - }, DataTypes.BinaryType) - .withName("FLOAT_ORDERED_BYTES"); - } - - private static UserDefinedFunction doubleToOrderedBytesUDF() { - return functions.udf((Double value) -> { - if (value == null) { - return DOUBLE_EMPTY; - } - return ZOrderByteUtils.doubleToOrderedBytes(value); - }, DataTypes.BinaryType) - .withName("FLOAT_ORDERED_BYTES"); - } - - private static UserDefinedFunction stringToOrderedBytesUDF() { - return functions.udf((String value) -> ZOrderByteUtils.stringToOrderedBytes(value, STRING_KEY_LENGTH), - DataTypes.BinaryType) - .withName("STRING-LEXICAL-BYTES"); - } - - private static final UserDefinedFunction INTERLEAVE_UDF = - functions.udf((Seq arrayBinary) -> interleaveBits(arrayBinary), DataTypes.BinaryType) - 
.withName("INTERLEAVE_BYTES"); - - static Column interleaveBytes(Column arrayBinary) { - return INTERLEAVE_UDF.apply(arrayBinary); - } - - @SuppressWarnings("checkstyle:CyclomaticComplexity") - static Column sortedLexicographically(Column column, DataType type) { - if (type instanceof ByteType) { - return tinyToOrderedBytesUDF().apply(column); - } else if (type instanceof ShortType) { - return shortToOrderedBytesUDF().apply(column); - } else if (type instanceof IntegerType) { - return intToOrderedBytesUDF().apply(column); - } else if (type instanceof LongType) { - return longToOrderedBytesUDF().apply(column); - } else if (type instanceof FloatType) { - return floatToOrderedBytesUDF().apply(column); - } else if (type instanceof DoubleType) { - return doubleToOrderedBytesUDF().apply(column); - } else if (type instanceof StringType) { - return stringToOrderedBytesUDF().apply(column); - } else if (type instanceof BinaryType) { - return stringToOrderedBytesUDF().apply(column); - } else if (type instanceof BooleanType) { - return column.cast(DataTypes.BinaryType); - } else if (type instanceof TimestampType) { - return longToOrderedBytesUDF().apply(column.cast(DataTypes.LongType)); - } else if (type instanceof DateType) { - return longToOrderedBytesUDF().apply(column.cast(DataTypes.LongType)); - } else { - throw new IllegalArgumentException( - String.format("Cannot use column %s of type %s in ZOrdering, the type is unsupported", - column, type)); - } - } - } } diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java new file mode 100644 index 000000000000..d0ee5c2b324c --- /dev/null +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java @@ -0,0 +1,241 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.iceberg.spark.actions; + +import java.io.IOException; +import java.io.Serializable; +import java.nio.ByteBuffer; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.StandardCharsets; +import org.apache.iceberg.util.ZOrderByteUtils; +import org.apache.spark.sql.Column; +import org.apache.spark.sql.expressions.UserDefinedFunction; +import org.apache.spark.sql.functions; +import org.apache.spark.sql.types.BinaryType; +import org.apache.spark.sql.types.BooleanType; +import org.apache.spark.sql.types.ByteType; +import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.DateType; +import org.apache.spark.sql.types.DoubleType; +import org.apache.spark.sql.types.FloatType; +import org.apache.spark.sql.types.IntegerType; +import org.apache.spark.sql.types.LongType; +import org.apache.spark.sql.types.ShortType; +import org.apache.spark.sql.types.StringType; +import org.apache.spark.sql.types.TimestampType; +import scala.collection.Seq; + +class SparkZOrder implements Serializable { + private final int STRING_KEY_LENGTH = 128; + + private final byte[] TINY_EMPTY = new byte[Byte.BYTES]; + private final byte[] SHORT_EMPTY = new byte[Short.BYTES]; + private final byte[] INT_EMPTY = new byte[Integer.BYTES]; + private final byte[] LONG_EMPTY = new byte[Long.BYTES]; + private final byte[] FLOAT_EMPTY = new byte[Float.BYTES]; + private final byte[] DOUBLE_EMPTY = new byte[Double.BYTES]; + + transient private ThreadLocal outputBuffer; + transient private ThreadLocal inputHolder; + transient private ThreadLocal[] inputBuffers; + transient private ThreadLocal encoder; + + private final int numCols; + + private int inputCol = 0; + private int totalBytes = 0; + + SparkZOrder(int numCols) { + this.numCols = numCols; + } + + private void readObject(java.io.ObjectInputStream in) + throws IOException, ClassNotFoundException { + in.defaultReadObject(); + inputBuffers = new ThreadLocal[numCols]; + inputHolder = ThreadLocal.withInitial(() -> new byte[numCols][]); + encoder = ThreadLocal.withInitial(() -> StandardCharsets.UTF_8.newEncoder()); + } + + + private ByteBuffer outputBuffer(int size) { + if (outputBuffer == null) { + // May over allocate on concurrent calls + outputBuffer = ThreadLocal.withInitial(() -> ByteBuffer.allocate(size)); + } + return outputBuffer.get(); + } + + private ByteBuffer inputBuffer(int position, int size){ + if (inputBuffers[position] == null) { + // May over allocate on concurrent calls + inputBuffers[position] = ThreadLocal.withInitial(() -> ByteBuffer.allocate(size)); + } + return inputBuffers[position].get(); + } + + byte[] interleaveBits(Seq scalaBinary) { + byte[][] columnsBinary = scala.collection.JavaConverters.seqAsJavaList(scalaBinary) + .toArray(inputHolder.get()); + return ZOrderByteUtils.interleaveBits(columnsBinary, totalBytes, outputBuffer(totalBytes)); + } + + private UserDefinedFunction tinyToOrderedBytesUDF() { + int position = inputCol; + UserDefinedFunction udf = functions.udf((Byte value) -> { + if (value == null) { + return TINY_EMPTY; + } + return ZOrderByteUtils.tinyintToOrderedBytes(value, inputBuffer(position, Byte.BYTES)).array(); + }, DataTypes.BinaryType).withName("TINY_ORDERED_BYTES"); + + this.inputCol++; + this.totalBytes+= Byte.BYTES; + + return udf; + } + + private UserDefinedFunction shortToOrderedBytesUDF() { + int position = inputCol; + UserDefinedFunction udf = functions.udf((Short value) -> { + if (value == null) { + return SHORT_EMPTY; + } + 
return ZOrderByteUtils.shortToOrderedBytes(value, inputBuffer(position, Short.BYTES)).array(); + }, DataTypes.BinaryType) + .withName("SHORT_ORDERED_BYTES"); + + this.inputCol++; + this.totalBytes+= Short.BYTES; + + return udf; + } + + private UserDefinedFunction intToOrderedBytesUDF() { + int position = inputCol; + UserDefinedFunction udf = functions.udf((Integer value) -> { + if (value == null) { + return INT_EMPTY; + } + return ZOrderByteUtils.intToOrderedBytes(value, inputBuffer(position, Integer.BYTES)).array(); + }, DataTypes.BinaryType) + .withName("INT_ORDERED_BYTES"); + + this.inputCol++; + this.totalBytes += Integer.BYTES; + + return udf; + } + + private UserDefinedFunction longToOrderedBytesUDF() { + int position = inputCol; + UserDefinedFunction udf = functions.udf((Long value) -> { + if (value == null) { + return LONG_EMPTY; + } + return ZOrderByteUtils.longToOrderedBytes(value, inputBuffer(position, Long.BYTES)).array(); + }, DataTypes.BinaryType) + .withName("LONG_ORDERED_BYTES"); + + this.inputCol++; + this.totalBytes += Long.BYTES; + + return udf; + } + + private UserDefinedFunction floatToOrderedBytesUDF() { + int position = inputCol; + UserDefinedFunction udf = functions.udf((Float value) -> { + if (value == null) { + return FLOAT_EMPTY; + } + return ZOrderByteUtils.floatToOrderedBytes(value, inputBuffer(position, Float.BYTES)).array(); + }, DataTypes.BinaryType) + .withName("FLOAT_ORDERED_BYTES"); + + this.inputCol++; + this.totalBytes += Float.BYTES; + + return udf; + } + + private UserDefinedFunction doubleToOrderedBytesUDF() { + int position = inputCol; + UserDefinedFunction udf = functions.udf((Double value) -> { + if (value == null) { + return DOUBLE_EMPTY; + } + return ZOrderByteUtils.doubleToOrderedBytes(value, inputBuffer(position, Double.BYTES)).array(); + }, DataTypes.BinaryType) + .withName("FLOAT_ORDERED_BYTES"); + + this.inputCol++; + this.totalBytes += Double.BYTES; + + return udf; + } + + private UserDefinedFunction stringToOrderedBytesUDF() { + int position = inputCol; + UserDefinedFunction udf = functions.udf((String value) -> + ZOrderByteUtils.stringToOrderedBytes(value, STRING_KEY_LENGTH, inputBuffer(position, STRING_KEY_LENGTH), + encoder.get()).array(), DataTypes.BinaryType).withName("STRING-LEXICAL-BYTES"); + + this.inputCol++; + this.totalBytes += STRING_KEY_LENGTH; + + return udf; + } + + private final UserDefinedFunction INTERLEAVE_UDF = + functions.udf((Seq arrayBinary) -> interleaveBits(arrayBinary), DataTypes.BinaryType) + .withName("INTERLEAVE_BYTES"); + + Column interleaveBytes(Column arrayBinary) { + return INTERLEAVE_UDF.apply(arrayBinary); + } + + @SuppressWarnings("checkstyle:CyclomaticComplexity") + Column sortedLexicographically(Column column, DataType type) { + if (type instanceof ByteType) { + return tinyToOrderedBytesUDF().apply(column); + } else if (type instanceof ShortType) { + return shortToOrderedBytesUDF().apply(column); + } else if (type instanceof IntegerType) { + return intToOrderedBytesUDF().apply(column); + } else if (type instanceof LongType) { + return longToOrderedBytesUDF().apply(column); + } else if (type instanceof FloatType) { + return floatToOrderedBytesUDF().apply(column); + } else if (type instanceof DoubleType) { + return doubleToOrderedBytesUDF().apply(column); + } else if (type instanceof StringType) { + return stringToOrderedBytesUDF().apply(column); + } else if (type instanceof BinaryType) { + return stringToOrderedBytesUDF().apply(column); + } else if (type instanceof BooleanType) { + return 
column.cast(DataTypes.BinaryType);
+    } else if (type instanceof TimestampType) {
+      return longToOrderedBytesUDF().apply(column.cast(DataTypes.LongType));
+    } else if (type instanceof DateType) {
+      return longToOrderedBytesUDF().apply(column.cast(DataTypes.LongType));
+    } else {
+      throw new IllegalArgumentException(
+          String.format("Cannot use column %s of type %s in ZOrdering, the type is unsupported",
+              column, type));
+    }
+  }
+}
\ No newline at end of file

From a50b496c79d58da8957c4d8148c06b7388203d95 Mon Sep 17 00:00:00 2001
From: Russell_Spitzer
Date: Tue, 15 Mar 2022 21:57:29 -0500
Subject: [PATCH 18/30] WIP Reviewer Comments, Cleanup

Change all primitives to use 8 byte buffers; types are now aligned based on
magnitude. Perf test is still WIP, using new random-generating UDFs.

---
 .../apache/iceberg/util/ZOrderByteUtils.java  |  38 +-
 .../iceberg/util/TestZOrderByteUtil.java      |  24 +-
 .../IcebergSortCompactionBenchmark.java       |  25 +-
 .../spark/action/RandomGeneratingUDF.java     |  42 ++
 .../spark/actions/Spark3ZOrderStrategy.java   |  30 +-
 .../spark/actions/Spark3ZOrderUDF.java        | 396 +++++++++---------
 6 files changed, 293 insertions(+), 262 deletions(-)
 create mode 100644 spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/RandomGeneratingUDF.java

diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
index 39ef0dcc14d3..44141e777295 100644
--- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
+++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
@@ -19,7 +19,6 @@
 package org.apache.iceberg.util;
 
-import java.io.Serializable;
 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
 import java.nio.charset.CharsetEncoder;
@@ -34,6 +33,8 @@
  * that are lexicographically ordered. Bytes produced should be compared lexicographically as
  * unsigned bytes, big-endian.
  * <p>
+ * All types except for String are stored within an 8 byte buffer
+ * <p>
  * Most of these techniques are derived from
  * https://aws.amazon.com/blogs/database/z-order-indexing-for-multifaceted-queries-in-amazon-dynamodb-part-2/
  * <p>
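[Editor's illustration] The hunks below widen every numeric primitive to the same 8 byte ordered form: cast to long, flip the sign bit, and write big-endian bytes, after which plain unsigned lexicographic comparison of the buffers agrees with numeric order. A self-contained sketch of that property, separate from the Iceberg classes in this patch (class and method names here are illustrative only):

    import java.nio.ByteBuffer;

    public class OrderedBytesDemo {

      // Widen any signed primitive to 8 bytes and flip the sign bit; unsigned
      // lexicographic comparison of the result then matches the numeric order.
      static byte[] orderedBytes(long val) {
        return ByteBuffer.allocate(8).putLong(val ^ 0x8000000000000000L).array();
      }

      // Unsigned, big-endian lexicographic comparison, as used on z-order keys.
      static int compareUnsigned(byte[] a, byte[] b) {
        for (int i = 0; i < a.length; i++) {
          int cmp = Integer.compare(a[i] & 0xFF, b[i] & 0xFF);
          if (cmp != 0) {
            return cmp;
          }
        }
        return 0;
      }

      public static void main(String[] args) {
        long[] values = {Long.MIN_VALUE, -42L, -1L, 0L, 1L, 42L, Long.MAX_VALUE};
        for (int i = 1; i < values.length; i++) {
          // Each adjacent pair must compare the same way before and after encoding
          int cmp = compareUnsigned(orderedBytes(values[i - 1]), orderedBytes(values[i]));
          System.out.println(values[i - 1] + " < " + values[i] + " holds after encoding: " + (cmp < 0));
        }
      }
    }

Keeping every column at a fixed 8 byte width also keeps the interleave loop simple: no column runs out of bytes before the others, so the "find next column with a byte" bookkeeping never has to skip.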
@@ -42,18 +43,23 @@
  */
 public class ZOrderByteUtils {
 
+  public static final int BUFFER_SIZE = 8;
+
   private ZOrderByteUtils() {
 
   }
 
+  static ByteBuffer allocatePrimitiveBuffer() {
+    return ByteBuffer.allocate(BUFFER_SIZE);
+  }
+
   /**
    * Signed ints do not have their bytes in magnitude order because of the sign bit.
    * To fix this, flip the sign bit so that all negatives are ordered before positives. This essentially
    * shifts the 0 value so that we don't break our ordering when we cross the new 0 value.
    */
   public static ByteBuffer intToOrderedBytes(int val, ByteBuffer reuse) {
-    ByteBuffer bytes = ByteBuffers.reuse(reuse, Integer.BYTES);
-    bytes.putInt(val ^ 0x80000000);
+    ByteBuffer bytes = ByteBuffers.reuse(reuse, BUFFER_SIZE);
+    bytes.putLong(((long) val) ^ 0x8000000000000000L);
     return bytes;
   }
 
@@ -61,7 +67,7 @@ public static ByteBuffer intToOrderedBytes(int val, ByteBuffer reuse) {
    * Signed longs are treated the same as the signed ints in {@link #intToOrderedBytes(int, ByteBuffer)}
    */
   public static ByteBuffer longToOrderedBytes(long val, ByteBuffer reuse) {
-    ByteBuffer bytes = ByteBuffers.reuse(reuse, Long.BYTES);
+    ByteBuffer bytes = ByteBuffers.reuse(reuse, BUFFER_SIZE);
     bytes.putLong(val ^ 0x8000000000000000L);
     return bytes;
   }
 
@@ -70,8 +76,8 @@ public static ByteBuffer intToOrderedBytes(int val, ByteBuffer reuse) {
    * Signed shorts are treated the same as the signed ints in {@link #intToOrderedBytes(int, ByteBuffer)}
    */
   public static ByteBuffer shortToOrderedBytes(short val, ByteBuffer reuse) {
-    ByteBuffer bytes = ByteBuffers.reuse(reuse, Short.BYTES);
-    bytes.putShort((short) (val ^ (0x8000)));
+    ByteBuffer bytes = ByteBuffers.reuse(reuse, BUFFER_SIZE);
+    bytes.putLong(((long) val) ^ 0x8000000000000000L);
     return bytes;
   }
 
@@ -79,8 +85,8 @@ public static ByteBuffer shortToOrderedBytes(short val, ByteBuffer reuse) {
    * Signed tiny ints are treated the same as the signed ints in {@link #intToOrderedBytes(int, ByteBuffer)}
    */
   public static ByteBuffer tinyintToOrderedBytes(byte val, ByteBuffer reuse) {
-    ByteBuffer bytes = ByteBuffers.reuse(reuse, Byte.BYTES);
-    bytes.put((byte) (val ^ (0x80)));
+    ByteBuffer bytes = ByteBuffers.reuse(reuse, BUFFER_SIZE);
+    bytes.putLong(((long) val) ^ 0x8000000000000000L);
     return bytes;
   }
 
@@ -93,10 +99,10 @@ public static ByteBuffer tinyintToOrderedBytes(byte val, ByteBuffer reuse) {
    * comparable bytes
    */
   public static ByteBuffer floatToOrderedBytes(float val, ByteBuffer reuse) {
-    ByteBuffer bytes = ByteBuffers.reuse(reuse, Float.BYTES);
-    int ival = Float.floatToIntBits(val);
-    ival ^= ((ival >> (Integer.SIZE - 1)) | Integer.MIN_VALUE);
-    bytes.putInt(ival);
+    ByteBuffer bytes = ByteBuffers.reuse(reuse, BUFFER_SIZE);
+    long lval = Double.doubleToLongBits(val);
+    lval ^= ((lval >> (Long.SIZE - 1)) | Long.MIN_VALUE);
+    bytes.putLong(lval);
     return bytes;
   }
 
@@ -104,10 +110,10 @@ public static ByteBuffer floatToOrderedBytes(float val, ByteBuffer reuse) {
    * Doubles are treated the same as floats in {@link #floatToOrderedBytes(float, ByteBuffer)}
    */
   public static ByteBuffer doubleToOrderedBytes(double val, ByteBuffer reuse) {
-    ByteBuffer bytes = ByteBuffers.reuse(reuse, Double.BYTES);
-    long lng = Double.doubleToLongBits(val);
-    lng ^= ((lng >> (Long.SIZE - 1)) | Long.MIN_VALUE);
-    bytes.putLong(lng);
+    ByteBuffer bytes = ByteBuffers.reuse(reuse, BUFFER_SIZE);
+    long lval = Double.doubleToLongBits(val);
+    lval ^= ((lval >> (Long.SIZE - 1)) | Long.MIN_VALUE);
+    bytes.putLong(lval);
     return bytes;
   }
 
diff --git
a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java index bf84319d0d45..858200c370b4 100644 --- a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java +++ b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java @@ -145,8 +145,8 @@ public void testInterleaveMixedBits() { @Test public void testIntOrdering() { - ByteBuffer aBuffer = ByteBuffer.allocate(Integer.BYTES); - ByteBuffer bBuffer = ByteBuffer.allocate(Integer.BYTES); + ByteBuffer aBuffer = ZOrderByteUtils.allocatePrimitiveBuffer(); + ByteBuffer bBuffer = ZOrderByteUtils.allocatePrimitiveBuffer(); for (int i = 0; i < NUM_TESTS; i++) { int aInt = random.nextInt(); int bInt = random.nextInt(); @@ -164,8 +164,8 @@ public void testIntOrdering() { @Test public void testLongOrdering() { - ByteBuffer aBuffer = ByteBuffer.allocate(Long.BYTES); - ByteBuffer bBuffer = ByteBuffer.allocate(Long.BYTES); + ByteBuffer aBuffer = ZOrderByteUtils.allocatePrimitiveBuffer(); + ByteBuffer bBuffer = ZOrderByteUtils.allocatePrimitiveBuffer(); for (int i = 0; i < NUM_TESTS; i++) { long aLong = random.nextInt(); long bLong = random.nextInt(); @@ -183,8 +183,8 @@ public void testLongOrdering() { @Test public void testShortOrdering() { - ByteBuffer aBuffer = ByteBuffer.allocate(Short.BYTES); - ByteBuffer bBuffer = ByteBuffer.allocate(Short.BYTES); + ByteBuffer aBuffer = ZOrderByteUtils.allocatePrimitiveBuffer(); + ByteBuffer bBuffer = ZOrderByteUtils.allocatePrimitiveBuffer(); for (int i = 0; i < NUM_TESTS; i++) { short aShort = (short) (random.nextInt() % (Short.MAX_VALUE + 1)); short bShort = (short) (random.nextInt() % (Short.MAX_VALUE + 1)); @@ -202,8 +202,8 @@ public void testShortOrdering() { @Test public void testTinyOrdering() { - ByteBuffer aBuffer = ByteBuffer.allocate(Byte.BYTES); - ByteBuffer bBuffer = ByteBuffer.allocate(Byte.BYTES); + ByteBuffer aBuffer = ZOrderByteUtils.allocatePrimitiveBuffer(); + ByteBuffer bBuffer = ZOrderByteUtils.allocatePrimitiveBuffer(); for (int i = 0; i < NUM_TESTS; i++) { byte aByte = (byte) (random.nextInt() % (Byte.MAX_VALUE + 1)); byte bByte = (byte) (random.nextInt() % (Byte.MAX_VALUE + 1)); @@ -221,8 +221,8 @@ public void testTinyOrdering() { @Test public void testFloatOrdering() { - ByteBuffer aBuffer = ByteBuffer.allocate(Float.BYTES); - ByteBuffer bBuffer = ByteBuffer.allocate(Float.BYTES); + ByteBuffer aBuffer = ZOrderByteUtils.allocatePrimitiveBuffer(); + ByteBuffer bBuffer = ZOrderByteUtils.allocatePrimitiveBuffer(); for (int i = 0; i < NUM_TESTS; i++) { float aFloat = random.nextFloat(); float bFloat = random.nextFloat(); @@ -240,8 +240,8 @@ public void testFloatOrdering() { @Test public void testDoubleOrdering() { - ByteBuffer aBuffer = ByteBuffer.allocate(Double.BYTES); - ByteBuffer bBuffer = ByteBuffer.allocate(Double.BYTES); + ByteBuffer aBuffer = ZOrderByteUtils.allocatePrimitiveBuffer(); + ByteBuffer bBuffer = ZOrderByteUtils.allocatePrimitiveBuffer(); for (int i = 0; i < NUM_TESTS; i++) { double aDouble = random.nextDouble(); double bDouble = random.nextDouble(); diff --git a/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java b/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java index 69b21a5a89ef..4cb70960cc0b 100644 --- a/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java +++ 
b/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java @@ -42,6 +42,7 @@ import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.connector.catalog.Identifier; import org.apache.spark.sql.connector.expressions.Transform; +import org.apache.spark.sql.types.DataTypes; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; @@ -74,6 +75,7 @@ public class IcebergSortCompactionBenchmark { private static final Identifier IDENT = Identifier.of(NAMESPACE, NAME); private static final int NUM_FILES = 8; private static final long NUM_ROWS = 10000000L; + private static final long UNIQUE_VALUES = NUM_ROWS / 10; private final Configuration hadoopConf = initHadoopConf(); @@ -292,16 +294,23 @@ protected final void initTable() { private void appendData() { Dataset df = spark().range(0, NUM_ROWS * NUM_FILES, 1, NUM_FILES) - .withColumnRenamed("id", "longCol") - .withColumn("intCol", expr("CAST(longCol AS INT)")) - .withColumn("intCol2", expr("CAST(longCol AS INT)")) - .withColumn("intCol3", expr("CAST(longCol AS INT)")) - .withColumn("intCol4", expr("CAST(longCol AS INT)")) - .withColumn("floatCol", expr("CAST(longCol AS FLOAT)")) - .withColumn("doubleCol", expr("CAST(longCol AS DOUBLE)")) + .drop("id") + .withColumn("longCol", new RandomGeneratingUDF(UNIQUE_VALUES).randomLongUDF().apply()) + .withColumn("intCol", + new RandomGeneratingUDF(UNIQUE_VALUES).randomLongUDF().apply().cast(DataTypes.IntegerType)) + .withColumn("intCol2", + new RandomGeneratingUDF(UNIQUE_VALUES).randomLongUDF().apply().cast(DataTypes.IntegerType)) + .withColumn("intCol3", + new RandomGeneratingUDF(UNIQUE_VALUES).randomLongUDF().apply().cast(DataTypes.IntegerType)) + .withColumn("intCol4", + new RandomGeneratingUDF(UNIQUE_VALUES).randomLongUDF().apply().cast(DataTypes.IntegerType)) + .withColumn("floatCol", + new RandomGeneratingUDF(UNIQUE_VALUES).randomLongUDF().apply().cast(DataTypes.FloatType)) + .withColumn("doubleCol", + new RandomGeneratingUDF(UNIQUE_VALUES).randomLongUDF().apply().cast(DataTypes.DoubleType)) .withColumn("dateCol", date_add(current_date(), col("intCol").mod(NUM_FILES))) .withColumn("timestampCol", expr("TO_TIMESTAMP(dateCol)")) - .withColumn("stringCol", expr("CAST(dateCol AS STRING)")); + .withColumn("stringCol", new RandomGeneratingUDF(UNIQUE_VALUES).randomString().apply()); writeData(df); } diff --git a/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/RandomGeneratingUDF.java b/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/RandomGeneratingUDF.java new file mode 100644 index 000000000000..5cb6a350c7c7 --- /dev/null +++ b/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/RandomGeneratingUDF.java @@ -0,0 +1,42 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.iceberg.spark.action; + +import java.io.Serializable; +import java.util.Random; +import org.apache.iceberg.types.Types; +import org.apache.iceberg.util.RandomUtil; +import org.apache.spark.sql.expressions.UserDefinedFunction; +import org.apache.spark.sql.types.DataTypes; + +import static org.apache.spark.sql.functions.udf; + +class RandomGeneratingUDF implements Serializable { + private final long uniqueValues; + private Random rand = new Random(); + + RandomGeneratingUDF(long uniqueValues) { + this.uniqueValues = uniqueValues; + } + + UserDefinedFunction randomLongUDF() { + return udf(() -> rand.nextLong() % (uniqueValues / 2), DataTypes.LongType).asNondeterministic().asNonNullable(); + } + + UserDefinedFunction randomString() { + return udf(() -> (String) RandomUtil.generatePrimitive(Types.StringType.get(), rand), DataTypes.StringType) + .asNondeterministic().asNonNullable(); + } +} diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java index 4c16349ea9dc..71a0274d5a08 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java @@ -19,13 +19,8 @@ package org.apache.iceberg.spark.actions; -import java.io.IOException; -import java.io.Serializable; import java.nio.ByteBuffer; -import java.nio.charset.CharsetEncoder; -import java.nio.charset.StandardCharsets; import java.util.Arrays; -import java.util.Iterator; import java.util.List; import java.util.Set; import java.util.UUID; @@ -47,8 +42,6 @@ import org.apache.iceberg.types.Types; import org.apache.iceberg.types.Types.NestedField; import org.apache.iceberg.util.SortOrderUtil; -import org.apache.iceberg.util.ZOrderByteUtils; -import org.apache.spark.api.java.function.MapPartitionsFunction; import org.apache.spark.sql.Column; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; @@ -57,25 +50,9 @@ import org.apache.spark.sql.connector.distributions.Distribution; import org.apache.spark.sql.connector.distributions.Distributions; import org.apache.spark.sql.connector.expressions.SortOrder; -import org.apache.spark.sql.expressions.UserDefinedFunction; import org.apache.spark.sql.functions; import org.apache.spark.sql.internal.SQLConf; -import org.apache.spark.sql.types.BinaryType; -import org.apache.spark.sql.types.BooleanType; -import org.apache.spark.sql.types.ByteType; -import org.apache.spark.sql.types.DataType; -import org.apache.spark.sql.types.DataTypes; -import org.apache.spark.sql.types.DateType; -import org.apache.spark.sql.types.DoubleType; -import org.apache.spark.sql.types.FloatType; -import org.apache.spark.sql.types.IntegerType; -import org.apache.spark.sql.types.LongType; -import org.apache.spark.sql.types.ShortType; -import org.apache.spark.sql.types.StringType; import org.apache.spark.sql.types.StructField; -import org.apache.spark.sql.types.TimestampType; -import org.sparkproject.jetty.server.Authentication; -import scala.collection.Seq; public class Spark3ZOrderStrategy extends Spark3SortStrategy { @@ -84,13 +61,12 @@ public class Spark3ZOrderStrategy extends Spark3SortStrategy { private static final org.apache.iceberg.SortOrder Z_SORT_ORDER = org.apache.iceberg.SortOrder.builderFor(Z_SCHEMA) .sortBy(Z_COLUMN, SortDirection.ASC, NullOrder.NULLS_LAST) .build(); - private static final int STRING_KEY_LENGTH = 
128; private final List zOrderColNames; private transient FileScanTaskSetManager manager = FileScanTaskSetManager.get(); private transient FileRewriteCoordinator rewriteCoordinator = FileRewriteCoordinator.get(); - private final SparkZOrder orderHelper; + private final Spark3ZOrderUDF orderHelper; public Spark3ZOrderStrategy(Table table, SparkSession spark, List zOrderColNames) { super(table, spark); @@ -107,7 +83,7 @@ public Spark3ZOrderStrategy(Table table, SparkSession spark, List zOrder "ZOrdering requested on %s", partZOrderCols); - this.orderHelper = new SparkZOrder(zOrderColNames.size()); + this.orderHelper = new Spark3ZOrderUDF(zOrderColNames.size()); this.zOrderColNames = zOrderColNames; } @@ -119,7 +95,7 @@ public String name() { @Override protected void validateOptions() { - // TODO implement Zorder Strategy in API Module + // TODO implement ZOrder Strategy in API Module return; } diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java index d0ee5c2b324c..839780fef677 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java @@ -1,15 +1,20 @@ /* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
*/ package org.apache.iceberg.spark.actions; @@ -38,204 +43,197 @@ import org.apache.spark.sql.types.TimestampType; import scala.collection.Seq; -class SparkZOrder implements Serializable { - private final int STRING_KEY_LENGTH = 128; +class Spark3ZOrderUDF implements Serializable { + private static final int STRING_KEY_LENGTH = 16; - private final byte[] TINY_EMPTY = new byte[Byte.BYTES]; - private final byte[] SHORT_EMPTY = new byte[Short.BYTES]; - private final byte[] INT_EMPTY = new byte[Integer.BYTES]; - private final byte[] LONG_EMPTY = new byte[Long.BYTES]; - private final byte[] FLOAT_EMPTY = new byte[Float.BYTES]; - private final byte[] DOUBLE_EMPTY = new byte[Double.BYTES]; + private static final byte[] PRIMITIVE_EMPTY = new byte[ZOrderByteUtils.BUFFER_SIZE]; - transient private ThreadLocal outputBuffer; - transient private ThreadLocal inputHolder; - transient private ThreadLocal[] inputBuffers; - transient private ThreadLocal encoder; + private transient ThreadLocal outputBuffer; + private transient ThreadLocal inputHolder; + private transient ThreadLocal[] inputBuffers; + private transient ThreadLocal encoder; - private final int numCols; + private final int numCols; - private int inputCol = 0; - private int totalBytes = 0; + private int inputCol = 0; + private int totalBytes = 0; - SparkZOrder(int numCols) { - this.numCols = numCols; - } - - private void readObject(java.io.ObjectInputStream in) - throws IOException, ClassNotFoundException { - in.defaultReadObject(); - inputBuffers = new ThreadLocal[numCols]; - inputHolder = ThreadLocal.withInitial(() -> new byte[numCols][]); - encoder = ThreadLocal.withInitial(() -> StandardCharsets.UTF_8.newEncoder()); - } - - - private ByteBuffer outputBuffer(int size) { - if (outputBuffer == null) { - // May over allocate on concurrent calls - outputBuffer = ThreadLocal.withInitial(() -> ByteBuffer.allocate(size)); - } - return outputBuffer.get(); - } - - private ByteBuffer inputBuffer(int position, int size){ - if (inputBuffers[position] == null) { - // May over allocate on concurrent calls - inputBuffers[position] = ThreadLocal.withInitial(() -> ByteBuffer.allocate(size)); - } - return inputBuffers[position].get(); - } - - byte[] interleaveBits(Seq scalaBinary) { - byte[][] columnsBinary = scala.collection.JavaConverters.seqAsJavaList(scalaBinary) - .toArray(inputHolder.get()); - return ZOrderByteUtils.interleaveBits(columnsBinary, totalBytes, outputBuffer(totalBytes)); - } - - private UserDefinedFunction tinyToOrderedBytesUDF() { - int position = inputCol; - UserDefinedFunction udf = functions.udf((Byte value) -> { - if (value == null) { - return TINY_EMPTY; - } - return ZOrderByteUtils.tinyintToOrderedBytes(value, inputBuffer(position, Byte.BYTES)).array(); - }, DataTypes.BinaryType).withName("TINY_ORDERED_BYTES"); - - this.inputCol++; - this.totalBytes+= Byte.BYTES; - - return udf; - } - - private UserDefinedFunction shortToOrderedBytesUDF() { - int position = inputCol; - UserDefinedFunction udf = functions.udf((Short value) -> { - if (value == null) { - return SHORT_EMPTY; - } - return ZOrderByteUtils.shortToOrderedBytes(value, inputBuffer(position, Short.BYTES)).array(); - }, DataTypes.BinaryType) - .withName("SHORT_ORDERED_BYTES"); - - this.inputCol++; - this.totalBytes+= Short.BYTES; - - return udf; - } - - private UserDefinedFunction intToOrderedBytesUDF() { - int position = inputCol; - UserDefinedFunction udf = functions.udf((Integer value) -> { - if (value == null) { - return INT_EMPTY; - } - return 
ZOrderByteUtils.intToOrderedBytes(value, inputBuffer(position, Integer.BYTES)).array(); - }, DataTypes.BinaryType) - .withName("INT_ORDERED_BYTES"); + Spark3ZOrderUDF(int numCols) { + this.numCols = numCols; + } - this.inputCol++; - this.totalBytes += Integer.BYTES; + private void readObject(java.io.ObjectInputStream in) + throws IOException, ClassNotFoundException { + in.defaultReadObject(); + inputBuffers = new ThreadLocal[numCols]; + inputHolder = ThreadLocal.withInitial(() -> new byte[numCols][]); + encoder = ThreadLocal.withInitial(() -> StandardCharsets.UTF_8.newEncoder()); + } - return udf; + private ByteBuffer outputBuffer(int size) { + if (outputBuffer == null) { + // May over allocate on concurrent calls + outputBuffer = ThreadLocal.withInitial(() -> ByteBuffer.allocate(size)); } + return outputBuffer.get(); + } - private UserDefinedFunction longToOrderedBytesUDF() { - int position = inputCol; - UserDefinedFunction udf = functions.udf((Long value) -> { - if (value == null) { - return LONG_EMPTY; - } - return ZOrderByteUtils.longToOrderedBytes(value, inputBuffer(position, Long.BYTES)).array(); - }, DataTypes.BinaryType) - .withName("LONG_ORDERED_BYTES"); - - this.inputCol++; - this.totalBytes += Long.BYTES; - - return udf; + private ByteBuffer inputBuffer(int position, int size) { + if (inputBuffers[position] == null) { + // May over allocate on concurrent calls + inputBuffers[position] = ThreadLocal.withInitial(() -> ByteBuffer.allocate(size)); } - - private UserDefinedFunction floatToOrderedBytesUDF() { - int position = inputCol; - UserDefinedFunction udf = functions.udf((Float value) -> { - if (value == null) { - return FLOAT_EMPTY; - } - return ZOrderByteUtils.floatToOrderedBytes(value, inputBuffer(position, Float.BYTES)).array(); - }, DataTypes.BinaryType) - .withName("FLOAT_ORDERED_BYTES"); - - this.inputCol++; - this.totalBytes += Float.BYTES; - - return udf; - } - - private UserDefinedFunction doubleToOrderedBytesUDF() { - int position = inputCol; - UserDefinedFunction udf = functions.udf((Double value) -> { - if (value == null) { - return DOUBLE_EMPTY; - } - return ZOrderByteUtils.doubleToOrderedBytes(value, inputBuffer(position, Double.BYTES)).array(); - }, DataTypes.BinaryType) - .withName("FLOAT_ORDERED_BYTES"); - - this.inputCol++; - this.totalBytes += Double.BYTES; - - return udf; - } - - private UserDefinedFunction stringToOrderedBytesUDF() { - int position = inputCol; - UserDefinedFunction udf = functions.udf((String value) -> - ZOrderByteUtils.stringToOrderedBytes(value, STRING_KEY_LENGTH, inputBuffer(position, STRING_KEY_LENGTH), - encoder.get()).array(), DataTypes.BinaryType).withName("STRING-LEXICAL-BYTES"); - - this.inputCol++; - this.totalBytes += STRING_KEY_LENGTH; - - return udf; - } - - private final UserDefinedFunction INTERLEAVE_UDF = - functions.udf((Seq arrayBinary) -> interleaveBits(arrayBinary), DataTypes.BinaryType) - .withName("INTERLEAVE_BYTES"); - - Column interleaveBytes(Column arrayBinary) { - return INTERLEAVE_UDF.apply(arrayBinary); - } - - @SuppressWarnings("checkstyle:CyclomaticComplexity") - Column sortedLexicographically(Column column, DataType type) { - if (type instanceof ByteType) { - return tinyToOrderedBytesUDF().apply(column); - } else if (type instanceof ShortType) { - return shortToOrderedBytesUDF().apply(column); - } else if (type instanceof IntegerType) { - return intToOrderedBytesUDF().apply(column); - } else if (type instanceof LongType) { - return longToOrderedBytesUDF().apply(column); - } else if (type instanceof FloatType) 
{ - return floatToOrderedBytesUDF().apply(column); - } else if (type instanceof DoubleType) { - return doubleToOrderedBytesUDF().apply(column); - } else if (type instanceof StringType) { - return stringToOrderedBytesUDF().apply(column); - } else if (type instanceof BinaryType) { - return stringToOrderedBytesUDF().apply(column); - } else if (type instanceof BooleanType) { - return column.cast(DataTypes.BinaryType); - } else if (type instanceof TimestampType) { - return longToOrderedBytesUDF().apply(column.cast(DataTypes.LongType)); - } else if (type instanceof DateType) { - return longToOrderedBytesUDF().apply(column.cast(DataTypes.LongType)); - } else { - throw new IllegalArgumentException( - String.format("Cannot use column %s of type %s in ZOrdering, the type is unsupported", - column, type)); - } + return inputBuffers[position].get(); + } + + byte[] interleaveBits(Seq scalaBinary) { + byte[][] columnsBinary = scala.collection.JavaConverters.seqAsJavaList(scalaBinary) + .toArray(inputHolder.get()); + return ZOrderByteUtils.interleaveBits(columnsBinary, totalBytes, outputBuffer(totalBytes)); + } + + private UserDefinedFunction tinyToOrderedBytesUDF() { + int position = inputCol; + UserDefinedFunction udf = functions.udf((Byte value) -> { + if (value == null) { + return PRIMITIVE_EMPTY; + } + return ZOrderByteUtils.tinyintToOrderedBytes(value, inputBuffer(position, Byte.BYTES)).array(); + }, DataTypes.BinaryType).withName("TINY_ORDERED_BYTES"); + + this.inputCol++; + this.totalBytes += Byte.BYTES; + + return udf; + } + + private UserDefinedFunction shortToOrderedBytesUDF() { + int position = inputCol; + UserDefinedFunction udf = functions.udf((Short value) -> { + if (value == null) { + return PRIMITIVE_EMPTY; + } + return ZOrderByteUtils.shortToOrderedBytes(value, inputBuffer(position, Short.BYTES)).array(); + }, DataTypes.BinaryType).withName("SHORT_ORDERED_BYTES"); + + this.inputCol++; + this.totalBytes += Short.BYTES; + + return udf; + } + + private UserDefinedFunction intToOrderedBytesUDF() { + int position = inputCol; + UserDefinedFunction udf = functions.udf((Integer value) -> { + if (value == null) { + return PRIMITIVE_EMPTY; + } + return ZOrderByteUtils.intToOrderedBytes(value, inputBuffer(position, Integer.BYTES)).array(); + }, DataTypes.BinaryType).withName("INT_ORDERED_BYTES"); + + this.inputCol++; + this.totalBytes += Integer.BYTES; + + return udf; + } + + private UserDefinedFunction longToOrderedBytesUDF() { + int position = inputCol; + UserDefinedFunction udf = functions.udf((Long value) -> { + if (value == null) { + return PRIMITIVE_EMPTY; + } + return ZOrderByteUtils.longToOrderedBytes(value, inputBuffer(position, Long.BYTES)).array(); + }, DataTypes.BinaryType).withName("LONG_ORDERED_BYTES"); + + this.inputCol++; + this.totalBytes += Long.BYTES; + + return udf; + } + + private UserDefinedFunction floatToOrderedBytesUDF() { + int position = inputCol; + UserDefinedFunction udf = functions.udf((Float value) -> { + if (value == null) { + return PRIMITIVE_EMPTY; + } + return ZOrderByteUtils.floatToOrderedBytes(value, inputBuffer(position, Float.BYTES)).array(); + }, DataTypes.BinaryType).withName("FLOAT_ORDERED_BYTES"); + + this.inputCol++; + this.totalBytes += Float.BYTES; + + return udf; + } + + private UserDefinedFunction doubleToOrderedBytesUDF() { + int position = inputCol; + UserDefinedFunction udf = functions.udf((Double value) -> { + if (value == null) { + return PRIMITIVE_EMPTY; + } + return ZOrderByteUtils.doubleToOrderedBytes(value, inputBuffer(position, 
Double.BYTES)).array(); + }, DataTypes.BinaryType).withName("FLOAT_ORDERED_BYTES"); + + this.inputCol++; + this.totalBytes += Double.BYTES; + + return udf; + } + + private UserDefinedFunction stringToOrderedBytesUDF() { + int position = inputCol; + UserDefinedFunction udf = functions.udf((String value) -> + ZOrderByteUtils.stringToOrderedBytes( + value, + STRING_KEY_LENGTH, + inputBuffer(position, STRING_KEY_LENGTH), + encoder.get()).array(), DataTypes.BinaryType) + .withName("STRING-LEXICAL-BYTES"); + + this.inputCol++; + this.totalBytes += STRING_KEY_LENGTH; + + return udf; + } + + private final UserDefinedFunction interleaveUDF = + functions.udf((Seq arrayBinary) -> interleaveBits(arrayBinary), DataTypes.BinaryType) + .withName("INTERLEAVE_BYTES"); + + Column interleaveBytes(Column arrayBinary) { + return interleaveUDF.apply(arrayBinary); + } + + @SuppressWarnings("checkstyle:CyclomaticComplexity") + Column sortedLexicographically(Column column, DataType type) { + if (type instanceof ByteType) { + return tinyToOrderedBytesUDF().apply(column); + } else if (type instanceof ShortType) { + return shortToOrderedBytesUDF().apply(column); + } else if (type instanceof IntegerType) { + return intToOrderedBytesUDF().apply(column); + } else if (type instanceof LongType) { + return longToOrderedBytesUDF().apply(column); + } else if (type instanceof FloatType) { + return floatToOrderedBytesUDF().apply(column); + } else if (type instanceof DoubleType) { + return doubleToOrderedBytesUDF().apply(column); + } else if (type instanceof StringType) { + return stringToOrderedBytesUDF().apply(column); + } else if (type instanceof BinaryType) { + return stringToOrderedBytesUDF().apply(column); + } else if (type instanceof BooleanType) { + return column.cast(DataTypes.BinaryType); + } else if (type instanceof TimestampType) { + return longToOrderedBytesUDF().apply(column.cast(DataTypes.LongType)); + } else if (type instanceof DateType) { + return longToOrderedBytesUDF().apply(column.cast(DataTypes.LongType)); + } else { + throw new IllegalArgumentException( + String.format("Cannot use column %s of type %s in ZOrdering, the type is unsupported", + column, type)); } -} \ No newline at end of file + } +} From 82bfb0738bb5f21f92072a41cd1e54ded295439d Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Fri, 18 Mar 2022 10:10:19 -0500 Subject: [PATCH 19/30] Update benchmarking --- .../IcebergSortCompactionBenchmark.java | 110 +++++++++++------- .../spark/actions/Spark3ZOrderUDF.java | 14 +-- 2 files changed, 72 insertions(+), 52 deletions(-) diff --git a/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java b/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java index 4cb70960cc0b..1fff9c35186f 100644 --- a/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java +++ b/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java @@ -34,6 +34,7 @@ import org.apache.iceberg.spark.Spark3Util; import org.apache.iceberg.spark.SparkSchemaUtil; import org.apache.iceberg.spark.SparkSessionCatalog; +import org.apache.iceberg.spark.actions.Spark3SortStrategy; import org.apache.iceberg.spark.actions.SparkActions; import org.apache.iceberg.types.Types; import org.apache.spark.sql.Dataset; @@ -65,7 +66,7 @@ @Fork(1) @State(Scope.Benchmark) -@Measurement(iterations = 3) +@Measurement(iterations = 10) @BenchmarkMode(Mode.SingleShotTime) @Timeout(time = 1000, timeUnit 
= TimeUnit.HOURS) public class IcebergSortCompactionBenchmark { @@ -74,9 +75,8 @@ public class IcebergSortCompactionBenchmark { private static final String NAME = "sortbench"; private static final Identifier IDENT = Identifier.of(NAMESPACE, NAME); private static final int NUM_FILES = 8; - private static final long NUM_ROWS = 10000000L; - private static final long UNIQUE_VALUES = NUM_ROWS / 10; - + private static final long NUM_ROWS = 7500000L; + private static final long UNIQUE_VALUES = NUM_ROWS / 4; private final Configuration hadoopConf = initHadoopConf(); private SparkSession spark; @@ -107,6 +107,7 @@ public void cleanUpIteration() throws IOException { public void sortInt() { SparkActions.get() .rewriteDataFiles(table()) + .option(Spark3SortStrategy.REWRITE_ALL, "true") .sort(SortOrder .builderFor(table().schema()) .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) @@ -118,43 +119,46 @@ public void sortInt() { @Threads(1) public void sortInt2() { SparkActions.get() - .rewriteDataFiles(table()) - .sort(SortOrder - .builderFor(table().schema()) - .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) - .sortBy("intCol2", SortDirection.ASC, NullOrder.NULLS_FIRST) - .build()) - .execute(); + .rewriteDataFiles(table()) + .option(Spark3SortStrategy.REWRITE_ALL, "true") + .sort(SortOrder + .builderFor(table().schema()) + .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("intCol2", SortDirection.ASC, NullOrder.NULLS_FIRST) + .build()) + .execute(); } @Benchmark @Threads(1) public void sortInt3() { SparkActions.get() - .rewriteDataFiles(table()) - .sort(SortOrder - .builderFor(table().schema()) - .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) - .sortBy("intCol2", SortDirection.ASC, NullOrder.NULLS_FIRST) - .sortBy("intCol3", SortDirection.ASC, NullOrder.NULLS_FIRST) - .sortBy("intCol4", SortDirection.ASC, NullOrder.NULLS_FIRST) - .build()) - .execute(); + .rewriteDataFiles(table()) + .option(Spark3SortStrategy.REWRITE_ALL, "true") + .sort(SortOrder + .builderFor(table().schema()) + .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("intCol2", SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("intCol3", SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("intCol4", SortDirection.ASC, NullOrder.NULLS_FIRST) + .build()) + .execute(); } @Benchmark @Threads(1) public void sortInt4() { SparkActions.get() - .rewriteDataFiles(table()) - .sort(SortOrder - .builderFor(table().schema()) - .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) - .sortBy("intCol2", SortDirection.ASC, NullOrder.NULLS_FIRST) - .sortBy("intCol3", SortDirection.ASC, NullOrder.NULLS_FIRST) - .sortBy("intCol4", SortDirection.ASC, NullOrder.NULLS_FIRST) - .build()) - .execute(); + .rewriteDataFiles(table()) + .option(Spark3SortStrategy.REWRITE_ALL, "true") + .sort(SortOrder + .builderFor(table().schema()) + .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("intCol2", SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("intCol3", SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("intCol4", SortDirection.ASC, NullOrder.NULLS_FIRST) + .build()) + .execute(); } @Benchmark @@ -162,6 +166,7 @@ public void sortInt4() { public void sortString() { SparkActions.get() .rewriteDataFiles(table()) + .option(Spark3SortStrategy.REWRITE_ALL, "true") .sort(SortOrder .builderFor(table().schema()) .sortBy("stringCol", SortDirection.ASC, NullOrder.NULLS_FIRST) @@ -174,6 +179,7 @@ public void sortString() { public void sortFourColumns() { SparkActions.get() 
.rewriteDataFiles(table()) + .option(Spark3SortStrategy.REWRITE_ALL, "true") .sort(SortOrder .builderFor(table().schema()) .sortBy("stringCol", SortDirection.ASC, NullOrder.NULLS_FIRST) @@ -189,6 +195,7 @@ public void sortFourColumns() { public void sortSixColumns() { SparkActions.get() .rewriteDataFiles(table()) + .option(Spark3SortStrategy.REWRITE_ALL, "true") .sort(SortOrder .builderFor(table().schema()) .sortBy("stringCol", SortDirection.ASC, NullOrder.NULLS_FIRST) @@ -206,6 +213,7 @@ public void sortSixColumns() { public void zSortInt() { SparkActions.get() .rewriteDataFiles(table()) + .option(Spark3SortStrategy.REWRITE_ALL, "true") .zOrder("intCol") .execute(); } @@ -214,27 +222,30 @@ public void zSortInt() { @Threads(1) public void zSortInt2() { SparkActions.get() - .rewriteDataFiles(table()) - .zOrder("intCol", "intCol2") - .execute(); + .rewriteDataFiles(table()) + .option(Spark3SortStrategy.REWRITE_ALL, "true") + .zOrder("intCol", "intCol2") + .execute(); } @Benchmark @Threads(1) public void zSortInt3() { SparkActions.get() - .rewriteDataFiles(table()) - .zOrder("intCol", "intCol2", "intCol3") - .execute(); + .rewriteDataFiles(table()) + .option(Spark3SortStrategy.REWRITE_ALL, "true") + .zOrder("intCol", "intCol2", "intCol3") + .execute(); } @Benchmark @Threads(1) public void zSortInt4() { SparkActions.get() - .rewriteDataFiles(table()) - .zOrder("intCol", "intCol2", "intCol3", "intCol4") - .execute(); + .rewriteDataFiles(table()) + .option(Spark3SortStrategy.REWRITE_ALL, "true") + .zOrder("intCol", "intCol2", "intCol3", "intCol4") + .execute(); } @Benchmark @@ -242,6 +253,7 @@ public void zSortInt4() { public void zSortString() { SparkActions.get() .rewriteDataFiles(table()) + .option(Spark3SortStrategy.REWRITE_ALL, "true") .zOrder("stringCol") .execute(); } @@ -251,6 +263,7 @@ public void zSortString() { public void zSortFourColumns() { SparkActions.get() .rewriteDataFiles(table()) + .option(Spark3SortStrategy.REWRITE_ALL, "true") .zOrder("stringCol", "intCol", "dateCol", "doubleCol") .execute(); } @@ -260,6 +273,7 @@ public void zSortFourColumns() { public void zSortSixColumns() { SparkActions.get() .rewriteDataFiles(table()) + .option(Spark3SortStrategy.REWRITE_ALL, "true") .zOrder("stringCol", "intCol", "dateCol", "timestampCol", "doubleCol", "longCol") .execute(); } @@ -284,7 +298,7 @@ protected final void initTable() { SparkSessionCatalog catalog = null; try { catalog = (SparkSessionCatalog) - Spark3Util.catalogAndIdentifier(spark(), "spark_catalog").catalog(); + Spark3Util.catalogAndIdentifier(spark(), "spark_catalog").catalog(); catalog.dropTable(IDENT); catalog.createTable(IDENT, SparkSchemaUtil.convert(schema), new Transform[0], Collections.emptyMap()); } catch (Exception e) { @@ -296,17 +310,23 @@ private void appendData() { Dataset df = spark().range(0, NUM_ROWS * NUM_FILES, 1, NUM_FILES) .drop("id") .withColumn("longCol", new RandomGeneratingUDF(UNIQUE_VALUES).randomLongUDF().apply()) - .withColumn("intCol", + .withColumn( + "intCol", new RandomGeneratingUDF(UNIQUE_VALUES).randomLongUDF().apply().cast(DataTypes.IntegerType)) - .withColumn("intCol2", + .withColumn( + "intCol2", new RandomGeneratingUDF(UNIQUE_VALUES).randomLongUDF().apply().cast(DataTypes.IntegerType)) - .withColumn("intCol3", + .withColumn( + "intCol3", new RandomGeneratingUDF(UNIQUE_VALUES).randomLongUDF().apply().cast(DataTypes.IntegerType)) - .withColumn("intCol4", + .withColumn( + "intCol4", new RandomGeneratingUDF(UNIQUE_VALUES).randomLongUDF().apply().cast(DataTypes.IntegerType)) - 
.withColumn("floatCol", + .withColumn( + "floatCol", new RandomGeneratingUDF(UNIQUE_VALUES).randomLongUDF().apply().cast(DataTypes.FloatType)) - .withColumn("doubleCol", + .withColumn( + "doubleCol", new RandomGeneratingUDF(UNIQUE_VALUES).randomLongUDF().apply().cast(DataTypes.DoubleType)) .withColumn("dateCol", date_add(current_date(), col("intCol").mod(NUM_FILES))) .withColumn("timestampCol", expr("TO_TIMESTAMP(dateCol)")) diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java index 839780fef677..1e6eb60b2579 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java @@ -44,7 +44,7 @@ import scala.collection.Seq; class Spark3ZOrderUDF implements Serializable { - private static final int STRING_KEY_LENGTH = 16; + private static final int STRING_KEY_LENGTH = 8; private static final byte[] PRIMITIVE_EMPTY = new byte[ZOrderByteUtils.BUFFER_SIZE]; @@ -98,7 +98,7 @@ private UserDefinedFunction tinyToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.tinyintToOrderedBytes(value, inputBuffer(position, Byte.BYTES)).array(); + return ZOrderByteUtils.tinyintToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.BUFFER_SIZE)).array(); }, DataTypes.BinaryType).withName("TINY_ORDERED_BYTES"); this.inputCol++; @@ -113,7 +113,7 @@ private UserDefinedFunction shortToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.shortToOrderedBytes(value, inputBuffer(position, Short.BYTES)).array(); + return ZOrderByteUtils.shortToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.BUFFER_SIZE)).array(); }, DataTypes.BinaryType).withName("SHORT_ORDERED_BYTES"); this.inputCol++; @@ -128,7 +128,7 @@ private UserDefinedFunction intToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.intToOrderedBytes(value, inputBuffer(position, Integer.BYTES)).array(); + return ZOrderByteUtils.intToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.BUFFER_SIZE)).array(); }, DataTypes.BinaryType).withName("INT_ORDERED_BYTES"); this.inputCol++; @@ -143,7 +143,7 @@ private UserDefinedFunction longToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.longToOrderedBytes(value, inputBuffer(position, Long.BYTES)).array(); + return ZOrderByteUtils.longToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.BUFFER_SIZE)).array(); }, DataTypes.BinaryType).withName("LONG_ORDERED_BYTES"); this.inputCol++; @@ -158,7 +158,7 @@ private UserDefinedFunction floatToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.floatToOrderedBytes(value, inputBuffer(position, Float.BYTES)).array(); + return ZOrderByteUtils.floatToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.BUFFER_SIZE)).array(); }, DataTypes.BinaryType).withName("FLOAT_ORDERED_BYTES"); this.inputCol++; @@ -173,7 +173,7 @@ private UserDefinedFunction doubleToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.doubleToOrderedBytes(value, inputBuffer(position, Double.BYTES)).array(); + return ZOrderByteUtils.doubleToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.BUFFER_SIZE)).array(); }, DataTypes.BinaryType).withName("FLOAT_ORDERED_BYTES"); this.inputCol++; 
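[Editor's illustration] One detail of the UDF revisions above that is easy to miss: each typed encoder captures its column position once, and all writes go through thread-confined, reusable buffers, so the per-row hot path allocates nothing. A simplified standalone sketch of that reuse pattern (names are illustrative, not the Iceberg API):

    import java.nio.ByteBuffer;

    public class BufferReuseDemo {
      private static final int BUFFER_SIZE = 8;

      // One thread-local buffer per input column, mirroring the patch's
      // inputBuffers[position]: threads never share buffers, and each column
      // keeps its own so one row's columns don't clobber each other.
      private final ThreadLocal<ByteBuffer>[] buffers;

      @SuppressWarnings("unchecked")
      BufferReuseDemo(int numCols) {
        this.buffers = new ThreadLocal[numCols];
        for (int i = 0; i < numCols; i++) {
          buffers[i] = ThreadLocal.withInitial(() -> ByteBuffer.allocate(BUFFER_SIZE));
        }
      }

      // Encode one long into the column's reusable buffer: sign-flip, then
      // big-endian bytes, with no allocation on the per-row hot path.
      byte[] encode(int column, long value) {
        ByteBuffer reuse = buffers[column].get();
        reuse.clear();
        reuse.putLong(value ^ 0x8000000000000000L);
        return reuse.array();
      }

      public static void main(String[] args) {
        BufferReuseDemo demo = new BufferReuseDemo(2);
        byte[] first = demo.encode(0, 1L);
        byte[] second = demo.encode(0, 2L);
        // The same backing array comes back for repeated calls on one column.
        System.out.println("buffer reused: " + (first == second));
      }
    }

ThreadLocal is what makes the reuse safe here: several Spark tasks can run in one executor JVM, so a single shared buffer would race, while allocating per call is exactly the overhead the patch is removing.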
From e96b0206b069ae912008b07e4407b7c4e9f80e33 Mon Sep 17 00:00:00 2001
From: Russell_Spitzer
Date: Fri, 18 Mar 2022 14:26:11 -0500
Subject: [PATCH 20/30] Reviewer Comments

---
 .../org/apache/iceberg/util/ByteBuffers.java  |   7 +-
 core/benchmark/ZorderResult.txt               | 227 ++++++++++++++++++
 .../util/ZOrderByteUtilsBenchmark.java        | 121 ++++++++++
 .../apache/iceberg/util/ZOrderByteUtils.java  |  34 ++-
 .../iceberg/util/TestZOrderByteUtil.java      |  29 ++-
 jmh.gradle                                    |   2 +-
 .../spark/actions/Spark3ZOrderStrategy.java   |  74 ++++--
 .../spark/actions/Spark3ZOrderUDF.java        |  80 +++---
 .../spark/actions/SparkSortStrategy.java      |  10 +-
 9 files changed, 514 insertions(+), 70 deletions(-)
 create mode 100644 core/benchmark/ZorderResult.txt
 create mode 100644 core/src/jmh/java/org/apache/iceberg/util/ZOrderByteUtilsBenchmark.java

diff --git a/api/src/main/java/org/apache/iceberg/util/ByteBuffers.java b/api/src/main/java/org/apache/iceberg/util/ByteBuffers.java
index efc05f179f82..4a5001018da6 100644
--- a/api/src/main/java/org/apache/iceberg/util/ByteBuffers.java
+++ b/api/src/main/java/org/apache/iceberg/util/ByteBuffers.java
@@ -48,9 +48,10 @@ public static byte[] toByteArray(ByteBuffer buffer) {
   }
 
   public static ByteBuffer reuse(ByteBuffer reuse, int length) {
-    Preconditions.checkArgument(reuse.hasArray() && reuse.arrayOffset() == 0 && reuse.capacity() == length,
-        "Cannot reuse buffer: Should be an array %s, should have an offset of 0 %s, should be of size %s was %s",
-        reuse.hasArray(), reuse.arrayOffset(), length, reuse.capacity());
+    Preconditions.checkArgument(reuse.hasArray(), "Cannot reuse a buffer not backed by an array");
+    Preconditions.checkArgument(reuse.arrayOffset() == 0, "Cannot reuse a buffer whose array offset is not 0");
+    Preconditions.checkArgument(reuse.capacity() == length,
+        "Cannot reuse a buffer whose capacity (%s) is not equal to the requested length (%s)", reuse.capacity(), length);
     reuse.position(0);
     reuse.limit(length);
     return reuse;
   }
diff --git a/core/benchmark/ZorderResult.txt b/core/benchmark/ZorderResult.txt
new file mode 100644
index 000000000000..559025105ce3
--- /dev/null
+++ b/core/benchmark/ZorderResult.txt
@@ -0,0 +1,227 @@
+# JMH version: 1.32
+# VM version: JDK 11.0.4, OpenJDK 64-Bit Server VM, 11.0.4+11-LTS
+# VM invoker: /Library/Java/JavaVirtualMachines/applejdk-11.jdk/Contents/Home/bin/java
+# VM options: -Dfile.encoding=UTF-8 -Djava.io.tmpdir=/Users/russellspitzer/repos/ipr/iceberg-master/core/build/tmp/jmh -Duser.country=US -Duser.language=en -Duser.variant
+# Blackhole mode: full + dont-inline hint
+# Warmup:
+# Measurement: 5 iterations, single-shot each
+# Timeout: 1000 hr per iteration
+# Threads: 1 thread
+# Benchmark mode: Single shot invocation time
+# Benchmark: org.apache.iceberg.util.ZOrderByteUtilsBenchmark.interleaveValuesFourColumns
+
+# Run progress: 0.00% complete, ETA 00:00:00
+# Fork: 1 of 1
+Iteration   1: 11.086 s/op
+Iteration   2: 10.337 s/op
+Iteration   3: 10.606 s/op
+Iteration   4: 10.897 s/op
+Iteration   5: 10.607 s/op
+
+
+Result "org.apache.iceberg.util.ZOrderByteUtilsBenchmark.interleaveValuesFourColumns":
+  N = 5
+  mean = 10.706 ±(99.9%) 1.117 s/op
+
+  Histogram, s/op:
+    [10.300, 10.350) = 1
+    [10.350, 10.400) = 0
+    [10.400, 10.450) = 0
+    [10.450, 10.500) = 0
+    [10.500, 10.550) = 0
+    [10.550, 10.600) = 0
+    [10.600, 10.650) = 2
+    [10.650, 10.700) = 0
+    [10.700, 10.750) = 0
+    [10.750, 10.800) = 0
+    [10.800, 10.850) = 0
+    [10.850, 10.900) = 1
+    [10.900, 10.950) = 0
+    [10.950, 11.000) = 0
+    [11.000, 11.050) = 0
+    [11.050, 11.100) = 1
+
+  Percentiles,
s/op: + p(0.0000) = 10.337 s/op + p(50.0000) = 10.607 s/op + p(90.0000) = 11.086 s/op + p(95.0000) = 11.086 s/op + p(99.0000) = 11.086 s/op + p(99.9000) = 11.086 s/op + p(99.9900) = 11.086 s/op + p(99.9990) = 11.086 s/op + p(99.9999) = 11.086 s/op + p(100.0000) = 11.086 s/op + + +# JMH version: 1.32 +# VM version: JDK 11.0.4, OpenJDK 64-Bit Server VM, 11.0.4+11-LTS +# VM invoker: /Library/Java/JavaVirtualMachines/applejdk-11.jdk/Contents/Home/bin/java +# VM options: -Dfile.encoding=UTF-8 -Djava.io.tmpdir=/Users/russellspitzer/repos/ipr/iceberg-master/core/build/tmp/jmh -Duser.country=US -Duser.language=en -Duser.variant +# Blackhole mode: full + dont-inline hint +# Warmup: +# Measurement: 5 iterations, single-shot each +# Timeout: 1000 hr per iteration +# Threads: 1 thread +# Benchmark mode: Single shot invocation time +# Benchmark: org.apache.iceberg.util.ZOrderByteUtilsBenchmark.interleaveValuesFourColumns8ByteOutput + +# Run progress: 25.00% complete, ETA 00:02:54 +# Fork: 1 of 1 +Iteration 1: 2.521 s/op +Iteration 2: 2.750 s/op +Iteration 3: 2.999 s/op +Iteration 4: 2.972 s/op +Iteration 5: 3.028 s/op + + +Result "org.apache.iceberg.util.ZOrderByteUtilsBenchmark.interleaveValuesFourColumns8ByteOutput": + N = 5 + mean = 2.854 ±(99.9%) 0.832 s/op + + Histogram, s/op: + [2.500, 2.550) = 1 + [2.550, 2.600) = 0 + [2.600, 2.650) = 0 + [2.650, 2.700) = 0 + [2.700, 2.750) = 1 + [2.750, 2.800) = 0 + [2.800, 2.850) = 0 + [2.850, 2.900) = 0 + [2.900, 2.950) = 0 + [2.950, 3.000) = 2 + [3.000, 3.050) = 1 + [3.050, 3.100) = 0 + + Percentiles, s/op: + p(0.0000) = 2.521 s/op + p(50.0000) = 2.972 s/op + p(90.0000) = 3.028 s/op + p(95.0000) = 3.028 s/op + p(99.0000) = 3.028 s/op + p(99.9000) = 3.028 s/op + p(99.9900) = 3.028 s/op + p(99.9990) = 3.028 s/op + p(99.9999) = 3.028 s/op + p(100.0000) = 3.028 s/op + + +# JMH version: 1.32 +# VM version: JDK 11.0.4, OpenJDK 64-Bit Server VM, 11.0.4+11-LTS +# VM invoker: /Library/Java/JavaVirtualMachines/applejdk-11.jdk/Contents/Home/bin/java +# VM options: -Dfile.encoding=UTF-8 -Djava.io.tmpdir=/Users/russellspitzer/repos/ipr/iceberg-master/core/build/tmp/jmh -Duser.country=US -Duser.language=en -Duser.variant +# Blackhole mode: full + dont-inline hint +# Warmup: +# Measurement: 5 iterations, single-shot each +# Timeout: 1000 hr per iteration +# Threads: 1 thread +# Benchmark mode: Single shot invocation time +# Benchmark: org.apache.iceberg.util.ZOrderByteUtilsBenchmark.interleaveValuesThreeColumns + +# Run progress: 50.00% complete, ETA 00:01:15 +# Fork: 1 of 1 +Iteration 1: 7.440 s/op +Iteration 2: 7.625 s/op +Iteration 3: 8.216 s/op +Iteration 4: 8.314 s/op +Iteration 5: 8.203 s/op + + +Result "org.apache.iceberg.util.ZOrderByteUtilsBenchmark.interleaveValuesThreeColumns": + N = 5 + mean = 7.960 ±(99.9%) 1.532 s/op + + Histogram, s/op: + [7.400, 7.500) = 1 + [7.500, 7.600) = 0 + [7.600, 7.700) = 1 + [7.700, 7.800) = 0 + [7.800, 7.900) = 0 + [7.900, 8.000) = 0 + [8.000, 8.100) = 0 + [8.100, 8.200) = 0 + [8.200, 8.300) = 2 + + Percentiles, s/op: + p(0.0000) = 7.440 s/op + p(50.0000) = 8.203 s/op + p(90.0000) = 8.314 s/op + p(95.0000) = 8.314 s/op + p(99.0000) = 8.314 s/op + p(99.9000) = 8.314 s/op + p(99.9900) = 8.314 s/op + p(99.9990) = 8.314 s/op + p(99.9999) = 8.314 s/op + p(100.0000) = 8.314 s/op + + +# JMH version: 1.32 +# VM version: JDK 11.0.4, OpenJDK 64-Bit Server VM, 11.0.4+11-LTS +# VM invoker: /Library/Java/JavaVirtualMachines/applejdk-11.jdk/Contents/Home/bin/java +# VM options: -Dfile.encoding=UTF-8 
-Djava.io.tmpdir=/Users/russellspitzer/repos/ipr/iceberg-master/core/build/tmp/jmh -Duser.country=US -Duser.language=en -Duser.variant +# Blackhole mode: full + dont-inline hint +# Warmup: +# Measurement: 5 iterations, single-shot each +# Timeout: 1000 hr per iteration +# Threads: 1 thread +# Benchmark mode: Single shot invocation time +# Benchmark: org.apache.iceberg.util.ZOrderByteUtilsBenchmark.interleaveValuesTwoColumns + +# Run progress: 75.00% complete, ETA 00:00:39 +# Fork: 1 of 1 +Iteration 1: 5.327 s/op +Iteration 2: 5.212 s/op +Iteration 3: 5.963 s/op +Iteration 4: 5.758 s/op +Iteration 5: 5.827 s/op + + +Result "org.apache.iceberg.util.ZOrderByteUtilsBenchmark.interleaveValuesTwoColumns": + N = 5 + mean = 5.618 ±(99.9%) 1.265 s/op + + Histogram, s/op: + [5.200, 5.250) = 1 + [5.250, 5.300) = 0 + [5.300, 5.350) = 1 + [5.350, 5.400) = 0 + [5.400, 5.450) = 0 + [5.450, 5.500) = 0 + [5.500, 5.550) = 0 + [5.550, 5.600) = 0 + [5.600, 5.650) = 0 + [5.650, 5.700) = 0 + [5.700, 5.750) = 0 + [5.750, 5.800) = 1 + [5.800, 5.850) = 1 + [5.850, 5.900) = 0 + [5.900, 5.950) = 0 + + Percentiles, s/op: + p(0.0000) = 5.212 s/op + p(50.0000) = 5.758 s/op + p(90.0000) = 5.963 s/op + p(95.0000) = 5.963 s/op + p(99.0000) = 5.963 s/op + p(99.9000) = 5.963 s/op + p(99.9900) = 5.963 s/op + p(99.9990) = 5.963 s/op + p(99.9999) = 5.963 s/op + p(100.0000) = 5.963 s/op + + +# Run complete. Total time: 00:02:29 + +REMEMBER: The numbers below are just data. To gain reusable insights, you need to follow up on +why the numbers are the way they are. Use profilers (see -prof, -lprof), design factorial +experiments, perform baseline and negative tests that provide experimental control, make sure +the benchmarking environment is safe on JVM/OS/HW level, ask for reviews from the domain experts. +Do not assume the numbers tell you what you want them to tell. + +Benchmark Mode Cnt Score Error Units +ZOrderByteUtilsBenchmark.interleaveValuesFourColumns ss 5 10.706 ± 1.117 s/op +ZOrderByteUtilsBenchmark.interleaveValuesFourColumns8ByteOutput ss 5 2.854 ± 0.832 s/op +ZOrderByteUtilsBenchmark.interleaveValuesThreeColumns ss 5 7.960 ± 1.532 s/op +ZOrderByteUtilsBenchmark.interleaveValuesTwoColumns ss 5 5.618 ± 1.265 s/op + +Benchmark result is saved to /Users/russellspitzer/repos/ipr/iceberg-master/core/build/results/jmh/results.txt diff --git a/core/src/jmh/java/org/apache/iceberg/util/ZOrderByteUtilsBenchmark.java b/core/src/jmh/java/org/apache/iceberg/util/ZOrderByteUtilsBenchmark.java new file mode 100644 index 000000000000..77f66f12cff3 --- /dev/null +++ b/core/src/jmh/java/org/apache/iceberg/util/ZOrderByteUtilsBenchmark.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + + +package org.apache.iceberg.util; + +import java.nio.ByteBuffer; +import java.util.Random; +import java.util.concurrent.TimeUnit; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Timeout; +import org.openjdk.jmh.infra.Blackhole; + +@Fork(1) +@State(Scope.Benchmark) +@Measurement(iterations = 5) +@BenchmarkMode(Mode.SingleShotTime) +@Timeout(time = 1000, timeUnit = TimeUnit.HOURS) +public class ZOrderByteUtilsBenchmark { + + private static final int NUM_ENTRIES = 10000000; + + private byte[][][] fourColumnInput; + private byte[][][] threeColumnInput; + private byte[][][] twoColumnInput; + + @Setup + public void setupBench() { + Random rand = new Random(42); + fourColumnInput = new byte[NUM_ENTRIES][][]; + threeColumnInput = new byte[NUM_ENTRIES][][]; + twoColumnInput = new byte[NUM_ENTRIES][][]; + for (int i = 0; i < NUM_ENTRIES; i++) { + fourColumnInput[i] = new byte[4][]; + threeColumnInput[i] = new byte[3][]; + twoColumnInput[i] = new byte[2][]; + for (int j = 0; j < 4; j++) { + byte[] value = ByteBuffer.allocate(Long.BYTES).putLong(rand.nextLong()).array(); + if (j < 2) { + twoColumnInput[i][j] = value; + } + if (j < 3) { + threeColumnInput[i][j] = value; + } + fourColumnInput[i][j] = value; + } + } + } + + @Benchmark + @Threads(1) + public void interleaveValuesFourColumns(Blackhole blackhole) { + int outputSize = ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE * 4; + ByteBuffer outputBuffer = ByteBuffer.allocate(outputSize); + + for (int i = 0; i < fourColumnInput.length; i++) { + byte[] interleavedBytes = ZOrderByteUtils.interleaveBits(fourColumnInput[i], outputSize, outputBuffer); + blackhole.consume(interleavedBytes); + } + } + + @Benchmark + @Threads(1) + public void interleaveValuesThreeColumns(Blackhole blackhole) { + int outputSize = ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE * 3; + ByteBuffer outputBuffer = ByteBuffer.allocate(outputSize); + + for (int i = 0; i < fourColumnInput.length; i++) { + byte[] interleavedBytes = ZOrderByteUtils.interleaveBits(threeColumnInput[i], outputSize, outputBuffer); + blackhole.consume(interleavedBytes); + } + } + + @Benchmark + @Threads(1) + public void interleaveValuesTwoColumns(Blackhole blackhole) { + int outputSize = ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE * 2; + ByteBuffer outputBuffer = ByteBuffer.allocate(outputSize); + + for (int i = 0; i < fourColumnInput.length; i++) { + byte[] interleavedBytes = ZOrderByteUtils.interleaveBits(twoColumnInput[i], outputSize, outputBuffer); + blackhole.consume(interleavedBytes); + } + } + + @Benchmark + @Threads(1) + public void interleaveValuesFourColumns8ByteOutput(Blackhole blackhole) { + int outputSize = 8; + ByteBuffer outputBuffer = ByteBuffer.allocate(outputSize); + + for (int i = 0; i < fourColumnInput.length; i++) { + byte[] interleavedBytes = ZOrderByteUtils.interleaveBits(fourColumnInput[i], outputSize, outputBuffer); + blackhole.consume(interleavedBytes); + } + } +} diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java index 44141e777295..9b5e571862c9 100644 --- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java 
+++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java @@ -43,22 +43,23 @@ */ public class ZOrderByteUtils { - public static final int BUFFER_SIZE = 8; + public static final int PRIMITIVE_BUFFER_SIZE = 8; private ZOrderByteUtils() { } static ByteBuffer allocatePrimitiveBuffer() { - return ByteBuffer.allocate(BUFFER_SIZE); + return ByteBuffer.allocate(PRIMITIVE_BUFFER_SIZE); } + /** * Signed ints do not have their bytes in magnitude order because of the sign bit. * To fix this, flip the sign bit so that all negatives are ordered before positives. This essentially * shifts the 0 value so that we don't break our ordering when we cross the new 0 value. */ public static ByteBuffer intToOrderedBytes(int val, ByteBuffer reuse) { - ByteBuffer bytes = ByteBuffers.reuse(reuse, BUFFER_SIZE); + ByteBuffer bytes = ByteBuffers.reuse(reuse, PRIMITIVE_BUFFER_SIZE); bytes.putLong(((long) val) ^ 0x8000000000000000L); return bytes; } @@ -67,7 +68,7 @@ public static ByteBuffer intToOrderedBytes(int val, ByteBuffer reuse) { * Signed longs are treated the same as the signed ints in {@link #intToOrderedBytes(int, ByteBuffer)} */ public static ByteBuffer longToOrderedBytes(long val, ByteBuffer reuse) { - ByteBuffer bytes = ByteBuffers.reuse(reuse, BUFFER_SIZE); + ByteBuffer bytes = ByteBuffers.reuse(reuse, PRIMITIVE_BUFFER_SIZE); bytes.putLong(val ^ 0x8000000000000000L); return bytes; } @@ -76,7 +77,7 @@ public static ByteBuffer longToOrderedBytes(long val, ByteBuffer reuse) { * Signed shorts are treated the same as the signed ints in {@link #intToOrderedBytes(int, ByteBuffer)} */ public static ByteBuffer shortToOrderedBytes(short val, ByteBuffer reuse) { - ByteBuffer bytes = ByteBuffers.reuse(reuse, BUFFER_SIZE); + ByteBuffer bytes = ByteBuffers.reuse(reuse, PRIMITIVE_BUFFER_SIZE); bytes.putLong(((long) val) ^ 0x8000000000000000L); return bytes; } @@ -85,7 +86,7 @@ public static ByteBuffer shortToOrderedBytes(short val, ByteBuffer reuse) { * Signed tiny ints are treated the same as the signed ints in {@link #intToOrderedBytes(int, ByteBuffer)} */ public static ByteBuffer tinyintToOrderedBytes(byte val, ByteBuffer reuse) { - ByteBuffer bytes = ByteBuffers.reuse(reuse, BUFFER_SIZE); + ByteBuffer bytes = ByteBuffers.reuse(reuse, PRIMITIVE_BUFFER_SIZE); bytes.putLong(((long) val) ^ 0x8000000000000000L); return bytes; } @@ -99,7 +100,7 @@ public static ByteBuffer tinyintToOrderedBytes(byte val, ByteBuffer reuse) { * comparable bytes */ public static ByteBuffer floatToOrderedBytes(float val, ByteBuffer reuse) { - ByteBuffer bytes = ByteBuffers.reuse(reuse, BUFFER_SIZE); + ByteBuffer bytes = ByteBuffers.reuse(reuse, PRIMITIVE_BUFFER_SIZE); long lval = Double.doubleToLongBits(val); lval ^= ((lval >> (Integer.SIZE - 1)) | Long.MIN_VALUE); bytes.putLong(lval); @@ -110,7 +111,7 @@ public static ByteBuffer floatToOrderedBytes(float val, ByteBuffer reuse) { * Doubles are treated the same as floats in {@link #floatToOrderedBytes(float, ByteBuffer)} */ public static ByteBuffer doubleToOrderedBytes(double val, ByteBuffer reuse) { - ByteBuffer bytes = ByteBuffers.reuse(reuse, BUFFER_SIZE); + ByteBuffer bytes = ByteBuffers.reuse(reuse, PRIMITIVE_BUFFER_SIZE); long lval = Double.doubleToLongBits(val); lval ^= ((lval >> (Integer.SIZE - 1)) | Long.MIN_VALUE); bytes.putLong(lval); @@ -137,14 +138,21 @@ public static ByteBuffer stringToOrderedBytes(String val, int length, ByteBuffer } /** - * For Testing interleave all available bytes + * Return a bytebuffer with the given bytes truncated to length, or filled with 0's 
to length depending on whether + * the given bytes are larger or smaller than the given length. */ - static byte[] interleaveBits(byte[][] columnsBinary) { - return interleaveBits(columnsBinary, - Arrays.stream(columnsBinary).mapToInt(column -> column.length).sum()); + public static ByteBuffer byteTruncateOrFill(byte[] val, int length, ByteBuffer reuse) { + ByteBuffer bytes = ByteBuffers.reuse(reuse, length); + if (val.length < length) { + bytes.put(val, 0, val.length); + Arrays.fill(bytes.array(), val.length, length, (byte) 0x00); + } else { + bytes.put(val, 0, length); + } + return bytes; } - public static byte[] interleaveBits(byte[][] columnsBinary, int interleavedSize) { + static byte[] interleaveBits(byte[][] columnsBinary, int interleavedSize) { return interleaveBits(columnsBinary, interleavedSize, ByteBuffer.allocate(interleavedSize)); } diff --git a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java index 858200c370b4..52ae803a5e18 100644 --- a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java +++ b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java @@ -96,7 +96,9 @@ public void testInterleaveRandomExamples() { testBytes[byteIndex] = generateRandomBytes(); testStrings[byteIndex] = bytesToString(testBytes[byteIndex]); } - byte[] byteResult = ZOrderByteUtils.interleaveBits(testBytes); + + int zOrderSize = Arrays.stream(testBytes).mapToInt(column -> column.length).sum(); + byte[] byteResult = ZOrderByteUtils.interleaveBits(testBytes, zOrderSize); String byteResultAsString = bytesToString(byteResult); String stringResult = interleaveStrings(testStrings); @@ -111,7 +113,7 @@ public void testInterleaveEmptyBits() { byte[] expected = new byte[40]; Assert.assertArrayEquals("Should combine empty arrays", - expected, ZOrderByteUtils.interleaveBits(test)); + expected, ZOrderByteUtils.interleaveBits(test, 40)); } @Test @@ -124,7 +126,7 @@ public void testInterleaveFullBits() { byte[] expected = new byte[]{IIIIIIII, IIIIIIII, IIIIIIII, IIIIIIII, IIIIIIII, IIIIIIII}; Assert.assertArrayEquals("Should combine full arrays", - expected, ZOrderByteUtils.interleaveBits(test)); + expected, ZOrderByteUtils.interleaveBits(test, 6)); } @Test @@ -140,7 +142,7 @@ public void testInterleaveMixedBits() { OIOIOIOI, OIOIOIOI, OOOOIIII}; Assert.assertArrayEquals("Should combine mixed byte arrays", - expected, ZOrderByteUtils.interleaveBits(test)); + expected, ZOrderByteUtils.interleaveBits(test, 9)); } @Test @@ -276,4 +278,23 @@ public void testStringOrdering() { stringCompare, byteCompare); } } + + @Test + public void testByteTruncateOrFill() { + ByteBuffer aBuffer = ByteBuffer.allocate(128); + ByteBuffer bBuffer = ByteBuffer.allocate(128); + for (int i = 0; i < NUM_TESTS; i++) { + byte[] aBytesRaw = (byte[]) RandomUtil.generatePrimitive(Types.BinaryType.get(), random); + byte[] bBytesRaw = (byte[]) RandomUtil.generatePrimitive(Types.BinaryType.get(), random); + int stringCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytesRaw, bBytesRaw)); + byte[] aBytes = ZOrderByteUtils.byteTruncateOrFill(aBytesRaw, 128, aBuffer).array(); + byte[] bBytes = ZOrderByteUtils.byteTruncateOrFill(bBytesRaw, 128, bBuffer).array(); + int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); + + Assert.assertEquals(String.format( + "Ordering of strings should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", + aBytesRaw, bBytesRaw, 
stringCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare), + stringCompare, byteCompare); + } + } } diff --git a/jmh.gradle b/jmh.gradle index d458ae2c5903..538fd96af406 100644 --- a/jmh.gradle +++ b/jmh.gradle @@ -23,7 +23,7 @@ if (jdkVersion != '8' && jdkVersion != '11') { def sparkVersions = (System.getProperty("sparkVersions") != null ? System.getProperty("sparkVersions") : System.getProperty("defaultSparkVersions")).split(",") def scalaVersion = System.getProperty("scalaVersion") != null ? System.getProperty("scalaVersion") : System.getProperty("defaultScalaVersion") -def jmhProjects = [] +def jmhProjects = [project(":iceberg-core")] if (jdkVersion == '8' && sparkVersions.contains("2.4")) { jmhProjects.add(project(":iceberg-spark:iceberg-spark-2.4")) diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java index 71a0274d5a08..238e6aaa7e4a 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java @@ -19,13 +19,14 @@ package org.apache.iceberg.spark.actions; -import java.nio.ByteBuffer; import java.util.Arrays; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.UUID; import java.util.stream.Collectors; import java.util.stream.Stream; +import org.apache.hadoop.shaded.com.google.common.collect.ImmutableSet; import org.apache.iceberg.DataFile; import org.apache.iceberg.FileScanTask; import org.apache.iceberg.NullOrder; @@ -33,15 +34,16 @@ import org.apache.iceberg.Schema; import org.apache.iceberg.SortDirection; import org.apache.iceberg.Table; +import org.apache.iceberg.actions.RewriteStrategy; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; -import org.apache.iceberg.spark.FileRewriteCoordinator; -import org.apache.iceberg.spark.FileScanTaskSetManager; import org.apache.iceberg.spark.SparkDistributionAndOrderingUtil; import org.apache.iceberg.spark.SparkReadOptions; import org.apache.iceberg.spark.SparkWriteOptions; import org.apache.iceberg.types.Types; import org.apache.iceberg.types.Types.NestedField; +import org.apache.iceberg.util.PropertyUtil; import org.apache.iceberg.util.SortOrderUtil; +import org.apache.iceberg.util.ZOrderByteUtils; import org.apache.spark.sql.Column; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; @@ -62,15 +64,59 @@ public class Spark3ZOrderStrategy extends Spark3SortStrategy { .sortBy(Z_COLUMN, SortDirection.ASC, NullOrder.NULLS_LAST) .build(); + /** + * Controls the amount of bytes interleaved in the ZOrder Algorithm. Default is all bytes being interleaved. + */ + private static final String MAX_OUTPUT_SIZE_KEY = "max-output-size"; + private static final int DEFAULT_MAX_OUTPUT_SIZE = Integer.MAX_VALUE; + + /** + * Controls the number of bytes considered from an input column of a type with variable length (String, Binary). 
+ * Default is to use the same size as primitives {@link ZOrderByteUtils#PRIMITIVE_BUFFER_SIZE} + */ + private static final String VAR_LENGTH_CONTRIBUTION_KEY = "var-length-contribution"; + private static final int DEFAULT_VAR_LENGTH_CONTRIBUTION = ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE; + private final List zOrderColNames; - private transient FileScanTaskSetManager manager = FileScanTaskSetManager.get(); - private transient FileRewriteCoordinator rewriteCoordinator = FileRewriteCoordinator.get(); + private final Spark3ZOrderUDF zOrderUDF; - private final Spark3ZOrderUDF orderHelper; + private int maxOutputSize; + private int varLengthContribution; + + @Override + public Set validOptions() { + return ImmutableSet.builder() + .addAll(super.validOptions()) + .add(VAR_LENGTH_CONTRIBUTION_KEY) + .add(MAX_OUTPUT_SIZE_KEY) + .build(); + } + + @Override + public RewriteStrategy options(Map options) { + super.options(options); + + varLengthContribution = PropertyUtil.propertyAsInt(options, VAR_LENGTH_CONTRIBUTION_KEY, + DEFAULT_VAR_LENGTH_CONTRIBUTION); + Preconditions.checkArgument(varLengthContribution > 0, + "Cannot use less than 1 byte for variable length types with zOrder, %s was set to %s", + VAR_LENGTH_CONTRIBUTION_KEY, varLengthContribution); + + + maxOutputSize = PropertyUtil.propertyAsInt(options, MAX_OUTPUT_SIZE_KEY, DEFAULT_MAX_OUTPUT_SIZE); + Preconditions.checkArgument(maxOutputSize > 0, + "Cannot have the interleaved ZOrder value use less than 1 byte, %s was set to %s", + MAX_OUTPUT_SIZE_KEY, maxOutputSize); + + return this; + } public Spark3ZOrderStrategy(Table table, SparkSession spark, List zOrderColNames) { super(table, spark); + Preconditions.checkArgument(zOrderColNames != null && !zOrderColNames.isEmpty(), + "Cannot ZOrder when no columns are specified"); + Stream identityPartitionColumns = table.spec().fields().stream() .filter(f -> f.transform().isIdentity()) .map(PartitionField::name); @@ -80,10 +126,10 @@ public Spark3ZOrderStrategy(Table table, SparkSession spark, List zOrder Preconditions.checkArgument( partZOrderCols.isEmpty(), "Cannot ZOrder on an Identity partition column as these values are constant within a partition, " + - "ZOrdering requested on %s", + "ZOrdering requested on Identity columns: %s", partZOrderCols); - this.orderHelper = new Spark3ZOrderUDF(zOrderColNames.size()); + this.zOrderUDF = new Spark3ZOrderUDF(zOrderColNames.size(), varLengthContribution, maxOutputSize); this.zOrderColNames = zOrderColNames; } @@ -114,7 +160,7 @@ public Set rewriteFiles(List filesToRewrite) { Distribution distribution = Distributions.ordered(ordering); try { - manager.stageTasks(table(), groupID, filesToRewrite); + manager().stageTasks(table(), groupID, filesToRewrite); // Disable Adaptive Query Execution as this may change the output partitioning of our write SparkSession cloneSession = spark().cloneSession(); @@ -137,10 +183,10 @@ public Set rewriteFiles(List filesToRewrite) { .collect(Collectors.toList()); Column zvalueArray = functions.array(zOrderColumns.stream().map(colStruct -> - orderHelper.sortedLexicographically(functions.col(colStruct.name()), colStruct.dataType()) + zOrderUDF.sortedLexicographically(functions.col(colStruct.name()), colStruct.dataType()) ).toArray(Column[]::new)); - Dataset zvalueDF = scanDF.withColumn(Z_COLUMN, orderHelper.interleaveBytes(zvalueArray)); + Dataset zvalueDF = scanDF.withColumn(Z_COLUMN, zOrderUDF.interleaveBytes(zvalueArray)); SQLConf sqlConf = cloneSession.sessionState().conf(); LogicalPlan sortPlan = sortPlan(distribution, 
ordering, zvalueDF.logicalPlan(), sqlConf); @@ -155,10 +201,10 @@ public Set rewriteFiles(List filesToRewrite) { .mode("append") .save(table().name()); - return rewriteCoordinator.fetchNewDataFiles(table(), groupID); + return rewriteCoordinator().fetchNewDataFiles(table(), groupID); } finally { - manager.removeTasks(table(), groupID); - rewriteCoordinator.clearRewrite(table(), groupID); + manager().removeTasks(table(), groupID); + rewriteCoordinator().clearRewrite(table(), groupID); } } diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java index 1e6eb60b2579..8a23e01451ee 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java @@ -20,6 +20,7 @@ package org.apache.iceberg.spark.actions; import java.io.IOException; +import java.io.ObjectInputStream; import java.io.Serializable; import java.nio.ByteBuffer; import java.nio.charset.CharsetEncoder; @@ -44,10 +45,12 @@ import scala.collection.Seq; class Spark3ZOrderUDF implements Serializable { - private static final int STRING_KEY_LENGTH = 8; - - private static final byte[] PRIMITIVE_EMPTY = new byte[ZOrderByteUtils.BUFFER_SIZE]; + private static final byte[] PRIMITIVE_EMPTY = new byte[ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE]; + /** + * Every Spark task runs iteratively over rows in a single thread, so ThreadLocal should protect from + * concurrent access to any of these structures. + */ private transient ThreadLocal outputBuffer; private transient ThreadLocal inputHolder; private transient ThreadLocal[] inputBuffers; @@ -56,28 +59,24 @@ class Spark3ZOrderUDF implements Serializable { private final int numCols; private int inputCol = 0; - private int totalBytes = 0; + private int totalOutputBytes = 0; + private final int varTypeSize; + private final int maxOutputSize; - Spark3ZOrderUDF(int numCols) { + Spark3ZOrderUDF(int numCols, int varTypeSize, int maxOutputSize) { this.numCols = numCols; + this.varTypeSize = varTypeSize; + this.maxOutputSize = maxOutputSize; } - private void readObject(java.io.ObjectInputStream in) - throws IOException, ClassNotFoundException { + private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException { in.defaultReadObject(); inputBuffers = new ThreadLocal[numCols]; inputHolder = ThreadLocal.withInitial(() -> new byte[numCols][]); + outputBuffer = ThreadLocal.withInitial(() -> ByteBuffer.allocate(totalOutputBytes)); encoder = ThreadLocal.withInitial(() -> StandardCharsets.UTF_8.newEncoder()); } - private ByteBuffer outputBuffer(int size) { - if (outputBuffer == null) { - // May over allocate on concurrent calls - outputBuffer = ThreadLocal.withInitial(() -> ByteBuffer.allocate(size)); - } - return outputBuffer.get(); - } - private ByteBuffer inputBuffer(int position, int size) { if (inputBuffers[position] == null) { // May over allocate on concurrent calls inputBuffers[position] = ThreadLocal.withInitial(() -> ByteBuffer.allocate(size)); } return inputBuffers[position].get(); } byte[] interleaveBits(Seq scalaBinary) { byte[][] columnsBinary = scala.collection.JavaConverters.seqAsJavaList(scalaBinary) .toArray(inputHolder.get()); - return ZOrderByteUtils.interleaveBits(columnsBinary, totalBytes, outputBuffer(totalBytes)); + return ZOrderByteUtils.interleaveBits(columnsBinary, totalOutputBytes, outputBuffer.get()); } private UserDefinedFunction tinyToOrderedBytesUDF() { @@ -98,11
+97,11 @@ private UserDefinedFunction tinyToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.tinyintToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.BUFFER_SIZE)).array(); + return ZOrderByteUtils.tinyintToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)).array(); }, DataTypes.BinaryType).withName("TINY_ORDERED_BYTES"); this.inputCol++; - this.totalBytes += Byte.BYTES; + this.totalOutputBytes += Byte.BYTES; return udf; } @@ -113,11 +112,11 @@ private UserDefinedFunction shortToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.shortToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.BUFFER_SIZE)).array(); + return ZOrderByteUtils.shortToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)).array(); }, DataTypes.BinaryType).withName("SHORT_ORDERED_BYTES"); this.inputCol++; - this.totalBytes += Short.BYTES; + this.totalOutputBytes += Short.BYTES; return udf; } @@ -128,11 +127,11 @@ private UserDefinedFunction intToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.intToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.BUFFER_SIZE)).array(); + return ZOrderByteUtils.intToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)).array(); }, DataTypes.BinaryType).withName("INT_ORDERED_BYTES"); this.inputCol++; - this.totalBytes += Integer.BYTES; + this.totalOutputBytes += Integer.BYTES; return udf; } @@ -143,11 +142,11 @@ private UserDefinedFunction longToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.longToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.BUFFER_SIZE)).array(); + return ZOrderByteUtils.longToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)).array(); }, DataTypes.BinaryType).withName("LONG_ORDERED_BYTES"); this.inputCol++; - this.totalBytes += Long.BYTES; + this.totalOutputBytes += Long.BYTES; return udf; } @@ -158,11 +157,11 @@ private UserDefinedFunction floatToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.floatToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.BUFFER_SIZE)).array(); + return ZOrderByteUtils.floatToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)).array(); }, DataTypes.BinaryType).withName("FLOAT_ORDERED_BYTES"); this.inputCol++; - this.totalBytes += Float.BYTES; + this.totalOutputBytes += Float.BYTES; return udf; } @@ -173,11 +172,11 @@ private UserDefinedFunction doubleToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.doubleToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.BUFFER_SIZE)).array(); - }, DataTypes.BinaryType).withName("FLOAT_ORDERED_BYTES"); + return ZOrderByteUtils.doubleToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)).array(); + }, DataTypes.BinaryType).withName("DOUBLE_ORDERED_BYTES"); this.inputCol++; - this.totalBytes += Double.BYTES; + this.totalOutputBytes += Double.BYTES; return udf; } @@ -187,13 +186,26 @@ private UserDefinedFunction stringToOrderedBytesUDF() { UserDefinedFunction udf = functions.udf((String value) -> ZOrderByteUtils.stringToOrderedBytes( value, - STRING_KEY_LENGTH, - inputBuffer(position, STRING_KEY_LENGTH), + varTypeSize, + inputBuffer(position, varTypeSize), encoder.get()).array(), DataTypes.BinaryType) 
.withName("STRING-LEXICAL-BYTES"); this.inputCol++; - this.totalBytes += STRING_KEY_LENGTH; + this.totalOutputBytes += varTypeSize; + + return udf; + } + + private UserDefinedFunction bytesTruncateUDF() { + int position = inputCol; + UserDefinedFunction udf = functions.udf((byte[] value) -> + ZOrderByteUtils.byteTruncateOrFill(value, varTypeSize, inputBuffer(position, varTypeSize)).array(), + DataTypes.BinaryType) + .withName("BYTE-TRUNCATE"); + + this.inputCol++; + this.totalOutputBytes += varTypeSize; return udf; } @@ -223,9 +235,9 @@ Column sortedLexicographically(Column column, DataType type) { } else if (type instanceof StringType) { return stringToOrderedBytesUDF().apply(column); } else if (type instanceof BinaryType) { - return stringToOrderedBytesUDF().apply(column); + return bytesTruncateUDF().apply(column); } else if (type instanceof BooleanType) { - return column.cast(DataTypes.BinaryType); + return bytesTruncateUDF().apply(column); } else if (type instanceof TimestampType) { return longToOrderedBytesUDF().apply(column.cast(DataTypes.LongType)); } else if (type instanceof DateType) { diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSortStrategy.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSortStrategy.java index d4823560bf17..6c8f8c027dba 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSortStrategy.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSortStrategy.java @@ -157,6 +157,14 @@ protected LogicalPlan sortPlan(Distribution distribution, SortOrder[] ordering, } protected double sizeEstimateMultiple() { - return this.sizeEstimateMultiple; + return sizeEstimateMultiple; + } + + protected FileScanTaskSetManager manager() { + return manager; + } + + protected FileRewriteCoordinator rewriteCoordinator() { + return rewriteCoordinator; } } From 14637da30f6e7162e77db67c18eece2159e9a7ff Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Fri, 18 Mar 2022 17:00:06 -0500 Subject: [PATCH 21/30] Unstage Results --- core/benchmark/ZorderResult.txt | 227 -------------------------------- 1 file changed, 227 deletions(-) delete mode 100644 core/benchmark/ZorderResult.txt diff --git a/core/benchmark/ZorderResult.txt b/core/benchmark/ZorderResult.txt deleted file mode 100644 index 559025105ce3..000000000000 --- a/core/benchmark/ZorderResult.txt +++ /dev/null @@ -1,227 +0,0 @@ -# JMH version: 1.32 -# VM version: JDK 11.0.4, OpenJDK 64-Bit Server VM, 11.0.4+11-LTS -# VM invoker: /Library/Java/JavaVirtualMachines/applejdk-11.jdk/Contents/Home/bin/java -# VM options: -Dfile.encoding=UTF-8 -Djava.io.tmpdir=/Users/russellspitzer/repos/ipr/iceberg-master/core/build/tmp/jmh -Duser.country=US -Duser.language=en -Duser.variant -# Blackhole mode: full + dont-inline hint -# Warmup: -# Measurement: 5 iterations, single-shot each -# Timeout: 1000 hr per iteration -# Threads: 1 thread -# Benchmark mode: Single shot invocation time -# Benchmark: org.apache.iceberg.util.ZOrderByteUtilsBenchmark.interleaveValuesFourColumns - -# Run progress: 0.00% complete, ETA 00:00:00 -# Fork: 1 of 1 -Iteration 1: 11.086 s/op -Iteration 2: 10.337 s/op -Iteration 3: 10.606 s/op -Iteration 4: 10.897 s/op -Iteration 5: 10.607 s/op - - -Result "org.apache.iceberg.util.ZOrderByteUtilsBenchmark.interleaveValuesFourColumns": - N = 5 - mean = 10.706 ±(99.9%) 1.117 s/op - - Histogram, s/op: - [10.300, 10.350) = 1 - [10.350, 10.400) = 0 - [10.400, 10.450) = 0 - [10.450, 10.500) = 0 - [10.500, 10.550) = 0 - 
[10.550, 10.600) = 0 - [10.600, 10.650) = 2 - [10.650, 10.700) = 0 - [10.700, 10.750) = 0 - [10.750, 10.800) = 0 - [10.800, 10.850) = 0 - [10.850, 10.900) = 1 - [10.900, 10.950) = 0 - [10.950, 11.000) = 0 - [11.000, 11.050) = 0 - [11.050, 11.100) = 1 - - Percentiles, s/op: - p(0.0000) = 10.337 s/op - p(50.0000) = 10.607 s/op - p(90.0000) = 11.086 s/op - p(95.0000) = 11.086 s/op - p(99.0000) = 11.086 s/op - p(99.9000) = 11.086 s/op - p(99.9900) = 11.086 s/op - p(99.9990) = 11.086 s/op - p(99.9999) = 11.086 s/op - p(100.0000) = 11.086 s/op - - -# JMH version: 1.32 -# VM version: JDK 11.0.4, OpenJDK 64-Bit Server VM, 11.0.4+11-LTS -# VM invoker: /Library/Java/JavaVirtualMachines/applejdk-11.jdk/Contents/Home/bin/java -# VM options: -Dfile.encoding=UTF-8 -Djava.io.tmpdir=/Users/russellspitzer/repos/ipr/iceberg-master/core/build/tmp/jmh -Duser.country=US -Duser.language=en -Duser.variant -# Blackhole mode: full + dont-inline hint -# Warmup: -# Measurement: 5 iterations, single-shot each -# Timeout: 1000 hr per iteration -# Threads: 1 thread -# Benchmark mode: Single shot invocation time -# Benchmark: org.apache.iceberg.util.ZOrderByteUtilsBenchmark.interleaveValuesFourColumns8ByteOutput - -# Run progress: 25.00% complete, ETA 00:02:54 -# Fork: 1 of 1 -Iteration 1: 2.521 s/op -Iteration 2: 2.750 s/op -Iteration 3: 2.999 s/op -Iteration 4: 2.972 s/op -Iteration 5: 3.028 s/op - - -Result "org.apache.iceberg.util.ZOrderByteUtilsBenchmark.interleaveValuesFourColumns8ByteOutput": - N = 5 - mean = 2.854 ±(99.9%) 0.832 s/op - - Histogram, s/op: - [2.500, 2.550) = 1 - [2.550, 2.600) = 0 - [2.600, 2.650) = 0 - [2.650, 2.700) = 0 - [2.700, 2.750) = 1 - [2.750, 2.800) = 0 - [2.800, 2.850) = 0 - [2.850, 2.900) = 0 - [2.900, 2.950) = 0 - [2.950, 3.000) = 2 - [3.000, 3.050) = 1 - [3.050, 3.100) = 0 - - Percentiles, s/op: - p(0.0000) = 2.521 s/op - p(50.0000) = 2.972 s/op - p(90.0000) = 3.028 s/op - p(95.0000) = 3.028 s/op - p(99.0000) = 3.028 s/op - p(99.9000) = 3.028 s/op - p(99.9900) = 3.028 s/op - p(99.9990) = 3.028 s/op - p(99.9999) = 3.028 s/op - p(100.0000) = 3.028 s/op - - -# JMH version: 1.32 -# VM version: JDK 11.0.4, OpenJDK 64-Bit Server VM, 11.0.4+11-LTS -# VM invoker: /Library/Java/JavaVirtualMachines/applejdk-11.jdk/Contents/Home/bin/java -# VM options: -Dfile.encoding=UTF-8 -Djava.io.tmpdir=/Users/russellspitzer/repos/ipr/iceberg-master/core/build/tmp/jmh -Duser.country=US -Duser.language=en -Duser.variant -# Blackhole mode: full + dont-inline hint -# Warmup: -# Measurement: 5 iterations, single-shot each -# Timeout: 1000 hr per iteration -# Threads: 1 thread -# Benchmark mode: Single shot invocation time -# Benchmark: org.apache.iceberg.util.ZOrderByteUtilsBenchmark.interleaveValuesThreeColumns - -# Run progress: 50.00% complete, ETA 00:01:15 -# Fork: 1 of 1 -Iteration 1: 7.440 s/op -Iteration 2: 7.625 s/op -Iteration 3: 8.216 s/op -Iteration 4: 8.314 s/op -Iteration 5: 8.203 s/op - - -Result "org.apache.iceberg.util.ZOrderByteUtilsBenchmark.interleaveValuesThreeColumns": - N = 5 - mean = 7.960 ±(99.9%) 1.532 s/op - - Histogram, s/op: - [7.400, 7.500) = 1 - [7.500, 7.600) = 0 - [7.600, 7.700) = 1 - [7.700, 7.800) = 0 - [7.800, 7.900) = 0 - [7.900, 8.000) = 0 - [8.000, 8.100) = 0 - [8.100, 8.200) = 0 - [8.200, 8.300) = 2 - - Percentiles, s/op: - p(0.0000) = 7.440 s/op - p(50.0000) = 8.203 s/op - p(90.0000) = 8.314 s/op - p(95.0000) = 8.314 s/op - p(99.0000) = 8.314 s/op - p(99.9000) = 8.314 s/op - p(99.9900) = 8.314 s/op - p(99.9990) = 8.314 s/op - p(99.9999) = 8.314 s/op - p(100.0000) = 8.314 
s/op - - -# JMH version: 1.32 -# VM version: JDK 11.0.4, OpenJDK 64-Bit Server VM, 11.0.4+11-LTS -# VM invoker: /Library/Java/JavaVirtualMachines/applejdk-11.jdk/Contents/Home/bin/java -# VM options: -Dfile.encoding=UTF-8 -Djava.io.tmpdir=/Users/russellspitzer/repos/ipr/iceberg-master/core/build/tmp/jmh -Duser.country=US -Duser.language=en -Duser.variant -# Blackhole mode: full + dont-inline hint -# Warmup: -# Measurement: 5 iterations, single-shot each -# Timeout: 1000 hr per iteration -# Threads: 1 thread -# Benchmark mode: Single shot invocation time -# Benchmark: org.apache.iceberg.util.ZOrderByteUtilsBenchmark.interleaveValuesTwoColumns - -# Run progress: 75.00% complete, ETA 00:00:39 -# Fork: 1 of 1 -Iteration 1: 5.327 s/op -Iteration 2: 5.212 s/op -Iteration 3: 5.963 s/op -Iteration 4: 5.758 s/op -Iteration 5: 5.827 s/op - - -Result "org.apache.iceberg.util.ZOrderByteUtilsBenchmark.interleaveValuesTwoColumns": - N = 5 - mean = 5.618 ±(99.9%) 1.265 s/op - - Histogram, s/op: - [5.200, 5.250) = 1 - [5.250, 5.300) = 0 - [5.300, 5.350) = 1 - [5.350, 5.400) = 0 - [5.400, 5.450) = 0 - [5.450, 5.500) = 0 - [5.500, 5.550) = 0 - [5.550, 5.600) = 0 - [5.600, 5.650) = 0 - [5.650, 5.700) = 0 - [5.700, 5.750) = 0 - [5.750, 5.800) = 1 - [5.800, 5.850) = 1 - [5.850, 5.900) = 0 - [5.900, 5.950) = 0 - - Percentiles, s/op: - p(0.0000) = 5.212 s/op - p(50.0000) = 5.758 s/op - p(90.0000) = 5.963 s/op - p(95.0000) = 5.963 s/op - p(99.0000) = 5.963 s/op - p(99.9000) = 5.963 s/op - p(99.9900) = 5.963 s/op - p(99.9990) = 5.963 s/op - p(99.9999) = 5.963 s/op - p(100.0000) = 5.963 s/op - - -# Run complete. Total time: 00:02:29 - -REMEMBER: The numbers below are just data. To gain reusable insights, you need to follow up on -why the numbers are the way they are. Use profilers (see -prof, -lprof), design factorial -experiments, perform baseline and negative tests that provide experimental control, make sure -the benchmarking environment is safe on JVM/OS/HW level, ask for reviews from the domain experts. -Do not assume the numbers tell you what you want them to tell. - -Benchmark Mode Cnt Score Error Units -ZOrderByteUtilsBenchmark.interleaveValuesFourColumns ss 5 10.706 ± 1.117 s/op -ZOrderByteUtilsBenchmark.interleaveValuesFourColumns8ByteOutput ss 5 2.854 ± 0.832 s/op -ZOrderByteUtilsBenchmark.interleaveValuesThreeColumns ss 5 7.960 ± 1.532 s/op -ZOrderByteUtilsBenchmark.interleaveValuesTwoColumns ss 5 5.618 ± 1.265 s/op - -Benchmark result is saved to /Users/russellspitzer/repos/ipr/iceberg-master/core/build/results/jmh/results.txt From 2e68428b9ac0729d37d777d9b738d2073dcadfcd Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Fri, 18 Mar 2022 17:22:11 -0500 Subject: [PATCH 22/30] Checkstyle --- .../spark/action/RandomGeneratingUDF.java | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/RandomGeneratingUDF.java b/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/RandomGeneratingUDF.java index 5cb6a350c7c7..cfbd9d4fb3f6 100644 --- a/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/RandomGeneratingUDF.java +++ b/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/RandomGeneratingUDF.java @@ -1,15 +1,20 @@ /* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.iceberg.spark.action; From 859c5585ce00314cbf8229711a1d978b1d393b77 Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Fri, 18 Mar 2022 17:29:14 -0500 Subject: [PATCH 23/30] More Checkstyle --- .../spark/actions/Spark3ZOrderStrategy.java | 2 +- .../iceberg/spark/actions/Spark3ZOrderUDF.java | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java index 238e6aaa7e4a..6854d64a3ddc 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java @@ -26,7 +26,6 @@ import java.util.UUID; import java.util.stream.Collectors; import java.util.stream.Stream; -import org.apache.hadoop.shaded.com.google.common.collect.ImmutableSet; import org.apache.iceberg.DataFile; import org.apache.iceberg.FileScanTask; import org.apache.iceberg.NullOrder; @@ -36,6 +35,7 @@ import org.apache.iceberg.Table; import org.apache.iceberg.actions.RewriteStrategy; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; import org.apache.iceberg.spark.SparkDistributionAndOrderingUtil; import org.apache.iceberg.spark.SparkReadOptions; import org.apache.iceberg.spark.SparkWriteOptions; diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java index 8a23e01451ee..da206efb50db 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java @@ -97,7 +97,8 @@ private UserDefinedFunction tinyToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.tinyintToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)).array(); + return ZOrderByteUtils.tinyintToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)) + .array(); }, 
DataTypes.BinaryType).withName("TINY_ORDERED_BYTES"); this.inputCol++; @@ -112,7 +113,8 @@ private UserDefinedFunction shortToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.shortToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)).array(); + return ZOrderByteUtils.shortToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)) + .array(); }, DataTypes.BinaryType).withName("SHORT_ORDERED_BYTES"); this.inputCol++; @@ -127,7 +129,8 @@ private UserDefinedFunction intToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.intToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)).array(); + return ZOrderByteUtils.intToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)) + .array(); }, DataTypes.BinaryType).withName("INT_ORDERED_BYTES"); this.inputCol++; @@ -142,7 +145,8 @@ private UserDefinedFunction longToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.longToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)).array(); + return ZOrderByteUtils.longToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)) + .array(); }, DataTypes.BinaryType).withName("LONG_ORDERED_BYTES"); this.inputCol++; @@ -157,7 +161,8 @@ private UserDefinedFunction floatToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.floatToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)).array(); + return ZOrderByteUtils.floatToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)) + .array(); }, DataTypes.BinaryType).withName("FLOAT_ORDERED_BYTES"); this.inputCol++; @@ -172,7 +177,8 @@ private UserDefinedFunction doubleToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.doubleToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)).array(); + return ZOrderByteUtils.doubleToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)) + .array(); }, DataTypes.BinaryType).withName("DOUBLE_ORDERED_BYTES"); this.inputCol++; From 46b1a16431b13b14869634af22dfb9fe912b5dcb Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Mon, 18 Apr 2022 13:56:14 -0500 Subject: [PATCH 24/30] Rebase and Review Feedback --- .../IcebergSortCompactionBenchmark.java | 32 ++++++++--------- .../BaseRewriteDataFilesSparkAction.java | 9 +++-- ...Strategy.java => SparkZOrderStrategy.java} | 26 ++++++++------ ...ark3ZOrderUDF.java => SparkZOrderUDF.java} | 36 ++++++++++++++----- .../spark/data/SparkParquetWriters.java | 24 +++++++++++++ .../actions/TestRewriteDataFilesAction.java | 18 +++++++--- 6 files changed, 100 insertions(+), 45 deletions(-) rename spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/{Spark3ZOrderStrategy.java => SparkZOrderStrategy.java} (89%) rename spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/{Spark3ZOrderUDF.java => SparkZOrderUDF.java} (89%) diff --git a/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java b/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java index 1fff9c35186f..8c205037f56e 100644 --- a/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java +++ 
b/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java @@ -30,11 +30,11 @@ import org.apache.iceberg.SortDirection; import org.apache.iceberg.SortOrder; import org.apache.iceberg.Table; +import org.apache.iceberg.actions.BinPackStrategy; import org.apache.iceberg.relocated.com.google.common.io.Files; import org.apache.iceberg.spark.Spark3Util; import org.apache.iceberg.spark.SparkSchemaUtil; import org.apache.iceberg.spark.SparkSessionCatalog; -import org.apache.iceberg.spark.actions.Spark3SortStrategy; import org.apache.iceberg.spark.actions.SparkActions; import org.apache.iceberg.types.Types; import org.apache.spark.sql.Dataset; @@ -107,7 +107,7 @@ public void cleanUpIteration() throws IOException { public void sortInt() { SparkActions.get() .rewriteDataFiles(table()) - .option(Spark3SortStrategy.REWRITE_ALL, "true") + .option(BinPackStrategy.REWRITE_ALL, "true") .sort(SortOrder .builderFor(table().schema()) .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) @@ -120,7 +120,7 @@ public void sortInt() { public void sortInt2() { SparkActions.get() .rewriteDataFiles(table()) - .option(Spark3SortStrategy.REWRITE_ALL, "true") + .option(BinPackStrategy.REWRITE_ALL, "true") .sort(SortOrder .builderFor(table().schema()) .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) @@ -134,7 +134,7 @@ public void sortInt2() { public void sortInt3() { SparkActions.get() .rewriteDataFiles(table()) - .option(Spark3SortStrategy.REWRITE_ALL, "true") + .option(BinPackStrategy.REWRITE_ALL, "true") .sort(SortOrder .builderFor(table().schema()) .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) @@ -150,7 +150,7 @@ public void sortInt3() { public void sortInt4() { SparkActions.get() .rewriteDataFiles(table()) - .option(Spark3SortStrategy.REWRITE_ALL, "true") + .option(BinPackStrategy.REWRITE_ALL, "true") .sort(SortOrder .builderFor(table().schema()) .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) @@ -166,7 +166,7 @@ public void sortInt4() { public void sortString() { SparkActions.get() .rewriteDataFiles(table()) - .option(Spark3SortStrategy.REWRITE_ALL, "true") + .option(BinPackStrategy.REWRITE_ALL, "true") .sort(SortOrder .builderFor(table().schema()) .sortBy("stringCol", SortDirection.ASC, NullOrder.NULLS_FIRST) @@ -179,7 +179,7 @@ public void sortString() { public void sortFourColumns() { SparkActions.get() .rewriteDataFiles(table()) - .option(Spark3SortStrategy.REWRITE_ALL, "true") + .option(BinPackStrategy.REWRITE_ALL, "true") .sort(SortOrder .builderFor(table().schema()) .sortBy("stringCol", SortDirection.ASC, NullOrder.NULLS_FIRST) @@ -195,7 +195,7 @@ public void sortFourColumns() { public void sortSixColumns() { SparkActions.get() .rewriteDataFiles(table()) - .option(Spark3SortStrategy.REWRITE_ALL, "true") + .option(BinPackStrategy.REWRITE_ALL, "true") .sort(SortOrder .builderFor(table().schema()) .sortBy("stringCol", SortDirection.ASC, NullOrder.NULLS_FIRST) @@ -213,7 +213,7 @@ public void sortSixColumns() { public void zSortInt() { SparkActions.get() .rewriteDataFiles(table()) - .option(Spark3SortStrategy.REWRITE_ALL, "true") + .option(BinPackStrategy.REWRITE_ALL, "true") .zOrder("intCol") .execute(); } @@ -223,7 +223,7 @@ public void zSortInt() { public void zSortInt2() { SparkActions.get() .rewriteDataFiles(table()) - .option(Spark3SortStrategy.REWRITE_ALL, "true") + .option(BinPackStrategy.REWRITE_ALL, "true") .zOrder("intCol", "intCol2") .execute(); } @@ -233,7 +233,7 @@ public void zSortInt2() { public void zSortInt3() { 
SparkActions.get() .rewriteDataFiles(table()) - .option(Spark3SortStrategy.REWRITE_ALL, "true") + .option(BinPackStrategy.REWRITE_ALL, "true") .zOrder("intCol", "intCol2", "intCol3") .execute(); } @@ -243,7 +243,7 @@ public void zSortInt3() { public void zSortInt4() { SparkActions.get() .rewriteDataFiles(table()) - .option(Spark3SortStrategy.REWRITE_ALL, "true") + .option(BinPackStrategy.REWRITE_ALL, "true") .zOrder("intCol", "intCol2", "intCol3", "intCol4") .execute(); } @@ -253,7 +253,7 @@ public void zSortInt4() { public void zSortString() { SparkActions.get() .rewriteDataFiles(table()) - .option(Spark3SortStrategy.REWRITE_ALL, "true") + .option(BinPackStrategy.REWRITE_ALL, "true") .zOrder("stringCol") .execute(); } @@ -263,7 +263,7 @@ public void zSortString() { public void zSortFourColumns() { SparkActions.get() .rewriteDataFiles(table()) - .option(Spark3SortStrategy.REWRITE_ALL, "true") + .option(BinPackStrategy.REWRITE_ALL, "true") .zOrder("stringCol", "intCol", "dateCol", "doubleCol") .execute(); } @@ -273,7 +273,7 @@ public void zSortFourColumns() { public void zSortSixColumns() { SparkActions.get() .rewriteDataFiles(table()) - .option(Spark3SortStrategy.REWRITE_ALL, "true") + .option(BinPackStrategy.REWRITE_ALL, "true") .zOrder("stringCol", "intCol", "dateCol", "timestampCol", "doubleCol", "longCol") .execute(); } @@ -295,7 +295,7 @@ protected final void initTable() { optional(9, "timestampCol", Types.TimestampType.withZone()), optional(10, "stringCol", Types.StringType.get())); - SparkSessionCatalog catalog = null; + SparkSessionCatalog catalog; try { catalog = (SparkSessionCatalog) Spark3Util.catalogAndIdentifier(spark(), "spark_catalog").catalog(); diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/BaseRewriteDataFilesSparkAction.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/BaseRewriteDataFilesSparkAction.java index 62cb5b174d43..3a8d8a81fb86 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/BaseRewriteDataFilesSparkAction.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/BaseRewriteDataFilesSparkAction.java @@ -106,11 +106,6 @@ protected RewriteDataFiles self() { return this; } - /** - * The framework specific ZOrder Strategy - */ - protected abstract SortStrategy zOrderStrategy(String... columnNames); - @Override public RewriteDataFiles binPack() { Preconditions.checkArgument(this.strategy == null, @@ -440,6 +435,10 @@ private SortStrategy sortStrategy() { return new SparkSortStrategy(table, spark()); } + private SortStrategy zOrderStrategy(String... 
columnNames) { + return new SparkZOrderStrategy(table, spark(), Lists.newArrayList(columnNames)); + } + @VisibleForTesting static class RewriteExecutionContext { private final Map numGroupsByPartition; diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderStrategy.java similarity index 89% rename from spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java rename to spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderStrategy.java index 6854d64a3ddc..a38ea69b5959 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderStrategy.java @@ -55,8 +55,11 @@ import org.apache.spark.sql.functions; import org.apache.spark.sql.internal.SQLConf; import org.apache.spark.sql.types.StructField; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -public class Spark3ZOrderStrategy extends Spark3SortStrategy { +public class SparkZOrderStrategy extends SparkSortStrategy { + private static final Logger LOG = LoggerFactory.getLogger(SparkZOrderStrategy.class); private static final String Z_COLUMN = "ICEZVALUE"; private static final Schema Z_SCHEMA = new Schema(NestedField.required(0, Z_COLUMN, Types.BinaryType.get())); @@ -78,7 +81,7 @@ public class SparkZOrderStrategy extends SparkSortStrategy { private static final int DEFAULT_VAR_LENGTH_CONTRIBUTION = ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE; private final List zOrderColNames; - private final Spark3ZOrderUDF zOrderUDF; + private final SparkZOrderUDF zOrderUDF; private int maxOutputSize; private int varLengthContribution; @@ -102,16 +105,15 @@ public RewriteStrategy options(Map options) { "Cannot use less than 1 byte for variable length types with zOrder, %s was set to %s", VAR_LENGTH_CONTRIBUTION_KEY, varLengthContribution); - maxOutputSize = PropertyUtil.propertyAsInt(options, MAX_OUTPUT_SIZE_KEY, DEFAULT_MAX_OUTPUT_SIZE); Preconditions.checkArgument(maxOutputSize > 0, "Cannot have the interleaved ZOrder value use less than 1 byte, %s was set to %s", MAX_OUTPUT_SIZE_KEY, maxOutputSize); - return this; + return this; } - public Spark3ZOrderStrategy(Table table, SparkSession spark, List zOrderColNames) { + public SparkZOrderStrategy(Table table, SparkSession spark, List zOrderColNames) { super(table, spark); Preconditions.checkArgument(zOrderColNames != null && !zOrderColNames.isEmpty(), @@ -123,14 +125,16 @@ public Spark3ZOrderStrategy(Table table, SparkSession spark, List zOrder List partZOrderCols = identityPartitionColumns .filter(zOrderColNames::contains) .collect(Collectors.toList()); - Preconditions.checkArgument( - partZOrderCols.isEmpty(), - "Cannot ZOrder on an Identity partition column as these values are constant within a partition, " + - "ZOrdering requested on Identity columns: %s", - partZOrderCols); - this.zOrderUDF = new Spark3ZOrderUDF(zOrderColNames.size(), varLengthContribution, maxOutputSize); + if (!partZOrderCols.isEmpty()) { + LOG.warn("Cannot ZOrder on an Identity partition column as these values are constant within a partition; " + + "they will be removed from the ZOrder expression: {}", partZOrderCols); + zOrderColNames.removeAll(partZOrderCols); + Preconditions.checkArgument(!zOrderColNames.isEmpty(), + "Cannot perform ZOrdering, all columns provided were identity partition columns and cannot be used."); + } +
this.zOrderUDF = new SparkZOrderUDF(zOrderColNames.size(), varLengthContribution, maxOutputSize); this.zOrderColNames = zOrderColNames; } diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java similarity index 89% rename from spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java rename to spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java index da206efb50db..8bebfb2b7543 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java @@ -44,7 +44,7 @@ import org.apache.spark.sql.types.TimestampType; import scala.collection.Seq; -class Spark3ZOrderUDF implements Serializable { +class SparkZOrderUDF implements Serializable { private static final byte[] PRIMITIVE_EMPTY = new byte[ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE]; /** @@ -53,7 +53,7 @@ class Spark3ZOrderUDF implements Serializable { */ private transient ThreadLocal outputBuffer; private transient ThreadLocal inputHolder; - private transient ThreadLocal[] inputBuffers; + private transient ThreadLocal inputBuffers; private transient ThreadLocal encoder; private final int numCols; @@ -63,7 +63,7 @@ class Spark3ZOrderUDF implements Serializable { private final int varTypeSize; private final int maxOutputSize; - Spark3ZOrderUDF(int numCols, int varTypeSize, int maxOutputSize) { + SparkZOrderUDF(int numCols, int varTypeSize, int maxOutputSize) { this.numCols = numCols; this.varTypeSize = varTypeSize; this.maxOutputSize = maxOutputSize; @@ -71,18 +71,22 @@ class Spark3ZOrderUDF implements Serializable { private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException { in.defaultReadObject(); - inputBuffers = new ThreadLocal[numCols]; + if (totalOutputBytes > maxOutputSize) { + totalOutputBytes = maxOutputSize; + } + inputBuffers = ThreadLocal.withInitial(() -> new ByteBuffer[numCols]); inputHolder = ThreadLocal.withInitial(() -> new byte[numCols][]); outputBuffer = ThreadLocal.withInitial(() -> ByteBuffer.allocate(totalOutputBytes)); encoder = ThreadLocal.withInitial(() -> StandardCharsets.UTF_8.newEncoder()); } private ByteBuffer inputBuffer(int position, int size) { - if (inputBuffers[position] == null) { - // May over allocate on concurrent calls - inputBuffers[position] = ThreadLocal.withInitial(() -> ByteBuffer.allocate(size)); + ByteBuffer buffer = inputBuffers.get()[position]; + if (buffer == null) { + buffer = ByteBuffer.allocate(size); + inputBuffers.get()[position] = buffer; } - return inputBuffers[position].get(); + return buffer; } byte[] interleaveBits(Seq scalaBinary) { @@ -216,6 +220,20 @@ private UserDefinedFunction bytesTruncateUDF() { return udf; } + private UserDefinedFunction booleanToOrderedBytesUDF() { + int position = inputCol; + UserDefinedFunction udf = functions.udf((Boolean value) -> { + ByteBuffer buffer = inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE); + buffer.put(0, (byte) (value ? 
-127 : 0)); + return buffer.array(); + }, DataTypes.BinaryType) + .withName("BOOLEAN-LEXICAL-BYTES"); + + this.inputCol++; + this.totalOutputBytes += ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE; + return udf; + } + private final UserDefinedFunction interleaveUDF = functions.udf((Seq arrayBinary) -> interleaveBits(arrayBinary), DataTypes.BinaryType) .withName("INTERLEAVE_BYTES"); @@ -243,7 +261,7 @@ Column sortedLexicographically(Column column, DataType type) { } else if (type instanceof BinaryType) { return bytesTruncateUDF().apply(column); } else if (type instanceof BooleanType) { - return bytesTruncateUDF().apply(column); + return booleanToOrderedBytesUDF().apply(column); } else if (type instanceof TimestampType) { return longToOrderedBytesUDF().apply(column.cast(DataTypes.LongType)); } else if (type instanceof DateType) { diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java index 5e268d26ed9c..845fe305e494 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java @@ -19,10 +19,12 @@ package org.apache.iceberg.spark.data; +import java.nio.ByteBuffer; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.NoSuchElementException; +import java.util.UUID; import org.apache.iceberg.parquet.ParquetValueReaders.ReusableEntry; import org.apache.iceberg.parquet.ParquetValueWriter; import org.apache.iceberg.parquet.ParquetValueWriters; @@ -299,6 +301,28 @@ public void write(int repetitionLevel, byte[] bytes) { } } + private static PrimitiveWriter uuids(ColumnDescriptor desc) { + return new UUIDWriter(desc); + } + + private static class UUIDWriter extends PrimitiveWriter { + private ByteBuffer buffer = ByteBuffer.allocate(16); + + private UUIDWriter(ColumnDescriptor desc) { + super(desc); + } + + @Override + public void write(int repetitionLevel, UTF8String string) { + UUID uuid = UUID.fromString(string.toString()); + buffer.rewind(); + buffer.putLong(uuid.getMostSignificantBits()); + buffer.putLong(uuid.getLeastSignificantBits()); + buffer.rewind(); + column.writeBinary(repetitionLevel, Binary.fromReusedByteBuffer(buffer)); + } + } + private static class ArrayDataWriter extends RepeatedWriter { private final DataType elementType; diff --git a/spark/v3.2/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java b/spark/v3.2/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java index f6f3004aecd0..d577e0ce4eda 100644 --- a/spark/v3.2/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java +++ b/spark/v3.2/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java @@ -22,6 +22,7 @@ import java.io.File; import java.io.IOException; import java.io.UncheckedIOException; +import java.nio.ByteBuffer; import java.util.Collections; import java.util.Comparator; import java.util.List; @@ -87,6 +88,9 @@ import org.apache.iceberg.util.Pair; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; +import org.apache.spark.sql.expressions.UserDefinedFunction; +import org.apache.spark.sql.functions; +import org.apache.spark.sql.types.DataTypes; import org.junit.Assert; import org.junit.Before; import org.junit.Rule; @@ -1093,9 +1097,11 @@ public void testZOrderAllTypesSort() { List originalRaw = 
spark.read().format("iceberg").load(tableLocation).sort("longCol").collectAsList(); List originalData = rowsToJava(originalRaw); + // TODO add in UUID when it is supported in Spark RewriteDataFiles.Result result = basicRewrite(table) - .zOrder("longCol", "intCol", "floatCol", "doubleCol", "dateCol", "timestampCol", "stringCol") + .zOrder("longCol", "intCol", "floatCol", "doubleCol", "dateCol", "timestampCol", "stringCol", "binaryCol", + "booleanCol") .option(SortStrategy.MIN_INPUT_FILES, "1") .option(SortStrategy.REWRITE_ALL, "true") .execute(); @@ -1415,9 +1421,11 @@ private Table createTypeTestTable() { required(2, "intCol", Types.IntegerType.get()), required(3, "floatCol", Types.FloatType.get()), optional(4, "doubleCol", Types.DoubleType.get()), - optional(6, "dateCol", Types.DateType.get()), - optional(7, "timestampCol", Types.TimestampType.withZone()), - optional(8, "stringCol", Types.StringType.get())); + optional(5, "dateCol", Types.DateType.get()), + optional(6, "timestampCol", Types.TimestampType.withZone()), + optional(7, "stringCol", Types.StringType.get()), + optional(8, "booleanCol", Types.BooleanType.get()), + optional(9, "binaryCol", Types.BinaryType.get())); Map options = Maps.newHashMap(); Table table = TABLES.create(schema, PartitionSpec.unpartitioned(), options, tableLocation); @@ -1430,6 +1438,8 @@ private Table createTypeTestTable() { .withColumn("dateCol", date_add(current_date(), 1)) .withColumn("timestampCol", expr("TO_TIMESTAMP(dateCol)")) .withColumn("stringCol", expr("CAST(dateCol AS STRING)")) + .withColumn("booleanCol", expr("longCol > 5")) + .withColumn("binaryCol", expr("CAST(longCol AS BINARY)")) .write() .format("iceberg") .mode("append") From a6981c82b458e952daca91970fc54716ed9cce67 Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Mon, 18 Apr 2022 13:59:17 -0500 Subject: [PATCH 25/30] CheckStyle --- .../iceberg/spark/actions/TestRewriteDataFilesAction.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/spark/v3.2/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java b/spark/v3.2/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java index d577e0ce4eda..4137bded9404 100644 --- a/spark/v3.2/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java +++ b/spark/v3.2/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java @@ -22,7 +22,6 @@ import java.io.File; import java.io.IOException; import java.io.UncheckedIOException; -import java.nio.ByteBuffer; import java.util.Collections; import java.util.Comparator; import java.util.List; @@ -88,9 +87,6 @@ import org.apache.iceberg.util.Pair; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import org.apache.spark.sql.expressions.UserDefinedFunction; -import org.apache.spark.sql.functions; -import org.apache.spark.sql.types.DataTypes; import org.junit.Assert; import org.junit.Before; import org.junit.Rule; From ecf04d8cabd76e89df6511308a09babb15761f45 Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Mon, 18 Apr 2022 17:53:52 -0500 Subject: [PATCH 26/30] Fix bug with reused Interleave Buffer --- .../apache/iceberg/util/ZOrderByteUtils.java | 2 ++ .../iceberg/util/TestZOrderByteUtil.java | 29 +++++++++++++++++++ .../spark/actions/SparkZOrderStrategy.java | 4 +-- .../iceberg/spark/actions/SparkZOrderUDF.java | 28 ++++++++++-------- 4 files changed, 49 insertions(+), 14 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java 
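The commit message above names the bug; concretely, ZOrderByteUtils.interleaveBits ORs source bits into a caller-supplied reuse buffer, so bits set while interleaving the previous row survive into the next result unless the buffer is zeroed first, which is what the Arrays.fill added below does. A minimal standalone reproduction of the failure mode (illustrative code, not the project's test):

import java.nio.ByteBuffer;
import java.util.Arrays;

class DirtyBufferDemo {
  // ORs each input byte into the reused output, the way bit interleaving accumulates bits.
  static byte[] orInto(byte[] input, ByteBuffer reuse) {
    byte[] out = reuse.array();
    // Without a clearing step such as Arrays.fill(out, 0, input.length, (byte) 0x00),
    // bits from the previous call leak into this result.
    for (int i = 0; i < input.length; i++) {
      out[i] |= input[i];
    }
    return out;
  }

  public static void main(String[] args) {
    ByteBuffer reuse = ByteBuffer.allocate(1);
    System.out.println(Arrays.toString(orInto(new byte[] {0x0F}, reuse)));        // [15]
    System.out.println(Arrays.toString(orInto(new byte[] {(byte) 0xF0}, reuse))); // [-1], but [-16] expected
  }
}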
b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java index 9b5e571862c9..399a8bd21ef1 100644 --- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java +++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java @@ -167,6 +167,8 @@ static byte[] interleaveBits(byte[][] columnsBinary, int interleavedSize) { */ public static byte[] interleaveBits(byte[][] columnsBinary, int interleavedSize, ByteBuffer reuse) { byte[] interleavedBytes = reuse.array(); + Arrays.fill(interleavedBytes, 0, interleavedSize, (byte) 0x00); + int sourceColumn = 0; int sourceByte = 0; int sourceBit = 7; diff --git a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java index 52ae803a5e18..1a2174b679ba 100644 --- a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java +++ b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java @@ -56,6 +56,13 @@ private String bytesToString(byte[] bytes) { */ private byte[] generateRandomBytes() { int length = Math.abs(random.nextInt(100) + 1); + return generateRandomBytes(length); + } + + /** + * Returns a byte array of a specified length + */ + private byte[] generateRandomBytes(int length) { byte[] result = new byte[length]; random.nextBytes(result); return result; @@ -107,6 +114,28 @@ public void testInterleaveRandomExamples() { } } + @Test + public void testReuseInterleaveBuffer() { + int numByteArrays = 2; + int colLength = 16; + ByteBuffer interleaveBuffer = ByteBuffer.allocate(numByteArrays * colLength); + for (int test = 0; test < NUM_INTERLEAVE_TESTS; test++) { + byte[][] testBytes = new byte[numByteArrays][]; + String[] testStrings = new String[numByteArrays]; + for (int byteIndex = 0; byteIndex < numByteArrays; byteIndex++) { + testBytes[byteIndex] = generateRandomBytes(colLength); + testStrings[byteIndex] = bytesToString(testBytes[byteIndex]); + } + + byte[] byteResult = ZOrderByteUtils.interleaveBits(testBytes, numByteArrays * colLength, interleaveBuffer); + String byteResultAsString = bytesToString(byteResult); + + String stringResult = interleaveStrings(testStrings); + + Assert.assertEquals("String interleave didn't match byte interleave", stringResult, byteResultAsString); + } + } + @Test public void testInterleaveEmptyBits() { byte[][] test = new byte[4][10]; diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderStrategy.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderStrategy.java index a38ea69b5959..234401d58039 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderStrategy.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderStrategy.java @@ -81,7 +81,6 @@ public class SparkZOrderStrategy extends SparkSortStrategy { private static final int DEFAULT_VAR_LENGTH_CONTRIBUTION = ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE; private final List zOrderColNames; - private final SparkZOrderUDF zOrderUDF; private int maxOutputSize; private int varLengthContribution; @@ -134,7 +133,6 @@ public SparkZOrderStrategy(Table table, SparkSession spark, List zOrderC "Cannot perform ZOrdering, all columns provided were identity partition columns and cannot be used."); } - this.zOrderUDF = new SparkZOrderUDF(zOrderColNames.size(), varLengthContribution, maxOutputSize); this.zOrderColNames = zOrderColNames; } @@ -151,6 +149,8 @@ protected void validateOptions() { @Override public Set rewriteFiles(List filesToRewrite) { + 
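The construction moved here out of the constructor and into rewriteFiles (continued below) fixes an ordering problem: options() assigns varLengthContribution and maxOutputSize only after the strategy object exists, so a UDF built in the constructor captured their pre-options values; building a fresh SparkZOrderUDF per rewrite also keeps its mutating counters (inputCol, totalOutputBytes) scoped to a single invocation. A toy illustration of the scoping half, with hypothetical names (Binder, boundColumns):

class Binder {
  private int boundColumns = 0;

  void bindColumn() {
    boundColumns++; // grows every time a column UDF is created
  }

  int boundColumns() {
    return boundColumns;
  }
}

class RewriteScopingDemo {
  // A fresh Binder per rewrite starts its counts at zero each time; a Binder held in a
  // field and initialized once would keep accumulating across successive rewrites.
  static int rewrite(int numCols) {
    Binder binder = new Binder();
    for (int i = 0; i < numCols; i++) {
      binder.bindColumn();
    }
    return binder.boundColumns(); // always numCols
  }
}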
SparkZOrderUDF zOrderUDF = new SparkZOrderUDF(zOrderColNames.size(), varLengthContribution, maxOutputSize); + String groupID = UUID.randomUUID().toString(); boolean requiresRepartition = !filesToRewrite.get(0).spec().equals(table().spec()); diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java index 8bebfb2b7543..302d969053a8 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java @@ -71,9 +71,6 @@ class SparkZOrderUDF implements Serializable { private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException { in.defaultReadObject(); - if (totalOutputBytes > maxOutputSize) { - totalOutputBytes = maxOutputSize; - } inputBuffers = ThreadLocal.withInitial(() -> new ByteBuffer[numCols]); inputHolder = ThreadLocal.withInitial(() -> new byte[numCols][]); outputBuffer = ThreadLocal.withInitial(() -> ByteBuffer.allocate(totalOutputBytes)); @@ -106,7 +103,7 @@ private UserDefinedFunction tinyToOrderedBytesUDF() { }, DataTypes.BinaryType).withName("TINY_ORDERED_BYTES"); this.inputCol++; - this.totalOutputBytes += Byte.BYTES; + increaseOutputSize(ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE); return udf; } @@ -122,7 +119,7 @@ private UserDefinedFunction shortToOrderedBytesUDF() { }, DataTypes.BinaryType).withName("SHORT_ORDERED_BYTES"); this.inputCol++; - this.totalOutputBytes += Short.BYTES; + increaseOutputSize(ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE); return udf; } @@ -138,7 +135,7 @@ private UserDefinedFunction intToOrderedBytesUDF() { }, DataTypes.BinaryType).withName("INT_ORDERED_BYTES"); this.inputCol++; - this.totalOutputBytes += Integer.BYTES; + increaseOutputSize(ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE); return udf; } @@ -154,7 +151,7 @@ private UserDefinedFunction longToOrderedBytesUDF() { }, DataTypes.BinaryType).withName("LONG_ORDERED_BYTES"); this.inputCol++; - this.totalOutputBytes += Long.BYTES; + increaseOutputSize(ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE); return udf; } @@ -170,7 +167,7 @@ private UserDefinedFunction floatToOrderedBytesUDF() { }, DataTypes.BinaryType).withName("FLOAT_ORDERED_BYTES"); this.inputCol++; - this.totalOutputBytes += Float.BYTES; + increaseOutputSize(ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE); return udf; } @@ -186,7 +183,7 @@ private UserDefinedFunction doubleToOrderedBytesUDF() { }, DataTypes.BinaryType).withName("DOUBLE_ORDERED_BYTES"); this.inputCol++; - this.totalOutputBytes += Double.BYTES; + increaseOutputSize(ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE); return udf; } @@ -202,7 +199,7 @@ private UserDefinedFunction stringToOrderedBytesUDF() { .withName("STRING-LEXICAL-BYTES"); this.inputCol++; - this.totalOutputBytes += varTypeSize; + increaseOutputSize(varTypeSize); return udf; } @@ -215,7 +212,7 @@ private UserDefinedFunction bytesTruncateUDF() { .withName("BYTE-TRUNCATE"); this.inputCol++; - this.totalOutputBytes += varTypeSize; + increaseOutputSize(varTypeSize); return udf; } @@ -230,7 +227,7 @@ private UserDefinedFunction booleanToOrderedBytesUDF() { .withName("BOOLEAN-LEXICAL-BYTES"); this.inputCol++; - this.totalOutputBytes += ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE; + increaseOutputSize(ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE); return udf; } @@ -272,4 +269,11 @@ Column sortedLexicographically(Column column, DataType type) { column, type)); } } + + private void increaseOutputSize(int bytes) 
{ + totalOutputBytes += bytes; + if (totalOutputBytes > maxOutputSize) { + totalOutputBytes = maxOutputSize; + } + } } From e5fdd4b1afe19e5abb7d9d24501aebc4f5fa3066 Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Tue, 19 Apr 2022 14:48:41 -0500 Subject: [PATCH 27/30] Reviewer Feedback --- .../spark/actions/SparkZOrderStrategy.java | 2 +- .../iceberg/spark/actions/SparkZOrderUDF.java | 3 +-- .../spark/data/SparkParquetWriters.java | 22 ------------------- 3 files changed, 2 insertions(+), 25 deletions(-) diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderStrategy.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderStrategy.java index 234401d58039..ecb00ad4118b 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderStrategy.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderStrategy.java @@ -143,7 +143,7 @@ public String name() { @Override protected void validateOptions() { - // TODO implement ZOrder Strategy in API Module + // Ignore SortStrategy validation return; } diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java index 302d969053a8..dd19e1afff34 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java @@ -265,8 +265,7 @@ Column sortedLexicographically(Column column, DataType type) { return longToOrderedBytesUDF().apply(column.cast(DataTypes.LongType)); } else { throw new IllegalArgumentException( - String.format("Cannot use column %s of type %s in ZOrdering, the type is unsupported", - column, type)); + String.format("Cannot use column %s of type %s in ZOrdering, the type is unsupported", column, type)); } } diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java index 845fe305e494..436927f49025 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java @@ -301,28 +301,6 @@ public void write(int repetitionLevel, byte[] bytes) { } } - private static PrimitiveWriter uuids(ColumnDescriptor desc) { - return new UUIDWriter(desc); - } - - private static class UUIDWriter extends PrimitiveWriter { - private ByteBuffer buffer = ByteBuffer.allocate(16); - - private UUIDWriter(ColumnDescriptor desc) { - super(desc); - } - - @Override - public void write(int repetitionLevel, UTF8String string) { - UUID uuid = UUID.fromString(string.toString()); - buffer.rewind(); - buffer.putLong(uuid.getMostSignificantBits()); - buffer.putLong(uuid.getLeastSignificantBits()); - buffer.rewind(); - column.writeBinary(repetitionLevel, Binary.fromReusedByteBuffer(buffer)); - } - } - private static class ArrayDataWriter extends RepeatedWriter { private final DataType elementType; From f4a100da53bcf82b6340c0158c72ab0412682abf Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Tue, 19 Apr 2022 14:53:47 -0500 Subject: [PATCH 28/30] One more cleanup --- .../java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java 
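On the size accounting whose body opens this chunk: every column-binding UDF reports its byte contribution through increaseOutputSize, and the running total saturates at maxOutputSize so the interleaved sort key stays bounded however many or however wide the columns are; the z-value is effectively truncated to a prefix. Patch 30 below later collapses the if statement into Math.min, and the two forms are equivalent, as in this standalone sketch (the field names mirror the patch, the enclosing class is illustrative):

class OutputSizeCap {
  private final int maxOutputSize;
  private int totalOutputBytes = 0;

  OutputSizeCap(int maxOutputSize) {
    this.maxOutputSize = maxOutputSize;
  }

  // Saturating add: identical to "add, then clamp to maxOutputSize if exceeded".
  void increaseOutputSize(int bytes) {
    totalOutputBytes = Math.min(totalOutputBytes + bytes, maxOutputSize);
  }

  int totalOutputBytes() {
    return totalOutputBytes;
  }
}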
b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java index dd19e1afff34..c370008517e4 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java @@ -42,6 +42,7 @@ import org.apache.spark.sql.types.ShortType; import org.apache.spark.sql.types.StringType; import org.apache.spark.sql.types.TimestampType; +import scala.collection.JavaConverters; import scala.collection.Seq; class SparkZOrderUDF implements Serializable { @@ -87,8 +88,7 @@ private ByteBuffer inputBuffer(int position, int size) { } byte[] interleaveBits(Seq scalaBinary) { - byte[][] columnsBinary = scala.collection.JavaConverters.seqAsJavaList(scalaBinary) - .toArray(inputHolder.get()); + byte[][] columnsBinary = JavaConverters.seqAsJavaList(scalaBinary).toArray(inputHolder.get()); return ZOrderByteUtils.interleaveBits(columnsBinary, totalOutputBytes, outputBuffer.get()); } From 49a9703b8f2112999ea0d45c9ac06df8b0f6eeb1 Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Tue, 19 Apr 2022 15:18:32 -0500 Subject: [PATCH 29/30] CheckStyle --- .../java/org/apache/iceberg/spark/data/SparkParquetWriters.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java index 436927f49025..5e268d26ed9c 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java @@ -19,12 +19,10 @@ package org.apache.iceberg.spark.data; -import java.nio.ByteBuffer; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.NoSuchElementException; -import java.util.UUID; import org.apache.iceberg.parquet.ParquetValueReaders.ReusableEntry; import org.apache.iceberg.parquet.ParquetValueWriter; import org.apache.iceberg.parquet.ParquetValueWriters; From bec34e915e49336ceb2dd177b63cd62a93a43c8e Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Wed, 20 Apr 2022 16:26:33 -0500 Subject: [PATCH 30/30] Reviewer Comments --- .../apache/iceberg/util/ZOrderByteUtils.java | 1 - .../spark/actions/SparkZOrderStrategy.java | 2 +- .../iceberg/spark/actions/SparkZOrderUDF.java | 32 ++++++++----------- 3 files changed, 15 insertions(+), 20 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java index 399a8bd21ef1..8a1b419a3bb0 100644 --- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java +++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java @@ -46,7 +46,6 @@ public class ZOrderByteUtils { public static final int PRIMITIVE_BUFFER_SIZE = 8; private ZOrderByteUtils() { - } static ByteBuffer allocatePrimitiveBuffer() { diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderStrategy.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderStrategy.java index ecb00ad4118b..cdd47fe31372 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderStrategy.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderStrategy.java @@ -127,7 +127,7 @@ public SparkZOrderStrategy(Table table, SparkSession spark, List zOrderC if (!partZOrderCols.isEmpty()) { 
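// Editorial note, not in the patch: even with the warn-and-drop behavior, the guard a few
// lines below must still throw when every requested column turns out to be identity
// partitioned, because with no columns left the interleaved z-value would be empty and the
// rewrite would cluster on nothing.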
          LOG.warn("Cannot ZOrder on an Identity partition column as these values are constant within a partition " +
-             "they will be removed from the ZOrder expression: {}", partZOrderCols);
+             "and will be removed from the ZOrder expression: {}", partZOrderCols);
          zOrderColNames.removeAll(partZOrderCols);
          Preconditions.checkArgument(!zOrderColNames.isEmpty(),
              "Cannot perform ZOrdering, all columns provided were identity partition columns and cannot be used.");
        }
diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java
index c370008517e4..eea3689211e2 100644
--- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java
+++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java
@@ -188,6 +188,19 @@ private UserDefinedFunction doubleToOrderedBytesUDF() {
     return udf;
   }
 
+  private UserDefinedFunction booleanToOrderedBytesUDF() {
+    int position = inputCol;
+    UserDefinedFunction udf = functions.udf((Boolean value) -> {
+      ByteBuffer buffer = inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE);
+      buffer.put(0, (byte) (value ? -127 : 0));
+      return buffer.array();
+    }, DataTypes.BinaryType).withName("BOOLEAN-LEXICAL-BYTES");
+
+    this.inputCol++;
+    increaseOutputSize(ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE);
+    return udf;
+  }
+
   private UserDefinedFunction stringToOrderedBytesUDF() {
     int position = inputCol;
     UserDefinedFunction udf = functions.udf((String value) ->
@@ -217,20 +230,6 @@ private UserDefinedFunction bytesTruncateUDF() {
     return udf;
   }
 
-  private UserDefinedFunction booleanToOrderedBytesUDF() {
-    int position = inputCol;
-    UserDefinedFunction udf = functions.udf((Boolean value) -> {
-      ByteBuffer buffer = inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE);
-      buffer.put(0, (byte) (value ? -127 : 0));
-      return buffer.array();
-    }, DataTypes.BinaryType)
-        .withName("BOOLEAN-LEXICAL-BYTES");
-
-    this.inputCol++;
-    increaseOutputSize(ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE);
-    return udf;
-  }
-
   private final UserDefinedFunction interleaveUDF =
       functions.udf((Seq<byte[]> arrayBinary) -> interleaveBits(arrayBinary), DataTypes.BinaryType)
           .withName("INTERLEAVE_BYTES");
@@ -270,9 +269,6 @@ Column sortedLexicographically(Column column, DataType type) {
   }
 
   private void increaseOutputSize(int bytes) {
-    totalOutputBytes += bytes;
-    if (totalOutputBytes > maxOutputSize) {
-      totalOutputBytes = maxOutputSize;
-    }
+    totalOutputBytes = Math.min(totalOutputBytes + bytes, maxOutputSize);
   }
 }
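A closing note on the boolean encoding the series settles on: false maps to 0x00 and true to (byte) -127, which reads as 0x81 unsigned, so an unsigned lexicographic comparison orders false before true, and the flag occupies a full primitive-buffer slot like the other fixed-width types. A quick self-contained check of that ordering (a sketch; the 8-byte size mirrors ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE):

import java.util.Arrays;

class BooleanOrderingCheck {
  static byte[] toOrderedBytes(boolean value) {
    byte[] buffer = new byte[8];            // PRIMITIVE_BUFFER_SIZE in the patch
    buffer[0] = (byte) (value ? -127 : 0);  // 0x81 for true, 0x00 for false
    return buffer;
  }

  // Unsigned lexicographic comparison, the ordering a binary sort key implies.
  static int compareUnsigned(byte[] a, byte[] b) {
    for (int i = 0; i < Math.min(a.length, b.length); i++) {
      int cmp = Integer.compare(a[i] & 0xFF, b[i] & 0xFF);
      if (cmp != 0) {
        return cmp;
      }
    }
    return Integer.compare(a.length, b.length);
  }

  public static void main(String[] args) {
    byte[] f = toOrderedBytes(false);
    byte[] t = toOrderedBytes(true);
    System.out.println(compareUnsigned(f, t) < 0); // true: false sorts before true
    System.out.println(Arrays.toString(t));        // [-127, 0, 0, 0, 0, 0, 0, 0]
  }
}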