From 7cb94b219c381bf2a19aa42a6885c2d147996f6f Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Fri, 21 Jan 2022 16:46:46 -0600 Subject: [PATCH 01/30] Core: Adds Utility Class for Implementing ZOrdering --- build.gradle | 1 + .../apache/iceberg/util/ZOrderByteUtils.java | 128 +++++++++ .../iceberg/util/TestZOrderByteUtil.java | 244 ++++++++++++++++++ 3 files changed, 373 insertions(+) create mode 100644 core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java create mode 100644 core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java diff --git a/build.gradle b/build.gradle index 25bf761b242c..8609dbc959d3 100644 --- a/build.gradle +++ b/build.gradle @@ -230,6 +230,7 @@ project(':iceberg-core') { testImplementation 'org.mock-server:mockserver-netty' testImplementation 'org.mock-server:mockserver-client-java' testImplementation "org.xerial:sqlite-jdbc" + testImplementation "org.apache.commons:commons-lang3" testImplementation project(path: ':iceberg-api', configuration: 'testArtifacts') } } diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java new file mode 100644 index 000000000000..4ef3120a2217 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.util; + +import java.util.Arrays; + +/** + * Within Z-Ordering the byte representations of objects being compared must be ordered, + * this requires several types to be transformed when converted to bytes. The goal is to + * map object's whose byte representation are not lexicographically ordered into representations + * that are lexicographically ordered. + * Most of these techniques are derived from + * https://aws.amazon.com/blogs/database/z-order-indexing-for-multifaceted-queries-in-amazon-dynamodb-part-2/ + */ +public class ZOrderByteUtils { + + private ZOrderByteUtils() { + + } + + /** + * Signed ints do not have their bytes in magnitude order because of the sign bit. + * To fix this, flip the sign bit so that all negatives are ordered before positives. This essentially + * shifts the 0 value so that we don't break our ordering when we cross the new 0 value. 
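+ *
+ * For example (illustrative values, not part of the original patch): after the flip,
+ * -1 (0xFFFFFFFF) becomes 0x7FFFFFFF and 0 (0x00000000) becomes 0x80000000, so -1
+ * still sorts before 0 when the bytes are compared as unsigned values.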
+ */ + public static byte[] orderIntLikeBytes(byte[] intBytes, int size) { + if (intBytes == null) { + return new byte[size]; + } + intBytes[0] = (byte) (intBytes[0] ^ (1 << 7)); + return intBytes; + } + + /** + * IEEE 754 : + * “If two floating-point numbers in the same format are ordered (say, x < y), + * they are ordered the same way when their bits are reinterpreted as sign-magnitude integers.” + * + * Which means floats can be treated as sign magnitude integers which can then be converted into lexicographically + * comparable bytes + */ + public static byte[] orderFloatLikeBytes(byte[] floatBytes, int size) { + if (floatBytes == null) { + return new byte[size]; + } + if ((floatBytes[0] & (1 << 7)) == 0) { + // The signed magnitude is positive set the first bit (reversing the sign so positives order after negatives) + floatBytes[0] = (byte) (floatBytes[0] | (1 << 7)); + } else { + // The signed magnitude is negative so flip the first bit (reversing the sign so positives order after negatives) + // Then flip all remaining bits so numbers with greater negative magnitude come before those + // with less magnitude (reverse the order) + for (int i = 0; i < floatBytes.length; i++) { + floatBytes[i] = (byte) ~floatBytes[i]; + } + } + return floatBytes; + } + + /** + * Strings are lexicographically sortable BUT if different byte array lengths will + * ruin the Z-Ordering. (ZOrder requires that a given column contribute the same number of bytes every time). + * This implementation just uses a set size to for all output byte representations. Truncating longer strings + * and right padding 0 for shorter strings. + */ + public static byte[] orderUTF8LikeBytes(byte[] stringBytes, int size) { + if (stringBytes == null) { + return new byte[size]; + } + return Arrays.copyOf(stringBytes, size); + } + + /** + * Interleave bits using a naive loop. + * @param columnsBinary an array of byte arrays, none of which are empty + * @return their bits interleaved + */ + public static byte[] interleaveBits(byte[][] columnsBinary) { + int interleavedSize = Arrays.stream(columnsBinary).mapToInt(a -> a.length).sum(); + byte[] interleavedBytes = new byte[interleavedSize]; + int sourceBit = 7; + int sourceByte = 0; + int sourceColumn = 0; + int interleaveBit = 7; + int interleaveByte = 0; + while (interleaveByte < interleavedSize) { + // Take what we have, Get the source Bit of the source Byte, move it to the interleaveBit position + interleavedBytes[interleaveByte] = + (byte) (interleavedBytes[interleaveByte] | + (columnsBinary[sourceColumn][sourceByte] & 1 << sourceBit) >> sourceBit << interleaveBit); + + if (--interleaveBit == -1) { + // Finished a byte in our interleave byte array start a new byte + interleaveByte++; + interleaveBit = 7; + } + + // Find next column with a byte we can use + do { + if (++sourceColumn == columnsBinary.length) { + sourceColumn = 0; + if (--sourceBit == -1) { + sourceByte++; + sourceBit = 7; + } + } + } while (columnsBinary[sourceColumn].length <= sourceByte && interleaveByte < interleavedSize); + } + return interleavedBytes; + } +} diff --git a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java new file mode 100644 index 000000000000..87d69dc99182 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java @@ -0,0 +1,244 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + + +package org.apache.iceberg.util; + +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Random; +import org.apache.commons.lang3.RandomStringUtils; +import org.apache.iceberg.relocated.com.google.common.primitives.UnsignedBytes; +import org.junit.Assert; +import org.junit.Test; + +public class TestZOrderByteUtil { + private static final byte IIIIIIII = (byte) 255; + private static final byte IOIOIOIO = (byte) 170; + private static final byte OIOIOIOI = (byte) 85; + private static final byte OOOOIIII = (byte) 15; + private static final byte OOOOOOOI = (byte) 1; + private static final byte OOOOOOOO = (byte) 0; + + private static final int NUM_TESTS = 100000; + + private final Random random = new Random(42); + + private String bytesToString(byte[] bytes) { + StringBuilder result = new StringBuilder(); + for (byte b : bytes) { + result.append(String.format("%8s", Integer.toBinaryString(b & 0xFF)).replace(' ', '0')); + } + return result.toString(); + } + + /** + * Returns a non-0 length byte array + */ + private byte[] generateRandomBytes() { + int length = Math.abs(random.nextInt(100) + 1); + byte[] result = new byte[length]; + random.nextBytes(result); + return result; + } + + /** + * Test method to ensure correctness of byte interleaving code + */ + private String interleaveStrings(String[] strings) { + StringBuilder result = new StringBuilder(); + int totalLength = Arrays.stream(strings).mapToInt(String::length).sum(); + int substringIndex = 0; + int characterIndex = 0; + while (characterIndex < totalLength) { + for (String str : strings) { + if (substringIndex < str.length()) { + result.append(str.charAt(substringIndex)); + characterIndex++; + } + } + substringIndex++; + } + return result.toString(); + } + + /** + * Compares the result of a string based interleaving algorithm implemented above + * versus the binary bit-shifting algorithm used in ZOrderByteUtils. Either both + * algorithms are identically wrong or are both identically correct. 
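+ *
+ * For example (illustrative): interleaving the single-byte columns 11111111 and
+ * 00000000 must yield 10101010 10101010 under both implementations.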
+ */ + @Test + public void testInterleaveRandomExamples() { + for (int test = 0; test < NUM_TESTS; test++) { + int numByteArrays = Math.abs(random.nextInt(6)) + 1; + byte[][] testBytes = new byte[numByteArrays][]; + String[] testStrings = new String[numByteArrays]; + for (int byteIndex = 0; byteIndex < numByteArrays; byteIndex++) { + testBytes[byteIndex] = generateRandomBytes(); + testStrings[byteIndex] = bytesToString(testBytes[byteIndex]); + } + byte[] byteResult = ZOrderByteUtils.interleaveBits(testBytes); + String byteResultAsString = bytesToString(byteResult); + + String stringResult = interleaveStrings(testStrings); + + Assert.assertEquals("String interleave didn't match byte interleave", stringResult, byteResultAsString); + } + } + + @Test + public void testInterleaveEmptyBits() { + byte[][] test = new byte[4][10]; + byte[] expected = new byte[40]; + + Assert.assertArrayEquals("Should combine empty arrays", + expected, ZOrderByteUtils.interleaveBits(test)); + } + + @Test + public void testInterleaveFullBits() { + byte[][] test = new byte[4][]; + test[0] = new byte[]{IIIIIIII, IIIIIIII}; + test[1] = new byte[]{IIIIIIII}; + test[2] = new byte[0]; + test[3] = new byte[]{IIIIIIII, IIIIIIII, IIIIIIII}; + byte[] expected = new byte[]{IIIIIIII, IIIIIIII, IIIIIIII, IIIIIIII, IIIIIIII, IIIIIIII}; + + Assert.assertArrayEquals("Should combine full arrays", + expected, ZOrderByteUtils.interleaveBits(test)); + } + + @Test + public void testInterleaveMixedBits() { + byte[][] test = new byte[4][]; + test[0] = new byte[]{OOOOOOOI, IIIIIIII, OOOOOOOO, OOOOIIII}; + test[1] = new byte[]{OOOOOOOI, OOOOOOOO, IIIIIIII}; + test[2] = new byte[]{OOOOOOOI}; + test[3] = new byte[]{OOOOOOOI}; + byte[] expected = new byte[]{ + OOOOOOOO, OOOOOOOO, OOOOOOOO, OOOOIIII, + IOIOIOIO, IOIOIOIO, + OIOIOIOI, OIOIOIOI, + OOOOIIII}; + Assert.assertArrayEquals("Should combine mixed byte arrays", + expected, ZOrderByteUtils.interleaveBits(test)); + } + + @Test + public void testIntOrdering() { + for (int i = 0; i < NUM_TESTS; i++) { + int aInt = random.nextInt(); + int bInt = random.nextInt(); + int intCompare = Integer.compare(aInt, bInt); + byte[] aBytes = ZOrderByteUtils.orderIntLikeBytes(bytesOf(aInt), 4); + byte[] bBytes = ZOrderByteUtils.orderIntLikeBytes(bytesOf(bInt), 4); + int byteCompare = UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes); + + Assert.assertTrue(String.format( + "Ordering of ints should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", + aInt, bInt, intCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare), + (intCompare ^ byteCompare) >= 0); + } + } + + @Test + public void testLongOrdering() { + for (int i = 0; i < NUM_TESTS; i++) { + long aLong = random.nextInt(); + long bLong = random.nextInt(); + int longCompare = Long.compare(aLong, bLong); + byte[] aBytes = ZOrderByteUtils.orderIntLikeBytes(bytesOf(aLong), 8); + byte[] bBytes = ZOrderByteUtils.orderIntLikeBytes(bytesOf(bLong), 8); + int byteCompare = UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes); + + Assert.assertTrue(String.format( + "Ordering of ints should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", + aLong, bLong, longCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare), + (longCompare ^ byteCompare) >= 0); + } + } + + @Test + public void testFloatOrdering() { + for (int i = 0; i < NUM_TESTS; i++) { + float aFloat = random.nextFloat(); + float bFloat = random.nextFloat(); + int floatCompare = Float.compare(aFloat, bFloat); + byte[] aBytes = 
ZOrderByteUtils.orderFloatLikeBytes(bytesOf(aFloat), 4); + byte[] bBytes = ZOrderByteUtils.orderFloatLikeBytes(bytesOf(bFloat), 4); + int byteCompare = UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes); + + Assert.assertTrue(String.format( + "Ordering of ints should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", + aFloat, bFloat, floatCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare), + (floatCompare ^ byteCompare) >= 0); + } + } + + @Test + public void testDoubleOrdering() { + for (int i = 0; i < NUM_TESTS; i++) { + double aDouble = random.nextDouble(); + double bDouble = random.nextDouble(); + int doubleCompare = Double.compare(aDouble, bDouble); + byte[] aBytes = ZOrderByteUtils.orderFloatLikeBytes(bytesOf(aDouble), 8); + byte[] bBytes = ZOrderByteUtils.orderFloatLikeBytes(bytesOf(bDouble), 8); + int byteCompare = UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes); + + Assert.assertTrue(String.format( + "Ordering of ints should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", + aDouble, bDouble, doubleCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare), + (doubleCompare ^ byteCompare) >= 0); + } + } + + @Test + public void testStringOrdering() { + for (int i = 0; i < NUM_TESTS; i++) { + String aString = RandomStringUtils.random(random.nextInt(35), true, true); + String bString = RandomStringUtils.random(random.nextInt(35), true, true); + int stringCompare = aString.compareTo(bString); + byte[] aBytes = ZOrderByteUtils.orderUTF8LikeBytes(aString.getBytes(StandardCharsets.UTF_8), 128); + byte[] bBytes = ZOrderByteUtils.orderUTF8LikeBytes(bString.getBytes(StandardCharsets.UTF_8), 128); + int byteCompare = UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes); + + Assert.assertTrue(String.format( + "Ordering of ints should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", + aString, bString, stringCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare), + (stringCompare ^ byteCompare) >= 0); + } + } + + private byte[] bytesOf(int num) { + return ByteBuffer.allocate(4).putInt(num).array(); + } + + private byte[] bytesOf(long num) { + return ByteBuffer.allocate(8).putLong(num).array(); + } + + private byte[] bytesOf(float num) { + return ByteBuffer.allocate(4).putFloat(num).array(); + } + + private byte[] bytesOf(double num) { + return ByteBuffer.allocate(8).putDouble(num).array(); + } +} From 92516f702ed398ae3d287d1ab81dd43d8a9a1cce Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Tue, 25 Jan 2022 15:45:02 -0600 Subject: [PATCH 02/30] Fix JavaDoc --- core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java index 4ef3120a2217..759f101b0cc5 100644 --- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java +++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java @@ -50,7 +50,7 @@ public static byte[] orderIntLikeBytes(byte[] intBytes, int size) { /** * IEEE 754 : - * “If two floating-point numbers in the same format are ordered (say, x < y), + * “If two floating-point numbers in the same format are ordered (say, x \< y), * they are ordered the same way when their bits are reinterpreted as sign-magnitude integers.” * * Which means floats can be treated as sign magnitude integers which can then be converted into lexicographically From 
ef1d214c7408d284c5cd7283225da6bb02f350ee Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Mon, 31 Jan 2022 12:31:49 -0600 Subject: [PATCH 03/30] Switch Implementations to work on Primitives instead of ByteArrays --- .../apache/iceberg/util/ZOrderByteUtils.java | 66 ++++++++------ .../iceberg/util/TestZOrderByteUtil.java | 86 ++++++++----------- 2 files changed, 74 insertions(+), 78 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java index 759f101b0cc5..571ea24d5039 100644 --- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java +++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java @@ -19,6 +19,7 @@ package org.apache.iceberg.util; +import java.nio.ByteBuffer; import java.util.Arrays; /** @@ -28,6 +29,9 @@ * that are lexicographically ordered. * Most of these techniques are derived from * https://aws.amazon.com/blogs/database/z-order-indexing-for-multifaceted-queries-in-amazon-dynamodb-part-2/ + * + * Some implementation is taken from + * https://github.com/apache/hbase/blob/master/hbase-common/src/main/java/org/apache/hadoop/hbase/util/OrderedBytes.java */ public class ZOrderByteUtils { @@ -40,12 +44,19 @@ private ZOrderByteUtils() { * To fix this, flip the sign bit so that all negatives are ordered before positives. This essentially * shifts the 0 value so that we don't break our ordering when we cross the new 0 value. */ - public static byte[] orderIntLikeBytes(byte[] intBytes, int size) { - if (intBytes == null) { - return new byte[size]; - } - intBytes[0] = (byte) (intBytes[0] ^ (1 << 7)); - return intBytes; + public static byte[] intToOrderedBytes(int val) { + ByteBuffer bytes = ByteBuffer.allocate(Integer.BYTES); + bytes.putInt(val ^ 0x80000000); + return bytes.array(); + } + + /** + * Signed longs are treated the same as the signed ints + */ + public static byte[] longToOrderBytes(long val) { + ByteBuffer bytes = ByteBuffer.allocate(Long.BYTES); + bytes.putLong(val ^ 0x8000000000000000L); + return bytes.array(); } /** @@ -56,22 +67,23 @@ public static byte[] orderIntLikeBytes(byte[] intBytes, int size) { * Which means floats can be treated as sign magnitude integers which can then be converted into lexicographically * comparable bytes */ - public static byte[] orderFloatLikeBytes(byte[] floatBytes, int size) { - if (floatBytes == null) { - return new byte[size]; - } - if ((floatBytes[0] & (1 << 7)) == 0) { - // The signed magnitude is positive set the first bit (reversing the sign so positives order after negatives) - floatBytes[0] = (byte) (floatBytes[0] | (1 << 7)); - } else { - // The signed magnitude is negative so flip the first bit (reversing the sign so positives order after negatives) - // Then flip all remaining bits so numbers with greater negative magnitude come before those - // with less magnitude (reverse the order) - for (int i = 0; i < floatBytes.length; i++) { - floatBytes[i] = (byte) ~floatBytes[i]; - } - } - return floatBytes; + public static byte[] floatToOrderedBytes(float val) { + ByteBuffer bytes = ByteBuffer.allocate(Integer.BYTES); + int ival = Float.floatToIntBits(val); + ival ^= ((ival >> (Integer.SIZE - 1)) | Integer.MIN_VALUE); + bytes.putInt(ival); + return bytes.array(); + } + + /** + * Doubles are treated the same as floats + */ + public static byte[] doubleToOrderedBytes(double val) { + ByteBuffer bytes = ByteBuffer.allocate(Long.BYTES); + long lng = Double.doubleToLongBits(val); + lng ^= ((lng >> 
(Long.SIZE - 1)) | Long.MIN_VALUE); + bytes.putLong(lng); + return bytes.array(); } /** @@ -80,11 +92,13 @@ public static byte[] orderFloatLikeBytes(byte[] floatBytes, int size) { * This implementation just uses a set size to for all output byte representations. Truncating longer strings * and right padding 0 for shorter strings. */ - public static byte[] orderUTF8LikeBytes(byte[] stringBytes, int size) { - if (stringBytes == null) { - return new byte[size]; + public static byte[] stringToOrderedBytes(String val, int length) { + ByteBuffer bytes = ByteBuffer.allocate(length); + if (val != null) { + int maxLength = Math.min(length, val.length()); + bytes.put(val.getBytes(), 0, maxLength); } - return Arrays.copyOf(stringBytes, size); + return bytes.array(); } /** diff --git a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java index 87d69dc99182..b34f950f90c8 100644 --- a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java +++ b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java @@ -20,8 +20,6 @@ package org.apache.iceberg.util; -import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Random; import org.apache.commons.lang3.RandomStringUtils; @@ -146,15 +144,15 @@ public void testIntOrdering() { for (int i = 0; i < NUM_TESTS; i++) { int aInt = random.nextInt(); int bInt = random.nextInt(); - int intCompare = Integer.compare(aInt, bInt); - byte[] aBytes = ZOrderByteUtils.orderIntLikeBytes(bytesOf(aInt), 4); - byte[] bBytes = ZOrderByteUtils.orderIntLikeBytes(bytesOf(bInt), 4); - int byteCompare = UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes); + int intCompare = Integer.signum(Integer.compare(aInt, bInt)); + byte[] aBytes = ZOrderByteUtils.intToOrderedBytes(aInt); + byte[] bBytes = ZOrderByteUtils.intToOrderedBytes(bInt); + int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); - Assert.assertTrue(String.format( + Assert.assertEquals(String.format( "Ordering of ints should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", aInt, bInt, intCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare), - (intCompare ^ byteCompare) >= 0); + intCompare, byteCompare); } } @@ -163,15 +161,15 @@ public void testLongOrdering() { for (int i = 0; i < NUM_TESTS; i++) { long aLong = random.nextInt(); long bLong = random.nextInt(); - int longCompare = Long.compare(aLong, bLong); - byte[] aBytes = ZOrderByteUtils.orderIntLikeBytes(bytesOf(aLong), 8); - byte[] bBytes = ZOrderByteUtils.orderIntLikeBytes(bytesOf(bLong), 8); - int byteCompare = UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes); + int longCompare = Integer.signum(Long.compare(aLong, bLong)); + byte[] aBytes = ZOrderByteUtils.longToOrderBytes(aLong); + byte[] bBytes = ZOrderByteUtils.longToOrderBytes(bLong); + int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); - Assert.assertTrue(String.format( - "Ordering of ints should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", + Assert.assertEquals(String.format( + "Ordering of longs should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", aLong, bLong, longCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare), - (longCompare ^ byteCompare) >= 0); + longCompare, byteCompare); } } @@ -180,15 +178,15 @@ public void testFloatOrdering() { for (int i = 0; i 
< NUM_TESTS; i++) { float aFloat = random.nextFloat(); float bFloat = random.nextFloat(); - int floatCompare = Float.compare(aFloat, bFloat); - byte[] aBytes = ZOrderByteUtils.orderFloatLikeBytes(bytesOf(aFloat), 4); - byte[] bBytes = ZOrderByteUtils.orderFloatLikeBytes(bytesOf(bFloat), 4); - int byteCompare = UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes); + int floatCompare = Integer.signum(Float.compare(aFloat, bFloat)); + byte[] aBytes = ZOrderByteUtils.floatToOrderedBytes(aFloat); + byte[] bBytes = ZOrderByteUtils.floatToOrderedBytes(bFloat); + int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); - Assert.assertTrue(String.format( - "Ordering of ints should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", + Assert.assertEquals(String.format( + "Ordering of floats should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", aFloat, bFloat, floatCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare), - (floatCompare ^ byteCompare) >= 0); + floatCompare, byteCompare); } } @@ -197,15 +195,15 @@ public void testDoubleOrdering() { for (int i = 0; i < NUM_TESTS; i++) { double aDouble = random.nextDouble(); double bDouble = random.nextDouble(); - int doubleCompare = Double.compare(aDouble, bDouble); - byte[] aBytes = ZOrderByteUtils.orderFloatLikeBytes(bytesOf(aDouble), 8); - byte[] bBytes = ZOrderByteUtils.orderFloatLikeBytes(bytesOf(bDouble), 8); - int byteCompare = UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes); + int doubleCompare = Integer.signum(Double.compare(aDouble, bDouble)); + byte[] aBytes = ZOrderByteUtils.doubleToOrderedBytes(aDouble); + byte[] bBytes = ZOrderByteUtils.doubleToOrderedBytes(bDouble); + int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); - Assert.assertTrue(String.format( - "Ordering of ints should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", + Assert.assertEquals(String.format( + "Ordering of doubles should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", aDouble, bDouble, doubleCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare), - (doubleCompare ^ byteCompare) >= 0); + doubleCompare, byteCompare); } } @@ -214,31 +212,15 @@ public void testStringOrdering() { for (int i = 0; i < NUM_TESTS; i++) { String aString = RandomStringUtils.random(random.nextInt(35), true, true); String bString = RandomStringUtils.random(random.nextInt(35), true, true); - int stringCompare = aString.compareTo(bString); - byte[] aBytes = ZOrderByteUtils.orderUTF8LikeBytes(aString.getBytes(StandardCharsets.UTF_8), 128); - byte[] bBytes = ZOrderByteUtils.orderUTF8LikeBytes(bString.getBytes(StandardCharsets.UTF_8), 128); - int byteCompare = UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes); + int stringCompare = Integer.signum(aString.compareTo(bString)); + byte[] aBytes = ZOrderByteUtils.stringToOrderedBytes(aString, 128); + byte[] bBytes = ZOrderByteUtils.stringToOrderedBytes(bString, 128); + int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); - Assert.assertTrue(String.format( - "Ordering of ints should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", + Assert.assertEquals(String.format( + "Ordering of strings should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", aString, bString, stringCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare), - (stringCompare ^ byteCompare) >= 0); + 
stringCompare, byteCompare); } } - - private byte[] bytesOf(int num) { - return ByteBuffer.allocate(4).putInt(num).array(); - } - - private byte[] bytesOf(long num) { - return ByteBuffer.allocate(8).putLong(num).array(); - } - - private byte[] bytesOf(float num) { - return ByteBuffer.allocate(4).putFloat(num).array(); - } - - private byte[] bytesOf(double num) { - return ByteBuffer.allocate(8).putDouble(num).array(); - } } From 545e373e055ffdc71b2d0c683675032cc1566af0 Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Mon, 31 Jan 2022 16:47:36 -0600 Subject: [PATCH 04/30] Clean up RandomStringUtilUsage --- build.gradle | 1 - .../main/java/org/apache/iceberg/util/ZOrderByteUtils.java | 6 ++++-- .../java/org/apache/iceberg/util/TestZOrderByteUtil.java | 6 +++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/build.gradle b/build.gradle index 8609dbc959d3..25bf761b242c 100644 --- a/build.gradle +++ b/build.gradle @@ -230,7 +230,6 @@ project(':iceberg-core') { testImplementation 'org.mock-server:mockserver-netty' testImplementation 'org.mock-server:mockserver-client-java' testImplementation "org.xerial:sqlite-jdbc" - testImplementation "org.apache.commons:commons-lang3" testImplementation project(path: ':iceberg-api', configuration: 'testArtifacts') } } diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java index 571ea24d5039..750831a9a5e7 100644 --- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java +++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java @@ -120,7 +120,8 @@ public static byte[] interleaveBits(byte[][] columnsBinary) { (byte) (interleavedBytes[interleaveByte] | (columnsBinary[sourceColumn][sourceByte] & 1 << sourceBit) >> sourceBit << interleaveBit); - if (--interleaveBit == -1) { + --interleaveBit; + if (interleaveBit == -1) { // Finished a byte in our interleave byte array start a new byte interleaveByte++; interleaveBit = 7; @@ -128,7 +129,8 @@ public static byte[] interleaveBits(byte[][] columnsBinary) { // Find next column with a byte we can use do { - if (++sourceColumn == columnsBinary.length) { + ++sourceColumn; + if (sourceColumn == columnsBinary.length) { sourceColumn = 0; if (--sourceBit == -1) { sourceByte++; diff --git a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java index b34f950f90c8..17f19ec01af7 100644 --- a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java +++ b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java @@ -22,8 +22,8 @@ import java.util.Arrays; import java.util.Random; -import org.apache.commons.lang3.RandomStringUtils; import org.apache.iceberg.relocated.com.google.common.primitives.UnsignedBytes; +import org.apache.iceberg.types.Types; import org.junit.Assert; import org.junit.Test; @@ -210,8 +210,8 @@ public void testDoubleOrdering() { @Test public void testStringOrdering() { for (int i = 0; i < NUM_TESTS; i++) { - String aString = RandomStringUtils.random(random.nextInt(35), true, true); - String bString = RandomStringUtils.random(random.nextInt(35), true, true); + String aString = (String) RandomUtil.generatePrimitive(Types.StringType.get(), random); + String bString = (String) RandomUtil.generatePrimitive(Types.StringType.get(), random); int stringCompare = Integer.signum(aString.compareTo(bString)); byte[] aBytes = ZOrderByteUtils.stringToOrderedBytes(aString, 128); byte[] bBytes = 
ZOrderByteUtils.stringToOrderedBytes(bString, 128); From 1374247cb298bc5ff6a0d6bc1517ed5f830b49e0 Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Mon, 31 Jan 2022 16:56:07 -0600 Subject: [PATCH 05/30] Fix JavaDoc --- core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java index 750831a9a5e7..e41a5b3be763 100644 --- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java +++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java @@ -61,7 +61,7 @@ public static byte[] longToOrderBytes(long val) { /** * IEEE 754 : - * “If two floating-point numbers in the same format are ordered (say, x \< y), + * “If two floating-point numbers in the same format are ordered (say, x {@literal <} y), * they are ordered the same way when their bits are reinterpreted as sign-magnitude integers.” * * Which means floats can be treated as sign magnitude integers which can then be converted into lexicographically From 2c48f0cbb5431e088ff06af7adb2a70e76109eea Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Mon, 31 Jan 2022 17:41:05 -0600 Subject: [PATCH 06/30] Add Functions for Smaller Types --- .../apache/iceberg/util/ZOrderByteUtils.java | 18 ++++++++++ .../iceberg/util/TestZOrderByteUtil.java | 34 +++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java index e41a5b3be763..ae68b69b745e 100644 --- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java +++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java @@ -59,6 +59,24 @@ public static byte[] longToOrderBytes(long val) { return bytes.array(); } + /** + * Signed shorts are treated the same as the signed ints + */ + public static byte[] shortToOrderBytes(short val) { + ByteBuffer bytes = ByteBuffer.allocate(Short.BYTES); + bytes.putShort((short) (val ^ (0x8000))); + return bytes.array(); + } + + /** + * Signed tiny ints are treated the same as the signed ints + */ + public static byte[] tinyintToOrderedBytes(byte val) { + ByteBuffer bytes = ByteBuffer.allocate(Byte.BYTES); + bytes.put((byte) (val ^ (0x80))); + return bytes.array(); + } + /** * IEEE 754 : * “If two floating-point numbers in the same format are ordered (say, x {@literal <} y), diff --git a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java index 17f19ec01af7..81caf0ad0fb3 100644 --- a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java +++ b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java @@ -173,6 +173,40 @@ public void testLongOrdering() { } } + @Test + public void testShortOrdering() { + for (int i = 0; i < NUM_TESTS; i++) { + short aShort = (short) (random.nextInt() % (Short.MAX_VALUE + 1)); + short bShort = (short) (random.nextInt() % (Short.MAX_VALUE + 1)); + int longCompare = Integer.signum(Long.compare(aShort, bShort)); + byte[] aBytes = ZOrderByteUtils.longToOrderBytes(aShort); + byte[] bBytes = ZOrderByteUtils.longToOrderBytes(bShort); + int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); + + Assert.assertEquals(String.format( + "Ordering of longs should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", + aShort, bShort, 
longCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare), + longCompare, byteCompare); + } + } + + @Test + public void testTinyOrdering() { + for (int i = 0; i < NUM_TESTS; i++) { + long aByte = (byte) (random.nextInt() % (Byte.MAX_VALUE + 1)); + long bByte = (byte) (random.nextInt() % (Byte.MAX_VALUE + 1)); + int longCompare = Integer.signum(Long.compare(aByte, bByte)); + byte[] aBytes = ZOrderByteUtils.longToOrderBytes(aByte); + byte[] bBytes = ZOrderByteUtils.longToOrderBytes(bByte); + int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); + + Assert.assertEquals(String.format( + "Ordering of longs should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", + aByte, bByte, longCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare), + longCompare, byteCompare); + } + } + @Test public void testFloatOrdering() { for (int i = 0; i < NUM_TESTS; i++) { From 55fa4c8b5e57dd482a4b944770e2ba28bd7c55ea Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Mon, 7 Feb 2022 15:00:45 -0600 Subject: [PATCH 07/30] Updates for reviewer comments --- .../org/apache/iceberg/util/ByteBuffers.java | 10 +++ .../apache/iceberg/util/ZOrderByteUtils.java | 84 +++++++++++-------- .../iceberg/util/TestZOrderByteUtil.java | 50 +++++++---- 3 files changed, 93 insertions(+), 51 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/util/ByteBuffers.java b/api/src/main/java/org/apache/iceberg/util/ByteBuffers.java index 213b222dc507..efc05f179f82 100644 --- a/api/src/main/java/org/apache/iceberg/util/ByteBuffers.java +++ b/api/src/main/java/org/apache/iceberg/util/ByteBuffers.java @@ -21,6 +21,7 @@ import java.nio.ByteBuffer; import java.util.Arrays; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; public class ByteBuffers { @@ -46,6 +47,15 @@ public static byte[] toByteArray(ByteBuffer buffer) { } } + public static ByteBuffer reuse(ByteBuffer reuse, int length) { + Preconditions.checkArgument(reuse.hasArray() && reuse.arrayOffset() == 0 && reuse.capacity() == length, + "Cannot reuse buffer: Should be an array %s, should have an offset of 0 %s, should be of size %s was %s", + reuse.hasArray(), reuse.arrayOffset(), length, reuse.capacity()); + reuse.position(0); + reuse.limit(length); + return reuse; + } + public static ByteBuffer copy(ByteBuffer buffer) { if (buffer == null) { return null; diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java index ae68b69b745e..deab4450a61d 100644 --- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java +++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java @@ -44,35 +44,35 @@ private ZOrderByteUtils() { * To fix this, flip the sign bit so that all negatives are ordered before positives. This essentially * shifts the 0 value so that we don't break our ordering when we cross the new 0 value. 
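+ *
+ * The {@code reuse} buffer must be exactly {@link Integer#BYTES} long, e.g.
+ * (illustrative, mirroring the tests)
+ * {@code intToOrderedBytes(val, ByteBuffer.allocate(Integer.BYTES))}.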
*/ - public static byte[] intToOrderedBytes(int val) { - ByteBuffer bytes = ByteBuffer.allocate(Integer.BYTES); + public static byte[] intToOrderedBytes(int val, ByteBuffer reuse) { + ByteBuffer bytes = ByteBuffers.reuse(reuse, Integer.BYTES); bytes.putInt(val ^ 0x80000000); return bytes.array(); } /** - * Signed longs are treated the same as the signed ints + * Signed longs are treated the same as the signed ints in {@link #intToOrderedBytes(int, ByteBuffer)} */ - public static byte[] longToOrderBytes(long val) { - ByteBuffer bytes = ByteBuffer.allocate(Long.BYTES); + public static byte[] longToOrderedBytes(long val, ByteBuffer reuse) { + ByteBuffer bytes = ByteBuffers.reuse(reuse, Long.BYTES); bytes.putLong(val ^ 0x8000000000000000L); return bytes.array(); } /** - * Signed shorts are treated the same as the signed ints + * Signed shorts are treated the same as the signed ints in {@link #intToOrderedBytes(int, ByteBuffer)} */ - public static byte[] shortToOrderBytes(short val) { - ByteBuffer bytes = ByteBuffer.allocate(Short.BYTES); + public static byte[] shortToOrderedBytes(short val, ByteBuffer reuse) { + ByteBuffer bytes = ByteBuffers.reuse(reuse, Short.BYTES); bytes.putShort((short) (val ^ (0x8000))); return bytes.array(); } /** - * Signed tiny ints are treated the same as the signed ints + * Signed tiny ints are treated the same as the signed ints in {@link #intToOrderedBytes(int, ByteBuffer)} */ - public static byte[] tinyintToOrderedBytes(byte val) { - ByteBuffer bytes = ByteBuffer.allocate(Byte.BYTES); + public static byte[] tinyintToOrderedBytes(byte val, ByteBuffer reuse) { + ByteBuffer bytes = ByteBuffers.reuse(reuse, Byte.BYTES); bytes.put((byte) (val ^ (0x80))); return bytes.array(); } @@ -85,8 +85,8 @@ public static byte[] tinyintToOrderedBytes(byte val) { * Which means floats can be treated as sign magnitude integers which can then be converted into lexicographically * comparable bytes */ - public static byte[] floatToOrderedBytes(float val) { - ByteBuffer bytes = ByteBuffer.allocate(Integer.BYTES); + public static byte[] floatToOrderedBytes(float val, ByteBuffer reuse) { + ByteBuffer bytes = ByteBuffers.reuse(reuse, Float.BYTES); int ival = Float.floatToIntBits(val); ival ^= ((ival >> (Integer.SIZE - 1)) | Integer.MIN_VALUE); bytes.putInt(ival); @@ -94,10 +94,10 @@ public static byte[] floatToOrderedBytes(float val) { } /** - * Doubles are treated the same as floats + * Doubles are treated the same as floats in {@link #floatToOrderedBytes(float, ByteBuffer)} */ - public static byte[] doubleToOrderedBytes(double val) { - ByteBuffer bytes = ByteBuffer.allocate(Long.BYTES); + public static byte[] doubleToOrderedBytes(double val, ByteBuffer reuse) { + ByteBuffer bytes = ByteBuffers.reuse(reuse, Double.BYTES); long lng = Double.doubleToLongBits(val); lng ^= ((lng >> (Long.SIZE - 1)) | Long.MIN_VALUE); bytes.putLong(lng); @@ -108,54 +108,70 @@ public static byte[] doubleToOrderedBytes(double val) { * Strings are lexicographically sortable BUT if different byte array lengths will * ruin the Z-Ordering. (ZOrder requires that a given column contribute the same number of bytes every time). * This implementation just uses a set size to for all output byte representations. Truncating longer strings - * and right padding 0 for shorter strings. + * and right padding 0 for shorter strings. Requires UTF8 (or ASCII) encoding for ordering guarantees to hold. 
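+ *
+ * For example (illustrative): with a length of 4, "abcdef" contributes the bytes of
+ * "abcd", while "ab" contributes 'a', 'b', 0x00, 0x00.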
*/ - public static byte[] stringToOrderedBytes(String val, int length) { - ByteBuffer bytes = ByteBuffer.allocate(length); + public static byte[] stringToOrderedBytes(String val, int length, ByteBuffer reuse) { + ByteBuffer bytes = ByteBuffers.reuse(reuse, length); + Arrays.fill(bytes.array(), 0, length, (byte) 0x00); if (val != null) { int maxLength = Math.min(length, val.length()); + // We may truncate mid-character bytes.put(val.getBytes(), 0, maxLength); } return bytes.array(); } /** - * Interleave bits using a naive loop. - * @param columnsBinary an array of byte arrays, none of which are empty - * @return their bits interleaved + * Interleave bits using a naive loop. Variable length inputs are allowed but to get a consistent ordering it is + * required that every column contribute the same number of bytes in each invocation. Bits are interleaved from all + * columns that have a bit available at that position. Once a Column has no more bits to produce it is skipped in the + * interleaving. + * @param columnsBinary an array of ordered byte representations of the columns being ZOrdered + * @return the columnbytes interleaved */ public static byte[] interleaveBits(byte[][] columnsBinary) { int interleavedSize = Arrays.stream(columnsBinary).mapToInt(a -> a.length).sum(); byte[] interleavedBytes = new byte[interleavedSize]; - int sourceBit = 7; - int sourceByte = 0; int sourceColumn = 0; - int interleaveBit = 7; + int sourceByte = 0; + int sourceBit = 7; int interleaveByte = 0; - while (interleaveByte < interleavedSize) { - // Take what we have, Get the source Bit of the source Byte, move it to the interleaveBit position - interleavedBytes[interleaveByte] = - (byte) (interleavedBytes[interleaveByte] | - (columnsBinary[sourceColumn][sourceByte] & 1 << sourceBit) >> sourceBit << interleaveBit); + int interleaveBit = 7; + while (interleaveByte < interleavedSize) { + // Take the source bit from source byte and move it to the output bit position + interleavedBytes[interleaveByte] |= + (columnsBinary[sourceColumn][sourceByte] & 1 << sourceBit) >>> sourceBit << interleaveBit; --interleaveBit; + + // Check if an output byte has been completed if (interleaveBit == -1) { - // Finished a byte in our interleave byte array start a new byte + // Move to the next output byte interleaveByte++; + // Move to the highest order bit of the new output byte interleaveBit = 7; } - // Find next column with a byte we can use + // Check if the last output byte has been completed + if (interleaveByte == interleavedSize) { + break; + } + + // Find the next source bit to interleave do { + // Move to next column ++sourceColumn; if (sourceColumn == columnsBinary.length) { + // If the last source column was used, reset to next bit of first column sourceColumn = 0; - if (--sourceBit == -1) { + --sourceBit; + if (sourceBit == -1) { + // If the last bit of the source byte was used, reset to the highest bit of the next byte sourceByte++; sourceBit = 7; } } - } while (columnsBinary[sourceColumn].length <= sourceByte && interleaveByte < interleavedSize); + } while (columnsBinary[sourceColumn].length <= sourceByte); } return interleavedBytes; } diff --git a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java index 81caf0ad0fb3..e2ff29d76c3a 100644 --- a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java +++ b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java @@ -20,6 +20,7 @@ package org.apache.iceberg.util; 
+import java.nio.ByteBuffer; import java.util.Arrays; import java.util.Random; import org.apache.iceberg.relocated.com.google.common.primitives.UnsignedBytes; @@ -36,6 +37,7 @@ public class TestZOrderByteUtil { private static final byte OOOOOOOO = (byte) 0; private static final int NUM_TESTS = 100000; + private static final int NUM_INTERLEAVE_TESTS = 1000; private final Random random = new Random(42); @@ -84,7 +86,7 @@ private String interleaveStrings(String[] strings) { */ @Test public void testInterleaveRandomExamples() { - for (int test = 0; test < NUM_TESTS; test++) { + for (int test = 0; test < NUM_INTERLEAVE_TESTS; test++) { int numByteArrays = Math.abs(random.nextInt(6)) + 1; byte[][] testBytes = new byte[numByteArrays][]; String[] testStrings = new String[numByteArrays]; @@ -141,12 +143,14 @@ public void testInterleaveMixedBits() { @Test public void testIntOrdering() { + ByteBuffer aBuffer = ByteBuffer.allocate(Integer.BYTES); + ByteBuffer bBuffer = ByteBuffer.allocate(Integer.BYTES); for (int i = 0; i < NUM_TESTS; i++) { int aInt = random.nextInt(); int bInt = random.nextInt(); int intCompare = Integer.signum(Integer.compare(aInt, bInt)); - byte[] aBytes = ZOrderByteUtils.intToOrderedBytes(aInt); - byte[] bBytes = ZOrderByteUtils.intToOrderedBytes(bInt); + byte[] aBytes = ZOrderByteUtils.intToOrderedBytes(aInt, aBuffer); + byte[] bBytes = ZOrderByteUtils.intToOrderedBytes(bInt, bBuffer); int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); Assert.assertEquals(String.format( @@ -158,12 +162,14 @@ public void testIntOrdering() { @Test public void testLongOrdering() { + ByteBuffer aBuffer = ByteBuffer.allocate(Long.BYTES); + ByteBuffer bBuffer = ByteBuffer.allocate(Long.BYTES); for (int i = 0; i < NUM_TESTS; i++) { long aLong = random.nextInt(); long bLong = random.nextInt(); int longCompare = Integer.signum(Long.compare(aLong, bLong)); - byte[] aBytes = ZOrderByteUtils.longToOrderBytes(aLong); - byte[] bBytes = ZOrderByteUtils.longToOrderBytes(bLong); + byte[] aBytes = ZOrderByteUtils.longToOrderedBytes(aLong, aBuffer); + byte[] bBytes = ZOrderByteUtils.longToOrderedBytes(bLong, bBuffer); int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); Assert.assertEquals(String.format( @@ -175,12 +181,14 @@ public void testLongOrdering() { @Test public void testShortOrdering() { + ByteBuffer aBuffer = ByteBuffer.allocate(Short.BYTES); + ByteBuffer bBuffer = ByteBuffer.allocate(Short.BYTES); for (int i = 0; i < NUM_TESTS; i++) { short aShort = (short) (random.nextInt() % (Short.MAX_VALUE + 1)); short bShort = (short) (random.nextInt() % (Short.MAX_VALUE + 1)); int longCompare = Integer.signum(Long.compare(aShort, bShort)); - byte[] aBytes = ZOrderByteUtils.longToOrderBytes(aShort); - byte[] bBytes = ZOrderByteUtils.longToOrderBytes(bShort); + byte[] aBytes = ZOrderByteUtils.shortToOrderedBytes(aShort, aBuffer); + byte[] bBytes = ZOrderByteUtils.shortToOrderedBytes(bShort, bBuffer); int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); Assert.assertEquals(String.format( @@ -192,12 +200,14 @@ public void testShortOrdering() { @Test public void testTinyOrdering() { + ByteBuffer aBuffer = ByteBuffer.allocate(Byte.BYTES); + ByteBuffer bBuffer = ByteBuffer.allocate(Byte.BYTES); for (int i = 0; i < NUM_TESTS; i++) { - long aByte = (byte) (random.nextInt() % (Byte.MAX_VALUE + 1)); - long bByte = (byte) (random.nextInt() % (Byte.MAX_VALUE + 1)); + byte aByte = 
(byte) (random.nextInt() % (Byte.MAX_VALUE + 1)); + byte bByte = (byte) (random.nextInt() % (Byte.MAX_VALUE + 1)); int longCompare = Integer.signum(Long.compare(aByte, bByte)); - byte[] aBytes = ZOrderByteUtils.longToOrderBytes(aByte); - byte[] bBytes = ZOrderByteUtils.longToOrderBytes(bByte); + byte[] aBytes = ZOrderByteUtils.tinyintToOrderedBytes(aByte, aBuffer); + byte[] bBytes = ZOrderByteUtils.tinyintToOrderedBytes(bByte, bBuffer); int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); Assert.assertEquals(String.format( @@ -209,12 +219,14 @@ public void testTinyOrdering() { @Test public void testFloatOrdering() { + ByteBuffer aBuffer = ByteBuffer.allocate(Float.BYTES); + ByteBuffer bBuffer = ByteBuffer.allocate(Float.BYTES); for (int i = 0; i < NUM_TESTS; i++) { float aFloat = random.nextFloat(); float bFloat = random.nextFloat(); int floatCompare = Integer.signum(Float.compare(aFloat, bFloat)); - byte[] aBytes = ZOrderByteUtils.floatToOrderedBytes(aFloat); - byte[] bBytes = ZOrderByteUtils.floatToOrderedBytes(bFloat); + byte[] aBytes = ZOrderByteUtils.floatToOrderedBytes(aFloat, aBuffer); + byte[] bBytes = ZOrderByteUtils.floatToOrderedBytes(bFloat, bBuffer); int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); Assert.assertEquals(String.format( @@ -226,12 +238,14 @@ public void testFloatOrdering() { @Test public void testDoubleOrdering() { + ByteBuffer aBuffer = ByteBuffer.allocate(Double.BYTES); + ByteBuffer bBuffer = ByteBuffer.allocate(Double.BYTES); for (int i = 0; i < NUM_TESTS; i++) { double aDouble = random.nextDouble(); double bDouble = random.nextDouble(); int doubleCompare = Integer.signum(Double.compare(aDouble, bDouble)); - byte[] aBytes = ZOrderByteUtils.doubleToOrderedBytes(aDouble); - byte[] bBytes = ZOrderByteUtils.doubleToOrderedBytes(bDouble); + byte[] aBytes = ZOrderByteUtils.doubleToOrderedBytes(aDouble, aBuffer); + byte[] bBytes = ZOrderByteUtils.doubleToOrderedBytes(bDouble, bBuffer); int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); Assert.assertEquals(String.format( @@ -243,12 +257,14 @@ public void testDoubleOrdering() { @Test public void testStringOrdering() { + ByteBuffer aBuffer = ByteBuffer.allocate(128); + ByteBuffer bBuffer = ByteBuffer.allocate(128); for (int i = 0; i < NUM_TESTS; i++) { String aString = (String) RandomUtil.generatePrimitive(Types.StringType.get(), random); String bString = (String) RandomUtil.generatePrimitive(Types.StringType.get(), random); int stringCompare = Integer.signum(aString.compareTo(bString)); - byte[] aBytes = ZOrderByteUtils.stringToOrderedBytes(aString, 128); - byte[] bBytes = ZOrderByteUtils.stringToOrderedBytes(bString, 128); + byte[] aBytes = ZOrderByteUtils.stringToOrderedBytes(aString, 128, aBuffer); + byte[] bBytes = ZOrderByteUtils.stringToOrderedBytes(bString, 128, bBuffer); int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); Assert.assertEquals(String.format( From 8c7eef7ce7c50dcb447141d39d5ca08713b4a1d4 Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Mon, 7 Feb 2022 19:45:49 -0600 Subject: [PATCH 08/30] Specify Output Size --- .../org/apache/iceberg/util/ZOrderByteUtils.java | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java index deab4450a61d..f4d28572be84 
100644 --- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java +++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java @@ -121,16 +121,24 @@ public static byte[] stringToOrderedBytes(String val, int length, ByteBuffer reu return bytes.array(); } + /** + * For Testing interleave all available bytes + */ + static byte[] interleaveBits(byte[][] columnsBinary) { + return interleaveBits(columnsBinary, + Arrays.stream(columnsBinary).mapToInt(column -> column.length).max().getAsInt()); + } + /** * Interleave bits using a naive loop. Variable length inputs are allowed but to get a consistent ordering it is * required that every column contribute the same number of bytes in each invocation. Bits are interleaved from all * columns that have a bit available at that position. Once a Column has no more bits to produce it is skipped in the * interleaving. * @param columnsBinary an array of ordered byte representations of the columns being ZOrdered + * @param interleavedSize the number of bytes to use in the output * @return the columnbytes interleaved */ - public static byte[] interleaveBits(byte[][] columnsBinary) { - int interleavedSize = Arrays.stream(columnsBinary).mapToInt(a -> a.length).sum(); + public static byte[] interleaveBits(byte[][] columnsBinary, int interleavedSize) { byte[] interleavedBytes = new byte[interleavedSize]; int sourceColumn = 0; int sourceByte = 0; From 62a74b9f39afac90c629d261d956a3e7a7d5db24 Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Tue, 8 Feb 2022 08:48:29 -0600 Subject: [PATCH 09/30] Fix Encoding Also a patch for the test interleave method length calculation --- .../main/java/org/apache/iceberg/util/ZOrderByteUtils.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java index f4d28572be84..52180cac7a2d 100644 --- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java +++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java @@ -20,6 +20,7 @@ package org.apache.iceberg.util; import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; import java.util.Arrays; /** @@ -108,7 +109,7 @@ public static byte[] doubleToOrderedBytes(double val, ByteBuffer reuse) { * Strings are lexicographically sortable BUT if different byte array lengths will * ruin the Z-Ordering. (ZOrder requires that a given column contribute the same number of bytes every time). * This implementation just uses a set size to for all output byte representations. Truncating longer strings - * and right padding 0 for shorter strings. Requires UTF8 (or ASCII) encoding for ordering guarantees to hold. + * and right padding 0 for shorter strings. 
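+ * Note that the cut-off is applied to the encoded bytes, so (illustrative) a
+ * multi-byte UTF-8 character that straddles the cut-off point loses its trailing
+ * bytes.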
*/ public static byte[] stringToOrderedBytes(String val, int length, ByteBuffer reuse) { ByteBuffer bytes = ByteBuffers.reuse(reuse, length); @@ -116,7 +117,7 @@ public static byte[] stringToOrderedBytes(String val, int length, ByteBuffer reu if (val != null) { int maxLength = Math.min(length, val.length()); // We may truncate mid-character - bytes.put(val.getBytes(), 0, maxLength); + bytes.put(val.getBytes(StandardCharsets.UTF_8), 0, maxLength); } return bytes.array(); } @@ -126,7 +127,7 @@ public static byte[] stringToOrderedBytes(String val, int length, ByteBuffer reu */ static byte[] interleaveBits(byte[][] columnsBinary) { return interleaveBits(columnsBinary, - Arrays.stream(columnsBinary).mapToInt(column -> column.length).max().getAsInt()); + Arrays.stream(columnsBinary).mapToInt(column -> column.length).sum()); } /** From 0bdabea0bb5fa4addec9c8e72c8662ef8f1e79a6 Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Tue, 8 Feb 2022 11:58:08 -0600 Subject: [PATCH 10/30] Methods return ByteBuffers, Strings are efit into our buffer using CharsetEncoder.encode --- .../apache/iceberg/util/ZOrderByteUtils.java | 32 ++++++++++--------- .../iceberg/util/TestZOrderByteUtil.java | 31 ++++++++++-------- 2 files changed, 34 insertions(+), 29 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java index 52180cac7a2d..967aa0bf7c5c 100644 --- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java +++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java @@ -20,6 +20,8 @@ package org.apache.iceberg.util; import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharsetEncoder; import java.nio.charset.StandardCharsets; import java.util.Arrays; @@ -45,37 +47,37 @@ private ZOrderByteUtils() { * To fix this, flip the sign bit so that all negatives are ordered before positives. This essentially * shifts the 0 value so that we don't break our ordering when we cross the new 0 value. 
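+ *
+ * The returned buffer is the {@code reuse} buffer itself; callers needing a byte
+ * array can call {@code array()} on the result, e.g. (mirroring the updated tests)
+ * {@code intToOrderedBytes(val, buffer).array()}.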
*/ - public static byte[] intToOrderedBytes(int val, ByteBuffer reuse) { + public static ByteBuffer intToOrderedBytes(int val, ByteBuffer reuse) { ByteBuffer bytes = ByteBuffers.reuse(reuse, Integer.BYTES); bytes.putInt(val ^ 0x80000000); - return bytes.array(); + return bytes; } /** * Signed longs are treated the same as the signed ints in {@link #intToOrderedBytes(int, ByteBuffer)} */ - public static byte[] longToOrderedBytes(long val, ByteBuffer reuse) { + public static ByteBuffer longToOrderedBytes(long val, ByteBuffer reuse) { ByteBuffer bytes = ByteBuffers.reuse(reuse, Long.BYTES); bytes.putLong(val ^ 0x8000000000000000L); - return bytes.array(); + return bytes; } /** * Signed shorts are treated the same as the signed ints in {@link #intToOrderedBytes(int, ByteBuffer)} */ - public static byte[] shortToOrderedBytes(short val, ByteBuffer reuse) { + public static ByteBuffer shortToOrderedBytes(short val, ByteBuffer reuse) { ByteBuffer bytes = ByteBuffers.reuse(reuse, Short.BYTES); bytes.putShort((short) (val ^ (0x8000))); - return bytes.array(); + return bytes; } /** * Signed tiny ints are treated the same as the signed ints in {@link #intToOrderedBytes(int, ByteBuffer)} */ - public static byte[] tinyintToOrderedBytes(byte val, ByteBuffer reuse) { + public static ByteBuffer tinyintToOrderedBytes(byte val, ByteBuffer reuse) { ByteBuffer bytes = ByteBuffers.reuse(reuse, Byte.BYTES); bytes.put((byte) (val ^ (0x80))); - return bytes.array(); + return bytes; } /** @@ -86,23 +88,23 @@ public static byte[] tinyintToOrderedBytes(byte val, ByteBuffer reuse) { * Which means floats can be treated as sign magnitude integers which can then be converted into lexicographically * comparable bytes */ - public static byte[] floatToOrderedBytes(float val, ByteBuffer reuse) { + public static ByteBuffer floatToOrderedBytes(float val, ByteBuffer reuse) { ByteBuffer bytes = ByteBuffers.reuse(reuse, Float.BYTES); int ival = Float.floatToIntBits(val); ival ^= ((ival >> (Integer.SIZE - 1)) | Integer.MIN_VALUE); bytes.putInt(ival); - return bytes.array(); + return bytes; } /** * Doubles are treated the same as floats in {@link #floatToOrderedBytes(float, ByteBuffer)} */ - public static byte[] doubleToOrderedBytes(double val, ByteBuffer reuse) { + public static ByteBuffer doubleToOrderedBytes(double val, ByteBuffer reuse) { ByteBuffer bytes = ByteBuffers.reuse(reuse, Double.BYTES); long lng = Double.doubleToLongBits(val); lng ^= ((lng >> (Long.SIZE - 1)) | Long.MIN_VALUE); bytes.putLong(lng); - return bytes.array(); + return bytes; } /** @@ -111,15 +113,15 @@ public static byte[] doubleToOrderedBytes(double val, ByteBuffer reuse) { * This implementation just uses a set size to for all output byte representations. Truncating longer strings * and right padding 0 for shorter strings. 
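+ *
+ * The caller supplies the {@link CharsetEncoder}; the tests use (illustrative)
+ * {@code stringToOrderedBytes(val, 128, buffer, StandardCharsets.UTF_8.newEncoder())}.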
*/ - public static byte[] stringToOrderedBytes(String val, int length, ByteBuffer reuse) { + public static ByteBuffer stringToOrderedBytes(String val, int length, ByteBuffer reuse, CharsetEncoder encoder) { ByteBuffer bytes = ByteBuffers.reuse(reuse, length); Arrays.fill(bytes.array(), 0, length, (byte) 0x00); if (val != null) { int maxLength = Math.min(length, val.length()); // We may truncate mid-character - bytes.put(val.getBytes(StandardCharsets.UTF_8), 0, maxLength); + encoder.encode(CharBuffer.wrap(val), bytes, true); } - return bytes.array(); + return bytes; } /** diff --git a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java index e2ff29d76c3a..bf84319d0d45 100644 --- a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java +++ b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java @@ -21,6 +21,8 @@ package org.apache.iceberg.util; import java.nio.ByteBuffer; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Random; import org.apache.iceberg.relocated.com.google.common.primitives.UnsignedBytes; @@ -149,8 +151,8 @@ public void testIntOrdering() { int aInt = random.nextInt(); int bInt = random.nextInt(); int intCompare = Integer.signum(Integer.compare(aInt, bInt)); - byte[] aBytes = ZOrderByteUtils.intToOrderedBytes(aInt, aBuffer); - byte[] bBytes = ZOrderByteUtils.intToOrderedBytes(bInt, bBuffer); + byte[] aBytes = ZOrderByteUtils.intToOrderedBytes(aInt, aBuffer).array(); + byte[] bBytes = ZOrderByteUtils.intToOrderedBytes(bInt, bBuffer).array(); int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); Assert.assertEquals(String.format( @@ -168,8 +170,8 @@ public void testLongOrdering() { long aLong = random.nextInt(); long bLong = random.nextInt(); int longCompare = Integer.signum(Long.compare(aLong, bLong)); - byte[] aBytes = ZOrderByteUtils.longToOrderedBytes(aLong, aBuffer); - byte[] bBytes = ZOrderByteUtils.longToOrderedBytes(bLong, bBuffer); + byte[] aBytes = ZOrderByteUtils.longToOrderedBytes(aLong, aBuffer).array(); + byte[] bBytes = ZOrderByteUtils.longToOrderedBytes(bLong, bBuffer).array(); int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); Assert.assertEquals(String.format( @@ -187,8 +189,8 @@ public void testShortOrdering() { short aShort = (short) (random.nextInt() % (Short.MAX_VALUE + 1)); short bShort = (short) (random.nextInt() % (Short.MAX_VALUE + 1)); int longCompare = Integer.signum(Long.compare(aShort, bShort)); - byte[] aBytes = ZOrderByteUtils.shortToOrderedBytes(aShort, aBuffer); - byte[] bBytes = ZOrderByteUtils.shortToOrderedBytes(bShort, bBuffer); + byte[] aBytes = ZOrderByteUtils.shortToOrderedBytes(aShort, aBuffer).array(); + byte[] bBytes = ZOrderByteUtils.shortToOrderedBytes(bShort, bBuffer).array(); int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); Assert.assertEquals(String.format( @@ -206,8 +208,8 @@ public void testTinyOrdering() { byte aByte = (byte) (random.nextInt() % (Byte.MAX_VALUE + 1)); byte bByte = (byte) (random.nextInt() % (Byte.MAX_VALUE + 1)); int longCompare = Integer.signum(Long.compare(aByte, bByte)); - byte[] aBytes = ZOrderByteUtils.tinyintToOrderedBytes(aByte, aBuffer); - byte[] bBytes = ZOrderByteUtils.tinyintToOrderedBytes(bByte, bBuffer); + byte[] aBytes = ZOrderByteUtils.tinyintToOrderedBytes(aByte, 
aBuffer).array(); + byte[] bBytes = ZOrderByteUtils.tinyintToOrderedBytes(bByte, bBuffer).array(); int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); Assert.assertEquals(String.format( @@ -225,8 +227,8 @@ public void testFloatOrdering() { float aFloat = random.nextFloat(); float bFloat = random.nextFloat(); int floatCompare = Integer.signum(Float.compare(aFloat, bFloat)); - byte[] aBytes = ZOrderByteUtils.floatToOrderedBytes(aFloat, aBuffer); - byte[] bBytes = ZOrderByteUtils.floatToOrderedBytes(bFloat, bBuffer); + byte[] aBytes = ZOrderByteUtils.floatToOrderedBytes(aFloat, aBuffer).array(); + byte[] bBytes = ZOrderByteUtils.floatToOrderedBytes(bFloat, bBuffer).array(); int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); Assert.assertEquals(String.format( @@ -244,8 +246,8 @@ public void testDoubleOrdering() { double aDouble = random.nextDouble(); double bDouble = random.nextDouble(); int doubleCompare = Integer.signum(Double.compare(aDouble, bDouble)); - byte[] aBytes = ZOrderByteUtils.doubleToOrderedBytes(aDouble, aBuffer); - byte[] bBytes = ZOrderByteUtils.doubleToOrderedBytes(bDouble, bBuffer); + byte[] aBytes = ZOrderByteUtils.doubleToOrderedBytes(aDouble, aBuffer).array(); + byte[] bBytes = ZOrderByteUtils.doubleToOrderedBytes(bDouble, bBuffer).array(); int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); Assert.assertEquals(String.format( @@ -257,14 +259,15 @@ public void testDoubleOrdering() { @Test public void testStringOrdering() { + CharsetEncoder encoder = StandardCharsets.UTF_8.newEncoder(); ByteBuffer aBuffer = ByteBuffer.allocate(128); ByteBuffer bBuffer = ByteBuffer.allocate(128); for (int i = 0; i < NUM_TESTS; i++) { String aString = (String) RandomUtil.generatePrimitive(Types.StringType.get(), random); String bString = (String) RandomUtil.generatePrimitive(Types.StringType.get(), random); int stringCompare = Integer.signum(aString.compareTo(bString)); - byte[] aBytes = ZOrderByteUtils.stringToOrderedBytes(aString, 128, aBuffer); - byte[] bBytes = ZOrderByteUtils.stringToOrderedBytes(bString, 128, bBuffer); + byte[] aBytes = ZOrderByteUtils.stringToOrderedBytes(aString, 128, aBuffer, encoder).array(); + byte[] bBytes = ZOrderByteUtils.stringToOrderedBytes(bString, 128, bBuffer, encoder).array(); int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); Assert.assertEquals(String.format( From 1e7e660c671d38fac18dffaf3a8fbd614c7c2605 Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Tue, 8 Feb 2022 21:49:55 -0600 Subject: [PATCH 11/30] Remove unused string length --- .../java/org/apache/iceberg/util/ZOrderByteUtils.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java index 967aa0bf7c5c..3ec4c0f430f4 100644 --- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java +++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java @@ -24,6 +24,7 @@ import java.nio.charset.CharsetEncoder; import java.nio.charset.StandardCharsets; import java.util.Arrays; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; /** * Within Z-Ordering the byte representations of objects being compared must be ordered, @@ -114,12 +115,14 @@ public static ByteBuffer doubleToOrderedBytes(double val, ByteBuffer reuse) { * 
and right padding 0 for shorter strings. */ public static ByteBuffer stringToOrderedBytes(String val, int length, ByteBuffer reuse, CharsetEncoder encoder) { + Preconditions.checkArgument(encoder.charset().equals(StandardCharsets.UTF_8), + "Cannot use an encoder not using UTF_8 as its Charset"); + ByteBuffer bytes = ByteBuffers.reuse(reuse, length); Arrays.fill(bytes.array(), 0, length, (byte) 0x00); if (val != null) { - int maxLength = Math.min(length, val.length()); - // We may truncate mid-character - encoder.encode(CharBuffer.wrap(val), bytes, true); + CharBuffer inputBuffer = CharBuffer.wrap(val); + encoder.encode(inputBuffer, bytes, true); } return bytes; } From 41d855c46db008fe2b1cab92c33f818d9cf6bf8d Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Tue, 8 Feb 2022 22:05:29 -0600 Subject: [PATCH 12/30] Update docs --- .../main/java/org/apache/iceberg/util/ZOrderByteUtils.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java index 3ec4c0f430f4..b008461ea8ca 100644 --- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java +++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java @@ -30,10 +30,12 @@ * Within Z-Ordering the byte representations of objects being compared must be ordered, * this requires several types to be transformed when converted to bytes. The goal is to * map objects whose byte representations are not lexicographically ordered into representations - * that are lexicographically ordered. + * that are lexicographically ordered. Bytes produced should be compared lexicographically as + * unsigned bytes, big-endian. + *

* Most of these techniques are derived from * https://aws.amazon.com/blogs/database/z-order-indexing-for-multifaceted-queries-in-amazon-dynamodb-part-2/ - * + *
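 * (that post builds a Z-address by interleaving the bits of each key's ordered, fixed-width byte representation)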

* Some implementation is taken from * https://github.com/apache/hbase/blob/master/hbase-common/src/main/java/org/apache/hadoop/hbase/util/OrderedBytes.java */ From 2dfad579939f1638616485d3320758b62a55d15c Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Mon, 24 Jan 2022 17:09:34 -0600 Subject: [PATCH 13/30] Spark: Rewrite Datafiles Implementation Using ZOrder Use Spark UDFs to create a Z-Value column and then invoke a Spark Sort on it. The resultant data is then saved without the Z-Value Column. --- .../iceberg/actions/RewriteDataFiles.java | 9 + .../BaseRewriteDataFilesSparkAction.java | 11 + .../spark/actions/Spark3ZOrderStrategy.java | 249 ++++++++++++++++++ .../spark/actions/SparkSortStrategy.java | 4 + .../actions/TestRewriteDataFilesAction.java | 126 +++++++++ 5 files changed, 399 insertions(+) create mode 100644 spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java diff --git a/api/src/main/java/org/apache/iceberg/actions/RewriteDataFiles.java b/api/src/main/java/org/apache/iceberg/actions/RewriteDataFiles.java index f00596fa46e8..4ed57716603a 100644 --- a/api/src/main/java/org/apache/iceberg/actions/RewriteDataFiles.java +++ b/api/src/main/java/org/apache/iceberg/actions/RewriteDataFiles.java @@ -129,6 +129,15 @@ default RewriteDataFiles sort(SortOrder sortOrder) { throw new UnsupportedOperationException("SORT Rewrite Strategy not implemented for this framework"); } + /** + * Choose Z-ORDER as a strategy for this rewrite operation with a specified list of columns to use + * @param columns Columns to be used to generate Z-Values + * @return this for method chaining + */ + default RewriteDataFiles zOrder(String... columns) { + throw new UnsupportedOperationException("Z-ORDER Rewrite Strategy not implemented for this framework"); + } + /** * A user provided filter for determining which files will be considered by the rewrite strategy. This will be used * in addition to whatever rules the rewrite strategy generates. For example this would be used for providing a diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/BaseRewriteDataFilesSparkAction.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/BaseRewriteDataFilesSparkAction.java index 5350e729c8ea..62cb5b174d43 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/BaseRewriteDataFilesSparkAction.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/BaseRewriteDataFilesSparkAction.java @@ -106,6 +106,11 @@ protected RewriteDataFiles self() { return this; } + /** + * The framework specific ZOrder Strategy + */ + protected abstract SortStrategy zOrderStrategy(String... columnNames); + @Override public RewriteDataFiles binPack() { Preconditions.checkArgument(this.strategy == null, @@ -130,6 +135,12 @@ public RewriteDataFiles sort() { return this; } + @Override + public RewriteDataFiles zOrder(String... 
columnNames) { + this.strategy = zOrderStrategy(columnNames); + return this; + } + @Override public RewriteDataFiles filter(Expression expression) { filter = Expressions.and(filter, expression); diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java new file mode 100644 index 000000000000..6e9354cf605c --- /dev/null +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java @@ -0,0 +1,249 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.spark.actions; + +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.List; +import java.util.Set; +import java.util.UUID; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.iceberg.DataFile; +import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.NullOrder; +import org.apache.iceberg.PartitionField; +import org.apache.iceberg.Schema; +import org.apache.iceberg.SortDirection; +import org.apache.iceberg.Table; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.spark.FileRewriteCoordinator; +import org.apache.iceberg.spark.FileScanTaskSetManager; +import org.apache.iceberg.spark.SparkDistributionAndOrderingUtil; +import org.apache.iceberg.spark.SparkReadOptions; +import org.apache.iceberg.spark.SparkWriteOptions; +import org.apache.iceberg.types.Types; +import org.apache.iceberg.types.Types.NestedField; +import org.apache.iceberg.util.SortOrderUtil; +import org.apache.iceberg.util.ZOrderByteUtils; +import org.apache.spark.sql.Column; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan; +import org.apache.spark.sql.connector.distributions.Distribution; +import org.apache.spark.sql.connector.distributions.Distributions; +import org.apache.spark.sql.connector.expressions.SortOrder; +import org.apache.spark.sql.expressions.UserDefinedFunction; +import org.apache.spark.sql.functions; +import org.apache.spark.sql.internal.SQLConf; +import org.apache.spark.sql.types.BinaryType; +import org.apache.spark.sql.types.BooleanType; +import org.apache.spark.sql.types.ByteType; +import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.DateType; +import org.apache.spark.sql.types.DoubleType; +import org.apache.spark.sql.types.FloatType; +import org.apache.spark.sql.types.IntegerType; +import org.apache.spark.sql.types.LongType; +import org.apache.spark.sql.types.ShortType; +import 
org.apache.spark.sql.types.StringType; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.TimestampType; +import scala.collection.Seq; + +public class Spark3ZOrderStrategy extends Spark3SortStrategy { + private static final String Z_COLUMN = "ICEZVALUE"; + private static final Schema Z_SCHEMA = new Schema(NestedField.required(0, Z_COLUMN, Types.BinaryType.get())); + private static final org.apache.iceberg.SortOrder Z_SORT_ORDER = org.apache.iceberg.SortOrder.builderFor(Z_SCHEMA) + .sortBy(Z_COLUMN, SortDirection.ASC, NullOrder.NULLS_LAST) + .build(); + private static final int STRING_KEY_LENGTH = 60; + + private final List zOrderColNames; + private final FileScanTaskSetManager manager = FileScanTaskSetManager.get(); + private final FileRewriteCoordinator rewriteCoordinator = FileRewriteCoordinator.get(); + + public Spark3ZOrderStrategy(Table table, SparkSession spark, List zOrderColNames) { + super(table, spark); + + Stream identityPartitionColumns = table.spec().fields().stream() + .filter(f -> f.transform().isIdentity()) + .map(PartitionField::name); + List partZOrderCols = identityPartitionColumns + .filter(zOrderColNames::contains) + .collect(Collectors.toList()); + Preconditions.checkArgument(partZOrderCols.isEmpty(), + "Cannot ZOrder on an Identity partition column as these values are constant within a partition, " + + "ZOrdering requested on %s", + partZOrderCols); + + this.zOrderColNames = zOrderColNames; + } + + @Override + public String name() { + return "Z-ORDER"; + } + + @Override + protected void validateOptions() { + // TODO implement Zorder Strategy in API Module + return; + } + + @Override + public Set rewriteFiles(List filesToRewrite) { + String groupID = UUID.randomUUID().toString(); + boolean requiresRepartition = !filesToRewrite.get(0).spec().equals(table().spec()); + + SortOrder[] ordering; + if (requiresRepartition) { + ordering = SparkDistributionAndOrderingUtil.convert(SortOrderUtil.buildSortOrder(table(), sortOrder())); + } else { + ordering = SparkDistributionAndOrderingUtil.convert(sortOrder()); + } + + Distribution distribution = Distributions.ordered(ordering); + + try { + manager.stageTasks(table(), groupID, filesToRewrite); + + // Disable Adaptive Query Execution as this may change the output partitioning of our write + SparkSession cloneSession = spark().cloneSession(); + cloneSession.conf().set(SQLConf.ADAPTIVE_EXECUTION_ENABLED().key(), false); + + // Reset Shuffle Partitions for our sort + long numOutputFiles = numOutputFiles((long) (inputFileSize(filesToRewrite) * sizeEstimateMultiple())); + cloneSession.conf().set(SQLConf.SHUFFLE_PARTITIONS().key(), Math.max(1, numOutputFiles)); + + Dataset scanDF = cloneSession.read().format("iceberg") + .option(SparkReadOptions.FILE_SCAN_TASK_SET_ID, groupID) + .load(table().name()); + + Column[] originalColumns = Arrays.stream(scanDF.schema().names()) + .map(n -> functions.col(n)) + .toArray(Column[]::new); + + List zOrderColumns = zOrderColNames.stream() + .map(scanDF.schema()::apply) + .collect(Collectors.toList()); + + Column zvalueArray = functions.array(zOrderColumns.stream().map(colStruct -> + SparkZOrder.sortedLexicographically(functions.col(colStruct.name()), colStruct.dataType()) + ).toArray(Column[]::new)); + + Dataset zvalueDF = scanDF.withColumn(Z_COLUMN, SparkZOrder.interleaveBytes(zvalueArray)); + + SQLConf sqlConf = cloneSession.sessionState().conf(); + LogicalPlan sortPlan = sortPlan(distribution, ordering, zvalueDF.logicalPlan(), sqlConf); + Dataset sortedDf = new 
Dataset<>(cloneSession, sortPlan, zvalueDF.encoder()); + sortedDf + .select(originalColumns) + .write() + .format("iceberg") + .option(SparkWriteOptions.REWRITTEN_FILE_SCAN_TASK_SET_ID, groupID) + .option(SparkWriteOptions.TARGET_FILE_SIZE_BYTES, writeMaxFileSize()) + .option(SparkWriteOptions.USE_TABLE_DISTRIBUTION_AND_ORDERING, "false") + .mode("append") + .save(table().name()); + + return rewriteCoordinator.fetchNewDataFiles(table(), groupID); + } finally { + manager.removeTasks(table(), groupID); + rewriteCoordinator.clearRewrite(table(), groupID); + } + } + + @Override + protected org.apache.iceberg.SortOrder sortOrder() { + return Z_SORT_ORDER; + } + + static class SparkZOrder { + + static byte[] interleaveBits(Seq scalaBinary) { + byte[][] columnsBinary = scala.collection.JavaConverters.seqAsJavaList(scalaBinary) + .toArray(new byte[scalaBinary.size()][]); + return ZOrderByteUtils.interleaveBits(columnsBinary); + } + + private static final UserDefinedFunction FLOAT_TO_BYTES = + functions.udf((Float f) -> ByteBuffer.allocate(4).putFloat(f).array(), DataTypes.BinaryType); + + private static final UserDefinedFunction DOUBLE_TO_BYTES = + functions.udf((Double d) -> ByteBuffer.allocate(8).putDouble(d).array(), DataTypes.BinaryType); + + private static UserDefinedFunction getLexicalBytesIntLike(int size) { + return functions.udf((byte[] binary) -> ZOrderByteUtils.orderIntLikeBytes(binary, size), DataTypes.BinaryType) + .withName("INT-LIKE-LEXICAL-BYTES"); + } + + private static UserDefinedFunction getLexicalBytesFloatLike(int size) { + return functions.udf((byte[] binary) -> ZOrderByteUtils.orderFloatLikeBytes(binary, size), DataTypes.BinaryType) + .withName("FLOAT-LIKE-LEXICAL-BYTES"); + } + + private static UserDefinedFunction getLexicalBytesUTF8Like(int size) { + return functions.udf((byte[] binary) -> ZOrderByteUtils.orderUTF8LikeBytes(binary, size), DataTypes.BinaryType) + .withName("UTF8-LIKE-LEXICAL-BYTES"); + } + + private static final UserDefinedFunction INTERLEAVE_UDF = + functions.udf((Seq arrayBinary) -> interleaveBits(arrayBinary), DataTypes.BinaryType) + .withName("INTERLEAVE_BYTES"); + + static Column interleaveBytes(Column arrayBinary) { + return INTERLEAVE_UDF.apply(arrayBinary); + } + + @SuppressWarnings("checkstyle:CyclomaticComplexity") + static Column sortedLexicographically(Column column, DataType type) { + if (type instanceof ByteType) { + return column.cast(DataTypes.BinaryType); + } else if (type instanceof ShortType) { + return getLexicalBytesIntLike(2).apply(column.cast(DataTypes.BinaryType)); + } else if (type instanceof IntegerType) { + return getLexicalBytesIntLike(4).apply(column.cast(DataTypes.BinaryType)); + } else if (type instanceof LongType) { + return getLexicalBytesIntLike(8).apply(column.cast(DataTypes.BinaryType)); + } else if (type instanceof FloatType) { + return getLexicalBytesFloatLike(4).apply(FLOAT_TO_BYTES.apply(column)); + } else if (type instanceof DoubleType) { + return getLexicalBytesFloatLike(8).apply(DOUBLE_TO_BYTES.apply(column)); + } else if (type instanceof StringType) { + return getLexicalBytesUTF8Like(STRING_KEY_LENGTH).apply(column.cast(DataTypes.BinaryType)); + } else if (type instanceof BinaryType) { + return getLexicalBytesUTF8Like(STRING_KEY_LENGTH).apply(column); + } else if (type instanceof BooleanType) { + return getLexicalBytesUTF8Like(1).apply(column.cast(DataTypes.BinaryType)); + } else if (type instanceof TimestampType) { + return 
getLexicalBytesIntLike(8).apply(column.cast(DataTypes.LongType).cast(DataTypes.BinaryType)); + } else if (type instanceof DateType) { + return getLexicalBytesIntLike(8).apply(column.cast(DataTypes.LongType).cast(DataTypes.BinaryType)); + } else { + throw new IllegalArgumentException( + String.format("Cannot use column %s of type %s in ZOrdering, the type is unsupported", + column, type)); + } + } + } +} diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSortStrategy.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSortStrategy.java index 832ff255579c..d4823560bf17 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSortStrategy.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSortStrategy.java @@ -155,4 +155,8 @@ protected SparkSession spark() { protected LogicalPlan sortPlan(Distribution distribution, SortOrder[] ordering, LogicalPlan plan, SQLConf conf) { return DistributionAndOrderingUtils$.MODULE$.prepareQuery(distribution, ordering, plan, conf); } + + protected double sizeEstimateMultiple() { + return this.sizeEstimateMultiple; + } } diff --git a/spark/v3.2/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java b/spark/v3.2/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java index 1d8695053123..f6f3004aecd0 100644 --- a/spark/v3.2/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java +++ b/spark/v3.2/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java @@ -62,6 +62,7 @@ import org.apache.iceberg.encryption.EncryptedOutputFile; import org.apache.iceberg.encryption.EncryptionKeyMetadata; import org.apache.iceberg.exceptions.CommitStateUnknownException; +import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.hadoop.HadoopTables; import org.apache.iceberg.io.CloseableIterable; @@ -95,6 +96,10 @@ import org.mockito.Mockito; import static org.apache.iceberg.types.Types.NestedField.optional; +import static org.apache.iceberg.types.Types.NestedField.required; +import static org.apache.spark.sql.functions.current_date; +import static org.apache.spark.sql.functions.date_add; +import static org.apache.spark.sql.functions.expr; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.argThat; import static org.mockito.Mockito.doAnswer; @@ -1032,6 +1037,83 @@ public void testCommitStateUnknownException() { shouldHaveSnapshots(table, 2); // Commit actually Succeeded } + @Test + public void testZOrderSort() { + int originalFiles = 20; + Table table = createTable(originalFiles); + shouldHaveLastCommitUnsorted(table, "c2"); + shouldHaveFiles(table, originalFiles); + + List originalData = currentData(); + double originalFilesC2 = percentFilesRequired(table, "c2", "foo23"); + double originalFilesC3 = percentFilesRequired(table, "c3", "bar21"); + double originalFilesC2C3 = percentFilesRequired(table, new String[]{"c2", "c3"}, new String[]{"foo23", "bar23"}); + + Assert.assertTrue("Should require all files to scan c2", originalFilesC2 > 0.99); + Assert.assertTrue("Should require all files to scan c3", originalFilesC3 > 0.99); + + RewriteDataFiles.Result result = + basicRewrite(table) + .zOrder("c2", "c3") + .option(SortStrategy.MAX_FILE_SIZE_BYTES, Integer.toString((averageFileSize(table) / 2) + 2)) + // Divide files in 2 + 
.option(RewriteDataFiles.TARGET_FILE_SIZE_BYTES, Integer.toString(averageFileSize(table) / 2)) + .option(SortStrategy.MIN_INPUT_FILES, "1") + .execute(); + + Assert.assertEquals("Should have 1 fileGroups", 1, result.rewriteResults().size()); + int zOrderedFilesTotal = Iterables.size(table.currentSnapshot().addedFiles()); + Assert.assertTrue("Should have written 40+ files", zOrderedFilesTotal >= 40); + + table.refresh(); + + List postRewriteData = currentData(); + assertEquals("We shouldn't have changed the data", originalData, postRewriteData); + + shouldHaveSnapshots(table, 2); + shouldHaveACleanCache(table); + + double filesScannedC2 = percentFilesRequired(table, "c2", "foo23"); + double filesScannedC3 = percentFilesRequired(table, "c3", "bar21"); + double filesScannedC2C3 = percentFilesRequired(table, new String[]{"c2", "c3"}, new String[]{"foo23", "bar23"}); + + Assert.assertTrue("Should have reduced the number of files required for c2", + filesScannedC2 < originalFilesC2); + Assert.assertTrue("Should have reduced the number of files required for c3", + filesScannedC3 < originalFilesC3); + Assert.assertTrue("Should have reduced the number of files required for a c2,c3 predicate", + filesScannedC2C3 < originalFilesC2C3); + } + + @Test + public void testZOrderAllTypesSort() { + Table table = createTypeTestTable(); + shouldHaveFiles(table, 10); + + List originalRaw = spark.read().format("iceberg").load(tableLocation).sort("longCol").collectAsList(); + List originalData = rowsToJava(originalRaw); + + RewriteDataFiles.Result result = + basicRewrite(table) + .zOrder("longCol", "intCol", "floatCol", "doubleCol", "dateCol", "timestampCol", "stringCol") + .option(SortStrategy.MIN_INPUT_FILES, "1") + .option(SortStrategy.REWRITE_ALL, "true") + .execute(); + + Assert.assertEquals("Should have 1 fileGroups", 1, result.rewriteResults().size()); + int zOrderedFilesTotal = Iterables.size(table.currentSnapshot().addedFiles()); + Assert.assertEquals("Should have written 1 file", 1, zOrderedFilesTotal); + + table.refresh(); + + List postRaw = spark.read().format("iceberg").load(tableLocation).sort("longCol").collectAsList(); + List postRewriteData = rowsToJava(postRaw); + assertEquals("We shouldn't have changed the data", originalData, postRewriteData); + + shouldHaveSnapshots(table, 2); + shouldHaveACleanCache(table); + } + @Test public void testInvalidAPIUsage() { Table table = createTable(1); @@ -1327,6 +1409,35 @@ protected Table createTablePartitioned(int partitions, int files) { return createTablePartitioned(partitions, files, SCALE, Maps.newHashMap()); } + private Table createTypeTestTable() { + Schema schema = new Schema( + required(1, "longCol", Types.LongType.get()), + required(2, "intCol", Types.IntegerType.get()), + required(3, "floatCol", Types.FloatType.get()), + optional(4, "doubleCol", Types.DoubleType.get()), + optional(6, "dateCol", Types.DateType.get()), + optional(7, "timestampCol", Types.TimestampType.withZone()), + optional(8, "stringCol", Types.StringType.get())); + + Map options = Maps.newHashMap(); + Table table = TABLES.create(schema, PartitionSpec.unpartitioned(), options, tableLocation); + + spark.range(0, 10, 1, 10) + .withColumnRenamed("id", "longCol") + .withColumn("intCol", expr("CAST(longCol AS INT)")) + .withColumn("floatCol", expr("CAST(longCol AS FLOAT)")) + .withColumn("doubleCol", expr("CAST(longCol AS DOUBLE)")) + .withColumn("dateCol", date_add(current_date(), 1)) + .withColumn("timestampCol", expr("TO_TIMESTAMP(dateCol)")) + .withColumn("stringCol", 
expr("CAST(dateCol AS STRING)")) + .write() + .format("iceberg") + .mode("append") + .save(tableLocation); + + return table; + } + protected int averageFileSize(Table table) { table.refresh(); return (int) Streams.stream(table.newScan().planFiles()).mapToLong(FileScanTask::length).average().getAsDouble(); @@ -1412,6 +1523,21 @@ private Set cacheContents(Table table) { .build(); } + private double percentFilesRequired(Table table, String col, String value) { + return percentFilesRequired(table, new String[]{col}, new String[]{value}); + } + + private double percentFilesRequired(Table table, String[] cols, String[] values) { + Preconditions.checkArgument(cols.length == values.length); + Expression restriction = Expressions.alwaysTrue(); + for (int i = 0; i < cols.length; i++) { + restriction = Expressions.and(restriction, Expressions.equal(cols[i], values[i])); + } + int totalFiles = Iterables.size(table.newScan().planFiles()); + int filteredFiles = Iterables.size(table.newScan().filter(restriction).planFiles()); + return (double) filteredFiles / (double) totalFiles; + } + class GroupInfoMatcher implements ArgumentMatcher { private final Set groupIDs; From a3e854341eed24c862dc1b2aa81839afea625b64 Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Tue, 25 Jan 2022 10:08:37 -0600 Subject: [PATCH 14/30] Spark: Adds perf benchmarks for ZOrdering vs Sort Rewrite --- .../IcebergSortCompactionBenchmark.java | 272 ++++++++++++++++++ 1 file changed, 272 insertions(+) create mode 100644 spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java diff --git a/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java b/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java new file mode 100644 index 000000000000..3347ebef5017 --- /dev/null +++ b/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java @@ -0,0 +1,272 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + + +package org.apache.iceberg.spark.action; + +import java.io.IOException; +import java.util.Collections; +import java.util.UUID; +import java.util.concurrent.TimeUnit; +import org.apache.hadoop.conf.Configuration; +import org.apache.iceberg.NullOrder; +import org.apache.iceberg.Schema; +import org.apache.iceberg.SortDirection; +import org.apache.iceberg.SortOrder; +import org.apache.iceberg.Table; +import org.apache.iceberg.relocated.com.google.common.io.Files; +import org.apache.iceberg.spark.Spark3Util; +import org.apache.iceberg.spark.SparkSchemaUtil; +import org.apache.iceberg.spark.SparkSessionCatalog; +import org.apache.iceberg.spark.actions.SparkActions; +import org.apache.iceberg.types.Types; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.connector.catalog.Identifier; +import org.apache.spark.sql.connector.expressions.Transform; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Timeout; + +import static org.apache.iceberg.types.Types.NestedField.optional; +import static org.apache.iceberg.types.Types.NestedField.required; +import static org.apache.spark.sql.functions.col; +import static org.apache.spark.sql.functions.current_date; +import static org.apache.spark.sql.functions.date_add; +import static org.apache.spark.sql.functions.expr; + +@Fork(1) +@State(Scope.Benchmark) +@Measurement(iterations = 3) +@BenchmarkMode(Mode.SingleShotTime) +@Timeout(time = 1000, timeUnit = TimeUnit.HOURS) +public class IcebergSortCompactionBenchmark { + + private static final String[] NAMESPACE = new String[] {"default"}; + private static final String NAME = "sortbench"; + private static final Identifier IDENT = Identifier.of(NAMESPACE, NAME); + private static final int NUM_FILES = 8; + private static final long NUM_ROWS = 10000000L; + + + private final Configuration hadoopConf = initHadoopConf(); + private SparkSession spark; + + @Setup + public void setupBench() { + setupSpark(); + } + + @TearDown + public void teardownBench() { + tearDownSpark(); + } + + @Setup(Level.Iteration) + public void setupIteration() { + initTable(); + appendData(); + } + + @TearDown(Level.Iteration) + public void cleanUpIteration() throws IOException { + cleanupFiles(); + } + + @Benchmark + @Threads(1) + public void sortInt() { + SparkActions.get() + .rewriteDataFiles(table()) + .sort(SortOrder + .builderFor(table().schema()) + .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) + .build()) + .execute(); + } + + @Benchmark + @Threads(1) + public void sortString() { + SparkActions.get() + .rewriteDataFiles(table()) + .sort(SortOrder + .builderFor(table().schema()) + .sortBy("stringCol", SortDirection.ASC, NullOrder.NULLS_FIRST) + .build()) + .execute(); + } + + @Benchmark + @Threads(1) + public void sortFourColumns() { + SparkActions.get() + .rewriteDataFiles(table()) + .sort(SortOrder + .builderFor(table().schema()) + .sortBy("stringCol", SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("intCol", 
SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("dateCol", SortDirection.DESC, NullOrder.NULLS_FIRST) + .sortBy("doubleCol", SortDirection.DESC, NullOrder.NULLS_FIRST) + .build()) + .execute(); + } + + @Benchmark + @Threads(1) + public void sortSixColumns() { + SparkActions.get() + .rewriteDataFiles(table()) + .sort(SortOrder + .builderFor(table().schema()) + .sortBy("stringCol", SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("dateCol", SortDirection.DESC, NullOrder.NULLS_FIRST) + .sortBy("timestampCol", SortDirection.DESC, NullOrder.NULLS_FIRST) + .sortBy("doubleCol", SortDirection.DESC, NullOrder.NULLS_FIRST) + .sortBy("longCol", SortDirection.DESC, NullOrder.NULLS_FIRST) + .build()) + .execute(); + } + + @Benchmark + @Threads(1) + public void zSortInt() { + SparkActions.get() + .rewriteDataFiles(table()) + .zOrder("intCol") + .execute(); + } + + @Benchmark + @Threads(1) + public void zSortString() { + SparkActions.get() + .rewriteDataFiles(table()) + .zOrder("stringCol") + .execute(); + } + + @Benchmark + @Threads(1) + public void zSortFourColumns() { + SparkActions.get() + .rewriteDataFiles(table()) + .zOrder("stringCol", "intCol", "dateCol", "doubleCol") + .execute(); + } + + @Benchmark + @Threads(1) + public void zSortSixColumns() { + SparkActions.get() + .rewriteDataFiles(table()) + .zOrder("stringCol", "intCol", "dateCol", "timestampCol", "doubleCol", "longCol") + .execute(); + } + + protected Configuration initHadoopConf() { + return new Configuration(); + } + + protected final void initTable() { + Schema schema = new Schema( + required(1, "longCol", Types.LongType.get()), + required(2, "intCol", Types.IntegerType.get()), + required(3, "floatCol", Types.FloatType.get()), + optional(4, "doubleCol", Types.DoubleType.get()), + optional(6, "dateCol", Types.DateType.get()), + optional(7, "timestampCol", Types.TimestampType.withZone()), + optional(8, "stringCol", Types.StringType.get())); + + SparkSessionCatalog catalog = null; + try { + catalog = (SparkSessionCatalog) + Spark3Util.catalogAndIdentifier(spark(), "spark_catalog").catalog(); + catalog.dropTable(IDENT); + catalog.createTable(IDENT, SparkSchemaUtil.convert(schema), new Transform[0], Collections.emptyMap()); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + private void appendData() { + Dataset df = spark().range(0, NUM_ROWS * NUM_FILES, 1, NUM_FILES) + .withColumnRenamed("id", "longCol") + .withColumn("intCol", expr("CAST(longCol AS INT)")) + .withColumn("floatCol", expr("CAST(longCol AS FLOAT)")) + .withColumn("doubleCol", expr("CAST(longCol AS DOUBLE)")) + .withColumn("dateCol", date_add(current_date(), col("intCol").mod(NUM_FILES))) + .withColumn("timestampCol", expr("TO_TIMESTAMP(dateCol)")) + .withColumn("stringCol", expr("CAST(dateCol AS STRING)")); + writeData(df); + } + + private void writeData(Dataset df) { + df.write().format("iceberg").mode(SaveMode.Append).save(NAME); + } + + protected final Table table() { + try { + return Spark3Util.loadIcebergTable(spark(), NAME); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + protected final SparkSession spark() { + return spark; + } + + protected String getCatalogWarehouse() { + String location = Files.createTempDir().getAbsolutePath() + "/" + UUID.randomUUID() + "/"; + return location; + } + + protected void cleanupFiles() throws IOException { + spark.sql("DROP TABLE IF EXISTS " + NAME); + } + + protected void setupSpark() { + SparkSession.Builder builder = + 
SparkSession.builder() .config("spark.sql.catalog.spark_catalog", "org.apache.iceberg.spark.SparkSessionCatalog") .config("spark.sql.catalog.spark_catalog.type", "hadoop") .config("spark.sql.catalog.spark_catalog.warehouse", getCatalogWarehouse()) .master("local[*]"); spark = builder.getOrCreate(); Configuration sparkHadoopConf = spark.sessionState().newHadoopConf(); hadoopConf.forEach(entry -> sparkHadoopConf.set(entry.getKey(), entry.getValue())); } protected void tearDownSpark() { spark.stop(); } } From fa2add8511aaa00383607bfdab1562f28761c5c7 Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Mon, 31 Jan 2022 17:15:05 -0600 Subject: [PATCH 15/30] WIP --- .../spark/actions/Spark3ZOrderStrategy.java | 55 ++++++++++++++----- 1 file changed, 42 insertions(+), 13 deletions(-) diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java index 6e9354cf605c..62c543833361 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java @@ -186,25 +186,54 @@ static byte[] interleaveBits(Seq scalaBinary) { return ZOrderByteUtils.interleaveBits(columnsBinary); } - private static final UserDefinedFunction FLOAT_TO_BYTES = - functions.udf((Float f) -> ByteBuffer.allocate(4).putFloat(f).array(), DataTypes.BinaryType); + private static UserDefinedFunction intToOrderedBytesUDF() { + return functions.udf((Integer value) -> { + if (value == null) { + return null; + } + return ZOrderByteUtils.intToOrderedBytes(value); + }, DataTypes.BinaryType) + .withName("INT-LEXICAL-BYTES"); + } - private static final UserDefinedFunction DOUBLE_TO_BYTES = - functions.udf((Double d) -> ByteBuffer.allocate(8).putDouble(d).array(), DataTypes.BinaryType); + private static UserDefinedFunction longToOrderedBytesUDF() { + return functions.udf((Long value) -> { + if (value == null) { + return null; + } + return ZOrderByteUtils.longToOrderedBytes(value); + }, DataTypes.BinaryType) + .withName("LONG-LEXICAL-BYTES"); + } - private static UserDefinedFunction getLexicalBytesIntLike(int size) { - return functions.udf((byte[] binary) -> ZOrderByteUtils.orderIntLikeBytes(binary, size), DataTypes.BinaryType) - .withName("INT-LIKE-LEXICAL-BYTES"); + private static UserDefinedFunction floatToOrderedBytesUDF() { + return functions.udf((Float value) -> { + if (value == null) { + return null; + } + return ZOrderByteUtils.floatToOrderedBytes(value); + }, DataTypes.BinaryType) + .withName("FLOAT-LEXICAL-BYTES"); } - private static UserDefinedFunction getLexicalBytesFloatLike(int size) { - return functions.udf((byte[] binary) -> ZOrderByteUtils.orderFloatLikeBytes(binary, size), DataTypes.BinaryType) - .withName("FLOAT-LIKE-LEXICAL-BYTES"); + private static UserDefinedFunction doubleToOrderedBytesUDF() { + return functions.udf((Double value) -> { + if (value == null) { + return null; + } + return ZOrderByteUtils.doubleToOrderedBytes(value); + }, DataTypes.BinaryType) + .withName("DOUBLE-LEXICAL-BYTES"); + } - private static UserDefinedFunction getLexicalBytesUTF8Like(int size) { - return functions.udf((byte[] binary) -> ZOrderByteUtils.orderUTF8LikeBytes(binary, size), DataTypes.BinaryType) - .withName("UTF8-LIKE-LEXICAL-BYTES"); + private static UserDefinedFunction stringToOrderedBytesUDF() { + return functions.udf((String value) -> { + if (value == null) {
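+          // A null value has no natural lexical bytes, so this WIP maps it to null; the next patch in the series substitutes fixed-size empty byte arrays so every column always contributes the same number of bytes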
+ return null; + } + return ZOrderByteUtils.stringToOrderedBytes(value); + }, DataTypes.BinaryType) + .withName("STRING-LEXICAL-BYTES"); } private static final UserDefinedFunction INTERLEAVE_UDF = From 6974f45ca400fc5237f49843079de64ff38c1fef Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Tue, 1 Feb 2022 15:21:38 -0600 Subject: [PATCH 16/30] Update to Match new UtilityCode --- .../spark/actions/Spark3ZOrderStrategy.java | 110 +++++++++++------- 1 file changed, 67 insertions(+), 43 deletions(-) diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java index 62c543833361..449caa039aec 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java @@ -19,7 +19,6 @@ package org.apache.iceberg.spark.actions; -import java.nio.ByteBuffer; import java.util.Arrays; import java.util.List; import java.util.Set; @@ -71,6 +70,7 @@ import scala.collection.Seq; public class Spark3ZOrderStrategy extends Spark3SortStrategy { + private static final String Z_COLUMN = "ICEZVALUE"; private static final Schema Z_SCHEMA = new Schema(NestedField.required(0, Z_COLUMN, Types.BinaryType.get())); private static final org.apache.iceberg.SortOrder Z_SORT_ORDER = org.apache.iceberg.SortOrder.builderFor(Z_SCHEMA) @@ -91,7 +91,8 @@ public Spark3ZOrderStrategy(Table table, SparkSession spark, List zOrder List partZOrderCols = identityPartitionColumns .filter(zOrderColNames::contains) .collect(Collectors.toList()); - Preconditions.checkArgument(partZOrderCols.isEmpty(), + Preconditions.checkArgument( + partZOrderCols.isEmpty(), "Cannot ZOrder on an Identity partition column as these values are constant within a partition, " + "ZOrdering requested on %s", partZOrderCols); @@ -143,13 +144,13 @@ public Set rewriteFiles(List filesToRewrite) { .map(n -> functions.col(n)) .toArray(Column[]::new); - List zOrderColumns = zOrderColNames.stream() + List zOrderColumns = zOrderColNames.stream() .map(scanDF.schema()::apply) .collect(Collectors.toList()); Column zvalueArray = functions.array(zOrderColumns.stream().map(colStruct -> - SparkZOrder.sortedLexicographically(functions.col(colStruct.name()), colStruct.dataType()) - ).toArray(Column[]::new)); + SparkZOrder.sortedLexicographically(functions.col(colStruct.name()), colStruct.dataType()) + ).toArray(Column[]::new)); Dataset zvalueDF = scanDF.withColumn(Z_COLUMN, SparkZOrder.interleaveBytes(zvalueArray)); @@ -180,60 +181,83 @@ protected org.apache.iceberg.SortOrder sortOrder() { static class SparkZOrder { + private static final byte[] TINY_EMPTY = new byte[Byte.BYTES]; + private static final byte[] SHORT_EMPTY = new byte[Short.BYTES]; + private static final byte[] INT_EMPTY = new byte[Integer.BYTES]; + private static final byte[] LONG_EMPTY = new byte[Long.BYTES]; + private static final byte[] FLOAT_EMPTY = new byte[Float.BYTES]; + private static final byte[] DOUBLE_EMPTY = new byte[Double.BYTES]; + static byte[] interleaveBits(Seq scalaBinary) { byte[][] columnsBinary = scala.collection.JavaConverters.seqAsJavaList(scalaBinary) .toArray(new byte[scalaBinary.size()][]); return ZOrderByteUtils.interleaveBits(columnsBinary); } + private static UserDefinedFunction tinyToOrderedBytesUDF() { + return functions.udf((Byte value) -> { + if (value == null) { + return TINY_EMPTY; + } + return 
ZOrderByteUtils.tinyintToOrderedBytes(value); + }, DataTypes.BinaryType) + .withName("TINY_ORDERED_BYTES"); + } + + private static UserDefinedFunction shortToOrderedBytesUDF() { + return functions.udf((Short value) -> { + if (value == null) { + return SHORT_EMPTY; + } + return ZOrderByteUtils.shortToOrderedBytes(value); + }, DataTypes.BinaryType) + .withName("SHORT_ORDERED_BYTES"); + } + private static UserDefinedFunction intToOrderedBytesUDF() { return functions.udf((Integer value) -> { if (value == null) { - return null; + return INT_EMPTY; } return ZOrderByteUtils.intToOrderedBytes(value); }, DataTypes.BinaryType) - .withName("INT-LEXICAL-BYTES"); + .withName("INT_ORDERED_BYTES"); } private static UserDefinedFunction longToOrderedBytesUDF() { return functions.udf((Long value) -> { - if (value == null) { - return null; - } - return ZOrderByteUtils.longToOrderedBytes(value); - }, DataTypes.BinaryType) - .withName("LONG-LEXICAL-BYTES"); + if (value == null) { + return LONG_EMPTY; + } + return ZOrderByteUtils.longToOrderedBytes(value); + }, DataTypes.BinaryType) + .withName("LONG_ORDERED_BYTES"); } private static UserDefinedFunction floatToOrderedBytesUDF() { return functions.udf((Float value) -> { - if (value == null) { - return null; - } - return ZOrderByteUtils.floatToOrderedBytes(value); - }, DataTypes.BinaryType) - .withName("FLOAT-LEXICAL-BYTES"); + if (value == null) { + return FLOAT_EMPTY; + } + return ZOrderByteUtils.floatToOrderedBytes(value); + }, DataTypes.BinaryType) + .withName("FLOAT_ORDERED_BYTES"); } private static UserDefinedFunction doubleToOrderedBytesUDF() { return functions.udf((Double value) -> { - if (value == null) { - return null; - } - return ZOrderByteUtils.doubleToOrderedBytes(value); - }, DataTypes.BinaryType) - .withName("DOUBLE-LEXICAL-BYTES"); + if (value == null) { + return DOUBLE_EMPTY; + } + return ZOrderByteUtils.doubleToOrderedBytes(value); + }, DataTypes.BinaryType) + .withName("DOUBLE_ORDERED_BYTES"); } private static UserDefinedFunction stringToOrderedBytesUDF() { - return functions.udf((String value) -> { - if (value == null) { - return null; - } - return ZOrderByteUtils.stringToOrderedBytes(value); - }, DataTypes.BinaryType) - .withName("STRING-LEXICAL-BYTES"); + return functions.udf((String value) -> ZOrderByteUtils.stringToOrderedBytes(value, STRING_KEY_LENGTH), + DataTypes.BinaryType) + .withName("STRING-LEXICAL-BYTES"); } private static final UserDefinedFunction INTERLEAVE_UDF = @@ -247,27 +271,27 @@ static Column interleaveBytes(Column arrayBinary) { @SuppressWarnings("checkstyle:CyclomaticComplexity") static Column sortedLexicographically(Column column, DataType type) { if (type instanceof ByteType) { - return column.cast(DataTypes.BinaryType); + return tinyToOrderedBytesUDF().apply(column); } else if (type instanceof ShortType) { - return getLexicalBytesIntLike(2).apply(column.cast(DataTypes.BinaryType)); + return shortToOrderedBytesUDF().apply(column); } else if (type instanceof IntegerType) { - return getLexicalBytesIntLike(4).apply(column.cast(DataTypes.BinaryType)); + return intToOrderedBytesUDF().apply(column); } else if (type instanceof LongType) { - return getLexicalBytesIntLike(8).apply(column.cast(DataTypes.BinaryType)); + return longToOrderedBytesUDF().apply(column); } else if (type instanceof FloatType) { - return getLexicalBytesFloatLike(4).apply(FLOAT_TO_BYTES.apply(column)); + return floatToOrderedBytesUDF().apply(column); } else if (type instanceof DoubleType) { - return
getLexicalBytesFloatLike(8).apply(DOUBLE_TO_BYTES.apply(column)); + return doubleToOrderedBytesUDF().apply(column); } else if (type instanceof StringType) { - return getLexicalBytesUTF8Like(STRING_KEY_LENGTH).apply(column.cast(DataTypes.BinaryType)); + return stringToOrderedBytesUDF().apply(column); } else if (type instanceof BinaryType) { - return getLexicalBytesUTF8Like(STRING_KEY_LENGTH).apply(column); + return stringToOrderedBytesUDF().apply(column); } else if (type instanceof BooleanType) { - return getLexicalBytesUTF8Like(1).apply(column.cast(DataTypes.BinaryType)); + return column.cast(DataTypes.BinaryType); } else if (type instanceof TimestampType) { - return getLexicalBytesIntLike(8).apply(column.cast(DataTypes.LongType).cast(DataTypes.BinaryType)); + return longToOrderedBytesUDF().apply(column.cast(DataTypes.LongType)); } else if (type instanceof DateType) { - return getLexicalBytesIntLike(8).apply(column.cast(DataTypes.LongType).cast(DataTypes.BinaryType)); + return longToOrderedBytesUDF().apply(column.cast(DataTypes.LongType)); } else { throw new IllegalArgumentException( String.format("Cannot use column %s of type %s in ZOrdering, the type is unsupported", From ba43cae1a76c1a9fd30c946ae3899ab139169137 Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Fri, 18 Feb 2022 07:44:08 -0600 Subject: [PATCH 17/30] Checkpoint for Perf Benchmark --- .../apache/iceberg/util/ZOrderByteUtils.java | 10 +- .../IcebergSortCompactionBenchmark.java | 86 ++++++- .../spark/actions/Spark3ZOrderStrategy.java | 143 ++--------- .../spark/actions/Spark3ZOrderUDF.java | 241 ++++++++++++++++++ 4 files changed, 347 insertions(+), 133 deletions(-) create mode 100644 spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java index b008461ea8ca..39ef0dcc14d3 100644 --- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java +++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java @@ -19,6 +19,7 @@ package org.apache.iceberg.util; +import java.io.Serializable; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.CharsetEncoder; @@ -137,6 +138,10 @@ static byte[] interleaveBits(byte[][] columnsBinary) { Arrays.stream(columnsBinary).mapToInt(column -> column.length).sum()); } + public static byte[] interleaveBits(byte[][] columnsBinary, int interleavedSize) { + return interleaveBits(columnsBinary, interleavedSize, ByteBuffer.allocate(interleavedSize)); + } + /** * Interleave bits using a naive loop. Variable length inputs are allowed but to get a consistent ordering it is * required that every column contribute the same number of bytes in each invocation. 
Bits are interleaved from all * @param columnsBinary an array of ordered byte representations of the columns being ZOrdered * @param interleavedSize the number of bytes to use in the output * @return the column bytes interleaved */ - public static byte[] interleaveBits(byte[][] columnsBinary, int interleavedSize) { - byte[] interleavedBytes = new byte[interleavedSize]; + public static byte[] interleaveBits(byte[][] columnsBinary, int interleavedSize, ByteBuffer reuse) { + byte[] interleavedBytes = reuse.array(); int sourceColumn = 0; int sourceByte = 0; int sourceBit = 7; @@ -191,4 +196,5 @@ public static byte[] interleaveBits(byte[][] columnsBinary, int interleavedSize) } return interleavedBytes; } + } diff --git a/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java b/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java index 3347ebef5017..69b21a5a89ef 100644 --- a/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java +++ b/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java @@ -112,6 +112,49 @@ public void sortInt() { .execute(); } + @Benchmark + @Threads(1) + public void sortInt2() { + SparkActions.get() + .rewriteDataFiles(table()) + .sort(SortOrder + .builderFor(table().schema()) + .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("intCol2", SortDirection.ASC, NullOrder.NULLS_FIRST) + .build()) + .execute(); + } + + @Benchmark + @Threads(1) + public void sortInt3() { + SparkActions.get() + .rewriteDataFiles(table()) + .sort(SortOrder + .builderFor(table().schema()) + .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("intCol2", SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("intCol3", SortDirection.ASC, NullOrder.NULLS_FIRST) + .build()) + .execute(); + } + + @Benchmark + @Threads(1) + public void sortInt4() { + SparkActions.get() + .rewriteDataFiles(table()) + .sort(SortOrder + .builderFor(table().schema()) + .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("intCol2", SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("intCol3", SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("intCol4", SortDirection.ASC, NullOrder.NULLS_FIRST) + .build()) + .execute(); + } + @Benchmark @Threads(1) public void sortString() { @@ -165,6 +208,33 @@ public void zSortInt() { .execute(); } + @Benchmark + @Threads(1) + public void zSortInt2() { + SparkActions.get() + .rewriteDataFiles(table()) + .zOrder("intCol", "intCol2") + .execute(); + } + + @Benchmark + @Threads(1) + public void zSortInt3() { + SparkActions.get() + .rewriteDataFiles(table()) + .zOrder("intCol", "intCol2", "intCol3") + .execute(); + } + + @Benchmark + @Threads(1) + public void zSortInt4() { + SparkActions.get() + .rewriteDataFiles(table()) + .zOrder("intCol", "intCol2", "intCol3", "intCol4") + .execute(); + } + @Benchmark @Threads(1) public void zSortString() { @@ -200,11 +270,14 @@ protected final void initTable() { Schema schema = new Schema( required(1, "longCol", Types.LongType.get()), required(2, "intCol", Types.IntegerType.get()), - required(3, "floatCol", Types.FloatType.get()), - optional(4, "doubleCol", Types.DoubleType.get()), - optional(6, "dateCol", Types.DateType.get()), - optional(7, "timestampCol", Types.TimestampType.withZone()), - optional(8, "stringCol", Types.StringType.get())); + required(3, "intCol2", Types.IntegerType.get()),
+ required(4, "intCol3", Types.IntegerType.get()), + required(5, "intCol4", Types.IntegerType.get()), + required(6, "floatCol", Types.FloatType.get()), + optional(7, "doubleCol", Types.DoubleType.get()), + optional(8, "dateCol", Types.DateType.get()), + optional(9, "timestampCol", Types.TimestampType.withZone()), + optional(10, "stringCol", Types.StringType.get())); SparkSessionCatalog catalog = null; try { @@ -221,6 +294,9 @@ private void appendData() { Dataset df = spark().range(0, NUM_ROWS * NUM_FILES, 1, NUM_FILES) .withColumnRenamed("id", "longCol") .withColumn("intCol", expr("CAST(longCol AS INT)")) + .withColumn("intCol2", expr("CAST(longCol AS INT)")) + .withColumn("intCol3", expr("CAST(longCol AS INT)")) + .withColumn("intCol4", expr("CAST(longCol AS INT)")) .withColumn("floatCol", expr("CAST(longCol AS FLOAT)")) .withColumn("doubleCol", expr("CAST(longCol AS DOUBLE)")) .withColumn("dateCol", date_add(current_date(), col("intCol").mod(NUM_FILES))) diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java index 449caa039aec..4c16349ea9dc 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java @@ -19,7 +19,13 @@ package org.apache.iceberg.spark.actions; +import java.io.IOException; +import java.io.Serializable; +import java.nio.ByteBuffer; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.StandardCharsets; import java.util.Arrays; +import java.util.Iterator; import java.util.List; import java.util.Set; import java.util.UUID; @@ -42,6 +48,7 @@ import org.apache.iceberg.types.Types.NestedField; import org.apache.iceberg.util.SortOrderUtil; import org.apache.iceberg.util.ZOrderByteUtils; +import org.apache.spark.api.java.function.MapPartitionsFunction; import org.apache.spark.sql.Column; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; @@ -67,6 +74,7 @@ import org.apache.spark.sql.types.StringType; import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.TimestampType; +import org.sparkproject.jetty.server.Authentication; import scala.collection.Seq; public class Spark3ZOrderStrategy extends Spark3SortStrategy { @@ -76,11 +84,13 @@ public class Spark3ZOrderStrategy extends Spark3SortStrategy { private static final org.apache.iceberg.SortOrder Z_SORT_ORDER = org.apache.iceberg.SortOrder.builderFor(Z_SCHEMA) .sortBy(Z_COLUMN, SortDirection.ASC, NullOrder.NULLS_LAST) .build(); - private static final int STRING_KEY_LENGTH = 60; + private static final int STRING_KEY_LENGTH = 128; private final List zOrderColNames; - private final FileScanTaskSetManager manager = FileScanTaskSetManager.get(); - private final FileRewriteCoordinator rewriteCoordinator = FileRewriteCoordinator.get(); + private transient FileScanTaskSetManager manager = FileScanTaskSetManager.get(); + private transient FileRewriteCoordinator rewriteCoordinator = FileRewriteCoordinator.get(); + + private final SparkZOrder orderHelper; public Spark3ZOrderStrategy(Table table, SparkSession spark, List zOrderColNames) { super(table, spark); @@ -97,6 +107,8 @@ public Spark3ZOrderStrategy(Table table, SparkSession spark, List zOrder "ZOrdering requested on %s", partZOrderCols); + this.orderHelper = new SparkZOrder(zOrderColNames.size()); + this.zOrderColNames = zOrderColNames; } @@ -149,10 +161,10 @@ 
public Set rewriteFiles(List filesToRewrite) { .collect(Collectors.toList()); Column zvalueArray = functions.array(zOrderColumns.stream().map(colStruct -> - SparkZOrder.sortedLexicographically(functions.col(colStruct.name()), colStruct.dataType()) + orderHelper.sortedLexicographically(functions.col(colStruct.name()), colStruct.dataType()) ).toArray(Column[]::new)); - Dataset zvalueDF = scanDF.withColumn(Z_COLUMN, SparkZOrder.interleaveBytes(zvalueArray)); + Dataset zvalueDF = scanDF.withColumn(Z_COLUMN, orderHelper.interleaveBytes(zvalueArray)); SQLConf sqlConf = cloneSession.sessionState().conf(); LogicalPlan sortPlan = sortPlan(distribution, ordering, zvalueDF.logicalPlan(), sqlConf); @@ -178,125 +190,4 @@ public Set rewriteFiles(List filesToRewrite) { protected org.apache.iceberg.SortOrder sortOrder() { return Z_SORT_ORDER; } - - static class SparkZOrder { - - private static final byte[] TINY_EMPTY = new byte[Byte.BYTES]; - private static final byte[] SHORT_EMPTY = new byte[Short.BYTES]; - private static final byte[] INT_EMPTY = new byte[Integer.BYTES]; - private static final byte[] LONG_EMPTY = new byte[Long.BYTES]; - private static final byte[] FLOAT_EMPTY = new byte[Float.BYTES]; - private static final byte[] DOUBLE_EMPTY = new byte[Double.BYTES]; - - static byte[] interleaveBits(Seq scalaBinary) { - byte[][] columnsBinary = scala.collection.JavaConverters.seqAsJavaList(scalaBinary) - .toArray(new byte[scalaBinary.size()][]); - return ZOrderByteUtils.interleaveBits(columnsBinary); - } - - private static UserDefinedFunction tinyToOrderedBytesUDF() { - return functions.udf((Byte value) -> { - if (value == null) { - return TINY_EMPTY; - } - return ZOrderByteUtils.tinyintToOrderedBytes(value); - }, DataTypes.BinaryType) - .withName("TINY_ORDERED_BYTES"); - } - - private static UserDefinedFunction shortToOrderedBytesUDF() { - return functions.udf((Short value) -> { - if (value == null) { - return SHORT_EMPTY; - } - return ZOrderByteUtils.shortToOrderBytes(value); - }, DataTypes.BinaryType) - .withName("SHORT_ORDERED_BYTES"); - } - - private static UserDefinedFunction intToOrderedBytesUDF() { - return functions.udf((Integer value) -> { - if (value == null) { - return INT_EMPTY; - } - return ZOrderByteUtils.intToOrderedBytes(value); - }, DataTypes.BinaryType) - .withName("INT_ORDERED_BYTES"); - } - - private static UserDefinedFunction longToOrderedBytesUDF() { - return functions.udf((Long value) -> { - if (value == null) { - return LONG_EMPTY; - } - return ZOrderByteUtils.longToOrderBytes(value); - }, DataTypes.BinaryType) - .withName("LONG_ORDERED_BYTES"); - } - - private static UserDefinedFunction floatToOrderedBytesUDF() { - return functions.udf((Float value) -> { - if (value == null) { - return FLOAT_EMPTY; - } - return ZOrderByteUtils.floatToOrderedBytes(value); - }, DataTypes.BinaryType) - .withName("FLOAT_ORDERED_BYTES"); - } - - private static UserDefinedFunction doubleToOrderedBytesUDF() { - return functions.udf((Double value) -> { - if (value == null) { - return DOUBLE_EMPTY; - } - return ZOrderByteUtils.doubleToOrderedBytes(value); - }, DataTypes.BinaryType) - .withName("FLOAT_ORDERED_BYTES"); - } - - private static UserDefinedFunction stringToOrderedBytesUDF() { - return functions.udf((String value) -> ZOrderByteUtils.stringToOrderedBytes(value, STRING_KEY_LENGTH), - DataTypes.BinaryType) - .withName("STRING-LEXICAL-BYTES"); - } - - private static final UserDefinedFunction INTERLEAVE_UDF = - functions.udf((Seq arrayBinary) -> interleaveBits(arrayBinary), DataTypes.BinaryType) - 
.withName("INTERLEAVE_BYTES"); - - static Column interleaveBytes(Column arrayBinary) { - return INTERLEAVE_UDF.apply(arrayBinary); - } - - @SuppressWarnings("checkstyle:CyclomaticComplexity") - static Column sortedLexicographically(Column column, DataType type) { - if (type instanceof ByteType) { - return tinyToOrderedBytesUDF().apply(column); - } else if (type instanceof ShortType) { - return shortToOrderedBytesUDF().apply(column); - } else if (type instanceof IntegerType) { - return intToOrderedBytesUDF().apply(column); - } else if (type instanceof LongType) { - return longToOrderedBytesUDF().apply(column); - } else if (type instanceof FloatType) { - return floatToOrderedBytesUDF().apply(column); - } else if (type instanceof DoubleType) { - return doubleToOrderedBytesUDF().apply(column); - } else if (type instanceof StringType) { - return stringToOrderedBytesUDF().apply(column); - } else if (type instanceof BinaryType) { - return stringToOrderedBytesUDF().apply(column); - } else if (type instanceof BooleanType) { - return column.cast(DataTypes.BinaryType); - } else if (type instanceof TimestampType) { - return longToOrderedBytesUDF().apply(column.cast(DataTypes.LongType)); - } else if (type instanceof DateType) { - return longToOrderedBytesUDF().apply(column.cast(DataTypes.LongType)); - } else { - throw new IllegalArgumentException( - String.format("Cannot use column %s of type %s in ZOrdering, the type is unsupported", - column, type)); - } - } - } } diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java new file mode 100644 index 000000000000..d0ee5c2b324c --- /dev/null +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java @@ -0,0 +1,241 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.iceberg.spark.actions; + +import java.io.IOException; +import java.io.Serializable; +import java.nio.ByteBuffer; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.StandardCharsets; +import org.apache.iceberg.util.ZOrderByteUtils; +import org.apache.spark.sql.Column; +import org.apache.spark.sql.expressions.UserDefinedFunction; +import org.apache.spark.sql.functions; +import org.apache.spark.sql.types.BinaryType; +import org.apache.spark.sql.types.BooleanType; +import org.apache.spark.sql.types.ByteType; +import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.DateType; +import org.apache.spark.sql.types.DoubleType; +import org.apache.spark.sql.types.FloatType; +import org.apache.spark.sql.types.IntegerType; +import org.apache.spark.sql.types.LongType; +import org.apache.spark.sql.types.ShortType; +import org.apache.spark.sql.types.StringType; +import org.apache.spark.sql.types.TimestampType; +import scala.collection.Seq; + +class SparkZOrder implements Serializable { + private final int STRING_KEY_LENGTH = 128; + + private final byte[] TINY_EMPTY = new byte[Byte.BYTES]; + private final byte[] SHORT_EMPTY = new byte[Short.BYTES]; + private final byte[] INT_EMPTY = new byte[Integer.BYTES]; + private final byte[] LONG_EMPTY = new byte[Long.BYTES]; + private final byte[] FLOAT_EMPTY = new byte[Float.BYTES]; + private final byte[] DOUBLE_EMPTY = new byte[Double.BYTES]; + + transient private ThreadLocal outputBuffer; + transient private ThreadLocal inputHolder; + transient private ThreadLocal[] inputBuffers; + transient private ThreadLocal encoder; + + private final int numCols; + + private int inputCol = 0; + private int totalBytes = 0; + + SparkZOrder(int numCols) { + this.numCols = numCols; + } + + private void readObject(java.io.ObjectInputStream in) + throws IOException, ClassNotFoundException { + in.defaultReadObject(); + inputBuffers = new ThreadLocal[numCols]; + inputHolder = ThreadLocal.withInitial(() -> new byte[numCols][]); + encoder = ThreadLocal.withInitial(() -> StandardCharsets.UTF_8.newEncoder()); + } + + + private ByteBuffer outputBuffer(int size) { + if (outputBuffer == null) { + // May over allocate on concurrent calls + outputBuffer = ThreadLocal.withInitial(() -> ByteBuffer.allocate(size)); + } + return outputBuffer.get(); + } + + private ByteBuffer inputBuffer(int position, int size){ + if (inputBuffers[position] == null) { + // May over allocate on concurrent calls + inputBuffers[position] = ThreadLocal.withInitial(() -> ByteBuffer.allocate(size)); + } + return inputBuffers[position].get(); + } + + byte[] interleaveBits(Seq scalaBinary) { + byte[][] columnsBinary = scala.collection.JavaConverters.seqAsJavaList(scalaBinary) + .toArray(inputHolder.get()); + return ZOrderByteUtils.interleaveBits(columnsBinary, totalBytes, outputBuffer(totalBytes)); + } + + private UserDefinedFunction tinyToOrderedBytesUDF() { + int position = inputCol; + UserDefinedFunction udf = functions.udf((Byte value) -> { + if (value == null) { + return TINY_EMPTY; + } + return ZOrderByteUtils.tinyintToOrderedBytes(value, inputBuffer(position, Byte.BYTES)).array(); + }, DataTypes.BinaryType).withName("TINY_ORDERED_BYTES"); + + this.inputCol++; + this.totalBytes+= Byte.BYTES; + + return udf; + } + + private UserDefinedFunction shortToOrderedBytesUDF() { + int position = inputCol; + UserDefinedFunction udf = functions.udf((Short value) -> { + if (value == null) { + return SHORT_EMPTY; + } + 
return ZOrderByteUtils.shortToOrderedBytes(value, inputBuffer(position, Short.BYTES)).array(); + }, DataTypes.BinaryType) + .withName("SHORT_ORDERED_BYTES"); + + this.inputCol++; + this.totalBytes+= Short.BYTES; + + return udf; + } + + private UserDefinedFunction intToOrderedBytesUDF() { + int position = inputCol; + UserDefinedFunction udf = functions.udf((Integer value) -> { + if (value == null) { + return INT_EMPTY; + } + return ZOrderByteUtils.intToOrderedBytes(value, inputBuffer(position, Integer.BYTES)).array(); + }, DataTypes.BinaryType) + .withName("INT_ORDERED_BYTES"); + + this.inputCol++; + this.totalBytes += Integer.BYTES; + + return udf; + } + + private UserDefinedFunction longToOrderedBytesUDF() { + int position = inputCol; + UserDefinedFunction udf = functions.udf((Long value) -> { + if (value == null) { + return LONG_EMPTY; + } + return ZOrderByteUtils.longToOrderedBytes(value, inputBuffer(position, Long.BYTES)).array(); + }, DataTypes.BinaryType) + .withName("LONG_ORDERED_BYTES"); + + this.inputCol++; + this.totalBytes += Long.BYTES; + + return udf; + } + + private UserDefinedFunction floatToOrderedBytesUDF() { + int position = inputCol; + UserDefinedFunction udf = functions.udf((Float value) -> { + if (value == null) { + return FLOAT_EMPTY; + } + return ZOrderByteUtils.floatToOrderedBytes(value, inputBuffer(position, Float.BYTES)).array(); + }, DataTypes.BinaryType) + .withName("FLOAT_ORDERED_BYTES"); + + this.inputCol++; + this.totalBytes += Float.BYTES; + + return udf; + } + + private UserDefinedFunction doubleToOrderedBytesUDF() { + int position = inputCol; + UserDefinedFunction udf = functions.udf((Double value) -> { + if (value == null) { + return DOUBLE_EMPTY; + } + return ZOrderByteUtils.doubleToOrderedBytes(value, inputBuffer(position, Double.BYTES)).array(); + }, DataTypes.BinaryType) + .withName("FLOAT_ORDERED_BYTES"); + + this.inputCol++; + this.totalBytes += Double.BYTES; + + return udf; + } + + private UserDefinedFunction stringToOrderedBytesUDF() { + int position = inputCol; + UserDefinedFunction udf = functions.udf((String value) -> + ZOrderByteUtils.stringToOrderedBytes(value, STRING_KEY_LENGTH, inputBuffer(position, STRING_KEY_LENGTH), + encoder.get()).array(), DataTypes.BinaryType).withName("STRING-LEXICAL-BYTES"); + + this.inputCol++; + this.totalBytes += STRING_KEY_LENGTH; + + return udf; + } + + private final UserDefinedFunction INTERLEAVE_UDF = + functions.udf((Seq arrayBinary) -> interleaveBits(arrayBinary), DataTypes.BinaryType) + .withName("INTERLEAVE_BYTES"); + + Column interleaveBytes(Column arrayBinary) { + return INTERLEAVE_UDF.apply(arrayBinary); + } + + @SuppressWarnings("checkstyle:CyclomaticComplexity") + Column sortedLexicographically(Column column, DataType type) { + if (type instanceof ByteType) { + return tinyToOrderedBytesUDF().apply(column); + } else if (type instanceof ShortType) { + return shortToOrderedBytesUDF().apply(column); + } else if (type instanceof IntegerType) { + return intToOrderedBytesUDF().apply(column); + } else if (type instanceof LongType) { + return longToOrderedBytesUDF().apply(column); + } else if (type instanceof FloatType) { + return floatToOrderedBytesUDF().apply(column); + } else if (type instanceof DoubleType) { + return doubleToOrderedBytesUDF().apply(column); + } else if (type instanceof StringType) { + return stringToOrderedBytesUDF().apply(column); + } else if (type instanceof BinaryType) { + return stringToOrderedBytesUDF().apply(column); + } else if (type instanceof BooleanType) { + return 
column.cast(DataTypes.BinaryType);
+    } else if (type instanceof TimestampType) {
+      return longToOrderedBytesUDF().apply(column.cast(DataTypes.LongType));
+    } else if (type instanceof DateType) {
+      return longToOrderedBytesUDF().apply(column.cast(DataTypes.LongType));
+    } else {
+      throw new IllegalArgumentException(
+          String.format("Cannot use column %s of type %s in ZOrdering, the type is unsupported",
+              column, type));
+    }
+  }
+}
\ No newline at end of file

From a50b496c79d58da8957c4d8148c06b7388203d95 Mon Sep 17 00:00:00 2001
From: Russell_Spitzer
Date: Tue, 15 Mar 2022 21:57:29 -0500
Subject: [PATCH 18/30] WIP Reviewer Comments, Cleanup

Change all primitives to use 8 byte buffers; types are now aligned based on
magnitude. Perf test is still WIP, using new random-generating UDFs.

---
 .../apache/iceberg/util/ZOrderByteUtils.java  |  38 +-
 .../iceberg/util/TestZOrderByteUtil.java      |  24 +-
 .../IcebergSortCompactionBenchmark.java       |  25 +-
 .../spark/action/RandomGeneratingUDF.java     |  42 ++
 .../spark/actions/Spark3ZOrderStrategy.java   |  30 +-
 .../spark/actions/Spark3ZOrderUDF.java        | 396 +++++++++---------
 6 files changed, 293 insertions(+), 262 deletions(-)
 create mode 100644 spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/RandomGeneratingUDF.java

diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
index 39ef0dcc14d3..44141e777295 100644
--- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
+++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java
@@ -19,7 +19,6 @@
 package org.apache.iceberg.util;
 
-import java.io.Serializable;
 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
 import java.nio.charset.CharsetEncoder;
@@ -34,6 +33,8 @@
  * that are lexicographically ordered. Bytes produced should be compared lexicographically as
  * unsigned bytes, big-endian.
  * <p>
+ * All types except for String are stored within an 8 byte buffer
+ * <p>
  * Most of these techniques are derived from
  * https://aws.amazon.com/blogs/database/z-order-indexing-for-multifaceted-queries-in-amazon-dynamodb-part-2/
  * <p>
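[Editor's illustration] The hunks below widen every numeric primitive to the same 8 byte ordered form: cast to long, flip the sign bit, and write big-endian bytes, after which plain unsigned lexicographic comparison of the buffers agrees with numeric order. A self-contained sketch of that property, separate from the Iceberg classes in this patch (class and method names here are illustrative only):

    import java.nio.ByteBuffer;

    public class OrderedBytesDemo {

      // Widen any signed primitive to 8 bytes and flip the sign bit; unsigned
      // lexicographic comparison of the result then matches the numeric order.
      static byte[] orderedBytes(long val) {
        return ByteBuffer.allocate(8).putLong(val ^ 0x8000000000000000L).array();
      }

      // Unsigned, big-endian lexicographic comparison, as used on z-order keys.
      static int compareUnsigned(byte[] a, byte[] b) {
        for (int i = 0; i < a.length; i++) {
          int cmp = Integer.compare(a[i] & 0xFF, b[i] & 0xFF);
          if (cmp != 0) {
            return cmp;
          }
        }
        return 0;
      }

      public static void main(String[] args) {
        long[] values = {Long.MIN_VALUE, -42L, -1L, 0L, 1L, 42L, Long.MAX_VALUE};
        for (int i = 1; i < values.length; i++) {
          // Each adjacent pair must compare the same way before and after encoding
          int cmp = compareUnsigned(orderedBytes(values[i - 1]), orderedBytes(values[i]));
          System.out.println(values[i - 1] + " < " + values[i] + " holds after encoding: " + (cmp < 0));
        }
      }
    }

Keeping every column at a fixed 8 byte width also keeps the interleave loop simple: no column runs out of bytes before the others, so the "find next column with a byte" bookkeeping never has to skip.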
@@ -42,18 +43,23 @@
  */
 public class ZOrderByteUtils {
 
+  public static final int BUFFER_SIZE = 8;
+
   private ZOrderByteUtils() {
 
   }
 
+  static ByteBuffer allocatePrimitiveBuffer() {
+    return ByteBuffer.allocate(BUFFER_SIZE);
+  }
+
   /**
    * Signed ints do not have their bytes in magnitude order because of the sign bit.
    * To fix this, flip the sign bit so that all negatives are ordered before positives. This essentially
    * shifts the 0 value so that we don't break our ordering when we cross the new 0 value.
    */
   public static ByteBuffer intToOrderedBytes(int val, ByteBuffer reuse) {
-    ByteBuffer bytes = ByteBuffers.reuse(reuse, Integer.BYTES);
-    bytes.putInt(val ^ 0x80000000);
+    ByteBuffer bytes = ByteBuffers.reuse(reuse, BUFFER_SIZE);
+    bytes.putLong(((long) val) ^ 0x8000000000000000L);
     return bytes;
   }
 
@@ -61,7 +67,7 @@ public static ByteBuffer intToOrderedBytes(int val, ByteBuffer reuse) {
    * Signed longs are treated the same as the signed ints in {@link #intToOrderedBytes(int, ByteBuffer)}
    */
   public static ByteBuffer longToOrderedBytes(long val, ByteBuffer reuse) {
-    ByteBuffer bytes = ByteBuffers.reuse(reuse, Long.BYTES);
+    ByteBuffer bytes = ByteBuffers.reuse(reuse, BUFFER_SIZE);
     bytes.putLong(val ^ 0x8000000000000000L);
     return bytes;
   }
 
@@ -70,8 +76,8 @@ public static ByteBuffer intToOrderedBytes(int val, ByteBuffer reuse) {
    * Signed shorts are treated the same as the signed ints in {@link #intToOrderedBytes(int, ByteBuffer)}
    */
   public static ByteBuffer shortToOrderedBytes(short val, ByteBuffer reuse) {
-    ByteBuffer bytes = ByteBuffers.reuse(reuse, Short.BYTES);
-    bytes.putShort((short) (val ^ (0x8000)));
+    ByteBuffer bytes = ByteBuffers.reuse(reuse, BUFFER_SIZE);
+    bytes.putLong(((long) val) ^ 0x8000000000000000L);
     return bytes;
   }
 
@@ -79,8 +85,8 @@ public static ByteBuffer shortToOrderedBytes(short val, ByteBuffer reuse) {
    * Signed tiny ints are treated the same as the signed ints in {@link #intToOrderedBytes(int, ByteBuffer)}
    */
   public static ByteBuffer tinyintToOrderedBytes(byte val, ByteBuffer reuse) {
-    ByteBuffer bytes = ByteBuffers.reuse(reuse, Byte.BYTES);
-    bytes.put((byte) (val ^ (0x80)));
+    ByteBuffer bytes = ByteBuffers.reuse(reuse, BUFFER_SIZE);
+    bytes.putLong(((long) val) ^ 0x8000000000000000L);
     return bytes;
   }
 
@@ -93,10 +99,10 @@ public static ByteBuffer tinyintToOrderedBytes(byte val, ByteBuffer reuse) {
    * comparable bytes
    */
   public static ByteBuffer floatToOrderedBytes(float val, ByteBuffer reuse) {
-    ByteBuffer bytes = ByteBuffers.reuse(reuse, Float.BYTES);
-    int ival = Float.floatToIntBits(val);
-    ival ^= ((ival >> (Integer.SIZE - 1)) | Integer.MIN_VALUE);
-    bytes.putInt(ival);
+    ByteBuffer bytes = ByteBuffers.reuse(reuse, BUFFER_SIZE);
+    long lval = Double.doubleToLongBits(val);
+    lval ^= ((lval >> (Long.SIZE - 1)) | Long.MIN_VALUE);
+    bytes.putLong(lval);
     return bytes;
   }
 
@@ -104,10 +110,10 @@ public static ByteBuffer floatToOrderedBytes(float val, ByteBuffer reuse) {
    * Doubles are treated the same as floats in {@link #floatToOrderedBytes(float, ByteBuffer)}
    */
   public static ByteBuffer doubleToOrderedBytes(double val, ByteBuffer reuse) {
-    ByteBuffer bytes = ByteBuffers.reuse(reuse, Double.BYTES);
-    long lng = Double.doubleToLongBits(val);
-    lng ^= ((lng >> (Long.SIZE - 1)) | Long.MIN_VALUE);
-    bytes.putLong(lng);
+    ByteBuffer bytes = ByteBuffers.reuse(reuse, BUFFER_SIZE);
+    long lval = Double.doubleToLongBits(val);
+    lval ^= ((lval >> (Long.SIZE - 1)) | Long.MIN_VALUE);
+    bytes.putLong(lval);
     return bytes;
   }
 
diff --git
a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java index bf84319d0d45..858200c370b4 100644 --- a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java +++ b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java @@ -145,8 +145,8 @@ public void testInterleaveMixedBits() { @Test public void testIntOrdering() { - ByteBuffer aBuffer = ByteBuffer.allocate(Integer.BYTES); - ByteBuffer bBuffer = ByteBuffer.allocate(Integer.BYTES); + ByteBuffer aBuffer = ZOrderByteUtils.allocatePrimitiveBuffer(); + ByteBuffer bBuffer = ZOrderByteUtils.allocatePrimitiveBuffer(); for (int i = 0; i < NUM_TESTS; i++) { int aInt = random.nextInt(); int bInt = random.nextInt(); @@ -164,8 +164,8 @@ public void testIntOrdering() { @Test public void testLongOrdering() { - ByteBuffer aBuffer = ByteBuffer.allocate(Long.BYTES); - ByteBuffer bBuffer = ByteBuffer.allocate(Long.BYTES); + ByteBuffer aBuffer = ZOrderByteUtils.allocatePrimitiveBuffer(); + ByteBuffer bBuffer = ZOrderByteUtils.allocatePrimitiveBuffer(); for (int i = 0; i < NUM_TESTS; i++) { long aLong = random.nextInt(); long bLong = random.nextInt(); @@ -183,8 +183,8 @@ public void testLongOrdering() { @Test public void testShortOrdering() { - ByteBuffer aBuffer = ByteBuffer.allocate(Short.BYTES); - ByteBuffer bBuffer = ByteBuffer.allocate(Short.BYTES); + ByteBuffer aBuffer = ZOrderByteUtils.allocatePrimitiveBuffer(); + ByteBuffer bBuffer = ZOrderByteUtils.allocatePrimitiveBuffer(); for (int i = 0; i < NUM_TESTS; i++) { short aShort = (short) (random.nextInt() % (Short.MAX_VALUE + 1)); short bShort = (short) (random.nextInt() % (Short.MAX_VALUE + 1)); @@ -202,8 +202,8 @@ public void testShortOrdering() { @Test public void testTinyOrdering() { - ByteBuffer aBuffer = ByteBuffer.allocate(Byte.BYTES); - ByteBuffer bBuffer = ByteBuffer.allocate(Byte.BYTES); + ByteBuffer aBuffer = ZOrderByteUtils.allocatePrimitiveBuffer(); + ByteBuffer bBuffer = ZOrderByteUtils.allocatePrimitiveBuffer(); for (int i = 0; i < NUM_TESTS; i++) { byte aByte = (byte) (random.nextInt() % (Byte.MAX_VALUE + 1)); byte bByte = (byte) (random.nextInt() % (Byte.MAX_VALUE + 1)); @@ -221,8 +221,8 @@ public void testTinyOrdering() { @Test public void testFloatOrdering() { - ByteBuffer aBuffer = ByteBuffer.allocate(Float.BYTES); - ByteBuffer bBuffer = ByteBuffer.allocate(Float.BYTES); + ByteBuffer aBuffer = ZOrderByteUtils.allocatePrimitiveBuffer(); + ByteBuffer bBuffer = ZOrderByteUtils.allocatePrimitiveBuffer(); for (int i = 0; i < NUM_TESTS; i++) { float aFloat = random.nextFloat(); float bFloat = random.nextFloat(); @@ -240,8 +240,8 @@ public void testFloatOrdering() { @Test public void testDoubleOrdering() { - ByteBuffer aBuffer = ByteBuffer.allocate(Double.BYTES); - ByteBuffer bBuffer = ByteBuffer.allocate(Double.BYTES); + ByteBuffer aBuffer = ZOrderByteUtils.allocatePrimitiveBuffer(); + ByteBuffer bBuffer = ZOrderByteUtils.allocatePrimitiveBuffer(); for (int i = 0; i < NUM_TESTS; i++) { double aDouble = random.nextDouble(); double bDouble = random.nextDouble(); diff --git a/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java b/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java index 69b21a5a89ef..4cb70960cc0b 100644 --- a/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java +++ 
b/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java @@ -42,6 +42,7 @@ import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.connector.catalog.Identifier; import org.apache.spark.sql.connector.expressions.Transform; +import org.apache.spark.sql.types.DataTypes; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; @@ -74,6 +75,7 @@ public class IcebergSortCompactionBenchmark { private static final Identifier IDENT = Identifier.of(NAMESPACE, NAME); private static final int NUM_FILES = 8; private static final long NUM_ROWS = 10000000L; + private static final long UNIQUE_VALUES = NUM_ROWS / 10; private final Configuration hadoopConf = initHadoopConf(); @@ -292,16 +294,23 @@ protected final void initTable() { private void appendData() { Dataset df = spark().range(0, NUM_ROWS * NUM_FILES, 1, NUM_FILES) - .withColumnRenamed("id", "longCol") - .withColumn("intCol", expr("CAST(longCol AS INT)")) - .withColumn("intCol2", expr("CAST(longCol AS INT)")) - .withColumn("intCol3", expr("CAST(longCol AS INT)")) - .withColumn("intCol4", expr("CAST(longCol AS INT)")) - .withColumn("floatCol", expr("CAST(longCol AS FLOAT)")) - .withColumn("doubleCol", expr("CAST(longCol AS DOUBLE)")) + .drop("id") + .withColumn("longCol", new RandomGeneratingUDF(UNIQUE_VALUES).randomLongUDF().apply()) + .withColumn("intCol", + new RandomGeneratingUDF(UNIQUE_VALUES).randomLongUDF().apply().cast(DataTypes.IntegerType)) + .withColumn("intCol2", + new RandomGeneratingUDF(UNIQUE_VALUES).randomLongUDF().apply().cast(DataTypes.IntegerType)) + .withColumn("intCol3", + new RandomGeneratingUDF(UNIQUE_VALUES).randomLongUDF().apply().cast(DataTypes.IntegerType)) + .withColumn("intCol4", + new RandomGeneratingUDF(UNIQUE_VALUES).randomLongUDF().apply().cast(DataTypes.IntegerType)) + .withColumn("floatCol", + new RandomGeneratingUDF(UNIQUE_VALUES).randomLongUDF().apply().cast(DataTypes.FloatType)) + .withColumn("doubleCol", + new RandomGeneratingUDF(UNIQUE_VALUES).randomLongUDF().apply().cast(DataTypes.DoubleType)) .withColumn("dateCol", date_add(current_date(), col("intCol").mod(NUM_FILES))) .withColumn("timestampCol", expr("TO_TIMESTAMP(dateCol)")) - .withColumn("stringCol", expr("CAST(dateCol AS STRING)")); + .withColumn("stringCol", new RandomGeneratingUDF(UNIQUE_VALUES).randomString().apply()); writeData(df); } diff --git a/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/RandomGeneratingUDF.java b/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/RandomGeneratingUDF.java new file mode 100644 index 000000000000..5cb6a350c7c7 --- /dev/null +++ b/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/RandomGeneratingUDF.java @@ -0,0 +1,42 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.iceberg.spark.action; + +import java.io.Serializable; +import java.util.Random; +import org.apache.iceberg.types.Types; +import org.apache.iceberg.util.RandomUtil; +import org.apache.spark.sql.expressions.UserDefinedFunction; +import org.apache.spark.sql.types.DataTypes; + +import static org.apache.spark.sql.functions.udf; + +class RandomGeneratingUDF implements Serializable { + private final long uniqueValues; + private Random rand = new Random(); + + RandomGeneratingUDF(long uniqueValues) { + this.uniqueValues = uniqueValues; + } + + UserDefinedFunction randomLongUDF() { + return udf(() -> rand.nextLong() % (uniqueValues / 2), DataTypes.LongType).asNondeterministic().asNonNullable(); + } + + UserDefinedFunction randomString() { + return udf(() -> (String) RandomUtil.generatePrimitive(Types.StringType.get(), rand), DataTypes.StringType) + .asNondeterministic().asNonNullable(); + } +} diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java index 4c16349ea9dc..71a0274d5a08 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java @@ -19,13 +19,8 @@ package org.apache.iceberg.spark.actions; -import java.io.IOException; -import java.io.Serializable; import java.nio.ByteBuffer; -import java.nio.charset.CharsetEncoder; -import java.nio.charset.StandardCharsets; import java.util.Arrays; -import java.util.Iterator; import java.util.List; import java.util.Set; import java.util.UUID; @@ -47,8 +42,6 @@ import org.apache.iceberg.types.Types; import org.apache.iceberg.types.Types.NestedField; import org.apache.iceberg.util.SortOrderUtil; -import org.apache.iceberg.util.ZOrderByteUtils; -import org.apache.spark.api.java.function.MapPartitionsFunction; import org.apache.spark.sql.Column; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; @@ -57,25 +50,9 @@ import org.apache.spark.sql.connector.distributions.Distribution; import org.apache.spark.sql.connector.distributions.Distributions; import org.apache.spark.sql.connector.expressions.SortOrder; -import org.apache.spark.sql.expressions.UserDefinedFunction; import org.apache.spark.sql.functions; import org.apache.spark.sql.internal.SQLConf; -import org.apache.spark.sql.types.BinaryType; -import org.apache.spark.sql.types.BooleanType; -import org.apache.spark.sql.types.ByteType; -import org.apache.spark.sql.types.DataType; -import org.apache.spark.sql.types.DataTypes; -import org.apache.spark.sql.types.DateType; -import org.apache.spark.sql.types.DoubleType; -import org.apache.spark.sql.types.FloatType; -import org.apache.spark.sql.types.IntegerType; -import org.apache.spark.sql.types.LongType; -import org.apache.spark.sql.types.ShortType; -import org.apache.spark.sql.types.StringType; import org.apache.spark.sql.types.StructField; -import org.apache.spark.sql.types.TimestampType; -import org.sparkproject.jetty.server.Authentication; -import scala.collection.Seq; public class Spark3ZOrderStrategy extends Spark3SortStrategy { @@ -84,13 +61,12 @@ public class Spark3ZOrderStrategy extends Spark3SortStrategy { private static final org.apache.iceberg.SortOrder Z_SORT_ORDER = org.apache.iceberg.SortOrder.builderFor(Z_SCHEMA) .sortBy(Z_COLUMN, SortDirection.ASC, NullOrder.NULLS_LAST) .build(); - private static final int STRING_KEY_LENGTH = 
128; private final List zOrderColNames; private transient FileScanTaskSetManager manager = FileScanTaskSetManager.get(); private transient FileRewriteCoordinator rewriteCoordinator = FileRewriteCoordinator.get(); - private final SparkZOrder orderHelper; + private final Spark3ZOrderUDF orderHelper; public Spark3ZOrderStrategy(Table table, SparkSession spark, List zOrderColNames) { super(table, spark); @@ -107,7 +83,7 @@ public Spark3ZOrderStrategy(Table table, SparkSession spark, List zOrder "ZOrdering requested on %s", partZOrderCols); - this.orderHelper = new SparkZOrder(zOrderColNames.size()); + this.orderHelper = new Spark3ZOrderUDF(zOrderColNames.size()); this.zOrderColNames = zOrderColNames; } @@ -119,7 +95,7 @@ public String name() { @Override protected void validateOptions() { - // TODO implement Zorder Strategy in API Module + // TODO implement ZOrder Strategy in API Module return; } diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java index d0ee5c2b324c..839780fef677 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java @@ -1,15 +1,20 @@ /* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
*/ package org.apache.iceberg.spark.actions; @@ -38,204 +43,197 @@ import org.apache.spark.sql.types.TimestampType; import scala.collection.Seq; -class SparkZOrder implements Serializable { - private final int STRING_KEY_LENGTH = 128; +class Spark3ZOrderUDF implements Serializable { + private static final int STRING_KEY_LENGTH = 16; - private final byte[] TINY_EMPTY = new byte[Byte.BYTES]; - private final byte[] SHORT_EMPTY = new byte[Short.BYTES]; - private final byte[] INT_EMPTY = new byte[Integer.BYTES]; - private final byte[] LONG_EMPTY = new byte[Long.BYTES]; - private final byte[] FLOAT_EMPTY = new byte[Float.BYTES]; - private final byte[] DOUBLE_EMPTY = new byte[Double.BYTES]; + private static final byte[] PRIMITIVE_EMPTY = new byte[ZOrderByteUtils.BUFFER_SIZE]; - transient private ThreadLocal outputBuffer; - transient private ThreadLocal inputHolder; - transient private ThreadLocal[] inputBuffers; - transient private ThreadLocal encoder; + private transient ThreadLocal outputBuffer; + private transient ThreadLocal inputHolder; + private transient ThreadLocal[] inputBuffers; + private transient ThreadLocal encoder; - private final int numCols; + private final int numCols; - private int inputCol = 0; - private int totalBytes = 0; + private int inputCol = 0; + private int totalBytes = 0; - SparkZOrder(int numCols) { - this.numCols = numCols; - } - - private void readObject(java.io.ObjectInputStream in) - throws IOException, ClassNotFoundException { - in.defaultReadObject(); - inputBuffers = new ThreadLocal[numCols]; - inputHolder = ThreadLocal.withInitial(() -> new byte[numCols][]); - encoder = ThreadLocal.withInitial(() -> StandardCharsets.UTF_8.newEncoder()); - } - - - private ByteBuffer outputBuffer(int size) { - if (outputBuffer == null) { - // May over allocate on concurrent calls - outputBuffer = ThreadLocal.withInitial(() -> ByteBuffer.allocate(size)); - } - return outputBuffer.get(); - } - - private ByteBuffer inputBuffer(int position, int size){ - if (inputBuffers[position] == null) { - // May over allocate on concurrent calls - inputBuffers[position] = ThreadLocal.withInitial(() -> ByteBuffer.allocate(size)); - } - return inputBuffers[position].get(); - } - - byte[] interleaveBits(Seq scalaBinary) { - byte[][] columnsBinary = scala.collection.JavaConverters.seqAsJavaList(scalaBinary) - .toArray(inputHolder.get()); - return ZOrderByteUtils.interleaveBits(columnsBinary, totalBytes, outputBuffer(totalBytes)); - } - - private UserDefinedFunction tinyToOrderedBytesUDF() { - int position = inputCol; - UserDefinedFunction udf = functions.udf((Byte value) -> { - if (value == null) { - return TINY_EMPTY; - } - return ZOrderByteUtils.tinyintToOrderedBytes(value, inputBuffer(position, Byte.BYTES)).array(); - }, DataTypes.BinaryType).withName("TINY_ORDERED_BYTES"); - - this.inputCol++; - this.totalBytes+= Byte.BYTES; - - return udf; - } - - private UserDefinedFunction shortToOrderedBytesUDF() { - int position = inputCol; - UserDefinedFunction udf = functions.udf((Short value) -> { - if (value == null) { - return SHORT_EMPTY; - } - return ZOrderByteUtils.shortToOrderedBytes(value, inputBuffer(position, Short.BYTES)).array(); - }, DataTypes.BinaryType) - .withName("SHORT_ORDERED_BYTES"); - - this.inputCol++; - this.totalBytes+= Short.BYTES; - - return udf; - } - - private UserDefinedFunction intToOrderedBytesUDF() { - int position = inputCol; - UserDefinedFunction udf = functions.udf((Integer value) -> { - if (value == null) { - return INT_EMPTY; - } - return 
ZOrderByteUtils.intToOrderedBytes(value, inputBuffer(position, Integer.BYTES)).array(); - }, DataTypes.BinaryType) - .withName("INT_ORDERED_BYTES"); + Spark3ZOrderUDF(int numCols) { + this.numCols = numCols; + } - this.inputCol++; - this.totalBytes += Integer.BYTES; + private void readObject(java.io.ObjectInputStream in) + throws IOException, ClassNotFoundException { + in.defaultReadObject(); + inputBuffers = new ThreadLocal[numCols]; + inputHolder = ThreadLocal.withInitial(() -> new byte[numCols][]); + encoder = ThreadLocal.withInitial(() -> StandardCharsets.UTF_8.newEncoder()); + } - return udf; + private ByteBuffer outputBuffer(int size) { + if (outputBuffer == null) { + // May over allocate on concurrent calls + outputBuffer = ThreadLocal.withInitial(() -> ByteBuffer.allocate(size)); } + return outputBuffer.get(); + } - private UserDefinedFunction longToOrderedBytesUDF() { - int position = inputCol; - UserDefinedFunction udf = functions.udf((Long value) -> { - if (value == null) { - return LONG_EMPTY; - } - return ZOrderByteUtils.longToOrderedBytes(value, inputBuffer(position, Long.BYTES)).array(); - }, DataTypes.BinaryType) - .withName("LONG_ORDERED_BYTES"); - - this.inputCol++; - this.totalBytes += Long.BYTES; - - return udf; + private ByteBuffer inputBuffer(int position, int size) { + if (inputBuffers[position] == null) { + // May over allocate on concurrent calls + inputBuffers[position] = ThreadLocal.withInitial(() -> ByteBuffer.allocate(size)); } - - private UserDefinedFunction floatToOrderedBytesUDF() { - int position = inputCol; - UserDefinedFunction udf = functions.udf((Float value) -> { - if (value == null) { - return FLOAT_EMPTY; - } - return ZOrderByteUtils.floatToOrderedBytes(value, inputBuffer(position, Float.BYTES)).array(); - }, DataTypes.BinaryType) - .withName("FLOAT_ORDERED_BYTES"); - - this.inputCol++; - this.totalBytes += Float.BYTES; - - return udf; - } - - private UserDefinedFunction doubleToOrderedBytesUDF() { - int position = inputCol; - UserDefinedFunction udf = functions.udf((Double value) -> { - if (value == null) { - return DOUBLE_EMPTY; - } - return ZOrderByteUtils.doubleToOrderedBytes(value, inputBuffer(position, Double.BYTES)).array(); - }, DataTypes.BinaryType) - .withName("FLOAT_ORDERED_BYTES"); - - this.inputCol++; - this.totalBytes += Double.BYTES; - - return udf; - } - - private UserDefinedFunction stringToOrderedBytesUDF() { - int position = inputCol; - UserDefinedFunction udf = functions.udf((String value) -> - ZOrderByteUtils.stringToOrderedBytes(value, STRING_KEY_LENGTH, inputBuffer(position, STRING_KEY_LENGTH), - encoder.get()).array(), DataTypes.BinaryType).withName("STRING-LEXICAL-BYTES"); - - this.inputCol++; - this.totalBytes += STRING_KEY_LENGTH; - - return udf; - } - - private final UserDefinedFunction INTERLEAVE_UDF = - functions.udf((Seq arrayBinary) -> interleaveBits(arrayBinary), DataTypes.BinaryType) - .withName("INTERLEAVE_BYTES"); - - Column interleaveBytes(Column arrayBinary) { - return INTERLEAVE_UDF.apply(arrayBinary); - } - - @SuppressWarnings("checkstyle:CyclomaticComplexity") - Column sortedLexicographically(Column column, DataType type) { - if (type instanceof ByteType) { - return tinyToOrderedBytesUDF().apply(column); - } else if (type instanceof ShortType) { - return shortToOrderedBytesUDF().apply(column); - } else if (type instanceof IntegerType) { - return intToOrderedBytesUDF().apply(column); - } else if (type instanceof LongType) { - return longToOrderedBytesUDF().apply(column); - } else if (type instanceof FloatType) 
{ - return floatToOrderedBytesUDF().apply(column); - } else if (type instanceof DoubleType) { - return doubleToOrderedBytesUDF().apply(column); - } else if (type instanceof StringType) { - return stringToOrderedBytesUDF().apply(column); - } else if (type instanceof BinaryType) { - return stringToOrderedBytesUDF().apply(column); - } else if (type instanceof BooleanType) { - return column.cast(DataTypes.BinaryType); - } else if (type instanceof TimestampType) { - return longToOrderedBytesUDF().apply(column.cast(DataTypes.LongType)); - } else if (type instanceof DateType) { - return longToOrderedBytesUDF().apply(column.cast(DataTypes.LongType)); - } else { - throw new IllegalArgumentException( - String.format("Cannot use column %s of type %s in ZOrdering, the type is unsupported", - column, type)); - } + return inputBuffers[position].get(); + } + + byte[] interleaveBits(Seq scalaBinary) { + byte[][] columnsBinary = scala.collection.JavaConverters.seqAsJavaList(scalaBinary) + .toArray(inputHolder.get()); + return ZOrderByteUtils.interleaveBits(columnsBinary, totalBytes, outputBuffer(totalBytes)); + } + + private UserDefinedFunction tinyToOrderedBytesUDF() { + int position = inputCol; + UserDefinedFunction udf = functions.udf((Byte value) -> { + if (value == null) { + return PRIMITIVE_EMPTY; + } + return ZOrderByteUtils.tinyintToOrderedBytes(value, inputBuffer(position, Byte.BYTES)).array(); + }, DataTypes.BinaryType).withName("TINY_ORDERED_BYTES"); + + this.inputCol++; + this.totalBytes += Byte.BYTES; + + return udf; + } + + private UserDefinedFunction shortToOrderedBytesUDF() { + int position = inputCol; + UserDefinedFunction udf = functions.udf((Short value) -> { + if (value == null) { + return PRIMITIVE_EMPTY; + } + return ZOrderByteUtils.shortToOrderedBytes(value, inputBuffer(position, Short.BYTES)).array(); + }, DataTypes.BinaryType).withName("SHORT_ORDERED_BYTES"); + + this.inputCol++; + this.totalBytes += Short.BYTES; + + return udf; + } + + private UserDefinedFunction intToOrderedBytesUDF() { + int position = inputCol; + UserDefinedFunction udf = functions.udf((Integer value) -> { + if (value == null) { + return PRIMITIVE_EMPTY; + } + return ZOrderByteUtils.intToOrderedBytes(value, inputBuffer(position, Integer.BYTES)).array(); + }, DataTypes.BinaryType).withName("INT_ORDERED_BYTES"); + + this.inputCol++; + this.totalBytes += Integer.BYTES; + + return udf; + } + + private UserDefinedFunction longToOrderedBytesUDF() { + int position = inputCol; + UserDefinedFunction udf = functions.udf((Long value) -> { + if (value == null) { + return PRIMITIVE_EMPTY; + } + return ZOrderByteUtils.longToOrderedBytes(value, inputBuffer(position, Long.BYTES)).array(); + }, DataTypes.BinaryType).withName("LONG_ORDERED_BYTES"); + + this.inputCol++; + this.totalBytes += Long.BYTES; + + return udf; + } + + private UserDefinedFunction floatToOrderedBytesUDF() { + int position = inputCol; + UserDefinedFunction udf = functions.udf((Float value) -> { + if (value == null) { + return PRIMITIVE_EMPTY; + } + return ZOrderByteUtils.floatToOrderedBytes(value, inputBuffer(position, Float.BYTES)).array(); + }, DataTypes.BinaryType).withName("FLOAT_ORDERED_BYTES"); + + this.inputCol++; + this.totalBytes += Float.BYTES; + + return udf; + } + + private UserDefinedFunction doubleToOrderedBytesUDF() { + int position = inputCol; + UserDefinedFunction udf = functions.udf((Double value) -> { + if (value == null) { + return PRIMITIVE_EMPTY; + } + return ZOrderByteUtils.doubleToOrderedBytes(value, inputBuffer(position, 
Double.BYTES)).array(); + }, DataTypes.BinaryType).withName("FLOAT_ORDERED_BYTES"); + + this.inputCol++; + this.totalBytes += Double.BYTES; + + return udf; + } + + private UserDefinedFunction stringToOrderedBytesUDF() { + int position = inputCol; + UserDefinedFunction udf = functions.udf((String value) -> + ZOrderByteUtils.stringToOrderedBytes( + value, + STRING_KEY_LENGTH, + inputBuffer(position, STRING_KEY_LENGTH), + encoder.get()).array(), DataTypes.BinaryType) + .withName("STRING-LEXICAL-BYTES"); + + this.inputCol++; + this.totalBytes += STRING_KEY_LENGTH; + + return udf; + } + + private final UserDefinedFunction interleaveUDF = + functions.udf((Seq arrayBinary) -> interleaveBits(arrayBinary), DataTypes.BinaryType) + .withName("INTERLEAVE_BYTES"); + + Column interleaveBytes(Column arrayBinary) { + return interleaveUDF.apply(arrayBinary); + } + + @SuppressWarnings("checkstyle:CyclomaticComplexity") + Column sortedLexicographically(Column column, DataType type) { + if (type instanceof ByteType) { + return tinyToOrderedBytesUDF().apply(column); + } else if (type instanceof ShortType) { + return shortToOrderedBytesUDF().apply(column); + } else if (type instanceof IntegerType) { + return intToOrderedBytesUDF().apply(column); + } else if (type instanceof LongType) { + return longToOrderedBytesUDF().apply(column); + } else if (type instanceof FloatType) { + return floatToOrderedBytesUDF().apply(column); + } else if (type instanceof DoubleType) { + return doubleToOrderedBytesUDF().apply(column); + } else if (type instanceof StringType) { + return stringToOrderedBytesUDF().apply(column); + } else if (type instanceof BinaryType) { + return stringToOrderedBytesUDF().apply(column); + } else if (type instanceof BooleanType) { + return column.cast(DataTypes.BinaryType); + } else if (type instanceof TimestampType) { + return longToOrderedBytesUDF().apply(column.cast(DataTypes.LongType)); + } else if (type instanceof DateType) { + return longToOrderedBytesUDF().apply(column.cast(DataTypes.LongType)); + } else { + throw new IllegalArgumentException( + String.format("Cannot use column %s of type %s in ZOrdering, the type is unsupported", + column, type)); } -} \ No newline at end of file + } +} From 82bfb0738bb5f21f92072a41cd1e54ded295439d Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Fri, 18 Mar 2022 10:10:19 -0500 Subject: [PATCH 19/30] Update benchmarking --- .../IcebergSortCompactionBenchmark.java | 110 +++++++++++------- .../spark/actions/Spark3ZOrderUDF.java | 14 +-- 2 files changed, 72 insertions(+), 52 deletions(-) diff --git a/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java b/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java index 4cb70960cc0b..1fff9c35186f 100644 --- a/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java +++ b/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java @@ -34,6 +34,7 @@ import org.apache.iceberg.spark.Spark3Util; import org.apache.iceberg.spark.SparkSchemaUtil; import org.apache.iceberg.spark.SparkSessionCatalog; +import org.apache.iceberg.spark.actions.Spark3SortStrategy; import org.apache.iceberg.spark.actions.SparkActions; import org.apache.iceberg.types.Types; import org.apache.spark.sql.Dataset; @@ -65,7 +66,7 @@ @Fork(1) @State(Scope.Benchmark) -@Measurement(iterations = 3) +@Measurement(iterations = 10) @BenchmarkMode(Mode.SingleShotTime) @Timeout(time = 1000, timeUnit 
= TimeUnit.HOURS) public class IcebergSortCompactionBenchmark { @@ -74,9 +75,8 @@ public class IcebergSortCompactionBenchmark { private static final String NAME = "sortbench"; private static final Identifier IDENT = Identifier.of(NAMESPACE, NAME); private static final int NUM_FILES = 8; - private static final long NUM_ROWS = 10000000L; - private static final long UNIQUE_VALUES = NUM_ROWS / 10; - + private static final long NUM_ROWS = 7500000L; + private static final long UNIQUE_VALUES = NUM_ROWS / 4; private final Configuration hadoopConf = initHadoopConf(); private SparkSession spark; @@ -107,6 +107,7 @@ public void cleanUpIteration() throws IOException { public void sortInt() { SparkActions.get() .rewriteDataFiles(table()) + .option(Spark3SortStrategy.REWRITE_ALL, "true") .sort(SortOrder .builderFor(table().schema()) .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) @@ -118,43 +119,46 @@ public void sortInt() { @Threads(1) public void sortInt2() { SparkActions.get() - .rewriteDataFiles(table()) - .sort(SortOrder - .builderFor(table().schema()) - .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) - .sortBy("intCol2", SortDirection.ASC, NullOrder.NULLS_FIRST) - .build()) - .execute(); + .rewriteDataFiles(table()) + .option(Spark3SortStrategy.REWRITE_ALL, "true") + .sort(SortOrder + .builderFor(table().schema()) + .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("intCol2", SortDirection.ASC, NullOrder.NULLS_FIRST) + .build()) + .execute(); } @Benchmark @Threads(1) public void sortInt3() { SparkActions.get() - .rewriteDataFiles(table()) - .sort(SortOrder - .builderFor(table().schema()) - .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) - .sortBy("intCol2", SortDirection.ASC, NullOrder.NULLS_FIRST) - .sortBy("intCol3", SortDirection.ASC, NullOrder.NULLS_FIRST) - .sortBy("intCol4", SortDirection.ASC, NullOrder.NULLS_FIRST) - .build()) - .execute(); + .rewriteDataFiles(table()) + .option(Spark3SortStrategy.REWRITE_ALL, "true") + .sort(SortOrder + .builderFor(table().schema()) + .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("intCol2", SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("intCol3", SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("intCol4", SortDirection.ASC, NullOrder.NULLS_FIRST) + .build()) + .execute(); } @Benchmark @Threads(1) public void sortInt4() { SparkActions.get() - .rewriteDataFiles(table()) - .sort(SortOrder - .builderFor(table().schema()) - .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) - .sortBy("intCol2", SortDirection.ASC, NullOrder.NULLS_FIRST) - .sortBy("intCol3", SortDirection.ASC, NullOrder.NULLS_FIRST) - .sortBy("intCol4", SortDirection.ASC, NullOrder.NULLS_FIRST) - .build()) - .execute(); + .rewriteDataFiles(table()) + .option(Spark3SortStrategy.REWRITE_ALL, "true") + .sort(SortOrder + .builderFor(table().schema()) + .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("intCol2", SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("intCol3", SortDirection.ASC, NullOrder.NULLS_FIRST) + .sortBy("intCol4", SortDirection.ASC, NullOrder.NULLS_FIRST) + .build()) + .execute(); } @Benchmark @@ -162,6 +166,7 @@ public void sortInt4() { public void sortString() { SparkActions.get() .rewriteDataFiles(table()) + .option(Spark3SortStrategy.REWRITE_ALL, "true") .sort(SortOrder .builderFor(table().schema()) .sortBy("stringCol", SortDirection.ASC, NullOrder.NULLS_FIRST) @@ -174,6 +179,7 @@ public void sortString() { public void sortFourColumns() { SparkActions.get() 
.rewriteDataFiles(table()) + .option(Spark3SortStrategy.REWRITE_ALL, "true") .sort(SortOrder .builderFor(table().schema()) .sortBy("stringCol", SortDirection.ASC, NullOrder.NULLS_FIRST) @@ -189,6 +195,7 @@ public void sortFourColumns() { public void sortSixColumns() { SparkActions.get() .rewriteDataFiles(table()) + .option(Spark3SortStrategy.REWRITE_ALL, "true") .sort(SortOrder .builderFor(table().schema()) .sortBy("stringCol", SortDirection.ASC, NullOrder.NULLS_FIRST) @@ -206,6 +213,7 @@ public void sortSixColumns() { public void zSortInt() { SparkActions.get() .rewriteDataFiles(table()) + .option(Spark3SortStrategy.REWRITE_ALL, "true") .zOrder("intCol") .execute(); } @@ -214,27 +222,30 @@ public void zSortInt() { @Threads(1) public void zSortInt2() { SparkActions.get() - .rewriteDataFiles(table()) - .zOrder("intCol", "intCol2") - .execute(); + .rewriteDataFiles(table()) + .option(Spark3SortStrategy.REWRITE_ALL, "true") + .zOrder("intCol", "intCol2") + .execute(); } @Benchmark @Threads(1) public void zSortInt3() { SparkActions.get() - .rewriteDataFiles(table()) - .zOrder("intCol", "intCol2", "intCol3") - .execute(); + .rewriteDataFiles(table()) + .option(Spark3SortStrategy.REWRITE_ALL, "true") + .zOrder("intCol", "intCol2", "intCol3") + .execute(); } @Benchmark @Threads(1) public void zSortInt4() { SparkActions.get() - .rewriteDataFiles(table()) - .zOrder("intCol", "intCol2", "intCol3", "intCol4") - .execute(); + .rewriteDataFiles(table()) + .option(Spark3SortStrategy.REWRITE_ALL, "true") + .zOrder("intCol", "intCol2", "intCol3", "intCol4") + .execute(); } @Benchmark @@ -242,6 +253,7 @@ public void zSortInt4() { public void zSortString() { SparkActions.get() .rewriteDataFiles(table()) + .option(Spark3SortStrategy.REWRITE_ALL, "true") .zOrder("stringCol") .execute(); } @@ -251,6 +263,7 @@ public void zSortString() { public void zSortFourColumns() { SparkActions.get() .rewriteDataFiles(table()) + .option(Spark3SortStrategy.REWRITE_ALL, "true") .zOrder("stringCol", "intCol", "dateCol", "doubleCol") .execute(); } @@ -260,6 +273,7 @@ public void zSortFourColumns() { public void zSortSixColumns() { SparkActions.get() .rewriteDataFiles(table()) + .option(Spark3SortStrategy.REWRITE_ALL, "true") .zOrder("stringCol", "intCol", "dateCol", "timestampCol", "doubleCol", "longCol") .execute(); } @@ -284,7 +298,7 @@ protected final void initTable() { SparkSessionCatalog catalog = null; try { catalog = (SparkSessionCatalog) - Spark3Util.catalogAndIdentifier(spark(), "spark_catalog").catalog(); + Spark3Util.catalogAndIdentifier(spark(), "spark_catalog").catalog(); catalog.dropTable(IDENT); catalog.createTable(IDENT, SparkSchemaUtil.convert(schema), new Transform[0], Collections.emptyMap()); } catch (Exception e) { @@ -296,17 +310,23 @@ private void appendData() { Dataset df = spark().range(0, NUM_ROWS * NUM_FILES, 1, NUM_FILES) .drop("id") .withColumn("longCol", new RandomGeneratingUDF(UNIQUE_VALUES).randomLongUDF().apply()) - .withColumn("intCol", + .withColumn( + "intCol", new RandomGeneratingUDF(UNIQUE_VALUES).randomLongUDF().apply().cast(DataTypes.IntegerType)) - .withColumn("intCol2", + .withColumn( + "intCol2", new RandomGeneratingUDF(UNIQUE_VALUES).randomLongUDF().apply().cast(DataTypes.IntegerType)) - .withColumn("intCol3", + .withColumn( + "intCol3", new RandomGeneratingUDF(UNIQUE_VALUES).randomLongUDF().apply().cast(DataTypes.IntegerType)) - .withColumn("intCol4", + .withColumn( + "intCol4", new RandomGeneratingUDF(UNIQUE_VALUES).randomLongUDF().apply().cast(DataTypes.IntegerType)) - 
.withColumn("floatCol", + .withColumn( + "floatCol", new RandomGeneratingUDF(UNIQUE_VALUES).randomLongUDF().apply().cast(DataTypes.FloatType)) - .withColumn("doubleCol", + .withColumn( + "doubleCol", new RandomGeneratingUDF(UNIQUE_VALUES).randomLongUDF().apply().cast(DataTypes.DoubleType)) .withColumn("dateCol", date_add(current_date(), col("intCol").mod(NUM_FILES))) .withColumn("timestampCol", expr("TO_TIMESTAMP(dateCol)")) diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java index 839780fef677..1e6eb60b2579 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java @@ -44,7 +44,7 @@ import scala.collection.Seq; class Spark3ZOrderUDF implements Serializable { - private static final int STRING_KEY_LENGTH = 16; + private static final int STRING_KEY_LENGTH = 8; private static final byte[] PRIMITIVE_EMPTY = new byte[ZOrderByteUtils.BUFFER_SIZE]; @@ -98,7 +98,7 @@ private UserDefinedFunction tinyToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.tinyintToOrderedBytes(value, inputBuffer(position, Byte.BYTES)).array(); + return ZOrderByteUtils.tinyintToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.BUFFER_SIZE)).array(); }, DataTypes.BinaryType).withName("TINY_ORDERED_BYTES"); this.inputCol++; @@ -113,7 +113,7 @@ private UserDefinedFunction shortToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.shortToOrderedBytes(value, inputBuffer(position, Short.BYTES)).array(); + return ZOrderByteUtils.shortToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.BUFFER_SIZE)).array(); }, DataTypes.BinaryType).withName("SHORT_ORDERED_BYTES"); this.inputCol++; @@ -128,7 +128,7 @@ private UserDefinedFunction intToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.intToOrderedBytes(value, inputBuffer(position, Integer.BYTES)).array(); + return ZOrderByteUtils.intToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.BUFFER_SIZE)).array(); }, DataTypes.BinaryType).withName("INT_ORDERED_BYTES"); this.inputCol++; @@ -143,7 +143,7 @@ private UserDefinedFunction longToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.longToOrderedBytes(value, inputBuffer(position, Long.BYTES)).array(); + return ZOrderByteUtils.longToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.BUFFER_SIZE)).array(); }, DataTypes.BinaryType).withName("LONG_ORDERED_BYTES"); this.inputCol++; @@ -158,7 +158,7 @@ private UserDefinedFunction floatToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.floatToOrderedBytes(value, inputBuffer(position, Float.BYTES)).array(); + return ZOrderByteUtils.floatToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.BUFFER_SIZE)).array(); }, DataTypes.BinaryType).withName("FLOAT_ORDERED_BYTES"); this.inputCol++; @@ -173,7 +173,7 @@ private UserDefinedFunction doubleToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.doubleToOrderedBytes(value, inputBuffer(position, Double.BYTES)).array(); + return ZOrderByteUtils.doubleToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.BUFFER_SIZE)).array(); }, DataTypes.BinaryType).withName("FLOAT_ORDERED_BYTES"); this.inputCol++; 
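[Editor's illustration] One detail of the UDF revisions above that is easy to miss: each typed encoder captures its column position once, and all writes go through thread-confined, reusable buffers, so the per-row hot path allocates nothing. A simplified standalone sketch of that reuse pattern (names are illustrative, not the Iceberg API):

    import java.nio.ByteBuffer;

    public class BufferReuseDemo {
      private static final int BUFFER_SIZE = 8;

      // One thread-local buffer per input column, mirroring the patch's
      // inputBuffers[position]: threads never share buffers, and each column
      // keeps its own so one row's columns don't clobber each other.
      private final ThreadLocal<ByteBuffer>[] buffers;

      @SuppressWarnings("unchecked")
      BufferReuseDemo(int numCols) {
        this.buffers = new ThreadLocal[numCols];
        for (int i = 0; i < numCols; i++) {
          buffers[i] = ThreadLocal.withInitial(() -> ByteBuffer.allocate(BUFFER_SIZE));
        }
      }

      // Encode one long into the column's reusable buffer: sign-flip, then
      // big-endian bytes, with no allocation on the per-row hot path.
      byte[] encode(int column, long value) {
        ByteBuffer reuse = buffers[column].get();
        reuse.clear();
        reuse.putLong(value ^ 0x8000000000000000L);
        return reuse.array();
      }

      public static void main(String[] args) {
        BufferReuseDemo demo = new BufferReuseDemo(2);
        byte[] first = demo.encode(0, 1L);
        byte[] second = demo.encode(0, 2L);
        // The same backing array comes back for repeated calls on one column.
        System.out.println("buffer reused: " + (first == second));
      }
    }

ThreadLocal is what makes the reuse safe here: several Spark tasks can run in one executor JVM, so a single shared buffer would race, while allocating per call is exactly the overhead the patch is removing.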
From e96b0206b069ae912008b07e4407b7c4e9f80e33 Mon Sep 17 00:00:00 2001
From: Russell_Spitzer
Date: Fri, 18 Mar 2022 14:26:11 -0500
Subject: [PATCH 20/30] Reviewer Comments

---
 .../org/apache/iceberg/util/ByteBuffers.java  |   7 +-
 core/benchmark/ZorderResult.txt               | 227 ++++++++++++++++++
 .../util/ZOrderByteUtilsBenchmark.java        | 121 ++++++++++
 .../apache/iceberg/util/ZOrderByteUtils.java  |  34 ++-
 .../iceberg/util/TestZOrderByteUtil.java      |  29 ++-
 jmh.gradle                                    |   2 +-
 .../spark/actions/Spark3ZOrderStrategy.java   |  74 ++++--
 .../spark/actions/Spark3ZOrderUDF.java        |  80 +++---
 .../spark/actions/SparkSortStrategy.java      |  10 +-
 9 files changed, 514 insertions(+), 70 deletions(-)
 create mode 100644 core/benchmark/ZorderResult.txt
 create mode 100644 core/src/jmh/java/org/apache/iceberg/util/ZOrderByteUtilsBenchmark.java

diff --git a/api/src/main/java/org/apache/iceberg/util/ByteBuffers.java b/api/src/main/java/org/apache/iceberg/util/ByteBuffers.java
index efc05f179f82..4a5001018da6 100644
--- a/api/src/main/java/org/apache/iceberg/util/ByteBuffers.java
+++ b/api/src/main/java/org/apache/iceberg/util/ByteBuffers.java
@@ -48,9 +48,10 @@ public static byte[] toByteArray(ByteBuffer buffer) {
   }
 
   public static ByteBuffer reuse(ByteBuffer reuse, int length) {
-    Preconditions.checkArgument(reuse.hasArray() && reuse.arrayOffset() == 0 && reuse.capacity() == length,
-        "Cannot reuse buffer: Should be an array %s, should have an offset of 0 %s, should be of size %s was %s",
-        reuse.hasArray(), reuse.arrayOffset(), length, reuse.capacity());
+    Preconditions.checkArgument(reuse.hasArray(), "Cannot reuse a buffer not backed by an array");
+    Preconditions.checkArgument(reuse.arrayOffset() == 0, "Cannot reuse a buffer whose array offset is not 0");
+    Preconditions.checkArgument(reuse.capacity() == length,
+        "Cannot reuse a buffer whose capacity (%s) is not equal to the requested length (%s)", reuse.capacity(), length);
     reuse.position(0);
     reuse.limit(length);
     return reuse;
   }
diff --git a/core/benchmark/ZorderResult.txt b/core/benchmark/ZorderResult.txt
new file mode 100644
index 000000000000..559025105ce3
--- /dev/null
+++ b/core/benchmark/ZorderResult.txt
@@ -0,0 +1,227 @@
+# JMH version: 1.32
+# VM version: JDK 11.0.4, OpenJDK 64-Bit Server VM, 11.0.4+11-LTS
+# VM invoker: /Library/Java/JavaVirtualMachines/applejdk-11.jdk/Contents/Home/bin/java
+# VM options: -Dfile.encoding=UTF-8 -Djava.io.tmpdir=/Users/russellspitzer/repos/ipr/iceberg-master/core/build/tmp/jmh -Duser.country=US -Duser.language=en -Duser.variant
+# Blackhole mode: full + dont-inline hint
+# Warmup:
+# Measurement: 5 iterations, single-shot each
+# Timeout: 1000 hr per iteration
+# Threads: 1 thread
+# Benchmark mode: Single shot invocation time
+# Benchmark: org.apache.iceberg.util.ZOrderByteUtilsBenchmark.interleaveValuesFourColumns
+
+# Run progress: 0.00% complete, ETA 00:00:00
+# Fork: 1 of 1
+Iteration   1: 11.086 s/op
+Iteration   2: 10.337 s/op
+Iteration   3: 10.606 s/op
+Iteration   4: 10.897 s/op
+Iteration   5: 10.607 s/op
+
+
+Result "org.apache.iceberg.util.ZOrderByteUtilsBenchmark.interleaveValuesFourColumns":
+  N = 5
+  mean = 10.706 ±(99.9%) 1.117 s/op
+
+  Histogram, s/op:
+    [10.300, 10.350) = 1
+    [10.350, 10.400) = 0
+    [10.400, 10.450) = 0
+    [10.450, 10.500) = 0
+    [10.500, 10.550) = 0
+    [10.550, 10.600) = 0
+    [10.600, 10.650) = 2
+    [10.650, 10.700) = 0
+    [10.700, 10.750) = 0
+    [10.750, 10.800) = 0
+    [10.800, 10.850) = 0
+    [10.850, 10.900) = 1
+    [10.900, 10.950) = 0
+    [10.950, 11.000) = 0
+    [11.000, 11.050) = 0
+    [11.050, 11.100) = 1
+
+  Percentiles,
s/op: + p(0.0000) = 10.337 s/op + p(50.0000) = 10.607 s/op + p(90.0000) = 11.086 s/op + p(95.0000) = 11.086 s/op + p(99.0000) = 11.086 s/op + p(99.9000) = 11.086 s/op + p(99.9900) = 11.086 s/op + p(99.9990) = 11.086 s/op + p(99.9999) = 11.086 s/op + p(100.0000) = 11.086 s/op + + +# JMH version: 1.32 +# VM version: JDK 11.0.4, OpenJDK 64-Bit Server VM, 11.0.4+11-LTS +# VM invoker: /Library/Java/JavaVirtualMachines/applejdk-11.jdk/Contents/Home/bin/java +# VM options: -Dfile.encoding=UTF-8 -Djava.io.tmpdir=/Users/russellspitzer/repos/ipr/iceberg-master/core/build/tmp/jmh -Duser.country=US -Duser.language=en -Duser.variant +# Blackhole mode: full + dont-inline hint +# Warmup: +# Measurement: 5 iterations, single-shot each +# Timeout: 1000 hr per iteration +# Threads: 1 thread +# Benchmark mode: Single shot invocation time +# Benchmark: org.apache.iceberg.util.ZOrderByteUtilsBenchmark.interleaveValuesFourColumns8ByteOutput + +# Run progress: 25.00% complete, ETA 00:02:54 +# Fork: 1 of 1 +Iteration 1: 2.521 s/op +Iteration 2: 2.750 s/op +Iteration 3: 2.999 s/op +Iteration 4: 2.972 s/op +Iteration 5: 3.028 s/op + + +Result "org.apache.iceberg.util.ZOrderByteUtilsBenchmark.interleaveValuesFourColumns8ByteOutput": + N = 5 + mean = 2.854 ±(99.9%) 0.832 s/op + + Histogram, s/op: + [2.500, 2.550) = 1 + [2.550, 2.600) = 0 + [2.600, 2.650) = 0 + [2.650, 2.700) = 0 + [2.700, 2.750) = 1 + [2.750, 2.800) = 0 + [2.800, 2.850) = 0 + [2.850, 2.900) = 0 + [2.900, 2.950) = 0 + [2.950, 3.000) = 2 + [3.000, 3.050) = 1 + [3.050, 3.100) = 0 + + Percentiles, s/op: + p(0.0000) = 2.521 s/op + p(50.0000) = 2.972 s/op + p(90.0000) = 3.028 s/op + p(95.0000) = 3.028 s/op + p(99.0000) = 3.028 s/op + p(99.9000) = 3.028 s/op + p(99.9900) = 3.028 s/op + p(99.9990) = 3.028 s/op + p(99.9999) = 3.028 s/op + p(100.0000) = 3.028 s/op + + +# JMH version: 1.32 +# VM version: JDK 11.0.4, OpenJDK 64-Bit Server VM, 11.0.4+11-LTS +# VM invoker: /Library/Java/JavaVirtualMachines/applejdk-11.jdk/Contents/Home/bin/java +# VM options: -Dfile.encoding=UTF-8 -Djava.io.tmpdir=/Users/russellspitzer/repos/ipr/iceberg-master/core/build/tmp/jmh -Duser.country=US -Duser.language=en -Duser.variant +# Blackhole mode: full + dont-inline hint +# Warmup: +# Measurement: 5 iterations, single-shot each +# Timeout: 1000 hr per iteration +# Threads: 1 thread +# Benchmark mode: Single shot invocation time +# Benchmark: org.apache.iceberg.util.ZOrderByteUtilsBenchmark.interleaveValuesThreeColumns + +# Run progress: 50.00% complete, ETA 00:01:15 +# Fork: 1 of 1 +Iteration 1: 7.440 s/op +Iteration 2: 7.625 s/op +Iteration 3: 8.216 s/op +Iteration 4: 8.314 s/op +Iteration 5: 8.203 s/op + + +Result "org.apache.iceberg.util.ZOrderByteUtilsBenchmark.interleaveValuesThreeColumns": + N = 5 + mean = 7.960 ±(99.9%) 1.532 s/op + + Histogram, s/op: + [7.400, 7.500) = 1 + [7.500, 7.600) = 0 + [7.600, 7.700) = 1 + [7.700, 7.800) = 0 + [7.800, 7.900) = 0 + [7.900, 8.000) = 0 + [8.000, 8.100) = 0 + [8.100, 8.200) = 0 + [8.200, 8.300) = 2 + + Percentiles, s/op: + p(0.0000) = 7.440 s/op + p(50.0000) = 8.203 s/op + p(90.0000) = 8.314 s/op + p(95.0000) = 8.314 s/op + p(99.0000) = 8.314 s/op + p(99.9000) = 8.314 s/op + p(99.9900) = 8.314 s/op + p(99.9990) = 8.314 s/op + p(99.9999) = 8.314 s/op + p(100.0000) = 8.314 s/op + + +# JMH version: 1.32 +# VM version: JDK 11.0.4, OpenJDK 64-Bit Server VM, 11.0.4+11-LTS +# VM invoker: /Library/Java/JavaVirtualMachines/applejdk-11.jdk/Contents/Home/bin/java +# VM options: -Dfile.encoding=UTF-8 
-Djava.io.tmpdir=/Users/russellspitzer/repos/ipr/iceberg-master/core/build/tmp/jmh -Duser.country=US -Duser.language=en -Duser.variant +# Blackhole mode: full + dont-inline hint +# Warmup: +# Measurement: 5 iterations, single-shot each +# Timeout: 1000 hr per iteration +# Threads: 1 thread +# Benchmark mode: Single shot invocation time +# Benchmark: org.apache.iceberg.util.ZOrderByteUtilsBenchmark.interleaveValuesTwoColumns + +# Run progress: 75.00% complete, ETA 00:00:39 +# Fork: 1 of 1 +Iteration 1: 5.327 s/op +Iteration 2: 5.212 s/op +Iteration 3: 5.963 s/op +Iteration 4: 5.758 s/op +Iteration 5: 5.827 s/op + + +Result "org.apache.iceberg.util.ZOrderByteUtilsBenchmark.interleaveValuesTwoColumns": + N = 5 + mean = 5.618 ±(99.9%) 1.265 s/op + + Histogram, s/op: + [5.200, 5.250) = 1 + [5.250, 5.300) = 0 + [5.300, 5.350) = 1 + [5.350, 5.400) = 0 + [5.400, 5.450) = 0 + [5.450, 5.500) = 0 + [5.500, 5.550) = 0 + [5.550, 5.600) = 0 + [5.600, 5.650) = 0 + [5.650, 5.700) = 0 + [5.700, 5.750) = 0 + [5.750, 5.800) = 1 + [5.800, 5.850) = 1 + [5.850, 5.900) = 0 + [5.900, 5.950) = 0 + + Percentiles, s/op: + p(0.0000) = 5.212 s/op + p(50.0000) = 5.758 s/op + p(90.0000) = 5.963 s/op + p(95.0000) = 5.963 s/op + p(99.0000) = 5.963 s/op + p(99.9000) = 5.963 s/op + p(99.9900) = 5.963 s/op + p(99.9990) = 5.963 s/op + p(99.9999) = 5.963 s/op + p(100.0000) = 5.963 s/op + + +# Run complete. Total time: 00:02:29 + +REMEMBER: The numbers below are just data. To gain reusable insights, you need to follow up on +why the numbers are the way they are. Use profilers (see -prof, -lprof), design factorial +experiments, perform baseline and negative tests that provide experimental control, make sure +the benchmarking environment is safe on JVM/OS/HW level, ask for reviews from the domain experts. +Do not assume the numbers tell you what you want them to tell. + +Benchmark Mode Cnt Score Error Units +ZOrderByteUtilsBenchmark.interleaveValuesFourColumns ss 5 10.706 ± 1.117 s/op +ZOrderByteUtilsBenchmark.interleaveValuesFourColumns8ByteOutput ss 5 2.854 ± 0.832 s/op +ZOrderByteUtilsBenchmark.interleaveValuesThreeColumns ss 5 7.960 ± 1.532 s/op +ZOrderByteUtilsBenchmark.interleaveValuesTwoColumns ss 5 5.618 ± 1.265 s/op + +Benchmark result is saved to /Users/russellspitzer/repos/ipr/iceberg-master/core/build/results/jmh/results.txt diff --git a/core/src/jmh/java/org/apache/iceberg/util/ZOrderByteUtilsBenchmark.java b/core/src/jmh/java/org/apache/iceberg/util/ZOrderByteUtilsBenchmark.java new file mode 100644 index 000000000000..77f66f12cff3 --- /dev/null +++ b/core/src/jmh/java/org/apache/iceberg/util/ZOrderByteUtilsBenchmark.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + + +package org.apache.iceberg.util; + +import java.nio.ByteBuffer; +import java.util.Random; +import java.util.concurrent.TimeUnit; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Timeout; +import org.openjdk.jmh.infra.Blackhole; + +@Fork(1) +@State(Scope.Benchmark) +@Measurement(iterations = 5) +@BenchmarkMode(Mode.SingleShotTime) +@Timeout(time = 1000, timeUnit = TimeUnit.HOURS) +public class ZOrderByteUtilsBenchmark { + + private static final int NUM_ENTRIES = 10000000; + + private byte[][][] fourColumnInput; + private byte[][][] threeColumnInput; + private byte[][][] twoColumnInput; + + @Setup + public void setupBench() { + Random rand = new Random(42); + fourColumnInput = new byte[NUM_ENTRIES][][]; + threeColumnInput = new byte[NUM_ENTRIES][][]; + twoColumnInput = new byte[NUM_ENTRIES][][]; + for (int i = 0; i < NUM_ENTRIES; i++) { + fourColumnInput[i] = new byte[4][]; + threeColumnInput[i] = new byte[3][]; + twoColumnInput[i] = new byte[2][]; + for (int j = 0; j < 4; j++) { + byte[] value = ByteBuffer.allocate(Long.BYTES).putLong(rand.nextLong()).array(); + if (j < 2) { + twoColumnInput[i][j] = value; + } + if (j < 3) { + threeColumnInput[i][j] = value; + } + fourColumnInput[i][j] = value; + } + } + } + + @Benchmark + @Threads(1) + public void interleaveValuesFourColumns(Blackhole blackhole) { + int outputSize = ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE * 4; + ByteBuffer outputBuffer = ByteBuffer.allocate(outputSize); + + for (int i = 0; i < fourColumnInput.length; i++) { + byte[] interleavedBytes = ZOrderByteUtils.interleaveBits(fourColumnInput[i], outputSize, outputBuffer); + blackhole.consume(interleavedBytes); + } + } + + @Benchmark + @Threads(1) + public void interleaveValuesThreeColumns(Blackhole blackhole) { + int outputSize = ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE * 3; + ByteBuffer outputBuffer = ByteBuffer.allocate(outputSize); + + for (int i = 0; i < fourColumnInput.length; i++) { + byte[] interleavedBytes = ZOrderByteUtils.interleaveBits(threeColumnInput[i], outputSize, outputBuffer); + blackhole.consume(interleavedBytes); + } + } + + @Benchmark + @Threads(1) + public void interleaveValuesTwoColumns(Blackhole blackhole) { + int outputSize = ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE * 2; + ByteBuffer outputBuffer = ByteBuffer.allocate(outputSize); + + for (int i = 0; i < fourColumnInput.length; i++) { + byte[] interleavedBytes = ZOrderByteUtils.interleaveBits(twoColumnInput[i], outputSize, outputBuffer); + blackhole.consume(interleavedBytes); + } + } + + @Benchmark + @Threads(1) + public void interleaveValuesFourColumns8ByteOutput(Blackhole blackhole) { + int outputSize = 8; + ByteBuffer outputBuffer = ByteBuffer.allocate(outputSize); + + for (int i = 0; i < fourColumnInput.length; i++) { + byte[] interleavedBytes = ZOrderByteUtils.interleaveBits(fourColumnInput[i], outputSize, outputBuffer); + blackhole.consume(interleavedBytes); + } + } +} diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java index 44141e777295..9b5e571862c9 100644 --- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java 
+++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java @@ -43,22 +43,23 @@ */ public class ZOrderByteUtils { - public static final int BUFFER_SIZE = 8; + public static final int PRIMITIVE_BUFFER_SIZE = 8; private ZOrderByteUtils() { } static ByteBuffer allocatePrimitiveBuffer() { - return ByteBuffer.allocate(BUFFER_SIZE); + return ByteBuffer.allocate(PRIMITIVE_BUFFER_SIZE); } + /** * Signed ints do not have their bytes in magnitude order because of the sign bit. * To fix this, flip the sign bit so that all negatives are ordered before positives. This essentially * shifts the 0 value so that we don't break our ordering when we cross the new 0 value. */ public static ByteBuffer intToOrderedBytes(int val, ByteBuffer reuse) { - ByteBuffer bytes = ByteBuffers.reuse(reuse, BUFFER_SIZE); + ByteBuffer bytes = ByteBuffers.reuse(reuse, PRIMITIVE_BUFFER_SIZE); bytes.putLong(((long) val) ^ 0x8000000000000000L); return bytes; } @@ -67,7 +68,7 @@ public static ByteBuffer intToOrderedBytes(int val, ByteBuffer reuse) { * Signed longs are treated the same as the signed ints in {@link #intToOrderedBytes(int, ByteBuffer)} */ public static ByteBuffer longToOrderedBytes(long val, ByteBuffer reuse) { - ByteBuffer bytes = ByteBuffers.reuse(reuse, BUFFER_SIZE); + ByteBuffer bytes = ByteBuffers.reuse(reuse, PRIMITIVE_BUFFER_SIZE); bytes.putLong(val ^ 0x8000000000000000L); return bytes; } @@ -76,7 +77,7 @@ public static ByteBuffer longToOrderedBytes(long val, ByteBuffer reuse) { * Signed shorts are treated the same as the signed ints in {@link #intToOrderedBytes(int, ByteBuffer)} */ public static ByteBuffer shortToOrderedBytes(short val, ByteBuffer reuse) { - ByteBuffer bytes = ByteBuffers.reuse(reuse, BUFFER_SIZE); + ByteBuffer bytes = ByteBuffers.reuse(reuse, PRIMITIVE_BUFFER_SIZE); bytes.putLong(((long) val) ^ 0x8000000000000000L); return bytes; } @@ -85,7 +86,7 @@ public static ByteBuffer shortToOrderedBytes(short val, ByteBuffer reuse) { * Signed tiny ints are treated the same as the signed ints in {@link #intToOrderedBytes(int, ByteBuffer)} */ public static ByteBuffer tinyintToOrderedBytes(byte val, ByteBuffer reuse) { - ByteBuffer bytes = ByteBuffers.reuse(reuse, BUFFER_SIZE); + ByteBuffer bytes = ByteBuffers.reuse(reuse, PRIMITIVE_BUFFER_SIZE); bytes.putLong(((long) val) ^ 0x8000000000000000L); return bytes; } @@ -99,7 +100,7 @@ public static ByteBuffer tinyintToOrderedBytes(byte val, ByteBuffer reuse) { * comparable bytes */ public static ByteBuffer floatToOrderedBytes(float val, ByteBuffer reuse) { - ByteBuffer bytes = ByteBuffers.reuse(reuse, BUFFER_SIZE); + ByteBuffer bytes = ByteBuffers.reuse(reuse, PRIMITIVE_BUFFER_SIZE); long lval = Double.doubleToLongBits(val); lval ^= ((lval >> (Integer.SIZE - 1)) | Long.MIN_VALUE); bytes.putLong(lval); @@ -110,7 +111,7 @@ public static ByteBuffer floatToOrderedBytes(float val, ByteBuffer reuse) { * Doubles are treated the same as floats in {@link #floatToOrderedBytes(float, ByteBuffer)} */ public static ByteBuffer doubleToOrderedBytes(double val, ByteBuffer reuse) { - ByteBuffer bytes = ByteBuffers.reuse(reuse, BUFFER_SIZE); + ByteBuffer bytes = ByteBuffers.reuse(reuse, PRIMITIVE_BUFFER_SIZE); long lval = Double.doubleToLongBits(val); lval ^= ((lval >> (Integer.SIZE - 1)) | Long.MIN_VALUE); bytes.putLong(lval); @@ -137,14 +138,21 @@ public static ByteBuffer stringToOrderedBytes(String val, int length, ByteBuffer } /** - * For Testing interleave all available bytes + * Return a bytebuffer with the given bytes truncated to length, or filled with 0's 
to length depending on whether + * the given bytes are larger or smaller than the given length. */ - static byte[] interleaveBits(byte[][] columnsBinary) { - return interleaveBits(columnsBinary, - Arrays.stream(columnsBinary).mapToInt(column -> column.length).sum()); + public static ByteBuffer byteTruncateOrFill(byte[] val, int length, ByteBuffer reuse) { + ByteBuffer bytes = ByteBuffers.reuse(reuse, length); + if (val.length < length) { + bytes.put(val, 0, val.length); + Arrays.fill(bytes.array(), val.length, length, (byte) 0x00); + } else { + bytes.put(val, 0, length); + } + return bytes; } - public static byte[] interleaveBits(byte[][] columnsBinary, int interleavedSize) { + static byte[] interleaveBits(byte[][] columnsBinary, int interleavedSize) { return interleaveBits(columnsBinary, interleavedSize, ByteBuffer.allocate(interleavedSize)); } diff --git a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java index 858200c370b4..52ae803a5e18 100644 --- a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java +++ b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java @@ -96,7 +96,9 @@ public void testInterleaveRandomExamples() { testBytes[byteIndex] = generateRandomBytes(); testStrings[byteIndex] = bytesToString(testBytes[byteIndex]); } - byte[] byteResult = ZOrderByteUtils.interleaveBits(testBytes); + + int zOrderSize = Arrays.stream(testBytes).mapToInt(column -> column.length).sum(); + byte[] byteResult = ZOrderByteUtils.interleaveBits(testBytes, zOrderSize); String byteResultAsString = bytesToString(byteResult); String stringResult = interleaveStrings(testStrings); @@ -111,7 +113,7 @@ public void testInterleaveEmptyBits() { byte[] expected = new byte[40]; Assert.assertArrayEquals("Should combine empty arrays", - expected, ZOrderByteUtils.interleaveBits(test)); + expected, ZOrderByteUtils.interleaveBits(test, 40)); } @Test @@ -124,7 +126,7 @@ public void testInterleaveFullBits() { byte[] expected = new byte[]{IIIIIIII, IIIIIIII, IIIIIIII, IIIIIIII, IIIIIIII, IIIIIIII}; Assert.assertArrayEquals("Should combine full arrays", - expected, ZOrderByteUtils.interleaveBits(test)); + expected, ZOrderByteUtils.interleaveBits(test, 6)); } @Test @@ -140,7 +142,7 @@ public void testInterleaveMixedBits() { OIOIOIOI, OIOIOIOI, OOOOIIII}; Assert.assertArrayEquals("Should combine mixed byte arrays", - expected, ZOrderByteUtils.interleaveBits(test)); + expected, ZOrderByteUtils.interleaveBits(test, 9)); } @Test @@ -276,4 +278,23 @@ public void testStringOrdering() { stringCompare, byteCompare); } } + + @Test + public void testByteTruncateOrFill() { + ByteBuffer aBuffer = ByteBuffer.allocate(128); + ByteBuffer bBuffer = ByteBuffer.allocate(128); + for (int i = 0; i < NUM_TESTS; i++) { + byte[] aBytesRaw = (byte[]) RandomUtil.generatePrimitive(Types.BinaryType.get(), random); + byte[] bBytesRaw = (byte[]) RandomUtil.generatePrimitive(Types.BinaryType.get(), random); + int stringCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytesRaw, bBytesRaw)); + byte[] aBytes = ZOrderByteUtils.byteTruncateOrFill(aBytesRaw, 128, aBuffer).array(); + byte[] bBytes = ZOrderByteUtils.byteTruncateOrFill(bBytesRaw, 128, bBuffer).array(); + int byteCompare = Integer.signum(UnsignedBytes.lexicographicalComparator().compare(aBytes, bBytes)); + + Assert.assertEquals(String.format( + "Ordering of strings should match ordering of bytes, %s ~ %s -> %s != %s ~ %s -> %s ", + aBytesRaw, bBytesRaw, 
stringCompare, Arrays.toString(aBytes), Arrays.toString(bBytes), byteCompare), + stringCompare, byteCompare); + } + } } diff --git a/jmh.gradle b/jmh.gradle index d458ae2c5903..538fd96af406 100644 --- a/jmh.gradle +++ b/jmh.gradle @@ -23,7 +23,7 @@ if (jdkVersion != '8' && jdkVersion != '11') { def sparkVersions = (System.getProperty("sparkVersions") != null ? System.getProperty("sparkVersions") : System.getProperty("defaultSparkVersions")).split(",") def scalaVersion = System.getProperty("scalaVersion") != null ? System.getProperty("scalaVersion") : System.getProperty("defaultScalaVersion") -def jmhProjects = [] +def jmhProjects = [project(":iceberg-core")] if (jdkVersion == '8' && sparkVersions.contains("2.4")) { jmhProjects.add(project(":iceberg-spark:iceberg-spark-2.4")) diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java index 71a0274d5a08..238e6aaa7e4a 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java @@ -19,13 +19,14 @@ package org.apache.iceberg.spark.actions; -import java.nio.ByteBuffer; import java.util.Arrays; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.UUID; import java.util.stream.Collectors; import java.util.stream.Stream; +import org.apache.hadoop.shaded.com.google.common.collect.ImmutableSet; import org.apache.iceberg.DataFile; import org.apache.iceberg.FileScanTask; import org.apache.iceberg.NullOrder; @@ -33,15 +34,16 @@ import org.apache.iceberg.Schema; import org.apache.iceberg.SortDirection; import org.apache.iceberg.Table; +import org.apache.iceberg.actions.RewriteStrategy; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; -import org.apache.iceberg.spark.FileRewriteCoordinator; -import org.apache.iceberg.spark.FileScanTaskSetManager; import org.apache.iceberg.spark.SparkDistributionAndOrderingUtil; import org.apache.iceberg.spark.SparkReadOptions; import org.apache.iceberg.spark.SparkWriteOptions; import org.apache.iceberg.types.Types; import org.apache.iceberg.types.Types.NestedField; +import org.apache.iceberg.util.PropertyUtil; import org.apache.iceberg.util.SortOrderUtil; +import org.apache.iceberg.util.ZOrderByteUtils; import org.apache.spark.sql.Column; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; @@ -62,15 +64,59 @@ public class Spark3ZOrderStrategy extends Spark3SortStrategy { .sortBy(Z_COLUMN, SortDirection.ASC, NullOrder.NULLS_LAST) .build(); + /** + * Controls the amount of bytes interleaved in the ZOrder Algorithm. Default is all bytes being interleaved. + */ + private static final String MAX_OUTPUT_SIZE_KEY = "max-output-size"; + private static final int DEFAULT_MAX_OUTPUT_SIZE = Integer.MAX_VALUE; + + /** + * Controls the number of bytes considered from an input column of a type with variable length (String, Binary). 
+ * Default is to use the same size as primitives {@link ZOrderByteUtils#PRIMITIVE_BUFFER_SIZE} + */ + private static final String VAR_LENGTH_CONTRIBUTION_KEY = "var-length-contribution"; + private static final int DEFAULT_VAR_LENGTH_CONTRIBUTION = ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE; + private final List zOrderColNames; - private transient FileScanTaskSetManager manager = FileScanTaskSetManager.get(); - private transient FileRewriteCoordinator rewriteCoordinator = FileRewriteCoordinator.get(); + private final Spark3ZOrderUDF zOrderUDF; - private final Spark3ZOrderUDF orderHelper; + private int maxOutputSize; + private int varLengthContribution; + + @Override + public Set validOptions() { + return ImmutableSet.builder() + .addAll(super.validOptions()) + .add(VAR_LENGTH_CONTRIBUTION_KEY) + .add(MAX_OUTPUT_SIZE_KEY) + .build(); + } + + @Override + public RewriteStrategy options(Map options) { + super.options(options); + + varLengthContribution = PropertyUtil.propertyAsInt(options, VAR_LENGTH_CONTRIBUTION_KEY, + DEFAULT_VAR_LENGTH_CONTRIBUTION); + Preconditions.checkArgument(varLengthContribution > 0, + "Cannot use less than 1 byte for variable length types with zOrder, %s was set to %s", + VAR_LENGTH_CONTRIBUTION_KEY, varLengthContribution); + + + maxOutputSize = PropertyUtil.propertyAsInt(options, MAX_OUTPUT_SIZE_KEY, DEFAULT_MAX_OUTPUT_SIZE); + Preconditions.checkArgument(maxOutputSize > 0, + "Cannot have the interleaved ZOrder value use less than 1 byte, %s was set to %s", + MAX_OUTPUT_SIZE_KEY, maxOutputSize); + + return this; + } public Spark3ZOrderStrategy(Table table, SparkSession spark, List zOrderColNames) { super(table, spark); + Preconditions.checkArgument(zOrderColNames != null && !zOrderColNames.isEmpty(), + "Cannot ZOrder when no columns are specified"); + Stream identityPartitionColumns = table.spec().fields().stream() .filter(f -> f.transform().isIdentity()) .map(PartitionField::name); @@ -80,10 +126,10 @@ public Spark3ZOrderStrategy(Table table, SparkSession spark, List zOrder Preconditions.checkArgument( partZOrderCols.isEmpty(), "Cannot ZOrder on an Identity partition column as these values are constant within a partition, " + - "ZOrdering requested on %s", + "ZOrdering requested on Identity columns: %s", partZOrderCols); - this.orderHelper = new Spark3ZOrderUDF(zOrderColNames.size()); + this.zOrderUDF = new Spark3ZOrderUDF(zOrderColNames.size(), varLengthContribution, maxOutputSize); this.zOrderColNames = zOrderColNames; } @@ -114,7 +160,7 @@ public Set rewriteFiles(List filesToRewrite) { Distribution distribution = Distributions.ordered(ordering); try { - manager.stageTasks(table(), groupID, filesToRewrite); + manager().stageTasks(table(), groupID, filesToRewrite); // Disable Adaptive Query Execution as this may change the output partitioning of our write SparkSession cloneSession = spark().cloneSession(); @@ -137,10 +183,10 @@ public Set rewriteFiles(List filesToRewrite) { .collect(Collectors.toList()); Column zvalueArray = functions.array(zOrderColumns.stream().map(colStruct -> - orderHelper.sortedLexicographically(functions.col(colStruct.name()), colStruct.dataType()) + zOrderUDF.sortedLexicographically(functions.col(colStruct.name()), colStruct.dataType()) ).toArray(Column[]::new)); - Dataset zvalueDF = scanDF.withColumn(Z_COLUMN, orderHelper.interleaveBytes(zvalueArray)); + Dataset zvalueDF = scanDF.withColumn(Z_COLUMN, zOrderUDF.interleaveBytes(zvalueArray)); SQLConf sqlConf = cloneSession.sessionState().conf(); LogicalPlan sortPlan = sortPlan(distribution, 
ordering, zvalueDF.logicalPlan(), sqlConf); @@ -155,10 +201,10 @@ public Set rewriteFiles(List filesToRewrite) { .mode("append") .save(table().name()); - return rewriteCoordinator.fetchNewDataFiles(table(), groupID); + return rewriteCoordinator().fetchNewDataFiles(table(), groupID); } finally { - manager.removeTasks(table(), groupID); - rewriteCoordinator.clearRewrite(table(), groupID); + manager().removeTasks(table(), groupID); + rewriteCoordinator().clearRewrite(table(), groupID); } } diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java index 1e6eb60b2579..8a23e01451ee 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java @@ -20,6 +20,7 @@ package org.apache.iceberg.spark.actions; import java.io.IOException; +import java.io.ObjectInputStream; import java.io.Serializable; import java.nio.ByteBuffer; import java.nio.charset.CharsetEncoder; @@ -44,10 +45,12 @@ import scala.collection.Seq; class Spark3ZOrderUDF implements Serializable { - private static final int STRING_KEY_LENGTH = 8; - - private static final byte[] PRIMITIVE_EMPTY = new byte[ZOrderByteUtils.BUFFER_SIZE]; + private static final byte[] PRIMITIVE_EMPTY = new byte[ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE]; + /** + * Every Spark task runs iteratively over rows in a single thread, so ThreadLocal should protect from + * concurrent access to any of these structures. + */ private transient ThreadLocal outputBuffer; private transient ThreadLocal inputHolder; private transient ThreadLocal[] inputBuffers; @@ -56,28 +59,24 @@ class Spark3ZOrderUDF implements Serializable { private final int numCols; private int inputCol = 0; - private int totalBytes = 0; + private int totalOutputBytes = 0; + private final int varTypeSize; + private final int maxOutputSize; - Spark3ZOrderUDF(int numCols) { + Spark3ZOrderUDF(int numCols, int varTypeSize, int maxOutputSize) { this.numCols = numCols; + this.varTypeSize = varTypeSize; + this.maxOutputSize = maxOutputSize; } - private void readObject(java.io.ObjectInputStream in) - throws IOException, ClassNotFoundException { + private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException { in.defaultReadObject(); inputBuffers = new ThreadLocal[numCols]; inputHolder = ThreadLocal.withInitial(() -> new byte[numCols][]); + outputBuffer = ThreadLocal.withInitial(() -> ByteBuffer.allocate(totalOutputBytes)); encoder = ThreadLocal.withInitial(() -> StandardCharsets.UTF_8.newEncoder()); } - private ByteBuffer outputBuffer(int size) { - if (outputBuffer == null) { - // May over allocate on concurrent calls - outputBuffer = ThreadLocal.withInitial(() -> ByteBuffer.allocate(size)); - } - return outputBuffer.get(); - } - private ByteBuffer inputBuffer(int position, int size) { if (inputBuffers[position] == null) { // May over allocate on concurrent calls inputBuffers[position] = ThreadLocal.withInitial(() -> ByteBuffer.allocate(size)); } return inputBuffers[position].get(); } byte[] interleaveBits(Seq scalaBinary) { byte[][] columnsBinary = scala.collection.JavaConverters.seqAsJavaList(scalaBinary) .toArray(inputHolder.get()); - return ZOrderByteUtils.interleaveBits(columnsBinary, totalBytes, outputBuffer(totalBytes)); + return ZOrderByteUtils.interleaveBits(columnsBinary, totalOutputBytes, outputBuffer.get()); } private UserDefinedFunction tinyToOrderedBytesUDF() { @@ -98,11
+97,11 @@ private UserDefinedFunction tinyToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.tinyintToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.BUFFER_SIZE)).array(); + return ZOrderByteUtils.tinyintToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)).array(); }, DataTypes.BinaryType).withName("TINY_ORDERED_BYTES"); this.inputCol++; - this.totalBytes += Byte.BYTES; + this.totalOutputBytes += Byte.BYTES; return udf; } @@ -113,11 +112,11 @@ private UserDefinedFunction shortToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.shortToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.BUFFER_SIZE)).array(); + return ZOrderByteUtils.shortToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)).array(); }, DataTypes.BinaryType).withName("SHORT_ORDERED_BYTES"); this.inputCol++; - this.totalBytes += Short.BYTES; + this.totalOutputBytes += Short.BYTES; return udf; } @@ -128,11 +127,11 @@ private UserDefinedFunction intToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.intToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.BUFFER_SIZE)).array(); + return ZOrderByteUtils.intToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)).array(); }, DataTypes.BinaryType).withName("INT_ORDERED_BYTES"); this.inputCol++; - this.totalBytes += Integer.BYTES; + this.totalOutputBytes += Integer.BYTES; return udf; } @@ -143,11 +142,11 @@ private UserDefinedFunction longToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.longToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.BUFFER_SIZE)).array(); + return ZOrderByteUtils.longToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)).array(); }, DataTypes.BinaryType).withName("LONG_ORDERED_BYTES"); this.inputCol++; - this.totalBytes += Long.BYTES; + this.totalOutputBytes += Long.BYTES; return udf; } @@ -158,11 +157,11 @@ private UserDefinedFunction floatToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.floatToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.BUFFER_SIZE)).array(); + return ZOrderByteUtils.floatToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)).array(); }, DataTypes.BinaryType).withName("FLOAT_ORDERED_BYTES"); this.inputCol++; - this.totalBytes += Float.BYTES; + this.totalOutputBytes += Float.BYTES; return udf; } @@ -173,11 +172,11 @@ private UserDefinedFunction doubleToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.doubleToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.BUFFER_SIZE)).array(); - }, DataTypes.BinaryType).withName("FLOAT_ORDERED_BYTES"); + return ZOrderByteUtils.doubleToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)).array(); + }, DataTypes.BinaryType).withName("DOUBLE_ORDERED_BYTES"); this.inputCol++; - this.totalBytes += Double.BYTES; + this.totalOutputBytes += Double.BYTES; return udf; } @@ -187,13 +186,26 @@ private UserDefinedFunction stringToOrderedBytesUDF() { UserDefinedFunction udf = functions.udf((String value) -> ZOrderByteUtils.stringToOrderedBytes( value, - STRING_KEY_LENGTH, - inputBuffer(position, STRING_KEY_LENGTH), + varTypeSize, + inputBuffer(position, varTypeSize), encoder.get()).array(), DataTypes.BinaryType) 
.withName("STRING-LEXICAL-BYTES"); this.inputCol++; - this.totalBytes += STRING_KEY_LENGTH; + this.totalOutputBytes += varTypeSize; + + return udf; + } + + private UserDefinedFunction bytesTruncateUDF() { + int position = inputCol; + UserDefinedFunction udf = functions.udf((byte[] value) -> + ZOrderByteUtils.byteTruncateOrFill(value, varTypeSize, inputBuffer(position, varTypeSize)).array(), + DataTypes.BinaryType) + .withName("BYTE-TRUNCATE"); + + this.inputCol++; + this.totalOutputBytes += varTypeSize; return udf; } @@ -223,9 +235,9 @@ Column sortedLexicographically(Column column, DataType type) { } else if (type instanceof StringType) { return stringToOrderedBytesUDF().apply(column); } else if (type instanceof BinaryType) { - return stringToOrderedBytesUDF().apply(column); + return bytesTruncateUDF().apply(column); } else if (type instanceof BooleanType) { - return column.cast(DataTypes.BinaryType); + return bytesTruncateUDF().apply(column); } else if (type instanceof TimestampType) { return longToOrderedBytesUDF().apply(column.cast(DataTypes.LongType)); } else if (type instanceof DateType) { diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSortStrategy.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSortStrategy.java index d4823560bf17..6c8f8c027dba 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSortStrategy.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkSortStrategy.java @@ -157,6 +157,14 @@ protected LogicalPlan sortPlan(Distribution distribution, SortOrder[] ordering, } protected double sizeEstimateMultiple() { - return this.sizeEstimateMultiple; + return sizeEstimateMultiple; + } + + protected FileScanTaskSetManager manager() { + return manager; + } + + protected FileRewriteCoordinator rewriteCoordinator() { + return rewriteCoordinator; } } From 14637da30f6e7162e77db67c18eece2159e9a7ff Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Fri, 18 Mar 2022 17:00:06 -0500 Subject: [PATCH 21/30] Unstage Results --- core/benchmark/ZorderResult.txt | 227 -------------------------------- 1 file changed, 227 deletions(-) delete mode 100644 core/benchmark/ZorderResult.txt diff --git a/core/benchmark/ZorderResult.txt b/core/benchmark/ZorderResult.txt deleted file mode 100644 index 559025105ce3..000000000000 --- a/core/benchmark/ZorderResult.txt +++ /dev/null @@ -1,227 +0,0 @@ -# JMH version: 1.32 -# VM version: JDK 11.0.4, OpenJDK 64-Bit Server VM, 11.0.4+11-LTS -# VM invoker: /Library/Java/JavaVirtualMachines/applejdk-11.jdk/Contents/Home/bin/java -# VM options: -Dfile.encoding=UTF-8 -Djava.io.tmpdir=/Users/russellspitzer/repos/ipr/iceberg-master/core/build/tmp/jmh -Duser.country=US -Duser.language=en -Duser.variant -# Blackhole mode: full + dont-inline hint -# Warmup: -# Measurement: 5 iterations, single-shot each -# Timeout: 1000 hr per iteration -# Threads: 1 thread -# Benchmark mode: Single shot invocation time -# Benchmark: org.apache.iceberg.util.ZOrderByteUtilsBenchmark.interleaveValuesFourColumns - -# Run progress: 0.00% complete, ETA 00:00:00 -# Fork: 1 of 1 -Iteration 1: 11.086 s/op -Iteration 2: 10.337 s/op -Iteration 3: 10.606 s/op -Iteration 4: 10.897 s/op -Iteration 5: 10.607 s/op - - -Result "org.apache.iceberg.util.ZOrderByteUtilsBenchmark.interleaveValuesFourColumns": - N = 5 - mean = 10.706 ±(99.9%) 1.117 s/op - - Histogram, s/op: - [10.300, 10.350) = 1 - [10.350, 10.400) = 0 - [10.400, 10.450) = 0 - [10.450, 10.500) = 0 - [10.500, 10.550) = 0 - 
[10.550, 10.600) = 0 - [10.600, 10.650) = 2 - [10.650, 10.700) = 0 - [10.700, 10.750) = 0 - [10.750, 10.800) = 0 - [10.800, 10.850) = 0 - [10.850, 10.900) = 1 - [10.900, 10.950) = 0 - [10.950, 11.000) = 0 - [11.000, 11.050) = 0 - [11.050, 11.100) = 1 - - Percentiles, s/op: - p(0.0000) = 10.337 s/op - p(50.0000) = 10.607 s/op - p(90.0000) = 11.086 s/op - p(95.0000) = 11.086 s/op - p(99.0000) = 11.086 s/op - p(99.9000) = 11.086 s/op - p(99.9900) = 11.086 s/op - p(99.9990) = 11.086 s/op - p(99.9999) = 11.086 s/op - p(100.0000) = 11.086 s/op - - -# JMH version: 1.32 -# VM version: JDK 11.0.4, OpenJDK 64-Bit Server VM, 11.0.4+11-LTS -# VM invoker: /Library/Java/JavaVirtualMachines/applejdk-11.jdk/Contents/Home/bin/java -# VM options: -Dfile.encoding=UTF-8 -Djava.io.tmpdir=/Users/russellspitzer/repos/ipr/iceberg-master/core/build/tmp/jmh -Duser.country=US -Duser.language=en -Duser.variant -# Blackhole mode: full + dont-inline hint -# Warmup: -# Measurement: 5 iterations, single-shot each -# Timeout: 1000 hr per iteration -# Threads: 1 thread -# Benchmark mode: Single shot invocation time -# Benchmark: org.apache.iceberg.util.ZOrderByteUtilsBenchmark.interleaveValuesFourColumns8ByteOutput - -# Run progress: 25.00% complete, ETA 00:02:54 -# Fork: 1 of 1 -Iteration 1: 2.521 s/op -Iteration 2: 2.750 s/op -Iteration 3: 2.999 s/op -Iteration 4: 2.972 s/op -Iteration 5: 3.028 s/op - - -Result "org.apache.iceberg.util.ZOrderByteUtilsBenchmark.interleaveValuesFourColumns8ByteOutput": - N = 5 - mean = 2.854 ±(99.9%) 0.832 s/op - - Histogram, s/op: - [2.500, 2.550) = 1 - [2.550, 2.600) = 0 - [2.600, 2.650) = 0 - [2.650, 2.700) = 0 - [2.700, 2.750) = 1 - [2.750, 2.800) = 0 - [2.800, 2.850) = 0 - [2.850, 2.900) = 0 - [2.900, 2.950) = 0 - [2.950, 3.000) = 2 - [3.000, 3.050) = 1 - [3.050, 3.100) = 0 - - Percentiles, s/op: - p(0.0000) = 2.521 s/op - p(50.0000) = 2.972 s/op - p(90.0000) = 3.028 s/op - p(95.0000) = 3.028 s/op - p(99.0000) = 3.028 s/op - p(99.9000) = 3.028 s/op - p(99.9900) = 3.028 s/op - p(99.9990) = 3.028 s/op - p(99.9999) = 3.028 s/op - p(100.0000) = 3.028 s/op - - -# JMH version: 1.32 -# VM version: JDK 11.0.4, OpenJDK 64-Bit Server VM, 11.0.4+11-LTS -# VM invoker: /Library/Java/JavaVirtualMachines/applejdk-11.jdk/Contents/Home/bin/java -# VM options: -Dfile.encoding=UTF-8 -Djava.io.tmpdir=/Users/russellspitzer/repos/ipr/iceberg-master/core/build/tmp/jmh -Duser.country=US -Duser.language=en -Duser.variant -# Blackhole mode: full + dont-inline hint -# Warmup: -# Measurement: 5 iterations, single-shot each -# Timeout: 1000 hr per iteration -# Threads: 1 thread -# Benchmark mode: Single shot invocation time -# Benchmark: org.apache.iceberg.util.ZOrderByteUtilsBenchmark.interleaveValuesThreeColumns - -# Run progress: 50.00% complete, ETA 00:01:15 -# Fork: 1 of 1 -Iteration 1: 7.440 s/op -Iteration 2: 7.625 s/op -Iteration 3: 8.216 s/op -Iteration 4: 8.314 s/op -Iteration 5: 8.203 s/op - - -Result "org.apache.iceberg.util.ZOrderByteUtilsBenchmark.interleaveValuesThreeColumns": - N = 5 - mean = 7.960 ±(99.9%) 1.532 s/op - - Histogram, s/op: - [7.400, 7.500) = 1 - [7.500, 7.600) = 0 - [7.600, 7.700) = 1 - [7.700, 7.800) = 0 - [7.800, 7.900) = 0 - [7.900, 8.000) = 0 - [8.000, 8.100) = 0 - [8.100, 8.200) = 0 - [8.200, 8.300) = 2 - - Percentiles, s/op: - p(0.0000) = 7.440 s/op - p(50.0000) = 8.203 s/op - p(90.0000) = 8.314 s/op - p(95.0000) = 8.314 s/op - p(99.0000) = 8.314 s/op - p(99.9000) = 8.314 s/op - p(99.9900) = 8.314 s/op - p(99.9990) = 8.314 s/op - p(99.9999) = 8.314 s/op - p(100.0000) = 8.314 
s/op - - -# JMH version: 1.32 -# VM version: JDK 11.0.4, OpenJDK 64-Bit Server VM, 11.0.4+11-LTS -# VM invoker: /Library/Java/JavaVirtualMachines/applejdk-11.jdk/Contents/Home/bin/java -# VM options: -Dfile.encoding=UTF-8 -Djava.io.tmpdir=/Users/russellspitzer/repos/ipr/iceberg-master/core/build/tmp/jmh -Duser.country=US -Duser.language=en -Duser.variant -# Blackhole mode: full + dont-inline hint -# Warmup: -# Measurement: 5 iterations, single-shot each -# Timeout: 1000 hr per iteration -# Threads: 1 thread -# Benchmark mode: Single shot invocation time -# Benchmark: org.apache.iceberg.util.ZOrderByteUtilsBenchmark.interleaveValuesTwoColumns - -# Run progress: 75.00% complete, ETA 00:00:39 -# Fork: 1 of 1 -Iteration 1: 5.327 s/op -Iteration 2: 5.212 s/op -Iteration 3: 5.963 s/op -Iteration 4: 5.758 s/op -Iteration 5: 5.827 s/op - - -Result "org.apache.iceberg.util.ZOrderByteUtilsBenchmark.interleaveValuesTwoColumns": - N = 5 - mean = 5.618 ±(99.9%) 1.265 s/op - - Histogram, s/op: - [5.200, 5.250) = 1 - [5.250, 5.300) = 0 - [5.300, 5.350) = 1 - [5.350, 5.400) = 0 - [5.400, 5.450) = 0 - [5.450, 5.500) = 0 - [5.500, 5.550) = 0 - [5.550, 5.600) = 0 - [5.600, 5.650) = 0 - [5.650, 5.700) = 0 - [5.700, 5.750) = 0 - [5.750, 5.800) = 1 - [5.800, 5.850) = 1 - [5.850, 5.900) = 0 - [5.900, 5.950) = 0 - - Percentiles, s/op: - p(0.0000) = 5.212 s/op - p(50.0000) = 5.758 s/op - p(90.0000) = 5.963 s/op - p(95.0000) = 5.963 s/op - p(99.0000) = 5.963 s/op - p(99.9000) = 5.963 s/op - p(99.9900) = 5.963 s/op - p(99.9990) = 5.963 s/op - p(99.9999) = 5.963 s/op - p(100.0000) = 5.963 s/op - - -# Run complete. Total time: 00:02:29 - -REMEMBER: The numbers below are just data. To gain reusable insights, you need to follow up on -why the numbers are the way they are. Use profilers (see -prof, -lprof), design factorial -experiments, perform baseline and negative tests that provide experimental control, make sure -the benchmarking environment is safe on JVM/OS/HW level, ask for reviews from the domain experts. -Do not assume the numbers tell you what you want them to tell. - -Benchmark Mode Cnt Score Error Units -ZOrderByteUtilsBenchmark.interleaveValuesFourColumns ss 5 10.706 ± 1.117 s/op -ZOrderByteUtilsBenchmark.interleaveValuesFourColumns8ByteOutput ss 5 2.854 ± 0.832 s/op -ZOrderByteUtilsBenchmark.interleaveValuesThreeColumns ss 5 7.960 ± 1.532 s/op -ZOrderByteUtilsBenchmark.interleaveValuesTwoColumns ss 5 5.618 ± 1.265 s/op - -Benchmark result is saved to /Users/russellspitzer/repos/ipr/iceberg-master/core/build/results/jmh/results.txt From 2e68428b9ac0729d37d777d9b738d2073dcadfcd Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Fri, 18 Mar 2022 17:22:11 -0500 Subject: [PATCH 22/30] Checkstyle --- .../spark/action/RandomGeneratingUDF.java | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/RandomGeneratingUDF.java b/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/RandomGeneratingUDF.java index 5cb6a350c7c7..cfbd9d4fb3f6 100644 --- a/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/RandomGeneratingUDF.java +++ b/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/RandomGeneratingUDF.java @@ -1,15 +1,20 @@ /* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.iceberg.spark.action; From 859c5585ce00314cbf8229711a1d978b1d393b77 Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Fri, 18 Mar 2022 17:29:14 -0500 Subject: [PATCH 23/30] More Checkstyle --- .../spark/actions/Spark3ZOrderStrategy.java | 2 +- .../iceberg/spark/actions/Spark3ZOrderUDF.java | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java index 238e6aaa7e4a..6854d64a3ddc 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java @@ -26,7 +26,6 @@ import java.util.UUID; import java.util.stream.Collectors; import java.util.stream.Stream; -import org.apache.hadoop.shaded.com.google.common.collect.ImmutableSet; import org.apache.iceberg.DataFile; import org.apache.iceberg.FileScanTask; import org.apache.iceberg.NullOrder; @@ -36,6 +35,7 @@ import org.apache.iceberg.Table; import org.apache.iceberg.actions.RewriteStrategy; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; import org.apache.iceberg.spark.SparkDistributionAndOrderingUtil; import org.apache.iceberg.spark.SparkReadOptions; import org.apache.iceberg.spark.SparkWriteOptions; diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java index 8a23e01451ee..da206efb50db 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java @@ -97,7 +97,8 @@ private UserDefinedFunction tinyToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.tinyintToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)).array(); + return ZOrderByteUtils.tinyintToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)) + .array(); }, 
DataTypes.BinaryType).withName("TINY_ORDERED_BYTES"); this.inputCol++; @@ -112,7 +113,8 @@ private UserDefinedFunction shortToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.shortToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)).array(); + return ZOrderByteUtils.shortToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)) + .array(); }, DataTypes.BinaryType).withName("SHORT_ORDERED_BYTES"); this.inputCol++; @@ -127,7 +129,8 @@ private UserDefinedFunction intToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.intToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)).array(); + return ZOrderByteUtils.intToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)) + .array(); }, DataTypes.BinaryType).withName("INT_ORDERED_BYTES"); this.inputCol++; @@ -142,7 +145,8 @@ private UserDefinedFunction longToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.longToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)).array(); + return ZOrderByteUtils.longToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)) + .array(); }, DataTypes.BinaryType).withName("LONG_ORDERED_BYTES"); this.inputCol++; @@ -157,7 +161,8 @@ private UserDefinedFunction floatToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.floatToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)).array(); + return ZOrderByteUtils.floatToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)) + .array(); }, DataTypes.BinaryType).withName("FLOAT_ORDERED_BYTES"); this.inputCol++; @@ -172,7 +177,8 @@ private UserDefinedFunction doubleToOrderedBytesUDF() { if (value == null) { return PRIMITIVE_EMPTY; } - return ZOrderByteUtils.doubleToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)).array(); + return ZOrderByteUtils.doubleToOrderedBytes(value, inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE)) + .array(); }, DataTypes.BinaryType).withName("DOUBLE_ORDERED_BYTES"); this.inputCol++; From 46b1a16431b13b14869634af22dfb9fe912b5dcb Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Mon, 18 Apr 2022 13:56:14 -0500 Subject: [PATCH 24/30] Rebase and Review Feedback --- .../IcebergSortCompactionBenchmark.java | 32 ++++++++--------- .../BaseRewriteDataFilesSparkAction.java | 9 +++-- ...Strategy.java => SparkZOrderStrategy.java} | 26 ++++++++------ ...ark3ZOrderUDF.java => SparkZOrderUDF.java} | 36 ++++++++++++++----- .../spark/data/SparkParquetWriters.java | 24 +++++++++++++ .../actions/TestRewriteDataFilesAction.java | 18 +++++++--- 6 files changed, 100 insertions(+), 45 deletions(-) rename spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/{Spark3ZOrderStrategy.java => SparkZOrderStrategy.java} (89%) rename spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/{Spark3ZOrderUDF.java => SparkZOrderUDF.java} (89%) diff --git a/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java b/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java index 1fff9c35186f..8c205037f56e 100644 --- a/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java +++ 
b/spark/v3.2/spark/src/jmh/java/org/apache/iceberg/spark/action/IcebergSortCompactionBenchmark.java @@ -30,11 +30,11 @@ import org.apache.iceberg.SortDirection; import org.apache.iceberg.SortOrder; import org.apache.iceberg.Table; +import org.apache.iceberg.actions.BinPackStrategy; import org.apache.iceberg.relocated.com.google.common.io.Files; import org.apache.iceberg.spark.Spark3Util; import org.apache.iceberg.spark.SparkSchemaUtil; import org.apache.iceberg.spark.SparkSessionCatalog; -import org.apache.iceberg.spark.actions.Spark3SortStrategy; import org.apache.iceberg.spark.actions.SparkActions; import org.apache.iceberg.types.Types; import org.apache.spark.sql.Dataset; @@ -107,7 +107,7 @@ public void cleanUpIteration() throws IOException { public void sortInt() { SparkActions.get() .rewriteDataFiles(table()) - .option(Spark3SortStrategy.REWRITE_ALL, "true") + .option(BinPackStrategy.REWRITE_ALL, "true") .sort(SortOrder .builderFor(table().schema()) .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) @@ -120,7 +120,7 @@ public void sortInt() { public void sortInt2() { SparkActions.get() .rewriteDataFiles(table()) - .option(Spark3SortStrategy.REWRITE_ALL, "true") + .option(BinPackStrategy.REWRITE_ALL, "true") .sort(SortOrder .builderFor(table().schema()) .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) @@ -134,7 +134,7 @@ public void sortInt2() { public void sortInt3() { SparkActions.get() .rewriteDataFiles(table()) - .option(Spark3SortStrategy.REWRITE_ALL, "true") + .option(BinPackStrategy.REWRITE_ALL, "true") .sort(SortOrder .builderFor(table().schema()) .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) @@ -150,7 +150,7 @@ public void sortInt3() { public void sortInt4() { SparkActions.get() .rewriteDataFiles(table()) - .option(Spark3SortStrategy.REWRITE_ALL, "true") + .option(BinPackStrategy.REWRITE_ALL, "true") .sort(SortOrder .builderFor(table().schema()) .sortBy("intCol", SortDirection.ASC, NullOrder.NULLS_FIRST) @@ -166,7 +166,7 @@ public void sortInt4() { public void sortString() { SparkActions.get() .rewriteDataFiles(table()) - .option(Spark3SortStrategy.REWRITE_ALL, "true") + .option(BinPackStrategy.REWRITE_ALL, "true") .sort(SortOrder .builderFor(table().schema()) .sortBy("stringCol", SortDirection.ASC, NullOrder.NULLS_FIRST) @@ -179,7 +179,7 @@ public void sortString() { public void sortFourColumns() { SparkActions.get() .rewriteDataFiles(table()) - .option(Spark3SortStrategy.REWRITE_ALL, "true") + .option(BinPackStrategy.REWRITE_ALL, "true") .sort(SortOrder .builderFor(table().schema()) .sortBy("stringCol", SortDirection.ASC, NullOrder.NULLS_FIRST) @@ -195,7 +195,7 @@ public void sortFourColumns() { public void sortSixColumns() { SparkActions.get() .rewriteDataFiles(table()) - .option(Spark3SortStrategy.REWRITE_ALL, "true") + .option(BinPackStrategy.REWRITE_ALL, "true") .sort(SortOrder .builderFor(table().schema()) .sortBy("stringCol", SortDirection.ASC, NullOrder.NULLS_FIRST) @@ -213,7 +213,7 @@ public void sortSixColumns() { public void zSortInt() { SparkActions.get() .rewriteDataFiles(table()) - .option(Spark3SortStrategy.REWRITE_ALL, "true") + .option(BinPackStrategy.REWRITE_ALL, "true") .zOrder("intCol") .execute(); } @@ -223,7 +223,7 @@ public void zSortInt() { public void zSortInt2() { SparkActions.get() .rewriteDataFiles(table()) - .option(Spark3SortStrategy.REWRITE_ALL, "true") + .option(BinPackStrategy.REWRITE_ALL, "true") .zOrder("intCol", "intCol2") .execute(); } @@ -233,7 +233,7 @@ public void zSortInt2() { public void zSortInt3() { 
SparkActions.get() .rewriteDataFiles(table()) - .option(Spark3SortStrategy.REWRITE_ALL, "true") + .option(BinPackStrategy.REWRITE_ALL, "true") .zOrder("intCol", "intCol2", "intCol3") .execute(); } @@ -243,7 +243,7 @@ public void zSortInt3() { public void zSortInt4() { SparkActions.get() .rewriteDataFiles(table()) - .option(Spark3SortStrategy.REWRITE_ALL, "true") + .option(BinPackStrategy.REWRITE_ALL, "true") .zOrder("intCol", "intCol2", "intCol3", "intCol4") .execute(); } @@ -253,7 +253,7 @@ public void zSortInt4() { public void zSortString() { SparkActions.get() .rewriteDataFiles(table()) - .option(Spark3SortStrategy.REWRITE_ALL, "true") + .option(BinPackStrategy.REWRITE_ALL, "true") .zOrder("stringCol") .execute(); } @@ -263,7 +263,7 @@ public void zSortString() { public void zSortFourColumns() { SparkActions.get() .rewriteDataFiles(table()) - .option(Spark3SortStrategy.REWRITE_ALL, "true") + .option(BinPackStrategy.REWRITE_ALL, "true") .zOrder("stringCol", "intCol", "dateCol", "doubleCol") .execute(); } @@ -273,7 +273,7 @@ public void zSortFourColumns() { public void zSortSixColumns() { SparkActions.get() .rewriteDataFiles(table()) - .option(Spark3SortStrategy.REWRITE_ALL, "true") + .option(BinPackStrategy.REWRITE_ALL, "true") .zOrder("stringCol", "intCol", "dateCol", "timestampCol", "doubleCol", "longCol") .execute(); } @@ -295,7 +295,7 @@ protected final void initTable() { optional(9, "timestampCol", Types.TimestampType.withZone()), optional(10, "stringCol", Types.StringType.get())); - SparkSessionCatalog catalog = null; + SparkSessionCatalog catalog; try { catalog = (SparkSessionCatalog) Spark3Util.catalogAndIdentifier(spark(), "spark_catalog").catalog(); diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/BaseRewriteDataFilesSparkAction.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/BaseRewriteDataFilesSparkAction.java index 62cb5b174d43..3a8d8a81fb86 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/BaseRewriteDataFilesSparkAction.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/BaseRewriteDataFilesSparkAction.java @@ -106,11 +106,6 @@ protected RewriteDataFiles self() { return this; } - /** - * The framework specific ZOrder Strategy - */ - protected abstract SortStrategy zOrderStrategy(String... columnNames); - @Override public RewriteDataFiles binPack() { Preconditions.checkArgument(this.strategy == null, @@ -440,6 +435,10 @@ private SortStrategy sortStrategy() { return new SparkSortStrategy(table, spark()); } + private SortStrategy zOrderStrategy(String... 
columnNames) { + return new SparkZOrderStrategy(table, spark(), Lists.newArrayList(columnNames)); + } + @VisibleForTesting static class RewriteExecutionContext { private final Map numGroupsByPartition; diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderStrategy.java similarity index 89% rename from spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java rename to spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderStrategy.java index 6854d64a3ddc..a38ea69b5959 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderStrategy.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderStrategy.java @@ -55,8 +55,11 @@ import org.apache.spark.sql.functions; import org.apache.spark.sql.internal.SQLConf; import org.apache.spark.sql.types.StructField; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -public class Spark3ZOrderStrategy extends Spark3SortStrategy { +public class SparkZOrderStrategy extends SparkSortStrategy { + private static final Logger LOG = LoggerFactory.getLogger(SparkZOrderStrategy.class); private static final String Z_COLUMN = "ICEZVALUE"; private static final Schema Z_SCHEMA = new Schema(NestedField.required(0, Z_COLUMN, Types.BinaryType.get())); @@ -78,7 +81,7 @@ public class SparkZOrderStrategy extends SparkSortStrategy { private static final int DEFAULT_VAR_LENGTH_CONTRIBUTION = ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE; private final List zOrderColNames; - private final Spark3ZOrderUDF zOrderUDF; + private final SparkZOrderUDF zOrderUDF; private int maxOutputSize; private int varLengthContribution; @@ -102,16 +105,15 @@ public RewriteStrategy options(Map options) { "Cannot use less than 1 byte for variable length types with zOrder, %s was set to %s", VAR_LENGTH_CONTRIBUTION_KEY, varLengthContribution); - maxOutputSize = PropertyUtil.propertyAsInt(options, MAX_OUTPUT_SIZE_KEY, DEFAULT_MAX_OUTPUT_SIZE); Preconditions.checkArgument(maxOutputSize > 0, "Cannot have the interleaved ZOrder value use less than 1 byte, %s was set to %s", MAX_OUTPUT_SIZE_KEY, maxOutputSize); - return this; + return this; } - public Spark3ZOrderStrategy(Table table, SparkSession spark, List zOrderColNames) { + public SparkZOrderStrategy(Table table, SparkSession spark, List zOrderColNames) { super(table, spark); Preconditions.checkArgument(zOrderColNames != null && !zOrderColNames.isEmpty(), @@ -123,14 +125,16 @@ public Spark3ZOrderStrategy(Table table, SparkSession spark, List zOrder List partZOrderCols = identityPartitionColumns .filter(zOrderColNames::contains) .collect(Collectors.toList()); - Preconditions.checkArgument( - partZOrderCols.isEmpty(), - "Cannot ZOrder on an Identity partition column as these values are constant within a partition, " + - "ZOrdering requested on Identity columns: %s", - partZOrderCols); - this.zOrderUDF = new Spark3ZOrderUDF(zOrderColNames.size(), varLengthContribution, maxOutputSize); + if (!partZOrderCols.isEmpty()) { + LOG.warn("Cannot ZOrder on an Identity partition column as these values are constant within a partition; " + + "they will be removed from the ZOrder expression: {}", partZOrderCols); + zOrderColNames.removeAll(partZOrderCols); + Preconditions.checkArgument(!zOrderColNames.isEmpty(), + "Cannot perform ZOrdering, all columns provided were identity partition columns and cannot be used."); + } +
this.zOrderUDF = new SparkZOrderUDF(zOrderColNames.size(), varLengthContribution, maxOutputSize); this.zOrderColNames = zOrderColNames; } diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java similarity index 89% rename from spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java rename to spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java index da206efb50db..8bebfb2b7543 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/Spark3ZOrderUDF.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java @@ -44,7 +44,7 @@ import org.apache.spark.sql.types.TimestampType; import scala.collection.Seq; -class Spark3ZOrderUDF implements Serializable { +class SparkZOrderUDF implements Serializable { private static final byte[] PRIMITIVE_EMPTY = new byte[ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE]; /** @@ -53,7 +53,7 @@ class Spark3ZOrderUDF implements Serializable { */ private transient ThreadLocal outputBuffer; private transient ThreadLocal inputHolder; - private transient ThreadLocal[] inputBuffers; + private transient ThreadLocal inputBuffers; private transient ThreadLocal encoder; private final int numCols; @@ -63,7 +63,7 @@ class Spark3ZOrderUDF implements Serializable { private final int varTypeSize; private final int maxOutputSize; - Spark3ZOrderUDF(int numCols, int varTypeSize, int maxOutputSize) { + SparkZOrderUDF(int numCols, int varTypeSize, int maxOutputSize) { this.numCols = numCols; this.varTypeSize = varTypeSize; this.maxOutputSize = maxOutputSize; @@ -71,18 +71,22 @@ class Spark3ZOrderUDF implements Serializable { private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException { in.defaultReadObject(); - inputBuffers = new ThreadLocal[numCols]; + if (totalOutputBytes > maxOutputSize) { + totalOutputBytes = maxOutputSize; + } + inputBuffers = ThreadLocal.withInitial(() -> new ByteBuffer[numCols]); inputHolder = ThreadLocal.withInitial(() -> new byte[numCols][]); outputBuffer = ThreadLocal.withInitial(() -> ByteBuffer.allocate(totalOutputBytes)); encoder = ThreadLocal.withInitial(() -> StandardCharsets.UTF_8.newEncoder()); } private ByteBuffer inputBuffer(int position, int size) { - if (inputBuffers[position] == null) { - // May over allocate on concurrent calls - inputBuffers[position] = ThreadLocal.withInitial(() -> ByteBuffer.allocate(size)); + ByteBuffer buffer = inputBuffers.get()[position]; + if (buffer == null) { + buffer = ByteBuffer.allocate(size); + inputBuffers.get()[position] = buffer; } - return inputBuffers[position].get(); + return buffer; } byte[] interleaveBits(Seq scalaBinary) { @@ -216,6 +220,20 @@ private UserDefinedFunction bytesTruncateUDF() { return udf; } + private UserDefinedFunction booleanToOrderedBytesUDF() { + int position = inputCol; + UserDefinedFunction udf = functions.udf((Boolean value) -> { + ByteBuffer buffer = inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE); + buffer.put(0, (byte) (value ? 
-127 : 0)); + return buffer.array(); + }, DataTypes.BinaryType) + .withName("BOOLEAN-LEXICAL-BYTES"); + + this.inputCol++; + this.totalOutputBytes += ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE; + return udf; + } + private final UserDefinedFunction interleaveUDF = functions.udf((Seq arrayBinary) -> interleaveBits(arrayBinary), DataTypes.BinaryType) .withName("INTERLEAVE_BYTES"); @@ -243,7 +261,7 @@ Column sortedLexicographically(Column column, DataType type) { } else if (type instanceof BinaryType) { return bytesTruncateUDF().apply(column); } else if (type instanceof BooleanType) { - return bytesTruncateUDF().apply(column); + return booleanToOrderedBytesUDF().apply(column); } else if (type instanceof TimestampType) { return longToOrderedBytesUDF().apply(column.cast(DataTypes.LongType)); } else if (type instanceof DateType) { diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java index 5e268d26ed9c..845fe305e494 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java @@ -19,10 +19,12 @@ package org.apache.iceberg.spark.data; +import java.nio.ByteBuffer; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.NoSuchElementException; +import java.util.UUID; import org.apache.iceberg.parquet.ParquetValueReaders.ReusableEntry; import org.apache.iceberg.parquet.ParquetValueWriter; import org.apache.iceberg.parquet.ParquetValueWriters; @@ -299,6 +301,28 @@ public void write(int repetitionLevel, byte[] bytes) { } } + private static PrimitiveWriter uuids(ColumnDescriptor desc) { + return new UUIDWriter(desc); + } + + private static class UUIDWriter extends PrimitiveWriter { + private ByteBuffer buffer = ByteBuffer.allocate(16); + + private UUIDWriter(ColumnDescriptor desc) { + super(desc); + } + + @Override + public void write(int repetitionLevel, UTF8String string) { + UUID uuid = UUID.fromString(string.toString()); + buffer.rewind(); + buffer.putLong(uuid.getMostSignificantBits()); + buffer.putLong(uuid.getLeastSignificantBits()); + buffer.rewind(); + column.writeBinary(repetitionLevel, Binary.fromReusedByteBuffer(buffer)); + } + } + private static class ArrayDataWriter extends RepeatedWriter { private final DataType elementType; diff --git a/spark/v3.2/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java b/spark/v3.2/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java index f6f3004aecd0..d577e0ce4eda 100644 --- a/spark/v3.2/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java +++ b/spark/v3.2/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java @@ -22,6 +22,7 @@ import java.io.File; import java.io.IOException; import java.io.UncheckedIOException; +import java.nio.ByteBuffer; import java.util.Collections; import java.util.Comparator; import java.util.List; @@ -87,6 +88,9 @@ import org.apache.iceberg.util.Pair; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; +import org.apache.spark.sql.expressions.UserDefinedFunction; +import org.apache.spark.sql.functions; +import org.apache.spark.sql.types.DataTypes; import org.junit.Assert; import org.junit.Before; import org.junit.Rule; @@ -1093,9 +1097,11 @@ public void testZOrderAllTypesSort() { List originalRaw = 
spark.read().format("iceberg").load(tableLocation).sort("longCol").collectAsList(); List originalData = rowsToJava(originalRaw); + // TODO add in UUID when it is supported in Spark RewriteDataFiles.Result result = basicRewrite(table) - .zOrder("longCol", "intCol", "floatCol", "doubleCol", "dateCol", "timestampCol", "stringCol") + .zOrder("longCol", "intCol", "floatCol", "doubleCol", "dateCol", "timestampCol", "stringCol", "binaryCol", + "booleanCol") .option(SortStrategy.MIN_INPUT_FILES, "1") .option(SortStrategy.REWRITE_ALL, "true") .execute(); @@ -1415,9 +1421,11 @@ private Table createTypeTestTable() { required(2, "intCol", Types.IntegerType.get()), required(3, "floatCol", Types.FloatType.get()), optional(4, "doubleCol", Types.DoubleType.get()), - optional(6, "dateCol", Types.DateType.get()), - optional(7, "timestampCol", Types.TimestampType.withZone()), - optional(8, "stringCol", Types.StringType.get())); + optional(5, "dateCol", Types.DateType.get()), + optional(6, "timestampCol", Types.TimestampType.withZone()), + optional(7, "stringCol", Types.StringType.get()), + optional(8, "booleanCol", Types.BooleanType.get()), + optional(9, "binaryCol", Types.BinaryType.get())); Map options = Maps.newHashMap(); Table table = TABLES.create(schema, PartitionSpec.unpartitioned(), options, tableLocation); @@ -1430,6 +1438,8 @@ private Table createTypeTestTable() { .withColumn("dateCol", date_add(current_date(), 1)) .withColumn("timestampCol", expr("TO_TIMESTAMP(dateCol)")) .withColumn("stringCol", expr("CAST(dateCol AS STRING)")) + .withColumn("booleanCol", expr("longCol > 5")) + .withColumn("binaryCol", expr("CAST(longCol AS BINARY)")) .write() .format("iceberg") .mode("append") From a6981c82b458e952daca91970fc54716ed9cce67 Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Mon, 18 Apr 2022 13:59:17 -0500 Subject: [PATCH 25/30] CheckStyle --- .../iceberg/spark/actions/TestRewriteDataFilesAction.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/spark/v3.2/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java b/spark/v3.2/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java index d577e0ce4eda..4137bded9404 100644 --- a/spark/v3.2/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java +++ b/spark/v3.2/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java @@ -22,7 +22,6 @@ import java.io.File; import java.io.IOException; import java.io.UncheckedIOException; -import java.nio.ByteBuffer; import java.util.Collections; import java.util.Comparator; import java.util.List; @@ -88,9 +87,6 @@ import org.apache.iceberg.util.Pair; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import org.apache.spark.sql.expressions.UserDefinedFunction; -import org.apache.spark.sql.functions; -import org.apache.spark.sql.types.DataTypes; import org.junit.Assert; import org.junit.Before; import org.junit.Rule; From ecf04d8cabd76e89df6511308a09babb15761f45 Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Mon, 18 Apr 2022 17:53:52 -0500 Subject: [PATCH 26/30] Fix bug with reused Interleave Buffer --- .../apache/iceberg/util/ZOrderByteUtils.java | 2 ++ .../iceberg/util/TestZOrderByteUtil.java | 29 +++++++++++++++++++ .../spark/actions/SparkZOrderStrategy.java | 4 +-- .../iceberg/spark/actions/SparkZOrderUDF.java | 28 ++++++++++-------- 4 files changed, 49 insertions(+), 14 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java 
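The commit message above names the bug; concretely, ZOrderByteUtils.interleaveBits ORs source bits into a caller-supplied reuse buffer, so bits set while interleaving the previous row survive into the next result unless the buffer is zeroed first, which is what the Arrays.fill added below does. A minimal standalone reproduction of the failure mode (illustrative code, not the project's test):

import java.nio.ByteBuffer;
import java.util.Arrays;

class DirtyBufferDemo {
  // ORs each input byte into the reused output, the way bit interleaving accumulates bits.
  static byte[] orInto(byte[] input, ByteBuffer reuse) {
    byte[] out = reuse.array();
    // Without a clearing step such as Arrays.fill(out, 0, input.length, (byte) 0x00),
    // bits from the previous call leak into this result.
    for (int i = 0; i < input.length; i++) {
      out[i] |= input[i];
    }
    return out;
  }

  public static void main(String[] args) {
    ByteBuffer reuse = ByteBuffer.allocate(1);
    System.out.println(Arrays.toString(orInto(new byte[] {0x0F}, reuse)));        // [15]
    System.out.println(Arrays.toString(orInto(new byte[] {(byte) 0xF0}, reuse))); // [-1], but [-16] expected
  }
}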
b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java index 9b5e571862c9..399a8bd21ef1 100644 --- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java +++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java @@ -167,6 +167,8 @@ static byte[] interleaveBits(byte[][] columnsBinary, int interleavedSize) { */ public static byte[] interleaveBits(byte[][] columnsBinary, int interleavedSize, ByteBuffer reuse) { byte[] interleavedBytes = reuse.array(); + Arrays.fill(interleavedBytes, 0, interleavedSize, (byte) 0x00); + int sourceColumn = 0; int sourceByte = 0; int sourceBit = 7; diff --git a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java index 52ae803a5e18..1a2174b679ba 100644 --- a/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java +++ b/core/src/test/java/org/apache/iceberg/util/TestZOrderByteUtil.java @@ -56,6 +56,13 @@ private String bytesToString(byte[] bytes) { */ private byte[] generateRandomBytes() { int length = Math.abs(random.nextInt(100) + 1); + return generateRandomBytes(length); + } + + /** + * Returns a byte array of a specified length + */ + private byte[] generateRandomBytes(int length) { byte[] result = new byte[length]; random.nextBytes(result); return result; @@ -107,6 +114,28 @@ public void testInterleaveRandomExamples() { } } + @Test + public void testReuseInterleaveBuffer() { + int numByteArrays = 2; + int colLength = 16; + ByteBuffer interleaveBuffer = ByteBuffer.allocate(numByteArrays * colLength); + for (int test = 0; test < NUM_INTERLEAVE_TESTS; test++) { + byte[][] testBytes = new byte[numByteArrays][]; + String[] testStrings = new String[numByteArrays]; + for (int byteIndex = 0; byteIndex < numByteArrays; byteIndex++) { + testBytes[byteIndex] = generateRandomBytes(colLength); + testStrings[byteIndex] = bytesToString(testBytes[byteIndex]); + } + + byte[] byteResult = ZOrderByteUtils.interleaveBits(testBytes, numByteArrays * colLength, interleaveBuffer); + String byteResultAsString = bytesToString(byteResult); + + String stringResult = interleaveStrings(testStrings); + + Assert.assertEquals("String interleave didn't match byte interleave", stringResult, byteResultAsString); + } + } + @Test public void testInterleaveEmptyBits() { byte[][] test = new byte[4][10]; diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderStrategy.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderStrategy.java index a38ea69b5959..234401d58039 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderStrategy.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderStrategy.java @@ -81,7 +81,6 @@ public class SparkZOrderStrategy extends SparkSortStrategy { private static final int DEFAULT_VAR_LENGTH_CONTRIBUTION = ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE; private final List zOrderColNames; - private final SparkZOrderUDF zOrderUDF; private int maxOutputSize; private int varLengthContribution; @@ -134,7 +133,6 @@ public SparkZOrderStrategy(Table table, SparkSession spark, List zOrderC "Cannot perform ZOrdering, all columns provided were identity partition columns and cannot be used."); } - this.zOrderUDF = new SparkZOrderUDF(zOrderColNames.size(), varLengthContribution, maxOutputSize); this.zOrderColNames = zOrderColNames; } @@ -151,6 +149,8 @@ protected void validateOptions() { @Override public Set rewriteFiles(List filesToRewrite) { + 
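The construction moved here out of the constructor and into rewriteFiles (continued below) fixes an ordering problem: options() assigns varLengthContribution and maxOutputSize only after the strategy object exists, so a UDF built in the constructor captured their pre-options values; building a fresh SparkZOrderUDF per rewrite also keeps its mutating counters (inputCol, totalOutputBytes) scoped to a single invocation. A toy illustration of the scoping half, with hypothetical names (Binder, boundColumns):

class Binder {
  private int boundColumns = 0;

  void bindColumn() {
    boundColumns++; // grows every time a column UDF is created
  }

  int boundColumns() {
    return boundColumns;
  }
}

class RewriteScopingDemo {
  // A fresh Binder per rewrite starts its counts at zero each time; a Binder held in a
  // field and initialized once would keep accumulating across successive rewrites.
  static int rewrite(int numCols) {
    Binder binder = new Binder();
    for (int i = 0; i < numCols; i++) {
      binder.bindColumn();
    }
    return binder.boundColumns(); // always numCols
  }
}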
SparkZOrderUDF zOrderUDF = new SparkZOrderUDF(zOrderColNames.size(), varLengthContribution, maxOutputSize); + String groupID = UUID.randomUUID().toString(); boolean requiresRepartition = !filesToRewrite.get(0).spec().equals(table().spec()); diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java index 8bebfb2b7543..302d969053a8 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java @@ -71,9 +71,6 @@ class SparkZOrderUDF implements Serializable { private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException { in.defaultReadObject(); - if (totalOutputBytes > maxOutputSize) { - totalOutputBytes = maxOutputSize; - } inputBuffers = ThreadLocal.withInitial(() -> new ByteBuffer[numCols]); inputHolder = ThreadLocal.withInitial(() -> new byte[numCols][]); outputBuffer = ThreadLocal.withInitial(() -> ByteBuffer.allocate(totalOutputBytes)); @@ -106,7 +103,7 @@ private UserDefinedFunction tinyToOrderedBytesUDF() { }, DataTypes.BinaryType).withName("TINY_ORDERED_BYTES"); this.inputCol++; - this.totalOutputBytes += Byte.BYTES; + increaseOutputSize(ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE); return udf; } @@ -122,7 +119,7 @@ private UserDefinedFunction shortToOrderedBytesUDF() { }, DataTypes.BinaryType).withName("SHORT_ORDERED_BYTES"); this.inputCol++; - this.totalOutputBytes += Short.BYTES; + increaseOutputSize(ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE); return udf; } @@ -138,7 +135,7 @@ private UserDefinedFunction intToOrderedBytesUDF() { }, DataTypes.BinaryType).withName("INT_ORDERED_BYTES"); this.inputCol++; - this.totalOutputBytes += Integer.BYTES; + increaseOutputSize(ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE); return udf; } @@ -154,7 +151,7 @@ private UserDefinedFunction longToOrderedBytesUDF() { }, DataTypes.BinaryType).withName("LONG_ORDERED_BYTES"); this.inputCol++; - this.totalOutputBytes += Long.BYTES; + increaseOutputSize(ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE); return udf; } @@ -170,7 +167,7 @@ private UserDefinedFunction floatToOrderedBytesUDF() { }, DataTypes.BinaryType).withName("FLOAT_ORDERED_BYTES"); this.inputCol++; - this.totalOutputBytes += Float.BYTES; + increaseOutputSize(ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE); return udf; } @@ -186,7 +183,7 @@ private UserDefinedFunction doubleToOrderedBytesUDF() { }, DataTypes.BinaryType).withName("DOUBLE_ORDERED_BYTES"); this.inputCol++; - this.totalOutputBytes += Double.BYTES; + increaseOutputSize(ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE); return udf; } @@ -202,7 +199,7 @@ private UserDefinedFunction stringToOrderedBytesUDF() { .withName("STRING-LEXICAL-BYTES"); this.inputCol++; - this.totalOutputBytes += varTypeSize; + increaseOutputSize(varTypeSize); return udf; } @@ -215,7 +212,7 @@ private UserDefinedFunction bytesTruncateUDF() { .withName("BYTE-TRUNCATE"); this.inputCol++; - this.totalOutputBytes += varTypeSize; + increaseOutputSize(varTypeSize); return udf; } @@ -230,7 +227,7 @@ private UserDefinedFunction booleanToOrderedBytesUDF() { .withName("BOOLEAN-LEXICAL-BYTES"); this.inputCol++; - this.totalOutputBytes += ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE; + increaseOutputSize(ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE); return udf; } @@ -272,4 +269,11 @@ Column sortedLexicographically(Column column, DataType type) { column, type)); } } + + private void increaseOutputSize(int bytes) 
{ + totalOutputBytes += bytes; + if (totalOutputBytes > maxOutputSize) { + totalOutputBytes = maxOutputSize; + } + } } From e5fdd4b1afe19e5abb7d9d24501aebc4f5fa3066 Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Tue, 19 Apr 2022 14:48:41 -0500 Subject: [PATCH 27/30] Reviewer Feedback --- .../spark/actions/SparkZOrderStrategy.java | 2 +- .../iceberg/spark/actions/SparkZOrderUDF.java | 3 +-- .../spark/data/SparkParquetWriters.java | 22 ------------------- 3 files changed, 2 insertions(+), 25 deletions(-) diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderStrategy.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderStrategy.java index 234401d58039..ecb00ad4118b 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderStrategy.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderStrategy.java @@ -143,7 +143,7 @@ public String name() { @Override protected void validateOptions() { - // TODO implement ZOrder Strategy in API Module + // Ignore SortStrategy validation return; } diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java index 302d969053a8..dd19e1afff34 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java @@ -265,8 +265,7 @@ Column sortedLexicographically(Column column, DataType type) { return longToOrderedBytesUDF().apply(column.cast(DataTypes.LongType)); } else { throw new IllegalArgumentException( - String.format("Cannot use column %s of type %s in ZOrdering, the type is unsupported", - column, type)); + String.format("Cannot use column %s of type %s in ZOrdering, the type is unsupported", column, type)); } } diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java index 845fe305e494..436927f49025 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java @@ -301,28 +301,6 @@ public void write(int repetitionLevel, byte[] bytes) { } } - private static PrimitiveWriter uuids(ColumnDescriptor desc) { - return new UUIDWriter(desc); - } - - private static class UUIDWriter extends PrimitiveWriter { - private ByteBuffer buffer = ByteBuffer.allocate(16); - - private UUIDWriter(ColumnDescriptor desc) { - super(desc); - } - - @Override - public void write(int repetitionLevel, UTF8String string) { - UUID uuid = UUID.fromString(string.toString()); - buffer.rewind(); - buffer.putLong(uuid.getMostSignificantBits()); - buffer.putLong(uuid.getLeastSignificantBits()); - buffer.rewind(); - column.writeBinary(repetitionLevel, Binary.fromReusedByteBuffer(buffer)); - } - } - private static class ArrayDataWriter extends RepeatedWriter { private final DataType elementType; From f4a100da53bcf82b6340c0158c72ab0412682abf Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Tue, 19 Apr 2022 14:53:47 -0500 Subject: [PATCH 28/30] One more cleanup --- .../java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java 
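On the size accounting whose body opens this chunk: every column-binding UDF reports its byte contribution through increaseOutputSize, and the running total saturates at maxOutputSize so the interleaved sort key stays bounded however many or however wide the columns are; the z-value is effectively truncated to a prefix. Patch 30 below later collapses the if statement into Math.min, and the two forms are equivalent, as in this standalone sketch (the field names mirror the patch, the enclosing class is illustrative):

class OutputSizeCap {
  private final int maxOutputSize;
  private int totalOutputBytes = 0;

  OutputSizeCap(int maxOutputSize) {
    this.maxOutputSize = maxOutputSize;
  }

  // Saturating add: identical to "add, then clamp to maxOutputSize if exceeded".
  void increaseOutputSize(int bytes) {
    totalOutputBytes = Math.min(totalOutputBytes + bytes, maxOutputSize);
  }

  int totalOutputBytes() {
    return totalOutputBytes;
  }
}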
b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java index dd19e1afff34..c370008517e4 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java @@ -42,6 +42,7 @@ import org.apache.spark.sql.types.ShortType; import org.apache.spark.sql.types.StringType; import org.apache.spark.sql.types.TimestampType; +import scala.collection.JavaConverters; import scala.collection.Seq; class SparkZOrderUDF implements Serializable { @@ -87,8 +88,7 @@ private ByteBuffer inputBuffer(int position, int size) { } byte[] interleaveBits(Seq scalaBinary) { - byte[][] columnsBinary = scala.collection.JavaConverters.seqAsJavaList(scalaBinary) - .toArray(inputHolder.get()); + byte[][] columnsBinary = JavaConverters.seqAsJavaList(scalaBinary).toArray(inputHolder.get()); return ZOrderByteUtils.interleaveBits(columnsBinary, totalOutputBytes, outputBuffer.get()); } From 49a9703b8f2112999ea0d45c9ac06df8b0f6eeb1 Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Tue, 19 Apr 2022 15:18:32 -0500 Subject: [PATCH 29/30] CheckStyle --- .../java/org/apache/iceberg/spark/data/SparkParquetWriters.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java index 436927f49025..5e268d26ed9c 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java @@ -19,12 +19,10 @@ package org.apache.iceberg.spark.data; -import java.nio.ByteBuffer; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.NoSuchElementException; -import java.util.UUID; import org.apache.iceberg.parquet.ParquetValueReaders.ReusableEntry; import org.apache.iceberg.parquet.ParquetValueWriter; import org.apache.iceberg.parquet.ParquetValueWriters; From bec34e915e49336ceb2dd177b63cd62a93a43c8e Mon Sep 17 00:00:00 2001 From: Russell_Spitzer Date: Wed, 20 Apr 2022 16:26:33 -0500 Subject: [PATCH 30/30] Reviewer Comments --- .../apache/iceberg/util/ZOrderByteUtils.java | 1 - .../spark/actions/SparkZOrderStrategy.java | 2 +- .../iceberg/spark/actions/SparkZOrderUDF.java | 32 ++++++++----------- 3 files changed, 15 insertions(+), 20 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java index 399a8bd21ef1..8a1b419a3bb0 100644 --- a/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java +++ b/core/src/main/java/org/apache/iceberg/util/ZOrderByteUtils.java @@ -46,7 +46,6 @@ public class ZOrderByteUtils { public static final int PRIMITIVE_BUFFER_SIZE = 8; private ZOrderByteUtils() { - } static ByteBuffer allocatePrimitiveBuffer() { diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderStrategy.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderStrategy.java index ecb00ad4118b..cdd47fe31372 100644 --- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderStrategy.java +++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderStrategy.java @@ -127,7 +127,7 @@ public SparkZOrderStrategy(Table table, SparkSession spark, List zOrderC if (!partZOrderCols.isEmpty()) { 
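// Editorial note, not in the patch: even with the warn-and-drop behavior, the guard a few
// lines below must still throw when every requested column turns out to be identity
// partitioned, because with no columns left the interleaved z-value would be empty and the
// rewrite would cluster on nothing.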
          LOG.warn("Cannot ZOrder on an Identity partition column as these values are constant within a partition " +
-             "they will be removed from the ZOrder expression: {}", partZOrderCols);
+             "and will be removed from the ZOrder expression: {}", partZOrderCols);
          zOrderColNames.removeAll(partZOrderCols);
          Preconditions.checkArgument(!zOrderColNames.isEmpty(),
              "Cannot perform ZOrdering, all columns provided were identity partition columns and cannot be used.");
        }
diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java
index c370008517e4..eea3689211e2 100644
--- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java
+++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/SparkZOrderUDF.java
@@ -188,6 +188,19 @@ private UserDefinedFunction doubleToOrderedBytesUDF() {
     return udf;
   }
 
+  private UserDefinedFunction booleanToOrderedBytesUDF() {
+    int position = inputCol;
+    UserDefinedFunction udf = functions.udf((Boolean value) -> {
+      ByteBuffer buffer = inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE);
+      buffer.put(0, (byte) (value ? -127 : 0));
+      return buffer.array();
+    }, DataTypes.BinaryType).withName("BOOLEAN-LEXICAL-BYTES");
+
+    this.inputCol++;
+    increaseOutputSize(ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE);
+    return udf;
+  }
+
   private UserDefinedFunction stringToOrderedBytesUDF() {
     int position = inputCol;
     UserDefinedFunction udf = functions.udf((String value) ->
@@ -217,20 +230,6 @@ private UserDefinedFunction bytesTruncateUDF() {
     return udf;
   }
 
-  private UserDefinedFunction booleanToOrderedBytesUDF() {
-    int position = inputCol;
-    UserDefinedFunction udf = functions.udf((Boolean value) -> {
-      ByteBuffer buffer = inputBuffer(position, ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE);
-      buffer.put(0, (byte) (value ? -127 : 0));
-      return buffer.array();
-    }, DataTypes.BinaryType)
-        .withName("BOOLEAN-LEXICAL-BYTES");
-
-    this.inputCol++;
-    increaseOutputSize(ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE);
-    return udf;
-  }
-
   private final UserDefinedFunction interleaveUDF =
       functions.udf((Seq<byte[]> arrayBinary) -> interleaveBits(arrayBinary), DataTypes.BinaryType)
           .withName("INTERLEAVE_BYTES");
@@ -270,9 +269,6 @@ Column sortedLexicographically(Column column, DataType type) {
   }
 
   private void increaseOutputSize(int bytes) {
-    totalOutputBytes += bytes;
-    if (totalOutputBytes > maxOutputSize) {
-      totalOutputBytes = maxOutputSize;
-    }
+    totalOutputBytes = Math.min(totalOutputBytes + bytes, maxOutputSize);
   }
 }
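A closing note on the boolean encoding the series settles on: false maps to 0x00 and true to (byte) -127, which reads as 0x81 unsigned, so an unsigned lexicographic comparison orders false before true, and the flag occupies a full primitive-buffer slot like the other fixed-width types. A quick self-contained check of that ordering (a sketch; the 8-byte size mirrors ZOrderByteUtils.PRIMITIVE_BUFFER_SIZE):

import java.util.Arrays;

class BooleanOrderingCheck {
  static byte[] toOrderedBytes(boolean value) {
    byte[] buffer = new byte[8];            // PRIMITIVE_BUFFER_SIZE in the patch
    buffer[0] = (byte) (value ? -127 : 0);  // 0x81 for true, 0x00 for false
    return buffer;
  }

  // Unsigned lexicographic comparison, the ordering a binary sort key implies.
  static int compareUnsigned(byte[] a, byte[] b) {
    for (int i = 0; i < Math.min(a.length, b.length); i++) {
      int cmp = Integer.compare(a[i] & 0xFF, b[i] & 0xFF);
      if (cmp != 0) {
        return cmp;
      }
    }
    return Integer.compare(a.length, b.length);
  }

  public static void main(String[] args) {
    byte[] f = toOrderedBytes(false);
    byte[] t = toOrderedBytes(true);
    System.out.println(compareUnsigned(f, t) < 0); // true: false sorts before true
    System.out.println(Arrays.toString(t));        // [-127, 0, 0, 0, 0, 0, 0, 0]
  }
}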