apache · alamb · Jun 29, 2025 · Jun 23, 2025 · Jun 23, 2025 · Jun 23, 2025
diff --git a/arrow-array/src/array/byte_view_array.rs b/arrow-array/src/array/byte_view_array.rs
@@ -27,6 +27,7 @@ use arrow_schema::{ArrowError, DataType};
 use core::str;
 use num::ToPrimitive;
 use std::any::Any;
+use std::cmp::Ordering;
 use std::fmt::Debug;
 use std::marker::PhantomData;
 use std::sync::Arc;
@@ -537,17 +538,46 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
         left_idx: usize,
         right: &GenericByteViewArray<T>,
         right_idx: usize,
-    ) -> std::cmp::Ordering {
+    ) -> Ordering {
         let l_view = left.views().get_unchecked(left_idx);
         let l_len = *l_view as u32;
 
         let r_view = right.views().get_unchecked(right_idx);
         let r_len = *r_view as u32;
 
         if l_len <= 12 && r_len <= 12 {
-            let l_data = unsafe { GenericByteViewArray::<T>::inline_value(l_view, l_len as usize) };
-            let r_data = unsafe { GenericByteViewArray::<T>::inline_value(r_view, r_len as usize) };
-            return l_data.cmp(r_data);
+            // Directly load the 16-byte view as an u128 (little-endian)
+            let l_bits: u128 = unsafe { *left.views().get_unchecked(left_idx) };
+            let r_bits: u128 = unsafe { *right.views().get_unchecked(right_idx) };
+
+            // The lower 32 bits encode the length (little-endian),
+            // the upper 96 bits hold the actual data
+            let l_len = l_bits as u32;
+            let r_len = r_bits as u32;
+
+            // Remove the length bits, leaving only the data
+            let l_data = l_bits >> 32;
+            let r_data = r_bits >> 32;
+
+            // The data is stored in little-endian order. To compare lexicographically,
+            // convert to big-endian:
+            let l_be = l_data.swap_bytes();
+            let r_be = r_data.swap_bytes();
+
+            // Compare only the first min_len bytes
+            let min_len = l_len.min(r_len);
+            // We have all 12 bytes in the high bits, but only want the top min_len
+            let shift = (12 - min_len) * 8;
+            let l_partial = l_be >> shift;
+            let r_partial = r_be >> shift;
+            if l_partial < r_partial {
+                return Ordering::Less;
+            } else if l_partial > r_partial {
+                return Ordering::Greater;
+            }
+
+            // If the prefixes are equal, the shorter one is considered smaller
+            return l_len.cmp(&r_len);
         }
 
         // one of the string is larger than 12 bytes,

diff --git a/arrow-ord/src/cmp.rs b/arrow-ord/src/cmp.rs
@@ -33,6 +33,7 @@ use arrow_buffer::bit_util::ceil;
 use arrow_buffer::{BooleanBuffer, MutableBuffer, NullBuffer};
 use arrow_schema::ArrowError;
 use arrow_select::take::take;
+use std::cmp::Ordering;
 use std::ops::Not;
 
 #[derive(Debug, Copy, Clone)]
@@ -571,7 +572,7 @@ impl<'a, T: ByteViewType> ArrayOrd for &'a GenericByteViewArray<T> {
         let r_view = unsafe { r.0.views().get_unchecked(r.1) };
         if l.0.data_buffers().is_empty() && r.0.data_buffers().is_empty() {
             // For eq case, we can directly compare the inlined bytes
-            return l_view.cmp(r_view).is_eq();
+            return l_view.eq(r_view);
         }
 
         let l_len = *l_view as u32;
@@ -592,15 +593,41 @@ impl<'a, T: ByteViewType> ArrayOrd for &'a GenericByteViewArray<T> {
 
     #[inline(always)]
     fn is_lt(l: Self::Item, r: Self::Item) -> bool {
+        // If both arrays use only the inline buffer
         if l.0.data_buffers().is_empty() && r.0.data_buffers().is_empty() {
-            let l_view = unsafe { l.0.views().get_unchecked(l.1) };
-            let r_view = unsafe { r.0.views().get_unchecked(r.1) };
-            let l_len = *l_view as u32 as usize;
-            let r_len = *r_view as u32 as usize;
-            let l_bytes = unsafe { GenericByteViewArray::<T>::inline_value(l_view, l_len) };
-            let r_bytes = unsafe { GenericByteViewArray::<T>::inline_value(r_view, r_len) };
-            return l_bytes.cmp(r_bytes).is_lt();
+            // Directly load the 16-byte view as an u128 (little-endian)
+            let l_bits: u128 = unsafe { *l.0.views().get_unchecked(l.1) };
+            let r_bits: u128 = unsafe { *r.0.views().get_unchecked(r.1) };
+
+            // The lower 32 bits encode the length (little-endian),
+            // the upper 96 bits hold the actual data
+            let l_len = l_bits as u32;
+            let r_len = r_bits as u32;
+
+            // Remove the length bits, leaving only the data
+            let l_data = l_bits >> 32;
+            let r_data = r_bits >> 32;
+
+            // The data is stored in little-endian order. To compare lexicographically,
+            // convert to big-endian:
+            let l_be = l_data.swap_bytes();
+            let r_be = r_data.swap_bytes();
+
+            // Compare only the first min_len bytes
+            let min_len = l_len.min(r_len);
+            // We have all 12 bytes in the high bits, but only want the top min_len
+            let shift = (12 - min_len) * 8;
+            let l_partial = l_be >> shift;
+            let r_partial = r_be >> shift;
+            if l_partial != r_partial {
+                return l_partial < r_partial;
+            }
+
+            // If the prefixes are equal, the shorter one is considered smaller
+            return l_len < r_len;
         }
+
+        // Fallback to the generic, unchecked comparison for non-inline cases
         // # Safety
         // The index is within bounds as it is checked in value()
         unsafe { GenericByteViewArray::compare_unchecked(l.0, l.1, r.0, r.1).is_lt() }
@@ -642,17 +669,42 @@ pub fn compare_byte_view<T: ByteViewType>(
     left_idx: usize,
     right: &GenericByteViewArray<T>,
     right_idx: usize,
-) -> std::cmp::Ordering {
+) -> Ordering {
     assert!(left_idx < left.len());
     assert!(right_idx < right.len());
     if left.data_buffers().is_empty() && right.data_buffers().is_empty() {
-        let l_view = unsafe { left.views().get_unchecked(left_idx) };
-        let r_view = unsafe { right.views().get_unchecked(right_idx) };
-        let l_len = *l_view as u32 as usize;
-        let r_len = *r_view as u32 as usize;
-        let l_bytes = unsafe { GenericByteViewArray::<T>::inline_value(l_view, l_len) };
-        let r_bytes = unsafe { GenericByteViewArray::<T>::inline_value(r_view, r_len) };
-        return l_bytes.cmp(r_bytes);
+        // Directly load the 16-byte view as an u128 (little-endian)
+        let l_bits: u128 = unsafe { *left.views().get_unchecked(left_idx) };
+        let r_bits: u128 = unsafe { *right.views().get_unchecked(right_idx) };
+
+        // The lower 32 bits encode the length (little-endian),
+        // the upper 96 bits hold the actual data
+        let l_len = l_bits as u32;
+        let r_len = r_bits as u32;
+
+        // Remove the length bits, leaving only the data
+        let l_data = l_bits >> 32;
+        let r_data = r_bits >> 32;
+
+        // The data is stored in little-endian order. To compare lexicographically,
+        // convert to big-endian:
+        let l_be = l_data.swap_bytes();
+        let r_be = r_data.swap_bytes();
+
+        // Compare only the first min_len bytes
+        let min_len = l_len.min(r_len);
+        // We have all 12 bytes in the high bits, but only want the top min_len
+        let shift = (12 - min_len) * 8;
+        let l_partial = l_be >> shift;
+        let r_partial = r_be >> shift;
+        if l_partial < r_partial {
+            return Ordering::Less;
+        } else if l_partial > r_partial {
+            return Ordering::Greater;
+        }
+
+        // If the prefixes are equal, the shorter one is considered smaller
+        return l_len.cmp(&r_len);
     }
     unsafe { GenericByteViewArray::compare_unchecked(left, left_idx, right, right_idx) }
 }