apache · alamb · Jun 29, 2025 · Jun 23, 2025 · Jun 23, 2025 · Jun 23, 2025
diff --git a/arrow-array/src/array/byte_view_array.rs b/arrow-array/src/array/byte_view_array.rs
@@ -27,6 +27,7 @@ use arrow_schema::{ArrowError, DataType};
 use core::str;
 use num::ToPrimitive;
 use std::any::Any;
+use std::cmp::Ordering;
 use std::fmt::Debug;
 use std::marker::PhantomData;
 use std::sync::Arc;
@@ -539,25 +540,30 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
         left_idx: usize,
         right: &GenericByteViewArray<T>,
         right_idx: usize,
-    ) -> std::cmp::Ordering {
+    ) -> Ordering {
         let l_view = left.views().get_unchecked(left_idx);
-        let l_len = *l_view as u32;
+        let l_byte_view = ByteView::from(*l_view);
 
         let r_view = right.views().get_unchecked(right_idx);
-        let r_len = *r_view as u32;
+        let r_byte_view = ByteView::from(*r_view);
 
-        if l_len <= MAX_INLINE_VIEW_LEN && r_len <= MAX_INLINE_VIEW_LEN {
-            let l_data = unsafe { GenericByteViewArray::<T>::inline_value(l_view, l_len as usize) };
-            let r_data = unsafe { GenericByteViewArray::<T>::inline_value(r_view, r_len as usize) };
-            return l_data.cmp(r_data);
+        let l_len = l_byte_view.length;
+        let r_len = r_byte_view.length;
+
+        if l_len <= 12 && r_len <= 12 {
+            return Self::inline_key_fast(*l_view).cmp(&Self::inline_key_fast(*r_view));
         }
 
         // one of the string is larger than 12 bytes,
         // we then try to compare the inlined data first
-        let l_inlined_data = unsafe { GenericByteViewArray::<T>::inline_value(l_view, 4) };
-        let r_inlined_data = unsafe { GenericByteViewArray::<T>::inline_value(r_view, 4) };
-        if r_inlined_data != l_inlined_data {
-            return l_inlined_data.cmp(r_inlined_data);
+
+        // Note: In theory, ByteView is only used for string which is larger than 12 bytes,
+        // but we can still use it to get the inlined prefix for shorter strings.
+        // The prefix is always the first 4 bytes of the view, for both short and long strings.
+        let l_inlined_be = l_byte_view.prefix.swap_bytes();
+        let r_inlined_be = r_byte_view.prefix.swap_bytes();
+        if l_inlined_be != r_inlined_be {
+            return l_inlined_be.cmp(&r_inlined_be);
         }
 
         // unfortunately, we need to compare the full data
@@ -566,6 +572,46 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
 
         l_full_data.cmp(r_full_data)
     }
+
+    /// Builds a 128-bit composite key for an inline value:
+    /// - High 96 bits: the inline data in big-endian byte order (for correct lexicographical sorting)
+    /// - Low  32 bits: the length in big-endian byte order (so shorter strings are always numerically smaller)
+    ///
+    /// This function extracts the length and the 12-byte inline string data from the
+    /// raw little-endian u128 representation, converts them to big-endian ordering,
+    /// and packs them into a single u128 value suitable for fast comparisons.
+    ///
+    /// # Note
+    /// The input `raw` is assumed to be in little-endian format with the following layout:
+    /// - bytes 0..4: length (u32)
+    /// - bytes 4..16: inline string data (padded with zeros if less than 12 bytes)
+    ///
+    /// The output u128 key places the inline string data in the upper 96 bits (big-endian)
+    /// and the length in the lower 32 bits (big-endian).
+    #[inline(always)]
+    pub fn inline_key_fast(raw: u128) -> u128 {
+        // Convert the raw u128 (little-endian) into bytes for manipulation
+        let raw_bytes = raw.to_le_bytes();
+
+        // Extract the length (first 4 bytes), convert to big-endian u32, and promote to u128
+        let len_le = &raw_bytes[0..4];
+        let len_be = u32::from_le_bytes(len_le.try_into().unwrap()).to_be() as u128;
+
+        // Extract the inline string bytes (next 12 bytes), place them into the lower 12 bytes of a 16-byte array,
+        // padding the upper 4 bytes with zero to form a little-endian u128 value
+        let mut inline_bytes = [0u8; 16];
+        inline_bytes[4..16].copy_from_slice(&raw_bytes[4..16]);
+
+        // Convert to big-endian to ensure correct lexical ordering
+        let inline_u128 = u128::from_le_bytes(inline_bytes).to_be();
+
+        // Shift right by 32 bits to discard the zero padding (upper 4 bytes),
+        // so that the inline string occupies the high 96 bits
+        let inline_part = inline_u128 >> 32;
+
+        // Combine the inline string part (high 96 bits) and length (low 32 bits) into the final key
+        (inline_part << 32) | len_be
+    }
 }
 
 impl<T: ByteViewType + ?Sized> Debug for GenericByteViewArray<T> {
@@ -874,7 +920,8 @@ impl From<Vec<Option<String>>> for StringViewArray {
 #[cfg(test)]
 mod tests {
     use crate::builder::{BinaryViewBuilder, StringViewBuilder};
-    use crate::{Array, BinaryViewArray, StringViewArray};
+    use crate::types::BinaryViewType;
+    use crate::{Array, BinaryViewArray, GenericByteViewArray, StringViewArray};
     use arrow_buffer::{Buffer, ScalarBuffer};
     use arrow_data::ByteView;
 
@@ -1090,4 +1137,54 @@ mod tests {
         assert_eq!(array2, array2.clone());
         assert_eq!(array1, array2);
     }
+
+    #[test]
+    fn test_inline_key_fast_various_lengths() {
+        /// Helper to create a raw u128 value representing an inline ByteView
+        /// - `length`: number of meaningful bytes (<= 12)
+        /// - `data`: the actual inline data (prefix + padding)
+        fn make_raw_inline(length: u32, data: &[u8]) -> u128 {
+            assert!(length as usize <= 12, "Inline length must be ≤ 12");
+            assert!(data.len() == length as usize, "Data must match length");
+
+            let mut raw_bytes = [0u8; 16];
+            raw_bytes[0..4].copy_from_slice(&length.to_le_bytes()); // little-endian length
+            raw_bytes[4..(4 + data.len())].copy_from_slice(data); // inline data
+            u128::from_le_bytes(raw_bytes)
+        }
+
+        // Test multiple inline values of increasing length and content
+        let test_inputs: Vec<&[u8]> = vec![
+            b"a",
+            b"ab",
+            b"abc",
+            b"abcd",
+            b"abcde",
+            b"abcdef",
+            b"abcdefg",
+            b"abcdefgh",
+            b"abcdefghi",
+            b"abcdefghij",
+            b"abcdefghijk",
+            b"abcdefghijkl", // 12 bytes, max inline
+        ];
+
+        let mut previous_key = None;
+
+        for input in test_inputs {
+            let raw = make_raw_inline(input.len() as u32, input);
+            let key = GenericByteViewArray::<BinaryViewType>::inline_key_fast(raw);
+
+            // Validate that keys are monotonically increasing in lexicographic+length order
+            if let Some(prev) = previous_key {
+                assert!(
+                    prev < key,
+                    "Key for {:?} was not greater than previous key",
+                    input
+                );
+            }
+
+            previous_key = Some(key);
+        }
+    }
 }
diff --git a/arrow-ord/src/cmp.rs b/arrow-ord/src/cmp.rs
@@ -33,6 +33,7 @@ use arrow_buffer::bit_util::ceil;
 use arrow_buffer::{BooleanBuffer, MutableBuffer, NullBuffer};
 use arrow_schema::ArrowError;
 use arrow_select::take::take;
+use std::cmp::Ordering;
 use std::ops::Not;
 
 #[derive(Debug, Copy, Clone)]
@@ -571,7 +572,7 @@ impl<'a, T: ByteViewType> ArrayOrd for &'a GenericByteViewArray<T> {
         let r_view = unsafe { r.0.views().get_unchecked(r.1) };
         if l.0.data_buffers().is_empty() && r.0.data_buffers().is_empty() {
             // For eq case, we can directly compare the inlined bytes
-            return l_view.cmp(r_view).is_eq();
+            return l_view == r_view;
         }
 
         let l_len = *l_view as u32;
@@ -592,15 +593,15 @@ impl<'a, T: ByteViewType> ArrayOrd for &'a GenericByteViewArray<T> {
 
     #[inline(always)]
     fn is_lt(l: Self::Item, r: Self::Item) -> bool {
+        // If both arrays use only the inline buffer
         if l.0.data_buffers().is_empty() && r.0.data_buffers().is_empty() {
             let l_view = unsafe { l.0.views().get_unchecked(l.1) };
             let r_view = unsafe { r.0.views().get_unchecked(r.1) };
-            let l_len = *l_view as u32 as usize;
-            let r_len = *r_view as u32 as usize;
-            let l_bytes = unsafe { GenericByteViewArray::<T>::inline_value(l_view, l_len) };
-            let r_bytes = unsafe { GenericByteViewArray::<T>::inline_value(r_view, r_len) };
-            return l_bytes.cmp(r_bytes).is_lt();
+            return GenericByteViewArray::<T>::inline_key_fast(*l_view)
+                < GenericByteViewArray::<T>::inline_key_fast(*r_view);
         }
+
+        // Fallback to the generic, unchecked comparison for non-inline cases
         // # Safety
         // The index is within bounds as it is checked in value()
         unsafe { GenericByteViewArray::compare_unchecked(l.0, l.1, r.0, r.1).is_lt() }
@@ -642,17 +643,14 @@ pub fn compare_byte_view<T: ByteViewType>(
     left_idx: usize,
     right: &GenericByteViewArray<T>,
     right_idx: usize,
-) -> std::cmp::Ordering {
+) -> Ordering {
     assert!(left_idx < left.len());
     assert!(right_idx < right.len());
     if left.data_buffers().is_empty() && right.data_buffers().is_empty() {
         let l_view = unsafe { left.views().get_unchecked(left_idx) };
         let r_view = unsafe { right.views().get_unchecked(right_idx) };
-        let l_len = *l_view as u32 as usize;
-        let r_len = *r_view as u32 as usize;
-        let l_bytes = unsafe { GenericByteViewArray::<T>::inline_value(l_view, l_len) };
-        let r_bytes = unsafe { GenericByteViewArray::<T>::inline_value(r_view, r_len) };
-        return l_bytes.cmp(r_bytes);
+        return GenericByteViewArray::<T>::inline_key_fast(*l_view)
+            .cmp(&GenericByteViewArray::<T>::inline_key_fast(*r_view));
     }
     unsafe { GenericByteViewArray::compare_unchecked(left, left_idx, right, right_idx) }
 }