Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 25 additions & 4 deletions arrow-array/src/array/byte_view_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ use arrow_schema::{ArrowError, DataType};
use core::str;
use num::ToPrimitive;
use std::any::Any;
use std::cmp::Ordering;
use std::fmt::Debug;
use std::marker::PhantomData;
use std::sync::Arc;
Expand Down Expand Up @@ -537,17 +538,37 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
left_idx: usize,
right: &GenericByteViewArray<T>,
right_idx: usize,
) -> std::cmp::Ordering {
) -> Ordering {
let l_view = left.views().get_unchecked(left_idx);
let l_len = *l_view as u32;

let r_view = right.views().get_unchecked(right_idx);
let r_len = *r_view as u32;

if l_len <= 12 && r_len <= 12 {
let l_data = unsafe { GenericByteViewArray::<T>::inline_value(l_view, l_len as usize) };
let r_data = unsafe { GenericByteViewArray::<T>::inline_value(r_view, r_len as usize) };
return l_data.cmp(r_data);
// Remove the length bits, leaving only the data
let l_data = *l_view >> 32;
let r_data = *r_view >> 32;

// The data is stored in little-endian order. To compare lexicographically,
// convert to big-endian:
let l_be = l_data.swap_bytes();
let r_be = r_data.swap_bytes();

// Compare only the first min_len bytes
let min_len = l_len.min(r_len);
// We have all 12 bytes in the high bits, but only want the top min_len
let shift = (12 - min_len) * 8;
let l_partial = l_be >> shift;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be possible to OR the length back into the lower bits, which would then allow getting a result with a single u128 comparison. I think it would also be beneficial to extract this code block into a shared helper function, and add some unit tests for it. The generic code here might not be well convered by tests because of the fast path for inline buffers elsewhere.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you @jhorstmann for review, good suggestion, i will address it!

let r_partial = r_be >> shift;
if l_partial < r_partial {
return Ordering::Less;
} else if l_partial > r_partial {
return Ordering::Greater;
}

// If the prefixes are equal, the shorter one is considered smaller
return l_len.cmp(&r_len);
}

// one of the string is larger than 12 bytes,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can change this code below to use (l_view >> 32) as u32 as well (or ByteView if it generates the same code). It seems that is a bit faster for the prefix comparison:

lt scalar StringViewArray
                        time:   [34.533 ms 34.567 ms 34.601 ms]
                        change: [−11.030% −10.827% −10.620%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 4 outliers among 100 measurements (4.00%)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point @Dandandan , i will change to ByteView prefix.

Expand Down
84 changes: 68 additions & 16 deletions arrow-ord/src/cmp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ use arrow_buffer::bit_util::ceil;
use arrow_buffer::{BooleanBuffer, MutableBuffer, NullBuffer};
use arrow_schema::ArrowError;
use arrow_select::take::take;
use std::cmp::Ordering;
use std::ops::Not;

#[derive(Debug, Copy, Clone)]
Expand Down Expand Up @@ -571,7 +572,7 @@ impl<'a, T: ByteViewType> ArrayOrd for &'a GenericByteViewArray<T> {
let r_view = unsafe { r.0.views().get_unchecked(r.1) };
if l.0.data_buffers().is_empty() && r.0.data_buffers().is_empty() {
// For eq case, we can directly compare the inlined bytes
return l_view.cmp(r_view).is_eq();
return l_view.eq(r_view);
}

let l_len = *l_view as u32;
Expand All @@ -592,15 +593,41 @@ impl<'a, T: ByteViewType> ArrayOrd for &'a GenericByteViewArray<T> {

#[inline(always)]
fn is_lt(l: Self::Item, r: Self::Item) -> bool {
// If both arrays use only the inline buffer
if l.0.data_buffers().is_empty() && r.0.data_buffers().is_empty() {
let l_view = unsafe { l.0.views().get_unchecked(l.1) };
let r_view = unsafe { r.0.views().get_unchecked(r.1) };
let l_len = *l_view as u32 as usize;
let r_len = *r_view as u32 as usize;
let l_bytes = unsafe { GenericByteViewArray::<T>::inline_value(l_view, l_len) };
let r_bytes = unsafe { GenericByteViewArray::<T>::inline_value(r_view, r_len) };
return l_bytes.cmp(r_bytes).is_lt();
// Directly load the 16-byte view as an u128 (little-endian)
let l_bits: u128 = unsafe { *l.0.views().get_unchecked(l.1) };
let r_bits: u128 = unsafe { *r.0.views().get_unchecked(r.1) };

// The lower 32 bits encode the length (little-endian),
// the upper 96 bits hold the actual data
let l_len = l_bits as u32;
let r_len = r_bits as u32;

// Remove the length bits, leaving only the data
let l_data = l_bits >> 32;
let r_data = r_bits >> 32;

// The data is stored in little-endian order. To compare lexicographically,
// convert to big-endian:
let l_be = l_data.swap_bytes();
let r_be = r_data.swap_bytes();

// Compare only the first min_len bytes
let min_len = l_len.min(r_len);
// We have all 12 bytes in the high bits, but only want the top min_len
let shift = (12 - min_len) * 8;
let l_partial = l_be >> shift;
let r_partial = r_be >> shift;
if l_partial != r_partial {
return l_partial < r_partial;
}

// If the prefixes are equal, the shorter one is considered smaller
return l_len < r_len;
}

// Fallback to the generic, unchecked comparison for non-inline cases
// # Safety
// The index is within bounds as it is checked in value()
unsafe { GenericByteViewArray::compare_unchecked(l.0, l.1, r.0, r.1).is_lt() }
Expand Down Expand Up @@ -642,17 +669,42 @@ pub fn compare_byte_view<T: ByteViewType>(
left_idx: usize,
right: &GenericByteViewArray<T>,
right_idx: usize,
) -> std::cmp::Ordering {
) -> Ordering {
assert!(left_idx < left.len());
assert!(right_idx < right.len());
if left.data_buffers().is_empty() && right.data_buffers().is_empty() {
let l_view = unsafe { left.views().get_unchecked(left_idx) };
let r_view = unsafe { right.views().get_unchecked(right_idx) };
let l_len = *l_view as u32 as usize;
let r_len = *r_view as u32 as usize;
let l_bytes = unsafe { GenericByteViewArray::<T>::inline_value(l_view, l_len) };
let r_bytes = unsafe { GenericByteViewArray::<T>::inline_value(r_view, r_len) };
return l_bytes.cmp(r_bytes);
// Directly load the 16-byte view as an u128 (little-endian)
let l_bits: u128 = unsafe { *left.views().get_unchecked(left_idx) };
let r_bits: u128 = unsafe { *right.views().get_unchecked(right_idx) };

// The lower 32 bits encode the length (little-endian),
// the upper 96 bits hold the actual data
let l_len = l_bits as u32;
let r_len = r_bits as u32;

// Remove the length bits, leaving only the data
let l_data = l_bits >> 32;
let r_data = r_bits >> 32;

// The data is stored in little-endian order. To compare lexicographically,
// convert to big-endian:
let l_be = l_data.swap_bytes();
let r_be = r_data.swap_bytes();

// Compare only the first min_len bytes
let min_len = l_len.min(r_len);
// We have all 12 bytes in the high bits, but only want the top min_len
let shift = (12 - min_len) * 8;
let l_partial = l_be >> shift;
let r_partial = r_be >> shift;
if l_partial < r_partial {
Copy link
Contributor

@Dandandan Dandandan Jun 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this can be written as l_partial.cmp(r_partial).then_with(|| { l_len.cmp(&r_len) })

return Ordering::Less;
} else if l_partial > r_partial {
return Ordering::Greater;
}

// If the prefixes are equal, the shorter one is considered smaller
return l_len.cmp(&r_len);
}
unsafe { GenericByteViewArray::compare_unchecked(left, left_idx, right, right_idx) }
}
Expand Down
Loading