-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Perf: Add prefix compare for inlined compare and change use of inline_value to inline it to a u128 #7748
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Perf: Add prefix compare for inlined compare and change use of inline_value to inline it to a u128 #7748
Changes from 7 commits
5968c4f
061243c
59ede5b
e673331
b805dfe
5980297
6fb2f84
3428523
1b5a1da
8d4da6a
52c2fc3
c8c7038
7194e8d
1a858a1
442402b
96fd53a
9580de1
5f80dc7
039e38b
918a789
153cf85
d650fb3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,6 +27,7 @@ use arrow_schema::{ArrowError, DataType}; | |
| use core::str; | ||
| use num::ToPrimitive; | ||
| use std::any::Any; | ||
| use std::cmp::Ordering; | ||
| use std::fmt::Debug; | ||
| use std::marker::PhantomData; | ||
| use std::sync::Arc; | ||
|
|
@@ -537,17 +538,46 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> { | |
| left_idx: usize, | ||
| right: &GenericByteViewArray<T>, | ||
| right_idx: usize, | ||
| ) -> std::cmp::Ordering { | ||
| ) -> Ordering { | ||
| let l_view = left.views().get_unchecked(left_idx); | ||
| let l_len = *l_view as u32; | ||
|
|
||
| let r_view = right.views().get_unchecked(right_idx); | ||
| let r_len = *r_view as u32; | ||
|
|
||
| if l_len <= 12 && r_len <= 12 { | ||
| let l_data = unsafe { GenericByteViewArray::<T>::inline_value(l_view, l_len as usize) }; | ||
| let r_data = unsafe { GenericByteViewArray::<T>::inline_value(r_view, r_len as usize) }; | ||
| return l_data.cmp(r_data); | ||
| // Directly load the 16-byte view as an u128 (little-endian) | ||
| let l_bits: u128 = unsafe { *left.views().get_unchecked(left_idx) }; | ||
| let r_bits: u128 = unsafe { *right.views().get_unchecked(right_idx) }; | ||
|
|
||
| // The lower 32 bits encode the length (little-endian), | ||
| // the upper 96 bits hold the actual data | ||
| let l_len = l_bits as u32; | ||
| let r_len = r_bits as u32; | ||
|
|
||
| // Remove the length bits, leaving only the data | ||
| let l_data = l_bits >> 32; | ||
| let r_data = r_bits >> 32; | ||
|
|
||
| // The data is stored in little-endian order. To compare lexicographically, | ||
| // convert to big-endian: | ||
| let l_be = l_data.swap_bytes(); | ||
| let r_be = r_data.swap_bytes(); | ||
|
|
||
| // Compare only the first min_len bytes | ||
| let min_len = l_len.min(r_len); | ||
| // We have all 12 bytes in the high bits, but only want the top min_len | ||
| let shift = (12 - min_len) * 8; | ||
| let l_partial = l_be >> shift; | ||
| let r_partial = r_be >> shift; | ||
| if l_partial < r_partial { | ||
| return Ordering::Less; | ||
| } else if l_partial > r_partial { | ||
| return Ordering::Greater; | ||
| } | ||
|
|
||
| // If the prefixes are equal, the shorter one is considered smaller | ||
| return l_len.cmp(&r_len); | ||
| } | ||
|
|
||
| // one of the string is larger than 12 bytes, | ||
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -33,6 +33,7 @@ use arrow_buffer::bit_util::ceil; | |
| use arrow_buffer::{BooleanBuffer, MutableBuffer, NullBuffer}; | ||
| use arrow_schema::ArrowError; | ||
| use arrow_select::take::take; | ||
| use std::cmp::Ordering; | ||
| use std::ops::Not; | ||
|
|
||
| #[derive(Debug, Copy, Clone)] | ||
|
|
@@ -571,7 +572,7 @@ impl<'a, T: ByteViewType> ArrayOrd for &'a GenericByteViewArray<T> { | |
| let r_view = unsafe { r.0.views().get_unchecked(r.1) }; | ||
| if l.0.data_buffers().is_empty() && r.0.data_buffers().is_empty() { | ||
| // For eq case, we can directly compare the inlined bytes | ||
| return l_view.cmp(r_view).is_eq(); | ||
| return l_view.eq(r_view); | ||
zhuqi-lucas marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| } | ||
|
|
||
| let l_len = *l_view as u32; | ||
|
|
@@ -592,15 +593,41 @@ impl<'a, T: ByteViewType> ArrayOrd for &'a GenericByteViewArray<T> { | |
|
|
||
| #[inline(always)] | ||
| fn is_lt(l: Self::Item, r: Self::Item) -> bool { | ||
| // If both arrays use only the inline buffer | ||
| if l.0.data_buffers().is_empty() && r.0.data_buffers().is_empty() { | ||
| let l_view = unsafe { l.0.views().get_unchecked(l.1) }; | ||
| let r_view = unsafe { r.0.views().get_unchecked(r.1) }; | ||
| let l_len = *l_view as u32 as usize; | ||
| let r_len = *r_view as u32 as usize; | ||
| let l_bytes = unsafe { GenericByteViewArray::<T>::inline_value(l_view, l_len) }; | ||
| let r_bytes = unsafe { GenericByteViewArray::<T>::inline_value(r_view, r_len) }; | ||
| return l_bytes.cmp(r_bytes).is_lt(); | ||
| // Directly load the 16-byte view as an u128 (little-endian) | ||
| let l_bits: u128 = unsafe { *l.0.views().get_unchecked(l.1) }; | ||
| let r_bits: u128 = unsafe { *r.0.views().get_unchecked(r.1) }; | ||
|
|
||
| // The lower 32 bits encode the length (little-endian), | ||
| // the upper 96 bits hold the actual data | ||
| let l_len = l_bits as u32; | ||
| let r_len = r_bits as u32; | ||
|
|
||
| // Remove the length bits, leaving only the data | ||
| let l_data = l_bits >> 32; | ||
| let r_data = r_bits >> 32; | ||
|
|
||
| // The data is stored in little-endian order. To compare lexicographically, | ||
| // convert to big-endian: | ||
| let l_be = l_data.swap_bytes(); | ||
| let r_be = r_data.swap_bytes(); | ||
|
|
||
| // Compare only the first min_len bytes | ||
| let min_len = l_len.min(r_len); | ||
| // We have all 12 bytes in the high bits, but only want the top min_len | ||
| let shift = (12 - min_len) * 8; | ||
| let l_partial = l_be >> shift; | ||
| let r_partial = r_be >> shift; | ||
| if l_partial != r_partial { | ||
| return l_partial < r_partial; | ||
| } | ||
|
|
||
| // If the prefixes are equal, the shorter one is considered smaller | ||
| return l_len < r_len; | ||
| } | ||
|
|
||
| // Fallback to the generic, unchecked comparison for non-inline cases | ||
| // # Safety | ||
| // The index is within bounds as it is checked in value() | ||
| unsafe { GenericByteViewArray::compare_unchecked(l.0, l.1, r.0, r.1).is_lt() } | ||
|
|
@@ -642,17 +669,42 @@ pub fn compare_byte_view<T: ByteViewType>( | |
| left_idx: usize, | ||
| right: &GenericByteViewArray<T>, | ||
| right_idx: usize, | ||
| ) -> std::cmp::Ordering { | ||
| ) -> Ordering { | ||
| assert!(left_idx < left.len()); | ||
| assert!(right_idx < right.len()); | ||
| if left.data_buffers().is_empty() && right.data_buffers().is_empty() { | ||
| let l_view = unsafe { left.views().get_unchecked(left_idx) }; | ||
| let r_view = unsafe { right.views().get_unchecked(right_idx) }; | ||
| let l_len = *l_view as u32 as usize; | ||
| let r_len = *r_view as u32 as usize; | ||
| let l_bytes = unsafe { GenericByteViewArray::<T>::inline_value(l_view, l_len) }; | ||
| let r_bytes = unsafe { GenericByteViewArray::<T>::inline_value(r_view, r_len) }; | ||
| return l_bytes.cmp(r_bytes); | ||
| // Directly load the 16-byte view as an u128 (little-endian) | ||
| let l_bits: u128 = unsafe { *left.views().get_unchecked(left_idx) }; | ||
| let r_bits: u128 = unsafe { *right.views().get_unchecked(right_idx) }; | ||
|
|
||
| // The lower 32 bits encode the length (little-endian), | ||
| // the upper 96 bits hold the actual data | ||
| let l_len = l_bits as u32; | ||
| let r_len = r_bits as u32; | ||
|
|
||
| // Remove the length bits, leaving only the data | ||
| let l_data = l_bits >> 32; | ||
| let r_data = r_bits >> 32; | ||
|
|
||
| // The data is stored in little-endian order. To compare lexicographically, | ||
| // convert to big-endian: | ||
| let l_be = l_data.swap_bytes(); | ||
| let r_be = r_data.swap_bytes(); | ||
|
|
||
| // Compare only the first min_len bytes | ||
| let min_len = l_len.min(r_len); | ||
| // We have all 12 bytes in the high bits, but only want the top min_len | ||
| let shift = (12 - min_len) * 8; | ||
| let l_partial = l_be >> shift; | ||
| let r_partial = r_be >> shift; | ||
| if l_partial < r_partial { | ||
|
||
| return Ordering::Less; | ||
| } else if l_partial > r_partial { | ||
| return Ordering::Greater; | ||
| } | ||
|
|
||
| // If the prefixes are equal, the shorter one is considered smaller | ||
| return l_len.cmp(&r_len); | ||
| } | ||
| unsafe { GenericByteViewArray::compare_unchecked(left, left_idx, right, right_idx) } | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It might be possible to OR the length back into the lower bits, which would then allow getting a result with a single u128 comparison. I think it would also be beneficial to extract this code block into a shared helper function, and add some unit tests for it. The generic code here might not be well convered by tests because of the fast path for inline buffers elsewhere.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you @jhorstmann for review, good suggestion, i will address it!