-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Support Utf8View for bit_length kernel
#6671
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
507dcfa
67201b7
2ad7170
de5e33a
4ab6cfd
cdfc465
a3650ef
098cf3e
2a0c8b2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,8 +19,9 @@ | |
|
|
||
| use arrow_array::*; | ||
| use arrow_array::{cast::AsArray, types::*}; | ||
| use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer}; | ||
| use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer, ScalarBuffer}; | ||
| use arrow_schema::{ArrowError, DataType}; | ||
| use cast::as_string_array; | ||
| use std::sync::Arc; | ||
|
|
||
| fn length_impl<P: ArrowPrimitiveType>( | ||
|
|
@@ -115,6 +116,8 @@ pub fn length(array: &dyn Array) -> Result<ArrayRef, ArrowError> { | |
| /// * bit_length of null is null. | ||
| /// * bit_length is measured in bits, i.e. the length in bytes multiplied by 8 | ||
| pub fn bit_length(array: &dyn Array) -> Result<ArrayRef, ArrowError> { | ||
| println!("In Array bit_length()"); | ||
|
|
||
| if let Some(d) = array.as_any_dictionary_opt() { | ||
| let lengths = bit_length(d.values().as_ref())?; | ||
| return Ok(d.with_values(lengths)); | ||
|
|
@@ -137,6 +140,18 @@ pub fn bit_length(array: &dyn Array) -> Result<ArrayRef, ArrowError> { | |
| let list = array.as_string::<i64>(); | ||
| Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls())) | ||
| } | ||
| DataType::Utf8View => { | ||
| let list = array.as_string_view(); | ||
| let bit_lengths = list | ||
| .views() | ||
| .iter() | ||
| .map(|view| (*view as i32) * 8) | ||
|
||
| .collect::<Vec<i32>>(); | ||
| Ok(Arc::new(Int32Array::new( | ||
| bit_lengths.into(), | ||
| array.nulls().cloned(), | ||
| ))) | ||
| } | ||
| DataType::Binary => { | ||
| let list = array.as_binary::<i32>(); | ||
| Ok(bit_length_impl::<Int32Type>(list.offsets(), list.nulls())) | ||
|
|
@@ -462,6 +477,21 @@ mod tests { | |
| }) | ||
| } | ||
|
|
||
| #[test] | ||
| fn bit_length_test_utf8view() { | ||
| bit_length_cases() | ||
austin362667 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| .into_iter() | ||
| .for_each(|(input, len, expected)| { | ||
| let string_array = StringViewArray::from(input); | ||
| let result = bit_length(&string_array).unwrap(); | ||
| assert_eq!(len, result.len()); | ||
| let result = result.as_any().downcast_ref::<Int32Array>().unwrap(); | ||
| expected.iter().enumerate().for_each(|(i, value)| { | ||
| assert_eq!(*value, result.value(i)); | ||
| }); | ||
| }) | ||
| } | ||
|
|
||
| #[test] | ||
| fn bit_length_binary() { | ||
| let value: Vec<&[u8]> = vec![b"one", &[0xff, 0xf8], b"three"]; | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.