From f54df86455dba0ccf5979c3aa763ad53a869a796 Mon Sep 17 00:00:00 2001 From: Mustafa Akur Date: Fri, 27 Sep 2024 19:31:39 -0700 Subject: [PATCH 1/3] Add take_arrays util function --- arrow-select/src/take.rs | 58 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs index ed7179fd36ce..d0c67811a7f4 100644 --- a/arrow-select/src/take.rs +++ b/arrow-select/src/take.rs @@ -94,6 +94,64 @@ pub fn take( } } +/// Take elements by index from [Vec], creating a new [Vec] from those indexes. +/// +/// ```text +/// ┌────────┬────────┐ +/// │ │ │ ┌────────┐ ┌────────┬────────┐ +/// │ A │ 1 │ │ │ │ │ │ +/// ├────────┼────────┤ │ 0 │ │ A │ 1 │ +/// │ │ │ ├────────┤ ├────────┼────────┤ +/// │ D │ 4 │ │ │ │ │ │ +/// ├────────┼────────┤ │ 2 │ take_arrays(values,indices) │ B │ 2 │ +/// │ │ │ ├────────┤ ├────────┼────────┤ +/// │ B │ 2 │ │ │ ───────────────────────────► │ │ │ +/// ├────────┼────────┤ │ 3 │ │ C │ 3 │ +/// │ │ │ ├────────┤ ├────────┼────────┤ +/// │ C │ 3 │ │ │ │ │ │ +/// ├────────┼────────┤ │ 1 │ │ D │ 4 │ +/// │ │ │ └────────┘ └────────┼────────┘ +/// │ E │ 5 │ +/// └────────┴────────┘ +/// values arrays indices array result +/// ``` +/// +/// # Errors +/// This function errors whenever: +/// * An index cannot be casted to `usize` (typically 32 bit architectures) +/// * An index is out of bounds and `options` is set to check bounds. +/// +/// # Safety +/// +/// When `options` is not set to check bounds, taking indexes after `len` will panic. 
+/// +/// # Examples +/// ``` +/// # use std::sync::Arc; +/// # use arrow_array::{StringArray, UInt32Array, cast::AsArray}; +/// # use arrow_select::take::{take, take_arrays}; +/// let string_values = Arc::new(StringArray::from(vec!["zero", "one", "two"])); +/// let values = Arc::new(UInt32Array::from(vec![0, 1, 2])); +/// +/// // Take items at index 2, and 1: +/// let indices = UInt32Array::from(vec![2, 1]); +/// let taken_arrays = take_arrays(&[string_values, values], &indices, None).unwrap(); +/// let taken_string = taken_arrays[0].as_string::<i32>(); +/// assert_eq!(*taken_string, StringArray::from(vec!["two", "one"])); +/// let taken_values = taken_arrays[1].as_primitive(); +/// assert_eq!(*taken_values, UInt32Array::from(vec![2, 1])); +/// ``` +pub fn take_arrays( + arrays: &[ArrayRef], + indices: &dyn Array, + options: Option<TakeOptions>, +) -> Result<Vec<ArrayRef>, ArrowError> { + arrays + .iter() + .map(|array| take(array.as_ref(), indices, options.clone())) + .collect() +} + /// Verifies that the non-null values of `indices` are all `< len` fn check_bounds( len: usize, From 0d8eec681777239a2c5c9df3bbb3de326d5e2c0d Mon Sep 17 00:00:00 2001 From: Mustafa Akur Date: Fri, 27 Sep 2024 19:36:13 -0700 Subject: [PATCH 2/3] Update comments --- arrow-select/src/take.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs index d0c67811a7f4..cc69e6983a1a 100644 --- a/arrow-select/src/take.rs +++ b/arrow-select/src/take.rs @@ -94,7 +94,8 @@ pub fn take( } } -/// Take elements by index from [Vec], creating a new [Vec] from those indexes. +/// For each [ArrayRef] in the [Vec], take elements by index and create a new +/// [Vec] from those indices. 
/// /// ```text /// ┌────────┬────────┐ From a1c244a964458a9bd07415e022a62387fde3b2b1 Mon Sep 17 00:00:00 2001 From: Mustafa Akur Date: Fri, 27 Sep 2024 19:48:19 -0700 Subject: [PATCH 3/3] Minor changes --- arrow-select/src/take.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs index cc69e6983a1a..6d037fc41984 100644 --- a/arrow-select/src/take.rs +++ b/arrow-select/src/take.rs @@ -94,8 +94,8 @@ pub fn take( } } -/// For each [ArrayRef] in the [Vec], take elements by index and create a new -/// [Vec] from those indices. +/// For each [ArrayRef] in the [`Vec`], take elements by index and create a new +/// [`Vec`] from those indices. /// /// ```text /// ┌────────┬────────┐