From 752869899cf7f8ddcdc4fa3a2792004eb61c84dd Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Thu, 14 Aug 2025 12:46:18 -0400 Subject: [PATCH 01/21] Stash changes. --- .../functions/src/regex/regexpreplace.rs | 116 ++++++++++++++---- 1 file changed, 90 insertions(+), 26 deletions(-) diff --git a/datafusion/functions/src/regex/regexpreplace.rs b/datafusion/functions/src/regex/regexpreplace.rs index 3a83564ff11f..a35f3a17fa61 100644 --- a/datafusion/functions/src/regex/regexpreplace.rs +++ b/datafusion/functions/src/regex/regexpreplace.rs @@ -24,7 +24,9 @@ use arrow::array::{new_null_array, ArrayIter, AsArray}; use arrow::array::{Array, ArrayRef, OffsetSizeTrait}; use arrow::array::{ArrayAccessor, StringViewArray}; use arrow::datatypes::DataType; -use datafusion_common::cast::as_string_view_array; +use datafusion_common::cast::{ + as_large_string_array, as_string_array, as_string_view_array, +}; use datafusion_common::exec_err; use datafusion_common::plan_err; use datafusion_common::ScalarValue; @@ -95,13 +97,22 @@ impl Default for RegexpReplaceFunc { impl RegexpReplaceFunc { pub fn new() -> Self { use DataType::*; + use TypeSignature::*; Self { signature: Signature::one_of( vec![ - TypeSignature::Exact(vec![Utf8, Utf8, Utf8]), - TypeSignature::Exact(vec![Utf8View, Utf8, Utf8]), - TypeSignature::Exact(vec![Utf8, Utf8, Utf8, Utf8]), - TypeSignature::Exact(vec![Utf8View, Utf8, Utf8, Utf8]), + Uniform(3, vec![Utf8, Utf8View]), + Exact(vec![Utf8, Utf8View, Utf8]), + Exact(vec![Utf8, Utf8, Utf8View]), + Exact(vec![Utf8, Utf8View, Utf8View]), + Exact(vec![Utf8View, Utf8, Utf8View]), + Exact(vec![Utf8View, Utf8View, Utf8]), + Exact(vec![Utf8View, Utf8, Utf8]), + Exact(vec![LargeUtf8, Utf8, Utf8]), + Exact(vec![LargeUtf8, Utf8View, Utf8]), + Exact(vec![LargeUtf8, Utf8, Utf8View]), + Exact(vec![LargeUtf8, Utf8View, Utf8View]), + Uniform(4, vec![Utf8, Utf8View]), ], Volatility::Immutable, ), @@ -398,12 +409,37 @@ fn _regexp_replace_early_abort( /// Note: If the array is 
empty or the first argument is null, /// then calls the given early abort function. macro_rules! fetch_string_arg { - ($ARG:expr, $NAME:expr, $T:ident, $EARLY_ABORT:ident, $ARRAY_SIZE:expr) => {{ - let array = as_generic_string_array::<$T>($ARG)?; - if array.len() == 0 || array.is_null(0) { - return $EARLY_ABORT(array, $ARRAY_SIZE); - } else { - array.value(0) + ($ARG:expr, $NAME:expr, $EARLY_ABORT:ident, $ARRAY_SIZE:expr) => {{ + let string_array_type = ($ARG).data_type(); + match string_array_type { + DataType::Utf8 => { + let array = as_string_array($ARG)?; + if array.len() == 0 || array.is_null(0) { + return $EARLY_ABORT(array, $ARRAY_SIZE); + } else { + array.value(0) + } + } + DataType::LargeUtf8 => { + let array = as_large_string_array($ARG)?; + if array.len() == 0 || array.is_null(0) { + return $EARLY_ABORT(array, $ARRAY_SIZE); + } else { + array.value(0) + } + } + DataType::Utf8View => { + let array = as_string_view_array($ARG)?; + if array.len() == 0 || array.is_null(0) { + return $EARLY_ABORT(array, $ARRAY_SIZE); + } else { + array.value(0) + } + } + _ => unreachable!( + "Invalid data type for regexp_replace: {}", + string_array_type + ), } }}; } @@ -417,23 +453,17 @@ fn _regexp_replace_static_pattern_replace( args: &[ArrayRef], ) -> Result { let array_size = args[0].len(); - let pattern = fetch_string_arg!( - &args[1], - "pattern", - i32, - _regexp_replace_early_abort, - array_size - ); + let pattern = + fetch_string_arg!(&args[1], "pattern", _regexp_replace_early_abort, array_size); let replacement = fetch_string_arg!( &args[2], "replacement", - i32, _regexp_replace_early_abort, array_size ); let flags = match args.len() { 3 => None, - 4 => Some(fetch_string_arg!(&args[3], "flags", i32, _regexp_replace_early_abort, array_size)), + 4 => Some(fetch_string_arg!(&args[3], "flags", _regexp_replace_early_abort, array_size)), other => { return exec_err!( "regexp_replace was called with {other} arguments. It requires at least 3 and at most 4." 
@@ -640,7 +670,7 @@ mod tests { use super::*; macro_rules! static_pattern_regexp_replace { - ($name:ident, $T:ty, $O:ty) => { + ($name:ident, $T:ty, $U:ty, $O:ty) => { #[test] fn $name() { let values = vec!["abc", "acd", "abcd1234567890123", "123456789012abc"]; @@ -650,8 +680,8 @@ mod tests { vec!["afooc", "acd", "afoocd1234567890123", "123456789012afooc"]; let values = <$T>::from(values); - let patterns = StringArray::from(patterns); - let replacements = StringArray::from(replacement); + let patterns = <$U>::from(patterns); + let replacements = <$U>::from(replacement); let expected = <$T>::from(expected); let re = _regexp_replace_static_pattern_replace::<$O>(&[ @@ -666,9 +696,43 @@ mod tests { }; } - static_pattern_regexp_replace!(string_array, StringArray, i32); - static_pattern_regexp_replace!(string_view_array, StringViewArray, i32); - static_pattern_regexp_replace!(large_string_array, LargeStringArray, i64); + static_pattern_regexp_replace!( + string_array_string_arrays, + StringArray, + StringArray, + i32 + ); + static_pattern_regexp_replace!( + string_view_array_string_arrays, + StringViewArray, + StringArray, + i32 + ); + static_pattern_regexp_replace!( + large_string_array_string_arrays, + LargeStringArray, + StringArray, + i64 + ); + + static_pattern_regexp_replace!( + string_array_string_view_arrays, + StringArray, + StringViewArray, + i32 + ); + static_pattern_regexp_replace!( + string_view_array_string_view_arrays, + StringViewArray, + StringViewArray, + i32 + ); + static_pattern_regexp_replace!( + large_string_array_string_view_arrays, + LargeStringArray, + StringViewArray, + i64 + ); macro_rules! static_pattern_regexp_replace_with_flags { ($name:ident, $T:ty, $O: ty) => { From 802b7757a120cc50528a1510c985e8e6cf3fb222 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Thu, 14 Aug 2025 12:55:19 -0400 Subject: [PATCH 02/21] Signature cleanup, more test scenarios. 
--- .../functions/src/regex/regexpreplace.rs | 56 +++++++++++++------ 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/datafusion/functions/src/regex/regexpreplace.rs b/datafusion/functions/src/regex/regexpreplace.rs index a35f3a17fa61..67935cee4d63 100644 --- a/datafusion/functions/src/regex/regexpreplace.rs +++ b/datafusion/functions/src/regex/regexpreplace.rs @@ -29,6 +29,7 @@ use datafusion_common::cast::{ }; use datafusion_common::exec_err; use datafusion_common::plan_err; +use datafusion_common::types::logical_string; use datafusion_common::ScalarValue; use datafusion_common::{ cast::as_generic_string_array, internal_err, DataFusionError, Result, @@ -37,6 +38,7 @@ use datafusion_expr::function::Hint; use datafusion_expr::ColumnarValue; use datafusion_expr::TypeSignature; use datafusion_expr::{Documentation, ScalarUDFImpl, Signature, Volatility}; +use datafusion_expr_common::signature::{Coercion, TypeSignatureClass}; use datafusion_macros::user_doc; use regex::Regex; use std::any::Any; @@ -96,23 +98,22 @@ impl Default for RegexpReplaceFunc { impl RegexpReplaceFunc { pub fn new() -> Self { - use DataType::*; use TypeSignature::*; + use TypeSignatureClass::*; Self { signature: Signature::one_of( vec![ - Uniform(3, vec![Utf8, Utf8View]), - Exact(vec![Utf8, Utf8View, Utf8]), - Exact(vec![Utf8, Utf8, Utf8View]), - Exact(vec![Utf8, Utf8View, Utf8View]), - Exact(vec![Utf8View, Utf8, Utf8View]), - Exact(vec![Utf8View, Utf8View, Utf8]), - Exact(vec![Utf8View, Utf8, Utf8]), - Exact(vec![LargeUtf8, Utf8, Utf8]), - Exact(vec![LargeUtf8, Utf8View, Utf8]), - Exact(vec![LargeUtf8, Utf8, Utf8View]), - Exact(vec![LargeUtf8, Utf8View, Utf8View]), - Uniform(4, vec![Utf8, Utf8View]), + Coercible(vec![ + Coercion::new_exact(Native(logical_string())), + Coercion::new_exact(Native(logical_string())), + Coercion::new_exact(Native(logical_string())), + ]), + Coercible(vec![ + Coercion::new_exact(Native(logical_string())), + Coercion::new_exact(Native(logical_string())), 
+ Coercion::new_exact(Native(logical_string())), + Coercion::new_exact(Native(logical_string())), + ]), ], Volatility::Immutable, ), @@ -136,13 +137,13 @@ impl ScalarUDFImpl for RegexpReplaceFunc { fn return_type(&self, arg_types: &[DataType]) -> Result { use DataType::*; Ok(match &arg_types[0] { - LargeUtf8 | LargeBinary => LargeUtf8, - Utf8 | Binary => Utf8, - Utf8View | BinaryView => Utf8View, + LargeUtf8 => LargeUtf8, + Utf8 => Utf8, + Utf8View => Utf8View, Null => Null, Dictionary(_, t) => match **t { - LargeUtf8 | LargeBinary => LargeUtf8, - Utf8 | Binary => Utf8, + LargeUtf8 => LargeUtf8, + Utf8 => Utf8, Null => Null, _ => { return plan_err!( @@ -734,6 +735,25 @@ mod tests { i64 ); + static_pattern_regexp_replace!( + string_array_large_string_arrays, + StringArray, + LargeStringArray, + i32 + ); + static_pattern_regexp_replace!( + string_view_array_large_string_arrays, + StringViewArray, + LargeStringArray, + i32 + ); + static_pattern_regexp_replace!( + large_string_array_large_string_arrays, + LargeStringArray, + LargeStringArray, + i64 + ); + macro_rules! static_pattern_regexp_replace_with_flags { ($name:ident, $T:ty, $O: ty) => { #[test] From b15898cf14407fda0df602af016f669635857613 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Thu, 14 Aug 2025 13:00:59 -0400 Subject: [PATCH 03/21] Minor test renaming. 
--- .../functions/src/regex/regexpreplace.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/datafusion/functions/src/regex/regexpreplace.rs b/datafusion/functions/src/regex/regexpreplace.rs index 67935cee4d63..d707b232da3c 100644 --- a/datafusion/functions/src/regex/regexpreplace.rs +++ b/datafusion/functions/src/regex/regexpreplace.rs @@ -698,57 +698,57 @@ mod tests { } static_pattern_regexp_replace!( - string_array_string_arrays, + string_array_string_args, StringArray, StringArray, i32 ); static_pattern_regexp_replace!( - string_view_array_string_arrays, + string_view_array_string_args, StringViewArray, StringArray, i32 ); static_pattern_regexp_replace!( - large_string_array_string_arrays, + large_string_array_string_args, LargeStringArray, StringArray, i64 ); static_pattern_regexp_replace!( - string_array_string_view_arrays, + string_array_string_view_args, StringArray, StringViewArray, i32 ); static_pattern_regexp_replace!( - string_view_array_string_view_arrays, + string_view_array_string_view_args, StringViewArray, StringViewArray, i32 ); static_pattern_regexp_replace!( - large_string_array_string_view_arrays, + large_string_array_string_view_args, LargeStringArray, StringViewArray, i64 ); static_pattern_regexp_replace!( - string_array_large_string_arrays, + string_array_large_string_args, StringArray, LargeStringArray, i32 ); static_pattern_regexp_replace!( - string_view_array_large_string_arrays, + string_view_array_large_string_args, StringViewArray, LargeStringArray, i32 ); static_pattern_regexp_replace!( - large_string_array_large_string_arrays, + large_string_array_large_string_args, LargeStringArray, LargeStringArray, i64 From 22f19d7cb3fb13d878622a3bf18d27679e0135e6 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Thu, 14 Aug 2025 13:05:23 -0400 Subject: [PATCH 04/21] Simplify signature. 
--- .../functions/src/regex/regexpreplace.rs | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/datafusion/functions/src/regex/regexpreplace.rs b/datafusion/functions/src/regex/regexpreplace.rs index d707b232da3c..0ddcc8bb4318 100644 --- a/datafusion/functions/src/regex/regexpreplace.rs +++ b/datafusion/functions/src/regex/regexpreplace.rs @@ -29,7 +29,6 @@ use datafusion_common::cast::{ }; use datafusion_common::exec_err; use datafusion_common::plan_err; -use datafusion_common::types::logical_string; use datafusion_common::ScalarValue; use datafusion_common::{ cast::as_generic_string_array, internal_err, DataFusionError, Result, @@ -38,7 +37,6 @@ use datafusion_expr::function::Hint; use datafusion_expr::ColumnarValue; use datafusion_expr::TypeSignature; use datafusion_expr::{Documentation, ScalarUDFImpl, Signature, Volatility}; -use datafusion_expr_common::signature::{Coercion, TypeSignatureClass}; use datafusion_macros::user_doc; use regex::Regex; use std::any::Any; @@ -98,23 +96,9 @@ impl Default for RegexpReplaceFunc { impl RegexpReplaceFunc { pub fn new() -> Self { - use TypeSignature::*; - use TypeSignatureClass::*; Self { signature: Signature::one_of( - vec![ - Coercible(vec![ - Coercion::new_exact(Native(logical_string())), - Coercion::new_exact(Native(logical_string())), - Coercion::new_exact(Native(logical_string())), - ]), - Coercible(vec![ - Coercion::new_exact(Native(logical_string())), - Coercion::new_exact(Native(logical_string())), - Coercion::new_exact(Native(logical_string())), - Coercion::new_exact(Native(logical_string())), - ]), - ], + vec![TypeSignature::String(3), TypeSignature::String(4)], Volatility::Immutable, ), } From 936f32973281c574ee419ee7c30934a8f7414df5 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Thu, 14 Aug 2025 13:16:45 -0400 Subject: [PATCH 05/21] Update tests. 
--- .../test_files/string/string_literal.slt | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/datafusion/sqllogictest/test_files/string/string_literal.slt b/datafusion/sqllogictest/test_files/string/string_literal.slt index 79b783f89a61..f602dbb54b08 100644 --- a/datafusion/sqllogictest/test_files/string/string_literal.slt +++ b/datafusion/sqllogictest/test_files/string/string_literal.slt @@ -303,6 +303,26 @@ SELECT regexp_replace(arrow_cast('foobar', 'Dictionary(Int32, Utf8)'), 'bar', 'x ---- fooxx +query T +SELECT regexp_replace(arrow_cast('foobar', 'LargeUtf8'), 'bar', 'xx', 'gi') +---- +fooxx + +query T +SELECT regexp_replace(arrow_cast('foobar', 'Utf8View'), 'bar', 'xx', 'gi') +---- +fooxx + +query T +SELECT regexp_replace('foobar', arrow_cast('bar', 'LargeUtf8'), 'xx', 'gi') +---- +fooxx + +query T +SELECT regexp_replace('foobar', arrow_cast('bar', 'Utf8View'), 'xx', 'gi') +---- +fooxx + query T SELECT repeat('foo', 3) ---- From b74d9410c544fd85411b2fd00f2a28c22ea45c46 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Thu, 14 Aug 2025 15:19:06 -0400 Subject: [PATCH 06/21] Signature change for binary input support. 
--- .../functions/src/regex/regexpreplace.rs | 26 ++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/datafusion/functions/src/regex/regexpreplace.rs b/datafusion/functions/src/regex/regexpreplace.rs index 0ddcc8bb4318..c195c1def620 100644 --- a/datafusion/functions/src/regex/regexpreplace.rs +++ b/datafusion/functions/src/regex/regexpreplace.rs @@ -29,6 +29,7 @@ use datafusion_common::cast::{ }; use datafusion_common::exec_err; use datafusion_common::plan_err; +use datafusion_common::types::{logical_binary, logical_string, NativeType}; use datafusion_common::ScalarValue; use datafusion_common::{ cast::as_generic_string_array, internal_err, DataFusionError, Result, @@ -37,6 +38,7 @@ use datafusion_expr::function::Hint; use datafusion_expr::ColumnarValue; use datafusion_expr::TypeSignature; use datafusion_expr::{Documentation, ScalarUDFImpl, Signature, Volatility}; +use datafusion_expr_common::signature::{Coercion, TypeSignatureClass}; use datafusion_macros::user_doc; use regex::Regex; use std::any::Any; @@ -96,9 +98,31 @@ impl Default for RegexpReplaceFunc { impl RegexpReplaceFunc { pub fn new() -> Self { + use TypeSignature::*; + use TypeSignatureClass::*; Self { signature: Signature::one_of( - vec![TypeSignature::String(3), TypeSignature::String(4)], + vec![ + Coercible(vec![ + Coercion::new_implicit( + Native(logical_string()), + vec![Native(logical_string()), Native(logical_binary())], + NativeType::String, + ), + Coercion::new_exact(Native(logical_string())), + Coercion::new_exact(Native(logical_string())), + ]), + Coercible(vec![ + Coercion::new_implicit( + Native(logical_string()), + vec![Native(logical_string()), Native(logical_binary())], + NativeType::String, + ), + Coercion::new_exact(Native(logical_string())), + Coercion::new_exact(Native(logical_string())), + Coercion::new_exact(Native(logical_string())), + ]), + ], Volatility::Immutable, ), } From 4a5acf8e517afafc568aa821188a579a11304002 Mon Sep 17 00:00:00 2001 From: Matt 
Butrovich Date: Thu, 14 Aug 2025 15:20:46 -0400 Subject: [PATCH 07/21] Return type changes for binary. --- datafusion/functions/src/regex/regexpreplace.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/datafusion/functions/src/regex/regexpreplace.rs b/datafusion/functions/src/regex/regexpreplace.rs index c195c1def620..8184e9255c98 100644 --- a/datafusion/functions/src/regex/regexpreplace.rs +++ b/datafusion/functions/src/regex/regexpreplace.rs @@ -145,13 +145,13 @@ impl ScalarUDFImpl for RegexpReplaceFunc { fn return_type(&self, arg_types: &[DataType]) -> Result { use DataType::*; Ok(match &arg_types[0] { - LargeUtf8 => LargeUtf8, - Utf8 => Utf8, - Utf8View => Utf8View, + LargeUtf8 | LargeBinary => LargeUtf8, + Utf8 | Binary => Utf8, + Utf8View | BinaryView => Utf8View, Null => Null, Dictionary(_, t) => match **t { - LargeUtf8 => LargeUtf8, - Utf8 => Utf8, + LargeUtf8 | LargeBinary => LargeUtf8, + Utf8 | Binary => Utf8, Null => Null, _ => { return plan_err!( From 926a6dbf328eb7b3390f611e0976b061897035a6 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Fri, 15 Aug 2025 10:39:32 -0400 Subject: [PATCH 08/21] Stash. 
--- datafusion/functions/src/regex/regexpreplace.rs | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/datafusion/functions/src/regex/regexpreplace.rs b/datafusion/functions/src/regex/regexpreplace.rs index 63d0f34c081e..db8085cbd463 100644 --- a/datafusion/functions/src/regex/regexpreplace.rs +++ b/datafusion/functions/src/regex/regexpreplace.rs @@ -258,15 +258,16 @@ fn regex_replace_posix_groups(replacement: &str) -> String { /// # Ok(()) /// # } /// ``` -pub fn regexp_replace<'a, T: OffsetSizeTrait, V, B>( - string_array: V, - pattern_array: B, - replacement_array: B, +pub fn regexp_replace<'a, T: OffsetSizeTrait, U, V, W>( + string_array: U, + pattern_array: V, + replacement_array: W, flags: Option<&ArrayRef>, ) -> Result where + U: ArrayAccessor, V: ArrayAccessor, - B: ArrayAccessor, + W: ArrayAccessor, { // Default implementation for regexp_replace, assumes all args are arrays // and args is a sequence of 3 or 4 elements. @@ -634,7 +635,7 @@ pub fn specialize_regexp_replace( let string_array = args[0].as_string_view(); let pattern_array = args[1].as_string::(); let replacement_array = args[2].as_string::(); - regexp_replace::( + regexp_replace::( string_array, pattern_array, replacement_array, @@ -645,7 +646,7 @@ pub fn specialize_regexp_replace( let string_array = args[0].as_string::(); let pattern_array = args[1].as_string::(); let replacement_array = args[2].as_string::(); - regexp_replace::( + regexp_replace::( string_array, pattern_array, replacement_array, @@ -656,7 +657,7 @@ pub fn specialize_regexp_replace( let string_array = args[0].as_string::(); let pattern_array = args[1].as_string::(); let replacement_array = args[2].as_string::(); - regexp_replace::( + regexp_replace::( string_array, pattern_array, replacement_array, From 94adedc233a81df1cbb6ecee611a949a9b9ef93a Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Fri, 15 Aug 2025 10:56:49 -0400 Subject: [PATCH 09/21] Stash. 
--- .../functions/src/regex/regexpreplace.rs | 55 ++++++++++++------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/datafusion/functions/src/regex/regexpreplace.rs b/datafusion/functions/src/regex/regexpreplace.rs index db8085cbd463..e257b42290b1 100644 --- a/datafusion/functions/src/regex/regexpreplace.rs +++ b/datafusion/functions/src/regex/regexpreplace.rs @@ -258,16 +258,17 @@ fn regex_replace_posix_groups(replacement: &str) -> String { /// # Ok(()) /// # } /// ``` -pub fn regexp_replace<'a, T: OffsetSizeTrait, U, V, W>( +pub fn regexp_replace<'a, T: OffsetSizeTrait, U, V, W, X>( string_array: U, pattern_array: V, replacement_array: W, - flags: Option<&ArrayRef>, + flags_array: Option, ) -> Result where - U: ArrayAccessor, - V: ArrayAccessor, - W: ArrayAccessor, + U: ArrayAccessor, + V: ArrayAccessor, + W: ArrayAccessor, + X: ArrayAccessor, { // Default implementation for regexp_replace, assumes all args are arrays // and args is a sequence of 3 or 4 elements. @@ -281,7 +282,7 @@ where let pattern_array_iter = ArrayIter::new(pattern_array); let replacement_array_iter = ArrayIter::new(replacement_array); - match flags { + match flags_array { None => { let result_iter = string_array_iter .zip(pattern_array_iter) @@ -328,13 +329,13 @@ where } } } - Some(flags) => { - let flags_array = as_generic_string_array::(flags)?; + Some(flags_array) => { + let flags_array_iter = ArrayIter::new(flags_array); let result_iter = string_array_iter .zip(pattern_array_iter) .zip(replacement_array_iter) - .zip(flags_array.iter()) + .zip(flags_array_iter) .map(|(((string, pattern), replacement), flags)| { match (string, pattern, replacement, flags) { (Some(string), Some(pattern), Some(replacement), Some(flags)) => { @@ -377,7 +378,7 @@ where re.replace(string, replacement.as_str()) } })) - .transpose() + .transpose() } _ => Ok(None), } @@ -635,33 +636,45 @@ pub fn specialize_regexp_replace( let string_array = args[0].as_string_view(); let pattern_array = 
args[1].as_string::(); let replacement_array = args[2].as_string::(); - regexp_replace::( + let flags_array = match args.get(3) { + Some(flags) => { Some(as_generic_string_array::(flags)?) } + None => None + }; + regexp_replace::( string_array, pattern_array, replacement_array, - args.get(3), + flags_array, ) } DataType::Utf8 => { let string_array = args[0].as_string::(); let pattern_array = args[1].as_string::(); let replacement_array = args[2].as_string::(); - regexp_replace::( + let flags_array = match args.get(3) { + Some(flags) => { Some(as_generic_string_array::(flags)?) } + None => None + }; + regexp_replace::( string_array, pattern_array, replacement_array, - args.get(3), + flags_array, ) } DataType::LargeUtf8 => { let string_array = args[0].as_string::(); let pattern_array = args[1].as_string::(); let replacement_array = args[2].as_string::(); - regexp_replace::( + let flags_array = match args.get(3) { + Some(flags) => { Some(as_generic_string_array::(flags)?) } + None => None + }; + regexp_replace::( string_array, pattern_array, replacement_array, - args.get(3), + flags_array, ) } other => { @@ -829,7 +842,7 @@ mod tests { Arc::new(patterns), Arc::new(replacements), ]) - .unwrap(); + .unwrap(); assert_eq!(re.as_ref(), &expected); } @@ -846,7 +859,7 @@ mod tests { Arc::new(patterns), Arc::new(replacements), ]) - .unwrap(); + .unwrap(); assert_eq!(re.as_ref(), &expected); } @@ -865,7 +878,7 @@ mod tests { Arc::new(replacements), Arc::new(flags), ]) - .unwrap(); + .unwrap(); assert_eq!(re.as_ref(), &expected); } @@ -920,7 +933,7 @@ mod tests { Arc::new(patterns), Arc::new(replacements), ]) - .unwrap(); + .unwrap(); assert_eq!(re.as_ref(), &expected); assert_eq!(re.null_count(), 4); @@ -948,7 +961,7 @@ mod tests { Arc::new(patterns), Arc::new(replacements), ]) - .unwrap(); + .unwrap(); assert_eq!(re.as_ref(), &expected); assert_eq!(re.null_count(), 3); } From 67ec0ba2a939c5cc944652e28cb03b304ef1f887 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Fri, 15 
Aug 2025 11:06:14 -0400 Subject: [PATCH 10/21] Stash. --- .../functions/src/regex/regexpreplace.rs | 368 +++++++++++++++++- 1 file changed, 364 insertions(+), 4 deletions(-) diff --git a/datafusion/functions/src/regex/regexpreplace.rs b/datafusion/functions/src/regex/regexpreplace.rs index e257b42290b1..0c3ee59104fc 100644 --- a/datafusion/functions/src/regex/regexpreplace.rs +++ b/datafusion/functions/src/regex/regexpreplace.rs @@ -631,8 +631,8 @@ pub fn specialize_regexp_replace( .map(|arg| arg.to_array(inferred_length)) .collect::>>()?; - match args[0].data_type() { - DataType::Utf8View => { + match (args[0].data_type(), args[1].data_type(), args[2].data_type()) { + (DataType::Utf8View, DataType::Utf8, DataType::Utf8) => { let string_array = args[0].as_string_view(); let pattern_array = args[1].as_string::(); let replacement_array = args[2].as_string::(); @@ -647,7 +647,7 @@ pub fn specialize_regexp_replace( flags_array, ) } - DataType::Utf8 => { + (DataType::Utf8, DataType::Utf8, DataType::Utf8) => { let string_array = args[0].as_string::(); let pattern_array = args[1].as_string::(); let replacement_array = args[2].as_string::(); @@ -662,7 +662,367 @@ pub fn specialize_regexp_replace( flags_array, ) } - DataType::LargeUtf8 => { + (DataType::LargeUtf8, DataType::Utf8, DataType::Utf8) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = match args.get(3) { + Some(flags) => { Some(as_generic_string_array::(flags)?) } + None => None + }; + regexp_replace::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + (DataType::Utf8View, DataType::Utf8View, DataType::Utf8) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string::(); + let flags_array = match args.get(3) { + Some(flags) => { Some(as_generic_string_array::(flags)?) 
} + None => None + }; + regexp_replace::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + (DataType::Utf8View, DataType::LargeUtf8, DataType::Utf8) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = match args.get(3) { + Some(flags) => { Some(as_generic_string_array::(flags)?) } + None => None + }; + regexp_replace::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + (DataType::Utf8View, DataType::Utf8, DataType::Utf8View) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string_view(); + let flags_array = match args.get(3) { + Some(flags) => { Some(as_generic_string_array::(flags)?) } + None => None + }; + regexp_replace::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + (DataType::Utf8View, DataType::Utf8View, DataType::Utf8View) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string_view(); + let flags_array = match args.get(3) { + Some(flags) => { Some(as_generic_string_array::(flags)?) } + None => None + }; + regexp_replace::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + (DataType::Utf8View, DataType::LargeUtf8, DataType::Utf8View) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string_view(); + let flags_array = match args.get(3) { + Some(flags) => { Some(as_generic_string_array::(flags)?) 
} + None => None + }; + regexp_replace::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + (DataType::Utf8View, DataType::Utf8, DataType::LargeUtf8) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = match args.get(3) { + Some(flags) => { Some(as_generic_string_array::(flags)?) } + None => None + }; + regexp_replace::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + (DataType::Utf8View, DataType::Utf8View, DataType::LargeUtf8) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string::(); + let flags_array = match args.get(3) { + Some(flags) => { Some(as_generic_string_array::(flags)?) } + None => None + }; + regexp_replace::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + (DataType::Utf8View, DataType::LargeUtf8, DataType::LargeUtf8) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = match args.get(3) { + Some(flags) => { Some(as_generic_string_array::(flags)?) } + None => None + }; + regexp_replace::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + (DataType::Utf8, DataType::Utf8View, DataType::Utf8) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string::(); + let flags_array = match args.get(3) { + Some(flags) => { Some(as_generic_string_array::(flags)?) 
} + None => None + }; + regexp_replace::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + (DataType::Utf8, DataType::LargeUtf8, DataType::Utf8) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = match args.get(3) { + Some(flags) => { Some(as_generic_string_array::(flags)?) } + None => None + }; + regexp_replace::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + (DataType::Utf8, DataType::Utf8, DataType::Utf8View) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string_view(); + let flags_array = match args.get(3) { + Some(flags) => { Some(as_generic_string_array::(flags)?) } + None => None + }; + regexp_replace::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + (DataType::Utf8, DataType::Utf8View, DataType::Utf8View) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string_view(); + let flags_array = match args.get(3) { + Some(flags) => { Some(as_generic_string_array::(flags)?) } + None => None + }; + regexp_replace::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + (DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string_view(); + let flags_array = match args.get(3) { + Some(flags) => { Some(as_generic_string_array::(flags)?) 
} + None => None + }; + regexp_replace::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + (DataType::Utf8, DataType::Utf8, DataType::LargeUtf8) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = match args.get(3) { + Some(flags) => { Some(as_generic_string_array::(flags)?) } + None => None + }; + regexp_replace::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + (DataType::Utf8, DataType::Utf8View, DataType::LargeUtf8) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string::(); + let flags_array = match args.get(3) { + Some(flags) => { Some(as_generic_string_array::(flags)?) } + None => None + }; + regexp_replace::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + (DataType::Utf8, DataType::LargeUtf8, DataType::LargeUtf8) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = match args.get(3) { + Some(flags) => { Some(as_generic_string_array::(flags)?) } + None => None + }; + regexp_replace::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + (DataType::LargeUtf8, DataType::Utf8View, DataType::Utf8) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string::(); + let flags_array = match args.get(3) { + Some(flags) => { Some(as_generic_string_array::(flags)?) 
} + None => None + }; + regexp_replace::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + (DataType::LargeUtf8, DataType::LargeUtf8, DataType::Utf8) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = match args.get(3) { + Some(flags) => { Some(as_generic_string_array::(flags)?) } + None => None + }; + regexp_replace::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + (DataType::LargeUtf8, DataType::Utf8, DataType::Utf8View) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string_view(); + let flags_array = match args.get(3) { + Some(flags) => { Some(as_generic_string_array::(flags)?) } + None => None + }; + regexp_replace::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + (DataType::LargeUtf8, DataType::Utf8View, DataType::Utf8View) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string_view(); + let flags_array = match args.get(3) { + Some(flags) => { Some(as_generic_string_array::(flags)?) } + None => None + }; + regexp_replace::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + (DataType::LargeUtf8, DataType::LargeUtf8, DataType::Utf8View) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string_view(); + let flags_array = match args.get(3) { + Some(flags) => { Some(as_generic_string_array::(flags)?) 
} + None => None + }; + regexp_replace::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + (DataType::LargeUtf8, DataType::Utf8, DataType::LargeUtf8) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = match args.get(3) { + Some(flags) => { Some(as_generic_string_array::(flags)?) } + None => None + }; + regexp_replace::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + (DataType::LargeUtf8, DataType::Utf8View, DataType::LargeUtf8) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string::(); + let flags_array = match args.get(3) { + Some(flags) => { Some(as_generic_string_array::(flags)?) } + None => None + }; + regexp_replace::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + (DataType::LargeUtf8, DataType::LargeUtf8, DataType::LargeUtf8) => { let string_array = args[0].as_string::(); let pattern_array = args[1].as_string::(); let replacement_array = args[2].as_string::(); From a84588dcfbe4f9966acebc8539dbbe5ead0ec6e6 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Fri, 15 Aug 2025 13:04:33 -0400 Subject: [PATCH 11/21] Stash. 
--- .../functions/src/regex/regexpreplace.rs | 2331 +++++++++++++---- 1 file changed, 1798 insertions(+), 533 deletions(-) diff --git a/datafusion/functions/src/regex/regexpreplace.rs b/datafusion/functions/src/regex/regexpreplace.rs index 0c3ee59104fc..b609f06a2cd3 100644 --- a/datafusion/functions/src/regex/regexpreplace.rs +++ b/datafusion/functions/src/regex/regexpreplace.rs @@ -258,17 +258,17 @@ fn regex_replace_posix_groups(replacement: &str) -> String { /// # Ok(()) /// # } /// ``` -pub fn regexp_replace<'a, T: OffsetSizeTrait, U, V, W, X>( +pub fn regexp_replace_with_flags<'a, T: OffsetSizeTrait, U, V, W, X>( string_array: U, pattern_array: V, replacement_array: W, - flags_array: Option, + flags_array: X, ) -> Result where - U: ArrayAccessor, - V: ArrayAccessor, - W: ArrayAccessor, - X: ArrayAccessor, + U: ArrayAccessor, + V: ArrayAccessor, + W: ArrayAccessor, + X: ArrayAccessor, { // Default implementation for regexp_replace, assumes all args are arrays // and args is a sequence of 3 or 4 elements. 
@@ -281,138 +281,145 @@ where let string_array_iter = ArrayIter::new(string_array); let pattern_array_iter = ArrayIter::new(pattern_array); let replacement_array_iter = ArrayIter::new(replacement_array); + let flags_array_iter = ArrayIter::new(flags_array); + + let result_iter = string_array_iter + .zip(pattern_array_iter) + .zip(replacement_array_iter) + .zip(flags_array_iter) + .map(|(((string, pattern), replacement), flags)| { + match (string, pattern, replacement, flags) { + (Some(string), Some(pattern), Some(replacement), Some(flags)) => { + let replacement = regex_replace_posix_groups(replacement); + + // format flags into rust pattern + let (pattern, replace_all) = if flags == "g" { + (pattern.to_string(), true) + } else if flags.contains('g') { + ( + format!( + "(?{}){}", + flags.to_string().replace('g', ""), + pattern + ), + true, + ) + } else { + (format!("(?{flags}){pattern}"), false) + }; - match flags_array { - None => { - let result_iter = string_array_iter - .zip(pattern_array_iter) - .zip(replacement_array_iter) - .map(|((string, pattern), replacement)| { - match (string, pattern, replacement) { - (Some(string), Some(pattern), Some(replacement)) => { - let replacement = regex_replace_posix_groups(replacement); - // if patterns hashmap already has regexp then use else create and return - let re = match patterns.get(pattern) { - Some(re) => Ok(re), - None => match Regex::new(pattern) { - Ok(re) => { - patterns.insert(pattern.to_string(), re); - Ok(patterns.get(pattern).unwrap()) - } - Err(err) => { - Err(DataFusionError::External(Box::new(err))) - } - }, - }; - - Some(re.map(|re| re.replace(string, replacement.as_str()))) - .transpose() - } - _ => Ok(None), - } - }); + // if patterns hashmap already has regexp then use else create and return + let re = match patterns.get(&pattern) { + Some(re) => Ok(re), + None => match Regex::new(pattern.as_str()) { + Ok(re) => { + patterns.insert(pattern.clone(), re); + Ok(patterns.get(&pattern).unwrap()) + } + 
Err(err) => Err(DataFusionError::External(Box::new(err))), + }, + }; - match datatype { - DataType::Utf8 | DataType::LargeUtf8 => { - let result = - result_iter.collect::>>()?; - Ok(Arc::new(result) as ArrayRef) - } - DataType::Utf8View => { - let result = result_iter.collect::>()?; - Ok(Arc::new(result) as ArrayRef) - } - other => { - exec_err!( - "Unsupported data type {other:?} for function regex_replace" - ) + Some(re.map(|re| { + if replace_all { + re.replace_all(string, replacement.as_str()) + } else { + re.replace(string, replacement.as_str()) + } + })) + .transpose() } + _ => Ok(None), } + }); + + match datatype { + DataType::Utf8 | DataType::LargeUtf8 => { + let result = result_iter.collect::>>()?; + Ok(Arc::new(result) as ArrayRef) } - Some(flags_array) => { - let flags_array_iter = ArrayIter::new(flags_array); - - let result_iter = string_array_iter - .zip(pattern_array_iter) - .zip(replacement_array_iter) - .zip(flags_array_iter) - .map(|(((string, pattern), replacement), flags)| { - match (string, pattern, replacement, flags) { - (Some(string), Some(pattern), Some(replacement), Some(flags)) => { - let replacement = regex_replace_posix_groups(replacement); - - // format flags into rust pattern - let (pattern, replace_all) = if flags == "g" { - (pattern.to_string(), true) - } else if flags.contains('g') { - ( - format!( - "(?{}){}", - flags.to_string().replace('g', ""), - pattern - ), - true, - ) - } else { - (format!("(?{flags}){pattern}"), false) - }; - - // if patterns hashmap already has regexp then use else create and return - let re = match patterns.get(&pattern) { - Some(re) => Ok(re), - None => match Regex::new(pattern.as_str()) { - Ok(re) => { - patterns.insert(pattern.clone(), re); - Ok(patterns.get(&pattern).unwrap()) - } - Err(err) => { - Err(DataFusionError::External(Box::new(err))) - } - }, - }; - - Some(re.map(|re| { - if replace_all { - re.replace_all(string, replacement.as_str()) - } else { - re.replace(string, replacement.as_str()) - } 
- })) - .transpose() - } - _ => Ok(None), - } - }); + DataType::Utf8View => { + let result = result_iter.collect::>()?; + Ok(Arc::new(result) as ArrayRef) + } + other => { + exec_err!("Unsupported data type {other:?} for function regex_replace") + } + } +} - match datatype { - DataType::Utf8 | DataType::LargeUtf8 => { - let result = - result_iter.collect::>>()?; - Ok(Arc::new(result) as ArrayRef) - } - DataType::Utf8View => { - let result = result_iter.collect::>()?; - Ok(Arc::new(result) as ArrayRef) - } - other => { - exec_err!( - "Unsupported data type {other:?} for function regex_replace" - ) +pub fn regexp_replace_without_flags<'a, T: OffsetSizeTrait, U, V, W>( + string_array: U, + pattern_array: V, + replacement_array: W, +) -> Result +where + U: ArrayAccessor, + V: ArrayAccessor, + W: ArrayAccessor, +{ + // Default implementation for regexp_replace, assumes all args are arrays + // and args is a sequence of 3 or 4 elements. + + // creating Regex is expensive so create hashmap for memoization + let mut patterns: HashMap = HashMap::new(); + + let datatype = string_array.data_type().to_owned(); + + let string_array_iter = ArrayIter::new(string_array); + let pattern_array_iter = ArrayIter::new(pattern_array); + let replacement_array_iter = ArrayIter::new(replacement_array); + + let result_iter = string_array_iter + .zip(pattern_array_iter) + .zip(replacement_array_iter) + .map(|((string, pattern), replacement)| { + match (string, pattern, replacement) { + (Some(string), Some(pattern), Some(replacement)) => { + let replacement = regex_replace_posix_groups(replacement); + // if patterns hashmap already has regexp then use else create and return + let re = match patterns.get(pattern) { + Some(re) => Ok(re), + None => match Regex::new(pattern) { + Ok(re) => { + patterns.insert(pattern.to_string(), re); + Ok(patterns.get(pattern).unwrap()) + } + Err(err) => Err(DataFusionError::External(Box::new(err))), + }, + }; + + Some(re.map(|re| re.replace(string, 
replacement.as_str()))) + .transpose() } + _ => Ok(None), } + }); + + match datatype { + DataType::Utf8 | DataType::LargeUtf8 => { + let result = result_iter.collect::>>()?; + Ok(Arc::new(result) as ArrayRef) + } + DataType::Utf8View => { + let result = result_iter.collect::>()?; + Ok(Arc::new(result) as ArrayRef) + } + other => { + exec_err!("Unsupported data type {other:?} for function regex_replace") } } } fn _regexp_replace_early_abort( - input_array: T, + _input_array: T, sz: usize, ) -> Result { // Mimicking the existing behavior of regexp_replace, if any of the scalar arguments // are actually null, then the result will be an array of the same size as the first argument with all nulls. // // Also acts like an early abort mechanism when the input array is empty. - Ok(new_null_array(input_array.data_type(), sz)) + Ok(new_null_array(&DataType::Utf8, sz)) } /// Get the first argument from the given string array. @@ -631,416 +638,1674 @@ pub fn specialize_regexp_replace( .map(|arg| arg.to_array(inferred_length)) .collect::>>()?; - match (args[0].data_type(), args[1].data_type(), args[2].data_type()) { - (DataType::Utf8View, DataType::Utf8, DataType::Utf8) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = match args.get(3) { - Some(flags) => { Some(as_generic_string_array::(flags)?) } - None => None - }; - regexp_replace::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - (DataType::Utf8, DataType::Utf8, DataType::Utf8) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = match args.get(3) { - Some(flags) => { Some(as_generic_string_array::(flags)?) 
} - None => None - }; - regexp_replace::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - (DataType::LargeUtf8, DataType::Utf8, DataType::Utf8) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = match args.get(3) { - Some(flags) => { Some(as_generic_string_array::(flags)?) } - None => None - }; - regexp_replace::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - (DataType::Utf8View, DataType::Utf8View, DataType::Utf8) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string::(); - let flags_array = match args.get(3) { - Some(flags) => { Some(as_generic_string_array::(flags)?) } - None => None - }; - regexp_replace::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - (DataType::Utf8View, DataType::LargeUtf8, DataType::Utf8) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = match args.get(3) { - Some(flags) => { Some(as_generic_string_array::(flags)?) } - None => None - }; - regexp_replace::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - (DataType::Utf8View, DataType::Utf8, DataType::Utf8View) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string_view(); - let flags_array = match args.get(3) { - Some(flags) => { Some(as_generic_string_array::(flags)?) 
} - None => None - }; - regexp_replace::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - (DataType::Utf8View, DataType::Utf8View, DataType::Utf8View) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string_view(); - let flags_array = match args.get(3) { - Some(flags) => { Some(as_generic_string_array::(flags)?) } - None => None - }; - regexp_replace::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - (DataType::Utf8View, DataType::LargeUtf8, DataType::Utf8View) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string_view(); - let flags_array = match args.get(3) { - Some(flags) => { Some(as_generic_string_array::(flags)?) } - None => None - }; - regexp_replace::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - (DataType::Utf8View, DataType::Utf8, DataType::LargeUtf8) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = match args.get(3) { - Some(flags) => { Some(as_generic_string_array::(flags)?) } - None => None - }; - regexp_replace::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - (DataType::Utf8View, DataType::Utf8View, DataType::LargeUtf8) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string::(); - let flags_array = match args.get(3) { - Some(flags) => { Some(as_generic_string_array::(flags)?) 
} - None => None - }; - regexp_replace::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - (DataType::Utf8View, DataType::LargeUtf8, DataType::LargeUtf8) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = match args.get(3) { - Some(flags) => { Some(as_generic_string_array::(flags)?) } - None => None - }; - regexp_replace::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - (DataType::Utf8, DataType::Utf8View, DataType::Utf8) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string::(); - let flags_array = match args.get(3) { - Some(flags) => { Some(as_generic_string_array::(flags)?) } - None => None - }; - regexp_replace::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - (DataType::Utf8, DataType::LargeUtf8, DataType::Utf8) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = match args.get(3) { - Some(flags) => { Some(as_generic_string_array::(flags)?) } - None => None - }; - regexp_replace::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - (DataType::Utf8, DataType::Utf8, DataType::Utf8View) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string_view(); - let flags_array = match args.get(3) { - Some(flags) => { Some(as_generic_string_array::(flags)?) 
} - None => None - }; - regexp_replace::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - (DataType::Utf8, DataType::Utf8View, DataType::Utf8View) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string_view(); - let flags_array = match args.get(3) { - Some(flags) => { Some(as_generic_string_array::(flags)?) } - None => None - }; - regexp_replace::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - (DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string_view(); - let flags_array = match args.get(3) { - Some(flags) => { Some(as_generic_string_array::(flags)?) } - None => None - }; - regexp_replace::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - (DataType::Utf8, DataType::Utf8, DataType::LargeUtf8) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = match args.get(3) { - Some(flags) => { Some(as_generic_string_array::(flags)?) } - None => None - }; - regexp_replace::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - (DataType::Utf8, DataType::Utf8View, DataType::LargeUtf8) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string::(); - let flags_array = match args.get(3) { - Some(flags) => { Some(as_generic_string_array::(flags)?) 
} - None => None - }; - regexp_replace::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - (DataType::Utf8, DataType::LargeUtf8, DataType::LargeUtf8) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = match args.get(3) { - Some(flags) => { Some(as_generic_string_array::(flags)?) } - None => None - }; - regexp_replace::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - (DataType::LargeUtf8, DataType::Utf8View, DataType::Utf8) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string::(); - let flags_array = match args.get(3) { - Some(flags) => { Some(as_generic_string_array::(flags)?) } - None => None - }; - regexp_replace::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - (DataType::LargeUtf8, DataType::LargeUtf8, DataType::Utf8) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = match args.get(3) { - Some(flags) => { Some(as_generic_string_array::(flags)?) } - None => None - }; - regexp_replace::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - (DataType::LargeUtf8, DataType::Utf8, DataType::Utf8View) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string_view(); - let flags_array = match args.get(3) { - Some(flags) => { Some(as_generic_string_array::(flags)?) 
} - None => None - }; - regexp_replace::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - (DataType::LargeUtf8, DataType::Utf8View, DataType::Utf8View) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string_view(); - let flags_array = match args.get(3) { - Some(flags) => { Some(as_generic_string_array::(flags)?) } - None => None - }; - regexp_replace::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - (DataType::LargeUtf8, DataType::LargeUtf8, DataType::Utf8View) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string_view(); - let flags_array = match args.get(3) { - Some(flags) => { Some(as_generic_string_array::(flags)?) } - None => None - }; - regexp_replace::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - (DataType::LargeUtf8, DataType::Utf8, DataType::LargeUtf8) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = match args.get(3) { - Some(flags) => { Some(as_generic_string_array::(flags)?) } - None => None - }; - regexp_replace::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - (DataType::LargeUtf8, DataType::Utf8View, DataType::LargeUtf8) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string::(); - let flags_array = match args.get(3) { - Some(flags) => { Some(as_generic_string_array::(flags)?) 
} - None => None - }; - regexp_replace::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - (DataType::LargeUtf8, DataType::LargeUtf8, DataType::LargeUtf8) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = match args.get(3) { - Some(flags) => { Some(as_generic_string_array::(flags)?) } - None => None - }; - regexp_replace::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) + if args.get(3).is_none() { + match ( + args[0].data_type(), + args[1].data_type(), + args[2].data_type(), + ) { + (DataType::Utf8View, DataType::Utf8, DataType::Utf8) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + regexp_replace_without_flags::( + string_array, + pattern_array, + replacement_array, + ) + } + (DataType::Utf8, DataType::Utf8, DataType::Utf8) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + regexp_replace_without_flags::( + string_array, + pattern_array, + replacement_array, + ) + } + (DataType::LargeUtf8, DataType::Utf8, DataType::Utf8) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + regexp_replace_without_flags::( + string_array, + pattern_array, + replacement_array, + ) + } + (DataType::Utf8View, DataType::Utf8View, DataType::Utf8) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string::(); + regexp_replace_without_flags::( + string_array, + pattern_array, + replacement_array, + ) + } + (DataType::Utf8View, DataType::LargeUtf8, DataType::Utf8) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string::(); 
+ let replacement_array = args[2].as_string::(); + regexp_replace_without_flags::( + string_array, + pattern_array, + replacement_array, + ) + } + (DataType::Utf8View, DataType::Utf8, DataType::Utf8View) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string_view(); + regexp_replace_without_flags::( + string_array, + pattern_array, + replacement_array, + ) + } + (DataType::Utf8View, DataType::Utf8View, DataType::Utf8View) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string_view(); + regexp_replace_without_flags::( + string_array, + pattern_array, + replacement_array, + ) + } + (DataType::Utf8View, DataType::LargeUtf8, DataType::Utf8View) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string_view(); + regexp_replace_without_flags::( + string_array, + pattern_array, + replacement_array, + ) + } + (DataType::Utf8View, DataType::Utf8, DataType::LargeUtf8) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + regexp_replace_without_flags::( + string_array, + pattern_array, + replacement_array, + ) + } + (DataType::Utf8View, DataType::Utf8View, DataType::LargeUtf8) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string::(); + regexp_replace_without_flags::( + string_array, + pattern_array, + replacement_array, + ) + } + (DataType::Utf8View, DataType::LargeUtf8, DataType::LargeUtf8) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + regexp_replace_without_flags::( + string_array, + pattern_array, + replacement_array, + ) + } + 
(DataType::Utf8, DataType::Utf8View, DataType::Utf8) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string::(); + regexp_replace_without_flags::( + string_array, + pattern_array, + replacement_array, + ) + } + (DataType::Utf8, DataType::LargeUtf8, DataType::Utf8) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + regexp_replace_without_flags::( + string_array, + pattern_array, + replacement_array, + ) + } + (DataType::Utf8, DataType::Utf8, DataType::Utf8View) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string_view(); + regexp_replace_without_flags::( + string_array, + pattern_array, + replacement_array, + ) + } + (DataType::Utf8, DataType::Utf8View, DataType::Utf8View) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string_view(); + regexp_replace_without_flags::( + string_array, + pattern_array, + replacement_array, + ) + } + (DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string_view(); + regexp_replace_without_flags::( + string_array, + pattern_array, + replacement_array, + ) + } + (DataType::Utf8, DataType::Utf8, DataType::LargeUtf8) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + regexp_replace_without_flags::( + string_array, + pattern_array, + replacement_array, + ) + } + (DataType::Utf8, DataType::Utf8View, DataType::LargeUtf8) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string::(); + 
regexp_replace_without_flags::( + string_array, + pattern_array, + replacement_array, + ) + } + (DataType::Utf8, DataType::LargeUtf8, DataType::LargeUtf8) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + regexp_replace_without_flags::( + string_array, + pattern_array, + replacement_array, + ) + } + (DataType::LargeUtf8, DataType::Utf8View, DataType::Utf8) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string::(); + regexp_replace_without_flags::( + string_array, + pattern_array, + replacement_array, + ) + } + (DataType::LargeUtf8, DataType::LargeUtf8, DataType::Utf8) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + regexp_replace_without_flags::( + string_array, + pattern_array, + replacement_array, + ) + } + (DataType::LargeUtf8, DataType::Utf8, DataType::Utf8View) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string_view(); + regexp_replace_without_flags::( + string_array, + pattern_array, + replacement_array, + ) + } + (DataType::LargeUtf8, DataType::Utf8View, DataType::Utf8View) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string_view(); + regexp_replace_without_flags::( + string_array, + pattern_array, + replacement_array, + ) + } + (DataType::LargeUtf8, DataType::LargeUtf8, DataType::Utf8View) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string_view(); + regexp_replace_without_flags::( + string_array, + pattern_array, + replacement_array, + ) + } + (DataType::LargeUtf8, DataType::Utf8, DataType::LargeUtf8) => { + let string_array 
= args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + regexp_replace_without_flags::( + string_array, + pattern_array, + replacement_array, + ) + } + (DataType::LargeUtf8, DataType::Utf8View, DataType::LargeUtf8) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string::(); + regexp_replace_without_flags::( + string_array, + pattern_array, + replacement_array, + ) + } + (DataType::LargeUtf8, DataType::LargeUtf8, DataType::LargeUtf8) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + regexp_replace_without_flags::( + string_array, + pattern_array, + replacement_array, + ) + } + other => { + exec_err!( + "Unsupported data type {other:?} for function regex_replace" + ) + } } - other => { - exec_err!( - "Unsupported data type {other:?} for function regex_replace" - ) + } else { + match ( + args[0].data_type(), + args[1].data_type(), + args[2].data_type(), + args[3].data_type(), + ) { + ( + DataType::Utf8View, + DataType::Utf8, + DataType::Utf8, + DataType::Utf8, + ) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + (DataType::Utf8, DataType::Utf8, DataType::Utf8, DataType::Utf8) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::LargeUtf8, + DataType::Utf8, + DataType::Utf8, + DataType::Utf8, + ) => { + let string_array = 
args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8View, + DataType::Utf8View, + DataType::Utf8, + DataType::Utf8, + ) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8View, + DataType::LargeUtf8, + DataType::Utf8, + DataType::Utf8, + ) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8View, + DataType::Utf8, + DataType::Utf8View, + DataType::Utf8, + ) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string_view(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8View, + DataType::Utf8View, + DataType::Utf8View, + DataType::Utf8, + ) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string_view(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8View, + DataType::LargeUtf8, + DataType::Utf8View, + DataType::Utf8, + ) => { + let string_array = args[0].as_string_view(); + let pattern_array = 
args[1].as_string::(); + let replacement_array = args[2].as_string_view(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8View, + DataType::Utf8, + DataType::LargeUtf8, + DataType::Utf8, + ) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8View, + DataType::Utf8View, + DataType::LargeUtf8, + DataType::Utf8, + ) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8View, + DataType::LargeUtf8, + DataType::LargeUtf8, + DataType::Utf8, + ) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8, + DataType::Utf8View, + DataType::Utf8, + DataType::Utf8, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8, + DataType::LargeUtf8, + DataType::Utf8, + DataType::Utf8, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = 
args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8, + DataType::Utf8, + DataType::Utf8View, + DataType::Utf8, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string_view(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8, + DataType::Utf8View, + DataType::Utf8View, + DataType::Utf8, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string_view(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8, + DataType::LargeUtf8, + DataType::Utf8View, + DataType::Utf8, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string_view(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8, + DataType::Utf8, + DataType::LargeUtf8, + DataType::Utf8, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8, + DataType::Utf8View, + DataType::LargeUtf8, + DataType::Utf8, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + 
regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8, + DataType::LargeUtf8, + DataType::LargeUtf8, + DataType::Utf8, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::LargeUtf8, + DataType::Utf8View, + DataType::Utf8, + DataType::Utf8, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::LargeUtf8, + DataType::LargeUtf8, + DataType::Utf8, + DataType::Utf8, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::LargeUtf8, + DataType::Utf8, + DataType::Utf8View, + DataType::Utf8, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string_view(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::LargeUtf8, + DataType::Utf8View, + DataType::Utf8View, + DataType::Utf8, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string_view(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + 
replacement_array, + flags_array, + ) + } + ( + DataType::LargeUtf8, + DataType::LargeUtf8, + DataType::Utf8View, + DataType::Utf8, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string_view(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::LargeUtf8, + DataType::Utf8, + DataType::LargeUtf8, + DataType::Utf8, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::LargeUtf8, + DataType::Utf8View, + DataType::LargeUtf8, + DataType::Utf8, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::LargeUtf8, + DataType::LargeUtf8, + DataType::LargeUtf8, + DataType::Utf8, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + // Cases with Utf8View as 4th argument (flags) + ( + DataType::Utf8View, + DataType::Utf8, + DataType::Utf8, + DataType::Utf8View, + ) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string_view(); + regexp_replace_with_flags::( + string_array, + pattern_array, + 
replacement_array, + flags_array, + ) + } + ( + DataType::Utf8, + DataType::Utf8, + DataType::Utf8, + DataType::Utf8View, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string_view(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::LargeUtf8, + DataType::Utf8, + DataType::Utf8, + DataType::Utf8View, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string_view(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8View, + DataType::Utf8View, + DataType::Utf8, + DataType::Utf8View, + ) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string_view(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8View, + DataType::LargeUtf8, + DataType::Utf8, + DataType::Utf8View, + ) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string_view(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8View, + DataType::Utf8, + DataType::Utf8View, + DataType::Utf8View, + ) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string_view(); + let flags_array = args[3].as_string_view(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + 
DataType::Utf8View, + DataType::Utf8View, + DataType::Utf8View, + DataType::Utf8View, + ) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string_view(); + let flags_array = args[3].as_string_view(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8View, + DataType::LargeUtf8, + DataType::Utf8View, + DataType::Utf8View, + ) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string_view(); + let flags_array = args[3].as_string_view(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8View, + DataType::Utf8, + DataType::LargeUtf8, + DataType::Utf8View, + ) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string_view(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8View, + DataType::Utf8View, + DataType::LargeUtf8, + DataType::Utf8View, + ) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string_view(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8View, + DataType::LargeUtf8, + DataType::LargeUtf8, + DataType::Utf8View, + ) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string_view(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + 
DataType::Utf8, + DataType::Utf8View, + DataType::Utf8, + DataType::Utf8View, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string_view(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8, + DataType::LargeUtf8, + DataType::Utf8, + DataType::Utf8View, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string_view(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8, + DataType::Utf8, + DataType::Utf8View, + DataType::Utf8View, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string_view(); + let flags_array = args[3].as_string_view(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8, + DataType::Utf8View, + DataType::Utf8View, + DataType::Utf8View, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string_view(); + let flags_array = args[3].as_string_view(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8, + DataType::LargeUtf8, + DataType::Utf8View, + DataType::Utf8View, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string_view(); + let flags_array = args[3].as_string_view(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8, + DataType::Utf8, + 
DataType::LargeUtf8, + DataType::Utf8View, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string_view(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8, + DataType::Utf8View, + DataType::LargeUtf8, + DataType::Utf8View, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string_view(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8, + DataType::LargeUtf8, + DataType::LargeUtf8, + DataType::Utf8View, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string_view(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::LargeUtf8, + DataType::Utf8View, + DataType::Utf8, + DataType::Utf8View, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string_view(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::LargeUtf8, + DataType::LargeUtf8, + DataType::Utf8, + DataType::Utf8View, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string_view(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::LargeUtf8, + DataType::Utf8, + DataType::Utf8View, + 
DataType::Utf8View, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string_view(); + let flags_array = args[3].as_string_view(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::LargeUtf8, + DataType::Utf8View, + DataType::Utf8View, + DataType::Utf8View, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string_view(); + let flags_array = args[3].as_string_view(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::LargeUtf8, + DataType::LargeUtf8, + DataType::Utf8View, + DataType::Utf8View, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string_view(); + let flags_array = args[3].as_string_view(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::LargeUtf8, + DataType::Utf8, + DataType::LargeUtf8, + DataType::Utf8View, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string_view(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::LargeUtf8, + DataType::Utf8View, + DataType::LargeUtf8, + DataType::Utf8View, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string_view(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::LargeUtf8, + DataType::LargeUtf8, + DataType::LargeUtf8, + 
DataType::Utf8View, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string_view(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + // Cases with LargeUtf8 as 4th argument (flags) + ( + DataType::Utf8View, + DataType::Utf8, + DataType::Utf8, + DataType::LargeUtf8, + ) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8, + DataType::Utf8, + DataType::Utf8, + DataType::LargeUtf8, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::LargeUtf8, + DataType::Utf8, + DataType::Utf8, + DataType::LargeUtf8, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8View, + DataType::Utf8View, + DataType::Utf8, + DataType::LargeUtf8, + ) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8View, + DataType::LargeUtf8, + DataType::Utf8, + 
DataType::LargeUtf8, + ) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8View, + DataType::Utf8, + DataType::Utf8View, + DataType::LargeUtf8, + ) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string_view(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8View, + DataType::Utf8View, + DataType::Utf8View, + DataType::LargeUtf8, + ) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string_view(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8View, + DataType::LargeUtf8, + DataType::Utf8View, + DataType::LargeUtf8, + ) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string_view(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8View, + DataType::Utf8, + DataType::LargeUtf8, + DataType::LargeUtf8, + ) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8View, + DataType::Utf8View, + DataType::LargeUtf8, + DataType::LargeUtf8, + 
) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8View, + DataType::LargeUtf8, + DataType::LargeUtf8, + DataType::LargeUtf8, + ) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8, + DataType::Utf8View, + DataType::Utf8, + DataType::LargeUtf8, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8, + DataType::LargeUtf8, + DataType::Utf8, + DataType::LargeUtf8, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8, + DataType::Utf8, + DataType::Utf8View, + DataType::LargeUtf8, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string_view(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8, + DataType::Utf8View, + DataType::Utf8View, + DataType::LargeUtf8, + ) => { + let string_array = args[0].as_string::(); + 
let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string_view(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8, + DataType::LargeUtf8, + DataType::Utf8View, + DataType::LargeUtf8, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string_view(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8, + DataType::Utf8, + DataType::LargeUtf8, + DataType::LargeUtf8, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8, + DataType::Utf8View, + DataType::LargeUtf8, + DataType::LargeUtf8, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::Utf8, + DataType::LargeUtf8, + DataType::LargeUtf8, + DataType::LargeUtf8, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::LargeUtf8, + DataType::Utf8View, + DataType::Utf8, + DataType::LargeUtf8, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string_view(); + 
let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::LargeUtf8, + DataType::LargeUtf8, + DataType::Utf8, + DataType::LargeUtf8, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::LargeUtf8, + DataType::Utf8, + DataType::Utf8View, + DataType::LargeUtf8, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string_view(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::LargeUtf8, + DataType::Utf8View, + DataType::Utf8View, + DataType::LargeUtf8, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string_view(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::LargeUtf8, + DataType::LargeUtf8, + DataType::Utf8View, + DataType::LargeUtf8, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string_view(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::LargeUtf8, + DataType::Utf8, + DataType::LargeUtf8, + DataType::LargeUtf8, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = 
args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::LargeUtf8, + DataType::Utf8View, + DataType::LargeUtf8, + DataType::LargeUtf8, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::LargeUtf8, + DataType::LargeUtf8, + DataType::LargeUtf8, + DataType::LargeUtf8, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args[3].as_string::(); + regexp_replace_with_flags::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + other => { + exec_err!( + "Unsupported data type {other:?} for function regex_replace" + ) + } } } } @@ -1202,7 +2467,7 @@ mod tests { Arc::new(patterns), Arc::new(replacements), ]) - .unwrap(); + .unwrap(); assert_eq!(re.as_ref(), &expected); } @@ -1219,7 +2484,7 @@ mod tests { Arc::new(patterns), Arc::new(replacements), ]) - .unwrap(); + .unwrap(); assert_eq!(re.as_ref(), &expected); } @@ -1238,7 +2503,7 @@ mod tests { Arc::new(replacements), Arc::new(flags), ]) - .unwrap(); + .unwrap(); assert_eq!(re.as_ref(), &expected); } @@ -1293,7 +2558,7 @@ mod tests { Arc::new(patterns), Arc::new(replacements), ]) - .unwrap(); + .unwrap(); assert_eq!(re.as_ref(), &expected); assert_eq!(re.null_count(), 4); @@ -1321,7 +2586,7 @@ mod tests { Arc::new(patterns), Arc::new(replacements), ]) - .unwrap(); + .unwrap(); assert_eq!(re.as_ref(), &expected); assert_eq!(re.null_count(), 3); } From 3c9fa9f22fd852fc93a6f34b991f71118c255720 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Fri, 15 Aug 2025 14:35:17 -0400 Subject: [PATCH 
12/21] Fix regx bench. --- Cargo.lock | 2 +- datafusion/functions/benches/regx.rs | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 49ea25978745..87170ba24e50 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2465,7 +2465,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "49.0.0" +version = "49.0.1" dependencies = [ "arrow", "datafusion-common", diff --git a/datafusion/functions/benches/regx.rs b/datafusion/functions/benches/regx.rs index cd5d98700676..fb8d0d289d48 100644 --- a/datafusion/functions/benches/regx.rs +++ b/datafusion/functions/benches/regx.rs @@ -26,7 +26,8 @@ use datafusion_functions::regex::regexpcount::regexp_count_func; use datafusion_functions::regex::regexpinstr::regexp_instr_func; use datafusion_functions::regex::regexplike::regexp_like; use datafusion_functions::regex::regexpmatch::regexp_match; -use datafusion_functions::regex::regexpreplace::regexp_replace; +use datafusion_functions::regex::regexpreplace::regexp_replace_with_flags; +use datafusion_functions::regex::regexpreplace::regexp_replace_without_flags; use rand::distr::Alphanumeric; use rand::prelude::IndexedRandom; use rand::rngs::ThreadRng; @@ -267,11 +268,11 @@ fn criterion_benchmark(c: &mut Criterion) { b.iter(|| { black_box( - regexp_replace::( + regexp_replace_with_flags::( data.as_string::(), regex.as_string::(), replacement.as_string::(), - Some(&flags), + flags.as_string::(), ) .expect("regexp_replace should work on valid values"), ) @@ -282,19 +283,18 @@ fn criterion_benchmark(c: &mut Criterion) { let mut rng = rand::rng(); let data = cast(&data(&mut rng), &DataType::Utf8View).unwrap(); let regex = cast(&regex(&mut rng), &DataType::Utf8View).unwrap(); - // flags are not allowed to be utf8view according to the function - let flags = Arc::new(flags(&mut rng)) as ArrayRef; + let flags = cast(&crate::regex(&mut rng), &DataType::Utf8View).unwrap(); let replacement = 
Arc::new(StringViewArray::from_iter_values(iter::repeat_n( "XX", 1000, ))); b.iter(|| { black_box( - regexp_replace::( + regexp_replace_with_flags::( data.as_string_view(), regex.as_string_view(), - &replacement, - Some(&flags), + &*replacement, + flags.as_string_view(), ) .expect("regexp_replace should work on valid values"), ) From c29734031f828c470ac36ae6716f7963c78b7eb2 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Fri, 15 Aug 2025 14:51:00 -0400 Subject: [PATCH 13/21] Clippy. --- datafusion/functions/benches/regx.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/datafusion/functions/benches/regx.rs b/datafusion/functions/benches/regx.rs index fb8d0d289d48..7d8acf04bbe7 100644 --- a/datafusion/functions/benches/regx.rs +++ b/datafusion/functions/benches/regx.rs @@ -27,7 +27,6 @@ use datafusion_functions::regex::regexpinstr::regexp_instr_func; use datafusion_functions::regex::regexplike::regexp_like; use datafusion_functions::regex::regexpmatch::regexp_match; use datafusion_functions::regex::regexpreplace::regexp_replace_with_flags; -use datafusion_functions::regex::regexpreplace::regexp_replace_without_flags; use rand::distr::Alphanumeric; use rand::prelude::IndexedRandom; use rand::rngs::ThreadRng; From 1b23e2140f6b40b7a2e91f08c6b53a0208bf652e Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Fri, 15 Aug 2025 16:19:26 -0400 Subject: [PATCH 14/21] Fix bench regx. 
--- datafusion/functions/benches/regx.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/functions/benches/regx.rs b/datafusion/functions/benches/regx.rs index 7d8acf04bbe7..fd5ed6ef2c4c 100644 --- a/datafusion/functions/benches/regx.rs +++ b/datafusion/functions/benches/regx.rs @@ -282,7 +282,7 @@ fn criterion_benchmark(c: &mut Criterion) { let mut rng = rand::rng(); let data = cast(&data(&mut rng), &DataType::Utf8View).unwrap(); let regex = cast(&regex(&mut rng), &DataType::Utf8View).unwrap(); - let flags = cast(&crate::regex(&mut rng), &DataType::Utf8View).unwrap(); + let flags = cast(&flags(&mut rng), &DataType::Utf8View).unwrap(); let replacement = Arc::new(StringViewArray::from_iter_values(iter::repeat_n( "XX", 1000, ))); From a0c7b18c31b8f712fbfcba5dd8f0da12f916ee51 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Wed, 20 Aug 2025 12:05:38 -0400 Subject: [PATCH 15/21] Refactor signature. I need to remove the match arms that aren't used anymore, update the .slt test for string_view.slt, and understand why String(3) and String(4) are not equivalent to this. 
--- .../functions/src/regex/regexpreplace.rs | 33 ++++++------------- 1 file changed, 10 insertions(+), 23 deletions(-) diff --git a/datafusion/functions/src/regex/regexpreplace.rs b/datafusion/functions/src/regex/regexpreplace.rs index b609f06a2cd3..ce8a923d06d3 100644 --- a/datafusion/functions/src/regex/regexpreplace.rs +++ b/datafusion/functions/src/regex/regexpreplace.rs @@ -29,7 +29,6 @@ use datafusion_common::cast::{ }; use datafusion_common::exec_err; use datafusion_common::plan_err; -use datafusion_common::types::{logical_binary, logical_string, NativeType}; use datafusion_common::ScalarValue; use datafusion_common::{ cast::as_generic_string_array, internal_err, DataFusionError, Result, @@ -38,7 +37,7 @@ use datafusion_expr::function::Hint; use datafusion_expr::ColumnarValue; use datafusion_expr::TypeSignature; use datafusion_expr::{Documentation, ScalarUDFImpl, Signature, Volatility}; -use datafusion_expr_common::signature::{Coercion, TypeSignatureClass}; +use datafusion_expr_common::signature::TypeSignatureClass; use datafusion_macros::user_doc; use regex::Regex; use std::any::Any; @@ -98,30 +97,18 @@ impl Default for RegexpReplaceFunc { impl RegexpReplaceFunc { pub fn new() -> Self { + use DataType::*; use TypeSignature::*; use TypeSignatureClass::*; Self { signature: Signature::one_of( vec![ - Coercible(vec![ - Coercion::new_implicit( - Native(logical_string()), - vec![Native(logical_string()), Native(logical_binary())], - NativeType::String, - ), - Coercion::new_exact(Native(logical_string())), - Coercion::new_exact(Native(logical_string())), - ]), - Coercible(vec![ - Coercion::new_implicit( - Native(logical_string()), - vec![Native(logical_string()), Native(logical_binary())], - NativeType::String, - ), - Coercion::new_exact(Native(logical_string())), - Coercion::new_exact(Native(logical_string())), - Coercion::new_exact(Native(logical_string())), - ]), + Exact(vec![Utf8View, Utf8View, Utf8View]), + Exact(vec![LargeUtf8, LargeUtf8, LargeUtf8]), + 
Exact(vec![Utf8, Utf8, Utf8]), + Exact(vec![Utf8View, Utf8View, Utf8View, Utf8View]), + Exact(vec![LargeUtf8, LargeUtf8, LargeUtf8, LargeUtf8]), + Exact(vec![Utf8, Utf8, Utf8, Utf8]), ], Volatility::Immutable, ), @@ -412,14 +399,14 @@ where } fn _regexp_replace_early_abort( - _input_array: T, + input_array: T, sz: usize, ) -> Result { // Mimicking the existing behavior of regexp_replace, if any of the scalar arguments // are actually null, then the result will be an array of the same size as the first argument with all nulls. // // Also acts like an early abort mechanism when the input array is empty. - Ok(new_null_array(&DataType::Utf8, sz)) + Ok(new_null_array(input_array.data_type(), sz)) } /// Get the first argument from the given string array. From 92392823334fb669bc370f25d40dbdff145d8304 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Wed, 20 Aug 2025 12:17:36 -0400 Subject: [PATCH 16/21] Remove unnecessary match arms. --- .../functions/src/regex/regexpreplace.rs | 1578 +---------------- 1 file changed, 4 insertions(+), 1574 deletions(-) diff --git a/datafusion/functions/src/regex/regexpreplace.rs b/datafusion/functions/src/regex/regexpreplace.rs index ce8a923d06d3..63bcfeedb961 100644 --- a/datafusion/functions/src/regex/regexpreplace.rs +++ b/datafusion/functions/src/regex/regexpreplace.rs @@ -37,7 +37,6 @@ use datafusion_expr::function::Hint; use datafusion_expr::ColumnarValue; use datafusion_expr::TypeSignature; use datafusion_expr::{Documentation, ScalarUDFImpl, Signature, Volatility}; -use datafusion_expr_common::signature::TypeSignatureClass; use datafusion_macros::user_doc; use regex::Regex; use std::any::Any; @@ -99,7 +98,6 @@ impl RegexpReplaceFunc { pub fn new() -> Self { use DataType::*; use TypeSignature::*; - use TypeSignatureClass::*; Self { signature: Signature::one_of( vec![ @@ -631,16 +629,6 @@ pub fn specialize_regexp_replace( args[1].data_type(), args[2].data_type(), ) { - (DataType::Utf8View, DataType::Utf8, DataType::Utf8) => { - let 
string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - regexp_replace_without_flags::( - string_array, - pattern_array, - replacement_array, - ) - } (DataType::Utf8, DataType::Utf8, DataType::Utf8) => { let string_array = args[0].as_string::(); let pattern_array = args[1].as_string::(); @@ -651,46 +639,6 @@ pub fn specialize_regexp_replace( replacement_array, ) } - (DataType::LargeUtf8, DataType::Utf8, DataType::Utf8) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - regexp_replace_without_flags::( - string_array, - pattern_array, - replacement_array, - ) - } - (DataType::Utf8View, DataType::Utf8View, DataType::Utf8) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string::(); - regexp_replace_without_flags::( - string_array, - pattern_array, - replacement_array, - ) - } - (DataType::Utf8View, DataType::LargeUtf8, DataType::Utf8) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - regexp_replace_without_flags::( - string_array, - pattern_array, - replacement_array, - ) - } - (DataType::Utf8View, DataType::Utf8, DataType::Utf8View) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string_view(); - regexp_replace_without_flags::( - string_array, - pattern_array, - replacement_array, - ) - } (DataType::Utf8View, DataType::Utf8View, DataType::Utf8View) => { let string_array = args[0].as_string_view(); let pattern_array = args[1].as_string_view(); @@ -701,196 +649,6 @@ pub fn specialize_regexp_replace( replacement_array, ) } - (DataType::Utf8View, DataType::LargeUtf8, DataType::Utf8View) => { - let string_array = 
args[0].as_string_view(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string_view(); - regexp_replace_without_flags::( - string_array, - pattern_array, - replacement_array, - ) - } - (DataType::Utf8View, DataType::Utf8, DataType::LargeUtf8) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - regexp_replace_without_flags::( - string_array, - pattern_array, - replacement_array, - ) - } - (DataType::Utf8View, DataType::Utf8View, DataType::LargeUtf8) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string::(); - regexp_replace_without_flags::( - string_array, - pattern_array, - replacement_array, - ) - } - (DataType::Utf8View, DataType::LargeUtf8, DataType::LargeUtf8) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - regexp_replace_without_flags::( - string_array, - pattern_array, - replacement_array, - ) - } - (DataType::Utf8, DataType::Utf8View, DataType::Utf8) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string::(); - regexp_replace_without_flags::( - string_array, - pattern_array, - replacement_array, - ) - } - (DataType::Utf8, DataType::LargeUtf8, DataType::Utf8) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - regexp_replace_without_flags::( - string_array, - pattern_array, - replacement_array, - ) - } - (DataType::Utf8, DataType::Utf8, DataType::Utf8View) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string_view(); - regexp_replace_without_flags::( - string_array, - pattern_array, - 
replacement_array, - ) - } - (DataType::Utf8, DataType::Utf8View, DataType::Utf8View) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string_view(); - regexp_replace_without_flags::( - string_array, - pattern_array, - replacement_array, - ) - } - (DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string_view(); - regexp_replace_without_flags::( - string_array, - pattern_array, - replacement_array, - ) - } - (DataType::Utf8, DataType::Utf8, DataType::LargeUtf8) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - regexp_replace_without_flags::( - string_array, - pattern_array, - replacement_array, - ) - } - (DataType::Utf8, DataType::Utf8View, DataType::LargeUtf8) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string::(); - regexp_replace_without_flags::( - string_array, - pattern_array, - replacement_array, - ) - } - (DataType::Utf8, DataType::LargeUtf8, DataType::LargeUtf8) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - regexp_replace_without_flags::( - string_array, - pattern_array, - replacement_array, - ) - } - (DataType::LargeUtf8, DataType::Utf8View, DataType::Utf8) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string::(); - regexp_replace_without_flags::( - string_array, - pattern_array, - replacement_array, - ) - } - (DataType::LargeUtf8, DataType::LargeUtf8, DataType::Utf8) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let 
replacement_array = args[2].as_string::(); - regexp_replace_without_flags::( - string_array, - pattern_array, - replacement_array, - ) - } - (DataType::LargeUtf8, DataType::Utf8, DataType::Utf8View) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string_view(); - regexp_replace_without_flags::( - string_array, - pattern_array, - replacement_array, - ) - } - (DataType::LargeUtf8, DataType::Utf8View, DataType::Utf8View) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string_view(); - regexp_replace_without_flags::( - string_array, - pattern_array, - replacement_array, - ) - } - (DataType::LargeUtf8, DataType::LargeUtf8, DataType::Utf8View) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string_view(); - regexp_replace_without_flags::( - string_array, - pattern_array, - replacement_array, - ) - } - (DataType::LargeUtf8, DataType::Utf8, DataType::LargeUtf8) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - regexp_replace_without_flags::( - string_array, - pattern_array, - replacement_array, - ) - } - (DataType::LargeUtf8, DataType::Utf8View, DataType::LargeUtf8) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string::(); - regexp_replace_without_flags::( - string_array, - pattern_array, - replacement_array, - ) - } (DataType::LargeUtf8, DataType::LargeUtf8, DataType::LargeUtf8) => { let string_array = args[0].as_string::(); let pattern_array = args[1].as_string::(); @@ -914,23 +672,6 @@ pub fn specialize_regexp_replace( args[2].data_type(), args[3].data_type(), ) { - ( - DataType::Utf8View, - DataType::Utf8, - DataType::Utf8, - DataType::Utf8, 
- ) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } (DataType::Utf8, DataType::Utf8, DataType::Utf8, DataType::Utf8) => { let string_array = args[0].as_string::(); let pattern_array = args[1].as_string::(); @@ -943,33 +684,16 @@ pub fn specialize_regexp_replace( flags_array, ) } - ( - DataType::LargeUtf8, - DataType::Utf8, - DataType::Utf8, - DataType::Utf8, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } ( DataType::Utf8View, DataType::Utf8View, - DataType::Utf8, - DataType::Utf8, + DataType::Utf8View, + DataType::Utf8View, ) => { let string_array = args[0].as_string_view(); let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); + let replacement_array = args[2].as_string_view(); + let flags_array = args[3].as_string_view(); regexp_replace_with_flags::( string_array, pattern_array, @@ -977,1300 +701,6 @@ pub fn specialize_regexp_replace( flags_array, ) } - ( - DataType::Utf8View, - DataType::LargeUtf8, - DataType::Utf8, - DataType::Utf8, - ) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8View, - DataType::Utf8, - DataType::Utf8View, - DataType::Utf8, - ) => { - let string_array = args[0].as_string_view(); - let pattern_array = 
args[1].as_string::(); - let replacement_array = args[2].as_string_view(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8View, - DataType::Utf8View, - DataType::Utf8View, - DataType::Utf8, - ) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string_view(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8View, - DataType::LargeUtf8, - DataType::Utf8View, - DataType::Utf8, - ) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string_view(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8View, - DataType::Utf8, - DataType::LargeUtf8, - DataType::Utf8, - ) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8View, - DataType::Utf8View, - DataType::LargeUtf8, - DataType::Utf8, - ) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8View, - DataType::LargeUtf8, - DataType::LargeUtf8, - DataType::Utf8, - ) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string::(); - let 
replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8, - DataType::Utf8View, - DataType::Utf8, - DataType::Utf8, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8, - DataType::LargeUtf8, - DataType::Utf8, - DataType::Utf8, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8, - DataType::Utf8, - DataType::Utf8View, - DataType::Utf8, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string_view(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8, - DataType::Utf8View, - DataType::Utf8View, - DataType::Utf8, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string_view(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8, - DataType::LargeUtf8, - DataType::Utf8View, - DataType::Utf8, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string_view(); - let flags_array = 
args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8, - DataType::Utf8, - DataType::LargeUtf8, - DataType::Utf8, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8, - DataType::Utf8View, - DataType::LargeUtf8, - DataType::Utf8, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8, - DataType::LargeUtf8, - DataType::LargeUtf8, - DataType::Utf8, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::LargeUtf8, - DataType::Utf8View, - DataType::Utf8, - DataType::Utf8, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::LargeUtf8, - DataType::LargeUtf8, - DataType::Utf8, - DataType::Utf8, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - 
pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::LargeUtf8, - DataType::Utf8, - DataType::Utf8View, - DataType::Utf8, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string_view(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::LargeUtf8, - DataType::Utf8View, - DataType::Utf8View, - DataType::Utf8, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string_view(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::LargeUtf8, - DataType::LargeUtf8, - DataType::Utf8View, - DataType::Utf8, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string_view(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::LargeUtf8, - DataType::Utf8, - DataType::LargeUtf8, - DataType::Utf8, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::LargeUtf8, - DataType::Utf8View, - DataType::LargeUtf8, - DataType::Utf8, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) 
- } - ( - DataType::LargeUtf8, - DataType::LargeUtf8, - DataType::LargeUtf8, - DataType::Utf8, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - // Cases with Utf8View as 4th argument (flags) - ( - DataType::Utf8View, - DataType::Utf8, - DataType::Utf8, - DataType::Utf8View, - ) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string_view(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8, - DataType::Utf8, - DataType::Utf8, - DataType::Utf8View, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string_view(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::LargeUtf8, - DataType::Utf8, - DataType::Utf8, - DataType::Utf8View, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string_view(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8View, - DataType::Utf8View, - DataType::Utf8, - DataType::Utf8View, - ) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string_view(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - 
DataType::Utf8View, - DataType::LargeUtf8, - DataType::Utf8, - DataType::Utf8View, - ) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string_view(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8View, - DataType::Utf8, - DataType::Utf8View, - DataType::Utf8View, - ) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string_view(); - let flags_array = args[3].as_string_view(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8View, - DataType::Utf8View, - DataType::Utf8View, - DataType::Utf8View, - ) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string_view(); - let flags_array = args[3].as_string_view(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8View, - DataType::LargeUtf8, - DataType::Utf8View, - DataType::Utf8View, - ) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string_view(); - let flags_array = args[3].as_string_view(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8View, - DataType::Utf8, - DataType::LargeUtf8, - DataType::Utf8View, - ) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string_view(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - 
DataType::Utf8View, - DataType::Utf8View, - DataType::LargeUtf8, - DataType::Utf8View, - ) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string_view(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8View, - DataType::LargeUtf8, - DataType::LargeUtf8, - DataType::Utf8View, - ) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string_view(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8, - DataType::Utf8View, - DataType::Utf8, - DataType::Utf8View, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string_view(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8, - DataType::LargeUtf8, - DataType::Utf8, - DataType::Utf8View, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string_view(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8, - DataType::Utf8, - DataType::Utf8View, - DataType::Utf8View, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string_view(); - let flags_array = args[3].as_string_view(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8, - DataType::Utf8View, - 
DataType::Utf8View, - DataType::Utf8View, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string_view(); - let flags_array = args[3].as_string_view(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8, - DataType::LargeUtf8, - DataType::Utf8View, - DataType::Utf8View, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string_view(); - let flags_array = args[3].as_string_view(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8, - DataType::Utf8, - DataType::LargeUtf8, - DataType::Utf8View, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string_view(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8, - DataType::Utf8View, - DataType::LargeUtf8, - DataType::Utf8View, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string_view(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8, - DataType::LargeUtf8, - DataType::LargeUtf8, - DataType::Utf8View, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string_view(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::LargeUtf8, - DataType::Utf8View, - DataType::Utf8, - 
DataType::Utf8View, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string_view(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::LargeUtf8, - DataType::LargeUtf8, - DataType::Utf8, - DataType::Utf8View, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string_view(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::LargeUtf8, - DataType::Utf8, - DataType::Utf8View, - DataType::Utf8View, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string_view(); - let flags_array = args[3].as_string_view(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::LargeUtf8, - DataType::Utf8View, - DataType::Utf8View, - DataType::Utf8View, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string_view(); - let flags_array = args[3].as_string_view(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::LargeUtf8, - DataType::LargeUtf8, - DataType::Utf8View, - DataType::Utf8View, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string_view(); - let flags_array = args[3].as_string_view(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::LargeUtf8, - DataType::Utf8, - DataType::LargeUtf8, - DataType::Utf8View, - ) 
=> { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string_view(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::LargeUtf8, - DataType::Utf8View, - DataType::LargeUtf8, - DataType::Utf8View, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string_view(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::LargeUtf8, - DataType::LargeUtf8, - DataType::LargeUtf8, - DataType::Utf8View, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string_view(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - // Cases with LargeUtf8 as 4th argument (flags) - ( - DataType::Utf8View, - DataType::Utf8, - DataType::Utf8, - DataType::LargeUtf8, - ) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8, - DataType::Utf8, - DataType::Utf8, - DataType::LargeUtf8, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::LargeUtf8, - DataType::Utf8, - DataType::Utf8, - DataType::LargeUtf8, - ) => 
{ - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8View, - DataType::Utf8View, - DataType::Utf8, - DataType::LargeUtf8, - ) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8View, - DataType::LargeUtf8, - DataType::Utf8, - DataType::LargeUtf8, - ) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8View, - DataType::Utf8, - DataType::Utf8View, - DataType::LargeUtf8, - ) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string_view(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8View, - DataType::Utf8View, - DataType::Utf8View, - DataType::LargeUtf8, - ) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string_view(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8View, - DataType::LargeUtf8, - DataType::Utf8View, - DataType::LargeUtf8, - ) => { - let string_array = 
args[0].as_string_view(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string_view(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8View, - DataType::Utf8, - DataType::LargeUtf8, - DataType::LargeUtf8, - ) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8View, - DataType::Utf8View, - DataType::LargeUtf8, - DataType::LargeUtf8, - ) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8View, - DataType::LargeUtf8, - DataType::LargeUtf8, - DataType::LargeUtf8, - ) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8, - DataType::Utf8View, - DataType::Utf8, - DataType::LargeUtf8, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8, - DataType::LargeUtf8, - DataType::Utf8, - DataType::LargeUtf8, - ) => { - let string_array = args[0].as_string::(); - let 
pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8, - DataType::Utf8, - DataType::Utf8View, - DataType::LargeUtf8, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string_view(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8, - DataType::Utf8View, - DataType::Utf8View, - DataType::LargeUtf8, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string_view(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8, - DataType::LargeUtf8, - DataType::Utf8View, - DataType::LargeUtf8, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string_view(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8, - DataType::Utf8, - DataType::LargeUtf8, - DataType::LargeUtf8, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8, - DataType::Utf8View, - DataType::LargeUtf8, - DataType::LargeUtf8, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string_view(); - let 
replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::Utf8, - DataType::LargeUtf8, - DataType::LargeUtf8, - DataType::LargeUtf8, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::LargeUtf8, - DataType::Utf8View, - DataType::Utf8, - DataType::LargeUtf8, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::LargeUtf8, - DataType::LargeUtf8, - DataType::Utf8, - DataType::LargeUtf8, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::LargeUtf8, - DataType::Utf8, - DataType::Utf8View, - DataType::LargeUtf8, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string_view(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::LargeUtf8, - DataType::Utf8View, - DataType::Utf8View, - DataType::LargeUtf8, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string_view(); - let replacement_array = 
args[2].as_string_view(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::LargeUtf8, - DataType::LargeUtf8, - DataType::Utf8View, - DataType::LargeUtf8, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string_view(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::LargeUtf8, - DataType::Utf8, - DataType::LargeUtf8, - DataType::LargeUtf8, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } - ( - DataType::LargeUtf8, - DataType::Utf8View, - DataType::LargeUtf8, - DataType::LargeUtf8, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( - string_array, - pattern_array, - replacement_array, - flags_array, - ) - } ( DataType::LargeUtf8, DataType::LargeUtf8, From fbb094a3499145e2ba70bc4030ce57a5048974f3 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Mon, 8 Sep 2025 07:01:21 -0400 Subject: [PATCH 17/21] Update string_view slt test. 
--- datafusion/sqllogictest/test_files/string/string_view.slt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/test_files/string/string_view.slt b/datafusion/sqllogictest/test_files/string/string_view.slt index a72c8f574484..7d10a0615d45 100644 --- a/datafusion/sqllogictest/test_files/string/string_view.slt +++ b/datafusion/sqllogictest/test_files/string/string_view.slt @@ -804,7 +804,7 @@ EXPLAIN SELECT FROM test; ---- logical_plan -01)Projection: regexp_replace(test.column1_utf8view, Utf8("^https?://(?:www\.)?([^/]+)/.*$"), Utf8("\1")) AS k +01)Projection: regexp_replace(test.column1_utf8view, Utf8View("^https?://(?:www\.)?([^/]+)/.*$"), Utf8View("\1")) AS k 02)--TableScan: test projection=[column1_utf8view] ## Ensure no casts for REPEAT From 32333a5e42d654336c780c2e37b4f08fff34d38f Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Mon, 8 Sep 2025 09:01:55 -0500 Subject: [PATCH 18/21] Reduce diff by returning to single function with a match arm instead of two. 
--- datafusion/functions/benches/regx.rs | 10 +- .../functions/src/regex/regexpreplace.rs | 254 +++++++++--------- 2 files changed, 130 insertions(+), 134 deletions(-) diff --git a/datafusion/functions/benches/regx.rs b/datafusion/functions/benches/regx.rs index fd5ed6ef2c4c..e8542a78fef0 100644 --- a/datafusion/functions/benches/regx.rs +++ b/datafusion/functions/benches/regx.rs @@ -26,7 +26,7 @@ use datafusion_functions::regex::regexpcount::regexp_count_func; use datafusion_functions::regex::regexpinstr::regexp_instr_func; use datafusion_functions::regex::regexplike::regexp_like; use datafusion_functions::regex::regexpmatch::regexp_match; -use datafusion_functions::regex::regexpreplace::regexp_replace_with_flags; +use datafusion_functions::regex::regexpreplace::regexp_replace; use rand::distr::Alphanumeric; use rand::prelude::IndexedRandom; use rand::rngs::ThreadRng; @@ -267,11 +267,11 @@ fn criterion_benchmark(c: &mut Criterion) { b.iter(|| { black_box( - regexp_replace_with_flags::( + regexp_replace::( data.as_string::(), regex.as_string::(), replacement.as_string::(), - flags.as_string::(), + Some(flags.as_string::()), ) .expect("regexp_replace should work on valid values"), ) @@ -289,11 +289,11 @@ fn criterion_benchmark(c: &mut Criterion) { b.iter(|| { black_box( - regexp_replace_with_flags::( + regexp_replace::( data.as_string_view(), regex.as_string_view(), &*replacement, - flags.as_string_view(), + Some(flags.as_string_view()), ) .expect("regexp_replace should work on valid values"), ) diff --git a/datafusion/functions/src/regex/regexpreplace.rs b/datafusion/functions/src/regex/regexpreplace.rs index 63bcfeedb961..4fcbccd86a9f 100644 --- a/datafusion/functions/src/regex/regexpreplace.rs +++ b/datafusion/functions/src/regex/regexpreplace.rs @@ -243,11 +243,11 @@ fn regex_replace_posix_groups(replacement: &str) -> String { /// # Ok(()) /// # } /// ``` -pub fn regexp_replace_with_flags<'a, T: OffsetSizeTrait, U, V, W, X>( +pub fn regexp_replace<'a, T: 
OffsetSizeTrait, U, V, W, X>( string_array: U, pattern_array: V, replacement_array: W, - flags_array: X, + flags_array: Option, ) -> Result where U: ArrayAccessor, @@ -266,132 +266,125 @@ where let string_array_iter = ArrayIter::new(string_array); let pattern_array_iter = ArrayIter::new(pattern_array); let replacement_array_iter = ArrayIter::new(replacement_array); - let flags_array_iter = ArrayIter::new(flags_array); - - let result_iter = string_array_iter - .zip(pattern_array_iter) - .zip(replacement_array_iter) - .zip(flags_array_iter) - .map(|(((string, pattern), replacement), flags)| { - match (string, pattern, replacement, flags) { - (Some(string), Some(pattern), Some(replacement), Some(flags)) => { - let replacement = regex_replace_posix_groups(replacement); - - // format flags into rust pattern - let (pattern, replace_all) = if flags == "g" { - (pattern.to_string(), true) - } else if flags.contains('g') { - ( - format!( - "(?{}){}", - flags.to_string().replace('g', ""), - pattern - ), - true, - ) - } else { - (format!("(?{flags}){pattern}"), false) - }; - - // if patterns hashmap already has regexp then use else create and return - let re = match patterns.get(&pattern) { - Some(re) => Ok(re), - None => match Regex::new(pattern.as_str()) { - Ok(re) => { - patterns.insert(pattern.clone(), re); - Ok(patterns.get(&pattern).unwrap()) - } - Err(err) => Err(DataFusionError::External(Box::new(err))), - }, - }; - Some(re.map(|re| { - if replace_all { - re.replace_all(string, replacement.as_str()) - } else { - re.replace(string, replacement.as_str()) + match flags_array { + None => { + let result_iter = string_array_iter + .zip(pattern_array_iter) + .zip(replacement_array_iter) + .map(|((string, pattern), replacement)| { + match (string, pattern, replacement) { + (Some(string), Some(pattern), Some(replacement)) => { + let replacement = regex_replace_posix_groups(replacement); + // if patterns hashmap already has regexp then use else create and return + let re = match 
patterns.get(pattern) { + Some(re) => Ok(re), + None => match Regex::new(pattern) { + Ok(re) => { + patterns.insert(pattern.to_string(), re); + Ok(patterns.get(pattern).unwrap()) + } + Err(err) => { + Err(DataFusionError::External(Box::new(err))) + } + }, + }; + + Some(re.map(|re| re.replace(string, replacement.as_str()))) + .transpose() } - })) - .transpose() + _ => Ok(None), + } + }); + + match datatype { + DataType::Utf8 | DataType::LargeUtf8 => { + let result = + result_iter.collect::>>()?; + Ok(Arc::new(result) as ArrayRef) + } + DataType::Utf8View => { + let result = result_iter.collect::>()?; + Ok(Arc::new(result) as ArrayRef) + } + other => { + exec_err!( + "Unsupported data type {other:?} for function regex_replace" + ) } - _ => Ok(None), } - }); - - match datatype { - DataType::Utf8 | DataType::LargeUtf8 => { - let result = result_iter.collect::>>()?; - Ok(Arc::new(result) as ArrayRef) - } - DataType::Utf8View => { - let result = result_iter.collect::>()?; - Ok(Arc::new(result) as ArrayRef) - } - other => { - exec_err!("Unsupported data type {other:?} for function regex_replace") } - } -} - -pub fn regexp_replace_without_flags<'a, T: OffsetSizeTrait, U, V, W>( - string_array: U, - pattern_array: V, - replacement_array: W, -) -> Result -where - U: ArrayAccessor, - V: ArrayAccessor, - W: ArrayAccessor, -{ - // Default implementation for regexp_replace, assumes all args are arrays - // and args is a sequence of 3 or 4 elements. 
- - // creating Regex is expensive so create hashmap for memoization - let mut patterns: HashMap = HashMap::new(); - - let datatype = string_array.data_type().to_owned(); - - let string_array_iter = ArrayIter::new(string_array); - let pattern_array_iter = ArrayIter::new(pattern_array); - let replacement_array_iter = ArrayIter::new(replacement_array); - - let result_iter = string_array_iter - .zip(pattern_array_iter) - .zip(replacement_array_iter) - .map(|((string, pattern), replacement)| { - match (string, pattern, replacement) { - (Some(string), Some(pattern), Some(replacement)) => { - let replacement = regex_replace_posix_groups(replacement); - // if patterns hashmap already has regexp then use else create and return - let re = match patterns.get(pattern) { - Some(re) => Ok(re), - None => match Regex::new(pattern) { - Ok(re) => { - patterns.insert(pattern.to_string(), re); - Ok(patterns.get(pattern).unwrap()) - } - Err(err) => Err(DataFusionError::External(Box::new(err))), - }, - }; + Some(flags_array) => { + let flags_array_iter = ArrayIter::new(flags_array); + + let result_iter = string_array_iter + .zip(pattern_array_iter) + .zip(replacement_array_iter) + .zip(flags_array_iter) + .map(|(((string, pattern), replacement), flags)| { + match (string, pattern, replacement, flags) { + (Some(string), Some(pattern), Some(replacement), Some(flags)) => { + let replacement = regex_replace_posix_groups(replacement); + + // format flags into rust pattern + let (pattern, replace_all) = if flags == "g" { + (pattern.to_string(), true) + } else if flags.contains('g') { + ( + format!( + "(?{}){}", + flags.to_string().replace('g', ""), + pattern + ), + true, + ) + } else { + (format!("(?{flags}){pattern}"), false) + }; + + // if patterns hashmap already has regexp then use else create and return + let re = match patterns.get(&pattern) { + Some(re) => Ok(re), + None => match Regex::new(pattern.as_str()) { + Ok(re) => { + patterns.insert(pattern.clone(), re); + 
Ok(patterns.get(&pattern).unwrap()) + } + Err(err) => { + Err(DataFusionError::External(Box::new(err))) + } + }, + }; + + Some(re.map(|re| { + if replace_all { + re.replace_all(string, replacement.as_str()) + } else { + re.replace(string, replacement.as_str()) + } + })) + .transpose() + } + _ => Ok(None), + } + }); - Some(re.map(|re| re.replace(string, replacement.as_str()))) - .transpose() + match datatype { + DataType::Utf8 | DataType::LargeUtf8 => { + let result = + result_iter.collect::>>()?; + Ok(Arc::new(result) as ArrayRef) + } + DataType::Utf8View => { + let result = result_iter.collect::>()?; + Ok(Arc::new(result) as ArrayRef) + } + other => { + exec_err!( + "Unsupported data type {other:?} for function regex_replace" + ) } - _ => Ok(None), } - }); - - match datatype { - DataType::Utf8 | DataType::LargeUtf8 => { - let result = result_iter.collect::>>()?; - Ok(Arc::new(result) as ArrayRef) - } - DataType::Utf8View => { - let result = result_iter.collect::>()?; - Ok(Arc::new(result) as ArrayRef) - } - other => { - exec_err!("Unsupported data type {other:?} for function regex_replace") } } } @@ -633,30 +626,33 @@ pub fn specialize_regexp_replace( let string_array = args[0].as_string::(); let pattern_array = args[1].as_string::(); let replacement_array = args[2].as_string::(); - regexp_replace_without_flags::( + regexp_replace::( string_array, pattern_array, replacement_array, + None::<&GenericStringArray>, ) } (DataType::Utf8View, DataType::Utf8View, DataType::Utf8View) => { let string_array = args[0].as_string_view(); let pattern_array = args[1].as_string_view(); let replacement_array = args[2].as_string_view(); - regexp_replace_without_flags::( + regexp_replace::( string_array, pattern_array, replacement_array, + None::<&StringViewArray>, ) } (DataType::LargeUtf8, DataType::LargeUtf8, DataType::LargeUtf8) => { let string_array = args[0].as_string::(); let pattern_array = args[1].as_string::(); let replacement_array = args[2].as_string::(); - 
regexp_replace_without_flags::( + regexp_replace::( string_array, pattern_array, replacement_array, + None::<&GenericStringArray>, ) } other => { @@ -677,11 +673,11 @@ pub fn specialize_regexp_replace( let pattern_array = args[1].as_string::(); let replacement_array = args[2].as_string::(); let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( + regexp_replace::( string_array, pattern_array, replacement_array, - flags_array, + Some(flags_array), ) } ( @@ -694,11 +690,11 @@ pub fn specialize_regexp_replace( let pattern_array = args[1].as_string_view(); let replacement_array = args[2].as_string_view(); let flags_array = args[3].as_string_view(); - regexp_replace_with_flags::( + regexp_replace::( string_array, pattern_array, replacement_array, - flags_array, + Some(flags_array), ) } ( @@ -711,11 +707,11 @@ pub fn specialize_regexp_replace( let pattern_array = args[1].as_string::(); let replacement_array = args[2].as_string::(); let flags_array = args[3].as_string::(); - regexp_replace_with_flags::( + regexp_replace::( string_array, pattern_array, replacement_array, - flags_array, + Some(flags_array), ) } other => { From a59284777c999609d860275c9f0248f94b734104 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Tue, 9 Sep 2025 06:21:52 -0700 Subject: [PATCH 19/21] Simplify template args. 
--- .../functions/src/regex/regexpreplace.rs | 88 ++++--------------- 1 file changed, 16 insertions(+), 72 deletions(-) diff --git a/datafusion/functions/src/regex/regexpreplace.rs b/datafusion/functions/src/regex/regexpreplace.rs index 4fcbccd86a9f..e8ef8d95135c 100644 --- a/datafusion/functions/src/regex/regexpreplace.rs +++ b/datafusion/functions/src/regex/regexpreplace.rs @@ -243,17 +243,14 @@ fn regex_replace_posix_groups(replacement: &str) -> String { /// # Ok(()) /// # } /// ``` -pub fn regexp_replace<'a, T: OffsetSizeTrait, U, V, W, X>( +pub fn regexp_replace<'a, T: OffsetSizeTrait, U>( string_array: U, - pattern_array: V, - replacement_array: W, - flags_array: Option, + pattern_array: U, + replacement_array: U, + flags_array: Option, ) -> Result where U: ArrayAccessor, - V: ArrayAccessor, - W: ArrayAccessor, - X: ArrayAccessor, { // Default implementation for regexp_replace, assumes all args are arrays // and args is a sequence of 3 or 4 elements. @@ -626,7 +623,7 @@ pub fn specialize_regexp_replace( let string_array = args[0].as_string::(); let pattern_array = args[1].as_string::(); let replacement_array = args[2].as_string::(); - regexp_replace::( + regexp_replace::( string_array, pattern_array, replacement_array, @@ -637,7 +634,7 @@ pub fn specialize_regexp_replace( let string_array = args[0].as_string_view(); let pattern_array = args[1].as_string_view(); let replacement_array = args[2].as_string_view(); - regexp_replace::( + regexp_replace::( string_array, pattern_array, replacement_array, @@ -648,7 +645,7 @@ pub fn specialize_regexp_replace( let string_array = args[0].as_string::(); let pattern_array = args[1].as_string::(); let replacement_array = args[2].as_string::(); - regexp_replace::( + regexp_replace::( string_array, pattern_array, replacement_array, @@ -673,7 +670,7 @@ pub fn specialize_regexp_replace( let pattern_array = args[1].as_string::(); let replacement_array = args[2].as_string::(); let flags_array = args[3].as_string::(); - 
regexp_replace::( + regexp_replace::( string_array, pattern_array, replacement_array, @@ -690,7 +687,7 @@ pub fn specialize_regexp_replace( let pattern_array = args[1].as_string_view(); let replacement_array = args[2].as_string_view(); let flags_array = args[3].as_string_view(); - regexp_replace::( + regexp_replace::( string_array, pattern_array, replacement_array, @@ -707,7 +704,7 @@ pub fn specialize_regexp_replace( let pattern_array = args[1].as_string::(); let replacement_array = args[2].as_string::(); let flags_array = args[3].as_string::(); - regexp_replace::( + regexp_replace::( string_array, pattern_array, replacement_array, @@ -731,7 +728,7 @@ mod tests { use super::*; macro_rules! static_pattern_regexp_replace { - ($name:ident, $T:ty, $U:ty, $O:ty) => { + ($name:ident, $T:ty, $O:ty) => { #[test] fn $name() { let values = vec!["abc", "acd", "abcd1234567890123", "123456789012abc"]; @@ -741,8 +738,8 @@ mod tests { vec!["afooc", "acd", "afoocd1234567890123", "123456789012afooc"]; let values = <$T>::from(values); - let patterns = <$U>::from(patterns); - let replacements = <$U>::from(replacement); + let patterns = <$T>::from(patterns); + let replacements = <$T>::from(replacement); let expected = <$T>::from(expected); let re = _regexp_replace_static_pattern_replace::<$O>(&[ @@ -757,62 +754,9 @@ mod tests { }; } - static_pattern_regexp_replace!( - string_array_string_args, - StringArray, - StringArray, - i32 - ); - static_pattern_regexp_replace!( - string_view_array_string_args, - StringViewArray, - StringArray, - i32 - ); - static_pattern_regexp_replace!( - large_string_array_string_args, - LargeStringArray, - StringArray, - i64 - ); - - static_pattern_regexp_replace!( - string_array_string_view_args, - StringArray, - StringViewArray, - i32 - ); - static_pattern_regexp_replace!( - string_view_array_string_view_args, - StringViewArray, - StringViewArray, - i32 - ); - static_pattern_regexp_replace!( - large_string_array_string_view_args, - LargeStringArray, - 
StringViewArray, - i64 - ); - - static_pattern_regexp_replace!( - string_array_large_string_args, - StringArray, - LargeStringArray, - i32 - ); - static_pattern_regexp_replace!( - string_view_array_large_string_args, - StringViewArray, - LargeStringArray, - i32 - ); - static_pattern_regexp_replace!( - large_string_array_large_string_args, - LargeStringArray, - LargeStringArray, - i64 - ); + static_pattern_regexp_replace!(string_array, StringArray, i32); + static_pattern_regexp_replace!(string_view_array, StringViewArray, i32); + static_pattern_regexp_replace!(large_string_array, LargeStringArray, i64); macro_rules! static_pattern_regexp_replace_with_flags { ($name:ident, $T:ty, $O: ty) => { From 77540755b6050298dc260068a61e32c5fdf59d53 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Tue, 9 Sep 2025 06:23:05 -0700 Subject: [PATCH 20/21] Fix benchmark compilation. --- datafusion/functions/benches/regx.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/functions/benches/regx.rs b/datafusion/functions/benches/regx.rs index e8542a78fef0..c18241f799e3 100644 --- a/datafusion/functions/benches/regx.rs +++ b/datafusion/functions/benches/regx.rs @@ -267,7 +267,7 @@ fn criterion_benchmark(c: &mut Criterion) { b.iter(|| { black_box( - regexp_replace::( + regexp_replace::( data.as_string::(), regex.as_string::(), replacement.as_string::(), @@ -289,7 +289,7 @@ fn criterion_benchmark(c: &mut Criterion) { b.iter(|| { black_box( - regexp_replace::( + regexp_replace::( data.as_string_view(), regex.as_string_view(), &*replacement, From e368cb2d406fe5e8ceb27aaa078ee0052f43902b Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Wed, 24 Sep 2025 09:32:26 -0400 Subject: [PATCH 21/21] Address PR feedback. 
--- .../functions/src/regex/regexpreplace.rs | 170 +++++++----------- 1 file changed, 62 insertions(+), 108 deletions(-) diff --git a/datafusion/functions/src/regex/regexpreplace.rs b/datafusion/functions/src/regex/regexpreplace.rs index e8ef8d95135c..ca3d19822e13 100644 --- a/datafusion/functions/src/regex/regexpreplace.rs +++ b/datafusion/functions/src/regex/regexpreplace.rs @@ -101,12 +101,8 @@ impl RegexpReplaceFunc { Self { signature: Signature::one_of( vec![ - Exact(vec![Utf8View, Utf8View, Utf8View]), - Exact(vec![LargeUtf8, LargeUtf8, LargeUtf8]), - Exact(vec![Utf8, Utf8, Utf8]), - Exact(vec![Utf8View, Utf8View, Utf8View, Utf8View]), - Exact(vec![LargeUtf8, LargeUtf8, LargeUtf8, LargeUtf8]), - Exact(vec![Utf8, Utf8, Utf8, Utf8]), + Uniform(3, vec![Utf8View, LargeUtf8, Utf8]), + Uniform(4, vec![Utf8View, LargeUtf8, Utf8]), ], Volatility::Immutable, ), @@ -613,109 +609,67 @@ pub fn specialize_regexp_replace( .map(|arg| arg.to_array(inferred_length)) .collect::>>()?; - if args.get(3).is_none() { - match ( - args[0].data_type(), - args[1].data_type(), - args[2].data_type(), - ) { - (DataType::Utf8, DataType::Utf8, DataType::Utf8) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - regexp_replace::( - string_array, - pattern_array, - replacement_array, - None::<&GenericStringArray>, - ) - } - (DataType::Utf8View, DataType::Utf8View, DataType::Utf8View) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string_view(); - regexp_replace::( - string_array, - pattern_array, - replacement_array, - None::<&StringViewArray>, - ) - } - (DataType::LargeUtf8, DataType::LargeUtf8, DataType::LargeUtf8) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - regexp_replace::( - string_array, - pattern_array, - 
replacement_array, - None::<&GenericStringArray>, - ) - } - other => { - exec_err!( - "Unsupported data type {other:?} for function regex_replace" - ) - } + match ( + args[0].data_type(), + args[1].data_type(), + args[2].data_type(), + args.get(3).map(|a| a.data_type()), + ) { + ( + DataType::Utf8, + DataType::Utf8, + DataType::Utf8, + Some(DataType::Utf8) | None, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args.get(3).map(|a| a.as_string::()); + regexp_replace::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) } - } else { - match ( - args[0].data_type(), - args[1].data_type(), - args[2].data_type(), - args[3].data_type(), - ) { - (DataType::Utf8, DataType::Utf8, DataType::Utf8, DataType::Utf8) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace::( - string_array, - pattern_array, - replacement_array, - Some(flags_array), - ) - } - ( - DataType::Utf8View, - DataType::Utf8View, - DataType::Utf8View, - DataType::Utf8View, - ) => { - let string_array = args[0].as_string_view(); - let pattern_array = args[1].as_string_view(); - let replacement_array = args[2].as_string_view(); - let flags_array = args[3].as_string_view(); - regexp_replace::( - string_array, - pattern_array, - replacement_array, - Some(flags_array), - ) - } - ( - DataType::LargeUtf8, - DataType::LargeUtf8, - DataType::LargeUtf8, - DataType::LargeUtf8, - ) => { - let string_array = args[0].as_string::(); - let pattern_array = args[1].as_string::(); - let replacement_array = args[2].as_string::(); - let flags_array = args[3].as_string::(); - regexp_replace::( - string_array, - pattern_array, - replacement_array, - Some(flags_array), - ) - } - other => { - exec_err!( - "Unsupported data type {other:?} for 
function regex_replace" - ) - } + ( + DataType::Utf8View, + DataType::Utf8View, + DataType::Utf8View, + Some(DataType::Utf8View) | None, + ) => { + let string_array = args[0].as_string_view(); + let pattern_array = args[1].as_string_view(); + let replacement_array = args[2].as_string_view(); + let flags_array = args.get(3).map(|a| a.as_string_view()); + regexp_replace::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + ( + DataType::LargeUtf8, + DataType::LargeUtf8, + DataType::LargeUtf8, + Some(DataType::LargeUtf8) | None, + ) => { + let string_array = args[0].as_string::(); + let pattern_array = args[1].as_string::(); + let replacement_array = args[2].as_string::(); + let flags_array = args.get(3).map(|a| a.as_string::()); + regexp_replace::( + string_array, + pattern_array, + replacement_array, + flags_array, + ) + } + other => { + exec_err!( + "Unsupported data type {other:?} for function regex_replace" + ) } } }