@@ -166,7 +166,8 @@ std::shared_ptr<array_info> do_arrow_compute_binary(
166166}
167167
168168std::shared_ptr<array_info> do_arrow_compute_unary (
169- std::shared_ptr<ExprResult> left_res, const std::string& comparator) {
169+ std::shared_ptr<ExprResult> left_res, const std::string& comparator,
170+ const arrow::compute::FunctionOptions* func_options) {
170171 // Try to convert the results of our children into array
171172 // or scalar results to see which one they are.
172173 std::shared_ptr<ArrayExprResult> left_as_array =
@@ -187,7 +188,7 @@ std::shared_ptr<array_info> do_arrow_compute_unary(
187188 }
188189
189190 arrow::Result<arrow::Datum> cmp_res =
190- arrow::compute::CallFunction (comparator, {src1});
191+ arrow::compute::CallFunction (comparator, {src1}, func_options );
191192 if (!cmp_res.ok ()) [[unlikely]] {
192193 throw std::runtime_error (
193194 " do_array_compute_unary: Error in Arrow compute: " +
@@ -665,6 +666,31 @@ std::shared_ptr<ExprResult> PhysicalArrowExpression::ProcessBatch(
665666 // which returns a struct. To match the output dtype of Pandas, we Cast
666667 // to Date32 instead.
667668 result = do_arrow_compute_cast (res, duckdb::LogicalType::DATE);
669+ } else if (scalar_func_data.arrow_func_name == " match_substring_regex" ) {
670+ if (!PyTuple_Check (scalar_func_data.args ) ||
671+ PyTuple_Size (scalar_func_data.args ) != 1 ) {
672+ throw std::runtime_error (
673+ " match_substring_regex args not a 1-element tuple." );
674+ }
675+
676+ // Get the first element (borrowed reference)
677+ PyObject* py_str = PyTuple_GetItem (scalar_func_data.args , 0 );
678+
679+ if (!PyUnicode_Check (py_str)) {
680+ throw std::runtime_error (
681+ " match_substring_regex args element is not a Python string." );
682+ }
683+
684+ // Convert to UTF-8 C string
685+ const char * c_str = PyUnicode_AsUTF8 (py_str);
686+ if (!c_str) {
687+ throw std::runtime_error (
688+ " match_substring_regex error extracting Python string." );
689+ }
690+
691+ arrow::compute::MatchSubstringOptions opts (c_str);
692+ result = do_arrow_compute_unary (res, scalar_func_data.arrow_func_name ,
693+ &opts);
668694 } else {
669695 result = do_arrow_compute_unary (res, scalar_func_data.arrow_func_name );
670696 }
0 commit comments