From c239e094b62e4bcb329a1b2b170f6e1cfc941ff9 Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Sun, 5 Jan 2025 12:01:38 +0200 Subject: [PATCH 1/2] extract agg_funcs expressions to folders based on spark grouping --- native/spark-expr/src/{ => agg_funcs}/avg.rs | 0 .../src/{ => agg_funcs}/avg_decimal.rs | 0 .../src/{ => agg_funcs}/correlation.rs | 4 +-- .../src/{ => agg_funcs}/covariance.rs | 0 native/spark-expr/src/agg_funcs/mod.rs | 32 +++++++++++++++++++ .../spark-expr/src/{ => agg_funcs}/stddev.rs | 2 +- .../src/{ => agg_funcs}/sum_decimal.rs | 0 .../src/{ => agg_funcs}/variance.rs | 0 native/spark-expr/src/lib.rs | 15 +++------ 9 files changed, 40 insertions(+), 13 deletions(-) rename native/spark-expr/src/{ => agg_funcs}/avg.rs (100%) rename native/spark-expr/src/{ => agg_funcs}/avg_decimal.rs (100%) rename native/spark-expr/src/{ => agg_funcs}/correlation.rs (98%) rename native/spark-expr/src/{ => agg_funcs}/covariance.rs (100%) create mode 100644 native/spark-expr/src/agg_funcs/mod.rs rename native/spark-expr/src/{ => agg_funcs}/stddev.rs (99%) rename native/spark-expr/src/{ => agg_funcs}/sum_decimal.rs (100%) rename native/spark-expr/src/{ => agg_funcs}/variance.rs (100%) diff --git a/native/spark-expr/src/avg.rs b/native/spark-expr/src/agg_funcs/avg.rs similarity index 100% rename from native/spark-expr/src/avg.rs rename to native/spark-expr/src/agg_funcs/avg.rs diff --git a/native/spark-expr/src/avg_decimal.rs b/native/spark-expr/src/agg_funcs/avg_decimal.rs similarity index 100% rename from native/spark-expr/src/avg_decimal.rs rename to native/spark-expr/src/agg_funcs/avg_decimal.rs diff --git a/native/spark-expr/src/correlation.rs b/native/spark-expr/src/agg_funcs/correlation.rs similarity index 98% rename from native/spark-expr/src/correlation.rs rename to native/spark-expr/src/agg_funcs/correlation.rs index e4ddab95de..5d6f9e0b43 100644 --- a/native/spark-expr/src/correlation.rs +++ b/native/spark-expr/src/agg_funcs/correlation.rs @@ -19,8 +19,8 @@ use arrow::compute::{and, filter, is_not_null}; use std::{any::Any, sync::Arc}; -use crate::covariance::CovarianceAccumulator; -use crate::stddev::StddevAccumulator; +use crate::agg_funcs::covariance::CovarianceAccumulator; +use crate::agg_funcs::stddev::StddevAccumulator; use arrow::{ array::ArrayRef, datatypes::{DataType, Field}, diff --git a/native/spark-expr/src/covariance.rs b/native/spark-expr/src/agg_funcs/covariance.rs similarity index 100% rename from native/spark-expr/src/covariance.rs rename to native/spark-expr/src/agg_funcs/covariance.rs diff --git a/native/spark-expr/src/agg_funcs/mod.rs b/native/spark-expr/src/agg_funcs/mod.rs new file mode 100644 index 0000000000..252da78890 --- /dev/null +++ b/native/spark-expr/src/agg_funcs/mod.rs @@ -0,0 +1,32 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +mod avg; +mod avg_decimal; +mod correlation; +mod covariance; +mod stddev; +mod sum_decimal; +mod variance; + +pub use avg::Avg; +pub use avg_decimal::AvgDecimal; +pub use correlation::Correlation; +pub use covariance::Covariance; +pub use stddev::Stddev; +pub use sum_decimal::SumDecimal; +pub use variance::Variance; diff --git a/native/spark-expr/src/stddev.rs b/native/spark-expr/src/agg_funcs/stddev.rs similarity index 99% rename from native/spark-expr/src/stddev.rs rename to native/spark-expr/src/agg_funcs/stddev.rs index 1ec5ffb69a..39dffa1c8e 100644 --- a/native/spark-expr/src/stddev.rs +++ b/native/spark-expr/src/agg_funcs/stddev.rs @@ -17,7 +17,7 @@ use std::{any::Any, sync::Arc}; -use crate::variance::VarianceAccumulator; +use crate::agg_funcs::variance::VarianceAccumulator; use arrow::{ array::ArrayRef, datatypes::{DataType, Field}, diff --git a/native/spark-expr/src/sum_decimal.rs b/native/spark-expr/src/agg_funcs/sum_decimal.rs similarity index 100% rename from native/spark-expr/src/sum_decimal.rs rename to native/spark-expr/src/agg_funcs/sum_decimal.rs diff --git a/native/spark-expr/src/variance.rs b/native/spark-expr/src/agg_funcs/variance.rs similarity index 100% rename from native/spark-expr/src/variance.rs rename to native/spark-expr/src/agg_funcs/variance.rs diff --git a/native/spark-expr/src/lib.rs b/native/spark-expr/src/lib.rs index 9cf0de30b7..055d390398 100644 --- a/native/spark-expr/src/lib.rs +++ b/native/spark-expr/src/lib.rs @@ -23,18 +23,10 @@ mod cast; mod error; mod if_expr; -mod avg; -pub use avg::Avg; mod bitwise_not; pub use bitwise_not::{bitwise_not, BitwiseNotExpr}; -mod avg_decimal; -pub use avg_decimal::AvgDecimal; mod checkoverflow; pub use checkoverflow::CheckOverflow; -mod correlation; -pub use correlation::Correlation; -mod covariance; -pub use covariance::Covariance; mod strings; pub use strings::{Contains, EndsWith, Like, StartsWith, StringSpaceExpr, SubstringExpr}; mod kernels; @@ -53,6 +45,8 @@ mod struct_funcs; mod sum_decimal; pub use sum_decimal::SumDecimal; mod negative; +pub mod spark_hash; +mod structs; pub use negative::{create_negate_expr, NegativeExpr}; mod normalize_nan; mod temporal; @@ -65,9 +59,10 @@ pub use unbound::UnboundColumn; pub mod utils; pub use normalize_nan::NormalizeNaNAndZero; -mod variance; -pub use variance::Variance; +mod agg_funcs; mod comet_scalar_funcs; +pub use agg_funcs::*; + pub use cast::{spark_cast, Cast, SparkCastOptions}; pub use comet_scalar_funcs::create_comet_physical_fun; pub use error::{SparkError, SparkResult}; From 58473d981f893e7c071b2277840016e4b6e2cfd0 Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Tue, 7 Jan 2025 11:34:55 +0000 Subject: [PATCH 2/2] fix rebase --- native/spark-expr/src/lib.rs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/native/spark-expr/src/lib.rs b/native/spark-expr/src/lib.rs index 055d390398..3c2a701970 100644 --- a/native/spark-expr/src/lib.rs +++ b/native/spark-expr/src/lib.rs @@ -38,15 +38,9 @@ mod static_invoke; pub use schema_adapter::SparkSchemaAdapterFactory; pub use static_invoke::*; -pub mod spark_hash; -mod stddev; -pub use stddev::Stddev; -mod struct_funcs; -mod sum_decimal; -pub use sum_decimal::SumDecimal; mod negative; pub mod spark_hash; -mod structs; +mod struct_funcs; pub use negative::{create_negate_expr, NegativeExpr}; mod normalize_nan; mod temporal;