1717
1818use std:: sync:: Arc ;
1919
20+ use arrow:: {
21+ array:: { AsArray , Float64Array } ,
22+ datatypes:: Float64Type ,
23+ } ;
2024use arrow_schema:: DataType ;
2125use datafusion:: datasource:: file_format:: options:: CsvReadOptions ;
2226
@@ -113,26 +117,19 @@ fn make_partition_evaluator() -> Result<Box<dyn PartitionEvaluator>> {
113117 Ok ( Box :: new ( MyPartitionEvaluator :: new ( ) ) )
114118}
115119
116-
117-
/// Lowest-level evaluator for the example window function.
///
/// An evaluator computes the value of the window function for the rows
/// of each distinct `PARTITION BY` value (each car type in our example).
/// It has no fields, so every instance is interchangeable.
#[derive(Clone, Debug)]
struct MyPartitionEvaluator {}

impl MyPartitionEvaluator {
    /// Builds a fresh evaluator; there is no state to initialize.
    fn new() -> Self {
        MyPartitionEvaluator {}
    }
}
132132
133-
134-
135-
136133/// These different evaluation methods are called depending on the various settings of WindowUDF
137134impl PartitionEvaluator for MyPartitionEvaluator {
138135 fn get_range ( & self , _idx : usize , _n_rows : usize ) -> Result < std:: ops:: Range < usize > > {
@@ -142,14 +139,48 @@ impl PartitionEvaluator for MyPartitionEvaluator {
142139 }
143140
144141 /// This function is given the values of each partition
145- fn evaluate ( & self , values : & [ arrow:: array:: ArrayRef ] , num_rows : usize ) -> Result < arrow:: array:: ArrayRef > {
146- println ! ( "processing num_rows={num_rows}, values:\n {values:#?}" ) ;
147- Err ( DataFusionError :: NotImplemented (
148- "evaluate is not implemented by default" . into ( ) ,
149- ) )
142+ fn evaluate (
143+ & self ,
144+ values : & [ arrow:: array:: ArrayRef ] ,
145+ _num_rows : usize ,
146+ ) -> Result < arrow:: array:: ArrayRef > {
147+ // datafusion has handled ensuring we get the correct input argument
148+ assert_eq ! ( values. len( ) , 1 ) ;
149+
150+ // For this example, we convert convert the input argument to an
151+ // array of floating point numbers to calculate a moving average
152+ let arr: & Float64Array = values[ 0 ] . as_ref ( ) . as_primitive :: < Float64Type > ( ) ;
153+
154+ // implement a simple moving average by averaging the current
155+ // value with the previous value
156+ //
157+ // value | avg
158+ // ------+------
159+ // 10 | 10
160+ // 20 | 15
161+ // 30 | 25
162+ // 30 | 30
163+ //
164+ let mut previous_value = None ;
165+ let new_values: Float64Array = arr
166+ . values ( )
167+ . iter ( )
168+ . map ( |& value| {
169+ let new_value = previous_value
170+ . map ( |previous_value| ( value + previous_value) / 2.0 )
171+ . unwrap_or ( value) ;
172+ previous_value = Some ( value) ;
173+ new_value
174+ } )
175+ . collect ( ) ;
176+
177+ Ok ( Arc :: new ( new_values) )
150178 }
151179
152- fn evaluate_stateful ( & mut self , _values : & [ arrow:: array:: ArrayRef ] ) -> Result < datafusion_common:: ScalarValue > {
180+ fn evaluate_stateful (
181+ & mut self ,
182+ _values : & [ arrow:: array:: ArrayRef ] ,
183+ ) -> Result < datafusion_common:: ScalarValue > {
153184 Err ( DataFusionError :: NotImplemented (
154185 "evaluate_stateful is not implemented by default" . into ( ) ,
155186 ) )
@@ -176,5 +207,4 @@ impl PartitionEvaluator for MyPartitionEvaluator {
176207 }
177208}
178209
179-
180210// TODO show how to use other evaluate methods
0 commit comments