@@ -4,6 +4,7 @@ use std::path::PathBuf;
44use std:: sync:: Arc ;
55
66use polars_core:: error:: PolarsResult ;
7+ use polars_core:: frame:: DataFrame ;
78use polars_core:: prelude:: DataType ;
89use polars_core:: scalar:: Scalar ;
910use polars_io:: cloud:: CloudOptions ;
@@ -99,6 +100,13 @@ impl SinkTarget {
99100 ) ) ,
100101 }
101102 }
103+
104+ pub fn to_display_string ( & self ) -> String {
105+ match self {
106+ Self :: Path ( p) => p. display ( ) . to_string ( ) ,
107+ Self :: Dyn ( _) => "dynamic-target" . to_string ( ) ,
108+ }
109+ }
102110}
103111
104112impl fmt:: Debug for SinkTarget {
@@ -260,6 +268,47 @@ pub enum PartitionTargetCallback {
260268 Python ( polars_utils:: python_function:: PythonFunction ) ,
261269}
262270
271+ #[ cfg_attr( feature = "python" , pyo3:: pyclass) ]
272+ pub struct SinkWritten {
273+ pub file_idx : usize ,
274+ pub part_idx : usize ,
275+ pub in_part_idx : usize ,
276+ pub keys : Vec < PartitionTargetContextKey > ,
277+ pub file_path : PathBuf ,
278+ pub full_path : PathBuf ,
279+ pub num_rows : usize ,
280+ pub file_size : usize ,
281+ pub gathered : Option < DataFrame > ,
282+ }
283+
284+ #[ cfg_attr( feature = "python" , pyo3:: pyclass) ]
285+ pub struct SinkFinishContext {
286+ pub written : Vec < SinkWritten > ,
287+ }
288+
289+ #[ derive( Clone , Debug , PartialEq ) ]
290+ pub enum SinkFinishCallback {
291+ Rust ( SpecialEq < Arc < dyn Fn ( DataFrame ) -> PolarsResult < ( ) > + Send + Sync > > ) ,
292+ #[ cfg( feature = "python" ) ]
293+ Python ( polars_utils:: python_function:: PythonFunction ) ,
294+ }
295+
296+ impl SinkFinishCallback {
297+ pub fn call ( & self , df : DataFrame ) -> PolarsResult < ( ) > {
298+ match self {
299+ Self :: Rust ( f) => f ( df) ,
300+ #[ cfg( feature = "python" ) ]
301+ Self :: Python ( f) => pyo3:: Python :: with_gil ( |py| {
302+ let converter =
303+ polars_utils:: python_convert_registry:: get_python_convert_registry ( ) ;
304+ let df = ( converter. to_py . df ) ( Box :: new ( df) as Box < dyn std:: any:: Any > ) ?;
305+ f. call1 ( py, ( df, ) ) ?;
306+ PolarsResult :: Ok ( ( ) )
307+ } ) ,
308+ }
309+ }
310+ }
311+
263312impl PartitionTargetCallback {
264313 pub fn call ( & self , ctx : PartitionTargetContext ) -> PolarsResult < SinkTarget > {
265314 match self {
@@ -277,6 +326,60 @@ impl PartitionTargetCallback {
277326 }
278327}
279328
/// Serialization is only supported for the `Python` variant (and therefore
/// only when the `python` feature is enabled); every other case yields a
/// custom serializer error that includes the `Debug` form of the value.
#[cfg(feature = "serde")]
impl serde::Serialize for SinkFinishCallback {
    fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        // Delegate to the Python function's own serialization when possible.
        #[cfg(feature = "python")]
        if let Self::Python(py_callback) = self {
            return py_callback.serialize(_serializer);
        }

        let message = format!("cannot serialize {self:?}");
        Err(serde::ser::Error::custom(message))
    }
}
345+
/// Deserializes a [`SinkFinishCallback`].
///
/// With the `python` feature the input is always read as a
/// `PythonFunction` and wrapped in the `Python` variant. Without it,
/// deserialization is unsupported and a custom error is returned.
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for SinkFinishCallback {
    fn deserialize<D>(_deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        #[cfg(feature = "python")]
        {
            Ok(Self::Python(
                polars_utils::python_function::PythonFunction::deserialize(_deserializer)?,
            ))
        }
        #[cfg(not(feature = "python"))]
        {
            use serde::de::Error;
            // Name the type actually being deserialized so the error is not misleading.
            Err(D::Error::custom("cannot deserialize SinkFinishCallback"))
        }
    }
}
367+
/// JSON-schema description of [`SinkFinishCallback`].
///
/// The schema body is that of `Vec<u8>`; the name and id identify this type.
#[cfg(feature = "dsl-schema")]
impl schemars::JsonSchema for SinkFinishCallback {
    fn schema_name() -> String {
        // Must agree with `schema_id` and must not reuse the name of
        // `PartitionTargetCallback`, which has its own schema.
        "SinkFinishCallback".to_owned()
    }

    fn schema_id() -> std::borrow::Cow<'static, str> {
        std::borrow::Cow::Borrowed(concat!(module_path!(), "::", "SinkFinishCallback"))
    }

    fn json_schema(generator: &mut schemars::r#gen::SchemaGenerator) -> schemars::schema::Schema {
        Vec::<u8>::json_schema(generator)
    }
}
382+
280383#[ cfg( feature = "serde" ) ]
281384impl < ' de > serde:: Deserialize < ' de > for PartitionTargetCallback {
282385 fn deserialize < D > ( _deserializer : D ) -> Result < Self , D :: Error >
@@ -331,6 +434,23 @@ impl schemars::JsonSchema for PartitionTargetCallback {
331434 }
332435}
333436
437+ #[ cfg_attr( feature = "serde" , derive( serde:: Serialize , serde:: Deserialize ) ) ]
438+ #[ cfg_attr( feature = "dsl-schema" , derive( schemars:: JsonSchema ) ) ]
439+ #[ derive( Clone , Debug , PartialEq ) ]
440+ pub struct SortColumn {
441+ pub expr : Expr ,
442+ pub descending : bool ,
443+ pub nulls_last : bool ,
444+ }
445+
446+ #[ cfg_attr( feature = "serde" , derive( serde:: Serialize , serde:: Deserialize ) ) ]
447+ #[ derive( Clone , Debug , PartialEq ) ]
448+ pub struct SortColumnIR {
449+ pub expr : ExprIR ,
450+ pub descending : bool ,
451+ pub nulls_last : bool ,
452+ }
453+
334454#[ cfg_attr( feature = "serde" , derive( serde:: Serialize , serde:: Deserialize ) ) ]
335455#[ cfg_attr( feature = "dsl-schema" , derive( schemars:: JsonSchema ) ) ]
336456#[ derive( Clone , Debug , PartialEq ) ]
@@ -341,6 +461,8 @@ pub struct PartitionSinkType {
341461 pub sink_options : SinkOptions ,
342462 pub variant : PartitionVariant ,
343463 pub cloud_options : Option < polars_io:: cloud:: CloudOptions > ,
464+ pub per_partition_sort_by : Option < Vec < SortColumn > > ,
465+ pub finish_callback : Option < SinkFinishCallback > ,
344466}
345467
346468#[ cfg_attr( feature = "serde" , derive( serde:: Serialize , serde:: Deserialize ) ) ]
@@ -352,6 +474,8 @@ pub struct PartitionSinkTypeIR {
352474 pub sink_options : SinkOptions ,
353475 pub variant : PartitionVariantIR ,
354476 pub cloud_options : Option < polars_io:: cloud:: CloudOptions > ,
477+ pub per_partition_sort_by : Option < Vec < SortColumnIR > > ,
478+ pub finish_callback : Option < SinkFinishCallback > ,
355479}
356480
357481#[ cfg_attr( feature = "serde" , derive( serde:: Serialize , serde:: Deserialize ) ) ]
@@ -392,23 +516,44 @@ pub enum PartitionVariantIR {
392516 } ,
393517}
394518
#[cfg(feature = "cse")]
impl SinkTypeIR {
    /// Feeds this sink type into `state`.
    ///
    /// The enum discriminant is hashed first. `Memory` contributes nothing
    /// further, `File` uses its `Hash` implementation, and `Partition`
    /// delegates to `PartitionSinkTypeIR::traverse_and_hash`, which needs
    /// `expr_arena`.
    pub(crate) fn traverse_and_hash<H: Hasher>(&self, expr_arena: &Arena<AExpr>, state: &mut H) {
        let variant_tag = std::mem::discriminant(self);
        variant_tag.hash(state);

        match self {
            Self::Memory => {},
            Self::File(file_sink) => file_sink.hash(state),
            Self::Partition(partition_sink) => {
                partition_sink.traverse_and_hash(expr_arena, state)
            },
        }
    }
}
530+
#[cfg(feature = "cse")]
impl PartitionSinkTypeIR {
    /// Feeds this partitioned sink description into `state`.
    ///
    /// Hashed, in order: `file_type`, `sink_options`, `variant` (via its own
    /// `traverse_and_hash`), `cloud_options`, then `per_partition_sort_by`
    /// as its `Option` discriminant followed — when present — by the number
    /// of sort columns and each column in turn.
    ///
    /// `finish_callback` is not part of the hash.
    pub(crate) fn traverse_and_hash<H: Hasher>(&self, expr_arena: &Arena<AExpr>, state: &mut H) {
        self.file_type.hash(state);
        self.sink_options.hash(state);
        self.variant.traverse_and_hash(expr_arena, state);
        self.cloud_options.hash(state);

        let sort_by = &self.per_partition_sort_by;
        std::mem::discriminant(sort_by).hash(state);
        if let Some(sort_columns) = sort_by {
            // The length is hashed before the elements.
            sort_columns.len().hash(state);
            for column in sort_columns.iter() {
                column.traverse_and_hash(expr_arena, state);
            }
        }
    }
}
411547
#[cfg(feature = "cse")]
impl SortColumnIR {
    /// Feeds this sort column into `state`: the expression first (via its own
    /// `traverse_and_hash`, which needs `expr_arena`), then `descending`,
    /// then `nulls_last`.
    pub(crate) fn traverse_and_hash<H: Hasher>(&self, expr_arena: &Arena<AExpr>, state: &mut H) {
        // Destructure exhaustively so a newly added field cannot be forgotten silently.
        let Self {
            expr,
            descending,
            nulls_last,
        } = self;

        expr.traverse_and_hash(expr_arena, state);
        descending.hash(state);
        nulls_last.hash(state);
    }
}
556+
412557impl PartitionVariantIR {
413558 #[ cfg( feature = "cse" ) ]
414559 pub ( crate ) fn traverse_and_hash < H : Hasher > ( & self , expr_arena : & Arena < AExpr > , state : & mut H ) {
0 commit comments