@@ -29,9 +29,6 @@ use arrow::array::RecordBatch;
2929use arrow:: datatypes:: SchemaRef ;
3030use datafusion:: catalog:: Session ;
3131use datafusion:: execution:: context:: SessionState ;
32- use datafusion:: execution:: memory_pool:: FairSpillPool ;
33- use datafusion:: execution:: runtime_env:: RuntimeEnvBuilder ;
34- use datafusion:: execution:: SessionStateBuilder ;
3532use delta_kernel:: engine:: arrow_conversion:: TryIntoArrow as _;
3633use delta_kernel:: expressions:: Scalar ;
3734use delta_kernel:: table_properties:: DataSkippingNumIndexedCols ;
@@ -51,7 +48,7 @@ use uuid::Uuid;
5148
5249use super :: write:: writer:: { PartitionWriter , PartitionWriterConfig } ;
5350use super :: { CustomExecuteHandler , Operation } ;
54- use crate :: delta_datafusion:: DeltaTableProvider ;
51+ use crate :: delta_datafusion:: { DeltaRuntimeEnvBuilder , DeltaSessionContext , DeltaTableProvider } ;
5552use crate :: errors:: { DeltaResult , DeltaTableError } ;
5653use crate :: kernel:: transaction:: { CommitBuilder , CommitProperties , DEFAULT_RETRIES , PROTOCOL } ;
5754use crate :: kernel:: EagerSnapshot ;
@@ -215,8 +212,6 @@ pub struct OptimizeBuilder<'a> {
215212 preserve_insertion_order : bool ,
216213 /// Maximum number of concurrent tasks (default is number of cpus)
217214 max_concurrent_tasks : usize ,
218- /// Maximum number of bytes allowed in memory before spilling to disk
219- max_spill_size : usize ,
220215 /// Optimize type
221216 optimize_type : OptimizeType ,
222217 /// Datafusion session state relevant for executing the input plan
@@ -234,6 +229,33 @@ impl super::Operation<()> for OptimizeBuilder<'_> {
234229 }
235230}
236231
232+ /// Create a SessionState configured for optimize operations with custom spill settings.
233+ ///
234+ /// This is the recommended way to configure memory and disk limits for optimize operations.
235+ /// The created SessionState should be passed to [`OptimizeBuilder`] via [`with_session_state`](OptimizeBuilder::with_session_state).
236+ ///
237+ /// # Arguments
238+ /// * `max_spill_size` - Maximum bytes in memory before spilling to disk. If `None`, uses DataFusion's default memory pool.
239+ /// * `max_temp_directory_size` - Maximum disk space for temporary spill files. If `None`, uses DataFusion's default disk manager.
240+ pub fn create_session_state_for_optimize (
241+ max_spill_size : Option < usize > ,
242+ max_temp_directory_size : Option < u64 > ,
243+ ) -> SessionState {
244+ if max_spill_size. is_none ( ) && max_temp_directory_size. is_none ( ) {
245+ return DeltaSessionContext :: new ( ) . state ( ) ;
246+ }
247+
248+ let mut builder = DeltaRuntimeEnvBuilder :: new ( ) ;
249+ if let Some ( spill_size) = max_spill_size {
250+ builder = builder. with_max_spill_size ( spill_size) ;
251+ }
252+ if let Some ( directory_size) = max_temp_directory_size {
253+ builder = builder. with_max_temp_directory_size ( directory_size) ;
254+ }
255+
256+ DeltaSessionContext :: with_runtime_env ( builder. build ( ) ) . state ( )
257+ }
258+
237259impl < ' a > OptimizeBuilder < ' a > {
238260 /// Create a new [`OptimizeBuilder`]
239261 pub fn new ( log_store : LogStoreRef , snapshot : EagerSnapshot ) -> Self {
@@ -246,7 +268,6 @@ impl<'a> OptimizeBuilder<'a> {
246268 commit_properties : CommitProperties :: default ( ) ,
247269 preserve_insertion_order : false ,
248270 max_concurrent_tasks : num_cpus:: get ( ) ,
249- max_spill_size : 20 * 1024 * 1024 * 1024 , // 20 GB.
250271 optimize_type : OptimizeType :: Compact ,
251272 min_commit_interval : None ,
252273 session : None ,
@@ -296,16 +317,6 @@ impl<'a> OptimizeBuilder<'a> {
296317 self
297318 }
298319
299- /// Max spill size
300- #[ deprecated(
301- since = "0.29.0" ,
302- note = "Pass in a `SessionState` configured with a `RuntimeEnv` and a `FairSpillPool`"
303- ) ]
304- pub fn with_max_spill_size ( mut self , max_spill_size : usize ) -> Self {
305- self . max_spill_size = max_spill_size;
306- self
307- }
308-
309320 /// Min commit interval
310321 pub fn with_min_commit_interval ( mut self , min_commit_interval : Duration ) -> Self {
311322 self . min_commit_interval = Some ( min_commit_interval) ;
@@ -349,17 +360,7 @@ impl<'a> std::future::IntoFuture for OptimizeBuilder<'a> {
349360 let session = this
350361 . session
351362 . and_then ( |session| session. as_any ( ) . downcast_ref :: < SessionState > ( ) . cloned ( ) )
352- . unwrap_or_else ( || {
353- let memory_pool = FairSpillPool :: new ( this. max_spill_size ) ;
354- let runtime = RuntimeEnvBuilder :: new ( )
355- . with_memory_pool ( Arc :: new ( memory_pool) )
356- . build_arc ( )
357- . unwrap ( ) ;
358- SessionStateBuilder :: new ( )
359- . with_default_features ( )
360- . with_runtime_env ( runtime)
361- . build ( )
362- } ) ;
363+ . unwrap_or_else ( || create_session_state_for_optimize ( None , None ) ) ;
363364 let plan = create_merge_plan (
364365 & this. log_store ,
365366 this. optimize_type ,
0 commit comments