@@ -117,10 +117,10 @@ impl From<DataFusionError> for DeltaTableError {
117117/// Convenience trait for calling common methods on snapshot hierarchies
118118pub trait DataFusionMixins {
119119 /// The physical datafusion schema of a table
120- fn arrow_schema ( & self ) -> DeltaResult < ArrowSchemaRef > ;
120+ fn read_schema ( & self ) -> ArrowSchemaRef ;
121121
122122 /// Get the table schema as an [`ArrowSchemaRef`]
123- fn input_schema ( & self ) -> DeltaResult < ArrowSchemaRef > ;
123+ fn input_schema ( & self ) -> ArrowSchemaRef ;
124124
125125 /// Parse an expression string into a datafusion [`Expr`]
126126 fn parse_predicate_expression (
@@ -131,49 +131,77 @@ pub trait DataFusionMixins {
131131}
132132
133133impl DataFusionMixins for Snapshot {
134- fn arrow_schema ( & self ) -> DeltaResult < ArrowSchemaRef > {
135- _arrow_schema ( self . table_configuration ( ) , true )
134+ fn read_schema ( & self ) -> ArrowSchemaRef {
135+ _arrow_schema (
136+ self . arrow_schema ( ) ,
137+ self . metadata ( ) . partition_columns ( ) ,
138+ true ,
139+ )
136140 }
137141
138- fn input_schema ( & self ) -> DeltaResult < ArrowSchemaRef > {
139- _arrow_schema ( self . table_configuration ( ) , false )
142+ fn input_schema ( & self ) -> ArrowSchemaRef {
143+ _arrow_schema (
144+ self . arrow_schema ( ) ,
145+ self . metadata ( ) . partition_columns ( ) ,
146+ false ,
147+ )
140148 }
141149
142150 fn parse_predicate_expression (
143151 & self ,
144152 expr : impl AsRef < str > ,
145153 session : & impl Session ,
146154 ) -> DeltaResult < Expr > {
147- let schema = DFSchema :: try_from ( self . arrow_schema ( ) ? . as_ref ( ) . to_owned ( ) ) ?;
155+ let schema = DFSchema :: try_from ( self . read_schema ( ) . as_ref ( ) . to_owned ( ) ) ?;
148156 parse_predicate_expression ( & schema, expr, session)
149157 }
150158}
151159
152160impl DataFusionMixins for LogDataHandler < ' _ > {
153- fn arrow_schema ( & self ) -> DeltaResult < ArrowSchemaRef > {
154- _arrow_schema ( self . table_configuration ( ) , true )
161+ fn read_schema ( & self ) -> ArrowSchemaRef {
162+ _arrow_schema (
163+ Arc :: new (
164+ self . table_configuration ( )
165+ . schema ( )
166+ . as_ref ( )
167+ . try_into_arrow ( )
168+ . unwrap ( ) ,
169+ ) ,
170+ self . table_configuration ( ) . metadata ( ) . partition_columns ( ) ,
171+ true ,
172+ )
155173 }
156174
157- fn input_schema ( & self ) -> DeltaResult < ArrowSchemaRef > {
158- _arrow_schema ( self . table_configuration ( ) , false )
175+ fn input_schema ( & self ) -> ArrowSchemaRef {
176+ _arrow_schema (
177+ Arc :: new (
178+ self . table_configuration ( )
179+ . schema ( )
180+ . as_ref ( )
181+ . try_into_arrow ( )
182+ . unwrap ( ) ,
183+ ) ,
184+ self . table_configuration ( ) . metadata ( ) . partition_columns ( ) ,
185+ false ,
186+ )
159187 }
160188
161189 fn parse_predicate_expression (
162190 & self ,
163191 expr : impl AsRef < str > ,
164192 session : & impl Session ,
165193 ) -> DeltaResult < Expr > {
166- let schema = DFSchema :: try_from ( self . arrow_schema ( ) ? . as_ref ( ) . to_owned ( ) ) ?;
194+ let schema = DFSchema :: try_from ( self . read_schema ( ) . as_ref ( ) . to_owned ( ) ) ?;
167195 parse_predicate_expression ( & schema, expr, session)
168196 }
169197}
170198
171199impl DataFusionMixins for EagerSnapshot {
172- fn arrow_schema ( & self ) -> DeltaResult < ArrowSchemaRef > {
173- self . snapshot ( ) . arrow_schema ( )
200+ fn read_schema ( & self ) -> ArrowSchemaRef {
201+ self . snapshot ( ) . read_schema ( )
174202 }
175203
176- fn input_schema ( & self ) -> DeltaResult < ArrowSchemaRef > {
204+ fn input_schema ( & self ) -> ArrowSchemaRef {
177205 self . snapshot ( ) . input_schema ( )
178206 }
179207
@@ -187,22 +215,20 @@ impl DataFusionMixins for EagerSnapshot {
187215}
188216
189217fn _arrow_schema (
190- snapshot : & TableConfiguration ,
218+ schema : SchemaRef ,
219+ partition_columns : & [ String ] ,
191220 wrap_partitions : bool ,
192- ) -> DeltaResult < ArrowSchemaRef > {
193- let meta = snapshot. metadata ( ) ;
194- let schema = snapshot. schema ( ) ;
195-
221+ ) -> ArrowSchemaRef {
196222 let fields = schema
197223 . fields ( )
198- . filter ( |f| !meta. partition_columns ( ) . contains ( & f. name ( ) . to_string ( ) ) )
199- . map ( |f| f. try_into_arrow ( ) )
224+ . into_iter ( )
225+ . filter ( |f| !partition_columns. contains ( & f. name ( ) . to_string ( ) ) )
226+ . cloned ( )
200227 . chain (
201228 // We need stable order between logical and physical schemas, but the order of
202229 // partitioning columns is not always the same in the json schema and the array
203- meta. partition_columns ( ) . iter ( ) . map ( |partition_col| {
204- let f = schema. field ( partition_col) . unwrap ( ) ;
205- let field: Field = f. try_into_arrow ( ) ?;
230+ partition_columns. iter ( ) . map ( |partition_col| {
231+ let field = schema. field_with_name ( partition_col) . unwrap ( ) ;
206232 let corrected = if wrap_partitions {
207233 match field. data_type ( ) {
208234 // Only dictionary-encode types that may be large
@@ -218,12 +244,11 @@ fn _arrow_schema(
218244 } else {
219245 field. data_type ( ) . clone ( )
220246 } ;
221- Ok ( field. with_data_type ( corrected) )
247+ Arc :: new ( field. clone ( ) . with_data_type ( corrected) )
222248 } ) ,
223249 )
224- . collect :: < Result < Vec < Field > , _ > > ( ) ?;
225-
226- Ok ( Arc :: new ( ArrowSchema :: new ( fields) ) )
250+ . collect :: < Vec < _ > > ( ) ;
251+ Arc :: new ( ArrowSchema :: new ( fields) )
227252}
228253
229254pub ( crate ) fn files_matching_predicate < ' a > (
@@ -234,8 +259,8 @@ pub(crate) fn files_matching_predicate<'a>(
234259 ( !filters. is_empty ( ) ) . then_some ( conjunction ( filters. iter ( ) . cloned ( ) ) )
235260 {
236261 let expr = SessionContext :: new ( )
237- . create_physical_expr ( predicate, & log_data. arrow_schema ( ) ? . to_dfschema ( ) ?) ?;
238- let pruning_predicate = PruningPredicate :: try_new ( expr, log_data. arrow_schema ( ) ? ) ?;
262+ . create_physical_expr ( predicate, & log_data. read_schema ( ) . to_dfschema ( ) ?) ?;
263+ let pruning_predicate = PruningPredicate :: try_new ( expr, log_data. read_schema ( ) ) ?;
239264 let mask = pruning_predicate. prune ( & log_data) ?;
240265
241266 Ok ( Either :: Left ( log_data. into_iter ( ) . zip ( mask) . filter_map (
@@ -294,7 +319,7 @@ pub(crate) fn df_logical_schema(
294319) -> DeltaResult < SchemaRef > {
295320 let input_schema = match schema {
296321 Some ( schema) => schema,
297- None => snapshot. input_schema ( ) ? ,
322+ None => snapshot. input_schema ( ) ,
298323 } ;
299324 let table_partition_cols = snapshot. metadata ( ) . partition_columns ( ) ;
300325
0 commit comments