diff --git a/Cargo.lock b/Cargo.lock index f371a1822b50..4e2827580ea5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2463,7 +2463,6 @@ dependencies = [ "datafusion-functions", "datafusion-functions-aggregate-common", "datafusion-physical-expr-common", - "half", "hashbrown 0.14.5", "indexmap 2.12.1", "insta", diff --git a/datafusion-examples/examples/custom_data_source/csv_json_opener.rs b/datafusion-examples/examples/custom_data_source/csv_json_opener.rs index 6fad8f4d5824..48b885839f13 100644 --- a/datafusion-examples/examples/custom_data_source/csv_json_opener.rs +++ b/datafusion-examples/examples/custom_data_source/csv_json_opener.rs @@ -64,22 +64,22 @@ async fn csv_opener() -> Result<()> { ..Default::default() }; - let scan_config = FileScanConfigBuilder::new( - ObjectStoreUrl::local_filesystem(), - Arc::new(CsvSource::new(Arc::clone(&schema)).with_csv_options(options.clone())), - ) - .with_projection_indices(Some(vec![12, 0])) - .with_limit(Some(5)) - .with_file(PartitionedFile::new(path.display().to_string(), 10)) - .build(); - - let config = CsvSource::new(Arc::clone(&schema)) + let source = CsvSource::new(Arc::clone(&schema)) .with_csv_options(options) .with_comment(Some(b'#')) - .with_batch_size(8192) - .with_projection(&scan_config); + .with_batch_size(8192); + + let scan_config = + FileScanConfigBuilder::new(ObjectStoreUrl::local_filesystem(), source) + .with_projection_indices(Some(vec![12, 0]))? + .with_limit(Some(5)) + .with_file(PartitionedFile::new(path.display().to_string(), 10)) + .build(); - let opener = config.create_file_opener(object_store, &scan_config, 0); + let opener = + scan_config + .file_source() + .create_file_opener(object_store, &scan_config, 0)?; let mut result = vec![]; let mut stream = @@ -133,7 +133,7 @@ async fn json_opener() -> Result<()> { ObjectStoreUrl::local_filesystem(), Arc::new(JsonSource::new(schema)), ) - .with_projection_indices(Some(vec![1, 0])) + .with_projection_indices(Some(vec![1, 0]))? .with_limit(Some(5)) .with_file(PartitionedFile::new(path.to_string(), 10)) .build(); diff --git a/datafusion-examples/examples/custom_data_source/default_column_values.rs b/datafusion-examples/examples/custom_data_source/default_column_values.rs index 19d00e0a0d6f..9fe816502565 100644 --- a/datafusion-examples/examples/custom_data_source/default_column_values.rs +++ b/datafusion-examples/examples/custom_data_source/default_column_values.rs @@ -258,7 +258,7 @@ impl TableProvider for DefaultValueTableProvider { ObjectStoreUrl::parse("memory://")?, Arc::new(parquet_source), ) - .with_projection_indices(projection.cloned()) + .with_projection_indices(projection.cloned())? .with_limit(limit) .with_file_group(file_group) .with_expr_adapter(Some(Arc::new(DefaultValuePhysicalExprAdapterFactory) as _)); diff --git a/datafusion-examples/examples/data_io/parquet_advanced_index.rs b/datafusion-examples/examples/data_io/parquet_advanced_index.rs index 304e490bd63b..caa3be2111e3 100644 --- a/datafusion-examples/examples/data_io/parquet_advanced_index.rs +++ b/datafusion-examples/examples/data_io/parquet_advanced_index.rs @@ -502,7 +502,7 @@ impl TableProvider for IndexTableProvider { ); let file_scan_config = FileScanConfigBuilder::new(object_store_url, file_source) .with_limit(limit) - .with_projection_indices(projection.cloned()) + .with_projection_indices(projection.cloned())? 
.with_file(partitioned_file) .build(); diff --git a/datafusion-examples/examples/data_io/parquet_index.rs b/datafusion-examples/examples/data_io/parquet_index.rs index 7c708046f8a8..5f6fe3e2975a 100644 --- a/datafusion-examples/examples/data_io/parquet_index.rs +++ b/datafusion-examples/examples/data_io/parquet_index.rs @@ -247,7 +247,7 @@ impl TableProvider for IndexTableProvider { Arc::new(ParquetSource::new(self.schema()).with_predicate(predicate)); let mut file_scan_config_builder = FileScanConfigBuilder::new(object_store_url, source) - .with_projection_indices(projection.cloned()) + .with_projection_indices(projection.cloned())? .with_limit(limit); // Transform to the format needed to pass to DataSourceExec diff --git a/datafusion/catalog-listing/src/table.rs b/datafusion/catalog-listing/src/table.rs index 33d5c86bf88d..cbe538ae35cc 100644 --- a/datafusion/catalog-listing/src/table.rs +++ b/datafusion/catalog-listing/src/table.rs @@ -504,7 +504,7 @@ impl TableProvider for ListingTable { .with_file_groups(partitioned_file_lists) .with_constraints(self.constraints.clone()) .with_statistics(statistics) - .with_projection_indices(projection) + .with_projection_indices(projection)? .with_limit(limit) .with_output_ordering(output_ordering) .with_expr_adapter(self.expr_adapter_factory.clone()) diff --git a/datafusion/core/src/dataframe/parquet.rs b/datafusion/core/src/dataframe/parquet.rs index cb8a6cf29541..6edf628e2d6d 100644 --- a/datafusion/core/src/dataframe/parquet.rs +++ b/datafusion/core/src/dataframe/parquet.rs @@ -150,7 +150,7 @@ mod tests { let plan = df.explain(false, false)?.collect().await?; // Filters all the way to Parquet let formatted = pretty::pretty_format_batches(&plan)?.to_string(); - assert!(formatted.contains("FilterExec: id@0 = 1")); + assert!(formatted.contains("FilterExec: id@0 = 1"), "{formatted}"); Ok(()) } diff --git a/datafusion/core/src/datasource/file_format/mod.rs b/datafusion/core/src/datasource/file_format/mod.rs index 7c55d452c4e1..2756ea21cd00 100644 --- a/datafusion/core/src/datasource/file_format/mod.rs +++ b/datafusion/core/src/datasource/file_format/mod.rs @@ -92,7 +92,7 @@ pub(crate) mod test_util { ) .with_file_groups(file_groups) .with_statistics(statistics) - .with_projection_indices(projection) + .with_projection_indices(projection)? .with_limit(limit) .build(), ) diff --git a/datafusion/core/src/datasource/physical_plan/avro.rs b/datafusion/core/src/datasource/physical_plan/avro.rs index 1cf8c573acd9..b97ab0e9cacf 100644 --- a/datafusion/core/src/datasource/physical_plan/avro.rs +++ b/datafusion/core/src/datasource/physical_plan/avro.rs @@ -84,7 +84,7 @@ mod tests { let source = Arc::new(AvroSource::new(Arc::clone(&file_schema))); let conf = FileScanConfigBuilder::new(ObjectStoreUrl::local_filesystem(), source) .with_file(meta.into()) - .with_projection_indices(Some(vec![0, 1, 2])) + .with_projection_indices(Some(vec![0, 1, 2]))? .build(); let source_exec = DataSourceExec::from_data_source(conf); @@ -156,7 +156,7 @@ mod tests { let source = Arc::new(AvroSource::new(Arc::clone(&file_schema))); let conf = FileScanConfigBuilder::new(object_store_url, source) .with_file(meta.into()) - .with_projection_indices(projection) + .with_projection_indices(projection)? 
.build(); let source_exec = DataSourceExec::from_data_source(conf); @@ -231,7 +231,7 @@ mod tests { let conf = FileScanConfigBuilder::new(object_store_url, source) // select specific columns of the files as well as the partitioning // column which is supposed to be the last column in the table schema. - .with_projection_indices(projection) + .with_projection_indices(projection)? .with_file(partitioned_file) .build(); diff --git a/datafusion/core/src/datasource/physical_plan/csv.rs b/datafusion/core/src/datasource/physical_plan/csv.rs index 1af6b330fd11..660be4faffbc 100644 --- a/datafusion/core/src/datasource/physical_plan/csv.rs +++ b/datafusion/core/src/datasource/physical_plan/csv.rs @@ -126,10 +126,10 @@ mod tests { let source = Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options)); let config = - FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)) + FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)?) .with_file_compression_type(file_compression_type) .with_newlines_in_values(false) - .with_projection_indices(Some(vec![0, 2, 4])) + .with_projection_indices(Some(vec![0, 2, 4]))? .build(); assert_eq!(13, config.file_schema().fields().len()); @@ -199,10 +199,10 @@ mod tests { let source = Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options)); let config = - FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)) + FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)?) .with_newlines_in_values(false) .with_file_compression_type(file_compression_type.to_owned()) - .with_projection_indices(Some(vec![4, 0, 2])) + .with_projection_indices(Some(vec![4, 0, 2]))? .build(); assert_eq!(13, config.file_schema().fields().len()); let csv = DataSourceExec::from_data_source(config); @@ -271,7 +271,7 @@ mod tests { let source = Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options)); let config = - FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)) + FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)?) .with_newlines_in_values(false) .with_file_compression_type(file_compression_type.to_owned()) .with_limit(Some(5)) @@ -342,7 +342,7 @@ mod tests { let source = Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options)); let config = - FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)) + FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)?) .with_newlines_in_values(false) .with_file_compression_type(file_compression_type.to_owned()) .with_limit(Some(5)) @@ -411,12 +411,12 @@ mod tests { let source = Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options)); let config = - FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)) + FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)?) .with_newlines_in_values(false) .with_file_compression_type(file_compression_type.to_owned()) // We should be able to project on the partition column // Which is supposed to be after the file fields - .with_projection_indices(Some(vec![0, num_file_schema_fields])) + .with_projection_indices(Some(vec![0, num_file_schema_fields]))? 
.build(); // we don't have `/date=xx/` in the path but that is ok because @@ -517,7 +517,7 @@ mod tests { let source = Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options)); let config = - FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)) + FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)?) .with_newlines_in_values(false) .with_file_compression_type(file_compression_type.to_owned()) .build(); diff --git a/datafusion/core/src/datasource/physical_plan/json.rs b/datafusion/core/src/datasource/physical_plan/json.rs index f36708901a1d..3efea0330258 100644 --- a/datafusion/core/src/datasource/physical_plan/json.rs +++ b/datafusion/core/src/datasource/physical_plan/json.rs @@ -302,6 +302,7 @@ mod tests { let conf = FileScanConfigBuilder::new(object_store_url, source) .with_file_groups(file_groups) .with_projection_indices(Some(vec![0, 2])) + .unwrap() .with_file_compression_type(file_compression_type.to_owned()) .build(); let exec = DataSourceExec::from_data_source(conf); @@ -349,7 +350,7 @@ mod tests { let source = Arc::new(JsonSource::new(Arc::clone(&file_schema))); let conf = FileScanConfigBuilder::new(object_store_url, source) .with_file_groups(file_groups) - .with_projection_indices(Some(vec![3, 0, 2])) + .with_projection_indices(Some(vec![3, 0, 2]))? .with_file_compression_type(file_compression_type.to_owned()) .build(); let exec = DataSourceExec::from_data_source(conf); diff --git a/datafusion/core/src/datasource/physical_plan/parquet.rs b/datafusion/core/src/datasource/physical_plan/parquet.rs index b27dcf56e33c..90953e3f5df9 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet.rs @@ -198,6 +198,7 @@ mod tests { FileScanConfigBuilder::new(ObjectStoreUrl::local_filesystem(), source) .with_file_group(file_group) .with_projection_indices(self.projection.clone()) + .unwrap() .build(); DataSourceExec::from_data_source(base_config) } @@ -1664,6 +1665,7 @@ mod tests { .with_file(partitioned_file) // file has 10 cols so index 12 should be month and 13 should be day .with_projection_indices(Some(vec![0, 1, 2, 12, 13])) + .unwrap() .build(); let parquet_exec = DataSourceExec::from_data_source(config); diff --git a/datafusion/core/src/datasource/view_test.rs b/datafusion/core/src/datasource/view_test.rs index 85ad9ff664ad..7227294ba1f1 100644 --- a/datafusion/core/src/datasource/view_test.rs +++ b/datafusion/core/src/datasource/view_test.rs @@ -358,7 +358,10 @@ mod tests { .to_string(); assert!(formatted.contains("DataSourceExec: ")); assert!(formatted.contains("file_type=parquet")); - assert!(formatted.contains("projection=[bool_col, int_col], limit=10")); + assert!( + formatted.contains("projection=[bool_col, int_col], limit=10"), + "{formatted}" + ); Ok(()) } diff --git a/datafusion/core/src/test/mod.rs b/datafusion/core/src/test/mod.rs index 2bf9c89576e9..b0ff3eb3ae41 100644 --- a/datafusion/core/src/test/mod.rs +++ b/datafusion/core/src/test/mod.rs @@ -105,9 +105,10 @@ pub fn scan_partitioned_csv( }; let table_schema = TableSchema::from_file_schema(schema); let source = Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options)); - let config = FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)) - .with_file_compression_type(FileCompressionType::UNCOMPRESSED) - .build(); + let config = + FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)?) 
+ .with_file_compression_type(FileCompressionType::UNCOMPRESSED) + .build(); Ok(DataSourceExec::from_data_source(config)) } diff --git a/datafusion/core/tests/parquet/schema_coercion.rs b/datafusion/core/tests/parquet/schema_coercion.rs index 51e5242cbafd..e8666f07595e 100644 --- a/datafusion/core/tests/parquet/schema_coercion.rs +++ b/datafusion/core/tests/parquet/schema_coercion.rs @@ -122,6 +122,7 @@ async fn multi_parquet_coercion_projection() { ) .with_file_group(file_group) .with_projection_indices(Some(vec![1, 0, 2])) + .unwrap() .build(); let parquet_exec = DataSourceExec::from_data_source(config); diff --git a/datafusion/core/tests/physical_optimizer/filter_pushdown/util.rs b/datafusion/core/tests/physical_optimizer/filter_pushdown/util.rs index 30fd86440566..c32f7b2d0ba9 100644 --- a/datafusion/core/tests/physical_optimizer/filter_pushdown/util.rs +++ b/datafusion/core/tests/physical_optimizer/filter_pushdown/util.rs @@ -141,14 +141,14 @@ impl FileSource for TestSource { _object_store: Arc, _base_config: &FileScanConfig, _partition: usize, - ) -> Arc { - Arc::new(TestOpener { + ) -> Result> { + Ok(Arc::new(TestOpener { batches: self.batches.clone(), batch_size: self.batch_size, schema: Arc::clone(&self.schema), projection: self.projection.clone(), predicate: self.predicate.clone(), - }) + })) } fn filter(&self) -> Option> { @@ -166,13 +166,6 @@ impl FileSource for TestSource { }) } - fn with_projection(&self, config: &FileScanConfig) -> Arc { - Arc::new(TestSource { - projection: config.projection_exprs.as_ref().map(|p| p.column_indices()), - ..self.clone() - }) - } - fn metrics(&self) -> &ExecutionPlanMetricsSet { &self.metrics } diff --git a/datafusion/core/tests/physical_optimizer/partition_statistics.rs b/datafusion/core/tests/physical_optimizer/partition_statistics.rs index 7045cb8ea133..12c31b39452e 100644 --- a/datafusion/core/tests/physical_optimizer/partition_statistics.rs +++ b/datafusion/core/tests/physical_optimizer/partition_statistics.rs @@ -620,7 +620,7 @@ mod test { let plan_string = get_plan_string(&aggregate_exec_partial).swap_remove(0); assert_snapshot!( plan_string, - @"AggregateExec: mode=Partial, gby=[id@0 as id, 1 + id@0 as expr], aggr=[COUNT(c)]" + @"AggregateExec: mode=Partial, gby=[id@0 as id, 1 + id@0 as expr], aggr=[COUNT(c)], ordering_mode=Sorted" ); let p0_statistics = aggregate_exec_partial.partition_statistics(Some(0))?; diff --git a/datafusion/core/tests/physical_optimizer/projection_pushdown.rs b/datafusion/core/tests/physical_optimizer/projection_pushdown.rs index 9d39a80fb9df..80f4fbc3051c 100644 --- a/datafusion/core/tests/physical_optimizer/projection_pushdown.rs +++ b/datafusion/core/tests/physical_optimizer/projection_pushdown.rs @@ -397,6 +397,7 @@ fn create_simple_csv_exec() -> Arc { }) .with_file(PartitionedFile::new("x".to_string(), 100)) .with_projection_indices(Some(vec![0, 1, 2, 3, 4])) + .unwrap() .build(); DataSourceExec::from_data_source(config) @@ -421,6 +422,7 @@ fn create_projecting_csv_exec() -> Arc { }) .with_file(PartitionedFile::new("x".to_string(), 100)) .with_projection_indices(Some(vec![3, 2, 1])) + .unwrap() .build(); DataSourceExec::from_data_source(config) @@ -703,10 +705,7 @@ fn test_projection_after_projection() -> Result<()> { assert_snapshot!( actual, - @r" - ProjectionExec: expr=[b@1 as new_b, c@2 + e@4 as binary, b@1 as newest_b] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false - " + @"DataSourceExec: file_groups={1 group: [[x]]}, projection=[b@1 as 
new_b, c@2 + e@4 as binary, b@1 as newest_b], file_type=csv, has_header=false" ); Ok(()) @@ -773,8 +772,7 @@ fn test_output_req_after_projection() -> Result<()> { actual, @r" OutputRequirementExec: order_by=[(b@2, asc), (c@0 + new_a@1, asc)], dist_by=HashPartitioned[[new_a@1, b@2]]) - ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + DataSourceExec: file_groups={1 group: [[x]]}, projection=[c, a@0 as new_a, b], file_type=csv, has_header=false " ); @@ -864,8 +862,7 @@ fn test_coalesce_partitions_after_projection() -> Result<()> { actual, @r" CoalescePartitionsExec - ProjectionExec: expr=[b@1 as b, a@0 as a_new, d@3 as d] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + DataSourceExec: file_groups={1 group: [[x]]}, projection=[b, a@0 as a_new, d], file_type=csv, has_header=false " ); @@ -922,8 +919,7 @@ fn test_filter_after_projection() -> Result<()> { actual, @r" FilterExec: b@1 - a_new@0 > d@2 - a_new@0 - ProjectionExec: expr=[a@0 as a_new, b@1 as b, d@3 as d] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a@0 as a_new, b, d], file_type=csv, has_header=false " ); @@ -1025,10 +1021,8 @@ fn test_join_after_projection() -> Result<()> { actual, @r" SymmetricHashJoinExec: mode=SinglePartition, join_type=Inner, on=[(b_from_left@1, c_from_right@1)], filter=b_left_inter@0 - 1 + a_right_inter@1 <= a_right_inter@1 + c_left_inter@2 - ProjectionExec: expr=[c@2 as c_from_left, b@1 as b_from_left, a@0 as a_from_left] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false - ProjectionExec: expr=[a@0 as a_from_right, c@2 as c_from_right] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + DataSourceExec: file_groups={1 group: [[x]]}, projection=[c@2 as c_from_left, b@1 as b_from_left, a@0 as a_from_left], file_type=csv, has_header=false + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a@0 as a_from_right, c@2 as c_from_right], file_type=csv, has_header=false " ); @@ -1410,8 +1404,7 @@ fn test_repartition_after_projection() -> Result<()> { actual, @r" RepartitionExec: partitioning=Hash([a@1, b_new@0, d_new@2], 6), input_partitions=1 - ProjectionExec: expr=[b@1 as b_new, a@0 as a, d@3 as d_new] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + DataSourceExec: file_groups={1 group: [[x]]}, projection=[b@1 as b_new, a, d@3 as d_new], file_type=csv, has_header=false " ); @@ -1481,8 +1474,7 @@ fn test_sort_after_projection() -> Result<()> { actual, @r" SortExec: expr=[b@2 ASC, c@0 + new_a@1 ASC], preserve_partitioning=[false] - ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + DataSourceExec: file_groups={1 group: [[x]]}, projection=[c, a@0 as new_a, b], file_type=csv, has_header=false " ); @@ -1535,8 +1527,7 @@ fn test_sort_preserving_after_projection() -> Result<()> { actual, @r" SortPreservingMergeExec: [b@2 ASC, c@0 + new_a@1 ASC] - ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + DataSourceExec: 
file_groups={1 group: [[x]]}, projection=[c, a@0 as new_a, b], file_type=csv, has_header=false " ); @@ -1580,12 +1571,9 @@ fn test_union_after_projection() -> Result<()> { actual, @r" UnionExec - ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false - ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false - ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + DataSourceExec: file_groups={1 group: [[x]]}, projection=[c, a@0 as new_a, b], file_type=csv, has_header=false + DataSourceExec: file_groups={1 group: [[x]]}, projection=[c, a@0 as new_a, b], file_type=csv, has_header=false + DataSourceExec: file_groups={1 group: [[x]]}, projection=[c, a@0 as new_a, b], file_type=csv, has_header=false " ); @@ -1616,6 +1604,7 @@ fn partitioned_data_source() -> Arc { ) .with_file(PartitionedFile::new("x".to_string(), 100)) .with_projection_indices(Some(vec![0, 1, 2])) + .unwrap() .build(); DataSourceExec::from_data_source(config) @@ -1653,10 +1642,7 @@ fn test_partition_col_projection_pushdown() -> Result<()> { let actual = after_optimize_string.trim(); assert_snapshot!( actual, - @r" - ProjectionExec: expr=[string_col@1 as string_col, partition_col@2 as partition_col, int_col@0 as int_col] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[int_col, string_col, partition_col], file_type=csv, has_header=false - " + @"DataSourceExec: file_groups={1 group: [[x]]}, projection=[string_col, partition_col, int_col], file_type=csv, has_header=false" ); Ok(()) @@ -1699,10 +1685,7 @@ fn test_partition_col_projection_pushdown_expr() -> Result<()> { let actual = after_optimize_string.trim(); assert_snapshot!( actual, - @r" - ProjectionExec: expr=[string_col@1 as string_col, CAST(partition_col@2 AS Utf8View) as partition_col, int_col@0 as int_col] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[int_col, string_col, partition_col], file_type=csv, has_header=false - " + @"DataSourceExec: file_groups={1 group: [[x]]}, projection=[string_col, CAST(partition_col@2 AS Utf8View) as partition_col, int_col], file_type=csv, has_header=false" ); Ok(()) diff --git a/datafusion/core/tests/sql/explain_analyze.rs b/datafusion/core/tests/sql/explain_analyze.rs index 4a67046e933d..1656bdf66f2c 100644 --- a/datafusion/core/tests/sql/explain_analyze.rs +++ b/datafusion/core/tests/sql/explain_analyze.rs @@ -819,8 +819,7 @@ async fn test_physical_plan_display_indent_multi_children() { DataSourceExec: file_groups={1 group: [[ARROW_TEST_DATA/csv/aggregate_test_100.csv]]}, projection=[c1], file_type=csv, has_header=true CoalesceBatchesExec: target_batch_size=4096 RepartitionExec: partitioning=Hash([c2@0], 9000), input_partitions=1 - ProjectionExec: expr=[c1@0 as c2] - DataSourceExec: file_groups={1 group: [[ARROW_TEST_DATA/csv/aggregate_test_100.csv]]}, projection=[c1], file_type=csv, has_header=true + DataSourceExec: file_groups={1 group: [[ARROW_TEST_DATA/csv/aggregate_test_100.csv]]}, projection=[c1@0 as c2], file_type=csv, has_header=true " ); } diff --git a/datafusion/datasource-arrow/src/file_format.rs b/datafusion/datasource-arrow/src/file_format.rs index ef478e268890..7754748fbf86 100644 --- a/datafusion/datasource-arrow/src/file_format.rs +++ 
b/datafusion/datasource-arrow/src/file_format.rs @@ -208,7 +208,7 @@ impl FileFormat for ArrowFormat { conf.table_partition_cols().clone(), ); - let source: Arc = + let mut source: Arc = match is_object_in_arrow_ipc_file_format(object_store, object_location).await { Ok(true) => Arc::new(ArrowSource::new_file_source(table_schema)), @@ -216,6 +216,13 @@ impl FileFormat for ArrowFormat { Err(e) => Err(e)?, }; + // Preserve projection from the original file source + if let Some(projection) = conf.file_source.projection() { + if let Some(new_source) = source.try_pushdown_projection(projection)? { + source = new_source; + } + } + let config = FileScanConfigBuilder::from(conf) .with_source(source) .build(); diff --git a/datafusion/datasource-arrow/src/source.rs b/datafusion/datasource-arrow/src/source.rs index 3132d8a10d5c..070e4fae6df6 100644 --- a/datafusion/datasource-arrow/src/source.rs +++ b/datafusion/datasource-arrow/src/source.rs @@ -34,22 +34,27 @@ use std::sync::Arc; use std::{any::Any, io::Cursor}; -use datafusion_datasource::schema_adapter::SchemaAdapterFactory; +use datafusion_datasource::schema_adapter::{ + DefaultSchemaAdapterFactory, SchemaAdapterFactory, +}; use datafusion_datasource::{as_file_source, TableSchema}; use arrow::buffer::Buffer; +use arrow::datatypes::SchemaRef; use arrow::ipc::reader::{FileDecoder, FileReader, StreamReader}; use datafusion_common::error::Result; use datafusion_common::exec_datafusion_err; use datafusion_datasource::file::FileSource; use datafusion_datasource::file_scan_config::FileScanConfig; +use datafusion_datasource::projection::{ProjectionOpener, SplitProjection}; use datafusion_datasource::PartitionedFile; use datafusion_physical_expr_common::sort_expr::LexOrdering; use datafusion_physical_plan::metrics::ExecutionPlanMetricsSet; +use datafusion_physical_plan::projection::ProjectionExprs; use datafusion_datasource::file_stream::FileOpenFuture; use datafusion_datasource::file_stream::FileOpener; -use futures::StreamExt; +use futures::{StreamExt, TryStreamExt}; use itertools::Itertools; use object_store::{GetOptions, GetRange, GetResultPayload, ObjectStore}; @@ -66,6 +71,8 @@ enum ArrowFormat { pub(crate) struct ArrowStreamFileOpener { object_store: Arc, projection: Option>, + projected_schema: Option, + schema_adapter_factory: Option>, } impl FileOpener for ArrowStreamFileOpener { @@ -77,27 +84,52 @@ impl FileOpener for ArrowStreamFileOpener { } let object_store = Arc::clone(&self.object_store); let projection = self.projection.clone(); + let projected_schema = self.projected_schema.clone(); + let schema_adapter_factory = self.schema_adapter_factory.clone(); + Ok(Box::pin(async move { let r = object_store .get(&partitioned_file.object_meta.location) .await?; - match r.payload { + + let stream = match r.payload { #[cfg(not(target_arch = "wasm32"))] - GetResultPayload::File(file, _) => Ok(futures::stream::iter( + GetResultPayload::File(file, _) => futures::stream::iter( StreamReader::try_new(file.try_clone()?, projection.clone())?, ) .map(|r| r.map_err(Into::into)) - .boxed()), + .boxed(), GetResultPayload::Stream(_) => { let bytes = r.bytes().await?; let cursor = Cursor::new(bytes); - Ok(futures::stream::iter(StreamReader::try_new( + futures::stream::iter(StreamReader::try_new( cursor, projection.clone(), )?) 
.map(|r| r.map_err(Into::into)) - .boxed()) + .boxed() } + }; + + // If we have a schema adapter factory and projected schema, use them to normalize the schema + if let (Some(factory), Some(proj_schema)) = + (schema_adapter_factory, projected_schema) + { + Ok(stream + .and_then(move |batch| { + let factory = Arc::clone(&factory); + let proj_schema = Arc::clone(&proj_schema); + async move { + let schema_adapter = + factory.create_with_projected_schema(proj_schema); + let (schema_mapper, _) = + schema_adapter.map_schema(batch.schema().as_ref())?; + schema_mapper.map_batch(batch) + } + }) + .boxed()) + } else { + Ok(stream) } })) } @@ -107,12 +139,17 @@ impl FileOpener for ArrowStreamFileOpener { pub(crate) struct ArrowFileOpener { object_store: Arc, projection: Option>, + projected_schema: Option, + schema_adapter_factory: Option>, } impl FileOpener for ArrowFileOpener { fn open(&self, partitioned_file: PartitionedFile) -> Result { let object_store = Arc::clone(&self.object_store); let projection = self.projection.clone(); + let projected_schema = self.projected_schema.clone(); + let schema_adapter_factory = self.schema_adapter_factory.clone(); + Ok(Box::pin(async move { let range = partitioned_file.range.clone(); match range { @@ -120,23 +157,44 @@ impl FileOpener for ArrowFileOpener { let r = object_store .get(&partitioned_file.object_meta.location) .await?; - match r.payload { + let stream = match r.payload { #[cfg(not(target_arch = "wasm32"))] - GetResultPayload::File(file, _) => Ok(futures::stream::iter( + GetResultPayload::File(file, _) => futures::stream::iter( FileReader::try_new(file.try_clone()?, projection.clone())?, ) .map(|r| r.map_err(Into::into)) - .boxed()), + .boxed(), GetResultPayload::Stream(_) => { let bytes = r.bytes().await?; let cursor = Cursor::new(bytes); - Ok(futures::stream::iter(FileReader::try_new( + futures::stream::iter(FileReader::try_new( cursor, projection.clone(), )?) 
.map(|r| r.map_err(Into::into)) - .boxed()) + .boxed() } + }; + + // Apply schema adaptation if available + if let (Some(factory), Some(proj_schema)) = + (schema_adapter_factory, projected_schema) + { + Ok(stream + .and_then(move |batch| { + let factory = Arc::clone(&factory); + let proj_schema = Arc::clone(&proj_schema); + async move { + let schema_adapter = + factory.create_with_projected_schema(proj_schema); + let (schema_mapper, _) = schema_adapter + .map_schema(batch.schema().as_ref())?; + schema_mapper.map_batch(batch) + } + }) + .boxed()) + } else { + Ok(stream) } } Some(range) => { @@ -226,7 +284,7 @@ impl FileOpener for ArrowFileOpener { ) .await?; - Ok(futures::stream::iter( + let stream = futures::stream::iter( recordbatches .into_iter() .zip(recordbatch_results) @@ -237,7 +295,29 @@ impl FileOpener for ArrowFileOpener { }), ) .map(|r| r.map_err(Into::into)) - .boxed()) + .boxed(); + + // Apply schema adaptation if available + if let (Some(factory), Some(proj_schema)) = + (schema_adapter_factory, projected_schema) + { + Ok(stream + .and_then(move |batch| { + let factory = Arc::clone(&factory); + let proj_schema = Arc::clone(&proj_schema); + async move { + let schema_adapter = + factory.create_with_projected_schema(proj_schema); + let (schema_mapper, projection) = schema_adapter + .map_schema(batch.schema().as_ref())?; + let batch = batch.project(&projection)?; + schema_mapper.map_batch(batch) + } + }) + .boxed()) + } else { + Ok(stream) + } } } })) @@ -248,29 +328,34 @@ impl FileOpener for ArrowFileOpener { #[derive(Clone)] pub struct ArrowSource { format: ArrowFormat, - table_schema: TableSchema, metrics: ExecutionPlanMetricsSet, schema_adapter_factory: Option>, + projection: SplitProjection, + table_schema: TableSchema, } impl ArrowSource { /// Creates an [`ArrowSource`] for file format pub fn new_file_source(table_schema: impl Into) -> Self { + let table_schema = table_schema.into(); Self { format: ArrowFormat::File, - table_schema: table_schema.into(), metrics: ExecutionPlanMetricsSet::new(), schema_adapter_factory: None, + projection: SplitProjection::unprojected(&table_schema), + table_schema, } } /// Creates an [`ArrowSource`] for stream format pub fn new_stream_file_source(table_schema: impl Into) -> Self { + let table_schema = table_schema.into(); Self { format: ArrowFormat::Stream, - table_schema: table_schema.into(), metrics: ExecutionPlanMetricsSet::new(), schema_adapter_factory: None, + projection: SplitProjection::unprojected(&table_schema), + table_schema, } } } @@ -279,19 +364,43 @@ impl FileSource for ArrowSource { fn create_file_opener( &self, object_store: Arc, - base_config: &FileScanConfig, + _base_config: &FileScanConfig, _partition: usize, - ) -> Arc { - match self.format { + ) -> Result> { + let split_projection = self.projection.clone(); + // For schema adaptation, we only use the file schema (not partition columns) + let projected_file_schema = SchemaRef::from( + self.table_schema + .file_schema() + .project(&split_projection.file_indices)?, + ); + + // Use provided schema adapter factory, or default to DefaultSchemaAdapterFactory + // This ensures schema normalization (removing metadata differences) happens during execution + let schema_adapter_factory = self + .schema_adapter_factory + .clone() + .unwrap_or_else(|| Arc::new(DefaultSchemaAdapterFactory)); + + let opener: Arc = match self.format { ArrowFormat::File => Arc::new(ArrowFileOpener { object_store, - projection: base_config.file_column_projection_indices(), + projection: 
Some(split_projection.file_indices.clone()), + projected_schema: Some(Arc::clone(&projected_file_schema)), + schema_adapter_factory: Some(schema_adapter_factory), }), ArrowFormat::Stream => Arc::new(ArrowStreamFileOpener { object_store, - projection: base_config.file_column_projection_indices(), + projection: Some(split_projection.file_indices.clone()), + projected_schema: Some(projected_file_schema), + schema_adapter_factory: Some(schema_adapter_factory), }), - } + }; + ProjectionOpener::try_new( + split_projection, + opener, + self.table_schema.file_schema(), + ) } fn as_any(&self) -> &dyn Any { @@ -302,10 +411,6 @@ impl FileSource for ArrowSource { Arc::new(Self { ..self.clone() }) } - fn with_projection(&self, _config: &FileScanConfig) -> Arc { - Arc::new(Self { ..self.clone() }) - } - fn metrics(&self) -> &ExecutionPlanMetricsSet { &self.metrics } @@ -381,6 +486,22 @@ impl FileSource for ArrowSource { fn table_schema(&self) -> &TableSchema { &self.table_schema } + + fn try_pushdown_projection( + &self, + projection: &ProjectionExprs, + ) -> Result>> { + let mut source = self.clone(); + source.projection = SplitProjection::new( + self.table_schema().file_schema(), + &source.projection.source.try_merge(projection)?, + ); + Ok(Some(Arc::new(source))) + } + + fn projection(&self) -> Option<&ProjectionExprs> { + Some(&self.projection.source) + } } /// `FileOpener` wrapper for both Arrow IPC file and stream formats @@ -408,6 +529,8 @@ impl ArrowOpener { inner: Arc::new(ArrowFileOpener { object_store, projection, + projected_schema: None, + schema_adapter_factory: None, }), } } @@ -420,6 +543,8 @@ impl ArrowOpener { inner: Arc::new(ArrowStreamFileOpener { object_store, projection, + projected_schema: None, + schema_adapter_factory: None, }), } } @@ -479,7 +604,7 @@ mod tests { ) .build(); - let file_opener = source.create_file_opener(object_store, &scan_config, 0); + let file_opener = source.create_file_opener(object_store, &scan_config, 0)?; let mut stream = file_opener.open(partitioned_file)?.await?; assert!(stream.next().await.is_some()); @@ -521,7 +646,7 @@ mod tests { ) .build(); - let file_opener = source.create_file_opener(object_store, &scan_config, 0); + let file_opener = source.create_file_opener(object_store, &scan_config, 0)?; let mut stream = file_opener.open(partitioned_file)?.await?; assert!(stream.next().await.is_some()); @@ -562,7 +687,7 @@ mod tests { ) .build(); - let file_opener = source.create_file_opener(object_store, &scan_config, 0); + let file_opener = source.create_file_opener(object_store, &scan_config, 0)?; let result = file_opener.open(partitioned_file); assert!(result.is_err()); @@ -615,6 +740,8 @@ mod tests { let opener = ArrowStreamFileOpener { object_store, projection: Some(vec![0]), // just the first column + projected_schema: None, + schema_adapter_factory: None, }; let mut stream = opener.open(partitioned_file)?.await?; diff --git a/datafusion/datasource-avro/src/file_format.rs b/datafusion/datasource-avro/src/file_format.rs index 50aecf97b299..6df26a79f0e6 100644 --- a/datafusion/datasource-avro/src/file_format.rs +++ b/datafusion/datasource-avro/src/file_format.rs @@ -35,7 +35,7 @@ use datafusion_common::{Result, Statistics}; use datafusion_datasource::file::FileSource; use datafusion_datasource::file_compression_type::FileCompressionType; use datafusion_datasource::file_format::{FileFormat, FileFormatFactory}; -use datafusion_datasource::file_scan_config::{FileScanConfig, FileScanConfigBuilder}; +use 
datafusion_datasource::file_scan_config::FileScanConfig; use datafusion_datasource::source::DataSourceExec; use datafusion_physical_plan::ExecutionPlan; use datafusion_session::Session; @@ -154,11 +154,7 @@ impl FileFormat for AvroFormat { _state: &dyn Session, conf: FileScanConfig, ) -> Result> { - let file_schema = Arc::clone(conf.file_schema()); - let config = FileScanConfigBuilder::from(conf) - .with_source(Arc::new(AvroSource::new(file_schema))) - .build(); - Ok(DataSourceExec::from_data_source(config)) + Ok(DataSourceExec::from_data_source(conf)) } fn file_source( diff --git a/datafusion/datasource-avro/src/source.rs b/datafusion/datasource-avro/src/source.rs index e83113f40ea0..1ba3ad435041 100644 --- a/datafusion/datasource-avro/src/source.rs +++ b/datafusion/datasource-avro/src/source.rs @@ -26,10 +26,12 @@ use datafusion_common::error::Result; use datafusion_datasource::file::FileSource; use datafusion_datasource::file_scan_config::FileScanConfig; use datafusion_datasource::file_stream::FileOpener; +use datafusion_datasource::projection::{ProjectionOpener, SplitProjection}; use datafusion_datasource::schema_adapter::SchemaAdapterFactory; use datafusion_datasource::TableSchema; use datafusion_physical_expr_common::sort_expr::LexOrdering; use datafusion_physical_plan::metrics::ExecutionPlanMetricsSet; +use datafusion_physical_plan::projection::ProjectionExprs; use object_store::ObjectStore; @@ -38,7 +40,7 @@ use object_store::ObjectStore; pub struct AvroSource { table_schema: TableSchema, batch_size: Option, - projection: Option>, + projection: SplitProjection, metrics: ExecutionPlanMetricsSet, schema_adapter_factory: Option>, } @@ -46,21 +48,30 @@ pub struct AvroSource { impl AvroSource { /// Initialize an AvroSource with the provided schema pub fn new(table_schema: impl Into) -> Self { + let table_schema = table_schema.into(); Self { - table_schema: table_schema.into(), + projection: SplitProjection::unprojected(&table_schema), + table_schema, batch_size: None, - projection: None, metrics: ExecutionPlanMetricsSet::new(), schema_adapter_factory: None, } } fn open(&self, reader: R) -> Result> { + let file_schema = self.table_schema.file_schema(); + let projection = Some( + self.projection + .file_indices + .iter() + .map(|&idx| file_schema.field(idx).name().clone()) + .collect::>(), + ); AvroReader::try_new( reader, &Arc::clone(self.table_schema.file_schema()), self.batch_size.expect("Batch size must set before open"), - self.projection.clone().as_ref(), + projection.as_ref(), ) } } @@ -71,11 +82,17 @@ impl FileSource for AvroSource { object_store: Arc, _base_config: &FileScanConfig, _partition: usize, - ) -> Arc { - Arc::new(private::AvroOpener { + ) -> Result> { + let mut opener = Arc::new(private::AvroOpener { config: Arc::new(self.clone()), object_store, - }) + }) as Arc; + opener = ProjectionOpener::try_new( + self.projection.clone(), + Arc::clone(&opener), + self.table_schema.file_schema(), + )?; + Ok(opener) } fn as_any(&self) -> &dyn Any { @@ -92,10 +109,20 @@ impl FileSource for AvroSource { Arc::new(conf) } - fn with_projection(&self, config: &FileScanConfig) -> Arc { - let mut conf = self.clone(); - conf.projection = config.projected_file_column_names(); - Arc::new(conf) + fn try_pushdown_projection( + &self, + projection: &ProjectionExprs, + ) -> Result>> { + let mut source = self.clone(); + let new_projection = self.projection.source.try_merge(projection)?; + let split_projection = + SplitProjection::new(self.table_schema.file_schema(), &new_projection); + 
source.projection = split_projection; + Ok(Some(Arc::new(source))) + } + + fn projection(&self) -> Option<&ProjectionExprs> { + Some(&self.projection.source) } fn metrics(&self) -> &ExecutionPlanMetricsSet { diff --git a/datafusion/datasource-csv/src/mod.rs b/datafusion/datasource-csv/src/mod.rs index 9af57c43103c..3038bfb7ef42 100644 --- a/datafusion/datasource-csv/src/mod.rs +++ b/datafusion/datasource-csv/src/mod.rs @@ -27,6 +27,7 @@ pub mod source; use std::sync::Arc; +use datafusion_common::Result; use datafusion_datasource::file_groups::FileGroup; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use datafusion_datasource::{file::FileSource, file_scan_config::FileScanConfig}; @@ -37,8 +38,10 @@ pub use file_format::*; pub fn partitioned_csv_config( file_groups: Vec, file_source: Arc, -) -> FileScanConfig { - FileScanConfigBuilder::new(ObjectStoreUrl::local_filesystem(), file_source) - .with_file_groups(file_groups) - .build() +) -> Result { + Ok( + FileScanConfigBuilder::new(ObjectStoreUrl::local_filesystem(), file_source) + .with_file_groups(file_groups) + .build(), + ) } diff --git a/datafusion/datasource-csv/src/source.rs b/datafusion/datasource-csv/src/source.rs index b68c55c79783..95f369962733 100644 --- a/datafusion/datasource-csv/src/source.rs +++ b/datafusion/datasource-csv/src/source.rs @@ -17,7 +17,9 @@ //! Execution plan for reading CSV files +use datafusion_datasource::projection::{ProjectionOpener, SplitProjection}; use datafusion_datasource::schema_adapter::SchemaAdapterFactory; +use datafusion_physical_plan::projection::ProjectionExprs; use std::any::Any; use std::fmt; use std::io::{Read, Seek, SeekFrom}; @@ -88,7 +90,7 @@ pub struct CsvSource { options: CsvOptions, batch_size: Option, table_schema: TableSchema, - file_projection: Option>, + projection: SplitProjection, metrics: ExecutionPlanMetricsSet, schema_adapter_factory: Option>, } @@ -96,11 +98,12 @@ pub struct CsvSource { impl CsvSource { /// Returns a [`CsvSource`] pub fn new(table_schema: impl Into) -> Self { + let table_schema = table_schema.into(); Self { options: CsvOptions::default(), - table_schema: table_schema.into(), + projection: SplitProjection::unprojected(&table_schema), + table_schema, batch_size: None, - file_projection: None, metrics: ExecutionPlanMetricsSet::new(), schema_adapter_factory: None, } @@ -194,9 +197,7 @@ impl CsvSource { if let Some(terminator) = self.terminator() { builder = builder.with_terminator(terminator); } - if let Some(proj) = &self.file_projection { - builder = builder.with_projection(proj.clone()); - } + builder = builder.with_projection(self.projection.file_indices.clone()); if let Some(escape) = self.escape() { builder = builder.with_escape(escape) } @@ -243,14 +244,20 @@ impl FileSource for CsvSource { &self, object_store: Arc, base_config: &FileScanConfig, - partition: usize, - ) -> Arc { - Arc::new(CsvOpener { + partition_index: usize, + ) -> Result> { + let mut opener = Arc::new(CsvOpener { config: Arc::new(self.clone()), file_compression_type: base_config.file_compression_type, object_store, - partition_index: partition, - }) + partition_index, + }) as Arc; + opener = ProjectionOpener::try_new( + self.projection.clone(), + Arc::clone(&opener), + self.table_schema.file_schema(), + )?; + Ok(opener) } fn as_any(&self) -> &dyn Any { @@ -267,10 +274,20 @@ impl FileSource for CsvSource { Arc::new(conf) } - fn with_projection(&self, config: &FileScanConfig) -> Arc { - let mut conf = self.clone(); - conf.file_projection = 
config.file_column_projection_indices(); - Arc::new(conf) + fn try_pushdown_projection( + &self, + projection: &ProjectionExprs, + ) -> Result>> { + let mut source = self.clone(); + let new_projection = self.projection.source.try_merge(projection)?; + let split_projection = + SplitProjection::new(self.table_schema.file_schema(), &new_projection); + source.projection = split_projection; + Ok(Some(Arc::new(source))) + } + + fn projection(&self) -> Option<&ProjectionExprs> { + Some(&self.projection.source) } fn metrics(&self) -> &ExecutionPlanMetricsSet { diff --git a/datafusion/datasource-json/src/file_format.rs b/datafusion/datasource-json/src/file_format.rs index afb12e526271..27d1c6d960b9 100644 --- a/datafusion/datasource-json/src/file_format.rs +++ b/datafusion/datasource-json/src/file_format.rs @@ -254,16 +254,10 @@ impl FileFormat for JsonFormat { _state: &dyn Session, conf: FileScanConfig, ) -> Result> { - let table_schema = TableSchema::new( - Arc::clone(conf.file_schema()), - conf.table_partition_cols().clone(), - ); - let source = Arc::new(JsonSource::new(table_schema)); let conf = FileScanConfigBuilder::from(conf) .with_file_compression_type(FileCompressionType::from( self.options.compression, )) - .with_source(source) .build(); Ok(DataSourceExec::from_data_source(conf)) } diff --git a/datafusion/datasource-json/src/source.rs b/datafusion/datasource-json/src/source.rs index fd382efc75d3..db070d2033f4 100644 --- a/datafusion/datasource-json/src/source.rs +++ b/datafusion/datasource-json/src/source.rs @@ -29,10 +29,12 @@ use datafusion_common_runtime::JoinSet; use datafusion_datasource::decoder::{deserialize_stream, DecoderDeserializer}; use datafusion_datasource::file_compression_type::FileCompressionType; use datafusion_datasource::file_stream::{FileOpenFuture, FileOpener}; +use datafusion_datasource::projection::{ProjectionOpener, SplitProjection}; use datafusion_datasource::schema_adapter::SchemaAdapterFactory; use datafusion_datasource::{ as_file_source, calculate_range, ListingTableUrl, PartitionedFile, RangeCalculation, }; +use datafusion_physical_plan::projection::ProjectionExprs; use datafusion_physical_plan::{ExecutionPlan, ExecutionPlanProperties}; use arrow::json::ReaderBuilder; @@ -79,13 +81,16 @@ pub struct JsonSource { batch_size: Option, metrics: ExecutionPlanMetricsSet, schema_adapter_factory: Option>, + projection: SplitProjection, } impl JsonSource { /// Initialize a JsonSource with the provided schema pub fn new(table_schema: impl Into) -> Self { + let table_schema = table_schema.into(); Self { - table_schema: table_schema.into(), + projection: SplitProjection::unprojected(&table_schema), + table_schema, batch_size: None, metrics: ExecutionPlanMetricsSet::new(), schema_adapter_factory: None, @@ -105,15 +110,29 @@ impl FileSource for JsonSource { object_store: Arc, base_config: &FileScanConfig, _partition: usize, - ) -> Arc { - Arc::new(JsonOpener { + ) -> Result> { + // Get the projected file schema for JsonOpener + let file_schema = self.table_schema.file_schema(); + let projected_schema = + Arc::new(file_schema.project(&self.projection.file_indices)?); + + let mut opener = Arc::new(JsonOpener { batch_size: self .batch_size .expect("Batch size must set before creating opener"), - projected_schema: base_config.projected_file_schema(), + projected_schema, file_compression_type: base_config.file_compression_type, object_store, - }) + }) as Arc; + + // Wrap with ProjectionOpener + opener = ProjectionOpener::try_new( + self.projection.clone(), + Arc::clone(&opener), 
+ self.table_schema.file_schema(), + )?; + + Ok(opener) } fn as_any(&self) -> &dyn Any { @@ -130,8 +149,20 @@ impl FileSource for JsonSource { Arc::new(conf) } - fn with_projection(&self, _config: &FileScanConfig) -> Arc { - Arc::new(Self { ..self.clone() }) + fn try_pushdown_projection( + &self, + projection: &ProjectionExprs, + ) -> Result>> { + let mut source = self.clone(); + let new_projection = self.projection.source.try_merge(projection)?; + let split_projection = + SplitProjection::new(self.table_schema.file_schema(), &new_projection); + source.projection = split_projection; + Ok(Some(Arc::new(source))) + } + + fn projection(&self) -> Option<&ProjectionExprs> { + Some(&self.projection.source) } fn metrics(&self) -> &ExecutionPlanMetricsSet { diff --git a/datafusion/datasource-parquet/src/file_format.rs b/datafusion/datasource-parquet/src/file_format.rs index 385bfb5472a5..a2ce16cd530d 100644 --- a/datafusion/datasource-parquet/src/file_format.rs +++ b/datafusion/datasource-parquet/src/file_format.rs @@ -460,12 +460,13 @@ impl FileFormat for ParquetFormat { metadata_size_hint = Some(metadata); } - let table_schema = TableSchema::new( - Arc::clone(conf.file_schema()), - conf.table_partition_cols().clone(), - ); - let mut source = ParquetSource::new(table_schema) - .with_table_parquet_options(self.options.clone()); + let mut source = conf + .file_source() + .as_any() + .downcast_ref::() + .cloned() + .ok_or_else(|| internal_datafusion_err!("Expected ParquetSource"))?; + source = source.with_table_parquet_options(self.options.clone()); // Use the CachedParquetFileReaderFactory let metadata_cache = state.runtime_env().cache_manager.get_file_metadata_cache(); diff --git a/datafusion/datasource-parquet/src/source.rs b/datafusion/datasource-parquet/src/source.rs index ad7474af80c2..5ed74ecfd98f 100644 --- a/datafusion/datasource-parquet/src/source.rs +++ b/datafusion/datasource-parquet/src/source.rs @@ -31,6 +31,7 @@ use datafusion_common::config::ConfigOptions; use datafusion_common::config::EncryptionFactoryOptions; use datafusion_datasource::as_file_source; use datafusion_datasource::file_stream::FileOpener; +use datafusion_datasource::projection::{ProjectionOpener, SplitProjection}; use datafusion_datasource::schema_adapter::{ DefaultSchemaAdapterFactory, SchemaAdapterFactory, }; @@ -51,6 +52,7 @@ use datafusion_physical_plan::filter_pushdown::{ }; use datafusion_physical_plan::metrics::Count; use datafusion_physical_plan::metrics::ExecutionPlanMetricsSet; +use datafusion_physical_plan::projection::ProjectionExprs; use datafusion_physical_plan::DisplayFormatType; #[cfg(feature = "parquet_encryption")] @@ -286,6 +288,8 @@ pub struct ParquetSource { pub(crate) batch_size: Option, /// Optional hint for the size of the parquet metadata pub(crate) metadata_size_hint: Option, + /// Projection information for column pushdown + pub(crate) projection: SplitProjection, #[cfg(feature = "parquet_encryption")] pub(crate) encryption_factory: Option>, } @@ -297,8 +301,10 @@ impl ParquetSource { /// Uses default `TableParquetOptions`. /// To set custom options, use [ParquetSource::with_table_parquet_options`]. 
pub fn new(table_schema: impl Into) -> Self { + let table_schema = table_schema.into(); Self { - table_schema: table_schema.into(), + projection: SplitProjection::unprojected(&table_schema), + table_schema, table_parquet_options: TableParquetOptions::default(), metrics: ExecutionPlanMetricsSet::new(), predicate: None, @@ -512,10 +518,8 @@ impl FileSource for ParquetSource { object_store: Arc, base_config: &FileScanConfig, partition: usize, - ) -> Arc { - let projection = base_config - .file_column_projection_indices() - .unwrap_or_else(|| (0..base_config.file_schema().fields().len()).collect()); + ) -> datafusion_common::Result> { + let split_projection = self.projection.clone(); let (expr_adapter_factory, schema_adapter_factory) = match ( base_config.expr_adapter_factory.as_ref(), @@ -576,9 +580,9 @@ impl FileSource for ParquetSource { .as_ref() .map(|time_unit| parse_coerce_int96_string(time_unit.as_str()).unwrap()); - Arc::new(ParquetOpener { + let mut opener = Arc::new(ParquetOpener { partition_index: partition, - projection: Arc::from(projection), + projection: Arc::from(split_projection.file_indices.clone()), batch_size: self .batch_size .expect("Batch size must set before creating ParquetOpener"), @@ -602,7 +606,13 @@ impl FileSource for ParquetSource { #[cfg(feature = "parquet_encryption")] encryption_factory: self.get_encryption_factory_with_config(), max_predicate_cache_size: self.max_predicate_cache_size(), - }) + }) as Arc; + opener = ProjectionOpener::try_new( + split_projection.clone(), + Arc::clone(&opener), + self.table_schema.file_schema(), + )?; + Ok(opener) } fn as_any(&self) -> &dyn Any { @@ -623,8 +633,20 @@ impl FileSource for ParquetSource { Arc::new(conf) } - fn with_projection(&self, _config: &FileScanConfig) -> Arc { - Arc::new(Self { ..self.clone() }) + fn try_pushdown_projection( + &self, + projection: &ProjectionExprs, + ) -> datafusion_common::Result>> { + let mut source = self.clone(); + let new_projection = self.projection.source.try_merge(projection)?; + let split_projection = + SplitProjection::new(self.table_schema.file_schema(), &new_projection); + source.projection = split_projection; + Ok(Some(Arc::new(source))) + } + + fn projection(&self) -> Option<&ProjectionExprs> { + Some(&self.projection.source) } fn metrics(&self) -> &ExecutionPlanMetricsSet { diff --git a/datafusion/datasource/src/file.rs b/datafusion/datasource/src/file.rs index 9ec34b5dda0c..3668e0e4a77e 100644 --- a/datafusion/datasource/src/file.rs +++ b/datafusion/datasource/src/file.rs @@ -28,6 +28,7 @@ use crate::file_stream::FileOpener; use crate::schema_adapter::SchemaAdapterFactory; use datafusion_common::config::ConfigOptions; use datafusion_common::{not_impl_err, Result}; +use datafusion_physical_expr::projection::ProjectionExprs; use datafusion_physical_expr::{LexOrdering, PhysicalExpr}; use datafusion_physical_plan::filter_pushdown::{FilterPushdownPropagation, PushedDown}; use datafusion_physical_plan::metrics::ExecutionPlanMetricsSet; @@ -57,7 +58,7 @@ pub trait FileSource: Send + Sync { object_store: Arc, base_config: &FileScanConfig, partition: usize, - ) -> Arc; + ) -> Result>; /// Any fn as_any(&self) -> &dyn Any; /// Returns the table schema for this file source. 
@@ -66,12 +67,14 @@ pub trait FileSource: Send + Sync { fn table_schema(&self) -> &crate::table_schema::TableSchema; /// Initialize new type with batch size configuration fn with_batch_size(&self, batch_size: usize) -> Arc; - /// Initialize new instance with projection information - fn with_projection(&self, config: &FileScanConfig) -> Arc; /// Returns the filter expression that will be applied during the file scan. fn filter(&self) -> Option> { None } + /// Return the projection that will be applied to the output stream on top of the table schema. + fn projection(&self) -> Option<&ProjectionExprs> { + None + } /// Return execution plan metrics fn metrics(&self) -> &ExecutionPlanMetricsSet; /// String representation of file source such as "csv", "json", "parquet" @@ -126,6 +129,35 @@ pub trait FileSource: Send + Sync { )) } + /// Try to push down a projection into a this FileSource. + /// + /// `FileSource` implementations that support projection pushdown should + /// override this method and return a new `FileSource` instance with the + /// projection incorporated. + /// + /// If a `FileSource` does accept a projection it is expected to handle + /// the projection in it's entirety, including partition columns. + /// For example, the `FileSource` may translate that projection into a + /// file format specific projection (e.g. Parquet can push down struct field access, + /// some other file formats like Vortex can push down computed expressions into un-decoded data) + /// and also need to handle partition column projection (generally done by replacing partition column + /// references with literal values derived from each files partition values). + /// + /// Not all FileSource's can handle complex expression pushdowns. For example, + /// a CSV file source may only support simple column selections. In such cases, + /// the `FileSource` can use [`SplitProjection`] and [`ProjectionOpener`] + /// to split the projection into a pushdownable part and a non-pushdownable part. + /// These helpers also handle partition column projection. + /// + /// [`SplitProjection`]: crate::projection::SplitProjection + /// [`ProjectionOpener`]: crate::projection::ProjectionOpener + fn try_pushdown_projection( + &self, + _projection: &ProjectionExprs, + ) -> Result>> { + Ok(None) + } + /// Set optional schema adapter factory. 
/// /// [`SchemaAdapterFactory`] allows user to specify how fields from the diff --git a/datafusion/datasource/src/file_scan_config.rs b/datafusion/datasource/src/file_scan_config.rs index 4387996a2981..12654ee5b6af 100644 --- a/datafusion/datasource/src/file_scan_config.rs +++ b/datafusion/datasource/src/file_scan_config.rs @@ -27,43 +27,30 @@ use crate::{ source::DataSource, statistics::MinMaxStatistics, PartitionedFile, }; use arrow::datatypes::FieldRef; -use arrow::{ - array::{ - ArrayData, ArrayRef, BufferBuilder, DictionaryArray, RecordBatch, - RecordBatchOptions, - }, - buffer::Buffer, - datatypes::{ArrowNativeType, DataType, Schema, SchemaRef, UInt16Type}, -}; +use arrow::datatypes::{DataType, Schema, SchemaRef}; use datafusion_common::config::ConfigOptions; use datafusion_common::{ - exec_datafusion_err, exec_err, internal_datafusion_err, ColumnStatistics, - Constraints, Result, ScalarValue, Statistics, + internal_datafusion_err, internal_err, ColumnStatistics, Constraints, Result, + ScalarValue, Statistics, }; use datafusion_execution::{ object_store::ObjectStoreUrl, SendableRecordBatchStream, TaskContext, }; use datafusion_expr::Operator; -use datafusion_physical_expr::expressions::{BinaryExpr, Column}; +use datafusion_physical_expr::expressions::BinaryExpr; use datafusion_physical_expr::projection::ProjectionExprs; use datafusion_physical_expr::utils::reassign_expr_columns; use datafusion_physical_expr::{split_conjunction, EquivalenceProperties, Partitioning}; use datafusion_physical_expr_adapter::PhysicalExprAdapterFactory; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::LexOrdering; -use datafusion_physical_plan::projection::{ - all_alias_free_columns, new_projections_for_columns, ProjectionExpr, -}; use datafusion_physical_plan::{ display::{display_orderings, ProjectSchemaDisplay}, filter_pushdown::FilterPushdownPropagation, metrics::ExecutionPlanMetricsSet, DisplayAs, DisplayFormatType, }; -use std::{ - any::Any, borrow::Cow, collections::HashMap, fmt::Debug, fmt::Formatter, - fmt::Result as FmtResult, marker::PhantomData, sync::Arc, -}; +use std::{any::Any, fmt::Debug, fmt::Formatter, fmt::Result as FmtResult, sync::Arc}; use datafusion_physical_expr::equivalence::project_orderings; use datafusion_physical_plan::coop::cooperative; @@ -81,7 +68,6 @@ use log::{debug, warn}; /// # use std::sync::Arc; /// # use arrow::datatypes::{Field, Fields, DataType, Schema, SchemaRef}; /// # use object_store::ObjectStore; -/// # use datafusion_common::Statistics; /// # use datafusion_common::Result; /// # use datafusion_datasource::file::FileSource; /// # use datafusion_datasource::file_groups::FileGroup; @@ -91,6 +77,7 @@ use log::{debug, warn}; /// # use datafusion_datasource::source::DataSourceExec; /// # use datafusion_datasource::table_schema::TableSchema; /// # use datafusion_execution::object_store::ObjectStoreUrl; +/// # use datafusion_physical_expr::projection::ProjectionExprs; /// # use datafusion_physical_plan::ExecutionPlan; /// # use datafusion_physical_plan::metrics::ExecutionPlanMetricsSet; /// # use datafusion_datasource::schema_adapter::SchemaAdapterFactory; @@ -107,15 +94,16 @@ use log::{debug, warn}; /// # schema_adapter_factory: Option> /// # }; /// # impl FileSource for ParquetSource { -/// # fn create_file_opener(&self, _: Arc, _: &FileScanConfig, _: usize) -> Arc { unimplemented!() } +/// # fn create_file_opener(&self, _: Arc, _: &FileScanConfig, _: usize) -> Result> { unimplemented!() } /// # fn 
as_any(&self) -> &dyn Any { self } /// # fn table_schema(&self) -> &TableSchema { &self.table_schema } /// # fn with_batch_size(&self, _: usize) -> Arc { unimplemented!() } -/// # fn with_projection(&self, _: &FileScanConfig) -> Arc { unimplemented!() } /// # fn metrics(&self) -> &ExecutionPlanMetricsSet { unimplemented!() } /// # fn file_type(&self) -> &str { "parquet" } /// # fn with_schema_adapter_factory(&self, factory: Arc) -> Result> { Ok(Arc::new(Self {table_schema: self.table_schema.clone(), schema_adapter_factory: Some(factory)} )) } /// # fn schema_adapter_factory(&self) -> Option> { self.schema_adapter_factory.clone() } +/// # // Note that this implementation drops the projection on the floor, it is not complete! +/// # fn try_pushdown_projection(&self, projection: &ProjectionExprs) -> Result>> { Ok(Some(Arc::new(self.clone()) as Arc)) } /// # } /// # impl ParquetSource { /// # fn new(table_schema: impl Into) -> Self { Self {table_schema: table_schema.into(), schema_adapter_factory: None} } @@ -126,6 +114,7 @@ use log::{debug, warn}; /// let config = FileScanConfigBuilder::new(object_store_url, file_source) /// .with_limit(Some(1000)) // read only the first 1000 records /// .with_projection_indices(Some(vec![2, 3])) // project columns 2 and 3 +/// .expect("Failed to push down projection") /// // Read /tmp/file1.parquet with known size of 1234 bytes in a single group /// .with_file(PartitionedFile::new("file1.parquet", 1234)) /// // Read /tmp/file2.parquet 56 bytes and /tmp/file3.parquet 78 bytes @@ -166,12 +155,6 @@ pub struct FileScanConfig { pub file_groups: Vec, /// Table constraints pub constraints: Constraints, - /// Physical expressions defining the projection to apply when reading data. - /// - /// Each expression in the projection can reference columns from both the file - /// schema and table partition columns. If `None`, all columns from the table - /// schema are projected. - pub projection_exprs: Option, /// The maximum number of records to read from this plan. If `None`, /// all records after filtering are returned. pub limit: Option, @@ -239,6 +222,7 @@ pub struct FileScanConfig { /// .with_limit(Some(1000)) /// // Project only the first column /// .with_projection_indices(Some(vec![0])) +/// .expect("Failed to push down projection") /// // Add a file group with two files /// .with_file_group(FileGroup::new(vec![ /// PartitionedFile::new("data/date=2024-01-01/file1.parquet", 1024), @@ -256,7 +240,6 @@ pub struct FileScanConfigBuilder { object_store_url: ObjectStoreUrl, file_source: Arc, limit: Option, - projection_indices: Option>, constraints: Option, file_groups: Vec, statistics: Option, @@ -287,7 +270,6 @@ impl FileScanConfigBuilder { file_compression_type: None, new_lines_in_values: None, limit: None, - projection_indices: None, constraints: None, batch_size: None, expr_adapter_factory: None, @@ -321,15 +303,48 @@ impl FileScanConfigBuilder { /// Use [`Self::with_projection_indices`] instead. This method will be removed in a future release. #[deprecated(since = "51.0.0", note = "Use with_projection_indices instead")] pub fn with_projection(self, indices: Option>) -> Self { - self.with_projection_indices(indices) + match self.clone().with_projection_indices(indices) { + Ok(builder) => builder, + Err(e) => { + warn!("Failed to push down projection in FileScanConfigBuilder::with_projection: {e}"); + self + } + } } /// Set the columns on which to project the data using column indices. 
/// /// Indexes that are higher than the number of columns of `file_schema` refer to `table_partition_cols`. - pub fn with_projection_indices(mut self, indices: Option>) -> Self { - self.projection_indices = indices; - self + pub fn with_projection_indices( + mut self, + indices: Option>, + ) -> Result { + let projection_exprs = indices.map(|indices| { + ProjectionExprs::from_indices( + &indices, + self.file_source.table_schema().table_schema(), + ) + }); + let Some(projection_exprs) = projection_exprs else { + return Ok(self); + }; + let new_source = self + .file_source + .try_pushdown_projection(&projection_exprs) + .map_err(|e| { + internal_datafusion_err!( + "Failed to push down projection in FileScanConfigBuilder::with_projection_indices: {e}" + ) + })?; + if let Some(new_source) = new_source { + self.file_source = new_source; + } else { + internal_err!( + "FileSource {} does not support projection pushdown", + self.file_source.file_type() + )?; + } + Ok(self) } /// Set the table constraints @@ -423,12 +438,14 @@ impl FileScanConfigBuilder { /// /// This method takes ownership of the builder and returns the constructed `FileScanConfig`. /// Any unset optional fields will use their default values. + /// + /// Note that projection pushdown errors are reported by + /// [`Self::with_projection_indices`]; `build` itself does not return an error. pub fn build(self) -> FileScanConfig { let Self { object_store_url, file_source, limit, - projection_indices, constraints, file_groups, statistics, @@ -443,25 +460,14 @@ impl FileScanConfigBuilder { let statistics = statistics.unwrap_or_else(|| { Statistics::new_unknown(file_source.table_schema().table_schema()) }); - let file_compression_type = file_compression_type.unwrap_or(FileCompressionType::UNCOMPRESSED); let new_lines_in_values = new_lines_in_values.unwrap_or(false); - // Convert projection indices to ProjectionExprs using the final table schema - // (which now includes partition columns if they were added) - let projection_exprs = projection_indices.map(|indices| { - ProjectionExprs::from_indices( - &indices, - file_source.table_schema().table_schema(), - ) - }); - FileScanConfig { object_store_url, file_source, limit, - projection_exprs, constraints, file_groups, output_ordering, @@ -485,9 +491,6 @@ impl From for FileScanConfigBuilder { file_compression_type: Some(config.file_compression_type), new_lines_in_values: Some(config.new_lines_in_values), limit: config.limit, - projection_indices: config - .projection_exprs - .map(|p| p.ordered_column_indices()), constraints: Some(config.constraints), batch_size: config.batch_size, expr_adapter_factory: config.expr_adapter_factory, @@ -506,12 +509,9 @@ impl DataSource for FileScanConfig { .batch_size .unwrap_or_else(|| context.session_config().batch_size()); - let source = self - .file_source - .with_batch_size(batch_size) - .with_projection(self); + let source = self.file_source.with_batch_size(batch_size); - let opener = source.create_file_opener(object_store, self, partition); + let opener = source.create_file_opener(object_store, self, partition)?; let stream = FileStream::new(self, partition, opener, source.metrics())?; Ok(Box::pin(cooperative(stream))) @@ -524,14 +524,35 @@ impl DataSource for FileScanConfig { fn fmt_as(&self, t: DisplayFormatType, f: &mut Formatter) -> FmtResult { match t { DisplayFormatType::Default | DisplayFormatType::Verbose => { - let schema = self.projected_schema(); + let schema = self.projected_schema().map_err(|_| std::fmt::Error {})?; let orderings = get_projected_output_ordering(self, &schema); write!(f, "file_groups=")?;
FileGroupsDisplay(&self.file_groups).fmt_as(t, f)?; if !schema.fields().is_empty() { - write!(f, ", projection={}", ProjectSchemaDisplay(&schema))?; + if let Some(projection) = self.file_source.projection() { + // This matches what ProjectionExec does. + // TODO: can we put this into ProjectionExprs so that it's shared code? + let expr: Vec = projection + .as_ref() + .iter() + .map(|proj_expr| { + if let Some(column) = proj_expr.expr.as_any().downcast_ref::() { + if column.name() == proj_expr.alias { + column.name().to_string() + } else { + format!("{} as {}", proj_expr.expr, proj_expr.alias) + } + } else { + format!("{} as {}", proj_expr.expr, proj_expr.alias) + } + }) + .collect(); + write!(f, ", projection=[{}]", expr.join(", "))?; + } else { + write!(f, ", projection={}", ProjectSchemaDisplay(&schema))?; + } } if let Some(limit) = self.limit { @@ -578,15 +599,17 @@ impl DataSource for FileScanConfig { } fn eq_properties(&self) -> EquivalenceProperties { - let (schema, constraints, _, orderings) = self.project(); - let mut eq_properties = - EquivalenceProperties::new_with_orderings(Arc::clone(&schema), orderings) - .with_constraints(constraints); + let schema = self.file_source.table_schema().table_schema(); + let mut eq_properties = EquivalenceProperties::new_with_orderings( + Arc::clone(schema), + self.output_ordering.clone(), + ) + .with_constraints(self.constraints.clone()); + if let Some(filter) = self.file_source.filter() { // We need to remap column indexes to match the projected schema since that's what the equivalence properties deal with. // Note that this will *ignore* any non-projected columns: these don't factor into ordering / equivalence. - match Self::add_filter_equivalence_info(&filter, &mut eq_properties, &schema) - { + match Self::add_filter_equivalence_info(&filter, &mut eq_properties, schema) { Ok(()) => {} Err(e) => { warn!("Failed to add filter equivalence info: {e}"); @@ -595,6 +618,24 @@ impl DataSource for FileScanConfig { } } } + + if let Some(projection) = self.file_source.projection() { + match ( + projection.project_schema(schema), + projection.projection_mapping(schema), + ) { + (Ok(output_schema), Ok(mapping)) => { + eq_properties = + eq_properties.project(&mapping, Arc::new(output_schema)); + } + (Err(e), _) | (_, Err(e)) => { + warn!("Failed to project equivalence properties: {e}"); + #[cfg(debug_assertions)] + panic!("Failed to project equivalence properties: {e}"); + } + } + } + eq_properties } @@ -630,7 +671,7 @@ impl DataSource for FileScanConfig { } } // If no statistics available for this partition, return unknown - Ok(Statistics::new_unknown(&self.projected_schema())) + Ok(Statistics::new_unknown(self.projected_schema()?.as_ref())) } else { // Return aggregate statistics across all partitions Ok(self.projected_stats()) @@ -654,43 +695,16 @@ impl DataSource for FileScanConfig { fn try_swapping_with_projection( &self, - projection: &[ProjectionExpr], + projection: &ProjectionExprs, ) -> Result>> { - // This process can be moved into CsvExec, but it would be an overlap of their responsibility. - - // Must be all column references, with no table partition columns (which can not be projected) - let partitioned_columns_in_proj = projection.iter().any(|proj_expr| { - proj_expr - .expr - .as_any() - .downcast_ref::() - .map(|expr| expr.index() >= self.file_schema().fields().len()) - .unwrap_or(false) - }); - - // If there is any non-column or alias-carrier expression, Projection should not be removed. 
- let no_aliases = all_alias_free_columns(projection); - - Ok((no_aliases && !partitioned_columns_in_proj).then(|| { - let file_scan = self.clone(); - let source = Arc::clone(&file_scan.file_source); - let new_projections = new_projections_for_columns( - projection, - &file_scan - .projection_exprs - .as_ref() - .map(|p| p.ordered_column_indices()) - .unwrap_or_else(|| (0..self.file_schema().fields().len()).collect()), - ); - - Arc::new( - FileScanConfigBuilder::from(file_scan) - // Assign projected statistics to source - .with_projection_indices(Some(new_projections)) - .with_source(source) - .build(), - ) as _ - })) + match self.file_source.try_pushdown_projection(projection)? { + Some(new_source) => { + let mut new_file_scan_config = self.clone(); + new_file_scan_config.file_source = new_source; + Ok(Some(Arc::new(new_file_scan_config) as Arc)) + } + None => Ok(None), + } } fn try_pushdown_filters( @@ -698,15 +712,49 @@ impl DataSource for FileScanConfig { filters: Vec>, config: &ConfigOptions, ) -> Result>> { - let result = self.file_source.try_pushdown_filters(filters, config)?; + // Remap filter Column indices to match the table schema (file + partition columns). + // This is necessary because filters may have been created against a different schema + // (e.g., after projection pushdown) and need to be remapped to the table schema + // before being passed to the file source and ultimately serialized. + // For example, suppose the filter being pushed down is `c1_c2 > 5` and it was created + // against the output schema of this `DataSource`, which has the projection `c1 + c2 as c1_c2`. + // Thus we need to rewrite the filter back to `c1 + c2 > 5` before passing it to the file source. + let table_schema = self.file_source.table_schema().table_schema(); + // If there's a projection with aliases, first map the filters back through + // the projection expressions before remapping to the table schema. + let filters_to_remap = if let Some(projection) = self.file_source.projection() { + use datafusion_physical_plan::projection::update_expr; + filters + .into_iter() + .map(|filter| { + update_expr(&filter, projection.as_ref(), true)?.ok_or_else(|| { + internal_datafusion_err!( + "Failed to map filter expression through projection: {}", + filter + ) + }) + }) + .collect::>>()? + } else { + filters + }; + // Now remap column indices to match the table schema.
+ let remapped_filters: Result> = filters_to_remap + .into_iter() + .map(|filter| reassign_expr_columns(filter, table_schema.as_ref())) + .collect(); + let remapped_filters = remapped_filters?; + + let result = self + .file_source + .try_pushdown_filters(remapped_filters, config)?; match result.updated_node { Some(new_file_source) => { - let file_scan_config = FileScanConfigBuilder::from(self.clone()) - .with_source(new_file_source) - .build(); + let mut new_file_scan_config = self.clone(); + new_file_scan_config.file_source = new_file_source; Ok(FilterPushdownPropagation { filters: result.filters, - updated_node: Some(Arc::new(file_scan_config) as _), + updated_node: Some(Arc::new(new_file_scan_config) as _), }) } None => { @@ -732,7 +780,7 @@ impl FileScanConfig { } fn projection_indices(&self) -> Vec { - match &self.projection_exprs { + match self.file_source.projection() { Some(proj) => proj.ordered_column_indices(), None => (0..self.file_schema().fields().len() + self.table_partition_cols().len()) @@ -777,26 +825,12 @@ impl FileScanConfig { } } - pub fn projected_schema(&self) -> Arc { - let table_fields: Vec<_> = self - .projection_indices() - .into_iter() - .map(|idx| { - if idx < self.file_schema().fields().len() { - self.file_schema().field(idx).clone() - } else { - let partition_idx = idx - self.file_schema().fields().len(); - Arc::unwrap_or_clone(Arc::clone( - &self.table_partition_cols()[partition_idx], - )) - } - }) - .collect(); - - Arc::new(Schema::new_with_metadata( - table_fields, - self.file_schema().metadata().clone(), - )) + pub fn projected_schema(&self) -> Result> { + let schema = self.file_source.table_schema().table_schema(); + match self.file_source.projection() { + Some(proj) => Ok(Arc::new(proj.project_schema(schema)?)), + None => Ok(Arc::clone(schema)), + } } fn add_filter_equivalence_info( @@ -841,64 +875,8 @@ impl FileScanConfig { self.new_lines_in_values } - /// Project the schema, constraints, and the statistics on the given column indices - pub fn project(&self) -> (SchemaRef, Constraints, Statistics, Vec) { - if self.projection_exprs.is_none() && self.table_partition_cols().is_empty() { - return ( - Arc::clone(self.file_schema()), - self.constraints.clone(), - self.statistics().clone(), - self.output_ordering.clone(), - ); - } - - let schema = self.projected_schema(); - let constraints = self.projected_constraints(); - let stats = self.projected_stats(); - - let output_ordering = get_projected_output_ordering(self, &schema); - - (schema, constraints, stats, output_ordering) - } - - pub fn projected_file_column_names(&self) -> Option> { - let fields = self.file_schema().fields(); - - self.projection_exprs.as_ref().map(|p| { - let column_indices = p.ordered_column_indices(); - - column_indices - .iter() - .filter(|&&col_i| col_i < fields.len()) - .map(|&col_i| self.file_schema().field(col_i).name()) - .cloned() - .collect::>() - }) - } - - /// Projects only file schema, ignoring partition columns - pub fn projected_file_schema(&self) -> SchemaRef { - let fields = self.file_column_projection_indices().map(|indices| { - indices - .iter() - .map(|col_idx| self.file_schema().field(*col_idx)) - .cloned() - .collect::>() - }); - - fields.map_or_else( - || Arc::clone(self.file_schema()), - |f| { - Arc::new(Schema::new_with_metadata( - f, - self.file_schema().metadata.clone(), - )) - }, - ) - } - pub fn file_column_projection_indices(&self) -> Option> { - self.projection_exprs.as_ref().map(|p| { + self.file_source.projection().as_ref().map(|p| { 
p.ordered_column_indices() .into_iter() .filter(|&i| i < self.file_schema().fields().len()) @@ -1090,7 +1068,7 @@ impl Debug for FileScanConfig { impl DisplayAs for FileScanConfig { fn fmt_as(&self, t: DisplayFormatType, f: &mut Formatter) -> FmtResult { - let schema = self.projected_schema(); + let schema = self.projected_schema().map_err(|_| std::fmt::Error {})?; let orderings = get_projected_output_ordering(self, &schema); write!(f, "file_groups=")?; @@ -1114,252 +1092,6 @@ impl DisplayAs for FileScanConfig { } } -/// A helper that projects partition columns into the file record batches. -/// -/// One interesting trick is the usage of a cache for the key buffers of the partition column -/// dictionaries. Indeed, the partition columns are constant, so the dictionaries that represent them -/// have all their keys equal to 0. This enables us to re-use the same "all-zero" buffer across batches, -/// which makes the space consumption of the partition columns O(batch_size) instead of O(record_count). -pub struct PartitionColumnProjector { - /// An Arrow buffer initialized to zeros that represents the key array of all partition - /// columns (partition columns are materialized by dictionary arrays with only one - /// value in the dictionary, thus all the keys are equal to zero). - key_buffer_cache: ZeroBufferGenerators, - /// Mapping between the indexes in the list of partition columns and the target - /// schema. Sorted by index in the target schema so that we can iterate on it to - /// insert the partition columns in the target record batch. - projected_partition_indexes: Vec<(usize, usize)>, - /// The schema of the table once the projection was applied. - projected_schema: SchemaRef, -} - -impl PartitionColumnProjector { - // Create a projector to insert the partitioning columns into batches read from files - // - `projected_schema`: the target schema with both file and partitioning columns - // - `table_partition_cols`: all the partitioning column names - pub fn new(projected_schema: SchemaRef, table_partition_cols: &[String]) -> Self { - let mut idx_map = HashMap::new(); - for (partition_idx, partition_name) in table_partition_cols.iter().enumerate() { - if let Ok(schema_idx) = projected_schema.index_of(partition_name) { - idx_map.insert(partition_idx, schema_idx); - } - } - - let mut projected_partition_indexes: Vec<_> = idx_map.into_iter().collect(); - projected_partition_indexes.sort_by(|(_, a), (_, b)| a.cmp(b)); - - Self { - projected_partition_indexes, - key_buffer_cache: Default::default(), - projected_schema, - } - } - - // Transform the batch read from the file by inserting the partitioning columns - // to the right positions as deduced from `projected_schema` - // - `file_batch`: batch read from the file, with internal projection applied - // - `partition_values`: the list of partition values, one for each partition column - #[expect(clippy::needless_pass_by_value)] - pub fn project( - &mut self, - file_batch: RecordBatch, - partition_values: &[ScalarValue], - ) -> Result { - let expected_cols = - self.projected_schema.fields().len() - self.projected_partition_indexes.len(); - - if file_batch.columns().len() != expected_cols { - return exec_err!( - "Unexpected batch schema from file, expected {} cols but got {}", - expected_cols, - file_batch.columns().len() - ); - } - - let mut cols = file_batch.columns().to_vec(); - for &(pidx, sidx) in &self.projected_partition_indexes { - let p_value = partition_values.get(pidx).ok_or_else(|| { - exec_datafusion_err!("Invalid partitioning 
found on disk") - })?; - - let mut partition_value = Cow::Borrowed(p_value); - - // check if user forgot to dict-encode the partition value - let field = self.projected_schema.field(sidx); - let expected_data_type = field.data_type(); - let actual_data_type = partition_value.data_type(); - if let DataType::Dictionary(key_type, _) = expected_data_type { - if !matches!(actual_data_type, DataType::Dictionary(_, _)) { - warn!("Partition value for column {} was not dictionary-encoded, applied auto-fix.", field.name()); - partition_value = Cow::Owned(ScalarValue::Dictionary( - key_type.clone(), - Box::new(partition_value.as_ref().clone()), - )); - } - } - - cols.insert( - sidx, - create_output_array( - &mut self.key_buffer_cache, - partition_value.as_ref(), - file_batch.num_rows(), - )?, - ) - } - - RecordBatch::try_new_with_options( - Arc::clone(&self.projected_schema), - cols, - &RecordBatchOptions::new().with_row_count(Some(file_batch.num_rows())), - ) - .map_err(Into::into) - } -} - -#[derive(Debug, Default)] -struct ZeroBufferGenerators { - gen_i8: ZeroBufferGenerator, - gen_i16: ZeroBufferGenerator, - gen_i32: ZeroBufferGenerator, - gen_i64: ZeroBufferGenerator, - gen_u8: ZeroBufferGenerator, - gen_u16: ZeroBufferGenerator, - gen_u32: ZeroBufferGenerator, - gen_u64: ZeroBufferGenerator, -} - -/// Generate a arrow [`Buffer`] that contains zero values. -#[derive(Debug, Default)] -struct ZeroBufferGenerator -where - T: ArrowNativeType, -{ - cache: Option, - _t: PhantomData, -} - -impl ZeroBufferGenerator -where - T: ArrowNativeType, -{ - const SIZE: usize = size_of::(); - - fn get_buffer(&mut self, n_vals: usize) -> Buffer { - match &mut self.cache { - Some(buf) if buf.len() >= n_vals * Self::SIZE => { - buf.slice_with_length(0, n_vals * Self::SIZE) - } - _ => { - let mut key_buffer_builder = BufferBuilder::::new(n_vals); - key_buffer_builder.advance(n_vals); // keys are all 0 - self.cache.insert(key_buffer_builder.finish()).clone() - } - } - } -} - -fn create_dict_array( - buffer_gen: &mut ZeroBufferGenerator, - dict_val: &ScalarValue, - len: usize, - data_type: DataType, -) -> Result -where - T: ArrowNativeType, -{ - let dict_vals = dict_val.to_array()?; - - let sliced_key_buffer = buffer_gen.get_buffer(len); - - // assemble pieces together - let mut builder = ArrayData::builder(data_type) - .len(len) - .add_buffer(sliced_key_buffer); - builder = builder.add_child_data(dict_vals.to_data()); - Ok(Arc::new(DictionaryArray::::from( - builder.build().unwrap(), - ))) -} - -fn create_output_array( - key_buffer_cache: &mut ZeroBufferGenerators, - val: &ScalarValue, - len: usize, -) -> Result { - if let ScalarValue::Dictionary(key_type, dict_val) = &val { - match key_type.as_ref() { - DataType::Int8 => { - return create_dict_array( - &mut key_buffer_cache.gen_i8, - dict_val, - len, - val.data_type(), - ); - } - DataType::Int16 => { - return create_dict_array( - &mut key_buffer_cache.gen_i16, - dict_val, - len, - val.data_type(), - ); - } - DataType::Int32 => { - return create_dict_array( - &mut key_buffer_cache.gen_i32, - dict_val, - len, - val.data_type(), - ); - } - DataType::Int64 => { - return create_dict_array( - &mut key_buffer_cache.gen_i64, - dict_val, - len, - val.data_type(), - ); - } - DataType::UInt8 => { - return create_dict_array( - &mut key_buffer_cache.gen_u8, - dict_val, - len, - val.data_type(), - ); - } - DataType::UInt16 => { - return create_dict_array( - &mut key_buffer_cache.gen_u16, - dict_val, - len, - val.data_type(), - ); - } - DataType::UInt32 => { - return 
create_dict_array( - &mut key_buffer_cache.gen_u32, - dict_val, - len, - val.data_type(), - ); - } - DataType::UInt64 => { - return create_dict_array( - &mut key_buffer_cache.gen_u64, - dict_val, - len, - val.data_type(), - ); - } - _ => {} - } - } - - val.to_array_of_size(len) -} - /// The various listing tables does not attempt to read all files /// concurrently, instead they will read files in sequence within a /// partition. This is an important property as it allows plans to @@ -1436,7 +1168,8 @@ fn get_projected_output_ordering( } let indices = base_config - .projection_exprs + .file_source + .projection() .as_ref() .map(|p| p.ordered_column_indices()); @@ -1492,6 +1225,8 @@ pub fn wrap_partition_value_in_dict(val: ScalarValue) -> ScalarValue { #[cfg(test)] mod tests { + use std::collections::HashMap; + use super::*; use crate::test_util::col; use crate::TableSchema; @@ -1500,53 +1235,15 @@ mod tests { verify_sort_integrity, }; - use arrow::array::{Int32Array, RecordBatch}; use arrow::datatypes::Field; + use datafusion_common::internal_err; use datafusion_common::stats::Precision; - use datafusion_common::{assert_batches_eq, internal_err}; use datafusion_expr::{Operator, SortExpr}; use datafusion_physical_expr::create_physical_sort_expr; use datafusion_physical_expr::expressions::{BinaryExpr, Column, Literal}; + use datafusion_physical_expr::projection::ProjectionExpr; use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; - /// Returns the column names on the schema - pub fn columns(schema: &Schema) -> Vec { - schema.fields().iter().map(|f| f.name().clone()).collect() - } - - #[test] - fn physical_plan_config_no_projection() { - let file_schema = aggr_test_schema(); - let conf = config_for_projection( - Arc::clone(&file_schema), - None, - Statistics::new_unknown(&file_schema), - to_partition_cols(vec![( - "date".to_owned(), - wrap_partition_type_in_dict(DataType::Utf8), - )]), - ); - - let (proj_schema, _, proj_statistics, _) = conf.project(); - assert_eq!(proj_schema.fields().len(), file_schema.fields().len() + 1); - assert_eq!( - proj_schema.field(file_schema.fields().len()).name(), - "date", - "partition columns are the last columns" - ); - assert_eq!( - proj_statistics.column_statistics.len(), - file_schema.fields().len() + 1 - ); - // TODO implement tests for partition column statistics once implemented - - let col_names = conf.projected_file_column_names(); - assert_eq!(col_names, None); - - let col_indices = conf.file_column_projection_indices(); - assert_eq!(col_indices, None); - } - #[test] fn physical_plan_config_no_projection_tab_cols_as_field() { let file_schema = aggr_test_schema(); @@ -1567,7 +1264,7 @@ mod tests { ); // verify the proj_schema includes the last column and exactly the same the field it is defined - let proj_schema = conf.projected_schema(); + let proj_schema = conf.projected_schema().unwrap(); assert_eq!(proj_schema.fields().len(), file_schema.fields().len() + 1); assert_eq!( *proj_schema.field(file_schema.fields().len()), @@ -1576,223 +1273,6 @@ mod tests { ); } - #[test] - fn physical_plan_config_with_projection() { - let file_schema = aggr_test_schema(); - let conf = config_for_projection( - Arc::clone(&file_schema), - Some(vec![file_schema.fields().len(), 0]), - Statistics { - num_rows: Precision::Inexact(10), - // assign the column index to distinct_count to help assert - // the source statistic after the projection - column_statistics: (0..file_schema.fields().len()) - .map(|i| ColumnStatistics { - distinct_count: Precision::Inexact(i), 
- ..Default::default() - }) - .collect(), - total_byte_size: Precision::Absent, - }, - to_partition_cols(vec![( - "date".to_owned(), - wrap_partition_type_in_dict(DataType::Utf8), - )]), - ); - - let (proj_schema, _, proj_statistics, _) = conf.project(); - assert_eq!( - columns(&proj_schema), - vec!["date".to_owned(), "c1".to_owned()] - ); - let proj_stat_cols = proj_statistics.column_statistics; - assert_eq!(proj_stat_cols.len(), 2); - // TODO implement tests for proj_stat_cols[0] once partition column - // statistics are implemented - assert_eq!(proj_stat_cols[1].distinct_count, Precision::Inexact(0)); - - let col_names = conf.projected_file_column_names(); - assert_eq!(col_names, Some(vec!["c1".to_owned()])); - - let col_indices = conf.file_column_projection_indices(); - assert_eq!(col_indices, Some(vec![0])); - } - - #[test] - fn partition_column_projector() { - let file_batch = build_table_i32( - ("a", &vec![0, 1, 2]), - ("b", &vec![-2, -1, 0]), - ("c", &vec![10, 11, 12]), - ); - let partition_cols = vec![ - ( - "year".to_owned(), - wrap_partition_type_in_dict(DataType::Utf8), - ), - ( - "month".to_owned(), - wrap_partition_type_in_dict(DataType::Utf8), - ), - ( - "day".to_owned(), - wrap_partition_type_in_dict(DataType::Utf8), - ), - ]; - // create a projected schema - let statistics = Statistics { - num_rows: Precision::Inexact(3), - total_byte_size: Precision::Absent, - column_statistics: Statistics::unknown_column(&file_batch.schema()), - }; - - let conf = config_for_projection( - file_batch.schema(), - // keep all cols from file and 2 from partitioning - Some(vec![ - 0, - 1, - 2, - file_batch.schema().fields().len(), - file_batch.schema().fields().len() + 2, - ]), - statistics.clone(), - to_partition_cols(partition_cols.clone()), - ); - - let source_statistics = conf.statistics(); - let conf_stats = conf.partition_statistics(None).unwrap(); - - // projection should be reflected in the file source statistics - assert_eq!(conf_stats.num_rows, Precision::Inexact(3)); - - // 3 original statistics + 2 partition statistics - assert_eq!(conf_stats.column_statistics.len(), 5); - - // file statics should not be modified - assert_eq!(source_statistics, statistics); - assert_eq!(source_statistics.column_statistics.len(), 3); - - let proj_schema = conf.projected_schema(); - // created a projector for that projected schema - let mut proj = PartitionColumnProjector::new( - proj_schema, - &partition_cols - .iter() - .map(|x| x.0.clone()) - .collect::>(), - ); - - // project first batch - let projected_batch = proj - .project( - // file_batch is ok here because we kept all the file cols in the projection - file_batch, - &[ - wrap_partition_value_in_dict(ScalarValue::from("2021")), - wrap_partition_value_in_dict(ScalarValue::from("10")), - wrap_partition_value_in_dict(ScalarValue::from("26")), - ], - ) - .expect("Projection of partition columns into record batch failed"); - let expected = [ - "+---+----+----+------+-----+", - "| a | b | c | year | day |", - "+---+----+----+------+-----+", - "| 0 | -2 | 10 | 2021 | 26 |", - "| 1 | -1 | 11 | 2021 | 26 |", - "| 2 | 0 | 12 | 2021 | 26 |", - "+---+----+----+------+-----+", - ]; - assert_batches_eq!(expected, &[projected_batch]); - - // project another batch that is larger than the previous one - let file_batch = build_table_i32( - ("a", &vec![5, 6, 7, 8, 9]), - ("b", &vec![-10, -9, -8, -7, -6]), - ("c", &vec![12, 13, 14, 15, 16]), - ); - let projected_batch = proj - .project( - // file_batch is ok here because we kept all the file cols in the 
projection - file_batch, - &[ - wrap_partition_value_in_dict(ScalarValue::from("2021")), - wrap_partition_value_in_dict(ScalarValue::from("10")), - wrap_partition_value_in_dict(ScalarValue::from("27")), - ], - ) - .expect("Projection of partition columns into record batch failed"); - let expected = [ - "+---+-----+----+------+-----+", - "| a | b | c | year | day |", - "+---+-----+----+------+-----+", - "| 5 | -10 | 12 | 2021 | 27 |", - "| 6 | -9 | 13 | 2021 | 27 |", - "| 7 | -8 | 14 | 2021 | 27 |", - "| 8 | -7 | 15 | 2021 | 27 |", - "| 9 | -6 | 16 | 2021 | 27 |", - "+---+-----+----+------+-----+", - ]; - assert_batches_eq!(expected, &[projected_batch]); - - // project another batch that is smaller than the previous one - let file_batch = build_table_i32( - ("a", &vec![0, 1, 3]), - ("b", &vec![2, 3, 4]), - ("c", &vec![4, 5, 6]), - ); - let projected_batch = proj - .project( - // file_batch is ok here because we kept all the file cols in the projection - file_batch, - &[ - wrap_partition_value_in_dict(ScalarValue::from("2021")), - wrap_partition_value_in_dict(ScalarValue::from("10")), - wrap_partition_value_in_dict(ScalarValue::from("28")), - ], - ) - .expect("Projection of partition columns into record batch failed"); - let expected = [ - "+---+---+---+------+-----+", - "| a | b | c | year | day |", - "+---+---+---+------+-----+", - "| 0 | 2 | 4 | 2021 | 28 |", - "| 1 | 3 | 5 | 2021 | 28 |", - "| 3 | 4 | 6 | 2021 | 28 |", - "+---+---+---+------+-----+", - ]; - assert_batches_eq!(expected, &[projected_batch]); - - // forgot to dictionary-wrap the scalar value - let file_batch = build_table_i32( - ("a", &vec![0, 1, 2]), - ("b", &vec![-2, -1, 0]), - ("c", &vec![10, 11, 12]), - ); - let projected_batch = proj - .project( - // file_batch is ok here because we kept all the file cols in the projection - file_batch, - &[ - ScalarValue::from("2021"), - ScalarValue::from("10"), - ScalarValue::from("26"), - ], - ) - .expect("Projection of partition columns into record batch failed"); - let expected = [ - "+---+----+----+------+-----+", - "| a | b | c | year | day |", - "+---+----+----+------+-----+", - "| 0 | -2 | 10 | 2021 | 26 |", - "| 1 | -1 | 11 | 2021 | 26 |", - "| 2 | 0 | 12 | 2021 | 26 |", - "+---+----+----+------+-----+", - ]; - assert_batches_eq!(expected, &[projected_batch]); - } - #[test] fn test_projected_file_schema_with_partition_col() { let schema = aggr_test_schema(); @@ -1808,13 +1288,13 @@ mod tests { ]; // Projected file schema for config with projection including partition column - let projection = config_for_projection( + let config = config_for_projection( schema.clone(), Some(vec![0, 3, 5, schema.fields().len()]), Statistics::new_unknown(&schema), to_partition_cols(partition_cols), - ) - .projected_file_schema(); + ); + let projection = projected_file_schema(&config); // Assert partition column filtered out in projected file schema let expected_columns = vec!["c1", "c4", "c6"]; @@ -1826,6 +1306,16 @@ mod tests { assert_eq!(expected_columns, actual_columns); } + /// Projects only file schema, ignoring partition columns + fn projected_file_schema(config: &FileScanConfig) -> SchemaRef { + let file_schema = config.file_source.table_schema().file_schema(); + if let Some(file_indices) = config.file_column_projection_indices() { + Arc::new(file_schema.project(&file_indices).unwrap()) + } else { + Arc::clone(file_schema) + } + } + #[test] fn test_projected_file_schema_without_projection() { let schema = aggr_test_schema(); @@ -1841,13 +1331,13 @@ mod tests { ]; // Projected file schema 
for config without projection - let projection = config_for_projection( + let config = config_for_projection( schema.clone(), None, Statistics::new_unknown(&schema), to_partition_cols(partition_cols), - ) - .projected_file_schema(); + ); + let projection = projected_file_schema(&config); // Assert projected file schema is equal to file schema assert_eq!(projection.fields(), schema.fields()); @@ -2174,6 +1664,7 @@ mod tests { Arc::new(MockSource::new(table_schema.clone())), ) .with_projection_indices(projection) + .unwrap() .with_statistics(statistics) .build() } @@ -2186,29 +1677,6 @@ mod tests { .collect::>() } - /// returns record batch with 3 columns of i32 in memory - pub fn build_table_i32( - a: (&str, &Vec), - b: (&str, &Vec), - c: (&str, &Vec), - ) -> RecordBatch { - let schema = Schema::new(vec![ - Field::new(a.0, DataType::Int32, false), - Field::new(b.0, DataType::Int32, false), - Field::new(c.0, DataType::Int32, false), - ]); - - RecordBatch::try_new( - Arc::new(schema), - vec![ - Arc::new(Int32Array::from(a.1.clone())), - Arc::new(Int32Array::from(b.1.clone())), - Arc::new(Int32Array::from(c.1.clone())), - ], - ) - .unwrap() - } - #[test] fn test_file_scan_config_builder() { let file_schema = aggr_test_schema(); @@ -2236,6 +1704,7 @@ mod tests { let config = builder .with_limit(Some(1000)) .with_projection_indices(Some(vec![0, 1])) + .unwrap() .with_statistics(Statistics::new_unknown(&file_schema)) .with_file_groups(vec![FileGroup::new(vec![PartitionedFile::new( "test.parquet".to_string(), @@ -2254,7 +1723,11 @@ mod tests { assert_eq!(*config.file_schema(), file_schema); assert_eq!(config.limit, Some(1000)); assert_eq!( - config.projection_exprs.as_ref().map(|p| p.column_indices()), + config + .file_source + .projection() + .as_ref() + .map(|p| p.column_indices()), Some(vec![0, 1]) ); assert_eq!(config.table_partition_cols().len(), 1); @@ -2294,15 +1767,17 @@ mod tests { Arc::clone(&file_source), ) .with_projection_indices(Some(vec![0, 1, 2])) + .unwrap() .build(); // Simulate projection being updated. Since the filter has already been pushed down, // the new projection won't include the filtered column. 
+ let exprs = ProjectionExprs::new(vec![ProjectionExpr::new( + col("c1", &file_schema).unwrap(), + "c1".to_string(), + )]); let data_source = config - .try_swapping_with_projection(&[ProjectionExpr::new( - col("c3", &file_schema).unwrap(), - "c3".to_string(), - )]) + .try_swapping_with_projection(&exprs) .unwrap() .unwrap(); @@ -2345,9 +1820,16 @@ mod tests { assert_eq!(config.object_store_url, object_store_url); assert_eq!(*config.file_schema(), file_schema); assert_eq!(config.limit, None); + // When no projection is specified, the file source should have an unprojected projection + // (i.e., all columns) + let expected_projection: Vec = (0..file_schema.fields().len()).collect(); assert_eq!( - config.projection_exprs.as_ref().map(|p| p.column_indices()), - None + config + .file_source + .projection() + .as_ref() + .map(|p| p.column_indices()), + Some(expected_projection) ); assert!(config.table_partition_cols().is_empty()); assert!(config.file_groups.is_empty()); @@ -2399,6 +1881,7 @@ mod tests { Arc::clone(&file_source), ) .with_projection_indices(Some(vec![0, 2])) + .unwrap() .with_limit(Some(10)) .with_file(file.clone()) .with_constraints(Constraints::default()) @@ -2417,7 +1900,8 @@ mod tests { assert_eq!(*new_config.file_schema(), schema); assert_eq!( new_config - .projection_exprs + .file_source + .projection() .as_ref() .map(|p| p.column_indices()), Some(vec![0, 2]) @@ -2641,7 +2125,8 @@ mod tests { ObjectStoreUrl::parse("test:///").unwrap(), Arc::new(MockSource::new(table_schema.clone())), ) - .with_projection_indices(Some(vec![0, 2])) // Only project columns 0 and 2 + .with_projection_indices(Some(vec![0, 2])) + .unwrap() // Only project columns 0 and 2 .with_file_groups(vec![file_group]) .build(); diff --git a/datafusion/datasource/src/file_stream.rs b/datafusion/datasource/src/file_stream.rs index 0568b4cc4e5f..7b24b71348bf 100644 --- a/datafusion/datasource/src/file_stream.rs +++ b/datafusion/datasource/src/file_stream.rs @@ -27,7 +27,7 @@ use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; -use crate::file_scan_config::{FileScanConfig, PartitionColumnProjector}; +use crate::file_scan_config::FileScanConfig; use crate::PartitionedFile; use arrow::datatypes::SchemaRef; use datafusion_common::error::Result; @@ -38,7 +38,6 @@ use datafusion_physical_plan::metrics::{ use arrow::record_batch::RecordBatch; use datafusion_common::instant::Instant; -use datafusion_common::ScalarValue; use futures::future::BoxFuture; use futures::stream::BoxStream; @@ -56,8 +55,6 @@ pub struct FileStream { /// A dynamic [`FileOpener`]. Calling `open()` returns a [`FileOpenFuture`], /// which can be resolved to a stream of `RecordBatch`. 
file_opener: Arc, - /// The partition column projector - pc_projector: PartitionColumnProjector, /// The stream state state: FileStreamState, /// File stream specific metrics @@ -76,15 +73,7 @@ impl FileStream { file_opener: Arc, metrics: &ExecutionPlanMetricsSet, ) -> Result { - let projected_schema = config.projected_schema(); - let pc_projector = PartitionColumnProjector::new( - Arc::clone(&projected_schema), - &config - .table_partition_cols() - .iter() - .map(|x| x.name().clone()) - .collect::>(), - ); + let projected_schema = config.projected_schema()?; let file_group = config.file_groups[partition].clone(); @@ -93,7 +82,6 @@ impl FileStream { projected_schema, remain: config.limit, file_opener, - pc_projector, state: FileStreamState::Idle, file_stream_metrics: FileStreamMetrics::new(metrics, partition), baseline_metrics: BaselineMetrics::new(metrics, partition), @@ -114,15 +102,9 @@ impl FileStream { /// /// Since file opening is mostly IO (and may involve a /// bunch of sequential IO), it can be parallelized with decoding. - fn start_next_file(&mut self) -> Option)>> { + fn start_next_file(&mut self) -> Option> { let part_file = self.file_iter.pop_front()?; - - let partition_values = part_file.partition_values.clone(); - Some( - self.file_opener - .open(part_file) - .map(|future| (future, partition_values)), - ) + Some(self.file_opener.open(part_file)) } fn poll_inner(&mut self, cx: &mut Context<'_>) -> Poll>> { @@ -132,12 +114,7 @@ impl FileStream { self.file_stream_metrics.time_opening.start(); match self.start_next_file().transpose() { - Ok(Some((future, partition_values))) => { - self.state = FileStreamState::Open { - future, - partition_values, - } - } + Ok(Some(future)) => self.state = FileStreamState::Open { future }, Ok(None) => return Poll::Ready(None), Err(e) => { self.state = FileStreamState::Error; @@ -145,13 +122,8 @@ impl FileStream { } } } - FileStreamState::Open { - future, - partition_values, - } => match ready!(future.poll_unpin(cx)) { + FileStreamState::Open { future } => match ready!(future.poll_unpin(cx)) { Ok(reader) => { - let partition_values = mem::take(partition_values); - // include time needed to start opening in `start_next_file` self.file_stream_metrics.time_opening.stop(); let next = self.start_next_file().transpose(); @@ -159,22 +131,14 @@ impl FileStream { self.file_stream_metrics.time_scanning_total.start(); match next { - Ok(Some((next_future, next_partition_values))) => { + Ok(Some(next_future)) => { self.state = FileStreamState::Scan { - partition_values, reader, - next: Some(( - NextOpen::Pending(next_future), - next_partition_values, - )), + next: Some(NextOpen::Pending(next_future)), }; } Ok(None) => { - self.state = FileStreamState::Scan { - reader, - partition_values, - next: None, - }; + self.state = FileStreamState::Scan { reader, next: None }; } Err(e) => { self.state = FileStreamState::Error; @@ -196,13 +160,9 @@ impl FileStream { } } }, - FileStreamState::Scan { - reader, - partition_values, - next, - } => { + FileStreamState::Scan { reader, next } => { // We need to poll the next `FileOpenFuture` here to drive it forward - if let Some((next_open_future, _)) = next { + if let Some(next_open_future) = next { if let NextOpen::Pending(f) = next_open_future { if let Poll::Ready(reader) = f.as_mut().poll(cx) { *next_open_future = NextOpen::Ready(reader); @@ -213,31 +173,22 @@ impl FileStream { Some(Ok(batch)) => { self.file_stream_metrics.time_scanning_until_data.stop(); self.file_stream_metrics.time_scanning_total.stop(); - let result = 
self - .pc_projector - .project(batch, partition_values) - .map(|batch| match &mut self.remain { - Some(remain) => { - if *remain > batch.num_rows() { - *remain -= batch.num_rows(); - batch - } else { - let batch = batch.slice(0, *remain); - self.state = FileStreamState::Limit; - *remain = 0; - batch - } + let batch = match &mut self.remain { + Some(remain) => { + if *remain > batch.num_rows() { + *remain -= batch.num_rows(); + batch + } else { + let batch = batch.slice(0, *remain); + self.state = FileStreamState::Limit; + *remain = 0; + batch } - None => batch, - }); - - if result.is_err() { - // If the partition value projection fails, this is not governed by - // the `OnError` behavior - self.state = FileStreamState::Error - } + } + None => batch, + }; self.file_stream_metrics.time_scanning_total.start(); - return Poll::Ready(Some(result)); + return Poll::Ready(Some(Ok(batch))); } Some(Err(err)) => { self.file_stream_metrics.file_scan_errors.add(1); @@ -247,22 +198,19 @@ impl FileStream { match self.on_error { // If `OnError::Skip` we skip the file as soon as we hit the first error OnError::Skip => match mem::take(next) { - Some((future, partition_values)) => { + Some(future) => { self.file_stream_metrics.time_opening.start(); match future { NextOpen::Pending(future) => { - self.state = FileStreamState::Open { - future, - partition_values, - } + self.state = + FileStreamState::Open { future } } NextOpen::Ready(reader) => { self.state = FileStreamState::Open { future: Box::pin(std::future::ready( reader, )), - partition_values, } } } @@ -280,22 +228,18 @@ impl FileStream { self.file_stream_metrics.time_scanning_total.stop(); match mem::take(next) { - Some((future, partition_values)) => { + Some(future) => { self.file_stream_metrics.time_opening.start(); match future { NextOpen::Pending(future) => { - self.state = FileStreamState::Open { - future, - partition_values, - } + self.state = FileStreamState::Open { future } } NextOpen::Ready(reader) => { self.state = FileStreamState::Open { future: Box::pin(std::future::ready( reader, )), - partition_values, } } } @@ -373,21 +317,16 @@ pub enum FileStreamState { Open { /// A [`FileOpenFuture`] returned by [`FileOpener::open`] future: FileOpenFuture, - /// The partition values for this file - partition_values: Vec, }, /// Scanning the [`BoxStream`] returned by the completion of a [`FileOpenFuture`] /// returned by [`FileOpener::open`] Scan { - /// Partitioning column values for the current batch_iter - partition_values: Vec, /// The reader instance reader: BoxStream<'static, Result>, - /// A [`FileOpenFuture`] for the next file to be processed, - /// and its corresponding partition column values, if any. + /// A [`FileOpenFuture`] for the next file to be processed. /// This allows the next file to be opened in parallel while the /// current file is read. 
- next: Option<(NextOpen, Vec)>, + next: Option, }, /// Encountered an error Error, diff --git a/datafusion/datasource/src/memory.rs b/datafusion/datasource/src/memory.rs index 036574ddd3c3..731a7f5d8669 100644 --- a/datafusion/datasource/src/memory.rs +++ b/datafusion/datasource/src/memory.rs @@ -35,11 +35,12 @@ use datafusion_common::{ }; use datafusion_execution::TaskContext; use datafusion_physical_expr::equivalence::project_orderings; +use datafusion_physical_expr::projection::ProjectionExprs; use datafusion_physical_expr::utils::collect_columns; use datafusion_physical_expr::{EquivalenceProperties, LexOrdering}; use datafusion_physical_plan::memory::MemoryStream; use datafusion_physical_plan::projection::{ - all_alias_free_columns, new_projections_for_columns, ProjectionExpr, + all_alias_free_columns, new_projections_for_columns, }; use datafusion_physical_plan::{ common, ColumnarValue, DisplayAs, DisplayFormatType, Partitioning, PhysicalExpr, @@ -230,15 +231,16 @@ impl DataSource for MemorySourceConfig { fn try_swapping_with_projection( &self, - projection: &[ProjectionExpr], + projection: &ProjectionExprs, ) -> Result>> { // If there is any non-column or alias-carrier expression, Projection should not be removed. // This process can be moved into MemoryExec, but it would be an overlap of their responsibility. - all_alias_free_columns(projection) + let exprs = projection.iter().cloned().collect_vec(); + all_alias_free_columns(exprs.as_slice()) .then(|| { let all_projections = (0..self.schema.fields().len()).collect(); let new_projections = new_projections_for_columns( - projection, + &exprs, self.projection().as_ref().unwrap_or(&all_projections), ); diff --git a/datafusion/datasource/src/mod.rs b/datafusion/datasource/src/mod.rs index 2c7d40d2fb3b..a749204ee1cc 100644 --- a/datafusion/datasource/src/mod.rs +++ b/datafusion/datasource/src/mod.rs @@ -41,6 +41,7 @@ pub mod file_scan_config; pub mod file_sink_config; pub mod file_stream; pub mod memory; +pub mod projection; pub mod schema_adapter; pub mod sink; pub mod source; diff --git a/datafusion/datasource/src/projection.rs b/datafusion/datasource/src/projection.rs new file mode 100644 index 000000000000..9e3139f4fbd3 --- /dev/null +++ b/datafusion/datasource/src/projection.rs @@ -0,0 +1,631 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
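As a rough usage sketch of how the helpers in this new module are meant to be combined by a source that can only push down plain column selection (illustrative only; `MySimpleSource`, its fields, and `make_inner_opener` are hypothetical, and generic parameters are spelled out):

impl MySimpleSource {
    fn create_file_opener(
        &self,
        object_store: Arc<dyn object_store::ObjectStore>,
        base_config: &FileScanConfig,
        partition: usize,
    ) -> Result<Arc<dyn FileOpener>> {
        // Split the pushed-down projection into plain file column indices plus a
        // remainder projection (the remainder also covers partition columns).
        let split = match &self.projection {
            Some(projection) => {
                SplitProjection::new(self.table_schema.file_schema(), projection)
            }
            None => SplitProjection::unprojected(&self.table_schema),
        };
        // The inner opener only needs to decode `split.file_indices` from each file...
        let inner = self.make_inner_opener(
            object_store,
            base_config,
            partition,
            &split.file_indices,
        )?;
        // ...and ProjectionOpener applies the remainder projection (expressions,
        // aliases, partition-value substitution) on top of the decoded batches.
        let opener: Arc<dyn FileOpener> =
            ProjectionOpener::try_new(split, inner, self.table_schema.file_schema())?;
        Ok(opener)
    }
}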
+ +use std::sync::Arc; + +use arrow::datatypes::{Schema, SchemaRef}; +use datafusion_common::{ + tree_node::{Transformed, TransformedResult, TreeNode}, + Result, ScalarValue, +}; +use datafusion_physical_expr::{ + expressions::{Column, Literal}, + projection::{ProjectionExpr, ProjectionExprs}, +}; +use futures::{FutureExt, StreamExt}; +use itertools::Itertools; + +use crate::{ + file_stream::{FileOpenFuture, FileOpener}, + PartitionedFile, TableSchema, +}; + +/// A file opener that handles applying a projection on top of an inner opener. +/// +/// This includes handling partition columns. +/// +/// Any projection pushed down will be split up into: +/// - Simple column indices / column selection +/// - A remainder projection that this opener applies on top of it +/// +/// This is meant to simplify projection pushdown for sources like CSV +/// that can only handle "simple" column selection. +pub struct ProjectionOpener { + inner: Arc, + projection: ProjectionExprs, + input_schema: SchemaRef, + partition_columns: Vec, +} + +impl ProjectionOpener { + pub fn try_new( + projection: SplitProjection, + inner: Arc, + file_schema: &Schema, + ) -> Result> { + Ok(Arc::new(ProjectionOpener { + inner, + projection: projection.remapped_projection, + input_schema: Arc::new(file_schema.project(&projection.file_indices)?), + partition_columns: projection.partition_columns, + })) + } +} + +impl FileOpener for ProjectionOpener { + fn open(&self, partitioned_file: PartitionedFile) -> Result { + let partition_values = partitioned_file.partition_values.clone(); + // Modify any references to partition columns in the projection expressions + // and substitute them with literal values from PartitionedFile.partition_values + let projection = if self.partition_columns.is_empty() { + self.projection.clone() + } else { + inject_partition_columns_into_projection( + &self.projection, + &self.partition_columns, + partition_values, + ) + }; + let projector = projection.make_projector(&self.input_schema)?; + + let inner = self.inner.open(partitioned_file)?; + + Ok(async move { + let stream = inner.await?; + let stream = stream.map(move |batch| { + let batch = batch?; + let batch = projector.project_batch(&batch)?; + Ok(batch) + }); + Ok(stream.boxed()) + } + .boxed()) + } +} + +#[derive(Debug, Clone, Copy)] +pub struct PartitionColumnIndex { + /// The index of this partition column in the remainder projection (>= num_file_columns) + pub in_remainder_projection: usize, + /// The index of this partition column in the partition_values array + pub in_partition_values: usize, +} + +fn inject_partition_columns_into_projection( + projection: &ProjectionExprs, + partition_columns: &[PartitionColumnIndex], + partition_values: Vec, +) -> ProjectionExprs { + // Pre-create all literals for partition columns to avoid cloning ScalarValues multiple times. 
+ let partition_literals: Vec> = partition_values + .into_iter() + .map(|value| Arc::new(Literal::new(value))) + .collect(); + + let projections = projection + .iter() + .map(|projection| { + let expr = Arc::clone(&projection.expr) + .transform(|expr| { + let original_expr = Arc::clone(&expr); + if let Some(column) = expr.as_any().downcast_ref::() { + // Check if this column index corresponds to a partition column + if let Some(pci) = partition_columns + .iter() + .find(|pci| pci.in_remainder_projection == column.index()) + { + let literal = + Arc::clone(&partition_literals[pci.in_partition_values]); + return Ok(Transformed::yes(literal)); + } + } + Ok(Transformed::no(original_expr)) + }) + .data() + .expect("infallible transform"); + ProjectionExpr::new(expr, projection.alias.clone()) + }) + .collect_vec(); + ProjectionExprs::new(projections) +} + +/// At a high level the goal of SplitProjection is to take a ProjectionExprs meant to be applied to the table schema +/// and split that into: +/// - The projection indices into the file schema (`file_indices`) +/// - The pre-computed partition column mappings (`partition_columns`), which record, for each partition column, both its index in the remapped remainder projection +/// and its index into the partition values array +/// - A remapped projection that can be applied after the file projection is applied +/// This remapped projection has the following properties: +/// - Column indices referring to file columns are remapped to [0..file_indices.len()) +/// - Column indices referring to partition columns are remapped to [file_indices.len()..) +/// +/// This allows the ProjectionOpener to easily identify which columns in the remapped projection +/// refer to partition columns and substitute them with literals from the partition values. +#[derive(Debug, Clone)] +pub struct SplitProjection { + /// The original projection this [`SplitProjection`] was derived from + pub source: ProjectionExprs, + /// Column indices to read from file (public for file sources) + pub file_indices: Vec, + /// Pre-computed partition column mappings (internal, used by ProjectionOpener) + pub(crate) partition_columns: Vec, + /// The remapped projection (internal, used by ProjectionOpener) + pub(crate) remapped_projection: ProjectionExprs, +} + +impl SplitProjection { + pub fn unprojected(table_schema: &TableSchema) -> Self { + let projection = ProjectionExprs::from_indices( + &(0..table_schema.table_schema().fields().len()).collect_vec(), + table_schema.table_schema(), + ); + Self::new(table_schema.file_schema(), &projection) + } + + /// Creates a new [`SplitProjection`] by splitting a projection into + /// simple file column indices and a remainder projection that is applied after reading the file. + /// + /// In other words: we get a `Vec` projection that is meant to be applied on top of `file_schema` + /// and a remainder projection that is applied to the result of that first projection. + /// + /// Here `file_schema` is expected to be the *logical* schema of the file, that is the + /// table schema minus any partition columns. + /// Partition columns are always expected to be at the end of the table schema. + /// Note that `file_schema` is *not* the physical schema of the file.
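A small worked example of the split performed by `new` below (illustrative; the column names are hypothetical):

// logical file schema: [a, b, c]; table schema: [a, b, c, p] where `p` is a partition column
// projection over the table schema: [c@2, p@3]
//
//   file_indices        == [2]         (only column `c` is read from the file)
//   remapped_projection == [c@0, p@1]  (`c` is remapped to 0, `p` to file_indices.len() + 0 == 1)
//   partition_columns   == [PartitionColumnIndex { in_remainder_projection: 1, in_partition_values: 0 }]
//
// ProjectionOpener later substitutes `p@1` with a Literal built from each file's partition values.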
+    pub fn new(logical_file_schema: &Schema, projection: &ProjectionExprs) -> Self {
+        let num_file_schema_columns = logical_file_schema.fields().len();
+
+        // Collect all unique columns and classify as file or partition
+        let mut file_columns = Vec::new();
+        let mut partition_columns = Vec::new();
+        let mut all_columns = std::collections::HashMap::new();
+
+        // Extract all unique column references (index -> name)
+        for proj_expr in projection {
+            proj_expr
+                .expr
+                .apply(|expr| {
+                    if let Some(column) = expr.as_any().downcast_ref::<Column>() {
+                        all_columns
+                            .entry(column.index())
+                            .or_insert_with(|| column.name().to_string());
+                    }
+                    Ok(datafusion_common::tree_node::TreeNodeRecursion::Continue)
+                })
+                .expect("infallible apply");
+        }
+
+        // Sort by index and classify into file vs partition columns
+        let mut sorted_columns: Vec<_> = all_columns
+            .into_iter()
+            .map(|(idx, name)| (name, idx))
+            .collect();
+        sorted_columns.sort_by_key(|(_, idx)| *idx);
+
+        // Separate file and partition columns, assigning final indices
+        // Pre-create all remapped columns to avoid duplicate Arc'd expressions
+        let mut column_mapping = std::collections::HashMap::new();
+        let mut file_idx = 0;
+        let mut partition_idx = 0;
+
+        for (name, original_index) in sorted_columns {
+            let new_index = if original_index < num_file_schema_columns {
+                // File column: gets index [0..num_file_columns)
+                file_columns.push(original_index);
+                let idx = file_idx;
+                file_idx += 1;
+                idx
+            } else {
+                // Partition column: gets index [num_file_columns..)
+                partition_columns.push(original_index);
+                let idx = file_idx + partition_idx;
+                partition_idx += 1;
+                idx
+            };
+
+            // Pre-create the remapped column so all references can share the same Arc
+            let new_column: Arc<dyn PhysicalExpr> =
+                Arc::new(Column::new(&name, new_index));
+            column_mapping.insert(original_index, new_column);
+        }
+
+        // Single tree transformation: remap all column references using pre-created columns
+        let remapped_projection = projection
+            .iter()
+            .map(|proj_expr| {
+                let expr = Arc::clone(&proj_expr.expr)
+                    .transform(|expr| {
+                        let original_expr = Arc::clone(&expr);
+                        if let Some(column) = expr.as_any().downcast_ref::<Column>() {
+                            if let Some(new_column) = column_mapping.get(&column.index())
+                            {
+                                return Ok(Transformed::yes(Arc::clone(new_column)));
+                            }
+                        }
+                        Ok(Transformed::no(original_expr))
+                    })
+                    .data()
+                    .expect("infallible transform");
+                ProjectionExpr::new(expr, proj_expr.alias.clone())
+            })
+            .collect_vec();
+
+        // Pre-compute partition column mappings for ProjectionOpener
+        let num_file_columns = file_columns.len();
+        let partition_column_mappings = partition_columns
+            .iter()
+            .enumerate()
+            .map(|(partition_idx, &table_index)| PartitionColumnIndex {
+                in_remainder_projection: num_file_columns + partition_idx,
+                in_partition_values: table_index - num_file_schema_columns,
+            })
+            .collect_vec();
+
+        Self {
+            source: projection.clone(),
+            file_indices: file_columns,
+            partition_columns: partition_column_mappings,
+            remapped_projection: ProjectionExprs::from(remapped_projection),
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use std::sync::Arc;
+
+    use arrow::array::AsArray;
+    use arrow::datatypes::{DataType, SchemaRef};
+    use datafusion_common::{record_batch, DFSchema, ScalarValue};
+    use datafusion_expr::{col, execution_props::ExecutionProps, Expr};
+    use datafusion_physical_expr::{create_physical_exprs, projection::ProjectionExpr};
+    use itertools::Itertools;
+
+    use super::*;
+
+    fn create_projection_exprs<'a>(
+        exprs: impl IntoIterator<Item = &'a Expr>,
+        schema: &SchemaRef,
+    ) -> ProjectionExprs
+    {
+        let df_schema = DFSchema::try_from(Arc::clone(schema)).unwrap();
+        let physical_exprs =
+            create_physical_exprs(exprs, &df_schema, &ExecutionProps::default()).unwrap();
+        let projection_exprs = physical_exprs
+            .into_iter()
+            .enumerate()
+            .map(|(i, e)| ProjectionExpr::new(Arc::clone(&e), format!("col{i}")))
+            .collect_vec();
+        ProjectionExprs::from(projection_exprs)
+    }
+
+    #[test]
+    fn test_split_projection_with_partition_columns() {
+        use arrow::array::AsArray;
+        use arrow::datatypes::Field;
+        // Simulate the avro_exec_with_partition test scenario:
+        // file_schema has 3 fields
+        let file_schema = Arc::new(Schema::new(vec![
+            Field::new("id", DataType::Int32, false),
+            Field::new("bool_col", DataType::Boolean, false),
+            Field::new("tinyint_col", DataType::Int8, false),
+        ]));
+
+        // table_schema has 4 fields (3 file + 1 partition)
+        let table_schema = Arc::new(Schema::new(vec![
+            Field::new("id", DataType::Int32, false),
+            Field::new("bool_col", DataType::Boolean, false),
+            Field::new("tinyint_col", DataType::Int8, false),
+            Field::new("date", DataType::Utf8, false), // partition column at index 3
+        ]));
+
+        // projection indices: [0, 1, 3, 2]
+        // This should select: id (0), bool_col (1), date (3-partition), tinyint_col (2)
+        let projection_indices = vec![0, 1, 3, 2];
+
+        // Create projection expressions from indices using the table schema
+        let projection =
+            ProjectionExprs::from_indices(&projection_indices, &table_schema);
+
+        // Call SplitProjection to separate file and partition columns
+        let split = SplitProjection::new(&file_schema, &projection);
+
+        // The file_indices should be [0, 1, 2] (all file columns needed)
+        assert_eq!(split.file_indices, vec![0, 1, 2]);
+
+        // Should have 1 partition column at in_partition_values index 0
+        assert_eq!(split.partition_columns.len(), 1);
+        assert_eq!(split.partition_columns[0].in_partition_values, 0);
+
+        // Now create a batch with only the file columns
+        let file_batch = record_batch!(
+            ("id", Int32, vec![4]),
+            ("bool_col", Boolean, vec![true]),
+            ("tinyint_col", Int8, vec![0])
+        )
+        .unwrap();
+
+        // After the fix, the remainder projection should have remapped indices:
+        // - File columns: [0, 1, 2] (unchanged since they're already in order)
+        // - Partition column: [3] (stays at index 3, which is >= num_file_columns)
+        // So the remainder expects input columns [0, 1, 2] and references column [3] for partition
+
+        // Verify that we can inject partition columns and apply the projection
+        let partition_values = vec![ScalarValue::from("2021-10-26")];
+
+        // Create partition column mapping
+        let partition_columns = vec![PartitionColumnIndex {
+            in_remainder_projection: 3, // partition column is at index 3 in remainder
+            in_partition_values: 0,     // first partition value
+        }];
+
+        // Inject partition columns (replaces Column(3) with Literal)
+        let injected_projection = inject_partition_columns_into_projection(
+            &split.remapped_projection,
+            &partition_columns,
+            partition_values,
+        );
+
+        // Now the projection should work on the file batch
+        let projector = injected_projection
+            .make_projector(&file_batch.schema())
+            .unwrap();
+        let result = projector.project_batch(&file_batch).unwrap();
+
+        // Verify the output has the correct column order: id, bool_col, date, tinyint_col
+        assert_eq!(result.num_columns(), 4);
+        assert_eq!(
+            result
+                .column(0)
+                .as_primitive::<arrow::datatypes::Int32Type>()
+                .value(0),
+            4
+        );
+        assert!(result.column(1).as_boolean().value(0));
+        assert_eq!(result.column(2).as_string::<i32>().value(0), "2021-10-26");
+        assert_eq!(
+            result
+                .column(3)
+                .as_primitive::<arrow::datatypes::Int8Type>()
+                .value(0),
+            0
+        );
+    }
+
+    // ========================================================================
+    // Comprehensive Test Suite for SplitProjection
+    // ========================================================================
+
+    // Helper to create test schemas with file and partition columns
+    fn create_test_schemas(
+        file_cols: usize,
+        partition_cols: usize,
+    ) -> (SchemaRef, SchemaRef) {
+        use arrow::datatypes::Field;
+
+        let file_fields: Vec<_> = (0..file_cols)
+            .map(|i| Field::new(format!("col_{i}"), DataType::Int32, false))
+            .collect();
+
+        let mut table_fields = file_fields.clone();
+        table_fields.extend(
+            (0..partition_cols)
+                .map(|i| Field::new(format!("part_{i}"), DataType::Utf8, false)),
+        );
+
+        (
+            Arc::new(Schema::new(file_fields)),
+            Arc::new(Schema::new(table_fields)),
+        )
+    }
+
+    // ========================================================================
+    // Partition Column Handling Tests
+    // ========================================================================
+
+    #[test]
+    fn test_split_projection_only_file_columns() {
+        let (file_schema, table_schema) = create_test_schemas(3, 2);
+        // Select only file columns [0, 1, 2]
+        let projection = ProjectionExprs::from_indices(&[0, 1, 2], &table_schema);
+
+        let split = SplitProjection::new(&file_schema, &projection);
+
+        assert_eq!(split.file_indices, vec![0, 1, 2]);
+        assert_eq!(split.partition_columns.len(), 0);
+    }
+
+    #[test]
+    fn test_split_projection_only_partition_columns() {
+        let (file_schema, table_schema) = create_test_schemas(3, 2);
+        // Select only partition columns [3, 4]
+        let projection = ProjectionExprs::from_indices(&[3, 4], &table_schema);
+
+        let split = SplitProjection::new(&file_schema, &projection);
+
+        assert_eq!(split.file_indices, Vec::<usize>::new());
+        assert_eq!(split.partition_columns.len(), 2);
+        assert_eq!(split.partition_columns[0].in_partition_values, 0);
+        assert_eq!(split.partition_columns[1].in_partition_values, 1);
+    }
+
+    #[test]
+    fn test_split_projection_multiple_partition_columns() {
+        let (file_schema, table_schema) = create_test_schemas(2, 3);
+        // File cols: 0, 1; Partition cols: 2, 3, 4
+        // Select: [0, 2, 4, 1, 3] (mixed file and partition)
+        let projection = ProjectionExprs::from_indices(&[0, 2, 4, 1, 3], &table_schema);
+
+        let split = SplitProjection::new(&file_schema, &projection);
+
+        assert_eq!(split.file_indices, vec![0, 1]);
+        assert_eq!(split.partition_columns.len(), 3);
+        assert_eq!(split.partition_columns[0].in_partition_values, 0);
+        assert_eq!(split.partition_columns[1].in_partition_values, 1);
+        assert_eq!(split.partition_columns[2].in_partition_values, 2);
+
+        // Verify remapped projection has correct indices
+        // File columns should be at [0, 1], partition columns at [2, 3, 4]
+        assert_eq!(split.remapped_projection.iter().count(), 5);
+    }
+
+    #[test]
+    fn test_split_projection_partition_columns_reverse_order() {
+        let (file_schema, table_schema) = create_test_schemas(2, 2);
+        // File cols: 0, 1; Partition cols: 2, 3
+        // Select: [3, 2] (partitions in reverse)
+        let projection = ProjectionExprs::from_indices(&[3, 2], &table_schema);
+
+        let split = SplitProjection::new(&file_schema, &projection);
+
+        assert_eq!(split.file_indices, Vec::<usize>::new());
+        assert_eq!(split.partition_columns.len(), 2);
+        assert_eq!(split.partition_columns[0].in_partition_values, 0);
+        assert_eq!(split.partition_columns[1].in_partition_values, 1);
+    }
+
+    #[test]
+    fn test_split_projection_interleaved_file_and_partition() {
+        let (file_schema, table_schema) = create_test_schemas(3, 3);
+        // File cols: 0, 1, 2; Partition cols: 3, 4, 5
+        // Select: [0, 3, 1, 4, 2, 5] (alternating)
+        let projection =
+            ProjectionExprs::from_indices(&[0, 3, 1, 4, 2, 5], &table_schema);
+
+        let split = SplitProjection::new(&file_schema, &projection);
+
+        assert_eq!(split.file_indices, vec![0, 1, 2]);
+        assert_eq!(split.partition_columns.len(), 3);
+        assert_eq!(split.partition_columns[0].in_partition_values, 0);
+        assert_eq!(split.partition_columns[1].in_partition_values, 1);
+        assert_eq!(split.partition_columns[2].in_partition_values, 2);
+    }
+
+    #[test]
+    fn test_split_projection_expression_with_file_and_partition_columns() {
+        use arrow::datatypes::Field;
+
+        // Create schemas: 2 file columns, 1 partition column
+        let file_schema = Arc::new(Schema::new(vec![
+            Field::new("file_a", DataType::Int32, false),
+            Field::new("file_b", DataType::Int32, false),
+        ]));
+        let table_schema = Arc::new(Schema::new(vec![
+            Field::new("file_a", DataType::Int32, false),
+            Field::new("file_b", DataType::Int32, false),
+            Field::new("part_c", DataType::Int32, false),
+        ]));
+
+        // Create expression: file_a + part_c
+        let exprs = [col("file_a") + col("part_c")];
+        let projection = create_projection_exprs(exprs.iter(), &table_schema);
+
+        let split = SplitProjection::new(&file_schema, &projection);
+
+        // Should extract both columns
+        assert_eq!(split.file_indices, vec![0]);
+        assert_eq!(split.partition_columns.len(), 1);
+        assert_eq!(split.partition_columns[0].in_partition_values, 0);
+    }
+
+    // ========================================================================
+    // Category 4: Boundary Conditions
+    // ========================================================================
+
+    #[test]
+    fn test_split_projection_boundary_last_file_column() {
+        let (file_schema, table_schema) = create_test_schemas(3, 2);
+        // Last file column is index 2
+        let projection = ProjectionExprs::from_indices(&[2], &table_schema);
+
+        let split = SplitProjection::new(&file_schema, &projection);
+
+        assert_eq!(split.file_indices, vec![2]);
+        assert_eq!(split.partition_columns.len(), 0);
+    }
+
+    #[test]
+    fn test_split_projection_boundary_first_partition_column() {
+        let (file_schema, table_schema) = create_test_schemas(3, 2);
+        // First partition column is index 3
+        let projection = ProjectionExprs::from_indices(&[3], &table_schema);
+
+        let split = SplitProjection::new(&file_schema, &projection);
+
+        assert_eq!(split.file_indices, Vec::<usize>::new());
+        assert_eq!(split.partition_columns.len(), 1);
+        assert_eq!(split.partition_columns[0].in_partition_values, 0);
+    }
+
+    // ========================================================================
+    // Category 6: Integration Tests
+    // ========================================================================
+
+    #[test]
+    fn test_inject_partition_columns_multiple_partitions() {
+        let data =
+            record_batch!(("col_0", Int32, vec![1]), ("col_1", Int32, vec![2])).unwrap();
+
+        // Create projection that references file columns and partition columns
+        let (file_schema, table_schema) = create_test_schemas(2, 2);
+        // Projection: [0, 2, 1, 3] = [file_0, part_0, file_1, part_1]
+        let projection = ProjectionExprs::from_indices(&[0, 2, 1, 3], &table_schema);
+        let split = SplitProjection::new(&file_schema, &projection);
+
+        // Create partition column mappings
+        let partition_columns = vec![
+            PartitionColumnIndex {
+                in_remainder_projection: 2, // First partition column at index 2
+                in_partition_values: 0,
+            },
+            PartitionColumnIndex {
+                in_remainder_projection: 3, // Second partition column at index 3
+                in_partition_values: 1,
+            },
+        ];
+
+        let partition_values =
+            vec![ScalarValue::from("part_a"), ScalarValue::from("part_b")];
+
+        let injected = inject_partition_columns_into_projection(
+            &split.remapped_projection,
+            &partition_columns,
+            partition_values,
+        );
+
+        // Apply projection
+        let projector = injected.make_projector(&data.schema()).unwrap();
+        let result = projector.project_batch(&data).unwrap();
+
+        assert_eq!(result.num_columns(), 4);
+        assert_eq!(
+            result
+                .column(0)
+                .as_primitive::<arrow::datatypes::Int32Type>()
+                .value(0),
+            1
+        );
+        assert_eq!(result.column(1).as_string::<i32>().value(0), "part_a");
+        assert_eq!(
+            result
+                .column(2)
+                .as_primitive::<arrow::datatypes::Int32Type>()
+                .value(0),
+            2
+        );
+        assert_eq!(result.column(3).as_string::<i32>().value(0), "part_b");
+    }
+}
diff --git a/datafusion/datasource/src/source.rs b/datafusion/datasource/src/source.rs
index de79512a4101..781083c0f14d 100644
--- a/datafusion/datasource/src/source.rs
+++ b/datafusion/datasource/src/source.rs
@@ -22,12 +22,13 @@ use std::fmt;
 use std::fmt::{Debug, Formatter};
 use std::sync::Arc;
 
+use datafusion_physical_expr::projection::ProjectionExprs;
 use datafusion_physical_plan::execution_plan::{
     Boundedness, EmissionType, SchedulingType,
 };
 use datafusion_physical_plan::metrics::SplitMetrics;
 use datafusion_physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet};
-use datafusion_physical_plan::projection::{ProjectionExec, ProjectionExpr};
+use datafusion_physical_plan::projection::ProjectionExec;
 use datafusion_physical_plan::stream::BatchSplitStream;
 use datafusion_physical_plan::{
     DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties,
@@ -174,7 +175,7 @@ pub trait DataSource: Send + Sync + Debug {
     }
     fn try_swapping_with_projection(
         &self,
-        _projection: &[ProjectionExpr],
+        _projection: &ProjectionExprs,
     ) -> Result<Option<Arc<dyn DataSource>>>;
     /// Try to push down filters into this DataSource.
     /// See [`ExecutionPlan::handle_child_pushdown_result`] for more details.
@@ -319,7 +320,7 @@ impl ExecutionPlan for DataSourceExec {
     ) -> Result<Option<Arc<dyn ExecutionPlan>>> {
         match self
             .data_source
-            .try_swapping_with_projection(projection.expr())?
+            .try_swapping_with_projection(projection.projection_expr())?
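// --- Illustrative sketch (editorial, not part of the patch) ---
// A minimal, std-only model of the index arithmetic that `SplitProjection::new`
// performs above: referenced table columns are split into file columns (kept in
// sorted order and re-indexed from 0) and partition columns (appended after the
// file columns and resolved later from each file's partition values). The name
// `split_column_indices` and its tuple return type are illustrative assumptions,
// not DataFusion APIs.
use std::collections::HashMap;

fn split_column_indices(
    num_file_schema_columns: usize,
    mut referenced: Vec<usize>, // table-schema column indices used by the projection
) -> (Vec<usize>, Vec<(usize, usize)>, HashMap<usize, usize>) {
    referenced.sort_unstable();
    referenced.dedup();

    let mut file_indices = Vec::new(); // columns to actually read from the file
    let mut partition_columns = Vec::new(); // table indices of referenced partition columns
    let mut remap = HashMap::new(); // old table index -> index in the remapped input

    for &table_index in &referenced {
        if table_index < num_file_schema_columns {
            // File column: takes the next slot in [0, num_file_columns)
            remap.insert(table_index, file_indices.len());
            file_indices.push(table_index);
        } else {
            partition_columns.push(table_index);
        }
    }

    // Partition columns are appended after all file columns in the remapped input.
    let num_file_columns = file_indices.len();
    let partition_mappings: Vec<(usize, usize)> = partition_columns
        .iter()
        .enumerate()
        .map(|(i, &table_index)| {
            remap.insert(table_index, num_file_columns + i);
            // (index in the remainder projection, index into the partition values)
            (num_file_columns + i, table_index - num_file_schema_columns)
        })
        .collect();

    (file_indices, partition_mappings, remap)
}

fn main() {
    // Mirrors `test_split_projection_with_partition_columns` above: 3 file columns,
    // projection over table columns [0, 1, 3, 2] where index 3 is the partition column.
    let (files, parts, remap) = split_column_indices(3, vec![0, 1, 3, 2]);
    assert_eq!(files, vec![0, 1, 2]);
    assert_eq!(parts, vec![(3, 0)]);
    assert_eq!(remap[&3], 3);
}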
         {
             Some(new_data_source) => {
                 Ok(Some(Arc::new(DataSourceExec::new(new_data_source))))
@@ -342,7 +343,7 @@ impl ExecutionPlan for DataSourceExec {
             .collect_vec();
         let res = self
             .data_source
-            .try_pushdown_filters(parent_filters.clone(), config)?;
+            .try_pushdown_filters(parent_filters, config)?;
         match res.updated_node {
             Some(data_source) => {
                 let mut new_node = self.clone();
diff --git a/datafusion/datasource/src/test_util.rs b/datafusion/datasource/src/test_util.rs
index 5d5b277dcf04..6806cd73996f 100644
--- a/datafusion/datasource/src/test_util.rs
+++ b/datafusion/datasource/src/test_util.rs
@@ -35,29 +35,32 @@ pub(crate) struct MockSource {
     schema_adapter_factory: Option<Arc<dyn SchemaAdapterFactory>>,
     filter: Option<Arc<dyn PhysicalExpr>>,
     table_schema: crate::table_schema::TableSchema,
+    projection: crate::projection::SplitProjection,
 }
 
 impl Default for MockSource {
     fn default() -> Self {
+        let table_schema =
+            crate::table_schema::TableSchema::new(Arc::new(Schema::empty()), vec![]);
         Self {
             metrics: ExecutionPlanMetricsSet::new(),
             schema_adapter_factory: None,
             filter: None,
-            table_schema: crate::table_schema::TableSchema::new(
-                Arc::new(Schema::empty()),
-                vec![],
-            ),
+            projection: crate::projection::SplitProjection::unprojected(&table_schema),
+            table_schema,
         }
     }
 }
 
 impl MockSource {
     pub fn new(table_schema: impl Into<crate::table_schema::TableSchema>) -> Self {
+        let table_schema = table_schema.into();
         Self {
             metrics: ExecutionPlanMetricsSet::new(),
             schema_adapter_factory: None,
             filter: None,
-            table_schema: table_schema.into(),
+            projection: crate::projection::SplitProjection::unprojected(&table_schema),
+            table_schema,
         }
     }
 
@@ -73,7 +76,7 @@ impl FileSource for MockSource {
         _object_store: Arc<dyn ObjectStore>,
         _base_config: &FileScanConfig,
         _partition: usize,
-    ) -> Arc<dyn FileOpener> {
+    ) -> Result<Arc<dyn FileOpener>> {
         unimplemented!()
     }
 
@@ -89,10 +92,6 @@ impl FileSource for MockSource {
         Arc::new(Self { ..self.clone() })
     }
 
-    fn with_projection(&self, _config: &FileScanConfig) -> Arc<dyn FileSource> {
-        Arc::new(Self { ..self.clone() })
-    }
-
     fn metrics(&self) -> &ExecutionPlanMetricsSet {
         &self.metrics
     }
@@ -118,6 +117,26 @@ impl FileSource for MockSource {
     fn table_schema(&self) -> &crate::table_schema::TableSchema {
         &self.table_schema
     }
+
+    fn try_pushdown_projection(
+        &self,
+        projection: &datafusion_physical_plan::projection::ProjectionExprs,
+    ) -> Result<Option<Arc<dyn FileSource>>> {
+        let mut source = self.clone();
+        let new_projection = self.projection.source.try_merge(projection)?;
+        let split_projection = crate::projection::SplitProjection::new(
+            self.table_schema.file_schema(),
+            &new_projection,
+        );
+        source.projection = split_projection;
+        Ok(Some(Arc::new(source)))
+    }
+
+    fn projection(
+        &self,
+    ) -> Option<&datafusion_physical_plan::projection::ProjectionExprs> {
+        Some(&self.projection.source)
+    }
 }
 
 /// Create a column expression
diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml
index 9374297ecba3..11e0cc5c604f 100644
--- a/datafusion/physical-expr/Cargo.toml
+++ b/datafusion/physical-expr/Cargo.toml
@@ -48,7 +48,6 @@ datafusion-expr = { workspace = true }
 datafusion-expr-common = { workspace = true }
 datafusion-functions-aggregate-common = { workspace = true }
 datafusion-physical-expr-common = { workspace = true }
-half = { workspace = true }
 hashbrown = { workspace = true }
 indexmap = { workspace = true }
 itertools = { workspace = true, features = ["use_std"] }
diff --git a/datafusion/physical-plan/src/projection.rs b/datafusion/physical-plan/src/projection.rs
index 0b8c4ee5fbec..8e8baef3e71b 100644
--- a/datafusion/physical-plan/src/projection.rs
+++ 
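// --- Illustrative sketch (editorial, not part of the patch) ---
// `MockSource::try_pushdown_projection` above merges a newly pushed projection into
// the one already stored on the source via `ProjectionExprs::try_merge`, then rebuilds
// the `SplitProjection`. For the special case where both projections are plain column
// selections, merging amounts to index composition; the general case composes
// expressions. `merge_index_projections` is an illustrative name, not a DataFusion API.
fn merge_index_projections(existing: &[usize], pushed: &[usize]) -> Vec<usize> {
    // `pushed[i]` selects column i of the output of `existing`, so the merged
    // projection selects `existing[pushed[i]]` from the original input.
    pushed.iter().map(|&i| existing[i]).collect()
}

fn main() {
    // An existing scan projection keeps columns [2, 0, 1]; a later pass pushes a
    // projection selecting its outputs [1, 2], i.e. original columns [0, 1].
    assert_eq!(merge_index_projections(&[2, 0, 1], &[1, 2]), vec![0, 1]);
}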
b/datafusion/physical-plan/src/projection.rs @@ -159,6 +159,11 @@ impl ProjectionExec { self.projector.projection().as_ref() } + /// The projection expressions as a [`ProjectionExprs`]. + pub fn projection_expr(&self) -> &ProjectionExprs { + self.projector.projection() + } + /// The input plan pub fn input(&self) -> &Arc { &self.input diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index f9400d14a59c..789176862bf0 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -1009,6 +1009,15 @@ message PhysicalSortExprNodeCollection { repeated PhysicalSortExprNode physical_sort_expr_nodes = 1; } +message ProjectionExpr { + string alias = 1; + PhysicalExprNode expr = 2; +} + +message ProjectionExprs { + repeated ProjectionExpr projections = 1; +} + message FileScanExecConf { repeated FileGroup file_groups = 1; datafusion_common.Schema schema = 2; @@ -1024,6 +1033,8 @@ message FileScanExecConf { datafusion_common.Constraints constraints = 11; optional uint64 batch_size = 12; + + optional ProjectionExprs projection_exprs = 13; } message ParquetScanExecNode { diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 4cf834d0601e..230bfa495a4b 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -5949,6 +5949,9 @@ impl serde::Serialize for FileScanExecConf { if self.batch_size.is_some() { len += 1; } + if self.projection_exprs.is_some() { + len += 1; + } let mut struct_ser = serializer.serialize_struct("datafusion.FileScanExecConf", len)?; if !self.file_groups.is_empty() { struct_ser.serialize_field("fileGroups", &self.file_groups)?; @@ -5982,6 +5985,9 @@ impl serde::Serialize for FileScanExecConf { #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("batchSize", ToString::to_string(&v).as_str())?; } + if let Some(v) = self.projection_exprs.as_ref() { + struct_ser.serialize_field("projectionExprs", v)?; + } struct_ser.end() } } @@ -6007,6 +6013,8 @@ impl<'de> serde::Deserialize<'de> for FileScanExecConf { "constraints", "batch_size", "batchSize", + "projection_exprs", + "projectionExprs", ]; #[allow(clippy::enum_variant_names)] @@ -6021,6 +6029,7 @@ impl<'de> serde::Deserialize<'de> for FileScanExecConf { OutputOrdering, Constraints, BatchSize, + ProjectionExprs, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn deserialize(deserializer: D) -> std::result::Result @@ -6052,6 +6061,7 @@ impl<'de> serde::Deserialize<'de> for FileScanExecConf { "outputOrdering" | "output_ordering" => Ok(GeneratedField::OutputOrdering), "constraints" => Ok(GeneratedField::Constraints), "batchSize" | "batch_size" => Ok(GeneratedField::BatchSize), + "projectionExprs" | "projection_exprs" => Ok(GeneratedField::ProjectionExprs), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } } @@ -6081,6 +6091,7 @@ impl<'de> serde::Deserialize<'de> for FileScanExecConf { let mut output_ordering__ = None; let mut constraints__ = None; let mut batch_size__ = None; + let mut projection_exprs__ = None; while let Some(k) = map_.next_key()? 
{ match k { GeneratedField::FileGroups => { @@ -6148,6 +6159,12 @@ impl<'de> serde::Deserialize<'de> for FileScanExecConf { map_.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| x.0) ; } + GeneratedField::ProjectionExprs => { + if projection_exprs__.is_some() { + return Err(serde::de::Error::duplicate_field("projectionExprs")); + } + projection_exprs__ = map_.next_value()?; + } } } Ok(FileScanExecConf { @@ -6161,6 +6178,7 @@ impl<'de> serde::Deserialize<'de> for FileScanExecConf { output_ordering: output_ordering__.unwrap_or_default(), constraints: constraints__, batch_size: batch_size__, + projection_exprs: projection_exprs__, }) } } @@ -19268,6 +19286,205 @@ impl<'de> serde::Deserialize<'de> for ProjectionExecNode { deserializer.deserialize_struct("datafusion.ProjectionExecNode", FIELDS, GeneratedVisitor) } } +impl serde::Serialize for ProjectionExpr { + #[allow(deprecated)] + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + use serde::ser::SerializeStruct; + let mut len = 0; + if !self.alias.is_empty() { + len += 1; + } + if self.expr.is_some() { + len += 1; + } + let mut struct_ser = serializer.serialize_struct("datafusion.ProjectionExpr", len)?; + if !self.alias.is_empty() { + struct_ser.serialize_field("alias", &self.alias)?; + } + if let Some(v) = self.expr.as_ref() { + struct_ser.serialize_field("expr", v)?; + } + struct_ser.end() + } +} +impl<'de> serde::Deserialize<'de> for ProjectionExpr { + #[allow(deprecated)] + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + const FIELDS: &[&str] = &[ + "alias", + "expr", + ]; + + #[allow(clippy::enum_variant_names)] + enum GeneratedField { + Alias, + Expr, + } + impl<'de> serde::Deserialize<'de> for GeneratedField { + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + struct GeneratedVisitor; + + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = GeneratedField; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(formatter, "expected one of: {:?}", &FIELDS) + } + + #[allow(unused_variables)] + fn visit_str(self, value: &str) -> std::result::Result + where + E: serde::de::Error, + { + match value { + "alias" => Ok(GeneratedField::Alias), + "expr" => Ok(GeneratedField::Expr), + _ => Err(serde::de::Error::unknown_field(value, FIELDS)), + } + } + } + deserializer.deserialize_identifier(GeneratedVisitor) + } + } + struct GeneratedVisitor; + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = ProjectionExpr; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + formatter.write_str("struct datafusion.ProjectionExpr") + } + + fn visit_map(self, mut map_: V) -> std::result::Result + where + V: serde::de::MapAccess<'de>, + { + let mut alias__ = None; + let mut expr__ = None; + while let Some(k) = map_.next_key()? 
{ + match k { + GeneratedField::Alias => { + if alias__.is_some() { + return Err(serde::de::Error::duplicate_field("alias")); + } + alias__ = Some(map_.next_value()?); + } + GeneratedField::Expr => { + if expr__.is_some() { + return Err(serde::de::Error::duplicate_field("expr")); + } + expr__ = map_.next_value()?; + } + } + } + Ok(ProjectionExpr { + alias: alias__.unwrap_or_default(), + expr: expr__, + }) + } + } + deserializer.deserialize_struct("datafusion.ProjectionExpr", FIELDS, GeneratedVisitor) + } +} +impl serde::Serialize for ProjectionExprs { + #[allow(deprecated)] + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + use serde::ser::SerializeStruct; + let mut len = 0; + if !self.projections.is_empty() { + len += 1; + } + let mut struct_ser = serializer.serialize_struct("datafusion.ProjectionExprs", len)?; + if !self.projections.is_empty() { + struct_ser.serialize_field("projections", &self.projections)?; + } + struct_ser.end() + } +} +impl<'de> serde::Deserialize<'de> for ProjectionExprs { + #[allow(deprecated)] + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + const FIELDS: &[&str] = &[ + "projections", + ]; + + #[allow(clippy::enum_variant_names)] + enum GeneratedField { + Projections, + } + impl<'de> serde::Deserialize<'de> for GeneratedField { + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + struct GeneratedVisitor; + + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = GeneratedField; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(formatter, "expected one of: {:?}", &FIELDS) + } + + #[allow(unused_variables)] + fn visit_str(self, value: &str) -> std::result::Result + where + E: serde::de::Error, + { + match value { + "projections" => Ok(GeneratedField::Projections), + _ => Err(serde::de::Error::unknown_field(value, FIELDS)), + } + } + } + deserializer.deserialize_identifier(GeneratedVisitor) + } + } + struct GeneratedVisitor; + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = ProjectionExprs; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + formatter.write_str("struct datafusion.ProjectionExprs") + } + + fn visit_map(self, mut map_: V) -> std::result::Result + where + V: serde::de::MapAccess<'de>, + { + let mut projections__ = None; + while let Some(k) = map_.next_key()? 
{ + match k { + GeneratedField::Projections => { + if projections__.is_some() { + return Err(serde::de::Error::duplicate_field("projections")); + } + projections__ = Some(map_.next_value()?); + } + } + } + Ok(ProjectionExprs { + projections: projections__.unwrap_or_default(), + }) + } + } + deserializer.deserialize_struct("datafusion.ProjectionExprs", FIELDS, GeneratedVisitor) + } +} impl serde::Serialize for ProjectionNode { #[allow(deprecated)] fn serialize(&self, serializer: S) -> std::result::Result diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 12b417627411..b2d0bc7751f9 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -1542,6 +1542,18 @@ pub struct PhysicalSortExprNodeCollection { pub physical_sort_expr_nodes: ::prost::alloc::vec::Vec, } #[derive(Clone, PartialEq, ::prost::Message)] +pub struct ProjectionExpr { + #[prost(string, tag = "1")] + pub alias: ::prost::alloc::string::String, + #[prost(message, optional, tag = "2")] + pub expr: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ProjectionExprs { + #[prost(message, repeated, tag = "1")] + pub projections: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] pub struct FileScanExecConf { #[prost(message, repeated, tag = "1")] pub file_groups: ::prost::alloc::vec::Vec, @@ -1563,6 +1575,8 @@ pub struct FileScanExecConf { pub constraints: ::core::option::Option, #[prost(uint64, optional, tag = "12")] pub batch_size: ::core::option::Option, + #[prost(message, optional, tag = "13")] + pub projection_exprs: ::core::option::Option, } #[derive(Clone, PartialEq, ::prost::Message)] pub struct ParquetScanExecNode { diff --git a/datafusion/proto/src/physical_plan/from_proto.rs b/datafusion/proto/src/physical_plan/from_proto.rs index f1a9abe6ea7b..d40d835f793c 100644 --- a/datafusion/proto/src/physical_plan/from_proto.rs +++ b/datafusion/proto/src/physical_plan/from_proto.rs @@ -42,6 +42,7 @@ use datafusion_datasource_parquet::file_format::ParquetSink; use datafusion_execution::object_store::ObjectStoreUrl; use datafusion_execution::{FunctionRegistry, TaskContext}; use datafusion_expr::WindowFunctionDefinition; +use datafusion_physical_expr::projection::{ProjectionExpr, ProjectionExprs}; use datafusion_physical_expr::{LexOrdering, PhysicalSortExpr, ScalarFunctionExpr}; use datafusion_physical_plan::expressions::{ in_list, BinaryExpr, CaseExpr, CastExpr, Column, IsNotNullExpr, IsNullExpr, LikeExpr, @@ -519,11 +520,6 @@ pub fn parse_protobuf_file_scan_config( file_source: Arc, ) -> Result { let schema: Arc = parse_protobuf_file_scan_schema(proto)?; - let projection = proto - .projection - .iter() - .map(|i| *i as usize) - .collect::>(); let constraints = convert_required!(proto.constraints)?; let statistics = convert_required!(proto.statistics)?; @@ -550,11 +546,38 @@ pub fn parse_protobuf_file_scan_config( output_ordering.extend(LexOrdering::new(sort_exprs)); } + // Parse projection expressions if present and apply to file source + let file_source = if let Some(proto_projection_exprs) = &proto.projection_exprs { + let projection_exprs: Vec = proto_projection_exprs + .projections + .iter() + .map(|proto_expr| { + let expr = parse_physical_expr( + proto_expr.expr.as_ref().ok_or_else(|| { + internal_datafusion_err!("ProjectionExpr missing expr field") + })?, + ctx, + &schema, + codec, + )?; + Ok(ProjectionExpr::new(expr, proto_expr.alias.clone())) + }) + .collect::>>()?; + + let 
projection_exprs = ProjectionExprs::new(projection_exprs); + + // Apply projection to file source + file_source + .try_pushdown_projection(&projection_exprs)? + .unwrap_or(file_source) + } else { + file_source + }; + let config = FileScanConfigBuilder::new(object_store_url, file_source) .with_file_groups(file_groups) .with_constraints(constraints) .with_statistics(statistics) - .with_projection_indices(Some(projection)) .with_limit(proto.limit.as_ref().map(|sl| sl.limit as usize)) .with_output_ordering(output_ordering) .with_batch_size(proto.batch_size.map(|s| s as usize)) diff --git a/datafusion/proto/src/physical_plan/mod.rs b/datafusion/proto/src/physical_plan/mod.rs index fc7818fe461a..89dd0b50650b 100644 --- a/datafusion/proto/src/physical_plan/mod.rs +++ b/datafusion/proto/src/physical_plan/mod.rs @@ -666,7 +666,6 @@ impl protobuf::PhysicalPlanNode { &self, scan: &protobuf::ParquetScanExecNode, ctx: &TaskContext, - extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { #[cfg(feature = "parquet")] diff --git a/datafusion/proto/src/physical_plan/to_proto.rs b/datafusion/proto/src/physical_plan/to_proto.rs index 1ae85618b92a..146e9258111a 100644 --- a/datafusion/proto/src/physical_plan/to_proto.rs +++ b/datafusion/proto/src/physical_plan/to_proto.rs @@ -528,18 +528,31 @@ pub fn serialize_file_scan_config( .with_metadata(conf.file_schema().metadata.clone()), ); + let projection_exprs = conf + .file_source + .projection() + .as_ref() + .map(|projection_exprs| { + let projections = projection_exprs.iter().cloned().collect::>(); + Ok::<_, DataFusionError>(protobuf::ProjectionExprs { + projections: projections + .into_iter() + .map(|expr| { + Ok(protobuf::ProjectionExpr { + alias: expr.alias.to_string(), + expr: Some(serialize_physical_expr(&expr.expr, codec)?), + }) + }) + .collect::>>()?, + }) + }) + .transpose()?; + Ok(protobuf::FileScanExecConf { file_groups, statistics: Some((&conf.statistics()).into()), limit: conf.limit.map(|l| protobuf::ScanLimit { limit: l as u32 }), - projection: conf - .projection_exprs - .as_ref() - .map(|p| p.column_indices()) - .unwrap_or((0..schema.fields().len()).collect::>()) - .iter() - .map(|n| *n as u32) - .collect(), + projection: vec![], schema: Some(schema.as_ref().try_into()?), table_partition_cols: conf .table_partition_cols() @@ -555,6 +568,7 @@ pub fn serialize_file_scan_config( .collect::>(), constraints: Some(conf.constraints.clone().into()), batch_size: conf.batch_size.map(|s| s as u64), + projection_exprs, }) } diff --git a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs index c50f41625c70..0bcdd610c26f 100644 --- a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs @@ -928,7 +928,7 @@ async fn roundtrip_parquet_exec_with_table_partition_cols() -> Result<()> { let file_source = Arc::new(ParquetSource::new(table_schema.clone())); let scan_config = FileScanConfigBuilder::new(ObjectStoreUrl::local_filesystem(), file_source) - .with_projection_indices(Some(vec![0, 1])) + .with_projection_indices(Some(vec![0, 1]))? .with_file_group(FileGroup::new(vec![file_group])) .with_newlines_in_values(false) .build(); @@ -1813,7 +1813,7 @@ async fn roundtrip_projection_source() -> Result<()> { 1024, )])]) .with_statistics(statistics) - .with_projection_indices(Some(vec![0, 1, 2])) + .with_projection_indices(Some(vec![0, 1, 2]))? 
.build(); let filter = Arc::new( diff --git a/datafusion/sqllogictest/test_files/explain_tree.slt b/datafusion/sqllogictest/test_files/explain_tree.slt index 6ac28997a990..b4462533f60d 100644 --- a/datafusion/sqllogictest/test_files/explain_tree.slt +++ b/datafusion/sqllogictest/test_files/explain_tree.slt @@ -952,23 +952,11 @@ explain SELECT int_col, bigint_col, int_col+bigint_col AS sum_col FROM table2; ---- physical_plan 01)┌───────────────────────────┐ -02)│ ProjectionExec │ +02)│ DataSourceExec │ 03)│ -------------------- │ -04)│ bigint_col: │ -05)│ bigint_col │ -06)│ │ -07)│ int_col: int_col │ -08)│ │ -09)│ sum_col: │ -10)│ CAST(int_col AS Int64) + │ -11)│ bigint_col │ -12)└─────────────┬─────────────┘ -13)┌─────────────┴─────────────┐ -14)│ DataSourceExec │ -15)│ -------------------- │ -16)│ files: 1 │ -17)│ format: parquet │ -18)└───────────────────────────┘ +04)│ files: 1 │ +05)│ format: parquet │ +06)└───────────────────────────┘ # Query with projection on memory query TT diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index f217ba1bd5a0..6b1b2ca17747 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -3501,11 +3501,10 @@ logical_plan 05)----TableScan: annotated_data projection=[a0, a, b, c, d] physical_plan 01)NestedLoopJoinExec: join_type=Inner, filter=example(join_proj_push_down_1@0, join_proj_push_down_2@1) > 3, projection=[a0@0, a@1, b@2, c@3, d@4, a0@6, a@7, b@8, c@9, d@10] -02)--ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, CAST(a@1 AS Float64) as join_proj_push_down_1] -03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], file_type=csv, has_header=true -04)--ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, CAST(a@1 AS Float64) as join_proj_push_down_2] -05)----RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1, maintains_sort_order=true -06)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], file_type=csv, has_header=true +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d, CAST(a@1 AS Float64) as join_proj_push_down_1], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], file_type=csv, has_header=true +03)--ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, CAST(a@1 AS Float64) as join_proj_push_down_2] +04)----RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1, maintains_sort_order=true +05)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], file_type=csv, has_header=true #### # Config teardown diff --git a/datafusion/sqllogictest/test_files/math.slt b/datafusion/sqllogictest/test_files/math.slt index edba5354e001..f34e1156a785 100644 --- a/datafusion/sqllogictest/test_files/math.slt +++ b/datafusion/sqllogictest/test_files/math.slt @@ -713,9 +713,7 @@ EXPLAIN SELECT log(NULL, c2) from aggregate_simple; logical_plan 01)Projection: Float64(NULL) AS log(NULL,aggregate_simple.c2) 02)--TableScan: aggregate_simple 
projection=[] -physical_plan -01)ProjectionExec: expr=[NULL as log(NULL,aggregate_simple.c2)] -02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/aggregate_simple.csv]]}, file_type=csv, has_header=true +physical_plan DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/aggregate_simple.csv]]}, projection=[NULL as log(NULL,aggregate_simple.c2)], file_type=csv, has_header=true # Float 16/32/64 for log query RT diff --git a/datafusion/sqllogictest/test_files/monotonic_projection_test.slt b/datafusion/sqllogictest/test_files/monotonic_projection_test.slt index d94d48d45af9..7feefc169fca 100644 --- a/datafusion/sqllogictest/test_files/monotonic_projection_test.slt +++ b/datafusion/sqllogictest/test_files/monotonic_projection_test.slt @@ -97,9 +97,7 @@ logical_plan 01)Sort: a_big ASC NULLS LAST, multiple_ordered_table.b ASC NULLS LAST 02)--Projection: multiple_ordered_table.a, multiple_ordered_table.a AS a_big, multiple_ordered_table.b 03)----TableScan: multiple_ordered_table projection=[a, b] -physical_plan -01)ProjectionExec: expr=[a@0 as a, a@0 as a_big, b@1 as b] -02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], file_type=csv, has_header=true +physical_plan DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, a@1 as a_big, b], output_ordering=[a@0 ASC NULLS LAST, b@2 ASC NULLS LAST], file_type=csv, has_header=true query TT EXPLAIN @@ -111,9 +109,7 @@ logical_plan 01)Sort: multiple_ordered_table.a ASC NULLS LAST, multiple_ordered_table.b ASC NULLS LAST 02)--Projection: multiple_ordered_table.a, multiple_ordered_table.a AS a_big, multiple_ordered_table.b 03)----TableScan: multiple_ordered_table projection=[a, b] -physical_plan -01)ProjectionExec: expr=[a@0 as a, a@0 as a_big, b@1 as b] -02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], file_type=csv, has_header=true +physical_plan DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, a@1 as a_big, b], output_ordering=[a@0 ASC NULLS LAST, b@2 ASC NULLS LAST], file_type=csv, has_header=true # test for cast Utf8 diff --git a/datafusion/sqllogictest/test_files/order.slt b/datafusion/sqllogictest/test_files/order.slt index 8f0cb5e53d76..03ba12ee9875 100644 --- a/datafusion/sqllogictest/test_files/order.slt +++ b/datafusion/sqllogictest/test_files/order.slt @@ -1357,10 +1357,8 @@ physical_plan 01)SortPreservingMergeExec: [d@4 ASC NULLS LAST, c@1 ASC NULLS LAST, a@2 ASC NULLS LAST, a0@3 ASC NULLS LAST, b@0 ASC NULLS LAST], fetch=2 02)--SortExec: TopK(fetch=2), expr=[d@4 ASC NULLS LAST, c@1 ASC NULLS LAST, a@2 ASC NULLS LAST, a0@3 ASC NULLS LAST, b@0 ASC NULLS LAST], preserve_partitioning=[true] 03)----UnionExec -04)------ProjectionExec: expr=[b@1 as b, c@2 as c, a@0 as a, NULL as a0, d@3 as d] -05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c, d], output_ordering=[c@2 ASC NULLS LAST], file_type=csv, has_header=true -06)------ProjectionExec: expr=[b@1 as b, c@2 as c, NULL as a, a0@0 as a0, d@3 as d] -07)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, b, c, 
d], output_ordering=[c@2 ASC NULLS LAST], file_type=csv, has_header=true +04)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[b, c, a, NULL as a0, d], output_ordering=[c@1 ASC NULLS LAST], file_type=csv, has_header=true +05)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[b, c, NULL as a, a0, d], output_ordering=[c@1 ASC NULLS LAST], file_type=csv, has_header=true # Test: run the query from above query IIIII diff --git a/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt b/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt index fe909e70ffb0..5a559bdb9483 100644 --- a/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt +++ b/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt @@ -274,5 +274,4 @@ logical_plan 02)--TableScan: test_table projection=[constant_col] physical_plan 01)SortPreservingMergeExec: [constant_col@0 ASC NULLS LAST] -02)--SortExec: expr=[constant_col@0 ASC NULLS LAST], preserve_partitioning=[true] -03)----DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=A/0.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=B/1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=C/2.parquet]]}, projection=[constant_col], file_type=parquet +02)--DataSourceExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=A/0.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=B/1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=C/2.parquet]]}, projection=[constant_col], file_type=parquet diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index 30dbcc978c9b..ab0859fa4ade 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -1420,9 +1420,7 @@ logical_plan 01)Sort: annotated_data_finite2.a ASC NULLS LAST 02)--Projection: annotated_data_finite2.a, annotated_data_finite2.b, Int64(2) 03)----TableScan: annotated_data_finite2 projection=[a, b] -physical_plan -01)ProjectionExec: expr=[a@0 as a, b@1 as b, 2 as Int64(2)] -02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], file_type=csv, has_header=true +physical_plan DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, 2 as Int64(2)], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], file_type=csv, has_header=true # source is ordered by a,b,c # when filter result is constant for column a diff --git a/datafusion/sqllogictest/test_files/topk.slt b/datafusion/sqllogictest/test_files/topk.slt index 6c4d7be5ab8a..7364fccd8e57 100644 --- a/datafusion/sqllogictest/test_files/topk.slt +++ b/datafusion/sqllogictest/test_files/topk.slt @@ -371,8 +371,7 @@ explain select number, letter, age, number as column4, letter as column5 from pa ---- physical_plan 01)SortExec: TopK(fetch=3), expr=[number@0 DESC, letter@1 ASC NULLS 
LAST, age@2 DESC], preserve_partitioning=[false], sort_prefix=[number@0 DESC, letter@1 ASC NULLS LAST] -02)--ProjectionExec: expr=[number@0 as number, letter@1 as letter, age@2 as age, number@0 as column4, letter@1 as column5] -03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/topk/partial_sorted/1.parquet]]}, projection=[number, letter, age], output_ordering=[number@0 DESC, letter@1 ASC NULLS LAST], file_type=parquet, predicate=DynamicFilter [ empty ] +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/topk/partial_sorted/1.parquet]]}, projection=[number, letter, age, number@0 as column4, letter@1 as column5], output_ordering=[number@0 DESC, letter@1 ASC NULLS LAST], file_type=parquet, predicate=DynamicFilter [ empty ] # Verify that the sort prefix is correctly computed over normalized, order-maintaining projections (number + 1, number, number + 1, age) query TT diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt index c20598239c94..5c4cf3fba71d 100644 --- a/datafusion/sqllogictest/test_files/union.slt +++ b/datafusion/sqllogictest/test_files/union.slt @@ -601,8 +601,7 @@ physical_plan 01)SortPreservingMergeExec: [c1@0 ASC NULLS LAST] 02)--UnionExec 03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1], output_ordering=[c1@0 ASC NULLS LAST], file_type=csv, has_header=true -04)----ProjectionExec: expr=[c1a@0 as c1] -05)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1a], output_ordering=[c1a@0 ASC NULLS LAST], file_type=csv, has_header=true +04)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1a@0 as c1], file_type=csv, has_header=true statement ok drop table t1 diff --git a/datafusion/sqllogictest/test_files/unnest.slt b/datafusion/sqllogictest/test_files/unnest.slt index c4319c665bd0..537b334ecbe4 100644 --- a/datafusion/sqllogictest/test_files/unnest.slt +++ b/datafusion/sqllogictest/test_files/unnest.slt @@ -1021,8 +1021,7 @@ logical_plan physical_plan 01)ProjectionExec: expr=[__unnest_placeholder(t.column1,depth=1)@0 as UNNEST(t.column1), column2@1 as column2] 02)--UnnestExec -03)----ProjectionExec: expr=[column1@0 as __unnest_placeholder(t.column1), column2@1 as column2] -04)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/unnest/ordered_array.parquet]]}, projection=[column1, column2], output_ordering=[column2@1 ASC NULLS LAST], file_type=parquet +03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/unnest/ordered_array.parquet]]}, projection=[column1@0 as __unnest_placeholder(t.column1), column2], output_ordering=[column2@1 ASC NULLS LAST], file_type=parquet # Explain should have a SortExec at the top because we order by the output of the unnest (i.e. 
discarding the ordering) query TT @@ -1038,8 +1037,7 @@ physical_plan 01)SortExec: expr=[unnested@0 ASC NULLS LAST], preserve_partitioning=[false] 02)--ProjectionExec: expr=[__unnest_placeholder(t.column1,depth=1)@0 as unnested, column2@1 as column2] 03)----UnnestExec -04)------ProjectionExec: expr=[column1@0 as __unnest_placeholder(t.column1), column2@1 as column2] -05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/unnest/ordered_array.parquet]]}, projection=[column1, column2], output_ordering=[column2@1 ASC NULLS LAST], file_type=parquet +04)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/unnest/ordered_array.parquet]]}, projection=[column1@0 as __unnest_placeholder(t.column1), column2], output_ordering=[column2@1 ASC NULLS LAST], file_type=parquet # cleanup statement ok @@ -1084,8 +1082,7 @@ logical_plan physical_plan 01)SortExec: expr=[__unnest_placeholder(struct(t.column1,t.column2,t.column3)).c0@0 ASC NULLS LAST], preserve_partitioning=[false] 02)--UnnestExec -03)----ProjectionExec: expr=[struct(column1@0, column2@1, column3@2) as __unnest_placeholder(struct(t.column1,t.column2,t.column3))] -04)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/unnest/ordered_tuples.parquet]]}, projection=[column1, column2, column3], output_ordering=[column1@0 ASC NULLS LAST], file_type=parquet +03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/unnest/ordered_tuples.parquet]]}, projection=[struct(column1@0, column2@1, column3@2) as __unnest_placeholder(struct(t.column1,t.column2,t.column3))], file_type=parquet # cleanup statement ok @@ -1136,8 +1133,7 @@ logical_plan 04)------TableScan: t projection=[column1, column2] physical_plan 01)UnnestExec -02)--ProjectionExec: expr=[column1@0 as __unnest_placeholder(t.column1), column2@1 as column2] -03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/unnest/ordered_struct.parquet]]}, projection=[column1, column2], output_ordering=[column2@1 ASC NULLS LAST], file_type=parquet +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/unnest/ordered_struct.parquet]]}, projection=[column1@0 as __unnest_placeholder(t.column1), column2], output_ordering=[column2@1 ASC NULLS LAST], file_type=parquet # cleanup statement ok @@ -1207,8 +1203,7 @@ logical_plan physical_plan 01)ProjectionExec: expr=[__unnest_placeholder(t.column1,depth=2)@0 as UNNEST(UNNEST(t.column1)), __unnest_placeholder(t.column2,depth=1)@1 as UNNEST(t.column2), __unnest_placeholder(t.column3).s1@2 as __unnest_placeholder(t.column3).s1, __unnest_placeholder(t.column3).s2@3 as __unnest_placeholder(t.column3).s2, __unnest_placeholder(t.column3).s3@4 as __unnest_placeholder(t.column3).s3, column4@5 as column4] 02)--UnnestExec -03)----ProjectionExec: expr=[column1@0 as __unnest_placeholder(t.column1), column2@1 as __unnest_placeholder(t.column2), column3@2 as __unnest_placeholder(t.column3), column4@3 as column4] -04)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/unnest/ordered_struct_arrays.parquet]]}, projection=[column1, column2, column3, column4], output_ordering=[column4@3 ASC NULLS LAST], file_type=parquet +03)----DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/unnest/ordered_struct_arrays.parquet]]}, projection=[column1@0 as __unnest_placeholder(t.column1), column2@1 as __unnest_placeholder(t.column2), column3@2 as __unnest_placeholder(t.column3), column4], output_ordering=[column4@3 ASC NULLS LAST], file_type=parquet # cleanup statement ok diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index b7ef74e6c167..acb2f07e01b2 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -2267,8 +2267,7 @@ physical_plan 07)------------BoundedWindowAggExec: wdw=[sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { "sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING": nullable UInt64 }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] 08)--------------WindowAggExec: wdw=[sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "sum(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(NULL)), is_causal: false }] 09)----------------SortExec: expr=[c1@0 ASC NULLS LAST, c2@1 ASC NULLS LAST, c9@3 ASC NULLS LAST, c8@2 ASC NULLS LAST], preserve_partitioning=[false] -10)------------------ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c8@2 as c8, c9@3 as c9, c1@0 as c1_alias] -11)--------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c8, c9], file_type=csv, has_header=true +10)------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c8, c9, c1@0 as c1_alias], file_type=csv, has_header=true query IIIII SELECT c9, @@ -2683,8 +2682,7 @@ physical_plan 05)--------ProjectionExec: expr=[__common_expr_1@0 as __common_expr_1, inc_col@3 as inc_col, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING@5 as sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING@6 as sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@7 as sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@8 as min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@9 as min(annotated_data_finite.desc_col) ORDER BY 
[annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@10 as min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@11 as max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@12 as max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@13 as max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, count(Int64(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING@14 as count(Int64(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING, count(Int64(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@15 as count(Int64(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@16 as sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@17 as sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@18 as sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@19 as min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@20 as min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@21 as min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@22 as max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] 
RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@23 as max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@24 as max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, count(Int64(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING@25 as count(Int64(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING, count(Int64(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@26 as count(Int64(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING] 06)----------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Field { "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING": nullable Int64 }, frame: RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Field { "sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING": nullable Int64 }, frame: RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Field { "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING": nullable Int64 }, frame: ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Field { "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING": nullable Int32 }, frame: RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Field { "min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING": nullable Int32 }, frame: RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Field { "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING": nullable Int32 }, frame: ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Field { "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING": nullable Int32 }, frame: RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Field { "max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 
FOLLOWING": nullable Int32 }, frame: RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Field { "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING": nullable Int32 }, frame: ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, count(Int64(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING: Field { "count(Int64(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING": Int64 }, frame: RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING, count(Int64(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Field { "count(Int64(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING": Int64 }, frame: ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING], mode=[Sorted] 07)------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING: Field { "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING": nullable Int64 }, frame: RANGE BETWEEN 4 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING: Field { "sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING": nullable Int64 }, frame: RANGE BETWEEN 8 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { "sum(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING": nullable Int64 }, frame: ROWS BETWEEN 5 PRECEDING AND 1 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Field { "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING": nullable Int32 }, frame: RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Field { "min(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING": nullable Int32 }, frame: RANGE BETWEEN 1 PRECEDING AND 5 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Field { "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING": nullable Int32 }, frame: ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Field { "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING": nullable Int32 }, frame: RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING 
AND 1 FOLLOWING: Field { "max(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING": nullable Int32 }, frame: RANGE BETWEEN 1 PRECEDING AND 5 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Field { "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING": nullable Int32 }, frame: ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, count(Int64(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING: Field { "count(Int64(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING": Int64 }, frame: RANGE BETWEEN 2 PRECEDING AND 6 FOLLOWING, count(Int64(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Field { "count(Int64(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING": Int64 }, frame: ROWS BETWEEN 1 PRECEDING AND 8 FOLLOWING], mode=[Sorted] -08)--------------ProjectionExec: expr=[CAST(desc_col@2 AS Int64) as __common_expr_1, CAST(inc_col@1 AS Int64) as __common_expr_2, ts@0 as ts, inc_col@1 as inc_col, desc_col@2 as desc_col] -09)----------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_1.csv]]}, projection=[ts, inc_col, desc_col], output_ordering=[ts@0 ASC NULLS LAST], file_type=csv, has_header=true +08)--------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_1.csv]]}, projection=[CAST(desc_col@2 AS Int64) as __common_expr_1, CAST(inc_col@1 AS Int64) as __common_expr_2, ts, inc_col, desc_col], output_ordering=[ts@2 ASC NULLS LAST], file_type=csv, has_header=true query IIIIIIIIIIIIIIIIIIIIIIII SELECT @@ -2839,8 +2837,7 @@ physical_plan 03)----ProjectionExec: expr=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@9 as sum1, sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@4 as sum2, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@10 as min1, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@5 as min2, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@11 as max1, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@6 as max2, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@12 as count1, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@7 as count2, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@13 as avg1, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@8 as avg2, inc_col@3 as inc_col] 04)------BoundedWindowAggExec: 
wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Field { "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING": nullable Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Field { "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING": nullable Int32 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Field { "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING": nullable Int32 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Field { "count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Field { "avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING": nullable Float64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING], mode=[Sorted] 05)--------BoundedWindowAggExec: wdw=[sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Field { "sum(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING": nullable Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND 3 FOLLOWING, min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Field { "min(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING": nullable Int32 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND 3 FOLLOWING, max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Field { "max(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING": nullable Int32 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND 3 FOLLOWING, count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Field { "count(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND 3 FOLLOWING, avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Field { "avg(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE 
BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING": nullable Float64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND 3 FOLLOWING], mode=[Sorted] -06)----------ProjectionExec: expr=[CAST(inc_col@1 AS Int64) as __common_expr_1, CAST(inc_col@1 AS Float64) as __common_expr_2, ts@0 as ts, inc_col@1 as inc_col] -07)------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_1.csv]]}, projection=[ts, inc_col], output_ordering=[ts@0 ASC NULLS LAST], file_type=csv, has_header=true +06)----------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_1.csv]]}, projection=[CAST(inc_col@1 AS Int64) as __common_expr_1, CAST(inc_col@1 AS Float64) as __common_expr_2, ts, inc_col], output_ordering=[ts@2 ASC NULLS LAST], file_type=csv, has_header=true query IIIIIIIIRR SELECT @@ -3157,8 +3154,7 @@ physical_plan 11)--------------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Field { "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING": nullable Int64 }, frame: ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW: Field { "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW": nullable Int64 }, frame: ROWS BETWEEN 5 PRECEDING AND CURRENT ROW], mode=[Sorted] 12)----------------------SortExec: expr=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, d@4 ASC NULLS LAST, c@3 ASC NULLS LAST], preserve_partitioning=[false] 13)------------------------BoundedWindowAggExec: wdw=[sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Field { "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING": nullable Int64 }, frame: ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING, sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING: Field { "sum(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING": nullable Int64 }, frame: ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING], mode=[Sorted] -14)--------------------------ProjectionExec: expr=[CAST(c@2 AS Int64) as __common_expr_1, a@0 as a, b@1 as b, c@2 as c, d@3 as d] -15)----------------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c, d], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], file_type=csv, has_header=true +14)--------------------------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[CAST(c@3 AS Int64) as __common_expr_1, a, b, c, d], output_ordering=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], file_type=csv, has_header=true query IIIIIIIIIIIIIII SELECT a, b, c, @@ -5941,9 +5937,7 @@ physical_plan 03)----BoundedWindowAggExec: wdw=[sum(test.c2) FILTER (WHERE test.c2 >= Int64(2)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { "sum(test.c2) FILTER (WHERE test.c2 >= Int64(2)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW": nullable Int64 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(test.c2) FILTER (WHERE test.c2 >= Int64(2) AND test.c2 < Int64(4) AND test.c1 > Int64(0)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { "sum(test.c2) FILTER (WHERE test.c2 >= Int64(2) AND test.c2 < Int64(4) AND test.c1 > Int64(0)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW": nullable Int64 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, count(test.c2) FILTER (WHERE test.c2 >= Int64(2)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { "count(test.c2) FILTER (WHERE test.c2 >= Int64(2)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW": Int64 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, array_agg(test.c2) FILTER (WHERE test.c2 >= Int64(2)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { "array_agg(test.c2) FILTER (WHERE test.c2 >= Int64(2)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW": nullable List(nullable Int64) }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, array_agg(test.c2) FILTER (WHERE test.c2 >= Int64(2) AND test.c2 < Int64(4) AND test.c1 > Int64(0)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { "array_agg(test.c2) FILTER (WHERE test.c2 >= Int64(2) AND test.c2 < Int64(4) AND test.c1 > Int64(0)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW": nullable List(nullable Int64) }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] 04)------SortPreservingMergeExec: [c1@2 ASC NULLS LAST, c2@3 ASC NULLS LAST], fetch=5 05)--------SortExec: TopK(fetch=5), expr=[c1@2 ASC NULLS LAST, c2@3 ASC NULLS LAST], preserve_partitioning=[true] -06)----------ProjectionExec: expr=[__common_expr_3@0 as __common_expr_1, __common_expr_3@0 AND c2@2 < 4 AND c1@1 > 0 as __common_expr_2, c1@1 as c1, c2@2 as c2] -07)------------ProjectionExec: expr=[c2@1 >= 2 as __common_expr_3, c1@0 as c1, c2@1 as c2] -08)--------------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/core/tests/data/partitioned_csv/partition-0.csv], [WORKSPACE_ROOT/datafusion/core/tests/data/partitioned_csv/partition-1.csv], [WORKSPACE_ROOT/datafusion/core/tests/data/partitioned_csv/partition-2.csv], [WORKSPACE_ROOT/datafusion/core/tests/data/partitioned_csv/partition-3.csv]]}, projection=[c1, c2], file_type=csv, has_header=false +06)----------DataSourceExec: file_groups={4 groups: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/partitioned_csv/partition-0.csv], [WORKSPACE_ROOT/datafusion/core/tests/data/partitioned_csv/partition-1.csv], [WORKSPACE_ROOT/datafusion/core/tests/data/partitioned_csv/partition-2.csv], [WORKSPACE_ROOT/datafusion/core/tests/data/partitioned_csv/partition-3.csv]]}, projection=[c2@1 >= 2 as __common_expr_1, c2@1 >= 2 AND c2@1 < 4 AND c1@0 > 0 as __common_expr_2, c1, c2], file_type=csv, has_header=false # FILTER filters out some rows diff --git a/datafusion/substrait/src/physical_plan/consumer.rs b/datafusion/substrait/src/physical_plan/consumer.rs index 8ce71acecca3..1feee6386318 100644 --- a/datafusion/substrait/src/physical_plan/consumer.rs +++ b/datafusion/substrait/src/physical_plan/consumer.rs @@ -152,7 +152,7 @@ pub async fn from_substrait_rel( .map(|item| item.field as usize) .collect(); base_config_builder = base_config_builder - .with_projection_indices(Some(column_indices)); + .with_projection_indices(Some(column_indices))?; } } diff --git a/datafusion/substrait/src/physical_plan/producer.rs b/datafusion/substrait/src/physical_plan/producer.rs index 20d41c2e6112..557e80146b9a 100644 --- a/datafusion/substrait/src/physical_plan/producer.rs +++ b/datafusion/substrait/src/physical_plan/producer.rs @@ -92,7 +92,7 @@ pub fn to_substrait_rel( }; let mut select_struct = None; - if let Some(projection) = file_config.projection_exprs.as_ref() { + if let Some(projection) = file_config.file_source().projection().as_ref() { let struct_items = projection .column_indices() .into_iter() diff --git a/docs/source/library-user-guide/upgrading.md b/docs/source/library-user-guide/upgrading.md index 7a827847dde5..25c209c5ebe8 100644 --- a/docs/source/library-user-guide/upgrading.md +++ b/docs/source/library-user-guide/upgrading.md @@ -202,6 +202,121 @@ Additionally, the FFI structure for Scalar UDF's no longer contains a `return_type` call. This code was not used since the `ForeignScalarUDF` struct implements the `return_field_from_args` instead.
+### Projection handling moved from FileScanConfig to FileSource
+
+Projection handling has been moved from `FileScanConfig` into `FileSource` implementations. This enables format-specific projection pushdown (e.g., Parquet can push down struct field access, Vortex can push down computed expressions into un-decoded data).
+
+**Who is affected:**
+
+- Users who have implemented a custom `FileSource`
+- Users who use `FileScanConfigBuilder::with_projection_indices` directly
+
+**Breaking changes:**
+
+1. **`FileSource::with_projection` replaced with `try_pushdown_projection`:**
+
+   The `with_projection(&self, config: &FileScanConfig) -> Arc<dyn FileSource>` method has been removed and replaced with `try_pushdown_projection(&self, projection: &ProjectionExprs) -> Result<Option<Arc<dyn FileSource>>>`.
+
+2. **`FileScanConfig.projection_exprs` field removed:**
+
+   Projections are now stored in the `FileSource` directly, not in `FileScanConfig`.
+   Various public helper methods that access projection information have been removed from `FileScanConfig`.
+
+3. **`FileScanConfigBuilder::with_projection_indices` now returns `Result`:**
+
+   This method can now fail if the projection pushdown fails.
+
+4. **`FileSource::create_file_opener` now returns `Result<Arc<dyn FileOpener>>`:**
+
+   Previously returned `Arc<dyn FileOpener>` directly.
+   Any `FileSource` implementation that may fail to create a `FileOpener` should now return an appropriate error.
+
+5. **`DataSource::try_swapping_with_projection` signature changed:**
+
+   Parameter changed from `&[ProjectionExpr]` to `&ProjectionExprs` (see the sketch below).
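+
+The change in item 5 is limited to the projection parameter's type; the return type and method body stay as they are in your existing implementation. A minimal sketch of the edit for a hypothetical custom `DataSource` (the `MyDataSource` name is illustrative):
+
+```diff
+ impl DataSource for MyDataSource {
+     fn try_swapping_with_projection(
+         &self,
+-        projection: &[ProjectionExpr],
++        projection: &ProjectionExprs,
+     ) -> /* return type unchanged */ {
+         /* body unchanged */
+     }
+ }
+```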
+
+**Migration guide:**
+
+If you have a custom `FileSource` implementation:
+
+**Before:**
+
+```rust,ignore
+impl FileSource for MyCustomSource {
+    fn with_projection(&self, config: &FileScanConfig) -> Arc<dyn FileSource> {
+        // Apply projection from config
+        Arc::new(Self { /* ... */ })
+    }
+
+    fn create_file_opener(
+        &self,
+        object_store: Arc<dyn ObjectStore>,
+        base_config: &FileScanConfig,
+        partition: usize,
+    ) -> Arc<dyn FileOpener> {
+        Arc::new(MyOpener { /* ... */ })
+    }
+}
+```
+
+**After:**
+
+```rust,ignore
+impl FileSource for MyCustomSource {
+    fn try_pushdown_projection(
+        &self,
+        projection: &ProjectionExprs,
+    ) -> Result<Option<Arc<dyn FileSource>>> {
+        // Return None if projection cannot be pushed down
+        // Return Some(new_source) with projection applied if it can
+        Ok(Some(Arc::new(Self {
+            projection: Some(projection.clone()),
+            /* ... */
+        })))
+    }
+
+    fn projection(&self) -> Option<&ProjectionExprs> {
+        self.projection.as_ref()
+    }
+
+    fn create_file_opener(
+        &self,
+        object_store: Arc<dyn ObjectStore>,
+        base_config: &FileScanConfig,
+        partition: usize,
+    ) -> Result<Arc<dyn FileOpener>> {
+        Ok(Arc::new(MyOpener { /* ... */ }))
+    }
+}
+```
+
+We recommend looking at [#18627](https://github.com/apache/datafusion/pull/18627),
+the PR that introduced these changes, for more examples of how this was handled
+for the various built-in file sources.
+
+**Handling projections in `FileSource`:**
+
+We have added [`SplitProjection`](https://docs.rs/datafusion-datasource/latest/datafusion_datasource/projection/struct.SplitProjection.html) and [`ProjectionOpener`](https://docs.rs/datafusion-datasource/latest/datafusion_datasource/projection/struct.ProjectionOpener.html) helpers to make it easier to handle projections in your `FileSource` implementations.
+
+For file sources that can only handle simple column selections (not computed expressions), use the `SplitProjection` and `ProjectionOpener` helpers to split the projection into pushdownable and non-pushdownable parts:
+
+```rust,ignore
+use datafusion_datasource::projection::{SplitProjection, ProjectionOpener};
+
+// In try_pushdown_projection:
+let split = SplitProjection::new(projection, self.table_schema())?;
+// Use split.file_projection() for what to push down to the file format
+// The ProjectionOpener wrapper will handle the rest
+```
+
+**For `FileScanConfigBuilder` users:**
+
+```diff
+let config = FileScanConfigBuilder::new(url, source)
+-    .with_projection_indices(Some(vec![0, 2, 3]))
++    .with_projection_indices(Some(vec![0, 2, 3]))?
+     .build();
+```
+
 ## DataFusion `51.0.0`

 ### `arrow` / `parquet` updated to 57.0.0