Skip to content

Commit 09e2a9b

Browse files
authored
chore: deprecate file_actions on state (#3863)
# Description In one of @fvaleye's latest PRs (#3841), we got rid of the last call sites of `file_actions` on `DeltaTableState`. Since we generally discourage materialising `Add` actions and rather do processing/planning on the view into the underlying arrow data, it's high time we deprecate these methods and remove the last invocations within our tests. This PR does just that. Signed-off-by: Robert Pack <[email protected]>
1 parent a144752 commit 09e2a9b

File tree

5 files changed

+63
-32
lines changed

5 files changed

+63
-32
lines changed

crates/core/src/protocol/checkpoints.rs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -677,7 +677,12 @@ mod tests {
677677
count,
678678
"Expected {count} transactions"
679679
);
680-
let pre_checkpoint_actions = table.snapshot()?.file_actions(&table.log_store).await?;
680+
let pre_checkpoint_actions: Vec<_> = table
681+
.snapshot()?
682+
.snapshot()
683+
.file_views(&table.log_store, None)
684+
.try_collect()
685+
.await?;
681686

682687
let before = table.version();
683688
let res = create_checkpoint(&table, None).await;
@@ -692,7 +697,12 @@ mod tests {
692697
"Why on earth did a checkpoint creata version?"
693698
);
694699

695-
let post_checkpoint_actions = table.snapshot()?.file_actions(&table.log_store).await?;
700+
let post_checkpoint_actions: Vec<_> = table
701+
.snapshot()?
702+
.snapshot()
703+
.file_views(&table.log_store, None)
704+
.try_collect()
705+
.await?;
696706

697707
assert_eq!(
698708
pre_checkpoint_actions.len(),

crates/core/src/protocol/mod.rs

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -710,6 +710,7 @@ mod tests {
710710
use arrow::compute::sort_to_indices;
711711
use arrow::datatypes::{DataType, Date32Type, Field, Fields, TimestampMicrosecondType};
712712
use arrow::record_batch::RecordBatch;
713+
use futures::TryStreamExt;
713714
use pretty_assertions::assert_eq;
714715
use std::path::Path;
715716
use std::sync::Arc;
@@ -1116,16 +1117,14 @@ mod tests {
11161117
let mut table = crate::open_table(table_uri).await.unwrap();
11171118
table.load().await.unwrap();
11181119

1119-
assert_eq!(
1120-
2,
1121-
table
1122-
.snapshot()
1123-
.unwrap()
1124-
.file_actions(&table.log_store)
1125-
.await
1126-
.unwrap()
1127-
.len()
1128-
);
1120+
let snapshot = table.snapshot().unwrap().snapshot();
1121+
let files: Vec<_> = snapshot
1122+
.file_views(&table.log_store, None)
1123+
.try_collect()
1124+
.await
1125+
.unwrap();
1126+
1127+
assert_eq!(2, files.len());
11291128
}
11301129

11311130
#[tokio::test]

crates/core/src/table/state.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,12 +140,20 @@ impl DeltaTableState {
140140

141141
/// Full list of add actions representing all parquet files that are part of the current
142142
/// delta table state.
143+
#[deprecated(
144+
since = "0.29.1",
145+
note = "Use `.snapshot().file_views(log_store, predicate)` instead."
146+
)]
143147
pub async fn file_actions(&self, log_store: &dyn LogStore) -> DeltaResult<Vec<Add>> {
144148
self.file_actions_iter(log_store).try_collect().await
145149
}
146150

147151
/// Full list of add actions representing all parquet files that are part of the current
148152
/// delta table state.
153+
#[deprecated(
154+
since = "0.29.1",
155+
note = "Use `.snapshot().file_views(log_store, predicate)` instead."
156+
)]
149157
pub fn file_actions_iter(&self, log_store: &dyn LogStore) -> BoxStream<'_, DeltaResult<Add>> {
150158
self.snapshot
151159
.file_views(log_store, None)

crates/core/src/writer/json.rs

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,8 @@ mod tests {
474474
use super::*;
475475

476476
use arrow_schema::ArrowError;
477+
#[cfg(feature = "datafusion")]
478+
use futures::TryStreamExt;
477479
use parquet::file::reader::FileReader;
478480
use parquet::file::serialized_reader::SerializedFileReader;
479481
use std::fs::File;
@@ -784,10 +786,12 @@ mod tests {
784786
writer.write(vec![data]).await.unwrap();
785787
writer.flush_and_commit(&mut table).await.unwrap();
786788
assert_eq!(table.version(), Some(1));
787-
let add_actions = table
788-
.state
789+
let add_actions: Vec<_> = table
790+
.snapshot()
789791
.unwrap()
790-
.file_actions(&table.log_store)
792+
.snapshot()
793+
.file_views(&table.log_store, None)
794+
.try_collect()
791795
.await
792796
.unwrap();
793797
assert_eq!(add_actions.len(), 1);
@@ -798,7 +802,7 @@ mod tests {
798802
.into_iter()
799803
.next()
800804
.unwrap()
801-
.stats
805+
.stats()
802806
.unwrap()
803807
.parse::<serde_json::Value>()
804808
.unwrap()
@@ -847,10 +851,12 @@ mod tests {
847851
writer.write(vec![data]).await.unwrap();
848852
writer.flush_and_commit(&mut table).await.unwrap();
849853
assert_eq!(table.version(), Some(1));
850-
let add_actions = table
851-
.state
854+
let add_actions: Vec<_> = table
855+
.snapshot()
852856
.unwrap()
853-
.file_actions(&table.log_store)
857+
.snapshot()
858+
.file_views(&table.log_store, None)
859+
.try_collect()
854860
.await
855861
.unwrap();
856862
assert_eq!(add_actions.len(), 1);
@@ -861,7 +867,7 @@ mod tests {
861867
.into_iter()
862868
.next()
863869
.unwrap()
864-
.stats
870+
.stats()
865871
.unwrap()
866872
.parse::<serde_json::Value>()
867873
.unwrap()

crates/core/src/writer/record_batch.rs

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -527,16 +527,20 @@ fn lexsort_to_indices(arrays: &[ArrayRef]) -> UInt32Array {
527527

528528
#[cfg(test)]
529529
mod tests {
530-
use super::*;
531-
use crate::operations::create::CreateBuilder;
532-
use crate::writer::test_utils::*;
533-
use crate::{DeltaOps, DeltaResult};
534530
use arrow::json::ReaderBuilder;
535531
use arrow_array::{Int32Array, RecordBatch, StringArray};
536532
use arrow_schema::{DataType, Field, Schema as ArrowSchema};
537533
use delta_kernel::schema::StructType;
534+
#[cfg(feature = "datafusion")]
535+
use futures::TryStreamExt;
538536
use std::path::Path;
539537

538+
use crate::operations::create::CreateBuilder;
539+
use crate::writer::test_utils::*;
540+
use crate::{DeltaOps, DeltaResult};
541+
542+
use super::*;
543+
540544
#[tokio::test]
541545
async fn test_buffer_len_includes_unflushed_row_group() {
542546
let table_dir = tempfile::tempdir().unwrap();
@@ -1156,10 +1160,12 @@ mod tests {
11561160
writer.write(batch).await.unwrap();
11571161
writer.flush_and_commit(&mut table).await.unwrap();
11581162
assert_eq!(table.version(), Some(1));
1159-
let add_actions = table
1160-
.state
1163+
let add_actions: Vec<_> = table
1164+
.snapshot()
11611165
.unwrap()
1162-
.file_actions(&table.log_store)
1166+
.snapshot()
1167+
.file_views(&table.log_store, None)
1168+
.try_collect()
11631169
.await
11641170
.unwrap();
11651171
assert_eq!(add_actions.len(), 1);
@@ -1170,7 +1176,7 @@ mod tests {
11701176
.into_iter()
11711177
.next()
11721178
.unwrap()
1173-
.stats
1179+
.stats()
11741180
.unwrap()
11751181
.parse::<serde_json::Value>()
11761182
.unwrap()
@@ -1210,10 +1216,12 @@ mod tests {
12101216
writer.write(batch).await.unwrap();
12111217
writer.flush_and_commit(&mut table).await.unwrap();
12121218
assert_eq!(table.version(), Some(1));
1213-
let add_actions = table
1214-
.state
1219+
let add_actions: Vec<_> = table
1220+
.snapshot()
12151221
.unwrap()
1216-
.file_actions(&table.log_store)
1222+
.snapshot()
1223+
.file_views(&table.log_store, None)
1224+
.try_collect()
12171225
.await
12181226
.unwrap();
12191227
assert_eq!(add_actions.len(), 1);
@@ -1224,7 +1232,7 @@ mod tests {
12241232
.into_iter()
12251233
.next()
12261234
.unwrap()
1227-
.stats
1235+
.stats()
12281236
.unwrap()
12291237
.parse::<serde_json::Value>()
12301238
.unwrap()

0 commit comments

Comments
 (0)