@@ -51,7 +51,9 @@ use crate::connectors::data_format::{
5151} ;
5252use crate :: connectors:: data_lake:: buffering:: PayloadType ;
5353use crate :: connectors:: data_lake:: ArrowDataType ;
54- use crate :: connectors:: data_storage:: { ConnectorMode , ConversionError , ValuesMap } ;
54+ use crate :: connectors:: data_storage:: {
55+ CommitPossibility , ConnectorMode , ConversionError , ValuesMap ,
56+ } ;
5557use crate :: connectors:: metadata:: ParquetMetadata ;
5658use crate :: connectors:: scanner:: S3Scanner ;
5759use crate :: connectors:: {
@@ -701,48 +703,26 @@ impl DeltaTableReader {
701703 }
702704 let mut current_version = table. version ( ) ;
703705
704- let mut parquet_files_queue = {
705- let history = runtime. block_on ( async {
706- Ok :: < Vec < DeltaTableCommitInfo > , ReadError > ( table. history ( None ) . await ?)
707- } ) ?;
708- Self :: get_reader_actions ( & table, path, history, & column_types) ?
709- } ;
706+ let mut parquet_files_queue = VecDeque :: new ( ) ;
710707 let mut backfilling_entries_queue = VecDeque :: new ( ) ;
708+ let mut snapshot_loading_needed = backfilling_thresholds. is_empty ( ) ;
711709
712710 if let Some ( start_from_timestamp_ms) = start_from_timestamp_ms {
713711 assert ! ( backfilling_thresholds. is_empty( ) ) ; // Checked upstream in python_api.rs
714- let current_timestamp = current_unix_timestamp_ms ( ) ;
715- if start_from_timestamp_ms > current_timestamp. try_into ( ) . unwrap ( ) {
716- warn ! ( "The timestamp {start_from_timestamp_ms} is greater than the current timestamp {current_timestamp}. All new entries will be read." ) ;
717- }
718- let ( earliest_version, latest_version) = runtime. block_on ( async {
719- Ok :: < ( i64 , i64 ) , ReadError > ( (
720- table. get_earliest_version ( ) . await ?,
721- table. get_latest_version ( ) . await ?,
722- ) )
723- } ) ?;
724- let snapshot = table. snapshot ( ) ?;
725-
726- let mut last_version_below_threshold = None ;
727- for version in earliest_version..=latest_version {
728- let Some ( timestamp) = snapshot. version_timestamp ( version) else {
729- continue ;
730- } ;
731- if timestamp < start_from_timestamp_ms {
732- last_version_below_threshold = Some ( version) ;
733- } else {
734- break ;
735- }
736- }
737- if let Some ( last_version_below_threshold) = last_version_below_threshold {
738- runtime
739- . block_on ( async { table. load_version ( last_version_below_threshold) . await } ) ?;
740- current_version = last_version_below_threshold;
741- } else {
742- current_version = earliest_version;
743- warn ! ( "All available versions are newer than the specified timestamp {start_from_timestamp_ms}. The read will start from the beginning." ) ;
744- }
745- parquet_files_queue. clear ( ) ;
712+ Self :: handle_start_from_timestamp_ms (
713+ & runtime,
714+ & mut table,
715+ start_from_timestamp_ms,
716+ is_append_only,
717+ & mut current_version,
718+ & mut snapshot_loading_needed,
719+ ) ?;
720+ } else {
721+ snapshot_loading_needed = true ;
722+ }
723+ if snapshot_loading_needed {
724+ parquet_files_queue =
725+ Self :: get_reader_actions_for_table ( & runtime, & table, path, & column_types) ?;
746726 }
747727
748728 if !backfilling_thresholds. is_empty ( ) {
@@ -771,6 +751,64 @@ impl DeltaTableReader {
771751 } )
772752 }
773753
754+ fn handle_start_from_timestamp_ms (
755+ runtime : & TokioRuntime ,
756+ table : & mut DeltaTable ,
757+ start_from_timestamp_ms : i64 ,
758+ is_append_only : bool ,
759+ current_version : & mut i64 ,
760+ snapshot_loading_needed : & mut bool ,
761+ ) -> Result < ( ) , ReadError > {
762+ let current_timestamp = current_unix_timestamp_ms ( ) ;
763+ if start_from_timestamp_ms > current_timestamp. try_into ( ) . unwrap ( ) {
764+ warn ! ( "The timestamp {start_from_timestamp_ms} is greater than the current timestamp {current_timestamp}. All new entries will be read." ) ;
765+ }
766+ let ( earliest_version, latest_version) = runtime. block_on ( async {
767+ Ok :: < ( i64 , i64 ) , ReadError > ( (
768+ table. get_earliest_version ( ) . await ?,
769+ table. get_latest_version ( ) . await ?,
770+ ) )
771+ } ) ?;
772+ let snapshot = table. snapshot ( ) ?;
773+
774+ let mut last_version_below_threshold = None ;
775+ let mut version_at_threshold = None ;
776+ for version in earliest_version..=latest_version {
777+ let Some ( timestamp) = snapshot. version_timestamp ( version) else {
778+ continue ;
779+ } ;
780+ if timestamp < start_from_timestamp_ms {
781+ last_version_below_threshold = Some ( version) ;
782+ } else {
783+ if timestamp == start_from_timestamp_ms {
784+ version_at_threshold = Some ( version) ;
785+ }
786+ break ;
787+ }
788+ }
789+
790+ if !is_append_only && version_at_threshold. is_some ( ) {
791+ * current_version = version_at_threshold. unwrap ( ) ;
792+ } else if let Some ( last_version_below_threshold) = last_version_below_threshold {
793+ * current_version = last_version_below_threshold;
794+ } else {
795+ * current_version = earliest_version;
796+ warn ! (
797+ "All available versions are newer than the specified timestamp {start_from_timestamp_ms}. The read will start from the beginning, version {current_version}."
798+ ) ;
799+ // NB: All versions are newer than the requested one, meaning that we need to read the
800+ // full state at the `earliest_version` and then continue incrementally.
801+ }
802+
803+ if is_append_only && last_version_below_threshold. is_some ( ) {
804+ // We've found the threshold version, we read only diffs from this version onwards.
805+ * snapshot_loading_needed = false ;
806+ }
807+
808+ runtime. block_on ( async { table. load_version ( * current_version) . await } ) ?;
809+ Ok ( ( ) )
810+ }
811+
774812 fn record_batch_has_pathway_fields ( batch : & ArrowRecordBatch ) -> bool {
775813 for ( field, _) in SPECIAL_OUTPUT_FIELDS {
776814 if let Some ( time_column) = batch. column_by_name ( field) {
@@ -891,7 +929,8 @@ impl DeltaTableReader {
891929 if is_new_block {
892930 backfilling_entries_queue. push_back ( BackfillingEntry :: SourceEvent (
893931 ReadResult :: FinishedSource {
894- commit_allowed : true ,
932+ // Applicable only for append-only tables, hence no need to avoid squashing diff = +1 with diff = -1
933+ commit_possibility : CommitPossibility :: Possible ,
895934 } ,
896935 ) ) ;
897936 backfilling_entries_queue. push_back ( BackfillingEntry :: SourceEvent (
@@ -909,7 +948,9 @@ impl DeltaTableReader {
909948 }
910949 backfilling_entries_queue. push_back ( BackfillingEntry :: SourceEvent (
911950 ReadResult :: FinishedSource {
912- commit_allowed : true ,
951+ // Same as above, we don't force commits, since the situation with losing/collapsing +1 and -1 events
952+ // is not possible here
953+ commit_possibility : CommitPossibility :: Possible ,
913954 } ,
914955 ) ) ;
915956 if pathway_meta_column_added {
@@ -943,6 +984,18 @@ impl DeltaTableReader {
943984 }
944985 }
945986
987+ fn get_reader_actions_for_table (
988+ runtime : & TokioRuntime ,
989+ table : & DeltaTable ,
990+ base_path : & str ,
991+ column_types : & HashMap < String , Type > ,
992+ ) -> Result < VecDeque < DeltaReaderAction > , ReadError > {
993+ let history = runtime. block_on ( async {
994+ Ok :: < Vec < DeltaTableCommitInfo > , ReadError > ( table. history ( None ) . await ?)
995+ } ) ?;
996+ Self :: get_reader_actions ( table, base_path, history, column_types)
997+ }
998+
946999 fn get_reader_actions (
9471000 table : & DeltaTable ,
9481001 base_path : & str ,
@@ -1219,12 +1272,21 @@ impl DeltaTableReader {
12191272 None => {
12201273 // The Pathway time advancement (e.g. commit) is only possible if it was the
12211274 // last Parquet block within a version.
1275+ let is_last_in_version = self
1276+ . current_action
1277+ . as_ref ( )
1278+ . expect ( "current action must be set if there's a reader" )
1279+ . is_last_in_version ;
1280+
12221281 let source_event = ReadResult :: FinishedSource {
1223- commit_allowed : self
1224- . current_action
1225- . as_ref ( )
1226- . expect ( "current action must be set if there's a reader" )
1227- . is_last_in_version ,
1282+ commit_possibility : if is_last_in_version {
1283+ // The versions are read on-line, force to avoid squashing same-key events
1284+ // with the previous or the next versions.
1285+ // Note that it can be less strict if the batch only has additions.
1286+ CommitPossibility :: Forced
1287+ } else {
1288+ CommitPossibility :: Forbidden
1289+ } ,
12281290 } ;
12291291 self . reader = None ;
12301292 self . current_action = None ;
0 commit comments