From 5b638aa185a6b749c664e93f8d11ba9ce4e66047 Mon Sep 17 00:00:00 2001
From: Omer Ben-Amram <omerbenamram@gmail.com>
Date: Sun, 21 Dec 2025 21:29:51 +0200
Subject: [PATCH] Fix parsing when chunk header offsets are too large

Treat all-zero record header magic as end-of-chunk slack to avoid failing on incorrect free_space_offset/last_event_record_id (issue #197). Adds a regression test and updates dirty-sample expected error counts.
---
 src/evtx_chunk.rs                    | 78 ++++++++++++++++++++++++++--
 tests/test_full_samples.rs           |  6 +--
 tests/test_full_samples_streaming.rs |  6 +--
 3 files changed, 79 insertions(+), 11 deletions(-)
diff --git a/src/evtx_chunk.rs b/src/evtx_chunk.rs
index af93a4c2..ecb981f5 100644
--- a/src/evtx_chunk.rs
+++ b/src/evtx_chunk.rs
@@ -239,20 +239,51 @@ impl<'a> Iterator for IterChunkRecords<'a> {
     type Item = std::result::Result<EvtxRecord<'a>, EvtxError>;
 
     fn next(&mut self) -> Option<<Self as Iterator>::Item> {
-        if self.exhausted
-            || self.offset_from_chunk_start >= u64::from(self.chunk.header.free_space_offset)
-        {
+        // Be resilient to corrupted chunk headers: `free_space_offset` is user-controlled data
+        // coming from the EVTX stream, and may point past the end of the chunk.
+        let effective_free_space_offset = u64::from(self.chunk.header.free_space_offset)
+            .min(self.chunk.data.len().try_into().unwrap_or(u64::MAX));
+
+        if self.exhausted || self.offset_from_chunk_start >= effective_free_space_offset {
+            return None;
+        }
+
+        let start = self.offset_from_chunk_start as usize;
+        if start >= self.chunk.data.len() {
+            // Avoid panicking on an out-of-bounds slice if the header is corrupted.
+            self.exhausted = true;
+            return None;
+        }
+
+        let remaining = &self.chunk.data[start..];
+        if remaining.len() < 4 {
+            // Not enough bytes for the record header magic, treat as end-of-chunk.
+            self.exhausted = true;
             return None;
         }
 
-        let mut cursor = Cursor::new(&self.chunk.data[self.offset_from_chunk_start as usize..]);
+        let mut cursor = Cursor::new(remaining);
 
         let record_header = match EvtxRecordHeader::from_reader(&mut cursor) {
             Ok(record_header) => record_header,
+            Err(DeserializationError::InvalidEvtxRecordHeaderMagic { magic }) => {
+                // Some producers write incorrect `free_space_offset` / `last_event_record_id`.
+                // In such cases we may attempt to parse the chunk slack area, which is typically
+                // zero-padded. Treat an all-zero "magic" as a clean end-of-chunk instead of
+                // emitting an error (see issue #197).
+                if magic == [0, 0, 0, 0] {
+                    self.exhausted = true;
+                    return None;
+                }
+
+                self.exhausted = true;
+                return Some(Err(EvtxError::DeserializationError(
+                    DeserializationError::InvalidEvtxRecordHeaderMagic { magic },
+                )));
+            }
             Err(err) => {
                 // We currently do not try to recover after an invalid record.
                 self.exhausted = true;
-
                 return Some(Err(EvtxError::DeserializationError(err)));
             }
         };
@@ -449,4 +480,41 @@ mod tests {
         let chunk = EvtxChunkData::new(chunk_data, false).unwrap();
         assert!(chunk.validate_checksum());
     }
+
+    #[test]
+    fn test_iter_ends_cleanly_when_chunk_header_offsets_are_too_large() {
+        ensure_env_logger_initialized();
+
+        let evtx_file = include_bytes!("../samples/security.evtx");
+        let chunk_data =
+            evtx_file[EVTX_FILE_HEADER_SIZE..EVTX_FILE_HEADER_SIZE + EVTX_CHUNK_SIZE].to_vec();
+
+        // Parse once to get a baseline count.
+        let mut baseline = EvtxChunkData::new(chunk_data.clone(), false).unwrap();
+        let settings = Arc::new(ParserSettings::new());
+        let baseline_count = {
+            let mut chunk = baseline.parse(Arc::clone(&settings)).unwrap();
+            chunk
+                .iter()
+                .try_fold(0usize, |acc, record| record.map(|_| acc + 1))
+                .unwrap()
+        };
+
+        // Now simulate a broken chunk header like in issue #197: `last_event_record_id` and
+        // `free_space_offset` are larger than the actual number of records/data.
+        let mut corrupted = EvtxChunkData::new(chunk_data, false).unwrap();
+        corrupted.header.last_event_record_id =
+            corrupted.header.last_event_record_id.saturating_add(100);
+        corrupted.header.free_space_offset = EVTX_CHUNK_SIZE as u32;
+
+        let corrupted_count = {
+            let mut chunk = corrupted.parse(settings).unwrap();
+            chunk
+                .iter()
+                .try_fold(0usize, |acc, record| record.map(|_| acc + 1))
+                .unwrap()
+        };
+
+        assert_eq!(corrupted_count, baseline_count);
+    }
 }
diff --git a/tests/test_full_samples.rs b/tests/test_full_samples.rs
index fe7f17a0..d2be7bc1 100644
--- a/tests/test_full_samples.rs
+++ b/tests/test_full_samples.rs
@@ -128,19 +128,19 @@ fn test_dirty_sample_with_a_chunk_past_zeros() {
 
 #[test]
 fn test_dirty_sample_with_a_bad_chunk_magic() {
-    test_full_sample(sample_with_a_bad_chunk_magic(), 270, 5)
+    test_full_sample(sample_with_a_bad_chunk_magic(), 270, 2)
 }
 
 #[test]
 fn test_dirty_sample_binxml_with_incomplete_token() {
     // Contains an unparsable record
-    test_full_sample(sample_binxml_with_incomplete_sid(), 6, 1)
+    test_full_sample(sample_binxml_with_incomplete_sid(), 6, 0)
 }
 
 #[test]
 fn test_dirty_sample_binxml_with_incomplete_template() {
     // Contains an unparsable record
-    test_full_sample(sample_binxml_with_incomplete_template(), 17, 1)
+    test_full_sample(sample_binxml_with_incomplete_template(), 17, 0)
 }
 
 #[test]
diff --git a/tests/test_full_samples_streaming.rs b/tests/test_full_samples_streaming.rs
index 4bd0e57e..52e54ea2 100644
--- a/tests/test_full_samples_streaming.rs
+++ b/tests/test_full_samples_streaming.rs
@@ -151,19 +151,19 @@ fn test_dirty_sample_with_a_chunk_past_zeros_streaming() {
 
 #[test]
 fn test_dirty_sample_with_a_bad_chunk_magic_streaming() {
-    test_full_sample_streaming(sample_with_a_bad_chunk_magic(), 270, 5)
+    test_full_sample_streaming(sample_with_a_bad_chunk_magic(), 270, 2)
 }
 
 #[test]
 fn test_dirty_sample_binxml_with_incomplete_token_streaming() {
     // Contains an unparsable record
-    test_full_sample_streaming(sample_binxml_with_incomplete_sid(), 6, 1)
+    test_full_sample_streaming(sample_binxml_with_incomplete_sid(), 6, 0)
 }
 
 #[test]
 fn test_dirty_sample_binxml_with_incomplete_template_streaming() {
     // Contains an unparsable record
-    test_full_sample_streaming(sample_binxml_with_incomplete_template(), 17, 1)
+    test_full_sample_streaming(sample_binxml_with_incomplete_template(), 17, 0)
 }
 
 #[test]