Skip to content

Commit 1c9450e

Browse files
committed
breakup cache from evtx_dump
1 parent c2afd19 commit 1c9450e

6 files changed

Lines changed: 559 additions & 296 deletions

File tree

src/bin/evtx_dump.rs

Lines changed: 5 additions & 253 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,6 @@ mod dump_template_instances;
2424
#[path = "evtx_dump/extract_wevt_templates.rs"]
2525
mod extract_wevt_templates;
2626

27-
#[cfg(feature = "wevt_templates")]
28-
#[path = "evtx_dump/wevt_cache.rs"]
29-
mod wevt_cache;
30-
3127
#[cfg(all(not(target_env = "msvc"), feature = "fast-alloc"))]
3228
use tikv_jemallocator::Jemalloc;
3329

@@ -65,7 +61,7 @@ struct EvtxDump {
6561
/// When set, only the specified events (offsets relative to the file) will be output.
6662
ranges: Option<Ranges>,
6763
#[cfg(feature = "wevt_templates")]
68-
wevt_cache: Option<std::sync::Arc<wevt_cache::WevtCache>>,
64+
wevt_cache: Option<std::sync::Arc<evtx::wevt_templates::WevtCache>>,
6965
}
7066

7167
impl EvtxDump {
@@ -182,7 +178,7 @@ impl EvtxDump {
182178
#[cfg(feature = "wevt_templates")]
183179
let wevt_cache = matches
184180
.get_one::<String>("wevt-cache-index")
185-
.map(|p| wevt_cache::WevtCache::load(p).map(std::sync::Arc::new))
181+
.map(|p| evtx::wevt_templates::WevtCache::load(p).map(std::sync::Arc::new))
186182
.transpose()?;
187183

188184
Ok(EvtxDump {
@@ -221,7 +217,7 @@ impl EvtxDump {
221217
if let Some(cache) = self.wevt_cache.clone() {
222218
let iter = parser.serialized_records(move |record_res| {
223219
record_res
224-
.and_then(|record| render_record_xml_with_wevt_cache(record, &cache))
220+
.and_then(|record| record.into_xml_with_wevt_cache(cache.as_ref()))
225221
});
226222
for record in iter {
227223
self.dump_record(record)?
@@ -242,10 +238,9 @@ impl EvtxDump {
242238
JsonParserKind::Streaming => {
243239
#[cfg(feature = "wevt_templates")]
244240
if let Some(cache) = self.wevt_cache.clone() {
245-
let indent = self.parser_settings.should_indent();
246241
let iter = parser.serialized_records(move |record_res| {
247242
record_res.and_then(|record| {
248-
render_record_json_with_wevt_cache(record, &cache, indent, true)
243+
record.into_json_stream_with_wevt_cache(cache.as_ref())
249244
})
250245
});
251246
for record in iter {
@@ -265,12 +260,9 @@ impl EvtxDump {
265260
JsonParserKind::Legacy => {
266261
#[cfg(feature = "wevt_templates")]
267262
if let Some(cache) = self.wevt_cache.clone() {
268-
let indent = self.parser_settings.should_indent();
269263
let iter = parser.serialized_records(move |record_res| {
270264
record_res.and_then(|record| {
271-
render_record_json_with_wevt_cache(
272-
record, &cache, indent, false,
273-
)
265+
record.into_json_with_wevt_cache(cache.as_ref())
274266
})
275267
});
276268
for record in iter {
@@ -645,243 +637,3 @@ fn main() -> Result<()> {
645637

646638
Ok(())
647639
}
648-
649-
#[cfg(feature = "wevt_templates")]
650-
fn extract_template_guid_from_error(err: &evtx::err::EvtxError) -> Option<String> {
651-
use evtx::err::{DeserializationError, EvtxError};
652-
match err {
653-
EvtxError::FailedToParseRecord { source, .. } => extract_template_guid_from_error(source),
654-
EvtxError::DeserializationError(DeserializationError::FailedToDeserializeTemplate {
655-
template_id,
656-
..
657-
}) => Some(template_id.to_string()),
658-
_ => None,
659-
}
660-
}
661-
662-
#[cfg(feature = "wevt_templates")]
663-
fn binxml_value_to_string_lossy(value: &evtx::binxml::value_variant::BinXmlValue<'_>) -> String {
664-
use evtx::binxml::value_variant::BinXmlValue;
665-
match value {
666-
BinXmlValue::EvtHandle | BinXmlValue::BinXmlType(_) | BinXmlValue::EvtXml => String::new(),
667-
_ => value.as_cow_str().into_owned(),
668-
}
669-
}
670-
671-
#[cfg(feature = "wevt_templates")]
672-
fn substitutions_from_template_instance<'a>(
673-
tpl: &evtx::model::deserialized::BinXmlTemplateRef<'a>,
674-
) -> Vec<String> {
675-
use evtx::model::deserialized::BinXMLDeserializedTokens;
676-
tpl.substitution_array
677-
.iter()
678-
.map(|t| match t {
679-
BinXMLDeserializedTokens::Value(v) => binxml_value_to_string_lossy(v),
680-
_ => String::new(),
681-
})
682-
.collect()
683-
}
684-
685-
#[cfg(feature = "wevt_templates")]
686-
fn resolve_template_guid_from_record<'a>(
687-
record: &evtx::EvtxRecord<'a>,
688-
tpl: &evtx::model::deserialized::BinXmlTemplateRef<'a>,
689-
) -> Option<String> {
690-
if let Some(g) = tpl.template_guid.as_ref() {
691-
return Some(g.to_string());
692-
}
693-
694-
record
695-
.chunk
696-
.template_table
697-
.get_template(tpl.template_def_offset)
698-
.map(|def| def.header.guid.to_string())
699-
}
700-
701-
#[cfg(feature = "wevt_templates")]
702-
struct TemplateInstanceInfo {
703-
/// Normalized GUID (lowercased, braces stripped) if we can resolve it.
704-
guid: Option<String>,
705-
substitutions: Vec<String>,
706-
}
707-
708-
#[cfg(feature = "wevt_templates")]
709-
fn collect_template_instances<'a>(record: &evtx::EvtxRecord<'a>) -> Vec<TemplateInstanceInfo> {
710-
use evtx::model::deserialized::BinXMLDeserializedTokens;
711-
let mut out = Vec::new();
712-
713-
for t in &record.tokens {
714-
let BinXMLDeserializedTokens::TemplateInstance(tpl) = t else {
715-
continue;
716-
};
717-
718-
let guid =
719-
resolve_template_guid_from_record(record, tpl).map(|g| wevt_cache::normalize_guid(&g));
720-
let substitutions = substitutions_from_template_instance(tpl);
721-
722-
out.push(TemplateInstanceInfo {
723-
guid,
724-
substitutions,
725-
});
726-
}
727-
728-
out
729-
}
730-
731-
#[cfg(feature = "wevt_templates")]
732-
fn select_template_instance_for_guid<'a>(
733-
instances: &'a [TemplateInstanceInfo],
734-
guid: &str,
735-
) -> Option<&'a TemplateInstanceInfo> {
736-
let want = wevt_cache::normalize_guid(guid);
737-
738-
match instances.len() {
739-
0 => None,
740-
1 => Some(&instances[0]),
741-
_ => {
742-
let matches: Vec<&TemplateInstanceInfo> = instances
743-
.iter()
744-
.filter(|i| i.guid.as_ref().is_some_and(|g| g == &want))
745-
.collect();
746-
747-
if matches.len() == 1 {
748-
Some(matches[0])
749-
} else {
750-
None
751-
}
752-
}
753-
}
754-
}
755-
756-
#[cfg(feature = "wevt_templates")]
757-
/// Render a record as XML, using the EVTX’s embedded templates first.
758-
///
759-
/// If rendering fails *specifically because a template definition cannot be deserialized* and the
760-
/// error contains a concrete template GUID, we will deterministically attempt to render the record
761-
/// using the provided offline WEVT cache:
762-
/// - We only use the cache when the error is `FailedToDeserializeTemplate { template_id: GUID }`.
763-
/// - We only proceed when we can unambiguously select the matching `TemplateInstance` substitution
764-
/// array for that GUID (single instance, or a unique GUID match among multiple instances).
765-
/// - Otherwise we return the original error unchanged.
766-
fn render_record_xml_with_wevt_cache<'a>(
767-
record: evtx::EvtxRecord<'a>,
768-
cache: &std::sync::Arc<wevt_cache::WevtCache>,
769-
) -> evtx::err::Result<SerializedEvtxRecord<String>> {
770-
let record_id = record.event_record_id;
771-
let timestamp = record.timestamp;
772-
let instances = collect_template_instances(&record);
773-
774-
match record.into_xml() {
775-
Ok(r) => Ok(r),
776-
Err(e) => {
777-
// Deterministic rule: only attempt cache rendering when the failure explicitly
778-
// indicates a template GUID (i.e. template deserialization failure).
779-
let Some(guid) = extract_template_guid_from_error(&e) else {
780-
return Err(e);
781-
};
782-
783-
let Some(tpl) = select_template_instance_for_guid(&instances, &guid) else {
784-
return Err(e);
785-
};
786-
let subs = &tpl.substitutions;
787-
788-
match cache.render_by_template_guid(&guid, subs) {
789-
Ok(xml_fragment) => {
790-
log::info!(
791-
"wevt-cache used: record_id={} template_guid={}",
792-
record_id,
793-
guid
794-
);
795-
Ok(SerializedEvtxRecord {
796-
event_record_id: record_id,
797-
timestamp,
798-
data: xml_fragment,
799-
})
800-
}
801-
Err(render_err) => {
802-
eprintln!(
803-
"wevt-cache render failed for record {} template_guid={}: {render_err}",
804-
record_id, guid
805-
);
806-
Err(e)
807-
}
808-
}
809-
}
810-
}
811-
}
812-
813-
#[cfg(feature = "wevt_templates")]
814-
/// Render a record as JSON, using the EVTX’s embedded templates first.
815-
///
816-
/// This follows the same deterministic WEVT-cache rule as `render_record_xml_with_wevt_cache`:
817-
/// only on an explicit template-GUID deserialization failure and only with an unambiguous
818-
/// `TemplateInstance` substitution array.
819-
///
820-
/// When the cache is used, the JSON output is a synthetic object that contains the rendered XML
821-
/// fragment under `xml` (and includes metadata fields like `template_guid` and `record_id`).
822-
fn render_record_json_with_wevt_cache<'a>(
823-
record: evtx::EvtxRecord<'a>,
824-
cache: &std::sync::Arc<wevt_cache::WevtCache>,
825-
indent: bool,
826-
use_streaming_json: bool,
827-
) -> evtx::err::Result<SerializedEvtxRecord<String>> {
828-
let record_id = record.event_record_id;
829-
let timestamp = record.timestamp;
830-
let instances = collect_template_instances(&record);
831-
832-
let normal = if use_streaming_json {
833-
record.into_json_stream()
834-
} else {
835-
record.into_json()
836-
};
837-
838-
match normal {
839-
Ok(r) => Ok(r),
840-
Err(e) => {
841-
let Some(guid) = extract_template_guid_from_error(&e) else {
842-
return Err(e);
843-
};
844-
let Some(tpl) = select_template_instance_for_guid(&instances, &guid) else {
845-
return Err(e);
846-
};
847-
let subs = &tpl.substitutions;
848-
849-
match cache.render_by_template_guid(&guid, subs) {
850-
Ok(xml_fragment) => {
851-
log::info!(
852-
"wevt-cache used: record_id={} template_guid={}",
853-
record_id,
854-
guid
855-
);
856-
let v = serde_json::json!({
857-
"_wevt_cache_used": true,
858-
"template_guid": guid,
859-
"record_id": record_id,
860-
"timestamp": timestamp.to_rfc3339(),
861-
"xml": xml_fragment,
862-
});
863-
864-
let data = if indent {
865-
serde_json::to_string_pretty(&v)
866-
.map_err(evtx::err::SerializationError::from)?
867-
} else {
868-
serde_json::to_string(&v).map_err(evtx::err::SerializationError::from)?
869-
};
870-
871-
Ok(SerializedEvtxRecord {
872-
event_record_id: record_id,
873-
timestamp,
874-
data,
875-
})
876-
}
877-
Err(render_err) => {
878-
eprintln!(
879-
"wevt-cache render failed for record {} template_guid={}: {render_err}",
880-
record_id, guid
881-
);
882-
Err(e)
883-
}
884-
}
885-
}
886-
}
887-
}

src/bin/evtx_dump/apply_wevt_cache.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,6 @@ pub fn run(matches: &ArgMatches) -> Result<()> {
109109
#[cfg(feature = "wevt_templates")]
110110
mod imp {
111111
use super::*;
112-
use crate::wevt_cache;
113112
use evtx::EvtxParser;
114113
use evtx::ParserSettings;
115114
use evtx::binxml::value_variant::BinXmlValue;
@@ -134,7 +133,7 @@ mod imp {
134133
}
135134

136135
fn normalize_guid(s: &str) -> String {
137-
wevt_cache::normalize_guid(s)
136+
evtx::wevt_templates::normalize_guid(s)
138137
}
139138

140139
fn parse_resource_id(v: &JsonValue) -> Option<String> {

src/binxml/tokens.rs

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,74 @@ pub(crate) fn read_template_definition_cursor<'a>(
180180
Ok(template)
181181
}
182182

183+
/// Strictly read a `TemplateDefinitionHeader` at a known offset in an EVTX chunk buffer.
184+
///
185+
/// This does **not** scan for signatures or guess offsets. It only succeeds when the bytes at the
186+
/// provided `offset` look like a valid template definition header followed by a BinXML fragment
187+
/// header (`StartOfStream` + version tuple). This is used by higher-level "offline WEVT cache"
188+
/// logic to match a record's `TemplateInstance` to a template GUID without fully deserializing the
189+
/// template.
190+
pub(crate) fn try_read_template_definition_header_at(
191+
chunk_data: &[u8],
192+
offset: u32,
193+
) -> Result<BinXmlTemplateDefinitionHeader> {
194+
let off = offset as usize;
195+
let mut cursor = ByteCursor::with_pos(chunk_data, off)?;
196+
197+
// Read the header using the canonical parser.
198+
let header = read_template_definition_header_cursor(&mut cursor)?;
199+
200+
// Validate next_template_offset is either:
201+
// - 0 (end of list)
202+
// - equal to this template's own offset (observed termination sentinel)
203+
// - a forward in-chunk offset
204+
if header.next_template_offset != 0 && header.next_template_offset != offset {
205+
if header.next_template_offset <= offset {
206+
return Err(DeserializationError::Io(std::io::Error::new(
207+
std::io::ErrorKind::InvalidData,
208+
"template next_template_offset is not forward",
209+
)));
210+
}
211+
if (header.next_template_offset as usize) >= chunk_data.len() {
212+
return Err(DeserializationError::Io(std::io::Error::new(
213+
std::io::ErrorKind::InvalidData,
214+
"template next_template_offset out of bounds",
215+
)));
216+
}
217+
}
218+
219+
// We should now be positioned immediately after the template header.
220+
let data_size_usize = header.data_size as usize;
221+
if data_size_usize < 4 {
222+
return Err(DeserializationError::Io(std::io::Error::new(
223+
std::io::ErrorKind::InvalidData,
224+
"template data_size too small",
225+
)));
226+
}
227+
228+
// Ensure the full template fragment range is in-bounds (strict; we do not accept a header that
229+
// points past the chunk end).
230+
let data_start = cursor.pos();
231+
let data_end = data_start.saturating_add(data_size_usize);
232+
if data_end > chunk_data.len() {
233+
return Err(DeserializationError::Io(std::io::Error::new(
234+
std::io::ErrorKind::InvalidData,
235+
"template data_size out of bounds",
236+
)));
237+
}
238+
239+
// Verify BinXML fragment header: StartOfStream (0x0f) + major/minor/flags.
240+
let frag = cursor.take_bytes(4, "template fragment header")?;
241+
if frag[0] != 0x0f || frag[1] != 0x01 || frag[2] != 0x01 {
242+
return Err(DeserializationError::Io(std::io::Error::new(
243+
std::io::ErrorKind::InvalidData,
244+
"template does not start with BinXML fragment header (StartOfStream 1.1)",
245+
)));
246+
}
247+
248+
Ok(header)
249+
}
250+
183251
pub(crate) fn read_entity_ref_cursor(
184252
cursor: &mut ByteCursor<'_>,
185253
name_encoding: BinXmlNameEncoding,

0 commit comments

Comments
 (0)