diff --git a/.gitignore b/.gitignore index 1e93fd2a..fa7afb2a 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,8 @@ evtx-wasm/evtx-viewer/public/pkg profile/* binaries/* benchmarks/* +!benchmarks/omer-pc_ablation_matrix_t1_20251227.json +scripts/FlameGraph .PRE_PATH # Local-only fixtures (e.g. PE/CRIM blobs for WEVT_TEMPLATE work). Keep out of git. @@ -24,3 +26,7 @@ samples_local/ # Local vendor checkouts (for format research / patching upstream). external/ +profile_results/* + +# Local scratch space (worktrees, ablation builds, etc). +tmp/ diff --git a/Cargo.lock b/Cargo.lock index 5275d7bc..77149371 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -549,6 +549,7 @@ dependencies = [ "anyhow", "assert_cmd", "bitflags 2.8.0", + "bumpalo", "byteorder", "chrono", "clap", @@ -562,6 +563,7 @@ dependencies = [ "hashbrown", "indoc", "insta", + "itoa", "log", "predicates", "pretty_assertions", @@ -569,6 +571,7 @@ dependencies = [ "rayon", "rexpect", "rpmalloc", + "ryu", "serde", "serde_json", "simplelog", diff --git a/Cargo.toml b/Cargo.toml index 821f16f5..122ba4c7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,6 +40,9 @@ tempfile = { version = "3.3.0", optional = true } serde = "1" serde_json = { version = "1", features = ["preserve_order"]} +bumpalo = { version = "3", features = ["collections"] } +itoa = "1" +ryu = "1" [target.'cfg(not(windows))'.dependencies] # jemalloc is significantly more peformant than the system allocator. @@ -58,6 +61,8 @@ multithreading = ["rayon"] # Enable WEVT_TEMPLATE extraction helpers (used by `evtx_dump extract-wevt-templates`). wevt_templates = ["glob", "goblin"] +# Perf ablation feature toggles were temporary and have been removed after the attribution study (see PERF.md). 
+ [dev-dependencies] insta = { version = "1", features = ["json"] } pretty_assertions = "1.2.1" diff --git a/PERF.md b/PERF.md new file mode 100644 index 00000000..4fdfe455 --- /dev/null +++ b/PERF.md @@ -0,0 +1,414 @@ +# Performance theses (living document) + +This file is a running log of **hypotheses (“theses”)** and the **measurement protocol** used to validate them one by one. +It is modeled after `~/Workspace/mft/PERF.md` and is intended to be **agent-executable**: another agent should be able to +reproduce the same artifacts and conclusions. + +Context / north star: +- We have a Zig implementation (`~/Workspace/zig-evtx`) that is materially faster. +- Our working hypothesis is that a large part of the gap is **allocator churn** in Rust (many small alloc/free + clone/memmove), + while Zig leans on arena-style allocation and lower-copy dataflow. + +Principles: +- **One change per experiment** (or one tightly-coupled set), with before/after measurements. +- Prefer **end-to-end CLI throughput** on a fixed input (`samples/security_big_sample.evtx`) as the primary KPI. +- Keep a **saved profile** for every checkpoint so we can explain wins/regressions. +- When results are noisy, prefer **median** and **min** over mean, and record variance. + +--- + +## Canonical workloads (copy/paste) + +Build (always): + +```bash +cd /Users/omerba/Workspace/evtx +cargo build --release --features fast-alloc --locked --offline --bin evtx_dump +``` + +W1 (JSONL, end-to-end, single-thread, write suppressed): + +```bash +./target/release/evtx_dump -t 1 -o jsonl samples/security_big_sample.evtx > /dev/null +``` + +W2 (optional, multi-thread throughput; **not** used for baseline allocator-churn tracking): + +```bash +./target/release/evtx_dump -t 8 -o jsonl samples/security_big_sample.evtx > /dev/null +``` + +Notes: +- Redirecting output is critical; otherwise you benchmark terminal I/O and buffering, not parsing/serialization. +- **All reference baselines in this repo use `-t 1`**. 
It best highlights allocator churn and per-chunk work on a single core. + +--- + +## Quiet-machine guard (recommended) + +Benchmarks are extremely sensitive to background load (Spotlight indexing, builds, browser tabs, etc). +To avoid “busy machine” noise, use `scripts/ensure_quiet.sh`: + +```bash +cd /Users/omerba/Workspace/evtx +./scripts/ensure_quiet.sh +``` + +For hyperfine runs, prefer using it as a prepare hook (prepare time is not included in timings): + +```bash +hyperfine --prepare ./scripts/ensure_quiet.sh ... +``` + +For the Rust-vs-Zig harness, enable it via: + +```bash +QUIET_CHECK=1 ./profile_comparison.sh --bench-only +``` + +Tune thresholds via env vars (see `scripts/ensure_quiet.sh`): +- `QUIET_IDLE_MIN` (default `90`) +- `QUIET_LOAD1_MAX` (default `2.0`) +- `QUIET_MAX_WAIT_SEC` (default `60`) + +--- + +## Baseline harness (Rust vs Zig) + +Use `profile_comparison.sh` for quick Rust-vs-Zig baselines and to print top leaf frames (helpful to validate allocator-churn hypotheses): + +```bash +cd /Users/omerba/Workspace/evtx +./profile_comparison.sh --bench-only +./profile_comparison.sh --top-leaves +``` + +Environment variables (see script header for full list): +- `SAMPLE_FILE` (defaults to `samples/security_big_sample.evtx`) +- `RUNS` (hyperfine runs) +- `OUTPUT_DIR` (defaults to `./profile_results`, ignored by git) +- `ZIG_BINARY` (defaults to `~/Workspace/zig-evtx/zig-out/bin/evtx_dump_zig`) + +--- + +## Baseline environment (2025-12-27) + +- **OS**: Darwin 25.2.0 (arm64) +- **HW**: Apple M3 Pro, 11 cores, 36 GB RAM +- **Toolchain**: rustc 1.92.0 (LLVM 21.1.3), cargo 1.92.0 +- **Tools**: hyperfine 1.20.0, samply 0.13.1, zig 0.15.2 + +--- + +## Baseline environment (omer-pc, 2025-12-27) + +- **OS**: Arch Linux (kernel 6.17.9, x86_64) +- **HW**: AMD Ryzen 9 3900X (12C/24T), 62 GiB RAM +- **Toolchain**: rustc 1.92.0 (LLVM 21.1.6), cargo 1.92.0 +- **Tools**: hyperfine 1.20.0 (no Zig / no samply on this box) + +--- + +## Baseline numbers (omer-pc, 
2025-12-27) + +Measured on `omer-pc` via SSH. We sync two trees (`origin/master` snapshot and this branch) and compare end-to-end JSONL throughput. +We gate runs with `scripts/ensure_quiet.sh` but loosened load-average tolerance because the box maintains a steady load (~4) while +being effectively idle (CPU idle ~99%). + +W1 (JSONL, `-t 1`, output suppressed) — **reference baseline**: +- **master**: **median 883.6 ms**, mean 891.5 ms ± 28.7 ms (range 873.6–993.2 ms) +- **branch**: **median 599.6 ms**, mean 601.1 ms ± 6.1 ms (range 589.7–611.9 ms) +- **speedup**: ~**1.47×** (≈ **32%** lower wall time) + +Repro commands (on `omer-pc`): + +```bash +BASE=/tmp/evtx-bench +SAMPLE=$BASE/master/samples/security_big_sample.evtx + +# Wait for a "quiet enough" machine before each benchmark batch. +QUIET_IDLE_MIN=95 QUIET_LOAD1_MAX=8 $BASE/branch/scripts/ensure_quiet.sh + +hyperfine --warmup 3 --runs 20 \ + "$BASE/master/target/release/evtx_dump -t 1 -o jsonl $SAMPLE > /dev/null" \ + "$BASE/branch/target/release/evtx_dump -t 1 -o jsonl $SAMPLE > /dev/null" +``` + +Raw JSON capture (temporary on that run): `/tmp/evtx-bench.11jAUq/hyperfine_master_vs_branch_t1.json`. + +--- + +## Agent playbook (reproducible workflow) + +### Naming & artifacts (do this consistently) + +Pick the next hypothesis ID: `H{N}` (monotonic, don’t reuse IDs). 
+ +- **Branch**: `perf/h{N}-{short-slug}` (example: `perf/h7-no-clone-template-expansion`) +- **Saved binaries** (so benchmarks are stable and diffable): + - `target/release/evtx_dump.h{N}_before` + - `target/release/evtx_dump.h{N}_after` +- **Hyperfine JSON**: + - `target/perf/h{N}-before-vs-after.hyperfine.json` +- **Samply profiles** (merge by running many iterations): + - `target/perf/samply/h{N}_before.profile.json.gz` + - `target/perf/samply/h{N}_after.profile.json.gz` + +### Step-by-step: run an experiment end-to-end + +#### 0) Start a new thesis + +```bash +cd /Users/omerba/Workspace/evtx +git checkout -b perf/h{N}-{short-slug} +``` + +Add an entry under “Theses / hypotheses backlog” with: +- **Claim** +- **Evidence** (what profile frames point at, especially allocator churn: malloc/free/memmove) +- **Change** (minimal code change to test) +- **Success metric** (e.g. W1 improves ≥ 5% median) +- **Guardrails** (correctness constraints; “don’t regress too much”) + +#### 1) Build + snapshot the **before** binary + +```bash +cd /Users/omerba/Workspace/evtx +cargo build --release --features fast-alloc --locked --offline --bin evtx_dump +cp -f target/release/evtx_dump target/release/evtx_dump.h{N}_before +``` + +#### 2) Record a stable **before** profile (Samply) + +We merge many iterations so leaf frames are stable. 
+ +```bash +cd /Users/omerba/Workspace/evtx +mkdir -p target/perf/samply +samply record --save-only --unstable-presymbolicate --reuse-threads --main-thread-only \ + -o target/perf/samply/h{N}_before.profile.json.gz \ + --iteration-count 200 -- \ + ./target/release/evtx_dump.h{N}_before -t 1 -o jsonl samples/security_big_sample.evtx > /dev/null +``` + +To view (serve locally and open the printed Firefox Profiler URL): + +```bash +cd /Users/omerba/Workspace/evtx +samply load --no-open -P 4033 target/perf/samply/h{N}_before.profile.json.gz +``` + +What to record from the UI: +- **Invert call stack** for top **leaf/self** frames (watch for malloc/free/memmove, hashing, formatting). +- Normal Call Tree for inclusive buckets (template expansion, JSON emission, UTF-16 decode). + +#### 3) Implement the change (keep it tight) + +Primary focus areas (given allocator-churn hypothesis): +- Reduce clone/memmove in template expansion / token streaming. +- Avoid building intermediate `serde_json::Value` on hot paths (stream instead). +- Reduce per-record temporary allocations (strings/vectors/buffers), ideally by reusing buffers or using arenas. + +If you find yourself changing 5+ unrelated things, split into multiple theses. + +#### 4) Build + snapshot the **after** binary + +```bash +cd /Users/omerba/Workspace/evtx +cargo build --release --features fast-alloc --locked --offline --bin evtx_dump +cp -f target/release/evtx_dump target/release/evtx_dump.h{N}_after +``` + +#### 5) Benchmark **before vs after in the same hyperfine command** + +Always run both saved binaries in a single invocation and export JSON. 
+ +```bash +cd /Users/omerba/Workspace/evtx +mkdir -p target/perf +hyperfine --warmup 5 --runs 40 \ + --export-json target/perf/h{N}-before-vs-after.hyperfine.json \ + './target/release/evtx_dump.h{N}_before -t 1 -o jsonl samples/security_big_sample.evtx > /dev/null' \ + './target/release/evtx_dump.h{N}_after -t 1 -o jsonl samples/security_big_sample.evtx > /dev/null' +``` + +If variance is high, amortize noise by looping inside each hyperfine run (keep the before/after pair in one command): + +```bash +cd /Users/omerba/Workspace/evtx +hyperfine --warmup 2 --runs 15 \ + --export-json target/perf/h{N}-before-vs-after.hyperfine.json \ + --command-name 'before (20x)' "bash -lc 'for i in {1..20}; do ./target/release/evtx_dump.h{N}_before -t 1 -o jsonl samples/security_big_sample.evtx > /dev/null; done'" \ + --command-name 'after (20x)' "bash -lc 'for i in {1..20}; do ./target/release/evtx_dump.h{N}_after -t 1 -o jsonl samples/security_big_sample.evtx > /dev/null; done'" +``` + +#### 6) Record an **after** profile (Samply) + +```bash +cd /Users/omerba/Workspace/evtx +samply record --save-only --unstable-presymbolicate --reuse-threads --main-thread-only \ + -o target/perf/samply/h{N}_after.profile.json.gz \ + --iteration-count 200 -- \ + ./target/release/evtx_dump.h{N}_after -t 1 -o jsonl samples/security_big_sample.evtx > /dev/null +``` + +#### 7) Correctness checks (pick strictness to match the thesis) + +Always: + +```bash +cd /Users/omerba/Workspace/evtx +cargo test --features fast-alloc --locked --offline +``` + +Semantic JSONL equality on a bounded range (preferred; formatting differences allowed): + +```bash +cd /Users/omerba/Workspace/evtx +rm -f /tmp/evtx_before.jsonl /tmp/evtx_after.jsonl +./target/release/evtx_dump.h{N}_before -t 1 -o jsonl samples/security_big_sample.evtx > /tmp/evtx_before.jsonl +./target/release/evtx_dump.h{N}_after -t 1 -o jsonl samples/security_big_sample.evtx > /tmp/evtx_after.jsonl +python3 - <<'PY' +import json +b = [json.loads(l) for 
l in open("/tmp/evtx_before.jsonl")] +a = [json.loads(l) for l in open("/tmp/evtx_after.jsonl")] +assert b == a, "semantic JSONL mismatch" +print("OK: semantic JSONL identical") +PY +``` + +#### 8) Update this file (`PERF.md`) with a write-up + +Add a section under “Completed optimizations” (or “Rejected”) with: +- **What changed** +- **Benchmarks** (paste exact hyperfine command) +- **Extracted medians** (from exported JSON) +- **Speedup** (ratio and %) +- **Profile delta** (top leaf before/after; call out allocator churn shifts explicitly) +- **Correctness check** +- **Artifacts**: profile paths + hyperfine JSON path + +#### 9) PR-quality finish + +```bash +cd /Users/omerba/Workspace/evtx +cargo fmt +cargo clippy --all-targets --features fast-alloc --locked --offline +``` + +Commit message should match the thesis and observable change: + +```bash +git commit -am "perf: H{N} {short description}" +``` + +--- + +## Attribution study: per-optimization deltas (omer-pc, 2025-12-27) + +We measured how much each optimization contributes by doing a “one change reverted at a time” run: +- **Baseline**: this branch (`--features fast-alloc`), `-t 1`, JSONL, output suppressed. +- **Variant**: same, but revert exactly one optimization. + +Artifact (exported hyperfine JSON, includes exact commands + full run distributions): +- `benchmarks/omer-pc_ablation_matrix_t1_20251227.json` + +Results (median wall time deltas vs baseline; lower is better): + +| Variant | Median (ms) | Δ vs baseline | +|---|---:|---:| +| baseline | 605.5 | (base) | +| revert: pre-expand templates | 750.1 | +23.88% | +| revert: chrono datetime formatting | 625.6 | +3.31% | +| revert: serde_json values | 615.6 | +1.66% | +| revert: serde_json strings | 611.8 | +1.03% | +| revert: UTF-16 ASCII fast-path | 600.6 | -0.81% | + +Notes: +- This run was quiet-gated (`scripts/ensure_quiet.sh`, `QUIET_IDLE_MIN=95 QUIET_LOAD1_MAX=8`). 
+- The feature toggles used to build these variants were temporary and have since been removed; the JSON is the stable record. + +--- + +## Theses / hypotheses backlog + +Template (copy/paste): + +### H{N} — {short title} +- **Claim**: +- **Evidence**: +- **Change**: +- **Success metric**: +- **Guardrails**: + +### H1 — Kill remaining allocator churn in streaming JSON output (keys + buffered values) +- **Claim**: We can get a meaningful additional W1 speedup by eliminating the remaining hot-path heap churn in `JsonStreamOutput` + (key allocation + `serde_json::Value` buffering), which currently shows up as `_rjem_malloc` / `_rjem_sdallocx` + `_platform_memmove`. +- **Evidence**: + - **Samply (macOS, W1 `-t 1`, 120 iterations, output→`/dev/null`)** shows allocator + memmove as major leaf cost: + - `_platform_memmove` ~7.1% leaf (top caller: `JsonStreamOutput::visit_open_start_element` ~29.5%, then `write_key` / `write_json_string_ncname`) + - `_rjem_malloc` ~3.0% leaf (top caller: `RawVec::finish_grow` ~28.8%, then `JsonStreamOutput::visit_open_start_element` / `write_key`) + - `RawVec::grow_one` callers: `XmlElementBuilder::attribute_value` ~35.7% and `JsonStreamOutput::visit_characters` ~35.6% + - Remaining `serde_json` overhead is still measurable (`BinXmlValue -> serde_json::Value` + `Serializer::serialize_str` show up in top leaves), + due to `buffered_values` / `data_values` paths. + - **Zig renderer avoids this class of overhead entirely**: + - It writes JSON directly from IR nodes without allocating per-key `String`s, and without buffering into `serde_json::Value`. + - It uses a fixed-size, stack-allocated name-count table (`MAX_UNIQUE_NAMES = 64`) + pointer-equality fast path for name keys + instead of hashing/allocating keys (`zig-evtx/src/parser/render_json.zig`, and rationale in `zig-evtx/docs/architecture.md`). 
+- **Change**: + - **Reuse memory across records** (Zig-style) instead of allocating fresh per record: + - Today `EvtxRecord::into_json_stream()` constructs a new `Vec` + a new `JsonStreamOutput` every record. Introduce a + reusable per-thread/per-chunk “scratch” JSON emitter that: + - keeps the output `Vec` and calls `clear()` per record (capacity retained), + - keeps `frames` / `elements` vectors and clears them per record (capacity retained), + - reuses duplicate-key tracking storage (see next bullets) instead of re-allocating HashSets. + - The existing `EvtxChunkData.arena` is **per-chunk** and cannot be reset per record because it backs template cache + values, + but we can add a **separate scratch bump** (per record) and `reset()` it after each record to recycle memory aggressively. + - Make `JsonStreamOutput` lifetime-aware (`JsonStreamOutput<'a, W>`) so it can **store borrowed keys**: + - Change `ElementState.name: String` → `Cow<'a, str>` (or `&'a str` where possible) to avoid `to_owned()`/`clone()` per element. + - Replace `ObjectFrame.used_keys: HashSet` with a borrowed-key structure and only allocate suffix keys on collision. + If we keep hashing, store `&'a str` (borrowed) and allocate only suffixed strings into the per-record scratch bump. + (Alternative: Zig-style fixed table + linear scan for ≤64 keys, avoiding hashing altogether.) + - Replace `buffered_values: Vec` and `data_values: Vec` with a **borrow-friendly scalar buffer** + (plain `Vec` with preallocation + reuse; avoid `smallvec`), and serialize via `write_binxml_value` / `write_json_string_*` + to eliminate `serde_json::to_writer` from the hot path. +- **Success metric**: + - **W1 median improves ≥ 8%** on `omer-pc` (quiet-gated), vs current branch baseline. + - Samply shows reduced share of `_platform_memmove`, `_rjem_malloc`, and fewer `RawVec::grow_one` samples under JSON output. 
+- **Guardrails**: + - Preserve legacy JSON semantics (duplicate key suffixing, EventData/Data special handling, `separate_json_attributes` behavior). + - `cargo test --features fast-alloc --locked --offline` stays green, especially streaming parity suites. + +--- + +## Completed optimizations + +### Stream template expansion (avoid pre-expanding templates) +- **What changed**: Template expansion happens inline during streaming output, so substitution values can be *moved on last use* instead of cloned. This avoids building an expanded token Vec up-front. +- **Where**: `src/binxml/assemble.rs` (streaming path). +- **Impact (omer-pc, `-t 1`)**: reverting to the older “pre-expand templates” approach regresses **+23.88%** median (605.5 ms → 750.1 ms). This is the dominant contributor in the ablation study. + +### JSON string serialization (avoid `serde_json` for string escaping) +- **What changed**: Serialize strings directly with a fast “no-escape needed” check + manual escaping for `"` `\\` control chars. +- **Where**: `src/json_stream_output.rs` (`write_json_string_*`). +- **Impact (omer-pc, `-t 1`)**: reverting to `serde_json::to_writer` for strings regresses **+1.03%** median (605.5 ms → 611.8 ms). + +### JSON value serialization (avoid `serde_json::Value` allocations) +- **What changed**: Serialize `BinXmlValue` primitives directly (itoa/ryu for numbers; direct writes for bool/null/binary), avoiding intermediate JSON value construction. +- **Where**: `src/json_stream_output.rs` (`write_binxml_value`). +- **Impact (omer-pc, `-t 1`)**: reverting to `serde_json::Value` regresses **+1.66%** median (605.5 ms → 615.6 ms). + +### Datetime formatting (avoid chrono format string parsing) +- **What changed**: Write ISO-8601 timestamps directly (`YYYY-MM-DDTHH:MM:SS.ffffffZ`) instead of `dt.format(...).to_string()`. +- **Where**: `src/json_stream_output.rs` (FileTime/SysTime serialization). 
+- **Impact (omer-pc, `-t 1`)**: reverting to chrono formatting regresses **+3.31%** median (605.5 ms → 625.6 ms). + +--- + +## Rejected theses + +### UTF-16 ASCII fast-path (rejected; removed) +- **What changed**: Tried scanning UTF-16 units for “all <= 0x7F” and building an ASCII string directly. +- **Where**: `src/utils/utf16.rs` (`decode_utf16_units_z`). +- **Result (omer-pc, `-t 1`)**: reverting this “fast path” was **-0.81%** (slightly faster), i.e. the scan overhead outweighed the benefit for our canonical workload (within noise but wrong direction). +- **Decision**: Removed the ASCII fast-path; use `String::from_utf16` unconditionally. diff --git a/README.md b/README.md index 2494676f..cf4095e2 100644 --- a/README.md +++ b/README.md @@ -132,6 +132,9 @@ The parallel version is enabled when compiling with feature "multithreading" (en When using multithreading - `evtx` is significantly faster than any other parser available. For single core performance, it is both the fastest and the only cross-platform parser than supports both xml and JSON outputs. +For hypothesis-driven performance work (before/after binaries, hyperfine JSON, samply profiles, allocator-churn tracking vs Zig), +see `PERF.md` and the baseline harness `profile_comparison.sh`. + Performance was benched on my machine using `hyperfine` (statistical measurements tool). I'm running tests on a 12-Core AMD Ryzen 3900X. 
diff --git a/benchmarks/omer-pc_ablation_matrix_t1_20251227.json b/benchmarks/omer-pc_ablation_matrix_t1_20251227.json new file mode 100644 index 00000000..bc86524c --- /dev/null +++ b/benchmarks/omer-pc_ablation_matrix_t1_20251227.json @@ -0,0 +1,460 @@ +{ + "results": [ + { + "command": "base", + "mean": 0.60513749939, + "stddev": 0.005661344396316902, + "median": 0.6055234488400001, + "user": 0.59162841, + "system": 0.021744949999999996, + "min": 0.59478164284, + "max": 0.61629095784, + "times": [ + 0.61498400184, + 0.60620906084, + 0.61254258184, + 0.60523038084, + 0.60811120984, + 0.59720068084, + 0.60442999684, + 0.59478164284, + 0.60581651684, + 0.60395581684, + 0.60437034684, + 0.60078882184, + 0.59947955984, + 0.60117159284, + 0.60586942784, + 0.60605893484, + 0.61629095784, + 0.59778456484, + 0.6085920098400001, + 0.60908188184 + ], + "memory_usage_byte": [ + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688 + ], + "exit_codes": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ] + }, + { + "command": "no_utf16_ascii", + "mean": 0.59808146814, + "stddev": 0.007481310899538414, + "median": 0.6006146808399999, + "user": 0.5845784100000001, + "system": 0.021469449999999998, + "min": 0.58242899884, + "max": 0.61030006684, + "times": [ + 0.60334786984, + 0.60349042784, + 0.60039635884, + 0.60650053384, + 0.58925710784, + 0.61030006684, + 0.59548852784, + 0.59333542284, + 0.59943199884, + 0.6010722818400001, + 0.60203285284, + 0.59787566084, + 0.59685879584, + 0.58436045784, + 0.60369944484, + 0.60083300284, + 0.58633493884, + 0.58242899884, + 0.60217867284, + 0.60240594084 + ], + "memory_usage_byte": [ + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, 
+ 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688 + ], + "exit_codes": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ] + }, + { + "command": "serde_json_strings", + "mean": 0.61099211199, + "stddev": 0.005107713048399839, + "median": 0.61178113834, + "user": 0.59889576, + "system": 0.020386949999999997, + "min": 0.60082815384, + "max": 0.61922538884, + "times": [ + 0.61482342184, + 0.61116296084, + 0.6153596878400001, + 0.61406838684, + 0.60980614584, + 0.60537379884, + 0.61786933984, + 0.60499723184, + 0.61239931584, + 0.61267857984, + 0.60082815384, + 0.60439250584, + 0.61758809084, + 0.61922538884, + 0.60457359384, + 0.60935900284, + 0.60788871684, + 0.61282610884, + 0.60976971184, + 0.61485209684 + ], + "memory_usage_byte": [ + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688 + ], + "exit_codes": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ] + }, + { + "command": "serde_json_values", + "mean": 0.6145567666399999, + "stddev": 0.007329728032545908, + "median": 0.61557219884, + "user": 0.60215581, + "system": 0.02030175, + "min": 0.60026745784, + "max": 0.62922361084, + "times": [ + 0.61158007084, + 0.61053566184, + 0.61083971184, + 0.60026745784, + 0.62039820984, + 0.62161717184, + 0.60585678484, + 0.62089000784, + 0.61359159384, + 0.62922361084, + 0.61699127584, + 0.61473075684, + 0.62259423884, + 0.61530646384, + 0.60360744684, + 0.61963534884, + 0.61769044884, + 0.60320315684, + 0.61583793384, + 0.61673798084 + ], + "memory_usage_byte": [ + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 
117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688 + ], + "exit_codes": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ] + }, + { + "command": "preexpand_templates", + "mean": 0.7486929303400001, + "stddev": 0.008586896271259931, + "median": 0.75010821484, + "user": 0.73411511, + "system": 0.0218717, + "min": 0.73226054484, + "max": 0.75912729684, + "times": [ + 0.75156541884, + 0.75298336784, + 0.74313062584, + 0.74954982284, + 0.75862805684, + 0.74733161684, + 0.75912729684, + 0.74567727684, + 0.74805660984, + 0.74526153884, + 0.7331119378400001, + 0.75066660684, + 0.75908595684, + 0.74213192484, + 0.73311063784, + 0.7566222098400001, + 0.73226054484, + 0.75559764184, + 0.7525542228400001, + 0.75740529184 + ], + "memory_usage_byte": [ + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688 + ], + "exit_codes": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ] + }, + { + "command": "chrono_datetime_format", + "mean": 0.6239454818400001, + "stddev": 0.006776627218743692, + "median": 0.62557498734, + "user": 0.6105368600000001, + "system": 0.021592550000000002, + "min": 0.61390945184, + "max": 0.63519739484, + "times": [ + 0.62959976584, + 0.63323290884, + 0.61432134984, + 0.62799851784, + 0.62077427784, + 0.62508265584, + 0.63519739484, + 0.62779836484, + 0.62987203384, + 0.61390945184, + 0.61399171584, + 0.62895694884, + 0.62606731884, + 0.63142501484, + 0.61804914684, + 0.62105696584, + 0.6167927748400001, + 0.61756777684, + 0.61931845684, + 0.62789679584 + ], + "memory_usage_byte": [ + 117874688, + 117874688, + 117874688, + 117874688, + 
117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688, + 117874688 + ], + "exit_codes": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ] + } + ] +} diff --git a/docs/performance-optimizations.md b/docs/performance-optimizations.md new file mode 100644 index 00000000..59b64dd6 --- /dev/null +++ b/docs/performance-optimizations.md @@ -0,0 +1,283 @@ +# Performance Optimizations: Closing the Gap with Zig + +This document details the performance optimizations applied to the Rust EVTX parser, inspired by analysis of a Zig implementation that was ~5x faster. + +## Summary + +| Optimization | Individual Speedup | +|--------------|-------------------| +| ASCII Fast Path | ~5% faster | +| Direct JSON Writing | ~4% faster | + +**Current benchmark** (single-threaded, security_big_sample.evtx): +- Rust: **574 ms** +- Zig: **166 ms** +- Gap: **Zig is 3.46x faster** + +The remaining gap is due to **architectural differences** identified via profiling. + +--- + +## Profiling Analysis (Current State) + +Flamegraph profiling using macOS `sample` + FlameGraph perl scripts: + +``` +Top leaf functions (by samples): + 56 _xzm_free ─┐ + 48 _xzm_xzone_malloc_tiny │ Memory allocation: ~170 samples (29%) + 17 _xzm_xzone_malloc │ + 15 _free │ + 14 _malloc_zone_malloc ─┘ + 38 _platform_memmove ── Copying (from clones): 38 samples (6%) + 36 stream_expand_token ─┐ + 18 _expand_templates │ Template expansion: 54 samples (9%) + 34 SipHash::write ── HashMap hashing: 34 samples (6%) + 16 read_utf16_string ── String conversion: 16 samples (3%) + 16 BinXmlValue::from ── serde_json conversion: 16 samples (3%) +``` + +**Key insight**: The #1 bottleneck is **memory allocation/deallocation** (~29% of CPU time). 
+ +This is fundamentally architectural: +- Rust clones `BinXMLDeserializedTokens` during template expansion +- Each clone allocates memory for `Vec<>` and `String` fields +- Zig uses arena allocation (no individual malloc/free calls) + +--- + +## Optimization 1: ASCII Fast Path for UTF-16 to UTF-8 Conversion + +**File**: `src/utils/binxml_utils.rs` +**Speedup**: ~5% faster + +### The Problem + +The original `read_utf16_string` function used Rust's `decode_utf16` iterator for every string: + +```rust +// Before: Every character goes through the iterator +decode_utf16(buffer.into_iter().take_while(|&byte| byte != 0x00)) + .map(|r| r.map_err(|_e| Error::from(ErrorKind::InvalidData))) + .collect() +``` + +This approach has overhead: +- Iterator state management per character +- Surrogate pair handling for every codepoint +- Allocations for collecting results + +### The Insight + +~95% of EVTX strings are **pure ASCII**: +- Element names: `"Event"`, `"System"`, `"Provider"` +- Attribute names: `"Name"`, `"Guid"`, `"EventID"` +- Short values: `"SYSTEM"`, `"Security"`, `"4624"` + +For ASCII, UTF-16LE is trivial: the low byte IS the UTF-8 character (high byte is 0). + +### The Solution + +```rust +// Find actual string length (stop at NUL) +let actual_len = buffer.iter().position(|&c| c == 0).unwrap_or(buffer.len()); + +// ASCII fast path: if all code units are <= 0x7F, directly convert +let all_ascii = buffer[..actual_len].iter().all(|&c| c <= 0x7F); + +if all_ascii { + // Direct conversion: each u16 <= 0x7F maps to exactly one u8 + let mut result = String::with_capacity(actual_len); + for &c in &buffer[..actual_len] { + result.push(c as u8 as char); + } + return Ok(result); +} + +// Fallback: use decode_utf16 for non-ASCII strings +decode_utf16(buffer.into_iter().take(actual_len)) + .map(|r| r.map_err(|_e| Error::from(ErrorKind::InvalidData))) + .collect() +``` + +### Why It's Faster + +1. **No iterator overhead** for ASCII strings +2. 
**Simple loop** instead of complex surrogate handling +3. **Pre-allocated capacity** based on known length +4. **Single scan** to check ASCII + convert + +### Benchmark + +``` +Before: 146.1 ms ± 8.0 ms +After: 139.2 ms ± 7.6 ms +Speedup: 1.05x (5% faster) +``` + +--- + +## Optimization 2: Direct JSON String Writing + +**File**: `src/json_stream_output.rs` +**Speedup**: ~4% faster + +### The Problem + +The streaming JSON output used `serde_json::to_writer` for all string serialization: + +```rust +fn write_key(&mut self, key: &str) -> SerializationResult<()> { + self.write_comma_if_needed()?; + let unique_key = self.reserve_unique_key(key); + + // Overhead: serde_json parsing, escaping, buffering + serde_json::to_writer(self.writer_mut(), &unique_key)?; + self.write_bytes(b":") +} +``` + +This adds overhead: +- Function call into serde_json +- Escape character scanning +- Potential buffering + +### The Insight + +XML element and attribute names follow **NCName rules** ("non-colonized name", per the XML Namespaces spec): +- Start with letter or underscore +- Contain only letters, digits, hyphens, underscores, periods +- **No characters that need JSON escaping** (no quotes, backslashes, control chars) + +### The Solution + +```rust +/// Write a JSON string directly without escaping. +/// Only safe for NCName strings (XML element/attribute names). 
+#[inline] +fn write_json_string_ncname(&mut self, s: &str) -> SerializationResult<()> { + self.write_bytes(b"\"")?; + self.write_bytes(s.as_bytes())?; + self.write_bytes(b"\"") +} + +fn write_key(&mut self, key: &str) -> SerializationResult<()> { + self.write_comma_if_needed()?; + let unique_key = self.reserve_unique_key(key); + + // Direct write: no escaping needed for NCName + self.write_json_string_ncname(&unique_key)?; + self.write_bytes(b":") +} +``` + +Also replaced fixed string keys with direct byte writes: + +```rust +// Before +serde_json::to_writer(self.writer_mut(), "#attributes")?; + +// After +self.write_bytes(b"\"#attributes\":")?; +``` + +### Why It's Faster + +1. **No escape scanning** for NCName strings +2. **No function call overhead** to serde_json +3. **Direct byte writes** avoid intermediate processing +4. **Inlined** for hot path optimization + +### Benchmark + +``` +Before: 140.1 ms ± 10.8 ms +After: 135.3 ms ± 12.1 ms +Speedup: 1.04x (4% faster) +``` + +--- + +## Current Benchmark + +Single-threaded JSON output on `security_big_sample.evtx` (30 MB): + +``` +Rust: 574 ms ± 5 ms +Zig: 166 ms ± 12 ms + +Gap: Zig is 3.46x faster +``` + +Multi-threaded: +``` +Rust: 273 ms (8 threads) +Zig: ~50 ms (estimated) +``` + +--- + +## Remaining Opportunities (Architectural Changes Required) + +The Zig parser is ~3.5x faster due to fundamental architectural differences: + +### 1. Arena Allocator (~29% of CPU time) + +**Problem**: Profiling shows 170+ samples in malloc/free - the #1 bottleneck. + +**Zig approach**: +- Uses arena allocation (`std.heap.ArenaAllocator`) for all chunk processing +- Allocations are bump-pointer (O(1), no metadata) +- Atomic deallocation: just reset the bump pointer when done with chunk +- No individual `free()` calls + +**Rust solution**: Use `bumpalo` crate for per-chunk allocations. Requires: +- Modifying `EvtxChunk` to hold an arena +- Changing token types to allocate from arena +- Resetting arena between chunks + +### 2. 
Reference-Based Template Expansion (~15% of CPU time) + +**Problem**: Rust clones `BinXMLDeserializedTokens` for every token during template expansion. + +```rust +// Current: clones for every token +stream_expand_token(val.clone(), chunk, ...)?; +stream_expand_token(other.clone(), chunk, ...)?; +``` + +**Zig approach**: +- Templates stored as IR with `Placeholder` nodes +- Instantiation clones just the tree structure (cheap memcpy) +- Actual data (strings, etc.) is shared via arena references + +**Rust solution**: Change `stream_expand_token` to take `&BinXMLDeserializedTokens<'a>` instead of owned value. Requires careful lifetime management. + +### 3. Reduce HashMap Usage (~6% of CPU time) + +**Problem**: `reserve_unique_key` does HashSet lookups for every JSON key to detect duplicates. + +**Observation**: Most JSON objects have < 20 keys. A linear scan of a `SmallVec` would be faster than hashing for small N. + +### 4. SIMD String Processing (~3% of CPU time) + +**Problem**: UTF-16 to UTF-8 conversion is still 16 samples despite ASCII fast path. + +**Zig approach**: SIMD for strings >= 16 code units, processing 8 characters at once. + +**Rust solution**: Use `simdutf` or `encoding_rs` crate for bulk conversion. + +--- + +## Conclusion + +The low-hanging fruit (ASCII fast path, direct JSON writing) gave ~10% improvement total. + +The remaining 3.5x gap requires **architectural changes**: +1. Arena allocator (biggest impact, most invasive) +2. Reference-based template expansion +3. Smaller data structures for key deduplication +4. SIMD string conversion + +These changes would require significant refactoring of the core data structures. 
+ diff --git a/profile_comparison.sh b/profile_comparison.sh new file mode 100755 index 00000000..214554b3 --- /dev/null +++ b/profile_comparison.sh @@ -0,0 +1,499 @@ +#!/usr/bin/env bash +# +# profile_comparison.sh - Compare Rust vs Zig EVTX parser performance +# +# See PERF.md for the hypothesis-driven workflow (before/after binaries, hyperfine JSON, +# samply profiles) and how to interpret allocator churn vs the Zig implementation. +# +# Usage: +# ./profile_comparison.sh # Build + benchmark (no profiling) +# ./profile_comparison.sh --bench-only # Skip builds, just benchmark +# ./profile_comparison.sh --profile-only # Skip builds, just profile (opens samply UI) +# ./profile_comparison.sh --top-leaves # Profile both and print top leaf functions +# ./profile_comparison.sh --flamegraph # Generate flamegraphs (requires sudo on macOS) +# +# Environment variables: +# SAMPLE_FILE - EVTX file to use (default: samples/security_big_sample.evtx) +# RUNS - Number of hyperfine runs (default: 5) +# OUTPUT_DIR - Directory for results (default: ./profile_results) +# ZIG_BINARY - Path to Zig binary (default: ~/Workspace/zig-evtx/zig-out/bin/evtx_dump_zig) +# TOP_LEAVES_N - Number of leaf functions to print (default: 20) +# QUIET_CHECK - If set (e.g. 1), wait for a quiet system before profiling and use +# `hyperfine --prepare ./scripts/ensure_quiet.sh` for benchmarks. +# Tune thresholds via QUIET_* env vars (see `scripts/ensure_quiet.sh`). +# BENCH_MT - If set (e.g. 1), also run the 8-thread benchmark comparison (default: 0). +# Single-thread is the baseline KPI for allocator-churn work; multi-thread is +# useful for end-to-end throughput comparisons. 
+# + +set -euo pipefail + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Configuration +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +: "${SAMPLE_FILE:=$SCRIPT_DIR/samples/security_big_sample.evtx}" +: "${RUNS:=5}" +: "${OUTPUT_DIR:=$SCRIPT_DIR/profile_results}" +: "${ZIG_BINARY:=$HOME/Workspace/zig-evtx/zig-out/bin/evtx_dump_zig}" +: "${ZIG_PROJECT:=$HOME/Workspace/zig-evtx}" +: "${TOP_LEAVES_N:=20}" +: "${TOP_LEAVES_WEIGHT:=cpu}" # cpu | samples | wall + +RUST_BINARY="$SCRIPT_DIR/target/release/evtx_dump" + +QUIET_SCRIPT="$SCRIPT_DIR/scripts/ensure_quiet.sh" +QUIET_CHECK="${QUIET_CHECK:-0}" +BENCH_MT="${BENCH_MT:-0}" +HYPERFINE_PREPARE_ARGS=() +if [[ "$QUIET_CHECK" != "0" ]]; then + if [[ ! -f "$QUIET_SCRIPT" ]]; then + echo -e "${RED}Error: QUIET_CHECK is set but missing: $QUIET_SCRIPT${NC}" + exit 1 + fi + # `hyperfine --prepare` runs outside the measured timings; it’s ideal for waiting for quiet. + HYPERFINE_PREPARE_ARGS=(--prepare "$QUIET_SCRIPT") +fi + +maybe_wait_for_quiet() { + if [[ "$QUIET_CHECK" != "0" ]]; then + "$QUIET_SCRIPT" + fi +} + +print_top_leaves_table() { + local profile_json="$1" + local label="$2" + local syms_json="${profile_json%.json}.syms.json" + + if [[ ! -f "$profile_json" ]]; then + echo -e "${RED}Error: missing profile: $profile_json${NC}" + return 1 + fi + if [[ ! -f "$syms_json" ]]; then + echo -e "${RED}Error: missing symbols sidecar: $syms_json${NC}" + echo "Re-record with samply using --unstable-presymbolicate." 
+ return 1 + fi + + echo "" + echo -e "${BLUE}=== Top leaf functions (${label}, ${TOP_LEAVES_WEIGHT}) ===${NC}" + + python3 - "$profile_json" "$syms_json" "$TOP_LEAVES_N" "$TOP_LEAVES_WEIGHT" <<'PY' +import bisect +import json +import sys + +profile_path = sys.argv[1] +syms_path = sys.argv[2] +top_n = int(sys.argv[3]) +weight_mode = (sys.argv[4] if len(sys.argv) > 4 else "cpu").strip().lower() + +def load_json(path: str): + with open(path, "rb") as f: + data = f.read() + if path.endswith(".gz"): + import gzip + data = gzip.decompress(data) + return json.loads(data) + +profile = load_json(profile_path) +syms = load_json(syms_path) + +string_table = syms.get("string_table") or [] +syms_data = syms.get("data") or [] + +def norm_hex(s: str) -> str: + return s.upper() + +syms_by_code = {} +syms_by_name = {} +preprocessed = {} + +for entry in syms_data: + code_id = entry.get("code_id") + if isinstance(code_id, str) and code_id: + syms_by_code[norm_hex(code_id)] = entry + syms_by_code[norm_hex(code_id) + "0"] = entry # common breakpad form + debug_name = entry.get("debug_name") + if isinstance(debug_name, str) and debug_name: + syms_by_name[debug_name] = entry + + st = entry.get("symbol_table") or [] + st_sorted = sorted(st, key=lambda x: int(x.get("rva", 0))) + rvas = [int(x.get("rva", 0)) for x in st_sorted] + ends = [int(x.get("rva", 0)) + int(x.get("size", 0)) for x in st_sorted] + names = [] + for x in st_sorted: + si = x.get("symbol", 0) + if isinstance(si, int) and 0 <= si < len(string_table): + names.append(string_table[si]) + else: + names.append("UNKNOWN") + preprocessed[id(entry)] = (rvas, ends, names) + +libs = profile.get("libs") or [] + +def match_entry_for_lib(lib: dict): + for key in (lib.get("codeId"), lib.get("breakpadId")): + if isinstance(key, str) and key: + k = norm_hex(key) + if k in syms_by_code: + return syms_by_code[k] + if k.endswith("0") and k[:-1] in syms_by_code: + return syms_by_code[k[:-1]] + for key in (lib.get("debugName"), 
lib.get("name")): + if isinstance(key, str) and key and key in syms_by_name: + return syms_by_name[key] + return None + +lib_entries = [match_entry_for_lib(lib) for lib in libs] + +def lookup_symbol(lib_index: int | None, rva: int | None) -> str: + if lib_index is None or rva is None: + return "UNKNOWN" + lib = libs[lib_index] if 0 <= lib_index < len(libs) else {} + entry = lib_entries[lib_index] if 0 <= lib_index < len(lib_entries) else None + if entry is None: + name = lib.get("debugName") or lib.get("name") or f"lib{lib_index}" + return f"{name} @ 0x{int(rva):x}" + + rvas, ends, names = preprocessed[id(entry)] + i = bisect.bisect_right(rvas, int(rva)) - 1 + if i >= 0 and int(rva) < ends[i]: + return names[i] + + name = lib.get("debugName") or lib.get("name") or entry.get("debug_name") or f"lib{lib_index}" + return f"{name} @ 0x{int(rva):x}" + +counts: dict[str, int] = {} +total = 0 + +for thread in (profile.get("threads") or []): + samples = thread.get("samples") or {} + stacks = samples.get("stack") or [] + sample_weights = samples.get("weight") + cpu_deltas = samples.get("threadCPUDelta") + wall_deltas = samples.get("timeDeltas") + + stack_table = thread.get("stackTable") or {} + frame_table = thread.get("frameTable") or {} + func_table = thread.get("funcTable") or {} + resource_table = thread.get("resourceTable") or {} + + stack_frame = stack_table.get("frame") or [] + frame_addr = frame_table.get("address") or [] + frame_func = frame_table.get("func") or [] + func_resource = func_table.get("resource") or [] + resource_lib = resource_table.get("lib") or [] + + for idx, stack_id in enumerate(stacks): + if not isinstance(stack_id, int): + continue + if stack_id < 0 or stack_id >= len(stack_frame): + continue + frame_id = stack_frame[stack_id] + if not isinstance(frame_id, int): + continue + if frame_id < 0 or frame_id >= len(frame_addr) or frame_id >= len(frame_func): + continue + + rva = frame_addr[frame_id] + func_id = frame_func[frame_id] + + lib_index = 
None + if isinstance(func_id, int) and 0 <= func_id < len(func_resource): + resource_id = func_resource[func_id] + if isinstance(resource_id, int) and 0 <= resource_id < len(resource_lib): + lib_index = resource_lib[resource_id] + + w = 1 + if weight_mode == "cpu" and isinstance(cpu_deltas, list) and idx < len(cpu_deltas): + try: + w = int(cpu_deltas[idx]) + except Exception: + w = 0 + elif weight_mode == "wall" and isinstance(wall_deltas, list) and idx < len(wall_deltas): + # timeDeltas is in ms (float). Keep as ms*1000 integer so output formatting is consistent. + try: + w = int(float(wall_deltas[idx]) * 1000.0) + except Exception: + w = 0 + elif isinstance(sample_weights, list) and idx < len(sample_weights): + try: + w = int(sample_weights[idx]) + except Exception: + w = 1 + + total += w + leaf = lookup_symbol(lib_index, rva) + counts[leaf] = counts.get(leaf, 0) + w + +items = sorted(counts.items(), key=lambda kv: kv[1], reverse=True)[:top_n] + +if weight_mode == "cpu": + header = "CPU ms" + divisor = 1000.0 # µs -> ms +elif weight_mode == "wall": + header = "Wall ms" + divisor = 1000.0 # (ms*1000) -> ms +else: + header = "Samples" + divisor = 1.0 + +print(f"| # | {header} | % | Leaf |") +print("| -: | --: | --: | --- |") +for i, (name, count) in enumerate(items, start=1): + pct = (count / total * 100.0) if total else 0.0 + v = count / divisor + if divisor == 1.0: + v_str = str(int(v)) + else: + v_str = f"{v:,.1f}" + print(f"| {i} | {v_str} | {pct:5.1f}% | {name} |") +PY +} + +# Parse arguments +BUILD=true +BENCH=true +PROFILE=false +FLAMEGRAPH=false +TOP_LEAVES=false + +for arg in "$@"; do + case $arg in + --bench-only) + BUILD=false + PROFILE=false + ;; + --profile-only) + BUILD=false + BENCH=false + PROFILE=true + ;; + --top-leaves) + BENCH=false + PROFILE=true + TOP_LEAVES=true + ;; + --flamegraph) + BUILD=false + BENCH=false + FLAMEGRAPH=true + ;; + --help|-h) + head -20 "$0" | tail -18 + exit 0 + ;; + esac +done + +echo -e "${BLUE}=== EVTX Parser 
Performance Comparison ===${NC}" +echo "" + +# Create output directory +mkdir -p "$OUTPUT_DIR" + +# Validate sample file exists +if [[ ! -f "$SAMPLE_FILE" ]]; then + echo -e "${RED}Error: Sample file not found: $SAMPLE_FILE${NC}" + exit 1 +fi + +SAMPLE_SIZE=$(ls -lh "$SAMPLE_FILE" | awk '{print $5}') +echo -e "Sample file: ${GREEN}$SAMPLE_FILE${NC} ($SAMPLE_SIZE)" +echo "" + +# Build phase +if [[ "$BUILD" == true ]]; then + echo -e "${YELLOW}Building Rust (release + fast-alloc)...${NC}" + (cd "$SCRIPT_DIR" && cargo build --release --features fast-alloc 2>&1 | tail -3) + + if [[ -d "$ZIG_PROJECT" ]]; then + echo -e "${YELLOW}Building Zig (ReleaseFast)...${NC}" + (cd "$ZIG_PROJECT" && zig build -Doptimize=ReleaseFast 2>&1 | tail -3) || echo "Zig build skipped" + fi + echo "" +fi + +# Validate binaries exist +if [[ ! -x "$RUST_BINARY" ]]; then + echo -e "${RED}Error: Rust binary not found: $RUST_BINARY${NC}" + echo "Run: cargo build --release --features fast-alloc" + exit 1 +fi + +if [[ ! -x "$ZIG_BINARY" ]]; then + echo -e "${RED}Error: Zig binary not found: $ZIG_BINARY${NC}" + echo "Run: cd ~/Workspace/zig-evtx && zig build -Doptimize=ReleaseFast" + exit 1 +fi + +# Benchmark phase +if [[ "$BENCH" == true ]]; then + echo -e "${YELLOW}Running benchmarks (${RUNS} runs each)...${NC}" + echo "" + + TIMESTAMP=$(date +%Y%m%d_%H%M%S) + BENCH_FILE="$OUTPUT_DIR/benchmark_${TIMESTAMP}.md" + + hyperfine \ + "${HYPERFINE_PREPARE_ARGS[@]}" \ + --warmup 2 \ + --runs "$RUNS" \ + --export-markdown "$BENCH_FILE" \ + --export-json "$OUTPUT_DIR/benchmark_${TIMESTAMP}.json" \ + -n "Rust (fast-alloc)" "$RUST_BINARY -t 1 -o jsonl $SAMPLE_FILE" \ + -n "Zig" "$ZIG_BINARY -t 1 --no-checks -o jsonl $SAMPLE_FILE" \ + 2>&1 + + echo "" + echo -e "${GREEN}Benchmark results saved to: $BENCH_FILE${NC}" + + # Optional: multi-threaded comparison (off by default; enable with BENCH_MT=1). 
+ if [[ "${BENCH_MT}" != "0" ]]; then + echo "" + echo -e "${YELLOW}Running multi-threaded comparison (8 threads)...${NC}" + + hyperfine \ + "${HYPERFINE_PREPARE_ARGS[@]}" \ + --warmup 2 \ + --runs "$RUNS" \ + --export-markdown "$OUTPUT_DIR/benchmark_mt_${TIMESTAMP}.md" \ + -n "Rust 8T" "$RUST_BINARY -t 8 -o jsonl $SAMPLE_FILE" \ + -n "Zig 8T" "$ZIG_BINARY -t 8 --no-checks -o jsonl $SAMPLE_FILE" \ + 2>&1 || echo "Multi-threaded benchmark failed (may need --features multithreading)" + + echo "" + fi +fi + +# Profile phase (samply - opens browser UI) +if [[ "$PROFILE" == true ]]; then + echo -e "${YELLOW}Profiling with samply...${NC}" + echo "" + + if ! command -v samply &> /dev/null; then + echo -e "${RED}samply not found. Install with: cargo install samply${NC}" + exit 1 + fi + + if [[ "$TOP_LEAVES" == true ]]; then + choice=3 + else + echo -e "${BLUE}Choose what to profile:${NC}" + echo " 1) Rust only" + echo " 2) Zig only" + echo " 3) Both (Rust first, then Zig)" + read -p "Selection [1-3]: " choice + fi + + case $choice in + 1) + echo -e "${YELLOW}Profiling Rust...${NC}" + # Save profile + sidecar symbols file so `samply load` shows function names. + maybe_wait_for_quiet + samply record --unstable-presymbolicate -o "$OUTPUT_DIR/rust_profile.json" -- \ + "$RUST_BINARY" -t 1 -o jsonl "$SAMPLE_FILE" + ;; + 2) + echo -e "${YELLOW}Profiling Zig...${NC}" + # Save profile + sidecar symbols file so `samply load` shows function names. 
+ maybe_wait_for_quiet + samply record --unstable-presymbolicate -o "$OUTPUT_DIR/zig_profile.json" -- \ + "$ZIG_BINARY" -t 1 --no-checks -o jsonl "$SAMPLE_FILE" + ;; + 3) + echo -e "${YELLOW}Recording Rust profile...${NC}" + maybe_wait_for_quiet + samply record --save-only --unstable-presymbolicate -o "$OUTPUT_DIR/rust_profile.json" -- \ + "$RUST_BINARY" -t 1 -o jsonl "$SAMPLE_FILE" > /dev/null 2>&1 + + echo -e "${YELLOW}Recording Zig profile...${NC}" + maybe_wait_for_quiet + samply record --save-only --unstable-presymbolicate -o "$OUTPUT_DIR/zig_profile.json" -- \ + "$ZIG_BINARY" -t 1 --no-checks -o jsonl "$SAMPLE_FILE" > /dev/null 2>&1 + + echo "" + echo -e "${GREEN}Profiles saved:${NC}" + echo " Rust: $OUTPUT_DIR/rust_profile.json" + echo " $OUTPUT_DIR/rust_profile.syms.json" + echo " Zig: $OUTPUT_DIR/zig_profile.json" + echo " $OUTPUT_DIR/zig_profile.syms.json" + echo "" + echo "View with:" + echo " samply load $OUTPUT_DIR/rust_profile.json" + echo " samply load $OUTPUT_DIR/zig_profile.json" + + if [[ "$TOP_LEAVES" == true ]]; then + print_top_leaves_table "$OUTPUT_DIR/rust_profile.json" "Rust" + print_top_leaves_table "$OUTPUT_DIR/zig_profile.json" "Zig" + fi + ;; + esac +fi + +# Flamegraph phase (cargo-flamegraph - may need sudo on macOS) +if [[ "$FLAMEGRAPH" == true ]]; then + echo -e "${YELLOW}Generating flamegraphs...${NC}" + echo -e "${RED}Note: This may require sudo on macOS${NC}" + echo "" + + if ! command -v cargo-flamegraph &> /dev/null && ! command -v flamegraph &> /dev/null; then + echo -e "${RED}flamegraph not found. 
Install with: cargo install flamegraph${NC}" + exit 1 + fi + + TIMESTAMP=$(date +%Y%m%d_%H%M%S) + + # Rust flamegraph + echo -e "${YELLOW}Generating Rust flamegraph...${NC}" + (cd "$SCRIPT_DIR" && cargo flamegraph \ + --root \ + --bin evtx_dump \ + --features fast-alloc \ + --output "$OUTPUT_DIR/flamegraph_rust_${TIMESTAMP}.svg" \ + -- -t 1 -o jsonl "$SAMPLE_FILE" > /dev/null 2>&1) || { + echo -e "${RED}Rust flamegraph failed (may need sudo)${NC}" + } + + # For Zig, use dtrace directly or samply + echo -e "${YELLOW}Generating Zig flamegraph via samply...${NC}" + maybe_wait_for_quiet + samply record --save-only --unstable-presymbolicate -o "$OUTPUT_DIR/zig_profile_${TIMESTAMP}.json" \ + -- "$ZIG_BINARY" -t 1 --no-checks -o jsonl "$SAMPLE_FILE" > /dev/null 2>&1 + + echo "" + echo -e "${GREEN}Flamegraphs saved to: $OUTPUT_DIR/${NC}" + ls -la "$OUTPUT_DIR"/*.svg 2>/dev/null || echo "(SVG files may require sudo)" +fi + +# Summary +echo "" +echo -e "${BLUE}=== Quick Commands ===${NC}" +echo "" +echo "# Benchmark only:" +echo " ./profile_comparison.sh --bench-only" +echo "" +echo "# Benchmark (wait for quiet machine via scripts/ensure_quiet.sh):" +echo " QUIET_CHECK=1 ./profile_comparison.sh --bench-only" +echo "" +echo "# Benchmark + multi-thread comparison:" +echo " BENCH_MT=1 ./profile_comparison.sh --bench-only" +echo "" +echo "# Interactive profiling (opens browser):" +echo " ./profile_comparison.sh --profile-only" +echo "" +echo "# View saved profiles:" +echo " samply load $OUTPUT_DIR/rust_profile.json" +echo " samply load $OUTPUT_DIR/zig_profile.json" +echo "" +echo "# Generate flamegraphs (may need sudo):" +echo " ./profile_comparison.sh --flamegraph" +echo "" diff --git a/scripts/ensure_quiet.sh b/scripts/ensure_quiet.sh new file mode 100755 index 00000000..eca28d71 --- /dev/null +++ b/scripts/ensure_quiet.sh @@ -0,0 +1,142 @@ +#!/usr/bin/env bash +# +# ensure_quiet.sh - block until the system is "quiet enough" for benchmarking. 
+# +# Intended usage: +# hyperfine --prepare ./scripts/ensure_quiet.sh ... +# QUIET_CHECK=1 ./profile_comparison.sh --bench-only +# +# Thresholds (override via env): +# QUIET_IDLE_MIN=90 # minimum CPU idle percentage +# QUIET_LOAD1_MAX=2.0 # maximum 1-minute load average +# QUIET_STABLE_SAMPLES=3 # consecutive passing samples required +# QUIET_SAMPLE_INTERVAL_SEC=0.25 +# QUIET_MAX_WAIT_SEC=60 +# QUIET_VERBOSE=0 # set to 1 to print every sample +# +# Notes: +# - `hyperfine --prepare` does NOT include prepare time in the measured timings, so +# waiting for quiet won’t skew results, it just prevents noisy runs. + +set -euo pipefail + +IDLE_MIN="${QUIET_IDLE_MIN:-90}" +LOAD1_MAX="${QUIET_LOAD1_MAX:-2.0}" +STABLE_SAMPLES="${QUIET_STABLE_SAMPLES:-3}" +SAMPLE_INTERVAL_SEC="${QUIET_SAMPLE_INTERVAL_SEC:-0.25}" +MAX_WAIT_SEC="${QUIET_MAX_WAIT_SEC:-60}" +VERBOSE="${QUIET_VERBOSE:-0}" + +script_name="$(basename "$0")" + +die() { + echo "[$script_name] error: $*" >&2 + exit 2 +} + +get_cpu_idle_percent() { + local os + os="$(uname -s)" + + if [[ "$os" == "Darwin" ]]; then + # Use the *second* sample from `top -l 2` to avoid the "since boot/last call" artifact. + local line + line="$(top -l 2 -n 0 | grep '^CPU usage' | tail -n 1 || true)" + if [[ -z "$line" ]]; then + die "failed to read CPU usage from top (Darwin)" + fi + # Example: "CPU usage: 12.89% user, 15.2% sys, 72.8% idle" + echo "$line" | sed -E 's/.* ([0-9.]+)% idle.*/\1/' + return 0 + fi + + if [[ "$os" == "Linux" ]]; then + # Compute idle% from /proc/stat deltas. 
+ # Fields: user nice system idle iowait irq softirq steal guest guest_nice + local u1 n1 s1 i1 w1 irq1 sirq1 st1 + local u2 n2 s2 i2 w2 irq2 sirq2 st2 + read -r _ u1 n1 s1 i1 w1 irq1 sirq1 st1 _ < /proc/stat || die "failed to read /proc/stat" + sleep 0.10 + read -r _ u2 n2 s2 i2 w2 irq2 sirq2 st2 _ < /proc/stat || die "failed to read /proc/stat (2)" + + local idle1=$((i1 + w1)) + local idle2=$((i2 + w2)) + local total1=$((u1 + n1 + s1 + i1 + w1 + irq1 + sirq1 + st1)) + local total2=$((u2 + n2 + s2 + i2 + w2 + irq2 + sirq2 + st2)) + local didle=$((idle2 - idle1)) + local dtotal=$((total2 - total1)) + + if (( dtotal <= 0 )); then + die "invalid /proc/stat delta" + fi + + awk -v idle="$didle" -v total="$dtotal" 'BEGIN { printf "%.2f", (idle / total) * 100.0 }' + return 0 + fi + + die "unsupported OS for idle sampling: $os" +} + +get_load1() { + local os + os="$(uname -s)" + + if [[ "$os" == "Darwin" ]]; then + # Example: "{ 5.34 5.49 4.95 }" + sysctl -n vm.loadavg | sed -E 's/^\{ ([0-9.]+) .*/\1/' + return 0 + fi + + if [[ "$os" == "Linux" ]]; then + awk '{print $1}' /proc/loadavg + return 0 + fi + + die "unsupported OS for load sampling: $os" +} + +float_ge() { + # $1 >= $2 + awk -v a="$1" -v b="$2" 'BEGIN { exit !(a >= b) }' +} + +float_le() { + # $1 <= $2 + awk -v a="$1" -v b="$2" 'BEGIN { exit !(a <= b) }' +} + +deadline=$((SECONDS + MAX_WAIT_SEC)) +ok_streak=0 +samples=0 + +while true; do + samples=$((samples + 1)) + + idle="$(get_cpu_idle_percent)" + load1="$(get_load1)" + + if float_ge "$idle" "$IDLE_MIN" && float_le "$load1" "$LOAD1_MAX"; then + ok_streak=$((ok_streak + 1)) + else + ok_streak=0 + fi + + if [[ "$VERBOSE" != "0" ]]; then + echo "[$script_name] idle=${idle}% (min ${IDLE_MIN}%) load1=${load1} (max ${LOAD1_MAX}) streak=${ok_streak}/${STABLE_SAMPLES}" >&2 + fi + + if (( ok_streak >= STABLE_SAMPLES )); then + exit 0 + fi + + if (( SECONDS >= deadline )); then + echo "[$script_name] system not quiet enough after ${MAX_WAIT_SEC}s." 
>&2 + echo "[$script_name] last sample: idle=${idle}% (min ${IDLE_MIN}%), load1=${load1} (max ${LOAD1_MAX})." >&2 + echo "[$script_name] top CPU processes:" >&2 + ps -Ao %cpu,pid,command | sed 1d | sort -nr | head -n 10 >&2 || true + exit 1 + fi + + sleep "$SAMPLE_INTERVAL_SEC" +done + diff --git a/src/binxml/assemble.rs b/src/binxml/assemble.rs index 83e5fd05..ba9b8e42 100644 --- a/src/binxml/assemble.rs +++ b/src/binxml/assemble.rs @@ -211,30 +211,55 @@ fn expand_token_substitution<'a>( substitution_descriptor: &TemplateSubstitutionDescriptor, chunk: &'a EvtxChunk<'a>, stack: &mut Vec>, + remaining_uses: &mut [u32], ) -> Result<()> { if substitution_descriptor.ignore { return Ok(()); } + // NOTE: BinXML substitution indices can be referenced multiple times within a template. + // We can only move the substitution value on its *last* use; otherwise we must clone. + let value = take_or_clone_substitution_value( + template, + substitution_descriptor.substitution_index, + remaining_uses, + ); - let value = template - .substitution_array - .get_mut(substitution_descriptor.substitution_index as usize); + _expand_templates(value, chunk, stack)?; - if let Some(value) = value { - let value = mem::replace( - value, + Ok(()) +} + +fn take_or_clone_substitution_value<'a>( + template: &mut BinXmlTemplateRef<'a>, + substitution_index: u16, + remaining_uses: &mut [u32], +) -> BinXMLDeserializedTokens<'a> { + let idx = substitution_index as usize; + + if idx >= template.substitution_array.len() { + return BinXMLDeserializedTokens::Value(BinXmlValue::NullType); + } + debug_assert!( + idx < remaining_uses.len(), + "remaining_uses must be sized to substitution_array" + ); + + let remaining = remaining_uses[idx]; + debug_assert!( + remaining > 0, + "remaining_uses for idx {idx} should be > 0 when expanding a substitution" + ); + + remaining_uses[idx] = remaining.saturating_sub(1); + + if remaining == 1 { + mem::replace( + &mut template.substitution_array[idx], 
BinXMLDeserializedTokens::Value(BinXmlValue::NullType), - ); - _expand_templates(value, chunk, stack)?; + ) } else { - _expand_templates( - BinXMLDeserializedTokens::Value(BinXmlValue::NullType), - chunk, - stack, - )?; + template.substitution_array[idx].clone() } - - Ok(()) } fn expand_template<'a>( @@ -246,11 +271,30 @@ fn expand_template<'a>( .template_table .get_template(template.template_def_offset) { + let mut remaining_uses = vec![0u32; template.substitution_array.len()]; + for token in template_def.tokens.iter() { + if let BinXMLDeserializedTokens::Substitution(desc) = token { + if desc.ignore { + continue; + } + let idx = desc.substitution_index as usize; + if idx < remaining_uses.len() { + remaining_uses[idx] += 1; + } + } + } + // We expect to find all the templates in the template cache. // Clone from cache since the cache owns the tokens. for token in template_def.tokens.iter() { if let BinXMLDeserializedTokens::Substitution(substitution_descriptor) = token { - expand_token_substitution(&mut template, substitution_descriptor, chunk, stack)?; + expand_token_substitution( + &mut template, + substitution_descriptor, + chunk, + stack, + &mut remaining_uses, + )?; } else { _expand_templates(token.clone(), chunk, stack)?; } @@ -267,12 +311,32 @@ fn expand_template<'a>( let template_def = read_template_definition_cursor( &mut cursor, Some(chunk), + chunk.arena, chunk.settings.get_ansi_codec(), )?; + let mut remaining_uses = vec![0u32; template.substitution_array.len()]; + for token in template_def.tokens.iter() { + if let BinXMLDeserializedTokens::Substitution(desc) = token { + if desc.ignore { + continue; + } + let idx = desc.substitution_index as usize; + if idx < remaining_uses.len() { + remaining_uses[idx] += 1; + } + } + } + for token in template_def.tokens { if let BinXMLDeserializedTokens::Substitution(substitution_descriptor) = token { - expand_token_substitution(&mut template, &substitution_descriptor, chunk, stack)?; + expand_token_substitution( + 
&mut template, + &substitution_descriptor, + chunk, + stack, + &mut remaining_uses, + )?; } else { _expand_templates(token, chunk, stack)?; } @@ -341,7 +405,19 @@ fn stream_expand_token<'a, T: BinXmlOutput>( } } BinXMLDeserializedTokens::Value(value) => { - if let Some(b) = current_element.as_mut() { + // Handle BinXmlType by expanding nested tokens inline + if let BinXmlValue::BinXmlType(nested_tokens) = value { + for nested in nested_tokens { + stream_expand_token( + nested, + chunk, + visitor, + element_stack, + current_element, + current_pi, + )?; + } + } else if let Some(b) = current_element.as_mut() { b.attribute_value(Cow::Owned(value))?; } else { visitor.visit_characters(Cow::Owned(value))?; @@ -401,90 +477,145 @@ fn stream_expand_token<'a, T: BinXmlOutput>( } } BinXMLDeserializedTokens::StartOfStream | BinXMLDeserializedTokens::EndOfStream => {} - BinXMLDeserializedTokens::TemplateInstance(template) => { - if let Some(template_def) = chunk - .template_table - .get_template(template.template_def_offset) - { - for t in template_def.tokens.iter() { - match t { - BinXMLDeserializedTokens::Substitution(desc) => { - if desc.ignore { - continue; - } - if let Some(val) = template - .substitution_array - .get(desc.substitution_index as usize) - { - stream_expand_token( - val.clone(), - chunk, - visitor, - element_stack, - current_element, - current_pi, - )?; - } else { - visitor.visit_characters(Cow::Owned(BinXmlValue::NullType))?; - } - } - other => stream_expand_token( - other.clone(), - chunk, - visitor, - element_stack, - current_element, - current_pi, - )?, - } + BinXMLDeserializedTokens::TemplateInstance(mut template) => { + stream_expand_template( + &mut template, + chunk, + visitor, + element_stack, + current_element, + current_pi, + )?; + } + BinXMLDeserializedTokens::Substitution(_) => { + return Err(EvtxError::FailedToCreateRecordModel( + "Substitution token should not appear in input stream", + )); + } + BinXMLDeserializedTokens::CDATASection | 
BinXMLDeserializedTokens::CharRef => { + return Err(EvtxError::FailedToCreateRecordModel( + "Unimplemented CDATA/CharRef", + )); + } + } + Ok(()) +} + +/// Streaming expansion for borrowed tokens (e.g. template cache tokens). +/// +/// This avoids cloning `BinXMLDeserializedTokens` / `BinXmlValue` on the hot path. +fn stream_expand_token_ref<'a, T: BinXmlOutput>( + token: &'a BinXMLDeserializedTokens<'a>, + chunk: &'a EvtxChunk<'a>, + visitor: &mut T, + element_stack: &mut Vec>, + current_element: &mut Option>, + current_pi: &mut Option>, +) -> Result<()> { + match token { + BinXMLDeserializedTokens::FragmentHeader(_) | BinXMLDeserializedTokens::AttributeList => {} + BinXMLDeserializedTokens::OpenStartElement(elem) => { + let mut builder = XmlElementBuilder::new(); + builder.name(expand_string_ref(&elem.name, chunk)?); + *current_element = Some(builder); + } + BinXMLDeserializedTokens::Attribute(attr) => { + if let Some(b) = current_element.as_mut() { + b.attribute_name(expand_string_ref(&attr.name, chunk)?); + } else { + return Err(EvtxError::FailedToCreateRecordModel( + "attribute - Bad parser state", + )); + } + } + BinXMLDeserializedTokens::Value(value) => { + // Handle BinXmlType by expanding nested tokens inline + if let BinXmlValue::BinXmlType(nested_tokens) = value { + for nested in nested_tokens.iter() { + stream_expand_token_ref( + nested, + chunk, + visitor, + element_stack, + current_element, + current_pi, + )?; } + } else if let Some(b) = current_element.as_mut() { + b.attribute_value(Cow::Borrowed(value))?; } else { - let mut cursor = - ByteCursor::with_pos(chunk.data, template.template_def_offset as usize)?; - let template_def = read_template_definition_cursor( - &mut cursor, - Some(chunk), - chunk.settings.get_ansi_codec(), - )?; - // For templates not in cache, expand them first then visit - let expanded = expand_templates(template_def.tokens, chunk)?; - for t in expanded { - match t { - BinXMLDeserializedTokens::Substitution(desc) => { - if 
desc.ignore { - continue; - } - if let Some(val) = template - .substitution_array - .get(desc.substitution_index as usize) - { - stream_expand_token( - val.clone(), - chunk, - visitor, - element_stack, - current_element, - current_pi, - )?; - } else { - visitor.visit_characters(Cow::Owned(BinXmlValue::NullType))?; - } - } - other => stream_expand_token( - other, - chunk, - visitor, - element_stack, - current_element, - current_pi, - )?, - } + visitor.visit_characters(Cow::Borrowed(value))?; + } + } + BinXMLDeserializedTokens::CloseStartElement => { + let element = current_element + .take() + .ok_or(EvtxError::FailedToCreateRecordModel( + "close start - Bad parser state", + ))? + .finish()?; + visitor.visit_open_start_element(&element)?; + element_stack.push(element); + } + BinXMLDeserializedTokens::CloseEmptyElement => { + let element = current_element + .take() + .ok_or(EvtxError::FailedToCreateRecordModel( + "close empty - Bad parser state", + ))? + .finish()?; + visitor.visit_open_start_element(&element)?; + visitor.visit_close_element(&element)?; + } + BinXMLDeserializedTokens::CloseElement => { + let element = element_stack + .pop() + .ok_or(EvtxError::FailedToCreateRecordModel( + "close element - Bad parser state", + ))?; + visitor.visit_close_element(&element)?; + } + BinXMLDeserializedTokens::EntityRef(entity) => { + match expand_string_ref(&entity.name, chunk)? 
{ + Cow::Borrowed(s) => visitor.visit_entity_reference(s)?, + Cow::Owned(s) => { + let tmp = s; + visitor.visit_entity_reference(&tmp)?; } } } + BinXMLDeserializedTokens::PITarget(name) => { + let mut b = XmlPIBuilder::new(); + b.name(expand_string_ref(&name.name, chunk)?); + *current_pi = Some(b); + } + BinXMLDeserializedTokens::PIData(data) => { + let mut b = current_pi + .take() + .ok_or(EvtxError::FailedToCreateRecordModel( + "PI Data without PI target - Bad parser state", + ))?; + b.data(Cow::Borrowed(data.as_str())); + if let XmlModel::PI(pi) = b.finish() { + visitor.visit_processing_instruction(&pi)?; + } + } + BinXMLDeserializedTokens::StartOfStream | BinXMLDeserializedTokens::EndOfStream => {} + BinXMLDeserializedTokens::TemplateInstance(template) => { + // Not expected inside template definitions, but handle defensively. + let mut owned = template.clone(); + stream_expand_template( + &mut owned, + chunk, + visitor, + element_stack, + current_element, + current_pi, + )?; + } BinXMLDeserializedTokens::Substitution(_) => { return Err(EvtxError::FailedToCreateRecordModel( - "Call `expand_templates` before calling this function", + "Substitution token should not appear in input stream", )); } BinXMLDeserializedTokens::CDATASection | BinXMLDeserializedTokens::CharRef => { @@ -496,17 +627,148 @@ fn stream_expand_token<'a, T: BinXmlOutput>( Ok(()) } +/// Expand a template instance inline during streaming. +/// This expands substitution tokens inline. +/// +/// We *move* substitution values on their last use and *clone* them for earlier uses when a +/// template references the same substitution index multiple times (the BinXML format permits +/// this). 
+fn stream_expand_template<'a, T: BinXmlOutput>( + template: &mut BinXmlTemplateRef<'a>, + chunk: &'a EvtxChunk<'a>, + visitor: &mut T, + element_stack: &mut Vec>, + current_element: &mut Option>, + current_pi: &mut Option>, +) -> Result<()> { + if let Some(template_def) = chunk + .template_table + .get_template(template.template_def_offset) + { + let mut remaining_uses = vec![0u32; template.substitution_array.len()]; + for t in template_def.tokens.iter() { + if let BinXMLDeserializedTokens::Substitution(desc) = t { + if desc.ignore { + continue; + } + let idx = desc.substitution_index as usize; + if idx < remaining_uses.len() { + remaining_uses[idx] += 1; + } + } + } + + for t in template_def.tokens.iter() { + match t { + BinXMLDeserializedTokens::Substitution(desc) => { + if desc.ignore { + continue; + } + // Move the substitution value only on its last use. If the template + // references the same substitution index multiple times, earlier uses must + // clone to preserve correctness. + let token = take_or_clone_substitution_value( + template, + desc.substitution_index, + &mut remaining_uses, + ); + + stream_expand_token( + token, + chunk, + visitor, + element_stack, + current_element, + current_pi, + )?; + } + // Template definition tokens from cache are handled by reference (no cloning). 
+ other => stream_expand_token_ref( + other, + chunk, + visitor, + element_stack, + current_element, + current_pi, + )?, + } + } + } else { + // Template not in cache - read directly from chunk + debug!( + "Template in offset {} was not found in cache (streaming)", + template.template_def_offset + ); + let mut cursor = ByteCursor::with_pos(chunk.data, template.template_def_offset as usize)?; + let template_def = read_template_definition_cursor( + &mut cursor, + Some(chunk), + chunk.arena, + chunk.settings.get_ansi_codec(), + )?; + let mut remaining_uses = vec![0u32; template.substitution_array.len()]; + for t in template_def.tokens.iter() { + if let BinXMLDeserializedTokens::Substitution(desc) = t { + if desc.ignore { + continue; + } + let idx = desc.substitution_index as usize; + if idx < remaining_uses.len() { + remaining_uses[idx] += 1; + } + } + } + + // For templates read directly, we own the tokens, so iterate them + for t in template_def.tokens { + match t { + BinXMLDeserializedTokens::Substitution(desc) => { + if desc.ignore { + continue; + } + let token = take_or_clone_substitution_value( + template, + desc.substitution_index, + &mut remaining_uses, + ); + + stream_expand_token( + token, + chunk, + visitor, + element_stack, + current_element, + current_pi, + )?; + } + other => stream_expand_token( + other, + chunk, + visitor, + element_stack, + current_element, + current_pi, + )?, + } + } + } + Ok(()) +} + pub fn parse_tokens_streaming<'a, T: BinXmlOutput>( tokens: Vec>, chunk: &'a EvtxChunk<'a>, visitor: &mut T, ) -> Result<()> { - let expanded = expand_templates(tokens, chunk)?; + // OPTIMIZATION: Process tokens directly without pre-expanding templates. + // Template expansion happens inline in stream_expand_token/stream_expand_template, + // which allows us to move substitution values (on their last use) instead of always cloning. 
visitor.visit_start_of_stream()?; let mut element_stack: Vec> = Vec::new(); let mut current_element: Option> = None; let mut current_pi: Option> = None; - for token in expanded { + + for token in tokens { stream_expand_token( token, chunk, @@ -519,3 +781,51 @@ pub fn parse_tokens_streaming<'a, T: BinXmlOutput>( visitor.visit_end_of_stream()?; Ok(()) } + +#[cfg(test)] +mod tests { + use super::*; + use bumpalo::Bump; + use bumpalo::collections::String as BumpString; + + #[test] + fn repeated_template_substitution_index_preserves_value() { + let arena = Bump::new(); + let s = BumpString::from_str_in("hello", &arena); + + let mut template = BinXmlTemplateRef { + template_id: 0, + template_def_offset: 0, + template_guid: None, + substitution_array: vec![BinXMLDeserializedTokens::Value(BinXmlValue::StringType(s))], + }; + + // Simulate a template definition that references substitution index 0 twice. + let mut remaining_uses = vec![2u32]; + + let first = take_or_clone_substitution_value(&mut template, 0u16, &mut remaining_uses); + let second = take_or_clone_substitution_value(&mut template, 0u16, &mut remaining_uses); + + assert_eq!(remaining_uses[0], 0); + + match first { + BinXMLDeserializedTokens::Value(BinXmlValue::StringType(s)) => { + assert_eq!(s.as_str(), "hello") + } + other => panic!("expected StringType, got {other:?}"), + } + + match second { + BinXMLDeserializedTokens::Value(BinXmlValue::StringType(s)) => { + assert_eq!(s.as_str(), "hello") + } + other => panic!("expected StringType, got {other:?}"), + } + + // The last use moves the value out; leaving NullType behind is fine (no further uses). 
+ assert!(matches!( + template.substitution_array[0], + BinXMLDeserializedTokens::Value(BinXmlValue::NullType) + )); + } +} diff --git a/src/binxml/deserializer.rs b/src/binxml/deserializer.rs index c8e8ecfc..6c2a31a9 100644 --- a/src/binxml/deserializer.rs +++ b/src/binxml/deserializer.rs @@ -1,6 +1,7 @@ use crate::err::{DeserializationError, DeserializationResult as Result}; use crate::utils::ByteCursor; +use bumpalo::Bump; use log::trace; use crate::binxml::name::BinXmlNameEncoding; @@ -22,6 +23,7 @@ use std::io::Cursor; pub struct IterTokens<'a> { cursor: ByteCursor<'a>, chunk: Option<&'a EvtxChunk<'a>>, + arena: &'a Bump, data_size: Option, data_read_so_far: u32, eof: bool, @@ -38,6 +40,7 @@ pub struct BinXmlDeserializer<'a> { data: &'a [u8], offset: u64, chunk: Option<&'a EvtxChunk<'a>>, + arena: &'a Bump, /// Whether element start headers include the dependency identifier (u16). has_dep_id: bool, ansi_codec: EncodingRef, @@ -49,6 +52,7 @@ impl<'a> BinXmlDeserializer<'a> { data: &'a [u8], start_offset: u64, chunk: Option<&'a EvtxChunk<'a>>, + arena: &'a Bump, has_dep_id: bool, ansi_codec: EncodingRef, ) -> Self { @@ -56,6 +60,7 @@ impl<'a> BinXmlDeserializer<'a> { data, offset: start_offset, chunk, + arena, has_dep_id, ansi_codec, name_encoding: BinXmlNameEncoding::Offset, @@ -66,6 +71,7 @@ impl<'a> BinXmlDeserializer<'a> { data: &'a [u8], start_offset: u64, chunk: Option<&'a EvtxChunk<'a>>, + arena: &'a Bump, has_dep_id: bool, ansi_codec: EncodingRef, name_encoding: BinXmlNameEncoding, @@ -74,6 +80,7 @@ impl<'a> BinXmlDeserializer<'a> { data, offset: start_offset, chunk, + arena, has_dep_id, ansi_codec, name_encoding, @@ -84,13 +91,21 @@ impl<'a> BinXmlDeserializer<'a> { pub fn read_binxml_fragment( cursor: &mut Cursor<&'a [u8]>, chunk: Option<&'a EvtxChunk<'a>>, + arena: &'a Bump, data_size: Option, has_dep_id: bool, ansi_codec: EncodingRef, ) -> Result>> { let offset = cursor.position(); - let de = BinXmlDeserializer::init(cursor.get_ref(), offset, chunk, 
has_dep_id, ansi_codec); + let de = BinXmlDeserializer::init( + cursor.get_ref(), + offset, + chunk, + arena, + has_dep_id, + ansi_codec, + ); let mut tokens = vec![]; let mut iterator = de.iter_tokens(data_size)?; @@ -128,6 +143,7 @@ impl<'a> BinXmlDeserializer<'a> { Ok(IterTokens { cursor, chunk: self.chunk, + arena: self.arena, data_size, data_read_so_far: 0, eof: false, @@ -176,7 +192,13 @@ impl<'a> IterTokens<'a> { BinXMLRawToken::CloseEmptyElement => Ok(BinXMLDeserializedTokens::CloseEmptyElement), BinXMLRawToken::CloseElement => Ok(BinXMLDeserializedTokens::CloseElement), BinXMLRawToken::Value => Ok(BinXMLDeserializedTokens::Value( - BinXmlValue::from_binxml_cursor(cursor, self.chunk, None, self.ansi_codec)?, + BinXmlValue::from_binxml_cursor( + cursor, + self.chunk, + self.arena, + None, + self.ansi_codec, + )?, )), BinXMLRawToken::Attribute(_token_information) => { Ok(BinXMLDeserializedTokens::Attribute(read_attribute_cursor( @@ -202,7 +224,7 @@ impl<'a> IterTokens<'a> { read_processing_instruction_data_cursor(cursor)?, )), BinXMLRawToken::TemplateInstance => Ok(BinXMLDeserializedTokens::TemplateInstance( - read_template_cursor(cursor, self.chunk, self.ansi_codec)?, + read_template_cursor(cursor, self.chunk, self.arena, self.ansi_codec)?, )), BinXMLRawToken::NormalSubstitution => Ok(BinXMLDeserializedTokens::Substitution( read_substitution_descriptor_cursor(cursor, false)?, @@ -285,6 +307,7 @@ mod tests { use crate::binxml::name::{BinXmlNameEncoding, read_wevt_inline_name_at}; use crate::evtx_chunk::EvtxChunkData; use crate::{ParserSettings, ensure_env_logger_initialized}; + use bumpalo::Bump; use std::sync::Arc; #[test] @@ -376,10 +399,12 @@ mod tests { // CloseEmptyElement + EndOfStream buf.extend_from_slice(&[0x03, 0x00]); + let arena = Bump::new(); let de = BinXmlDeserializer::init_with_name_encoding( &buf, 0, None, + &arena, true, encoding::all::WINDOWS_1252, BinXmlNameEncoding::WevtInline, @@ -430,10 +455,12 @@ mod tests { 
buf.extend_from_slice(&[0x03, 0x00]); // CloseEmptyElement + EndOfStream + let arena = Bump::new(); let de = BinXmlDeserializer::init_with_name_encoding( &buf, 0, None, + &arena, true, encoding::all::WINDOWS_1252, BinXmlNameEncoding::WevtInline, diff --git a/src/binxml/tokens.rs b/src/binxml/tokens.rs index 916474e0..764246e5 100644 --- a/src/binxml/tokens.rs +++ b/src/binxml/tokens.rs @@ -13,6 +13,7 @@ use crate::binxml::value_variant::{BinXmlValue, BinXmlValueType}; use log::{error, trace, warn}; use crate::evtx_chunk::EvtxChunk; +use bumpalo::Bump; use encoding::EncodingRef; fn with_cursor<'a, T>( @@ -29,6 +30,7 @@ fn with_cursor<'a, T>( pub(crate) fn read_template_cursor<'a>( cursor: &mut ByteCursor<'a>, chunk: Option<&'a EvtxChunk<'a>>, + arena: &'a Bump, ansi_codec: EncodingRef, ) -> Result> { trace!("TemplateInstance at {}", cursor.position()); @@ -85,6 +87,7 @@ pub(crate) fn read_template_cursor<'a>( &descriptor.value_type, cursor, chunk, + arena, Some(descriptor.size), ansi_codec, )?; @@ -155,6 +158,7 @@ fn read_template_definition_header_cursor( pub(crate) fn read_template_definition_cursor<'a>( cursor: &mut ByteCursor<'a>, chunk: Option<&'a EvtxChunk<'a>>, + arena: &'a Bump, ansi_codec: EncodingRef, ) -> Result> { let header = read_template_definition_header_cursor(cursor)?; @@ -166,7 +170,14 @@ pub(crate) fn read_template_definition_cursor<'a>( ); let template = match with_cursor(cursor, |c| { - BinXmlDeserializer::read_binxml_fragment(c, chunk, Some(header.data_size), true, ansi_codec) + BinXmlDeserializer::read_binxml_fragment( + c, + chunk, + arena, + Some(header.data_size), + true, + ansi_codec, + ) }) { Ok(tokens) => BinXMLTemplateDefinition { header, tokens }, Err(e) => { diff --git a/src/binxml/value_variant.rs b/src/binxml/value_variant.rs index 0d6f714b..85775562 100644 --- a/src/binxml/value_variant.rs +++ b/src/binxml/value_variant.rs @@ -6,6 +6,9 @@ use crate::utils::ByteCursor; use crate::utils::invalid_data; use 
crate::utils::windows::{filetime_to_datetime, read_sid, read_systime, systime_from_bytes}; +use bumpalo::Bump; +use bumpalo::collections::String as BumpString; +use bumpalo::collections::Vec as BumpVec; use chrono::{DateTime, Utc}; use encoding::EncodingRef; use log::{trace, warn}; @@ -19,12 +22,12 @@ use winstructs::security::Sid; static DATETIME_FORMAT: &str = "%Y-%m-%dT%H:%M:%S%.6fZ"; -#[derive(Debug, PartialOrd, PartialEq, Clone)] +#[derive(Debug, PartialEq, Clone)] pub enum BinXmlValue<'a> { NullType, - // String may originate in substitution. - StringType(String), - AnsiStringType(Cow<'a, str>), + // Arena-allocated strings for O(1) mass deallocation. + StringType(BumpString<'a>), + AnsiStringType(BumpString<'a>), Int8Type(i8), UInt8Type(u8), Int16Type(i16), @@ -42,33 +45,33 @@ pub enum BinXmlValue<'a> { FileTimeType(DateTime), SysTimeType(DateTime), SidType(Sid), - HexInt32Type(Cow<'a, str>), - HexInt64Type(Cow<'a, str>), + HexInt32Type(BumpString<'a>), + HexInt64Type(BumpString<'a>), EvtHandle, // Because of the recursive type, we instantiate this enum via a method of the Deserializer - BinXmlType(Vec>), + BinXmlType(BumpVec<'a, BinXMLDeserializedTokens<'a>>), EvtXml, - StringArrayType(Vec), + StringArrayType(BumpVec<'a, BumpString<'a>>), AnsiStringArrayType, - Int8ArrayType(Vec), - UInt8ArrayType(Vec), - Int16ArrayType(Vec), - UInt16ArrayType(Vec), - Int32ArrayType(Vec), - UInt32ArrayType(Vec), - Int64ArrayType(Vec), - UInt64ArrayType(Vec), - Real32ArrayType(Vec), - Real64ArrayType(Vec), - BoolArrayType(Vec), + Int8ArrayType(BumpVec<'a, i8>), + UInt8ArrayType(BumpVec<'a, u8>), + Int16ArrayType(BumpVec<'a, i16>), + UInt16ArrayType(BumpVec<'a, u16>), + Int32ArrayType(BumpVec<'a, i32>), + UInt32ArrayType(BumpVec<'a, u32>), + Int64ArrayType(BumpVec<'a, i64>), + UInt64ArrayType(BumpVec<'a, u64>), + Real32ArrayType(BumpVec<'a, f32>), + Real64ArrayType(BumpVec<'a, f64>), + BoolArrayType(BumpVec<'a, bool>), BinaryArrayType, - GuidArrayType(Vec), + 
GuidArrayType(BumpVec<'a, Guid>), SizeTArrayType, - FileTimeArrayType(Vec>), - SysTimeArrayType(Vec>), - SidArrayType(Vec), - HexInt32ArrayType(Vec>), - HexInt64ArrayType(Vec>), + FileTimeArrayType(BumpVec<'a, DateTime>), + SysTimeArrayType(BumpVec<'a, DateTime>), + SidArrayType(BumpVec<'a, Sid>), + HexInt32ArrayType(BumpVec<'a, BumpString<'a>>), + HexInt64ArrayType(BumpVec<'a, BumpString<'a>>), EvtArrayHandle, BinXmlArrayType, EvtXmlArrayType, @@ -182,9 +185,24 @@ impl BinXmlValueType { } impl<'a> BinXmlValue<'a> { + /// Allocate a string into the provided arena. + #[inline] + fn alloc_str(s: &str, arena: &'a Bump) -> BumpString<'a> { + BumpString::from_str_in(s, arena) + } + + /// Move a heap `Vec` into a bump-allocated `Vec` (same element type). + #[inline] + fn vec_to_bump_vec(v: Vec, arena: &'a Bump) -> BumpVec<'a, T> { + let mut out = BumpVec::with_capacity_in(v.len(), arena); + out.extend(v); + out + } + pub(crate) fn from_binxml_cursor( cursor: &mut ByteCursor<'a>, chunk: Option<&'a EvtxChunk<'a>>, + arena: &'a Bump, size: Option, ansi_codec: EncodingRef, ) -> Result> { @@ -197,8 +215,14 @@ impl<'a> BinXmlValue<'a> { }, )?; - let data = - Self::deserialize_value_type_cursor(&value_type, cursor, chunk, size, ansi_codec)?; + let data = Self::deserialize_value_type_cursor( + &value_type, + cursor, + chunk, + arena, + size, + ansi_codec, + )?; Ok(data) } @@ -206,13 +230,14 @@ impl<'a> BinXmlValue<'a> { pub fn from_binxml_stream( cursor: &mut Cursor<&'a [u8]>, chunk: Option<&'a EvtxChunk<'a>>, + arena: &'a Bump, size: Option, ansi_codec: EncodingRef, ) -> Result> { let start = cursor.position() as usize; let buf = *cursor.get_ref(); let mut c = ByteCursor::with_pos(buf, start)?; - let v = Self::from_binxml_cursor(&mut c, chunk, size, ansi_codec)?; + let v = Self::from_binxml_cursor(&mut c, chunk, arena, size, ansi_codec)?; cursor.set_position(c.position()); Ok(v) } @@ -221,6 +246,7 @@ impl<'a> BinXmlValue<'a> { value_type: &BinXmlValueType, cursor: &mut 
ByteCursor<'a>, chunk: Option<&'a EvtxChunk<'a>>, + arena: &'a Bump, size: Option, ansi_codec: EncodingRef, ) -> Result> { @@ -243,13 +269,15 @@ impl<'a> BinXmlValue<'a> { } else { cursor.utf16_by_char_count_trimmed(sz_bytes / 2, "")? }; - BinXmlValue::StringType(s.unwrap_or_else(|| "".to_owned())) + let s = s.unwrap_or_default(); + BinXmlValue::StringType(Self::alloc_str(&s, arena)) } - (BinXmlValueType::StringType, None) => BinXmlValue::StringType( - cursor + (BinXmlValueType::StringType, None) => { + let s = cursor .len_prefixed_utf16_string(false, "")? - .unwrap_or_default(), - ), + .unwrap_or_default(); + BinXmlValue::StringType(Self::alloc_str(&s, arena)) + } (BinXmlValueType::AnsiStringType, Some(sz)) => { let sz_bytes = usize::from(sz); @@ -262,7 +290,7 @@ impl<'a> BinXmlValue<'a> { encoding_used: ansi_codec.name(), inner_message: m.to_string(), })?; - BinXmlValue::AnsiStringType(Cow::Owned(s)) + BinXmlValue::AnsiStringType(Self::alloc_str(&s, arena)) } // AnsiString are always sized according to docs (BinXmlValueType::AnsiStringType, None) => { @@ -325,11 +353,15 @@ impl<'a> BinXmlValue<'a> { (BinXmlValueType::SizeTType, Some(4)) => { let v = i32::from_le_bytes(cursor.array::<4>("sizet32")?); - BinXmlValue::HexInt32Type(Cow::Owned(format!("0x{:x}", v))) + let mut s = BumpString::new_in(arena); + write!(&mut s, "0x{:x}", v).expect("write to bump string"); + BinXmlValue::HexInt32Type(s) } (BinXmlValueType::SizeTType, Some(8)) => { let v = i64::from_le_bytes(cursor.array::<8>("sizet64")?); - BinXmlValue::HexInt64Type(Cow::Owned(format!("0x{:x}", v))) + let mut s = BumpString::new_in(arena); + write!(&mut s, "0x{:x}", v).expect("write to bump string"); + BinXmlValue::HexInt64Type(s) } (BinXmlValueType::SizeTType, _) => { return Err(DeserializationError::UnimplementedValueVariant { @@ -347,11 +379,15 @@ impl<'a> BinXmlValue<'a> { (BinXmlValueType::HexInt32Type, _) => { let v = i32::from_le_bytes(cursor.array::<4>("hex32")?); - 
BinXmlValue::HexInt32Type(Cow::Owned(format!("0x{:x}", v))) + let mut s = BumpString::new_in(arena); + write!(&mut s, "0x{:x}", v).expect("write to bump string"); + BinXmlValue::HexInt32Type(s) } (BinXmlValueType::HexInt64Type, _) => { let v = i64::from_le_bytes(cursor.array::<8>("hex64")?); - BinXmlValue::HexInt64Type(Cow::Owned(format!("0x{:x}", v))) + let mut s = BumpString::new_in(arena); + write!(&mut s, "0x{:x}", v).expect("write to bump string"); + BinXmlValue::HexInt64Type(s) } (BinXmlValueType::BinXmlType, size) => { @@ -360,10 +396,14 @@ impl<'a> BinXmlValue<'a> { let mut c = Cursor::new(cursor.buf()); c.set_position(start_pos); let tokens = BinXmlDeserializer::read_binxml_fragment( - &mut c, chunk, data_size, false, ansi_codec, + &mut c, chunk, arena, data_size, false, ansi_codec, )?; cursor.set_pos_u64(c.position(), "advance after BinXmlType")?; - BinXmlValue::BinXmlType(tokens) + let mut out = BumpVec::with_capacity_in(tokens.len(), arena); + for t in tokens { + out.push(t); + } + BinXmlValue::BinXmlType(out) } (BinXmlValueType::BinaryType, Some(sz)) => { @@ -376,108 +416,159 @@ impl<'a> BinXmlValue<'a> { let size_usize = usize::from(sz); let start = cursor.pos(); let end = start.saturating_add(size_usize); - let mut out: Vec = Vec::new(); + let mut out: BumpVec<'a, BumpString<'a>> = BumpVec::new_in(arena); while cursor.pos() < end { - out.push(cursor.null_terminated_utf16_string("string_array")?); + let s = cursor.null_terminated_utf16_string("string_array")?; + out.push(Self::alloc_str(&s, arena)); } BinXmlValue::StringArrayType(out) } (BinXmlValueType::Int8ArrayType, Some(sz)) => { let bytes = cursor.take_bytes(usize::from(sz), "i8_array")?; - BinXmlValue::Int8ArrayType(bytes.iter().map(|&b| b as i8).collect()) - } - (BinXmlValueType::UInt8ArrayType, Some(sz)) => BinXmlValue::UInt8ArrayType( - cursor.take_bytes(usize::from(sz), "u8_array")?.to_vec(), - ), - (BinXmlValueType::Int16ArrayType, Some(sz)) => BinXmlValue::Int16ArrayType( - 
cursor.read_sized_vec_aligned::<2, _>(sz, "i16_array", |_off, b| { - Ok(i16::from_le_bytes(*b)) - })?, - ), - (BinXmlValueType::UInt16ArrayType, Some(sz)) => BinXmlValue::UInt16ArrayType( - cursor.read_sized_vec_aligned::<2, _>(sz, "u16_array", |_off, b| { - Ok(u16::from_le_bytes(*b)) - })?, - ), - (BinXmlValueType::Int32ArrayType, Some(sz)) => BinXmlValue::Int32ArrayType( - cursor.read_sized_vec_aligned::<4, _>(sz, "i32_array", |_off, b| { - Ok(i32::from_le_bytes(*b)) - })?, - ), - (BinXmlValueType::UInt32ArrayType, Some(sz)) => BinXmlValue::UInt32ArrayType( - cursor.read_sized_vec_aligned::<4, _>(sz, "u32_array", |_off, b| { - Ok(u32::from_le_bytes(*b)) - })?, - ), - (BinXmlValueType::Int64ArrayType, Some(sz)) => BinXmlValue::Int64ArrayType( - cursor.read_sized_vec_aligned::<8, _>(sz, "i64_array", |_off, b| { - Ok(i64::from_le_bytes(*b)) - })?, - ), - (BinXmlValueType::UInt64ArrayType, Some(sz)) => BinXmlValue::UInt64ArrayType( - cursor.read_sized_vec_aligned::<8, _>(sz, "u64_array", |_off, b| { - Ok(u64::from_le_bytes(*b)) - })?, - ), - (BinXmlValueType::Real32ArrayType, Some(sz)) => BinXmlValue::Real32ArrayType( - cursor.read_sized_vec_aligned::<4, _>(sz, "f32_array", |_off, b| { - Ok(f32::from_le_bytes(*b)) - })?, - ), - (BinXmlValueType::Real64ArrayType, Some(sz)) => BinXmlValue::Real64ArrayType( - cursor.read_sized_vec_aligned::<8, _>(sz, "f64_array", |_off, b| { - Ok(f64::from_le_bytes(*b)) - })?, - ), - (BinXmlValueType::BoolArrayType, Some(sz)) => BinXmlValue::BoolArrayType( - cursor.read_sized_vec_aligned::<4, _>(sz, "bool_array", |off, b| { - let raw = i32::from_le_bytes(*b); - Ok(match raw { - 0 => false, - 1 => true, - other => { - warn!( - "invalid boolean value {} at offset {}; treating as {}", - other, - off, + let mut out = BumpVec::with_capacity_in(bytes.len(), arena); + for &b in bytes { + out.push(b as i8); + } + BinXmlValue::Int8ArrayType(out) + } + (BinXmlValueType::UInt8ArrayType, Some(sz)) => { + let bytes = 
cursor.take_bytes(usize::from(sz), "u8_array")?; + let mut out = BumpVec::with_capacity_in(bytes.len(), arena); + out.extend(bytes.iter().copied()); + BinXmlValue::UInt8ArrayType(out) + } + (BinXmlValueType::Int16ArrayType, Some(sz)) => { + BinXmlValue::Int16ArrayType(Self::vec_to_bump_vec( + cursor.read_sized_vec_aligned::<2, _>(sz, "i16_array", |_off, b| { + Ok(i16::from_le_bytes(*b)) + })?, + arena, + )) + } + (BinXmlValueType::UInt16ArrayType, Some(sz)) => { + BinXmlValue::UInt16ArrayType(Self::vec_to_bump_vec( + cursor.read_sized_vec_aligned::<2, _>(sz, "u16_array", |_off, b| { + Ok(u16::from_le_bytes(*b)) + })?, + arena, + )) + } + (BinXmlValueType::Int32ArrayType, Some(sz)) => { + BinXmlValue::Int32ArrayType(Self::vec_to_bump_vec( + cursor.read_sized_vec_aligned::<4, _>(sz, "i32_array", |_off, b| { + Ok(i32::from_le_bytes(*b)) + })?, + arena, + )) + } + (BinXmlValueType::UInt32ArrayType, Some(sz)) => { + BinXmlValue::UInt32ArrayType(Self::vec_to_bump_vec( + cursor.read_sized_vec_aligned::<4, _>(sz, "u32_array", |_off, b| { + Ok(u32::from_le_bytes(*b)) + })?, + arena, + )) + } + (BinXmlValueType::Int64ArrayType, Some(sz)) => { + BinXmlValue::Int64ArrayType(Self::vec_to_bump_vec( + cursor.read_sized_vec_aligned::<8, _>(sz, "i64_array", |_off, b| { + Ok(i64::from_le_bytes(*b)) + })?, + arena, + )) + } + (BinXmlValueType::UInt64ArrayType, Some(sz)) => { + BinXmlValue::UInt64ArrayType(Self::vec_to_bump_vec( + cursor.read_sized_vec_aligned::<8, _>(sz, "u64_array", |_off, b| { + Ok(u64::from_le_bytes(*b)) + })?, + arena, + )) + } + (BinXmlValueType::Real32ArrayType, Some(sz)) => { + BinXmlValue::Real32ArrayType(Self::vec_to_bump_vec( + cursor.read_sized_vec_aligned::<4, _>(sz, "f32_array", |_off, b| { + Ok(f32::from_le_bytes(*b)) + })?, + arena, + )) + } + (BinXmlValueType::Real64ArrayType, Some(sz)) => { + BinXmlValue::Real64ArrayType(Self::vec_to_bump_vec( + cursor.read_sized_vec_aligned::<8, _>(sz, "f64_array", |_off, b| { + Ok(f64::from_le_bytes(*b)) + })?, + 
arena, + )) + } + (BinXmlValueType::BoolArrayType, Some(sz)) => { + BinXmlValue::BoolArrayType(Self::vec_to_bump_vec( + cursor.read_sized_vec_aligned::<4, _>(sz, "bool_array", |off, b| { + let raw = i32::from_le_bytes(*b); + Ok(match raw { + 0 => false, + 1 => true, + other => { + warn!( + "invalid boolean value {} at offset {}; treating as {}", + other, + off, + other != 0 + ); other != 0 - ); - other != 0 - } - }) - })?, - ), - (BinXmlValueType::GuidArrayType, Some(sz)) => BinXmlValue::GuidArrayType( - cursor.read_sized_vec_aligned::<16, _>(sz, "guid_array", |off, b| { - Guid::from_buffer(b).map_err(|_| invalid_data("guid", off)) - })?, - ), - (BinXmlValueType::FileTimeArrayType, Some(sz)) => BinXmlValue::FileTimeArrayType( - cursor.read_sized_vec_aligned::<8, _>(sz, "filetime_array", |_off, b| { - Ok(filetime_to_datetime(u64::from_le_bytes(*b))) - })?, - ), - (BinXmlValueType::SysTimeArrayType, Some(sz)) => BinXmlValue::SysTimeArrayType( - cursor.read_sized_vec_aligned::<16, _>(sz, "systime_array", |_off, b| { - systime_from_bytes(b) - })?, - ), - (BinXmlValueType::SidArrayType, Some(sz)) => { - // SID size is variable; we can only preallocate with a heuristic. - BinXmlValue::SidArrayType(cursor.read_sized_vec(sz, 8, |c| read_sid(c))?) 
- } - (BinXmlValueType::HexInt32ArrayType, Some(sz)) => BinXmlValue::HexInt32ArrayType( - cursor.read_sized_vec_aligned::<4, _>(sz, "hex32_array", |_off, b| { - let v = i32::from_le_bytes(*b); - Ok(Cow::Owned(format!("0x{:x}", v))) - })?, - ), - (BinXmlValueType::HexInt64ArrayType, Some(sz)) => BinXmlValue::HexInt64ArrayType( - cursor.read_sized_vec_aligned::<8, _>(sz, "hex64_array", |_off, b| { - let v = i64::from_le_bytes(*b); - Ok(Cow::Owned(format!("0x{:x}", v))) - })?, + } + }) + })?, + arena, + )) + } + (BinXmlValueType::GuidArrayType, Some(sz)) => { + BinXmlValue::GuidArrayType(Self::vec_to_bump_vec( + cursor.read_sized_vec_aligned::<16, _>(sz, "guid_array", |off, b| { + Guid::from_buffer(b).map_err(|_| invalid_data("guid", off)) + })?, + arena, + )) + } + (BinXmlValueType::FileTimeArrayType, Some(sz)) => { + BinXmlValue::FileTimeArrayType(Self::vec_to_bump_vec( + cursor.read_sized_vec_aligned::<8, _>(sz, "filetime_array", |_off, b| { + Ok(filetime_to_datetime(u64::from_le_bytes(*b))) + })?, + arena, + )) + } + (BinXmlValueType::SysTimeArrayType, Some(sz)) => { + BinXmlValue::SysTimeArrayType(Self::vec_to_bump_vec( + cursor.read_sized_vec_aligned::<16, _>(sz, "systime_array", |_off, b| { + systime_from_bytes(b) + })?, + arena, + )) + } + (BinXmlValueType::SidArrayType, Some(sz)) => BinXmlValue::SidArrayType( + Self::vec_to_bump_vec(cursor.read_sized_vec(sz, 8, |c| read_sid(c))?, arena), ), + (BinXmlValueType::HexInt32ArrayType, Some(sz)) => { + let hex_strings = + cursor.read_sized_vec_aligned::<4, _>(sz, "hex32_array", |_off, b| { + let v = i32::from_le_bytes(*b); + let mut s = BumpString::new_in(arena); + write!(&mut s, "0x{:x}", v).expect("write to bump string"); + Ok(s) + })?; + BinXmlValue::HexInt32ArrayType(Self::vec_to_bump_vec(hex_strings, arena)) + } + (BinXmlValueType::HexInt64ArrayType, Some(sz)) => { + let hex_strings = + cursor.read_sized_vec_aligned::<8, _>(sz, "hex64_array", |_off, b| { + let v = i64::from_le_bytes(*b); + let mut s = 
BumpString::new_in(arena); + write!(&mut s, "0x{:x}", v).expect("write to bump string"); + Ok(s) + })?; + BinXmlValue::HexInt64ArrayType(Self::vec_to_bump_vec(hex_strings, arena)) + } _ => { return Err(DeserializationError::UnimplementedValueVariant { @@ -495,21 +586,23 @@ impl<'a> BinXmlValue<'a> { value_type: &BinXmlValueType, cursor: &mut Cursor<&'a [u8]>, chunk: Option<&'a EvtxChunk<'a>>, + arena: &'a Bump, size: Option, ansi_codec: EncodingRef, ) -> Result> { let start = cursor.position() as usize; let buf = *cursor.get_ref(); let mut c = ByteCursor::with_pos(buf, start)?; - let v = Self::deserialize_value_type_cursor(value_type, &mut c, chunk, size, ansi_codec)?; + let v = Self::deserialize_value_type_cursor( + value_type, &mut c, chunk, arena, size, ansi_codec, + )?; cursor.set_position(c.position()); Ok(v) } } -fn to_delimited_list(ns: impl AsRef>) -> String { - ns.as_ref() - .iter() +fn to_delimited_list(ns: &[N]) -> String { + ns.iter() .map(ToString::to_string) .collect::>() .join(",") @@ -519,8 +612,8 @@ impl<'c> From> for serde_json::Value { fn from(value: BinXmlValue<'c>) -> Self { match value { BinXmlValue::NullType => Value::Null, - BinXmlValue::StringType(s) => json!(s), - BinXmlValue::AnsiStringType(s) => json!(s.into_owned()), + BinXmlValue::StringType(s) => json!(s.as_str()), + BinXmlValue::AnsiStringType(s) => json!(s.as_str()), BinXmlValue::Int8Type(num) => json!(num), BinXmlValue::UInt8Type(num) => json!(num), BinXmlValue::Int16Type(num) => json!(num), @@ -547,30 +640,70 @@ impl<'c> From> for serde_json::Value { BinXmlValue::FileTimeType(tm) => json!(tm.format(DATETIME_FORMAT).to_string()), BinXmlValue::SysTimeType(tm) => json!(tm.format(DATETIME_FORMAT).to_string()), BinXmlValue::SidType(sid) => json!(sid.to_string()), - BinXmlValue::HexInt32Type(hex_string) => json!(hex_string), - BinXmlValue::HexInt64Type(hex_string) => json!(hex_string), - BinXmlValue::StringArrayType(s) => json!(s), - BinXmlValue::Int8ArrayType(numbers) => 
json!(numbers), - BinXmlValue::UInt8ArrayType(numbers) => json!(numbers), - BinXmlValue::Int16ArrayType(numbers) => json!(numbers), - BinXmlValue::UInt16ArrayType(numbers) => json!(numbers), - BinXmlValue::Int32ArrayType(numbers) => json!(numbers), - BinXmlValue::UInt32ArrayType(numbers) => json!(numbers), - BinXmlValue::Int64ArrayType(numbers) => json!(numbers), - BinXmlValue::UInt64ArrayType(numbers) => json!(numbers), - BinXmlValue::Real32ArrayType(numbers) => json!(numbers), - BinXmlValue::Real64ArrayType(numbers) => json!(numbers), - BinXmlValue::BoolArrayType(bools) => json!(bools), + BinXmlValue::HexInt32Type(hex_string) => json!(hex_string.as_str()), + BinXmlValue::HexInt64Type(hex_string) => json!(hex_string.as_str()), + BinXmlValue::StringArrayType(s) => { + json!(s.iter().map(|bs| bs.as_str()).collect::>()) + } + BinXmlValue::Int8ArrayType(numbers) => { + json!(numbers.iter().copied().collect::>()) + } + BinXmlValue::UInt8ArrayType(numbers) => { + json!(numbers.iter().copied().collect::>()) + } + BinXmlValue::Int16ArrayType(numbers) => { + json!(numbers.iter().copied().collect::>()) + } + BinXmlValue::UInt16ArrayType(numbers) => { + json!(numbers.iter().copied().collect::>()) + } + BinXmlValue::Int32ArrayType(numbers) => { + json!(numbers.iter().copied().collect::>()) + } + BinXmlValue::UInt32ArrayType(numbers) => { + json!(numbers.iter().copied().collect::>()) + } + BinXmlValue::Int64ArrayType(numbers) => { + json!(numbers.iter().copied().collect::>()) + } + BinXmlValue::UInt64ArrayType(numbers) => { + json!(numbers.iter().copied().collect::>()) + } + BinXmlValue::Real32ArrayType(numbers) => { + json!(numbers.iter().copied().collect::>()) + } + BinXmlValue::Real64ArrayType(numbers) => { + json!(numbers.iter().copied().collect::>()) + } + BinXmlValue::BoolArrayType(bools) => json!(bools.iter().copied().collect::>()), BinXmlValue::GuidArrayType(guids) => { json!(guids.iter().map(Guid::to_string).collect::>()) } - BinXmlValue::FileTimeArrayType(filetimes) 
=> json!(filetimes), - BinXmlValue::SysTimeArrayType(systimes) => json!(systimes), + BinXmlValue::FileTimeArrayType(filetimes) => { + json!( + filetimes + .iter() + .map(|tm| tm.format(DATETIME_FORMAT).to_string()) + .collect::>() + ) + } + BinXmlValue::SysTimeArrayType(systimes) => { + json!( + systimes + .iter() + .map(|tm| tm.format(DATETIME_FORMAT).to_string()) + .collect::>() + ) + } BinXmlValue::SidArrayType(sids) => { json!(sids.iter().map(Sid::to_string).collect::>()) } - BinXmlValue::HexInt32ArrayType(hex_strings) => json!(hex_strings), - BinXmlValue::HexInt64ArrayType(hex_strings) => json!(hex_strings), + BinXmlValue::HexInt32ArrayType(hex_strings) => { + json!(hex_strings.iter().map(|bs| bs.as_str()).collect::>()) + } + BinXmlValue::HexInt64ArrayType(hex_strings) => { + json!(hex_strings.iter().map(|bs| bs.as_str()).collect::>()) + } BinXmlValue::EvtHandle => { panic!("Unsupported conversion, call `expand_templates` first") } @@ -587,8 +720,8 @@ impl<'c> From<&'c BinXmlValue<'c>> for serde_json::Value { fn from(value: &'c BinXmlValue) -> Self { match value { BinXmlValue::NullType => Value::Null, - BinXmlValue::StringType(s) => json!(s), - BinXmlValue::AnsiStringType(s) => json!(s.as_ref()), + BinXmlValue::StringType(s) => json!(s.as_str()), + BinXmlValue::AnsiStringType(s) => json!(s.as_str()), BinXmlValue::Int8Type(num) => json!(num), BinXmlValue::UInt8Type(num) => json!(num), BinXmlValue::Int16Type(num) => json!(num), @@ -615,30 +748,70 @@ impl<'c> From<&'c BinXmlValue<'c>> for serde_json::Value { BinXmlValue::FileTimeType(tm) => json!(tm.format(DATETIME_FORMAT).to_string()), BinXmlValue::SysTimeType(tm) => json!(tm.format(DATETIME_FORMAT).to_string()), BinXmlValue::SidType(sid) => json!(sid.to_string()), - BinXmlValue::HexInt32Type(hex_string) => json!(hex_string), - BinXmlValue::HexInt64Type(hex_string) => json!(hex_string), - BinXmlValue::StringArrayType(s) => json!(s), - BinXmlValue::Int8ArrayType(numbers) => json!(numbers), - 
BinXmlValue::UInt8ArrayType(numbers) => json!(numbers), - BinXmlValue::Int16ArrayType(numbers) => json!(numbers), - BinXmlValue::UInt16ArrayType(numbers) => json!(numbers), - BinXmlValue::Int32ArrayType(numbers) => json!(numbers), - BinXmlValue::UInt32ArrayType(numbers) => json!(numbers), - BinXmlValue::Int64ArrayType(numbers) => json!(numbers), - BinXmlValue::UInt64ArrayType(numbers) => json!(numbers), - BinXmlValue::Real32ArrayType(numbers) => json!(numbers), - BinXmlValue::Real64ArrayType(numbers) => json!(numbers), - BinXmlValue::BoolArrayType(bools) => json!(bools), + BinXmlValue::HexInt32Type(hex_string) => json!(hex_string.as_str()), + BinXmlValue::HexInt64Type(hex_string) => json!(hex_string.as_str()), + BinXmlValue::StringArrayType(s) => { + json!(s.iter().map(|bs| bs.as_str()).collect::>()) + } + BinXmlValue::Int8ArrayType(numbers) => { + json!(numbers.iter().copied().collect::>()) + } + BinXmlValue::UInt8ArrayType(numbers) => { + json!(numbers.iter().copied().collect::>()) + } + BinXmlValue::Int16ArrayType(numbers) => { + json!(numbers.iter().copied().collect::>()) + } + BinXmlValue::UInt16ArrayType(numbers) => { + json!(numbers.iter().copied().collect::>()) + } + BinXmlValue::Int32ArrayType(numbers) => { + json!(numbers.iter().copied().collect::>()) + } + BinXmlValue::UInt32ArrayType(numbers) => { + json!(numbers.iter().copied().collect::>()) + } + BinXmlValue::Int64ArrayType(numbers) => { + json!(numbers.iter().copied().collect::>()) + } + BinXmlValue::UInt64ArrayType(numbers) => { + json!(numbers.iter().copied().collect::>()) + } + BinXmlValue::Real32ArrayType(numbers) => { + json!(numbers.iter().copied().collect::>()) + } + BinXmlValue::Real64ArrayType(numbers) => { + json!(numbers.iter().copied().collect::>()) + } + BinXmlValue::BoolArrayType(bools) => json!(bools.iter().copied().collect::>()), BinXmlValue::GuidArrayType(guids) => { json!(guids.iter().map(Guid::to_string).collect::>()) } - BinXmlValue::FileTimeArrayType(filetimes) => 
json!(filetimes), - BinXmlValue::SysTimeArrayType(systimes) => json!(systimes), + BinXmlValue::FileTimeArrayType(filetimes) => { + json!( + filetimes + .iter() + .map(|tm| tm.format(DATETIME_FORMAT).to_string()) + .collect::>() + ) + } + BinXmlValue::SysTimeArrayType(systimes) => { + json!( + systimes + .iter() + .map(|tm| tm.format(DATETIME_FORMAT).to_string()) + .collect::>() + ) + } BinXmlValue::SidArrayType(sids) => { json!(sids.iter().map(Sid::to_string).collect::>()) } - BinXmlValue::HexInt32ArrayType(hex_strings) => json!(hex_strings), - BinXmlValue::HexInt64ArrayType(hex_strings) => json!(hex_strings), + BinXmlValue::HexInt32ArrayType(hex_strings) => { + json!(hex_strings.iter().map(|bs| bs.as_str()).collect::>()) + } + BinXmlValue::HexInt64ArrayType(hex_strings) => { + json!(hex_strings.iter().map(|bs| bs.as_str()).collect::>()) + } BinXmlValue::EvtHandle => { panic!("Unsupported conversion, call `expand_templates` first") } @@ -655,8 +828,8 @@ impl BinXmlValue<'_> { pub fn as_cow_str(&self) -> Cow<'_, str> { match self { BinXmlValue::NullType => Cow::Borrowed(""), - BinXmlValue::StringType(s) => Cow::Borrowed(s.as_ref()), - BinXmlValue::AnsiStringType(s) => Cow::Borrowed(s.as_ref()), + BinXmlValue::StringType(s) => Cow::Borrowed(s.as_str()), + BinXmlValue::AnsiStringType(s) => Cow::Borrowed(s.as_str()), BinXmlValue::Int8Type(num) => Cow::Owned(num.to_string()), BinXmlValue::UInt8Type(num) => Cow::Owned(num.to_string()), BinXmlValue::Int16Type(num) => Cow::Owned(num.to_string()), @@ -680,26 +853,42 @@ impl BinXmlValue<'_> { BinXmlValue::FileTimeType(tm) => Cow::Owned(tm.format(DATETIME_FORMAT).to_string()), BinXmlValue::SysTimeType(tm) => Cow::Owned(tm.format(DATETIME_FORMAT).to_string()), BinXmlValue::SidType(sid) => Cow::Owned(sid.to_string()), - BinXmlValue::HexInt32Type(hex_string) => hex_string.clone(), - BinXmlValue::HexInt64Type(hex_string) => hex_string.clone(), - BinXmlValue::StringArrayType(s) => Cow::Owned(s.join(",")), - 
BinXmlValue::Int8ArrayType(numbers) => Cow::Owned(to_delimited_list(numbers)), - BinXmlValue::UInt8ArrayType(numbers) => Cow::Owned(to_delimited_list(numbers)), - BinXmlValue::Int16ArrayType(numbers) => Cow::Owned(to_delimited_list(numbers)), - BinXmlValue::UInt16ArrayType(numbers) => Cow::Owned(to_delimited_list(numbers)), - BinXmlValue::Int32ArrayType(numbers) => Cow::Owned(to_delimited_list(numbers)), - BinXmlValue::UInt32ArrayType(numbers) => Cow::Owned(to_delimited_list(numbers)), - BinXmlValue::Int64ArrayType(numbers) => Cow::Owned(to_delimited_list(numbers)), - BinXmlValue::UInt64ArrayType(numbers) => Cow::Owned(to_delimited_list(numbers)), - BinXmlValue::Real32ArrayType(numbers) => Cow::Owned(to_delimited_list(numbers)), - BinXmlValue::Real64ArrayType(numbers) => Cow::Owned(to_delimited_list(numbers)), - BinXmlValue::BoolArrayType(bools) => Cow::Owned(to_delimited_list(bools)), - BinXmlValue::GuidArrayType(guids) => Cow::Owned(to_delimited_list(guids)), - BinXmlValue::FileTimeArrayType(filetimes) => Cow::Owned(to_delimited_list(filetimes)), - BinXmlValue::SysTimeArrayType(systimes) => Cow::Owned(to_delimited_list(systimes)), - BinXmlValue::SidArrayType(sids) => Cow::Owned(to_delimited_list(sids)), - BinXmlValue::HexInt32ArrayType(hex_strings) => Cow::Owned(hex_strings.join(",")), - BinXmlValue::HexInt64ArrayType(hex_strings) => Cow::Owned(hex_strings.join(",")), + BinXmlValue::HexInt32Type(hex_string) => Cow::Borrowed(hex_string.as_str()), + BinXmlValue::HexInt64Type(hex_string) => Cow::Borrowed(hex_string.as_str()), + BinXmlValue::StringArrayType(s) => { + Cow::Owned(s.iter().map(|bs| bs.as_str()).collect::>().join(",")) + } + BinXmlValue::Int8ArrayType(numbers) => Cow::Owned(to_delimited_list(&numbers[..])), + BinXmlValue::UInt8ArrayType(numbers) => Cow::Owned(to_delimited_list(&numbers[..])), + BinXmlValue::Int16ArrayType(numbers) => Cow::Owned(to_delimited_list(&numbers[..])), + BinXmlValue::UInt16ArrayType(numbers) => 
Cow::Owned(to_delimited_list(&numbers[..])), + BinXmlValue::Int32ArrayType(numbers) => Cow::Owned(to_delimited_list(&numbers[..])), + BinXmlValue::UInt32ArrayType(numbers) => Cow::Owned(to_delimited_list(&numbers[..])), + BinXmlValue::Int64ArrayType(numbers) => Cow::Owned(to_delimited_list(&numbers[..])), + BinXmlValue::UInt64ArrayType(numbers) => Cow::Owned(to_delimited_list(&numbers[..])), + BinXmlValue::Real32ArrayType(numbers) => Cow::Owned(to_delimited_list(&numbers[..])), + BinXmlValue::Real64ArrayType(numbers) => Cow::Owned(to_delimited_list(&numbers[..])), + BinXmlValue::BoolArrayType(bools) => Cow::Owned(to_delimited_list(&bools[..])), + BinXmlValue::GuidArrayType(guids) => Cow::Owned(to_delimited_list(&guids[..])), + BinXmlValue::FileTimeArrayType(filetimes) => { + Cow::Owned(to_delimited_list(&filetimes[..])) + } + BinXmlValue::SysTimeArrayType(systimes) => Cow::Owned(to_delimited_list(&systimes[..])), + BinXmlValue::SidArrayType(sids) => Cow::Owned(to_delimited_list(&sids[..])), + BinXmlValue::HexInt32ArrayType(hex_strings) => Cow::Owned( + hex_strings + .iter() + .map(|bs| bs.as_str()) + .collect::>() + .join(","), + ), + BinXmlValue::HexInt64ArrayType(hex_strings) => Cow::Owned( + hex_strings + .iter() + .map(|bs| bs.as_str()) + .collect::>() + .join(","), + ), BinXmlValue::EvtHandle => { panic!("Unsupported conversion, call `expand_templates` first") } diff --git a/src/evtx_chunk.rs b/src/evtx_chunk.rs index 2121ebf6..f4359f46 100644 --- a/src/evtx_chunk.rs +++ b/src/evtx_chunk.rs @@ -5,6 +5,7 @@ use crate::err::{ use crate::evtx_record::{EVTX_RECORD_HEADER_SIZE, EvtxRecord, EvtxRecordHeader}; use crate::utils::bytes; +use bumpalo::Bump; use log::{debug, info, trace}; use std::io::Cursor; @@ -56,6 +57,9 @@ pub struct EvtxChunkHeader { pub struct EvtxChunkData { pub header: EvtxChunkHeader, pub data: Vec, + /// Arena allocator for per-chunk allocations. + /// Provides O(1) allocation and O(1) mass deallocation when chunk is dropped. 
+ pub arena: Bump, } impl EvtxChunkData { @@ -64,7 +68,14 @@ impl EvtxChunkData { pub fn new(data: Vec, validate_checksum: bool) -> EvtxChunkResult { let header = EvtxChunkHeader::from_bytes(&data)?; - let chunk = EvtxChunkData { header, data }; + // Arena with 64KB initial capacity - typical chunk processing needs + let arena = Bump::with_capacity(64 * 1024); + + let chunk = EvtxChunkData { + header, + data, + arena, + }; if validate_checksum && !chunk.validate_checksum() { // TODO: return checksum here. return Err(ChunkError::InvalidChunkChecksum { @@ -78,7 +89,13 @@ impl EvtxChunkData { /// Require that the settings live at least as long as &self. pub fn parse(&mut self, settings: Arc) -> EvtxChunkResult> { - EvtxChunk::new(&self.data, &self.header, Arc::clone(&settings)) + EvtxChunk::new(&self.data, &self.header, &self.arena, Arc::clone(&settings)) + } + + /// Reset the arena for reuse between chunk processing cycles. + /// This allows the same EvtxChunkData to be reused with fresh arena memory. + pub fn reset_arena(&mut self) { + self.arena.reset(); } pub fn validate_data_checksum(&self) -> bool { @@ -157,6 +174,9 @@ pub struct EvtxChunk<'chunk> { pub header: &'chunk EvtxChunkHeader, pub string_cache: StringCache, pub template_table: TemplateCache<'chunk>, + /// Arena allocator for temporary allocations during parsing. + /// Allocations are O(1) and freed atomically when chunk is dropped. 
+ pub arena: &'chunk Bump, pub settings: Arc, } @@ -166,6 +186,7 @@ impl<'chunk> EvtxChunk<'chunk> { pub fn new( data: &'chunk [u8], header: &'chunk EvtxChunkHeader, + arena: &'chunk Bump, settings: Arc, ) -> EvtxChunkResult> { let _cursor = Cursor::new(data); @@ -175,16 +196,21 @@ impl<'chunk> EvtxChunk<'chunk> { .map_err(|e| ChunkError::FailedToBuildStringCache { source: e })?; info!("Initializing template cache"); - let template_table = - TemplateCache::populate(data, &header.template_offsets, settings.get_ansi_codec()) - .map_err(|e| ChunkError::FailedToBuildTemplateCache { - message: e.to_string(), - source: Box::new(e), - })?; + let template_table = TemplateCache::populate( + data, + &header.template_offsets, + arena, + settings.get_ansi_codec(), + ) + .map_err(|e| ChunkError::FailedToBuildTemplateCache { + message: e.to_string(), + source: Box::new(e), + })?; Ok(EvtxChunk { header, data, + arena, string_cache, template_table, settings, @@ -311,6 +337,7 @@ impl<'a> Iterator for IterChunkRecords<'a> { self.chunk.data, record_start + EVTX_RECORD_HEADER_SIZE as u64, Some(self.chunk), + self.chunk.arena, false, self.settings.get_ansi_codec(), ); diff --git a/src/evtx_parser.rs b/src/evtx_parser.rs index d893f79e..efd9050b 100644 --- a/src/evtx_parser.rs +++ b/src/evtx_parser.rs @@ -724,7 +724,7 @@ mod tests { } } - #[test] + #[test] fn test_parse_event_with_zero_() { ensure_env_logger_initialized(); let evtx_file = include_bytes!("../samples/new-user-security.evtx"); diff --git a/src/json_output.rs b/src/json_output.rs index bf873807..98b5c5e8 100644 --- a/src/json_output.rs +++ b/src/json_output.rs @@ -341,8 +341,8 @@ impl BinXmlOutput for JsonOutput { // A small optimization in case we already have an owned string. 
fn value_to_json(value: Cow) -> Value { - if let Cow::Owned(BinXmlValue::StringType(value)) = value { - json!(value) + if let Cow::Owned(BinXmlValue::StringType(ref s)) = value { + json!(s.as_str()) } else { value.into_owned().into() } @@ -438,9 +438,25 @@ impl BinXmlOutput for JsonOutput { let xml_event = BytesText::from_escaped(&entity_ref); match xml_event.unescape() { Ok(escaped) => { - let as_string = escaped.to_string(); - - self.visit_characters(Cow::Owned(BinXmlValue::StringType(as_string)))?; + // Directly set string value without creating BinXmlValue (which would need arena) + let separate_json_attributes = self.separate_json_attributes; + let current_value = self.get_or_create_current_path(); + let json_str = json!(escaped.as_ref()); + + match current_value { + Value::Null => { + *current_value = json_str; + } + Value::Object(object) => { + if separate_json_attributes && object.is_empty() { + *current_value = json_str; + } + } + Value::String(s) => { + s.push_str(escaped.as_ref()); + } + _ => {} + } Ok(()) } Err(_) => Err(JsonStructureError { @@ -476,6 +492,8 @@ mod tests { use crate::binxml::value_variant::BinXmlValue; use crate::model::xml::{XmlAttribute, XmlElement}; use crate::{BinXmlOutput, JsonOutput, ParserSettings}; + use bumpalo::Bump; + use bumpalo::collections::String as BumpString; use pretty_assertions::assert_eq; use quick_xml::Reader; use quick_xml::events::{BytesStart, Event}; @@ -492,7 +510,7 @@ mod tests { } } - fn event_to_element(event: BytesStart) -> XmlElement { + fn event_to_element<'a>(event: BytesStart, arena: &'a Bump) -> XmlElement<'a> { let mut attrs = vec![]; for attr in event.attributes() { @@ -500,7 +518,10 @@ mod tests { attrs.push(XmlAttribute { name: Cow::Owned(BinXmlName::from_string(bytes_to_string(attr.key.as_ref()))), // We have to compromise here and assume all values are strings. 
- value: Cow::Owned(BinXmlValue::StringType(bytes_to_string(&attr.value))), + value: Cow::Owned(BinXmlValue::StringType(BumpString::from_str_in( + &bytes_to_string(&attr.value), + arena, + ))), }); } @@ -514,6 +535,7 @@ mod tests { /// Converts an XML string to JSON, panics in xml is invalid. fn xml_to_json(xml: &str, settings: &ParserSettings) -> String { + let arena = Bump::new(); let mut reader = Reader::from_str(xml); reader.config_mut().trim_text(true); @@ -525,7 +547,7 @@ mod tests { Ok(event) => match event { Event::Start(start) => { output - .visit_open_start_element(&event_to_element(start)) + .visit_open_start_element(&event_to_element(start, &arena)) .expect("Open start element"); } Event::End(_) => output @@ -533,7 +555,7 @@ mod tests { .expect("Close element"), Event::Empty(empty) => { output - .visit_open_start_element(&event_to_element(empty)) + .visit_open_start_element(&event_to_element(empty, &arena)) .expect("Empty Open start element"); output @@ -541,9 +563,9 @@ mod tests { .expect("Empty Close"); } Event::Text(text) => output - .visit_characters(Cow::Owned(BinXmlValue::StringType(bytes_to_string( - text.as_ref(), - )))) + .visit_characters(Cow::Owned(BinXmlValue::StringType( + BumpString::from_str_in(&bytes_to_string(text.as_ref()), &arena), + ))) .expect("Text element"), Event::Comment(_) => {} Event::CData(_) => unimplemented!(), diff --git a/src/json_stream_output.rs b/src/json_stream_output.rs index 538f743f..7b6875c8 100644 --- a/src/json_stream_output.rs +++ b/src/json_stream_output.rs @@ -5,6 +5,8 @@ use crate::xml_output::BinXmlOutput; use crate::binxml::name::BinXmlName; use crate::binxml::value_variant::BinXmlValue; use crate::model::xml::{BinXmlPI, XmlElement}; +use chrono::{Datelike, Timelike}; +use hashbrown::HashSet; use quick_xml::events::BytesText; use serde_json::Value as JsonValue; use std::borrow::Cow; @@ -46,7 +48,7 @@ struct ObjectFrame { /// Whether we've already written any field in this object. 
first_field: bool, /// Keys already used in this object (for duplicate key handling). - used_keys: std::collections::HashSet, + used_keys: HashSet, } pub struct JsonStreamOutput { @@ -129,6 +131,283 @@ impl JsonStreamOutput { .map_err(SerializationError::from) } + /// Write a JSON string directly without escaping. + /// Only safe for NCName strings (XML element/attribute names) which don't contain + /// characters that need JSON escaping (no quotes, backslashes, control chars). + #[inline] + fn write_json_string_ncname(&mut self, s: &str) -> SerializationResult<()> { + self.write_bytes(b"\"")?; + self.write_bytes(s.as_bytes())?; + self.write_bytes(b"\"") + } + + /// Write a JSON string with proper escaping for special characters. + /// Uses a fast path for strings that don't need escaping. + fn write_json_string_escaped(&mut self, s: &str) -> SerializationResult<()> { + // Fast path: check if escaping is needed + let needs_escape = s + .bytes() + .any(|b| matches!(b, b'"' | b'\\' | b'\n' | b'\r' | b'\t' | 0..=0x1F)); + + if !needs_escape { + return self.write_json_string_ncname(s); + } + + // Slow path: escape special characters + self.write_bytes(b"\"")?; + for c in s.chars() { + match c { + '"' => self.write_bytes(b"\\\"")?, + '\\' => self.write_bytes(b"\\\\")?, + '\n' => self.write_bytes(b"\\n")?, + '\r' => self.write_bytes(b"\\r")?, + '\t' => self.write_bytes(b"\\t")?, + c if c.is_control() => { + write!(self.writer_mut(), "\\u{:04x}", c as u32) + .map_err(SerializationError::from)?; + } + c => { + let mut buf = [0u8; 4]; + let encoded = c.encode_utf8(&mut buf); + self.write_bytes(encoded.as_bytes())?; + } + } + } + self.write_bytes(b"\"") + } + + /// Write a BinXmlValue directly to JSON output without creating intermediate JsonValue. + /// This is the zero-allocation path for value serialization. 
+ fn write_binxml_value(&mut self, value: &BinXmlValue) -> SerializationResult<()> { + match value { + BinXmlValue::NullType => self.write_bytes(b"null"), + BinXmlValue::StringType(s) => self.write_json_string_escaped(s.as_str()), + BinXmlValue::AnsiStringType(s) => self.write_json_string_escaped(s.as_str()), + BinXmlValue::Int8Type(n) => { + let mut buf = itoa::Buffer::new(); + self.write_bytes(buf.format(*n).as_bytes()) + } + BinXmlValue::UInt8Type(n) => { + let mut buf = itoa::Buffer::new(); + self.write_bytes(buf.format(*n).as_bytes()) + } + BinXmlValue::Int16Type(n) => { + let mut buf = itoa::Buffer::new(); + self.write_bytes(buf.format(*n).as_bytes()) + } + BinXmlValue::UInt16Type(n) => { + let mut buf = itoa::Buffer::new(); + self.write_bytes(buf.format(*n).as_bytes()) + } + BinXmlValue::Int32Type(n) => { + let mut buf = itoa::Buffer::new(); + self.write_bytes(buf.format(*n).as_bytes()) + } + BinXmlValue::UInt32Type(n) => { + let mut buf = itoa::Buffer::new(); + self.write_bytes(buf.format(*n).as_bytes()) + } + BinXmlValue::Int64Type(n) => { + let mut buf = itoa::Buffer::new(); + self.write_bytes(buf.format(*n).as_bytes()) + } + BinXmlValue::UInt64Type(n) => { + let mut buf = itoa::Buffer::new(); + self.write_bytes(buf.format(*n).as_bytes()) + } + BinXmlValue::Real32Type(n) => { + let mut buf = ryu::Buffer::new(); + self.write_bytes(buf.format(*n).as_bytes()) + } + BinXmlValue::Real64Type(n) => { + let mut buf = ryu::Buffer::new(); + self.write_bytes(buf.format(*n).as_bytes()) + } + BinXmlValue::BoolType(b) => self.write_bytes(if *b { b"true" } else { b"false" }), + BinXmlValue::BinaryType(bytes) => { + self.write_bytes(b"\"")?; + for byte in *bytes { + write!(self.writer_mut(), "{:02X}", byte).map_err(SerializationError::from)?; + } + self.write_bytes(b"\"") + } + BinXmlValue::GuidType(guid) => { + // Use Guid's Display impl, write as JSON string + write!(self.writer_mut(), "\"{}\"", guid).map_err(SerializationError::from) + } + BinXmlValue::SizeTType(n) => 
{ + let mut buf = itoa::Buffer::new(); + self.write_bytes(buf.format(*n).as_bytes()) + } + BinXmlValue::FileTimeType(dt) | BinXmlValue::SysTimeType(dt) => { + // Fast ISO-8601 with microseconds (avoids strftime parser overhead): + // YYYY-MM-DDTHH:MM:SS.ffffffZ + write!( + self.writer_mut(), + "\"{:04}-{:02}-{:02}T{:02}:{:02}:{:02}.{:06}Z\"", + dt.year(), + dt.month(), + dt.day(), + dt.hour(), + dt.minute(), + dt.second(), + dt.timestamp_subsec_micros() + ) + .map_err(SerializationError::from) + } + BinXmlValue::SidType(sid) => { + self.write_bytes(b"\"")?; + write!(self.writer_mut(), "{}", sid).map_err(SerializationError::from)?; + self.write_bytes(b"\"") + } + BinXmlValue::HexInt32Type(s) | BinXmlValue::HexInt64Type(s) => { + self.write_json_string_escaped(s.as_str()) + } + BinXmlValue::EvtHandle | BinXmlValue::EvtXml => self.write_bytes(b"null"), + // Arrays + BinXmlValue::StringArrayType(arr) => { + self.write_bytes(b"[")?; + for (i, s) in arr.iter().enumerate() { + if i > 0 { + self.write_bytes(b",")?; + } + self.write_json_string_escaped(s.as_str())?; + } + self.write_bytes(b"]") + } + BinXmlValue::Int8ArrayType(arr) => self.write_int_array(arr.iter().map(|n| *n as i64)), + BinXmlValue::UInt8ArrayType(arr) => { + self.write_uint_array(arr.iter().map(|n| *n as u64)) + } + BinXmlValue::Int16ArrayType(arr) => self.write_int_array(arr.iter().map(|n| *n as i64)), + BinXmlValue::UInt16ArrayType(arr) => { + self.write_uint_array(arr.iter().map(|n| *n as u64)) + } + BinXmlValue::Int32ArrayType(arr) => self.write_int_array(arr.iter().map(|n| *n as i64)), + BinXmlValue::UInt32ArrayType(arr) => { + self.write_uint_array(arr.iter().map(|n| *n as u64)) + } + BinXmlValue::Int64ArrayType(arr) => self.write_int_array(arr.iter().copied()), + BinXmlValue::UInt64ArrayType(arr) => self.write_uint_array(arr.iter().copied()), + BinXmlValue::Real32ArrayType(arr) => { + self.write_bytes(b"[")?; + for (i, n) in arr.iter().enumerate() { + if i > 0 { + self.write_bytes(b",")?; + } + 
let mut buf = ryu::Buffer::new(); + self.write_bytes(buf.format(*n).as_bytes())?; + } + self.write_bytes(b"]") + } + BinXmlValue::Real64ArrayType(arr) => { + self.write_bytes(b"[")?; + for (i, n) in arr.iter().enumerate() { + if i > 0 { + self.write_bytes(b",")?; + } + let mut buf = ryu::Buffer::new(); + self.write_bytes(buf.format(*n).as_bytes())?; + } + self.write_bytes(b"]") + } + BinXmlValue::BoolArrayType(arr) => { + self.write_bytes(b"[")?; + for (i, b) in arr.iter().enumerate() { + if i > 0 { + self.write_bytes(b",")?; + } + self.write_bytes(if *b { b"true" } else { b"false" })?; + } + self.write_bytes(b"]") + } + BinXmlValue::GuidArrayType(arr) => { + self.write_bytes(b"[")?; + for (i, guid) in arr.iter().enumerate() { + if i > 0 { + self.write_bytes(b",")?; + } + write!(self.writer_mut(), "\"{}\"", guid).map_err(SerializationError::from)?; + } + self.write_bytes(b"]") + } + BinXmlValue::FileTimeArrayType(arr) | BinXmlValue::SysTimeArrayType(arr) => { + self.write_bytes(b"[")?; + for (i, dt) in arr.iter().enumerate() { + if i > 0 { + self.write_bytes(b",")?; + } + write!( + self.writer_mut(), + "\"{}\"", + dt.format("%Y-%m-%dT%H:%M:%S%.6fZ") + ) + .map_err(SerializationError::from)?; + } + self.write_bytes(b"]") + } + BinXmlValue::SidArrayType(arr) => { + self.write_bytes(b"[")?; + for (i, sid) in arr.iter().enumerate() { + if i > 0 { + self.write_bytes(b",")?; + } + self.write_bytes(b"\"")?; + write!(self.writer_mut(), "{}", sid).map_err(SerializationError::from)?; + self.write_bytes(b"\"")?; + } + self.write_bytes(b"]") + } + BinXmlValue::HexInt32ArrayType(arr) | BinXmlValue::HexInt64ArrayType(arr) => { + self.write_bytes(b"[")?; + for (i, s) in arr.iter().enumerate() { + if i > 0 { + self.write_bytes(b",")?; + } + self.write_json_string_escaped(s.as_str())?; + } + self.write_bytes(b"]") + } + BinXmlValue::AnsiStringArrayType + | BinXmlValue::BinaryArrayType + | BinXmlValue::SizeTArrayType + | BinXmlValue::EvtArrayHandle + | BinXmlValue::BinXmlArrayType + 
| BinXmlValue::EvtXmlArrayType => self.write_bytes(b"null"), + BinXmlValue::BinXmlType(_) => self.write_bytes(b"null"), + } + } + + /// Helper for writing integer arrays + fn write_int_array(&mut self, iter: impl Iterator) -> SerializationResult<()> { + self.write_bytes(b"[")?; + let mut buf = itoa::Buffer::new(); + let mut first = true; + for n in iter { + if !first { + self.write_bytes(b",")?; + } + first = false; + self.write_bytes(buf.format(n).as_bytes())?; + } + self.write_bytes(b"]") + } + + /// Helper for writing unsigned integer arrays + fn write_uint_array(&mut self, iter: impl Iterator) -> SerializationResult<()> { + self.write_bytes(b"[")?; + let mut buf = itoa::Buffer::new(); + let mut first = true; + for n in iter { + if !first { + self.write_bytes(b",")?; + } + first = false; + self.write_bytes(buf.format(n).as_bytes())?; + } + self.write_bytes(b"]") + } + fn current_frame_mut(&mut self) -> &mut ObjectFrame { self.frames .last_mut() @@ -175,17 +454,37 @@ impl JsonStreamOutput { fn write_key(&mut self, key: &str) -> SerializationResult<()> { self.write_comma_if_needed()?; - // Check for duplicate keys and find a unique name - let unique_key = self.reserve_unique_key(key); + // Fast path: avoid allocating a second String for the common case. + // Reserve the key in the set, but write from `&str` directly. + let frame = self + .frames + .last_mut() + .expect("no current JSON object frame"); - serde_json::to_writer(self.writer_mut(), &unique_key).map_err(SerializationError::from)?; + if frame.used_keys.insert(key.to_owned()) { + // Keys derived from XML NCName don't need escaping + self.write_json_string_ncname(key)?; + } else { + // Find next available suffix. 
+ let mut suffix = 1; + loop { + let candidate = format!("{}_{}", key, suffix); + if !frame.used_keys.contains(&candidate) { + frame.used_keys.insert(candidate.clone()); + self.write_json_string_ncname(&candidate)?; + break; + } + suffix += 1; + } + } self.write_bytes(b":") } /// Write a pre-reserved key directly (no duplicate checking needed). fn write_reserved_key(&mut self, key: &str) -> SerializationResult<()> { self.write_comma_if_needed()?; - serde_json::to_writer(self.writer_mut(), key).map_err(SerializationError::from)?; + // Keys derived from XML NCName don't need escaping + self.write_json_string_ncname(key)?; self.write_bytes(b":") } @@ -195,7 +494,8 @@ impl JsonStreamOutput { self.write_bytes(b"{")?; self.frames.push(ObjectFrame { first_field: true, - used_keys: std::collections::HashSet::new(), + // Heuristic: nested objects tend to have a moderate number of keys. + used_keys: HashSet::with_capacity(32), }); Ok(()) } @@ -233,7 +533,7 @@ impl JsonStreamOutput { self.write_bytes(b"{")?; self.frames.push(ObjectFrame { first_field: true, - used_keys: std::collections::HashSet::new(), + used_keys: HashSet::with_capacity(32), }); self.elements[parent_index].kind = ElementValueKind::Object; @@ -253,7 +553,7 @@ impl JsonStreamOutput { self.write_bytes(b"{")?; self.frames.push(ObjectFrame { first_field: true, - used_keys: std::collections::HashSet::new(), + used_keys: HashSet::with_capacity(32), }); // Write the buffered text as #text if not in separate mode @@ -355,6 +655,43 @@ impl JsonStreamOutput { self.data_inside_element = false; Ok(()) } + + /// Helper to handle entity reference strings without needing arena for BinXmlValue + fn handle_entity_string(&mut self, s: &str) -> SerializationResult<()> { + // Aggregated `......` case. 
+ if let Some(owner_depth) = self.data_owner_depth { + let current_depth = self.elements.len(); + if self.data_inside_element && current_depth == owner_depth { + self.write_json_string_escaped(s)?; + return Ok(()); + } + } + + let Some(index) = self.elements.len().checked_sub(1) else { + return Ok(()); + }; + + let kind = self.elements[index].kind; + let json_value = JsonValue::String(s.to_string()); + + match kind { + ElementValueKind::Pending => { + self.elements[index].buffered_values.push(json_value); + self.elements[index].kind = ElementValueKind::Scalar; + } + ElementValueKind::Scalar => { + self.elements[index].buffered_values.push(json_value); + } + ElementValueKind::Object => { + // Match legacy `JsonOutput`: once an element has been materialized as an object + // (attributes and/or child elements), entity references are ignored. + let _ = json_value; + return Ok(()); + } + } + + Ok(()) + } } impl BinXmlOutput for JsonStreamOutput { @@ -363,7 +700,8 @@ impl BinXmlOutput for JsonStreamOutput { self.write_bytes(b"{")?; self.frames.push(ObjectFrame { first_field: true, - used_keys: std::collections::HashSet::new(), + // Root objects can have many keys; pre-reserve to reduce rehashing. + used_keys: HashSet::with_capacity(128), }); Ok(()) } @@ -443,8 +781,7 @@ impl BinXmlOutput for JsonStreamOutput { let mut has_json_attributes = false; if !is_data { for attr in &element.attributes { - let json_value: JsonValue = JsonValue::from(attr.value.as_ref()); - if !json_value.is_null() { + if !matches!(attr.value.as_ref(), BinXmlValue::NullType) { has_json_attributes = true; break; } @@ -471,15 +808,14 @@ impl BinXmlOutput for JsonStreamOutput { if !first_field { self.write_bytes(b",")?; } - serde_json::to_writer(self.writer_mut(), "#attributes") - .map_err(SerializationError::from)?; - self.write_bytes(b":")?; + // "#attributes" is a fixed ASCII key, no escaping needed + self.write_bytes(b"\"#attributes\":")?; // Start attributes object. 
self.write_bytes(b"{")?; self.frames.push(ObjectFrame { first_field: true, - used_keys: std::collections::HashSet::new(), + used_keys: HashSet::new(), }); { @@ -491,8 +827,7 @@ impl BinXmlOutput for JsonStreamOutput { continue; } - let json_value: JsonValue = JsonValue::from(attr.value.as_ref()); - if json_value.is_null() { + if matches!(attr.value.as_ref(), BinXmlValue::NullType) { continue; } @@ -507,11 +842,10 @@ impl BinXmlOutput for JsonStreamOutput { if !is_first { self.write_bytes(b",")?; } - serde_json::to_writer(self.writer_mut(), attr_key) - .map_err(SerializationError::from)?; + // Attribute keys are XML NCName, no escaping needed + self.write_json_string_ncname(attr_key)?; self.write_bytes(b":")?; - serde_json::to_writer(self.writer_mut(), &json_value) - .map_err(SerializationError::from)?; + self.write_binxml_value(attr.value.as_ref())?; } } @@ -541,14 +875,13 @@ impl BinXmlOutput for JsonStreamOutput { self.write_bytes(b"{")?; self.frames.push(ObjectFrame { first_field: true, - used_keys: std::collections::HashSet::new(), + used_keys: HashSet::new(), }); { for attr in &element.attributes { let attr_name = attr.name.as_str(); - let json_value: JsonValue = JsonValue::from(attr.value.as_ref()); - if json_value.is_null() { + if matches!(attr.value.as_ref(), BinXmlValue::NullType) { continue; } @@ -563,11 +896,10 @@ impl BinXmlOutput for JsonStreamOutput { if !is_first { self.write_bytes(b",")?; } - serde_json::to_writer(self.writer_mut(), attr_name) - .map_err(SerializationError::from)?; + // Attribute names are XML NCName, no escaping needed + self.write_json_string_ncname(attr_name)?; self.write_bytes(b":")?; - serde_json::to_writer(self.writer_mut(), &json_value) - .map_err(SerializationError::from)?; + self.write_binxml_value(attr.value.as_ref())?; } } @@ -671,9 +1003,8 @@ impl BinXmlOutput for JsonStreamOutput { if !is_first { self.write_bytes(b",")?; } - serde_json::to_writer(self.writer_mut(), "#text") - .map_err(SerializationError::from)?; - 
self.write_bytes(b":")?; + // "#text" is a fixed ASCII key, no escaping needed + self.write_bytes(b"\"#text\":")?; if elem.buffered_values.len() == 1 { // Single value: write directly. @@ -768,8 +1099,8 @@ impl BinXmlOutput for JsonStreamOutput { let xml_event = BytesText::from_escaped(&entity_ref); match xml_event.unescape() { Ok(escaped) => { - let as_string = escaped.to_string(); - self.visit_characters(Cow::Owned(BinXmlValue::StringType(as_string))) + // Directly handle string without creating BinXmlValue (which would need arena) + self.handle_entity_string(escaped.as_ref()) } Err(_) => Err(SerializationError::JsonStructureError { message: format!("Unterminated XML Entity {}", entity_ref), @@ -797,6 +1128,8 @@ mod tests { use crate::binxml::value_variant::BinXmlValue; use crate::model::xml::{XmlAttribute, XmlElement}; use crate::{BinXmlOutput, JsonOutput, ParserSettings}; + use bumpalo::Bump; + use bumpalo::collections::String as BumpString; use pretty_assertions::assert_eq; use quick_xml::Reader; use quick_xml::events::{BytesStart, Event}; @@ -806,7 +1139,7 @@ mod tests { String::from_utf8(bytes.to_vec()).expect("UTF8 Input") } - fn event_to_element(event: BytesStart) -> XmlElement { + fn event_to_element<'a>(event: BytesStart, arena: &'a Bump) -> XmlElement<'a> { let mut attrs = vec![]; for attr in event.attributes() { @@ -814,7 +1147,10 @@ mod tests { attrs.push(XmlAttribute { name: Cow::Owned(BinXmlName::from_string(bytes_to_string(attr.key.as_ref()))), // We have to compromise here and assume all values are strings. - value: Cow::Owned(BinXmlValue::StringType(bytes_to_string(&attr.value))), + value: Cow::Owned(BinXmlValue::StringType(BumpString::from_str_in( + &bytes_to_string(&attr.value), + arena, + ))), }); } @@ -828,6 +1164,7 @@ mod tests { /// Converts an XML string to JSON using the legacy `JsonOutput`. 
fn xml_to_json_legacy(xml: &str, settings: &ParserSettings) -> String { + let arena = Bump::new(); let mut reader = Reader::from_str(xml); reader.config_mut().trim_text(true); @@ -840,7 +1177,7 @@ mod tests { match reader.read_event() { Ok(event) => match event { Event::Start(start) => { - let elem = event_to_element(start); + let elem = event_to_element(start, &arena); output .visit_open_start_element(&elem) .expect("Open start element"); @@ -851,16 +1188,16 @@ mod tests { output.visit_close_element(&elem).expect("Close element"); } Event::Empty(empty) => { - let elem = event_to_element(empty); + let elem = event_to_element(empty, &arena); output .visit_open_start_element(&elem) .expect("Empty Open start element"); output.visit_close_element(&elem).expect("Empty Close"); } Event::Text(text) => output - .visit_characters(Cow::Owned(BinXmlValue::StringType(bytes_to_string( - text.as_ref(), - )))) + .visit_characters(Cow::Owned(BinXmlValue::StringType( + BumpString::from_str_in(&bytes_to_string(text.as_ref()), &arena), + ))) .expect("Text element"), Event::Comment(_) => {} Event::CData(_) => unimplemented!(), @@ -881,6 +1218,7 @@ mod tests { /// Converts an XML string to JSON using the streaming `JsonStreamOutput`. 
fn xml_to_json_streaming(xml: &str, settings: &ParserSettings) -> String { + let arena = Bump::new(); let mut reader = Reader::from_str(xml); reader.config_mut().trim_text(true); @@ -894,7 +1232,7 @@ mod tests { match reader.read_event() { Ok(event) => match event { Event::Start(start) => { - let elem = event_to_element(start); + let elem = event_to_element(start, &arena); output .visit_open_start_element(&elem) .expect("Open start element"); @@ -905,16 +1243,16 @@ mod tests { output.visit_close_element(&elem).expect("Close element"); } Event::Empty(empty) => { - let elem = event_to_element(empty); + let elem = event_to_element(empty, &arena); output .visit_open_start_element(&elem) .expect("Empty Open start element"); output.visit_close_element(&elem).expect("Empty Close"); } Event::Text(text) => output - .visit_characters(Cow::Owned(BinXmlValue::StringType(bytes_to_string( - text.as_ref(), - )))) + .visit_characters(Cow::Owned(BinXmlValue::StringType( + BumpString::from_str_in(&bytes_to_string(text.as_ref()), &arena), + ))) .expect("Text element"), Event::Comment(_) => {} Event::CData(_) => unimplemented!(), @@ -1071,6 +1409,8 @@ mod tests { fn test_multiple_character_nodes_concatenation() { use crate::model::xml::XmlElement; + let arena = Bump::new(); + // Test by directly calling the visitor methods to simulate multiple character nodes let settings = ParserSettings::new().num_threads(1); @@ -1088,10 +1428,14 @@ mod tests { legacy_output.visit_open_start_element(&event_elem).unwrap(); legacy_output.visit_open_start_element(&msg_elem).unwrap(); legacy_output - .visit_characters(Cow::Owned(BinXmlValue::StringType("Part1".to_string()))) + .visit_characters(Cow::Owned(BinXmlValue::StringType( + BumpString::from_str_in("Part1", &arena), + ))) .unwrap(); legacy_output - .visit_characters(Cow::Owned(BinXmlValue::StringType("Part2".to_string()))) + .visit_characters(Cow::Owned(BinXmlValue::StringType( + BumpString::from_str_in("Part2", &arena), + ))) .unwrap(); 
legacy_output.visit_close_element(&msg_elem).unwrap(); legacy_output.visit_close_element(&event_elem).unwrap(); @@ -1109,10 +1453,14 @@ mod tests { .visit_open_start_element(&msg_elem) .unwrap(); streaming_output - .visit_characters(Cow::Owned(BinXmlValue::StringType("Part1".to_string()))) + .visit_characters(Cow::Owned(BinXmlValue::StringType( + BumpString::from_str_in("Part1", &arena), + ))) .unwrap(); streaming_output - .visit_characters(Cow::Owned(BinXmlValue::StringType("Part2".to_string()))) + .visit_characters(Cow::Owned(BinXmlValue::StringType( + BumpString::from_str_in("Part2", &arena), + ))) .unwrap(); streaming_output.visit_close_element(&msg_elem).unwrap(); streaming_output.visit_close_element(&event_elem).unwrap(); diff --git a/src/model/deserialized.rs b/src/model/deserialized.rs index 678d6bdb..94a9c984 100644 --- a/src/model/deserialized.rs +++ b/src/model/deserialized.rs @@ -5,7 +5,7 @@ use crate::ChunkOffset; use std::fmt::{self, Formatter}; use winstructs::guid::Guid; -#[derive(Debug, PartialOrd, PartialEq, Clone)] +#[derive(Debug, PartialEq, Clone)] pub enum BinXMLDeserializedTokens<'a> { FragmentHeader(BinXMLFragmentHeader), TemplateInstance(BinXmlTemplateRef<'a>), @@ -56,7 +56,7 @@ impl fmt::Display for BinXmlTemplateDefinitionHeader { } } -#[derive(Debug, PartialOrd, PartialEq, Clone)] +#[derive(Debug, PartialEq, Clone)] pub struct BinXMLTemplateDefinition<'a> { pub header: BinXmlTemplateDefinitionHeader, pub tokens: Vec>, @@ -67,7 +67,7 @@ pub struct BinXmlEntityReference { pub name: BinXmlNameRef, } -#[derive(Debug, PartialOrd, PartialEq, Clone)] +#[derive(Debug, PartialEq, Clone)] pub struct BinXmlTemplateRef<'a> { pub template_id: u32, pub template_def_offset: ChunkOffset, diff --git a/src/model/xml.rs b/src/model/xml.rs index 00f2fe63..1f3a4fd2 100644 --- a/src/model/xml.rs +++ b/src/model/xml.rs @@ -5,7 +5,7 @@ use crate::err::EvtxError; use log::error; use std::borrow::Cow; -#[derive(Debug, PartialOrd, PartialEq, Clone)] 
+#[derive(Debug, PartialEq, Clone)] pub enum XmlModel<'a> { OpenElement(XmlElement<'a>), CloseElement, @@ -59,7 +59,7 @@ impl<'a> XmlElementBuilder<'a> { Some(_) => { return Err(EvtxError::FailedToCreateRecordModel( "invalid state, there should not be a value", - )) + )); } } @@ -109,13 +109,13 @@ impl<'a> XmlPIBuilder<'a> { } } -#[derive(Debug, PartialOrd, PartialEq, Clone)] +#[derive(Debug, PartialEq, Clone)] pub struct XmlAttribute<'a> { pub name: Cow<'a, BinXmlName>, pub value: Cow<'a, BinXmlValue<'a>>, } -#[derive(Debug, PartialOrd, PartialEq, Clone)] +#[derive(Debug, PartialEq, Clone)] pub struct XmlElement<'a> { pub name: Cow<'a, BinXmlName>, pub attributes: Vec>, diff --git a/src/string_cache.rs b/src/string_cache.rs index 307deed1..7f0b5cc8 100644 --- a/src/string_cache.rs +++ b/src/string_cache.rs @@ -11,7 +11,8 @@ pub struct StringCache(HashMap); impl StringCache { pub fn populate(data: &[u8], offsets: &[ChunkOffset]) -> DeserializationResult { - let mut cache = HashMap::new(); + // Offsets can contain many duplicates / zeros; reserve a minimal baseline. + let mut cache = HashMap::with_capacity(offsets.len()); for &offset in offsets.iter().filter(|&&offset| offset > 0) { let mut cursor = ByteCursor::with_pos(data, offset as usize)?; diff --git a/src/template_cache.rs b/src/template_cache.rs index 330e0ba2..fbf508e4 100644 --- a/src/template_cache.rs +++ b/src/template_cache.rs @@ -5,6 +5,7 @@ use crate::ChunkOffset; use crate::model::deserialized::BinXMLTemplateDefinition; use crate::utils::ByteCursor; +use bumpalo::Bump; use encoding::EncodingRef; use log::trace; use std::collections::HashMap; @@ -22,16 +23,20 @@ impl<'chunk> TemplateCache<'chunk> { pub fn populate( data: &'chunk [u8], offsets: &[ChunkOffset], + arena: &'chunk Bump, ansi_codec: EncodingRef, ) -> DeserializationResult { - let mut cache = HashMap::new(); + // Reserve a minimal baseline; actual number of cached templates may be higher + // due to chained template buckets. 
+ let mut cache = HashMap::with_capacity(offsets.len()); for offset in offsets.iter().filter(|&&offset| offset > 0) { let mut cursor = ByteCursor::with_pos(data, *offset as usize)?; loop { let table_offset = cursor.pos() as ChunkOffset; - let definition = read_template_definition_cursor(&mut cursor, None, ansi_codec)?; + let definition = + read_template_definition_cursor(&mut cursor, None, arena, ansi_codec)?; let next_template_offset = definition.header.next_template_offset; cache.insert(table_offset, definition); diff --git a/src/utils/utf16.rs b/src/utils/utf16.rs index 9c40aa68..929813ee 100644 --- a/src/utils/utf16.rs +++ b/src/utils/utf16.rs @@ -21,7 +21,6 @@ pub(crate) fn decode_utf16le_bytes_z(bytes: &[u8]) -> Result Result { let end = units.iter().position(|&c| c == 0).unwrap_or(units.len()); - String::from_utf16(&units[..end]).map_err(|_| Utf16LeDecodeError::InvalidData) + let slice = &units[..end]; + String::from_utf16(slice).map_err(|_| Utf16LeDecodeError::InvalidData) } - - diff --git a/src/utils/windows.rs b/src/utils/windows.rs index e9972be7..3cb260a7 100644 --- a/src/utils/windows.rs +++ b/src/utils/windows.rs @@ -70,11 +70,8 @@ pub(crate) fn read_sid(cursor: &mut ByteCursor<'_>) -> DeserializationResult( temp_bytes: &'a [u8], + arena: &'a Bump, ansi_codec: EncodingRef, ) -> crate::err::Result<( Vec>, @@ -44,6 +46,7 @@ pub fn parse_temp_binxml_fragment<'a>( binxml, 0, None, + arena, true, ansi_codec, BinXmlNameEncoding::WevtInline, @@ -69,6 +72,7 @@ pub fn parse_temp_binxml_fragment<'a>( /// Returns `(tokens, bytes_consumed)` where `bytes_consumed` is the number of bytes read from `binxml`. 
pub fn parse_wevt_binxml_fragment<'a>( binxml: &'a [u8], + arena: &'a Bump, ansi_codec: EncodingRef, ) -> crate::err::Result<( Vec>, @@ -82,6 +86,7 @@ pub fn parse_wevt_binxml_fragment<'a>( binxml, 0, None, + arena, true, ansi_codec, BinXmlNameEncoding::WevtInline, diff --git a/src/wevt_templates/error.rs b/src/wevt_templates/error.rs index 3d1b7f5c..7490cd95 100644 --- a/src/wevt_templates/error.rs +++ b/src/wevt_templates/error.rs @@ -17,5 +17,3 @@ pub enum WevtTemplateExtractError { #[error("failed to decode UTF-16 resource name")] InvalidResourceName, } - - diff --git a/src/wevt_templates/manifest/mod.rs b/src/wevt_templates/manifest/mod.rs index 886dcf63..723655e5 100644 --- a/src/wevt_templates/manifest/mod.rs +++ b/src/wevt_templates/manifest/mod.rs @@ -33,5 +33,3 @@ mod util; pub use error::WevtManifestError; pub use types::*; - - diff --git a/src/wevt_templates/manifest/types.rs b/src/wevt_templates/manifest/types.rs index 66a7ca38..1d447c27 100644 --- a/src/wevt_templates/manifest/types.rs +++ b/src/wevt_templates/manifest/types.rs @@ -278,5 +278,3 @@ impl Provider<'_> { .and_then(|t| t.templates.iter().find(|tpl| tpl.offset == offset)) } } - - diff --git a/src/wevt_templates/render.rs b/src/wevt_templates/render.rs index 5c886f4f..be124732 100644 --- a/src/wevt_templates/render.rs +++ b/src/wevt_templates/render.rs @@ -28,6 +28,7 @@ pub fn render_temp_to_xml( use crate::err::{EvtxError, Result}; use crate::model::xml::{XmlElement, XmlElementBuilder, XmlModel, XmlPIBuilder}; use crate::xml_output::{BinXmlOutput, XmlOutput}; + use bumpalo::Bump; use std::borrow::Cow; if temp_bytes.len() < TEMP_BINXML_OFFSET { @@ -39,7 +40,8 @@ pub fn render_temp_to_xml( } let binxml = &temp_bytes[TEMP_BINXML_OFFSET..]; - let (tokens, _bytes_consumed) = parse_temp_binxml_fragment(temp_bytes, ansi_codec)?; + let arena = Bump::new(); + let (tokens, _bytes_consumed) = parse_temp_binxml_fragment(temp_bytes, &arena, ansi_codec)?; fn resolve_name<'a>( binxml: &'a [u8], @@ -112,7 
+114,10 @@ pub fn render_temp_to_xml( } crate::model::deserialized::BinXMLDeserializedTokens::Substitution(sub) => { let placeholder = format!("{{sub:{}}}", sub.substitution_index); - let value = BinXmlValue::StringType(placeholder); + let value = BinXmlValue::StringType(bumpalo::collections::String::from_str_in( + &placeholder, + &arena, + )); match current_element { None => model.push(XmlModel::Value(Cow::Owned(value))), Some(ref mut builder) => { @@ -230,6 +235,7 @@ pub fn render_temp_to_xml_with_substitution_values( use crate::err::{EvtxError, Result}; use crate::model::xml::{XmlElement, XmlElementBuilder, XmlModel, XmlPIBuilder}; use crate::xml_output::{BinXmlOutput, XmlOutput}; + use bumpalo::Bump; use std::borrow::Cow; if temp_bytes.len() < TEMP_BINXML_OFFSET { @@ -241,7 +247,8 @@ pub fn render_temp_to_xml_with_substitution_values( } let binxml = &temp_bytes[TEMP_BINXML_OFFSET..]; - let (tokens, _bytes_consumed) = parse_temp_binxml_fragment(temp_bytes, ansi_codec)?; + let arena = Bump::new(); + let (tokens, _bytes_consumed) = parse_temp_binxml_fragment(temp_bytes, &arena, ansi_codec)?; fn resolve_name<'a>( binxml: &'a [u8], @@ -316,7 +323,8 @@ pub fn render_temp_to_xml_with_substitution_values( } let idx = sub.substitution_index as usize; let s = substitution_values.get(idx).cloned().unwrap_or_default(); - let value = BinXmlValue::StringType(s); + let value = + BinXmlValue::StringType(bumpalo::collections::String::from_str_in(&s, &arena)); match current_element { None => model.push(XmlModel::Value(Cow::Owned(value))), @@ -413,8 +421,7 @@ pub fn render_temp_to_xml_with_substitution_values( output.visit_end_of_stream()?; - String::from_utf8(output.into_writer()) - .map_err(|e| EvtxError::calculation_error(e.to_string())) + String::from_utf8(output.into_writer()).map_err(|e| EvtxError::calculation_error(e.to_string())) } /// Render a parsed template definition to XML. 
@@ -434,10 +441,12 @@ pub fn render_template_definition_to_xml( use crate::err::{EvtxError, Result}; use crate::model::xml::{XmlElement, XmlElementBuilder, XmlModel, XmlPIBuilder}; use crate::xml_output::{BinXmlOutput, XmlOutput}; + use bumpalo::Bump; use std::borrow::Cow; let binxml = template.binxml; - let (tokens, _bytes_consumed) = parse_wevt_binxml_fragment(binxml, ansi_codec)?; + let arena = Bump::new(); + let (tokens, _bytes_consumed) = parse_wevt_binxml_fragment(binxml, &arena, ansi_codec)?; fn resolve_name<'a>( binxml: &'a [u8], @@ -518,7 +527,10 @@ pub fn render_template_definition_to_xml( placeholder = format!("{{sub:{idx}:{name}}}"); } - let value = BinXmlValue::StringType(placeholder); + let value = BinXmlValue::StringType(bumpalo::collections::String::from_str_in( + &placeholder, + &arena, + )); match current_element { None => model.push(XmlModel::Value(Cow::Owned(value))), Some(ref mut builder) => { @@ -639,10 +651,12 @@ pub fn render_template_definition_to_xml_with_substitution_values( use crate::err::{EvtxError, Result}; use crate::model::xml::{XmlElement, XmlElementBuilder, XmlModel, XmlPIBuilder}; use crate::xml_output::{BinXmlOutput, XmlOutput}; + use bumpalo::Bump; use std::borrow::Cow; let binxml = template.binxml; - let (tokens, _bytes_consumed) = parse_wevt_binxml_fragment(binxml, ansi_codec)?; + let arena = Bump::new(); + let (tokens, _bytes_consumed) = parse_wevt_binxml_fragment(binxml, &arena, ansi_codec)?; fn resolve_name<'a>( binxml: &'a [u8], @@ -717,7 +731,8 @@ pub fn render_template_definition_to_xml_with_substitution_values( } let idx = sub.substitution_index as usize; let s = substitution_values.get(idx).cloned().unwrap_or_default(); - let value = BinXmlValue::StringType(s); + let value = + BinXmlValue::StringType(bumpalo::collections::String::from_str_in(&s, &arena)); match current_element { None => model.push(XmlModel::Value(Cow::Owned(value))), diff --git a/src/wevt_templates/types.rs b/src/wevt_templates/types.rs index 
b5824a0b..8826dce2 100644 --- a/src/wevt_templates/types.rs +++ b/src/wevt_templates/types.rs @@ -47,5 +47,3 @@ pub struct WevtTempTemplateRef { pub temp_size: u32, pub header: WevtTempTemplateHeader, } - - diff --git a/src/xml_output.rs b/src/xml_output.rs index f95745c0..74818990 100644 --- a/src/xml_output.rs +++ b/src/xml_output.rs @@ -1,14 +1,14 @@ +use crate::ParserSettings; use crate::binxml::value_variant::BinXmlValue; use crate::err::{SerializationError, SerializationResult}; use crate::model::xml::{BinXmlPI, XmlElement}; -use crate::ParserSettings; use log::trace; use std::io::Write; +use quick_xml::Writer; use quick_xml::events::attributes::Attribute; use quick_xml::events::{BytesDecl, BytesEnd, BytesPI, BytesStart, BytesText, Event}; -use quick_xml::Writer; use crate::binxml::name::BinXmlName; use std::borrow::Cow; diff --git a/tests/fixtures.rs b/tests/fixtures.rs index a20f3858..2bc13990 100644 --- a/tests/fixtures.rs +++ b/tests/fixtures.rs @@ -1,8 +1,8 @@ #![allow(dead_code)] use std::path::PathBuf; -use std::sync::Once; use std::sync::Mutex; +use std::sync::Once; static LOGGER_INIT: Once = Once::new(); diff --git a/tests/test_cli_interactive.rs b/tests/test_cli_interactive.rs index b4d58d4c..088358bc 100644 --- a/tests/test_cli_interactive.rs +++ b/tests/test_cli_interactive.rs @@ -16,7 +16,11 @@ mod tests { use std::time::{Duration, Instant}; use tempfile::tempdir; - fn wait_for_file_len_at_least(path: &std::path::Path, min_len: usize, timeout: Duration) -> usize { + fn wait_for_file_len_at_least( + path: &std::path::Path, + min_len: usize, + timeout: Duration, + ) -> usize { let start = Instant::now(); loop { if let Ok(meta) = std::fs::metadata(path) { @@ -26,7 +30,9 @@ mod tests { } } if start.elapsed() >= timeout { - let len = std::fs::metadata(path).map(|m| m.len() as usize).unwrap_or(0); + let len = std::fs::metadata(path) + .map(|m| m.len() as usize) + .unwrap_or(0); return len; } std::thread::sleep(Duration::from_millis(25)); diff --git 
a/tests/test_record_id_public.rs b/tests/test_record_id_public.rs index 885dc980..09568b9a 100644 --- a/tests/test_record_id_public.rs +++ b/tests/test_record_id_public.rs @@ -5,4 +5,3 @@ fn record_id_is_public() { let id: RecordId = 42; assert_eq!(id, 42); } - diff --git a/tests/test_wevt_templates.rs b/tests/test_wevt_templates.rs index 73053520..acdff1f6 100644 --- a/tests/test_wevt_templates.rs +++ b/tests/test_wevt_templates.rs @@ -1465,11 +1465,14 @@ mod wevt_templates_research { // NameHash validation (MS-EVEN6) and the current token support. let manifest = CrimManifest::parse(&r.data).expect("manifest parse should succeed"); let mut parsed_templates = 0usize; + let mut arena = bumpalo::Bump::new(); for provider in &manifest.providers { if let Some(ttbl) = provider.wevt.elements.templates.as_ref() { for tpl in &ttbl.templates { - let _ = parse_wevt_binxml_fragment(tpl.binxml, encoding::all::WINDOWS_1252) - .expect("BinXML parse should succeed"); + arena.reset(); + let _ = + parse_wevt_binxml_fragment(tpl.binxml, &arena, encoding::all::WINDOWS_1252) + .expect("BinXML parse should succeed"); parsed_templates += 1; } }