Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 10 additions & 11 deletions .cursor/commands/improvement_pass.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,14 @@ hyperfine -w 10 -r 20 \
| tee "benchmarks/benchmark_pre_${TAG}.txt"

# Optional: PRE flamegraph for this pass's main scenario
sudo make flamegraph-prod \
sudo TAG="$TAG" make flamegraph-prod \
FLAME_FILE="samples/security_big_sample.evtx" \
DURATION=30 \
FORMAT=json \
BIN="$PRE"
FORMAT=json

mv profile/flamegraph.svg "profile/flamegraph_${TAG}_${TS}_pre.svg" || true
cp profile/top_leaf.txt "profile/top_leaf_${TAG}_${TS}_pre.txt" || true
cp profile/top_titles.txt "profile/top_titles_${TAG}_${TS}_pre.txt" || true
mv "profile/flamegraph_${TAG}.svg" "profile/flamegraph_${TAG}_${TS}_pre.svg" || true
cp "profile/top_leaf_${TAG}.txt" "profile/top_leaf_${TAG}_${TS}_pre.txt" || true
cp "profile/top_titles_${TAG}.txt" "profile/top_titles_${TAG}_${TS}_pre.txt" || true
```

- **Use the PRE benchmark + flamegraph** to:
Expand Down Expand Up @@ -104,12 +103,12 @@ hyperfine -w 10 -r 20 \
| tee "/workspace/benchmarks/benchmark_pair_${TAG}_${TS}.txt"

# POST flamegraph for the same scenario
OUT_DIR=/workspace/profile_post FORMAT=jsonl DURATION=30 \
/workspace/scripts/flamegraph_prod.sh "$POST"
OUT_DIR=/workspace/profile_post TAG="$TAG" FORMAT=json DURATION=30 BIN="$POST" \
/workspace/scripts/flamegraph_prod.sh

mv /workspace/profile/flamegraph.svg "/workspace/profile_post/flamegraph_${TAG}_${TS}_post.svg" || true
cp /workspace/profile/top_leaf.txt "/workspace/profile_post/top_leaf_${TAG}_${TS}_post.txt" || true
cp /workspace/profile/top_titles.txt "/workspace/profile_post/top_titles_${TAG}_${TS}_post.txt" || true
mv "/workspace/profile_post/flamegraph_${TAG}.svg" "/workspace/profile_post/flamegraph_${TAG}_${TS}_post.svg" || true
cp "/workspace/profile_post/top_leaf_${TAG}.txt" "/workspace/profile_post/top_leaf_${TAG}_${TS}_post.txt" || true
cp "/workspace/profile_post/top_titles_${TAG}.txt" "/workspace/profile_post/top_titles_${TAG}_${TS}_post.txt" || true

echo "PRE: $PRE"
echo "POST: $POST"
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,5 @@ repomix-output.txt
evtx-wasm/evtx-viewer/public/pkg
# Samples are being copied by build scripts before deploying
**/public/samples/

profile/*
15 changes: 15 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Defaults for the profiling run. `?=` only assigns when the variable is not
# already set, so each of these can be overridden from the command line or
# the environment (e.g. `make flamegraph-prod FORMAT=xml`).
# FLAME_FILE: input .evtx file handed to the profiled binary.
FLAME_FILE ?= samples/security_big_sample.evtx
# FORMAT: output format forwarded to scripts/flamegraph_prod.sh.
FORMAT ?= json
# DURATION: forwarded to scripts/flamegraph_prod.sh as-is.
DURATION ?= 30
# BIN: path of the binary to profile, forwarded to scripts/flamegraph_prod.sh.
BIN ?= ./target/release/evtx_dump

# flamegraph-prod: build the release binary with the `fast-alloc` feature,
# remove any stale cargo-flamegraph trace, then run the profiling script with
# the variables above passed through its environment.
# NOTE(review): the cargo build runs unconditionally, even when BIN points at
# a different, pre-built binary - confirm that is intended.
.PHONY: flamegraph-prod
flamegraph-prod:
@echo "Building release binary with fast allocator..."
cargo build --release --features fast-alloc
@echo "Cleaning up previous trace files..."
@rm -rf cargo-flamegraph.trace
BIN="$(BIN)" FLAME_FILE="$(FLAME_FILE)" FORMAT="$(FORMAT)" DURATION="$(DURATION)" \
bash scripts/flamegraph_prod.sh


188 changes: 188 additions & 0 deletions scripts/flamegraph_prod.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
#!/usr/bin/env bash
set -euo pipefail

# Simple production-style flamegraph helper using perf + inferno (Linux)
# or cargo-flamegraph (macOS).
# Intended to be invoked via `make flamegraph-prod` with environment
# overrides, e.g.:
#   FLAME_FILE=samples/security_big_sample.evtx \
#   FORMAT=json \
#   DURATION=30 \
#   BIN=./target/release/evtx_dump \
#   make flamegraph-prod
#
# Outputs (all written under OUT_DIR and labelled with TAG):
#   flamegraph_${TAG}.svg   - the flamegraph
#   stacks_${TAG}.folded    - folded stacks the summaries are derived from
#   top_leaf_${TAG}.txt     - top 20 leaf frames by sample count
#   top_titles_${TAG}.txt   - top 20 root frames by sample count
# The Linux path additionally writes perf.data.
#
# NOTE: DURATION is currently only echoed; neither profiler path uses it to
#       bound the run, so profiling lasts until the binary exits.
# NOTE: BIN is only honoured by the Linux/perf path. The macOS path always
#       profiles `cargo flamegraph --bin evtx_dump`.
OS="$(uname -s || echo unknown)"

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

# Optional label for this run (used in output filenames).
: "${TAG:=default}"

: "${BIN:=$ROOT_DIR/target/release/evtx_dump}"
: "${FLAME_FILE:=$ROOT_DIR/samples/security_big_sample.evtx}"
: "${FORMAT:=json}"
: "${DURATION:=30}"
# For JSON formats, choose parser implementation: streaming | legacy.
: "${JSON_PARSER:=streaming}"
: "${OUT_DIR:=$ROOT_DIR/profile}"

mkdir -p "$OUT_DIR"

FOLDED_STACKS="$OUT_DIR/stacks_${TAG}.folded"

echo "Profiling"
echo " FLAME_FILE=$FLAME_FILE"
echo " FORMAT=$FORMAT"
echo " DURATION=${DURATION}s"
echo " OUT_DIR=$OUT_DIR"
echo " TAG=$TAG"

# write_top_summary FOLDED POSITION HEADER OUT
#
# Aggregate sample counts per frame from a folded-stacks file and write the
# 20 heaviest entries to OUT, preceded by HEADER.
#   FOLDED   - folded stacks, one "frame1;frame2;...;frameN COUNT" per line
#   POSITION - "leaf" to aggregate by the last frame, anything else to
#              aggregate by the first (root) frame
#   HEADER   - first line of the report
#   OUT      - destination file (overwritten)
write_top_summary() {
  local folded="$1" position="$2" header="$3" out="$4"
  {
    echo "$header"
    # Frame names may contain spaces (e.g. Rust `<T as Trait>::method`), so
    # the count is taken from the LAST field and everything before it is the
    # stack. A tab separates count and name downstream for the same reason.
    # The final awk reads its whole input (instead of `head`), so `sort` can
    # never be killed by SIGPIPE and trip `set -o pipefail`.
    awk -v position="$position" '
      {
        count = $NF + 0
        stack = $0
        sub(/[ \t]+[^ \t]+[ \t]*$/, "", stack)
        n = split(stack, frames, ";")
        if (n > 0) {
          key = (position == "leaf") ? frames[n] : frames[1]
          totals[key] += count
        }
      }
      END {
        for (k in totals) {
          printf "%d\t%s\n", totals[k], k
        }
      }' "$folded" \
      | sort -t $'\t' -k1,1nr -k2 \
      | awk -F '\t' 'NR <= 20 { printf " %s: %s\n", $2, $1 }'
  } > "$out"
}

# Write both text summaries for the current run from $FOLDED_STACKS.
write_summaries() {
  write_top_summary "$FOLDED_STACKS" leaf \
    "Top leaf functions (by total samples):" "$OUT_DIR/top_leaf_${TAG}.txt"
  write_top_summary "$FOLDED_STACKS" title \
    "Top title functions (by total samples):" "$OUT_DIR/top_titles_${TAG}.txt"
}

# Map FORMAT to evtx_dump arguments.
case "$FORMAT" in
  json|jsonl)
    # Use streaming JSON path by default; caller can change via JSON_PARSER env.
    FMT_ARGS=(-t 1 -o "$FORMAT" --json-parser "$JSON_PARSER")
    ;;
  xml)
    FMT_ARGS=(-t 1 -o xml)
    ;;
  *)
    echo "warning: unknown FORMAT='$FORMAT', defaulting to json" >&2
    FMT_ARGS=(-t 1 -o json --json-parser streaming)
    ;;
esac

if [[ "$OS" == "Darwin" ]]; then
  # macOS path: use cargo-flamegraph (wraps dtrace + inferno).
  if ! command -v cargo >/dev/null 2>&1; then
    echo "error: cargo not found in PATH; required for cargo-flamegraph on macOS." >&2
    exit 1
  fi

  echo "Detected macOS; using cargo flamegraph (you may be prompted for sudo)."

  # Ask cargo-flamegraph to tee the folded stacks into our own file.
  (cd "$ROOT_DIR" && \
    cargo flamegraph \
      --root \
      --bin evtx_dump \
      --output "$OUT_DIR/flamegraph_${TAG}.svg" \
      --post-process "tee $FOLDED_STACKS" \
      -- "${FMT_ARGS[@]}" "$FLAME_FILE")

  if [[ -f "$FOLDED_STACKS" ]] && [[ -s "$FOLDED_STACKS" ]]; then
    write_summaries
    echo "Top leafs written to $OUT_DIR/top_leaf_${TAG}.txt"
    echo "Top titles written to $OUT_DIR/top_titles_${TAG}.txt"
  else
    echo "warning: folded stacks file is empty or missing, skipping text summaries" >&2
  fi

  echo "Flamegraph written to $OUT_DIR/flamegraph_${TAG}.svg"
  exit 0
fi

# Linux / perf + inferno path.
#
# Requirements:
# - perf
# - inferno-collapse-perf
# - inferno-flamegraph

if ! command -v perf >/dev/null 2>&1; then
  echo "error: perf not found in PATH; flamegraph_prod.sh currently expects Linux + perf." >&2
  exit 1
fi

if ! command -v inferno-collapse-perf >/dev/null 2>&1; then
  echo "error: inferno-collapse-perf not found in PATH." >&2
  exit 1
fi

if ! command -v inferno-flamegraph >/dev/null 2>&1; then
  echo "error: inferno-flamegraph not found in PATH." >&2
  exit 1
fi

# Fail early with a clear message instead of a perf-internal exec error.
if [[ ! -x "$BIN" ]]; then
  echo "error: BIN='$BIN' does not exist or is not executable." >&2
  exit 1
fi

perf record -F 999 -g --output "$OUT_DIR/perf.data" -- \
  "$BIN" "${FMT_ARGS[@]}" "$FLAME_FILE" >/dev/null

perf script -i "$OUT_DIR/perf.data" | inferno-collapse-perf > "$FOLDED_STACKS"
inferno-flamegraph < "$FOLDED_STACKS" > "$OUT_DIR/flamegraph_${TAG}.svg"

# Extract top leafs (functions at end of stack) and top titles (functions at start of stack)
# Folded format: "func1;func2;func3 12345" where number is sample count
write_summaries

echo "Flamegraph written to $OUT_DIR/flamegraph_${TAG}.svg"
echo "Top leafs written to $OUT_DIR/top_leaf_${TAG}.txt"
echo "Top titles written to $OUT_DIR/top_titles_${TAG}.txt"


41 changes: 38 additions & 3 deletions src/bin/evtx_dump.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,21 @@ pub enum EvtxOutputFormat {
XML,
}

/// Selects which JSON serialization path is used when dumping records.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum JsonParserKind {
    /// The original, tree-based path: a complete `serde_json::Value` is
    /// built for every record.
    Legacy,
    /// The streaming path: JSON is written straight to the output writer.
    Streaming,
}

struct EvtxDump {
parser_settings: ParserSettings,
input: PathBuf,
show_record_number: bool,
output_format: EvtxOutputFormat,
output: Box<dyn Write>,
json_parser: JsonParserKind,
verbosity_level: Option<Level>,
stop_after_error: bool,
/// When set, only the specified events (offset relative to file) will be outputted.
Expand All @@ -63,6 +72,15 @@ impl EvtxDump {
_ => EvtxOutputFormat::XML,
};

let json_parser = match matches
.get_one::<String>("json-parser")
.map(|s| s.as_str())
{
Some("legacy") => JsonParserKind::Legacy,
Some("streaming") | None => JsonParserKind::Streaming,
_ => JsonParserKind::Streaming,
};

let no_indent = match (
matches.get_flag("no-indent"),
matches.get_one::<String>("output-format"),
Expand Down Expand Up @@ -161,6 +179,7 @@ impl EvtxDump {
show_record_number: !no_show_record_number,
output_format,
output,
json_parser,
verbosity_level,
stop_after_error,
ranges: event_ranges,
Expand All @@ -184,9 +203,18 @@ impl EvtxDump {
}
}
EvtxOutputFormat::JSON => {
for record in parser.records_json() {
self.dump_record(record)?
}
match self.json_parser {
JsonParserKind::Streaming => {
for record in parser.records_json_stream() {
self.dump_record(record)?
}
}
JsonParserKind::Legacy => {
for record in parser.records_json() {
self.dump_record(record)?
}
}
};
}
};

Expand Down Expand Up @@ -387,6 +415,13 @@ fn main() -> Result<()> {
"jsonl" - (jsonlines) same as json with --no-indent --dont-show-record-number
"#)),
)
.arg(
Arg::new("json-parser")
.long("json-parser")
.value_parser(["legacy", "streaming"])
.default_value("streaming")
.help("Select JSON parser implementation: legacy (tree-based) or streaming"),
)
.arg(
Arg::new("output-target")
.long("output")
Expand Down
Loading
Loading