diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 19c001f6..548accb4 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -17,3 +17,5 @@ jobs: - uses: Swatinem/rust-cache@v2 - name: Run tests run: cargo test + - name: Run utf16-simd tests + run: cargo test --manifest-path utf16-simd/Cargo.toml diff --git a/.gitignore b/.gitignore index 1e93fd2a..e0ba232d 100644 --- a/.gitignore +++ b/.gitignore @@ -24,3 +24,8 @@ samples_local/ # Local vendor checkouts (for format research / patching upstream). external/ +perf/ + +# Large local benchmark/sample corpora (do not commit) +benchmark_parsers/ +samples/EVTX-ATTACK-SAMPLES/ diff --git a/Cargo.lock b/Cargo.lock index 5275d7bc..86de5b02 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,19 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "getrandom", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aho-corasick" version = "1.1.3" @@ -17,21 +30,6 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" -[[package]] -name = "android-tzdata" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" - -[[package]] -name = "android_system_properties" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" -dependencies = [ - "libc", -] - [[package]] name = "anes" version = "0.1.6" @@ -90,9 +88,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.96" +version = "1.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b964d184e89d9b6b67dd2715bc8e74cf3107fb2b529990c90cf517326150bf4" +checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" [[package]] name = "assert_cmd" @@ -140,9 +138,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.17.0" +version = "3.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" +checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" [[package]] name = "bytecount" @@ -156,6 +154,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +[[package]] +name = "bytes" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" + [[package]] name = "camino" version = "1.1.9" @@ -214,13 +218,8 @@ version = "0.4.39" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825" dependencies = [ - "android-tzdata", - "iana-time-zone", - "js-sys", "num-traits", "serde", - "wasm-bindgen", - "windows-targets", ] [[package]] @@ -302,12 +301,6 @@ dependencies = [ "windows-sys", ] -[[package]] -name = "core-foundation-sys" -version = "0.8.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" - [[package]] name = "crc32fast" version = "1.4.2" @@ -386,9 +379,9 @@ checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929" [[package]] name = "deranged" -version = "0.3.11" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" +checksum = "ececcb659e7ba858fb4f10388c250a7252eb0a27373f1a72b8748afdd248e587" dependencies = [ "powerfmt", ] @@ -420,9 +413,9 @@ checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" [[package]] name = "either" -version = "1.13.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" [[package]] name = "encode_unicode" @@ -546,11 +539,12 @@ dependencies = [ name = "evtx" version = "0.9.0" dependencies = [ + "ahash", "anyhow", "assert_cmd", "bitflags 2.8.0", + "bumpalo", "byteorder", - "chrono", "clap", "crc32fast", "criterion", @@ -562,6 +556,7 @@ dependencies = [ "hashbrown", "indoc", "insta", + "jiff", "log", "predicates", "pretty_assertions", @@ -573,10 +568,13 @@ dependencies = [ "serde_json", "simplelog", "skeptic", + "sonic-rs", "tempfile", - "thiserror 2.0.11", + "thiserror 2.0.17", "tikv-jemallocator", + "utf16-simd", "winstructs", + "zmij", ] [[package]] @@ -585,6 +583,18 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "faststr" +version = "0.2.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baec6a0289d7f1fe5665586ef7340af82e3037207bef60f5785e57569776f0c8" +dependencies = [ + "bytes", + "rkyv", + "serde", + "simdutf8", +] + [[package]] name = "float-cmp" version = "0.10.0" @@ -662,29 +672,6 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" -[[package]] -name = "iana-time-zone" -version = "0.1.61" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" -dependencies = [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "wasm-bindgen", - "windows-core", -] - -[[package]] -name = "iana-time-zone-haiku" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" -dependencies = [ - "cc", -] - [[package]] name = "indexmap" version = "2.7.1" @@ -697,9 +684,12 @@ dependencies = [ [[package]] name = "indoc" -version = "2.0.5" +version = "2.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" +checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" +dependencies = [ + "rustversion", +] [[package]] name = "insta" @@ -747,6 +737,47 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" +[[package]] +name = "jiff" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a87d9b8105c23642f50cbbae03d1f75d8422c5cb98ce7ee9271f7ff7505be6b8" +dependencies = [ + "jiff-static", + "jiff-tzdb-platform", + "log", + "portable-atomic", + "portable-atomic-util", + "serde_core", + "windows-sys", +] + +[[package]] +name = "jiff-static" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b787bebb543f8969132630c51fd0afab173a86c6abae56ff3b9e5e3e3f9f6e58" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.111", +] + +[[package]] +name = "jiff-tzdb" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68971ebff725b9e2ca27a601c5eb38a4c5d64422c4cbab0c535f248087eda5c2" + +[[package]] +name = "jiff-tzdb-platform" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "875a5a69ac2bab1a891711cf5eccbec1ce0341ea805560dcd90b7a2e925132e8" +dependencies = [ + "jiff-tzdb", +] + [[package]] name = "js-sys" version = "0.3.77" @@ -787,6 +818,26 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "munge" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e17401f259eba956ca16491461b6e8f72913a0a114e39736ce404410f915a0c" +dependencies = [ + "munge_macro", +] + +[[package]] +name = "munge_macro" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4568f25ccbd45ab5d5603dc34318c1ec56b117531781260002151b8530a9f931" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.111", +] + [[package]] name = "nix" version = "0.27.1" @@ -868,7 +919,7 @@ checksum = "f6e859e6e5bd50440ab63c47e3ebabc90f26251f7c73c3d3e837b74a1cc3fa67" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.111", ] [[package]] @@ -911,6 +962,21 @@ dependencies = [ "plotters-backend", ] +[[package]] +name = "portable-atomic" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f89776e4d69bb58bc6993e99ffa1d11f228b839984854c7daeb5d37f87cbe950" + +[[package]] +name = "portable-atomic-util" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +dependencies = [ + "portable-atomic", +] + [[package]] name = "powerfmt" version = "0.2.0" @@ -959,13 +1025,33 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.93" +version = "1.0.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" +checksum = "9695f8df41bb4f3d222c95a67532365f569318332d03d5f3f67f37b20e6ebdf0" dependencies = [ "unicode-ident", ] +[[package]] +name = "ptr_meta" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b9a0cf95a1196af61d4f1cbdab967179516d9a4a4312af1f31948f8f6224a79" +dependencies = [ + "ptr_meta_derive", +] + +[[package]] +name = "ptr_meta_derive" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7347867d0a7e1208d93b46767be83e2b8f978c3dad35f775ac8d8847551d6fe1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.111", +] + [[package]] name = "pulldown-cmark" version = "0.9.6" @@ -988,13 +1074,22 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.38" +version = "1.0.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" dependencies = [ "proc-macro2", ] +[[package]] +name = "rancor" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a063ea72381527c2a0561da9c80000ef822bdd7c3241b1cc1b12100e3df081ee" +dependencies = [ + "ptr_meta", +] + [[package]] name = "rayon" version = "1.10.0" @@ -1015,6 +1110,26 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "ref-cast" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f354300ae66f76f1c85c5f84693f0ce81d747e2c3f21a45fef496d89c960bf7d" +dependencies = [ + "ref-cast-impl", +] + +[[package]] +name = "ref-cast-impl" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.111", +] + [[package]] name = "regex" version = "1.11.1" @@ -1044,6 +1159,12 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +[[package]] +name = "rend" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cadadef317c2f20755a64d7fdc48f9e7178ee6b0e1f7fce33fa60f1d68a276e6" + [[package]] name = "rexpect" version = "0.6.0" @@ -1057,6 +1178,35 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "rkyv" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35a640b26f007713818e9a9b65d34da1cf58538207b052916a83d80e43f3ffa4" +dependencies = [ + "bytes", + "hashbrown", + "indexmap", + "munge", + "ptr_meta", + "rancor", + "rend", + "rkyv_derive", + "tinyvec", + "uuid", +] + +[[package]] +name = "rkyv_derive" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd83f5f173ff41e00337d97f6572e416d022ef8a19f371817259ae960324c482" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.111", +] + [[package]] name = "rpmalloc" version = "0.2.2" @@ -1128,7 +1278,7 @@ checksum = "ed76efe62313ab6610570951494bdaa81568026e0318eaa55f167de70eeea67d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.111", ] [[package]] @@ -1142,22 +1292,32 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.218" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8dfc9d19bdbf6d17e22319da49161d5d0108e4188e8b680aef6299eed22df60" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.218" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f09503e191f4e797cb8aac08e9a4a4695c5edf6a2e70e376d961ddd5c969f82b" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.111", ] [[package]] @@ -1175,9 +1335,9 @@ dependencies = [ [[package]] name = "shell-words" -version = "1.1.0" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde" +checksum = "dc6fe69c597f9c37bfeeeeeb33da3530379845f10be461a66d16d03eca2ded77" [[package]] name = "shlex" @@ -1185,6 +1345,12 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + [[package]] name = "similar" version = "2.7.0" @@ -1217,6 +1383,45 @@ dependencies = [ "walkdir", ] +[[package]] +name = "sonic-number" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8a74044c092f4f43ca7a6cfd62854cf9fb5ac8502b131347c990bf22bef1dfe" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "sonic-rs" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4425ea8d66ec950e0a8f2ef52c766cc3d68d661d9a0845c353c40833179fd866" +dependencies = [ + "ahash", + "bumpalo", + "bytes", + "cfg-if", + "faststr", + "itoa", + "ref-cast", + "ryu", + "serde", + "simdutf8", + "sonic-number", + "sonic-simd", + "thiserror 2.0.17", +] + +[[package]] +name = "sonic-simd" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5707edbfb34a40c9f2a55fa09a49101d9fec4e0cc171ce386086bd9616f34257" +dependencies = [ + "cfg-if", +] + [[package]] name = "strsim" version = "0.11.1" @@ -1236,9 +1441,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.98" +version = "2.0.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1" +checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" dependencies = [ "proc-macro2", "quote", @@ -1285,11 +1490,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.11" +version = "2.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc" +checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" dependencies = [ - "thiserror-impl 2.0.11", + "thiserror-impl 2.0.17", ] [[package]] @@ -1300,18 +1505,18 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.111", ] [[package]] name = "thiserror-impl" -version = "2.0.11" +version = "2.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2" +checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.111", ] [[package]] @@ -1336,9 +1541,9 @@ dependencies = [ [[package]] name = "time" -version = "0.3.37" +version = "0.3.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35e7868883861bd0e56d9ac6efcaaca0d6d5d82a2a7ec8209ff492c07cf37b21" +checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d" dependencies = [ "deranged", "itoa", @@ -1353,15 +1558,15 @@ dependencies = [ [[package]] name = "time-core" -version = "0.1.2" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" +checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b" [[package]] name = "time-macros" -version = "0.2.19" +version = "0.2.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2834e6017e3e5e4b9834939793b282bc03b37a3336245fa820e35e233e2a85de" +checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3" dependencies = [ "num-conv", "time-core", @@ -1377,6 +1582,21 @@ dependencies = [ "serde_json", ] +[[package]] +name = "tinyvec" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + [[package]] name = "unicase" version = "2.8.1" @@ -1391,9 +1611,16 @@ checksum = "00e2473a93778eb0bad35909dff6a10d28e63f792f16ed15e404fca9d5eeedbe" [[package]] name = "unicode-width" -version = "0.2.0" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + +[[package]] +name = "utf16-simd" +version = "0.1.0" +dependencies = [ + "sonic-rs", +] [[package]] name = "utf8parse" @@ -1401,6 +1628,16 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "uuid" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "version_check" version = "0.9.5" @@ -1457,7 +1694,7 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.111", "wasm-bindgen-shared", ] @@ -1479,7 +1716,7 @@ checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.111", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -1512,15 +1749,6 @@ dependencies = [ "windows-sys", ] -[[package]] -name = "windows-core" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" -dependencies = [ - "windows-targets", -] - [[package]] name = "windows-sys" version = "0.59.0" @@ -1626,8 +1854,34 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" +[[package]] +name = "zerocopy" +version = "0.8.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.111", +] + [[package]] name = "zeroize" -version = "1.8.1" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" + +[[package]] +name = "zmij" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" +checksum = "f5858cd3a46fff31e77adea2935e357e3a2538d870741617bfb7c943e218fee6" diff --git a/Cargo.toml b/Cargo.toml index 821f16f5..a88bddef 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,6 @@ exclude = ["**/*.evtx", "**/*.dat"] [dependencies] bitflags = "2" crc32fast = "1" -chrono = { version = "0.4", features = ["serde"] } encoding = "0.2.33" byteorder = "1" quick-xml = "^0.37" @@ -23,6 +22,7 @@ thiserror = "2" log = { version = "0.4.17", features = ["release_max_level_debug"] } winstructs = "0.3.0" hashbrown = { version = "^0.15", features = ["inline-more"] } +ahash = "0.8" # Optional: PE parsing for `evtx_dump extract-wevt-templates` resource extraction. goblin = { version = "0.10", optional = true } # Optional: used by `evtx_dump extract-wevt-templates` for cross-platform glob expansion. @@ -40,6 +40,14 @@ tempfile = { version = "3.3.0", optional = true } serde = "1" serde_json = { version = "1", features = ["preserve_order"]} +sonic-rs = "0.5.6" +zmij = "1.0.1" +jiff = { version = "0.2.17", features = ["logging", "serde"] } +bumpalo = { version = "3.19.1", features = ["collections"] } +# `utf16-simd` is designed to be usable as a standalone crate. We opt into its +# `sonic-writeext` feature here to integrate with `sonic-rs::writer::WriteExt` +# in the hot-path JSON/XML renderers. +utf16-simd = { path = "utf16-simd", features = ["sonic-writeext"] } [target.'cfg(not(windows))'.dependencies] # jemalloc is significantly more peformant than the system allocator. @@ -57,6 +65,7 @@ evtx_dump = ["simplelog", "clap", "dialoguer", "indoc", "anyhow", "tempfile", "w multithreading = ["rayon"] # Enable WEVT_TEMPLATE extraction helpers (used by `evtx_dump extract-wevt-templates`). wevt_templates = ["glob", "goblin"] +bench = [] [dev-dependencies] insta = { version = "1", features = ["json"] } @@ -87,3 +96,15 @@ required-features = ["evtx_dump"] name = "benchmark" path = "./src/benches/benchmark.rs" harness = false + +[[bench]] +name = "json_text_content" +path = "./src/benches/json_text_content.rs" +harness = false +required-features = ["bench"] + +[[bench]] +name = "tree_build" +path = "./src/benches/tree_build.rs" +harness = false +required-features = ["bench"] diff --git a/Makefile b/Makefile index 7a8270c4..7620e741 100644 --- a/Makefile +++ b/Makefile @@ -12,11 +12,3 @@ flamegraph-prod: BIN="$(BIN)" FLAME_FILE="$(FLAME_FILE)" FORMAT="$(FORMAT)" DURATION="$(DURATION)" \ bash scripts/flamegraph_prod.sh -.PHONY: compare-streaming-legacy -compare-streaming-legacy: - @echo "Building comparison tool with fast allocator..." - cargo build --release --features fast-alloc --bin compare_streaming_legacy - @echo "Running legacy vs streaming JSON comparison..." - ./target/release/compare_streaming_legacy $(FILE) - - diff --git a/build_pgo.sh b/build_pgo.sh index 8cca6cc5..8dd57a5c 100755 --- a/build_pgo.sh +++ b/build_pgo.sh @@ -39,11 +39,11 @@ RUSTFLAGS="-Cprofile-generate=/tmp/pgo-data" \ cargo build --release --bin evtx_dump --target $TARGET echo "Running instrumented binary" -for i in $(find samples -name "*.evtx"); do +find samples -name "*.evtx" -print0 | while IFS= read -r -d '' i; do echo "Processing $i" - ./target/$TARGET/release/evtx_dump -t 1 -o json $i 1>/dev/null 2>&1 - ./target/$TARGET/release/evtx_dump -t 1 -o xml $i 1>/dev/null 2>&1 - ./target/$TARGET/release/evtx_dump -t 8 -o json $i 1>/dev/null 2>&1 + ./target/$TARGET/release/evtx_dump -t 1 -o json "$i" 1>/dev/null 2>&1 + ./target/$TARGET/release/evtx_dump -t 1 -o xml "$i" 1>/dev/null 2>&1 + ./target/$TARGET/release/evtx_dump -t 8 -o json "$i" 1>/dev/null 2>&1 done echo "Merging profile data" @@ -52,3 +52,5 @@ llvm-profdata merge -o /tmp/pgo-data/merged.profdata /tmp/pgo-data echo "Building binary with profile data" RUSTFLAGS="-Cprofile-use=/tmp/pgo-data/merged.profdata" \ cargo build --release --bin evtx_dump --target $TARGET --features fast-alloc + +echo "PGO-optimized binary written to: ./target/$TARGET/release/evtx_dump" diff --git a/scripts/bench_parsers.sh b/scripts/bench_parsers.sh new file mode 100755 index 00000000..1643b5fa --- /dev/null +++ b/scripts/bench_parsers.sh @@ -0,0 +1,526 @@ +#!/usr/bin/env bash +# +# Comprehensive EVTX Parser Benchmark +# Compares: evtx (Rust), libevtx (C), python-evtx, golang-evtx, velocidex/evtx +# +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +BENCH_DIR="${BENCH_DIR:-$ROOT/benchmark_parsers}" +SAMPLES_DIR="${SAMPLES_DIR:-$ROOT/samples}" +RESULTS_DIR="${RESULTS_DIR:-$ROOT/benchmark_results}" + +# Default settings +EVTX_FILE="" +WARMUP=3 +RUNS=10 +THREADS_LIST="1 8" # Space-separated list of thread counts +SKIP_CLONE=0 +SKIP_BUILD=0 +SKIP_RUST=0 +SKIP_LIBEVTX=0 +SKIP_PYTHON=0 +SKIP_GOLANG_EVTX=0 +SKIP_VELOCIDEX=0 +USE_PYPY=0 +MAX_THREADS="" + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +log_info() { echo -e "${BLUE}[INFO]${NC} $*"; } +log_ok() { echo -e "${GREEN}[OK]${NC} $*"; } +log_warn() { echo -e "${YELLOW}[WARN]${NC} $*"; } +log_err() { echo -e "${RED}[ERROR]${NC} $*" >&2; } + +usage() { + cat <<'EOF' +Usage: scripts/bench_parsers.sh [options] + +Benchmark evtx against other EVTX parsers (libevtx, python-evtx, golang-evtx, velocidex/evtx) + +Options: + --file EVTX input file (required, ~30MB recommended) + --warmup Hyperfine warmup runs (default: 3) + --runs Hyperfine runs (default: 10) + --threads Space-separated thread counts (default: "1 8") + --max-threads Include $(nproc) threads in the list + --bench-dir Directory for cloned repos (default: benchmark_parsers/) + --results-dir Output directory for results (default: benchmark_results/) + + --skip-clone Don't clone repos (assume already present) + --skip-build Don't rebuild (assume already built) + --skip-rust Skip Rust evtx benchmarks + --skip-libevtx Skip libevtx benchmarks + --skip-python Skip python-evtx benchmarks + --skip-golang-evtx Skip golang-evtx benchmarks + --skip-velocidex Skip velocidex/evtx benchmarks + --use-pypy Also benchmark with PyPy (if installed) + + -h, --help Show this help + +Examples: + # Basic benchmark with a sample file + ./scripts/bench_parsers.sh --file samples/security.evtx + + # Full benchmark with max threads + ./scripts/bench_parsers.sh --file samples/big.evtx --max-threads --threads "1 8 24" + + # Quick test (fewer runs) + ./scripts/bench_parsers.sh --file samples/test.evtx --warmup 1 --runs 3 + +Sample EVTX files: + You can download sample .evtx files from: + - https://github.com/sbousseaden/EVTX-ATTACK-SAMPLES + - https://github.com/NextronSystems/evtx-baseline + +EOF +} + +while [[ $# -gt 0 ]]; do + case "$1" in + --file) EVTX_FILE="$2"; shift 2;; + --warmup) WARMUP="$2"; shift 2;; + --runs) RUNS="$2"; shift 2;; + --threads) THREADS_LIST="$2"; shift 2;; + --max-threads) MAX_THREADS=1; shift;; + --bench-dir) BENCH_DIR="$2"; shift 2;; + --results-dir) RESULTS_DIR="$2"; shift 2;; + --skip-clone) SKIP_CLONE=1; shift;; + --skip-build) SKIP_BUILD=1; shift;; + --skip-rust) SKIP_RUST=1; shift;; + --skip-libevtx) SKIP_LIBEVTX=1; shift;; + --skip-python) SKIP_PYTHON=1; shift;; + --skip-golang-evtx) SKIP_GOLANG_EVTX=1; shift;; + --skip-velocidex) SKIP_VELOCIDEX=1; shift;; + --use-pypy) USE_PYPY=1; shift;; + -h|--help) usage; exit 0;; + *) log_err "Unknown arg: $1"; usage; exit 1;; + esac +done + +# Validate required args +if [[ -z "$EVTX_FILE" ]]; then + log_err "Missing required --file argument" + echo "" + usage + exit 1 +fi + +if [[ ! -f "$EVTX_FILE" ]]; then + log_err "EVTX file not found: $EVTX_FILE" + exit 1 +fi + +# Add max threads if requested +if [[ -n "$MAX_THREADS" ]]; then + NPROC=$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 8) + THREADS_LIST="$THREADS_LIST $NPROC" +fi + +# Get file size for reporting +FILE_SIZE=$(du -h "$EVTX_FILE" | cut -f1) +log_info "Benchmarking with: $EVTX_FILE ($FILE_SIZE)" +log_info "Thread configurations: $THREADS_LIST" + +# Check for hyperfine +if ! command -v hyperfine &>/dev/null; then + log_err "hyperfine not found. Install it with: cargo install hyperfine" + exit 1 +fi + +mkdir -p "$BENCH_DIR" "$RESULTS_DIR" + +############################################################################# +# Clone Repositories +############################################################################# + +clone_repo() { + local name="$1" + local url="$2" + local dir="$BENCH_DIR/$name" + + if [[ -d "$dir" ]]; then + log_info "$name already cloned" + return 0 + fi + + log_info "Cloning $name..." + git clone --depth 1 "$url" "$dir" + log_ok "Cloned $name" +} + +if [[ $SKIP_CLONE -eq 0 ]]; then + log_info "=== Cloning repositories ===" + + [[ $SKIP_LIBEVTX -eq 0 ]] && clone_repo "libevtx" "https://github.com/libyal/libevtx.git" + [[ $SKIP_PYTHON -eq 0 ]] && clone_repo "python-evtx" "https://github.com/williballenthin/python-evtx.git" + [[ $SKIP_GOLANG_EVTX -eq 0 ]] && clone_repo "golang-evtx" "https://github.com/0xrawsec/golang-evtx.git" + [[ $SKIP_VELOCIDEX -eq 0 ]] && clone_repo "velocidex-evtx" "https://github.com/Velocidex/evtx.git" +fi + +############################################################################# +# Build: Rust evtx (this library) +############################################################################# + +RUST_BIN="$ROOT/target/release/evtx_dump" + +build_rust() { + log_info "Building Rust evtx..." + (cd "$ROOT" && cargo build --release --features "fast-alloc,multithreading") + log_ok "Rust evtx built" +} + +if [[ $SKIP_RUST -eq 0 && $SKIP_BUILD -eq 0 ]]; then + build_rust +fi + +############################################################################# +# Build: libevtx (C) +############################################################################# + +LIBEVTX_BIN="$BENCH_DIR/libevtx/evtxtools/evtxexport" + +build_libevtx() { + local dir="$BENCH_DIR/libevtx" + + if [[ ! -d "$dir" ]]; then + log_warn "libevtx not cloned, skipping build" + return 1 + fi + + log_info "Building libevtx..." + + (cd "$dir" + # libevtx requires autotools + if [[ ! -f "configure" ]]; then + if ! command -v autoreconf &>/dev/null; then + log_err "autoreconf not found. Install autotools: brew install autoconf automake libtool" + return 1 + fi + ./synclibs.sh 2>/dev/null || true + autoreconf -fiv + fi + + if [[ ! -f "Makefile" ]]; then + ./configure --enable-silent-rules + fi + + make -j"$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 4)" + ) + + if [[ -x "$LIBEVTX_BIN" ]]; then + log_ok "libevtx built" + return 0 + else + log_warn "libevtx build may have failed, binary not found" + return 1 + fi +} + +HAVE_LIBEVTX=0 +if [[ $SKIP_LIBEVTX -eq 0 && $SKIP_BUILD -eq 0 ]]; then + if build_libevtx; then + HAVE_LIBEVTX=1 + fi +elif [[ $SKIP_LIBEVTX -eq 0 && -x "$LIBEVTX_BIN" ]]; then + HAVE_LIBEVTX=1 +fi + +############################################################################# +# Build: python-evtx +############################################################################# + +PYTHON_EVTX_SCRIPT="$BENCH_DIR/python-evtx/scripts/evtx_dump.py" + +setup_python_evtx() { + local dir="$BENCH_DIR/python-evtx" + + if [[ ! -d "$dir" ]]; then + log_warn "python-evtx not cloned, skipping" + return 1 + fi + + log_info "Setting up python-evtx..." + + # Create venv if not exists + if [[ ! -d "$dir/venv" ]]; then + python3 -m venv "$dir/venv" + fi + + # Install + ( + source "$dir/venv/bin/activate" + pip install -q -e "$dir" + ) + + log_ok "python-evtx ready" + return 0 +} + +HAVE_PYTHON_EVTX=0 +if [[ $SKIP_PYTHON -eq 0 && $SKIP_BUILD -eq 0 ]]; then + if setup_python_evtx; then + HAVE_PYTHON_EVTX=1 + fi +elif [[ $SKIP_PYTHON -eq 0 && -f "$PYTHON_EVTX_SCRIPT" ]]; then + HAVE_PYTHON_EVTX=1 +fi + +############################################################################# +# Build: golang-evtx (0xrawsec) +############################################################################# + +GOLANG_EVTX_BIN="$BENCH_DIR/golang-evtx/evtxdump" + +build_golang_evtx() { + local dir="$BENCH_DIR/golang-evtx" + + if [[ ! -d "$dir" ]]; then + log_warn "golang-evtx not cloned, skipping" + return 1 + fi + + if ! command -v go &>/dev/null; then + log_warn "Go not installed, skipping golang-evtx" + return 1 + fi + + log_info "Building golang-evtx..." + + (cd "$dir" + # Build the evtxdump tool + go build -o evtxdump ./tools/evtxdump/ + ) + + if [[ -x "$GOLANG_EVTX_BIN" ]]; then + log_ok "golang-evtx built" + return 0 + else + log_warn "golang-evtx build failed" + return 1 + fi +} + +HAVE_GOLANG_EVTX=0 +if [[ $SKIP_GOLANG_EVTX -eq 0 && $SKIP_BUILD -eq 0 ]]; then + if build_golang_evtx; then + HAVE_GOLANG_EVTX=1 + fi +elif [[ $SKIP_GOLANG_EVTX -eq 0 && -x "$GOLANG_EVTX_BIN" ]]; then + HAVE_GOLANG_EVTX=1 +fi + +############################################################################# +# Build: velocidex/evtx (Go) +############################################################################# + +VELOCIDEX_BIN="$BENCH_DIR/velocidex-evtx/bin/dump" + +build_velocidex() { + local dir="$BENCH_DIR/velocidex-evtx" + + if [[ ! -d "$dir" ]]; then + log_warn "velocidex/evtx not cloned, skipping" + return 1 + fi + + if ! command -v go &>/dev/null; then + log_warn "Go not installed, skipping velocidex/evtx" + return 1 + fi + + log_info "Building velocidex/evtx..." + + (cd "$dir" + mkdir -p bin + go build -o bin/dump ./bin/ + ) + + if [[ -x "$VELOCIDEX_BIN" ]]; then + log_ok "velocidex/evtx built" + return 0 + else + log_warn "velocidex/evtx build failed" + return 1 + fi +} + +HAVE_VELOCIDEX=0 +if [[ $SKIP_VELOCIDEX -eq 0 && $SKIP_BUILD -eq 0 ]]; then + if build_velocidex; then + HAVE_VELOCIDEX=1 + fi +elif [[ $SKIP_VELOCIDEX -eq 0 && -x "$VELOCIDEX_BIN" ]]; then + HAVE_VELOCIDEX=1 +fi + +############################################################################# +# Run Benchmarks +############################################################################# + +TIMESTAMP=$(date +%Y%m%d_%H%M%S) +RESULT_PREFIX="$RESULTS_DIR/bench_${TIMESTAMP}" + +log_info "" +log_info "=== Running Benchmarks ===" +log_info "File: $EVTX_FILE ($FILE_SIZE)" +log_info "Results will be saved to: $RESULTS_DIR/" +log_info "" + +# Summary of what will be benchmarked +echo "Parsers to benchmark:" +[[ $SKIP_RUST -eq 0 ]] && echo " ✓ Rust evtx (this library)" +[[ $HAVE_LIBEVTX -eq 1 ]] && echo " ✓ libevtx (C)" || echo " ✗ libevtx (not available)" +[[ $HAVE_PYTHON_EVTX -eq 1 ]] && echo " ✓ python-evtx (CPython)" || echo " ✗ python-evtx (not available)" +[[ $HAVE_GOLANG_EVTX -eq 1 ]] && echo " ✓ golang-evtx (Go, multiprocessing)" || echo " ✗ golang-evtx (not available)" +[[ $HAVE_VELOCIDEX -eq 1 ]] && echo " ✓ velocidex/evtx (Go)" || echo " ✗ velocidex/evtx (not available)" +echo "" + +############################################################################# +# Benchmark: XML Output +############################################################################# + +run_xml_benchmark() { + local threads="$1" + log_info "--- XML Benchmark (threads=$threads) ---" + + local cmds=() + local names=() + + # Rust evtx (XML) + if [[ $SKIP_RUST -eq 0 && -x "$RUST_BIN" ]]; then + cmds+=("$RUST_BIN -t $threads -o xml '$EVTX_FILE' > /dev/null") + names+=("evtx-rust-xml-t$threads") + fi + + # libevtx (XML only, single-threaded) + if [[ $HAVE_LIBEVTX -eq 1 && "$threads" == "1" ]]; then + cmds+=("$LIBEVTX_BIN '$EVTX_FILE' > /dev/null") + names+=("libevtx-xml") + fi + + # python-evtx (XML, single-threaded, very slow) + if [[ $HAVE_PYTHON_EVTX -eq 1 && "$threads" == "1" ]]; then + local python_cmd="source $BENCH_DIR/python-evtx/venv/bin/activate && python $PYTHON_EVTX_SCRIPT '$EVTX_FILE' > /dev/null" + # Only include python-evtx for small files or explicit request + cmds+=("bash -c \"$python_cmd\"") + names+=("python-evtx-xml") + fi + + if [[ ${#cmds[@]} -eq 0 ]]; then + log_warn "No XML benchmarks to run for threads=$threads" + return + fi + + local hyperfine_args=() + for i in "${!cmds[@]}"; do + hyperfine_args+=(-n "${names[$i]}" "${cmds[$i]}") + done + + hyperfine -w "$WARMUP" -r "$RUNS" \ + "${hyperfine_args[@]}" \ + --export-json "${RESULT_PREFIX}_xml_t${threads}.json" \ + --export-markdown "${RESULT_PREFIX}_xml_t${threads}.md" \ + | tee "${RESULT_PREFIX}_xml_t${threads}.txt" +} + +############################################################################# +# Benchmark: JSON Output +############################################################################# + +run_json_benchmark() { + local threads="$1" + log_info "--- JSON Benchmark (threads=$threads) ---" + + local cmds=() + local names=() + + # Rust evtx (JSON) + if [[ $SKIP_RUST -eq 0 && -x "$RUST_BIN" ]]; then + cmds+=("$RUST_BIN -t $threads -o jsonl '$EVTX_FILE' > /dev/null") + names+=("evtx-rust-json-t$threads") + fi + + # golang-evtx (JSON, uses multiprocessing internally) + if [[ $HAVE_GOLANG_EVTX -eq 1 ]]; then + cmds+=("$GOLANG_EVTX_BIN '$EVTX_FILE' > /dev/null") + names+=("golang-evtx-json") + fi + + # velocidex/evtx (JSON only, single-threaded) + if [[ $HAVE_VELOCIDEX -eq 1 && "$threads" == "1" ]]; then + cmds+=("$VELOCIDEX_BIN '$EVTX_FILE' > /dev/null") + names+=("velocidex-evtx-json") + fi + + if [[ ${#cmds[@]} -eq 0 ]]; then + log_warn "No JSON benchmarks to run for threads=$threads" + return + fi + + local hyperfine_args=() + for i in "${!cmds[@]}"; do + hyperfine_args+=(-n "${names[$i]}" "${cmds[$i]}") + done + + hyperfine -w "$WARMUP" -r "$RUNS" \ + "${hyperfine_args[@]}" \ + --export-json "${RESULT_PREFIX}_json_t${threads}.json" \ + --export-markdown "${RESULT_PREFIX}_json_t${threads}.md" \ + | tee "${RESULT_PREFIX}_json_t${threads}.txt" +} + +############################################################################# +# Main benchmark loop +############################################################################# + +for threads in $THREADS_LIST; do + run_xml_benchmark "$threads" + run_json_benchmark "$threads" +done + +############################################################################# +# Generate Summary +############################################################################# + +log_info "" +log_info "=== Benchmark Complete ===" +log_info "Results saved to: $RESULTS_DIR/" +log_info "" + +# Create summary markdown +SUMMARY_FILE="${RESULT_PREFIX}_summary.md" +{ + echo "# EVTX Parser Benchmark Results" + echo "" + echo "**Date:** $(date)" + echo "**File:** $EVTX_FILE ($FILE_SIZE)" + echo "**System:** $(uname -srm)" + echo "**CPU:** $(sysctl -n machdep.cpu.brand_string 2>/dev/null || lscpu 2>/dev/null | grep 'Model name' | cut -d: -f2 | xargs || echo 'Unknown')" + echo "" + echo "## Results" + echo "" + + for f in "${RESULT_PREFIX}"_*.md; do + if [[ -f "$f" && "$f" != "$SUMMARY_FILE" ]]; then + echo "### $(basename "$f" .md | sed 's/_/ /g')" + echo "" + cat "$f" + echo "" + fi + done +} > "$SUMMARY_FILE" + +log_ok "Summary written to: $SUMMARY_FILE" + +# Print quick summary +echo "" +echo "=== Quick Results ===" +for f in "${RESULT_PREFIX}"_*.txt; do + [[ -f "$f" ]] && cat "$f" +done diff --git a/scripts/bench_utf16_escape_matrix.sh b/scripts/bench_utf16_escape_matrix.sh new file mode 100755 index 00000000..da164a1e --- /dev/null +++ b/scripts/bench_utf16_escape_matrix.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +ZIG_ROOT="${ZIG_ROOT:-/Users/omerba/Workspace/zig-evtx}" + +OUT_DIR="${OUT_DIR:-$ROOT/perf}" +DATA_PATH="${DATA_PATH:-$OUT_DIR/utf16_escape_data.bin}" +RUST_OUT="${RUST_OUT:-$OUT_DIR/utf16_escape_rust.csv}" +ZIG_OUT="${ZIG_OUT:-$OUT_DIR/utf16_escape_zig.csv}" +MATRIX_OUT="${MATRIX_OUT:-$OUT_DIR/utf16_escape_matrix.md}" + +mkdir -p "$OUT_DIR" + +python3 "$ROOT/scripts/gen_utf16_escape_dataset.py" --out "$DATA_PATH" + +(cd "$ROOT" && cargo build --release --bin bench_utf16_escape_matrix) +(cd "$ZIG_ROOT" && zig build -Doptimize=ReleaseFast) + +"$ROOT/target/release/bench_utf16_escape_matrix" --data "$DATA_PATH" > "$RUST_OUT" +"$ZIG_ROOT/zig-out/bin/bench_utf16_escape_matrix" --data "$DATA_PATH" > "$ZIG_OUT" + +python3 "$ROOT/scripts/merge_utf16_escape_matrix.py" \ + --rust "$RUST_OUT" \ + --zig "$ZIG_OUT" \ + --out "$MATRIX_OUT" + +echo "Rust CSV: $RUST_OUT" +echo "Zig CSV: $ZIG_OUT" +echo "Matrix : $MATRIX_OUT" diff --git a/scripts/download_evtx_samples.sh b/scripts/download_evtx_samples.sh new file mode 100755 index 00000000..dc771838 --- /dev/null +++ b/scripts/download_evtx_samples.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash +# +# Download sample EVTX files for benchmarking +# +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +SAMPLES_DIR="${SAMPLES_DIR:-$ROOT/samples}" + +mkdir -p "$SAMPLES_DIR" + +echo "Downloading sample EVTX files to $SAMPLES_DIR..." +echo "" + +# Option 1: EVTX-ATTACK-SAMPLES (many small files, good variety) +ATTACK_SAMPLES_DIR="$SAMPLES_DIR/EVTX-ATTACK-SAMPLES" +if [[ ! -d "$ATTACK_SAMPLES_DIR" ]]; then + echo "Cloning EVTX-ATTACK-SAMPLES (many diverse samples)..." + git clone --depth 1 https://github.com/sbousseaden/EVTX-ATTACK-SAMPLES.git "$ATTACK_SAMPLES_DIR" + echo "Done!" +else + echo "EVTX-ATTACK-SAMPLES already exists" +fi + +# Count total size +TOTAL_SIZE=$(du -sh "$ATTACK_SAMPLES_DIR" 2>/dev/null | cut -f1) +EVTX_COUNT=$(find "$ATTACK_SAMPLES_DIR" -name "*.evtx" 2>/dev/null | wc -l | xargs) + +echo "" +echo "=== Downloaded Samples ===" +echo "Location: $ATTACK_SAMPLES_DIR" +echo "Total size: $TOTAL_SIZE" +echo "EVTX files: $EVTX_COUNT" +echo "" + +# Find largest file for benchmarking +echo "Largest EVTX files (best for benchmarking):" +find "$ATTACK_SAMPLES_DIR" -name "*.evtx" -exec du -h {} \; 2>/dev/null | sort -rh | head -10 + +# Create a combined file for bigger benchmarks +echo "" +echo "=== Creating Combined Test File ===" + +COMBINED_FILE="$SAMPLES_DIR/combined_samples.evtx" +if [[ ! -f "$COMBINED_FILE" ]]; then + # Just use the largest file we find + LARGEST=$(find "$ATTACK_SAMPLES_DIR" -name "*.evtx" -exec du -b {} \; 2>/dev/null | sort -rn | head -1 | cut -f2) + if [[ -n "$LARGEST" ]]; then + cp "$LARGEST" "$COMBINED_FILE" + echo "Copied largest file as: $COMBINED_FILE" + echo "Size: $(du -h "$COMBINED_FILE" | cut -f1)" + fi +else + echo "Combined file already exists: $COMBINED_FILE ($(du -h "$COMBINED_FILE" | cut -f1))" +fi + +echo "" +echo "=== Next Steps ===" +echo "" +echo "For quick benchmarks, use any .evtx file:" +echo " ./scripts/bench_parsers.sh --file samples/EVTX-ATTACK-SAMPLES/some_file.evtx" +echo "" +echo "For comprehensive benchmarks, you may want a larger (~30MB+) Security.evtx file." +echo "These typically come from Windows machines with audit logging enabled." +echo "" +echo "To export from a Windows machine:" +echo " wevtutil epl Security C:\\path\\to\\security.evtx" +echo "" +echo "Or download additional samples from:" +echo " - https://github.com/NextronSystems/evtx-baseline" +echo " - Your own Windows test environments" diff --git a/scripts/gen_utf16_escape_dataset.py b/scripts/gen_utf16_escape_dataset.py new file mode 100755 index 00000000..e94786a5 --- /dev/null +++ b/scripts/gen_utf16_escape_dataset.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python3 +""" +Generate a shared UTF-16LE escape benchmark dataset for Rust + Zig. + +Binary format (little-endian): + - magic: b"UTFB" + - u32 version (1) + - u32 case_count + - repeated cases: + - u32 pattern_len + - pattern bytes (utf-8) + - u32 length_units + - u32 byte_len + - utf16le bytes +""" + +from __future__ import annotations + +import argparse +import struct +from dataclasses import dataclass + + +@dataclass(frozen=True) +class Case: + pattern: str + length_units: int + utf16le: bytes + + +def _repeat_units(units: list[int], length: int) -> list[int]: + out: list[int] = [] + if not units: + return out + i = 0 + while len(out) < length: + out.append(units[i % len(units)]) + i += 1 + return out + + +def _build_cases(lengths: list[int]) -> list[Case]: + patterns: dict[str, list[int]] = { + "ascii_plain": [0x0061], # 'a' + "ascii_esc": [ + 0x0022, # " + 0x005C, # \ + 0x000A, # \n + 0x000D, # \r + 0x0009, # \t + 0x0008, # \b + 0x000C, # \f + 0x001F, # unit separator + 0x0041, # A + ], + "latin1": [0x00E9], # é + "bmp": [0x2603], # ☃ + "surrogate": [0xD83D, 0xDE00], # 😀 + } + + cases: list[Case] = [] + for pattern, units in patterns.items(): + for length in lengths: + if length <= 0: + continue + if pattern == "surrogate": + # Fill with full surrogate pairs; if odd, pad final unit with 'A'. + pair_count = length // 2 + out_units: list[int] = [] + for _ in range(pair_count): + out_units.extend(units) + if length % 2 == 1: + out_units.append(0x0041) + else: + out_units = _repeat_units(units, length) + + utf16le = b"".join(struct.pack(" int: + ap = argparse.ArgumentParser() + ap.add_argument("--out", required=True, help="Output dataset path") + ap.add_argument( + "--lengths", + default="1,2,3,4,5,8,12,16,24,32,48,64,96,128", + help="Comma-separated UTF-16 unit lengths", + ) + args = ap.parse_args() + + lengths = [int(x) for x in args.lengths.split(",") if x.strip()] + cases = _build_cases(lengths) + + with open(args.out, "wb") as f: + f.write(b"UTFB") + f.write(struct.pack(" dict[tuple[str, int], dict[str, float]]: + out: dict[tuple[str, int], dict[str, float]] = {} + with open(path, "r", encoding="utf-8") as f: + reader = csv.DictReader(f) + for row in reader: + pattern = row["pattern"] + length = int(row["length"]) + out[(pattern, length)] = { + "ns_per_iter": float(row["ns_per_iter"]), + "ns_per_unit": float(row["ns_per_unit"]), + } + return out + + +def main() -> int: + ap = argparse.ArgumentParser() + ap.add_argument("--rust", required=True) + ap.add_argument("--zig", required=True) + ap.add_argument("--out", required=True) + args = ap.parse_args() + + rust = load_csv(args.rust) + zig = load_csv(args.zig) + + patterns = sorted({p for (p, _l) in rust.keys() | zig.keys()}) + lengths_by_pattern: dict[str, list[int]] = defaultdict(list) + for (p, l) in rust.keys() | zig.keys(): + lengths_by_pattern[p].append(l) + for p in patterns: + lengths_by_pattern[p] = sorted(set(lengths_by_pattern[p])) + + with open(args.out, "w", encoding="utf-8") as f: + f.write("| pattern | length | rust ns/unit | zig ns/unit | ratio |\n") + f.write("| --- | ---: | ---: | ---: | ---: |\n") + for p in patterns: + for l in lengths_by_pattern[p]: + r = rust.get((p, l)) + z = zig.get((p, l)) + if not r or not z: + continue + ratio = r["ns_per_unit"] / z["ns_per_unit"] if z["ns_per_unit"] else 0.0 + f.write( + f"| {p} | {l} | {r['ns_per_unit']:.6f} | {z['ns_per_unit']:.6f} | {ratio:.2f} |\n" + ) + + print(f"Wrote {args.out}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/perf_evtx_dump.sh b/scripts/perf_evtx_dump.sh new file mode 100755 index 00000000..d32301c6 --- /dev/null +++ b/scripts/perf_evtx_dump.sh @@ -0,0 +1,125 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" + +FILE="$ROOT/samples/security_big_sample.evtx" +THREADS=1 +WARMUP=10 +RUNS=10 +PROFILE_LOOPS=20 +WEIGHT="cpu" +OUT_DIR="$ROOT/perf" +SKIP_BUILD=0 +SKIP_SAMPLY=0 +SKIP_HYPERFINE=0 +NO_REDIRECT=0 +RUST_FEATURES="fast-alloc" + +ZIG_ROOT="${ZIG_ROOT:-/Users/omerba/Workspace/zig-evtx}" +ZIG_BIN="${ZIG_BIN:-$ZIG_ROOT/zig-out/bin/evtx_dump_zig}" +ZIG_LOOP_BIN="${ZIG_LOOP_BIN:-$ZIG_ROOT/zig-out/bin/evtx_dump_loop}" + +usage() { + cat <<'EOF' +Usage: scripts/perf_evtx_dump.sh [options] + +Options: + --file EVTX input file (default: samples/security_big_sample.evtx) + --threads Threads for evtx_dump (default: 1) + --warmup Hyperfine warmup runs (default: 10) + --runs Hyperfine runs (default: 10) + --profile-loops Loop count for samply profile (default: 20) + --no-redirect Do not redirect output to /dev/null + --features Cargo features (default: fast-alloc; `bench` auto-added unless --skip-samply) + --weight Weight mode for tables (default: cpu) + --out-dir Output directory (default: perf) + --skip-build Skip cargo/zig builds + --skip-hyperfine Skip hyperfine run + --skip-samply Skip samply profiles + table extraction +EOF +} + +while [[ $# -gt 0 ]]; do + case "$1" in + --file) FILE="$2"; shift 2;; + --threads) THREADS="$2"; shift 2;; + --warmup) WARMUP="$2"; shift 2;; + --runs) RUNS="$2"; shift 2;; + --profile-loops) PROFILE_LOOPS="$2"; shift 2;; + --no-redirect) NO_REDIRECT=1; shift;; + --features) RUST_FEATURES="$2"; shift 2;; + --weight) WEIGHT="$2"; shift 2;; + --out-dir) OUT_DIR="$2"; shift 2;; + --skip-build) SKIP_BUILD=1; shift;; + --skip-hyperfine) SKIP_HYPERFINE=1; shift;; + --skip-samply) SKIP_SAMPLY=1; shift;; + -h|--help) usage; exit 0;; + *) echo "Unknown arg: $1"; usage; exit 1;; + esac +done + +RUST_BIN="$ROOT/target/release/evtx_dump" +RUST_LOOP_BIN="$ROOT/target/release/bench_evtx_dump_loop" + +# `bench_evtx_dump_loop` is gated behind the `bench` feature. +if [[ $SKIP_SAMPLY -eq 0 && " $RUST_FEATURES " != *" bench "* ]]; then + RUST_FEATURES="${RUST_FEATURES} bench" +fi + +REDIRECT="> /dev/null" +if [[ $NO_REDIRECT -eq 1 ]]; then + REDIRECT="" +fi + +mkdir -p "$OUT_DIR" + +if [[ $SKIP_BUILD -eq 0 ]]; then + if [[ -n "$RUST_FEATURES" ]]; then + (cd "$ROOT" && cargo build --release --features "$RUST_FEATURES" --bin evtx_dump --bin bench_evtx_dump_loop) + else + (cd "$ROOT" && cargo build --release --bin evtx_dump --bin bench_evtx_dump_loop) + fi + (cd "$ZIG_ROOT" && zig build -Doptimize=ReleaseFast) +fi + +RUST_CMD="$RUST_BIN -t $THREADS -o jsonl $FILE" +ZIG_CMD="$ZIG_BIN --no-checks -t $THREADS -o jsonl $FILE" + +if [[ $SKIP_HYPERFINE -eq 0 ]]; then + hyperfine -w "$WARMUP" -r "$RUNS" \ + "$RUST_CMD $REDIRECT" \ + "$ZIG_CMD $REDIRECT" \ + --export-json "$OUT_DIR/hyperfine_evtx_dump.json" \ + | tee "$OUT_DIR/hyperfine_evtx_dump.txt" +fi + +if [[ $SKIP_SAMPLY -eq 0 ]]; then + samply record --save-only --unstable-presymbolicate \ + -o "$OUT_DIR/rust_evtx_dump.json.gz" -- \ + "$RUST_LOOP_BIN" --file "$FILE" --loops "$PROFILE_LOOPS" --threads "$THREADS" + + if [[ -x "$ZIG_BIN" && -x "$ZIG_LOOP_BIN" ]]; then + samply record --save-only --unstable-presymbolicate \ + -o "$OUT_DIR/zig_evtx_dump.json.gz" -- \ + "$ZIG_LOOP_BIN" --file "$FILE" --loops "$PROFILE_LOOPS" --threads "$THREADS" + else + echo "Skipping Zig samply profile (set ZIG_LOOP_BIN to a loop-capable binary)." >&2 + fi + + python3 "$ROOT/scripts/samply_extract_tables.py" \ + --profile "$OUT_DIR/rust_evtx_dump.json.gz" \ + --syms "$OUT_DIR/rust_evtx_dump.json.syms.json" \ + --out-dir "$OUT_DIR" \ + --label rust_evtx_dump \ + --weight "$WEIGHT" + + if [[ -f "$OUT_DIR/zig_evtx_dump.json.gz" ]]; then + python3 "$ROOT/scripts/samply_extract_tables.py" \ + --profile "$OUT_DIR/zig_evtx_dump.json.gz" \ + --syms "$OUT_DIR/zig_evtx_dump.json.syms.json" \ + --out-dir "$OUT_DIR" \ + --label zig_evtx_dump \ + --weight "$WEIGHT" + fi +fi diff --git a/scripts/perf_tree_build.sh b/scripts/perf_tree_build.sh new file mode 100755 index 00000000..98b8d9db --- /dev/null +++ b/scripts/perf_tree_build.sh @@ -0,0 +1,101 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" + +FILE="$ROOT/samples/security.evtx" +LOOPS=500000 +PROFILE_LOOPS=500000 +WEIGHT="cpu" +OUT_DIR="$ROOT/perf" +SKIP_BUILD=0 +SKIP_SAMPLY=0 +SKIP_HYPERFINE=0 + +ZIG_ROOT="${ZIG_ROOT:-/Users/omerba/Workspace/zig-evtx}" +ZIG_BIN="${ZIG_BIN:-$ZIG_ROOT/zig-out/bin/bench_tree_build_loop}" + +usage() { + cat <<'EOF' +Usage: scripts/perf_tree_build.sh [options] + +Options: + --file EVTX input file (default: samples/security.evtx) + --loops Loop count for hyperfine (default: 500000) + --profile-loops Loop count for samply profiles (default: 500000) + --weight Weight mode for tables (default: cpu) + --out-dir Output directory (default: perf) + --skip-build Skip cargo/zig builds + --skip-hyperfine Skip hyperfine run + --skip-samply Skip samply profiles + table extraction +EOF +} + +while [[ $# -gt 0 ]]; do + case "$1" in + --file) FILE="$2"; shift 2;; + --loops) LOOPS="$2"; shift 2;; + --profile-loops) PROFILE_LOOPS="$2"; shift 2;; + --weight) WEIGHT="$2"; shift 2;; + --out-dir) OUT_DIR="$2"; shift 2;; + --skip-build) SKIP_BUILD=1; shift;; + --skip-hyperfine) SKIP_HYPERFINE=1; shift;; + --skip-samply) SKIP_SAMPLY=1; shift;; + -h|--help) usage; exit 0;; + *) echo "Unknown arg: $1"; usage; exit 1;; + esac +done + +RUST_BIN="$ROOT/target/release/bench_tree_build" +RUST_BIN_DIRECT="$ROOT/target/release/bench_tree_build_direct" + +mkdir -p "$OUT_DIR" + +if [[ $SKIP_BUILD -eq 0 ]]; then + (cd "$ROOT" && cargo build --release --features bench) + (cd "$ZIG_ROOT" && zig build -Doptimize=ReleaseFast) +fi + +if [[ $SKIP_HYPERFINE -eq 0 ]]; then + hyperfine -w 3 -r 8 \ + "$RUST_BIN --file $FILE --loops $LOOPS" \ + "$RUST_BIN_DIRECT --file $FILE --loops $LOOPS" \ + "$ZIG_BIN --file $FILE --loops $LOOPS" \ + --export-json "$OUT_DIR/hyperfine_tree_build.json" \ + | tee "$OUT_DIR/hyperfine_tree_build.txt" +fi + +if [[ $SKIP_SAMPLY -eq 0 ]]; then + samply record --save-only --unstable-presymbolicate \ + -o "$OUT_DIR/rust_tree_build_loop.json.gz" -- \ + "$RUST_BIN" --file "$FILE" --loops "$PROFILE_LOOPS" > /dev/null + + samply record --save-only --unstable-presymbolicate \ + -o "$OUT_DIR/rust_tree_build_direct_loop.json.gz" -- \ + "$RUST_BIN_DIRECT" --file "$FILE" --loops "$PROFILE_LOOPS" > /dev/null + + samply record --save-only --unstable-presymbolicate \ + -o "$OUT_DIR/zig_tree_build_loop.json.gz" -- \ + "$ZIG_BIN" --file "$FILE" --loops "$PROFILE_LOOPS" > /dev/null + + python3 "$ROOT/scripts/samply_extract_tables.py" \ + --profile "$OUT_DIR/rust_tree_build_loop.json.gz" \ + --syms "$OUT_DIR/rust_tree_build_loop.json.syms.json" \ + --out-dir "$OUT_DIR" \ + --label rust_tree_build_loop \ + --weight "$WEIGHT" + + python3 "$ROOT/scripts/samply_extract_tables.py" \ + --profile "$OUT_DIR/rust_tree_build_direct_loop.json.gz" \ + --syms "$OUT_DIR/rust_tree_build_direct_loop.json.syms.json" \ + --out-dir "$OUT_DIR" \ + --label rust_tree_build_direct_loop \ + --weight "$WEIGHT" + + python3 "$ROOT/scripts/samply_extract_tables.py" \ + --profile "$OUT_DIR/zig_tree_build_loop.json.gz" \ + --syms "$OUT_DIR/zig_tree_build_loop.json.syms.json" \ + --out-dir "$OUT_DIR" \ + --label zig_tree_build_loop \ + --weight "$WEIGHT" +fi diff --git a/scripts/samply_extract_tables.py b/scripts/samply_extract_tables.py new file mode 100755 index 00000000..6dfdab7f --- /dev/null +++ b/scripts/samply_extract_tables.py @@ -0,0 +1,457 @@ +#!/usr/bin/env python3 +""" +Extract simple markdown tables from a samply (Firefox Profiler) JSON profile + syms sidecar. + +Outputs (matching PERF.md conventions): + - top_leaves_