diff --git a/Cargo.lock b/Cargo.lock index 543b56dec6..07469f5cf1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1219,6 +1219,12 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced" +[[package]] +name = "embedded-io" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" + [[package]] name = "embedding" version = "37.0.0" @@ -2759,7 +2765,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a55c51ee6c0db07e68448e336cf8ea4131a620edefebf9893e759b2d793420f8" dependencies = [ "cobs", - "embedded-io", + "embedded-io 0.4.0", "serde", ] @@ -4455,6 +4461,7 @@ dependencies = [ "cc", "cfg-if", "cranelift-native", + "embedded-io 0.6.1", "encoding_rs", "env_logger 0.11.5", "futures", @@ -4480,6 +4487,7 @@ dependencies = [ "serde", "serde_derive", "serde_json", + "sha2", "smallvec", "target-lexicon", "tempfile", diff --git a/Cargo.toml b/Cargo.toml index 5fb9e36bd0..55d7c92da1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -523,6 +523,9 @@ gc-drc = ["gc", "wasmtime/gc-drc", "wasmtime-cli-flags/gc-drc"] gc-null = ["gc", "wasmtime/gc-null", "wasmtime-cli-flags/gc-null"] pulley = ["wasmtime-cli-flags/pulley"] stack-switching = ["wasmtime/stack-switching", "wasmtime-cli-flags/stack-switching"] +rr = ["wasmtime/rr", "wasmtime-cli-flags/rr"] +rr-component = ["rr", "wasmtime/rr-component"] +rr-validate = ["rr", "wasmtime/rr-validate", "wasmtime-cli-flags/rr-validate"] # CLI subcommands for the `wasmtime` executable. See `wasmtime $cmd --help` # for more information on each subcommand. diff --git a/crates/cli-flags/Cargo.toml b/crates/cli-flags/Cargo.toml index 1469c9535b..062dd85b14 100644 --- a/crates/cli-flags/Cargo.toml +++ b/crates/cli-flags/Cargo.toml @@ -40,3 +40,5 @@ threads = ["wasmtime/threads"] memory-protection-keys = ["wasmtime/memory-protection-keys"] pulley = ["wasmtime/pulley"] stack-switching = ["wasmtime/stack-switching"] +rr = ["wasmtime/rr"] +rr-validate = ["wasmtime/rr-validate"] diff --git a/crates/cli-flags/src/lib.rs b/crates/cli-flags/src/lib.rs index 64f109f349..49a220c15d 100644 --- a/crates/cli-flags/src/lib.rs +++ b/crates/cli-flags/src/lib.rs @@ -484,6 +484,25 @@ wasmtime_option_group! { } } +wasmtime_option_group! { + #[derive(PartialEq, Clone, Deserialize)] + #[serde(rename_all = "kebab-case", deny_unknown_fields)] + pub struct RecordOptions { + /// Filesystem endpoint to store the recorded execution trace + pub path: Option, + /// Include (optional) signatures to facilitate validation checks during replay + /// (see `wasmtime replay` for details). + pub validation_metadata: Option, + /// Window size of internal buffering for record events (large windows offer more opportunities + /// for coalescing events at the cost of memory usage). + pub event_window_size: Option, + } + + enum Record { + ... + } +} + #[derive(Debug, Clone, PartialEq)] pub struct WasiNnGraph { pub format: String, @@ -534,6 +553,18 @@ pub struct CommonOptions { #[serde(skip)] wasi_raw: Vec>, + /// Options to enable and configure execution recording, `-R help` to see all. + /// + /// Generates of a serialized trace of the Wasm module execution that captures all + /// non-determinism observable by the module. This trace can subsequently be + /// re-executed in a determinstic, embedding-agnostic manner (see the `wasmtime replay` command). + /// + /// Note: Minimal configs for deterministic Wasm semantics will be + /// enforced during recording by default (NaN canonicalization, deterministic relaxed SIMD) + #[arg(short = 'R', long = "record", value_name = "KEY[=VAL[,..]]")] + #[serde(skip)] + record_raw: Vec>, + // These fields are filled in by the `configure` method below via the // options parsed from the CLI above. This is what the CLI should use. #[arg(skip)] @@ -560,6 +591,10 @@ pub struct CommonOptions { #[serde(rename = "wasi", default)] pub wasi: WasiOptions, + #[arg(skip)] + #[serde(rename = "record", default)] + pub record: RecordOptions, + /// The target triple; default is the host triple #[arg(long, value_name = "TARGET")] #[serde(skip)] @@ -606,12 +641,14 @@ impl CommonOptions { debug_raw: Vec::new(), wasm_raw: Vec::new(), wasi_raw: Vec::new(), + record_raw: Vec::new(), configured: true, opts: Default::default(), codegen: Default::default(), debug: Default::default(), wasm: Default::default(), wasi: Default::default(), + record: Default::default(), target: None, config: None, } @@ -629,12 +666,14 @@ impl CommonOptions { self.debug = toml_options.debug; self.wasm = toml_options.wasm; self.wasi = toml_options.wasi; + self.record = toml_options.record; } self.opts.configure_with(&self.opts_raw); self.codegen.configure_with(&self.codegen_raw); self.debug.configure_with(&self.debug_raw); self.wasm.configure_with(&self.wasm_raw); self.wasi.configure_with(&self.wasi_raw); + self.record.configure_with(&self.record_raw); Ok(()) } @@ -979,6 +1018,35 @@ impl CommonOptions { true => err, } + let record = &self.record; + match_feature! { + ["rr" : record.path.clone()] + path => { + use std::{io::BufWriter, sync::Arc}; + use wasmtime::{RecordConfig, RecordSettings}; + let default_settings = RecordSettings::default(); + match_feature! { + ["rr-validate": record.validation_metadata] + _v => (), + _ => err, + } + config.enable_record(RecordConfig { + writer_initializer: Arc::new(move || { + Box::new(BufWriter::new(fs::File::create(&path).unwrap())) + }), + settings: RecordSettings { + add_validation: record + .validation_metadata + .unwrap_or(default_settings.add_validation), + event_window_size: record + .event_window_size + .unwrap_or(default_settings.event_window_size), + }, + })? + }, + _ => err, + } + Ok(config) } @@ -1093,6 +1161,7 @@ mod tests { [debug] [wasm] [wasi] + [record] "#; let mut common_options: CommonOptions = toml::from_str(basic_toml).unwrap(); common_options.config(None).unwrap(); @@ -1214,6 +1283,8 @@ impl fmt::Display for CommonOptions { wasm, wasi_raw, wasi, + record_raw, + record, configured, target, config, @@ -1230,6 +1301,7 @@ impl fmt::Display for CommonOptions { let wasi_flags; let wasm_flags; let debug_flags; + let record_flags; if *configured { codegen_flags = codegen.to_options(); @@ -1237,6 +1309,7 @@ impl fmt::Display for CommonOptions { wasi_flags = wasi.to_options(); wasm_flags = wasm.to_options(); opts_flags = opts.to_options(); + record_flags = record.to_options(); } else { codegen_flags = codegen_raw .iter() @@ -1247,6 +1320,11 @@ impl fmt::Display for CommonOptions { wasi_flags = wasi_raw.iter().flat_map(|t| t.0.iter()).cloned().collect(); wasm_flags = wasm_raw.iter().flat_map(|t| t.0.iter()).cloned().collect(); opts_flags = opts_raw.iter().flat_map(|t| t.0.iter()).cloned().collect(); + record_flags = record_raw + .iter() + .flat_map(|t| t.0.iter()) + .cloned() + .collect(); } for flag in codegen_flags { @@ -1264,6 +1342,9 @@ impl fmt::Display for CommonOptions { for flag in debug_flags { write!(f, "-D{flag} ")?; } + for flag in record_flags { + write!(f, "-R{flag} ")?; + } Ok(()) } diff --git a/crates/environ/src/component/artifacts.rs b/crates/environ/src/component/artifacts.rs index 096ea839be..f32cf95e58 100644 --- a/crates/environ/src/component/artifacts.rs +++ b/crates/environ/src/component/artifacts.rs @@ -18,6 +18,8 @@ pub struct ComponentArtifacts { pub types: ComponentTypes, /// Serialized metadata about all included core wasm modules. pub static_modules: PrimaryMap, + /// A SHA-256 checksum of the source Wasm binary from which the component was compiled + pub checksum: [u8; 32], } /// Runtime state that a component retains to support its operation. diff --git a/crates/wasmtime/Cargo.toml b/crates/wasmtime/Cargo.toml index f8f67e81aa..b08ceee230 100644 --- a/crates/wasmtime/Cargo.toml +++ b/crates/wasmtime/Cargo.toml @@ -62,6 +62,8 @@ hashbrown = { workspace = true, features = ["default-hasher"] } bitflags = { workspace = true } futures = { workspace = true, features = ["alloc"], optional = true } bytes = { workspace = true, optional = true } +embedded-io = { version = "0.6.1", features = ["alloc"], optional = true } +sha2 = { version = "0.10.2", default-features = false } [target.'cfg(target_os = "windows")'.dependencies.windows-sys] workspace = true @@ -402,3 +404,13 @@ component-model-async-bytes = [ "component-model-async", "dep:bytes", ] + +# Enables support for record/replay for components +rr-component = ["component-model", "rr"] +# Enable support for the common base infrastructure of record/replay +# By default, this supports core wasm `rr`. For components, enable `rr-component` +rr = ["dep:embedded-io"] + +# Enables record/replay with support for additional validation signatures/checks. +rr-validate = ["rr"] + diff --git a/crates/wasmtime/src/compile.rs b/crates/wasmtime/src/compile.rs index 9db99d0f9d..68efb82125 100644 --- a/crates/wasmtime/src/compile.rs +++ b/crates/wasmtime/src/compile.rs @@ -26,6 +26,8 @@ use crate::Engine; use crate::hash_map::HashMap; use crate::hash_set::HashSet; use crate::prelude::*; +#[cfg(feature = "component-model")] +use sha2::{Digest, Sha256}; use std::{ any::Any, borrow::Cow, @@ -208,6 +210,7 @@ pub(crate) fn build_component_artifacts( ty, types, static_modules: compilation_artifacts.modules, + checksum: Sha256::digest(binary).into(), }; object.serialize_info(&artifacts); diff --git a/crates/wasmtime/src/config.rs b/crates/wasmtime/src/config.rs index 61b3e6bf3c..a842375292 100644 --- a/crates/wasmtime/src/config.rs +++ b/crates/wasmtime/src/config.rs @@ -3,6 +3,8 @@ use alloc::sync::Arc; use bitflags::Flags; use core::fmt; use core::str::FromStr; +#[cfg(feature = "rr")] +use serde::{Deserialize, Serialize}; #[cfg(any(feature = "cache", feature = "cranelift", feature = "winch"))] use std::path::Path; use wasmparser::WasmFeatures; @@ -24,6 +26,8 @@ use crate::stack::{StackCreator, StackCreatorProxy}; #[cfg(feature = "async")] use wasmtime_fiber::RuntimeFiberStackCreator; +#[cfg(feature = "rr")] +use crate::rr::{RecordWriter, ReplayReader}; #[cfg(feature = "runtime")] pub use crate::runtime::code_memory::CustomCodeMemory; #[cfg(feature = "cache")] @@ -99,6 +103,17 @@ impl core::hash::Hash for ModuleVersionStrategy { } } +impl ModuleVersionStrategy { + /// Get the string-encoding version of the module. + pub fn as_str(&self) -> &str { + match &self { + Self::WasmtimeVersion => env!("CARGO_PKG_VERSION"), + Self::Custom(c) => c, + Self::None => "", + } + } +} + /// Global configuration options used to create an [`Engine`](crate::Engine) /// and customize its behavior. /// @@ -163,6 +178,8 @@ pub struct Config { pub(crate) coredump_on_trap: bool, pub(crate) macos_use_mach_ports: bool, pub(crate) detect_host_feature: Option Option>, + #[cfg(feature = "rr")] + pub(crate) rr: Option, } /// User-provided configuration for the compiler. @@ -219,6 +236,112 @@ impl Default for CompilerConfig { } } +/// Settings for execution recording. +#[cfg(feature = "rr")] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct RecordSettings { + /// Flag to include additional signatures for replay validation. + pub add_validation: bool, + /// Maximum window size of internal event buffer. + pub event_window_size: usize, +} + +#[cfg(feature = "rr")] +impl Default for RecordSettings { + fn default() -> Self { + Self { + add_validation: false, + event_window_size: 16, + } + } +} + +/// Configuration for recording execution. +#[cfg(feature = "rr")] +#[derive(Clone)] +pub struct RecordConfig { + /// Closure that generates a writer for recording execution traces. + pub writer_initializer: Arc Box + Send + Sync>, + /// Associated metadata for configuring the recording strategy. + pub settings: RecordSettings, +} + +/// Settings for execution replay. +#[cfg(feature = "rr")] +#[derive(Debug, Clone)] +pub struct ReplaySettings { + /// Flag to include additional signatures for replay validation. + pub validate: bool, + /// Static buffer size for deserialization of variable-length types (like [String]). + pub deser_buffer_size: usize, +} + +#[cfg(feature = "rr")] +impl Default for ReplaySettings { + fn default() -> Self { + Self { + validate: false, + deser_buffer_size: 64, + } + } +} + +/// Configuration for replay execution. +#[cfg(feature = "rr")] +#[derive(Clone)] +pub struct ReplayConfig { + /// Closure that generates a reader for replaying execution traces. + pub reader_initializer: Arc Box + Send + Sync>, + /// Flag for dynamic validation checks when replaying events. + pub settings: ReplaySettings, +} + +/// Configurations for record/replay (RR) executions. +#[cfg(feature = "rr")] +#[derive(Clone)] +pub enum RRConfig { + /// Record configuration. + Record(RecordConfig), + /// Replay configuration. + Replay(ReplayConfig), +} + +#[cfg(feature = "rr")] +impl From for RRConfig { + fn from(value: RecordConfig) -> Self { + Self::Record(value) + } +} + +#[cfg(feature = "rr")] +impl From for RRConfig { + fn from(value: ReplayConfig) -> Self { + Self::Replay(value) + } +} + +#[cfg(feature = "rr")] +impl RRConfig { + /// Obtain the record configuration. + /// + /// Return [`None`] if it is not configured. + pub fn record(&self) -> Option<&RecordConfig> { + match self { + Self::Record(r) => Some(r), + _ => None, + } + } + /// Obtain the replay configuration. + /// + /// Return [`None`] if it is not configured. + pub fn replay(&self) -> Option<&ReplayConfig> { + match self { + Self::Replay(r) => Some(r), + _ => None, + } + } +} + impl Config { /// Creates a new configuration object with the default configuration /// specified. @@ -271,6 +394,8 @@ impl Config { detect_host_feature: Some(detect_host_feature), #[cfg(not(feature = "std"))] detect_host_feature: None, + #[cfg(feature = "rr")] + rr: None, }; #[cfg(any(feature = "cranelift", feature = "winch"))] { @@ -1001,6 +1126,11 @@ impl Config { /// /// [proposal]: https://github.com/webassembly/relaxed-simd pub fn relaxed_simd_deterministic(&mut self, enable: bool) -> &mut Self { + #[cfg(feature = "rr")] + assert!( + !(self.is_determinism_enforced() && !enable), + "Deterministic relaxed SIMD cannot be disabled when record/replay is enabled" + ); self.tunables.relaxed_simd_deterministic = Some(enable); self } @@ -1318,6 +1448,11 @@ impl Config { /// The default value for this is `false` #[cfg(any(feature = "cranelift", feature = "winch"))] pub fn cranelift_nan_canonicalization(&mut self, enable: bool) -> &mut Self { + #[cfg(feature = "rr")] + assert!( + !(self.is_determinism_enforced() && !enable), + "NaN canonicalization cannot be disabled when record/replay is enabled" + ); let val = if enable { "true" } else { "false" }; self.compiler_config .settings @@ -2675,6 +2810,91 @@ impl Config { self.tunables.signals_based_traps = Some(enable); self } + + /// Enforce deterministic execution configurations. Currently, means the following: + /// * Enabling NaN canonicalization with [`Config::cranelift_nan_canonicalization`] + /// * Enabling deterministic relaxed SIMD with [`Config::relaxed_simd_deterministic`] + /// + /// Required for faithful record/replay execution. + #[inline] + pub fn enforce_determinism(&mut self) -> &mut Self { + #[cfg(any(feature = "cranelift", feature = "winch"))] + self.cranelift_nan_canonicalization(true); + self.relaxed_simd_deterministic(true); + self + } + + /// Remove determinstic execution enforcements (if any) applied + /// by [`Config::enforce_determinism`]. + #[inline] + pub fn remove_determinism_enforcement(&mut self) -> &mut Self { + #[cfg(any(feature = "cranelift", feature = "winch"))] + self.cranelift_nan_canonicalization(false); + self.relaxed_simd_deterministic(false); + self + } + + /// Evaluates to true if current configuration must respect + /// deterministic execution in its configuration. + /// + /// Required for faithful record/replay execution. + #[cfg(feature = "rr")] + #[inline] + pub fn is_determinism_enforced(&mut self) -> bool { + self.rr.is_some() + } + + /// Enable execution trace recording with the provided configuration. + /// + /// This method implicitly enforces determinism (see [`Config::enforce_determinism`] + /// for details). + /// + /// ## Errors + /// + /// Errors if record/replay are simultaneously enabled. + #[cfg(feature = "rr")] + pub fn enable_record(&mut self, record: RecordConfig) -> Result<&mut Self> { + self.enforce_determinism(); + if let Some(cfg) = &self.rr { + if let RRConfig::Replay(_) = cfg { + bail!("Cannot enable recording when replay is already enabled"); + } + } + self.rr = Some(RRConfig::from(record)); + Ok(self) + } + + /// Enable replay execution based on the provided configuration. + /// + /// This method implicitly enforces determinism (see [`Config::enforce_determinism`] + /// for details). + /// + /// ## Errors + /// + /// Errors if record/replay are simultaneously enabled. + #[cfg(feature = "rr")] + pub fn enable_replay(&mut self, replay: ReplayConfig) -> Result<&mut Self> { + self.enforce_determinism(); + if let Some(cfg) = &self.rr { + if let RRConfig::Record(_) = cfg { + bail!("Cannot enable replay when recording is already enabled"); + } + } + self.rr = Some(RRConfig::from(replay)); + Ok(self) + } + + /// Disable the currently active record/replay configuration, and remove + /// any determinism enforcement it introduced as side-effects. + /// + /// A common option is used for both record/replay here + /// since record and replay can never be set simultaneously/ + #[cfg(feature = "rr")] + pub fn disable_record_replay(&mut self) -> &mut Self { + self.remove_determinism_enforcement(); + self.rr = None; + self + } } impl Default for Config { diff --git a/crates/wasmtime/src/engine.rs b/crates/wasmtime/src/engine.rs index 5c49b99857..a5f9c024fa 100644 --- a/crates/wasmtime/src/engine.rs +++ b/crates/wasmtime/src/engine.rs @@ -1,4 +1,6 @@ use crate::Config; +#[cfg(feature = "rr")] +use crate::RRConfig; use crate::prelude::*; #[cfg(feature = "runtime")] pub use crate::runtime::code_memory::CustomCodeMemory; @@ -254,6 +256,14 @@ impl Engine { self.config().async_support } + /// Returns an immutable reference to the record/replay configuration settings + /// used by the engine + #[cfg(feature = "rr")] + #[inline] + pub fn rr(&self) -> Option<&RRConfig> { + self.config().rr.as_ref() + } + /// Detects whether the bytes provided are a precompiled object produced by /// Wasmtime. /// diff --git a/crates/wasmtime/src/engine/serialization.rs b/crates/wasmtime/src/engine/serialization.rs index feccd9f5dc..84accb2ee7 100644 --- a/crates/wasmtime/src/engine/serialization.rs +++ b/crates/wasmtime/src/engine/serialization.rs @@ -127,11 +127,7 @@ pub fn append_compiler_info(engine: &Engine, obj: &mut Object<'_>, metadata: &Me ); let mut data = Vec::new(); data.push(VERSION); - let version = match &engine.config().module_version { - ModuleVersionStrategy::WasmtimeVersion => env!("CARGO_PKG_VERSION"), - ModuleVersionStrategy::Custom(c) => c, - ModuleVersionStrategy::None => "", - }; + let version = engine.config().module_version.as_str(); // This precondition is checked in Config::module_version: assert!( version.len() < 256, diff --git a/crates/wasmtime/src/runtime.rs b/crates/wasmtime/src/runtime.rs index 43c6eee333..e715f91acd 100644 --- a/crates/wasmtime/src/runtime.rs +++ b/crates/wasmtime/src/runtime.rs @@ -48,6 +48,7 @@ pub(crate) mod linker; pub(crate) mod memory; pub(crate) mod module; pub(crate) mod resources; +pub(crate) mod rr; pub(crate) mod store; pub(crate) mod trampoline; pub(crate) mod trap; diff --git a/crates/wasmtime/src/runtime/component/component.rs b/crates/wasmtime/src/runtime/component/component.rs index 00f7acf72d..b418589684 100644 --- a/crates/wasmtime/src/runtime/component/component.rs +++ b/crates/wasmtime/src/runtime/component/component.rs @@ -90,6 +90,9 @@ struct ComponentInner { /// `realloc`, to avoid the need to look up types in the registry and take /// locks when calling `realloc` via `TypedFunc::call_raw`. realloc_func_type: Arc, + + /// The SHA-256 checksum of the source binary + checksum: [u8; 32], } pub(crate) struct AllCallFuncPointers { @@ -402,6 +405,7 @@ impl Component { info, mut types, mut static_modules, + checksum, } = match artifacts { Some(artifacts) => artifacts, None => postcard::from_bytes(code_memory.wasmtime_info())?, @@ -452,6 +456,7 @@ impl Component { code, info, realloc_func_type, + checksum, }), }) } @@ -828,6 +833,15 @@ impl Component { &self.inner.realloc_func_type } + #[allow( + unused, + reason = "used only for verification with wasmtime `rr` feature \ + and requires a lot of unnecessary gating across crates" + )] + pub(crate) fn checksum(&self) -> &[u8; 32] { + &self.inner.checksum + } + /// Returns the `Export::LiftedFunction` metadata associated with `export`. /// /// # Panics diff --git a/crates/wasmtime/src/runtime/component/concurrent/futures_and_streams.rs b/crates/wasmtime/src/runtime/component/concurrent/futures_and_streams.rs index 0433bbb2fa..6e80141b4b 100644 --- a/crates/wasmtime/src/runtime/component/concurrent/futures_and_streams.rs +++ b/crates/wasmtime/src/runtime/component/concurrent/futures_and_streams.rs @@ -204,9 +204,9 @@ fn accept_reader, U: 'static> bail!("read pointer not aligned"); } lower - .as_slice_mut() - .get_mut(address..) - .and_then(|b| b.get_mut(..T::SIZE32 * count)) + .as_slice() + .get(address..) + .and_then(|b| b.get(..T::SIZE32 * count)) .ok_or_else(|| anyhow::anyhow!("read pointer out of bounds of memory"))?; if let Some(ty) = payload(ty, &types) { @@ -2468,7 +2468,7 @@ impl Instance { let ty = types[types[read_ty].ty].payload.unwrap(); let ptr = func::validate_inbounds_dynamic( types.canonical_abi(&ty), - lower.as_slice_mut(), + lower.as_slice(), &ValRaw::u32(read_address.try_into().unwrap()), )?; val.store(lower, ty, ptr)?; @@ -2541,9 +2541,9 @@ impl Instance { } let size = usize::try_from(abi.size32).unwrap(); lower - .as_slice_mut() - .get_mut(read_address..) - .and_then(|b| b.get_mut(..size * count)) + .as_slice() + .get(read_address..) + .and_then(|b| b.get(..size * count)) .ok_or_else(|| anyhow::anyhow!("read pointer out of bounds of memory"))?; let mut ptr = read_address; for value in values { @@ -3096,7 +3096,7 @@ impl Instance { let debug_msg_address = usize::try_from(debug_msg_address)?; // Lower the string into the component's memory let offset = lower_cx - .as_slice_mut() + .as_slice() .get(debug_msg_address..) .and_then(|b| b.get(..debug_msg.bytes().len())) .map(|_| debug_msg_address) diff --git a/crates/wasmtime/src/runtime/component/func/host.rs b/crates/wasmtime/src/runtime/component/func/host.rs index d858a71c13..d2d2cb7266 100644 --- a/crates/wasmtime/src/runtime/component/func/host.rs +++ b/crates/wasmtime/src/runtime/component/func/host.rs @@ -2,13 +2,14 @@ use crate::component::concurrent::{Accessor, Status}; use crate::component::func::{LiftContext, LowerContext, Options}; use crate::component::matching::InstanceType; -use crate::component::storage::slice_to_storage_mut; +use crate::component::storage::{slice_to_storage_mut, storage_as_slice_mut}; use crate::component::{ComponentNamedList, ComponentType, Instance, Lift, Lower, Val}; use crate::prelude::*; use crate::runtime::vm::component::{ ComponentInstance, VMComponentContext, VMLowering, VMLoweringCallee, }; use crate::runtime::vm::{SendSyncPtr, VMOpaqueContext, VMStore}; +use crate::store::StoreOpaque; use crate::{AsContextMut, CallHook, StoreContextMut, ValRaw}; use alloc::sync::Arc; use core::any::Any; @@ -16,11 +17,100 @@ use core::future::Future; use core::mem::{self, MaybeUninit}; use core::pin::Pin; use core::ptr::NonNull; +use wasmtime_environ::component::TypeFunc; use wasmtime_environ::component::{ CanonicalAbiInfo, ComponentTypes, InterfaceType, MAX_FLAT_ASYNC_PARAMS, MAX_FLAT_PARAMS, MAX_FLAT_RESULTS, OptionsIndex, TypeFuncIndex, TypeTuple, }; +/// Convenience methods to inject record + replay logic +mod rr_hooks { + use super::*; + #[cfg(feature = "rr-component")] + use crate::rr::component_events::{ + HostFuncReturnEvent, LowerReturnEvent, LowerStoreReturnEvent, + }; + /// Record/replay hook operation for host function entry events + #[inline] + pub fn record_replay_host_func_entry( + args: &mut [MaybeUninit], + func_type: &TypeFunc, + store: &mut StoreOpaque, + ) -> Result<()> { + #[cfg(all(feature = "rr-component", feature = "rr-validate"))] + { + use crate::rr::component_events::HostFuncEntryEvent; + store.record_event_validation(|| HostFuncEntryEvent::new(args, func_type.clone()))?; + store.next_replay_event_validation::(func_type)?; + } + let _ = (args, func_type, store); + Ok(()) + } + + /// Record hook operation for host function return events + #[inline] + pub fn record_host_func_return( + args: &[MaybeUninit], + store: &mut StoreOpaque, + ) -> Result<()> { + #[cfg(feature = "rr-component")] + store.record_event(|| HostFuncReturnEvent::new(args))?; + let _ = (args, store); + Ok(()) + } + + /// Record hook wrapping a lowering `store` call of component types + #[inline] + pub fn record_lower_store( + lower_store: F, + cx: &mut LowerContext<'_, T>, + ty: InterfaceType, + offset: usize, + ) -> Result<()> + where + F: FnOnce(&mut LowerContext<'_, T>, InterfaceType, usize) -> Result<()>, + { + #[cfg(all(feature = "rr-component", feature = "rr-validate"))] + { + use crate::rr::component_events::LowerStoreEntryEvent; + cx.store + .0 + .record_event_validation(|| LowerStoreEntryEvent::new(ty, offset))?; + } + let store_result = lower_store(cx, ty, offset); + #[cfg(feature = "rr-component")] + cx.store + .0 + .record_event(|| LowerStoreReturnEvent::new(&store_result))?; + store_result + } + + /// Record hook wrapping a lowering `lower` call of component types + #[inline] + pub fn record_lower( + lower: F, + cx: &mut LowerContext<'_, T>, + ty: InterfaceType, + ) -> Result<()> + where + F: FnOnce(&mut LowerContext<'_, T>, InterfaceType) -> Result<()>, + { + #[cfg(all(feature = "rr-component", feature = "rr-validate"))] + { + use crate::rr::component_events::LowerEntryEvent; + cx.store + .0 + .record_event_validation(|| LowerEntryEvent::new(ty))?; + } + let lower_result = lower(cx, ty); + #[cfg(feature = "rr-component")] + cx.store + .0 + .record_event(|| LowerReturnEvent::new(&lower_result))?; + lower_result + } +} + pub struct HostFunc { entrypoint: VMLoweringCallee, typecheck: Box) -> Result<()>) + Send + Sync>, @@ -257,109 +347,147 @@ where let param_tys = InterfaceType::Tuple(ty.params); let result_tys = InterfaceType::Tuple(ty.results); - if async_ { + rr_hooks::record_replay_host_func_entry(storage, &ty, store.0)?; + + let storage_type = if async_ { #[cfg(feature = "component-model-async")] { - let mut storage = unsafe { Storage::<'_, Params, u32>::new_async::(storage) }; + StorageType::Async(unsafe { Storage::<'_, Params, u32>::new_async::(storage) }) + } + #[cfg(not(feature = "component-model-async"))] + unreachable!( + "async-lowered imports should have failed validation \ + when `component-model-async` feature disabled" + ); + } else { + StorageType::Sync(unsafe { Storage::<'_, Params, Return>::new_sync(storage) }) + }; - // Lift the parameters, either from flat storage or from linear - // memory. - let lift = &mut LiftContext::new(store.0.store_opaque_mut(), &options, instance); - lift.enter_call(); - let params = storage.lift_params(lift, param_tys)?; + if !store.0.replay_enabled() { + match storage_type { + #[cfg(feature = "component-model-async")] + StorageType::Async(mut storage) => { + // Lift the parameters, either from flat storage or from linear + // memory. + let lift = &mut LiftContext::new(store.0.store_opaque_mut(), &options, instance); + lift.enter_call(); + let params = storage.lift_params(lift, param_tys)?; + + // Load the return pointer, if present. + let retptr = match storage.async_retptr() { + Some(ptr) => { + let mut lower = + LowerContext::new(store.as_context_mut(), &options, &types, instance); + validate_inbounds::(lower.as_slice(), ptr)? + } + // If there's no return pointer then `Return` should have an + // empty flat representation. In this situation pretend the + // return pointer was 0 so we have something to shepherd along + // into the closure below. + None => { + assert_eq!(Return::flatten_count(), 0); + 0 + } + }; + + let host_result = closure(store.as_context_mut(), instance, params); + + let mut lower_result = { + let types = types.clone(); + move |store: StoreContextMut, instance: Instance, ret: Return| { + unsafe { + flags.set_may_leave(false); + } + let mut lower = LowerContext::new(store, &options, &types, instance); + ret.linear_lower_to_memory(&mut lower, result_tys, retptr)?; + unsafe { + flags.set_may_leave(true); + } + lower.exit_call()?; + Ok(()) + } + }; + let task = match host_result { + HostResult::Done(result) => { + lower_result(store.as_context_mut(), instance, result?)?; + None + } + #[cfg(feature = "component-model-async")] + HostResult::Future(future) => instance.first_poll( + store.as_context_mut(), + future, + caller_instance, + lower_result, + )?, + }; + + let status = if let Some(task) = task { + Status::Started.pack(Some(task)) + } else { + Status::Returned.pack(None) + }; - // Load the return pointer, if present. - let retptr = match storage.async_retptr() { - Some(ptr) => { - let mut lower = - LowerContext::new(store.as_context_mut(), &options, &types, instance); - validate_inbounds::(lower.as_slice_mut(), ptr)? + let mut lower = LowerContext::new(store, &options, &types, instance); + storage.lower_results(&mut lower, InterfaceType::U32, status)?; + } + StorageType::Sync(mut storage) => { + let mut lift = LiftContext::new(store.0.store_opaque_mut(), &options, instance); + lift.enter_call(); + let params = storage.lift_params(&mut lift, param_tys)?; + + let ret = match closure(store.as_context_mut(), instance, params) { + HostResult::Done(result) => result?, + #[cfg(feature = "component-model-async")] + HostResult::Future(future) => { + instance.poll_and_block(store.0.traitobj_mut(), future, caller_instance)? + } + }; + + unsafe { + flags.set_may_leave(false); } - // If there's no return pointer then `Return` should have an - // empty flat representation. In this situation pretend the - // return pointer was 0 so we have something to shepherd along - // into the closure below. - None => { - assert_eq!(Return::flatten_count(), 0); - 0 + let mut lower = LowerContext::new(store, &options, &types, instance); + storage.lower_results(&mut lower, result_tys, ret)?; + unsafe { + flags.set_may_leave(true); } - }; - - let host_result = closure(store.as_context_mut(), instance, params); - - let mut lower_result = { - let types = types.clone(); - move |store: StoreContextMut, instance: Instance, ret: Return| { + lower.exit_call()?; + } + } + } else { + #[cfg(feature = "rr-component")] + { + match storage_type { + #[cfg(feature = "component-model-async")] + StorageType::Async(_) => unreachable!("`rr` should not be configurable with async"), + StorageType::Sync(mut storage) => { unsafe { flags.set_may_leave(false); } let mut lower = LowerContext::new(store, &options, &types, instance); - ret.linear_lower_to_memory(&mut lower, result_tys, retptr)?; + storage.replay_lower_results(&mut lower)?; unsafe { flags.set_may_leave(true); } - lower.exit_call()?; - Ok(()) } - }; - let task = match host_result { - HostResult::Done(result) => { - lower_result(store.as_context_mut(), instance, result?)?; - None - } - #[cfg(feature = "component-model-async")] - HostResult::Future(future) => instance.first_poll( - store.as_context_mut(), - future, - caller_instance, - lower_result, - )?, - }; - - let status = if let Some(task) = task { - Status::Started.pack(Some(task)) - } else { - Status::Returned.pack(None) - }; - - let mut lower = LowerContext::new(store, &options, &types, instance); - storage.lower_results(&mut lower, InterfaceType::U32, status)?; - } - #[cfg(not(feature = "component-model-async"))] - { - let _ = caller_instance; - unreachable!( - "async-lowered imports should have failed validation \ - when `component-model-async` feature disabled" - ); - } - } else { - let mut storage = unsafe { Storage::<'_, Params, Return>::new_sync(storage) }; - let mut lift = LiftContext::new(store.0.store_opaque_mut(), &options, instance); - lift.enter_call(); - let params = storage.lift_params(&mut lift, param_tys)?; - - let ret = match closure(store.as_context_mut(), instance, params) { - HostResult::Done(result) => result?, - #[cfg(feature = "component-model-async")] - HostResult::Future(future) => { - instance.poll_and_block(store.0.traitobj_mut(), future, caller_instance)? } - }; - - unsafe { - flags.set_may_leave(false); } - let mut lower = LowerContext::new(store, &options, &types, instance); - storage.lower_results(&mut lower, result_tys, ret)?; - unsafe { - flags.set_may_leave(true); - } - lower.exit_call()?; } return Ok(()); + /// Sum storage type across async/sync storage formats + enum StorageType< + 'a, + P: ComponentType, + ReturnSync: ComponentType, + #[cfg(feature = "component-model-async")] ReturnAsync: ComponentType, + > { + #[cfg(feature = "component-model-async")] + Async(Storage<'a, P, ReturnAsync>), + Sync(Storage<'a, P, ReturnSync>), + } + /// Type-level representation of the matrix of possibilities of how /// WebAssembly parameters and results are handled in the canonical ABI. /// @@ -578,10 +706,45 @@ where ret: R, ) -> Result<()> { match self.lower_dst() { - Dst::Direct(storage) => ret.linear_lower_to_flat(cx, ty, storage), + Dst::Direct(storage) => { + let result = rr_hooks::record_lower( + |cx, ty| ret.linear_lower_to_flat(cx, ty, storage), + cx, + ty, + ); + rr_hooks::record_host_func_return( + unsafe { storage_as_slice_mut(storage) }, + cx.store.0, + )?; + result + } Dst::Indirect(ptr) => { - let ptr = validate_inbounds::(cx.as_slice_mut(), ptr)?; - ret.linear_lower_to_memory(cx, ty, ptr) + let ptr = validate_inbounds::(cx.as_slice(), ptr)?; + let result = rr_hooks::record_lower_store( + |cx, ty, ptr| ret.linear_lower_to_memory(cx, ty, ptr), + cx, + ty, + ptr, + ); + // Recording here is just for marking the return event + rr_hooks::record_host_func_return(&[], cx.store.0)?; + result + } + } + } + + #[cfg(feature = "rr-component")] + fn replay_lower_results(&mut self, cx: &mut LowerContext<'_, T>) -> Result<()> { + use crate::component::storage::storage_as_slice_mut; + match self.lower_dst() { + Dst::Direct(storage) => { + // This path also stores the final return values in resulting storage + cx.replay_lowering(Some(unsafe { storage_as_slice_mut(storage) })) + } + Dst::Indirect(_ptr) => { + // While replay will not have to change '_ptr' for indirect results, + // it will have to overwrite any nested stored lowerings (deep copy) + cx.replay_lowering(None) } } } @@ -734,129 +897,156 @@ where let param_tys = &types[func_ty.params]; let result_tys = &types[func_ty.results]; - let mut params_and_results = Vec::new(); - let mut lift = &mut LiftContext::new(store.0.store_opaque_mut(), &options, instance); - lift.enter_call(); - let max_flat = if async_ { - MAX_FLAT_ASYNC_PARAMS - } else { - MAX_FLAT_PARAMS - }; + rr_hooks::record_replay_host_func_entry(storage, &types[ty], store.0)?; - let ret_index = unsafe { - dynamic_params_load( - &mut lift, - &types, - storage, - param_tys, - &mut params_and_results, - max_flat, - )? - }; - let result_start = params_and_results.len(); - for _ in 0..result_tys.types.len() { - params_and_results.push(Val::Bool(false)); - } + if !store.0.replay_enabled() { + let mut params_and_results = Vec::new(); + let mut lift = &mut LiftContext::new(store.0.store_opaque_mut(), &options, instance); + lift.enter_call(); + let max_flat = if async_ { + MAX_FLAT_ASYNC_PARAMS + } else { + MAX_FLAT_PARAMS + }; - if async_ { - #[cfg(feature = "component-model-async")] - { - let retptr = if result_tys.types.len() == 0 { - 0 - } else { - let retptr = unsafe { storage[ret_index].assume_init() }; - let mut lower = - LowerContext::new(store.as_context_mut(), &options, &types, instance); - validate_inbounds_dynamic(&result_tys.abi, lower.as_slice_mut(), &retptr)? - }; + let ret_index = unsafe { + dynamic_params_load( + &mut lift, + &types, + storage, + param_tys, + &mut params_and_results, + max_flat, + )? + }; + let result_start = params_and_results.len(); + for _ in 0..result_tys.types.len() { + params_and_results.push(Val::Bool(false)); + } + + if async_ { + #[cfg(feature = "component-model-async")] + { + let retptr = if result_tys.types.len() == 0 { + 0 + } else { + let retptr = unsafe { storage[ret_index].assume_init() }; + let mut lower = + LowerContext::new(store.as_context_mut(), &options, &types, instance); + validate_inbounds_dynamic(&result_tys.abi, lower.as_slice(), &retptr)? + }; + + let future = closure( + store.as_context_mut(), + instance, + params_and_results, + result_start, + ); + + let task = instance.first_poll(store, future, caller_instance, { + let types = types.clone(); + let result_tys = func_ty.results; + move |store: StoreContextMut, instance: Instance, result_vals: Vec| { + let result_tys = &types[result_tys]; + let result_vals = &result_vals[result_start..]; + assert_eq!(result_vals.len(), result_tys.types.len()); + + unsafe { + flags.set_may_leave(false); + } + + let mut lower = LowerContext::new(store, &options, &types, instance); + let mut ptr = retptr; + for (val, ty) in result_vals.iter().zip(result_tys.types.iter()) { + let offset = types.canonical_abi(ty).next_field32_size(&mut ptr); + val.store(&mut lower, *ty, offset)?; + } + + unsafe { + flags.set_may_leave(true); + } + + lower.exit_call()?; + + Ok(()) + } + })?; + + let status = if let Some(task) = task { + Status::Started.pack(Some(task)) + } else { + Status::Returned.pack(None) + }; + storage[0] = MaybeUninit::new(ValRaw::i32(status as i32)); + } + #[cfg(not(feature = "component-model-async"))] + { + unreachable!( + "async-lowered imports should have failed validation \ + when `component-model-async` feature disabled" + ); + } + } else { let future = closure( store.as_context_mut(), instance, params_and_results, result_start, ); + let result_vals = + instance.poll_and_block(store.0.traitobj_mut(), future, caller_instance)?; + let result_vals = &result_vals[result_start..]; - let task = instance.first_poll(store, future, caller_instance, { - let types = types.clone(); - let result_tys = func_ty.results; - move |store: StoreContextMut, instance: Instance, result_vals: Vec| { - let result_tys = &types[result_tys]; - let result_vals = &result_vals[result_start..]; - assert_eq!(result_vals.len(), result_tys.types.len()); - - unsafe { - flags.set_may_leave(false); - } - - let mut lower = LowerContext::new(store, &options, &types, instance); - let mut ptr = retptr; - for (val, ty) in result_vals.iter().zip(result_tys.types.iter()) { - let offset = types.canonical_abi(ty).next_field32_size(&mut ptr); - val.store(&mut lower, *ty, offset)?; - } - - unsafe { - flags.set_may_leave(true); - } - - lower.exit_call()?; + unsafe { + flags.set_may_leave(false); + } - Ok(()) + let mut cx = LowerContext::new(store, &options, &types, instance); + if let Some(cnt) = result_tys.abi.flat_count(MAX_FLAT_RESULTS) { + let mut dst = storage[..cnt].iter_mut(); + for (val, ty) in result_vals.iter().zip(result_tys.types.iter()) { + rr_hooks::record_lower(|cx, ty| val.lower(cx, ty, &mut dst), &mut cx, *ty)?; } - })?; - - let status = if let Some(task) = task { - Status::Started.pack(Some(task)) + assert!(dst.next().is_none()); + rr_hooks::record_host_func_return(storage, cx.store.0)?; } else { - Status::Returned.pack(None) - }; + let ret_ptr = unsafe { storage[ret_index].assume_init_ref() }; + let mut ptr = validate_inbounds_dynamic(&result_tys.abi, cx.as_slice(), ret_ptr)?; + for (val, ty) in result_vals.iter().zip(result_tys.types.iter()) { + let offset = types.canonical_abi(ty).next_field32_size(&mut ptr); + val.store(&mut cx, *ty, offset)?; + } + } - storage[0] = MaybeUninit::new(ValRaw::i32(status as i32)); - } - #[cfg(not(feature = "component-model-async"))] - { - unreachable!( - "async-lowered imports should have failed validation \ - when `component-model-async` feature disabled" - ); - } - } else { - let future = closure( - store.as_context_mut(), - instance, - params_and_results, - result_start, - ); - let result_vals = - instance.poll_and_block(store.0.traitobj_mut(), future, caller_instance)?; - let result_vals = &result_vals[result_start..]; + unsafe { + flags.set_may_leave(true); + } - unsafe { - flags.set_may_leave(false); + cx.exit_call()?; } - - let mut cx = LowerContext::new(store, &options, &types, instance); - if let Some(cnt) = result_tys.abi.flat_count(MAX_FLAT_RESULTS) { - let mut dst = storage[..cnt].iter_mut(); - for (val, ty) in result_vals.iter().zip(result_tys.types.iter()) { - val.lower(&mut cx, *ty, &mut dst)?; - } - assert!(dst.next().is_none()); + } else { + #[cfg(feature = "rr-component")] + if async_ { + unreachable!("`rr` should not be configurable with async"); } else { - let ret_ptr = unsafe { storage[ret_index].assume_init_ref() }; - let mut ptr = validate_inbounds_dynamic(&result_tys.abi, cx.as_slice_mut(), ret_ptr)?; - for (val, ty) in result_vals.iter().zip(result_tys.types.iter()) { - let offset = types.canonical_abi(ty).next_field32_size(&mut ptr); - val.store(&mut cx, *ty, offset)?; + unsafe { + flags.set_may_leave(false); + } + let mut cx = LowerContext::new(store, &options, &types, instance); + if let Some(_cnt) = result_tys.abi.flat_count(MAX_FLAT_RESULTS) { + // Copy the entire contiguous storage slice (instead of looping values one-by-one) + // This path also stores the final return values in resulting storage + cx.replay_lowering(Some(storage))?; + } else { + // The indirect `ret_ptr` will not change during replay, but it will + // have to overwrite any nested stored lowerings (deep copy) + cx.replay_lowering(None)?; + } + unsafe { + flags.set_may_leave(true); } } - - unsafe { - flags.set_may_leave(true); - } - - cx.exit_call()?; } Ok(()) diff --git a/crates/wasmtime/src/runtime/component/func/options.rs b/crates/wasmtime/src/runtime/component/func/options.rs index eaa6b27cd7..8ccdc4a715 100644 --- a/crates/wasmtime/src/runtime/component/func/options.rs +++ b/crates/wasmtime/src/runtime/component/func/options.rs @@ -1,7 +1,16 @@ +#[cfg(feature = "rr-component")] +use crate::ValRaw; use crate::component::matching::InstanceType; use crate::component::resources::{HostResourceData, HostResourceIndex, HostResourceTables}; use crate::component::{Instance, ResourceType}; use crate::prelude::*; +#[cfg(feature = "rr-component")] +use crate::rr::{ + RREvent, RecordBuffer, Recorder, ReplayError, Replayer, + component_events::MemorySliceWriteEvent, component_events::ReallocEntryEvent, +}; +#[cfg(all(feature = "rr-component", feature = "rr-validate"))] +use crate::rr::{Validate, component_events::ReallocReturnEvent}; use crate::runtime::vm::component::{ CallContexts, ComponentInstance, InstanceFlags, ResourceTable, ResourceTables, }; @@ -9,6 +18,9 @@ use crate::runtime::vm::{VMFuncRef, VMMemoryDefinition}; use crate::store::{StoreId, StoreOpaque}; use crate::{FuncType, StoreContextMut}; use alloc::sync::Arc; +#[cfg(feature = "rr-component")] +use core::mem::MaybeUninit; +use core::ops::{Deref, DerefMut}; use core::pin::Pin; use core::ptr::NonNull; use wasmtime_environ::component::{ @@ -16,6 +28,99 @@ use wasmtime_environ::component::{ TypeResourceTableIndex, }; +/// Same as [`ConstMemorySliceCell`] except allows for dynamically sized slices. +/// +/// Prefer the above for efficiency if slice size is known statically. +/// +/// **Note**: The correct operation of this type relies of several invariants. +/// See [`ConstMemorySliceCell`] for detailed description on the role +/// of these types. +pub struct MemorySliceCell<'a> { + bytes: &'a mut [u8], + #[cfg(feature = "rr-component")] + offset: usize, + #[cfg(feature = "rr-component")] + recorder: Option<&'a mut RecordBuffer>, +} +impl<'a> Deref for MemorySliceCell<'a> { + type Target = [u8]; + fn deref(&self) -> &Self::Target { + self.bytes + } +} +impl DerefMut for MemorySliceCell<'_> { + fn deref_mut(&mut self) -> &mut Self::Target { + self.bytes + } +} +impl Drop for MemorySliceCell<'_> { + /// Drop serves as a recording hook for stores to the memory slice + fn drop(&mut self) { + #[cfg(feature = "rr-component")] + if let Some(buf) = &mut self.recorder { + buf.record_event(|| MemorySliceWriteEvent::new(self.offset, self.bytes.to_vec())) + .unwrap(); + } + } +} + +/// Zero-cost encapsulation type for a statically sized slice of mutable memory +/// +/// # Purpose and Usage (Read Carefully!) +/// +/// This type (and its dynamic counterpart [`MemorySliceCell`]) are critical to +/// record/replay (RR) support in Wasmtime. In practice, all lowering operations utilize +/// a [`LowerContext`], which provides a capability to modify guest Wasm module state in +/// the following ways: +/// +/// 1. Write to slices of memory with [`get`](LowerContext::get)/[`get_dyn`](LowerContext::get_dyn) +/// 2. Movement of memory with [`realloc`](LowerContext::realloc) +/// +/// The above are intended to be the narrow waists for recording changes to guest state, and +/// should be the **only** interfaces used during lowerng. In particular, +/// [`get`](LowerContext::get)/[`get_dyn`](LowerContext::get_dyn) return +/// ([`ConstMemorySliceCell`]/[`MemorySliceCell`]), which implement [`Drop`] +/// allowing us a hook to just capture the final aggregate changes made to guest memory by the host. +/// +/// ## Critical Invariants +/// +/// Typically recording would need to know both when the slice was borrowed AND when it was +/// dropped, since memory movement with [`realloc`](LowerContext::realloc) can be interleaved between +/// borrows and drops, and replays would have to be aware of this. **However**, with this abstraction, +/// we can be more efficient and get away with **only** recording drops, because of the implicit interaction between +/// [`realloc`](LowerContext::realloc) and [`get`](LowerContext::get)/[`get_dyn`](LowerContext::get_dyn), +/// which both take a `&mut self`. Since the latter implements [`Drop`], which also takes a `&mut self`, +/// the compiler will automatically enforce that drops of this type need to be triggered before a +/// [`realloc`](LowerContext::realloc), preventing any interleavings in between the borrow and drop of the slice. +pub struct ConstMemorySliceCell<'a, const N: usize> { + bytes: &'a mut [u8; N], + #[cfg(feature = "rr-component")] + offset: usize, + #[cfg(feature = "rr-component")] + recorder: Option<&'a mut RecordBuffer>, +} +impl<'a, const N: usize> Deref for ConstMemorySliceCell<'a, N> { + type Target = [u8; N]; + fn deref(&self) -> &Self::Target { + self.bytes + } +} +impl<'a, const N: usize> DerefMut for ConstMemorySliceCell<'a, N> { + fn deref_mut(&mut self) -> &mut Self::Target { + self.bytes + } +} +impl<'a, const N: usize> Drop for ConstMemorySliceCell<'a, N> { + /// Drops serves as a recording hook for stores to the memory slice + fn drop(&mut self) { + #[cfg(feature = "rr-component")] + if let Some(buf) = &mut self.recorder { + buf.record_event(|| MemorySliceWriteEvent::new(self.offset, self.bytes.to_vec())) + .unwrap(); + } + } +} + /// Runtime representation of canonical ABI options in the component model. /// /// This structure packages up the runtime representation of each option from @@ -163,7 +268,7 @@ impl Options { } } - /// Same as above, just `_mut` + /// Same as [`memory`](Self::memory), just `_mut` pub fn memory_mut<'a>(&self, store: &'a mut StoreOpaque) -> &'a mut [u8] { self.store_id.assert_belongs_to(store.id()); @@ -174,6 +279,22 @@ impl Options { } } + /// Same as [`memory_mut`](Self::memory_mut), but with the record buffer from the encapsulating store + #[cfg(feature = "rr-component")] + fn memory_mut_with_recorder<'a>( + &self, + store: &'a mut StoreOpaque, + ) -> (&'a mut [u8], Option<&'a mut RecordBuffer>) { + self.store_id.assert_belongs_to(store.id()); + + // See comments in `memory` about the unsafety + let memslice = unsafe { + let memory = self.memory.unwrap().as_ref(); + core::slice::from_raw_parts_mut(memory.base.as_ptr(), memory.current_length()) + }; + (memslice, store.record_buffer_mut()) + } + /// Returns the underlying encoding used for strings in this /// lifting/lowering. pub fn string_encoding(&self) -> StringEncoding { @@ -293,24 +414,44 @@ impl<'a, T: 'static> LowerContext<'a, T> { self.instance.id().get_mut(self.store.0) } - /// Returns a view into memory as a mutable slice of bytes. + /// Returns a view into memory as a mutable slice of bytes + the + /// record buffer to record state. + /// + /// # Panics + /// + /// See [`as_slice`](Self::as_slice) + #[cfg(feature = "rr-component")] + fn as_slice_mut_with_recorder(&mut self) -> (&mut [u8], Option<&mut RecordBuffer>) { + self.options.memory_mut_with_recorder(self.store.0) + } + + /// Returns a view into memory as a mutable slice of bytes + /// + /// # Panics + /// + /// See [`as_slice`](Self::as_slice) + #[inline] + fn as_slice_mut(&mut self) -> &mut [u8] { + self.options.memory_mut(self.store.0) + } + + /// Returns a view into memory as an immutable slice of bytes. /// /// # Panics /// /// This will panic if memory has not been configured for this lowering /// (e.g. it wasn't present during the specification of canonical options). - pub fn as_slice_mut(&mut self) -> &mut [u8] { - self.options.memory_mut(self.store.0) + pub fn as_slice(&mut self) -> &[u8] { + self.options.memory(self.store.0) } - /// Invokes the memory allocation function (which is style after `realloc`) - /// with the specified parameters. + /// Inner invocation of realloc, without record/replay scaffolding /// /// # Panics /// /// This will panic if realloc hasn't been configured for this lowering via /// its canonical options. - pub fn realloc( + fn realloc_inner( &mut self, old: usize, old_size: usize, @@ -332,6 +473,32 @@ impl<'a, T: 'static> LowerContext<'a, T> { .map(|(_, ptr)| ptr) } + /// Invokes the memory allocation function (which is style after `realloc`) + /// with the specified parameters. + /// + /// # Panics + /// + /// This will panic if realloc hasn't been configured for this lowering via + /// its canonical options. + pub fn realloc( + &mut self, + old: usize, + old_size: usize, + old_align: u32, + new_size: usize, + ) -> Result { + #[cfg(feature = "rr-component")] + self.store + .0 + .record_event(|| ReallocEntryEvent::new(old, old_size, old_align, new_size))?; + let result = self.realloc_inner(old, old_size, old_align, new_size); + #[cfg(all(feature = "rr-component", feature = "rr-validate"))] + self.store + .0 + .record_event_validation(|| ReallocReturnEvent::new(&result))?; + result + } + /// Returns a fixed mutable slice of memory `N` bytes large starting at /// offset `N`, panicking on out-of-bounds. /// @@ -342,7 +509,15 @@ impl<'a, T: 'static> LowerContext<'a, T> { /// /// This will panic if memory has not been configured for this lowering /// (e.g. it wasn't present during the specification of canonical options). - pub fn get(&mut self, offset: usize) -> &mut [u8; N] { + #[inline] + pub fn get(&mut self, offset: usize) -> ConstMemorySliceCell<'_, N> { + cfg_if::cfg_if! { + if #[cfg(feature = "rr-component")] { + let (slice_mut, recorder) = self.as_slice_mut_with_recorder(); + } else { + let slice_mut = self.as_slice_mut(); + } + } // FIXME: this bounds check shouldn't actually be necessary, all // callers of `ComponentType::store` have already performed a bounds // check so we're guaranteed that `offset..offset+N` is in-bounds. That @@ -353,7 +528,37 @@ impl<'a, T: 'static> LowerContext<'a, T> { // For now I figure we can leave in this bounds check and if it becomes // an issue we can optimize further later, probably with judicious use // of `unsafe`. - self.as_slice_mut()[offset..].first_chunk_mut().unwrap() + ConstMemorySliceCell { + bytes: slice_mut[offset..].first_chunk_mut().unwrap(), + #[cfg(feature = "rr-component")] + offset: offset, + #[cfg(feature = "rr-component")] + recorder: recorder, + } + } + + /// The dynamically-sized version of [`get`](Self::get). If size of slice required is + /// statically known, prefer the const version for optimal efficiency + /// + /// # Panics + /// + /// Refer to [`get`](Self::get). + #[inline] + pub fn get_dyn(&mut self, offset: usize, size: usize) -> MemorySliceCell<'_> { + cfg_if::cfg_if! { + if #[cfg(feature = "rr-component")] { + let (slice_mut, recorder) = self.as_slice_mut_with_recorder(); + } else { + let slice_mut = self.as_slice_mut(); + } + } + MemorySliceCell { + bytes: &mut slice_mut[offset..][..size], + #[cfg(feature = "rr-component")] + offset: offset, + #[cfg(feature = "rr-component")] + recorder: recorder, + } } /// Lowers an `own` resource into the guest, converting the `rep` specified @@ -442,6 +647,116 @@ impl<'a, T: 'static> LowerContext<'a, T> { ) } + /// Perform a replay of all the type lowering-associated events for this context + /// + /// These typically include all `Lower*` and `Realloc*` event, along with relevant + /// `HostFunctionReturnEvent`. + /// + /// ## Important Notes + /// + /// * It is assumed that this is only invoked at the root lower/store calls + /// + #[cfg(feature = "rr-component")] + pub fn replay_lowering( + &mut self, + mut result_storage: Option<&mut [MaybeUninit]>, + ) -> Result<()> { + // There is a lot of `rr-validate` feature gating here for optimal replay performance + // and memory overhead in a non-validating scenario. If this proves to not produce a huge + // overhead in practice, gating can be removed in the future in favor of readability + if self.store.0.replay_buffer_mut().is_none() { + return Ok(()); + } + let mut complete = false; + let mut lowering_error: Option = None; + // No nested expected; these depths should only be 1 + let mut _realloc_stack = Vec::>::new(); + // Lowering tracks is only for ordering entry/exit events + let mut _lower_stack = Vec::<()>::new(); + let mut _lower_store_stack = Vec::<()>::new(); + while !complete { + let buf = self.store.0.replay_buffer_mut().unwrap(); + let event = buf.next_event()?; + #[cfg(feature = "rr-validate")] + let run_validate = buf.settings().validate && buf.trace_settings().add_validation; + match event { + RREvent::ComponentHostFuncReturn(e) => { + // End of the lowering process + if let Some(e) = lowering_error { + return Err(e.into()); + } + if let Some(storage) = result_storage.as_deref_mut() { + e.move_into_slice(storage); + } + complete = true; + } + RREvent::ComponentReallocEntry(e) => { + let _result = + self.realloc_inner(e.old_addr, e.old_size, e.old_align, e.new_size); + #[cfg(feature = "rr-validate")] + if run_validate { + _realloc_stack.push(_result); + } + } + // No return value to validate for lower/lower-store; store error and just check that entry happened before + RREvent::ComponentLowerReturn(e) => { + #[cfg(feature = "rr-validate")] + if run_validate { + _lower_stack.pop().ok_or(ReplayError::InvalidOrdering)?; + } + lowering_error = e.ret().map_err(Into::into).err(); + } + RREvent::ComponentLowerStoreReturn(e) => { + #[cfg(feature = "rr-validate")] + if run_validate { + _lower_store_stack + .pop() + .ok_or(ReplayError::InvalidOrdering)?; + } + lowering_error = e.ret().map_err(Into::into).err(); + } + RREvent::ComponentMemorySliceWrite(e) => { + // The bounds check is performed here is required here (in the absence of + // trace validation) to protect against malicious out-of-bounds slice writes + self.as_slice_mut()[e.offset..e.offset + e.bytes.len()] + .copy_from_slice(e.bytes.as_slice()); + } + // Optional events + // + // Realloc or any lowering methods cannot call back to the host. Hence, you cannot + // have host calls entries during this method + RREvent::ComponentHostFuncEntry(_) => { + bail!("Cannot call back into host during lowering") + } + // Unwrapping should never occur on valid executions since *Entry should be before *Return in trace + RREvent::ComponentReallocReturn(_e) => + { + #[cfg(feature = "rr-validate")] + if run_validate { + lowering_error = _e.validate(&_realloc_stack.pop().unwrap()).err() + } + } + RREvent::ComponentLowerEntry(_) => { + // All we want here is ensuring Entry occurs before Return + #[cfg(feature = "rr-validate")] + if run_validate { + _lower_stack.push(()) + } + } + RREvent::ComponentLowerStoreEntry(_) => { + // All we want here is ensuring Entry occurs before Return + #[cfg(feature = "rr-validate")] + if run_validate { + _lower_store_stack.push(()) + } + } + + _ => bail!("Invalid event \'{:?}\' encountered during lowering", event), + }; + } + Ok(()) + } + /// See [`HostResourceTables::enter_call`]. #[inline] pub fn enter_call(&mut self) { diff --git a/crates/wasmtime/src/runtime/component/func/typed.rs b/crates/wasmtime/src/runtime/component/func/typed.rs index 3e669ab278..ac636cce58 100644 --- a/crates/wasmtime/src/runtime/component/func/typed.rs +++ b/crates/wasmtime/src/runtime/component/func/typed.rs @@ -1085,7 +1085,7 @@ macro_rules! integers { // `align_to_mut` which is not safe in general but is safe in // our specific case as all `u8` patterns are valid `Self` // patterns since `Self` is an integral type. - let dst = &mut cx.as_slice_mut()[offset..][..items.len() * Self::SIZE32]; + let mut dst = cx.get_dyn(offset, items.len() * Self::SIZE32); let (before, middle, end) = unsafe { dst.align_to_mut::() }; assert!(before.is_empty() && end.is_empty()); assert_eq!(middle.len(), items.len()); @@ -1185,7 +1185,7 @@ macro_rules! floats { ) -> Result<()> { debug_assert!(matches!(ty, InterfaceType::$ty)); debug_assert!(offset % Self::SIZE32 == 0); - let ptr = cx.get(offset); + let mut ptr = cx.get(offset); *ptr = self.to_bits().to_le_bytes(); Ok(()) } @@ -1207,7 +1207,7 @@ macro_rules! floats { // This should all have already been verified in terms of // alignment and sizing meaning that these assertions here are // not truly necessary but are instead double-checks. - let dst = &mut cx.as_slice_mut()[offset..][..items.len() * Self::SIZE32]; + let mut dst = cx.get_dyn(offset, items.len() * Self::SIZE32); assert!(dst.as_ptr().cast::().is_aligned()); // And with all that out of the way perform the copying loop. @@ -1477,7 +1477,8 @@ fn lower_string(cx: &mut LowerContext<'_, T>, string: &str) -> Result<(usize, ); } let ptr = cx.realloc(0, 0, 1, string.len())?; - cx.as_slice_mut()[ptr..][..string.len()].copy_from_slice(string.as_bytes()); + cx.get_dyn(ptr, string.len()) + .copy_from_slice(string.as_bytes()); Ok((ptr, string.len())) } @@ -1494,13 +1495,14 @@ fn lower_string(cx: &mut LowerContext<'_, T>, string: &str) -> Result<(usize, } let mut ptr = cx.realloc(0, 0, 2, size)?; let mut copied = 0; - let bytes = &mut cx.as_slice_mut()[ptr..][..size]; + let mut bytes = cx.get_dyn(ptr, size); for (u, bytes) in string.encode_utf16().zip(bytes.chunks_mut(2)) { let u_bytes = u.to_le_bytes(); bytes[0] = u_bytes[0]; bytes[1] = u_bytes[1]; copied += 1; } + drop(bytes); if (copied * 2) < size { ptr = cx.realloc(ptr, size, 2, copied * 2)?; } @@ -1512,7 +1514,7 @@ fn lower_string(cx: &mut LowerContext<'_, T>, string: &str) -> Result<(usize, let bytes = string.as_bytes(); let mut iter = string.char_indices(); let mut ptr = cx.realloc(0, 0, 2, bytes.len())?; - let mut dst = &mut cx.as_slice_mut()[ptr..][..bytes.len()]; + let mut dst = cx.get_dyn(ptr, bytes.len()); let mut result = 0; while let Some((i, ch)) = iter.next() { // Test if this `char` fits into the latin1 encoding. @@ -1531,8 +1533,9 @@ fn lower_string(cx: &mut LowerContext<'_, T>, string: &str) -> Result<(usize, if worst_case > MAX_STRING_BYTE_LENGTH { bail!("byte length too large"); } + drop(dst); ptr = cx.realloc(ptr, bytes.len(), 2, worst_case)?; - dst = &mut cx.as_slice_mut()[ptr..][..worst_case]; + dst = cx.get_dyn(ptr, worst_case); // Previously encoded latin1 bytes are inflated to their 16-bit // size for utf16 @@ -1551,11 +1554,13 @@ fn lower_string(cx: &mut LowerContext<'_, T>, string: &str) -> Result<(usize, bytes[1] = u_bytes[1]; result += 1; } + drop(dst); if worst_case > 2 * result { ptr = cx.realloc(ptr, worst_case, 2, 2 * result)?; } return Ok((ptr, result | UTF16_TAG)); } + drop(dst); if result < bytes.len() { ptr = cx.realloc(ptr, bytes.len(), 2, result)?; } diff --git a/crates/wasmtime/src/runtime/component/instance.rs b/crates/wasmtime/src/runtime/component/instance.rs index b60d43eab2..28402a3511 100644 --- a/crates/wasmtime/src/runtime/component/instance.rs +++ b/crates/wasmtime/src/runtime/component/instance.rs @@ -1016,6 +1016,18 @@ impl InstancePre { fn instantiate_impl(&self, mut store: impl AsContextMut) -> Result { let mut store = store.as_context_mut(); + #[cfg(feature = "rr-component")] + { + use crate::rr::{Validate, component_events::InstantiationEvent}; + store + .0 + .record_event(|| InstantiationEvent::from_component(&self.component))?; + // This is a required validation check for functional correctness, so don't use + // [`StoreOpaque::next_replay_event_validation`] + store.0.next_replay_event_and(|event: InstantiationEvent| { + event.validate(&InstantiationEvent::from_component(&self.component)) + })?; + } store .engine() .allocator() diff --git a/crates/wasmtime/src/runtime/func.rs b/crates/wasmtime/src/runtime/func.rs index 101a94cdef..be8725d6e2 100644 --- a/crates/wasmtime/src/runtime/func.rs +++ b/crates/wasmtime/src/runtime/func.rs @@ -1482,6 +1482,69 @@ impl Func { } } +/// Convenience methods to inject record + replay logic +mod rr_hooks { + use super::*; + #[cfg(feature = "rr")] + use crate::rr::core_events::HostFuncReturnEvent; + use wasmtime_environ::WasmFuncType; + + #[inline] + /// Record and replay hook operation for host function entry events + pub fn record_replay_host_func_entry( + args: &[MaybeUninit], + wasm_func_type: &WasmFuncType, + store: &mut StoreOpaque, + ) -> Result<()> { + #[cfg(all(feature = "rr", feature = "rr-validate"))] + { + // Record/replay the raw parameter args + use crate::rr::core_events::HostFuncEntryEvent; + store.record_event_validation(|| { + let num_params = wasm_func_type.params().len(); + HostFuncEntryEvent::new(&args[..num_params], wasm_func_type.clone()) + })?; + store.next_replay_event_validation::(wasm_func_type)?; + } + let _ = (args, wasm_func_type, store); + Ok(()) + } + + #[inline] + /// Record hook operation for host function return events + pub fn record_host_func_return( + args: &[MaybeUninit], + wasm_func_type: &WasmFuncType, + store: &mut StoreOpaque, + ) -> Result<()> { + // Record the return values + #[cfg(feature = "rr")] + store.record_event(|| { + let func_type = wasm_func_type; + let num_results = func_type.params().len(); + HostFuncReturnEvent::new(&args[..num_results]) + })?; + let _ = (args, wasm_func_type, store); + Ok(()) + } + + #[inline] + /// Replay hook operation for host function return events + pub fn replay_host_func_return( + args: &mut [MaybeUninit], + wasm_func_type: &WasmFuncType, + store: &mut StoreOpaque, + ) -> Result<()> { + #[cfg(feature = "rr")] + store.next_replay_event_and(|event: HostFuncReturnEvent| { + event.move_into_slice(args); + Ok(()) + })?; + let _ = (args, wasm_func_type, store); + Ok(()) + } +} + /// Prepares for entrance into WebAssembly. /// /// This function will set up context such that `closure` is allowed to call a @@ -2364,45 +2427,74 @@ impl HostContext { }; let func = &state.func; - let ret = 'ret: { - if let Err(trap) = caller.store.0.call_hook(CallHook::CallingHost) { - break 'ret R::fallible_from_error(trap); - } + let wasm_func_subtype = { + let type_index = state._ty.index(); + caller.engine().signatures().borrow(type_index).unwrap() + }; + let wasm_func_type = wasm_func_subtype.unwrap_func(); + + // Record/replay(validation) of the raw parameter arguments + // Don't need auto-assert GC store here since we aren't using P, just raw args + rr_hooks::record_replay_host_func_entry( + unsafe { args.as_ref() }, + wasm_func_type, + caller.store.0, + )?; - let mut store = if P::may_gc() { - AutoAssertNoGc::new(caller.store.0) - } else { - unsafe { AutoAssertNoGc::disabled(caller.store.0) } + if !caller.store.0.replay_enabled() { + let ret = 'ret: { + if let Err(trap) = caller.store.0.call_hook(CallHook::CallingHost) { + break 'ret R::fallible_from_error(trap); + } + // Setup call parameters + let params = { + let mut store = if P::may_gc() { + AutoAssertNoGc::new(caller.store.0) + } else { + unsafe { AutoAssertNoGc::disabled(caller.store.0) } + }; + // SAFETY: this function requires `args` to be valid and the + // `WasmTyList` trait means that everything should be correctly + // ascribed/typed, making this valid to load from. + unsafe { P::load(&mut store, args.as_mut()) } + // Drop on store is necessary here; scope closure makes this implicit + }; + let r = func(caller.sub_caller(), params); + if let Err(trap) = caller.store.0.call_hook(CallHook::ReturningFromHost) { + break 'ret R::fallible_from_error(trap); + } + r.into_fallible() }; - // SAFETY: this function requires `args` to be valid and the - // `WasmTyList` trait means that everything should be correctly - // ascribed/typed, making this valid to load from. - let params = unsafe { P::load(&mut store, args.as_mut()) }; - let _ = &mut store; - drop(store); - - let r = func(caller.sub_caller(), params); - if let Err(trap) = caller.store.0.call_hook(CallHook::ReturningFromHost) { - break 'ret R::fallible_from_error(trap); + if !ret.compatible_with_store(caller.store.0) { + bail!("host function attempted to return cross-`Store` value to Wasm") + } else { + let mut store = if R::may_gc() { + AutoAssertNoGc::new(caller.store.0) + } else { + unsafe { AutoAssertNoGc::disabled(caller.store.0) } + }; + // SAFETY: this function requires that `args` is safe for this + // type signature, and the guarantees of `WasmRet` means that + // everything should be typed appropriately. + unsafe { ret.store(&mut store, args.as_mut())? }; } - r.into_fallible() - }; - - if !ret.compatible_with_store(caller.store.0) { - bail!("host function attempted to return cross-`Store` value to Wasm") + // Record the return values + rr_hooks::record_host_func_return( + unsafe { args.as_ref() }, + wasm_func_type, + caller.store.0, + )?; } else { - let mut store = if R::may_gc() { - AutoAssertNoGc::new(caller.store.0) - } else { - unsafe { AutoAssertNoGc::disabled(caller.store.0) } - }; - // SAFETY: this function requires that `args` is safe for this - // type signature, and the guarantees of `WasmRet` means that - // everything should be typed appropriately. - let ret = unsafe { ret.store(&mut store, args.as_mut())? }; - Ok(ret) + // Replay the return values + rr_hooks::replay_host_func_return( + unsafe { args.as_mut() }, + wasm_func_type, + caller.store.0, + )?; } + + Ok(()) }; // With nothing else on the stack move `run` into this diff --git a/crates/wasmtime/src/runtime/instance.rs b/crates/wasmtime/src/runtime/instance.rs index fef680a35a..d852912268 100644 --- a/crates/wasmtime/src/runtime/instance.rs +++ b/crates/wasmtime/src/runtime/instance.rs @@ -932,6 +932,20 @@ fn pre_instantiate_raw( imports.push(&item, store); } + #[cfg(feature = "rr")] + if module.engine().rr().is_some() + && module.exports().any(|export| { + use crate::ExternType; + if let ExternType::Memory(_) = export.ty() { + true + } else { + false + } + }) + { + bail!("Cannot support record/replay for core wasm modules when a memory is exported"); + } + Ok(imports) } diff --git a/crates/wasmtime/src/runtime/rr/events/component_events.rs b/crates/wasmtime/src/runtime/rr/events/component_events.rs new file mode 100644 index 0000000000..e103b11823 --- /dev/null +++ b/crates/wasmtime/src/runtime/rr/events/component_events.rs @@ -0,0 +1,205 @@ +//! Module comprising of component model wasm events + +use super::*; +#[expect(unused_imports, reason = "used for doc-links")] +use crate::component::{Component, ComponentType}; +use wasmtime_environ::component::InterfaceType; +use wasmtime_environ::component::TypeFunc; + +/// A [`Component`] instantiatation event +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct InstantiationEvent { + /// A checksum of the component bytecode + checksum: [u8; 32], +} + +impl InstantiationEvent { + pub fn from_component(component: &Component) -> Self { + Self { + checksum: *component.checksum(), + } + } +} + +/// A call event from a Wasm component into the host +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct HostFuncEntryEvent { + /// Raw values passed across the call entry boundary + args: RRFuncArgVals, + + /// Param/return types (required to support replay validation). + /// + /// Note: This relies on the invariant that [InterfaceType] will always be + /// deterministic. Currently, the type indices into various [ComponentTypes] + /// maintain this, allowing for quick type-checking. + types: TypeFunc, +} +impl HostFuncEntryEvent { + // Record + pub fn new(args: &[MaybeUninit], types: TypeFunc) -> Self { + Self { + args: func_argvals_from_raw_slice(args), + types: types, + } + } +} +#[cfg(feature = "rr-validate")] +impl Validate for HostFuncEntryEvent { + fn validate(&self, expect_types: &TypeFunc) -> Result<(), ReplayError> { + self.log(); + if &self.types == expect_types { + Ok(()) + } else { + Err(ReplayError::FailedValidation) + } + } +} + +/// A return event after a host call for a Wasm component +/// +/// Matches 1:1 with [`HostFuncEntryEvent`] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct HostFuncReturnEvent { + /// Lowered values passed across the call return boundary + args: RRFuncArgVals, +} +impl HostFuncReturnEvent { + pub fn new(args: &[MaybeUninit]) -> Self { + Self { + args: func_argvals_from_raw_slice(args), + } + } + + /// Consume the caller event and encode it back into the slice + pub fn move_into_slice(self, args: &mut [MaybeUninit]) { + func_argvals_into_raw_slice(self.args, args); + } +} + +macro_rules! generic_new_result_events { + ( + $( + $(#[doc = $doc:literal])* + $event:ident => ($ok_ty:ty,$err_variant:path) + ),* + ) => ( + $( + $(#[doc = $doc])* + #[derive(Debug, Clone, Serialize, Deserialize)] + pub struct $event { + ret: Result<$ok_ty, EventActionError>, + } + + impl $event { + pub fn new(ret: &Result<$ok_ty>) -> Self { + Self { + ret: ret.as_ref().map(|t| *t).map_err(|e| $err_variant(e.to_string())) + } + } + pub fn ret(self) -> Result<$ok_ty, EventActionError> { self.ret } + } + + )* + ); +} + +macro_rules! generic_new_events { + ( + $( + $(#[doc = $doc:literal])* + $struct:ident { + $( + $field:ident : $field_ty:ty + ),* + } + ),* + ) => ( + $( + #[derive(Debug, Clone, Serialize, Deserialize)] + $(#[doc = $doc])* + pub struct $struct { + $( + pub $field: $field_ty, + )* + } + )* + $( + impl $struct { + pub fn new($($field: $field_ty),*) -> Self { + Self { + $($field),* + } + } + } + )* + ); +} + +generic_new_result_events! { + /// Return from a reallocation call (needed only for validation) + ReallocReturnEvent => (usize, EventActionError::ReallocError), + /// Return from a type lowering invocation + LowerReturnEvent => ((), EventActionError::LowerError), + /// Return from store invocations during type lowering + LowerStoreReturnEvent => ((), EventActionError::LowerStoreError) +} + +#[cfg(feature = "rr-validate")] +impl Validate> for ReallocReturnEvent { + /// We can check that realloc is deterministic (as expected by the engine) + fn validate(&self, expect_ret: &Result) -> Result<(), ReplayError> { + self.log(); + // Cannot just use eq since anyhow::Error and EventActionError cannot be compared + match (self.ret.as_ref(), expect_ret.as_ref()) { + (Ok(r), Ok(s)) => { + if r == s { + Ok(()) + } else { + Err(ReplayError::FailedValidation) + } + } + // Return the recorded error + (Err(e), Err(f)) => Err(ReplayError::from(EventActionError::ReallocError(format!( + "Replayed Realloc Error: {} \nRecorded Realloc Error: {}", + e, f + )))), + // Diverging errors.. Report as a failed validation + (Ok(_), Err(_)) => Err(ReplayError::FailedValidation), + (Err(_), Ok(_)) => Err(ReplayError::FailedValidation), + } + } +} + +generic_new_events! { + /// A reallocation call event in the Component Model canonical ABI + /// + /// Usually performed during lowering of complex [`ComponentType`]s to Wasm + ReallocEntryEvent { + old_addr: usize, + old_size: usize, + old_align: u32, + new_size: usize + }, + + /// Entry to a type lowering invocation + LowerEntryEvent { + ty: InterfaceType + }, + + /// Entry to store invocations during type lowering + LowerStoreEntryEvent { + ty: InterfaceType, + offset: usize + }, + + /// A write to a mutable slice of Wasm linear memory by the host. This is the + /// fundamental representation of host-written data to Wasm and is usually + /// performed during lowering of a [`ComponentType`]. + /// Note that this currently signifies a single mutable operation at the smallest granularity + /// on a given linear memory slice. These can be optimized and coalesced into + /// larger granularity operations in the future at either the recording or the replay level. + MemorySliceWriteEvent { + offset: usize, + bytes: Vec + } +} diff --git a/crates/wasmtime/src/runtime/rr/events/core_events.rs b/crates/wasmtime/src/runtime/rr/events/core_events.rs new file mode 100644 index 0000000000..885de03fa6 --- /dev/null +++ b/crates/wasmtime/src/runtime/rr/events/core_events.rs @@ -0,0 +1,59 @@ +//! Module comprising of core wasm events +use super::*; +#[expect(unused_imports, reason = "used for doc-links")] +use wasmtime_environ::{WasmFuncType, WasmValType}; + +/// Note: Switch [`CoreFuncArgTypes`] to use [`Vec`] for better efficiency +type CoreFuncArgTypes = WasmFuncType; + +/// A call event from a Core Wasm module into the host +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct HostFuncEntryEvent { + /// Raw values passed across the call/return boundary + args: RRFuncArgVals, + /// Param/return types (required to support replay validation) + types: CoreFuncArgTypes, +} +impl HostFuncEntryEvent { + // Record + pub fn new(args: &[MaybeUninit], types: WasmFuncType) -> Self { + Self { + args: func_argvals_from_raw_slice(args), + types: types, + } + } +} +#[cfg(feature = "rr-validate")] +impl Validate for HostFuncEntryEvent { + fn validate(&self, expect_types: &CoreFuncArgTypes) -> Result<(), ReplayError> { + self.log(); + if &self.types == expect_types { + Ok(()) + } else { + Err(ReplayError::FailedValidation) + } + } +} + +/// A return event after a host call for a Core Wasm +/// +/// Matches 1:1 with [`HostFuncEntryEvent`] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct HostFuncReturnEvent { + /// Raw values passed across the call/return boundary + args: RRFuncArgVals, +} +impl HostFuncReturnEvent { + // Record + pub fn new(args: &[MaybeUninit]) -> Self { + Self { + args: func_argvals_from_raw_slice(args), + } + } + // Replay + /// Consume the caller event and encode it back into the slice with an optional + /// typechecking validation of the event. + pub fn move_into_slice(self, args: &mut [MaybeUninit]) { + func_argvals_into_raw_slice(self.args, args); + } +} diff --git a/crates/wasmtime/src/runtime/rr/events/mod.rs b/crates/wasmtime/src/runtime/rr/events/mod.rs new file mode 100644 index 0000000000..8b0e7dde4d --- /dev/null +++ b/crates/wasmtime/src/runtime/rr/events/mod.rs @@ -0,0 +1,154 @@ +#[cfg(any(feature = "rr-component", feature = "rr-validate"))] +use super::ReplayError; +use crate::ValRaw; +use crate::prelude::*; +use core::fmt; +use core::mem::{self, MaybeUninit}; +use serde::{Deserialize, Serialize}; + +/// A serde compatible representation of errors produced by actions during +/// initial recording for specific events +/// +/// We need this since the [anyhow::Error] trait object cannot be used. This +/// type just encapsulates the corresponding display messages during recording +/// so that it can be re-thrown during replay +/// +/// Unforunately since we cannot serialize [anyhow::Error], there's no good +/// way to equate errors across record/replay boundary without creating a +/// common error format. Perhaps this is future work +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub enum EventActionError { + ReallocError(String), + LowerError(String), + LowerStoreError(String), +} + +impl fmt::Display for EventActionError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::ReallocError(s) | Self::LowerError(s) | Self::LowerStoreError(s) => { + write!(f, "{}", s) + } + } + } +} + +impl core::error::Error for EventActionError {} + +type ValRawBytes = [u8; mem::size_of::()]; + +/// Types that can be converted zero-copy to [`ValRawBytes`] for +/// serialization/deserialization in record/replay (since +/// unions are non serializable by `serde`) +/// +/// Essentially [`From`] and [`Into`] but local to the crate +/// to bypass orphan rule for externally defined types +trait ValRawBytesConvertable { + fn to_valraw_bytes(self) -> ValRawBytes; + fn from_valraw_bytes(value: ValRawBytes) -> Self; +} + +impl ValRawBytesConvertable for ValRaw { + #[inline] + fn to_valraw_bytes(self) -> ValRawBytes { + self.as_bytes() + } + #[inline] + fn from_valraw_bytes(value: ValRawBytes) -> Self { + ValRaw::from_bytes(value) + } +} + +impl ValRawBytesConvertable for MaybeUninit { + #[inline] + fn to_valraw_bytes(self) -> ValRawBytes { + // Uninitialized data is assumed and serialized, so hence + // may contain some undefined values + unsafe { self.assume_init() }.to_valraw_bytes() + } + #[inline] + fn from_valraw_bytes(value: ValRawBytes) -> Self { + MaybeUninit::new(ValRaw::from_valraw_bytes(value)) + } +} + +type RRFuncArgVals = Vec; + +/// Construct [`RRFuncArgVals`] from raw value buffer +fn func_argvals_from_raw_slice(args: &[T]) -> RRFuncArgVals +where + T: ValRawBytesConvertable + Copy, +{ + args.iter().map(|x| x.to_valraw_bytes()).collect() +} + +/// Encode [`RRFuncArgVals`] back into raw value buffer +fn func_argvals_into_raw_slice(rr_args: RRFuncArgVals, raw_args: &mut [T]) +where + T: ValRawBytesConvertable, +{ + for (src, dst) in rr_args.into_iter().zip(raw_args.iter_mut()) { + *dst = T::from_valraw_bytes(src); + } +} + +/// Trait signifying types that can be validated on replay +/// +/// All `PartialEq` and `Eq` types are directly validatable with themselves. +/// Note however that some [`Validate`] implementations are present even +/// when feature `rr-validate` is disabled, when validation is needed +/// for a faithful replay (e.g. [`component_events::InstantiationEvent`]). +#[cfg(any(feature = "rr-component", feature = "rr-validate"))] +pub trait Validate { + /// Perform a validation of the event to ensure replay consistency + fn validate(&self, expect: &T) -> Result<(), ReplayError>; + + /// Write a log message + fn log(&self) + where + Self: fmt::Debug, + { + log::debug!("Validating => {:?}", self); + } +} + +#[cfg(any(feature = "rr-component", feature = "rr-validate"))] +impl Validate for T +where + T: PartialEq + fmt::Debug, +{ + /// All types that are [`PartialEq`] are directly validatable with themselves + fn validate(&self, expect: &T) -> Result<(), ReplayError> { + self.log(); + if self == expect { + Ok(()) + } else { + Err(ReplayError::FailedValidation) + } + } +} + +/// Events used as markers for debugging/testing in traces +/// +/// Marker events should be injectable at any point in a record +/// trace without impacting functional correctness of replay +pub mod marker_events { + use crate::prelude::*; + use serde::{Deserialize, Serialize}; + + /// A Nop event + #[derive(Debug, Clone, Serialize, Deserialize)] + pub struct NopEvent; + + /// An event for custom String messages + #[derive(Debug, Clone, Serialize, Deserialize)] + pub struct CustomMessageEvent(pub String); + impl From<&str> for CustomMessageEvent { + fn from(v: &str) -> Self { + Self(v.into()) + } + } +} + +pub mod component_events; +pub mod core_events; diff --git a/crates/wasmtime/src/runtime/rr/io.rs b/crates/wasmtime/src/runtime/rr/io.rs new file mode 100644 index 0000000000..c59e54a64d --- /dev/null +++ b/crates/wasmtime/src/runtime/rr/io.rs @@ -0,0 +1,70 @@ +use crate::prelude::*; +use postcard; +use serde::{Deserialize, Serialize}; + +cfg_if::cfg_if! { + if #[cfg(feature = "std")] { + use std::io::{Write, Read}; + /// An [`Write`] usable for recording in RR + /// + /// This supports `no_std`, but must be [Send] and [Sync] + pub trait RecordWriter: Write + Send + Sync {} + impl RecordWriter for T {} + + /// An [`Read`] usable for replaying in RR + pub trait ReplayReader: Read + Send + Sync {} + impl ReplayReader for T {} + + } else { + // `no_std` configuration + use embedded_io::{Read, Write}; + + /// An [`Write`] usable for recording in RR + /// + /// This supports `no_std`, but must be [Send] and [Sync] + pub trait RecordWriter: Write + Send + Sync {} + impl RecordWriter for T {} + + /// An [`Read`] usable for replaying in RR + /// + /// This supports `no_std`, but must be [Send] and [Sync] + pub trait ReplayReader: Read + Send + Sync {} + impl ReplayReader for T {} + } +} + +/// Serialize and write `value` to a `RecordWriter` +/// +/// Currently uses `postcard` serializer +pub fn to_record_writer(value: &T, writer: W) -> Result<()> +where + T: Serialize + ?Sized, + W: RecordWriter, +{ + cfg_if::cfg_if! { + if #[cfg(feature = "std")] { + postcard::to_io(value, writer)?; + } else { + postcard::to_eio(value, writer)?; + } + } + Ok(()) +} + +/// Read and deserialize a `value` from a `ReplayReader`. +/// +/// Currently uses `postcard` deserializer, with optional scratch +/// buffer to deserialize into +pub fn from_replay_reader<'a, T, R>(reader: R, scratch: &'a mut [u8]) -> Result +where + T: Deserialize<'a>, + R: ReplayReader + 'a, +{ + cfg_if::cfg_if! { + if #[cfg(feature = "std")] { + Ok(postcard::from_io((reader, scratch))?.0) + } else { + Ok(postcard::from_eio((reader, scratch))?.0) + } + } +} diff --git a/crates/wasmtime/src/runtime/rr/mod.rs b/crates/wasmtime/src/runtime/rr/mod.rs new file mode 100644 index 0000000000..9fec674b48 --- /dev/null +++ b/crates/wasmtime/src/runtime/rr/mod.rs @@ -0,0 +1,540 @@ +#![cfg(feature = "rr")] +//! Wasmtime's Record and Replay support. +//! +//! This feature is currently not optimized and under development +//! +//! ## Notes +//! +//! This module does NOT support RR for component builtins yet. + +use crate::config::{ModuleVersionStrategy, RecordSettings, ReplaySettings}; +use crate::prelude::*; +use core::fmt; +use events::EventActionError; +use serde::{Deserialize, Serialize}; +// Use component events internally even without feature flags enabled +// so that [`RREvent`] has a well-defined serialization format, but export +// it for other modules only when enabled +#[cfg(any(feature = "rr-validate", feature = "rr-component"))] +pub use events::Validate; +use events::component_events as __component_events; +#[cfg(feature = "rr-component")] +pub use events::component_events; +pub use events::{core_events, marker_events}; +pub use io::{RecordWriter, ReplayReader}; + +/// Encapsulation of event types comprising an [`RREvent`] sum type +mod events; +/// I/O support for reading and writing traces +mod io; + +/// Macro template for [`RREvent`] and its conversion to/from specific +/// event types +macro_rules! rr_event { + ( + $( + $(#[doc = $doc:literal])* + $variant:ident($event:ty) + ),* + ) => ( + /// A single, unified, low-level recording/replay event + /// + /// This type is the narrow waist for serialization/deserialization. + /// Higher-level events (e.g. import calls consisting of lifts and lowers + /// of parameter/return types) may drop down to one or more [`RREvent`]s + #[derive(Debug, Clone, Serialize, Deserialize)] + pub enum RREvent { + /// Event signalling the end of a trace + Eof, + $( + $(#[doc = $doc])* + $variant($event), + )* + } + + impl fmt::Display for RREvent { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Eof => write!(f, "Eof event"), + $( + Self::$variant(e) => write!(f, "{:?}", e), + )* + } + } + } + + $( + impl From<$event> for RREvent { + fn from(value: $event) -> Self { + RREvent::$variant(value) + } + } + impl TryFrom for $event { + type Error = ReplayError; + fn try_from(value: RREvent) -> Result { + if let RREvent::$variant(x) = value { + Ok(x) + } else { + Err(ReplayError::IncorrectEventVariant) + } + } + } + )* + ); +} + +// Set of supported record/replay events +rr_event! { + // Marker events + /// Nop Event + Nop(marker_events::NopEvent), + /// A custom message + CustomMessage(marker_events::CustomMessageEvent), + + /// Call into host function from Core Wasm + CoreHostFuncEntry(core_events::HostFuncEntryEvent), + /// Return from host function to Core Wasm + CoreHostFuncReturn(core_events::HostFuncReturnEvent), + + // REQUIRED events for replay + // + /// Instantiation of a component + ComponentInstantiation(__component_events::InstantiationEvent), + /// Return from host function to component + ComponentHostFuncReturn(__component_events::HostFuncReturnEvent), + /// Component ABI realloc call in linear wasm memory + ComponentReallocEntry(__component_events::ReallocEntryEvent), + /// Return from a type lowering operation + ComponentLowerReturn(__component_events::LowerReturnEvent), + /// Return from a store during a type lowering operation + ComponentLowerStoreReturn(__component_events::LowerStoreReturnEvent), + /// An attempt to obtain a mutable slice into Wasm linear memory + ComponentMemorySliceWrite(__component_events::MemorySliceWriteEvent), + + // OPTIONAL events for replay validation + // + // ReallocReturn is optional because we can assume the realloc is deterministic + // and the error message is subsumed by the containing LowerReturn/LowerStoreReturn + /// Return from Component ABI realloc call + ComponentReallocReturn(__component_events::ReallocReturnEvent), + /// Call into host function from component + ComponentHostFuncEntry(__component_events::HostFuncEntryEvent), + /// Call into [Lower::lower] for type lowering + ComponentLowerEntry(__component_events::LowerEntryEvent), + /// Call into [Lower::store] during type lowering + ComponentLowerStoreEntry(__component_events::LowerStoreEntryEvent) +} + +impl RREvent { + /// Indicates whether current event is a marker event + #[inline] + fn is_marker(&self) -> bool { + match self { + Self::Nop(_) | Self::CustomMessage(_) => true, + _ => false, + } + } +} + +/// Error type signalling failures during a replay run +#[derive(Debug, PartialEq, Eq)] +pub enum ReplayError { + EmptyBuffer, + FailedValidation, + IncorrectEventVariant, + InvalidOrdering, + EventActionError(EventActionError), +} + +impl fmt::Display for ReplayError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::EmptyBuffer => { + write!( + f, + "replay buffer is empty (or unexpected read-failure encountered). Ensure sufficient `deserialization-buffer-size` in replay settings if you included `validation-metadata` during recording" + ) + } + Self::FailedValidation => { + write!(f, "replay event validation failed") + } + Self::IncorrectEventVariant => { + write!(f, "event method invoked on incorrect variant") + } + Self::EventActionError(e) => { + write!(f, "{:?}", e) + } + Self::InvalidOrdering => { + write!(f, "event occured at an invalid position in the trace") + } + } + } +} + +impl core::error::Error for ReplayError {} + +impl From for ReplayError { + fn from(value: EventActionError) -> Self { + Self::EventActionError(value) + } +} + +/// This trait provides the interface for a FIFO recorder +pub trait Recorder { + /// Construct a recorder with the writer backend + fn new_recorder(writer: Box, settings: RecordSettings) -> Result + where + Self: Sized; + + /// Record the event generated by `f` + /// + /// ## Error + /// + /// Propogates from underlying writer + fn record_event(&mut self, f: F) -> Result<()> + where + T: Into, + F: FnOnce() -> T; + + /// Trigger an explicit flush of any buffered data to the writer + /// + /// Buffer should be emptied during this process + fn flush(&mut self) -> Result<()>; + + /// Get settings associated with the recording process + fn settings(&self) -> &RecordSettings; + + // Provided methods + + /// Record a event only when validation is requested + #[inline] + #[cfg(feature = "rr-validate")] + fn record_event_validation(&mut self, f: F) -> Result<()> + where + T: Into, + F: FnOnce() -> T, + { + let settings = self.settings(); + if settings.add_validation { + self.record_event(f)?; + } + Ok(()) + } +} + +/// This trait provides the interface for a FIFO replayer that +/// essentially operates as an iterator over the recorded events +pub trait Replayer: Iterator { + /// Constructs a reader on buffer + fn new_replayer(reader: Box, settings: ReplaySettings) -> Result + where + Self: Sized; + + /// Get settings associated with the replay process + #[allow( + unused, + reason = "currently used only for validation resulting in \ + many unnecessary feature gates. will expand in the future to more features" + )] + fn settings(&self) -> &ReplaySettings; + + /// Get the settings (embedded within the trace) during recording + #[allow( + unused, + reason = "currently used only for validation resulting in \ + many unnecessary feature gates. will expand in the future to more features" + )] + fn trace_settings(&self) -> &RecordSettings; + + // Provided Methods + + /// Get the next functional replay event (skips past all non-marker events) + /// + /// ## Errors + /// + /// Returns a [`ReplayError::EmptyBuffer`] if the buffer is empty + #[inline] + fn next_event(&mut self) -> Result { + let event = self.next().ok_or(ReplayError::EmptyBuffer); + if let Ok(e) = &event { + log::debug!("Replay Event => {}", e); + } + event + } + + /// Pop the next replay event with an attemped type conversion to expected + /// event type + /// + /// ## Errors + /// + /// See [`next_event_and`](Replayer::next_event_and) + #[inline] + fn next_event_typed(&mut self) -> Result + where + T: TryFrom, + ReplayError: From<>::Error>, + { + T::try_from(self.next_event()?).map_err(|e| e.into()) + } + + /// Pop the next replay event and calls `f` with a desired type conversion + /// + /// ## Errors + /// + /// Returns a [`ReplayError::EmptyBuffer`] if the buffer is empty or a + /// [`ReplayError::IncorrectEventVariant`] if it failed to convert type safely + #[inline] + fn next_event_and(&mut self, f: F) -> Result<(), ReplayError> + where + T: TryFrom, + ReplayError: From<>::Error>, + F: FnOnce(T) -> Result<(), ReplayError>, + { + let call_event = self.next_event_typed()?; + Ok(f(call_event)?) + } + + /// Conditionally process the next validation recorded event and if + /// replay validation is enabled, run the validation check + /// + /// ## Errors + /// + /// In addition to errors in [`next_event_typed`](Replayer::next_event_typed), + /// validation errors can be thrown + #[inline] + #[cfg(feature = "rr-validate")] + fn next_event_validation(&mut self, expect: &Y) -> Result<(), ReplayError> + where + T: TryFrom + Validate, + ReplayError: From<>::Error>, + { + if self.trace_settings().add_validation { + let event = self.next_event_typed::()?; + if self.settings().validate { + event.validate(expect) + } else { + Ok(()) + } + } else { + Ok(()) + } + } +} + +/// Buffer to write recording data. +/// +/// This type can be optimized for [`RREvent`] data configurations. +pub struct RecordBuffer { + /// In-memory event buffer to enable windows for coalescing + buf: Vec, + /// Writer to store data into + writer: Box, + /// Settings in record configuration + settings: RecordSettings, +} + +impl RecordBuffer { + /// Push a new record event [`RREvent`] to the buffer + fn push_event(&mut self, event: RREvent) -> Result<()> { + self.buf.push(event); + if self.buf.len() >= self.settings().event_window_size { + self.flush()?; + } + Ok(()) + } +} + +impl Drop for RecordBuffer { + fn drop(&mut self) { + // Insert End of trace delimiter + self.push_event(RREvent::Eof).unwrap(); + self.flush().unwrap(); + } +} + +impl Recorder for RecordBuffer { + fn new_recorder(mut writer: Box, settings: RecordSettings) -> Result { + // Replay requires the Module version and record settings + io::to_record_writer(ModuleVersionStrategy::WasmtimeVersion.as_str(), &mut writer)?; + io::to_record_writer(&settings, &mut writer)?; + Ok(RecordBuffer { + buf: Vec::new(), + writer: writer, + settings: settings, + }) + } + + #[inline] + fn record_event(&mut self, f: F) -> Result<()> + where + T: Into, + F: FnOnce() -> T, + { + let event = f().into(); + log::debug!("Recording event => {}", &event); + self.push_event(event) + } + + fn flush(&mut self) -> Result<()> { + log::debug!("Flushing record buffer..."); + for e in self.buf.drain(..) { + io::to_record_writer(&e, &mut self.writer)?; + } + return Ok(()); + } + + #[inline] + fn settings(&self) -> &RecordSettings { + &self.settings + } +} + +/// Buffer to read replay data +pub struct ReplayBuffer { + /// Reader to read replay trace from + reader: Box, + /// Settings in replay configuration + settings: ReplaySettings, + /// Settings for record configuration (encoded in the trace) + trace_settings: RecordSettings, + /// Intermediate static buffer for deserialization + deser_buffer: Vec, +} + +impl Iterator for ReplayBuffer { + type Item = RREvent; + + fn next(&mut self) -> Option { + let ret = 'event_loop: loop { + let result = io::from_replay_reader(&mut self.reader, &mut self.deser_buffer); + match result { + Err(e) => { + log::error!("Erroneous replay read: {}", e); + break 'event_loop None; + } + Ok(event) => { + if let RREvent::Eof = &event { + break 'event_loop None; + } else if event.is_marker() { + continue 'event_loop; + } else { + break 'event_loop Some(event); + } + } + } + }; + ret + } +} + +impl Drop for ReplayBuffer { + fn drop(&mut self) { + if let Some(event) = self.next() { + if let RREvent::Eof = event { + } else { + log::warn!( + "Replay buffer is dropped with {} remaining events, and is likely an invalid execution", + self.count() + ); + } + } + } +} + +impl Replayer for ReplayBuffer { + fn new_replayer(mut reader: Box, settings: ReplaySettings) -> Result { + let mut scratch = [0u8; 12]; + // Ensure module versions match + let version = io::from_replay_reader::<&str, _>(&mut reader, &mut scratch)?; + assert_eq!( + version, + ModuleVersionStrategy::WasmtimeVersion.as_str(), + "Wasmtime version mismatch between engine used for record and replay" + ); + + // Read the recording settings + let trace_settings: RecordSettings = io::from_replay_reader(&mut reader, &mut scratch)?; + + if settings.validate && !trace_settings.add_validation { + log::warn!( + "Replay validation will be omitted since the recorded trace has no validation metadata..." + ); + } + + let deser_buffer = vec![0; settings.deser_buffer_size]; + + Ok(ReplayBuffer { + reader, + settings, + trace_settings, + deser_buffer, + }) + } + + #[inline] + fn settings(&self) -> &ReplaySettings { + &self.settings + } + + #[inline] + fn trace_settings(&self) -> &RecordSettings { + &self.trace_settings + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ValRaw; + use core::mem::MaybeUninit; + use std::fs::File; + use std::path::Path; + use tempfile::{NamedTempFile, TempPath}; + + #[test] + #[cfg(all(feature = "rr", feature = "rr-component"))] + fn rr_buffers() -> Result<()> { + let record_settings = RecordSettings::default(); + let tmp = NamedTempFile::new()?; + let tmppath = tmp.path().to_str().expect("Filename should be UTF-8"); + + let values = vec![ValRaw::i32(1), ValRaw::f32(2), ValRaw::i64(3)] + .into_iter() + .map(|x| MaybeUninit::new(x)) + .collect::>(); + + // Record values + let mut recorder = + RecordBuffer::new_recorder(Box::new(File::create(tmppath)?), record_settings)?; + recorder + .record_event(|| __component_events::HostFuncReturnEvent::new(values.as_slice()))?; + recorder.flush()?; + + let tmp = tmp.into_temp_path(); + let tmppath = >::as_ref(&tmp) + .to_str() + .expect("Filename should be UTF-8"); + let replay_settings = ReplaySettings::default(); + + // Assert that replayed values are identical + let mut replayer = + ReplayBuffer::new_replayer(Box::new(File::open(tmppath)?), replay_settings)?; + let mut result_values = values.clone(); + replayer.next_event_and(|event: __component_events::HostFuncReturnEvent| { + event.move_into_slice(result_values.as_mut_slice()); + + // Check replay `values` matches record `values` + for (a, b) in values.iter().zip(result_values.iter()) { + unsafe { + assert!(a.assume_init().as_bytes() == b.assume_init().as_bytes()); + } + } + Ok(()) + })?; + + // Check queue is empty + assert!(replayer.next().is_none()); + + Ok(()) + } +} diff --git a/crates/wasmtime/src/runtime/store.rs b/crates/wasmtime/src/runtime/store.rs index 33e4b6dae5..18d486951e 100644 --- a/crates/wasmtime/src/runtime/store.rs +++ b/crates/wasmtime/src/runtime/store.rs @@ -85,6 +85,10 @@ use crate::component::concurrent; use crate::fiber; use crate::module::RegisteredModuleId; use crate::prelude::*; +#[cfg(feature = "rr-validate")] +use crate::rr::Validate; +#[cfg(feature = "rr")] +use crate::rr::{RREvent, RecordBuffer, Recorder, ReplayBuffer, ReplayError, Replayer}; #[cfg(feature = "gc")] use crate::runtime::vm::GcRootsList; #[cfg(feature = "stack-switching")] @@ -409,6 +413,17 @@ pub struct StoreOpaque { /// For example if Pulley is enabled and configured then this will store a /// Pulley interpreter. executor: Executor, + + /// Storage for recording execution + /// + /// `None` implies recording is disabled for this store + #[cfg(feature = "rr")] + record_buffer: Option, + /// Storage for replaying execution + /// + /// `None` implies replay is disabled for this store + #[cfg(feature = "rr")] + replay_buffer: Option, } /// Executor state within `StoreOpaque`. @@ -601,6 +616,30 @@ impl Store { executor: Executor::new(engine), #[cfg(feature = "component-model-async")] concurrent_async_state: Default::default(), + #[cfg(feature = "rr")] + record_buffer: engine.rr().and_then(|v| { + v.record().and_then(|record| { + Some( + RecordBuffer::new_recorder( + (record.writer_initializer)(), + record.settings.clone(), + ) + .unwrap(), + ) + }) + }), + #[cfg(feature = "rr")] + replay_buffer: engine.rr().and_then(|v| { + v.replay().and_then(|replay| { + Some( + ReplayBuffer::new_replayer( + (replay.reader_initializer)(), + replay.settings.clone(), + ) + .unwrap(), + ) + }) + }), }; let mut inner = Box::new(StoreInner { inner, @@ -1419,6 +1458,106 @@ impl StoreOpaque { &mut self.vm_store_context } + #[cfg(feature = "rr")] + #[inline(always)] + pub fn record_buffer_mut(&mut self) -> Option<&mut RecordBuffer> { + self.record_buffer.as_mut() + } + + #[cfg(feature = "rr")] + #[inline(always)] + pub fn replay_buffer_mut(&mut self) -> Option<&mut ReplayBuffer> { + self.replay_buffer.as_mut() + } + + /// Record the given event into the store's record buffer + /// + /// Convenience wrapper around [`Recorder::record_event`] + #[cfg(feature = "rr")] + #[inline(always)] + pub(crate) fn record_event(&mut self, f: F) -> Result<()> + where + T: Into, + F: FnOnce() -> T, + { + if let Some(buf) = self.record_buffer_mut() { + buf.record_event(f) + } else { + Ok(()) + } + } + + /// Conditionally record the given event into the store's record buffer + /// if validation is enabled for recording + /// + /// Convenience wrapper around [`Recorder::record_event_validation`] + #[cfg(feature = "rr-validate")] + #[inline(always)] + pub(crate) fn record_event_validation(&mut self, f: F) -> Result<()> + where + T: Into, + F: FnOnce() -> T, + { + if let Some(buf) = self.record_buffer_mut() { + buf.record_event_validation(f) + } else { + Ok(()) + } + } + + /// Process the next replay event from the store's replay buffer + /// + /// Convenience wrapper around [`Replayer::next_event_and`] + #[cfg(feature = "rr")] + #[inline] + pub(crate) fn next_replay_event_and(&mut self, f: F) -> Result<(), ReplayError> + where + T: TryFrom, + ReplayError: From<>::Error>, + F: FnOnce(T) -> Result<(), ReplayError>, + { + if let Some(buf) = self.replay_buffer_mut() { + buf.next_event_and(f) + } else { + Ok(()) + } + } + + /// Process the next replay event as a validation event from the store's replay buffer + /// and if validation is enabled on replay, and run the validation check + /// + /// Convenience wrapper around [`Replayer::next_event_validation`] + #[cfg(feature = "rr-validate")] + #[inline] + pub(crate) fn next_replay_event_validation( + &mut self, + expect: &Y, + ) -> Result<(), ReplayError> + where + T: TryFrom + Validate, + ReplayError: From<>::Error>, + { + if let Some(buf) = self.replay_buffer_mut() { + buf.next_event_validation::(expect) + } else { + Ok(()) + } + } + + /// Check if replay is enabled for the Store + /// + /// Note: Defaults to false when `rr` feature is disabled + #[inline(always)] + pub fn replay_enabled(&self) -> bool { + cfg_if::cfg_if! { + if #[cfg(feature = "rr")] { + self.replay_buffer.is_some() + } else { + false + } + } + } + #[inline(never)] pub(crate) fn allocate_gc_heap(&mut self) -> Result<()> { log::trace!("allocating GC heap for store {:?}", self.id()); diff --git a/crates/wasmtime/src/runtime/vm/component/libcalls.rs b/crates/wasmtime/src/runtime/vm/component/libcalls.rs index be65a8820a..21e9df7020 100644 --- a/crates/wasmtime/src/runtime/vm/component/libcalls.rs +++ b/crates/wasmtime/src/runtime/vm/component/libcalls.rs @@ -576,12 +576,31 @@ fn inflate_latin1_bytes(dst: &mut [u16], latin1_bytes_so_far: usize) -> &mut [u1 return rest; } +/// Hook for record/replay of libcalls. Currently stubbed for record and panics on replay +/// +/// TODO: Implement libcall hooks +#[inline] +fn rr_hook(store: &mut dyn VMStore, libcall: &str) -> Result<()> { + #[cfg(feature = "rr-component")] + { + if (*store).replay_enabled() { + bail!("Replay support for libcall {libcall:?} not yet supported!"); + } else { + use crate::rr::marker_events::CustomMessageEvent; + (*store).record_event(|| CustomMessageEvent::from(libcall))?; + } + } + let _ = (store, libcall); + Ok(()) +} + fn resource_new32( store: &mut dyn VMStore, instance: Instance, resource: u32, rep: u32, ) -> Result { + rr_hook(store, "resource_new32")?; let resource = TypeResourceTableIndex::from_u32(resource); instance.resource_new32(store, resource, rep) } @@ -592,6 +611,7 @@ fn resource_rep32( resource: u32, idx: u32, ) -> Result { + rr_hook(store, "resource_rep32")?; let resource = TypeResourceTableIndex::from_u32(resource); instance.resource_rep32(store, resource, idx) } @@ -602,6 +622,7 @@ fn resource_drop( resource: u32, idx: u32, ) -> Result { + rr_hook(store, "resource_drop")?; let resource = TypeResourceTableIndex::from_u32(resource); Ok(ResourceDropRet( instance.resource_drop(store, resource, idx)?, @@ -628,6 +649,7 @@ fn resource_transfer_own( src_table: u32, dst_table: u32, ) -> Result { + rr_hook(store, "resource_transfer_own")?; let src_table = TypeResourceTableIndex::from_u32(src_table); let dst_table = TypeResourceTableIndex::from_u32(dst_table); instance.resource_transfer_own(store, src_idx, src_table, dst_table) @@ -640,16 +662,19 @@ fn resource_transfer_borrow( src_table: u32, dst_table: u32, ) -> Result { + rr_hook(store, "resource_transfer_borrow")?; let src_table = TypeResourceTableIndex::from_u32(src_table); let dst_table = TypeResourceTableIndex::from_u32(dst_table); instance.resource_transfer_borrow(store, src_idx, src_table, dst_table) } fn resource_enter_call(store: &mut dyn VMStore, instance: Instance) { + rr_hook(store, "resource_enter_call").unwrap(); instance.resource_enter_call(store) } fn resource_exit_call(store: &mut dyn VMStore, instance: Instance) -> Result<()> { + rr_hook(store, "resource_exit_call")?; instance.resource_exit_call(store) } diff --git a/crates/wasmtime/src/runtime/vm/vmcontext.rs b/crates/wasmtime/src/runtime/vm/vmcontext.rs index ff2ba0141a..59d95f7dfa 100644 --- a/crates/wasmtime/src/runtime/vm/vmcontext.rs +++ b/crates/wasmtime/src/runtime/vm/vmcontext.rs @@ -1605,6 +1605,18 @@ impl ValRaw { assert!(cfg!(feature = "gc") || exnref == 0); exnref } + + /// Get the raw bits of the union + #[inline] + pub fn as_bytes(&self) -> [u8; mem::size_of::()] { + unsafe { mem::transmute(*self) } + } + + /// Construct ValRaw from raw bits + #[inline] + pub fn from_bytes(value: [u8; mem::size_of::()]) -> Self { + unsafe { mem::transmute(value) } + } } /// An "opaque" version of `VMContext` which must be explicitly casted to a diff --git a/src/bin/wasmtime.rs b/src/bin/wasmtime.rs index ecafff3aa5..54e84969b9 100644 --- a/src/bin/wasmtime.rs +++ b/src/bin/wasmtime.rs @@ -89,6 +89,19 @@ enum Subcommand { /// Inspect `*.cwasm` files output from Wasmtime #[cfg(feature = "objdump")] Objdump(wasmtime_cli::commands::ObjdumpCommand), + + /// Run a determinstic, embedding-agnostic replay execution of the Wasm module + /// according to a prior recorded execution trace (e.g. generated with the + /// `--record` option under `wasmtime run`). + /// + /// The options below are the superset of the `run` command. The notable options + /// added for replay are `--trace` (to specify the recorded traces) and + /// corresponding settings (e.g. `--validate`) + /// + /// Note: Minimal configs for deterministic Wasm semantics will be + /// enforced during replay by default (NaN canonicalization, deterministic relaxed SIMD) + #[cfg(feature = "rr")] + Replay(wasmtime_cli::commands::ReplayCommand), } impl Wasmtime { @@ -101,7 +114,10 @@ impl Wasmtime { match subcommand { #[cfg(feature = "run")] - Subcommand::Run(c) => c.execute(), + Subcommand::Run(c) => c.execute( + #[cfg(feature = "rr")] + None, + ), #[cfg(feature = "cache")] Subcommand::Config(c) => c.execute(), @@ -126,6 +142,9 @@ impl Wasmtime { #[cfg(feature = "objdump")] Subcommand::Objdump(c) => c.execute(), + + #[cfg(feature = "rr")] + Subcommand::Replay(c) => c.execute(), } } } diff --git a/src/commands.rs b/src/commands.rs index 04fd0286ba..eda254fb97 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -39,3 +39,8 @@ pub use self::settings::*; mod objdump; #[cfg(feature = "objdump")] pub use self::objdump::*; + +#[cfg(feature = "rr")] +mod replay; +#[cfg(feature = "rr")] +pub use self::replay::*; diff --git a/src/commands/replay.rs b/src/commands/replay.rs new file mode 100644 index 0000000000..9bd9aeb83f --- /dev/null +++ b/src/commands/replay.rs @@ -0,0 +1,66 @@ +//! Implementation of the `wasmtime replay` command + +use crate::commands::run::RunCommand; +use anyhow::Result; +use clap::Parser; +use std::{fs, io::BufReader, path::PathBuf, sync::Arc}; +use wasmtime::{ReplayConfig, ReplaySettings}; + +#[derive(Parser)] +/// Replay-specific options for CLI +pub struct ReplayOptions { + /// The path of the recorded trace + /// + /// Execution traces can be obtained for most modes of Wasmtime execution with -R. + /// See `wasmtime run -R help` for relevant information on recording execution + /// + /// Note: The module used for replay must exactly match that used during recording + #[arg(short, long, required = true, value_name = "RECORDED TRACE")] + trace: PathBuf, + + /// Dynamic checks of record signatures to validate replay consistency. + /// + /// Requires record traces to be generated with `validation_metadata` enabled. + #[arg(short, long, default_value_t = false)] + validate: bool, + + /// Size of static buffer needed to deserialized variable-length types like String. This is not + /// not relevant for basic functional recording/replaying, but may be required to replay traces where + /// `validation-metadata` was enabled for recording + #[arg(short, long, default_value_t = 64)] + deser_buffer_size: usize, +} + +/// Execute a deterministic, embedding-agnostic replay of a Wasm modules given its associated recorded trace +#[derive(Parser)] +pub struct ReplayCommand { + #[command(flatten)] + replay_opts: ReplayOptions, + + #[command(flatten)] + run_cmd: RunCommand, +} + +impl ReplayCommand { + /// Executes the command. + pub fn execute(self) -> Result<()> { + #[cfg(not(feature = "rr-validate"))] + if self.replay_opts.validate { + anyhow::bail!("Cannot use `validate` when `rr-validate` feature is disabled"); + } + let replay_cfg = ReplayConfig { + reader_initializer: Arc::new(move || { + Box::new(BufReader::new( + fs::File::open(&self.replay_opts.trace).unwrap(), + )) + }), + settings: ReplaySettings { + validate: self.replay_opts.validate, + deser_buffer_size: self.replay_opts.deser_buffer_size, + ..Default::default() + }, + }; + // Replay uses the `run` command harness + self.run_cmd.execute(Some(replay_cfg)) + } +} diff --git a/src/commands/run.rs b/src/commands/run.rs index 17f6fbbef0..0188e19936 100644 --- a/src/commands/run.rs +++ b/src/commands/run.rs @@ -13,6 +13,8 @@ use std::path::{Path, PathBuf}; use std::sync::{Arc, Mutex}; use std::thread; use wasi_common::sync::{Dir, TcpListener, WasiCtxBuilder, ambient_authority}; +#[cfg(feature = "rr")] +use wasmtime::ReplayConfig; use wasmtime::{Engine, Func, Module, Store, StoreLimits, Val, ValType}; use wasmtime_wasi::{WasiCtxView, WasiView}; @@ -85,7 +87,10 @@ enum CliLinker { impl RunCommand { /// Executes the command. - pub fn execute(mut self) -> Result<()> { + pub fn execute( + mut self, + #[cfg(feature = "rr")] replay_cfg: Option, + ) -> Result<()> { self.run.common.init_logging()?; let mut config = self.run.common.config(None)?; @@ -105,6 +110,11 @@ impl RunCommand { None => {} } + #[cfg(feature = "rr")] + if let Some(cfg) = replay_cfg { + config.enable_replay(cfg)?; + } + let engine = Engine::new(&config)?; // Read the wasm module binary either as `*.wat` or a raw binary.