Skip to content
Draft
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ bs58 = "0.5.1"
chrono = "0.4.42"
clap = "4.5.51"
criterion = "0.7.0"
hex = "0.4.3"
ed25519-dalek = "=1.0.1"
libsecp256k1 = "0.6.0"
mollusk-svm = { path = "harness", version = "0.7.2" }
Expand All @@ -53,6 +54,7 @@ serde = "1.0.203"
serde_json = "1.0.117"
serde_yaml = "0.9.34"
serial_test = "2.0"
sha2 = "0.10.9"
solana-account = "3.2.0"
solana-account-info = "3.0"
solana-bpf-loader-program = "3.1.0"
Expand Down
7 changes: 7 additions & 0 deletions harness/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ fuzz-fd = [
]
invocation-inspect-callback = []
precompiles = ["dep:agave-precompiles"]
register-tracing = [
"invocation-inspect-callback",
"dep:hex",
"dep:sha2"
]
serde = [
"dep:serde",
"mollusk-svm-result/serde",
Expand All @@ -37,13 +42,15 @@ agave-feature-set = { workspace = true, features = ["agave-unstable-api"] }
agave-precompiles = { workspace = true, features = ["agave-unstable-api"], optional = true }
agave-syscalls = { workspace = true }
bincode = { workspace = true }
hex = { workspace = true, optional = true }
serde = { workspace = true, features = ["derive"], optional = true }
mollusk-svm-error = { workspace = true }
mollusk-svm-fuzz-fixture = { workspace = true, optional = true }
mollusk-svm-fuzz-fixture-firedancer = { workspace = true, optional = true }
mollusk-svm-fuzz-fs = { workspace = true, optional = true }
mollusk-svm-keys = { workspace = true }
mollusk-svm-result = { workspace = true }
sha2 = { workspace = true, optional = true }
solana-account = { workspace = true }
solana-bpf-loader-program = { workspace = true, features = ["agave-unstable-api"] }
solana-clock = { workspace = true }
Expand Down
2 changes: 1 addition & 1 deletion harness/src/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ use {
},
};

fn default_shared_object_dirs() -> Vec<PathBuf> {
pub fn default_shared_object_dirs() -> Vec<PathBuf> {
let mut search_path = vec![PathBuf::from("tests/fixtures")];

if let Ok(bpf_out_dir) = std::env::var("BPF_OUT_DIR") {
Expand Down
53 changes: 49 additions & 4 deletions harness/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,8 @@ pub mod file;
#[cfg(any(feature = "fuzz", feature = "fuzz-fd"))]
pub mod fuzz;
pub mod program;
#[cfg(feature = "register-tracing")]
pub mod register_tracing;
pub mod sysvar;

// Re-export result module from mollusk-svm-result crate
Expand Down Expand Up @@ -509,6 +511,8 @@ pub struct Mollusk {
/// programs comes from the sysvars.
#[cfg(feature = "fuzz-fd")]
pub slot: u64,

pub enable_register_tracing: bool,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is always going to be false when the feature is disabled, I think it should be private, right?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Making it private prevents such manual instantiations where we get a compiler complaint that the enable_register_tracing is private:

..Default::default()

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah crud. What about just using the environment variable everywhere for the logic gate? The problem is, if someone flips this to true on Mollusk directly, but doesn't provide the SBF_TRACE_DIR, it's going to disable tracing anyway. This would be confusing for people.

Alternatively, we could choose a default dir under target if we really want this to be public. However, doing something like this:

let mut mollusk = Mollusk::default();
mollusk.enable_register_tracing = true;
...

...would still not add the callback and not activate tracing, as per the current implementation.

Copy link
Author

@procdump procdump Nov 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah crud. What about just using the environment variable everywhere for the logic gate? The problem is, if someone flips this to true on Mollusk directly, but doesn't provide the SBF_TRACE_DIR, it's going to disable tracing anyway. This would be confusing for people.

You're right - it would be confusing. There's another case that also concerns me - it's related to the environment-variable-only approach. As I'm going to write some tests for this feature, I would need to set the envvar from within the test itself. Setting envvars is unsafe as tests are run in the same process but in different threads. Hence having multiple tests playing with the SBF_TRACE_DIR var is UB. This is the reason I added with_register_tracing in the first place - just having a constructor that has the ability to instantiate mollusk with register tracing but without the need to play with the envvar.

Alternatively, we could choose a default dir under target if we really want this to be public. However, doing something like this:

I think it's probably a good idea to have a default path.

let mut mollusk = Mollusk::default();
mollusk.enable_register_tracing = true;
...

...would still not add the callback and not activate tracing, as per the current implementation.

I think I came up with some sort of a possible solution - I can bury the enable_register_tracing bool in the callback structure - thus nobody would be able to set it without actually providing a handler - I'll try to rework it to see the final outcome and will show it here.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Setting envvars is unsafe as tests are run in the same process but in different threads. Hence having multiple tests playing with the SBF_TRACE_DIR var is UB.

Why wouldn't the SBF_TRACE_DIR variable just be readonly across all threads?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I came up with some sort of a possible solution - I can bury the enable_register_tracing bool in the callback structure - thus nobody would be able to set it without actually providing a handler - I'll try to rework it to see the final outcome and will show it here.

Cool! Ping me when you throw it up.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've pushed the changes. I've also added one test implementing a custom register tracing callback counting the number of JMP class instructions executed. Tests help me reason about the API and its consumption later. On one side it's now that you can tweak existing codebases with the SBF_TRACE_DIR and on the other side it's possible to enable register tracing without the need to tweak the same environment variable at all - hence the with_register_tracing constructor.

What I don't like so far is that you can instantiate Mollusk::default() and change the callback expecting this will enable register tracing. Unfortunately it won't, because the ProgramCache also needs to have register tracing enabled. Yet don't such after-the-fact changes also fail to work for the other Mollusk fields that have cross dependencies between them?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Setting envvars is unsafe as tests are run in the same process but in different threads. Hence having multiple tests playing with the SBF_TRACE_DIR var is UB.

Why wouldn't the SBF_TRACE_DIR variable just be readonly across all threads?

It could be but if we need to enable tracing for one test and skip it for another the envvar is not very convenient IMO.

}

#[cfg(feature = "invocation-inspect-callback")]
Expand Down Expand Up @@ -557,8 +561,17 @@ impl Default for Mollusk {
};
#[cfg(not(feature = "fuzz"))]
let feature_set = FeatureSet::all_enabled();
let program_cache = ProgramCache::new(&feature_set, &compute_budget);
Self {

let enable_register_tracing = if cfg!(feature = "register-tracing") {
// If `SBF_TRACE_DIR` is set enable register tracing.
std::env::var("SBF_TRACE_DIR").is_ok()
} else {
false
};

let program_cache =
ProgramCache::new(&feature_set, &compute_budget, enable_register_tracing);
let mut svm = Self {
config: Config::default(),
compute_budget,
epoch_stake: EpochStake::default(),
Expand All @@ -572,7 +585,19 @@ impl Default for Mollusk {

#[cfg(feature = "fuzz-fd")]
slot: 0,

enable_register_tracing,
};

#[cfg(feature = "register-tracing")]
if enable_register_tracing {
// Have a default register tracing callback if register tracing is
// enabled.
svm.invocation_inspect_callback =
Box::new(register_tracing::DefaultRegisterTracingCallback {});
}

svm
}
}

Expand Down Expand Up @@ -658,6 +683,26 @@ impl Mollusk {
mollusk
}

/// Builds a Mollusk instance the way `new` does, but with register tracing
/// switched on and the default register tracing callback installed.
#[cfg(feature = "register-tracing")]
pub fn with_register_tracing(program_id: &Pubkey, program_name: &str) -> Self {
    let mut mollusk = Self {
        enable_register_tracing: true,
        ..Default::default()
    };
    // Rebuild the program cache from the instance's own feature set and
    // compute budget so that it is created with the tracing flag set.
    mollusk.program_cache = ProgramCache::new(
        &mollusk.feature_set,
        &mollusk.compute_budget,
        mollusk.enable_register_tracing,
    );
    mollusk.invocation_inspect_callback =
        Box::new(register_tracing::DefaultRegisterTracingCallback {});
    mollusk.add_program(program_id, program_name, &DEFAULT_LOADER_KEY);
    mollusk
}

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you need this new constructor? The existing Mollusk::new will call into Self::default, and the logic gate where you check for the SBF_TRACE_DIR environment variable will be evaluated. So people can just use Mollusk::new!

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you need this new constructor? The existing Mollusk::new will call into Self::default, and the logic gate where you check for the SBF_TRACE_DIR environment variable will be evaluated. So people can just use Mollusk::new!

I'd be glad to have your review up to now and if you prefer I'll rework it to have everything gated behind the environment variable - I have in mind something like this:

diff --git a/harness/src/lib.rs b/harness/src/lib.rs
index 815ec07..b89830f 100644
--- a/harness/src/lib.rs
+++ b/harness/src/lib.rs
@@ -689,32 +689,6 @@ impl Mollusk {
         mollusk
     }
 
-    /// Create a new Mollusk instance just like the `new` method but
-    /// with register tracing enabled using a default callback.
-    ///
-    /// If `SBF_TRACE_DIR` is set it will override the passed `sbf_trace_dir`.
-    #[cfg(feature = "register-tracing")]
-    pub fn with_register_tracing(
-        program_id: &Pubkey,
-        program_name: &str,
-        sbf_trace_dir: &str,
-    ) -> Self {
-        let mut mollusk = Mollusk::default();
-        mollusk.invocation_inspect_callback =
-            Box::new(register_tracing::DefaultRegisterTracingCallback {
-                sbf_trace_dir: std::env::var("SBF_TRACE_DIR").unwrap_or(sbf_trace_dir.into()),
-            });
-        mollusk.program_cache = ProgramCache::new(
-            &mollusk.feature_set,
-            &mollusk.compute_budget,
-            mollusk
-                .invocation_inspect_callback
-                .is_register_tracing_callback(),
-        );
-        mollusk.add_program(program_id, program_name, &DEFAULT_LOADER_KEY);
-        mollusk
-    }
-
     /// Add a program to the test environment.
     ///
     /// If you intend to CPI to a program, this is likely what you want to use.
diff --git a/harness/src/register_tracing.rs b/harness/src/register_tracing.rs
index ddb3cc0..7c18b1e 100644
--- a/harness/src/register_tracing.rs
+++ b/harness/src/register_tracing.rs
@@ -85,7 +85,7 @@ impl InvocationInspectCallback for DefaultRegisterTracingCallback {
 
     // This callback is specifically implemented to handle register tracing.
     fn is_register_tracing_callback(&self) -> bool {
-        true
+        std::env::var("SBF_TRACE_DIR").is_ok()
     }
 }
 
diff --git a/harness/tests/register_tracing.rs b/harness/tests/register_tracing.rs
index ba9e85c..0c30d15 100644
--- a/harness/tests/register_tracing.rs
+++ b/harness/tests/register_tracing.rs
@@ -87,11 +87,11 @@ fn test_custom_register_tracing_callback() {
     }
 
     std::env::set_var("SBF_OUT_DIR", "../target/deploy");
+    std::env::set_var("SBF_TRACE_DIR", "/tmp/sbf_trace_dir");
 
     let program_id = Pubkey::new_unique();
     let payer_pk = Pubkey::new_unique();
-    let mut mollusk =
-        Mollusk::with_register_tracing(&program_id, "test_program_primary", "sbf_trace_dir");
+    let mut mollusk = Mollusk::new(&program_id, "test_program_primary");
 
     // Phase 1 - basic register tracing test.
 

/// Add a program to the test environment.
///
/// If you intend to CPI to a program, this is likely what you want to use.
Expand Down Expand Up @@ -729,13 +774,13 @@ impl Mollusk {
&runtime_features,
&execution_budget,
/* reject_deployment_of_broken_elfs */ false,
/* debugging_features */ false,
/* debugging_features */ self.enable_register_tracing,
)
.unwrap(),
),
program_runtime_v2: Arc::new(create_program_runtime_environment_v2(
&execution_budget,
/* debugging_features */ false,
/* debugging_features */ self.enable_register_tracing,
)),
};

Expand Down
8 changes: 6 additions & 2 deletions harness/src/program.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,19 @@ pub struct ProgramCache {
}

impl ProgramCache {
pub fn new(feature_set: &FeatureSet, compute_budget: &ComputeBudget) -> Self {
pub fn new(
feature_set: &FeatureSet,
compute_budget: &ComputeBudget,
enable_register_tracing: bool,
) -> Self {
let me = Self {
cache: Rc::new(RefCell::new(ProgramCacheForTxBatch::default())),
entries_cache: Rc::new(RefCell::new(HashMap::new())),
program_runtime_environment: create_program_runtime_environment_v1(
&feature_set.runtime_features(),
&compute_budget.to_budget(),
/* reject_deployment_of_broken_elfs */ false,
/* debugging_features */ false,
/* debugging_features */ enable_register_tracing,
)
.unwrap(),
};
Expand Down
132 changes: 132 additions & 0 deletions harness/src/register_tracing.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
use {
crate::{
file::{default_shared_object_dirs, read_file},
InvocationInspectCallback,
},
sha2::{Digest, Sha256},
solana_program_runtime::invoke_context::{Executable, InvokeContext, RegisterTrace},
solana_pubkey::Pubkey,
solana_transaction_context::{InstructionAccount, InstructionContext},
std::{fs::File, io::Write, path::PathBuf},
};

/// Stock [`InvocationInspectCallback`] for register tracing.
///
/// It does nothing before an invocation. After an invocation it walks the VM
/// traces exposed by the `InvokeContext` and hands each one to
/// [`default_register_tracing_callback`].
pub struct DefaultRegisterTracingCallback;

impl InvocationInspectCallback for DefaultRegisterTracingCallback {
    /// Intentionally empty.
    fn before_invocation(
        &self,
        _: &Pubkey,
        _: &[u8],
        _: &[InstructionAccount],
        _: &InvokeContext,
    ) {
    }

    /// Forwards every collected VM trace to the default handler.
    ///
    /// A failure while handling a trace is reported on stderr and does not
    /// stop the iteration over the remaining traces.
    fn after_invocation(&self, invoke_context: &InvokeContext) {
        invoke_context.iterate_vm_traces(
            &|ix_context: InstructionContext, exe: &Executable, trace: RegisterTrace| {
                default_register_tracing_callback(ix_context, exe, trace).unwrap_or_else(|err| {
                    eprintln!("Error collecting the register tracing: {}", err);
                });
            },
        );
    }
}

/// Persists one register trace under the directory named by `SBF_TRACE_DIR`.
///
/// Nothing is written, and `Ok(())` is returned, when the trace is empty or
/// when `SBF_TRACE_DIR` is not set. Otherwise `SBF_TRACE_DIR` is joined onto
/// the current working directory, created if missing, and three files are
/// written there. They share a base name made of the first 16 hex characters
/// of the SHA-256 of the raw trace bytes:
///
/// * `<base>.regs` - the raw bytes of every register set in the trace,
/// * `<base>.insns` - for every register set, the instruction its program
///   counter points to in the executable's text section,
/// * `<base>.exec.sha256` - the hex SHA-256 of the on-disk shared object whose
///   loaded text bytes match `executable`.
///
/// # Errors
///
/// Returns an error if the program key cannot be read from
/// `instruction_context`, if no matching shared object is found, or if any
/// file system operation fails. The shared-object lookup runs before any file
/// is created, so a failed lookup leaves no empty trace files behind.
pub fn default_register_tracing_callback(
    instruction_context: InstructionContext,
    executable: &Executable,
    register_trace: RegisterTrace,
) -> Result<(), Box<dyn std::error::Error>> {
    if register_trace.is_empty() {
        // Can't do much with an empty trace.
        return Ok(());
    }

    let Ok(sbf_trace_dir) = std::env::var("SBF_TRACE_DIR") else {
        return Ok(());
    };

    // Resolve everything that can fail without touching the disk first.
    let program_id = instruction_context.get_program_key()?;
    let so_hash = find_executable_pre_load_hash(executable).ok_or_else(|| {
        format!("Can't find shared object for executable with program_id: {program_id}")
    })?;

    let sbf_trace_dir = std::env::current_dir()?.join(sbf_trace_dir);
    std::fs::create_dir_all(&sbf_trace_dir)?;

    let trace_digest = compute_hash(as_bytes(register_trace));
    let base_fname = sbf_trace_dir.join(&trace_digest[..16]);

    // Buffer the per-entry writes: a trace can hold many register sets and
    // each one would otherwise cost two system calls.
    let mut regs_file = std::io::BufWriter::new(File::create(base_fname.with_extension("regs"))?);
    let mut insns_file =
        std::io::BufWriter::new(File::create(base_fname.with_extension("insns"))?);
    let mut so_hash_file = File::create(base_fname.with_extension("exec.sha256"))?;

    // Persist the preload hash of the executable. `write_all` is required
    // because a plain `write` may stop after a partial write.
    so_hash_file.write_all(so_hash.as_bytes())?;

    // Get the relocated executable.
    let (_, program) = executable.get_text_bytes();
    for regs in register_trace.iter() {
        // The program counter is stored in r11.
        let pc = regs[11];
        // From the executable fetch the instruction this program counter points to.
        let insn =
            solana_program_runtime::solana_sbpf::ebpf::get_insn_unchecked(program, pc as usize)
                .to_array();

        // Persist them in files.
        regs_file.write_all(as_bytes(regs.as_slice()))?;
        insns_file.write_all(insn.as_slice())?;
    }

    // Flush explicitly: dropping a `BufWriter` swallows write errors.
    regs_file.flush()?;
    insns_file.flush()?;

    Ok(())
}

/// Reinterprets a slice of `T` as the bytes that back it.
///
/// The returned slice borrows from `slice` and is `size_of_val(slice)` bytes
/// long, so an empty input yields an empty byte slice.
pub(crate) fn as_bytes<T>(slice: &[T]) -> &[u8] {
    let byte_len = std::mem::size_of_val(slice);
    let first_byte = slice.as_ptr().cast::<u8>();
    // SAFETY: `first_byte` comes from a live `&[T]` and `byte_len` is exactly
    // the size of that slice, so the byte range stays within the borrowed
    // memory for the lifetime of the returned reference.
    // NOTE(review): this presumes `T` has no padding bytes - confirm before
    // using it with element types other than plain integers/integer arrays.
    unsafe { std::slice::from_raw_parts(first_byte, byte_len) }
}

/// Collects the paths of all regular files with the `so` extension that sit
/// directly inside the given directories.
///
/// Entries of `dirs` that are not directories, directories that cannot be
/// read, and directory entries that fail to load are skipped silently.
/// Sub-directories are not descended into.
fn find_so_files(dirs: &[PathBuf]) -> Vec<PathBuf> {
    dirs.iter()
        .filter(|dir| dir.is_dir())
        .filter_map(|dir| std::fs::read_dir(dir).ok())
        .flat_map(|entries| entries.flatten())
        .map(|entry| entry.path())
        .filter(|path| path.is_file() && path.extension().is_some_and(|ext| ext == "so"))
        .collect()
}

/// Looks up the on-disk shared object that `executable` was loaded from and
/// returns the hex SHA-256 of that file's bytes.
///
/// Every `.so` file found in the default shared object directories is loaded
/// with the same loader as `executable`. A file matches when its relocated
/// text bytes equal those of `executable`. Files that fail to load are
/// skipped. Candidates are examined from the end of the file list, so the last
/// listed match wins. `None` is returned when nothing matches.
fn find_executable_pre_load_hash(executable: &Executable) -> Option<String> {
    let (_, wanted_text) = executable.get_text_bytes();

    find_so_files(&default_shared_object_dirs())
        .iter()
        .rev()
        .find_map(|path| {
            let so = read_file(path);
            // Reconstruct a loaded Executable just to compare its relocated
            // text bytes with the ones of the passed executable.
            let candidate = Executable::load(&so, executable.get_loader().clone()).ok()?;
            // On a match, hash the shared object as it is stored on disk.
            (wanted_text == candidate.get_text_bytes().1).then(|| compute_hash(&so))
        })
}

/// Returns the SHA-256 digest of `slice` as a hex string.
fn compute_hash(slice: &[u8]) -> String {
    let mut hasher = Sha256::new();
    hasher.update(slice);
    let digest = hasher.finalize();
    hex::encode(digest.as_slice())
}