model-checking · tautschnig · Aug 4, 2025 · Jul 24, 2025 · Jul 24, 2025 · Jul 28, 2025
@@ -1,8 +1,7 @@
 // Copyright Kani Contributors
 // SPDX-License-Identifier: Apache-2.0 OR MIT
 
-use lazy_static::lazy_static;
-use std::sync::Mutex;
+use std::cell::RefCell;
 use string_interner::StringInterner;
 use string_interner::backend::StringBackend;
 use string_interner::symbol::SymbolU32;
@@ -23,10 +22,11 @@ use string_interner::symbol::SymbolU32;
 #[derive(Clone, Hash, Copy, PartialEq, Eq, PartialOrd, Ord)]
 pub struct InternedString(SymbolU32);
 
-// Use a `Mutex` to make this thread safe.
-lazy_static! {
-    static ref INTERNER: Mutex<StringInterner<StringBackend>> =
-        Mutex::new(StringInterner::default());
+// This [StringInterner] is a thread local, letting us get away with less synchronization.
+// See the `sync` module below for a full explanation of this choice's consequences.
+thread_local! {
+    static INTERNER: RefCell<StringInterner<StringBackend>> =
+        RefCell::new(StringInterner::default());
 }
 
 impl InternedString {
@@ -42,7 +42,7 @@ impl InternedString {
     /// Needed because exporting the &str backing the InternedString is blocked by lifetime rules.
     /// Instead, this allows users to operate on the &str when needed.
     pub fn map<T, F: FnOnce(&str) -> T>(&self, f: F) -> T {
-        f(INTERNER.lock().unwrap().resolve(self.0).unwrap())
+        INTERNER.with_borrow(|i| f(i.resolve(self.0).unwrap()))
     }
 
     pub fn starts_with(&self, pattern: &str) -> bool {
@@ -52,13 +52,13 @@ impl InternedString {
 
 impl std::fmt::Display for InternedString {
     fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
-        write!(fmt, "{}", INTERNER.lock().unwrap().resolve(self.0).unwrap())
+        INTERNER.with_borrow(|i| write!(fmt, "{}", i.resolve(self.0).unwrap()))
     }
 }
 /// Custom-implement Debug, so our debug logging contains meaningful strings, not numbers
 impl std::fmt::Debug for InternedString {
     fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
-        write!(fmt, "{:?}", INTERNER.lock().unwrap().resolve(self.0).unwrap())
+        INTERNER.with_borrow(|i| write!(fmt, "{:?}", i.resolve(self.0).unwrap()))
     }
 }
 
@@ -67,7 +67,7 @@ where
     T: AsRef<str>,
 {
     fn from(s: T) -> InternedString {
-        InternedString(INTERNER.lock().unwrap().get_or_intern(s))
+        InternedString(INTERNER.with_borrow_mut(|i| i.get_or_intern(s)))
     }
 }
 
@@ -76,7 +76,7 @@ where
     T: AsRef<str>,
 {
     fn eq(&self, other: &T) -> bool {
-        INTERNER.lock().unwrap().resolve(self.0).unwrap() == other.as_ref()
+        INTERNER.with_borrow(|i| i.resolve(self.0).unwrap() == other.as_ref())
     }
 }
 
@@ -105,6 +105,68 @@ where
     }
 }
 
+/// At a high level, the key design choice here is to keep our [StringInterner]s as thread locals.
+/// This works because whichever thread is generating `SymbolTable`s will be updating the interner in a way that
+/// affects serialization, but the serialization doesn't affect the interner in a way that affects generating
+/// `SymbolTable`s.
+///
+/// Thus, it makes a lot of sense to have threads each maintain their own copy of a [StringInterner]. Then, when the main
+/// codegen thread wants to tell another thread to write a new `SymbolTable` to disk, it can just send along
+/// its master copy of the [StringInterner] that they can use to update theirs.
+///
+/// To enforce this, [InternedString] is marked `!Send`--preventing them from being sent between threads
+/// unless they're wrapped in a [WithInterner](sync::WithInterner) type that will ensure the recieving thread updates
+/// its local interner to match the sending thread's.
+pub(crate) mod sync {
+    use string_interner::{StringInterner, backend::StringBackend};
+
+    use crate::{InternedString, cbmc_string::INTERNER};
+
+    /// The value of an [InternedString] is defined based on a thread local [INTERNER] so they cannot safely
+    /// be sent between threads.
+    impl !Send for InternedString {}
+
+    /// A type that is only `!Send` because it contains types specific to a thread local [INTERNER]
+    /// (e.g. [InternedString]s). This forces users to annotate that the types they want to wrap in [WithInterner]
+    /// are `!Send` just for that specific reason rather than using it to make arbitrary types `Send`.
+    ///
+    /// # Safety
+    ///
+    /// Should only be implemented for types which are `!Send` **solely** because they contain information specific
+    /// to their thread local [INTERNER] (e.g. by containing [InternedString]s).
+    pub unsafe trait InternerSpecific {}
+
+    /// Since [WithInterner<T>] guarantees that the inner `T` cannot be accessed without updating the
+    /// thread local [INTERNER] to a copy of what was used to generate `T`, it is safe to send between threads,
+    /// even if the inner `T` contains [InternedString]s which are not [Send] on their own.
+    unsafe impl<T: InternerSpecific> Send for WithInterner<T> {}
+
+    /// A type `T` bundled with the [StringInterner] that was used to generate it.
+    ///
+    /// The only way to access the inner `T` is by calling `into_inner()`, which will automatically
+    /// update the current thread's interner to the interner used the generate `T`,
+    /// ensuring interner coherence between the sending & receiving threads.
+    pub struct WithInterner<T> {
+        interner: StringInterner<StringBackend>,
+        inner: T,
+    }
+
+    impl<T> WithInterner<T> {
+        /// Create a new wrapper of `inner` with a clone of the current thread local [INTERNER].
+        pub fn new_with_current(inner: T) -> Self {
+            let interner = INTERNER.with_borrow(|i| i.clone());
+            WithInterner { interner, inner }
+        }
+
+        /// Get the inner wrapped `T` and implicitly update the current thread local [INTERNER] with a
+        /// copy of the one used to generate `T`.
+        pub fn into_inner(self) -> T {
+            INTERNER.with_borrow_mut(|i| *i = self.interner);
+            self.inner
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use crate::cbmc_string::InternedString;

@@ -31,6 +31,7 @@
 
 #![feature(f128)]
 #![feature(f16)]
+#![feature(negative_impls)]
 
 mod env;
 pub use env::global_dead_object;
@@ -41,4 +42,5 @@ pub mod utils;
 pub use irep::serialize;
 pub use machine_model::{MachineModel, RoundingMode};
 mod cbmc_string;
+pub use cbmc_string::sync::{InternerSpecific, WithInterner};
 pub use cbmc_string::{InternString, InternStringOption, InternedString};
@@ -4,7 +4,9 @@
 //! This file contains the code necessary to interface with the compiler backend
 
 use crate::args::ReachabilityType;
-use crate::codegen_cprover_gotoc::GotocCtx;
+use crate::codegen_cprover_gotoc::context::MinimalGotocCtx;
+use crate::codegen_cprover_gotoc::utils::file_writing_pool::{FileDataToWrite, ThreadPool};
+use crate::codegen_cprover_gotoc::{GotocCtx, context};
 use crate::kani_middle::analysis;
 use crate::kani_middle::attributes::KaniAttributes;
 use crate::kani_middle::check_reachable_items;
@@ -13,10 +15,9 @@ use crate::kani_middle::provide;
 use crate::kani_middle::reachability::{collect_reachable_items, filter_crate_items};
 use crate::kani_middle::transform::{BodyTransformation, GlobalPasses};
 use crate::kani_queries::QueryDb;
-use cbmc::RoundingMode;
 use cbmc::goto_program::Location;
-use cbmc::irep::goto_binary_serde::write_goto_binary_file;
 use cbmc::{InternedString, MachineModel};
+use cbmc::{RoundingMode, WithInterner};
 use kani_metadata::artifact::convert_type;
 use kani_metadata::{ArtifactType, HarnessMetadata, KaniMetadata, UnsupportedFeature};
 use kani_metadata::{AssignsContract, CompilerArtifactStub};
@@ -44,18 +45,27 @@ use rustc_session::output::out_filename;
 use rustc_span::{Symbol, sym};
 use rustc_target::spec::PanicStrategy;
 use std::any::Any;
+use std::cmp::min;
 use std::collections::BTreeMap;
 use std::fmt::Write;
 use std::fs::File;
 use std::io::BufWriter;
+use std::num::NonZero;
 use std::path::Path;
 use std::sync::{Arc, Mutex};
+use std::thread::available_parallelism;
 use std::time::Instant;
 use tracing::{debug, info};
 
+/// The maximum amount of threads it would be useful to have in the file-exporting thread pool.
+///
+/// This is constrained by the speed at which the single main compiler thread can codegen goto files for export. Right now,
+/// it can generate code for ~2 harnesses in the time it takes 1 to be exported. Thus, using any more than 4 threads for exporting
+/// would just increase contention on the shared work queue.
+const MAX_SENSIBLE_FILE_EXPORT_THREADS: usize = 4;
+
 pub type UnsupportedConstructs = FxHashMap<InternedString, Vec<Location>>;
 
-#[derive(Clone)]
 pub struct GotocCodegenBackend {
     /// The query is shared with `KaniCompiler` and it is initialized as part of `rustc`
     /// initialization, which may happen after this object is created.
@@ -72,6 +82,7 @@ impl GotocCodegenBackend {
     /// Generate code that is reachable from the given starting points.
     ///
     /// Invariant: iff `check_contract.is_some()` then `return.2.is_some()`
+    #[allow(clippy::too_many_arguments)]
     fn codegen_items<'tcx>(
         &self,
         tcx: TyCtxt<'tcx>,
@@ -80,7 +91,8 @@ impl GotocCodegenBackend {
         machine_model: &MachineModel,
         check_contract: Option<InternalDefId>,
         mut transformer: BodyTransformation,
-    ) -> (GotocCtx<'tcx>, Vec<MonoItem>, Option<AssignsContract>) {
+        thread_pool: &ThreadPool,
+    ) -> (MinimalGotocCtx, Vec<MonoItem>, Option<AssignsContract>) {
         // This runs reachability analysis before global passes are applied.
         //
         // Alternatively, we could run reachability only once after the global passes are applied
@@ -201,19 +213,49 @@ impl GotocCodegenBackend {
 
         gcx.handle_quantifiers();
 
+        // Split ownership of the context so that the majority of fields can be saved to our results,
+        // but the symbol table can be passed to the thread that handles exporting.
+        let (min_gcx, symbol_table) = gcx.split();
+
         // No output should be generated if user selected no_codegen.
         if !tcx.sess.opts.unstable_opts.no_codegen && tcx.sess.opts.output_types.should_codegen() {
             let pretty = self.queries.lock().unwrap().args().output_pretty_json;
-            write_file(symtab_goto, ArtifactType::PrettyNameMap, &pretty_name_map, pretty);
-            write_goto_binary_file(symtab_goto, &gcx.symbol_table);
-            write_file(symtab_goto, ArtifactType::TypeMap, &type_map, pretty);
-            // If they exist, write out vtable virtual call function pointer restrictions
-            if let Some(restrictions) = vtable_restrictions {
-                write_file(symtab_goto, ArtifactType::VTableRestriction, &restrictions, pretty);
-            }
+
+            // Save all the data needed to write this goto file
+            // so another thread can handle it in parallel.
+            let new_file_data = FileDataToWrite {
+                symtab_goto: symtab_goto.to_path_buf(),
+                symbol_table,
+                vtable_restrictions,
+                type_map,
+                pretty_name_map,
+                pretty,
+            };
+
+            // Package the file data with a copy of the string interner used to generate it.
+            let file_data_with_interner = WithInterner::new_with_current(new_file_data);
+
+            // Send everything to the thread pool for handling and move on.
+            thread_pool.send_work(file_data_with_interner).unwrap();
         }
 
-        (gcx, items, contract_info)
+        (min_gcx, items, contract_info)
+    }
+
+    /// Determines the number of threads to add to the pool for goto binary exporting.
+    fn thread_pool_size(known_num_harnesses: Option<usize>) -> usize {
+        // Default to the available parallelism if # of threads isn't specified.
+        let mut max_total_threads = available_parallelism().map(NonZero::get).unwrap_or(1);
+
+        // If we know the # of harnesses upfront, cap the number of total threads at that.
+        // Multiple threads can't work on exporting the same harness, so any threads
+        // more than the # of harnesses cannot possibly provide an additional benefit.
+        if let Some(num_harnesses) = known_num_harnesses {
+            max_total_threads = min(max_total_threads, num_harnesses);
+        }
+
+        // One thread will be the main compiler thread, and we shouldn't have more than the max that would make sense.
+        min(max_total_threads.saturating_sub(1), MAX_SENSIBLE_FILE_EXPORT_THREADS)
     }
 
     /// Given a harness, return the DefId of its target if it's a contract harness.
@@ -329,11 +371,26 @@ impl CodegenBackend for GotocCodegenBackend {
             let base_filename = base_filepath.as_path();
             let reachability = queries.args().reachability_analysis;
             let mut results = GotoCodegenResults::new(tcx, reachability);
+
+            // If reachability is None, just return early as we'll do no codegen.
+            if reachability == ReachabilityType::None {
+                return codegen_results(tcx, &results.machine_model);
+            }
+
+            // Create an empty thread pool. We will set the size later once we
+            // concretely know the # of harnesses we need to analyze.
+            let mut export_thread_pool = ThreadPool::empty();
+
             match reachability {
                 ReachabilityType::AllFns | ReachabilityType::Harnesses => {
                     let mut units = CodegenUnits::new(&queries, tcx);
                     let mut modifies_instances = vec![];
                     let mut loop_contracts_instances = vec![];
+
+                    // We know the # of harnesses here, so provide them to the thread_pool size calculation.
+                    let num_harnesses: usize = units.iter().map(|unit| unit.harnesses.len()).sum();
+                    export_thread_pool.add_workers(Self::thread_pool_size(Some(num_harnesses)));
+
                     // Cross-crate collecting of all items that are reachable from the crate harnesses.
                     for unit in units.iter() {
                         // We reset the body cache for now because each codegen unit has different
@@ -344,19 +401,20 @@ impl CodegenBackend for GotocCodegenBackend {
                             let is_automatic_harness = units.is_automatic_harness(harness);
                             let contract_metadata =
                                 self.target_if_contract_harness(tcx, harness, is_automatic_harness);
-                            let (gcx, items, contract_info) = self.codegen_items(
+                            let (min_gcx, items, contract_info) = self.codegen_items(
                                 tcx,
                                 &[MonoItem::Fn(*harness)],
                                 model_path,
                                 &results.machine_model,
                                 contract_metadata
                                     .map(|def| rustc_internal::internal(tcx, def.def_id())),
                                 transformer,
+                                &export_thread_pool,
                             );
-                            if gcx.has_loop_contracts {
+                            if min_gcx.has_loop_contracts {
                                 loop_contracts_instances.push(*harness);
                             }
-                            results.extend(gcx, items, None);
+                            results.extend(min_gcx, items, None);
                             if let Some(assigns_contract) = contract_info {
                                 modifies_instances.push((*harness, assigns_contract));
                             }
@@ -366,9 +424,13 @@ impl CodegenBackend for GotocCodegenBackend {
                     units.store_loop_contracts(&loop_contracts_instances);
                     units.write_metadata(&queries, tcx);
                 }
-                ReachabilityType::None => {}
+                ReachabilityType::None => unreachable!(),
                 ReachabilityType::PubFns => {
                     let unit = CodegenUnit::default();
+
+                    // Here, we don't know up front how many harnesses we will have to analyze, so pass None.
+                    export_thread_pool.add_workers(Self::thread_pool_size(None));
+
                     let transformer = BodyTransformation::new(&queries, tcx, &unit);
                     let main_instance = rustc_public::entry_fn()
                         .map(|main_fn| Instance::try_from(main_fn).unwrap());
@@ -387,12 +449,17 @@ impl CodegenBackend for GotocCodegenBackend {
                         &results.machine_model,
                         Default::default(),
                         transformer,
+                        &export_thread_pool,
                     );
                     assert!(contract_info.is_none());
                     let _ = results.extend(gcx, items, None);
                 }
             }
 
+            // Join all the worker threads in the pool to ensure all goto files have been written before
+            // moving on to verification.
+            export_thread_pool.join_all();
+
             if reachability != ReachabilityType::None {
                 // Print compilation report.
                 results.print_report(tcx);
@@ -637,16 +704,16 @@ impl GotoCodegenResults {
 
     fn extend(
         &mut self,
-        gcx: GotocCtx,
+        min_gcx: context::MinimalGotocCtx,
         items: Vec<MonoItem>,
         metadata: Option<HarnessMetadata>,
     ) -> BodyTransformation {
         let mut items = items;
         self.harnesses.extend(metadata);
-        self.concurrent_constructs.extend(gcx.concurrent_constructs);
-        self.unsupported_constructs.extend(gcx.unsupported_constructs);
+        self.concurrent_constructs.extend(min_gcx.concurrent_constructs);
+        self.unsupported_constructs.extend(min_gcx.unsupported_constructs);
         self.items.append(&mut items);
-        gcx.transformer
+        min_gcx.transformer
     }
 
     /// Prints a report at the end of the compilation.