Skip to content
Merged
84 changes: 73 additions & 11 deletions cprover_bindings/src/cbmc_string.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
// Copyright Kani Contributors
// SPDX-License-Identifier: Apache-2.0 OR MIT

use lazy_static::lazy_static;
use std::sync::Mutex;
use std::cell::RefCell;
use string_interner::StringInterner;
use string_interner::backend::StringBackend;
use string_interner::symbol::SymbolU32;
Expand All @@ -23,10 +22,11 @@ use string_interner::symbol::SymbolU32;
#[derive(Clone, Hash, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct InternedString(SymbolU32);

// Use a `Mutex` to make this thread safe.
lazy_static! {
static ref INTERNER: Mutex<StringInterner<StringBackend>> =
Mutex::new(StringInterner::default());
// This [StringInterner] is a thread local, letting us get away with less synchronization.
// See the `sync` module below for a full explanation of this choice's consequences.
thread_local! {
static INTERNER: RefCell<StringInterner<StringBackend>> =
RefCell::new(StringInterner::default());
}

impl InternedString {
Expand All @@ -42,7 +42,7 @@ impl InternedString {
/// Needed because exporting the &str backing the InternedString is blocked by lifetime rules.
/// Instead, this allows users to operate on the &str when needed.
pub fn map<T, F: FnOnce(&str) -> T>(&self, f: F) -> T {
f(INTERNER.lock().unwrap().resolve(self.0).unwrap())
INTERNER.with_borrow(|i| f(i.resolve(self.0).unwrap()))
}

pub fn starts_with(&self, pattern: &str) -> bool {
Expand All @@ -52,13 +52,13 @@ impl InternedString {

impl std::fmt::Display for InternedString {
fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
write!(fmt, "{}", INTERNER.lock().unwrap().resolve(self.0).unwrap())
INTERNER.with_borrow(|i| write!(fmt, "{}", i.resolve(self.0).unwrap()))
}
}
/// Custom-implement Debug, so our debug logging contains meaningful strings, not numbers
impl std::fmt::Debug for InternedString {
fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
write!(fmt, "{:?}", INTERNER.lock().unwrap().resolve(self.0).unwrap())
INTERNER.with_borrow(|i| write!(fmt, "{:?}", i.resolve(self.0).unwrap()))
}
}

Expand All @@ -67,7 +67,7 @@ where
T: AsRef<str>,
{
fn from(s: T) -> InternedString {
InternedString(INTERNER.lock().unwrap().get_or_intern(s))
InternedString(INTERNER.with_borrow_mut(|i| i.get_or_intern(s)))
}
}

Expand All @@ -76,7 +76,7 @@ where
T: AsRef<str>,
{
fn eq(&self, other: &T) -> bool {
INTERNER.lock().unwrap().resolve(self.0).unwrap() == other.as_ref()
INTERNER.with_borrow(|i| i.resolve(self.0).unwrap() == other.as_ref())
}
}

Expand Down Expand Up @@ -105,6 +105,68 @@ where
}
}

/// At a high level, the key design choice here is to keep our [StringInterner]s as thread locals.
/// This works because whichever thread is generating `SymbolTable`s will be updating the interner in a way that
/// affects serialization, but the serialization doesn't affect the interner in a way that affects generating
/// `SymbolTable`s.
///
/// Thus, it makes a lot of sense to have threads each maintain their own copy of a [StringInterner]. Then, when the main
/// codegen thread wants to tell another thread to write a new `SymbolTable` to disk, it can just send along
/// its master copy of the [StringInterner] that they can use to update theirs.
///
/// To enforce this, [InternedString] is marked `!Send`--preventing them from being sent between threads
/// unless they're wrapped in a [WithInterner](sync::WithInterner) type that will ensure the recieving thread updates
/// its local interner to match the sending thread's.
pub(crate) mod sync {
use string_interner::{StringInterner, backend::StringBackend};

use crate::{InternedString, cbmc_string::INTERNER};

/// The value of an [InternedString] is defined based on a thread local [INTERNER] so they cannot safely
/// be sent between threads.
impl !Send for InternedString {}

/// A type that is only `!Send` because it contains types specific to a thread local [INTERNER]
/// (e.g. [InternedString]s). This forces users to annotate that the types they want to wrap in [WithInterner]
/// are `!Send` just for that specific reason rather than using it to make arbitrary types `Send`.
///
/// # Safety
///
/// Should only be implemented for types which are `!Send` **solely** because they contain information specific
/// to their thread local [INTERNER] (e.g. by containing [InternedString]s).
pub unsafe trait InternerSpecific {}

/// Since [WithInterner<T>] guarantees that the inner `T` cannot be accessed without updating the
/// thread local [INTERNER] to a copy of what was used to generate `T`, it is safe to send between threads,
/// even if the inner `T` contains [InternedString]s which are not [Send] on their own.
unsafe impl<T: InternerSpecific> Send for WithInterner<T> {}

/// A type `T` bundled with the [StringInterner] that was used to generate it.
///
/// The only way to access the inner `T` is by calling `into_inner()`, which will automatically
/// update the current thread's interner to the interner used the generate `T`,
/// ensuring interner coherence between the sending & receiving threads.
pub struct WithInterner<T> {
interner: StringInterner<StringBackend>,
inner: T,
}

impl<T> WithInterner<T> {
/// Create a new wrapper of `inner` with a clone of the current thread local [INTERNER].
pub fn new_with_current(inner: T) -> Self {
let interner = INTERNER.with_borrow(|i| i.clone());
WithInterner { interner, inner }
}

/// Get the inner wrapped `T` and implicitly update the current thread local [INTERNER] with a
/// copy of the one used to generate `T`.
pub fn into_inner(self) -> T {
INTERNER.with_borrow_mut(|i| *i = self.interner);
self.inner
}
}
}

#[cfg(test)]
mod tests {
use crate::cbmc_string::InternedString;
Expand Down
2 changes: 2 additions & 0 deletions cprover_bindings/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

#![feature(f128)]
#![feature(f16)]
#![feature(negative_impls)]

mod env;
pub use env::global_dead_object;
Expand All @@ -41,4 +42,5 @@ pub mod utils;
pub use irep::serialize;
pub use machine_model::{MachineModel, RoundingMode};
mod cbmc_string;
pub use cbmc_string::sync::{InternerSpecific, WithInterner};
pub use cbmc_string::{InternString, InternStringOption, InternedString};
109 changes: 88 additions & 21 deletions kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
//! This file contains the code necessary to interface with the compiler backend

use crate::args::ReachabilityType;
use crate::codegen_cprover_gotoc::GotocCtx;
use crate::codegen_cprover_gotoc::context::MinimalGotocCtx;
use crate::codegen_cprover_gotoc::utils::file_writing_pool::{FileDataToWrite, ThreadPool};
use crate::codegen_cprover_gotoc::{GotocCtx, context};
use crate::kani_middle::analysis;
use crate::kani_middle::attributes::KaniAttributes;
use crate::kani_middle::check_reachable_items;
Expand All @@ -13,10 +15,9 @@ use crate::kani_middle::provide;
use crate::kani_middle::reachability::{collect_reachable_items, filter_crate_items};
use crate::kani_middle::transform::{BodyTransformation, GlobalPasses};
use crate::kani_queries::QueryDb;
use cbmc::RoundingMode;
use cbmc::goto_program::Location;
use cbmc::irep::goto_binary_serde::write_goto_binary_file;
use cbmc::{InternedString, MachineModel};
use cbmc::{RoundingMode, WithInterner};
use kani_metadata::artifact::convert_type;
use kani_metadata::{ArtifactType, HarnessMetadata, KaniMetadata, UnsupportedFeature};
use kani_metadata::{AssignsContract, CompilerArtifactStub};
Expand Down Expand Up @@ -44,18 +45,27 @@ use rustc_session::output::out_filename;
use rustc_span::{Symbol, sym};
use rustc_target::spec::PanicStrategy;
use std::any::Any;
use std::cmp::min;
use std::collections::BTreeMap;
use std::fmt::Write;
use std::fs::File;
use std::io::BufWriter;
use std::num::NonZero;
use std::path::Path;
use std::sync::{Arc, Mutex};
use std::thread::available_parallelism;
use std::time::Instant;
use tracing::{debug, info};

/// The maximum amount of threads it would be useful to have in the file-exporting thread pool.
///
/// This is constrained by the speed at which the single main compiler thread can codegen goto files for export. Right now,
/// it can generate code for ~2 harnesses in the time it takes 1 to be exported. Thus, using any more than 4 threads for exporting
/// would just increase contention on the shared work queue.
const MAX_SENSIBLE_FILE_EXPORT_THREADS: usize = 4;

pub type UnsupportedConstructs = FxHashMap<InternedString, Vec<Location>>;

#[derive(Clone)]
pub struct GotocCodegenBackend {
/// The query is shared with `KaniCompiler` and it is initialized as part of `rustc`
/// initialization, which may happen after this object is created.
Expand All @@ -72,6 +82,7 @@ impl GotocCodegenBackend {
/// Generate code that is reachable from the given starting points.
///
/// Invariant: iff `check_contract.is_some()` then `return.2.is_some()`
#[allow(clippy::too_many_arguments)]
fn codegen_items<'tcx>(
&self,
tcx: TyCtxt<'tcx>,
Expand All @@ -80,7 +91,8 @@ impl GotocCodegenBackend {
machine_model: &MachineModel,
check_contract: Option<InternalDefId>,
mut transformer: BodyTransformation,
) -> (GotocCtx<'tcx>, Vec<MonoItem>, Option<AssignsContract>) {
thread_pool: &ThreadPool,
) -> (MinimalGotocCtx, Vec<MonoItem>, Option<AssignsContract>) {
// This runs reachability analysis before global passes are applied.
//
// Alternatively, we could run reachability only once after the global passes are applied
Expand Down Expand Up @@ -201,19 +213,49 @@ impl GotocCodegenBackend {

gcx.handle_quantifiers();

// Split ownership of the context so that the majority of fields can be saved to our results,
// but the symbol table can be passed to the thread that handles exporting.
let (min_gcx, symbol_table) = gcx.split();

// No output should be generated if user selected no_codegen.
if !tcx.sess.opts.unstable_opts.no_codegen && tcx.sess.opts.output_types.should_codegen() {
let pretty = self.queries.lock().unwrap().args().output_pretty_json;
write_file(symtab_goto, ArtifactType::PrettyNameMap, &pretty_name_map, pretty);
write_goto_binary_file(symtab_goto, &gcx.symbol_table);
write_file(symtab_goto, ArtifactType::TypeMap, &type_map, pretty);
// If they exist, write out vtable virtual call function pointer restrictions
if let Some(restrictions) = vtable_restrictions {
write_file(symtab_goto, ArtifactType::VTableRestriction, &restrictions, pretty);
}

// Save all the data needed to write this goto file
// so another thread can handle it in parallel.
let new_file_data = FileDataToWrite {
symtab_goto: symtab_goto.to_path_buf(),
symbol_table,
vtable_restrictions,
type_map,
pretty_name_map,
pretty,
};

// Package the file data with a copy of the string interner used to generate it.
let file_data_with_interner = WithInterner::new_with_current(new_file_data);

// Send everything to the thread pool for handling and move on.
thread_pool.send_work(file_data_with_interner).unwrap();
}

(gcx, items, contract_info)
(min_gcx, items, contract_info)
}

/// Determines the number of threads to add to the pool for goto binary exporting.
fn thread_pool_size(known_num_harnesses: Option<usize>) -> usize {
// Default to the available parallelism if # of threads isn't specified.
let mut max_total_threads = available_parallelism().map(NonZero::get).unwrap_or(1);

// If we know the # of harnesses upfront, cap the number of total threads at that.
// Multiple threads can't work on exporting the same harness, so any threads
// more than the # of harnesses cannot possibly provide an additional benefit.
if let Some(num_harnesses) = known_num_harnesses {
max_total_threads = min(max_total_threads, num_harnesses);
}

// One thread will be the main compiler thread, and we shouldn't have more than the max that would make sense.
min(max_total_threads.saturating_sub(1), MAX_SENSIBLE_FILE_EXPORT_THREADS)
}

/// Given a harness, return the DefId of its target if it's a contract harness.
Expand Down Expand Up @@ -329,11 +371,26 @@ impl CodegenBackend for GotocCodegenBackend {
let base_filename = base_filepath.as_path();
let reachability = queries.args().reachability_analysis;
let mut results = GotoCodegenResults::new(tcx, reachability);

// If reachability is None, just return early as we'll do no codegen.
if reachability == ReachabilityType::None {
return codegen_results(tcx, &results.machine_model);
}

// Create an empty thread pool. We will set the size later once we
// concretely know the # of harnesses we need to analyze.
let mut export_thread_pool = ThreadPool::empty();

match reachability {
ReachabilityType::AllFns | ReachabilityType::Harnesses => {
let mut units = CodegenUnits::new(&queries, tcx);
let mut modifies_instances = vec![];
let mut loop_contracts_instances = vec![];

// We know the # of harnesses here, so provide them to the thread_pool size calculation.
let num_harnesses: usize = units.iter().map(|unit| unit.harnesses.len()).sum();
export_thread_pool.add_workers(Self::thread_pool_size(Some(num_harnesses)));

// Cross-crate collecting of all items that are reachable from the crate harnesses.
for unit in units.iter() {
// We reset the body cache for now because each codegen unit has different
Expand All @@ -344,19 +401,20 @@ impl CodegenBackend for GotocCodegenBackend {
let is_automatic_harness = units.is_automatic_harness(harness);
let contract_metadata =
self.target_if_contract_harness(tcx, harness, is_automatic_harness);
let (gcx, items, contract_info) = self.codegen_items(
let (min_gcx, items, contract_info) = self.codegen_items(
tcx,
&[MonoItem::Fn(*harness)],
model_path,
&results.machine_model,
contract_metadata
.map(|def| rustc_internal::internal(tcx, def.def_id())),
transformer,
&export_thread_pool,
);
if gcx.has_loop_contracts {
if min_gcx.has_loop_contracts {
loop_contracts_instances.push(*harness);
}
results.extend(gcx, items, None);
results.extend(min_gcx, items, None);
if let Some(assigns_contract) = contract_info {
modifies_instances.push((*harness, assigns_contract));
}
Expand All @@ -366,9 +424,13 @@ impl CodegenBackend for GotocCodegenBackend {
units.store_loop_contracts(&loop_contracts_instances);
units.write_metadata(&queries, tcx);
}
ReachabilityType::None => {}
ReachabilityType::None => unreachable!(),
ReachabilityType::PubFns => {
let unit = CodegenUnit::default();

// Here, we don't know up front how many harnesses we will have to analyze, so pass None.
export_thread_pool.add_workers(Self::thread_pool_size(None));

let transformer = BodyTransformation::new(&queries, tcx, &unit);
let main_instance = rustc_public::entry_fn()
.map(|main_fn| Instance::try_from(main_fn).unwrap());
Expand All @@ -387,12 +449,17 @@ impl CodegenBackend for GotocCodegenBackend {
&results.machine_model,
Default::default(),
transformer,
&export_thread_pool,
);
assert!(contract_info.is_none());
let _ = results.extend(gcx, items, None);
}
}

// Join all the worker threads in the pool to ensure all goto files have been written before
// moving on to verification.
export_thread_pool.join_all();

if reachability != ReachabilityType::None {
// Print compilation report.
results.print_report(tcx);
Expand Down Expand Up @@ -637,16 +704,16 @@ impl GotoCodegenResults {

fn extend(
&mut self,
gcx: GotocCtx,
min_gcx: context::MinimalGotocCtx,
items: Vec<MonoItem>,
metadata: Option<HarnessMetadata>,
) -> BodyTransformation {
let mut items = items;
self.harnesses.extend(metadata);
self.concurrent_constructs.extend(gcx.concurrent_constructs);
self.unsupported_constructs.extend(gcx.unsupported_constructs);
self.concurrent_constructs.extend(min_gcx.concurrent_constructs);
self.unsupported_constructs.extend(min_gcx.unsupported_constructs);
self.items.append(&mut items);
gcx.transformer
min_gcx.transformer
}

/// Prints a report at the end of the compilation.
Expand Down
Loading
Loading