Skip to content
This repository was archived by the owner on Nov 15, 2023. It is now read-only.

Commit 06bebff

Browse files
committed
Merge branch 'master' into ao-fix-zombienet-parachain-upgrade
* master: Bump `wasmtime` to 0.38.0 and `zstd` to 0.11.2 (companion for substrate#11720) (#5707) pvf: ensure enough stack space (#5712) Bump generic-array from 0.12.3 to 0.12.4 in /bridges/fuzz/storage-proof (#5648) pvf: unignore `terminates_on_timeout` test (#5722)
2 parents 11545b6 + b072240 commit 06bebff

File tree

10 files changed

+401
-279
lines changed

10 files changed

+401
-279
lines changed

Cargo.lock

Lines changed: 266 additions & 229 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

bridges/fuzz/storage-proof/Cargo.lock

Lines changed: 6 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

node/core/pvf/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ gum = { package = "tracing-gum", path = "../../gum" }
2020
pin-project = "1.0.9"
2121
rand = "0.8.5"
2222
tempfile = "3.3.0"
23+
rayon = "1.5.1"
2324
parity-scale-codec = { version = "3.1.2", default-features = false, features = ["derive"] }
2425
polkadot-parachain = { path = "../../../parachain" }
2526
polkadot-core-primitives = { path = "../../../core-primitives" }

node/core/pvf/src/error.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
1616

1717
use parity_scale_codec::{Decode, Encode};
18+
use std::any::Any;
1819

1920
/// Result of PVF preparation performed by the validation host.
2021
pub type PrepareResult = Result<(), PrepareError>;
@@ -108,3 +109,17 @@ impl From<PrepareError> for ValidationError {
108109
}
109110
}
110111
}
112+
113+
/// Attempt to convert an opaque panic payload to a string.
114+
///
115+
/// This is a best effort, and is not guaranteed to provide the most accurate value.
116+
pub(crate) fn stringify_panic_payload(payload: Box<dyn Any + Send + 'static>) -> String {
117+
match payload.downcast::<&'static str>() {
118+
Ok(msg) => msg.to_string(),
119+
Err(payload) => match payload.downcast::<String>() {
120+
Ok(msg) => *msg,
121+
// At least we tried...
122+
Err(_) => "unknown panic payload".to_string(),
123+
},
124+
}
125+
}

node/core/pvf/src/execute/worker.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
use crate::{
1818
artifacts::ArtifactPathId,
19-
executor_intf::TaskExecutor,
19+
executor_intf::Executor,
2020
worker_common::{
2121
bytes_to_path, framed_recv, framed_send, path_to_bytes, spawn_with_program_path,
2222
worker_event_loop, IdleWorker, SpawnErr, WorkerHandle,
@@ -184,8 +184,8 @@ impl Response {
184184
/// the path to the socket used to communicate with the host.
185185
pub fn worker_entrypoint(socket_path: &str) {
186186
worker_event_loop("execute", socket_path, |mut stream| async move {
187-
let executor = TaskExecutor::new().map_err(|e| {
188-
io::Error::new(io::ErrorKind::Other, format!("cannot create task executor: {}", e))
187+
let executor = Executor::new().map_err(|e| {
188+
io::Error::new(io::ErrorKind::Other, format!("cannot create executor: {}", e))
189189
})?;
190190
loop {
191191
let (artifact_path, params) = recv_request(&mut stream).await?;
@@ -204,14 +204,14 @@ pub fn worker_entrypoint(socket_path: &str) {
204204
async fn validate_using_artifact(
205205
artifact_path: &Path,
206206
params: &[u8],
207-
spawner: &TaskExecutor,
207+
executor: &Executor,
208208
) -> Response {
209209
let validation_started_at = Instant::now();
210210
let descriptor_bytes = match unsafe {
211211
// SAFETY: this should be safe since the compiled artifact passed here comes from the
212212
// file created by the prepare workers. These files are obtained by calling
213213
// [`executor_intf::prepare`].
214-
crate::executor_intf::execute(artifact_path.as_ref(), params, spawner.clone())
214+
executor.execute(artifact_path.as_ref(), params)
215215
} {
216216
Err(err) => return Response::format_invalid("execute", &err.to_string()),
217217
Ok(d) => d,

node/core/pvf/src/executor_intf.rs

Lines changed: 100 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ use std::{
4343
const DEFAULT_HEAP_PAGES_ESTIMATE: u64 = 32;
4444
const EXTRA_HEAP_PAGES: u64 = 2048;
4545

46+
/// The number of bytes devoted for the stack during wasm execution of a PVF.
47+
const NATIVE_STACK_MAX: u32 = 256 * 1024 * 1024;
48+
4649
const CONFIG: Config = Config {
4750
allow_missing_func_imports: true,
4851
cache_path: None,
@@ -69,7 +72,7 @@ const CONFIG: Config = Config {
6972
// the stack limit set by the wasmtime.
7073
deterministic_stack_limit: Some(DeterministicStackLimit {
7174
logical_max: 65536,
72-
native_stack_max: 256 * 1024 * 1024,
75+
native_stack_max: NATIVE_STACK_MAX,
7376
}),
7477
canonicalize_nans: true,
7578
// Rationale for turning the multi-threaded compilation off is to make the preparation time
@@ -98,20 +101,99 @@ pub fn prepare(blob: RuntimeBlob) -> Result<Vec<u8>, sc_executor_common::error::
98101
sc_executor_wasmtime::prepare_runtime_artifact(blob, &CONFIG.semantics)
99102
}
100103

101-
/// Executes the given PVF in the form of a compiled artifact and returns the result of execution
102-
/// upon success.
103-
///
104-
/// # Safety
105-
///
106-
/// The caller must ensure that the compiled artifact passed here was:
107-
/// 1) produced by [`prepare`],
108-
/// 2) written to the disk as a file,
109-
/// 3) was not modified,
110-
/// 4) will not be modified while any runtime using this artifact is alive, or is being
111-
/// instantiated.
112-
///
113-
/// Failure to adhere to these requirements might lead to crashes and arbitrary code execution.
114-
pub unsafe fn execute(
104+
pub struct Executor {
105+
thread_pool: rayon::ThreadPool,
106+
spawner: TaskSpawner,
107+
}
108+
109+
impl Executor {
110+
pub fn new() -> Result<Self, String> {
111+
// Wasmtime powers the Substrate Executor. It compiles the wasm bytecode into native code.
112+
// That native code does not create any stacks and just reuses the stack of the thread that
113+
// wasmtime was invoked from.
114+
//
115+
// Also, we configure the executor to provide the deterministic stack and that requires
116+
// supplying the amount of the native stack space that wasm is allowed to use. This is
117+
// realized by supplying the limit into `wasmtime::Config::max_wasm_stack`.
118+
//
119+
// There are quirks to that configuration knob:
120+
//
121+
// 1. It only limits the amount of stack space consumed by wasm but does not ensure nor check
122+
// that the stack space is actually available.
123+
//
124+
// That means, if the calling thread has 1 MiB of stack space left and the wasm code consumes
125+
// more, then the wasmtime limit will **not** trigger. Instead, the wasm code will hit the
126+
// guard page and the Rust stack overflow handler will be triggered. That leads to an
127+
// **abort**.
128+
//
129+
// 2. It cannot and does not limit the stack space consumed by Rust code.
130+
//
131+
// Meaning that if the wasm code leaves no stack space for Rust code, then the Rust code
132+
// and that will abort the process as well.
133+
//
134+
// Typically on Linux the main thread gets the stack size specified by the `ulimit` and
135+
// typically it's configured to 8 MiB. Rust's spawned threads are 2 MiB. OTOH, the
136+
// NATIVE_STACK_MAX is set to 256 MiB. Not nearly enough.
137+
//
138+
// Hence we need to increase it.
139+
//
140+
// The simplest way to fix that is to spawn a thread with the desired stack limit. In order
141+
// to avoid costs of creating a thread, we use a thread pool. The execution is
142+
// single-threaded hence the thread pool has only one thread.
143+
//
144+
// The reasoning why we pick this particular size is:
145+
//
146+
// The default Rust thread stack limit 2 MiB + 256 MiB wasm stack.
147+
let thread_stack_size = 2 * 1024 * 1024 + NATIVE_STACK_MAX as usize;
148+
let thread_pool = rayon::ThreadPoolBuilder::new()
149+
.num_threads(1)
150+
.stack_size(thread_stack_size)
151+
.build()
152+
.map_err(|e| format!("Failed to create thread pool: {:?}", e))?;
153+
154+
let spawner =
155+
TaskSpawner::new().map_err(|e| format!("cannot create task spawner: {}", e))?;
156+
157+
Ok(Self { thread_pool, spawner })
158+
}
159+
160+
/// Executes the given PVF in the form of a compiled artifact and returns the result of execution
161+
/// upon success.
162+
///
163+
/// # Safety
164+
///
165+
/// The caller must ensure that the compiled artifact passed here was:
166+
/// 1) produced by [`prepare`],
167+
/// 2) written to the disk as a file,
168+
/// 3) was not modified,
169+
/// 4) will not be modified while any runtime using this artifact is alive, or is being
170+
/// instantiated.
171+
///
172+
/// Failure to adhere to these requirements might lead to crashes and arbitrary code execution.
173+
pub unsafe fn execute(
174+
&self,
175+
compiled_artifact_path: &Path,
176+
params: &[u8],
177+
) -> Result<Vec<u8>, String> {
178+
let spawner = self.spawner.clone();
179+
let mut result = None;
180+
self.thread_pool.scope({
181+
let result = &mut result;
182+
move |s| {
183+
s.spawn(move |_| {
184+
// spawn does not return a value, so we need to use a variable to pass the result.
185+
*result = Some(
186+
do_execute(compiled_artifact_path, params, spawner)
187+
.map_err(|err| format!("execute error: {:?}", err)),
188+
);
189+
});
190+
}
191+
});
192+
result.unwrap_or_else(|| Err("rayon thread pool spawn failed".to_string()))
193+
}
194+
}
195+
196+
unsafe fn do_execute(
115197
compiled_artifact_path: &Path,
116198
params: &[u8],
117199
spawner: impl sp_core::traits::SpawnNamed + 'static,
@@ -291,9 +373,9 @@ impl sp_externalities::ExtensionStore for ValidationExternalities {
291373
///
292374
/// This is a light handle meaning it will only clone the handle not create a new thread pool.
293375
#[derive(Clone)]
294-
pub(crate) struct TaskExecutor(futures::executor::ThreadPool);
376+
pub(crate) struct TaskSpawner(futures::executor::ThreadPool);
295377

296-
impl TaskExecutor {
378+
impl TaskSpawner {
297379
pub(crate) fn new() -> Result<Self, String> {
298380
futures::executor::ThreadPoolBuilder::new()
299381
.pool_size(4)
@@ -304,7 +386,7 @@ impl TaskExecutor {
304386
}
305387
}
306388

307-
impl sp_core::traits::SpawnNamed for TaskExecutor {
389+
impl sp_core::traits::SpawnNamed for TaskSpawner {
308390
fn spawn_blocking(
309391
&self,
310392
_task_name: &'static str,

node/core/pvf/src/prepare/worker.rs

Lines changed: 4 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ use async_std::{
3030
};
3131
use parity_scale_codec::{Decode, Encode};
3232
use sp_core::hexdisplay::HexDisplay;
33-
use std::{any::Any, panic, sync::Arc, time::Duration};
33+
use std::{panic, sync::Arc, time::Duration};
3434

3535
/// The time period after which the preparation worker is considered unresponsive and will be killed.
3636
// NOTE: If you change this make sure to fix the buckets of `pvf_preparation_time` metric.
@@ -294,20 +294,8 @@ fn prepare_artifact(code: &[u8]) -> Result<CompiledArtifact, PrepareError> {
294294
Err(err) => Err(PrepareError::Preparation(format!("{:?}", err))),
295295
}
296296
})
297-
.map_err(|panic_payload| PrepareError::Panic(stringify_panic_payload(panic_payload)))
297+
.map_err(|panic_payload| {
298+
PrepareError::Panic(crate::error::stringify_panic_payload(panic_payload))
299+
})
298300
.and_then(|inner_result| inner_result)
299301
}
300-
301-
/// Attempt to convert an opaque panic payload to a string.
302-
///
303-
/// This is a best effort, and is not guaranteed to provide the most accurate value.
304-
fn stringify_panic_payload(payload: Box<dyn Any + Send + 'static>) -> String {
305-
match payload.downcast::<&'static str>() {
306-
Ok(msg) => msg.to_string(),
307-
Err(payload) => match payload.downcast::<String>() {
308-
Ok(msg) => *msg,
309-
// At least we tried...
310-
Err(_) => "unkown panic payload".to_string(),
311-
},
312-
}
313-
}

node/core/pvf/src/testing.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ pub fn validate_candidate(
2929
code: &[u8],
3030
params: &[u8],
3131
) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
32-
use crate::executor_intf::{execute, prepare, prevalidate, TaskExecutor};
32+
use crate::executor_intf::{prepare, prevalidate, Executor};
3333

3434
let code = sp_maybe_compressed_blob::decompress(code, 10 * 1024 * 1024)
3535
.expect("Decompressing code failed");
@@ -40,11 +40,11 @@ pub fn validate_candidate(
4040
let artifact_path = tmpdir.path().join("blob");
4141
std::fs::write(&artifact_path, &artifact)?;
4242

43-
let executor = TaskExecutor::new()?;
43+
let executor = Executor::new()?;
4444
let result = unsafe {
4545
// SAFETY: This is trivially safe since the artifact is obtained by calling `prepare`
4646
// and is written into a temporary directory in an unmodified state.
47-
execute(&artifact_path, params, executor)?
47+
executor.execute(&artifact_path, params)?
4848
};
4949

5050
Ok(result)

node/core/pvf/tests/it/main.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,6 @@ impl TestHost {
7878
}
7979

8080
#[async_std::test]
81-
#[ignore]
8281
async fn terminates_on_timeout() {
8382
let host = TestHost::new();
8483

node/primitives/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ thiserror = "1.0.31"
2222
serde = { version = "1.0.137", features = ["derive"] }
2323

2424
[target.'cfg(not(target_os = "unknown"))'.dependencies]
25-
zstd = { version = "0.10.2", default-features = false }
25+
zstd = { version = "0.11.2", default-features = false }
2626

2727
[dev-dependencies]
2828
polkadot-erasure-coding = { path = "../../erasure-coding" }

0 commit comments

Comments
 (0)