Skip to content

Commit 21ef949

Browse files
jpserratmrcnski
andauthored
Use clone instead of fork on pvf (#2477)
@mrcnski Done the change on the prepare worker, once the prepare worker part is good I'll do the same for the execute worker. This is based on https://github.com/koute/polkavm/blob/11beebd06276ce9b84f335350138479e714f6caf/crates/polkavm/src/sandbox/linux.rs#L711. ## TODO - [x] Add a check for this capability at startup - [x] Add prdoc mentioning the new Secure Validator Mode (optional) requirement. ## Related Closes #2162 --------- Co-authored-by: Marcin S <marcin@realemail.net>
1 parent caa987d commit 21ef949

20 files changed

Lines changed: 860 additions & 394 deletions

File tree

Cargo.lock

Lines changed: 2 additions & 12 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

polkadot/node/core/pvf/common/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ sp-tracing = { path = "../../../../../substrate/primitives/tracing" }
3333

3434
[target.'cfg(target_os = "linux")'.dependencies]
3535
landlock = "0.3.0"
36+
nix = { version = "0.27.1", features = ["sched"] }
3637
seccompiler = "0.4.0"
3738

3839
[dev-dependencies]

polkadot/node/core/pvf/common/src/execute.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,10 +92,11 @@ pub enum JobError {
9292
TimedOut,
9393
#[error("An unexpected panic has occurred in the execution job: {0}")]
9494
Panic(String),
95+
/// Some error occurred when interfacing with the kernel.
96+
#[error("Error interfacing with the kernel: {0}")]
97+
Kernel(String),
9598
#[error("Could not spawn the requested thread: {0}")]
9699
CouldNotSpawnThread(String),
97100
#[error("An error occurred in the CPU time monitor thread: {0}")]
98101
CpuTimeMonitorThread(String),
99-
#[error("Could not set pdeathsig: {0}")]
100-
CouldNotSetPdeathsig(String),
101102
}

polkadot/node/core/pvf/common/src/executor_interface.rs

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -140,15 +140,18 @@ pub unsafe fn create_runtime_from_artifact_bytes(
140140
executor_params: &ExecutorParams,
141141
) -> Result<WasmtimeRuntime, WasmError> {
142142
let mut config = DEFAULT_CONFIG.clone();
143-
config.semantics = params_to_wasmtime_semantics(executor_params);
143+
config.semantics = params_to_wasmtime_semantics(executor_params).0;
144144

145145
sc_executor_wasmtime::create_runtime_from_artifact_bytes::<HostFunctions>(
146146
compiled_artifact_blob,
147147
config,
148148
)
149149
}
150150

151-
pub fn params_to_wasmtime_semantics(par: &ExecutorParams) -> Semantics {
151+
/// Takes the default config and overwrites any settings with existing executor parameters.
152+
///
153+
/// Returns the semantics as well as the stack limit (since we are guaranteed to have it).
154+
pub fn params_to_wasmtime_semantics(par: &ExecutorParams) -> (Semantics, DeterministicStackLimit) {
152155
let mut sem = DEFAULT_CONFIG.semantics.clone();
153156
let mut stack_limit = sem
154157
.deterministic_stack_limit
@@ -169,8 +172,8 @@ pub fn params_to_wasmtime_semantics(par: &ExecutorParams) -> Semantics {
169172
ExecutorParam::PvfExecTimeout(_, _) => (), /* Not used here */
170173
}
171174
}
172-
sem.deterministic_stack_limit = Some(stack_limit);
173-
sem
175+
sem.deterministic_stack_limit = Some(stack_limit.clone());
176+
(sem, stack_limit)
174177
}
175178

176179
/// Runs the prevalidation on the given code. Returns a [`RuntimeBlob`] if it succeeds.
@@ -187,7 +190,7 @@ pub fn prepare(
187190
blob: RuntimeBlob,
188191
executor_params: &ExecutorParams,
189192
) -> Result<Vec<u8>, sc_executor_common::error::WasmError> {
190-
let semantics = params_to_wasmtime_semantics(executor_params);
193+
let (semantics, _) = params_to_wasmtime_semantics(executor_params);
191194
sc_executor_wasmtime::prepare_runtime_artifact(blob, &semantics)
192195
}
193196

polkadot/node/core/pvf/common/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ pub struct SecurityStatus {
5757
pub can_enable_seccomp: bool,
5858
/// Whether we are able to unshare the user namespace and change the filesystem root.
5959
pub can_unshare_user_namespace_and_change_root: bool,
60+
/// Whether we are able to call `clone` with all sandboxing flags.
61+
pub can_do_secure_clone: bool,
6062
}
6163

6264
/// A handshake with information for the worker.

polkadot/node/core/pvf/common/src/worker/mod.rs

Lines changed: 128 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,19 @@
1818
1919
pub mod security;
2020

21-
use crate::{framed_recv_blocking, WorkerHandshake, LOG_TARGET};
21+
use crate::{framed_recv_blocking, SecurityStatus, WorkerHandshake, LOG_TARGET};
2222
use cpu_time::ProcessTime;
2323
use futures::never::Never;
2424
use parity_scale_codec::Decode;
2525
use std::{
2626
any::Any,
27-
fmt, io,
28-
os::unix::net::UnixStream,
27+
fmt::{self},
28+
fs::File,
29+
io::{self, Read, Write},
30+
os::{
31+
fd::{AsRawFd, FromRawFd, RawFd},
32+
unix::net::UnixStream,
33+
},
2934
path::PathBuf,
3035
sync::mpsc::{Receiver, RecvTimeoutError},
3136
time::Duration,
@@ -78,7 +83,7 @@ macro_rules! decl_worker_main {
7883

7984
"--check-can-enable-landlock" => {
8085
#[cfg(target_os = "linux")]
81-
let status = if let Err(err) = security::landlock::check_is_fully_enabled() {
86+
let status = if let Err(err) = security::landlock::check_can_fully_enable() {
8287
// Write the error to stderr, log it on the host-side.
8388
eprintln!("{}", err);
8489
-1
@@ -91,7 +96,7 @@ macro_rules! decl_worker_main {
9196
},
9297
"--check-can-enable-seccomp" => {
9398
#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
94-
let status = if let Err(err) = security::seccomp::check_is_fully_enabled() {
99+
let status = if let Err(err) = security::seccomp::check_can_fully_enable() {
95100
// Write the error to stderr, log it on the host-side.
96101
eprintln!("{}", err);
97102
-1
@@ -107,7 +112,7 @@ macro_rules! decl_worker_main {
107112
let cache_path_tempdir = std::path::Path::new(&args[2]);
108113
#[cfg(target_os = "linux")]
109114
let status = if let Err(err) =
110-
security::change_root::check_is_fully_enabled(&cache_path_tempdir)
115+
security::change_root::check_can_fully_enable(&cache_path_tempdir)
111116
{
112117
// Write the error to stderr, log it on the host-side.
113118
eprintln!("{}", err);
@@ -119,6 +124,21 @@ macro_rules! decl_worker_main {
119124
let status = -1;
120125
std::process::exit(status)
121126
},
127+
"--check-can-do-secure-clone" => {
128+
#[cfg(target_os = "linux")]
129+
// SAFETY: new process is spawned within a single threaded process. This
130+
// invariant is enforced by tests.
131+
let status = if let Err(err) = unsafe { security::clone::check_can_fully_clone() } {
132+
// Write the error to stderr, log it on the host-side.
133+
eprintln!("{}", err);
134+
-1
135+
} else {
136+
0
137+
};
138+
#[cfg(not(target_os = "linux"))]
139+
let status = -1;
140+
std::process::exit(status)
141+
},
122142

123143
"test-sleep" => {
124144
std::thread::sleep(std::time::Duration::from_secs(5));
@@ -171,6 +191,84 @@ macro_rules! decl_worker_main {
171191
};
172192
}
173193

194+
//taken from the os_pipe crate. Copied here to reduce one dependency and
195+
// because its type-safe abstractions do not play well with nix's clone
196+
#[cfg(not(target_os = "macos"))]
197+
pub fn pipe2_cloexec() -> io::Result<(libc::c_int, libc::c_int)> {
198+
let mut fds: [libc::c_int; 2] = [0; 2];
199+
let res = unsafe { libc::pipe2(fds.as_mut_ptr(), libc::O_CLOEXEC) };
200+
if res != 0 {
201+
return Err(io::Error::last_os_error())
202+
}
203+
Ok((fds[0], fds[1]))
204+
}
205+
206+
#[cfg(target_os = "macos")]
207+
pub fn pipe2_cloexec() -> io::Result<(libc::c_int, libc::c_int)> {
208+
let mut fds: [libc::c_int; 2] = [0; 2];
209+
let res = unsafe { libc::pipe(fds.as_mut_ptr()) };
210+
if res != 0 {
211+
return Err(io::Error::last_os_error())
212+
}
213+
let res = unsafe { libc::fcntl(fds[0], libc::F_SETFD, libc::FD_CLOEXEC) };
214+
if res != 0 {
215+
return Err(io::Error::last_os_error())
216+
}
217+
let res = unsafe { libc::fcntl(fds[1], libc::F_SETFD, libc::FD_CLOEXEC) };
218+
if res != 0 {
219+
return Err(io::Error::last_os_error())
220+
}
221+
Ok((fds[0], fds[1]))
222+
}
223+
224+
/// A wrapper around a file descriptor used to encapsulate and restrict
225+
/// functionality for pipe operations.
226+
pub struct PipeFd {
227+
file: File,
228+
}
229+
230+
impl AsRawFd for PipeFd {
231+
/// Returns the raw file descriptor associated with this `PipeFd`
232+
fn as_raw_fd(&self) -> RawFd {
233+
self.file.as_raw_fd()
234+
}
235+
}
236+
237+
impl FromRawFd for PipeFd {
238+
/// Creates a new `PipeFd` instance from a raw file descriptor.
239+
///
240+
/// # Safety
241+
///
242+
/// The fd passed in must be an owned file descriptor; in particular, it must be open.
243+
unsafe fn from_raw_fd(fd: RawFd) -> Self {
244+
PipeFd { file: File::from_raw_fd(fd) }
245+
}
246+
}
247+
248+
impl Read for PipeFd {
249+
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
250+
self.file.read(buf)
251+
}
252+
253+
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
254+
self.file.read_to_end(buf)
255+
}
256+
}
257+
258+
impl Write for PipeFd {
259+
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
260+
self.file.write(buf)
261+
}
262+
263+
fn flush(&mut self) -> io::Result<()> {
264+
self.file.flush()
265+
}
266+
267+
fn write_all(&mut self, buf: &[u8]) -> io::Result<()> {
268+
self.file.write_all(buf)
269+
}
270+
}
271+
174272
/// Some allowed overhead that we account for in the "CPU time monitor" thread's sleeps, on the
175273
/// child process.
176274
pub const JOB_TIMEOUT_OVERHEAD: Duration = Duration::from_millis(50);
@@ -192,14 +290,12 @@ impl fmt::Display for WorkerKind {
192290
}
193291
}
194292

195-
// Some fields are only used for logging, and dead-code analysis ignores Debug.
196-
#[allow(dead_code)]
197293
#[derive(Debug)]
198294
pub struct WorkerInfo {
199-
pid: u32,
200-
kind: WorkerKind,
201-
version: Option<String>,
202-
worker_dir_path: PathBuf,
295+
pub pid: u32,
296+
pub kind: WorkerKind,
297+
pub version: Option<String>,
298+
pub worker_dir_path: PathBuf,
203299
}
204300

205301
// NOTE: The worker version must be passed in so that we accurately get the version of the worker,
@@ -218,7 +314,7 @@ pub fn run_worker<F>(
218314
worker_version: Option<&str>,
219315
mut event_loop: F,
220316
) where
221-
F: FnMut(UnixStream, PathBuf) -> io::Result<Never>,
317+
F: FnMut(UnixStream, &WorkerInfo, SecurityStatus) -> io::Result<Never>,
222318
{
223319
#[cfg_attr(not(target_os = "linux"), allow(unused_mut))]
224320
let mut worker_info = WorkerInfo {
@@ -250,11 +346,8 @@ pub fn run_worker<F>(
250346
}
251347

252348
// Make sure that we can read the worker dir path, and log its contents.
253-
let entries = || -> Result<Vec<_>, io::Error> {
254-
std::fs::read_dir(&worker_info.worker_dir_path)?
255-
.map(|res| res.map(|e| e.file_name()))
256-
.collect()
257-
}();
349+
let entries: io::Result<Vec<_>> = std::fs::read_dir(&worker_info.worker_dir_path)
350+
.and_then(|d| d.map(|res| res.map(|e| e.file_name())).collect());
258351
match entries {
259352
Ok(entries) =>
260353
gum::trace!(target: LOG_TARGET, ?worker_info, "content of worker dir: {:?}", entries),
@@ -284,6 +377,22 @@ pub fn run_worker<F>(
284377
{
285378
gum::trace!(target: LOG_TARGET, ?security_status, "Enabling security features");
286379

380+
// First, make sure env vars were cleared, to match the environment we perform the checks
381+
// within. (In theory, running checks with different env vars could result in different
382+
// outcomes of the checks.)
383+
if !security::check_env_vars_were_cleared(&worker_info) {
384+
let err = "not all env vars were cleared when spawning the process";
385+
gum::error!(
386+
target: LOG_TARGET,
387+
?worker_info,
388+
"{}",
389+
err
390+
);
391+
if security_status.secure_validator_mode {
392+
worker_shutdown(worker_info, err);
393+
}
394+
}
395+
287396
// Call based on whether we can change root. Error out if it should work but fails.
288397
//
289398
// NOTE: This should not be called in a multi-threaded context (i.e. inside the tokio
@@ -337,23 +446,10 @@ pub fn run_worker<F>(
337446
}
338447
}
339448
}
340-
341-
if !security::check_env_vars_were_cleared(&worker_info) {
342-
let err = "not all env vars were cleared when spawning the process";
343-
gum::error!(
344-
target: LOG_TARGET,
345-
?worker_info,
346-
"{}",
347-
err
348-
);
349-
if security_status.secure_validator_mode {
350-
worker_shutdown(worker_info, err);
351-
}
352-
}
353449
}
354450

355451
// Run the main worker loop.
356-
let err = event_loop(stream, worker_info.worker_dir_path.clone())
452+
let err = event_loop(stream, &worker_info, security_status)
357453
// It's never `Ok` because it's `Ok(Never)`.
358454
.unwrap_err();
359455

polkadot/node/core/pvf/common/src/worker/security/change_root.rs

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,7 @@ pub fn enable_for_worker(worker_info: &WorkerInfo) -> Result<()> {
5454
///
5555
/// NOTE: This should not be called in a multi-threaded context. `unshare(2)`:
5656
/// "CLONE_NEWUSER requires that the calling process is not threaded."
57-
#[cfg(target_os = "linux")]
58-
pub fn check_is_fully_enabled(tempdir: &Path) -> Result<()> {
57+
pub fn check_can_fully_enable(tempdir: &Path) -> Result<()> {
5958
let worker_dir_path = tempdir.to_owned();
6059
try_restrict(&WorkerInfo {
6160
pid: std::process::id(),
@@ -69,7 +68,6 @@ pub fn check_is_fully_enabled(tempdir: &Path) -> Result<()> {
6968
///
7069
/// NOTE: This should not be called in a multi-threaded context. `unshare(2)`:
7170
/// "CLONE_NEWUSER requires that the calling process is not threaded."
72-
#[cfg(target_os = "linux")]
7371
fn try_restrict(worker_info: &WorkerInfo) -> Result<()> {
7472
// TODO: Remove this once this is stable: https://github.com/rust-lang/rust/issues/105723
7573
macro_rules! cstr_ptr {
@@ -78,12 +76,6 @@ fn try_restrict(worker_info: &WorkerInfo) -> Result<()> {
7876
};
7977
}
8078

81-
gum::trace!(
82-
target: LOG_TARGET,
83-
?worker_info,
84-
"unsharing the user namespace and calling pivot_root",
85-
);
86-
8779
let worker_dir_path_c = CString::new(worker_info.worker_dir_path.as_os_str().as_bytes())
8880
.expect("on unix; the path will never contain 0 bytes; qed");
8981

0 commit comments

Comments
 (0)