Skip to content

Commit d347f38

Browse files
committed
Rework to minimize mmap/munmap
Repeated mmap/munmap calls are fine for single-threaded runs, but they were skewing the results when running in parallel.
1 parent e200243 commit d347f38

File tree

1 file changed

+122
-134
lines changed

1 file changed

+122
-134
lines changed

src/main.rs

Lines changed: 122 additions & 134 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use clap::Parser;
22
use nix::sys::mman::{mmap_anonymous, MapFlags, ProtFlags};
33
use rayon::prelude::*;
44
use serde::Serialize;
5+
use std::marker::PhantomData;
56
use std::mem::MaybeUninit;
67
use std::slice;
78
use std::time::{Duration, Instant};
@@ -47,7 +48,6 @@ enum Strategy {
4748
struct BenchArgs {
4849
total_size: usize,
4950
dirty_fraction: f64,
50-
quiet: bool,
5151
threads: usize,
5252
processes: usize,
5353
}
@@ -62,6 +62,72 @@ struct BenchResult {
6262
pub processes: usize,
6363
}
6464

65+
impl BenchResult {
66+
fn new(args: &BenchArgs, strategy: Strategy, duration: Duration) -> Self {
67+
let BenchArgs {
68+
total_size,
69+
dirty_fraction,
70+
threads,
71+
processes,
72+
..
73+
} = *args;
74+
BenchResult {
75+
strategy,
76+
total_size,
77+
dirty_fraction,
78+
duration,
79+
threads,
80+
processes,
81+
}
82+
}
83+
}
84+
85+
struct MemoryRegion<'a> {
86+
ptr: *mut u8,
87+
size: usize,
88+
dirty_pct: f64,
89+
phantom: PhantomData<&'a [u8]>,
90+
}
91+
92+
impl<'a> MemoryRegion<'a> {
93+
pub fn new(size: usize, dirty_pct: f64, force_resident: bool) -> anyhow::Result<Self> {
94+
let prot = ProtFlags::PROT_READ | ProtFlags::PROT_WRITE;
95+
let flags = MapFlags::MAP_PRIVATE | MapFlags::MAP_ANONYMOUS;
96+
let map = unsafe { mmap_anonymous(None, size.try_into()?, prot, flags) }?;
97+
let map = map.as_ptr() as *mut u8;
98+
99+
if force_resident {
100+
let keep_res_slice = unsafe { slice::from_raw_parts_mut(map, size) };
101+
keep_res_slice.fill(0);
102+
}
103+
104+
Ok(MemoryRegion {
105+
ptr: map,
106+
size,
107+
dirty_pct,
108+
phantom: PhantomData,
109+
})
110+
}
111+
112+
pub fn as_mut_slice(&mut self) -> &'a mut [u8] {
113+
unsafe { slice::from_raw_parts_mut(self.ptr, self.size) }
114+
}
115+
116+
pub fn make_dirty(&mut self) {
117+
let dirty_bytes = (self.size as f64 * self.dirty_pct).round() as usize;
118+
if dirty_bytes > 0 {
119+
let dirty_slice = unsafe { slice::from_raw_parts_mut(self.ptr, dirty_bytes) };
120+
dirty_slice.fill(0xAA);
121+
}
122+
}
123+
}
124+
125+
impl<'a> Drop for MemoryRegion<'a> {
126+
fn drop(&mut self) {
127+
unsafe { libc::munmap(self.ptr as *mut libc::c_void, self.size) };
128+
}
129+
}
130+
65131
macro_rules! qprintln {
66132
($condition:expr, $($arg:tt)*) => {
67133
if !$condition {
@@ -99,7 +165,6 @@ fn main() -> anyhow::Result<()> {
99165
let bench_args = BenchArgs {
100166
total_size,
101167
dirty_fraction,
102-
quiet,
103168
threads: args.threads,
104169
processes: args.processes,
105170
};
@@ -124,174 +189,97 @@ fn main() -> anyhow::Result<()> {
124189
);
125190
qprintln!(quiet, "------------------------------\n");
126191

127-
rayon::ThreadPoolBuilder::new()
128-
.num_threads(args.threads)
129-
.build_global()?;
192+
// we want to reduce the number of new regions we create
193+
// while still creating enough work to be meaningful
194+
let do_memset = || -> anyhow::Result<Vec<BenchResult>> {
195+
let mut region = MemoryRegion::new(total_size, args.dirty_fraction, true)?;
196+
(0..args.iterations)
197+
.map(|_i| run_benchmark_memset(&bench_args, &mut region))
198+
.collect::<anyhow::Result<Vec<BenchResult>>>()
199+
};
130200

131-
let results = if args.threads > 1 {
201+
let do_madvise = || {
202+
let mut region = MemoryRegion::new(total_size, args.dirty_fraction, false)?;
132203
(0..args.iterations)
133-
.into_par_iter()
134-
.map(|_i| {
135-
[
136-
run_benchmark_memset(&bench_args),
137-
run_benchmark_madvise(&bench_args),
138-
run_benchmark_pagemap_scan(&bench_args),
139-
]
140-
})
141-
.flatten()
142-
.collect::<anyhow::Result<Vec<BenchResult>>>()?
143-
} else {
204+
.map(|_i| run_benchmark_madvise(&bench_args, &mut region))
205+
.collect::<anyhow::Result<Vec<BenchResult>>>()
206+
};
207+
208+
let do_pagemap_scan = || {
209+
let mut region = MemoryRegion::new(total_size, args.dirty_fraction, false)?;
144210
(0..args.iterations)
145-
.into_iter()
146-
.map(|_i| {
147-
[
148-
run_benchmark_memset(&bench_args),
149-
run_benchmark_madvise(&bench_args),
150-
run_benchmark_pagemap_scan(&bench_args),
151-
]
152-
})
153-
.flatten()
154-
.collect::<anyhow::Result<Vec<BenchResult>>>()?
211+
.map(|_i| run_benchmark_pagemap_scan(&bench_args, &mut region))
212+
.collect::<anyhow::Result<Vec<BenchResult>>>()
155213
};
156214

215+
// Run the three strategies once per worker. Each worker returns a `Result`, so a failure in
// any strategy is propagated with `?` instead of being silently dropped, which is what
// flattening a `Result` does. Results keep the order memset, madvise, pagemap scan.
let results: Vec<BenchResult> = (0..args.threads)
    .into_par_iter()
    .map(|_| -> anyhow::Result<Vec<BenchResult>> {
        let mut per_worker = do_memset()?;
        per_worker.extend(do_madvise()?);
        per_worker.extend(do_pagemap_scan()?);
        Ok(per_worker)
    })
    .collect::<anyhow::Result<Vec<Vec<BenchResult>>>>()?
    .into_iter()
    .flatten()
    .collect();
222+
157223
if args.json {
158224
println!("{}", serde_json::to_string(&results)?);
159225
}
160226

161227
Ok(())
162228
}
163229

164-
/// Allocates and dirties memory for a test scenario.
165-
fn setup_memory(total_size: usize, dirty_fraction: f64, warmup: bool) -> anyhow::Result<*mut u8> {
166-
let prot = ProtFlags::PROT_READ | ProtFlags::PROT_WRITE;
167-
let flags = MapFlags::MAP_PRIVATE | MapFlags::MAP_ANONYMOUS;
168-
let map = unsafe { mmap_anonymous(None, total_size.try_into()?, prot, flags) }?;
169-
let map = map.as_ptr() as *mut u8;
170-
171-
if warmup {
172-
let keep_res_slice = unsafe { slice::from_raw_parts_mut(map, total_size) };
173-
keep_res_slice.fill(0);
174-
}
175-
176-
// Dirty a fraction of the memory
177-
let dirty_bytes = (total_size as f64 * dirty_fraction).round() as usize;
178-
if dirty_bytes > 0 {
179-
let dirty_slice = unsafe { slice::from_raw_parts_mut(map, dirty_bytes) };
180-
dirty_slice.fill(1); // Write something to make pages dirty
181-
}
182-
183-
Ok(map)
184-
}
185-
186-
fn run_benchmark_memset(args: &BenchArgs) -> anyhow::Result<BenchResult> {
187-
let BenchArgs {
188-
total_size,
189-
dirty_fraction,
190-
quiet,
191-
threads,
192-
processes,
193-
} = *args;
194-
qprintln!(quiet, "Scenario 1: Naive memset on all pages");
195-
let map = setup_memory(total_size, dirty_fraction, true)?;
196-
let slice = unsafe { slice::from_raw_parts_mut(map, total_size) };
197-
230+
fn run_benchmark_memset(
231+
args: &BenchArgs,
232+
region: &mut MemoryRegion,
233+
) -> anyhow::Result<BenchResult> {
234+
region.make_dirty();
198235
let start = Instant::now();
199-
slice.fill(0);
236+
region.as_mut_slice().fill(0);
200237
let duration = start.elapsed();
201238

202-
qprintln!(quiet, " Zeroed all {} bytes.", total_size);
203-
qprintln!(quiet, " Time taken: {:?}\n", duration);
204-
205-
unsafe { libc::munmap(map as *mut libc::c_void, total_size) };
206-
207-
Ok(BenchResult {
208-
strategy: Strategy::MemZero,
209-
total_size,
210-
dirty_fraction,
211-
duration,
212-
threads,
213-
processes,
214-
})
239+
Ok(BenchResult::new(args, Strategy::MemZero, duration))
215240
}
216241

217-
fn run_benchmark_madvise(args: &BenchArgs) -> anyhow::Result<BenchResult> {
218-
let BenchArgs {
219-
total_size,
220-
dirty_fraction,
221-
quiet,
222-
threads,
223-
processes,
224-
} = *args;
225-
qprintln!(quiet, "Scenario 2: use madvise on all pages");
226-
let map = setup_memory(total_size, dirty_fraction, false)?;
227-
242+
fn run_benchmark_madvise(
243+
args: &BenchArgs,
244+
region: &mut MemoryRegion,
245+
) -> anyhow::Result<BenchResult> {
246+
region.make_dirty();
228247
let start = Instant::now();
229-
let ret = unsafe { libc::madvise(map as *mut libc::c_void, total_size, libc::MADV_DONTNEED) };
248+
let ret = unsafe {
249+
libc::madvise(
250+
region.ptr as *mut libc::c_void,
251+
args.total_size,
252+
libc::MADV_DONTNEED,
253+
)
254+
};
230255
let duration = start.elapsed();
231256

232257
if ret != 0 {
233258
return Err(std::io::Error::last_os_error().into());
234259
}
235260

236-
qprintln!(quiet, " Called madvise on {total_size} bytes.");
237-
qprintln!(quiet, " Time taken: {:?}\n", duration);
238-
239-
unsafe { libc::munmap(map as *mut libc::c_void, total_size) };
240-
241-
Ok(BenchResult {
242-
strategy: Strategy::Madvise,
243-
total_size,
244-
dirty_fraction,
245-
duration,
246-
threads,
247-
processes,
248-
})
261+
Ok(BenchResult::new(args, Strategy::Madvise, duration))
249262
}
250263

251-
fn run_benchmark_pagemap_scan(args: &BenchArgs) -> anyhow::Result<BenchResult> {
252-
let BenchArgs {
253-
total_size,
254-
dirty_fraction,
255-
quiet,
256-
threads,
257-
processes,
258-
} = *args;
259-
qprintln!(quiet, "Scenario 3: Only memset dirty pages");
260-
assert_eq!(total_size % rustix::param::page_size(), 0);
261-
let map = setup_memory(total_size, dirty_fraction, false)?;
262-
let pages = total_size / rustix::param::page_size();
264+
fn run_benchmark_pagemap_scan(
265+
args: &BenchArgs,
266+
region: &mut MemoryRegion,
267+
) -> anyhow::Result<BenchResult> {
268+
let pages = args.total_size / rustix::param::page_size();
263269

270+
region.make_dirty();
264271
let start = Instant::now();
265272

266273
let mut regions: Box<[MaybeUninit<pagemap::PageRegion>]> = Box::new_uninit_slice(pages);
267-
let dirty_pages = pagemap::dirty_pages_in_region(map, total_size, regions.as_mut())?;
268-
269-
let mut total_zeroed = 0;
274+
let dirty_pages =
275+
pagemap::dirty_pages_in_region(region.ptr, args.total_size, regions.as_mut())?;
270276
for dirty_region in dirty_pages.regions {
271277
let start_ptr = dirty_region.start as *mut u8;
272278
let len = usize::try_from(dirty_region.end - dirty_region.start)?;
273279
let region_slice = unsafe { slice::from_raw_parts_mut(start_ptr, len) };
274280
region_slice.fill(0);
275-
total_zeroed += len;
276281
}
277282
let duration = start.elapsed();
278283

279-
qprintln!(
280-
quiet,
281-
" Found {} dirty page ranges.",
282-
dirty_pages.regions.len()
283-
);
284-
qprintln!(quiet, " Zeroed {} bytes.", total_zeroed);
285-
qprintln!(quiet, " Time taken: {:?}\n", duration);
286-
287-
unsafe { libc::munmap(map as *mut libc::c_void, total_size) };
288-
289-
Ok(BenchResult {
290-
strategy: Strategy::PagemapScan,
291-
total_size,
292-
dirty_fraction,
293-
duration,
294-
threads,
295-
processes,
296-
})
284+
Ok(BenchResult::new(args, Strategy::PagemapScan, duration))
297285
}

0 commit comments

Comments
 (0)