@@ -2,6 +2,7 @@ use clap::Parser;
22use nix:: sys:: mman:: { mmap_anonymous, MapFlags , ProtFlags } ;
33use rayon:: prelude:: * ;
44use serde:: Serialize ;
5+ use std:: marker:: PhantomData ;
56use std:: mem:: MaybeUninit ;
67use std:: slice;
78use std:: time:: { Duration , Instant } ;
@@ -47,7 +48,6 @@ enum Strategy {
4748struct BenchArgs {
4849 total_size : usize ,
4950 dirty_fraction : f64 ,
50- quiet : bool ,
5151 threads : usize ,
5252 processes : usize ,
5353}
@@ -62,6 +62,72 @@ struct BenchResult {
6262 pub processes : usize ,
6363}
6464
65+ impl BenchResult {
66+ fn new ( args : & BenchArgs , strategy : Strategy , duration : Duration ) -> Self {
67+ let BenchArgs {
68+ total_size,
69+ dirty_fraction,
70+ threads,
71+ processes,
72+ ..
73+ } = * args;
74+ BenchResult {
75+ strategy,
76+ total_size,
77+ dirty_fraction,
78+ duration,
79+ threads,
80+ processes,
81+ }
82+ }
83+ }
84+
/// An anonymous memory mapping plus the parameters the benchmarks need to
/// work on it.
struct MemoryRegion<'a> {
    /// Start address of the mapping.
    ptr: *mut u8,
    /// Length of the mapping in bytes.
    size: usize,
    /// Fraction of `size`, counted from the start of the mapping, that
    /// `make_dirty` overwrites.
    dirty_pct: f64,
    /// Carries the `'a` lifetime parameter; stores no data.
    phantom: PhantomData<&'a [u8]>,
}
91+
92+ impl < ' a > MemoryRegion < ' a > {
93+ pub fn new ( size : usize , dirty_pct : f64 , force_resident : bool ) -> anyhow:: Result < Self > {
94+ let prot = ProtFlags :: PROT_READ | ProtFlags :: PROT_WRITE ;
95+ let flags = MapFlags :: MAP_PRIVATE | MapFlags :: MAP_ANONYMOUS ;
96+ let map = unsafe { mmap_anonymous ( None , size. try_into ( ) ?, prot, flags) } ?;
97+ let map = map. as_ptr ( ) as * mut u8 ;
98+
99+ if force_resident {
100+ let keep_res_slice = unsafe { slice:: from_raw_parts_mut ( map, size) } ;
101+ keep_res_slice. fill ( 0 ) ;
102+ }
103+
104+ Ok ( MemoryRegion {
105+ ptr : map,
106+ size,
107+ dirty_pct,
108+ phantom : PhantomData ,
109+ } )
110+ }
111+
112+ pub fn as_mut_slice ( & mut self ) -> & ' a mut [ u8 ] {
113+ unsafe { slice:: from_raw_parts_mut ( self . ptr , self . size ) }
114+ }
115+
116+ pub fn make_dirty ( & mut self ) {
117+ let dirty_bytes = ( self . size as f64 * self . dirty_pct ) . round ( ) as usize ;
118+ if dirty_bytes > 0 {
119+ let dirty_slice = unsafe { slice:: from_raw_parts_mut ( self . ptr , dirty_bytes) } ;
120+ dirty_slice. fill ( 0xAA ) ;
121+ }
122+ }
123+ }
124+
125+ impl < ' a > Drop for MemoryRegion < ' a > {
126+ fn drop ( & mut self ) {
127+ unsafe { libc:: munmap ( self . ptr as * mut libc:: c_void , self . size ) } ;
128+ }
129+ }
130+
65131macro_rules! qprintln {
66132 ( $condition: expr, $( $arg: tt) * ) => {
67133 if !$condition {
@@ -99,7 +165,6 @@ fn main() -> anyhow::Result<()> {
99165 let bench_args = BenchArgs {
100166 total_size,
101167 dirty_fraction,
102- quiet,
103168 threads : args. threads ,
104169 processes : args. processes ,
105170 } ;
@@ -124,174 +189,97 @@ fn main() -> anyhow::Result<()> {
124189 ) ;
125190 qprintln ! ( quiet, "------------------------------\n " ) ;
126191
127- rayon:: ThreadPoolBuilder :: new ( )
128- . num_threads ( args. threads )
129- . build_global ( ) ?;
192+ // we want to reduce the number of new regions we create
193+ // while still creating enough work to be meaningful
194+ let do_memset = || -> anyhow:: Result < Vec < BenchResult > > {
195+ let mut region = MemoryRegion :: new ( total_size, args. dirty_fraction , true ) ?;
196+ ( 0 ..args. iterations )
197+ . map ( |_i| run_benchmark_memset ( & bench_args, & mut region) )
198+ . collect :: < anyhow:: Result < Vec < BenchResult > > > ( )
199+ } ;
130200
131- let results = if args. threads > 1 {
201+ let do_madvise = || {
202+ let mut region = MemoryRegion :: new ( total_size, args. dirty_fraction , false ) ?;
132203 ( 0 ..args. iterations )
133- . into_par_iter ( )
134- . map ( |_i| {
135- [
136- run_benchmark_memset ( & bench_args) ,
137- run_benchmark_madvise ( & bench_args) ,
138- run_benchmark_pagemap_scan ( & bench_args) ,
139- ]
140- } )
141- . flatten ( )
142- . collect :: < anyhow:: Result < Vec < BenchResult > > > ( ) ?
143- } else {
204+ . map ( |_i| run_benchmark_madvise ( & bench_args, & mut region) )
205+ . collect :: < anyhow:: Result < Vec < BenchResult > > > ( )
206+ } ;
207+
208+ let do_pagemap_scan = || {
209+ let mut region = MemoryRegion :: new ( total_size, args. dirty_fraction , false ) ?;
144210 ( 0 ..args. iterations )
145- . into_iter ( )
146- . map ( |_i| {
147- [
148- run_benchmark_memset ( & bench_args) ,
149- run_benchmark_madvise ( & bench_args) ,
150- run_benchmark_pagemap_scan ( & bench_args) ,
151- ]
152- } )
153- . flatten ( )
154- . collect :: < anyhow:: Result < Vec < BenchResult > > > ( ) ?
211+ . map ( |_i| run_benchmark_pagemap_scan ( & bench_args, & mut region) )
212+ . collect :: < anyhow:: Result < Vec < BenchResult > > > ( )
155213 } ;
156214
215+ let results: Vec < BenchResult > = ( 0 ..args. threads )
216+ . into_par_iter ( )
217+ . map ( |_| [ do_memset ( ) , do_madvise ( ) , do_pagemap_scan ( ) ] )
218+ . flatten ( )
219+ . flatten ( )
220+ . flatten ( )
221+ . collect ( ) ;
222+
157223 if args. json {
158224 println ! ( "{}" , serde_json:: to_string( & results) ?) ;
159225 }
160226
161227 Ok ( ( ) )
162228}
163229
164- /// Allocates and dirties memory for a test scenario.
165- fn setup_memory ( total_size : usize , dirty_fraction : f64 , warmup : bool ) -> anyhow:: Result < * mut u8 > {
166- let prot = ProtFlags :: PROT_READ | ProtFlags :: PROT_WRITE ;
167- let flags = MapFlags :: MAP_PRIVATE | MapFlags :: MAP_ANONYMOUS ;
168- let map = unsafe { mmap_anonymous ( None , total_size. try_into ( ) ?, prot, flags) } ?;
169- let map = map. as_ptr ( ) as * mut u8 ;
170-
171- if warmup {
172- let keep_res_slice = unsafe { slice:: from_raw_parts_mut ( map, total_size) } ;
173- keep_res_slice. fill ( 0 ) ;
174- }
175-
176- // Dirty a fraction of the memory
177- let dirty_bytes = ( total_size as f64 * dirty_fraction) . round ( ) as usize ;
178- if dirty_bytes > 0 {
179- let dirty_slice = unsafe { slice:: from_raw_parts_mut ( map, dirty_bytes) } ;
180- dirty_slice. fill ( 1 ) ; // Write something to make pages dirty
181- }
182-
183- Ok ( map)
184- }
185-
186- fn run_benchmark_memset ( args : & BenchArgs ) -> anyhow:: Result < BenchResult > {
187- let BenchArgs {
188- total_size,
189- dirty_fraction,
190- quiet,
191- threads,
192- processes,
193- } = * args;
194- qprintln ! ( quiet, "Scenario 1: Naive memset on all pages" ) ;
195- let map = setup_memory ( total_size, dirty_fraction, true ) ?;
196- let slice = unsafe { slice:: from_raw_parts_mut ( map, total_size) } ;
197-
230+ fn run_benchmark_memset (
231+ args : & BenchArgs ,
232+ region : & mut MemoryRegion ,
233+ ) -> anyhow:: Result < BenchResult > {
234+ region. make_dirty ( ) ;
198235 let start = Instant :: now ( ) ;
199- slice . fill ( 0 ) ;
236+ region . as_mut_slice ( ) . fill ( 0 ) ;
200237 let duration = start. elapsed ( ) ;
201238
202- qprintln ! ( quiet, " Zeroed all {} bytes." , total_size) ;
203- qprintln ! ( quiet, " Time taken: {:?}\n " , duration) ;
204-
205- unsafe { libc:: munmap ( map as * mut libc:: c_void , total_size) } ;
206-
207- Ok ( BenchResult {
208- strategy : Strategy :: MemZero ,
209- total_size,
210- dirty_fraction,
211- duration,
212- threads,
213- processes,
214- } )
239+ Ok ( BenchResult :: new ( args, Strategy :: MemZero , duration) )
215240}
216241
217- fn run_benchmark_madvise ( args : & BenchArgs ) -> anyhow:: Result < BenchResult > {
218- let BenchArgs {
219- total_size,
220- dirty_fraction,
221- quiet,
222- threads,
223- processes,
224- } = * args;
225- qprintln ! ( quiet, "Scenario 2: use madvise on all pages" ) ;
226- let map = setup_memory ( total_size, dirty_fraction, false ) ?;
227-
242+ fn run_benchmark_madvise (
243+ args : & BenchArgs ,
244+ region : & mut MemoryRegion ,
245+ ) -> anyhow:: Result < BenchResult > {
246+ region. make_dirty ( ) ;
228247 let start = Instant :: now ( ) ;
229- let ret = unsafe { libc:: madvise ( map as * mut libc:: c_void , total_size, libc:: MADV_DONTNEED ) } ;
248+ let ret = unsafe {
249+ libc:: madvise (
250+ region. ptr as * mut libc:: c_void ,
251+ args. total_size ,
252+ libc:: MADV_DONTNEED ,
253+ )
254+ } ;
230255 let duration = start. elapsed ( ) ;
231256
232257 if ret != 0 {
233258 return Err ( std:: io:: Error :: last_os_error ( ) . into ( ) ) ;
234259 }
235260
236- qprintln ! ( quiet, " Called madvise on {total_size} bytes." ) ;
237- qprintln ! ( quiet, " Time taken: {:?}\n " , duration) ;
238-
239- unsafe { libc:: munmap ( map as * mut libc:: c_void , total_size) } ;
240-
241- Ok ( BenchResult {
242- strategy : Strategy :: Madvise ,
243- total_size,
244- dirty_fraction,
245- duration,
246- threads,
247- processes,
248- } )
261+ Ok ( BenchResult :: new ( args, Strategy :: Madvise , duration) )
249262}
250263
251- fn run_benchmark_pagemap_scan ( args : & BenchArgs ) -> anyhow:: Result < BenchResult > {
252- let BenchArgs {
253- total_size,
254- dirty_fraction,
255- quiet,
256- threads,
257- processes,
258- } = * args;
259- qprintln ! ( quiet, "Scenario 3: Only memset dirty pages" ) ;
260- assert_eq ! ( total_size % rustix:: param:: page_size( ) , 0 ) ;
261- let map = setup_memory ( total_size, dirty_fraction, false ) ?;
262- let pages = total_size / rustix:: param:: page_size ( ) ;
264+ fn run_benchmark_pagemap_scan (
265+ args : & BenchArgs ,
266+ region : & mut MemoryRegion ,
267+ ) -> anyhow:: Result < BenchResult > {
268+ let pages = args. total_size / rustix:: param:: page_size ( ) ;
263269
270+ region. make_dirty ( ) ;
264271 let start = Instant :: now ( ) ;
265272
266273 let mut regions: Box < [ MaybeUninit < pagemap:: PageRegion > ] > = Box :: new_uninit_slice ( pages) ;
267- let dirty_pages = pagemap:: dirty_pages_in_region ( map, total_size, regions. as_mut ( ) ) ?;
268-
269- let mut total_zeroed = 0 ;
274+ let dirty_pages =
275+ pagemap:: dirty_pages_in_region ( region. ptr , args. total_size , regions. as_mut ( ) ) ?;
270276 for dirty_region in dirty_pages. regions {
271277 let start_ptr = dirty_region. start as * mut u8 ;
272278 let len = usize:: try_from ( dirty_region. end - dirty_region. start ) ?;
273279 let region_slice = unsafe { slice:: from_raw_parts_mut ( start_ptr, len) } ;
274280 region_slice. fill ( 0 ) ;
275- total_zeroed += len;
276281 }
277282 let duration = start. elapsed ( ) ;
278283
279- qprintln ! (
280- quiet,
281- " Found {} dirty page ranges." ,
282- dirty_pages. regions. len( )
283- ) ;
284- qprintln ! ( quiet, " Zeroed {} bytes." , total_zeroed) ;
285- qprintln ! ( quiet, " Time taken: {:?}\n " , duration) ;
286-
287- unsafe { libc:: munmap ( map as * mut libc:: c_void , total_size) } ;
288-
289- Ok ( BenchResult {
290- strategy : Strategy :: PagemapScan ,
291- total_size,
292- dirty_fraction,
293- duration,
294- threads,
295- processes,
296- } )
284+ Ok ( BenchResult :: new ( args, Strategy :: PagemapScan , duration) )
297285}
0 commit comments