Skip to content

Commit f293839

Browse files
committed
Random interleaving of benchmark repetitions - the sequel (fixes #1051)
Based on the original implementation by Hai Huang (@haih-g) from #1105.
1 parent d17ea66 commit f293839

File tree

10 files changed

+254
-48
lines changed

10 files changed

+254
-48
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,8 @@ too (`-lkstat`).
299299

300300
[Setting the Time Unit](#setting-the-time-unit)
301301

302+
[Random Interleaving](docs/random_interleaving.md)
303+
302304
[User-Requested Performance Counters](docs/perf_counters.md)
303305

304306
[Preventing Optimization](#preventing-optimization)

docs/random_interleaving.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
<a name="interleaving" />
2+
3+
# Random Interleaving
4+
5+
[Random Interleaving](https://github.com/google/benchmark/issues/1051) is a
6+
technique to lower run-to-run variance. It randomly interleaves repetitions of a
7+
microbenchmark with repetitions from other microbenchmarks in the same benchmark
8+
test. Data shows it is able to lower run-to-run variance by
9+
[40%](https://github.com/google/benchmark/issues/1051) on average.
10+
11+
To use, you mainly need to set `--benchmark_enable_random_interleaving=true`,
12+
and optionally specify non-zero repetition count `--benchmark_repetitions=9`
13+
and optionally decrease the per-repetition time `--benchmark_min_time=0.1`.

include/benchmark/benchmark.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1472,6 +1472,19 @@ class BenchmarkReporter {
14721472
int64_t max_bytes_used;
14731473
};
14741474

1475+
struct PerFamilyRunReports {
1476+
PerFamilyRunReports() : num_runs_total(0), num_runs_done(0) {}
1477+
1478+
// How many runs will all instances of this benchmark perform?
1479+
int num_runs_total;
1480+
1481+
// How many runs have happened already?
1482+
int num_runs_done;
1483+
1484+
// The reports about (non-erroneous!) runs of this family.
1485+
std::vector<BenchmarkReporter::Run> Runs;
1486+
};
1487+
14751488
// Construct a BenchmarkReporter with the output stream set to 'std::cout'
14761489
// and the error stream set to 'std::cerr'
14771490
BenchmarkReporter();

src/benchmark.cc

Lines changed: 63 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,10 @@
3333
#include <cstdlib>
3434
#include <fstream>
3535
#include <iostream>
36+
#include <limits>
3637
#include <map>
3738
#include <memory>
39+
#include <random>
3840
#include <string>
3941
#include <thread>
4042
#include <utility>
@@ -73,6 +75,10 @@ DEFINE_double(benchmark_min_time, 0.5);
7375
// standard deviation of the runs will be reported.
7476
DEFINE_int32(benchmark_repetitions, 1);
7577

78+
// If set, enable random interleaving of repetitions of all benchmarks.
79+
// See http://github.com/google/benchmark/issues/1051 for details.
80+
DEFINE_bool(benchmark_enable_random_interleaving, false);
81+
7682
// Report the result of each benchmark repetitions. When 'true' is specified
7783
// only the mean, standard deviation, and other statistics are reported for
7884
// repeated benchmarks. Affects all reporters.
@@ -297,23 +303,69 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
297303
context.name_field_width = name_field_width;
298304

299305
// Keep track of running times of all instances of each benchmark family.
300-
std::map<int /*family_index*/, std::vector<BenchmarkReporter::Run>>
301-
complexity_reports;
306+
std::map<int /*family_index*/, BenchmarkReporter::PerFamilyRunReports>
307+
per_family_reports;
302308

303309
if (display_reporter->ReportContext(context) &&
304310
(!file_reporter || file_reporter->ReportContext(context))) {
305311
FlushStreams(display_reporter);
306312
FlushStreams(file_reporter);
307313

314+
size_t num_repetitions_total = 0;
315+
316+
std::vector<internal::BenchmarkRunner> runners;
317+
runners.reserve(benchmarks.size());
308318
for (const BenchmarkInstance& benchmark : benchmarks) {
309-
std::vector<BenchmarkReporter::Run>* complexity_reports_for_family =
310-
nullptr;
319+
BenchmarkReporter::PerFamilyRunReports* reports_for_family = nullptr;
311320
if (benchmark.complexity() != oNone)
312-
complexity_reports_for_family =
313-
&complexity_reports[benchmark.family_index()];
321+
reports_for_family = &per_family_reports[benchmark.family_index()];
322+
323+
runners.emplace_back(benchmark, reports_for_family);
324+
int num_repeats_of_this_instance = runners.back().GetNumRepeats();
325+
num_repetitions_total += num_repeats_of_this_instance;
326+
if (reports_for_family)
327+
reports_for_family->num_runs_total += num_repeats_of_this_instance;
328+
}
329+
assert(runners.size() == benchmarks.size() && "Unexpected runner count.");
330+
331+
std::vector<int> repetition_indices;
332+
repetition_indices.reserve(num_repetitions_total);
333+
for (size_t runner_index = 0, num_runners = runners.size();
334+
runner_index != num_runners; ++runner_index) {
335+
const internal::BenchmarkRunner& runner = runners[runner_index];
336+
std::fill_n(std::back_inserter(repetition_indices),
337+
runner.GetNumRepeats(), runner_index);
338+
}
339+
assert(repetition_indices.size() == num_repetitions_total &&
340+
"Unexpected number of repetition indexes.");
341+
342+
if (FLAGS_benchmark_enable_random_interleaving) {
343+
std::random_device rd;
344+
std::mt19937 g(rd());
345+
std::shuffle(repetition_indices.begin(), repetition_indices.end(), g);
346+
}
314347

315-
RunResults run_results =
316-
RunBenchmark(benchmark, complexity_reports_for_family);
348+
for (size_t repetition_index : repetition_indices) {
349+
internal::BenchmarkRunner& runner = runners[repetition_index];
350+
runner.DoOneRepetition();
351+
if (runner.HasRepeatsRemaining()) continue;
352+
// FIXME: report each repetition separately, not all of them in bulk.
353+
354+
RunResults run_results = runner.GetResults();
355+
356+
// Maybe calculate complexity report
357+
if (BenchmarkReporter::PerFamilyRunReports* reports_for_family =
358+
runner.GetReportsForFamily()) {
359+
if (reports_for_family->num_runs_done ==
360+
reports_for_family->num_runs_total) {
361+
auto additional_run_stats = ComputeBigO(reports_for_family->Runs);
362+
run_results.aggregates_only.insert(run_results.aggregates_only.end(),
363+
additional_run_stats.begin(),
364+
additional_run_stats.end());
365+
per_family_reports.erase(
366+
(int)reports_for_family->Runs.front().family_index);
367+
}
368+
}
317369

318370
Report(display_reporter, file_reporter, run_results);
319371
}
@@ -471,6 +523,7 @@ void PrintUsageAndExit() {
471523
" [--benchmark_filter=<regex>]\n"
472524
" [--benchmark_min_time=<min_time>]\n"
473525
" [--benchmark_repetitions=<num_repetitions>]\n"
526+
" [--benchmark_enable_random_interleaving={true|false}]\n"
474527
" [--benchmark_report_aggregates_only={true|false}]\n"
475528
" [--benchmark_display_aggregates_only={true|false}]\n"
476529
" [--benchmark_format=<console|json|csv>]\n"
@@ -495,6 +548,8 @@ void ParseCommandLineFlags(int* argc, char** argv) {
495548
&FLAGS_benchmark_min_time) ||
496549
ParseInt32Flag(argv[i], "benchmark_repetitions",
497550
&FLAGS_benchmark_repetitions) ||
551+
ParseBoolFlag(argv[i], "benchmark_enable_random_interleaving",
552+
&FLAGS_benchmark_enable_random_interleaving) ||
498553
ParseBoolFlag(argv[i], "benchmark_report_aggregates_only",
499554
&FLAGS_benchmark_report_aggregates_only) ||
500555
ParseBoolFlag(argv[i], "benchmark_display_aggregates_only",

src/benchmark_api_internal.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,6 @@ class BenchmarkInstance {
3939
IterationCount iterations() const { return iterations_; }
4040
int threads() const { return threads_; }
4141

42-
bool last_benchmark_instance;
43-
4442
State Run(IterationCount iters, int thread_id, internal::ThreadTimer* timer,
4543
internal::ThreadManager* manager,
4644
internal::PerfCountersMeasurement* perf_counters_measurement) const;

src/benchmark_register.cc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,6 @@ bool BenchmarkFamilies::FindBenchmarks(
166166
const auto full_name = instance.name().str();
167167
if ((re.Match(full_name) && !isNegativeFilter) ||
168168
(!re.Match(full_name) && isNegativeFilter)) {
169-
instance.last_benchmark_instance = (&args == &family->args_.back());
170169
benchmarks->push_back(std::move(instance));
171170

172171
++per_family_instance_index;

src/benchmark_runner.cc

Lines changed: 20 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -143,9 +143,9 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters,
143143

144144
BenchmarkRunner::BenchmarkRunner(
145145
const benchmark::internal::BenchmarkInstance& b_,
146-
std::vector<BenchmarkReporter::Run>* complexity_reports_)
146+
BenchmarkReporter::PerFamilyRunReports* reports_for_family_)
147147
: b(b_),
148-
complexity_reports(complexity_reports_),
148+
reports_for_family(reports_for_family_),
149149
min_time(!IsZero(b.min_time()) ? b.min_time() : FLAGS_benchmark_min_time),
150150
repeats(b.repetitions() != 0 ? b.repetitions()
151151
: FLAGS_benchmark_repetitions),
@@ -172,22 +172,6 @@ BenchmarkRunner::BenchmarkRunner(
172172
perf_counters_measurement.IsValid())
173173
<< "Perf counters were requested but could not be set up.";
174174
}
175-
176-
for (int repetition_num = 0; repetition_num < repeats; repetition_num++) {
177-
DoOneRepetition(repetition_num);
178-
}
179-
180-
// Calculate additional statistics
181-
run_results.aggregates_only = ComputeStats(run_results.non_aggregates);
182-
183-
// Maybe calculate complexity report
184-
if (complexity_reports && b.last_benchmark_instance) {
185-
auto additional_run_stats = ComputeBigO(*complexity_reports);
186-
run_results.aggregates_only.insert(run_results.aggregates_only.end(),
187-
additional_run_stats.begin(),
188-
additional_run_stats.end());
189-
complexity_reports->clear();
190-
}
191175
}
192176

193177
BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() {
@@ -283,8 +267,10 @@ bool BenchmarkRunner::ShouldReportIterationResults(
283267
((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time());
284268
}
285269

286-
void BenchmarkRunner::DoOneRepetition(int64_t repetition_index) {
287-
const bool is_the_first_repetition = repetition_index == 0;
270+
void BenchmarkRunner::DoOneRepetition() {
271+
assert(HasRepeatsRemaining() && "Already done all repetitions?");
272+
273+
const bool is_the_first_repetition = num_repetitions_done == 0;
288274
IterationResults i;
289275

290276
// We *may* be gradually increasing the length (iteration count)
@@ -337,19 +323,25 @@ void BenchmarkRunner::DoOneRepetition(int64_t repetition_index) {
337323
// Ok, now actually report.
338324
BenchmarkReporter::Run report =
339325
CreateRunReport(b, i.results, memory_iterations, memory_result, i.seconds,
340-
repetition_index, repeats);
326+
num_repetitions_done, repeats);
341327

342-
if (complexity_reports && !report.error_occurred)
343-
complexity_reports->push_back(report);
328+
if (reports_for_family) {
329+
++reports_for_family->num_runs_done;
330+
if (!report.error_occurred) reports_for_family->Runs.push_back(report);
331+
}
344332

345333
run_results.non_aggregates.push_back(report);
334+
335+
++num_repetitions_done;
346336
}
347337

348-
RunResults RunBenchmark(
349-
const benchmark::internal::BenchmarkInstance& b,
350-
std::vector<BenchmarkReporter::Run>* complexity_reports) {
351-
internal::BenchmarkRunner r(b, complexity_reports);
352-
return r.get_results();
338+
RunResults&& BenchmarkRunner::GetResults() {
339+
assert(!HasRepeatsRemaining() && "Did not run all repetitions yet?");
340+
341+
// Calculate additional statistics over the repetitions of this instance.
342+
run_results.aggregates_only = ComputeStats(run_results.non_aggregates);
343+
344+
return std::move(run_results);
353345
}
354346

355347
} // end namespace internal

src/benchmark_runner.h

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -50,20 +50,34 @@ struct RunResults {
5050
class BenchmarkRunner {
5151
public:
5252
BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_,
53-
std::vector<BenchmarkReporter::Run>* complexity_reports_);
53+
BenchmarkReporter::PerFamilyRunReports* reports_for_family);
5454

55-
RunResults&& get_results() { return std::move(run_results); }
55+
int GetNumRepeats() const { return repeats; }
56+
57+
bool HasRepeatsRemaining() const {
58+
return GetNumRepeats() != num_repetitions_done;
59+
}
60+
61+
void DoOneRepetition();
62+
63+
RunResults&& GetResults();
64+
65+
BenchmarkReporter::PerFamilyRunReports* GetReportsForFamily() const {
66+
return reports_for_family;
67+
};
5668

5769
private:
5870
RunResults run_results;
5971

6072
const benchmark::internal::BenchmarkInstance& b;
61-
std::vector<BenchmarkReporter::Run>* complexity_reports;
73+
BenchmarkReporter::PerFamilyRunReports* reports_for_family;
6274

6375
const double min_time;
6476
const int repeats;
6577
const bool has_explicit_iteration_count;
6678

79+
int num_repetitions_done = 0;
80+
6781
std::vector<std::thread> pool;
6882

6983
IterationCount iters; // preserved between repetitions!
@@ -83,14 +97,8 @@ class BenchmarkRunner {
8397
IterationCount PredictNumItersNeeded(const IterationResults& i) const;
8498

8599
bool ShouldReportIterationResults(const IterationResults& i) const;
86-
87-
void DoOneRepetition(int64_t repetition_index);
88100
};
89101

90-
RunResults RunBenchmark(
91-
const benchmark::internal::BenchmarkInstance& b,
92-
std::vector<BenchmarkReporter::Run>* complexity_reports);
93-
94102
} // namespace internal
95103

96104
} // end namespace benchmark

test/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ if (BENCHMARK_ENABLE_GTEST_TESTS)
199199

200200
add_gtest(benchmark_gtest)
201201
add_gtest(benchmark_name_gtest)
202+
add_gtest(benchmark_random_interleaving_gtest)
202203
add_gtest(commandlineflags_gtest)
203204
add_gtest(statistics_gtest)
204205
add_gtest(string_util_gtest)

0 commit comments

Comments
 (0)