Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
25a5fdf
Implementation of random interleaving. See
haihuang-ml Mar 28, 2021
f603351
Fix benchmark_random_interleaving_gtest.cc for fr-1051
haihuang-ml Mar 28, 2021
58f3a39
Fix macos build for fr-1051
haihuang-ml Mar 28, 2021
89d3a78
Fix macos and windows build for fr-1051.
haihuang-ml Mar 28, 2021
0aadade
Fix benchmark_random_interleaving_test.cc for macos and windows in fr…
haihuang-ml Mar 28, 2021
f42e87c
Fix int type benchmark_random_interleaving_gtest for macos in fr-1051
haihuang-ml Mar 28, 2021
8660ae1
Address dominichamon's comments 03/29 for fr-1051
haihuang-ml Mar 30, 2021
4640ed4
Address dominichamon's comment on default min_time / repetitions for …
haihuang-ml Mar 31, 2021
1a2b6df
Fix windows test failures for fr-1051
haihuang-ml Apr 2, 2021
81c9ab0
Add license blurb for fr-1051.
haihuang-ml Apr 13, 2021
70fe24a
Switch to std::shuffle() for fr-1105.
haihuang-ml Apr 19, 2021
4966e90
Change to 1e-9 in fr-1105
haihuang-ml Apr 29, 2021
688eaff
Merge branch 'master' into fr-1051
haihuang-ml Apr 29, 2021
9399f14
Fix broken build caused by bad merge for fr-1105.
haihuang-ml Apr 29, 2021
5923bf6
Merge branch 'master' into fr-1051
dominichamon May 6, 2021
c51e52c
Merge branch 'master' into fr-1051
dominichamon May 10, 2021
bb8e0e9
Fix build breakage for fr-1051.
haihuang-ml May 10, 2021
32e86fc
Print out reports as they come in if random interleaving is disabled …
haihuang-ml May 10, 2021
1bc2173
size_t, int64_t --> int in benchmark_runner for fr-1051.
haihuang-ml May 12, 2021
ce7220a
Address comments from dominichamon for fr-1051
haihuang-ml May 14, 2021
81ac7fe
benchmar_indices --> size_t to make CI pass: fr-1051
haihuang-ml May 16, 2021
086b15e
Fix min_time not initialized issue for fr-1051.
haihuang-ml May 16, 2021
ee70382
min_time --> MinTime in fr-1051.
haihuang-ml May 18, 2021
4bdafa9
Merge branch 'master' into fr-1051
dominichamon May 19, 2021
736875b
Add doc for random interleaving for fr-1051
haihuang-ml May 20, 2021
d340447
Merge branch 'fr-1051' of github.com:haih-g/benchmark into fr-1051
haihuang-ml May 20, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions include/benchmark/benchmark.h
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,7 @@ struct Statistics {
: name_(name), compute_(compute) {}
};

struct BenchmarkInstance;
class BenchmarkInstance;
class ThreadTimer;
class ThreadManager;

Expand Down Expand Up @@ -686,7 +686,7 @@ class State {
internal::ThreadTimer* timer_;
internal::ThreadManager* manager_;

friend struct internal::BenchmarkInstance;
friend class internal::BenchmarkInstance;
};

inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunning() {
Expand Down Expand Up @@ -961,6 +961,7 @@ class Benchmark {

private:
friend class BenchmarkFamilies;
friend class BenchmarkInstance;

std::string name_;
AggregationReportMode aggregation_report_mode_;
Expand Down
156 changes: 127 additions & 29 deletions src/benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <limits>
#include <memory>
#include <random>
#include <string>
#include <thread>
#include <utility>
Expand All @@ -51,6 +53,18 @@
#include "thread_manager.h"
#include "thread_timer.h"

// Each benchmark can be repeated a number of times, and within each
// *repetition*, we run the user-defined benchmark function a number of
// *iterations*. The number of repetitions is determined based on flags
// (--benchmark_repetitions).
namespace {

// Default minimum running time, in seconds, used when --benchmark_min_time is
// left unset. Without random interleaving this value is used as-is; with
// random interleaving it is divided by kDefaultRepetitions to obtain the
// per-repetition minimum (see GetMinTime()).
constexpr double kDefaultMinTimeTotal = 0.5;
// Default number of repetitions used when --benchmark_repetitions is left
// unset and random interleaving is enabled (see GetRepetitions()).
constexpr size_t kDefaultRepetitions = 12;

} // namespace

// Print a list of benchmarks. This option overrides all other options.
DEFINE_bool(benchmark_list_tests, false);

Expand All @@ -59,16 +73,32 @@ DEFINE_bool(benchmark_list_tests, false);
// linked into the binary are run.
DEFINE_string(benchmark_filter, ".");

// Minimum number of seconds we should run benchmark before results are
// considered significant. For cpu-time based tests, this is the lower bound
// on the total cpu time used by all threads that make up the test. For
// real-time based tests, this is the lower bound on the elapsed time of the
// benchmark execution, regardless of number of threads.
DEFINE_double(benchmark_min_time, 0.5);
// Minimum number of seconds we should run benchmark per repetition before
// results are considered significant. For cpu-time based tests, this is the
// lower bound on the total cpu time used by all threads that make up the test.
// For real-time based tests, this is the lower bound on the elapsed time of the
// benchmark execution, regardless of number of threads. If left unset, will use
// 0.5 / 12 if random interleaving is enabled. Otherwise, will use 0.5.
// Do NOT read this flag directly. Use GetMinTime() to read this flag.
DEFINE_double(benchmark_min_time, -1.0);

// The number of runs of each benchmark. If greater than 1, the mean and
// standard deviation of the runs will be reported.
DEFINE_int32(benchmark_repetitions, 1);
// standard deviation of the runs will be reported. By default, the number of
// repetitions is 1 if random interleaving is disabled, and up to 12 if random
// interleaving is enabled. (Read the documentation for random interleaving to
// see why it might be less than 12.)
// Do NOT read this flag directly. Use GetRepetitions() to access this flag.
DEFINE_int32(benchmark_repetitions, -1);

// The maximum overhead allowed for random interleaving. A value X means total
// execution time under random interleaving is limited by
// (1 + X) * original total execution time. Set to 'inf' to allow infinite
// overhead.
DEFINE_double(benchmark_random_interleaving_max_overhead, 0.4);

// If set, enable random interleaving. See
// http://github.com/google/benchmark/issues/1051 for details.
DEFINE_bool(benchmark_enable_random_interleaving, false);

// Report the result of each benchmark repetitions. When 'true' is specified
// only the mean, standard deviation, and other statistics are reported for
Expand Down Expand Up @@ -110,6 +140,37 @@ namespace benchmark {

namespace internal {

// Performance measurements always come with random variance. This is the
// factor by which the required number of iterations is overestimated in order
// to reduce the probability that the minimum time requirement is not met.
const double kSafetyMultiplier = 1.4;

// Returns the effective minimum running time per repetition, in seconds.
// A non-negative --benchmark_min_time is honored verbatim. Any other value
// means the flag was not supplied, and the default applies:
// kDefaultMinTimeTotal, split evenly over kDefaultRepetitions when random
// interleaving is enabled.
double GetMinTime() {
  const double flag_value = FLAGS_benchmark_min_time;
  if (flag_value >= 0.0) return flag_value;

  return FLAGS_benchmark_enable_random_interleaving
             ? kDefaultMinTimeTotal / kDefaultRepetitions
             : kDefaultMinTimeTotal;
}

// Returns the effective number of repetitions.
// A non-negative --benchmark_repetitions is honored verbatim. A negative value
// means the flag was not supplied, and the default applies:
// kDefaultRepetitions when random interleaving is enabled, 1 otherwise.
size_t GetRepetitions() {
  const int flag_value = FLAGS_benchmark_repetitions;
  if (flag_value >= 0) return static_cast<size_t>(flag_value);

  return FLAGS_benchmark_enable_random_interleaving ? kDefaultRepetitions : 1;
}

// FIXME: wouldn't LTO mess this up?
void UseCharPointer(char const volatile*) {}

Expand Down Expand Up @@ -222,15 +283,15 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
CHECK(display_reporter != nullptr);

// Determine the width of the name field using a minimum width of 10.
bool might_have_aggregates = FLAGS_benchmark_repetitions > 1;
bool might_have_aggregates = GetRepetitions() > 1;
size_t name_field_width = 10;
size_t stat_field_width = 0;
for (const BenchmarkInstance& benchmark : benchmarks) {
name_field_width =
std::max<size_t>(name_field_width, benchmark.name.str().size());
might_have_aggregates |= benchmark.repetitions > 1;
std::max<size_t>(name_field_width, benchmark.name().str().size());
might_have_aggregates |= benchmark.repetitions() > 1;

for (const auto& Stat : *benchmark.statistics)
for (const auto& Stat : *benchmark.statistics())
stat_field_width = std::max<size_t>(stat_field_width, Stat.name_.size());
}
if (might_have_aggregates) name_field_width += 1 + stat_field_width;
Expand All @@ -255,23 +316,56 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
flushStreams(display_reporter);
flushStreams(file_reporter);

for (const auto& benchmark : benchmarks) {
RunResults run_results = RunBenchmark(benchmark, &complexity_reports);

auto report = [&run_results](BenchmarkReporter* reporter,
bool report_aggregates_only) {
assert(reporter);
// If there are no aggregates, do output non-aggregates.
report_aggregates_only &= !run_results.aggregates_only.empty();
if (!report_aggregates_only)
reporter->ReportRuns(run_results.non_aggregates);
if (!run_results.aggregates_only.empty())
reporter->ReportRuns(run_results.aggregates_only);
};

report(display_reporter, run_results.display_report_aggregates_only);
// Without random interleaving, benchmarks are executed in the order of:
// A, A, ..., A, B, B, ..., B, C, C, ..., C, ...
// That is, repetition is within RunBenchmark(), hence the name
// inner_repetitions.
// With random interleaving, benchmarks are executed in the order of:
// {Random order of A, B, C, ...}, {Random order of A, B, C, ...}, ...
// That is, repetitions is outside of RunBenchmark(), hence the name
// outer_repetitions.
size_t inner_repetitions =
FLAGS_benchmark_enable_random_interleaving ? 1 : GetRepetitions();
size_t outer_repetitions =
FLAGS_benchmark_enable_random_interleaving ? GetRepetitions() : 1;
std::vector<size_t> benchmark_indices(benchmarks.size());
for (size_t i = 0; i < benchmarks.size(); ++i) {
benchmark_indices[i] = i;
}

// 'run_results_vector' and 'benchmarks' are parallel arrays.
std::vector<RunResults> run_results_vector(benchmarks.size());
for (size_t i = 0; i < outer_repetitions; i++) {
if (FLAGS_benchmark_enable_random_interleaving) {
std::random_shuffle(benchmark_indices.begin(), benchmark_indices.end());
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

std::random_shuffle uses std::rand and was deprecated in C++11 and removed in C++17. std::shuffle is its replacement. See https://en.cppreference.com/w/cpp/algorithm/random_shuffle

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

}
for (size_t j : benchmark_indices) {
// Repetitions will be automatically adjusted under random interleaving.
if (!FLAGS_benchmark_enable_random_interleaving ||
i < benchmarks[j].random_interleaving_repetitions()) {
RunBenchmark(benchmarks[j], outer_repetitions, inner_repetitions,
&complexity_reports, &run_results_vector[j]);
}
}
}

auto report = [](BenchmarkReporter* reporter, bool report_aggregates_only,
const RunResults& run_results) {
assert(reporter);
// If there are no aggregates, do output non-aggregates.
report_aggregates_only &= !run_results.aggregates_only.empty();
if (!report_aggregates_only)
reporter->ReportRuns(run_results.non_aggregates);
if (!run_results.aggregates_only.empty())
reporter->ReportRuns(run_results.aggregates_only);
};

for (const RunResults& run_results : run_results_vector) {
report(display_reporter, run_results.display_report_aggregates_only,
run_results);
if (file_reporter)
report(file_reporter, run_results.file_report_aggregates_only);
report(file_reporter, run_results.file_report_aggregates_only,
run_results);

flushStreams(display_reporter);
flushStreams(file_reporter);
Expand Down Expand Up @@ -399,7 +493,7 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,

if (FLAGS_benchmark_list_tests) {
for (auto const& benchmark : benchmarks)
Out << benchmark.name.str() << "\n";
Out << benchmark.name().str() << "\n";
} else {
internal::RunBenchmarks(benchmarks, display_reporter, file_reporter);
}
Expand Down Expand Up @@ -443,6 +537,10 @@ void ParseCommandLineFlags(int* argc, char** argv) {
&FLAGS_benchmark_min_time) ||
ParseInt32Flag(argv[i], "benchmark_repetitions",
&FLAGS_benchmark_repetitions) ||
ParseBoolFlag(argv[i], "benchmark_enable_random_interleaving",
&FLAGS_benchmark_enable_random_interleaving) ||
ParseDoubleFlag(argv[i], "benchmark_random_interleaving_max_overhead",
&FLAGS_benchmark_random_interleaving_max_overhead) ||
ParseBoolFlag(argv[i], "benchmark_report_aggregates_only",
&FLAGS_benchmark_report_aggregates_only) ||
ParseBoolFlag(argv[i], "benchmark_display_aggregates_only",
Expand Down
111 changes: 111 additions & 0 deletions src/benchmark_adjust_repetitions.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
#include "benchmark_adjust_repetitions.h"

#include "benchmark_api_internal.h"
#include "log.h"

namespace benchmark {
namespace internal {

namespace {

// One nanosecond expressed in seconds.
constexpr double kNanosecondInSecond = 1e-9;
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

= 1e-9

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done


} // namespace

// Computes the number of repetitions to run under random interleaving such
// that the estimated total execution time stays within
// (1 + input.max_overhead) times the estimated total execution time without
// random interleaving. The result is clamped from above by
// input.max_repetitions.
size_t ComputeRandomInterleavingRepetitions(
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this should be returning int rather than size_t.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

InternalRandomInterleavingRepetitionsInput input) {
// Find the repetitions such that total overhead is bounded. Let
// n = desired number of repetitions, i.e., the output of this method.
// t = total real execution time per repetition including overhead,
// (input.total_execution_time_per_repetition).
// o = maximum allowed increase in total real execution time due to random
// interleaving, measured as a fraction (input.max_overhead).
// e = estimated total execution time without Random Interleaving
// We want
// t * n / e <= 1 + o
// I.e.,
// n <= (1 + o) * e / t
//
// Let
// h = overhead per repetition, which includes all setup / teardown time and
// also the execution time of preliminary trials used to search for the
// correct number of iterations.
// r = real execution time per repetition not including overhead
// (input.real_time_used_per_repetition).
// s = measured execution time per repetition not including overhead,
// which can be either real or CPU time
// (input.time_used_per_repetition).
// We have
// h = t - r
//
// Let
// m = total minimum measured execution time for all repetitions
// (input.min_time_per_repetition * input.max_repetitions).
// Let
// f = m / s
// f is the scale factor between m and s, and will be used to estimate
// l, the total real execution time for all repetitions excluding the
// overhead. It's reasonable to assume that the real execution time excluding
// the overhead is proportional to the measured time. Hence we expect to see
// l / r to be equal to m / s. That is, l / r = f, thus, l = r * f. Then the
// total execution time e can be estimated by h + l, which is h + r * f.
// e = h + r * f
// Note that this might be an underestimation. If number of repetitions is
// reduced, we may need to run more iterations per repetition, and that may
// increase the number of preliminary trials needed to find the correct
// number of iterations.

// h is clamped at 0 in case the measured real time exceeds the total time.
double h = std::max(0.0, input.total_execution_time_per_repetition -
input.real_time_used_per_repetition);
// r and s are floored at one nanosecond so the divisions below never divide
// by zero.
double r =
std::max(input.real_time_used_per_repetition, kNanosecondInSecond);
double s =
std::max(input.time_used_per_repetition, kNanosecondInSecond);
double m = input.min_time_per_repetition * input.max_repetitions;

// f = m / s
// RunBenchmark() always overshoots the iteration count by kSafetyMultiplier.
// Apply the same factor here.
// f = kSafetyMultiplier * m / s
// Also we want to make sure 1 <= f <= input.max_repetitions. Note that we
// may not be able to reach m because the total iters per repetition is
// upper bounded by --benchmark_max_iters. This behavior is preserved in
// Random Interleaving, as we won't run repetitions more than
// input.max_repetitions to reach m.

double f = kSafetyMultiplier * m / s;
f = std::min(std::max(f, 1.0), static_cast<double>(input.max_repetitions));

double e = h + r * f;
// n <= (1 + o) * e / t = (1 + o) * e / (h + r)
// Also we want to make sure 1 <= n <= input.max_repetitions, and (h + r) > 0.
double n = (1 + input.max_overhead) * e / (h + r);
n = std::min(std::max(n, 1.0), static_cast<double>(input.max_repetitions));

// The conversion truncates toward zero, i.e. the fractional part of n is
// discarded.
size_t n_size_t = static_cast<size_t>(n);

VLOG(2) << "Computed random interleaving repetitions"
<< "\n input.total_execution_time_per_repetition: "
<< input.total_execution_time_per_repetition
<< "\n input.time_used_per_repetition: "
<< input.time_used_per_repetition
<< "\n input.real_time_used_per_repetition: "
<< input.real_time_used_per_repetition
<< "\n input.min_time_per_repetitions: "
<< input.min_time_per_repetition
<< "\n input.max_repetitions: " << input.max_repetitions
<< "\n input.max_overhead: " << input.max_overhead
<< "\n h: " << h
<< "\n r: " << r
<< "\n s: " << s
<< "\n f: " << f
<< "\n m: " << m
<< "\n e: " << e
<< "\n n: " << n
<< "\n n_size_t: " << n_size_t;

return n_size_t;
}

} // internal
} // benchmark
28 changes: 28 additions & 0 deletions src/benchmark_adjust_repetitions.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#ifndef BENCHMARK_ADJUST_REPETITIONS_H
#define BENCHMARK_ADJUST_REPETITIONS_H

#include "benchmark/benchmark.h"
#include "commandlineflags.h"

namespace benchmark {
namespace internal {

// Defines the input tuple to ComputeRandomInterleavingRepetitions().
// NOTE(review): the time fields appear to be in seconds (they are floored
// against a one-nanosecond constant expressed in seconds) - confirm with the
// caller.
struct InternalRandomInterleavingRepetitionsInput {
// Total real execution time per repetition, including overhead.
double total_execution_time_per_repetition;
// Measured execution time per repetition, not including overhead.
double time_used_per_repetition;
// Real execution time per repetition, not including overhead.
double real_time_used_per_repetition;
// Minimum measured time per repetition; multiplied by max_repetitions to get
// the total minimum measured time across all repetitions.
double min_time_per_repetition;
// Maximum allowed increase in total real execution time due to random
// interleaving, expressed as a fraction.
double max_overhead;
// Upper bound on the number of repetitions returned.
size_t max_repetitions;
};

// Should be called right after the first repetition is completed to estimate
// the number of repetitions.
size_t ComputeRandomInterleavingRepetitions(
InternalRandomInterleavingRepetitionsInput input);

} // end namespace internal
} // end namespace benchmark

#endif // BENCHMARK_ADJUST_REPETITIONS_H
Loading