Implementation of random interleaving. #1105
src/benchmark.cc
@@ -32,7 +32,9 @@
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <limits>
#include <memory>
#include <random>
#include <string>
#include <thread>
#include <utility>
@@ -51,6 +53,18 @@
#include "thread_manager.h"
#include "thread_timer.h"

// Each benchmark can be repeated a number of times, and within each
// *repetition*, we run the user-defined benchmark function a number of
// *iterations*. The number of repetitions is determined based on flags
// (--benchmark_repetitions).
namespace {

// Attempt to make each repetition run for at least this much time.
constexpr double kDefaultMinTimeTotalSecs = 0.5;
constexpr int64_t kRandomInterleavingDefaultRepetitions = 12;

}  // namespace

// Print a list of benchmarks. This option overrides all other options.
DEFINE_bool(benchmark_list_tests, false);
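For example (numbers purely illustrative): with --benchmark_repetitions=3, a registered benchmark is repeated three times, and within each of those repetitions the framework picks an iteration count large enough for that repetition to meet the minimum running time; kDefaultMinTimeTotalSecs above is the target for the total measured time across all repetitions.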
@@ -59,16 +73,39 @@ DEFINE_bool(benchmark_list_tests, false);
// linked into the binary are run.
DEFINE_string(benchmark_filter, ".");

// Minimum number of seconds we should run benchmark before results are
// considered significant.  For cpu-time based tests, this is the lower bound
// on the total cpu time used by all threads that make up the test.  For
// real-time based tests, this is the lower bound on the elapsed time of the
// benchmark execution, regardless of number of threads.
DEFINE_double(benchmark_min_time, 0.5);
// Do NOT read these flags directly. Use Get*() to read them.
namespace do_not_read_flag_directly {

// Minimum number of seconds we should run benchmark per repetition before
// results are considered significant. For cpu-time based tests, this is the
// lower bound on the total cpu time used by all threads that make up the test.
// For real-time based tests, this is the lower bound on the elapsed time of the
// benchmark execution, regardless of number of threads. If left unset, will use
// kDefaultMinTimeTotalSecs / FLAGS_benchmark_repetitions if random
// interleaving is enabled. Otherwise, will use kDefaultMinTimeTotalSecs.
// Do NOT read this flag directly. Use GetMinTime() to read this flag.
DEFINE_double(benchmark_min_time, -1.0);

// The number of runs of each benchmark. If greater than 1, the mean and
// standard deviation of the runs will be reported.
DEFINE_int32(benchmark_repetitions, 1);
// standard deviation of the runs will be reported. By default, the number of
// repetitions is 1 if random interleaving is disabled, and up to
// kDefaultRepetitions if random interleaving is enabled. (Read the
// documentation for random interleaving to see why it might be less than
// kDefaultRepetitions.)
// Do NOT read this flag directly. Use GetRepetitions() to access this flag.
DEFINE_int32(benchmark_repetitions, -1);

}  // namespace do_not_read_flag_directly

// The maximum overhead allowed for random interleaving. A value X means total
// execution time under random interleaving is limited by
// (1 + X) * original total execution time. Set to 'inf' to allow infinite
// overhead.
DEFINE_double(benchmark_random_interleaving_max_overhead, 0.4);

// If set, enable random interleaving. See
// http://github.com/google/benchmark/issues/1051 for details.
DEFINE_bool(benchmark_enable_random_interleaving, false);

// Report the result of each benchmark repetitions. When 'true' is specified
// only the mean, standard deviation, and other statistics are reported for
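To spell out how these defaults interact (the arithmetic follows directly from the constants defined in this patch): with --benchmark_enable_random_interleaving set and neither --benchmark_repetitions nor --benchmark_min_time given, GetRepetitions() falls back to kRandomInterleavingDefaultRepetitions = 12 and GetMinTime() falls back to kDefaultMinTimeTotalSecs / 12 = 0.5 / 12 ≈ 0.042 s per repetition, so the total measured time still targets about 0.5 s. Without random interleaving and with no flags given, the fallbacks are 1 repetition and 0.5 s, i.e. the pre-existing behavior.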
@@ -110,6 +147,30 @@ namespace benchmark {

namespace internal {

// Performance measurements always come with random variances. Defines a
// factor by which the required number of iterations is overestimated in order
// to reduce the probability that the minimum time requirement will not be met.
const double kSafetyMultiplier = 1.4;

// Wraps --benchmark_min_time and returns valid default values if not supplied.
double GetMinTime() {
  const double default_min_time = kDefaultMinTimeTotalSecs / GetRepetitions();
  const double flag_min_time =
      do_not_read_flag_directly::FLAGS_benchmark_min_time;
  return flag_min_time >= 0.0 ? flag_min_time : default_min_time;
}

// Wraps --benchmark_repetitions and returns a valid default value if not
// supplied.
int64_t GetRepetitions() {
  const int64_t default_repetitions =
      FLAGS_benchmark_enable_random_interleaving
          ? kRandomInterleavingDefaultRepetitions
          : 1;
  const int64_t flag_repetitions =
      do_not_read_flag_directly::FLAGS_benchmark_repetitions;
  return flag_repetitions >= 0 ? flag_repetitions : default_repetitions;
}

// FIXME: wouldn't LTO mess this up?
void UseCharPointer(char const volatile*) {}
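To see why the overestimation factor matters, consider a hypothetical trial (the iteration-scaling code itself is not part of this excerpt, so the numbers and the scaling step are only an illustration of how such a factor is typically applied): a repetition of 1000 iterations measured 0.01 s, and GetMinTime() is 0.042 s. Scaling linearly would suggest 0.042 / 0.01 * 1000 = 4200 iterations for the next attempt; multiplying by kSafetyMultiplier = 1.4 bumps that to about 5880 iterations, reducing the chance that timing noise leaves the repetition short of the minimum time.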
@@ -222,15 +283,15 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
  CHECK(display_reporter != nullptr);

  // Determine the width of the name field using a minimum width of 10.
  bool might_have_aggregates = FLAGS_benchmark_repetitions > 1;
  bool might_have_aggregates = GetRepetitions() > 1;
  size_t name_field_width = 10;
  size_t stat_field_width = 0;
  for (const BenchmarkInstance& benchmark : benchmarks) {
    name_field_width =
        std::max<size_t>(name_field_width, benchmark.name.str().size());
    might_have_aggregates |= benchmark.repetitions > 1;
        std::max<size_t>(name_field_width, benchmark.name().str().size());
    might_have_aggregates |= benchmark.repetitions() > 1;

    for (const auto& Stat : *benchmark.statistics)
    for (const auto& Stat : *benchmark.statistics())
      stat_field_width = std::max<size_t>(stat_field_width, Stat.name_.size());
  }
  if (might_have_aggregates) name_field_width += 1 + stat_field_width;
@@ -255,23 +316,56 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
  flushStreams(display_reporter);
  flushStreams(file_reporter);

  for (const auto& benchmark : benchmarks) {
    RunResults run_results = RunBenchmark(benchmark, &complexity_reports);

    auto report = [&run_results](BenchmarkReporter* reporter,
                                 bool report_aggregates_only) {
      assert(reporter);
      // If there are no aggregates, do output non-aggregates.
      report_aggregates_only &= !run_results.aggregates_only.empty();
      if (!report_aggregates_only)
        reporter->ReportRuns(run_results.non_aggregates);
      if (!run_results.aggregates_only.empty())
        reporter->ReportRuns(run_results.aggregates_only);
    };

    report(display_reporter, run_results.display_report_aggregates_only);
  // Without random interleaving, benchmarks are executed in the order of:
  //   A, A, ..., A, B, B, ..., B, C, C, ..., C, ...
  // That is, repetition is within RunBenchmark(), hence the name
  // inner_repetitions.
  // With random interleaving, benchmarks are executed in the order of:
  //   {Random order of A, B, C, ...}, {Random order of A, B, C, ...}, ...
  // That is, repetitions is outside of RunBenchmark(), hence the name
  // outer_repetitions.
  int64_t inner_repetitions =
      FLAGS_benchmark_enable_random_interleaving ? 1 : GetRepetitions();
  int64_t outer_repetitions =
      FLAGS_benchmark_enable_random_interleaving ? GetRepetitions() : 1;
  std::vector<size_t> benchmark_indices(benchmarks.size());
  for (size_t i = 0; i < benchmarks.size(); ++i) {
    benchmark_indices[i] = i;
  }

  // 'run_results_vector' and 'benchmarks' are parallel arrays.
  std::vector<RunResults> run_results_vector(benchmarks.size());
  for (int64_t i = 0; i < outer_repetitions; i++) {
    if (FLAGS_benchmark_enable_random_interleaving) {
      std::random_shuffle(benchmark_indices.begin(), benchmark_indices.end());
    }
    for (size_t j : benchmark_indices) {
      // Repetitions will be automatically adjusted under random interleaving.
      if (!FLAGS_benchmark_enable_random_interleaving ||
          i < benchmarks[j].random_interleaving_repetitions()) {
        RunBenchmark(benchmarks[j], outer_repetitions, inner_repetitions,
                     &complexity_reports, &run_results_vector[j]);
      }
    }
  }

  auto report = [](BenchmarkReporter* reporter, bool report_aggregates_only,
                   const RunResults& run_results) {
    assert(reporter);
    // If there are no aggregates, do output non-aggregates.
    report_aggregates_only &= !run_results.aggregates_only.empty();
    if (!report_aggregates_only)
      reporter->ReportRuns(run_results.non_aggregates);
    if (!run_results.aggregates_only.empty())
      reporter->ReportRuns(run_results.aggregates_only);
  };

  for (const RunResults& run_results : run_results_vector) {
    report(display_reporter, run_results.display_report_aggregates_only,
           run_results);
    if (file_reporter)
      report(file_reporter, run_results.file_report_aggregates_only);
      report(file_reporter, run_results.file_report_aggregates_only,
             run_results);

    flushStreams(display_reporter);
    flushStreams(file_reporter);
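To make the comment about execution order concrete (benchmark names invented for illustration): with three benchmarks A, B, C and three repetitions, the classic mode runs A, A, A, B, B, B, C, C, C, while random interleaving runs three shuffled rounds such as B, C, A, then A, C, B, then C, B, A, so repeated measurements of the same benchmark are spread across the whole run and are less correlated with transient machine state. A benchmark can also drop out of later rounds once its adjusted random_interleaving_repetitions() count has been reached, which is how the overhead budget is enforced.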
@@ -399,7 +493,7 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,

  if (FLAGS_benchmark_list_tests) {
    for (auto const& benchmark : benchmarks)
      Out << benchmark.name.str() << "\n";
      Out << benchmark.name().str() << "\n";
  } else {
    internal::RunBenchmarks(benchmarks, display_reporter, file_reporter);
  }
@@ -439,10 +533,16 @@ void ParseCommandLineFlags(int* argc, char** argv) {
    if (ParseBoolFlag(argv[i], "benchmark_list_tests",
                      &FLAGS_benchmark_list_tests) ||
        ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) ||
        ParseDoubleFlag(argv[i], "benchmark_min_time",
                        &FLAGS_benchmark_min_time) ||
        ParseInt32Flag(argv[i], "benchmark_repetitions",
                       &FLAGS_benchmark_repetitions) ||
        ParseDoubleFlag(
            argv[i], "benchmark_min_time",
            &do_not_read_flag_directly::FLAGS_benchmark_min_time) ||
        ParseInt32Flag(
            argv[i], "benchmark_repetitions",
            &do_not_read_flag_directly::FLAGS_benchmark_repetitions) ||
        ParseBoolFlag(argv[i], "benchmark_enable_random_interleaving",
                      &FLAGS_benchmark_enable_random_interleaving) ||
        ParseDoubleFlag(argv[i], "benchmark_random_interleaving_max_overhead",
                        &FLAGS_benchmark_random_interleaving_max_overhead) ||
        ParseBoolFlag(argv[i], "benchmark_report_aggregates_only",
                      &FLAGS_benchmark_report_aggregates_only) ||
        ParseBoolFlag(argv[i], "benchmark_display_aggregates_only",
src/benchmark_adjust_repetitions.cc (new file)
@@ -0,0 +1,111 @@
#include "benchmark_adjust_repetitions.h"

#include <algorithm>  // for std::max / std::min

#include "benchmark_api_internal.h"
#include "log.h"

namespace benchmark {
namespace internal {

namespace {

constexpr double kNanosecondInSecond = 0.000000001;

}  // namespace

size_t ComputeRandomInterleavingRepetitions(
    InternalRandomInterleavingRepetitionsInput input) {
  // Find the repetitions such that total overhead is bounded. Let
  //   n = desired number of repetitions, i.e., the output of this method.
  //   t = total real execution time per repetition including overhead
  //       (input.total_execution_time_per_repetition).
  //   o = maximum allowed increase in total real execution time due to random
  //       interleaving, measured as a fraction (input.max_overhead).
  //   e = estimated total execution time without Random Interleaving.
  // We want
  //   t * n / e <= 1 + o
  // I.e.,
  //   n <= (1 + o) * e / t
  //
  // Let
  //   h = overhead per repetition, which includes all setup / teardown time
  //       and also the execution time of preliminary trials used to search for
  //       the correct number of iterations.
  //   r = real execution time per repetition not including overhead
  //       (input.real_time_used_per_repetition).
  //   s = measured execution time per repetition not including overhead,
  //       which can be either real or CPU time
  //       (input.time_used_per_repetition).
  // We have
  //   h = t - r
  //
  // Let
  //   m = total minimum measured execution time for all repetitions
  //       (input.min_time_per_repetition * input.max_repetitions).
  // Let
  //   f = m / s
  // f is the scale factor between m and s, and will be used to estimate
  // l, the total real execution time for all repetitions excluding the
  // overhead. It's reasonable to assume that the real execution time excluding
  // the overhead is proportional to the measured time. Hence we expect l / r
  // to be equal to m / s. That is, l / r = f, thus l = r * f. Then the
  // total execution time e can be estimated by h + l, which is h + r * f.
  //   e = h + r * f
  // Note that this might be an underestimation. If the number of repetitions
  // is reduced, we may need to run more iterations per repetition, and that
  // may increase the number of preliminary trials needed to find the correct
  // number of iterations.

  double h = std::max(0.0, input.total_execution_time_per_repetition -
                               input.real_time_used_per_repetition);
  double r =
      std::max(input.real_time_used_per_repetition, kNanosecondInSecond);
  double s =
      std::max(input.time_used_per_repetition, kNanosecondInSecond);
  double m = input.min_time_per_repetition * input.max_repetitions;

  //   f = m / s
  // RunBenchmark() always overshoots the iteration count by kSafetyMultiplier.
  // Apply the same factor here.
  //   f = kSafetyMultiplier * m / s
  // Also we want to make sure 1 <= f <= input.max_repetitions. Note that we
  // may not be able to reach m because the total iters per repetition is
  // upper bounded by --benchmark_max_iters. This behavior is preserved in
  // Random Interleaving, as we won't run more than input.max_repetitions
  // repetitions to reach m.

  double f = kSafetyMultiplier * m / s;
  f = std::min(std::max(f, 1.0), static_cast<double>(input.max_repetitions));

  double e = h + r * f;
  //   n <= (1 + o) * e / t = (1 + o) * e / (h + r)
  // Also we want to make sure 1 <= n <= input.max_repetitions, and (h + r) > 0.
  double n = (1 + input.max_overhead) * e / (h + r);
  n = std::min(std::max(n, 1.0), static_cast<double>(input.max_repetitions));

  size_t n_size_t = static_cast<size_t>(n);

  VLOG(2) << "Computed random interleaving repetitions"
          << "\n  input.total_execution_time_per_repetition: "
          << input.total_execution_time_per_repetition
          << "\n  input.time_used_per_repetition: "
          << input.time_used_per_repetition
          << "\n  input.real_time_used_per_repetition: "
          << input.real_time_used_per_repetition
          << "\n  input.min_time_per_repetition: "
          << input.min_time_per_repetition
          << "\n  input.max_repetitions: " << input.max_repetitions
          << "\n  input.max_overhead: " << input.max_overhead
          << "\n  h: " << h
          << "\n  r: " << r
          << "\n  s: " << s
          << "\n  f: " << f
          << "\n  m: " << m
          << "\n  e: " << e
          << "\n  n: " << n
          << "\n  n_size_t: " << n_size_t;

  return n_size_t;
}

}  // namespace internal
}  // namespace benchmark
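A worked example with invented numbers may help make the derivation concrete: suppose the first repetition took t = 0.2 s of wall time including overhead, of which r = 0.05 s was the real running time, the measured time s is also 0.05 s, the minimum time per repetition is 0.042 s, max_repetitions is 12, and max_overhead is 0.4. Then h = 0.2 - 0.05 = 0.15, m = 0.042 * 12 ≈ 0.5, f = 1.4 * 0.5 / 0.05 = 14, clamped to 12, e = 0.15 + 0.05 * 12 = 0.75, and n = 1.4 * 0.75 / (0.15 + 0.05) ≈ 5.25, so about 5 repetitions would be run instead of 12, keeping the interleaving overhead within the 40% budget.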
src/benchmark_adjust_repetitions.h (new file)
@@ -0,0 +1,28 @@
#ifndef BENCHMARK_ADJUST_REPETITIONS_H
#define BENCHMARK_ADJUST_REPETITIONS_H

#include "benchmark/benchmark.h"
#include "commandlineflags.h"

namespace benchmark {
namespace internal {

// Defines the input tuple to ComputeRandomInterleavingRepetitions().
struct InternalRandomInterleavingRepetitionsInput {
  double total_execution_time_per_repetition;
  double time_used_per_repetition;
  double real_time_used_per_repetition;
  double min_time_per_repetition;
  double max_overhead;
  size_t max_repetitions;
};

// Should be called right after the first repetition is completed to estimate
// the number of repetitions.
size_t ComputeRandomInterleavingRepetitions(
    InternalRandomInterleavingRepetitionsInput input);

}  // end namespace internal
}  // end namespace benchmark

#endif  // BENCHMARK_ADJUST_REPETITIONS_H
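A minimal usage sketch of this header (all numeric values are invented, and AdjustRepetitionsExample is a hypothetical caller; the real call site lives in the runner code, which is not part of this excerpt): fill the input struct with timings gathered from the first completed repetition, then ask how many repetitions random interleaving should actually run.

#include "benchmark_adjust_repetitions.h"

void AdjustRepetitionsExample() {
  benchmark::internal::InternalRandomInterleavingRepetitionsInput input;
  input.total_execution_time_per_repetition = 0.2;  // wall time incl. overhead
  input.time_used_per_repetition = 0.05;            // measured (CPU or real) time
  input.real_time_used_per_repetition = 0.05;       // real time excl. overhead
  input.min_time_per_repetition = 0.042;            // e.g. what GetMinTime() returned
  input.max_overhead = 0.4;    // --benchmark_random_interleaving_max_overhead
  input.max_repetitions = 12;  // e.g. what GetRepetitions() returned
  size_t repetitions =
      benchmark::internal::ComputeRandomInterleavingRepetitions(input);
  (void)repetitions;  // with these numbers, roughly 5 (see the worked example above)
}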