Skip to content

Commit c1257b1

Browse files
ludamadludamad0
andauthored
feat(bb): op counting mode (AztecProtocol#4437)
- Introduces preset 'op-counting' that builds with support for operation counts
- Introduces a mechanism to connect this to Google Benchmark
- Supports ultra_honk_rounds_bench and goblin_bench for now

![image](https://github.com/AztecProtocol/aztec-packages/assets/163993/9785e99b-ef1f-4ea6-bfab-cd63d33499e1)

For best results, run with e.g. `./bin/goblin_bench --benchmark_min_time=0s --benchmark_counters_tabular=true`

Other:
- Makes macros consistently use the BB_ prefix; renames some BBERG_ macros

---------

Co-authored-by: ludamad <adam@aztecprotocol.com>
1 parent 2edf0d6 commit c1257b1

24 files changed

Lines changed: 397 additions & 143 deletions

barretenberg/cpp/CMakePresets.json

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,16 @@
166166
"LDFLAGS": "-fsanitize=memory"
167167
}
168168
},
169+
{
170+
"name": "op-counting",
171+
"displayName": "Release build with operation counts for benchmarks",
172+
"description": "Build with op counting",
173+
"inherits": "clang16",
174+
"binaryDir": "build-op-counting",
175+
"environment": {
176+
"CXXFLAGS": "-DBB_USE_OP_COUNT"
177+
}
178+
},
169179
{
170180
"name": "coverage",
171181
"displayName": "Build with coverage",
@@ -300,6 +310,11 @@
300310
"inherits": "default",
301311
"configurePreset": "clang16"
302312
},
313+
{
314+
"name": "op-counting",
315+
"inherits": "default",
316+
"configurePreset": "op-counting"
317+
},
303318
{
304319
"name": "clang16-dbg",
305320
"inherits": "default",

barretenberg/cpp/src/barretenberg/benchmark/basics_bench/basics.bench.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
* sequential_copy: 3.3
2121
*
2222
*/
23+
#include "barretenberg/common/op_count.hpp"
2324
#include "barretenberg/common/thread.hpp"
2425
#include "barretenberg/ecc/curves/bn254/bn254.hpp"
2526
#include <benchmark/benchmark.h>

barretenberg/cpp/src/barretenberg/benchmark/goblin_bench/goblin.bench.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#include <benchmark/benchmark.h>
33

44
#include "barretenberg/benchmark/ultra_bench/mock_proofs.hpp"
5+
#include "barretenberg/common/op_count_google_bench.hpp"
56
#include "barretenberg/goblin/goblin.hpp"
67
#include "barretenberg/goblin/mock_circuits.hpp"
78
#include "barretenberg/proof_system/circuit_builder/ultra_circuit_builder.hpp"
@@ -67,6 +68,7 @@ BENCHMARK_DEFINE_F(GoblinBench, GoblinFull)(benchmark::State& state)
6768
GoblinMockCircuits::perform_op_queue_interactions_for_mock_first_circuit(goblin.op_queue);
6869

6970
for (auto _ : state) {
71+
BB_REPORT_OP_COUNT_IN_BENCH(state);
7072
// Perform a specified number of iterations of function/kernel accumulation
7173
perform_goblin_accumulation_rounds(state, goblin);
7274

barretenberg/cpp/src/barretenberg/benchmark/ultra_bench/ultra_honk_rounds.bench.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include <benchmark/benchmark.h>
22

33
#include "barretenberg/benchmark/ultra_bench/mock_proofs.hpp"
4+
#include "barretenberg/common/op_count_google_bench.hpp"
45
#include "barretenberg/proof_system/circuit_builder/ultra_circuit_builder.hpp"
56
#include "barretenberg/ultra_honk/ultra_composer.hpp"
67
#include "barretenberg/ultra_honk/ultra_prover.hpp"
@@ -27,15 +28,20 @@ enum {
2728
* @param prover - The ultrahonk prover.
2829
* @param index - The pass to measure.
2930
**/
30-
BBERG_PROFILE static void test_round_inner(State& state, UltraProver& prover, size_t index) noexcept
31+
BB_PROFILE static void test_round_inner(State& state, UltraProver& prover, size_t index) noexcept
3132
{
3233
auto time_if_index = [&](size_t target_index, auto&& func) -> void {
34+
BB_REPORT_OP_COUNT_IN_BENCH(state);
3335
if (index == target_index) {
3436
state.ResumeTiming();
3537
}
38+
3639
func();
3740
if (index == target_index) {
3841
state.PauseTiming();
42+
} else {
43+
// We don't actually want to write to user-defined counters
44+
BB_REPORT_OP_COUNT_BENCH_CANCEL();
3945
}
4046
};
4147

@@ -47,7 +53,7 @@ BBERG_PROFILE static void test_round_inner(State& state, UltraProver& prover, si
4753
time_if_index(RELATION_CHECK, [&] { prover.execute_relation_check_rounds(); });
4854
time_if_index(ZEROMORPH, [&] { prover.execute_zeromorph_rounds(); });
4955
}
50-
BBERG_PROFILE static void test_round(State& state, size_t index) noexcept
56+
BB_PROFILE static void test_round(State& state, size_t index) noexcept
5157
{
5258
bb::srs::init_crs_factory("../srs_db/ignition");
5359

barretenberg/cpp/src/barretenberg/benchmark/ultra_bench/ultra_plonk_rounds.bench.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ enum {
1717
SIXTH_BATCH_OPEN
1818
};
1919

20-
BBERG_PROFILE static void plonk_round(
20+
BB_PROFILE static void plonk_round(
2121
State& state, plonk::UltraProver& prover, size_t target_index, size_t index, auto&& func) noexcept
2222
{
2323
if (index == target_index) {
@@ -37,7 +37,7 @@ BBERG_PROFILE static void plonk_round(
3737
* @param prover - The ultraplonk prover.
3838
* @param index - The pass to measure.
3939
**/
40-
BBERG_PROFILE static void test_round_inner(State& state, plonk::UltraProver& prover, size_t index) noexcept
40+
BB_PROFILE static void test_round_inner(State& state, plonk::UltraProver& prover, size_t index) noexcept
4141
{
4242
plonk_round(state, prover, PREAMBLE, index, [&] { prover.execute_preamble_round(); });
4343
plonk_round(state, prover, FIRST_WIRE_COMMITMENTS, index, [&] { prover.execute_first_round(); });
@@ -47,7 +47,7 @@ BBERG_PROFILE static void test_round_inner(State& state, plonk::UltraProver& pro
4747
plonk_round(state, prover, FIFTH_COMPUTE_QUOTIENT_EVALUTION, index, [&] { prover.execute_fifth_round(); });
4848
plonk_round(state, prover, SIXTH_BATCH_OPEN, index, [&] { prover.execute_sixth_round(); });
4949
}
50-
BBERG_PROFILE static void test_round(State& state, size_t index) noexcept
50+
BB_PROFILE static void test_round(State& state, size_t index) noexcept
5151
{
5252
bb::srs::init_crs_factory("../srs_db/ignition");
5353
for (auto _ : state) {
Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,26 @@
11
#pragma once
22

33
#ifdef _WIN32
4-
#define BBERG_INLINE __forceinline inline
4+
#define BB_INLINE __forceinline inline
55
#else
6-
#define BBERG_INLINE __attribute__((always_inline)) inline
6+
#define BB_INLINE __attribute__((always_inline)) inline
77
#endif
88

99
// TODO(AD): Other instrumentation?
1010
#ifdef XRAY
11-
#define BBERG_PROFILE [[clang::xray_always_instrument]] [[clang::noinline]]
12-
#define BBERG_NO_PROFILE [[clang::xray_never_instrument]]
11+
#define BB_PROFILE [[clang::xray_always_instrument]] [[clang::noinline]]
12+
#define BB_NO_PROFILE [[clang::xray_never_instrument]]
1313
#else
14-
#define BBERG_PROFILE
15-
#define BBERG_NO_PROFILE
14+
#define BB_PROFILE
15+
#define BB_NO_PROFILE
16+
#endif
17+
18+
// Optimization hints for clang - which outcome of an expression is expected for better
19+
// branch-prediction optimization
20+
#ifdef __clang__
21+
#define BB_LIKELY(x) __builtin_expect(!!(x), 1)
22+
#define BB_UNLIKELY(x) __builtin_expect(!!(x), 0)
23+
#else
24+
// Fallback when no branch hint is available: evaluates to the expression itself.
// The expansion is parenthesized so the argument stays a single operand when the macro is used
// inside a larger expression, e.g. `!BB_LIKELY(a == b)` must mean `!(a == b)`, not `(!a) == b`.
#define BB_LIKELY(x) (x)
25+
// Fallback when no branch hint is available: evaluates to the expression itself.
// The expansion is parenthesized so the argument stays a single operand when the macro is used
// inside a larger expression, e.g. `!BB_UNLIKELY(a == b)` must mean `!(a == b)`, not `(!a) == b`.
#define BB_UNLIKELY(x) (x)
1626
#endif
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
2+
#include <cstddef>
3+
#ifdef BB_USE_OP_COUNT
4+
#include "op_count.hpp"
5+
#include <iostream>
6+
#include <sstream>
7+
#include <thread>
8+
9+
namespace bb::detail {
10+
// Registers a counter under `key` so that it can later be printed, aggregated and reset.
// @param key   label for the operation; copied into the stored Entry
// @param count pointer to the counter; only the pointer is stored, the container does not own it
void GlobalOpCountContainer::add_entry(const char* key, std::size_t* count)
11+
{
12+
// Registrations can come from several threads; serialize the push_back.
std::unique_lock<std::mutex> lock(mutex);
13+
std::stringstream ss;
14+
// Store the calling thread's id as text so print() can show which thread owns the counter.
ss << std::this_thread::get_id();
15+
counts.push_back({ key, ss.str(), count });
16+
}
17+
18+
// Writes every registered counter with a non-zero value to stdout, one tab-separated line per
// entry (key, count, owning thread id), framed by START/END marker lines.
// NOTE(review): unlike add_entry(), this iterates `counts` without taking `mutex`; presumably it is
// only meant to be called while no other thread is registering or incrementing counters - confirm.
void GlobalOpCountContainer::print() const
19+
{
20+
std::cout << "print_op_counts() START" << std::endl;
21+
for (const Entry& entry : counts) {
22+
// Counters that are currently zero are omitted from the output.
if (*entry.count > 0) {
23+
std::cout << entry.key << "\t" << *entry.count << "\t[thread=" << entry.thread_id << "]" << std::endl;
24+
}
25+
}
26+
std::cout << "print_op_counts() END" << std::endl;
27+
}
28+
29+
// Sums the registered counters by key, merging entries that share a key (e.g. the same operation
// registered from different threads).
// @return map from key to the summed count; keys whose counters are all zero are not present.
// NOTE(review): unlike add_entry(), this iterates `counts` without taking `mutex`; presumably it is
// only meant to be called while no other thread is registering or incrementing counters - confirm.
std::map<std::string, std::size_t> GlobalOpCountContainer::get_aggregate_counts() const
30+
{
31+
std::map<std::string, std::size_t> aggregate_counts;
32+
for (const Entry& entry : counts) {
33+
// Skipping zero counters keeps never-hit (or freshly cleared) keys out of the result.
if (*entry.count > 0) {
34+
aggregate_counts[entry.key] += *entry.count;
35+
}
36+
}
37+
return aggregate_counts;
38+
}
39+
40+
// Resets every registered counter to zero. Entries stay registered; only the pointed-to values
// are overwritten.
void GlobalOpCountContainer::clear()
41+
{
42+
// The lock protects the `counts` vector against concurrent add_entry() calls. It does not
// synchronize with increments, which go through the raw counter pointers without the mutex.
std::unique_lock<std::mutex> lock(mutex);
43+
for (Entry& entry : counts) {
44+
*entry.count = 0;
45+
}
46+
}
47+
48+
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
49+
GlobalOpCountContainer GLOBAL_OP_COUNTS;
50+
} // namespace bb::detail
51+
#endif
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
2+
#pragma once
3+
4+
#ifndef BB_USE_OP_COUNT
5+
// require a semicolon to appease formatters
6+
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
7+
#define BB_OP_COUNT_TRACK() (void)0
8+
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
9+
#define BB_OP_COUNT_TRACK_NAME(name) (void)0
10+
#else
11+
/**
12+
* Provides an abstraction that counts operations based on function names.
13+
* For efficiency, we spread out counts across threads.
14+
*/
15+
16+
#include "barretenberg/common/compiler_hints.hpp"
17+
#include <algorithm>
18+
#include <atomic>
19+
#include <cstdlib>
20+
#include <map>
21+
#include <mutex>
22+
#include <string>
23+
#include <vector>
24+
namespace bb::detail {
25+
// Compile-time string
26+
// See e.g. https://www.reddit.com/r/cpp_questions/comments/pumi9r/does_c20_not_support_string_literals_as_template/
27+
template <std::size_t N> struct OperationLabel {
28+
// Wraps a fixed-size character array by value; GlobalOpCount takes an OperationLabel as its
// template parameter to distinguish operations by name.
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays)
29+
constexpr OperationLabel(const char (&str)[N])
30+
{
31+
// Copy all N characters of the source array (for a string literal this includes the
// terminating '\0').
for (std::size_t i = 0; i < N; ++i) {
32+
value[i] = str[i];
33+
}
34+
}
35+
36+
// The stored label characters.
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays)
37+
char value[N];
38+
};
39+
40+
// Contains all statically known op counts
41+
struct GlobalOpCountContainer {
42+
public:
43+
// One registered counter: its label, the registering thread's id as text, and a non-owning
// pointer to the counter value.
struct Entry {
44+
std::string key;
45+
std::string thread_id;
46+
std::size_t* count;
47+
};
48+
// Guards modifications of `counts` in add_entry() and clear().
std::mutex mutex;
49+
// All counters registered so far, in registration order.
std::vector<Entry> counts;
50+
// Prints all non-zero counters to stdout.
void print() const;
51+
// NOTE: Should be called when other threads aren't active
52+
// Resets every registered counter to zero.
void clear();
53+
// Registers `count` under `key` for the calling thread.
void add_entry(const char* key, std::size_t* count);
54+
// Returns the non-zero counters summed per key.
std::map<std::string, std::size_t> get_aggregate_counts() const;
55+
};
56+
57+
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
58+
extern GlobalOpCountContainer GLOBAL_OP_COUNTS;
59+
60+
template <OperationLabel Op> struct GlobalOpCount {
61+
public:
62+
// Per-thread pointer to this operation's counter; stays null until the thread first calls
// increment_op_count().
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
63+
static thread_local std::size_t* thread_local_count;
64+
65+
// Adds one to the calling thread's counter for operation `Op`, creating and registering the
// counter on first use.
static constexpr void increment_op_count()
66+
{
67+
if (std::is_constant_evaluated()) {
68+
// We do nothing if the compiler tries to run this
69+
return;
70+
}
71+
// First call on this thread: allocate the counter (value-initialized to 0) and register it
// with the global container under the operation's label.
if (BB_UNLIKELY(thread_local_count == nullptr)) {
72+
// The allocation is not freed anywhere in this block; presumably intentional so that the
// pointer stored in GLOBAL_OP_COUNTS stays valid after the thread exits - confirm.
thread_local_count = new std::size_t();
73+
GLOBAL_OP_COUNTS.add_entry(Op.value, thread_local_count);
74+
}
75+
// Plain (non-atomic) increment of the thread's own counter.
(*thread_local_count)++;
76+
}
77+
};
78+
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
79+
template <OperationLabel Op> thread_local std::size_t* GlobalOpCount<Op>::thread_local_count;
80+
81+
} // namespace bb::detail
82+
83+
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
84+
#define BB_OP_COUNT_TRACK() bb::detail::GlobalOpCount<__func__>::increment_op_count()
85+
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
86+
#define BB_OP_COUNT_TRACK_NAME(name) bb::detail::GlobalOpCount<name>::increment_op_count()
87+
#endif
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
2+
#pragma once
3+
#include <benchmark/benchmark.h>
4+
5+
#ifndef BB_USE_OP_COUNT
6+
namespace bb {
7+
struct GoogleBenchOpCountReporter {
8+
GoogleBenchOpCountReporter(::benchmark::State& state)
9+
{
10+
// unused, we don't have op counts on
11+
(void)state;
12+
}
13+
};
14+
}; // namespace bb
15+
// require a semicolon to appease formatters
16+
#define BB_REPORT_OP_COUNT_IN_BENCH(state) (void)0
17+
#define BB_REPORT_OP_COUNT_BENCH_CANCEL() (void)0
18+
#else
19+
#include "op_count.hpp"
20+
namespace bb {
21+
// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions)
22+
struct GoogleBenchOpCountReporter {
23+
// Scope object tying op counts to a benchmark state: construction resets all global op
// counters, destruction copies the aggregated counts into the state's user counters.
// We allow having a ref member as this only lives inside a function frame
24+
::benchmark::State& state;
25+
// When set to true before destruction, the destructor reports nothing.
bool cancelled = false;
26+
GoogleBenchOpCountReporter(::benchmark::State& state)
27+
: state(state)
28+
{
29+
// Intent: Clear when we enter the state loop
30+
bb::detail::GLOBAL_OP_COUNTS.clear();
31+
}
32+
~GoogleBenchOpCountReporter()
33+
{
34+
// Allow for conditional reporting
35+
if (cancelled) {
36+
return;
37+
}
38+
// Intent: Collect results when we exit the state loop
39+
// Each key's counter is assigned, not accumulated, so an existing value for that key is
// overwritten.
for (auto& entry : bb::detail::GLOBAL_OP_COUNTS.get_aggregate_counts()) {
40+
state.counters[entry.first] = static_cast<double>(entry.second);
41+
}
42+
}
43+
};
44+
// Allow for integration with google benchmark user-defined counters
45+
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
46+
#define BB_REPORT_OP_COUNT_IN_BENCH(state) ::bb::GoogleBenchOpCountReporter __bb_report_op_count_in_bench{ state }
// ^ Declares a scoped reporter bound to `state`. The type is fully qualified so the macro also
//   expands correctly in code that is outside namespace bb and has no using-directive for it.
//   The terminating semicolon is supplied by the call site, matching the `(void)0` variant used
//   when BB_USE_OP_COUNT is not defined.
47+
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
48+
#define BB_REPORT_OP_COUNT_BENCH_CANCEL() __bb_report_op_count_in_bench.cancelled = true;
49+
}; // namespace bb
50+
#endif

barretenberg/cpp/src/barretenberg/common/parallel_for_mutex_pool.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ class ThreadPool {
5252
std::condition_variable complete_condition_;
5353
bool stop = false;
5454

55-
BBERG_NO_PROFILE void worker_loop(size_t thread_index);
55+
BB_NO_PROFILE void worker_loop(size_t thread_index);
5656

5757
void do_iterations()
5858
{

0 commit comments

Comments
 (0)