Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions benchmarks/inc/alloc.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#pragma once

#include <cstddef>
#include <cstdlib>
#include <limits>
#include <new>
#include <stdexcept>

// Allocator for benchmarks that hands out storage starting exactly `Skew` bytes past an
// `Alignment`-aligned address, so that every run sees the same placement of the data.
//
// It provides the pieces of the standard Allocator requirements that containers rely on when they
// rebind their allocator to an internal type: `rebind` (the default in `std::allocator_traits` cannot
// rebind a template with non-type parameters), a converting constructor, and equality comparison.
// The allocator is stateless, so all instances with the same `Alignment` and `Skew` compare equal.
template <class T, size_t Alignment, size_t Skew>
struct skewed_allocator {
    using value_type = T;

    static_assert(Alignment != 0 && (Alignment & (Alignment - 1)) == 0, "Alignment must be a power of two");
    static_assert(Alignment % alignof(T) == 0, "Chosen Alignment will produce unaligned T objects");
    static_assert(Skew % alignof(T) == 0, "Chosen Skew will produce unaligned T objects");

    template <class U>
    struct rebind {
        using other = skewed_allocator<U, Alignment, Skew>;
    };

    skewed_allocator() = default;

    // Converting constructor: there is no state to copy.
    template <class U>
    skewed_allocator(const skewed_allocator<U, Alignment, Skew>&) noexcept {}

    template <class U>
    bool operator==(const skewed_allocator<U, Alignment, Skew>&) const noexcept {
        return true;
    }

    template <class U>
    bool operator!=(const skewed_allocator<U, Alignment, Skew>&) const noexcept {
        return false;
    }

    // Allocates storage for `n` objects of type T.
    // Returns a pointer located `Skew` bytes past an address aligned to `Alignment`.
    // Throws std::bad_array_new_length if the byte count would overflow size_t,
    // and std::bad_alloc if the underlying allocation fails.
    T* allocate(const size_t n) {
        if (n > ((std::numeric_limits<size_t>::max)() - Skew) / sizeof(T)) {
            throw std::bad_array_new_length{};
        }

        const size_t bytes = n * sizeof(T) + Skew;
        // A zero-byte request is rounded up to one byte: _aligned_malloc is documented to treat
        // a size of 0 as an invalid parameter.
        const auto p = static_cast<unsigned char*>(allocate_raw(bytes != 0 ? bytes : 1));
        if (!p) {
            throw std::bad_alloc{};
        }
        return reinterpret_cast<T*>(p + Skew);
    }

    // Releases storage previously returned by allocate(); a null pointer is ignored.
    void deallocate(T* const p, size_t) noexcept {
        if (p) {
            // Undo the skew to recover the address the underlying allocation returned.
            deallocate_raw(reinterpret_cast<unsigned char*>(p) - Skew);
        }
    }

private:
    // Returns `bytes` bytes aligned to `Alignment`, or nullptr on failure.
    static void* allocate_raw(const size_t bytes) noexcept {
#ifdef _MSC_VER
        return _aligned_malloc(bytes, Alignment);
#else
        return ::operator new(bytes, std::align_val_t{Alignment}, std::nothrow);
#endif
    }

    // Frees a pointer obtained from allocate_raw().
    static void deallocate_raw(void* const p) noexcept {
#ifdef _MSC_VER
        _aligned_free(p);
#else
        ::operator delete(p, std::align_val_t{Alignment});
#endif
    }
};

// The purpose is to provide consistent behavior for benchmarks.
// 64 would be a reasonable alignment for practical perf uses,
// as it is both the cache line size and the maximum vector instruction size (on x64).
// However, to provide even more consistency, allocations are aligned to a page instead,
// which makes sure the same number of page boundaries is crossed each time.
constexpr size_t page_size = 4096;

// A realistic skew relative to allocation granularity, as when a variable is placed
// next to a pointer in a structure or on the stack. Also corresponds to the default packing.
constexpr size_t skew = 8;

// Allocator whose storage starts on a page boundary.
// An alias (rather than a derived class) so that rebinding yields the same allocator family.
template <class T>
using aligned_allocator = skewed_allocator<T, page_size, 0>;

// Allocator whose storage starts `skew` bytes past a page boundary.
template <class T>
using unaligned_allocator = skewed_allocator<T, page_size, skew>;

#pragma warning(push)
#pragma warning(disable : 4324) // structure was padded due to alignment specifier

// Wrapper that is over-aligned to page_size and holds a single T as its only member,
// giving benchmarks a non-heap counterpart to a page-aligned allocation.
template <typename T>
struct alignas(page_size) aligner {
    T value;
};

// Wrapper that is over-aligned to page_size and places `skew` bytes of padding before the payload,
// so `value` follows `pad` instead of starting on the page boundary.
template <typename T>
struct alignas(page_size) unaligner {
    char pad[skew]; // never read; exists only to push `value` away from the aligned start
    T value;
};

#pragma warning(pop)
72 changes: 49 additions & 23 deletions benchmarks/src/swap_ranges.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,26 +7,28 @@
#include <cstdint>
#include <vector>

#include "alloc.hpp"

using namespace std;

template <size_t N, class T>
template <size_t N, class T, template <class> class Padder>
void std_swap(benchmark::State& state) {
T a[N];
memset(a, 'a', sizeof(a));
T b[N];
memset(b, 'b', sizeof(b));
Padder<T[N]> a;
memset(a.value, 'a', sizeof(a.value));
Padder<T[N]> b;
memset(b.value, 'b', sizeof(b.value));

for (auto _ : state) {
swap(a, b);
benchmark::DoNotOptimize(a);
benchmark::DoNotOptimize(b);
swap(a.value, b.value);
benchmark::DoNotOptimize(a.value);
benchmark::DoNotOptimize(b.value);
}
}

template <class T>
template <class T, template <class> class Alloc>
void std_swap_ranges(benchmark::State& state) {
vector<T> a(static_cast<size_t>(state.range(0)), T{'a'});
vector<T> b(static_cast<size_t>(state.range(0)), T{'b'});
vector<T, Alloc<T>> a(static_cast<size_t>(state.range(0)), T{'a'});
vector<T, Alloc<T>> b(static_cast<size_t>(state.range(0)), T{'b'});

for (auto _ : state) {
swap_ranges(a.begin(), a.end(), b.begin());
Expand All @@ -35,18 +37,42 @@ void std_swap_ranges(benchmark::State& state) {
}
}

BENCHMARK(std_swap<1, uint8_t>);
BENCHMARK(std_swap<5, uint8_t>);
BENCHMARK(std_swap<15, uint8_t>);
BENCHMARK(std_swap<26, uint8_t>);
BENCHMARK(std_swap<38, uint8_t>);
BENCHMARK(std_swap<60, uint8_t>);
BENCHMARK(std_swap<125, uint8_t>);
BENCHMARK(std_swap<800, uint8_t>);
BENCHMARK(std_swap<3000, uint8_t>);
BENCHMARK(std_swap<9000, uint8_t>);

BENCHMARK(std_swap_ranges<uint8_t>)
BENCHMARK(std_swap<1, uint8_t, aligner>);
BENCHMARK(std_swap<5, uint8_t, aligner>);
BENCHMARK(std_swap<15, uint8_t, aligner>);
BENCHMARK(std_swap<26, uint8_t, aligner>);
BENCHMARK(std_swap<38, uint8_t, aligner>);
BENCHMARK(std_swap<60, uint8_t, aligner>);
BENCHMARK(std_swap<125, uint8_t, aligner>);
BENCHMARK(std_swap<800, uint8_t, aligner>);
BENCHMARK(std_swap<3000, uint8_t, aligner>);
BENCHMARK(std_swap<9000, uint8_t, aligner>);

BENCHMARK(std_swap<1, uint8_t, unaligner>);
BENCHMARK(std_swap<5, uint8_t, unaligner>);
BENCHMARK(std_swap<15, uint8_t, unaligner>);
BENCHMARK(std_swap<26, uint8_t, unaligner>);
BENCHMARK(std_swap<38, uint8_t, unaligner>);
BENCHMARK(std_swap<60, uint8_t, unaligner>);
BENCHMARK(std_swap<125, uint8_t, unaligner>);
BENCHMARK(std_swap<800, uint8_t, unaligner>);
BENCHMARK(std_swap<3000, uint8_t, unaligner>);
BENCHMARK(std_swap<9000, uint8_t, unaligner>);


BENCHMARK(std_swap_ranges<uint8_t, aligned_allocator>)
->Arg(1)
->Arg(5)
->Arg(15)
->Arg(26)
->Arg(38)
->Arg(60)
->Arg(125)
->Arg(800)
->Arg(3000)
->Arg(9000);

BENCHMARK(std_swap_ranges<uint8_t, unaligned_allocator>)
->Arg(1)
->Arg(5)
->Arg(15)
Expand Down