Skip to content

Commit 1e84aa5

Browse files
authored
arrow2_convert primitive (de)serialization benchmarks (#1742)
* arrow2_convert primitive benchmarks * addressing PR comments
1 parent c54abe0 commit 1e84aa5

File tree

2 files changed

+145
-0
lines changed

2 files changed

+145
-0
lines changed

crates/re_arrow_store/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,3 +111,7 @@ required-features = ["polars"]
111111
[[bench]]
112112
name = "data_store"
113113
harness = false
114+
115+
[[bench]]
116+
name = "arrow2_convert"
117+
harness = false
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
//! Keeping track of performance issues/regressions in `arrow2_convert` that directly affect us.
2+
3+
#[global_allocator]
4+
static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
5+
6+
use arrow2::{array::PrimitiveArray, datatypes::PhysicalType, types::PrimitiveType};
7+
use criterion::{criterion_group, criterion_main, Criterion};
8+
use re_log_types::{
9+
component_types::InstanceKey, external::arrow2_convert::deserialize::TryIntoCollection,
10+
Component as _, DataCell,
11+
};
12+
13+
// ---
14+
15+
criterion_group!(benches, serialize, deserialize);
16+
criterion_main!(benches);
17+
18+
// ---
19+
20+
/// Number of instance keys generated for every benchmark case.
///
/// `cargo test` also runs the benchmark setup code, so builds with debug assertions enabled
/// use a single instance to stay fast; all other builds use the full 100k instances.
const NUM_INSTANCES: usize = if cfg!(debug_assertions) {
    1
} else {
    100_000
};
26+
27+
// ---
28+
29+
fn serialize(c: &mut Criterion) {
30+
let mut group = c.benchmark_group(format!(
31+
"arrow2_convert/serialize/primitive/instances={NUM_INSTANCES}"
32+
));
33+
group.throughput(criterion::Throughput::Elements(NUM_INSTANCES as _));
34+
35+
{
36+
group.bench_function("arrow2_convert", |b| {
37+
b.iter(|| {
38+
let cell = DataCell::from_component::<InstanceKey>(0..NUM_INSTANCES as u64);
39+
assert_eq!(NUM_INSTANCES as u32, cell.num_instances());
40+
assert_eq!(
41+
cell.datatype().to_physical_type(),
42+
PhysicalType::Primitive(PrimitiveType::UInt64)
43+
);
44+
cell
45+
});
46+
});
47+
}
48+
49+
{
50+
group.bench_function("arrow2/from_values", |b| {
51+
b.iter(|| {
52+
let values = PrimitiveArray::from_values(0..NUM_INSTANCES as u64).boxed();
53+
let cell = crate::DataCell::from_arrow(InstanceKey::name(), values);
54+
assert_eq!(NUM_INSTANCES as u32, cell.num_instances());
55+
assert_eq!(
56+
cell.datatype().to_physical_type(),
57+
PhysicalType::Primitive(PrimitiveType::UInt64)
58+
);
59+
cell
60+
});
61+
});
62+
}
63+
64+
{
65+
group.bench_function("arrow2/from_vec", |b| {
66+
b.iter(|| {
67+
// NOTE: We do the `collect()` here on purpose!
68+
//
69+
// All of these APIs have to allocate an array under the hood, except `from_vec`
70+
// which is O(1) (it just unsafely reuses the vec's data pointer).
71+
// We need to measure the collection in order to have a leveled playing field.
72+
let values = PrimitiveArray::from_vec((0..NUM_INSTANCES as u64).collect()).boxed();
73+
let cell = crate::DataCell::from_arrow(InstanceKey::name(), values);
74+
assert_eq!(NUM_INSTANCES as u32, cell.num_instances());
75+
assert_eq!(
76+
cell.datatype().to_physical_type(),
77+
PhysicalType::Primitive(PrimitiveType::UInt64)
78+
);
79+
cell
80+
});
81+
});
82+
}
83+
}
84+
85+
fn deserialize(c: &mut Criterion) {
86+
let mut group = c.benchmark_group(format!(
87+
"arrow2_convert/deserialize/primitive/instances={NUM_INSTANCES}"
88+
));
89+
group.throughput(criterion::Throughput::Elements(NUM_INSTANCES as _));
90+
91+
let cell = DataCell::from_component::<InstanceKey>(0..NUM_INSTANCES as u64);
92+
let data = cell.as_arrow();
93+
94+
{
95+
group.bench_function("arrow2_convert", |b| {
96+
b.iter(|| {
97+
let keys: Vec<InstanceKey> = data.as_ref().try_into_collection().unwrap();
98+
assert_eq!(NUM_INSTANCES, keys.len());
99+
assert_eq!(
100+
InstanceKey(NUM_INSTANCES as u64 / 2),
101+
keys[NUM_INSTANCES / 2]
102+
);
103+
keys
104+
});
105+
});
106+
}
107+
108+
{
109+
group.bench_function("arrow2/validity_checks", |b| {
110+
b.iter(|| {
111+
let data = data.as_any().downcast_ref::<PrimitiveArray<u64>>().unwrap();
112+
let keys: Vec<InstanceKey> = data
113+
.into_iter()
114+
.filter_map(|v| v.copied().map(InstanceKey))
115+
.collect();
116+
assert_eq!(NUM_INSTANCES, keys.len());
117+
assert_eq!(
118+
InstanceKey(NUM_INSTANCES as u64 / 2),
119+
keys[NUM_INSTANCES / 2]
120+
);
121+
keys
122+
});
123+
});
124+
}
125+
126+
{
127+
group.bench_function("arrow2/validity_bypass", |b| {
128+
b.iter(|| {
129+
let data = data.as_any().downcast_ref::<PrimitiveArray<u64>>().unwrap();
130+
assert!(data.validity().is_none());
131+
let keys: Vec<InstanceKey> = data.values_iter().copied().map(InstanceKey).collect();
132+
assert_eq!(NUM_INSTANCES, keys.len());
133+
assert_eq!(
134+
InstanceKey(NUM_INSTANCES as u64 / 2),
135+
keys[NUM_INSTANCES / 2]
136+
);
137+
keys
138+
});
139+
});
140+
}
141+
}

0 commit comments

Comments
 (0)