Skip to content

Commit 886014a

Browse files
authored
Primary caching 12: barebone range support (#4784)
**Prefer reviewing on a per-commit basis, since stuff has moved around.** Range queries are back!... in the most primitive form possible. No invalidation, no bucketing, no optimization, no nothing. Just putting everything in place. https://github.com/rerun-io/rerun/assets/2910679/a65281e4-9843-4598-9547-ce7e45197995 --- Part of the primary caching series of PRs (index search, joins, deserialization): - #4592 - #4593 - #4659 - #4680 - #4681 - #4698 - #4711 - #4712 - #4721 - #4726 - #4773 - #4784 - #4785 - #4793 - #4800
1 parent cd1b6c2 commit 886014a

16 files changed

Lines changed: 977 additions & 272 deletions

File tree

crates/re_data_store/benches/arrow2.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
55

66
use std::sync::Arc;
77

8-
use arrow2::array::{Array, PrimitiveArray, StructArray, UnionArray};
8+
use arrow2::array::{Array, FixedSizeListArray, PrimitiveArray, StructArray};
99
use criterion::Criterion;
1010
use itertools::Itertools;
1111

@@ -277,8 +277,8 @@ fn estimated_size_bytes(c: &mut Criterion) {
277277
ArrayKind::Primitive => {
278278
bench_downcast_first::<PrimitiveArray<u64>>(&mut group, kind);
279279
}
280-
ArrayKind::Struct => bench_downcast_first::<StructArray>(&mut group, kind),
281-
ArrayKind::StructLarge => bench_downcast_first::<UnionArray>(&mut group, kind),
280+
ArrayKind::Struct => bench_downcast_first::<FixedSizeListArray>(&mut group, kind),
281+
ArrayKind::StructLarge => bench_downcast_first::<StructArray>(&mut group, kind),
282282
}
283283

284284
fn bench_downcast_first<T: arrow2::array::Array + Clone>(

crates/re_log_types/src/example_components.rs

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,35 @@
11
//! Example components to be used for tests and docs
22
3-
use re_types_core::{Loggable, SizeBytes};
3+
use re_types_core::{components::InstanceKey, Loggable, SizeBytes};
44

55
// ----------------------------------------------------------------------------
66

77
#[derive(Debug)]
88
pub struct MyPoints;
99

10+
impl MyPoints {
11+
pub const NUM_COMPONENTS: usize = 5;
12+
}
13+
1014
impl re_types_core::Archetype for MyPoints {
1115
type Indicator = re_types_core::GenericIndicatorComponent<Self>;
1216

1317
fn name() -> re_types_core::ArchetypeName {
14-
"test.MyPoints".into()
18+
"example.MyPoints".into()
1519
}
1620

1721
fn required_components() -> ::std::borrow::Cow<'static, [re_types_core::ComponentName]> {
1822
vec![MyPoint::name()].into()
1923
}
2024

2125
fn recommended_components() -> std::borrow::Cow<'static, [re_types_core::ComponentName]> {
22-
vec![MyColor::name(), MyLabel::name()].into()
26+
vec![
27+
re_types_core::LoggableBatch::name(&Self::Indicator::default()),
28+
InstanceKey::name(),
29+
MyColor::name(),
30+
MyLabel::name(),
31+
]
32+
.into()
2333
}
2434
}
2535

@@ -32,6 +42,7 @@ pub struct MyPoint {
3242
}
3343

3444
impl MyPoint {
45+
#[inline]
3546
pub fn new(x: f32, y: f32) -> Self {
3647
Self { x, y }
3748
}
@@ -121,7 +132,15 @@ impl Loggable for MyPoint {
121132
#[repr(transparent)]
122133
pub struct MyColor(pub u32);
123134

135+
impl MyColor {
136+
#[inline]
137+
pub fn from_rgb(r: u8, g: u8, b: u8) -> Self {
138+
Self(u32::from_le_bytes([r, g, b, 255]))
139+
}
140+
}
141+
124142
impl From<u32> for MyColor {
143+
#[inline]
125144
fn from(value: u32) -> Self {
126145
Self(value)
127146
}

crates/re_query_cache/src/cache.rs

Lines changed: 63 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ use re_types_core::{
1818
components::InstanceKey, Archetype, ArchetypeName, Component, ComponentName, SizeBytes as _,
1919
};
2020

21-
use crate::{ErasedFlatVecDeque, FlatVecDeque};
21+
use crate::{ErasedFlatVecDeque, FlatVecDeque, LatestAtCache, RangeCache};
2222

2323
// ---
2424

@@ -67,6 +67,17 @@ pub struct CachesPerArchetype {
6767
// than an `ArchetypeName`: the query system doesn't care about archetypes.
6868
pub(crate) latest_at_per_archetype: RwLock<HashMap<ArchetypeName, Arc<RwLock<LatestAtCache>>>>,
6969

70+
/// Which [`Archetype`] are we querying for?
71+
///
72+
/// This is very important because of our data model: we not only query for components, but we
73+
/// query for components from a specific point-of-view (the so-called primary component).
74+
/// Different archetypes have different points-of-view, and therefore can end up with different
75+
/// results, even from the same raw data.
76+
//
77+
// TODO(cmc): At some point we should probably just store the PoV and optional components rather
78+
// than an `ArchetypeName`: the query system doesn't care about archetypes.
79+
pub(crate) range_per_archetype: RwLock<HashMap<ArchetypeName, Arc<RwLock<RangeCache>>>>,
80+
7081
/// Everything greater than or equal to this timestamp has been asynchronously invalidated.
7182
///
7283
/// The next time this cache gets queried, it must remove any entry matching this criteria.
@@ -134,6 +145,42 @@ impl Caches {
134145
f(&mut cache)
135146
}
136147

148+
/// Gives write access to the appropriate `RangeCache` according to the specified
149+
/// query parameters.
150+
#[inline]
151+
pub fn with_range<A, F, R>(
152+
store_id: StoreId,
153+
entity_path: EntityPath,
154+
query: &RangeQuery,
155+
mut f: F,
156+
) -> R
157+
where
158+
A: Archetype,
159+
F: FnMut(&mut RangeCache) -> R,
160+
{
161+
let key = CacheKey::new(store_id, entity_path, query.timeline);
162+
163+
let cache =
164+
re_data_store::DataStore::with_subscriber_once(*CACHES, move |caches: &Caches| {
165+
let mut caches = caches.0.write();
166+
167+
let caches_per_archetype = caches.entry(key.clone()).or_default();
168+
caches_per_archetype.handle_pending_invalidation(&key);
169+
170+
let mut range_per_archetype = caches_per_archetype.range_per_archetype.write();
171+
let range_cache = range_per_archetype.entry(A::name()).or_default();
172+
173+
Arc::clone(range_cache)
174+
175+
// Implicitly releasing all intermediary locks.
176+
})
177+
// NOTE: downcasting cannot fail, this is our own private handle.
178+
.unwrap();
179+
180+
let mut cache = cache.write();
181+
f(&mut cache)
182+
}
183+
137184
#[inline]
138185
pub(crate) fn with<F: FnMut(&Caches) -> R, R>(f: F) -> R {
139186
// NOTE: downcasting cannot fail, this is our own private handle.
@@ -347,6 +394,9 @@ pub struct CacheBucket {
347394
///
348395
/// This corresponds to the data time and `RowId` returned by `re_query::query_archetype`.
349396
///
397+
/// This is guaranteed to always be sorted and dense (i.e. there cannot be a hole in the cached
398+
/// data, unless the raw data itself in the store has a hole at that particular point in time).
399+
///
350400
/// Reminder: within a single timestamp, rows are sorted according to their [`RowId`]s.
351401
pub(crate) data_times: VecDeque<(TimeInt, RowId)>,
352402

@@ -375,6 +425,18 @@ impl CacheBucket {
375425
self.data_times.iter()
376426
}
377427

428+
#[inline]
429+
pub fn contains_data_time(&self, data_time: TimeInt) -> bool {
430+
let first_time = self.data_times.front().map_or(&TimeInt::MAX, |(t, _)| t);
431+
let last_time = self.data_times.back().map_or(&TimeInt::MIN, |(t, _)| t);
432+
*first_time <= data_time && data_time <= *last_time
433+
}
434+
435+
#[inline]
436+
pub fn contains_data_row(&self, data_time: TimeInt, row_id: RowId) -> bool {
437+
self.data_times.binary_search(&(data_time, row_id)).is_ok()
438+
}
439+
378440
/// Iterate over the [`InstanceKey`] batches of the point-of-view components.
379441
#[inline]
380442
pub fn iter_pov_instance_keys(&self) -> impl Iterator<Item = &[InstanceKey]> {
@@ -554,42 +616,3 @@ impl CacheBucket {
554616
Ok(added_size_bytes)
555617
}
556618
}
557-
558-
// ---
559-
560-
// NOTE: Because we're working with deserialized data, everything has to be done with metaprogramming,
561-
// which is notoriously painful in Rust (i.e., macros).
562-
// For this reason we move as much of the code as possible into the already existing macros in `query.rs`.
563-
564-
/// Caches the results of `LatestAt` archetype queries (`ArchetypeView`).
565-
///
566-
/// There is one `LatestAtCache` for each unique [`CacheKey`].
567-
///
568-
/// All query steps are cached: index search, cluster key joins and deserialization.
569-
#[derive(Default)]
570-
pub struct LatestAtCache {
571-
/// Organized by _query_ time.
572-
///
573-
/// If the data you're looking for isn't in here, try partially running the query (i.e. run the
574-
/// index search in order to find a data time, but don't actually deserialize and join the data)
575-
/// and check if there is any data available for the resulting _data_ time in [`Self::per_data_time`].
576-
pub per_query_time: BTreeMap<TimeInt, Arc<RwLock<CacheBucket>>>,
577-
578-
/// Organized by _data_ time.
579-
///
580-
/// Due to how our latest-at semantics work, any number of queries at time `T+n` where `n >= 0`
581-
/// can result in a data time of `T`.
582-
pub per_data_time: BTreeMap<TimeInt, Arc<RwLock<CacheBucket>>>,
583-
584-
/// Dedicated bucket for timeless data, if any.
585-
///
586-
/// Query time and data time are one and the same in the timeless case, therefore we only need
587-
/// this one bucket.
588-
//
589-
// NOTE: Lives separately so we don't pay the extra `Option` cost in the much more common
590-
// timeful case.
591-
pub timeless: Option<CacheBucket>,
592-
593-
/// Total size of the data stored in this cache in bytes.
594-
pub total_size_bytes: u64,
595-
}

0 commit comments

Comments
 (0)