Skip to content

Commit 4d5847e

Browse files
committed
refactor: rename RowIdTreeMap to RowAddrTreeMap
1 parent 3676148 commit 4d5847e

File tree

22 files changed

+192
-194
lines changed

22 files changed

+192
-194
lines changed

docs/src/format/table/index/scalar/bitmap.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@ The bitmap index consists of a single file `bitmap_page_lookup.lance` that store
1515

1616
### File Schema
1717

18-
| Column | Type | Nullable | Description |
19-
|-----------|------------|----------|---------------------------------------------------------------------|
20-
| `keys` | {DataType} | true | The unique value from the indexed column |
21-
| `bitmaps` | Binary | true | Serialized RowAddrTreeMap containing row IDs where this value appears |
18+
| Column | Type | Nullable | Description |
19+
|-----------|------------|----------|-------------------------------------------------------------------------|
20+
| `keys` | {DataType} | true | The unique value from the indexed column |
21+
| `bitmaps` | Binary | true | Serialized RowAddrTreeMap containing row addrs where this value appears |
2222

2323
## Accelerated Queries
2424

docs/src/format/table/index/scalar/label_list.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,10 @@ The label list index uses a bitmap index internally and stores its data in:
1717

1818
### File Schema
1919

20-
| Column | Type | Nullable | Description |
21-
|-----------|------------|----------|---------------------------------------------------------------------|
22-
| `keys` | {DataType} | true | The unique label value from the indexed column |
23-
| `bitmaps` | Binary | true | Serialized RowAddrTreeMap containing row IDs where this label appears |
20+
| Column | Type | Nullable | Description |
21+
|-----------|------------|----------|------------------------------------------------------------------------|
22+
| `keys` | {DataType} | true | The unique label value from the indexed column |
23+
| `bitmaps` | Binary | true | Serialized RowAddrTreeMap containing row addrs where this label appears |
2424

2525
## Accelerated Queries
2626

rust/lance-core/src/utils/mask.rs

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -417,8 +417,6 @@ impl RowIdSelection {
417417
}
418418
}
419419

420-
pub type RowIdTreeMap = RowAddrTreeMap;
421-
422420
impl RowAddrTreeMap {
423421
/// Create an empty set
424422
pub fn new() -> Self {
@@ -1103,13 +1101,13 @@ mod tests {
11031101
}
11041102
left.extend(left_rows.iter().copied());
11051103

1106-
let mut right = RowIdTreeMap::default();
1104+
let mut right = RowAddrTreeMap::default();
11071105
for fragment in right_full_fragments.clone() {
11081106
right.insert_fragment(fragment);
11091107
}
11101108
right.extend(right_rows.iter().copied());
11111109

1112-
let mut expected = RowIdTreeMap::default();
1110+
let mut expected = RowAddrTreeMap::default();
11131111
for fragment in &left_full_fragments {
11141112
if right_full_fragments.contains(fragment) {
11151113
expected.insert_fragment(*fragment);
@@ -1138,19 +1136,19 @@ mod tests {
11381136
right_full_fragments in proptest::collection::vec(0..u32::MAX, 0..10),
11391137
right_rows in proptest::collection::vec(0..u64::MAX, 0..1000),
11401138
) {
1141-
let mut left = RowIdTreeMap::default();
1139+
let mut left = RowAddrTreeMap::default();
11421140
for fragment in left_full_fragments.clone() {
11431141
left.insert_fragment(fragment);
11441142
}
11451143
left.extend(left_rows.iter().copied());
11461144

1147-
let mut right = RowIdTreeMap::default();
1145+
let mut right = RowAddrTreeMap::default();
11481146
for fragment in right_full_fragments.clone() {
11491147
right.insert_fragment(fragment);
11501148
}
11511149
right.extend(right_rows.iter().copied());
11521150

1153-
let mut expected = RowIdTreeMap::default();
1151+
let mut expected = RowAddrTreeMap::default();
11541152
for fragment in left_full_fragments {
11551153
expected.insert_fragment(fragment);
11561154
}
@@ -1181,13 +1179,13 @@ mod tests {
11811179
left_rows in proptest::collection::vec(0..u64::MAX, 0..1000),
11821180
right_rows in proptest::collection::vec(0..u64::MAX, 0..1000),
11831181
) {
1184-
let mut left = RowIdTreeMap::default();
1182+
let mut left = RowAddrTreeMap::default();
11851183
for fragment in left_full_fragments {
11861184
left.insert_fragment(fragment);
11871185
}
11881186
left.extend(left_rows.iter().copied());
11891187

1190-
let mut right = RowIdTreeMap::default();
1188+
let mut right = RowAddrTreeMap::default();
11911189
right.extend(right_rows.iter().copied());
11921190

11931191
let mut expected = left.clone();
@@ -1205,13 +1203,13 @@ mod tests {
12051203
right_full_fragments in proptest::collection::vec(0..u32::MAX, 0..10),
12061204
left_rows in proptest::collection::vec(0..u64::MAX, 0..1000),
12071205
) {
1208-
let mut left = RowIdTreeMap::default();
1206+
let mut left = RowAddrTreeMap::default();
12091207
for fragment in left_full_fragments {
12101208
left.insert_fragment(fragment);
12111209
}
12121210
left.extend(left_rows.iter().copied());
12131211

1214-
let mut right = RowIdTreeMap::default();
1212+
let mut right = RowAddrTreeMap::default();
12151213
for fragment in right_full_fragments.clone() {
12161214
right.insert_fragment(fragment);
12171215
}
@@ -1233,7 +1231,7 @@ mod tests {
12331231
assert!(mask.iter_ids().is_none());
12341232

12351233
// Test with just an allow list
1236-
let mut allow_list = RowIdTreeMap::default();
1234+
let mut allow_list = RowAddrTreeMap::default();
12371235
allow_list.extend([1, 5, 10].iter().copied());
12381236
mask.allow_list = Some(allow_list);
12391237

@@ -1248,7 +1246,7 @@ mod tests {
12481246
);
12491247

12501248
// Test with both allow list and block list
1251-
let mut block_list = RowIdTreeMap::default();
1249+
let mut block_list = RowAddrTreeMap::default();
12521250
block_list.extend([5].iter().copied());
12531251
mask.block_list = Some(block_list);
12541252

@@ -1262,14 +1260,14 @@ mod tests {
12621260
);
12631261

12641262
// Test with full fragment in block list
1265-
let mut block_list = RowIdTreeMap::default();
1263+
let mut block_list = RowAddrTreeMap::default();
12661264
block_list.insert_fragment(0);
12671265
mask.block_list = Some(block_list);
12681266
assert!(mask.iter_ids().is_none());
12691267

12701268
// Test with full fragment in allow list
12711269
mask.block_list = None;
1272-
let mut allow_list = RowIdTreeMap::default();
1270+
let mut allow_list = RowAddrTreeMap::default();
12731271
allow_list.insert_fragment(0);
12741272
mask.allow_list = Some(allow_list);
12751273
assert!(mask.iter_ids().is_none());

rust/lance-index/src/frag_reuse.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use arrow_array::{Array, ArrayRef, PrimitiveArray, RecordBatch, UInt64Array};
88
use async_trait::async_trait;
99
use deepsize::{Context, DeepSizeOf};
1010
use itertools::Itertools;
11-
use lance_core::utils::mask::{RowAddrTreeMap, RowIdTreeMap};
11+
use lance_core::utils::mask::RowAddrTreeMap;
1212
use lance_core::{Error, Result};
1313
use lance_table::format::pb::fragment_reuse_index_details::InlineContent;
1414
use lance_table::format::{pb, ExternalFile, Fragment};
@@ -245,7 +245,7 @@ impl FragReuseIndex {
245245
mapped_value
246246
}
247247

248-
pub fn remap_row_ids_tree_map(&self, row_ids: &RowIdTreeMap) -> RowIdTreeMap {
248+
pub fn remap_row_ids_tree_map(&self, row_ids: &RowAddrTreeMap) -> RowAddrTreeMap {
249249
RowAddrTreeMap::from_iter(row_ids.row_ids().unwrap().filter_map(|addr| {
250250
let addr_as_u64 = u64::from(addr);
251251
self.remap_row_id(addr_as_u64)

rust/lance-index/src/scalar/bitmap.rs

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use futures::TryStreamExt;
2121
use lance_core::{
2222
cache::{CacheKey, LanceCache, WeakLanceCache},
2323
error::LanceOptionExt,
24-
utils::mask::{RowIdTreeMap, RowAddrTreeMap},
24+
utils::mask::RowAddrTreeMap,
2525
Error, Result, ROW_ID,
2626
};
2727
use roaring::RoaringBitmap;
@@ -100,7 +100,7 @@ pub struct BitmapIndex {
100100
/// for quickly locating the row and reading it out
101101
index_map: BTreeMap<OrderableScalarValue, usize>,
102102

103-
null_map: Arc<RowIdTreeMap>,
103+
null_map: Arc<RowAddrTreeMap>,
104104

105105
value_type: DataType,
106106

@@ -119,7 +119,7 @@ pub struct BitmapKey {
119119
}
120120

121121
impl CacheKey for BitmapKey {
122-
type ValueType = RowIdTreeMap;
122+
type ValueType = RowAddrTreeMap;
123123

124124
fn key(&self) -> std::borrow::Cow<'_, str> {
125125
format!("{}", self.value.0).into()
@@ -129,7 +129,7 @@ impl CacheKey for BitmapKey {
129129
impl BitmapIndex {
130130
fn new(
131131
index_map: BTreeMap<OrderableScalarValue, usize>,
132-
null_map: Arc<RowIdTreeMap>,
132+
null_map: Arc<RowAddrTreeMap>,
133133
value_type: DataType,
134134
store: Arc<dyn IndexStore>,
135135
index_cache: WeakLanceCache,
@@ -160,7 +160,7 @@ impl BitmapIndex {
160160
let data_type = schema.fields[0].data_type();
161161
return Ok(Arc::new(Self::new(
162162
BTreeMap::new(),
163-
Arc::new(RowIdTreeMap::default()),
163+
Arc::new(RowAddrTreeMap::default()),
164164
data_type,
165165
store,
166166
WeakLanceCache::from(index_cache),
@@ -217,7 +217,7 @@ impl BitmapIndex {
217217
location: location!(),
218218
})?;
219219
let bitmap_bytes = binary_bitmaps.value(0);
220-
let mut bitmap = RowIdTreeMap::deserialize_from(bitmap_bytes).unwrap();
220+
let mut bitmap = RowAddrTreeMap::deserialize_from(bitmap_bytes).unwrap();
221221

222222
// Apply fragment remapping if needed
223223
if let Some(fri) = &frag_reuse_index {
@@ -243,7 +243,7 @@ impl BitmapIndex {
243243
&self,
244244
key: &OrderableScalarValue,
245245
metrics: Option<&dyn MetricsCollector>,
246-
) -> Result<Arc<RowIdTreeMap>> {
246+
) -> Result<Arc<RowAddrTreeMap>> {
247247
if key.0.is_null() {
248248
return Ok(self.null_map.clone());
249249
}
@@ -278,7 +278,7 @@ impl BitmapIndex {
278278
location: location!(),
279279
})?;
280280
let bitmap_bytes = binary_bitmaps.value(0); // First (and only) row
281-
let mut bitmap = RowIdTreeMap::deserialize_from(bitmap_bytes).unwrap();
281+
let mut bitmap = RowAddrTreeMap::deserialize_from(bitmap_bytes).unwrap();
282282

283283
if let Some(fri) = &self.frag_reuse_index {
284284
bitmap = fri.remap_row_ids_tree_map(&bitmap);
@@ -358,7 +358,7 @@ impl Index for BitmapIndex {
358358
}
359359

360360
let bitmap_bytes = bitmap_binary_array.value(idx);
361-
let mut bitmap = RowIdTreeMap::deserialize_from(bitmap_bytes).unwrap();
361+
let mut bitmap = RowAddrTreeMap::deserialize_from(bitmap_bytes).unwrap();
362362

363363
if let Some(frag_reuse_index_ref) = self.frag_reuse_index.as_ref() {
364364
bitmap = frag_reuse_index_ref.remap_row_ids_tree_map(&bitmap);
@@ -436,7 +436,7 @@ impl ScalarIndex for BitmapIndex {
436436
metrics.record_comparisons(keys.len());
437437

438438
if keys.is_empty() {
439-
RowIdTreeMap::default()
439+
RowAddrTreeMap::default()
440440
} else {
441441
let mut bitmaps = Vec::new();
442442
for key in keys {
@@ -445,7 +445,7 @@ impl ScalarIndex for BitmapIndex {
445445
}
446446

447447
let bitmap_refs: Vec<_> = bitmaps.iter().map(|b| b.as_ref()).collect();
448-
RowIdTreeMap::union_all(&bitmap_refs)
448+
RowAddrTreeMap::union_all(&bitmap_refs)
449449
}
450450
}
451451
SargableQuery::IsIn(values) => {
@@ -472,11 +472,11 @@ impl ScalarIndex for BitmapIndex {
472472
}
473473

474474
if bitmaps.is_empty() {
475-
RowIdTreeMap::default()
475+
RowAddrTreeMap::default()
476476
} else {
477-
// Convert Arc<RowIdTreeMap> to &RowIdTreeMap for union_all
477+
// Convert Arc<RowAddrTreeMap> to &RowAddrTreeMap for union_all
478478
let bitmap_refs: Vec<_> = bitmaps.iter().map(|b| b.as_ref()).collect();
479-
RowIdTreeMap::union_all(&bitmap_refs)
479+
RowAddrTreeMap::union_all(&bitmap_refs)
480480
}
481481
}
482482
SargableQuery::IsNull() => {
@@ -597,7 +597,7 @@ impl BitmapIndexPlugin {
597597
}
598598

599599
async fn write_bitmap_index(
600-
state: HashMap<ScalarValue, RowIdTreeMap>,
600+
state: HashMap<ScalarValue, RowAddrTreeMap>,
601601
index_store: &dyn IndexStore,
602602
value_type: &DataType,
603603
) -> Result<()> {
@@ -661,7 +661,7 @@ impl BitmapIndexPlugin {
661661

662662
async fn do_train_bitmap_index(
663663
mut data_source: SendableRecordBatchStream,
664-
mut state: HashMap<ScalarValue, RowIdTreeMap>,
664+
mut state: HashMap<ScalarValue, RowAddrTreeMap>,
665665
index_store: &dyn IndexStore,
666666
) -> Result<()> {
667667
let value_type = data_source.schema().field(0).data_type().clone();
@@ -687,7 +687,7 @@ impl BitmapIndexPlugin {
687687
index_store: &dyn IndexStore,
688688
) -> Result<()> {
689689
// mapping from item to list of the row ids where it is present
690-
let dictionary: HashMap<ScalarValue, RowIdTreeMap> = HashMap::new();
690+
let dictionary: HashMap<ScalarValue, RowAddrTreeMap> = HashMap::new();
691691

692692
Self::do_train_bitmap_index(data, dictionary, index_store).await
693693
}
@@ -971,7 +971,7 @@ pub mod tests {
971971
.await
972972
.unwrap_or_else(|_| panic!("Key {} should exist", key_val));
973973

974-
// Convert RowIdTreeMap to a vector for easier assertion
974+
// Convert RowAddrTreeMap to a vector for easier assertion
975975
let row_ids: Vec<u64> = bitmap.row_ids().unwrap().map(u64::from).collect();
976976

977977
// Verify length

0 commit comments

Comments
 (0)