Commit 3d5428d
General virtual columns support + row numbers as a first use-case (#8715)
Based on #7307.
# Which issue does this PR close?
- Closes #7299
# Rationale for this change
We need row numbers for many downstream features, e.g. computing unique
row identifiers in Iceberg.
# What changes are included in this PR?
New API to get row numbers as a virtual column:
```
let file = File::open(path).unwrap();
let row_number_field = Field::new("row_number", ArrowDataType::Int64, false).with_extension_type(RowNumber);
let options = ArrowReaderOptions::new().with_virtual_columns(vec![row_number_field]);
let reader = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options)
.unwrap()
.build()
.expect("Could not create reader");
reader
.collect::<Result<Vec<_>, _>>()
.expect("Could not read")
```
This column is defined as an extension type.
Parquet metadata is propagated to the array builder to compute first row
indexes.
A new Virtual column type is included in addition to Primitive and Group.
# Are these changes tested?
Yes
# Are there any user-facing changes?
This is a user-facing feature, and docstrings have been added.
No breaking changes are intended: rather than changing an existing public
method, I added a duplicate of it that takes the additional parameters.
---------
Co-authored-by: Jonas Irgens Kylling <[email protected]>
Co-authored-by: scovich <[email protected]>
Co-authored-by: Andrew Lamb <[email protected]>
1 parent 5133cb9 commit 3d5428d
File tree
14 files changed
+1104
-49
lines changed
- parquet
- examples
- src
- arrow
- array_reader
- arrow_reader
- async_reader
- push_decoder/reader_builder
- schema
- file/metadata/thrift
14 files changed
+1104
-49
lines changed
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
22 | 22 | | |
23 | 23 | | |
24 | 24 | | |
25 | | - | |
| 25 | + | |
26 | 26 | | |
27 | 27 | | |
28 | 28 | | |
| |||
35 | 35 | | |
36 | 36 | | |
37 | 37 | | |
38 | | - | |
| 38 | + | |
39 | 39 | | |
40 | | - | |
41 | | - | |
| 40 | + | |
| 41 | + | |
| 42 | + | |
42 | 43 | | |
43 | 44 | | |
44 | 45 | | |
| |||
100 | 101 | | |
101 | 102 | | |
102 | 103 | | |
103 | | - | |
| 104 | + | |
| 105 | + | |
104 | 106 | | |
105 | 107 | | |
106 | 108 | | |
107 | 109 | | |
108 | 110 | | |
109 | 111 | | |
110 | | - | |
| 112 | + | |
111 | 113 | | |
112 | 114 | | |
113 | 115 | | |
| |||
118 | 120 | | |
119 | 121 | | |
120 | 122 | | |
121 | | - | |
| 123 | + | |
122 | 124 | | |
123 | 125 | | |
124 | 126 | | |
| |||
129 | 131 | | |
130 | 132 | | |
131 | 133 | | |
| 134 | + | |
| 135 | + | |
| 136 | + | |
| 137 | + | |
| 138 | + | |
| 139 | + | |
| 140 | + | |
| 141 | + | |
132 | 142 | | |
133 | 143 | | |
134 | 144 | | |
135 | | - | |
136 | | - | |
| 145 | + | |
| 146 | + | |
| 147 | + | |
| 148 | + | |
| 149 | + | |
| 150 | + | |
| 151 | + | |
137 | 152 | | |
138 | 153 | | |
139 | 154 | | |
| 155 | + | |
140 | 156 | | |
141 | 157 | | |
142 | 158 | | |
143 | 159 | | |
144 | 160 | | |
| 161 | + | |
| 162 | + | |
| 163 | + | |
| 164 | + | |
145 | 165 | | |
146 | 166 | | |
147 | 167 | | |
148 | 168 | | |
149 | 169 | | |
150 | 170 | | |
151 | | - | |
| 171 | + | |
152 | 172 | | |
153 | 173 | | |
154 | 174 | | |
| |||
163 | 183 | | |
164 | 184 | | |
165 | 185 | | |
166 | | - | |
| 186 | + | |
167 | 187 | | |
168 | 188 | | |
169 | 189 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
26 | 26 | | |
27 | 27 | | |
28 | 28 | | |
| 29 | + | |
29 | 30 | | |
30 | 31 | | |
31 | 32 | | |
32 | 33 | | |
33 | 34 | | |
34 | 35 | | |
35 | | - | |
| 36 | + | |
36 | 37 | | |
37 | 38 | | |
38 | 39 | | |
| 40 | + | |
39 | 41 | | |
40 | 42 | | |
41 | 43 | | |
| |||
89 | 91 | | |
90 | 92 | | |
91 | 93 | | |
| 94 | + | |
| 95 | + | |
92 | 96 | | |
93 | 97 | | |
94 | 98 | | |
| |||
98 | 102 | | |
99 | 103 | | |
100 | 104 | | |
| 105 | + | |
101 | 106 | | |
102 | 107 | | |
103 | 108 | | |
| |||
108 | 113 | | |
109 | 114 | | |
110 | 115 | | |
| 116 | + | |
| 117 | + | |
| 118 | + | |
| 119 | + | |
| 120 | + | |
| 121 | + | |
111 | 122 | | |
112 | 123 | | |
113 | 124 | | |
| |||
153 | 164 | | |
154 | 165 | | |
155 | 166 | | |
| 167 | + | |
| 168 | + | |
| 169 | + | |
| 170 | + | |
| 171 | + | |
| 172 | + | |
| 173 | + | |
156 | 174 | | |
157 | 175 | | |
158 | 176 | | |
| |||
164 | 182 | | |
165 | 183 | | |
166 | 184 | | |
| 185 | + | |
| 186 | + | |
| 187 | + | |
| 188 | + | |
| 189 | + | |
| 190 | + | |
| 191 | + | |
| 192 | + | |
| 193 | + | |
| 194 | + | |
| 195 | + | |
| 196 | + | |
167 | 197 | | |
168 | 198 | | |
169 | 199 | | |
| |||
439 | 469 | | |
440 | 470 | | |
441 | 471 | | |
| 472 | + | |
442 | 473 | | |
443 | 474 | | |
444 | 475 | | |
| |||
455 | 486 | | |
456 | 487 | | |
457 | 488 | | |
| 489 | + | |
458 | 490 | | |
459 | 491 | | |
460 | 492 | | |
| |||
472 | 504 | | |
473 | 505 | | |
474 | 506 | | |
| 507 | + | |
| 508 | + | |
| 509 | + | |
| 510 | + | |
| 511 | + | |
| 512 | + | |
| 513 | + | |
| 514 | + | |
| 515 | + | |
| 516 | + | |
| 517 | + | |
| 518 | + | |
| 519 | + | |
| 520 | + | |
| 521 | + | |
| 522 | + | |
| 523 | + | |
| 524 | + | |
| 525 | + | |
| 526 | + | |
| 527 | + | |
| 528 | + | |
| 529 | + | |
| 530 | + | |
| 531 | + | |
| 532 | + | |
| 533 | + | |
| 534 | + | |
| 535 | + | |
| 536 | + | |
| 537 | + | |
| 538 | + | |
| 539 | + | |
| 540 | + | |
| 541 | + | |
| 542 | + | |
| 543 | + | |
475 | 544 | | |
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
561 | 561 | | |
562 | 562 | | |
563 | 563 | | |
| 564 | + | |
564 | 565 | | |
565 | 566 | | |
566 | 567 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
27 | 27 | | |
28 | 28 | | |
29 | 29 | | |
| 30 | + | |
30 | 31 | | |
31 | 32 | | |
32 | 33 | | |
| |||
42 | 43 | | |
43 | 44 | | |
44 | 45 | | |
| 46 | + | |
45 | 47 | | |
46 | 48 | | |
47 | 49 | | |
48 | 50 | | |
49 | 51 | | |
50 | 52 | | |
| 53 | + | |
51 | 54 | | |
52 | 55 | | |
53 | 56 | | |
| |||
139 | 142 | | |
140 | 143 | | |
141 | 144 | | |
| 145 | + | |
| 146 | + | |
| 147 | + | |
| 148 | + | |
| 149 | + | |
| 150 | + | |
| 151 | + | |
| 152 | + | |
142 | 153 | | |
143 | 154 | | |
144 | 155 | | |
145 | 156 | | |
146 | | - | |
| 157 | + | |
| 158 | + | |
| 159 | + | |
147 | 160 | | |
148 | 161 | | |
149 | 162 | | |
150 | 163 | | |
151 | 164 | | |
152 | 165 | | |
| 166 | + | |
| 167 | + | |
| 168 | + | |
| 169 | + | |
| 170 | + | |
| 171 | + | |
| 172 | + | |
| 173 | + | |
153 | 174 | | |
154 | 175 | | |
155 | 176 | | |
| |||
0 commit comments