Skip to content

Commit 8aa0fec

Browse files
authored
Fix CTE Column Pruning (#942)
* Fix nested join callback so that the bitmap is at the block level rather than using global indices.
* Save new BoundColumnRefExpressions for CTE nodes in the remove_unused_columns pass so that references to them stay around while they are being used.
1 parent 87edc0b commit 8aa0fec

4 files changed

Lines changed: 10 additions & 8 deletions

File tree

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ if (CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL "DebugSanitiz
229229
if (NOT WIN32)
230230
add_compile_options(
231231
"-g"
232-
"-O1"
232+
"-O0"
233233
"-fno-omit-frame-pointer"
234234
)
235235
else()

bodo/pandas/physical/expression.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -857,7 +857,8 @@ void PhysicalExpression::join_expr_batch(
857857
int64_t right_index_start, int64_t right_index_end) {
858858
for (int64_t j = right_index_start; j < right_index_end; j++) {
859859
for (int64_t i = left_index_start; i < left_index_end; i++) {
860-
SetBitTo(match_arr, i + j,
860+
SetBitTo(match_arr,
861+
(i - left_index_start) + (j - right_index_start),
861862
join_expr(left_table, right_table, left_data, right_data,
862863
left_null_bitmap, right_null_bitmap, i, j));
863864
}

bodo/pandas/vendor/duckdb/src/include/duckdb/optimizer/remove_unused_columns.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ class RemoveUnusedColumnsPass {
8484
// same CTE index by storing the addresses of the CTERef nodes
8585
// that we've seen for each CTE index thus far.
8686
unordered_map<idx_t, unordered_set<void *>> cte_ref_check;
87+
vector<unique_ptr<BoundColumnRefExpression>> temp_column_refs;
8788
};
8889
// Bodo Change End
8990

bodo/pandas/vendor/duckdb/src/optimizer/remove_unused_columns.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -548,8 +548,8 @@ void RemoveUnusedColumns::VisitOperator(LogicalOperator &op) {
548548
ColumnBinding cte_binding(cte_id, i);
549549
if (column_references.find(cte_binding) != column_references.end()) {
550550
ColumnBinding child_binding(use_cte_root_table_index, i);
551-
BoundColumnRefExpression tempcolref(use_cte_op.types[i], child_binding);
552-
AddBinding(tempcolref);
551+
pass.temp_column_refs.push_back(make_uniq<BoundColumnRefExpression>(use_cte_op.types[i], child_binding));
552+
AddBinding(*(pass.temp_column_refs.back()));
553553
++count_col_found;
554554
}
555555
}
@@ -586,8 +586,8 @@ void RemoveUnusedColumns::VisitOperator(LogicalOperator &op) {
586586
binding = ColumnBinding(aggr.aggregate_index, agg_col);
587587
}
588588

589-
BoundColumnRefExpression tempcolref(cte_op.types[col_idx], binding);
590-
cte_side.AddBinding(tempcolref);
589+
pass.temp_column_refs.push_back(make_uniq<BoundColumnRefExpression>(cte_op.types[col_idx], binding));
590+
cte_side.AddBinding(*(pass.temp_column_refs.back()));
591591
}
592592
} else {
593593
vector<idx_t> cte_root_table_indices = cte_op.GetTableIndex();
@@ -600,8 +600,8 @@ void RemoveUnusedColumns::VisitOperator(LogicalOperator &op) {
600600
// For each column needed by a CTERef.
601601
for (auto col_idx : cte_it->second) {
602602
ColumnBinding binding(cte_root_table_index, col_idx);
603-
BoundColumnRefExpression tempcolref(cte_op.types[col_idx], binding);
604-
cte_side.AddBinding(tempcolref);
603+
pass.temp_column_refs.push_back(make_uniq<BoundColumnRefExpression>(cte_op.types[col_idx], binding));
604+
cte_side.AddBinding(*(pass.temp_column_refs.back()));
605605
}
606606
}
607607
// Now process the duplicated (CTE) side of the CTE.

0 commit comments

Comments
 (0)