-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Minor: Simplify + document EliminateCrossJoin better
#10427
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -885,7 +885,7 @@ pub fn can_hash(data_type: &DataType) -> bool { | |
| /// Check whether all columns are from the schema. | ||
| pub fn check_all_columns_from_schema( | ||
| columns: &HashSet<Column>, | ||
| schema: DFSchemaRef, | ||
| schema: &DFSchema, | ||
| ) -> Result<bool> { | ||
| for col in columns.iter() { | ||
| let exist = schema.is_column_from_schema(col); | ||
|
|
@@ -909,8 +909,8 @@ pub fn check_all_columns_from_schema( | |
| pub fn find_valid_equijoin_key_pair( | ||
| left_key: &Expr, | ||
| right_key: &Expr, | ||
| left_schema: DFSchemaRef, | ||
| right_schema: DFSchemaRef, | ||
| left_schema: &DFSchema, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. thats interesting, should we a reference instead of Arc whenever possible?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think in general we should use a reference when the underlying callsite doesn't actually need a owned copy So in this case, the callsite just needs a I think the runtime overhead of creating another Arc is likely to be unmeasurable in all but the most extreme circumstances. However I think the code is often easier to reason about |
||
| right_schema: &DFSchema, | ||
| ) -> Result<Option<(Expr, Expr)>> { | ||
| let left_using_columns = left_key.to_columns()?; | ||
| let right_using_columns = right_key.to_columns()?; | ||
|
|
@@ -920,8 +920,8 @@ pub fn find_valid_equijoin_key_pair( | |
| return Ok(None); | ||
| } | ||
|
|
||
| if check_all_columns_from_schema(&left_using_columns, left_schema.clone())? | ||
| && check_all_columns_from_schema(&right_using_columns, right_schema.clone())? | ||
| if check_all_columns_from_schema(&left_using_columns, left_schema)? | ||
| && check_all_columns_from_schema(&right_using_columns, right_schema)? | ||
| { | ||
| return Ok(Some((left_key.clone(), right_key.clone()))); | ||
| } else if check_all_columns_from_schema(&right_using_columns, left_schema)? | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -107,7 +107,7 @@ impl OptimizerRule for EliminateCrossJoin { | |
| left = find_inner_join( | ||
| &left, | ||
| &mut all_inputs, | ||
| &mut possible_join_keys, | ||
| &possible_join_keys, | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| &mut all_join_keys, | ||
| )?; | ||
| } | ||
|
|
@@ -144,7 +144,9 @@ impl OptimizerRule for EliminateCrossJoin { | |
| } | ||
| } | ||
|
|
||
| /// Recursively accumulate possible_join_keys and inputs from inner joins (including cross joins). | ||
| /// Recursively accumulate possible_join_keys and inputs from inner joins | ||
| /// (including cross joins). | ||
| /// | ||
| /// Returns a boolean indicating whether the flattening was successful. | ||
| fn try_flatten_join_inputs( | ||
| plan: &LogicalPlan, | ||
|
|
@@ -159,22 +161,19 @@ fn try_flatten_join_inputs( | |
| return Ok(false); | ||
| } | ||
| possible_join_keys.extend(join.on.clone()); | ||
| let left = &*(join.left); | ||
| let right = &*(join.right); | ||
| vec![left, right] | ||
| vec![&join.left, &join.right] | ||
| } | ||
| LogicalPlan::CrossJoin(join) => { | ||
| let left = &*(join.left); | ||
| let right = &*(join.right); | ||
| vec![left, right] | ||
| vec![&join.left, &join.right] | ||
| } | ||
| _ => { | ||
| return plan_err!("flatten_join_inputs just can call join/cross_join"); | ||
| } | ||
| }; | ||
|
|
||
| for child in children.iter() { | ||
| match *child { | ||
| let child = child.as_ref(); | ||
| match child { | ||
| LogicalPlan::Join(Join { | ||
| join_type: JoinType::Inner, | ||
| .. | ||
|
|
@@ -184,27 +183,39 @@ fn try_flatten_join_inputs( | |
| return Ok(false); | ||
| } | ||
| } | ||
| _ => all_inputs.push((*child).clone()), | ||
| _ => all_inputs.push(child.clone()), | ||
| } | ||
| } | ||
| Ok(true) | ||
| } | ||
|
|
||
| /// Finds the next to join with the left input plan, | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I spent some time studying this code so wanted to document what it is doing |
||
| /// | ||
| /// Finds the next `right` from `rights` that can be joined with `left_input` | ||
| /// plan based on the join keys in `possible_join_keys`. | ||
| /// | ||
| /// If such a matching `right` is found: | ||
| /// 1. Adds the matching join keys to `all_join_keys`. | ||
| /// 2. Returns `left_input JOIN right ON (all join keys)`. | ||
| /// | ||
| /// If no matching `right` is found: | ||
| /// 1. Removes the first plan from `rights` | ||
| /// 2. Returns `left_input CROSS JOIN right`. | ||
| fn find_inner_join( | ||
| left_input: &LogicalPlan, | ||
| rights: &mut Vec<LogicalPlan>, | ||
| possible_join_keys: &mut Vec<(Expr, Expr)>, | ||
| possible_join_keys: &[(Expr, Expr)], | ||
| all_join_keys: &mut HashSet<(Expr, Expr)>, | ||
| ) -> Result<LogicalPlan> { | ||
| for (i, right_input) in rights.iter().enumerate() { | ||
| let mut join_keys = vec![]; | ||
|
|
||
| for (l, r) in &mut *possible_join_keys { | ||
| for (l, r) in possible_join_keys.iter() { | ||
| let key_pair = find_valid_equijoin_key_pair( | ||
| l, | ||
| r, | ||
| left_input.schema().clone(), | ||
| right_input.schema().clone(), | ||
| left_input.schema(), | ||
| right_input.schema(), | ||
| )?; | ||
|
|
||
| // Save join keys | ||
|
|
@@ -215,6 +226,7 @@ fn find_inner_join( | |
| } | ||
| } | ||
|
|
||
| // Found one or more matching join keys | ||
| if !join_keys.is_empty() { | ||
| all_join_keys.extend(join_keys.clone()); | ||
| let right_input = rights.remove(i); | ||
|
|
@@ -236,6 +248,9 @@ fn find_inner_join( | |
| })); | ||
| } | ||
| } | ||
|
|
||
| // no matching right plan had any join keys, cross join with the first right | ||
| // plan | ||
| let right = rights.remove(0); | ||
| let join_schema = Arc::new(build_join_schema( | ||
| left_input.schema(), | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This takes the schema by reference rather than value now.
Since
DFSchemaRefis anArcI don't expect this to make any measurable performance improvement, but I think it makes it clearer what is going on and avoids a bunch ofclonesThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍