diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index e1656a22b1a47..aac506d48ba92 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -1035,7 +1035,9 @@ impl DataFrame { } /// Return a reference to the unoptimized [`LogicalPlan`] that comprises - /// this DataFrame. See [`Self::into_unoptimized_plan`] for more details. + /// this DataFrame. + /// + /// See [`Self::into_unoptimized_plan`] for more details. pub fn logical_plan(&self) -> &LogicalPlan { &self.plan } @@ -1052,6 +1054,9 @@ impl DataFrame { /// snapshot of the [`SessionState`] attached to this [`DataFrame`] and /// consequently subsequent operations may take place against a different /// state (e.g. a different value of `now()`) + /// + /// See [`Self::into_parts`] to retrieve the owned [`LogicalPlan`] and + /// corresponding [`SessionState`]. pub fn into_unoptimized_plan(self) -> LogicalPlan { self.plan } diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 8483525d7f55a..ae3aaa55199e4 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -82,16 +82,12 @@ pub const UNNAMED_TABLE: &str = "?table?"; /// // SELECT last_name /// // FROM employees /// // WHERE salary < 1000 -/// let plan = table_scan( -/// Some("employee"), -/// &employee_schema(), -/// None, -/// )? -/// // Keep only rows where salary < 1000 -/// .filter(col("salary").lt_eq(lit(1000)))? -/// // only show "last_name" in the final results -/// .project(vec![col("last_name")])? -/// .build()?; +/// let plan = table_scan(Some("employee"), &employee_schema(), None)? +/// // Keep only rows where salary < 1000 +/// .filter(col("salary").lt(lit(1000)))? +/// // only show "last_name" in the final results +/// .project(vec![col("last_name")])? +/// .build()?; /// /// # Ok(()) /// # } diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index d2d133c6cbc34..7abe6b70b64e4 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -56,19 +56,139 @@ use crate::logical_plan::tree_node::unwrap_arc; pub use datafusion_common::display::{PlanType, StringifiedPlan, ToStringifiedPlan}; pub use datafusion_common::{JoinConstraint, JoinType}; -/// A LogicalPlan represents the different types of relational -/// operators (such as Projection, Filter, etc) and can be created by -/// the SQL query planner and the DataFrame API. +/// A `LogicalPlan` is a node in a tree of relational operators (such as +/// Projection or Filter). /// -/// A LogicalPlan represents transforming an input relation (table) to -/// an output relation (table) with a (potentially) different -/// schema. A plan represents a dataflow tree where data flows -/// from leaves up to the root to produce the query result. +/// Represents transforming an input relation (table) to an output relation +/// (table) with a potentially different schema. Plans form a dataflow tree +/// where data flows from leaves up to the root to produce the query result. +/// +/// `LogicalPlan`s can be created by the SQL query planner, the DataFrame API, +/// or programmatically (for example custom query languages). /// /// # See also: -/// * [`tree_node`]: To inspect and rewrite `LogicalPlan` trees +/// * [`Expr`]: For the expressions that are evaluated by the plan +/// * [`LogicalPlanBuilder`]: For building `LogicalPlan`s +/// * [`tree_node`]: To inspect and rewrite `LogicalPlan`s /// /// [`tree_node`]: crate::logical_plan::tree_node +/// +/// # Examples +/// +/// ## Creating a LogicalPlan from SQL: +/// +/// See [`SessionContext::sql`](https://docs.rs/datafusion/latest/datafusion/execution/context/struct.SessionContext.html#method.sql) +/// +/// ## Creating a LogicalPlan from the DataFrame API: +/// +/// See [`DataFrame::logical_plan`](https://docs.rs/datafusion/latest/datafusion/dataframe/struct.DataFrame.html#method.logical_plan) +/// +/// ## Creating a LogicalPlan programmatically: +/// +/// See [`LogicalPlanBuilder`] +/// +/// # Visiting and Rewriting `LogicalPlan`s +/// +/// Using the [`tree_node`] API, you can recursively walk all nodes in a +/// `LogicalPlan`. For example, to find all column references in a plan: +/// +/// ``` +/// # use std::collections::HashSet; +/// # use arrow::datatypes::{DataType, Field, Schema}; +/// # use datafusion_expr::{Expr, col, lit, LogicalPlan, LogicalPlanBuilder, table_scan}; +/// # use datafusion_common::tree_node::{TreeNodeRecursion, TreeNode}; +/// # use datafusion_common::{Column, Result}; +/// # fn employee_schema() -> Schema { +/// # Schema::new(vec![ +/// # Field::new("name", DataType::Utf8, false), +/// # Field::new("salary", DataType::Int32, false), +/// # ]) +/// # } +/// // Projection(name, salary) +/// // Filter(salary > 1000) +/// // TableScan(employee) +/// # fn main() -> Result<()> { +/// let plan = table_scan(Some("employee"), &employee_schema(), None)? +/// .filter(col("salary").gt(lit(1000)))? +/// .project(vec![col("name")])? +/// .build()?; +/// +/// // use apply to walk the plan and collect all expressions +/// let mut expressions = HashSet::new(); +/// plan.apply(|node| { +/// // collect all expressions in the plan +/// node.apply_expressions(|expr| { +/// expressions.insert(expr.clone()); +/// Ok(TreeNodeRecursion::Continue) // control walk of expressions +/// })?; +/// Ok(TreeNodeRecursion::Continue) // control walk of plan nodes +/// }).unwrap(); +/// +/// // we found the expression in projection and filter +/// assert_eq!(expressions.len(), 2); +/// println!("Found expressions: {:?}", expressions); +/// // found predicate in the Filter: employee.salary > 1000 +/// let salary = Expr::Column(Column::new(Some("employee"), "salary")); +/// assert!(expressions.contains(&salary.gt(lit(1000)))); +/// // found projection in the Projection: employee.name +/// let name = Expr::Column(Column::new(Some("employee"), "name")); +/// assert!(expressions.contains(&name)); +/// # Ok(()) +/// # } +/// ``` +/// +/// You can also rewrite plans using the [`tree_node`] API. For example, to +/// replace the filter predicate in a plan: +/// +/// ``` +/// # use std::collections::HashSet; +/// # use arrow::datatypes::{DataType, Field, Schema}; +/// # use datafusion_expr::{Expr, col, lit, LogicalPlan, LogicalPlanBuilder, table_scan}; +/// # use datafusion_common::tree_node::{TreeNodeRecursion, TreeNode}; +/// # use datafusion_common::{Column, Result}; +/// # fn employee_schema() -> Schema { +/// # Schema::new(vec![ +/// # Field::new("name", DataType::Utf8, false), +/// # Field::new("salary", DataType::Int32, false), +/// # ]) +/// # } +/// // Projection(name, salary) +/// // Filter(salary > 1000) +/// // TableScan(employee) +/// # fn main() -> Result<()> { +/// use datafusion_common::tree_node::Transformed; +/// let plan = table_scan(Some("employee"), &employee_schema(), None)? +/// .filter(col("salary").gt(lit(1000)))? +/// .project(vec![col("name")])? +/// .build()?; +/// +/// // use transform to rewrite the plan +/// let transformed_result = plan.transform(|node| { +/// // when we see the filter node +/// if let LogicalPlan::Filter(mut filter) = node { +/// // replace predicate with salary < 2000 +/// filter.predicate = Expr::Column(Column::new(Some("employee"), "salary")).lt(lit(2000)); +/// let new_plan = LogicalPlan::Filter(filter); +/// return Ok(Transformed::yes(new_plan)); // communicate the node was changed +/// } +/// // return the node unchanged +/// Ok(Transformed::no(node)) +/// }).unwrap(); +/// +/// // Transformed result contains rewritten plan and information about +/// // whether the plan was changed +/// assert!(transformed_result.transformed); +/// let rewritten_plan = transformed_result.data; +/// +/// // we found the filter +/// assert_eq!(rewritten_plan.display_indent().to_string(), +/// "Projection: employee.name\ +/// \n Filter: employee.salary < Int32(2000)\ +/// \n TableScan: employee"); +/// # Ok(()) +/// # } +/// ``` +/// #[derive(Clone, PartialEq, Eq, Hash)] pub enum LogicalPlan { /// Evaluates an arbitrary list of expressions (essentially a