Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
863fcb3
Implement PySort logical_plan
ayushdg May 5, 2022
f565c2a
Add a sort plan accessor to the logical plan
ayushdg May 5, 2022
75933b8
Python: Update the sort plugin
ayushdg May 5, 2022
e882408
Python: Uncomment tests
ayushdg May 5, 2022
5f7e8c6
Merge branch 'datafusion-sql-planner' of github.com:dask-contrib/dask…
ayushdg May 10, 2022
b6c534e
Planner: Update accessor pattern for concrete logical plan implementa…
ayushdg May 10, 2022
82b4234
Test: Address review comments
ayushdg May 10, 2022
ee99096
add support for expr_to_field for Expr::Sort expressions
andygrove May 10, 2022
5d1a561
Merge branch 'datafusion-sql-planner' of github.com:dask-contrib/dask…
ayushdg May 10, 2022
2932a28
Merge commit 'refs/pull/515/head' of github.com:dask-contrib/dask-sql…
ayushdg May 10, 2022
05c6a85
Planner: Update sort expr utilities and import cleanup
ayushdg May 11, 2022
f9f569e
Python: Re-enable skipped sort tests
ayushdg May 11, 2022
0840870
Merge branch 'datafusion-sql-planner' of github.com:dask-contrib/dask…
ayushdg May 11, 2022
8d81c44
Python: Handle case where orderby column name is an alias
ayushdg May 11, 2022
0a75c32
Apply suggestions from code review
ayushdg May 11, 2022
74d3451
Style: Fix formatting
ayushdg May 11, 2022
8aaf8fe
Merge branch 'datafusion-sql-planner' of github.com:dask-contrib/dask…
ayushdg May 12, 2022
6bbb8d7
Planner: Remove public scope for LogicalPlan import
ayushdg May 12, 2022
36b8460
Python: Add more complex sort tests with alias that error right now
ayushdg May 12, 2022
315fad2
Python: Remove old commented code
ayushdg May 13, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dask_planner/src/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ impl PyExpr {
Expr::Case { .. } => panic!("Case!!!"),
Expr::Cast { .. } => "Cast",
Expr::TryCast { .. } => panic!("TryCast!!!"),
Expr::Sort { .. } => panic!("Sort!!!"),
Expr::Sort { .. } => "Sort",
Expr::ScalarFunction { .. } => "ScalarFunction",
Expr::AggregateFunction { .. } => "AggregateFunction",
Expr::WindowFunction { .. } => panic!("WindowFunction!!!"),
Expand Down
51 changes: 39 additions & 12 deletions dask_planner/src/sql/logical.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ mod aggregate;
mod filter;
mod join;
pub mod projection;
mod sort;

pub use datafusion_expr::LogicalPlan;

Expand Down Expand Up @@ -49,28 +50,54 @@ impl PyLogicalPlan {

#[pymethods]
impl PyLogicalPlan {
/// LogicalPlan::Projection as PyProjection
pub fn projection(&self) -> PyResult<projection::PyProjection> {
let proj: projection::PyProjection = self.current_node.clone().unwrap().into();
Ok(proj)
/// LogicalPlan::Aggregate as PyAggregate
pub fn aggregate(&self) -> PyResult<aggregate::PyAggregate> {
self.current_node
.as_ref()
.map(|plan| plan.clone().into())
.ok_or(PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(
"current_node was None",
))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this approach is fine for this PR but I would like to follow up with a helper method for these conversions.

}

/// LogicalPlan::Filter as PyFilter
pub fn filter(&self) -> PyResult<filter::PyFilter> {
let filter: filter::PyFilter = self.current_node.clone().unwrap().into();
Ok(filter)
self.current_node
.as_ref()
.map(|plan| plan.clone().into())
.ok_or(PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(
"current_node was None",
))
}

/// LogicalPlan::Join as PyJoin
pub fn join(&self) -> PyResult<join::PyJoin> {
let join: join::PyJoin = self.current_node.clone().unwrap().into();
Ok(join)
self.current_node
.as_ref()
.map(|plan| plan.clone().into())
.ok_or(PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(
"current_node was None",
))
}

/// LogicalPlan::Aggregate as PyAggregate
pub fn aggregate(&self) -> PyResult<aggregate::PyAggregate> {
let agg: aggregate::PyAggregate = self.current_node.clone().unwrap().into();
Ok(agg)
/// LogicalPlan::Projection as PyProjection
pub fn projection(&self) -> PyResult<projection::PyProjection> {
self.current_node
.as_ref()
.map(|plan| plan.clone().into())
.ok_or(PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(
"current_node was None",
))
}

/// LogicalPlan::Sort as PySort
pub fn sort(&self) -> PyResult<sort::PySort> {
    self.current_node
        .as_ref()
        .map(|plan| plan.clone().into())
        // ok_or_else: build the PyErr lazily so no error object is
        // allocated on the (common) Some path.
        .ok_or_else(|| {
            PyErr::new::<pyo3::exceptions::PyRuntimeError, _>("current_node was None")
        })
}

/// Gets the "input" for the current LogicalPlan
Expand Down
78 changes: 78 additions & 0 deletions dask_planner/src/sql/logical/sort.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
use crate::expression::PyExpr;

use datafusion_expr::logical_plan::Sort;
pub use datafusion_expr::{logical_plan::LogicalPlan, Expr};

use crate::sql::exceptions::py_type_err;
use pyo3::prelude::*;

/// Python wrapper exposing DataFusion's `Sort` logical plan node to the
/// dask_planner Python module.
#[pyclass(name = "Sort", module = "dask_planner", subclass)]
#[derive(Clone)]
pub struct PySort {
    // The wrapped DataFusion Sort plan node (sort expressions + input plan).
    sort: Sort,
}

impl PySort {
    /// Returns true if the given sort expression denotes an ascending sort.
    ///
    /// # Panics
    /// Panics if `expr` is not an `Expr::Sort` variant.
    fn is_ascending(&self, expr: Expr) -> bool {
        match expr {
            // `..` elides the fields we don't inspect; `asc` is a Copy bool.
            Expr::Sort { asc, .. } => asc,
            _ => panic!("Provided expression is not a sort expression"),
        }
    }

    /// Returns true if nulls should be placed first for the given sort
    /// expression.
    ///
    /// # Panics
    /// Panics if `expr` is not an `Expr::Sort` variant.
    fn is_nulls_first(&self, expr: Expr) -> bool {
        match expr {
            // Match by value for consistency with `is_ascending`; `bool` is
            // Copy so no clone is needed.
            Expr::Sort { nulls_first, .. } => nulls_first,
            _ => panic!("Provided expression is not a sort expression"),
        }
    }
}
#[pymethods]
impl PySort {
    /// Returns the sort expressions (collation) of this sort node, each
    /// wrapped as a `PyExpr` carrying the sort's input plan for column
    /// resolution.
    #[pyo3(name = "getCollation")]
    pub fn sort_expressions(&self) -> PyResult<Vec<PyExpr>> {
        Ok(self
            .sort
            .expr
            .iter()
            .map(|expr| PyExpr::from(expr.clone(), Some(self.sort.input.clone())))
            .collect())
    }

    /// Returns, for each sort expression in order, whether the sort is
    /// ascending.
    #[pyo3(name = "getAscending")]
    pub fn get_ascending(&self) -> PyResult<Vec<bool>> {
        Ok(self
            .sort
            .expr
            .iter()
            .map(|sortexpr| self.is_ascending(sortexpr.clone()))
            .collect())
    }

    /// Returns, for each sort expression in order, whether nulls are placed
    /// first.
    #[pyo3(name = "getNullsFirst")]
    pub fn get_nulls_first(&self) -> PyResult<Vec<bool>> {
        Ok(self
            .sort
            .expr
            .iter()
            .map(|sortexpr| self.is_nulls_first(sortexpr.clone()))
            .collect())
    }
}

impl From<LogicalPlan> for PySort {
    /// Converts a `LogicalPlan::Sort` node into a `PySort`.
    ///
    /// # Panics
    /// Panics if `logical_plan` is any other variant; callers are expected to
    /// have checked the plan type first. Making these conversions fallible is
    /// tracked in dask-contrib/dask-sql#510.
    fn from(logical_plan: LogicalPlan) -> PySort {
        match logical_plan {
            LogicalPlan::Sort(sort) => PySort { sort },
            _ => panic!("Expected LogicalPlan::Sort, found another LogicalPlan variant"),
        }
    }
}
10 changes: 6 additions & 4 deletions dask_sql/physical/rel/logical/sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@

from dask_sql.datacontainer import DataContainer
from dask_sql.physical.rel.base import BaseRelPlugin

# from dask_sql.physical.utils.sort import apply_sort
from dask_sql.physical.utils.sort import apply_sort

if TYPE_CHECKING:
import dask_sql
Expand All @@ -21,7 +20,10 @@ def convert(self, rel: "LogicalPlan", context: "dask_sql.Context") -> DataContai
(dc,) = self.assert_inputs(rel, 1, context)
df = dc.df
cc = dc.column_container

sort_expressions = rel.sort().getCollation()
sort_columns = [expr.column_name(rel) for expr in sort_expressions]
sort_ascending = rel.sort().getAscending()
sort_null_first = rel.sort().getNullsFirst()
# TODO: Commented out to pass flake8, will be fixed in sort PR
# sort_collation = rel.getCollation().getFieldCollations()
# sort_columns = [
Expand All @@ -35,7 +37,7 @@ def convert(self, rel: "LogicalPlan", context: "dask_sql.Context") -> DataContai
# sort_null_first = [x.nullDirection == FIRST for x in sort_collation]

df = df.persist()
# df = apply_sort(df, sort_columns, sort_ascending, sort_null_first)
df = apply_sort(df, sort_columns, sort_ascending, sort_null_first)

cc = self.fix_column_to_row_type(cc, rel.getRowType())
# No column type has changed, so no need to cast again
Expand Down
12 changes: 6 additions & 6 deletions tests/integration/test_sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from tests.utils import assert_eq


@pytest.mark.skip(reason="WIP DataFusion")
# @pytest.mark.skip(reason="WIP DataFusion")
@pytest.mark.parametrize(
"input_table_1,input_df",
[
Expand Down Expand Up @@ -90,7 +90,7 @@ def test_sort_by_alias(c, input_table_1, request):
assert_eq(df_result, df_expected, check_index=False)


@pytest.mark.skip(reason="WIP DataFusion")
# @pytest.mark.skip(reason="WIP DataFusion")
@pytest.mark.parametrize("gpu", [False, pytest.param(True, marks=pytest.mark.gpu)])
def test_sort_with_nan(gpu):
c = Context()
Expand Down Expand Up @@ -181,7 +181,7 @@ def test_sort_with_nan(gpu):
)


@pytest.mark.skip(reason="WIP DataFusion")
# @pytest.mark.skip(reason="WIP DataFusion")
@pytest.mark.parametrize("gpu", [False, pytest.param(True, marks=pytest.mark.gpu)])
def test_sort_with_nan_more_columns(gpu):
c = Context()
Expand Down Expand Up @@ -240,7 +240,7 @@ def test_sort_with_nan_more_columns(gpu):
)


@pytest.mark.skip(reason="WIP DataFusion")
# @pytest.mark.skip(reason="WIP DataFusion")
@pytest.mark.parametrize("gpu", [False, pytest.param(True, marks=pytest.mark.gpu)])
def test_sort_with_nan_many_partitions(gpu):
c = Context()
Expand Down Expand Up @@ -281,7 +281,7 @@ def test_sort_with_nan_many_partitions(gpu):
)


@pytest.mark.skip(reason="WIP DataFusion")
# @pytest.mark.skip(reason="WIP DataFusion")
@pytest.mark.parametrize("gpu", [False, pytest.param(True, marks=pytest.mark.gpu)])
def test_sort_strings(c, gpu):
string_table = pd.DataFrame({"a": ["zzhsd", "öfjdf", "baba"]})
Expand All @@ -301,7 +301,7 @@ def test_sort_strings(c, gpu):
assert_eq(df_result, df_expected, check_index=False)


@pytest.mark.skip(reason="WIP DataFusion")
# @pytest.mark.skip(reason="WIP DataFusion")
@pytest.mark.parametrize("gpu", [False, pytest.param(True, marks=pytest.mark.gpu)])
def test_sort_not_allowed(c, gpu):
table_name = "gpu_user_table_1" if gpu else "user_table_1"
Expand Down