Skip to content

Commit 7b60e4a

Browse files
authored
More efficient window implementation (#217)
* Add more optimization rules, for example one that splits OVER from other projects * Implemented a more optimized window handler plugin * Huge refactoring to integrate multiple window aggregations into one pass - needs refinement still * Make sure to keep output names even after optimization * Refine the new implemenation with docu and type annotations * Make sure the mapped function is pickleable without dask
1 parent 5f7ebc1 commit 7b60e4a

8 files changed

Lines changed: 496 additions & 437 deletions

File tree

dask_sql/context.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ def __init__(self):
8686
RelConverter.add_plugin_class(logical.LogicalTableScanPlugin, replace=False)
8787
RelConverter.add_plugin_class(logical.LogicalUnionPlugin, replace=False)
8888
RelConverter.add_plugin_class(logical.LogicalValuesPlugin, replace=False)
89+
RelConverter.add_plugin_class(logical.LogicalWindowPlugin, replace=False)
8990
RelConverter.add_plugin_class(logical.SamplePlugin, replace=False)
9091
RelConverter.add_plugin_class(custom.AnalyzeTablePlugin, replace=False)
9192
RelConverter.add_plugin_class(custom.CreateExperimentPlugin, replace=False)
@@ -108,7 +109,6 @@ def __init__(self):
108109
RexConverter.add_plugin_class(core.RexCallPlugin, replace=False)
109110
RexConverter.add_plugin_class(core.RexInputRefPlugin, replace=False)
110111
RexConverter.add_plugin_class(core.RexLiteralPlugin, replace=False)
111-
RexConverter.add_plugin_class(core.RexOverPlugin, replace=False)
112112

113113
InputUtil.add_plugin_class(input_utils.DaskInputPlugin, replace=False)
114114
InputUtil.add_plugin_class(input_utils.PandasInputPlugin, replace=False)
@@ -427,7 +427,7 @@ def sql(
427427
cc = dc.column_container
428428
cc = cc.rename(
429429
{
430-
df_col: df_col if not df_col.startswith("EXPR$") else select_name
430+
df_col: select_name
431431
for df_col, select_name in zip(cc.columns, select_names)
432432
}
433433
)
@@ -711,12 +711,18 @@ def _get_ral(self, sql):
711711
sqlNode = generator.getSqlNode(sql)
712712
sqlNodeClass = get_java_class(sqlNode)
713713

714-
if sqlNodeClass.startswith("com.dask.sql.parser."):
715-
rel = sqlNode
716-
rel_string = ""
717-
else:
714+
select_names = None
715+
rel = sqlNode
716+
rel_string = ""
717+
718+
if not sqlNodeClass.startswith("com.dask.sql.parser."):
718719
validatedSqlNode = generator.getValidatedNode(sqlNode)
719720
nonOptimizedRelNode = generator.getRelationalAlgebra(validatedSqlNode)
721+
# Optimization might remove some alias projects. Make sure to keep them here.
722+
select_names = [
723+
str(name)
724+
for name in nonOptimizedRelNode.getRowType().getFieldNames()
725+
]
720726
rel = generator.getOptimizedRelationalAlgebra(nonOptimizedRelNode)
721727
rel_string = str(generator.getRelationalAlgebraString(rel))
722728
except (ValidationException, SqlParseException) as e:
@@ -741,13 +747,14 @@ def _get_ral(self, sql):
741747
if sqlNodeClass == "org.apache.calcite.sql.SqlSelect":
742748
select_names = [
743749
self._to_sql_string(s, default_dialect=default_dialect)
744-
for s in sqlNode.getSelectList()
750+
if current_name.startswith("EXPR$")
751+
else current_name
752+
for s, current_name in zip(sqlNode.getSelectList(), select_names)
745753
]
746754
else:
747755
logger.debug(
748756
"Not extracting output column names as the SQL is not a SELECT call"
749757
)
750-
select_names = None
751758

752759
logger.debug(f"Extracted relational algebra:\n {rel_string}")
753760
return rel, select_names, rel_string

dask_sql/physical/rel/logical/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from .table_scan import LogicalTableScanPlugin
88
from .union import LogicalUnionPlugin
99
from .values import LogicalValuesPlugin
10+
from .window import LogicalWindowPlugin
1011

1112
__all__ = [
1213
LogicalAggregatePlugin,
@@ -17,5 +18,6 @@
1718
LogicalTableScanPlugin,
1819
LogicalUnionPlugin,
1920
LogicalValuesPlugin,
21+
LogicalWindowPlugin,
2022
SamplePlugin,
2123
]

0 commit comments

Comments
 (0)