@@ -774,59 +774,46 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel
     checkAnswer(df5,
       Seq(Row(1, "cathy", 9000.00, 1200.0, false), Row(1, "amy", 10000.00, 1000.0, true)))
 
+    val name = udf { (x: String) => x.matches("cat|dav|amy") }
+    val sub = udf { (x: String) => x.substring(0, 3) }
     val df6 = spark.read
       .table("h2.test.employee")
-      .groupBy("DEPT").sum("SALARY")
-      .orderBy("DEPT")
+      .select($"SALARY", $"BONUS", sub($"NAME").as("shortName"))
+      .filter(name($"shortName"))
+      .sort($"SALARY".desc)
       .limit(1)
+    // LIMIT is pushed down only if all the filters are pushed down
     checkSortRemoved(df6, false)
     checkLimitRemoved(df6, false)
-    checkPushedInfo(df6,
-      "PushedAggregates: [SUM(SALARY)]",
-      "PushedFilters: []",
-      "PushedGroupByExpressions: [DEPT]")
-    checkAnswer(df6, Seq(Row(1, 19000.00)))
+    checkPushedInfo(df6, "PushedFilters: []")
+    checkAnswer(df6, Seq(Row(10000.00, 1000.0, "amy")))
 
-    val name = udf { (x: String) => x.matches("cat|dav|amy") }
-    val sub = udf { (x: String) => x.substring(0, 3) }
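+    // Sorting by the output of a UDF cannot be compiled to SQL, so neither SORT nor LIMIT is pushed down.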
     val df7 = spark.read
       .table("h2.test.employee")
-      .select($"SALARY", $"BONUS", sub($"NAME").as("shortName"))
-      .filter(name($"shortName"))
-      .sort($"SALARY".desc)
+      .sort(sub($"NAME"))
       .limit(1)
-    // LIMIT is pushed down only if all the filters are pushed down
     checkSortRemoved(df7, false)
     checkLimitRemoved(df7, false)
     checkPushedInfo(df7, "PushedFilters: []")
-    checkAnswer(df7, Seq(Row(10000.00, 1000.0, "amy")))
+    checkAnswer(df7, Seq(Row(2, "alex", 12000.00, 1200.0, false)))
 
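+    // A CASE WHEN expression in the sort keys can be compiled to SQL, so the whole top-N is pushed down.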
     val df8 = spark.read
-      .table("h2.test.employee")
-      .sort(sub($"NAME"))
-      .limit(1)
-    checkSortRemoved(df8, false)
-    checkLimitRemoved(df8, false)
-    checkPushedInfo(df8, "PushedFilters: []")
-    checkAnswer(df8, Seq(Row(2, "alex", 12000.00, 1200.0, false)))
-
-    val df9 = spark.read
       .table("h2.test.employee")
       .select($"DEPT", $"name", $"SALARY",
         when(($"SALARY" > 8000).and($"SALARY" < 10000), $"salary").otherwise(0).as("key"))
       .sort("key", "dept", "SALARY")
       .limit(3)
-    checkSortRemoved(df9)
-    checkLimitRemoved(df9)
-    checkPushedInfo(df9,
+    checkSortRemoved(df8)
+    checkLimitRemoved(df8)
+    checkPushedInfo(df8,
       "PushedFilters: []",
-      "PushedTopN: " +
-        "ORDER BY [CASE WHEN (SALARY > 8000.00) AND (SALARY < 10000.00) THEN SALARY ELSE 0.00 END " +
-        "ASC NULLS FIRST, DEPT ASC NULLS FIRST, SALARY ASC NULLS FIRST] LIMIT 3, ")
-    checkAnswer(df9,
+      "PushedTopN: ORDER BY " +
+        "[CASE WHEN (SALARY > 8000.00) AND (SALARY < 10000.00) THEN SALARY ELSE 0.00 END " +
+        "ASC NULLS FIRST, DEPT ASC NULLS FIRST, SALARY ASC NULLS FIRST] LIMIT 3")
+    checkAnswer(df8,
       Seq(Row(1, "amy", 10000, 0), Row(2, "david", 10000, 0), Row(2, "alex", 12000, 0)))
 
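+    // With multiple input partitions, the top-N is still pushed to each partition, but Spark must
+    // merge, re-sort and re-limit the partial results, so the SORT and LIMIT operators are kept.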
-    val df10 = spark.read
+    val df9 = spark.read
       .option("partitionColumn", "dept")
       .option("lowerBound", "0")
       .option("upperBound", "2")
@@ -836,14 +823,14 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel
         when(($"SALARY" > 8000).and($"SALARY" < 10000), $"salary").otherwise(0).as("key"))
       .orderBy($"key", $"dept", $"SALARY")
       .limit(3)
-    checkSortRemoved(df10, false)
-    checkLimitRemoved(df10, false)
-    checkPushedInfo(df10,
+    checkSortRemoved(df9, false)
+    checkLimitRemoved(df9, false)
+    checkPushedInfo(df9,
       "PushedFilters: []",
-      "PushedTopN: " +
-        "ORDER BY [CASE WHEN (SALARY > 8000.00) AND (SALARY < 10000.00) THEN SALARY ELSE 0.00 END " +
-        "ASC NULLS FIRST, DEPT ASC NULLS FIRST, SALARY ASC NULLS FIRST] LIMIT 3, ")
-    checkAnswer(df10,
+      "PushedTopN: ORDER BY " +
+        "[CASE WHEN (SALARY > 8000.00) AND (SALARY < 10000.00) THEN SALARY ELSE 0.00 END " +
+        "ASC NULLS FIRST, DEPT ASC NULLS FIRST, SALARY ASC NULLS FIRST] LIMIT 3")
+    checkAnswer(df9,
       Seq(Row(1, "amy", 10000, 0), Row(2, "david", 10000, 0), Row(2, "alex", 12000, 0)))
   }
 
@@ -872,6 +859,196 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel
     checkAnswer(df2, Seq(Row(2, "david", 10000.00)))
   }
 
+  test("scan with aggregate push-down, top N push-down and offset push-down") {
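+    // SUM(SALARY) grouped by DEPT, sorted on the group-by column.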
+    val df1 = spark.read
+      .table("h2.test.employee")
+      .groupBy("DEPT").sum("SALARY")
+      .orderBy("DEPT")
+
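+    // OFFSET 1 followed by LIMIT 1 is pushed as OFFSET 1 plus a top-N of LIMIT 2 (limit + offset rows).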
+    val paging1 = df1.offset(1).limit(1)
+    checkSortRemoved(paging1)
+    checkLimitRemoved(paging1)
+    checkPushedInfo(paging1,
+      "PushedAggregates: [SUM(SALARY)]",
+      "PushedGroupByExpressions: [DEPT]",
+      "PushedFilters: []",
+      "PushedOffset: OFFSET 1",
+      "PushedTopN: ORDER BY [DEPT ASC NULLS FIRST] LIMIT 2")
+    checkAnswer(paging1, Seq(Row(2, 22000.00)))
+
+    val topN1 = df1.limit(1)
+    checkSortRemoved(topN1)
+    checkLimitRemoved(topN1)
+    checkPushedInfo(topN1,
+      "PushedAggregates: [SUM(SALARY)]",
+      "PushedGroupByExpressions: [DEPT]",
+      "PushedFilters: []",
+      "PushedTopN: ORDER BY [DEPT ASC NULLS FIRST] LIMIT 1")
+    checkAnswer(topN1, Seq(Row(1, 19000.00)))
+
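+    // Group by a casted column: the CAST is pushed down inside both the aggregate and the top-N.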
+    val df2 = spark.read
+      .table("h2.test.employee")
+      .select($"DEPT".cast("string").as("my_dept"), $"SALARY")
+      .groupBy("my_dept").sum("SALARY")
+      .orderBy("my_dept")
+
+    val paging2 = df2.offset(1).limit(1)
+    checkSortRemoved(paging2)
+    checkLimitRemoved(paging2)
+    checkPushedInfo(paging2,
+      "PushedAggregates: [SUM(SALARY)]",
+      "PushedGroupByExpressions: [CAST(DEPT AS string)]",
+      "PushedFilters: []",
+      "PushedOffset: OFFSET 1",
+      "PushedTopN: ORDER BY [CAST(DEPT AS string) ASC NULLS FIRST] LIMIT 2")
+    checkAnswer(paging2, Seq(Row("2", 22000.00)))
+
+    val topN2 = df2.limit(1)
+    checkSortRemoved(topN2)
+    checkLimitRemoved(topN2)
+    checkPushedInfo(topN2,
+      "PushedAggregates: [SUM(SALARY)]",
+      "PushedGroupByExpressions: [CAST(DEPT AS string)]",
+      "PushedFilters: []",
+      "PushedTopN: ORDER BY [CAST(DEPT AS string) ASC NULLS FIRST] LIMIT 1")
+    checkAnswer(topN2, Seq(Row("1", 19000.00)))
+
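+    // Group by DEPT but sort on CAST(DEPT AS string): the cast appears only in the pushed ORDER BY.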
+    val df3 = spark.read
+      .table("h2.test.employee")
+      .groupBy("dept").sum("SALARY")
+      .orderBy($"dept".cast("string"))
+
+    val paging3 = df3.offset(1).limit(1)
+    checkSortRemoved(paging3)
+    checkLimitRemoved(paging3)
+    checkPushedInfo(paging3,
+      "PushedAggregates: [SUM(SALARY)]",
+      "PushedGroupByExpressions: [DEPT]",
+      "PushedFilters: []",
+      "PushedOffset: OFFSET 1",
+      "PushedTopN: ORDER BY [CAST(DEPT AS string) ASC NULLS FIRST] LIMIT 2")
+    checkAnswer(paging3, Seq(Row(2, 22000.00)))
+
+    val topN3 = df3.limit(1)
+    checkSortRemoved(topN3)
+    checkLimitRemoved(topN3)
+    checkPushedInfo(topN3,
+      "PushedAggregates: [SUM(SALARY)]",
+      "PushedGroupByExpressions: [DEPT]",
+      "PushedFilters: []",
+      "PushedTopN: ORDER BY [CAST(DEPT AS string) ASC NULLS FIRST] LIMIT 1")
+    checkAnswer(topN3, Seq(Row(1, 19000.00)))
+
941+
942+ val df4 = spark.read
943+ .table(" h2.test.employee" )
944+ .groupBy(" DEPT" , " IS_MANAGER" ).sum(" SALARY" )
945+ .orderBy(" DEPT" , " IS_MANAGER" )
946+
947+ val paging4 = df4.offset(1 ).limit(1 )
948+ checkSortRemoved(paging4)
949+ checkLimitRemoved(paging4)
950+ checkPushedInfo(paging4,
951+ " PushedAggregates: [SUM(SALARY)]" ,
952+ " PushedGroupByExpressions: [DEPT, IS_MANAGER]" ,
953+ " PushedFilters: []" ,
954+ " PushedOffset: OFFSET 1" ,
955+ " PushedTopN: ORDER BY [DEPT ASC NULLS FIRST, IS_MANAGER ASC NULLS FIRST] LIMIT 2" )
956+ checkAnswer(paging4, Seq (Row (1 , true , 10000.00 )))
957+
958+ val topN4 = df4.limit(1 )
959+ checkSortRemoved(topN4)
960+ checkLimitRemoved(topN4)
961+ checkPushedInfo(topN4,
962+ " PushedAggregates: [SUM(SALARY)]" ,
963+ " PushedGroupByExpressions: [DEPT, IS_MANAGER]" ,
964+ " PushedFilters: []" ,
965+ " PushedTopN: ORDER BY [DEPT ASC NULLS FIRST, IS_MANAGER ASC NULLS FIRST] LIMIT 1" )
966+ checkAnswer(topN4, Seq (Row (1 , false , 9000.00 )))
967+
968+ val df5 = spark.read
969+ .table(" h2.test.employee" )
970+ .select($" SALARY" , $" IS_MANAGER" , $" DEPT" .cast(" string" ).as(" my_dept" ))
971+ .groupBy(" my_dept" , " IS_MANAGER" ).sum(" SALARY" )
972+ .orderBy(" my_dept" , " IS_MANAGER" )
973+
974+ val paging5 = df5.offset(1 ).limit(1 )
975+ checkSortRemoved(paging5)
976+ checkLimitRemoved(paging5)
977+ checkPushedInfo(paging5,
978+ " PushedAggregates: [SUM(SALARY)]" ,
979+ " PushedGroupByExpressions: [CAST(DEPT AS string), IS_MANAGER]" ,
980+ " PushedFilters: []" ,
981+ " PushedOffset: OFFSET 1" ,
982+ " PushedTopN: " +
983+ " ORDER BY [CAST(DEPT AS string) ASC NULLS FIRST, IS_MANAGER ASC NULLS FIRST] LIMIT 2" )
984+ checkAnswer(paging5, Seq (Row (" 1" , true , 10000.00 )))
985+
986+ val topN5 = df5.limit(1 )
987+ checkSortRemoved(topN5)
988+ checkLimitRemoved(topN5)
989+ checkPushedInfo(topN5,
990+ " PushedAggregates: [SUM(SALARY)]" ,
991+ " PushedGroupByExpressions: [CAST(DEPT AS string), IS_MANAGER]" ,
992+ " PushedFilters: []" ,
993+ " PushedTopN: " +
994+ " ORDER BY [CAST(DEPT AS string) ASC NULLS FIRST, IS_MANAGER ASC NULLS FIRST] LIMIT 1" )
995+ checkAnswer(topN5, Seq (Row (" 1" , false , 9000.00 )))
996+
997+ val df6 = spark.read
998+ .table(" h2.test.employee" )
999+ .select($" DEPT" , $" SALARY" )
1000+ .groupBy(" dept" ).agg(sum(" SALARY" ))
1001+ .orderBy(sum(" SALARY" ))
1002+
1003+ val paging6 = df6.offset(1 ).limit(1 )
1004+ checkSortRemoved(paging6)
1005+ checkLimitRemoved(paging6)
1006+ checkPushedInfo(paging6,
1007+ " PushedAggregates: [SUM(SALARY)]" ,
1008+ " PushedGroupByExpressions: [DEPT]" ,
1009+ " PushedFilters: []" ,
1010+ " PushedOffset: OFFSET 1" ,
1011+ " PushedTopN: ORDER BY [SUM(SALARY) ASC NULLS FIRST] LIMIT 2" )
1012+ checkAnswer(paging6, Seq (Row (1 , 19000.00 )))
1013+
1014+ val topN6 = df6.limit(1 )
1015+ checkSortRemoved(topN6)
1016+ checkLimitRemoved(topN6)
1017+ checkPushedInfo(topN6,
1018+ " PushedAggregates: [SUM(SALARY)]" ,
1019+ " PushedGroupByExpressions: [DEPT]" ,
1020+ " PushedFilters: []" ,
1021+ " PushedTopN: ORDER BY [SUM(SALARY) ASC NULLS FIRST] LIMIT 1" )
1022+ checkAnswer(topN6, Seq (Row (6 , 12000.00 )))
1023+
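+    // Same as above, but the aggregate is sorted through an alias.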
+    val df7 = spark.read
+      .table("h2.test.employee")
+      .select($"DEPT", $"SALARY")
+      .groupBy("dept").agg(sum("SALARY").as("total"))
+      .orderBy("total")
+
+    val paging7 = df7.offset(1).limit(1)
+    checkSortRemoved(paging7)
+    checkLimitRemoved(paging7)
+    checkPushedInfo(paging7,
+      "PushedAggregates: [SUM(SALARY)]",
+      "PushedGroupByExpressions: [DEPT]",
+      "PushedFilters: []",
+      "PushedOffset: OFFSET 1",
+      "PushedTopN: ORDER BY [SUM(SALARY) ASC NULLS FIRST] LIMIT 2")
+    checkAnswer(paging7, Seq(Row(1, 19000.00)))
+
+    val topN7 = df7.limit(1)
+    checkSortRemoved(topN7)
+    checkLimitRemoved(topN7)
+    checkPushedInfo(topN7,
+      "PushedAggregates: [SUM(SALARY)]",
+      "PushedGroupByExpressions: [DEPT]",
+      "PushedFilters: []",
+      "PushedTopN: ORDER BY [SUM(SALARY) ASC NULLS FIRST] LIMIT 1")
+    checkAnswer(topN7, Seq(Row(6, 12000.00)))
+  }
+
   test("scan with filter push-down") {
     val df = spark.table("h2.test.people").filter($"id" > 1)
     checkFiltersRemoved(df)