-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Add Aggregation fuzzer framework #12667
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
6514cd2
1a11133
e0ea349
c952bdf
214d67f
04b4246
6b2af7f
77d2268
4bef192
e7fbf47
984f6aa
12e3f37
ca4a40c
a4639de
0cfd035
8271079
d6e358e
2279ab7
7deced4
c5d80ce
b50ea49
7a9118f
ea6ad89
3d9bc15
bf7fc82
2e35985
0090e6c
90cb038
c2dcb60
d90b92b
58c0777
d5ff6ec
4b18d53
fbf3a6e
79b0734
ca36a88
ea5e80b
7f08f2b
9b0005b
8040dc3
5c90a6b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -44,6 +44,307 @@ use rand::rngs::StdRng; | |
| use rand::{Rng, SeedableRng}; | ||
| use tokio::task::JoinSet; | ||
|
|
||
| use crate::fuzz_cases::aggregation_fuzzer::{ | ||
| AggregationFuzzerBuilder, ColumnDescr, DatasetGeneratorConfig, | ||
| }; | ||
|
|
||
| // ======================================================================== | ||
| // The new aggregation fuzz tests based on [`AggregationFuzzer`] | ||
| // ======================================================================== | ||
|
|
||
| // TODO: write more test cases to cover more `group by`s and `aggregation function`s | |
| // TODO: maybe we can use a macro to simplify creating the cases | |
|
|
||
| /// Fuzz test for `basic prim aggr(sum/sum distinct/max/min/count/avg)` + `no group by` | ||
| #[tokio::test(flavor = "multi_thread")] | ||
| async fn test_basic_prim_aggr_no_group() { | ||
| let builder = AggregationFuzzerBuilder::default(); | ||
|
|
||
| // Define data generator config | ||
| let columns = vec![ColumnDescr::new("a", DataType::Int32)]; | ||
|
|
||
| let data_gen_config = DatasetGeneratorConfig { | ||
| columns, | ||
| rows_num_range: (512, 1024), | ||
| sort_keys_set: Vec::new(), | ||
| }; | ||
|
|
||
| // Build fuzzer | ||
| let fuzzer = builder | ||
| .data_gen_config(data_gen_config) | ||
| .data_gen_rounds(16) | ||
| .add_sql("SELECT sum(a) FROM fuzz_table") | ||
| .add_sql("SELECT sum(distinct a) FROM fuzz_table") | ||
| .add_sql("SELECT max(a) FROM fuzz_table") | ||
| .add_sql("SELECT min(a) FROM fuzz_table") | ||
| .add_sql("SELECT count(a) FROM fuzz_table") | ||
| .add_sql("SELECT count(distinct a) FROM fuzz_table") | ||
| .add_sql("SELECT avg(a) FROM fuzz_table") | ||
| .table_name("fuzz_table") | ||
| .build(); | ||
|
|
||
| fuzzer.run().await; | ||
| } | ||
|
|
||
| /// Fuzz test for `basic prim aggr(sum/sum distinct/max/min/count/avg)` + `group by single int64` | ||
| #[tokio::test(flavor = "multi_thread")] | ||
| async fn test_basic_prim_aggr_group_by_single_int64() { | ||
| let builder = AggregationFuzzerBuilder::default(); | ||
|
|
||
| // Define data generator config | ||
| let columns = vec![ | ||
| ColumnDescr::new("a", DataType::Int32), | ||
| ColumnDescr::new("b", DataType::Int64), | ||
| ColumnDescr::new("c", DataType::Int64), | ||
| ]; | ||
| let sort_keys_set = vec![ | ||
| vec!["b".to_string()], | ||
| vec!["c".to_string(), "b".to_string()], | ||
| ]; | ||
| let data_gen_config = DatasetGeneratorConfig { | ||
| columns, | ||
| rows_num_range: (512, 1024), | ||
| sort_keys_set, | ||
| }; | ||
|
|
||
| // Build fuzzer | ||
| let fuzzer = builder | ||
| .data_gen_config(data_gen_config) | ||
| .data_gen_rounds(16) | ||
| .add_sql("SELECT b, sum(a) FROM fuzz_table GROUP BY b") | ||
| .add_sql("SELECT b, sum(distinct a) FROM fuzz_table GROUP BY b") | ||
| .add_sql("SELECT b, max(a) FROM fuzz_table GROUP BY b") | ||
| .add_sql("SELECT b, min(a) FROM fuzz_table GROUP BY b") | ||
| .add_sql("SELECT b, count(a) FROM fuzz_table GROUP BY b") | ||
| .add_sql("SELECT b, count(distinct a) FROM fuzz_table GROUP BY b") | ||
| .add_sql("SELECT b, avg(a) FROM fuzz_table GROUP BY b") | ||
| .table_name("fuzz_table") | ||
| .build(); | ||
|
|
||
| fuzzer.run().await; | ||
| } | ||
|
|
||
| /// Fuzz test for `basic prim aggr(sum/sum distinct/max/min/count/avg)` + `group by single string` | ||
| #[tokio::test(flavor = "multi_thread")] | ||
| async fn test_basic_prim_aggr_group_by_single_string() { | ||
| let builder = AggregationFuzzerBuilder::default(); | ||
|
|
||
| // Define data generator config | ||
| let columns = vec![ | ||
| ColumnDescr::new("a", DataType::Int32), | ||
| ColumnDescr::new("b", DataType::Utf8), | ||
| ColumnDescr::new("c", DataType::Int64), | ||
| ]; | ||
| let sort_keys_set = vec![ | ||
| vec!["b".to_string()], | ||
| vec!["c".to_string(), "b".to_string()], | ||
| ]; | ||
| let data_gen_config = DatasetGeneratorConfig { | ||
| columns, | ||
| rows_num_range: (512, 1024), | ||
| sort_keys_set, | ||
| }; | ||
|
|
||
| // Build fuzzer | ||
| let fuzzer = builder | ||
| .data_gen_config(data_gen_config) | ||
| .data_gen_rounds(16) | ||
| .add_sql("SELECT b, sum(a) FROM fuzz_table GROUP BY b") | ||
| .add_sql("SELECT b, sum(distinct a) FROM fuzz_table GROUP BY b") | ||
| .add_sql("SELECT b, max(a) FROM fuzz_table GROUP BY b") | ||
| .add_sql("SELECT b, min(a) FROM fuzz_table GROUP BY b") | ||
| .add_sql("SELECT b, count(a) FROM fuzz_table GROUP BY b") | ||
| .add_sql("SELECT b, count(distinct a) FROM fuzz_table GROUP BY b") | ||
| .add_sql("SELECT b, avg(a) FROM fuzz_table GROUP BY b") | ||
| .table_name("fuzz_table") | ||
| .build(); | ||
|
|
||
| fuzzer.run().await; | ||
| } | ||
|
|
||
| /// Fuzz test for `basic prim aggr(sum/sum distinct/max/min/count/avg)` + `group by string + int64` | ||
| #[tokio::test(flavor = "multi_thread")] | ||
| async fn test_basic_prim_aggr_group_by_mixed_string_int64() { | ||
| let builder = AggregationFuzzerBuilder::default(); | ||
|
|
||
| // Define data generator config | ||
| let columns = vec![ | ||
| ColumnDescr::new("a", DataType::Int32), | ||
| ColumnDescr::new("b", DataType::Utf8), | ||
| ColumnDescr::new("c", DataType::Int64), | ||
| ColumnDescr::new("d", DataType::Int32), | ||
| ]; | ||
| let sort_keys_set = vec![ | ||
| vec!["b".to_string(), "c".to_string()], | ||
| vec!["d".to_string(), "b".to_string(), "c".to_string()], | ||
| ]; | ||
| let data_gen_config = DatasetGeneratorConfig { | ||
| columns, | ||
| rows_num_range: (512, 1024), | ||
| sort_keys_set, | ||
| }; | ||
|
|
||
| // Build fuzzer | ||
| let fuzzer = builder | ||
| .data_gen_config(data_gen_config) | ||
| .data_gen_rounds(16) | ||
| .add_sql("SELECT b, c, sum(a) FROM fuzz_table GROUP BY b, c") | ||
| .add_sql("SELECT b, c, sum(distinct a) FROM fuzz_table GROUP BY b,c") | ||
| .add_sql("SELECT b, c, max(a) FROM fuzz_table GROUP BY b, c") | ||
| .add_sql("SELECT b, c, min(a) FROM fuzz_table GROUP BY b, c") | ||
| .add_sql("SELECT b, c, count(a) FROM fuzz_table GROUP BY b, c") | ||
| .add_sql("SELECT b, c, count(distinct a) FROM fuzz_table GROUP BY b, c") | ||
| .add_sql("SELECT b, c, avg(a) FROM fuzz_table GROUP BY b, c") | ||
| .table_name("fuzz_table") | ||
| .build(); | ||
|
|
||
| fuzzer.run().await; | ||
| } | ||
|
|
||
| /// Fuzz test for `basic string aggr(count/count distinct/min/max)` + `no group by` | ||
| #[tokio::test(flavor = "multi_thread")] | ||
| async fn test_basic_string_aggr_no_group() { | ||
| let builder = AggregationFuzzerBuilder::default(); | ||
|
|
||
| // Define data generator config | ||
| let columns = vec![ColumnDescr::new("a", DataType::Utf8)]; | ||
|
|
||
| let data_gen_config = DatasetGeneratorConfig { | ||
| columns, | ||
| rows_num_range: (512, 1024), | ||
| sort_keys_set: Vec::new(), | ||
| }; | ||
|
|
||
| // Build fuzzer | ||
| let fuzzer = builder | ||
| .data_gen_config(data_gen_config) | ||
| .data_gen_rounds(8) | ||
| .add_sql("SELECT max(a) FROM fuzz_table") | ||
| .add_sql("SELECT min(a) FROM fuzz_table") | ||
| .add_sql("SELECT count(a) FROM fuzz_table") | ||
| .add_sql("SELECT count(distinct a) FROM fuzz_table") | ||
| .table_name("fuzz_table") | ||
| .build(); | ||
|
|
||
| fuzzer.run().await; | ||
| } | ||
|
|
||
| /// Fuzz test for `basic string aggr(count/count distinct/min/max)` + `group by single int64` | ||
| #[tokio::test(flavor = "multi_thread")] | ||
| async fn test_basic_string_aggr_group_by_single_int64() { | ||
| let builder = AggregationFuzzerBuilder::default(); | ||
|
|
||
| // Define data generator config | ||
| let columns = vec![ | ||
| ColumnDescr::new("a", DataType::Utf8), | ||
| ColumnDescr::new("b", DataType::Int64), | ||
| ColumnDescr::new("c", DataType::Int64), | ||
| ]; | ||
| let sort_keys_set = vec![ | ||
| vec!["b".to_string()], | ||
| vec!["c".to_string(), "b".to_string()], | ||
| ]; | ||
| let data_gen_config = DatasetGeneratorConfig { | ||
| columns, | ||
| rows_num_range: (512, 1024), | ||
| sort_keys_set, | ||
| }; | ||
|
|
||
| // Build fuzzer | ||
| let fuzzer = builder | ||
| .data_gen_config(data_gen_config) | ||
| .data_gen_rounds(8) | ||
| // FIXME: Encounter error in min/max | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Proposed fix in #12834 |
||
| // ArrowError(InvalidArgumentError("number of columns(1) must match number of fields(2) in schema")) | ||
| // .add_sql("SELECT b, max(a) FROM fuzz_table GROUP BY b") | ||
| // .add_sql("SELECT b, min(a) FROM fuzz_table GROUP BY b") | ||
| .add_sql("SELECT b, count(a) FROM fuzz_table GROUP BY b") | ||
| .add_sql("SELECT b, count(distinct a) FROM fuzz_table GROUP BY b") | ||
| .table_name("fuzz_table") | ||
| .build(); | ||
|
|
||
| fuzzer.run().await; | ||
| } | ||
|
|
||
| /// Fuzz test for `basic string aggr(count/count distinct/min/max)` + `group by single string` | ||
| #[tokio::test(flavor = "multi_thread")] | ||
| async fn test_basic_string_aggr_group_by_single_string() { | ||
| let builder = AggregationFuzzerBuilder::default(); | ||
|
|
||
| // Define data generator config | ||
| let columns = vec![ | ||
| ColumnDescr::new("a", DataType::Utf8), | ||
| ColumnDescr::new("b", DataType::Utf8), | ||
| ColumnDescr::new("c", DataType::Int64), | ||
| ]; | ||
| let sort_keys_set = vec![ | ||
| vec!["b".to_string()], | ||
| vec!["c".to_string(), "b".to_string()], | ||
| ]; | ||
| let data_gen_config = DatasetGeneratorConfig { | ||
| columns, | ||
| rows_num_range: (512, 1024), | ||
| sort_keys_set, | ||
| }; | ||
|
|
||
| // Build fuzzer | ||
| let fuzzer = builder | ||
| .data_gen_config(data_gen_config) | ||
| .data_gen_rounds(16) | ||
| // FIXME: Encounter error in min/max | ||
| // ArrowError(InvalidArgumentError("number of columns(1) must match number of fields(2) in schema")) | ||
| // .add_sql("SELECT b, max(a) FROM fuzz_table GROUP BY b") | ||
| // .add_sql("SELECT b, min(a) FROM fuzz_table GROUP BY b") | ||
| .add_sql("SELECT b, count(a) FROM fuzz_table GROUP BY b") | ||
| .add_sql("SELECT b, count(distinct a) FROM fuzz_table GROUP BY b") | ||
| .table_name("fuzz_table") | ||
| .build(); | ||
|
|
||
| fuzzer.run().await; | ||
| } | ||
|
|
||
| /// Fuzz test for `basic string aggr(count/count distinct/min/max)` + `group by string + int64` | ||
| #[tokio::test(flavor = "multi_thread")] | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I wonder if it would be easier to see what was happening if we made a few distinct explicit tests (rather than a single one that is multi-threaded), though I see you are just following the existing pattern
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry, I am not so clear about
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I meant like Rather than a single test that was multi-threaded
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Got it. Personally, I find the error messages messy with the current pattern. We can indeed think more about how to make the tests easier to follow. |
||
| async fn test_basic_string_aggr_group_by_mixed_string_int64() { | ||
| let builder = AggregationFuzzerBuilder::default(); | ||
|
|
||
| // Define data generator config | ||
| let columns = vec![ | ||
| ColumnDescr::new("a", DataType::Utf8), | ||
| ColumnDescr::new("b", DataType::Utf8), | ||
| ColumnDescr::new("c", DataType::Int64), | ||
| ColumnDescr::new("d", DataType::Int32), | ||
| ]; | ||
| let sort_keys_set = vec![ | ||
| vec!["b".to_string(), "c".to_string()], | ||
| vec!["d".to_string(), "b".to_string(), "c".to_string()], | ||
| ]; | ||
| let data_gen_config = DatasetGeneratorConfig { | ||
| columns, | ||
| rows_num_range: (512, 1024), | ||
| sort_keys_set, | ||
| }; | ||
|
|
||
| // Build fuzzer | ||
| let fuzzer = builder | ||
| .data_gen_config(data_gen_config) | ||
| .data_gen_rounds(16) | ||
| // FIXME: Encounter error in min/max | ||
| // ArrowError(InvalidArgumentError("number of columns(1) must match number of fields(2) in schema")) | ||
| // .add_sql("SELECT b, c, max(a) FROM fuzz_table GROUP BY b, c") | ||
| // .add_sql("SELECT b, c, min(a) FROM fuzz_table GROUP BY b, c") | ||
| .add_sql("SELECT b, c, count(a) FROM fuzz_table GROUP BY b, c") | ||
| .add_sql("SELECT b, c, count(distinct a) FROM fuzz_table GROUP BY b, c") | ||
| .table_name("fuzz_table") | ||
| .build(); | ||
|
|
||
| fuzzer.run().await; | ||
| } | ||
|
|
||
| // ======================================================================== | ||
| // The old aggregation fuzz tests | ||
| // ======================================================================== | ||
| /// Tracks if this stream is generating input or output | ||
| /// Tests that streaming aggregate and batch (non streaming) aggregate produce | ||
| /// same results | ||
| #[tokio::test(flavor = "multi_thread")] | ||
|
|
@@ -311,6 +612,7 @@ async fn group_by_string_test( | |
| let actual = extract_result_counts(results); | ||
| assert_eq!(expected, actual); | ||
| } | ||
|
|
||
| async fn verify_ordered_aggregate(frame: &DataFrame, expected_sort: bool) { | ||
| struct Visitor { | ||
| expected_sort: bool, | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.