Skip to content

Commit aa7ce00

Browse files
authored
Merge branch 'master' into flinkConfigtoHdfsConfig
2 parents ec7156d + 7fb436d commit aa7ce00

77 files changed

Lines changed: 1498 additions & 384 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
dag_name: deltastreamer-immutable-dataset.yaml
17+
dag_rounds: 5
18+
dag_intermittent_delay_mins: 0
19+
dag_content:
20+
first_bulk_insert:
21+
config:
22+
record_size: 200
23+
num_partitions_insert: 10
24+
repeat_count: 3
25+
num_records_insert: 5000
26+
type: BulkInsertNode
27+
deps: none
28+
first_validate:
29+
config:
30+
validate_hive: false
31+
delete_input_data: false
32+
type: ValidateDatasetNode
33+
deps: first_bulk_insert
34+
first_insert:
35+
config:
36+
record_size: 200
37+
num_partitions_insert: 10
38+
repeat_count: 3
39+
num_records_insert: 5000
40+
type: InsertNode
41+
deps: first_validate
42+
second_validate:
43+
config:
44+
validate_hive: false
45+
delete_input_data: false
46+
type: ValidateDatasetNode
47+
deps: first_insert
48+
last_validate:
49+
config:
50+
execute_itr_count: 5
51+
delete_input_data: true
52+
type: ValidateAsyncOperations
53+
deps: second_validate
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
dag_name: deltastreamer-pure-bulk-inserts.yaml
17+
dag_rounds: 10
18+
dag_intermittent_delay_mins: 0
19+
dag_content:
20+
first_bulk_insert:
21+
config:
22+
record_size: 200
23+
num_partitions_insert: 10
24+
repeat_count: 3
25+
num_records_insert: 5000
26+
type: BulkInsertNode
27+
deps: none
28+
second_validate:
29+
config:
30+
validate_hive: false
31+
delete_input_data: false
32+
type: ValidateDatasetNode
33+
deps: first_bulk_insert
34+
last_validate:
35+
config:
36+
execute_itr_count: 10
37+
type: ValidateAsyncOperations
38+
deps: second_validate
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
dag_name: deltastreamer-pure-inserts.yaml
17+
dag_rounds: 10
18+
dag_intermittent_delay_mins: 0
19+
dag_content:
20+
first_insert:
21+
config:
22+
record_size: 200
23+
num_partitions_insert: 10
24+
repeat_count: 3
25+
num_records_insert: 5000
26+
type: InsertNode
27+
deps: none
28+
second_validate:
29+
config:
30+
validate_hive: false
31+
delete_input_data: false
32+
type: ValidateDatasetNode
33+
deps: first_insert
34+
last_validate:
35+
config:
36+
execute_itr_count: 10
37+
type: ValidateAsyncOperations
38+
deps: second_validate

docker/demo/config/test-suite/insert-overwrite.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ dag_name: simple-deltastreamer.yaml
1717
dag_rounds: 1
1818
dag_intermittent_delay_mins: 1
1919
dag_content:
20-
2120
first_insert:
2221
config:
2322
record_size: 1000
@@ -91,4 +90,4 @@ dag_content:
9190
validate_hive: false
9291
delete_input_data: false
9392
type: ValidateDatasetNode
94-
deps: third_upsert
93+
deps: third_upsert

docker/demo/config/test-suite/multi-writer-1-ds.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
# See the License for the specific language governing permissions and
1515
# limitations under the License.
1616
dag_name: simple-deltastreamer.yaml
17-
dag_rounds: 3
17+
dag_rounds: 6
1818
dag_intermittent_delay_mins: 0
1919
dag_content:
2020
first_insert:
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
dag_name: cow-spark-simple.yaml
17+
dag_rounds: 6
18+
dag_intermittent_delay_mins: 0
19+
dag_content:
20+
first_insert:
21+
config:
22+
record_size: 1000
23+
num_partitions_insert: 1
24+
repeat_count: 1
25+
num_records_insert: 100000
26+
start_partition: 1
27+
type: SparkInsertNode
28+
deps: none
29+
first_upsert:
30+
config:
31+
record_size: 1000
32+
num_partitions_insert: 1
33+
num_records_insert: 50000
34+
repeat_count: 1
35+
num_records_upsert: 50000
36+
num_partitions_upsert: 1
37+
start_partition: 1
38+
type: SparkUpsertNode
39+
deps: first_insert
40+
first_delete:
41+
config:
42+
num_partitions_delete: 0
43+
num_records_delete: 10000
44+
start_partition: 1
45+
type: SparkDeleteNode
46+
deps: first_upsert
47+
second_validate:
48+
config:
49+
validate_hive: false
50+
delete_input_data: true
51+
type: ValidateDatasetNode
52+
deps: first_delete

docker/demo/config/test-suite/multi-writer-2-sds.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
# See the License for the specific language governing permissions and
1515
# limitations under the License.
1616
dag_name: cow-spark-simple.yaml
17-
dag_rounds: 3
18-
dag_intermittent_delay_mins: 0
17+
dag_rounds: 5
18+
dag_intermittent_delay_mins: 1
1919
dag_content:
2020
first_insert:
2121
config:
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
dag_name: cow-spark-simple.yaml
17+
dag_rounds: 4
18+
dag_intermittent_delay_mins: 1
19+
dag_content:
20+
first_insert:
21+
config:
22+
record_size: 1000
23+
num_partitions_insert: 1
24+
repeat_count: 1
25+
num_records_insert: 100000
26+
start_partition: 20
27+
type: SparkInsertNode
28+
deps: none
29+
first_upsert:
30+
config:
31+
record_size: 1000
32+
num_partitions_insert: 1
33+
num_records_insert: 50000
34+
repeat_count: 1
35+
num_records_upsert: 50000
36+
num_partitions_upsert: 1
37+
start_partition: 20
38+
type: SparkUpsertNode
39+
deps: first_insert
40+
first_delete:
41+
config:
42+
num_partitions_delete: 0
43+
num_records_delete: 10000
44+
start_partition: 20
45+
type: SparkDeleteNode
46+
deps: first_upsert
47+
second_validate:
48+
config:
49+
validate_hive: false
50+
delete_input_data: true
51+
type: ValidateDatasetNode
52+
deps: first_delete
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
dag_name: cow-spark-simple.yaml
17+
dag_rounds: 4
18+
dag_intermittent_delay_mins: 1
19+
dag_content:
20+
first_insert:
21+
config:
22+
record_size: 1000
23+
num_partitions_insert: 1
24+
repeat_count: 1
25+
num_records_insert: 100000
26+
start_partition: 30
27+
type: SparkInsertNode
28+
deps: none
29+
first_upsert:
30+
config:
31+
record_size: 1000
32+
num_partitions_insert: 1
33+
num_records_insert: 50000
34+
repeat_count: 1
35+
num_records_upsert: 50000
36+
num_partitions_upsert: 1
37+
start_partition: 30
38+
type: SparkUpsertNode
39+
deps: first_insert
40+
first_delete:
41+
config:
42+
num_partitions_delete: 0
43+
num_records_delete: 10000
44+
start_partition: 30
45+
type: SparkDeleteNode
46+
deps: first_upsert
47+
second_validate:
48+
config:
49+
validate_hive: false
50+
delete_input_data: true
51+
type: ValidateDatasetNode
52+
deps: first_delete
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
2+
#
3+
# Licensed to the Apache Software Foundation (ASF) under one
4+
# or more contributor license agreements. See the NOTICE file
5+
# distributed with this work for additional information
6+
# regarding copyright ownership. The ASF licenses this file
7+
# to you under the Apache License, Version 2.0 (the
8+
# "License"); you may not use this file except in compliance
9+
# with the License. You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing,
14+
# software distributed under the License is distributed on an
15+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
# KIND, either express or implied. See the License for the
17+
# specific language governing permissions and limitations
18+
# under the License.
19+
#
20+
21+
hoodie.insert.shuffle.parallelism=2
22+
hoodie.upsert.shuffle.parallelism=2
23+
hoodie.bulkinsert.shuffle.parallelism=2
24+
hoodie.delete.shuffle.parallelism=2
25+
26+
hoodie.metadata.enable=false
27+
28+
hoodie.deltastreamer.source.test.num_partitions=100
29+
hoodie.deltastreamer.source.test.datagen.use_rocksdb_for_storing_existing_keys=false
30+
hoodie.deltastreamer.source.test.max_unique_records=100000000
31+
hoodie.embed.timeline.server=false
32+
hoodie.deltastreamer.source.input.selector=org.apache.hudi.integ.testsuite.helpers.DFSTestSuitePathSelector
33+
34+
# hoodie.deltastreamer.source.input.selector is already set above (DFSTestSuitePathSelector); duplicate entry removed.
35+
hoodie.datasource.hive_sync.skip_ro_suffix=true
36+
37+
hoodie.datasource.write.recordkey.field=_row_key
38+
hoodie.datasource.write.keygenerator.class=org.apache.hudi.keygen.TimestampBasedKeyGenerator
39+
hoodie.datasource.write.partitionpath.field=timestamp
40+
41+
hoodie.write.concurrency.mode=optimistic_concurrency_control
42+
hoodie.cleaner.policy.failed.writes=LAZY
43+
hoodie.write.lock.provider=org.apache.hudi.client.transaction.lock.InProcessLockProvider
44+
45+
hoodie.deltastreamer.source.dfs.root=/tmp/hudi/input3
46+
hoodie.deltastreamer.schemaprovider.target.schema.file=file:/tmp/source.avsc
47+
hoodie.deltastreamer.schemaprovider.source.schema.file=file:/tmp/source.avsc
48+
hoodie.deltastreamer.keygen.timebased.timestamp.type=UNIX_TIMESTAMP
49+
hoodie.deltastreamer.keygen.timebased.output.dateformat=yyyy/MM/dd
50+
51+
hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000/
52+
hoodie.datasource.hive_sync.database=testdb
53+
hoodie.datasource.hive_sync.table=table1
54+
hoodie.datasource.hive_sync.assume_date_partitioning=false
55+
hoodie.datasource.hive_sync.partition_fields=_hoodie_partition_path
56+
hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor
57+

0 commit comments

Comments
 (0)