Skip to content

Commit 9d18493

Browse files
committed
Fix test due to slash encoding
1 parent b291531 commit 9d18493

3 files changed

Lines changed: 36 additions & 30 deletions

File tree

docker/demo/sparksql-bootstrap-prep-source.commands

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,5 +18,7 @@
1818
import org.apache.spark.sql.functions.col
1919

2020
val df = spark.read.format("org.apache.hudi").load("/user/hive/warehouse/stock_ticks_cow/*/*/*").drop("_hoodie_commit_time", "_hoodie_record_key", "_hoodie_file_name", "_hoodie_commit_seqno", "_hoodie_partition_path")
21-
df.write.format("parquet").save("/user/hive/warehouse/stock_ticks_cow_bs_src/2018/08/31/")
21+
// TODO(HUDI-4944): fix the test to use a partition column with slashes (`/`) included
22+
// in the value. Currently it fails the tests due to slash encoding.
23+
df.write.format("parquet").partitionBy("symbol").save("/user/hive/warehouse/stock_ticks_cow_bs_src")
2224
System.exit(0)

hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieDemo.java

Lines changed: 28 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -231,33 +231,35 @@ private void ingestFirstBatchAndHiveSync() throws Exception {
231231
executeSparkSQLCommand(SPARKSQL_BS_PREP_COMMANDS, true);
232232
List<String> bootstrapCmds = CollectionUtils.createImmutableList(
233233
"spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer " + HUDI_UTILITIES_BUNDLE
234-
+ " --table-type COPY_ON_WRITE "
235-
+ " --run-bootstrap "
236-
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
237-
+ " --target-base-path " + COW_BOOTSTRAPPED_BASE_PATH + " --target-table " + COW_BOOTSTRAPPED_TABLE_NAME
238-
+ " --props /var/demo/config/dfs-source.properties"
239-
+ " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
240-
+ " --initial-checkpoint-provider"
241-
+ " org.apache.hudi.utilities.checkpointing.InitialCheckpointFromAnotherHoodieTimelineProvider"
242-
+ " --hoodie-conf hoodie.bootstrap.base.path=" + BOOTSTRAPPED_SRC_PATH
243-
+ " --hoodie-conf hoodie.deltastreamer.checkpoint.provider.path=" + COW_BASE_PATH
244-
+ " --hoodie-conf hoodie.bootstrap.parallelism=2 "
245-
+ " --hoodie-conf hoodie.bootstrap.keygen.class=" + SimpleKeyGenerator.class.getName()
246-
+ String.format(HIVE_SYNC_CMD_FMT, "dt", COW_BOOTSTRAPPED_TABLE_NAME),
234+
+ " --table-type COPY_ON_WRITE "
235+
+ " --run-bootstrap "
236+
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
237+
+ " --target-base-path " + COW_BOOTSTRAPPED_BASE_PATH + " --target-table " + COW_BOOTSTRAPPED_TABLE_NAME
238+
+ " --props /var/demo/config/dfs-source.properties"
239+
+ " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
240+
+ " --initial-checkpoint-provider"
241+
+ " org.apache.hudi.utilities.checkpointing.InitialCheckpointFromAnotherHoodieTimelineProvider"
242+
+ " --hoodie-conf hoodie.datasource.write.partitionpath.field=symbol"
243+
+ " --hoodie-conf hoodie.bootstrap.base.path=" + BOOTSTRAPPED_SRC_PATH
244+
+ " --hoodie-conf hoodie.deltastreamer.checkpoint.provider.path=" + COW_BASE_PATH
245+
+ " --hoodie-conf hoodie.bootstrap.parallelism=2 "
246+
+ " --hoodie-conf hoodie.bootstrap.keygen.class=" + SimpleKeyGenerator.class.getName()
247+
+ String.format(HIVE_SYNC_CMD_FMT, "dt", COW_BOOTSTRAPPED_TABLE_NAME),
247248
"spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer " + HUDI_UTILITIES_BUNDLE
248-
+ " --table-type MERGE_ON_READ "
249-
+ " --run-bootstrap "
250-
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
251-
+ " --target-base-path " + MOR_BOOTSTRAPPED_BASE_PATH + " --target-table " + MOR_BOOTSTRAPPED_TABLE_NAME
252-
+ " --props /var/demo/config/dfs-source.properties"
253-
+ " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
254-
+ " --initial-checkpoint-provider"
255-
+ " org.apache.hudi.utilities.checkpointing.InitialCheckpointFromAnotherHoodieTimelineProvider"
256-
+ " --hoodie-conf hoodie.bootstrap.base.path=" + BOOTSTRAPPED_SRC_PATH
257-
+ " --hoodie-conf hoodie.deltastreamer.checkpoint.provider.path=" + COW_BASE_PATH
258-
+ " --hoodie-conf hoodie.bootstrap.parallelism=2 "
259-
+ " --hoodie-conf hoodie.bootstrap.keygen.class=" + SimpleKeyGenerator.class.getName()
260-
+ String.format(HIVE_SYNC_CMD_FMT, "dt", MOR_BOOTSTRAPPED_TABLE_NAME));
249+
+ " --table-type MERGE_ON_READ "
250+
+ " --run-bootstrap "
251+
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
252+
+ " --target-base-path " + MOR_BOOTSTRAPPED_BASE_PATH + " --target-table " + MOR_BOOTSTRAPPED_TABLE_NAME
253+
+ " --props /var/demo/config/dfs-source.properties"
254+
+ " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
255+
+ " --initial-checkpoint-provider"
256+
+ " org.apache.hudi.utilities.checkpointing.InitialCheckpointFromAnotherHoodieTimelineProvider"
257+
+ " --hoodie-conf hoodie.datasource.write.partitionpath.field=symbol"
258+
+ " --hoodie-conf hoodie.bootstrap.base.path=" + BOOTSTRAPPED_SRC_PATH
259+
+ " --hoodie-conf hoodie.deltastreamer.checkpoint.provider.path=" + COW_BASE_PATH
260+
+ " --hoodie-conf hoodie.bootstrap.parallelism=2 "
261+
+ " --hoodie-conf hoodie.bootstrap.keygen.class=" + SimpleKeyGenerator.class.getName()
262+
+ String.format(HIVE_SYNC_CMD_FMT, "dt", MOR_BOOTSTRAPPED_TABLE_NAME));
261263
executeCommandStringsInDocker(ADHOC_1_CONTAINER, bootstrapCmds);
262264
}
263265

hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamer.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,6 @@
5656
import org.apache.hudi.exception.TableNotFoundException;
5757
import org.apache.hudi.hive.HiveSyncConfig;
5858
import org.apache.hudi.hive.HoodieHiveSyncClient;
59-
import org.apache.hudi.keygen.NonpartitionedKeyGenerator;
6059
import org.apache.hudi.keygen.SimpleKeyGenerator;
6160
import org.apache.hudi.metrics.Metrics;
6261
import org.apache.hudi.utilities.DummySchemaProvider;
@@ -640,12 +639,15 @@ public void testBulkInsertsAndUpsertsWithBootstrap() throws Exception {
640639
Dataset<Row> sourceDf = sqlContext.read()
641640
.format("org.apache.hudi")
642641
.load(tableBasePath);
643-
sourceDf.write().format("parquet").save(bootstrapSourcePath);
642+
// TODO(HUDI-4944): fix the test to use a partition column with slashes (`/`) included
643+
// in the value. Currently it fails the tests due to slash encoding.
644+
sourceDf.write().format("parquet").partitionBy("rider").save(bootstrapSourcePath);
644645

645646
String newDatasetBasePath = dfsBasePath + "/test_dataset_bootstrapped";
646647
cfg.runBootstrap = true;
647648
cfg.configs.add(String.format("hoodie.bootstrap.base.path=%s", bootstrapSourcePath));
648-
cfg.configs.add(String.format("hoodie.bootstrap.keygen.class=%s", NonpartitionedKeyGenerator.class.getName()));
649+
cfg.configs.add(String.format("%s=%s", DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), "rider"));
650+
cfg.configs.add(String.format("hoodie.bootstrap.keygen.class=%s", SimpleKeyGenerator.class.getName()));
649651
cfg.configs.add("hoodie.bootstrap.parallelism=5");
650652
cfg.targetBasePath = newDatasetBasePath;
651653
new HoodieDeltaStreamer(cfg, jsc).sync();

0 commit comments

Comments (0)