Skip to content

Commit aeefd9b

Browse files
author
Jonathan Vexler
committed
add testing for fast bootstrap
1 parent 0ed2644 commit aeefd9b

2 files changed

Lines changed: 7 additions & 0 deletions

File tree

hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,7 @@ import org.apache.spark.unsafe.types.UTF8String
4242
import java.text.SimpleDateFormat
4343
import javax.annotation.concurrent.NotThreadSafe
4444
import scala.collection.JavaConverters._
45+
import scala.collection.mutable
4546
import scala.util.control.NonFatal
4647
import scala.util.{Failure, Success, Try}
4748

hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestBootstrapRead.java

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,7 @@
5353
import java.util.stream.Stream;
5454

5555
import static org.apache.hudi.common.testutils.RawTripTestPayload.recordToString;
56+
import static org.apache.hudi.config.HoodieBootstrapConfig.DATA_QUERIES_ONLY;
5657
import static org.junit.jupiter.api.Assertions.assertEquals;
5758

5859
/**
@@ -78,6 +79,7 @@ public class TestBootstrapRead extends HoodieSparkClientTestBase {
7879

7980
protected String[] partitionCols;
8081
/**
 * Column names passed to {@code drop(dropColumns)} on the DataFrames compared in
 * {@code compareTables}: four {@code _hoodie_*} columns plus {@code city_to_state}.
 */
protected static String[] dropColumns = {"_hoodie_commit_time", "_hoodie_commit_seqno", "_hoodie_record_key", "_hoodie_file_name", "city_to_state"};
82+
/**
 * Column names {@code city_to_state} and {@code partition_path}; presumably meant to be dropped
 * from the DataFrame read with {@code DATA_QUERIES_ONLY} enabled before comparison.
 * NOTE(review): not referenced in the visible hunks; {@code compareTables} drops
 * {@code "city_to_state"} inline instead. Confirm this field is used, or remove it.
 */
protected static String[] fastBootstrapDropCols = {"city_to_state", "partition_path"};
8183

8284
@BeforeEach
8385
public void setUp() throws Exception {
@@ -227,12 +229,16 @@ protected void compareTables() {
227229
}
228230
Dataset<Row> hudiDf = sparkSession.read().options(readOpts).format("hudi").load(hudiBasePath);
229231
Dataset<Row> bootstrapDf = sparkSession.read().format("hudi").load(bootstrapTargetPath);
232+
Dataset<Row> fastBootstrapDf = sparkSession.read().format("hudi").option(DATA_QUERIES_ONLY.key(), "true").load(bootstrapTargetPath);
230233
if (nPartitions == 0) {
234+
compareDf(fastBootstrapDf.drop("city_to_state"), bootstrapDf.drop(dropColumns).drop("_hoodie_partition_path"));
231235
compareDf(hudiDf.drop(dropColumns), bootstrapDf.drop(dropColumns));
232236
return;
233237
}
234238
compareDf(hudiDf.drop(dropColumns).drop(partitionCols), bootstrapDf.drop(dropColumns).drop(partitionCols));
239+
compareDf(fastBootstrapDf.drop("city_to_state").drop(partitionCols), bootstrapDf.drop(dropColumns).drop("_hoodie_partition_path").drop(partitionCols));
235240
compareDf(hudiDf.select("_row_key",partitionCols), bootstrapDf.select("_row_key",partitionCols));
241+
compareDf(fastBootstrapDf.select("_row_key",partitionCols), bootstrapDf.select("_row_key",partitionCols));
236242
}
237243

238244
protected void compareDf(Dataset<Row> df1, Dataset<Row> df2) {

0 commit comments

Comments
 (0)