|
70 | 70 | import org.apache.hudi.utilities.sources.ParquetDFSSource; |
71 | 71 | import org.apache.hudi.utilities.sources.SqlSource; |
72 | 72 | import org.apache.hudi.utilities.sources.TestDataSource; |
| 73 | +import org.apache.hudi.utilities.sources.TestParquetDFSSourceEmptyBatch; |
73 | 74 | import org.apache.hudi.utilities.testutils.JdbcTestUtils; |
74 | 75 | import org.apache.hudi.utilities.testutils.UtilitiesTestBase; |
75 | 76 | import org.apache.hudi.utilities.testutils.sources.DistributedTestDataSource; |
|
130 | 131 | import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_STRUCT_NAME; |
131 | 132 | import static org.junit.jupiter.api.Assertions.assertEquals; |
132 | 133 | import static org.junit.jupiter.api.Assertions.assertFalse; |
| 134 | +import static org.junit.jupiter.api.Assertions.assertNotEquals; |
133 | 135 | import static org.junit.jupiter.api.Assertions.assertNotNull; |
134 | 136 | import static org.junit.jupiter.api.Assertions.assertThrows; |
135 | 137 | import static org.junit.jupiter.api.Assertions.assertTrue; |
@@ -1420,15 +1422,39 @@ private void prepareParquetDFSSource(boolean useSchemaProvider, boolean hasTrans |
1420 | 1422 | } |
1421 | 1423 |
|
1422 | 1424 | private void testParquetDFSSource(boolean useSchemaProvider, List<String> transformerClassNames) throws Exception { |
| 1425 | + testParquetDFSSource(useSchemaProvider, transformerClassNames, false); |
| 1426 | + } |
| 1427 | + |
| 1428 | + private void testParquetDFSSource(boolean useSchemaProvider, List<String> transformerClassNames, boolean testEmptyBatch) throws Exception { |
1423 | 1429 | prepareParquetDFSSource(useSchemaProvider, transformerClassNames != null); |
1424 | 1430 | String tableBasePath = dfsBasePath + "/test_parquet_table" + testNum; |
1425 | 1431 | HoodieDeltaStreamer deltaStreamer = new HoodieDeltaStreamer( |
1426 | | - TestHelpers.makeConfig(tableBasePath, WriteOperationType.INSERT, ParquetDFSSource.class.getName(), |
| 1432 | + TestHelpers.makeConfig(tableBasePath, WriteOperationType.INSERT, testEmptyBatch ? TestParquetDFSSourceEmptyBatch.class.getName() |
| 1433 | + : ParquetDFSSource.class.getName(), |
1427 | 1434 | transformerClassNames, PROPS_FILENAME_TEST_PARQUET, false, |
1428 | 1435 | useSchemaProvider, 100000, false, null, null, "timestamp", null), jsc); |
1429 | 1436 | deltaStreamer.sync(); |
1430 | 1437 | TestHelpers.assertRecordCount(PARQUET_NUM_RECORDS, tableBasePath + "/*/*.parquet", sqlContext); |
1431 | 1438 | testNum++; |
| 1439 | + |
| 1440 | + if (testEmptyBatch) { |
| 1441 | + prepareParquetDFSFiles(100, PARQUET_SOURCE_ROOT, "2.parquet", false, null, null); |
| 1442 | + // parquet source to return empty batch |
| 1443 | + TestParquetDFSSourceEmptyBatch.returnEmptyBatch = true; |
| 1444 | + deltaStreamer.sync(); |
| 1445 | + TestHelpers.assertRecordCount(PARQUET_NUM_RECORDS, tableBasePath + "/*/*.parquet", sqlContext); |
| 1446 | + |
| 1447 | + HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setBasePath(tableBasePath).setConf(jsc.hadoopConfiguration()).build(); |
| 1448 | + |
| 1449 | + HoodieInstant lastInstant = metaClient.reloadActiveTimeline().getCommitsTimeline().lastInstant().get(); |
| 1450 | + HoodieCommitMetadata commitMetadata = HoodieCommitMetadata |
| 1451 | + .fromBytes(metaClient.getActiveTimeline().getInstantDetails(lastInstant).get(), HoodieCommitMetadata.class); |
| 1452 | + assertFalse(commitMetadata.getExtraMetadata().containsKey(HoodieCommitMetadata.SCHEMA_KEY)); |
| 1453 | + |
| 1454 | + // validate table schema fetches valid schema from last but one commit. |
| 1455 | + TableSchemaResolver tableSchemaResolver = new TableSchemaResolver(metaClient); |
| 1456 | + assertNotEquals(tableSchemaResolver.getTableAvroSchema(), Schema.create(Schema.Type.NULL).toString()); |
| 1457 | + } |
1432 | 1458 | } |
1433 | 1459 |
|
1434 | 1460 | private void testORCDFSSource(boolean useSchemaProvider, List<String> transformerClassNames) throws Exception { |
@@ -1584,6 +1610,11 @@ public void testParquetDFSSourceWithoutSchemaProviderAndNoTransformer() throws E |
1584 | 1610 | testParquetDFSSource(false, null); |
1585 | 1611 | } |
1586 | 1612 |
|
| 1613 | + @Test |
| 1614 | + public void testParquetDFSSourceForEmptyBatch() throws Exception { |
| 1615 | + testParquetDFSSource(false, null, true); |
| 1616 | + } |
| 1617 | + |
1587 | 1618 | @Test |
1588 | 1619 | public void testParquetDFSSourceWithoutSchemaProviderAndTransformer() throws Exception { |
1589 | 1620 | testParquetDFSSource(false, Collections.singletonList(TripsWithDistanceTransformer.class.getName())); |
|
0 commit comments