
Commit 4af60dc

Author: Alexey Kudinkin
[HUDI-4465] Optimizing file-listing sequence of Metadata Table (#6016)
Optimizes the file-listing sequence of the Metadata Table to make sure it's on par with or better than FS-based file-listing.

Change log:
- Cleaned up avoidable instantiations of Hadoop's Path
- Replaced new Path w/ createUnsafePath where possible
- Cached TimestampFormatter, DateFormatter per timezone (see the sketch below)
- Avoid loading defaults in Hadoop conf when init-ing the HFile reader
- Avoid re-instantiating BaseTableMetadata twice w/in BaseHoodieTableFileIndex
- Avoid looking up FileSystem for every partition when listing a partitioned table; instead do it just once
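To illustrate the formatter-caching item above, here is a minimal, hypothetical sketch of the idea (class and method names are illustrative, not Hudi's actual API): keep one immutable, thread-safe formatter per time zone in a concurrent map and reuse it, rather than constructing a new formatter on every timestamp/date parse.

```java
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

// Hypothetical example: cache one DateTimeFormatter per time zone so repeated
// timestamp/date formatting does not rebuild the formatter on every call.
public class FormatterCache {
  private static final Map<ZoneId, DateTimeFormatter> CACHE = new ConcurrentHashMap<>();

  public static DateTimeFormatter timestampFormatter(ZoneId zoneId) {
    // computeIfAbsent builds the formatter once per zone; DateTimeFormatter is
    // immutable and thread-safe, so the cached instance can be shared freely.
    return CACHE.computeIfAbsent(zoneId,
        zone -> DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss").withZone(zone));
  }

  public static void main(String[] args) {
    DateTimeFormatter first = timestampFormatter(ZoneId.of("UTC"));
    DateTimeFormatter second = timestampFormatter(ZoneId.of("UTC"));
    System.out.println(first == second); // true: the same cached instance is reused
  }
}
```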
1 parent 13eb892 commit 4af60dc

38 files changed

Lines changed: 451 additions & 233 deletions


docker/demo/config/dfs-source.properties

Lines changed: 4 additions & 0 deletions
@@ -19,6 +19,10 @@ include=base.properties
 # Key fields, for kafka example
 hoodie.datasource.write.recordkey.field=key
 hoodie.datasource.write.partitionpath.field=date
+# NOTE: We have to duplicate configuration since this is being used
+# w/ both Spark and DeltaStreamer
+hoodie.table.recordkey.fields=key
+hoodie.table.partition.fields=date
 # Schema provider props (change to absolute path based on your installation)
 hoodie.deltastreamer.schemaprovider.source.schema.file=/var/demo/config/schema.avsc
 hoodie.deltastreamer.schemaprovider.target.schema.file=/var/demo/config/schema.avsc

hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java

Lines changed: 3 additions & 3 deletions
@@ -1006,21 +1006,21 @@ protected void compactIfNecessary(BaseHoodieWriteClient writeClient, String inst
     // finish off any pending compactions if any from previous attempt.
     writeClient.runAnyPendingCompactions();

-    String latestDeltacommitTime = metadataMetaClient.reloadActiveTimeline().getDeltaCommitTimeline().filterCompletedInstants().lastInstant()
+    String latestDeltaCommitTime = metadataMetaClient.reloadActiveTimeline().getDeltaCommitTimeline().filterCompletedInstants().lastInstant()
         .get().getTimestamp();
     List<HoodieInstant> pendingInstants = dataMetaClient.reloadActiveTimeline().filterInflightsAndRequested()
         .findInstantsBefore(instantTime).getInstants().collect(Collectors.toList());

     if (!pendingInstants.isEmpty()) {
       LOG.info(String.format("Cannot compact metadata table as there are %d inflight instants before latest deltacommit %s: %s",
-          pendingInstants.size(), latestDeltacommitTime, Arrays.toString(pendingInstants.toArray())));
+          pendingInstants.size(), latestDeltaCommitTime, Arrays.toString(pendingInstants.toArray())));
       return;
     }

     // Trigger compaction with suffixes based on the same instant time. This ensures that any future
     // delta commits synced over will not have an instant time lesser than the last completed instant on the
     // metadata table.
-    final String compactionInstantTime = latestDeltacommitTime + "001";
+    final String compactionInstantTime = latestDeltaCommitTime + "001";
     if (writeClient.scheduleCompactionAtInstant(compactionInstantTime, Option.empty())) {
       writeClient.compact(compactionInstantTime);
     }

hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/BuiltinKeyGenerator.java

Lines changed: 0 additions & 1 deletion
@@ -77,7 +77,6 @@ public abstract class BuiltinKeyGenerator extends BaseKeyGenerator implements Sp
   protected static final UTF8String NULL_RECORD_KEY_PLACEHOLDER_UTF8 = UTF8String.fromString(NULL_RECORDKEY_PLACEHOLDER);
   protected static final UTF8String EMPTY_RECORD_KEY_PLACEHOLDER_UTF8 = UTF8String.fromString(EMPTY_RECORDKEY_PLACEHOLDER);

-
   protected transient volatile SparkRowConverter rowConverter;
   protected transient volatile SparkRowAccessor rowAccessor;

hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/SimpleKeyGenerator.java

Lines changed: 7 additions & 0 deletions
@@ -20,6 +20,7 @@

 import org.apache.avro.generic.GenericRecord;
 import org.apache.hudi.common.config.TypedProperties;
+import org.apache.hudi.common.util.ValidationUtils;
 import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
 import org.apache.spark.sql.Row;
 import org.apache.spark.sql.catalyst.InternalRow;
@@ -46,6 +47,12 @@ public SimpleKeyGenerator(TypedProperties props) {

   SimpleKeyGenerator(TypedProperties props, String recordKeyField, String partitionPathField) {
     super(props);
+    // Make sure key-generator is configured properly
+    ValidationUtils.checkArgument(recordKeyField == null || !recordKeyField.isEmpty(),
+        "Record key field has to be non-empty!");
+    ValidationUtils.checkArgument(partitionPathField == null || !partitionPathField.isEmpty(),
+        "Partition path field has to be non-empty!");
+
     this.recordKeyFields = recordKeyField == null ? Collections.emptyList() : Collections.singletonList(recordKeyField);
     this.partitionPathFields = partitionPathField == null ? Collections.emptyList() : Collections.singletonList(partitionPathField);
     this.simpleAvroKeyGenerator = new SimpleAvroKeyGenerator(props, recordKeyField, partitionPathField);

hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/execution/datasources/SparkParsePartitionUtil.scala

Lines changed: 6 additions & 6 deletions
@@ -26,10 +26,10 @@ import org.apache.spark.sql.types.DataType

 trait SparkParsePartitionUtil extends Serializable {

-  def parsePartition(
-      path: Path,
-      typeInference: Boolean,
-      basePaths: Set[Path],
-      userSpecifiedDataTypes: Map[String, DataType],
-      timeZone: TimeZone): InternalRow
+  def parsePartition(path: Path,
+                     typeInference: Boolean,
+                     basePaths: Set[Path],
+                     userSpecifiedDataTypes: Map[String, DataType],
+                     timeZone: TimeZone,
+                     validatePartitionValues: Boolean = false): InternalRow
 }

hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/hudi/SparkAdapter.scala

Lines changed: 1 addition & 1 deletion
@@ -85,7 +85,7 @@ trait SparkAdapter extends Serializable {
   /**
    * Create the SparkParsePartitionUtil.
    */
-  def createSparkParsePartitionUtil(conf: SQLConf): SparkParsePartitionUtil
+  def getSparkParsePartitionUtil: SparkParsePartitionUtil

   /**
   * ParserInterface#parseMultipartIdentifier is supported since spark3, for spark2 this should not be called.

hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestConsistentBucketIndex.java

Lines changed: 1 addition & 1 deletion
@@ -97,7 +97,7 @@ private void setUp(boolean populateMetaFields, boolean partitioned) throws Excep
       initTestDataGenerator(new String[] {""});
     }
     initFileSystem();
-    Properties props = populateMetaFields ? new Properties() : getPropertiesForKeyGen();
+    Properties props = getPropertiesForKeyGen(populateMetaFields);
     props.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), "_row_key");
     metaClient = HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.MERGE_ON_READ, props);
     config = getConfigBuilder()

hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestHoodieMergeOnReadTable.java

Lines changed: 22 additions & 4 deletions
@@ -36,6 +36,7 @@
 import org.apache.hudi.common.table.view.TableFileSystemView.BaseFileOnlyView;
 import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
 import org.apache.hudi.common.testutils.Transformations;
+import org.apache.hudi.common.util.CollectionUtils;
 import org.apache.hudi.common.util.Option;
 import org.apache.hudi.config.HoodieClusteringConfig;
 import org.apache.hudi.config.HoodieWriteConfig;
@@ -61,7 +62,6 @@
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Row;
 import org.apache.spark.storage.StorageLevel;
-import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.ValueSource;
@@ -87,9 +87,8 @@ public class TestHoodieMergeOnReadTable extends SparkClientFunctionalTestHarness
   private HoodieTableMetaClient metaClient;
   private HoodieTestDataGenerator dataGen;

-  @BeforeEach
-  void setUp() throws IOException {
-    Properties properties = new Properties();
+  void setUp(Properties props) throws IOException {
+    Properties properties = CollectionUtils.copy(props);
     properties.setProperty(HoodieTableConfig.BASE_FILE_FORMAT.key(), HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().toString());
     metaClient = getHoodieMetaClient(HoodieTableType.MERGE_ON_READ, properties);
     dataGen = new HoodieTestDataGenerator();
@@ -99,6 +98,9 @@ void setUp() throws IOException {
   @Test
   public void testMetadataAggregateFromWriteStatus() throws Exception {
     HoodieWriteConfig cfg = getConfigBuilder(false).withWriteStatusClass(MetadataMergeWriteStatus.class).build();
+
+    setUp(cfg.getProps());
+
     try (SparkRDDWriteClient client = getHoodieWriteClient(cfg);) {

       String newCommitTime = "001";
@@ -125,6 +127,9 @@ public void testUpsertPartitioner(boolean populateMetaFields) throws Exception {
     HoodieWriteConfig.Builder cfgBuilder = getConfigBuilder(true);
     addConfigsForPopulateMetaFields(cfgBuilder, populateMetaFields);
     HoodieWriteConfig cfg = cfgBuilder.build();
+
+    setUp(cfg.getProps());
+
     try (SparkRDDWriteClient client = getHoodieWriteClient(cfg);) {

       /**
@@ -213,6 +218,8 @@ public void testLogFileCountsAfterCompaction(boolean preserveCommitMeta) throws
     addConfigsForPopulateMetaFields(cfgBuilder, populateMetaFields);
     HoodieWriteConfig config = cfgBuilder.build();

+    setUp(config.getProps());
+
     try (SparkRDDWriteClient writeClient = getHoodieWriteClient(config);) {
       String newCommitTime = "100";
       writeClient.startCommitWithTime(newCommitTime);
@@ -302,6 +309,8 @@ public void testMetadataStatsOnCommit(Boolean rollbackUsingMarkers) throws Excep
     HoodieWriteConfig cfg = getConfigBuilder(false, rollbackUsingMarkers, IndexType.INMEMORY)
         .withAutoCommit(false).build();

+    setUp(cfg.getProps());
+
     try (SparkRDDWriteClient client = getHoodieWriteClient(cfg);) {
       HoodieTable table = HoodieSparkTable.create(cfg, context(), metaClient);

@@ -381,6 +390,9 @@ public void testMetadataStatsOnCommit(Boolean rollbackUsingMarkers) throws Excep
   @Test
   public void testRollingStatsWithSmallFileHandling() throws Exception {
     HoodieWriteConfig cfg = getConfigBuilder(false, IndexType.INMEMORY).withAutoCommit(false).build();
+
+    setUp(cfg.getProps());
+
     try (SparkRDDWriteClient client = getHoodieWriteClient(cfg);) {
       Map<String, Long> fileIdToInsertsMap = new HashMap<>();
       Map<String, Long> fileIdToUpsertsMap = new HashMap<>();
@@ -497,6 +509,9 @@ public void testRollingStatsWithSmallFileHandling() throws Exception {
   @Test
   public void testHandleUpdateWithMultiplePartitions() throws Exception {
     HoodieWriteConfig cfg = getConfig(true);
+
+    setUp(cfg.getProps());
+
     try (SparkRDDWriteClient client = getHoodieWriteClient(cfg);) {

       /**
@@ -578,6 +593,9 @@ public void testReleaseResource() throws Exception {
     HoodieWriteConfig.Builder builder = getConfigBuilder(true);
     builder.withReleaseResourceEnabled(true);
     builder.withAutoCommit(false);
+
+    setUp(builder.build().getProps());
+
     /**
      * Write 1 (test when RELEASE_RESOURCE_ENABLE is true)
      */

hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableCompaction.java

Lines changed: 15 additions & 4 deletions
@@ -54,6 +54,7 @@
 import java.nio.file.Paths;
 import java.util.Arrays;
 import java.util.List;
+import java.util.Properties;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;

@@ -98,8 +99,13 @@ public void testWriteDuringCompaction() throws IOException {
         .withLayoutConfig(HoodieLayoutConfig.newBuilder()
             .withLayoutType(HoodieStorageLayout.LayoutType.BUCKET.name())
             .withLayoutPartitioner(SparkBucketIndexPartitioner.class.getName()).build())
-        .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BUCKET).withBucketNum("1").build()).build();
-    metaClient = getHoodieMetaClient(HoodieTableType.MERGE_ON_READ, config.getProps());
+        .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BUCKET).withBucketNum("1").build())
+        .build();
+
+    Properties props = getPropertiesForKeyGen(true);
+    props.putAll(config.getProps());
+
+    metaClient = getHoodieMetaClient(HoodieTableType.MERGE_ON_READ, props);
     client = getHoodieWriteClient(config);

     // write data and commit
@@ -138,8 +144,13 @@ public void testWriteLogDuringCompaction(boolean enableMetadataTable, boolean en
         .withLayoutConfig(HoodieLayoutConfig.newBuilder()
             .withLayoutType(HoodieStorageLayout.LayoutType.BUCKET.name())
             .withLayoutPartitioner(SparkBucketIndexPartitioner.class.getName()).build())
-        .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BUCKET).withBucketNum("1").build()).build();
-    metaClient = getHoodieMetaClient(HoodieTableType.MERGE_ON_READ, config.getProps());
+        .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BUCKET).withBucketNum("1").build())
+        .build();
+
+    Properties props = getPropertiesForKeyGen(true);
+    props.putAll(config.getProps());
+
+    metaClient = getHoodieMetaClient(HoodieTableType.MERGE_ON_READ, props);
     client = getHoodieWriteClient(config);

     final List<HoodieRecord> records = dataGen.generateInserts("001", 100);

hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableIncrementalRead.java

Lines changed: 1 addition & 1 deletion
@@ -82,7 +82,7 @@ void setUp() {
   public void testIncrementalReadsWithCompaction() throws Exception {
     final String partitionPath = "2020/02/20"; // use only one partition for this test
     final HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator(new String[] { partitionPath });
-    Properties props = new Properties();
+    Properties props = getPropertiesForKeyGen(true);
     props.setProperty(HoodieTableConfig.BASE_FILE_FORMAT.key(), HoodieFileFormat.PARQUET.toString());
     HoodieTableMetaClient metaClient = getHoodieMetaClient(HoodieTableType.MERGE_ON_READ, props);
     HoodieWriteConfig cfg = getConfigBuilder(true).build();
