Skip to content

Commit 97ccf5d

Browse files
authored
[HUDI-4223] Fix NullPointerException from getLogRecordScanner when reading metadata table (#5840)
When explicitly specifying the metadata table path for reading in Spark, the "hoodie.metadata.enable" configuration is overridden to true to ensure correct read behavior.
1 parent 08fe281 commit 97ccf5d

File tree

2 files changed

+5
-5
lines changed

2 files changed

+5
-5
lines changed

hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ import org.apache.hadoop.fs.Path
2525
import org.apache.hadoop.mapred.JobConf
2626
import org.apache.hudi.HoodieConversionUtils.{toJavaOption, toScalaOption}
2727
import org.apache.hudi.HoodieMergeOnReadRDD.{AvroDeserializerSupport, collectFieldOrdinals, getPartitionPath, projectAvro, projectAvroUnsafe, projectRowUnsafe, resolveAvroSchemaNullability}
28-
import org.apache.hudi.MergeOnReadSnapshotRelation.getFilePath
2928
import org.apache.hudi.common.config.HoodieMetadataConfig
3029
import org.apache.hudi.common.engine.HoodieLocalEngineContext
3130
import org.apache.hudi.common.fs.FSUtils
@@ -37,9 +36,9 @@ import org.apache.hudi.config.HoodiePayloadConfig
3736
import org.apache.hudi.exception.HoodieException
3837
import org.apache.hudi.hadoop.config.HoodieRealtimeConfig
3938
import org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils.getMaxCompactionMemoryInBytes
39+
import org.apache.hudi.internal.schema.InternalSchema
4040
import org.apache.hudi.metadata.HoodieTableMetadata.getDataTableBasePathFromMetadataTable
4141
import org.apache.hudi.metadata.{HoodieBackedTableMetadata, HoodieTableMetadata}
42-
import org.apache.hudi.internal.schema.InternalSchema
4342
import org.apache.spark.rdd.RDD
4443
import org.apache.spark.sql.avro.HoodieAvroDeserializer
4544
import org.apache.spark.sql.catalyst.InternalRow
@@ -324,7 +323,8 @@ private object HoodieMergeOnReadRDD {
324323
val fs = FSUtils.getFs(tablePath, hadoopConf)
325324

326325
if (HoodieTableMetadata.isMetadataTable(tablePath)) {
327-
val metadataConfig = tableState.metadataConfig
326+
val metadataConfig = HoodieMetadataConfig.newBuilder()
327+
.fromProperties(tableState.metadataConfig.getProps).enable(true).build()
328328
val dataTableBasePath = getDataTableBasePathFromMetadataTable(tablePath)
329329
val metadataTable = new HoodieBackedTableMetadata(
330330
new HoodieLocalEngineContext(hadoopConf), metadataConfig,

hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMetadataTableWithSparkDataSource.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ class TestMetadataTableWithSparkDataSource extends SparkClientFunctionalTestHarn
7878
.save(basePath)
7979

8080
// Files partition of MT
81-
val filesPartitionDF = spark.read.options(metadataOpts).format(hudi).load(s"$basePath/.hoodie/metadata/files")
81+
val filesPartitionDF = spark.read.format(hudi).load(s"$basePath/.hoodie/metadata/files")
8282

8383
// Smoke test
8484
filesPartitionDF.show()
@@ -96,7 +96,7 @@ class TestMetadataTableWithSparkDataSource extends SparkClientFunctionalTestHarn
9696
assertEquals(expectedKeys, keys)
9797

9898
// Column Stats Index partition of MT
99-
val colStatsDF = spark.read.options(metadataOpts).format(hudi).load(s"$basePath/.hoodie/metadata/column_stats")
99+
val colStatsDF = spark.read.format(hudi).load(s"$basePath/.hoodie/metadata/column_stats")
100100

101101
// Smoke test
102102
colStatsDF.show()

0 commit comments

Comments (0)