Skip to content

Commit 0caadf9

Browse files
author
Alexey Kudinkin
committed
Fixing partition-path extraction for globbed paths
1 parent f74fa2e commit 0caadf9

1 file changed

Lines changed: 9 additions & 6 deletions

File tree

hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/BaseFileOnlyRelation.scala

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -155,13 +155,16 @@ class BaseFileOnlyRelation(sqlContext: SQLContext,
155155
// the table schema evolution.
156156
userSpecifiedSchema = userSchema.orElse(Some(tableStructSchema)),
157157
className = fileFormatClassName,
158-
// Since we're reading the table as just collection of files we have to make sure
159-
// we only read the latest version of every Hudi's file-group, which might be compacted, clustered, etc.
160-
// while keeping previous versions of the files around as well.
161-
//
162-
// We rely on [[HoodieROTablePathFilter]], to do proper filtering to assure that
163158
options = optParams ++ Map(
164-
"mapreduce.input.pathFilter.class" -> classOf[HoodieROTablePathFilter].getName
159+
// Since we're reading the table as just a collection of files, we have to make sure
160+
// we only read the latest version of every Hudi file-group, which might be compacted, clustered, etc.
161+
// while keeping previous versions of the files around as well.
162+
//
163+
// We rely on [[HoodieROTablePathFilter]] to do this filtering, so that only the latest file versions are read
164+
"mapreduce.input.pathFilter.class" -> classOf[HoodieROTablePathFilter].getName,
165+
// We have to override the [[EXTRACT_PARTITION_VALUES_FROM_PARTITION_PATH]] setting, since
166+
// the relation might have this setting overridden
167+
DataSourceReadOptions.EXTRACT_PARTITION_VALUES_FROM_PARTITION_PATH.key -> shouldExtractPartitionValuesFromPartitionPath.toString
165168
),
166169
partitionColumns = partitionColumns
167170
)

0 commit comments

Comments
 (0)