@@ -24,6 +24,7 @@ import org.apache.hudi.common.table.HoodieTableMetaClient
 import org.apache.hudi.hadoop.HoodieROTablePathFilter
 import org.apache.spark.sql.SQLContext
 import org.apache.spark.sql.catalyst.expressions.Expression
+import org.apache.spark.sql.execution.datasources
 import org.apache.spark.sql.execution.datasources._
 import org.apache.spark.sql.sources.{BaseRelation, Filter}
 import org.apache.spark.sql.types.StructType
@@ -148,6 +149,15 @@ class BaseFileOnlyRelation(sqlContext: SQLContext,
     val readPathsStr = optParams.get(DataSourceReadOptions.READ_PATHS.key)
     val extraReadPaths = readPathsStr.map(p => p.split(",").toSeq).getOrElse(Seq())
 
+    // NOTE: Spark is able to infer partitioning values from partition path only when Hive-style partitioning
+    //       scheme is used. Therefore, we fallback to reading the table as non-partitioned (specifying
+    //       partitionColumns = Seq.empty) whenever Hive-style partitioning is not involved
+    val partitionColumns: Seq[String] = if (tableConfig.getHiveStylePartitioningEnable.toBoolean) {
+      this.partitionColumns
+    } else {
+      Seq.empty
+    }
+
     DataSource.apply(
       sparkSession = sparkSession,
       paths = extraReadPaths,
@@ -162,9 +172,15 @@ class BaseFileOnlyRelation(sqlContext: SQLContext,
         //
         // We rely on [[HoodieROTablePathFilter]], to do proper filtering to assure that
         "mapreduce.input.pathFilter.class" -> classOf[HoodieROTablePathFilter].getName,
+
         // We have to override [[EXTRACT_PARTITION_VALUES_FROM_PARTITION_PATH]] setting, since
         // the relation might have this setting overridden
-        DataSourceReadOptions.EXTRACT_PARTITION_VALUES_FROM_PARTITION_PATH.key -> shouldExtractPartitionValuesFromPartitionPath.toString
+        DataSourceReadOptions.EXTRACT_PARTITION_VALUES_FROM_PARTITION_PATH.key -> shouldExtractPartitionValuesFromPartitionPath.toString,
+
+        // NOTE: We have to specify table's base-path explicitly, since we're requesting Spark to read it as a
+        //       list of globbed paths which complicates partitioning discovery for Spark.
+        //       Please check [[PartitioningAwareFileIndex#basePaths]] comment for more details.
+        PartitioningAwareFileIndex.BASE_PATH_PARAM -> metaClient.getBasePathV2.toString
       ),
       partitionColumns = partitionColumns
     )
0 commit comments