Skip to content

Commit 18c3dd4

Browse files
committed
SPARK-26990: use user specified field names if possible
1 parent 52a180f commit 18c3dd4

2 files changed

Lines changed: 23 additions & 5 deletions

File tree

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -111,15 +111,16 @@ object PartitioningUtils {
111111
caseSensitive: Boolean,
112112
validatePartitionColumns: Boolean,
113113
timeZone: TimeZone): PartitionSpec = {
114-
val userSpecifiedDataTypes = if (userSpecifiedSchema.isDefined) {
114+
val (userSpecifiedDataTypes, userSpecifiedNames) = if (userSpecifiedSchema.isDefined) {
115115
val nameToDataType = userSpecifiedSchema.get.fields.map(f => f.name -> f.dataType).toMap
116+
val nameToName = userSpecifiedSchema.get.fields.map(f => f.name -> f.name).toMap
116117
if (!caseSensitive) {
117-
CaseInsensitiveMap(nameToDataType)
118+
(CaseInsensitiveMap(nameToDataType), CaseInsensitiveMap(nameToName))
118119
} else {
119-
nameToDataType
120+
(nameToDataType, nameToName)
120121
}
121122
} else {
122-
Map.empty[String, DataType]
123+
(Map.empty[String, DataType], Map.empty[String, String])
123124
}
124125

125126
val dateFormatter = DateFormatter()
@@ -170,7 +171,9 @@ object PartitioningUtils {
170171
columnNames.zip(literals).map { case (name, Literal(_, dataType)) =>
171172
// We always assume partition columns are nullable since we've no idea whether null values
172173
// will be appended in the future.
173-
StructField(name, userSpecifiedDataTypes.getOrElse(name, dataType), nullable = true)
174+
val resultName = userSpecifiedNames.getOrElse(name, name)
175+
val resultDataType = userSpecifiedDataTypes.getOrElse(name, dataType)
176+
StructField(resultName, resultDataType, nullable = true)
174177
}
175178
}
176179

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,21 @@ class FileIndexSuite extends SharedSQLContext {
6565
}
6666
}
6767

68+
test("SPARK-26990: use user specified field names if possible") {
69+
withTempDir { dir =>
70+
val partitionDirectory = new File(dir, "a=foo")
71+
partitionDirectory.mkdir()
72+
val file = new File(partitionDirectory, "text.txt")
73+
stringToFile(file, "text")
74+
val path = new Path(dir.getCanonicalPath)
75+
val schema = StructType(Seq(StructField("A", StringType, false)))
76+
withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") {
77+
val fileIndex = new InMemoryFileIndex(spark, Seq(path), Map.empty, Some(schema))
78+
assert(fileIndex.partitionSchema.length == 1 && fileIndex.partitionSchema.head.name == "A")
79+
}
80+
}
81+
}
82+
6883
test("SPARK-26230: if case sensitive, validate partitions with original column names") {
6984
withTempDir { dir =>
7085
val partitionDirectory = new File(dir, "a=1")

0 commit comments

Comments
 (0)