Skip to content

Commit 56d7da1

Browse files
cloud-fandavies
authored andcommitted
[SPARK-10104] [SQL] Consolidate different forms of table identifiers
Right now, we have QualifiedTableName, TableIdentifier, and Seq[String] to represent table identifiers. We should only have one form and TableIdentifier is the best one because it provides methods to get table name, database name, return unquoted string, and return quoted string. Author: Wenchen Fan <wenchen@databricks.com> Author: Wenchen Fan <cloud0fan@163.com> Closes #8453 from cloud-fan/table-name.
1 parent 9a430a0 commit 56d7da1

32 files changed

Lines changed: 212 additions & 327 deletions

File tree

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ object SqlParser extends AbstractSparkSQLParser with DataTypeParser {
170170
joinedRelation | relationFactor
171171

172172
protected lazy val relationFactor: Parser[LogicalPlan] =
173-
( rep1sep(ident, ".") ~ (opt(AS) ~> opt(ident)) ^^ {
173+
( tableIdentifier ~ (opt(AS) ~> opt(ident)) ^^ {
174174
case tableIdent ~ alias => UnresolvedRelation(tableIdent, alias)
175175
}
176176
| ("(" ~> start <~ ")") ~ (AS.? ~> ident) ^^ { case s ~ a => Subquery(a, s) }

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/TableIdentifier.scala

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,16 @@ package org.apache.spark.sql.catalyst
2020
/**
2121
* Identifies a `table` in `database`. If `database` is not defined, the current database is used.
2222
*/
23-
private[sql] case class TableIdentifier(table: String, database: Option[String] = None) {
24-
def withDatabase(database: String): TableIdentifier = this.copy(database = Some(database))
25-
26-
def toSeq: Seq[String] = database.toSeq :+ table
23+
private[sql] case class TableIdentifier(table: String, database: Option[String]) {
24+
def this(table: String) = this(table, None)
2725

2826
override def toString: String = quotedString
2927

30-
def quotedString: String = toSeq.map("`" + _ + "`").mkString(".")
28+
def quotedString: String = database.map(db => s"`$db`.`$table`").getOrElse(s"`$table`")
29+
30+
def unquotedString: String = database.map(db => s"$db.$table").getOrElse(table)
31+
}
3132

32-
def unquotedString: String = toSeq.mkString(".")
33+
private[sql] object TableIdentifier {
34+
def apply(tableName: String): TableIdentifier = new TableIdentifier(tableName)
3335
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ class Analyzer(
105105
// here use the CTE definition first, check table name only and ignore database name
106106
// see https://github.com/apache/spark/pull/4929#discussion_r27186638 for more info
107107
case u : UnresolvedRelation =>
108-
val substituted = cteRelations.get(u.tableIdentifier.last).map { relation =>
108+
val substituted = cteRelations.get(u.tableIdentifier.table).map { relation =>
109109
val withAlias = u.alias.map(Subquery(_, relation))
110110
withAlias.getOrElse(relation)
111111
}
@@ -257,7 +257,7 @@ class Analyzer(
257257
catalog.lookupRelation(u.tableIdentifier, u.alias)
258258
} catch {
259259
case _: NoSuchTableException =>
260-
u.failAnalysis(s"no such table ${u.tableName}")
260+
u.failAnalysis(s"Table Not Found: ${u.tableName}")
261261
}
262262
}
263263

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala

Lines changed: 61 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,9 @@ trait Catalog {
4242

4343
val conf: CatalystConf
4444

45-
def tableExists(tableIdentifier: Seq[String]): Boolean
45+
def tableExists(tableIdent: TableIdentifier): Boolean
4646

47-
def lookupRelation(
48-
tableIdentifier: Seq[String],
49-
alias: Option[String] = None): LogicalPlan
47+
def lookupRelation(tableIdent: TableIdentifier, alias: Option[String] = None): LogicalPlan
5048

5149
/**
5250
* Returns tuples of (tableName, isTemporary) for all tables in the given database.
@@ -56,101 +54,67 @@ trait Catalog {
5654

5755
def refreshTable(tableIdent: TableIdentifier): Unit
5856

59-
// TODO: Refactor it in the work of SPARK-10104
60-
def registerTable(tableIdentifier: Seq[String], plan: LogicalPlan): Unit
57+
def registerTable(tableIdent: TableIdentifier, plan: LogicalPlan): Unit
6158

62-
// TODO: Refactor it in the work of SPARK-10104
63-
def unregisterTable(tableIdentifier: Seq[String]): Unit
59+
def unregisterTable(tableIdent: TableIdentifier): Unit
6460

6561
def unregisterAllTables(): Unit
6662

67-
// TODO: Refactor it in the work of SPARK-10104
68-
protected def processTableIdentifier(tableIdentifier: Seq[String]): Seq[String] = {
69-
if (conf.caseSensitiveAnalysis) {
70-
tableIdentifier
71-
} else {
72-
tableIdentifier.map(_.toLowerCase)
73-
}
74-
}
75-
76-
// TODO: Refactor it in the work of SPARK-10104
77-
protected def getDbTableName(tableIdent: Seq[String]): String = {
78-
val size = tableIdent.size
79-
if (size <= 2) {
80-
tableIdent.mkString(".")
81-
} else {
82-
tableIdent.slice(size - 2, size).mkString(".")
83-
}
84-
}
85-
86-
// TODO: Refactor it in the work of SPARK-10104
87-
protected def getDBTable(tableIdent: Seq[String]) : (Option[String], String) = {
88-
(tableIdent.lift(tableIdent.size - 2), tableIdent.last)
89-
}
90-
9163
/**
92-
* It is not allowed to specifiy database name for tables stored in [[SimpleCatalog]].
93-
* We use this method to check it.
64+
* Get the table name of TableIdentifier for temporary tables.
9465
*/
95-
protected def checkTableIdentifier(tableIdentifier: Seq[String]): Unit = {
96-
if (tableIdentifier.length > 1) {
66+
protected def getTableName(tableIdent: TableIdentifier): String = {
67+
// It is not allowed to specify database name for temporary tables.
68+
// We check it here and throw exception if database is defined.
69+
if (tableIdent.database.isDefined) {
9770
throw new AnalysisException("Specifying database name or other qualifiers are not allowed " +
9871
"for temporary tables. If the table name has dots (.) in it, please quote the " +
9972
"table name with backticks (`).")
10073
}
74+
if (conf.caseSensitiveAnalysis) {
75+
tableIdent.table
76+
} else {
77+
tableIdent.table.toLowerCase
78+
}
10179
}
10280
}
10381

10482
class SimpleCatalog(val conf: CatalystConf) extends Catalog {
105-
val tables = new ConcurrentHashMap[String, LogicalPlan]
106-
107-
override def registerTable(
108-
tableIdentifier: Seq[String],
109-
plan: LogicalPlan): Unit = {
110-
checkTableIdentifier(tableIdentifier)
111-
val tableIdent = processTableIdentifier(tableIdentifier)
112-
tables.put(getDbTableName(tableIdent), plan)
83+
private[this] val tables = new ConcurrentHashMap[String, LogicalPlan]
84+
85+
override def registerTable(tableIdent: TableIdentifier, plan: LogicalPlan): Unit = {
86+
tables.put(getTableName(tableIdent), plan)
11387
}
11488

115-
override def unregisterTable(tableIdentifier: Seq[String]): Unit = {
116-
checkTableIdentifier(tableIdentifier)
117-
val tableIdent = processTableIdentifier(tableIdentifier)
118-
tables.remove(getDbTableName(tableIdent))
89+
override def unregisterTable(tableIdent: TableIdentifier): Unit = {
90+
tables.remove(getTableName(tableIdent))
11991
}
12092

12193
override def unregisterAllTables(): Unit = {
12294
tables.clear()
12395
}
12496

125-
override def tableExists(tableIdentifier: Seq[String]): Boolean = {
126-
checkTableIdentifier(tableIdentifier)
127-
val tableIdent = processTableIdentifier(tableIdentifier)
128-
tables.containsKey(getDbTableName(tableIdent))
97+
override def tableExists(tableIdent: TableIdentifier): Boolean = {
98+
tables.containsKey(getTableName(tableIdent))
12999
}
130100

131101
override def lookupRelation(
132-
tableIdentifier: Seq[String],
102+
tableIdent: TableIdentifier,
133103
alias: Option[String] = None): LogicalPlan = {
134-
checkTableIdentifier(tableIdentifier)
135-
val tableIdent = processTableIdentifier(tableIdentifier)
136-
val tableFullName = getDbTableName(tableIdent)
137-
val table = tables.get(tableFullName)
104+
val tableName = getTableName(tableIdent)
105+
val table = tables.get(tableName)
138106
if (table == null) {
139-
sys.error(s"Table Not Found: $tableFullName")
107+
throw new NoSuchTableException
140108
}
141-
val tableWithQualifiers = Subquery(tableIdent.last, table)
109+
val tableWithQualifiers = Subquery(tableName, table)
142110

143111
// If an alias was specified by the lookup, wrap the plan in a subquery so that attributes are
144112
// properly qualified with this alias.
145113
alias.map(a => Subquery(a, tableWithQualifiers)).getOrElse(tableWithQualifiers)
146114
}
147115

148116
override def getTables(databaseName: Option[String]): Seq[(String, Boolean)] = {
149-
val result = ArrayBuffer.empty[(String, Boolean)]
150-
for (name <- tables.keySet().asScala) {
151-
result += ((name, true))
152-
}
153-
result
117+
tables.keySet().asScala.map(_ -> true).toSeq
154118
}
155119

156120
override def refreshTable(tableIdent: TableIdentifier): Unit = {
@@ -165,68 +129,50 @@ class SimpleCatalog(val conf: CatalystConf) extends Catalog {
165129
* lost when the JVM exits.
166130
*/
167131
trait OverrideCatalog extends Catalog {
132+
private[this] val overrides = new ConcurrentHashMap[String, LogicalPlan]
168133

169-
// TODO: This doesn't work when the database changes...
170-
val overrides = new mutable.HashMap[(Option[String], String), LogicalPlan]()
171-
172-
abstract override def tableExists(tableIdentifier: Seq[String]): Boolean = {
173-
val tableIdent = processTableIdentifier(tableIdentifier)
174-
// A temporary tables only has a single part in the tableIdentifier.
175-
val overriddenTable = if (tableIdentifier.length > 1) {
176-
None: Option[LogicalPlan]
134+
private def getOverriddenTable(tableIdent: TableIdentifier): Option[LogicalPlan] = {
135+
if (tableIdent.database.isDefined) {
136+
None
177137
} else {
178-
overrides.get(getDBTable(tableIdent))
138+
Option(overrides.get(getTableName(tableIdent)))
179139
}
180-
overriddenTable match {
140+
}
141+
142+
abstract override def tableExists(tableIdent: TableIdentifier): Boolean = {
143+
getOverriddenTable(tableIdent) match {
181144
case Some(_) => true
182-
case None => super.tableExists(tableIdentifier)
145+
case None => super.tableExists(tableIdent)
183146
}
184147
}
185148

186149
abstract override def lookupRelation(
187-
tableIdentifier: Seq[String],
150+
tableIdent: TableIdentifier,
188151
alias: Option[String] = None): LogicalPlan = {
189-
val tableIdent = processTableIdentifier(tableIdentifier)
190-
// A temporary tables only has a single part in the tableIdentifier.
191-
val overriddenTable = if (tableIdentifier.length > 1) {
192-
None: Option[LogicalPlan]
193-
} else {
194-
overrides.get(getDBTable(tableIdent))
195-
}
196-
val tableWithQualifers = overriddenTable.map(r => Subquery(tableIdent.last, r))
152+
getOverriddenTable(tableIdent) match {
153+
case Some(table) =>
154+
val tableName = getTableName(tableIdent)
155+
val tableWithQualifiers = Subquery(tableName, table)
197156

198-
// If an alias was specified by the lookup, wrap the plan in a subquery so that attributes are
199-
// properly qualified with this alias.
200-
val withAlias =
201-
tableWithQualifers.map(r => alias.map(a => Subquery(a, r)).getOrElse(r))
157+
// If an alias was specified by the lookup, wrap the plan in a sub-query so that attributes
158+
// are properly qualified with this alias.
159+
alias.map(a => Subquery(a, tableWithQualifiers)).getOrElse(tableWithQualifiers)
202160

203-
withAlias.getOrElse(super.lookupRelation(tableIdentifier, alias))
161+
case None => super.lookupRelation(tableIdent, alias)
162+
}
204163
}
205164

206165
abstract override def getTables(databaseName: Option[String]): Seq[(String, Boolean)] = {
207-
// We always return all temporary tables.
208-
val temporaryTables = overrides.map {
209-
case ((_, tableName), _) => (tableName, true)
210-
}.toSeq
211-
212-
temporaryTables ++ super.getTables(databaseName)
166+
overrides.keySet().asScala.map(_ -> true).toSeq ++ super.getTables(databaseName)
213167
}
214168

215-
override def registerTable(
216-
tableIdentifier: Seq[String],
217-
plan: LogicalPlan): Unit = {
218-
checkTableIdentifier(tableIdentifier)
219-
val tableIdent = processTableIdentifier(tableIdentifier)
220-
overrides.put(getDBTable(tableIdent), plan)
169+
override def registerTable(tableIdent: TableIdentifier, plan: LogicalPlan): Unit = {
170+
overrides.put(getTableName(tableIdent), plan)
221171
}
222172

223-
override def unregisterTable(tableIdentifier: Seq[String]): Unit = {
224-
// A temporary tables only has a single part in the tableIdentifier.
225-
// If tableIdentifier has more than one parts, it is not a temporary table
226-
// and we do not need to do anything at here.
227-
if (tableIdentifier.length == 1) {
228-
val tableIdent = processTableIdentifier(tableIdentifier)
229-
overrides.remove(getDBTable(tableIdent))
173+
override def unregisterTable(tableIdent: TableIdentifier): Unit = {
174+
if (tableIdent.database.isEmpty) {
175+
overrides.remove(getTableName(tableIdent))
230176
}
231177
}
232178

@@ -243,12 +189,12 @@ object EmptyCatalog extends Catalog {
243189

244190
override val conf: CatalystConf = EmptyConf
245191

246-
override def tableExists(tableIdentifier: Seq[String]): Boolean = {
192+
override def tableExists(tableIdent: TableIdentifier): Boolean = {
247193
throw new UnsupportedOperationException
248194
}
249195

250196
override def lookupRelation(
251-
tableIdentifier: Seq[String],
197+
tableIdent: TableIdentifier,
252198
alias: Option[String] = None): LogicalPlan = {
253199
throw new UnsupportedOperationException
254200
}
@@ -257,15 +203,17 @@ object EmptyCatalog extends Catalog {
257203
throw new UnsupportedOperationException
258204
}
259205

260-
override def registerTable(tableIdentifier: Seq[String], plan: LogicalPlan): Unit = {
206+
override def registerTable(tableIdent: TableIdentifier, plan: LogicalPlan): Unit = {
261207
throw new UnsupportedOperationException
262208
}
263209

264-
override def unregisterTable(tableIdentifier: Seq[String]): Unit = {
210+
override def unregisterTable(tableIdent: TableIdentifier): Unit = {
265211
throw new UnsupportedOperationException
266212
}
267213

268-
override def unregisterAllTables(): Unit = {}
214+
override def unregisterAllTables(): Unit = {
215+
throw new UnsupportedOperationException
216+
}
269217

270218
override def refreshTable(tableIdent: TableIdentifier): Unit = {
271219
throw new UnsupportedOperationException

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.analysis
1919

2020
import org.apache.spark.sql.AnalysisException
2121
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
22-
import org.apache.spark.sql.catalyst.errors
22+
import org.apache.spark.sql.catalyst.{TableIdentifier, errors}
2323
import org.apache.spark.sql.catalyst.expressions._
2424
import org.apache.spark.sql.catalyst.plans.logical.LeafNode
2525
import org.apache.spark.sql.catalyst.trees.TreeNode
@@ -36,11 +36,11 @@ class UnresolvedException[TreeType <: TreeNode[_]](tree: TreeType, function: Str
3636
* Holds the name of a relation that has yet to be looked up in a [[Catalog]].
3737
*/
3838
case class UnresolvedRelation(
39-
tableIdentifier: Seq[String],
39+
tableIdentifier: TableIdentifier,
4040
alias: Option[String] = None) extends LeafNode {
4141

4242
/** Returns a `.` separated name for this relation. */
43-
def tableName: String = tableIdentifier.mkString(".")
43+
def tableName: String = tableIdentifier.unquotedString
4444

4545
override def output: Seq[Attribute] = Nil
4646

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,8 @@ package object dsl {
286286

287287
def insertInto(tableName: String, overwrite: Boolean = false): LogicalPlan =
288288
InsertIntoTable(
289-
analysis.UnresolvedRelation(Seq(tableName)), Map.empty, logicalPlan, overwrite, false)
289+
analysis.UnresolvedRelation(TableIdentifier(tableName)),
290+
Map.empty, logicalPlan, overwrite, false)
290291

291292
def analyze: LogicalPlan = EliminateSubQueries(analysis.SimpleAnalyzer.execute(logicalPlan))
292293
}

0 commit comments

Comments
 (0)