
Commit 2476ed0

Minor updates.

1 parent ab71f21

16 files changed: 115 additions & 38 deletions

python/pyspark/sql.py

Lines changed: 28 additions & 8 deletions
@@ -21,9 +21,9 @@
 from py4j.protocol import Py4JError

 __all__ = [
-    "StringType", "BinaryType", "BooleanType", "DecimalType", "DoubleType",
-    "FloatType", "ByteType", "IntegerType", "LongType", "ShortType",
-    "ArrayType", "MapType", "StructField", "StructType",
+    "StringType", "BinaryType", "BooleanType", "TimestampType", "DecimalType",
+    "DoubleType", "FloatType", "ByteType", "IntegerType", "LongType",
+    "ShortType", "ArrayType", "MapType", "StructField", "StructType",
     "SQLContext", "HiveContext", "LocalHiveContext", "TestHiveContext", "SchemaRDD", "Row"]

 class PrimitiveTypeSingleton(type):
@@ -106,7 +106,7 @@ class FloatType(object):
     Because query evaluation is done in Scala, java.lang.Double will be used
     for Python float numbers. Because the underlying JVM type of FloatType is
     java.lang.Float (in Java) and Float (in Scala), there will be a java.lang.ClassCastException
-    if FloatType (Python) used.
+    if FloatType (Python) is used.

     """
     __metaclass__ = PrimitiveTypeSingleton
@@ -121,7 +121,7 @@ class ByteType(object):
     Because query evaluation is done in Scala, java.lang.Integer will be used
     for Python int numbers. Because the underlying JVM type of ByteType is
     java.lang.Byte (in Java) and Byte (in Scala), there will be a java.lang.ClassCastException
-    if ByteType (Python) used.
+    if ByteType (Python) is used.

     """
     __metaclass__ = PrimitiveTypeSingleton
@@ -159,7 +159,7 @@ class ShortType(object):
     Because query evaluation is done in Scala, java.lang.Integer will be used
     for Python int numbers. Because the underlying JVM type of ShortType is
     java.lang.Short (in Java) and Short (in Scala), there will be a java.lang.ClassCastException
-    if ShortType (Python) used.
+    if ShortType (Python) is used.

     """
     __metaclass__ = PrimitiveTypeSingleton
@@ -171,13 +171,16 @@ class ArrayType(object):
     """Spark SQL ArrayType

     The data type representing list values.
+    An ArrayType object comprises two fields, elementType (a DataType) and containsNull (a bool).
+    The field of elementType is used to specify the type of array elements.
+    The field of containsNull is used to specify if the array has None values.

     """
     def __init__(self, elementType, containsNull=False):
         """Creates an ArrayType

         :param elementType: the data type of elements.
-        :param containsNull: indicates whether the list contains null values.
+        :param containsNull: indicates whether the list contains None values.
         :return:

         >>> ArrayType(StringType) == ArrayType(StringType, False)
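
For context, a minimal usage sketch of the two fields documented above (not part of the diff; it mirrors the doctest's convention of passing the singleton type classes directly):

    from pyspark.sql import ArrayType, StringType

    # An array of strings whose elements may be None.
    string_array = ArrayType(StringType, containsNull=True)
    assert string_array != ArrayType(StringType)  # containsNull defaults to False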
@@ -205,6 +208,12 @@ class MapType(object):
     """Spark SQL MapType

     The data type representing dict values.
+    A MapType object comprises three fields,
+    keyType (a DataType), valueType (a DataType) and valueContainsNull (a bool).
+    The field of keyType is used to specify the type of keys in the map.
+    The field of valueType is used to specify the type of values in the map.
+    The field of valueContainsNull is used to specify if values of this map have None values.
+    For values of a MapType column, keys are not allowed to have None values.

     """
     def __init__(self, keyType, valueType, valueContainsNull=True):
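
A similar hedged sketch for MapType (not part of the diff): keys are typed by keyType, values by valueType, and valueContainsNull defaults to True:

    from pyspark.sql import MapType, StringType, IntegerType

    # A dict from strings to ints; values may be None (the default), keys may not.
    word_counts = MapType(StringType, IntegerType)
    strict_counts = MapType(StringType, IntegerType, valueContainsNull=False)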
@@ -241,6 +250,10 @@ class StructField(object):
     """Spark SQL StructField

     Represents a field in a StructType.
+    A StructField object comprises three fields, name (a string), dataType (a DataType),
+    and nullable (a bool). The field of name is the name of a StructField. The field of
+    dataType specifies the data type of a StructField.
+    The field of nullable specifies if values of a StructField can contain None values.

     """
     def __init__(self, name, dataType, nullable):
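
A hedged one-liner for StructField (not part of the diff): the constructor arguments map directly onto the three documented fields:

    from pyspark.sql import StructField, IntegerType

    # An integer field named "age" whose values may be None.
    age_field = StructField("age", IntegerType, True)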
@@ -276,7 +289,8 @@ def __ne__(self, other):
 class StructType(object):
     """Spark SQL StructType

-    The data type representing tuple values.
+    The data type representing namedtuple values.
+    A StructType object comprises a list of L{StructField}s.

     """
     def __init__(self, fields):
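
Putting the two together, a hedged sketch of a small schema (not part of the diff):

    from pyspark.sql import StructType, StructField, StringType, IntegerType

    person = StructType([
        StructField("name", StringType, False),  # names must not be None
        StructField("age", IntegerType, True)])  # ages may be None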
@@ -308,6 +322,11 @@ def __ne__(self, other):
         return not self.__eq__(other)

 def _parse_datatype_list(datatype_list_string):
+    """Parses a list of comma separated data types.
+
+    :param datatype_list_string:
+    :return:
+    """
     index = 0
     datatype_list = []
     start = 0
@@ -331,6 +350,7 @@ def _parse_datatype_list(datatype_list_string):

 def _parse_datatype_string(datatype_string):
     """Parses the given data type string.
+
     :param datatype_string:
     :return:
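
These two helpers are internal: they rebuild a Python DataType from the string form produced on the Scala side. A hedged sketch of the intended round trip; the input format is an assumption based on Scala case-class toString output, not something this diff specifies:

    # Hypothetical input; the real string would come from the JVM's DataType.toString.
    dt = _parse_datatype_string("ArrayType(StringType,false)")
    assert dt == ArrayType(StringType, False)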

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala

Lines changed: 2 additions & 2 deletions
@@ -109,12 +109,12 @@ class Analyzer(catalog: Catalog, registry: FunctionRegistry, caseSensitive: Bool
   object ResolveReferences extends Rule[LogicalPlan] {
     def apply(plan: LogicalPlan): LogicalPlan = plan transformUp {
       case q: LogicalPlan if q.childrenResolved =>
-        logTrace(s"Attempting to resolve ${q.simpleString}")
+        logger.trace(s"Attempting to resolve ${q.simpleString}")
         q transformExpressions {
           case u @ UnresolvedAttribute(name) =>
             // Leave unchanged if resolution fails. Hopefully will be resolved next round.
             val result = q.resolve(name).getOrElse(u)
-            logDebug(s"Resolving $u to $result")
+            logger.debug(s"Resolving $u to $result")
             result
         }
     }

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala

Lines changed: 4 additions & 4 deletions
@@ -75,7 +75,7 @@ trait HiveTypeCoercion {
           // Leave the same if the dataTypes match.
           case Some(newType) if a.dataType == newType.dataType => a
           case Some(newType) =>
-            logDebug(s"Promoting $a to $newType in ${q.simpleString}}")
+            logger.debug(s"Promoting $a to $newType in ${q.simpleString}}")
             newType
         }
       }
@@ -154,7 +154,7 @@ trait HiveTypeCoercion {
           (Alias(Cast(l, StringType), l.name)(), r)

         case (l, r) if l.dataType != r.dataType =>
-          logDebug(s"Resolving mismatched union input ${l.dataType}, ${r.dataType}")
+          logger.debug(s"Resolving mismatched union input ${l.dataType}, ${r.dataType}")
           findTightestCommonType(l.dataType, r.dataType).map { widestType =>
             val newLeft =
               if (l.dataType == widestType) l else Alias(Cast(l, widestType), l.name)()
@@ -170,15 +170,15 @@ trait HiveTypeCoercion {

       val newLeft =
         if (castedLeft.map(_.dataType) != left.output.map(_.dataType)) {
-          logDebug(s"Widening numeric types in union $castedLeft ${left.output}")
+          logger.debug(s"Widening numeric types in union $castedLeft ${left.output}")
           Project(castedLeft, left)
         } else {
           left
         }

       val newRight =
         if (castedRight.map(_.dataType) != right.output.map(_.dataType)) {
-          logDebug(s"Widening numeric types in union $castedRight ${right.output}")
+          logger.debug(s"Widening numeric types in union $castedRight ${right.output}")
           Project(castedRight, right)
         } else {
           right

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala

Lines changed: 2 additions & 2 deletions
@@ -17,7 +17,7 @@

 package org.apache.spark.sql.catalyst.expressions

-import org.apache.spark.Logging
+import org.apache.spark.sql.catalyst.Logging
 import org.apache.spark.sql.catalyst.trees
 import org.apache.spark.sql.catalyst.errors.attachTree
 import org.apache.spark.sql.catalyst.plans.QueryPlan
@@ -79,7 +79,7 @@ object BindReferences extends Logging {
         // produce new attributes that can't be bound. Likely the right thing to do is remove
         // this rule and require all operators to explicitly bind to the input schema that
         // they specify.
-        logDebug(s"Couldn't find $a in ${input.mkString("[", ",", "]")}")
+        logger.debug(s"Couldn't find $a in ${input.mkString("[", ",", "]")}")
         a
       } else {
         BoundReference(ordinal, a)
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/package.scala

Lines changed: 23 additions & 0 deletions

@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+package object catalyst {
+  protected[catalyst] type Logging = com.typesafe.scalalogging.slf4j.Logging
+}
+

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/QueryPlanner.scala

Lines changed: 1 addition & 1 deletion
@@ -17,7 +17,7 @@

 package org.apache.spark.sql.catalyst.planning

-import org.apache.spark.Logging
+import org.apache.spark.sql.catalyst.Logging
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.catalyst.trees.TreeNode
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala

Lines changed: 3 additions & 3 deletions
@@ -19,8 +19,8 @@ package org.apache.spark.sql.catalyst.planning

 import scala.annotation.tailrec

-import org.apache.spark.Logging
 import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.Logging
 import org.apache.spark.sql.catalyst.plans._
 import org.apache.spark.sql.catalyst.plans.logical._

@@ -113,7 +113,7 @@ object ExtractEquiJoinKeys extends Logging with PredicateHelper {

   def unapply(plan: LogicalPlan): Option[ReturnType] = plan match {
     case join @ Join(left, right, joinType, condition) =>
-      logDebug(s"Considering join on: $condition")
+      logger.debug(s"Considering join on: $condition")
       // Find equi-join predicates that can be evaluated before the join, and thus can be used
       // as join keys.
       val (joinPredicates, otherPredicates) =
@@ -131,7 +131,7 @@ object ExtractEquiJoinKeys extends Logging with PredicateHelper {
       val rightKeys = joinKeys.map(_._2)

       if (joinKeys.nonEmpty) {
-        logDebug(s"leftKeys:${leftKeys} | rightKeys:${rightKeys}")
+        logger.debug(s"leftKeys:${leftKeys} | rightKeys:${rightKeys}")
         Some((joinType, leftKeys, rightKeys, otherPredicates.reduceOption(And), left, right))
       } else {
         None

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/Rule.scala

Lines changed: 1 addition & 1 deletion
@@ -17,7 +17,7 @@

 package org.apache.spark.sql.catalyst.rules

-import org.apache.spark.Logging
+import org.apache.spark.sql.catalyst.Logging
 import org.apache.spark.sql.catalyst.trees.TreeNode

 abstract class Rule[TreeType <: TreeNode[_]] extends Logging {

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala

Lines changed: 6 additions & 6 deletions
@@ -17,7 +17,7 @@

 package org.apache.spark.sql.catalyst.rules

-import org.apache.spark.Logging
+import org.apache.spark.sql.catalyst.Logging
 import org.apache.spark.sql.catalyst.trees.TreeNode
 import org.apache.spark.sql.catalyst.util.sideBySide

@@ -60,7 +60,7 @@ abstract class RuleExecutor[TreeType <: TreeNode[_]] extends Logging {
         case (plan, rule) =>
           val result = rule(plan)
           if (!result.fastEquals(plan)) {
-            logTrace(
+            logger.trace(
               s"""
                 |=== Applying Rule ${rule.ruleName} ===
                 |${sideBySide(plan.treeString, result.treeString).mkString("\n")}
@@ -71,25 +71,25 @@ abstract class RuleExecutor[TreeType <: TreeNode[_]] extends Logging {
         }
         iteration += 1
         if (iteration > batch.strategy.maxIterations) {
-          logInfo(s"Max iterations ($iteration) reached for batch ${batch.name}")
+          logger.info(s"Max iterations ($iteration) reached for batch ${batch.name}")
           continue = false
         }

         if (curPlan.fastEquals(lastPlan)) {
-          logTrace(s"Fixed point reached for batch ${batch.name} after $iteration iterations.")
+          logger.trace(s"Fixed point reached for batch ${batch.name} after $iteration iterations.")
           continue = false
         }
         lastPlan = curPlan
       }

       if (!batchStartPlan.fastEquals(curPlan)) {
-        logDebug(
+        logger.debug(
           s"""
             |=== Result of Batch ${batch.name} ===
             |${sideBySide(plan.treeString, curPlan.treeString).mkString("\n")}
           """.stripMargin)
       } else {
-        logTrace(s"Batch ${batch.name} has no effect.")
+        logger.trace(s"Batch ${batch.name} has no effect.")
       }
     }

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/dataTypes.scala

Lines changed: 2 additions & 1 deletion
@@ -359,7 +359,8 @@ case class MapType(
     valueContainsNull: Boolean) extends DataType {
   private[sql] def buildFormattedString(prefix: String, builder: StringBuilder): Unit = {
     builder.append(s"${prefix}-- key: ${keyType.simpleString}\n")
-    builder.append(s"${prefix}-- value: ${valueType.simpleString}\n")
+    builder.append(s"${prefix}-- value: ${valueType.simpleString} " +
+      s"(valueContainsNull = ${valueContainsNull})\n")
     DataType.buildFormattedString(keyType, s"$prefix |", builder)
     DataType.buildFormattedString(valueType, s"$prefix |", builder)
   }
