docs/sql-reference.md — 17 changes: 17 additions & 0 deletions

@@ -50,6 +50,9 @@ Spark SQL and DataFrames support the following data types:
       absolute point in time.
     - `DateType`: Represents values comprising values of fields year, month and day, without a
       time-zone.
+* Calendar Interval type

> **Review comment** (@dongjoon-hyun, Member, Jul 10, 2019): Maybe, one word, `CalendarInterval` instead of `Calendar Interval`?

+    - `CalendarIntervalType`: The data type representing calendar time intervals. It is stored
+      internally as two components: the number of months and the number of microseconds.
 * Complex types
     - `ArrayType(elementType, containsNull)`: Represents values comprising a sequence of
       elements with the type of `elementType`. `containsNull` is used to indicate if
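For context, a minimal sketch of the two-component representation just described, assuming the post-move package `org.apache.spark.sql.types` and the two-argument `new CalendarInterval(months, microseconds)` constructor that the test changes later in this diff use:

```scala
import org.apache.spark.sql.types.CalendarInterval

object CalendarIntervalSketch {
  def main(args: Array[String]): Unit = {
    // Two months plus three hours, encoded as (months, microseconds).
    val microsPerHour = 60L * 60L * 1000L * 1000L
    val interval = new CalendarInterval(2, 3 * microsPerHour)
    println(interval) // expected to print something like "interval 2 months 3 hours"
  }
}
```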
@@ -163,6 +166,13 @@ You can access them by doing

   DateType
   </td>
 </tr>
+<tr>
+  <td> <b>CalendarIntervalType</b> </td>
+  <td> org.apache.spark.sql.types.CalendarInterval </td>
+  <td>
+  CalendarIntervalType
+  </td>
+</tr>
 <tr>
   <td> <b>ArrayType</b> </td>
   <td> scala.collection.Seq </td>
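Per the row just added above, a `CalendarIntervalType` column surfaces in Scala as `org.apache.spark.sql.types.CalendarInterval`. A hedged sketch, assuming interval literals can be selected and collected to the driver:

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.CalendarInterval

object IntervalValueTypeSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("interval-sketch").getOrCreate()
    // An interval literal yields a CalendarIntervalType column; collecting it
    // should hand back the value type named in the table row above.
    val row = spark.sql("SELECT interval 2 months 3 hours AS i").head()
    val ivl = row.getAs[CalendarInterval]("i")
    println(ivl)
    spark.stop()
  }
}
```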
@@ -298,6 +308,13 @@ please use factory methods provided in

   DataTypes.DateType
   </td>
 </tr>
+<tr>
+  <td> <b>CalendarIntervalType</b> </td>
+  <td> org.apache.spark.sql.types.CalendarInterval </td>
+  <td>
+  CalendarIntervalType

> **Review comment** (Member): Is this correct in this context? This section is for the DataTypes factory.

+  </td>
+</tr>
 <tr>
   <td> <b>ArrayType</b> </td>
   <td> java.util.List </td>
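On the comment above: the other rows in this Java section use the `DataTypes.<Type>` factory form, so the entry would presumably read `DataTypes.CalendarIntervalType` rather than the bare `CalendarIntervalType`. A minimal sketch, assuming the public `DataTypes.CalendarIntervalType` field is the same singleton as the Scala-side object:

```scala
import org.apache.spark.sql.types.{CalendarIntervalType, DataType, DataTypes}

object DataTypesFactorySketch {
  def main(args: Array[String]): Unit = {
    // The Java-facing factory field should alias the Scala singleton,
    // as it does for the other entries in this table.
    val fromFactory: DataType = DataTypes.CalendarIntervalType
    println(fromFactory == CalendarIntervalType) // expected: true
  }
}
```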
---

@@ -19,10 +19,10 @@
 import org.apache.spark.sql.catalyst.InternalRow;
 import org.apache.spark.sql.catalyst.util.ArrayData;
+import org.apache.spark.sql.catalyst.util.MapData;
+import org.apache.spark.sql.types.CalendarInterval;
 import org.apache.spark.sql.types.DataType;
 import org.apache.spark.sql.types.Decimal;
-import org.apache.spark.sql.catalyst.util.MapData;
-import org.apache.spark.unsafe.types.CalendarInterval;
 import org.apache.spark.unsafe.types.UTF8String;
 
 public interface SpecializedGetters {
---

@@ -36,7 +36,6 @@
 import org.apache.spark.unsafe.array.ByteArrayMethods;
 import org.apache.spark.unsafe.bitset.BitSetMethods;
 import org.apache.spark.unsafe.hash.Murmur3_x86_32;
-import org.apache.spark.unsafe.types.CalendarInterval;
 import org.apache.spark.unsafe.types.UTF8String;
 
 import static org.apache.spark.unsafe.Platform.BYTE_ARRAY_OFFSET;
---

@@ -37,7 +37,6 @@
 import org.apache.spark.unsafe.array.ByteArrayMethods;
 import org.apache.spark.unsafe.bitset.BitSetMethods;
 import org.apache.spark.unsafe.hash.Murmur3_x86_32;
-import org.apache.spark.unsafe.types.CalendarInterval;
 import org.apache.spark.unsafe.types.UTF8String;
 
 import static org.apache.spark.sql.types.DataTypes.*;
---

@@ -19,10 +19,10 @@
 import org.apache.spark.sql.catalyst.expressions.UnsafeArrayData;
 import org.apache.spark.sql.catalyst.expressions.UnsafeMapData;
 import org.apache.spark.sql.catalyst.expressions.UnsafeRow;
+import org.apache.spark.sql.types.CalendarInterval;
 import org.apache.spark.sql.types.Decimal;
 import org.apache.spark.unsafe.Platform;
 import org.apache.spark.unsafe.array.ByteArrayMethods;
-import org.apache.spark.unsafe.types.CalendarInterval;
 import org.apache.spark.unsafe.types.UTF8String;
 
 /**
---

@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-package org.apache.spark.unsafe.types;
+package org.apache.spark.sql.types;
 
 import java.io.Serializable;
 import java.util.Locale;
@@ -103,8 +103,7 @@ public static CalendarInterval fromCaseInsensitiveString(String s) {
       throw new IllegalArgumentException("Interval cannot be null or blank.");
     }
     String sInLowerCase = s.trim().toLowerCase(Locale.ROOT);
-    String interval =
-      sInLowerCase.startsWith("interval ") ? sInLowerCase : "interval " + sInLowerCase;
+    String interval = sInLowerCase.startsWith("interval ") ? sInLowerCase : "interval " + sInLowerCase;

> **Review comment** (Member): Please don't include this kind of style change.

     CalendarInterval cal = fromString(interval);
     if (cal == null) {
       throw new IllegalArgumentException("Invalid interval: " + s);
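A small usage sketch of `fromCaseInsensitiveString` as implemented above — the `interval ` prefix is optional, matching is case-insensitive, and null or blank input throws (package per this PR's move):

```scala
import org.apache.spark.sql.types.CalendarInterval

object FromCaseInsensitiveStringSketch {
  def main(args: Array[String]): Unit = {
    // Both spellings normalize to "interval 1 year 2 months" before parsing.
    val a = CalendarInterval.fromCaseInsensitiveString("1 year 2 months")
    val b = CalendarInterval.fromCaseInsensitiveString("INTERVAL 1 Year 2 Months")
    println(a == b) // expected: true

    // Blank input is rejected up front with IllegalArgumentException.
    try CalendarInterval.fromCaseInsensitiveString("  ")
    catch { case e: IllegalArgumentException => println(e.getMessage) }
  }
}
```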
---

@@ -17,9 +17,9 @@
 package org.apache.spark.sql.vectorized;
 
 import org.apache.spark.annotation.Evolving;
+import org.apache.spark.sql.types.CalendarInterval;
 import org.apache.spark.sql.types.DataType;
 import org.apache.spark.sql.types.Decimal;
-import org.apache.spark.unsafe.types.CalendarInterval;
 import org.apache.spark.unsafe.types.UTF8String;
 
 /**
---

@@ -22,7 +22,6 @@
 import org.apache.spark.sql.catalyst.util.ArrayData;
 import org.apache.spark.sql.catalyst.util.GenericArrayData;
 import org.apache.spark.sql.types.*;
-import org.apache.spark.unsafe.types.CalendarInterval;
 import org.apache.spark.unsafe.types.UTF8String;
 
 /**
---

@@ -22,7 +22,6 @@
 import org.apache.spark.sql.catalyst.InternalRow;
 import org.apache.spark.sql.catalyst.expressions.GenericInternalRow;
 import org.apache.spark.sql.types.*;
-import org.apache.spark.unsafe.types.CalendarInterval;
 import org.apache.spark.unsafe.types.UTF8String;
 
 /**
---

@@ -20,7 +20,6 @@
 import org.apache.spark.sql.catalyst.InternalRow;
 import org.apache.spark.sql.catalyst.expressions.GenericInternalRow;
 import org.apache.spark.sql.types.*;
-import org.apache.spark.unsafe.types.CalendarInterval;
 import org.apache.spark.unsafe.types.UTF8String;
 
 /**
---

@@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.expressions.{Expression, _}
 import org.apache.spark.sql.catalyst.expressions.objects._
 import org.apache.spark.sql.catalyst.util.{ArrayData, MapData}
 import org.apache.spark.sql.types._
-import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}
+import org.apache.spark.unsafe.types.UTF8String
 
 
 /**
@@ -671,6 +671,8 @@ object ScalaReflection extends ScalaReflection {
       Schema(TimestampType, nullable = true)
     case t if isSubtype(t, localTypeOf[java.time.LocalDate]) => Schema(DateType, nullable = true)
     case t if isSubtype(t, localTypeOf[java.sql.Date]) => Schema(DateType, nullable = true)
+    case t if isSubtype(t, localTypeOf[CalendarInterval]) =>
+      Schema(CalendarIntervalType, nullable = true)
     case t if isSubtype(t, localTypeOf[BigDecimal]) =>
       Schema(DecimalType.SYSTEM_DEFAULT, nullable = true)
     case t if isSubtype(t, localTypeOf[java.math.BigDecimal]) =>
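With the `isSubtype` case added above, Scala reflection should map a `CalendarInterval` field to `CalendarIntervalType`. A hedged sketch against the internal `ScalaReflection.schemaFor` API that the test suite below also uses:

```scala
import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.types.{CalendarInterval, CalendarIntervalType}

object SchemaForSketch {
  def main(args: Array[String]): Unit = {
    // The new case returns Schema(CalendarIntervalType, nullable = true).
    val schema = ScalaReflection.schemaFor[CalendarInterval]
    println(schema.dataType == CalendarIntervalType) // expected: true
    println(schema.nullable)                         // expected: true
  }
}
```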
---

@@ -25,8 +25,6 @@ import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys
 import org.apache.spark.sql.catalyst.plans.logical.{EventTimeWatermark, LogicalPlan}
 import org.apache.spark.sql.catalyst.plans.logical.EventTimeWatermark._
 import org.apache.spark.sql.types._
-import org.apache.spark.unsafe.types.CalendarInterval
-
 
 /**
  * Helper object for stream joins. See [[StreamingSymmetricHashJoinExec]] in SQL for more details.
---

@@ -30,7 +30,7 @@ import org.apache.spark.sql.catalyst.util._
 import org.apache.spark.sql.catalyst.util.DateTimeUtils._
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.UTF8StringBuilder
-import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}
+import org.apache.spark.unsafe.types.UTF8String
 import org.apache.spark.unsafe.types.UTF8String.{IntWrapper, LongWrapper}
 
 object Cast {
---

@@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.expressions
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.util.{ArrayData, MapData}
 import org.apache.spark.sql.types._
-import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}
+import org.apache.spark.unsafe.types.UTF8String
 
 /**
  * A mutable wrapper that makes two rows appear as a single concatenated row. Designed to
---

@@ -23,7 +23,6 @@ import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.TypeCheckFailure
 import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode}
 import org.apache.spark.sql.catalyst.expressions.codegen.Block._
 import org.apache.spark.sql.types._
-import org.apache.spark.unsafe.types.CalendarInterval
 
 case class TimeWindow(
     timeColumn: Expression,
---

@@ -24,7 +24,6 @@ import org.apache.spark.sql.catalyst.expressions.codegen.Block._
 import org.apache.spark.sql.catalyst.util.TypeUtils
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
-import org.apache.spark.unsafe.types.CalendarInterval
 
 @ExpressionDescription(
   usage = "_FUNC_(expr) - Returns the negated value of `expr`.",
---

@@ -43,7 +43,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeUtils._
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.Platform
-import org.apache.spark.unsafe.types._
+import org.apache.spark.unsafe.types.UTF8String
 import org.apache.spark.util.{ParentClassLoader, Utils}
 
 /**
---

@@ -34,7 +34,6 @@ import org.apache.spark.unsafe.UTF8StringBuilder
 import org.apache.spark.unsafe.array.ByteArrayMethods
 import org.apache.spark.unsafe.array.ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH
 import org.apache.spark.unsafe.types.{ByteArray, UTF8String}
-import org.apache.spark.unsafe.types.CalendarInterval
 import org.apache.spark.util.collection.OpenHashSet
 
 /**
---

@@ -34,7 +34,7 @@ import org.apache.spark.sql.catalyst.util.{DateTimeUtils, TimestampFormatter}
 import org.apache.spark.sql.catalyst.util.DateTimeUtils._
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
-import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}
+import org.apache.spark.unsafe.types.UTF8String
 
 /**
  * Common base class for time zone aware expressions.
---

@@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeUtils._
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.Platform
 import org.apache.spark.unsafe.hash.Murmur3_x86_32
-import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}
+import org.apache.spark.unsafe.types.UTF8String
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // This file defines all the expressions for hashing.
---

@@ -45,7 +45,8 @@ import org.apache.spark.sql.catalyst.util._
 import org.apache.spark.sql.catalyst.util.DateTimeUtils.instantToMicros
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
-import org.apache.spark.unsafe.types._
+

> **Review comment** (Member): Remove empty line.

> **Review comment** (Member):
> [error] /home/jenkins/workspace/SparkPullRequestBuilder@2/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala:49:0: There should be no empty line separating imports in the same group.
> [error] Total time: 19 s, completed Jul 17, 2019 3:29:10 PM

+import org.apache.spark.unsafe.types.UTF8String
 import org.apache.spark.util.Utils
 
 object Literal {
---

@@ -21,7 +21,7 @@ import org.apache.spark.sql.Row
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.util.{ArrayData, MapData}
 import org.apache.spark.sql.types._
-import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}
+import org.apache.spark.unsafe.types.UTF8String
 
 /**
  * An extended version of [[InternalRow]] that implements all special getters, toString
---

@@ -42,7 +42,7 @@ import org.apache.spark.sql.catalyst.plans.logical.sql.{AlterTableAddColumnsStat
 import org.apache.spark.sql.catalyst.util.DateTimeUtils.{getZoneId, stringToDate, stringToTimestamp}
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
-import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}
+import org.apache.spark.unsafe.types.UTF8String
 import org.apache.spark.util.random.RandomSampler
 
 /**
---

@@ -20,8 +20,8 @@ package org.apache.spark.sql.catalyst.plans.logical
 import java.util.concurrent.TimeUnit
 
 import org.apache.spark.sql.catalyst.expressions.Attribute
+import org.apache.spark.sql.types.CalendarInterval
 import org.apache.spark.sql.types.MetadataBuilder
-import org.apache.spark.unsafe.types.CalendarInterval
 
 object EventTimeWatermark {
   /** The [[org.apache.spark.sql.types.Metadata]] key used to hold the eventTime watermark delay. */
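The delay recorded under this metadata key originates from the user-facing `withWatermark` call, whose delay string is parsed into a `CalendarInterval` internally. A hedged sketch; the rate source and column name are only illustrative:

```scala
import org.apache.spark.sql.SparkSession

object WatermarkSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("wm-sketch").getOrCreate()
    // "10 minutes" is parsed to a CalendarInterval and stored in the
    // eventTime column's metadata under the delay key defined above.
    val events = spark.readStream.format("rate").load()
    val withDelay = events.withWatermark("timestamp", "10 minutes")
    withDelay.printSchema()
    spark.stop()
  }
}
```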
---

@@ -20,8 +20,8 @@ package org.apache.spark.sql.catalyst.util
 import scala.collection.JavaConverters._
 
 import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.types.{DataType, Decimal}
-import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}
+import org.apache.spark.sql.types.{CalendarInterval, DataType, Decimal}
+import org.apache.spark.unsafe.types.UTF8String
 
 private object GenericArrayData {
---

@@ -15,12 +15,12 @@
  * limitations under the License.
  */
 
-package org.apache.spark.unsafe.types;
+package org.apache.spark.sql.types;
 
 import org.junit.Test;
 
 import static org.junit.Assert.*;
-import static org.apache.spark.unsafe.types.CalendarInterval.*;
+import static org.apache.spark.sql.types.CalendarInterval.*;
 
 public class CalendarIntervalSuite {
 
---

@@ -25,7 +25,6 @@ import scala.util.Random
 import org.apache.spark.sql.catalyst.CatalystTypeConverters
 import org.apache.spark.sql.catalyst.util.DateTimeUtils
 import org.apache.spark.sql.types._
-import org.apache.spark.unsafe.types.CalendarInterval
 
 /**
  * Random data generators for Spark SQL DataTypes. These generators do not generate uniformly random
---

@@ -34,7 +34,8 @@ case class PrimitiveData(
     floatField: Float,
     shortField: Short,
     byteField: Byte,
-    booleanField: Boolean)
+    booleanField: Boolean,
+    intervalField: CalendarInterval)
 
 case class NullableData(
     intField: java.lang.Integer,
@@ -48,7 +49,8 @@ case class NullableData(
     decimalField: java.math.BigDecimal,
     dateField: Date,
     timestampField: Timestamp,
-    binaryField: Array[Byte])
+    binaryField: Array[Byte],
+    intervalField: CalendarInterval)
 
 case class OptionalData(
     intField: Option[Int],
@@ -58,7 +60,8 @@ case class OptionalData(
     shortField: Option[Short],
     byteField: Option[Byte],
     booleanField: Option[Boolean],
-    structField: Option[PrimitiveData])
+    structField: Option[PrimitiveData],
+    intervalField: Option[CalendarInterval])
 
 case class ComplexData(
     arrayField: Seq[Int],
@@ -174,7 +177,8 @@ class ScalaReflectionSuite extends SparkFunSuite {
         StructField("shortField", ShortType, nullable = false),
         StructField("byteField", ByteType, nullable = false),
         StructField("booleanField", BooleanType, nullable = false))),
-      nullable = true))
+      nullable = true),
+      StructField("intervalField", CalendarIntervalType, nullable = false))
   }
 
   test("nullable data") {
@@ -192,7 +196,8 @@ class ScalaReflectionSuite extends SparkFunSuite {
       StructField("decimalField", DecimalType.SYSTEM_DEFAULT, nullable = true),
       StructField("dateField", DateType, nullable = true),
       StructField("timestampField", TimestampType, nullable = true),
-      StructField("binaryField", BinaryType, nullable = true))),
+      StructField("binaryField", BinaryType, nullable = true),
+      StructField("intervalField", CalendarIntervalType, nullable = true))),
       nullable = true))
   }
 
@@ -207,7 +212,9 @@ class ScalaReflectionSuite extends SparkFunSuite {
       StructField("shortField", ShortType, nullable = true),
       StructField("byteField", ByteType, nullable = true),
       StructField("booleanField", BooleanType, nullable = true),
-      StructField("structField", schemaFor[PrimitiveData].dataType, nullable = true))),
+      StructField("structField", schemaFor[PrimitiveData].dataType, nullable = true),
+      StructField("intervalField", CalendarIntervalType, nullable = true)
+    )),
       nullable = true))
   }
 
@@ -248,7 +255,8 @@ class ScalaReflectionSuite extends SparkFunSuite {
       StructField("floatField", FloatType, nullable = false),
       StructField("shortField", ShortType, nullable = false),
       StructField("byteField", ByteType, nullable = false),
-      StructField("booleanField", BooleanType, nullable = false))),
+      StructField("booleanField", BooleanType, nullable = false),
+      StructField("intervalField", CalendarIntervalType, nullable = false))),
       nullable = true),
       StructField(
         "nestedArrayField",
@@ -278,19 +286,20 @@
   }
 
   test("convert PrimitiveData to catalyst") {
-    val data = PrimitiveData(1, 1, 1, 1, 1, 1, true)
-    val convertedData = InternalRow(1, 1.toLong, 1.toDouble, 1.toFloat, 1.toShort, 1.toByte, true)
+    val data = PrimitiveData(1, 1, 1, 1, 1, 1, true, new CalendarInterval(2, 0))
+    val convertedData = InternalRow(1, 1.toLong, 1.toDouble, 1.toFloat, 1.toShort, 1.toByte, true,
+      new CalendarInterval(2, 0))
     val dataType = schemaFor[PrimitiveData].dataType
     assert(CatalystTypeConverters.createToCatalystConverter(dataType)(data) === convertedData)
   }
 
   test("convert Option[Product] to catalyst") {
-    val primitiveData = PrimitiveData(1, 1, 1, 1, 1, 1, true)
+    val primitiveData = PrimitiveData(1, 1, 1, 1, 1, 1, true, new CalendarInterval(2, 0))
     val data = OptionalData(Some(2), Some(2), Some(2), Some(2), Some(2), Some(2), Some(true),
-      Some(primitiveData))
+      Some(primitiveData), Some(new CalendarInterval(2, 0)))
     val dataType = schemaFor[OptionalData].dataType
     val convertedData = InternalRow(2, 2.toLong, 2.toDouble, 2.toFloat, 2.toShort, 2.toByte, true,
-      InternalRow(1, 1, 1, 1, 1, 1, true))
+      InternalRow(1, 1, 1, 1, 1, 1, true), new CalendarInterval(2, 0))
     assert(CatalystTypeConverters.createToCatalystConverter(dataType)(data) === convertedData)
   }
 