From b9616900e04ee849ed5aedcb7c8b8e465e2b17b9 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 19 Apr 2019 09:59:58 +0800 Subject: [PATCH 01/12] move data source v2 to catalyst module --- .../spark/sql/sources/v2/SessionConfigSupport.java | 0 .../apache/spark/sql/sources/v2/SupportsRead.java | 0 .../apache/spark/sql/sources/v2/SupportsWrite.java | 0 .../apache/spark/sql/sources/v2/TableProvider.java | 0 .../apache/spark/sql/sources/v2/reader/Batch.java | 0 .../spark/sql/sources/v2/reader/InputPartition.java | 0 .../sql/sources/v2/reader/PartitionReader.java | 0 .../sources/v2/reader/PartitionReaderFactory.java | 0 .../apache/spark/sql/sources/v2/reader/Scan.java | 0 .../spark/sql/sources/v2/reader/ScanBuilder.java | 0 .../spark/sql/sources/v2/reader/Statistics.java | 0 .../sources/v2/reader/SupportsPushDownFilters.java | 0 .../v2/reader/SupportsPushDownRequiredColumns.java | 0 .../v2/reader/SupportsReportPartitioning.java | 0 .../sources/v2/reader/SupportsReportStatistics.java | 0 .../reader/partitioning/ClusteredDistribution.java | 0 .../v2/reader/partitioning/Distribution.java | 0 .../v2/reader/partitioning/Partitioning.java | 0 .../reader/streaming/ContinuousPartitionReader.java | 0 .../streaming/ContinuousPartitionReaderFactory.java | 0 .../v2/reader/streaming/ContinuousStream.java | 0 .../v2/reader/streaming/MicroBatchStream.java | 0 .../sql/sources/v2/reader/streaming/Offset.java | 0 .../v2/reader/streaming/PartitionOffset.java | 0 .../v2/reader/streaming/SparkDataStream.java | 0 .../spark/sql/sources/v2/writer/BatchWrite.java | 0 .../spark/sql/sources/v2/writer/DataWriter.java | 0 .../sql/sources/v2/writer/DataWriterFactory.java | 0 .../sources/v2/writer/SupportsDynamicOverwrite.java | 0 .../sql/sources/v2/writer/SupportsOverwrite.java | 0 .../sql/sources/v2/writer/SupportsSaveMode.java} | 13 ++++++------- .../sql/sources/v2/writer/SupportsTruncate.java | 0 .../spark/sql/sources/v2/writer/WriteBuilder.java | 0 .../sql/sources/v2/writer/WriterCommitMessage.java | 0 .../streaming/StreamingDataWriterFactory.java | 0 .../sources/v2/writer/streaming/StreamingWrite.java | 0 .../spark/sql/vectorized/ArrowColumnVector.java | 0 .../apache/spark/sql/vectorized/ColumnVector.java | 0 .../apache/spark/sql/vectorized/ColumnarArray.java | 0 .../apache/spark/sql/vectorized/ColumnarBatch.java | 0 .../apache/spark/sql/vectorized/ColumnarMap.java | 0 .../apache/spark/sql/vectorized/ColumnarRow.java | 0 42 files changed, 6 insertions(+), 7 deletions(-) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/SessionConfigSupport.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/SupportsRead.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/SupportsWrite.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/TableProvider.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/reader/Batch.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/reader/InputPartition.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/reader/PartitionReader.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/reader/PartitionReaderFactory.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/reader/Scan.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/reader/ScanBuilder.java (100%) 
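For orientation while reading this move list (which continues below), the read-side interfaces being relocated chain together as follows: a ScanBuilder builds a Scan, a Scan can expose a Batch, and a Batch plans InputPartitions plus a PartitionReaderFactory. A minimal hypothetical skeleton — class names invented, not part of this patch, assuming the interface shapes as of this commit:

import java.io.IOException;

import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.sources.v2.reader.*;
import org.apache.spark.sql.types.StructType;

// Hypothetical sketch: one class playing all three read-side roles for brevity.
class SkeletalScan implements ScanBuilder, Scan, Batch {

  // Static nested helpers so the factory lambda below captures nothing.
  static class EmptySplit implements InputPartition { }

  static class EmptyReader implements PartitionReader<InternalRow> {
    @Override public boolean next() { return false; }   // no rows in this sketch
    @Override public InternalRow get() { throw new IllegalStateException("empty"); }
    @Override public void close() throws IOException { }
  }

  private final StructType schema;

  SkeletalScan(StructType schema) { this.schema = schema; }

  @Override public Scan build() { return this; }               // ScanBuilder -> Scan
  @Override public StructType readSchema() { return schema; }  // Scan
  @Override public Batch toBatch() { return this; }            // Scan exposes a Batch

  @Override public InputPartition[] planInputPartitions() {    // Batch plans splits
    return new InputPartition[] { new EmptySplit() };
  }

  @Override public PartitionReaderFactory createReaderFactory() {
    return partition -> new EmptyReader();                      // runs on executors
  }
}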
rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/reader/Statistics.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownFilters.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownRequiredColumns.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsReportPartitioning.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsReportStatistics.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/reader/partitioning/ClusteredDistribution.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/reader/partitioning/Distribution.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/reader/partitioning/Partitioning.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/ContinuousPartitionReader.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/ContinuousPartitionReaderFactory.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/ContinuousStream.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/MicroBatchStream.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/Offset.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/PartitionOffset.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/SparkDataStream.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/writer/BatchWrite.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/writer/DataWriter.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/writer/DataWriterFactory.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsDynamicOverwrite.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsOverwrite.java (100%) rename sql/catalyst/src/main/{scala/org/apache/spark/sql/catalyst/analysis/NamedRelation.scala => java/org/apache/spark/sql/sources/v2/writer/SupportsSaveMode.java} (68%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsTruncate.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/writer/WriteBuilder.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/writer/WriterCommitMessage.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/writer/streaming/StreamingDataWriterFactory.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/writer/streaming/StreamingWrite.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/vectorized/ColumnVector.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatch.java 
(100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/vectorized/ColumnarMap.java (100%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java (100%) diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/SessionConfigSupport.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/SessionConfigSupport.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/SessionConfigSupport.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/SessionConfigSupport.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsRead.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/SupportsRead.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsRead.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/SupportsRead.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsWrite.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/SupportsWrite.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/SupportsWrite.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/SupportsWrite.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/TableProvider.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/TableProvider.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/TableProvider.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/TableProvider.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/Batch.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/Batch.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/Batch.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/Batch.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/InputPartition.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/InputPartition.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/InputPartition.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/InputPartition.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/PartitionReader.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/PartitionReader.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/PartitionReader.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/PartitionReader.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/PartitionReaderFactory.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/PartitionReaderFactory.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/PartitionReaderFactory.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/PartitionReaderFactory.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/Scan.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/Scan.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/Scan.java rename to 
sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/Scan.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/ScanBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/ScanBuilder.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/ScanBuilder.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/ScanBuilder.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/Statistics.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/Statistics.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/Statistics.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/Statistics.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownFilters.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownFilters.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownFilters.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownFilters.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownRequiredColumns.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownRequiredColumns.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownRequiredColumns.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownRequiredColumns.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsReportPartitioning.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsReportPartitioning.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsReportPartitioning.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsReportPartitioning.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsReportStatistics.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsReportStatistics.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsReportStatistics.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsReportStatistics.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/partitioning/ClusteredDistribution.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/partitioning/ClusteredDistribution.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/partitioning/ClusteredDistribution.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/partitioning/ClusteredDistribution.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/partitioning/Distribution.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/partitioning/Distribution.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/partitioning/Distribution.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/partitioning/Distribution.java diff --git 
a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/partitioning/Partitioning.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/partitioning/Partitioning.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/partitioning/Partitioning.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/partitioning/Partitioning.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/ContinuousPartitionReader.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/ContinuousPartitionReader.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/ContinuousPartitionReader.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/ContinuousPartitionReader.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/ContinuousPartitionReaderFactory.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/ContinuousPartitionReaderFactory.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/ContinuousPartitionReaderFactory.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/ContinuousPartitionReaderFactory.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/ContinuousStream.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/ContinuousStream.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/ContinuousStream.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/ContinuousStream.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/MicroBatchStream.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/MicroBatchStream.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/MicroBatchStream.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/MicroBatchStream.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/Offset.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/Offset.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/Offset.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/Offset.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/PartitionOffset.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/PartitionOffset.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/PartitionOffset.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/PartitionOffset.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/SparkDataStream.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/SparkDataStream.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/SparkDataStream.java rename to 
sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/streaming/SparkDataStream.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/BatchWrite.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/BatchWrite.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/BatchWrite.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/BatchWrite.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/DataWriter.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/DataWriter.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/DataWriter.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/DataWriter.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/DataWriterFactory.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/DataWriterFactory.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/DataWriterFactory.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/DataWriterFactory.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsDynamicOverwrite.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsDynamicOverwrite.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsDynamicOverwrite.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsDynamicOverwrite.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsOverwrite.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsOverwrite.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsOverwrite.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsOverwrite.java diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/NamedRelation.scala b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsSaveMode.java similarity index 68% rename from sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/NamedRelation.scala rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsSaveMode.java index 56b8d84441c95..c4295f2371877 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/NamedRelation.scala +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsSaveMode.java @@ -15,13 +15,12 @@ * limitations under the License. */ -package org.apache.spark.sql.catalyst.analysis +package org.apache.spark.sql.sources.v2.writer; -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.SaveMode; -trait NamedRelation extends LogicalPlan { - def name: String - - // When false, the schema of input data must match the schema of this relation, during write. - def skipSchemaResolution: Boolean = false +// A temporary mixin trait for `WriteBuilder` to support `SaveMode`. Will be removed before +// Spark 3.0 when all the new write operators are finished. See SPARK-26356 for more details. 
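As a hypothetical usage sketch of the `SupportsSaveMode` mixin this hunk introduces (class and field names invented; assumes `WriteBuilder`'s remaining methods have default implementations, as they did at the time):

import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.sources.v2.writer.SupportsSaveMode;
import org.apache.spark.sql.sources.v2.writer.WriteBuilder;

// Hypothetical builder; every other WriteBuilder method keeps its default.
class ExampleWriteBuilder implements SupportsSaveMode {
  private SaveMode mode = SaveMode.ErrorIfExists;  // a conservative default

  @Override
  public WriteBuilder mode(SaveMode mode) {
    this.mode = mode;   // remembered until the write is actually built
    return this;
  }
}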
+public interface SupportsSaveMode extends WriteBuilder { + WriteBuilder mode(SaveMode mode); } diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsTruncate.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsTruncate.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsTruncate.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsTruncate.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/WriteBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/WriteBuilder.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/WriteBuilder.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/WriteBuilder.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/WriterCommitMessage.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/WriterCommitMessage.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/WriterCommitMessage.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/WriterCommitMessage.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/streaming/StreamingDataWriterFactory.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/streaming/StreamingDataWriterFactory.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/streaming/StreamingDataWriterFactory.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/streaming/StreamingDataWriterFactory.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/streaming/StreamingWrite.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/streaming/StreamingWrite.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/streaming/StreamingWrite.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/streaming/StreamingWrite.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnVector.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnVector.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnVector.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnVector.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatch.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatch.java similarity index 100% rename from 
sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatch.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatch.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnarMap.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarMap.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnarMap.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarMap.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java similarity index 100% rename from sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java From 4d56a2b497c2eada25a897e2d402e6ce693415c6 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 22 Apr 2019 12:15:35 +0800 Subject: [PATCH 02/12] move DataSourceRegister and Filter --- .../sql/sources/DataSourceRegister.scala | 45 +++++++++++++++++++ .../apache/spark/sql/sources/filters.scala | 0 .../apache/spark/sql/sources/interfaces.scala | 25 ----------- 3 files changed, 45 insertions(+), 25 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/sources/DataSourceRegister.scala rename sql/{core => catalyst}/src/main/scala/org/apache/spark/sql/sources/filters.scala (100%) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/DataSourceRegister.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/DataSourceRegister.scala new file mode 100644 index 0000000000000..551cae28199be --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/DataSourceRegister.scala @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.sources + +import org.apache.spark.annotation.Stable + +/** + * Data sources should implement this trait so that they can register an alias to their data source. + * This allows users to give the data source alias as the format type over the fully qualified + * class name. + * + * A new instance of this class will be instantiated each time a DDL call is made. + * + * @since 1.5.0 + */ +@Stable +trait DataSourceRegister { + + /** + * The string that represents the format that this data source provider uses. This is + * overridden by children to provide a nice alias for the data source. 
For example: + * + * {{{ + * override def shortName(): String = "parquet" + * }}} + * + * @since 1.5.0 + */ + def shortName(): String +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/filters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala similarity index 100% rename from sql/core/src/main/scala/org/apache/spark/sql/sources/filters.scala rename to sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala index 6ad054c9f6403..3a058c9159dfd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala @@ -26,31 +26,6 @@ import org.apache.spark.sql.execution.streaming.{Sink, Source} import org.apache.spark.sql.streaming.OutputMode import org.apache.spark.sql.types.StructType -/** - * Data sources should implement this trait so that they can register an alias to their data source. - * This allows users to give the data source alias as the format type over the fully qualified - * class name. - * - * A new instance of this class will be instantiated each time a DDL call is made. - * - * @since 1.5.0 - */ -@Stable -trait DataSourceRegister { - - /** - * The string that represents the format that this data source provider uses. This is - * overridden by children to provide a nice alias for the data source. For example: - * - * {{{ - * override def shortName(): String = "parquet" - * }}} - * - * @since 1.5.0 - */ - def shortName(): String -} - /** * Implemented by objects that produce relations for a specific kind of data source. When * Spark SQL is given a DDL operation with a USING clause specified (to specify the implemented From f9b13fb4839d99a18b0b1c6d05c21750bdfdddf3 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 27 May 2019 15:12:48 +0800 Subject: [PATCH 03/12] update --- .../spark/sql/sources/v2/TableProvider.java | 8 +--- .../sql/sources/DataSourceRegister.scala | 45 ------------------- .../apache/spark/sql/sources/interfaces.scala | 25 +++++++++++ 3 files changed, 26 insertions(+), 52 deletions(-) delete mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/sources/DataSourceRegister.scala diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/TableProvider.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/TableProvider.java index 0e2eb9c3cabb7..404d792f103f9 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/TableProvider.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/TableProvider.java @@ -56,13 +56,7 @@ public interface TableProvider { * @throws UnsupportedOperationException */ default Table getTable(CaseInsensitiveStringMap options, StructType schema) { - String name; - if (this instanceof DataSourceRegister) { - name = ((DataSourceRegister) this).shortName(); - } else { - name = this.getClass().getName(); - } throw new UnsupportedOperationException( - name + " source does not support user-specified schema"); + this.getClass().getName() + " source does not support user-specified schema"); } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/DataSourceRegister.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/DataSourceRegister.scala deleted file mode 100644 index 551cae28199be..0000000000000 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/DataSourceRegister.scala +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.sources - -import org.apache.spark.annotation.Stable - -/** - * Data sources should implement this trait so that they can register an alias to their data source. - * This allows users to give the data source alias as the format type over the fully qualified - * class name. - * - * A new instance of this class will be instantiated each time a DDL call is made. - * - * @since 1.5.0 - */ -@Stable -trait DataSourceRegister { - - /** - * The string that represents the format that this data source provider uses. This is - * overridden by children to provide a nice alias for the data source. For example: - * - * {{{ - * override def shortName(): String = "parquet" - * }}} - * - * @since 1.5.0 - */ - def shortName(): String -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala index 3a058c9159dfd..6ad054c9f6403 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala @@ -26,6 +26,31 @@ import org.apache.spark.sql.execution.streaming.{Sink, Source} import org.apache.spark.sql.streaming.OutputMode import org.apache.spark.sql.types.StructType +/** + * Data sources should implement this trait so that they can register an alias to their data source. + * This allows users to give the data source alias as the format type over the fully qualified + * class name. + * + * A new instance of this class will be instantiated each time a DDL call is made. + * + * @since 1.5.0 + */ +@Stable +trait DataSourceRegister { + + /** + * The string that represents the format that this data source provider uses. This is + * overridden by children to provide a nice alias for the data source. For example: + * + * {{{ + * override def shortName(): String = "parquet" + * }}} + * + * @since 1.5.0 + */ + def shortName(): String +} + /** * Implemented by objects that produce relations for a specific kind of data source. 
When * Spark SQL is given a DDL operation with a USING clause specified (to specify the implemented From b8f1316ce2eb00c6d040e61e96f1a7302bf44544 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 27 May 2019 15:17:13 +0800 Subject: [PATCH 04/12] fix a mistake --- .../sql/catalyst/analysis/NamedRelation.scala | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/NamedRelation.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/NamedRelation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/NamedRelation.scala new file mode 100644 index 0000000000000..56b8d84441c95 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/NamedRelation.scala @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis + +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan + +trait NamedRelation extends LogicalPlan { + def name: String + + // When false, the schema of input data must match the schema of this relation, during write. + def skipSchemaResolution: Boolean = false +} From ab4f40cc2df91440dc2a2a898cdc0688dea26ba9 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 27 May 2019 15:18:39 +0800 Subject: [PATCH 05/12] another mistake --- .../sources/v2/writer/SupportsSaveMode.java | 26 ------------------- 1 file changed, 26 deletions(-) delete mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsSaveMode.java diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsSaveMode.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsSaveMode.java deleted file mode 100644 index c4295f2371877..0000000000000 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsSaveMode.java +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.sources.v2.writer; - -import org.apache.spark.sql.SaveMode; - -// A temporary mixin trait for `WriteBuilder` to support `SaveMode`. Will be removed before -// Spark 3.0 when all the new write operators are finished. See SPARK-26356 for more details. -public interface SupportsSaveMode extends WriteBuilder { - WriteBuilder mode(SaveMode mode); -} From c9e00bc8e538e1c34cb8161f8bb87060047f182a Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 27 May 2019 15:36:25 +0800 Subject: [PATCH 06/12] move ArrowColumnVector to internal package --- .../org/apache/spark/sql/execution}/ArrowColumnVector.java | 5 ++++- .../apache/spark/sql/execution/arrow/ArrowConverters.scala | 3 ++- .../spark/sql/execution/python/ArrowPythonRunner.scala | 3 ++- .../sql/execution/python/FlatMapGroupsInPandasExec.scala | 4 ++-- .../org/apache/spark/sql/execution/r/ArrowRRunner.scala | 3 ++- .../apache/spark/sql/execution/arrow/ArrowWriterSuite.scala | 2 +- .../sql/execution/vectorized/ArrowColumnVectorSuite.scala | 2 +- .../spark/sql/execution/vectorized/ColumnarBatchSuite.scala | 3 ++- 8 files changed, 16 insertions(+), 9 deletions(-) rename sql/{catalyst/src/main/java/org/apache/spark/sql/vectorized => core/src/main/java/org/apache/spark/sql/execution}/ArrowColumnVector.java (98%) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/ArrowColumnVector.java similarity index 98% rename from sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java rename to sql/core/src/main/java/org/apache/spark/sql/execution/ArrowColumnVector.java index 906e9bc26ef53..04f3b83a50a50 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/ArrowColumnVector.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.spark.sql.vectorized; +package org.apache.spark.sql.execution; import io.netty.buffer.ArrowBuf; import org.apache.arrow.vector.*; @@ -25,6 +25,9 @@ import org.apache.spark.annotation.Evolving; import org.apache.spark.sql.execution.arrow.ArrowUtils; import org.apache.spark.sql.types.*; +import org.apache.spark.sql.vectorized.ColumnVector; +import org.apache.spark.sql.vectorized.ColumnarArray; +import org.apache.spark.sql.vectorized.ColumnarMap; import org.apache.spark.unsafe.types.UTF8String; /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowConverters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowConverters.scala index 884dc8c6215ff..a282ceca3079d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowConverters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowConverters.scala @@ -33,8 +33,9 @@ import org.apache.spark.api.java.JavaRDD import org.apache.spark.network.util.JavaUtils import org.apache.spark.sql.{DataFrame, SQLContext} import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.execution.ArrowColumnVector import org.apache.spark.sql.types._ -import org.apache.spark.sql.vectorized.{ArrowColumnVector, ColumnarBatch, ColumnVector} +import org.apache.spark.sql.vectorized.{ColumnarBatch, ColumnVector} import org.apache.spark.util.{ByteBufferOutputStream, Utils} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowPythonRunner.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowPythonRunner.scala index 04623b1ab3c2f..1ab303802fe25 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowPythonRunner.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowPythonRunner.scala @@ -29,9 +29,10 @@ import org.apache.arrow.vector.ipc.{ArrowStreamReader, ArrowStreamWriter} import org.apache.spark._ import org.apache.spark.api.python._ import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.execution.ArrowColumnVector import org.apache.spark.sql.execution.arrow.{ArrowUtils, ArrowWriter} import org.apache.spark.sql.types._ -import org.apache.spark.sql.vectorized.{ArrowColumnVector, ColumnarBatch, ColumnVector} +import org.apache.spark.sql.vectorized.{ColumnarBatch, ColumnVector} import org.apache.spark.util.Utils /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/FlatMapGroupsInPandasExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/FlatMapGroupsInPandasExec.scala index ce755ffb7c9fd..ac112107f4199 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/FlatMapGroupsInPandasExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/FlatMapGroupsInPandasExec.scala @@ -26,10 +26,10 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.physical.{AllTuples, ClusteredDistribution, Distribution, Partitioning} -import org.apache.spark.sql.execution.{GroupedIterator, SparkPlan, UnaryExecNode} +import org.apache.spark.sql.execution.{ArrowColumnVector, GroupedIterator, SparkPlan, UnaryExecNode} import org.apache.spark.sql.execution.arrow.ArrowUtils import org.apache.spark.sql.types.StructType -import org.apache.spark.sql.vectorized.{ArrowColumnVector, ColumnarBatch, ColumnVector} +import 
org.apache.spark.sql.vectorized.ColumnarBatch /** * Physical node for [[org.apache.spark.sql.catalyst.plans.logical.FlatMapGroupsInPandas]] diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/r/ArrowRRunner.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/r/ArrowRRunner.scala index a94cb0befba7d..3752753f0f298 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/r/ArrowRRunner.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/r/ArrowRRunner.scala @@ -31,9 +31,10 @@ import org.apache.spark.api.r._ import org.apache.spark.api.r.SpecialLengths import org.apache.spark.broadcast.Broadcast import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.execution.ArrowColumnVector import org.apache.spark.sql.execution.arrow.{ArrowUtils, ArrowWriter} import org.apache.spark.sql.types.StructType -import org.apache.spark.sql.vectorized.{ArrowColumnVector, ColumnarBatch, ColumnVector} +import org.apache.spark.sql.vectorized.{ColumnarBatch, ColumnVector} import org.apache.spark.util.Utils diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowWriterSuite.scala index 92506032ab2e5..e692244eb082a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowWriterSuite.scala @@ -20,8 +20,8 @@ package org.apache.spark.sql.execution.arrow import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.util.ArrayData +import org.apache.spark.sql.execution.ArrowColumnVector import org.apache.spark.sql.types._ -import org.apache.spark.sql.vectorized.ArrowColumnVector import org.apache.spark.unsafe.types.UTF8String class ArrowWriterSuite extends SparkFunSuite { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala index 4592a1663faed..08d97c7c9c39c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala @@ -21,9 +21,9 @@ import org.apache.arrow.vector._ import org.apache.arrow.vector.complex._ import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.execution.ArrowColumnVector import org.apache.spark.sql.execution.arrow.ArrowUtils import org.apache.spark.sql.types._ -import org.apache.spark.sql.vectorized.ArrowColumnVector import org.apache.spark.unsafe.types.UTF8String class ArrowColumnVectorSuite extends SparkFunSuite { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala index cbfd9d9b4b817..a8bb82e5a68da 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala @@ -31,9 +31,10 @@ import org.apache.spark.SparkFunSuite import org.apache.spark.memory.MemoryMode import org.apache.spark.sql.{RandomDataGenerator, Row} import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.execution.ArrowColumnVector import org.apache.spark.sql.execution.arrow.ArrowUtils 
import org.apache.spark.sql.types._ -import org.apache.spark.sql.vectorized.{ArrowColumnVector, ColumnarBatch, ColumnVector} +import org.apache.spark.sql.vectorized.ColumnarBatch import org.apache.spark.unsafe.Platform import org.apache.spark.unsafe.types.CalendarInterval From 7af4ed12926c86e42504f2fccdda9faf23796812 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 27 May 2019 16:04:36 +0800 Subject: [PATCH 07/12] fix a mistake --- .../main/java/org/apache/spark/sql/sources/v2/TableProvider.java | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/TableProvider.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/TableProvider.java index 404d792f103f9..89dec19d006c2 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/TableProvider.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/TableProvider.java @@ -18,7 +18,6 @@ package org.apache.spark.sql.sources.v2; import org.apache.spark.annotation.Evolving; -import org.apache.spark.sql.sources.DataSourceRegister; import org.apache.spark.sql.types.StructType; import org.apache.spark.sql.util.CaseInsensitiveStringMap; From 21ed7316a960dc078777e4c05fa9dbd6c81b0e0e Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 27 May 2019 17:29:12 +0800 Subject: [PATCH 08/12] fix mima --- project/MimaExcludes.scala | 8 ++++++++ .../sql/sources/v2/reader/SupportsPushDownFilters.java | 1 + .../main/scala/org/apache/spark/sql/sources/filters.scala | 0 3 files changed, 9 insertions(+) rename sql/{catalyst => core}/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownFilters.java (96%) rename sql/{catalyst => core}/src/main/scala/org/apache/spark/sql/sources/filters.scala (100%) diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 5ccaa38c08675..c2a6356c45b17 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -288,6 +288,14 @@ object MimaExcludes { case _ => true }, + // Data Source V2 vector related API changes + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.vectorized.ColumnarBatch"), + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.vectorized.ArrowColumnVector"), + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.vectorized.ColumnarRow"), + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.vectorized.ColumnarArray"), + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.vectorized.ColumnarMap"), + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.vectorized.ColumnVector"), + // [SPARK-26216][SQL] Do not use case class as public API (UserDefinedFunction) ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.expressions.UserDefinedFunction$"), ProblemFilters.exclude[AbstractClassProblem]("org.apache.spark.sql.expressions.UserDefinedFunction"), diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownFilters.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownFilters.java similarity index 96% rename from sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownFilters.java rename to sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownFilters.java index f10fd884daabe..5d93b424b9723 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownFilters.java +++ 
b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownFilters.java @@ -25,6 +25,7 @@ * push down filters to the data source and reduce the size of the data to be read. */ @Evolving +// TODO: create data source v2 filter API and move this interface to the catalyst module. public interface SupportsPushDownFilters extends ScanBuilder { /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/filters.scala similarity index 100% rename from sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala rename to sql/core/src/main/scala/org/apache/spark/sql/sources/filters.scala From a9eac90653437a79c4474f89dbbc6b93a92cf551 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 27 May 2019 23:29:35 +0800 Subject: [PATCH 09/12] fix --- .../spark/sql/sources/v2/writer/SupportsDynamicOverwrite.java | 2 +- .../apache/spark/sql/sources/v2/writer/SupportsOverwrite.java | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) rename sql/{catalyst => core}/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsOverwrite.java (95%) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsDynamicOverwrite.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsDynamicOverwrite.java index 8058964b662bd..6f86cb5e89c46 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsDynamicOverwrite.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsDynamicOverwrite.java @@ -25,7 +25,7 @@ * write does not contain data will remain unchanged. *
<p>
* This is provided to implement SQL compatible with Hive table operations but is not recommended. - * Instead, use the {@link SupportsOverwrite overwrite by filter API} to explicitly replace data. + * Instead, use the {@code SupportsOverwrite overwrite by filter API} to explicitly replace data. */ public interface SupportsDynamicOverwrite extends WriteBuilder { /** diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsOverwrite.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsOverwrite.java similarity index 95% rename from sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsOverwrite.java rename to sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsOverwrite.java index b443b3c3aeb4a..9172e86f93e56 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsOverwrite.java +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsOverwrite.java @@ -26,6 +26,7 @@ * Overwriting data by filter will delete any data that matches the filter and replace it with data * that is committed in the write. */ +// TODO: create data source v2 filter API and move this interface to the catalyst module. public interface SupportsOverwrite extends WriteBuilder, SupportsTruncate { /** * Configures a write to replace data matching the filters with data committed in the write. From 9567cc7905509e10ad24964434cca8958213d09f Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Tue, 28 May 2019 14:49:27 +0800 Subject: [PATCH 10/12] fix test --- .../java/org/apache/spark/sql/sources/v2/TableProvider.java | 2 +- .../execution/streaming/sources/RateStreamProviderSuite.scala | 2 +- .../sql/execution/streaming/sources/TextSocketStreamSuite.scala | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/TableProvider.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/TableProvider.java index 89dec19d006c2..1d37ff042bd33 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/TableProvider.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/TableProvider.java @@ -56,6 +56,6 @@ public interface TableProvider { */ default Table getTable(CaseInsensitiveStringMap options, StructType schema) { throw new UnsupportedOperationException( - this.getClass().getName() + " source does not support user-specified schema"); + this.getClass().getSimpleName() + " source does not support user-specified schema"); } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamProviderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamProviderSuite.scala index 883201b5fb473..ef88598fcb11b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamProviderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamProviderSuite.scala @@ -305,7 +305,7 @@ class RateStreamProviderSuite extends StreamTest { .load() } assert(exception.getMessage.contains( - "rate source does not support user-specified schema")) + "RateStreamProvider source does not support user-specified schema")) } test("continuous data") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/TextSocketStreamSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/TextSocketStreamSuite.scala index 
fd3c31fbbacf8..2b7df40abcb64 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/TextSocketStreamSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/TextSocketStreamSuite.scala @@ -204,7 +204,7 @@ class TextSocketStreamSuite extends StreamTest with SharedSQLContext with Before provider.getTable(new CaseInsensitiveStringMap(params.asJava), userSpecifiedSchema) } assert(exception.getMessage.contains( - "socket source does not support user-specified schema")) + "TextSocketSourceProvider source does not support user-specified schema")) } test("input row metrics") { From c1b59326ce16980542008d45f1c70864e1d2ea55 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 31 May 2019 22:09:09 +0800 Subject: [PATCH 11/12] move --- sql/catalyst/pom.xml | 4 ++++ .../sql/sources/v2/reader/SupportsPushDownFilters.java | 1 - .../sql/sources/v2/writer/SupportsDynamicOverwrite.java | 2 +- .../spark/sql/sources/v2/writer/SupportsOverwrite.java | 1 - .../apache/spark/sql/vectorized}/ArrowColumnVector.java | 7 ++----- .../main/scala/org/apache/spark/sql/sources/filters.scala | 0 .../main/scala/org/apache/spark/sql/util}/ArrowUtils.scala | 2 +- .../scala/org/apache/spark/sql/util}/ArrowUtilsSuite.scala | 4 ++-- sql/core/pom.xml | 4 ---- .../apache/spark/sql/execution/arrow/ArrowConverters.scala | 4 ++-- .../org/apache/spark/sql/execution/arrow/ArrowWriter.scala | 1 + .../spark/sql/execution/python/AggregateInPandasExec.scala | 2 +- .../spark/sql/execution/python/ArrowEvalPythonExec.scala | 2 +- .../spark/sql/execution/python/ArrowPythonRunner.scala | 6 +++--- .../sql/execution/python/FlatMapGroupsInPandasExec.scala | 6 +++--- .../spark/sql/execution/python/WindowInPandasExec.scala | 2 +- .../org/apache/spark/sql/execution/r/ArrowRRunner.scala | 6 +++--- .../spark/sql/execution/arrow/ArrowConvertersSuite.scala | 1 + .../spark/sql/execution/arrow/ArrowWriterSuite.scala | 2 +- .../sql/execution/vectorized/ArrowColumnVectorSuite.scala | 4 ++-- .../sql/execution/vectorized/ColumnarBatchSuite.scala | 5 ++--- 21 files changed, 31 insertions(+), 35 deletions(-) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownFilters.java (96%) rename sql/{core => catalyst}/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsOverwrite.java (95%) rename sql/{core/src/main/java/org/apache/spark/sql/execution => catalyst/src/main/java/org/apache/spark/sql/vectorized}/ArrowColumnVector.java (97%) rename sql/{core => catalyst}/src/main/scala/org/apache/spark/sql/sources/filters.scala (100%) rename sql/{core/src/main/scala/org/apache/spark/sql/execution/arrow => catalyst/src/main/scala/org/apache/spark/sql/util}/ArrowUtils.scala (99%) rename sql/{core/src/test/scala/org/apache/spark/sql/execution/arrow => catalyst/src/test/scala/org/apache/spark/sql/util}/ArrowUtilsSuite.scala (96%) diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 3a4020bcea7f8..79d9b4951b151 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -114,6 +114,10 @@ <version>2.7.3</version> <type>jar</type> </dependency> + <dependency> + <groupId>org.apache.arrow</groupId> + <artifactId>arrow-vector</artifactId> + </dependency> </dependencies> <build> <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory> diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownFilters.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownFilters.java similarity index 96% rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/reader/SupportsPushDownFilters.java rename to
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsDynamicOverwrite.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsDynamicOverwrite.java
index 6f86cb5e89c46..8058964b662bd 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsDynamicOverwrite.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsDynamicOverwrite.java
@@ -25,7 +25,7 @@
  * write does not contain data will remain unchanged.
  * <p>
  * This is provided to implement SQL compatible with Hive table operations but is not recommended.
- * Instead, use the {@code SupportsOverwrite overwrite by filter API} to explicitly replace data.
+ * Instead, use the {@link SupportsOverwrite overwrite by filter API} to explicitly replace data.
  */
 public interface SupportsDynamicOverwrite extends WriteBuilder {
   /**
diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsOverwrite.java b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsOverwrite.java
similarity index 95%
rename from sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsOverwrite.java
rename to sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsOverwrite.java
index 9172e86f93e56..b443b3c3aeb4a 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsOverwrite.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/sources/v2/writer/SupportsOverwrite.java
@@ -26,7 +26,6 @@
  * Overwriting data by filter will delete any data that matches the filter and replace it with data
  * that is committed in the write.
  */
-// TODO: create data source v2 filter API and move this interface to the catalyst module.
 public interface SupportsOverwrite extends WriteBuilder, SupportsTruncate {
   /**
    * Configures a write to replace data matching the filters with data committed in the write.
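Similarly, a minimal sketch (not from this patch; OverwritingBuilder is hypothetical) of a WriteBuilder opting in to the overwrite-by-filter API that just moved to catalyst:

import org.apache.spark.sql.sources.Filter
import org.apache.spark.sql.sources.v2.writer.{BatchWrite, SupportsOverwrite, WriteBuilder}

class OverwritingBuilder extends WriteBuilder with SupportsOverwrite {
  private var deleteFilters: Array[Filter] = Array.empty
  private var truncateAll = false

  // Rows matching these filters are deleted and replaced by the data
  // committed in this write.
  override def overwrite(filters: Array[Filter]): WriteBuilder = {
    deleteFilters = filters
    this
  }

  // Truncate (inherited via SupportsTruncate) means replace everything.
  override def truncate(): WriteBuilder = {
    truncateAll = true
    this
  }

  // Producing the BatchWrite is out of scope for this sketch.
  override def buildForBatch(): BatchWrite = throw new UnsupportedOperationException("sketch only")
}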
diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/ArrowColumnVector.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java
similarity index 97%
rename from sql/core/src/main/java/org/apache/spark/sql/execution/ArrowColumnVector.java
rename to sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java
index 04f3b83a50a50..07d17ee14ce23 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/execution/ArrowColumnVector.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java
@@ -15,7 +15,7 @@
  * limitations under the License.
  */

-package org.apache.spark.sql.execution;
+package org.apache.spark.sql.vectorized;

 import io.netty.buffer.ArrowBuf;
 import org.apache.arrow.vector.*;
@@ -23,11 +23,8 @@
 import org.apache.arrow.vector.holders.NullableVarCharHolder;

 import org.apache.spark.annotation.Evolving;
-import org.apache.spark.sql.execution.arrow.ArrowUtils;
+import org.apache.spark.sql.util.ArrowUtils;
 import org.apache.spark.sql.types.*;
-import org.apache.spark.sql.vectorized.ColumnVector;
-import org.apache.spark.sql.vectorized.ColumnarArray;
-import org.apache.spark.sql.vectorized.ColumnarMap;
 import org.apache.spark.unsafe.types.UTF8String;

 /**
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/filters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala
similarity index 100%
rename from sql/core/src/main/scala/org/apache/spark/sql/sources/filters.scala
rename to sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala
similarity index 99%
rename from sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowUtils.scala
rename to sql/catalyst/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala
index 7de6256aef084..62546a322d3c9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala
@@ -15,7 +15,7 @@
  * limitations under the License.
  */

-package org.apache.spark.sql.execution.arrow
+package org.apache.spark.sql.util

 import scala.collection.JavaConverters._
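The arrow-vector dependency added to sql/catalyst's pom above exists so the relocated ArrowUtils can keep converting between Spark and Arrow schemas. A small usage sketch, assuming the toArrowSchema/fromArrowSchema signatures are unchanged by the move:

import org.apache.spark.sql.types.{IntegerType, StringType, StructType}
import org.apache.spark.sql.util.ArrowUtils

object ArrowUtilsExample {
  def main(args: Array[String]): Unit = {
    val schema = new StructType().add("id", IntegerType).add("name", StringType)
    // A session time zone is required in case the schema has timestamp fields.
    val arrowSchema = ArrowUtils.toArrowSchema(schema, "UTC")
    // Round-trip back to a Spark schema.
    assert(ArrowUtils.fromArrowSchema(arrowSchema) == schema)
  }
}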
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/ArrowUtilsSuite.scala
similarity index 96%
rename from sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowUtilsSuite.scala
rename to sql/catalyst/src/test/scala/org/apache/spark/sql/util/ArrowUtilsSuite.scala
index d801f62b62323..4439a7bb3ae87 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/ArrowUtilsSuite.scala
@@ -15,9 +15,9 @@
  * limitations under the License.
  */

-package org.apache.spark.sql.execution.arrow
+package org.apache.spark.sql.util

-import org.apache.arrow.vector.types.pojo.{ArrowType, Field, FieldType, Schema}
+import org.apache.arrow.vector.types.pojo.ArrowType

 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.catalyst.util.DateTimeUtils
diff --git a/sql/core/pom.xml b/sql/core/pom.xml
index 5ddfb02f0de34..02ed6f8adaa62 100644
--- a/sql/core/pom.xml
+++ b/sql/core/pom.xml
@@ -112,10 +112,6 @@
       <groupId>com.fasterxml.jackson.core</groupId>
       <artifactId>jackson-databind</artifactId>
     </dependency>
-    <dependency>
-      <groupId>org.apache.arrow</groupId>
-      <artifactId>arrow-vector</artifactId>
-    </dependency>
     <dependency>
       <groupId>org.apache.xbean</groupId>
       <artifactId>xbean-asm7-shaded</artifactId>
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowConverters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowConverters.scala
index a282ceca3079d..1a6f4acb63521 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowConverters.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowConverters.scala
@@ -33,9 +33,9 @@ import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.network.util.JavaUtils
 import org.apache.spark.sql.{DataFrame, SQLContext}
 import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.execution.ArrowColumnVector
 import org.apache.spark.sql.types._
-import org.apache.spark.sql.vectorized.{ColumnarBatch, ColumnVector}
+import org.apache.spark.sql.util.ArrowUtils
+import org.apache.spark.sql.vectorized.{ArrowColumnVector, ColumnarBatch, ColumnVector}
 import org.apache.spark.util.{ByteBufferOutputStream, Utils}

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala
index 8dd484af6e908..6147d6fefd52a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala
@@ -25,6 +25,7 @@ import org.apache.arrow.vector.complex._
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions.SpecializedGetters
 import org.apache.spark.sql.types._
+import org.apache.spark.sql.util.ArrowUtils

 object ArrowWriter {
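Tying the moved pieces together: a round-trip sketch, again not from the patch and assuming ArrowWriter's create/write/finish API and public root value, in which ArrowWriter (still in sql/core) fills Arrow vectors that the relocated org.apache.spark.sql.vectorized.ArrowColumnVector then reads:

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.execution.arrow.ArrowWriter
import org.apache.spark.sql.types.{IntegerType, StructType}
import org.apache.spark.sql.vectorized.ArrowColumnVector

object ArrowRoundTrip {
  def main(args: Array[String]): Unit = {
    val schema = new StructType().add("i", IntegerType)
    val writer = ArrowWriter.create(schema, "UTC")
    (0 until 3).foreach(i => writer.write(InternalRow(i)))
    writer.finish()
    // Wrap the underlying Arrow vector in the public ColumnVector API.
    val column = new ArrowColumnVector(writer.root.getFieldVectors.get(0))
    assert((0 until 3).forall(i => column.getInt(i) == i))
    column.close()
  }
}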
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/AggregateInPandasExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/AggregateInPandasExec.scala
index 2ab7240556aaa..0c78cca086ed3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/AggregateInPandasExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/AggregateInPandasExec.scala
@@ -28,8 +28,8 @@ import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.physical.{AllTuples, ClusteredDistribution, Distribution, Partitioning}
 import org.apache.spark.sql.execution.{GroupedIterator, SparkPlan, UnaryExecNode}
-import org.apache.spark.sql.execution.arrow.ArrowUtils
 import org.apache.spark.sql.types.{DataType, StructField, StructType}
+import org.apache.spark.sql.util.ArrowUtils
 import org.apache.spark.util.Utils

 /**
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowEvalPythonExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowEvalPythonExec.scala
index 61b167f50fd64..5ca2f2217c483 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowEvalPythonExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowEvalPythonExec.scala
@@ -25,8 +25,8 @@ import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryNode}
 import org.apache.spark.sql.execution.SparkPlan
-import org.apache.spark.sql.execution.arrow.ArrowUtils
 import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.util.ArrowUtils

 /**
  * Grouped a iterator into batches.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowPythonRunner.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowPythonRunner.scala
index 1ab303802fe25..3710218b2af5f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowPythonRunner.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowPythonRunner.scala
@@ -29,10 +29,10 @@ import org.apache.arrow.vector.ipc.{ArrowStreamReader, ArrowStreamWriter}
 import org.apache.spark._
 import org.apache.spark.api.python._
 import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.execution.ArrowColumnVector
-import org.apache.spark.sql.execution.arrow.{ArrowUtils, ArrowWriter}
+import org.apache.spark.sql.execution.arrow.ArrowWriter
 import org.apache.spark.sql.types._
-import org.apache.spark.sql.vectorized.{ColumnarBatch, ColumnVector}
+import org.apache.spark.sql.util.ArrowUtils
+import org.apache.spark.sql.vectorized.{ArrowColumnVector, ColumnarBatch, ColumnVector}
 import org.apache.spark.util.Utils

 /**
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/FlatMapGroupsInPandasExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/FlatMapGroupsInPandasExec.scala
index ac112107f4199..7b0e014f9ca48 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/FlatMapGroupsInPandasExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/FlatMapGroupsInPandasExec.scala
@@ -26,10 +26,10 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.physical.{AllTuples, ClusteredDistribution, Distribution, Partitioning}
-import org.apache.spark.sql.execution.{ArrowColumnVector, GroupedIterator, SparkPlan, UnaryExecNode}
-import org.apache.spark.sql.execution.arrow.ArrowUtils
+import org.apache.spark.sql.execution.{GroupedIterator, SparkPlan, UnaryExecNode}
 import org.apache.spark.sql.types.StructType
-import org.apache.spark.sql.vectorized.ColumnarBatch
+import org.apache.spark.sql.util.ArrowUtils
+import org.apache.spark.sql.vectorized.{ArrowColumnVector, ColumnarBatch}

 /**
  * Physical node for [[org.apache.spark.sql.catalyst.plans.logical.FlatMapGroupsInPandas]]
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/WindowInPandasExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/WindowInPandasExec.scala
index 1ce1215bfdd62..01ce07b133ffd 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/WindowInPandasExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/WindowInPandasExec.scala
@@ -29,9 +29,9 @@ import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.physical.{AllTuples, ClusteredDistribution, Distribution, Partitioning}
 import org.apache.spark.sql.execution.{ExternalAppendOnlyUnsafeRowArray, SparkPlan}
-import org.apache.spark.sql.execution.arrow.ArrowUtils
 import org.apache.spark.sql.execution.window._
 import org.apache.spark.sql.types._
+import org.apache.spark.sql.util.ArrowUtils
 import org.apache.spark.util.Utils

 /**
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/r/ArrowRRunner.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/r/ArrowRRunner.scala
index 3752753f0f298..0fe2b628fa38b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/r/ArrowRRunner.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/r/ArrowRRunner.scala
@@ -31,10 +31,10 @@ import org.apache.spark.api.r._
 import org.apache.spark.api.r.SpecialLengths
 import org.apache.spark.broadcast.Broadcast
 import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.execution.ArrowColumnVector
-import org.apache.spark.sql.execution.arrow.{ArrowUtils, ArrowWriter}
+import org.apache.spark.sql.execution.arrow.ArrowWriter
 import org.apache.spark.sql.types.StructType
-import org.apache.spark.sql.vectorized.{ColumnarBatch, ColumnVector}
+import org.apache.spark.sql.util.ArrowUtils
+import org.apache.spark.sql.vectorized.{ArrowColumnVector, ColumnarBatch, ColumnVector}
 import org.apache.spark.util.Utils

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala
index c36872a6a5289..86874b9817c20 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala
@@ -36,6 +36,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeUtils
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SharedSQLContext
 import org.apache.spark.sql.types.{BinaryType, Decimal, IntegerType, StructField, StructType}
+import org.apache.spark.sql.util.ArrowUtils
 import org.apache.spark.util.Utils

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowWriterSuite.scala
index e692244eb082a..92506032ab2e5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowWriterSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowWriterSuite.scala
@@ -20,8 +20,8 @@ package org.apache.spark.sql.execution.arrow
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.util.ArrayData
-import org.apache.spark.sql.execution.ArrowColumnVector
 import org.apache.spark.sql.types._
+import org.apache.spark.sql.vectorized.ArrowColumnVector
 import org.apache.spark.unsafe.types.UTF8String

 class ArrowWriterSuite extends SparkFunSuite {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala
index 08d97c7c9c39c..60f1b32a41f05 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala
@@ -21,9 +21,9 @@ import org.apache.arrow.vector._
 import org.apache.arrow.vector.complex._

 import org.apache.spark.SparkFunSuite
-import org.apache.spark.sql.execution.ArrowColumnVector
-import org.apache.spark.sql.execution.arrow.ArrowUtils
 import org.apache.spark.sql.types._
+import org.apache.spark.sql.util.ArrowUtils
+import org.apache.spark.sql.vectorized.ArrowColumnVector
 import org.apache.spark.unsafe.types.UTF8String

 class ArrowColumnVectorSuite extends SparkFunSuite {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala
index a8bb82e5a68da..bee20227ce67d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala
@@ -31,10 +31,9 @@ import org.apache.spark.SparkFunSuite
 import org.apache.spark.memory.MemoryMode
 import org.apache.spark.sql.{RandomDataGenerator, Row}
 import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.execution.ArrowColumnVector
-import org.apache.spark.sql.execution.arrow.ArrowUtils
 import org.apache.spark.sql.types._
-import org.apache.spark.sql.vectorized.ColumnarBatch
+import org.apache.spark.sql.util.ArrowUtils
+import org.apache.spark.sql.vectorized.{ArrowColumnVector, ColumnarBatch}
 import org.apache.spark.unsafe.Platform
 import org.apache.spark.unsafe.types.CalendarInterval

From 9220e78dd422f66dfd8ec71331e2eec71bc955dc Mon Sep 17 00:00:00 2001
From: Wenchen Fan
Date: Sat, 1 Jun 2019 10:30:04 +0800
Subject: [PATCH 12/12] fix mima

---
 project/MimaExcludes.scala | 33 ++++++++++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
index c2a6356c45b17..db664cf6a920c 100644
--- a/project/MimaExcludes.scala
+++ b/project/MimaExcludes.scala
@@ -288,13 +288,44 @@ object MimaExcludes {
       case _ => true
     },

-    // Data Source V2 vector related API changes
+    // [SPARK-27521][SQL] Move data source v2 to catalyst module
     ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.vectorized.ColumnarBatch"),
     ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.vectorized.ArrowColumnVector"),
     ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.vectorized.ColumnarRow"),
     ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.vectorized.ColumnarArray"),
     ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.vectorized.ColumnarMap"),
     ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.vectorized.ColumnVector"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.GreaterThanOrEqual"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.StringEndsWith"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.LessThanOrEqual$"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.In$"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.Not"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.IsNotNull"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.LessThan"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.LessThanOrEqual"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.EqualNullSafe$"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.GreaterThan$"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.In"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.And"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.StringStartsWith$"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.EqualNullSafe"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.StringEndsWith$"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.GreaterThanOrEqual$"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.Not$"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.IsNull$"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.LessThan$"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.IsNotNull$"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.Or"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.EqualTo$"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.GreaterThan"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.StringContains"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.Filter"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.IsNull"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.EqualTo"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.And$"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.Or$"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.StringStartsWith"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.StringContains$"),

     // [SPARK-26216][SQL] Do not use case class as public API (UserDefinedFunction)
     ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.expressions.UserDefinedFunction$"),