@@ -0,0 +1,105 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.connector.catalog;

import java.util.Map;

import org.apache.spark.annotation.Experimental;

/**
* Catalog methods for working with Partitions.
*/
@Experimental
public interface SupportsPartitions extends TableCatalog {
Contributor:

What is the reason to extend TableCatalog instead of Table? I think it would be better to support partitions at a table level.

Doing it this way creates more complexity for implementations because they need to handle more cases. For example, if the table doesn't exist, this should throw NoSuchTableException just like loadTable. It would be simpler for the API if these methods were used to manipulate a table, not to load and manipulate a table. Loading should be orthogonal to partition operations.

Another issue is that this assumes a table catalog contains tables that support partitions, or tables that do not. But Spark's built-in catalog supports some sources that don't expose partitions and some that do. This would cause more work for many catalogs, which would need to detect whether a table has support and throw UnsupportedOperationException if it does not. That also makes integration more difficult for Spark because it can't check a table in the analyzer to determine whether it supports the operation or not. Instead, Spark would need to handle exceptions at runtime.
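(For illustration, a minimal sketch of the table-level alternative being suggested here; the interface shape is hypothetical and not part of this PR:)

import java.util.Map;

import org.apache.spark.sql.connector.catalog.Table;

// Hypothetical sketch of a table-level mix-in. Spark calls loadTable(ident)
// first, so a missing table already fails there with NoSuchTableException,
// and the analyzer can check `table instanceof SupportsPartitions` before
// planning a partition operation instead of catching
// UnsupportedOperationException at runtime.
public interface SupportsPartitions extends Table {
  void createPartitions(TablePartition[] partitions, boolean ignoreIfExists);
  void dropPartitions(Map<String, String>[] partitions, boolean ignoreIfNotExists);
  TablePartition getPartition(Map<String, String> partition);
  TablePartition[] listPartitions(Map<String, String> partition);
}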

Contributor Author (@jackylee-ch, Jul 24, 2020):

Sounds reasonable to me.
The reason I wanted to define it as a Catalog API is that I think the Catalog API is meant to manage metadata for partitions, while the Table API is used for the actual data operations.
However, as you said, some sources, such as MySQL or FileTable, will use the partition API to manage partition data. So making the partition API part of the Table API is a better way.
Thanks


  /**
   * Create partitions in an existing table, assuming it exists.
   *
   * @param ident a table identifier
   * @param partitions an array of partitions to create in the table
   * @param ignoreIfExists
   */
  void createPartitions(
      Identifier ident,
      TablePartition[] partitions,
      Boolean ignoreIfExists);

Reviewer:

Nit: should this be boolean? Also, please add this parameter to the javadoc.
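(For reference, the signature with that nit addressed might read as follows; the javadoc wording is illustrative:)

  /**
   * Create partitions in an existing table, assuming it exists.
   *
   * @param ident a table identifier
   * @param partitions an array of partitions to create
   * @param ignoreIfExists if true, silently skip partitions that already exist;
   *                       if false, fail when a partition already exists
   */
  void createPartitions(
      Identifier ident,
      TablePartition[] partitions,
      boolean ignoreIfExists);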

Contributor Author (@jackylee-ch, Jul 25, 2020):

This can be renamed to createPartition(identifier, properties)


  /**
   * Drop partitions from a table, assuming they exist.
   *
   * @param ident a table identifier
   * @param partitions a list of string map for existing partitions
   * @param ignoreIfNotExists
   */
  void dropPartitions(
Contributor:

In other places in the v2 API, we leave ifNotExists to Spark so that we don't need to pass it. Instead of passing an extra parameter, the method returns a boolean to indicate whether the partition was dropped or if no action was taken. Either way, the partition should not exist after the method call so it is idempotent.

Then, Spark decides whether to throw an exception because the partition did not exist. We prefer that pattern of doing more in Spark to make behavior standard across sources and to make the requirements as simple as we can.
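(For illustration, a sketch of that pattern applied here; the single-partition shape and names are hypothetical:)

import java.util.Map;

// Hypothetical sketch: no ignoreIfNotExists flag. The call is idempotent:
// after it returns, the partition does not exist either way.
interface IdempotentPartitionDrop {
  /**
   * Drop a partition if it exists.
   *
   * @return true if a partition was dropped, false if no action was taken
   */
  boolean dropPartition(Map<String, String> partition);
}

Spark, not the source, would then turn a false return into an exception for ALTER TABLE ... DROP PARTITION without IF EXISTS, keeping the behavior uniform across sources.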

Contributor Author:

Good point, I will change it.

Contributor Author:

This can be renamed to boolean dropPartition(identifier)

      Identifier ident,
      Map<String, String>[] partitions,
Member:

There are a few cases here where partitions are referred to as Map<String, String> and a few times where they use the TablePartition class. I think it would probably make more sense if they were all TablePartition (the class) unless there is a significant reason for them not to be.

Contributor Author:

TablePartition contains the partition metadata; it's too heavy for this. As for Transform, it may be a good choice if it can pass partition values.

Member:

Then is this partitionSpec and not "partition"?

Contributor (@rdblue, Jul 23, 2020):

I think we need to decide how to pass the data that identifies a partition.

There have been a lot of problems over the years working with Hive partitions because values are coerced to and from String. Often, people get the conversions slightly wrong. I think a better approach is to use a row of values to pass partition data between Spark and a source. We already pass typed rows in the read and write APIs, so it would be reasonable to do so here as well.

One benefit of using a typed row to represent partition data is that we can directly use a listPartitions call for metadata queries.

This would also align more closely with how Spark handles partitions internally. From PartitioningUtils:

/**
 * Holds a directory in a partitioned collection of files as well as the partition values
 * in the form of a Row.  Before scanning, the files at `path` need to be enumerated.
 */
case class PartitionPath(values: InternalRow, path: Path)

case class PartitionSpec(
    partitionColumns: StructType,
    partitions: Seq[PartitionPath])

A table that implements SupportsPartitions could return a partitionType(): StructType that describes the partition rows it accepts and produces.
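(For illustration, a sketch of that suggestion; the interface and method names are hypothetical:)

import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.types.StructType;

// Hypothetical sketch: the table declares the schema of its partition rows,
// and each partition is identified by a typed InternalRow matching that
// schema, so no values are coerced to or from String.
interface PartitionedTableSketch {
  /** The schema of the rows this table accepts and produces for partitions. */
  StructType partitionType();

  /** Identifier rows of all partitions; each row matches partitionType(). */
  InternalRow[] listPartitionIdentifiers();
}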

Contributor:

One more thing: using a row to pass a tuple would make it possible to also get the partition that an input split (e.g., file) belongs to. That would be useful for storage-partitioned joins.

Contributor Author (@jackylee-ch, Jul 24, 2020):

Good point! The partition identifiers can be written like PartitionSpec.

Contributor:

I was actually thinking that partitions would be identified more like PartitionPath. In this API, I'm not sure if the Path part of PartitionPath is needed, since sources may not need to expose it to Spark. (In Iceberg, for example, there is no partition path.)

I think just using an InternalRow to identify a partition is a good idea.

Contributor Author:

Yep, I have changed it to InternalRow. The definition in PartitioningUtils is a very good idea, thanks.

Contributor Author:

Thinking about this further: InternalRow only contains the partition identifier values, not the partition column names. That means the user must always supply the partition data in order. Is that reasonable for users? @rdblue

Contributor:

I think it is reasonable. While this is a public API, the user here is a connector developer and they are expected to be able to produce InternalRow in other places. I think this is actually a good thing because we don't need to pass the names every time.
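(To make the ordering point concrete: for a hypothetical partitionType() of struct&lt;dt: string, hour: int&gt;, a connector developer would build the identifier row positionally, e.g.:)

import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow;
import org.apache.spark.unsafe.types.UTF8String;

// Identifier row for partition (dt='2020-07-24', hour=10). Values are
// positional, so they must follow the declared partition column order;
// strings are passed as UTF8String, as in the other InternalRow APIs.
InternalRow partitionIdent = new GenericInternalRow(new Object[] {
    UTF8String.fromString("2020-07-24"), 10});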

      Boolean ignoreIfNotExists);

  /**
   * Override the specs of one or more existing table partitions, assuming they exist.
   *
   * @param ident a table identifier
   * @param oldPartitions a list of string map for existing partitions to be renamed
   * @param newPartitions a list of string map for new partitions
   */
  void renamePartitions(
      Identifier ident,
      Map<String, String>[] oldPartitions,
      Map<String, String>[] newPartitions);

  /**
   * Alter one or more table partitions whose specs match those specified in `partitions`,
   * assuming the partitions exist.
   *
   * @param ident a table identifier
   * @param partitions an array of partitions holding the new metadata to apply
   */
  void alterPartitions(
      Identifier ident,
      TablePartition[] partitions);

  /**
   * Retrieve the metadata of a table partition, assuming it exists.

Reviewer:

What happens if it doesn't exist? Is an exception thrown? I don't know if this documentation style is consistent with how Spark does it, but I would expect something like:

Retrieve the metadata of a table partition.

@throws PartitionNotFoundException ...

Contributor Author:

Sorry, I will rewrite the documentation. NoSuchPartitionException should be thrown.

   *
   * @param ident a table identifier
   * @param partition a list of string map for existing partitions

Reviewer:

It isn't exactly clear what the keys and values of the map are here. It also does not appear to be a list, which I find confusing.

Contributor Author:

Sorry, I will rewrite the documentation. Thanks for paying attention to this PR.

   */
  TablePartition getPartition(
      Identifier ident,
      Map<String, String> partition);

  /**
   * List the names of all partitions that belong to the specified table, assuming it exists.
   *
   * @param ident a table identifier
   * @param partition a list of string map for existing partitions
   */
  String[] listPartitionNames(
      Identifier ident,
      Map<String, String> partition);
Contributor:

It isn't clear what this does. What is a partition name? Are you referring to the "key" in Hive?

Contributor Author (@jackylee-ch, Jul 24, 2020):

It will return the partition identifiers in this table, optionally filtered by a partial partition identifier. Not just the key in Hive.

Contributor Author:

This can be changed to Row[] listPartitionIdentifiers(identifier). The identifier parameter is used to find the partition identifiers that it is a part of.


  /**
   * List the metadata of all partitions that belong to the specified table, assuming it exists.
   *
   * @param ident a table identifier
   * @param partition a list of string map for existing partitions
   */
  TablePartition[] listPartitions(
      Identifier ident,
      Map<String, String> partition);
}
@@ -0,0 +1,54 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.connector.catalog;

import java.util.HashMap;
import java.util.Map;

public class TablePartition {
Member:

Partitions are already referred to in other parts of the catalog with the Transform class, do we need this as well?

Contributor Author:

Thanks for your CR.
Transform does not contain actual partition values or partition metadata. In many cases, we need to know the metadata of a partition.
Please correct me if there is something wrong.

Reviewer:

This class and its methods need documentation; it might also help clarify how this differs from Transform.

Contributor Author:

Thanks for the reply, I will do it.

  private Map<String, String> partitionSpec;
  private Map<String, String> parametes;

Reviewer:

nit: typo "parameters"?

Contributor Author:

Ah, thanks


  public TablePartition(
      Map<String, String> partitionSpec) {
    this.partitionSpec = partitionSpec;
    this.parametes = new HashMap<String, String>();
  }

  public TablePartition(
      Map<String, String> partitionSpec,
      Map<String, String> parametes) {
    this.partitionSpec = partitionSpec;
    this.parametes = parametes;
  }

  public Map<String, String> getPartitionSpec() {
    return partitionSpec;
  }

  public void setPartitionSpec(Map<String, String> partitionSpec) {
    this.partitionSpec = partitionSpec;
  }

  public Map<String, String> getParametes() {
    return parametes;
  }

  public void setParameters(Map<String, String> parametes) {
    this.parametes = parametes;
  }
}
@@ -0,0 +1,164 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.connector

import java.{lang, util}
import java.util.concurrent.ConcurrentHashMap

import scala.collection.JavaConverters._
import scala.collection.mutable

import org.apache.spark.sql.catalyst.analysis.NoSuchPartitionException
import org.apache.spark.sql.connector.catalog.{Identifier, SupportsPartitions, TablePartition}


/**
 * This class is used to test the SupportsPartitions API.
 */
class InMemoryPartitionCatalog extends InMemoryTableCatalog with SupportsPartitions {

  import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._

  protected val memoryTablePartitions: util.Map[Identifier, mutable.HashSet[TablePartition]] =
    new ConcurrentHashMap[Identifier, mutable.HashSet[TablePartition]]()

  def createPartitions(
      ident: Identifier,
      partitions: Array[TablePartition],
      ignoreIfExists: lang.Boolean = false): Unit = {
    assert(tableExists(ident))
    val table = loadTable(ident).asInstanceOf[InMemoryTable]
    val tableSchema = table.schema.map(_.name)
    checkPartitionKeysExists(tableSchema, partitions.map(_.getPartitionSpec))
    val tablePartitions =
      memoryTablePartitions.getOrDefault(ident, new mutable.HashSet[TablePartition]())
    partitions.foreach(tablePartitions.add)
    memoryTablePartitions.put(ident, tablePartitions)
  }

  def dropPartitions(
      ident: Identifier,
      partitions: Array[util.Map[String, String]],
      ignoreIfNotExists: lang.Boolean = false): Unit = {
    assert(tableExists(ident))
    val table = loadTable(ident).asInstanceOf[InMemoryTable]
    val tableSchema = table.schema.map(_.name)
    checkPartitionKeysExists(tableSchema, partitions)
    if (memoryTablePartitions.containsKey(ident)) {
      val tablePartitions = memoryTablePartitions.get(ident)
      tablePartitions.filter { tablePartition =>
        partitions.contains(tablePartition.getPartitionSpec)
      }.foreach(tablePartitions.remove)
      memoryTablePartitions.put(ident, tablePartitions)
    }
  }

  def renamePartitions(
      ident: Identifier,
      oldPartitions: Array[util.Map[String, String]],
      newPartitions: Array[util.Map[String, String]]): Unit = {
    assert(tableExists(ident))
    val table = loadTable(ident).asInstanceOf[InMemoryTable]
    val tableSchema = table.schema.map(_.name)
    checkPartitionKeysExists(tableSchema, oldPartitions)
    checkPartitionKeysExists(tableSchema, newPartitions)
    if (memoryTablePartitions.containsKey(ident)) {
      val tablePartitions = memoryTablePartitions.get(ident)
      // Pair each old spec with its replacement, rather than crossing the
      // two arrays, so oldPartitions(i) is renamed to newPartitions(i).
      for ((oldPartition, newPartition) <- oldPartitions.zip(newPartitions)) {
        tablePartitions.filter { tablePartition =>
          oldPartition == tablePartition.getPartitionSpec
        }.foreach { tablePartition =>
          tablePartition.setPartitionSpec(newPartition)
        }
      }
      memoryTablePartitions.put(ident, tablePartitions)
    }
  }

  def alterPartitions(
      ident: Identifier,
      partitions: Array[TablePartition]): Unit = {
    assert(tableExists(ident))
    val table = loadTable(ident).asInstanceOf[InMemoryTable]
    val tableSchema = table.schema.map(_.name)
    checkPartitionKeysExists(tableSchema, partitions.map(_.getPartitionSpec))
    if (memoryTablePartitions.containsKey(ident)) {
      val tablePartitions = memoryTablePartitions.get(ident)
      partitions.foreach { partition =>
        tablePartitions.filter(_.getPartitionSpec == partition.getPartitionSpec)
          .foreach(tablePartitions.remove)
        tablePartitions.add(partition)
      }
    }
  }

  def getPartition(
      ident: Identifier,
      partition: util.Map[String, String]): TablePartition = {
    assert(tableExists(ident))
    val table = loadTable(ident).asInstanceOf[InMemoryTable]
    val tableSchema = table.schema.map(_.name)
    checkPartitionKeysExists(tableSchema, Array(partition))
    memoryTablePartitions.getOrDefault(ident, mutable.HashSet.empty[TablePartition])
      .find(_.getPartitionSpec == partition)
      .getOrElse {
        throw new NoSuchPartitionException(
          ident.namespace().quoted,
          ident.name(),
          partition.asScala.toMap)
      }
  }

  def listPartitionNames(
      ident: Identifier,
      partition: util.Map[String, String] = new util.HashMap()): Array[String] = {
    assert(tableExists(ident))
    memoryTablePartitions.getOrDefault(ident, mutable.HashSet.empty[TablePartition])
      .filter { tablePartition =>
        partition.asScala.toSet.subsetOf(tablePartition.getPartitionSpec.asScala.toSet)
      }.map { tablePartition =>
        tablePartition.getPartitionSpec.asScala.map { kv =>
          s"${kv._1}=${kv._2}"
        }.mkString("/")
      }.toArray
  }

  def listPartitions(
      ident: Identifier,
      partition: util.Map[String, String] = new util.HashMap()): Array[TablePartition] = {
    assert(tableExists(ident))
    memoryTablePartitions.getOrDefault(ident, mutable.HashSet.empty[TablePartition])
      .filter { tablePartition =>
        partition.asScala.toSet.subsetOf(tablePartition.getPartitionSpec.asScala.toSet)
      }.toArray
  }

  private def checkPartitionKeysExists(
      tableSchema: Seq[String],
      partitions: Array[util.Map[String, String]]): Unit = {
    partitions.foreach { partition =>
      val errorPartitionKeys = partition.keySet().asScala.filterNot(tableSchema.contains)
      if (errorPartitionKeys.nonEmpty) {
        throw new IllegalArgumentException(
          s"Partition keys ${errorPartitionKeys.mkString("{", ",", "}")} do not exist " +
            s"in table schema: ${tableSchema.mkString("{", ",", "}")}")
      }
    }
  }
}