Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,16 @@

package org.apache.spark.sql.connector.catalog;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;

import scala.jdk.javaapi.CollectionConverters;

import org.apache.spark.annotation.Evolving;
import org.apache.spark.sql.catalyst.util.StringUtils;
import org.apache.spark.sql.connector.expressions.Transform;
import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException;
import org.apache.spark.sql.catalyst.analysis.NoSuchTableException;
Expand All @@ -26,10 +35,6 @@
import org.apache.spark.sql.errors.QueryExecutionErrors;
import org.apache.spark.sql.types.StructType;

import java.util.Collections;
import java.util.Map;
import java.util.Set;

/**
* Catalog methods for working with Tables.
* <p>
Expand Down Expand Up @@ -97,6 +102,26 @@ public interface TableCatalog extends CatalogPlugin {
*/
Identifier[] listTables(String[] namespace) throws NoSuchNamespaceException;

/**
* List the tables in a namespace from the catalog by pattern string.
* <p>
* If the catalog supports views, this must return identifiers for only tables and not views.
*
* @param namespace a multi-part namespace
* @param pattern the filter pattern; only '*' and '|' are allowed as wildcards, and other characters
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not related to this PR, but the existing doc is a bit vague. | is not a wildcard, right? And | is also a valid syntax in regex. Can we take a look at other databases and see how they document it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, let me investigate it.

* follow regular-expression conventions. Matching is case-insensitive, and whitespace
* on both ends of the pattern is ignored
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we have a doc page for the pattern string semantic? If we do we should reference it here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I searched the document and the only possible relationship is this one:
https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select-like.html#parameters
image

Perhaps we should explain it in detail here?
(PS: The first pr that introduces StringUtils.filterPattern is: #12206)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yea if they use the same implementation. The LIKE pattern doc does not even mention the *.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another option is to document it in the SHOW TABLES doc page.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have looked at the document https://spark.apache.org/docs/latest/sql-ref-syntax-aux-show-tables.html#parameters(SHOW TABLES doc page) and found that the parameter regex_pattern in it explains the pattern.
image
Thank you very much for the reminder — let's refer to that.

* @return an array of Identifiers for tables
* @throws NoSuchNamespaceException If the namespace does not exist (optional).
*/
default Identifier[] listTables(String[] namespace, String pattern)
throws NoSuchNamespaceException {
List<String> tableNames = Arrays.stream(listTables(namespace)).map(Identifier::name).toList();
return CollectionConverters.asJava(StringUtils.filterPattern(
CollectionConverters.asScala(tableNames).toSeq(), pattern)).stream().map(
name -> Identifier.of(namespace, name)).toArray(Identifier[]::new);
}

/**
* Load table metadata by {@link Identifier identifier} from the catalog.
* <p>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,35 @@ class CatalogSuite extends SparkFunSuite {
assert(catalog.listTables(Array("ns2")).toSet == Set(ident3))
}

test("listTables by pattern") {
val catalog = newCatalog()
val ident1 = Identifier.of(Array("ns"), "test_table_1")
val ident2 = Identifier.of(Array("ns"), "test_table_2")
val ident3 = Identifier.of(Array("ns2"), "test_table_1")

intercept[NoSuchNamespaceException](catalog.listTables(Array("ns", "*test*")))

catalog.createTable(ident1, schema, emptyTrans, emptyProps)

assert(catalog.listTables(Array("ns"), "*test*").toSet == Set(ident1))
intercept[NoSuchNamespaceException](catalog.listTables(Array("ns2"), "*test*"))

catalog.createTable(ident3, schema, emptyTrans, emptyProps)
catalog.createTable(ident2, schema, emptyTrans, emptyProps)

assert(catalog.listTables(Array("ns"), "*test*").toSet == Set(ident1, ident2))
assert(catalog.listTables(Array("ns2"), "*test*").toSet == Set(ident3))

catalog.dropTable(ident1)

assert(catalog.listTables(Array("ns"), "*test*").toSet == Set(ident2))

catalog.dropTable(ident2)

assert(catalog.listTables(Array("ns"), "*test*").isEmpty)
assert(catalog.listTables(Array("ns2"), "*test*").toSet == Set(ident3))
}

test("createTable") {
val catalog = newCatalog()

Expand Down