[SPARK-20196][PYTHON][SQL] update doc for catalog functions for all languages, add pyspark refreshByPath API #17512
Changes from all commits
CatalogImpl.scala

@@ -141,7 +141,7 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
   }
 
   /**
-   * Returns a list of columns for the given table temporary view.
+   * Returns a list of columns for the given table/view or temporary view.
    */
  @throws[AnalysisException]("table does not exist")
  override def listColumns(tableName: String): Dataset[Column] = {
@@ -150,7 +150,7 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
   }
 
   /**
-   * Returns a list of columns for the given table in the specified database.
+   * Returns a list of columns for the given table/view or temporary view in the specified database.
    */
  @throws[AnalysisException]("database or table does not exist")
  override def listColumns(dbName: String, tableName: String): Dataset[Column] = {
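For context, a minimal sketch of how the two listColumns overloads documented above are typically called (not part of this PR; the table name "people" and database "default" are hypothetical, and an active SparkSession is assumed):

import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("catalog-docs").getOrCreate()

// Columns of a table/view resolved against the current database.
spark.catalog.listColumns("people").show()

// The same lookup with an explicit database name.
spark.catalog.listColumns("default", "people").show()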
@@ -273,7 +273,7 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
 
   /**
    * :: Experimental ::
-   * Creates a table from the given path based on a data source and returns the corresponding
+   * Creates a table from the given path and returns the corresponding
    * DataFrame.
    *
    * @group ddl_ops
@@ -287,7 +287,7 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
   /**
    * :: Experimental ::
    * (Scala-specific)
-   * Creates a table from the given path based on a data source and a set of options.
+   * Creates a table based on the dataset in a data source and a set of options.
    * Then, returns the corresponding DataFrame.
    *
    * @group ddl_ops
@@ -304,7 +304,7 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
   /**
    * :: Experimental ::
    * (Scala-specific)
-   * Creates a table from the given path based on a data source, a schema and a set of options.
+   * Creates a table based on the dataset in a data source, a schema and a set of options.
    * Then, returns the corresponding DataFrame.
    *
    * @group ddl_ops
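To make the three variants being re-documented concrete, here is a hedged usage sketch (table names, paths, and options are hypothetical; the API was marked experimental in Spark 2.x):

import org.apache.spark.sql.types.{LongType, StringType, StructType}

// Path-based overload: the default data source (spark.sql.sources.default,
// usually parquet) reads whatever sits at the given path.
val byPath = spark.catalog.createExternalTable("events", "/data/events")

// Data source plus options: the table is backed by the dataset that the
// named source reads using these options.
val byOptions = spark.catalog.createExternalTable(
  "events_csv", "csv", Map("path" -> "/data/events_csv", "header" -> "true"))

// Data source, explicit schema, and options.
val schema = new StructType().add("id", LongType).add("ts", StringType)
val bySchema = spark.catalog.createExternalTable(
  "events_typed", "csv", schema, Map("path" -> "/data/events_typed"))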
@@ -367,6 +367,7 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
 
   /**
    * Recovers all the partitions in the directory of a table and update the catalog.
+   * Only works with a partitioned table, and not a temporary view.
    *
    * @param tableName is either a qualified or unqualified name that designates a table.
    *                  If no database identifier is provided, it refers to a table in the
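A minimal sketch of the call this doc describes, assuming a partitioned table whose partition directories were written straight to storage (the table name is hypothetical):

// Scans the table's directory, registers partitions found on disk, and
// updates the catalog. Per the doc change above, this fails for temporary
// views and for tables that are not partitioned.
spark.catalog.recoverPartitions("logs_partitioned")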
@@ -431,8 +432,12 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
   }
 
   /**
-   * Refreshes the cache entry for a table or view, if any. For Hive metastore table, the metadata
-   * is refreshed. For data source tables, the schema will not be inferred and refreshed.
+   * Invalidates and refreshes all the cached data and metadata of the given table or view.
+   * For Hive metastore table, the metadata is refreshed. For data source tables, the schema will
+   * not be inferred and refreshed.
+   *
+   * If this table is cached as an InMemoryRelation, drop the original cached version and make the
+   * new version cached lazily.
    *
    * @group cachemgmt
    * @since 2.0.0
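The lazy re-cache behavior described in the new doc can be sketched as follows (the table name is hypothetical):

// Cache the table; the cache materializes on the first action.
spark.table("people").cache()
spark.table("people").count()

// If the underlying files change out-of-band, refreshTable drops the
// stale cache entry and registers a fresh one lazily: nothing is
// recomputed until the next action touches the table.
spark.catalog.refreshTable("people")
spark.table("people").count()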
@@ -456,7 +461,8 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
 
   /**
    * Refreshes the cache entry and the associated metadata for all Dataset (if any), that contain
-   * the given data source path.
+   * the given data source path. Path matching is by prefix, i.e. "/" would invalidate
+   * everything that is cached.
    *
    * @group cachemgmt
    * @since 2.0.0
Member: We also do the re-cache, but the new version is cached lazily.

Member (Author): For some reason, CatalogImpl.scala is very different from Catalog.scala here - let me know if you want me to change them - for now I've updated the first sentence.

Member: Yes. I found this sentence is copied from Catalog.scala. Maybe, we can update them to
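A sketch of refreshByPath, including the prefix-matching caveat discussed in this thread (the paths are hypothetical); the PySpark API this PR adds mirrors the same call as spark.catalog.refreshByPath(path):

// Invalidates and lazily re-caches every Dataset whose data source path
// starts with the given prefix.
spark.catalog.refreshByPath("/data/events")

// Because matching is by prefix, passing "/" would invalidate
// everything that is cached.
spark.catalog.refreshByPath("/")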
The same here.

not a temporary view. -> not a view.