From 4f2e56b754765c8f1493c7b789f7c5075f3cb069 Mon Sep 17 00:00:00 2001
From: Jeff Zhang
Date: Mon, 14 Dec 2015 17:09:14 +0800
Subject: [PATCH 1/4] [SPARK-12318][SPARKR] Save mode in SparkR should be error by default

---
 R/pkg/R/DataFrame.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 764597d1e32b4..a16f9c39d2882 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1903,7 +1903,7 @@ setMethod("except",
 #' }
 setMethod("write.df",
           signature(df = "DataFrame", path = "character"),
-          function(df, path, source = NULL, mode = "append", ...){
+          function(df, path, source = NULL, mode = "error", ...){
             if (is.null(source)) {
               sqlContext <- get(".sparkRSQLsc", envir = .sparkREnv)
               source <- callJMethod(sqlContext, "getConf", "spark.sql.sources.default",
@@ -1928,7 +1928,7 @@ setMethod("write.df",
 #' @export
 setMethod("saveDF",
           signature(df = "DataFrame", path = "character"),
-          function(df, path, source = NULL, mode = "append", ...){
+          function(df, path, source = NULL, mode = "error", ...){
             write.df(df, path, source, mode, ...)
           })
 
@@ -1968,7 +1968,7 @@ setMethod("saveDF",
 setMethod("saveAsTable",
           signature(df = "DataFrame", tableName = "character", source = "character",
                     mode = "character"),
-          function(df, tableName, source = NULL, mode="append", ...){
+          function(df, tableName, source = NULL, mode = "error", ...){
             if (is.null(source)) {
               sqlContext <- get(".sparkRSQLsc", envir = .sparkREnv)
               source <- callJMethod(sqlContext, "getConf", "spark.sql.sources.default",

From 3e71b06d1da84142619f70b1c6fa4f8bcf6e5960 Mon Sep 17 00:00:00 2001
From: Jeff Zhang
Date: Tue, 15 Dec 2015 11:26:40 +0800
Subject: [PATCH 2/4] Add doc for this change

---
 docs/sparkr.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/sparkr.md b/docs/sparkr.md
index 01148786b79d7..adf5ee3b5fd01 100644
--- a/docs/sparkr.md
+++ b/docs/sparkr.md
@@ -148,7 +148,7 @@ printSchema(people)
 
 The data sources API can also be used to save out DataFrames into multiple
 file formats. For example we can save the DataFrame from the previous example
-to a Parquet file using `write.df`
+to a Parquet file using `write.df` (Before Spark 1.7, the default value of `mode` was 'append'; it was changed to 'error' to be consistent with the Scala API)
 
 {% highlight r %}

From d72a3af6d09f83235a6d12eeb848453a11c38e9e Mon Sep 17 00:00:00 2001
From: Jeff Zhang
Date: Tue, 15 Dec 2015 15:16:19 +0800
Subject: [PATCH 3/4] Add migration doc and update R doc

---
 R/pkg/R/DataFrame.R | 4 ++--
 docs/sparkr.md      | 7 +++++++
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index a16f9c39d2882..380a13fe2b7c6 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1886,7 +1886,7 @@ setMethod("except",
 #' @param df A SparkSQL DataFrame
 #' @param path A name for the table
 #' @param source A name for external data source
-#' @param mode One of 'append', 'overwrite', 'error', 'ignore' save mode
+#' @param mode One of 'append', 'overwrite', 'error', 'ignore' save mode ('error' by default)
 #'
 #' @family DataFrame functions
 #' @rdname write.df
@@ -1951,7 +1951,7 @@ setMethod("saveDF",
 #' @param df A SparkSQL DataFrame
 #' @param tableName A name for the table
 #' @param source A name for external data source
-#' @param mode One of 'append', 'overwrite', 'error', 'ignore' save mode
+#' @param mode One of 'append', 'overwrite', 'error', 'ignore' save mode ('error' by default)
 #'
 #' @family DataFrame functions
 #' @rdname saveAsTable
diff --git a/docs/sparkr.md b/docs/sparkr.md
index adf5ee3b5fd01..8712ca787f149 100644
--- a/docs/sparkr.md
+++ b/docs/sparkr.md
@@ -387,3 +387,10 @@ The following functions are masked by the SparkR package:
 Since part of SparkR is modeled on the `dplyr` package, certain functions in SparkR share the same names with those in `dplyr`. Depending on the load order of the two packages, some functions from the package loaded first are masked by those in the package loaded after. In such case, prefix such calls with the package name, for instance, `SparkR::cume_dist(x)` or `dplyr::cume_dist(x)`.
 
 You can inspect the search path in R with [`search()`](https://stat.ethz.ch/R-manual/R-devel/library/base/html/search.html)
+
+
+# Migration Guide
+
+## Upgrading From SparkR 1.6 to 1.7
+
+ - Before Spark 1.7, the default save mode in `saveDF`/`write.df`/`saveAsTable` was `append`; it has been changed to `error` to be consistent with the Scala API.

From 8eb181cd32ddf323db1ba8243f3055eb36e11b02 Mon Sep 17 00:00:00 2001
From: Jeff Zhang
Date: Wed, 16 Dec 2015 09:09:45 +0800
Subject: [PATCH 4/4] Update doc

---
 docs/sparkr.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/sparkr.md b/docs/sparkr.md
index 8712ca787f149..9ddd2eda3fe8b 100644
--- a/docs/sparkr.md
+++ b/docs/sparkr.md
@@ -148,7 +148,7 @@ printSchema(people)
 
 The data sources API can also be used to save out DataFrames into multiple
 file formats. For example we can save the DataFrame from the previous example
-to a Parquet file using `write.df` (Before Spark 1.7, the default value of `mode` was 'append'; it was changed to 'error' to be consistent with the Scala API)
+to a Parquet file using `write.df` (Until Spark 1.6, the default mode for writes was `append`; it was changed in Spark 1.7 to `error` to match the Scala API)
 
 {% highlight r %}
@@ -393,4 +393,4 @@ You can inspect the search path in R with [`search()`](https://stat.ethz.ch/R-ma
 
 ## Upgrading From SparkR 1.6 to 1.7
 
- - Before Spark 1.7, the default save mode in `saveDF`/`write.df`/`saveAsTable` was `append`; it has been changed to `error` to be consistent with the Scala API.
+ - Until Spark 1.6, the default mode for writes was `append`. It was changed in Spark 1.7 to `error` to match the Scala API.
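
For reviewers, a minimal sketch of the behavior this series introduces. It is only a sketch under assumptions: a SparkR 1.x session where `sqlContext` was created via `sparkRSQL.init(sc)`, the `people.json` example file shipped with Spark, and an illustrative Parquet output path.

{% highlight r %}
library(SparkR)

# Load the bundled example JSON file into a DataFrame.
people <- read.df(sqlContext, "examples/src/main/resources/people.json", "json")

# After this patch, `mode` defaults to "error": the call below fails if
# "people.parquet" already exists, matching the Scala DataFrameWriter default.
write.df(people, path = "people.parquet", source = "parquet")

# Scripts that relied on the old default must now opt in explicitly.
write.df(people, path = "people.parquet", source = "parquet", mode = "append")
{% endhighlight %}

The same default applies to `saveDF` and `saveAsTable`, so passing `mode` explicitly is the migration path for existing scripts that depended on `append`.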