1 change: 1 addition & 0 deletions R/pkg/DESCRIPTION
@@ -13,6 +13,7 @@ Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"),
License: Apache License (== 2.0)
URL: http://www.apache.org/ http://spark.apache.org/
BugReports: http://spark.apache.org/contributing.html
OS_Type: unix
Contributor
Can we do this in a separate PR? It'll be good to track this as a separate JIRA / PR just to look up why we did this. Also I'll hopefully hear from the winbuilder maintainer if this is something we can avoid doing.

Member Author
Right, I can revert this before merging. I want to have a branch to test the devtools install.

Depends:
R (>= 3.0),
methods
2 changes: 1 addition & 1 deletion R/pkg/inst/tests/testthat/jarTest.R
@@ -16,7 +16,7 @@
#
library(SparkR)

sc <- sparkR.session()
sc <- sparkR.session(master = "local[1]")
Contributor

Any specific reason these don't use the sparkRMaster variable?

Member Author

It's not accessible here; this is a separate R app.

Contributor

Ah got it.
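
For context, a rough sketch of why the variable is out of scope here (the launch mechanism below is illustrative, not the exact invocation used by the test suite): jarTest.R is executed as a standalone R script in a fresh process rather than being sourced by run-all.R, so nothing defined there is visible and the master has to be hard-coded.

```r
# Illustrative only: jarTest.R runs as its own R process (e.g. driven by a
# spark-submit call from the JVM-side test), so it starts with a clean
# global environment.
exists("sparkRMaster")                      # FALSE in that fresh process
sc <- sparkR.session(master = "local[1]")   # hence the hard-coded master
```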


helloTest <- SparkR:::callJStatic("sparkrtest.DummyClass",
"helloWorld",
2 changes: 1 addition & 1 deletion R/pkg/inst/tests/testthat/packageInAJarTest.R
@@ -17,7 +17,7 @@
library(SparkR)
library(sparkPackageTest)

sparkR.session()
sparkR.session(master = "local[1]")

run1 <- myfunc(5L)

2 changes: 1 addition & 1 deletion R/pkg/inst/tests/testthat/test_Serde.R
@@ -17,7 +17,7 @@

context("SerDe functionality")

sparkSession <- sparkR.session(enableHiveSupport = FALSE)
sparkSession <- sparkR.session(master = sparkRMaster, enableHiveSupport = FALSE)
Contributor

Minor nit: is sparkRTestMaster a better name?


test_that("SerDe of primitive types", {
skip_on_cran()
2 changes: 1 addition & 1 deletion R/pkg/inst/tests/testthat/test_binaryFile.R
@@ -18,7 +18,7 @@
context("functions on binary files")

# JavaSparkContext handle
sparkSession <- sparkR.session(enableHiveSupport = FALSE)
sparkSession <- sparkR.session(master = sparkRMaster, enableHiveSupport = FALSE)
sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", sparkSession)

mockFile <- c("Spark is pretty.", "Spark is awesome.")
2 changes: 1 addition & 1 deletion R/pkg/inst/tests/testthat/test_binary_function.R
@@ -18,7 +18,7 @@
context("binary functions")

# JavaSparkContext handle
sparkSession <- sparkR.session(enableHiveSupport = FALSE)
sparkSession <- sparkR.session(master = sparkRMaster, enableHiveSupport = FALSE)
sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", sparkSession)

# Data
2 changes: 1 addition & 1 deletion R/pkg/inst/tests/testthat/test_broadcast.R
@@ -18,7 +18,7 @@
context("broadcast variables")

# JavaSparkContext handle
sparkSession <- sparkR.session(enableHiveSupport = FALSE)
sparkSession <- sparkR.session(master = sparkRMaster, enableHiveSupport = FALSE)
sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", sparkSession)

# Partitioned data
16 changes: 8 additions & 8 deletions R/pkg/inst/tests/testthat/test_context.R
@@ -60,7 +60,7 @@ test_that("repeatedly starting and stopping SparkR", {
skip_on_cran()

for (i in 1:4) {
sc <- suppressWarnings(sparkR.init())
sc <- suppressWarnings(sparkR.init(master = sparkRMaster))
rdd <- parallelize(sc, 1:20, 2L)
expect_equal(countRDD(rdd), 20)
suppressWarnings(sparkR.stop())
@@ -69,7 +69,7 @@ test_that("repeatedly starting and stopping SparkR", {

test_that("repeatedly starting and stopping SparkSession", {
for (i in 1:4) {
sparkR.session(enableHiveSupport = FALSE)
sparkR.session(master = sparkRMaster, enableHiveSupport = FALSE)
df <- createDataFrame(data.frame(dummy = 1:i))
expect_equal(count(df), i)
sparkR.session.stop()
@@ -79,12 +79,12 @@ test_that("repeatedly starting and stopping SparkSession", {
test_that("rdd GC across sparkR.stop", {
skip_on_cran()

sc <- sparkR.sparkContext() # sc should get id 0
sc <- sparkR.sparkContext(master = sparkRMaster) # sc should get id 0
rdd1 <- parallelize(sc, 1:20, 2L) # rdd1 should get id 1
rdd2 <- parallelize(sc, 1:10, 2L) # rdd2 should get id 2
sparkR.session.stop()

sc <- sparkR.sparkContext() # sc should get id 0 again
sc <- sparkR.sparkContext(master = sparkRMaster) # sc should get id 0 again

# GC rdd1 before creating rdd3 and rdd2 after
rm(rdd1)
@@ -104,7 +104,7 @@ test_that("rdd GC across sparkR.stop", {
test_that("job group functions can be called", {
skip_on_cran()

sc <- sparkR.sparkContext()
sc <- sparkR.sparkContext(master = sparkRMaster)
setJobGroup("groupId", "job description", TRUE)
cancelJobGroup("groupId")
clearJobGroup()
@@ -118,7 +118,7 @@ test_that("job group functions can be called", {
test_that("utility function can be called", {
skip_on_cran()

sparkR.sparkContext()
sparkR.sparkContext(master = sparkRMaster)
setLogLevel("ERROR")
sparkR.session.stop()
})
@@ -175,7 +175,7 @@ test_that("sparkJars sparkPackages as comma-separated strings", {
})

test_that("spark.lapply should perform simple transforms", {
sparkR.sparkContext()
sparkR.sparkContext(master = sparkRMaster)
doubled <- spark.lapply(1:10, function(x) { 2 * x })
expect_equal(doubled, as.list(2 * 1:10))
sparkR.session.stop()
@@ -184,7 +184,7 @@ test_that("spark.lapply should perform simple transforms", {
test_that("add and get file to be downloaded with Spark job on every node", {
skip_on_cran()

sparkR.sparkContext()
sparkR.sparkContext(master = sparkRMaster)
# Test add file.
path <- tempfile(pattern = "hello", fileext = ".txt")
filename <- basename(path)
2 changes: 1 addition & 1 deletion R/pkg/inst/tests/testthat/test_includePackage.R
@@ -18,7 +18,7 @@
context("include R packages")

# JavaSparkContext handle
sparkSession <- sparkR.session(enableHiveSupport = FALSE)
sparkSession <- sparkR.session(master = sparkRMaster, enableHiveSupport = FALSE)
sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", sparkSession)

# Partitioned data
2 changes: 1 addition & 1 deletion R/pkg/inst/tests/testthat/test_jvm_api.R
@@ -17,7 +17,7 @@

context("JVM API")

sparkSession <- sparkR.session(enableHiveSupport = FALSE)
sparkSession <- sparkR.session(master = sparkRMaster, enableHiveSupport = FALSE)

test_that("Create and call methods on object", {
jarr <- sparkR.newJObject("java.util.ArrayList")
2 changes: 1 addition & 1 deletion R/pkg/inst/tests/testthat/test_mllib_classification.R
@@ -20,7 +20,7 @@ library(testthat)
context("MLlib classification algorithms, except for tree-based algorithms")

# Tests for MLlib classification algorithms in SparkR
sparkSession <- sparkR.session(enableHiveSupport = FALSE)
sparkSession <- sparkR.session(master = sparkRMaster, enableHiveSupport = FALSE)

absoluteSparkPath <- function(x) {
sparkHome <- sparkR.conf("spark.home")
2 changes: 1 addition & 1 deletion R/pkg/inst/tests/testthat/test_mllib_clustering.R
@@ -20,7 +20,7 @@ library(testthat)
context("MLlib clustering algorithms")

# Tests for MLlib clustering algorithms in SparkR
sparkSession <- sparkR.session(enableHiveSupport = FALSE)
sparkSession <- sparkR.session(master = sparkRMaster, enableHiveSupport = FALSE)

absoluteSparkPath <- function(x) {
sparkHome <- sparkR.conf("spark.home")
2 changes: 1 addition & 1 deletion R/pkg/inst/tests/testthat/test_mllib_fpm.R
@@ -20,7 +20,7 @@ library(testthat)
context("MLlib frequent pattern mining")

# Tests for MLlib frequent pattern mining algorithms in SparkR
sparkSession <- sparkR.session(enableHiveSupport = FALSE)
sparkSession <- sparkR.session(master = sparkRMaster, enableHiveSupport = FALSE)

test_that("spark.fpGrowth", {
data <- selectExpr(createDataFrame(data.frame(items = c(
2 changes: 1 addition & 1 deletion R/pkg/inst/tests/testthat/test_mllib_recommendation.R
@@ -20,7 +20,7 @@ library(testthat)
context("MLlib recommendation algorithms")

# Tests for MLlib recommendation algorithms in SparkR
sparkSession <- sparkR.session(enableHiveSupport = FALSE)
sparkSession <- sparkR.session(master = sparkRMaster, enableHiveSupport = FALSE)

test_that("spark.als", {
data <- list(list(0, 0, 4.0), list(0, 1, 2.0), list(1, 1, 3.0), list(1, 2, 4.0),
2 changes: 1 addition & 1 deletion R/pkg/inst/tests/testthat/test_mllib_regression.R
@@ -20,7 +20,7 @@ library(testthat)
context("MLlib regression algorithms, except for tree-based algorithms")

# Tests for MLlib regression algorithms in SparkR
sparkSession <- sparkR.session(enableHiveSupport = FALSE)
sparkSession <- sparkR.session(master = sparkRMaster, enableHiveSupport = FALSE)

test_that("formula of spark.glm", {
skip_on_cran()
2 changes: 1 addition & 1 deletion R/pkg/inst/tests/testthat/test_mllib_stat.R
@@ -20,7 +20,7 @@ library(testthat)
context("MLlib statistics algorithms")

# Tests for MLlib statistics algorithms in SparkR
sparkSession <- sparkR.session(enableHiveSupport = FALSE)
sparkSession <- sparkR.session(master = sparkRMaster, enableHiveSupport = FALSE)

test_that("spark.kstest", {
data <- data.frame(test = c(0.1, 0.15, 0.2, 0.3, 0.25, -1, -0.5))
2 changes: 1 addition & 1 deletion R/pkg/inst/tests/testthat/test_mllib_tree.R
@@ -20,7 +20,7 @@ library(testthat)
context("MLlib tree-based algorithms")

# Tests for MLlib tree-based algorithms in SparkR
sparkSession <- sparkR.session(enableHiveSupport = FALSE)
sparkSession <- sparkR.session(master = sparkRMaster, enableHiveSupport = FALSE)

absoluteSparkPath <- function(x) {
sparkHome <- sparkR.conf("spark.home")
2 changes: 1 addition & 1 deletion R/pkg/inst/tests/testthat/test_parallelize_collect.R
@@ -33,7 +33,7 @@ numPairs <- list(list(1, 1), list(1, 2), list(2, 2), list(2, 3))
strPairs <- list(list(strList, strList), list(strList, strList))

# JavaSparkContext handle
sparkSession <- sparkR.session(enableHiveSupport = FALSE)
sparkSession <- sparkR.session(master = sparkRMaster, enableHiveSupport = FALSE)
jsc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", sparkSession)

# Tests
2 changes: 1 addition & 1 deletion R/pkg/inst/tests/testthat/test_rdd.R
@@ -18,7 +18,7 @@
context("basic RDD functions")

# JavaSparkContext handle
sparkSession <- sparkR.session(enableHiveSupport = FALSE)
sparkSession <- sparkR.session(master = sparkRMaster, enableHiveSupport = FALSE)
sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", sparkSession)

# Data
2 changes: 1 addition & 1 deletion R/pkg/inst/tests/testthat/test_shuffle.R
@@ -18,7 +18,7 @@
context("partitionBy, groupByKey, reduceByKey etc.")

# JavaSparkContext handle
sparkSession <- sparkR.session(enableHiveSupport = FALSE)
sparkSession <- sparkR.session(master = sparkRMaster, enableHiveSupport = FALSE)
sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", sparkSession)

# Data
2 changes: 1 addition & 1 deletion R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -61,7 +61,7 @@ unsetHiveContext <- function() {
# Tests for SparkSQL functions in SparkR

filesBefore <- list.files(path = sparkRDir, all.files = TRUE)
sparkSession <- sparkR.session()
sparkSession <- sparkR.session(master = sparkRMaster)
sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", sparkSession)

mockLines <- c("{\"name\":\"Michael\"}",
2 changes: 1 addition & 1 deletion R/pkg/inst/tests/testthat/test_streaming.R
@@ -21,7 +21,7 @@ context("Structured Streaming")

# Tests for Structured Streaming functions in SparkR

sparkSession <- sparkR.session(enableHiveSupport = FALSE)
sparkSession <- sparkR.session(master = sparkRMaster, enableHiveSupport = FALSE)

jsonSubDir <- file.path("sparkr-test", "json", "")
if (.Platform$OS.type == "windows") {
2 changes: 1 addition & 1 deletion R/pkg/inst/tests/testthat/test_take.R
@@ -30,7 +30,7 @@ strList <- list("Dexter Morgan: Blood. Sometimes it sets my teeth on edge, ",
"raising me. But they're both dead now. I didn't kill them. Honest.")

# JavaSparkContext handle
sparkSession <- sparkR.session(enableHiveSupport = FALSE)
sparkSession <- sparkR.session(master = sparkRMaster, enableHiveSupport = FALSE)
sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", sparkSession)

test_that("take() gives back the original elements in correct count and order", {
2 changes: 1 addition & 1 deletion R/pkg/inst/tests/testthat/test_textFile.R
@@ -18,7 +18,7 @@
context("the textFile() function")

# JavaSparkContext handle
sparkSession <- sparkR.session(enableHiveSupport = FALSE)
sparkSession <- sparkR.session(master = sparkRMaster, enableHiveSupport = FALSE)
sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", sparkSession)

mockFile <- c("Spark is pretty.", "Spark is awesome.")
2 changes: 1 addition & 1 deletion R/pkg/inst/tests/testthat/test_utils.R
@@ -18,7 +18,7 @@
context("functions in utils.R")

# JavaSparkContext handle
sparkSession <- sparkR.session(enableHiveSupport = FALSE)
sparkSession <- sparkR.session(master = sparkRMaster, enableHiveSupport = FALSE)
sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", sparkSession)

test_that("convertJListToRList() gives back (deserializes) the original JLists
5 changes: 5 additions & 0 deletions R/pkg/tests/run-all.R
@@ -31,4 +31,9 @@ sparkRWhitelistSQLDirs <- c("spark-warehouse", "metastore_db")
invisible(lapply(sparkRWhitelistSQLDirs,
function(x) { unlink(file.path(sparkRDir, x), recursive = TRUE, force = TRUE)}))

sparkRMaster <- "local[1]"
if (identical(Sys.getenv("NOT_CRAN"), "true")) {
sparkRMaster <- ""
}

test_package("SparkR")
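
A short sketch of how the new variable is meant to be consumed (my reading of the change, assuming run-all.R is sourced before the individual test files): CRAN checks pin the master to a single local core, while developer and Jenkins runs set NOT_CRAN=true, leaving the master empty so the session keeps whatever spark-submit or the test harness already configured.

```r
# Sketch, assuming sparkRMaster has been defined by run-all.R:
#   CRAN check         -> sparkRMaster == "local[1]"  (single-core local master)
#   NOT_CRAN == "true" -> sparkRMaster == ""          (fall back to the default
#                                                      master from spark-submit)
sparkSession <- sparkR.session(master = sparkRMaster, enableHiveSupport = FALSE)
```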
2 changes: 1 addition & 1 deletion R/pkg/vignettes/sparkr-vignettes.Rmd
@@ -48,7 +48,7 @@ We use default settings in which it runs in local mode. It auto downloads Spark
install.spark()
```
```{r, message=FALSE, results="hide"}
sparkR.session()
sparkR.session(master = "local[1]")
```

The operations in SparkR are centered around an R class called `SparkDataFrame`. It is a distributed collection of data organized into named columns, which is conceptually equivalent to a table in a relational database or a data frame in R, but with richer optimizations under the hood.