Skip to content

Commit 98087ad

Browse files
committed
change options of spark mirror url
1 parent 6203223 commit 98087ad

3 files changed

Lines changed: 95 additions & 22 deletions

File tree

R/pkg/DESCRIPTION

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@ Maintainer: Shivaram Venkataraman <shivaram@cs.berkeley.edu>
88
Depends:
99
R (>= 3.0),
1010
methods
11-
Imports:
12-
rappdirs
1311
Suggests:
1412
testthat,
1513
e1071,

R/pkg/R/install.R

Lines changed: 88 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
# Functions to install Spark in case the user directly downloads SparkR
1919
# from CRAN.
2020

#' Download and Install Spark Core to Local Directory
#'
#' \code{install_spark} downloads and installs Spark to a local directory if
#' it is not found there. If a previously downloaded tarball is present it is
#' installed from disk; otherwise the tarball is fetched from
#' \code{mirror_url} (or the Apache default), falling back to the first mirror
#' listed in the CSV file bundled with the package.
#'
#' @param hadoop_version Version of Hadoop to install. One of "2.7" (default),
#'        "2.6", "2.4", or "without" for the build with no bundled Hadoop.
#' @param mirror_url the base URL of the repositories to use
#' @param local_dir local directory that Spark is installed to; defaults to
#'        the \code{spark.install.dir} option, or the per-OS cache directory
#'        from \code{spark_cache_path()}
#' @return \code{install_spark} returns the local directory
#'         where Spark is found or installed
#' @examples
#'\dontrun{
#' install_spark()
#'}
#' @note install_spark since 2.1.0
install_spark <- function(hadoop_version = NULL, mirror_url = NULL,
                          local_dir = NULL) {
  version <- paste0("spark-", spark_version_default())
  # match.arg(NULL, choices) selects the first choice, so "2.7" is the default.
  hadoop_version <- match.arg(hadoop_version, supported_versions_hadoop())
  # "without" names a build flavour, not a Hadoop version number, so it has a
  # different package-name layout. Plain if/else: ifelse() is for vectors.
  if (hadoop_version == "without") {
    packageName <- paste0(version, "-bin-without-hadoop")
  } else {
    packageName <- paste0(version, "-bin-hadoop", hadoop_version)
  }
  if (is.null(local_dir)) {
    local_dir <- getOption("spark.install.dir", spark_cache_path())
  } else {
    local_dir <- normalizePath(local_dir)
  }

  packageLocalDir <- file.path(local_dir, packageName)

  if (dir.exists(packageLocalDir)) {
    fmt <- "Spark %s for Hadoop %s has been installed."
    message(sprintf(fmt, version, hadoop_version))
    return(invisible(packageLocalDir))
  }

  packageLocalPath <- paste0(packageLocalDir, ".tgz")
  tarExists <- file.exists(packageLocalPath)

  if (tarExists) {
    message("Tar file found. Installing...")
  } else {
    # Only ensure the parent cache directory exists; packageLocalDir itself is
    # produced by untar(). Creating it up front would leave a stale empty
    # directory after a failed download, which the dir.exists() check above
    # would then mistake for a completed installation.
    dir.create(local_dir, recursive = TRUE, showWarnings = FALSE)
    if (is.null(mirror_url)) {
      message("Remote URL not provided. Use Apache default.")
      mirror_url <- mirror_url_default()
    }
    # This is temporary, should be removed when released
    version <- "spark-releases/spark-2.0.0-rc4-bin"
    packageRemotePath <- paste0(file.path(mirror_url, version, packageName),
                                ".tgz")
    fmt <- paste("Installing Spark %s for Hadoop %s.",
                 "Downloading from:\n %s",
                 "Installing to:\n %s", sep = "\n")
    message(sprintf(fmt, version, hadoop_version, packageRemotePath,
                    packageLocalDir))

    # download.file() returns a non-zero status code on some failures without
    # raising an error; treat both a non-zero status and an error as a failed
    # fetch instead of relying on the raw status being falsy.
    fetchFail <- tryCatch(download.file(packageRemotePath,
                                        packageLocalPath) != 0,
                          error = function(e) {
                            message(paste0("Fetch failed from ", mirror_url,
                                           "."))
                            TRUE
                          })
    if (fetchFail) {
      message("Try the backup option.")
      mirror_sites <- tryCatch(read.csv(mirror_url_csv()),
                               error = function(e) stop("No csv file found."))
      mirror_url <- mirror_sites$url[1]
      packageRemotePath <- paste0(file.path(mirror_url, version, packageName),
                                  ".tgz")
      message(sprintf("Downloading from:\n %s", packageRemotePath))
      tryCatch(download.file(packageRemotePath, packageLocalPath),
               error = function(e) {
                 stop("Download failed. Please provide a valid mirror_url.")
               })
    }
  }

  untar(tarfile = packageLocalPath, exdir = local_dir)
  # Keep a user-supplied tarball on disk; remove only the one we downloaded.
  if (!tarExists) {
    unlink(packageLocalPath)
  }
  message("Installation done.")
  invisible(packageLocalDir)
}
# Base URL that Spark release artifacts are fetched from when the caller
# does not supply a mirror_url.
mirror_url_default <- function() {
  # Change to http://www.apache.org/dyn/closer.lua when released.
  "http://people.apache.org/~pwendell"
}
# Hadoop flavours with published pre-built Spark binaries. The first entry is
# the default selected by match.arg(); "without" means no bundled Hadoop.
supported_versions_hadoop <- function() {
  c("2.7", "2.6", "2.4", "without")
}
# Determine the per-OS cache directory used to store Spark installations.
# Returns a normalized path; stops with an error on Windows when the
# LOCALAPPDATA environment variable is undefined, or on an unrecognized OS.
spark_cache_path <- function() {
  if (.Platform$OS.type == "windows") {
    # Sys.getenv() takes the bare variable name; "%LOCALAPPDATA%" is cmd.exe
    # expansion syntax and would never match.
    winAppPath <- Sys.getenv("LOCALAPPDATA", unset = NA)
    # Sys.getenv(..., unset = NA) yields NA (never NULL) when the variable is
    # undefined, so the check must be is.na().
    if (is.na(winAppPath)) {
      msg <- paste("LOCALAPPDATA not found.",
                   "Please define or enter an installation path in local_dir.")
      stop(msg)
    } else {
      path <- file.path(winAppPath, "spark", "spark", "Cache")
    }
  } else if (.Platform$OS.type == "unix") {
    if (Sys.info()["sysname"] == "Darwin") {
      path <- file.path("~/Library/Caches", "spark")
    } else {
      path <- file.path(Sys.getenv("XDG_CACHE_HOME", "~/.cache"), "spark")
    }
  } else {
    stop("Unknown OS")
  }
  # mustWork = FALSE: the cache directory does not exist yet on a fresh
  # machine; install_spark() creates it before downloading.
  normalizePath(path, mustWork = FALSE)
}
76147

# Locate the CSV of backup mirror sites shipped with the SparkR package.
# system.file() returns "" when the file or package is not installed.
mirror_url_csv <- function() {
  csv_name <- "spark_download.csv"
  system.file("extdata", csv_name, package = "SparkR")
}
80151

# Spark version that this SparkR release downloads and installs by default.
spark_version_default <- function() {
  "2.0.0"
}
84155

85156
hadoop_version_default <- function() {

R/pkg/R/sparkR.R

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -366,8 +366,12 @@ sparkR.session <- function(
366366
overrideEnvs(sparkConfigMap, paramMap)
367367
}
368368
if (!nzchar(master) || master_is_local(master)) {
369-
if (!nzchar(sparkHome) || !dir.exists(sparkHome)) {
370-
message("Spark is not found in SPARK_HOME. Redirect to the cache directory.")
369+
if (!dir.exists(sparkHome)) {
370+
fmt <- paste("Spark not found in SPARK_HOME: %s.\n",
371+
"Search in the cache directory.",
372+
"It will be installed if not found.")
373+
msg <- sprintf(fmt, sparkHome)
374+
message(msg)
371375
packageLocalDir <- install_spark()
372376
sparkHome <- packageLocalDir
373377
} else {
@@ -376,7 +380,7 @@ sparkR.session <- function(
376380
message(msg)
377381
}
378382
}
379-
383+
380384
if (!exists(".sparkRjsc", envir = .sparkREnv)) {
381385
sparkExecutorEnvMap <- new.env()
382386
sparkR.sparkContext(master, appName, sparkHome, sparkConfigMap, sparkExecutorEnvMap,

0 commit comments

Comments
 (0)