[SPARK-16579][SparkR] add install.spark function #14258
Changes from 8 commits
New file: R/pkg/R/install.R (@@ -0,0 +1,160 @@):

```r
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Functions to install Spark in case the user directly downloads SparkR
# from CRAN.

#' Download and Install Spark Core to Local Directory
#'
#' \code{install_spark} downloads and installs Spark to local directory if
#' it is not found. The Spark version we use is 2.0.0 (preview). Users can
#' specify a desired Hadoop version, the remote site, and the directory where
#' the package is installed locally.
#'
#' @param hadoop_version Version of Hadoop to install, 2.4, 2.6,
#'        2.7 (default) and without
#' @param mirror_url the base URL of the repositories to use
#' @param local_dir local directory that Spark is installed to
#' @return \code{install_spark} returns the local directory
#'         where Spark is found or installed
#' @rdname install_spark
#' @name install_spark
#' @export
#' @examples
#'\dontrun{
#' install_spark()
#'}
#' @note install_spark since 2.1.0
install_spark <- function(hadoop_version = NULL, mirror_url = NULL,
                          local_dir = NULL) {
  version <- paste0("spark-", spark_version_default())
  hadoop_version <- match.arg(hadoop_version, supported_versions_hadoop())
  packageName <- ifelse(hadoop_version == "without",
                        paste0(version, "-bin-without-hadoop"),
                        paste0(version, "-bin-hadoop", hadoop_version))
  if (is.null(local_dir)) {
    local_dir <- getOption("spark.install.dir", spark_cache_path())
  } else {
    local_dir <- normalizePath(local_dir)
  }

  packageLocalDir <- file.path(local_dir, packageName)

  if (dir.exists(packageLocalDir)) {
    fmt <- "Spark %s for Hadoop %s has been installed."
    msg <- sprintf(fmt, version, hadoop_version)
    message(msg)
    return(invisible(packageLocalDir))
  }

  packageLocalPath <- paste0(packageLocalDir, ".tgz")
  tarExists <- file.exists(packageLocalPath)

  if (tarExists) {
    message("Tar file found. Installing...")
  } else {
    dir.create(packageLocalDir, recursive = TRUE)
    if (is.null(mirror_url)) {
      message("Remote URL not provided. Use Apache default.")
      mirror_url <- mirror_url_default()
    }
    # This is temporary, should be removed when released
    version <- "spark-releases/spark-2.0.0-rc4-bin"
    packageRemotePath <- paste0(file.path(mirror_url, version, packageName),
                                ".tgz")
    fmt <- paste("Installing Spark %s for Hadoop %s.",
                 "Downloading from:\n- %s",
                 "Installing to:\n- %s", sep = "\n")
    msg <- sprintf(fmt, version, hadoop_version, packageRemotePath,
                   packageLocalDir)
    message(msg)

    fetchFail <- tryCatch(download.file(packageRemotePath, packageLocalPath),
                          error = function(e) {
                            msg <- paste0("Fetch failed from ", mirror_url, ".")
                            message(msg)
                            TRUE
                          })
    if (fetchFail) {
      message("Try the backup option.")
      mirror_sites <- tryCatch(read.csv(mirror_url_csv()),
                               error = function(e) stop("No csv file found."))
      mirror_url <- mirror_sites$url[1]
      packageRemotePath <- paste0(file.path(mirror_url, version, packageName),
                                  ".tgz")
      message(sprintf("Downloading from:\n- %s", packageRemotePath))
      tryCatch(download.file(packageRemotePath, packageLocalPath),
               error = function(e) {
                 stop("Download failed. Please provide a valid mirror_url.")
               })
    }
  }

  untar(tarfile = packageLocalPath, exdir = local_dir)
  if (!tarExists) {
    unlink(packageLocalPath)
  }
  message("Installation done.")
  invisible(packageLocalDir)
}

mirror_url_default <- function() {
  # change to http://www.apache.org/dyn/closer.lua
  # when released
  "http://people.apache.org/~pwendell"
}

supported_versions_hadoop <- function() {
  c("2.7", "2.6", "2.4", "without")
}

spark_cache_path <- function() {
```
Contributor: Are there references about the implementation here?

Author: Yes, those actually refer to the implementation of rappdirs/R/cache.r. A reference should be added here.
```r
  if (.Platform$OS.type == "windows") {
    # Sys.getenv() takes the variable name without the %...% wrapping and
    # returns NA when "unset = NA" is given and the variable is undefined,
    # so the missing case is detected with is.na().
    winAppPath <- Sys.getenv("LOCALAPPDATA", unset = NA)
    if (is.na(winAppPath)) {
      msg <- paste("%LOCALAPPDATA% not found.",
                   "Please define or enter an installation path in local_dir.")
      stop(msg)
    } else {
      path <- file.path(winAppPath, "spark", "spark", "Cache")
    }
  } else if (.Platform$OS.type == "unix") {
    if (Sys.info()["sysname"] == "Darwin") {
      path <- file.path(Sys.getenv("HOME"), "Library/Caches", "spark")
```
Contributor: Need to check whether the folder exists and create the folder if it does not. I got this error in the first run.

Author: I changed to
||
| } else { | ||
| path <- file.path( | ||
| Sys.getenv("XDG_CACHE_HOME", file.path(Sys.getenv("HOME"), ".cache")), | ||
| "spark") | ||
| } | ||
| } else { | ||
| stop("Unknown OS") | ||
|
||
| } | ||
| normalizePath(path, mustWork = TRUE) | ||
| } | ||
|
|
||
| mirror_url_csv <- function() { | ||
| system.file("extdata", "spark_download.csv", package = "SparkR") | ||
| } | ||
|
|
||
| spark_version_default <- function() { | ||
| "2.0.0" | ||
| } | ||
|
|
||
| hadoop_version_default <- function() { | ||
| "2.7" | ||
| } | ||
Existing file, R/pkg/R/sparkR.R (@@ -689,3 +689,7 @@ getSparkContext <- function() {):

```r
  sc <- get(".sparkRjsc", envir = .sparkREnv)
  sc
}

master_is_local <- function(master) {
  grepl("^local(\\[[0-9\\*]*\\])?$", master, perl = TRUE)
}
```
New file: R/pkg/inst/extdata/spark_download.csv (@@ -0,0 +1,2 @@), evidently the mirror list resolved by mirror_url_csv():

```
"url","default"
"http://apache.osuosl.org",TRUE
```
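A sketch of the fallback lookup that install_spark performs with this file when the primary download fails (illustrative; assumes the SparkR package is installed so that system.file() resolves):

```r
# Read the bundled mirror list and take the first backup URL, as
# install_spark() does after a failed download:
mirror_sites <- read.csv(mirror_url_csv())
mirror_url <- mirror_sites$url[1]  # "http://apache.osuosl.org"
```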
Review comment: "Spark Core" -> "Apache Spark" (this downloads the full distribution), and "Local Directory" -> "a Local Directory" in the documentation title.
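With that suggestion applied, the roxygen header would read along these lines (wording taken from the comment above, not from a commit in this diff):

```r
#' Download and Install Apache Spark to a Local Directory
#'
#' \code{install_spark} downloads and installs Spark to a local directory if
#' it is not found.
```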