|
18 | 18 | # Functions to install Spark in case the user directly downloads SparkR |
19 | 19 | # from CRAN. |
20 | 20 |
|
#' Download and Install Spark Core to Local Directory
#'
#' \code{install_spark} downloads and installs Spark to a local directory if
#' it is not found. The Spark version we use is 2.0.0 (preview). Users can
#' specify a desired Hadoop version, the remote mirror site, and the directory
#' where the package is installed locally.
#'
#' @param hadoop_version Version of Hadoop to install. One of "2.7" (default),
#'   "2.6", "2.4", or "without" for the Hadoop-free build.
#' @param mirror_url the base URL of the repositories to use
#' @param local_dir local directory that Spark is installed to
#' @return \code{install_spark} returns (invisibly) the local directory
#'   where Spark is found or installed
#' @examples
#'\dontrun{
#' install_spark()
#'}
#' @note install_spark since 2.1.0
install_spark <- function(hadoop_version = NULL, mirror_url = NULL,
                          local_dir = NULL) {
  version <- paste0("spark-", spark_version_default())
  # match.arg maps NULL to the first supported version ("2.7") and errors
  # on anything outside supported_versions_hadoop().
  hadoop_version <- match.arg(hadoop_version, supported_versions_hadoop())
  # Scalar condition: plain if/else, not ifelse().
  packageName <- if (hadoop_version == "without") {
    paste0(version, "-bin-without-hadoop")
  } else {
    paste0(version, "-bin-hadoop", hadoop_version)
  }
  if (is.null(local_dir)) {
    local_dir <- getOption("spark.install.dir", spark_cache_path())
  } else {
    local_dir <- normalizePath(local_dir)
  }

  packageLocalDir <- file.path(local_dir, packageName)

  # Already installed: report and return early.
  if (dir.exists(packageLocalDir)) {
    fmt <- "Spark %s for Hadoop %s has been installed."
    message(sprintf(fmt, version, hadoop_version))
    return(invisible(packageLocalDir))
  }

  packageLocalPath <- paste0(packageLocalDir, ".tgz")
  tarExists <- file.exists(packageLocalPath)

  if (tarExists) {
    message("Tar file found. Installing...")
  } else {
    dir.create(packageLocalDir, recursive = TRUE)
    if (is.null(mirror_url)) {
      message("Remote URL not provided. Use Apache default.")
      mirror_url <- mirror_url_default()
    }
    # This is temporary, should be removed when released. Kept in its own
    # variable so `version` stays intact for the user-facing messages below
    # (previously `version` itself was overwritten, garbling the messages).
    remoteVersionPath <- "spark-releases/spark-2.0.0-rc4-bin"
    packageRemotePath <-
      paste0(file.path(mirror_url, remoteVersionPath, packageName), ".tgz")
    fmt <- paste("Installing Spark %s for Hadoop %s.",
                 "Downloading from:\n %s",
                 "Installing to:\n %s", sep = "\n")
    message(sprintf(fmt, version, hadoop_version, packageRemotePath,
                    packageLocalDir))

    # download.file returns 0 on success; treat a non-zero status or an
    # error as a failed fetch and fall back to the bundled mirror list.
    fetchFail <- tryCatch({
      status <- download.file(packageRemotePath, packageLocalPath)
      status != 0
    },
    error = function(e) {
      message(paste0("Fetch failed from ", mirror_url, "."))
      TRUE
    })
    if (fetchFail) {
      message("Try the backup option.")
      mirror_sites <- tryCatch(read.csv(mirror_url_csv()),
                               error = function(e) stop("No csv file found."))
      mirror_url <- mirror_sites$url[1]
      packageRemotePath <-
        paste0(file.path(mirror_url, remoteVersionPath, packageName), ".tgz")
      message(sprintf("Downloading from:\n %s", packageRemotePath))
      tryCatch(download.file(packageRemotePath, packageLocalPath),
               error = function(e) {
                 stop("Download failed. Please provide a valid mirror_url.")
               })
    }
  }

  untar(tarfile = packageLocalPath, exdir = local_dir)
  # Only delete the tarball if we downloaded it ourselves; a pre-existing
  # tar file belongs to the user.
  if (!tarExists) {
    unlink(packageLocalPath)
  }
  message("Installation done.")
  invisible(packageLocalDir)
}
| 114 | + |
# Default download site used when the caller supplies no mirror_url.
# Change to http://www.apache.org/dyn/closer.lua when released.
mirror_url_default <- function() {
  "http://people.apache.org/~pwendell"
}
| 121 | + |
# Hadoop builds for which a Spark binary package is published;
# "without" selects the Hadoop-free build.
supported_versions_hadoop <- function() {
  c("2.7", "2.6", "2.4", "without")
}
| 125 | + |
# Determine a per-user cache directory for Spark, following each OS's
# convention (LOCALAPPDATA on Windows, ~/Library/Caches on macOS,
# XDG_CACHE_HOME or ~/.cache elsewhere on unix).
# Stops with an error on Windows when LOCALAPPDATA is undefined, or on an
# unrecognized OS.
spark_cache_path <- function() {
  if (.Platform$OS.type == "windows") {
    # The variable is named LOCALAPPDATA; the %...% form is cmd.exe syntax,
    # not part of the name.
    winAppPath <- Sys.getenv("LOCALAPPDATA", unset = NA)
    # Sys.getenv(unset = NA) yields NA (never NULL) when the variable is
    # missing, so test with is.na().
    if (is.na(winAppPath)) {
      msg <- paste("%LOCALAPPDATA% not found.",
                   "Please define or enter an installation path in local_dir.")
      stop(msg)
    } else {
      path <- file.path(winAppPath, "spark", "spark", "Cache")
    }
  } else if (.Platform$OS.type == "unix") {
    if (Sys.info()["sysname"] == "Darwin") {
      path <- file.path("~/Library/Caches", "spark")
    } else {
      path <- file.path(Sys.getenv("XDG_CACHE_HOME", "~/.cache"), "spark")
    }
  } else {
    stop("Unknown OS")
  }
  # The cache directory may not exist yet (normal on first install);
  # mustWork = TRUE would error here, so allow a non-existent path.
  normalizePath(path, mustWork = FALSE)
}
76 | 147 |
|
# Locate the CSV of mirror sites bundled with the SparkR package.
mirror_url_csv <- function() {
  csv_path <- system.file("extdata", "spark_download.csv", package = "SparkR")
  csv_path
}
80 | 151 |
|
# The Spark version that install_spark() installs by default.
spark_version_default <- function() {
  "2.0.0"
}
84 | 155 |
|
85 | 156 | hadoop_version_default <- function() { |
|
0 commit comments