Skip to content

Commit 6b41195

Browse files
HyukjinKwon authored and sarutak committed
[SPARK-17339][SPARKR][CORE] Fix some R tests and use Path.toUri in SparkContext for Windows paths in SparkR
## What changes were proposed in this pull request?

This PR fixes the Windows path issues in several APIs. Please refer to https://issues.apache.org/jira/browse/SPARK-17339 for more details.

## How was this patch tested?

Tests via AppVeyor CI - https://ci.appveyor.com/project/HyukjinKwon/spark/build/82-SPARK-17339-fix-r

Also, manually: ![2016-09-06 3 14 38](https://cloud.githubusercontent.com/assets/6477701/18263406/b93a98be-7444-11e6-9521-b28ee65a4771.png)

Author: hyukjinkwon <gurwls223@gmail.com>

Closes #14960 from HyukjinKwon/SPARK-17339.
1 parent 3ce3a28 commit 6b41195

2 files changed

Lines changed: 12 additions & 6 deletions

File tree

R/pkg/inst/tests/testthat/test_mllib.R

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,11 @@ context("MLlib functions")
2222
# Tests for MLlib functions in SparkR
2323
sparkSession <- sparkR.session(enableHiveSupport = FALSE)
2424

25 + absoluteSparkPath <- function(x) {
26 +   sparkHome <- sparkR.conf("spark.home")
27 +   file.path(sparkHome, x)
28 + }
29 +
2530
test_that("formula of spark.glm", {
2631
training <- suppressWarnings(createDataFrame(iris))
2732
# directly calling the spark API
@@ -354,7 +359,8 @@ test_that("spark.kmeans", {
354359
})
355360

356361
test_that("spark.mlp", {
357 -   df <- read.df("data/mllib/sample_multiclass_classification_data.txt", source = "libsvm")
362 +   df <- read.df(absoluteSparkPath("data/mllib/sample_multiclass_classification_data.txt"),
363 +                 source = "libsvm")
358364
model <- spark.mlp(df, blockSize = 128, layers = c(4, 5, 4, 3), solver = "l-bfgs", maxIter = 100,
359365
tol = 0.5, stepSize = 1, seed = 1)
360366

@@ -616,7 +622,7 @@ test_that("spark.gaussianMixture", {
616622
})
617623

618624
test_that("spark.lda with libsvm", {
619 -   text <- read.df("data/mllib/sample_lda_libsvm_data.txt", source = "libsvm")
625 +   text <- read.df(absoluteSparkPath("data/mllib/sample_lda_libsvm_data.txt"), source = "libsvm")
620626
model <- spark.lda(text, optimizer = "em")
621627

622628
stats <- summary(model, 10)
@@ -652,7 +658,7 @@ test_that("spark.lda with libsvm", {
652658
})
653659

654660
test_that("spark.lda with text input", {
655 -   text <- read.text("data/mllib/sample_lda_data.txt")
661 +   text <- read.text(absoluteSparkPath("data/mllib/sample_lda_data.txt"))
656662
model <- spark.lda(text, optimizer = "online", features = "value")
657663

658664
stats <- summary(model)
@@ -688,7 +694,7 @@ test_that("spark.lda with text input", {
688694
})
689695

690696
test_that("spark.posterior and spark.perplexity", {
691 -   text <- read.text("data/mllib/sample_lda_data.txt")
697 +   text <- read.text(absoluteSparkPath("data/mllib/sample_lda_data.txt"))
692698
model <- spark.lda(text, features = "value", k = 3)
693699

694700
# Assert perplexities are equal

core/src/main/scala/org/apache/spark/SparkContext.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -992,7 +992,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
992992

993993
// This is a hack to enforce loading hdfs-site.xml.
994994
// See SPARK-11227 for details.
995 -     FileSystem.get(new URI(path), hadoopConfiguration)
995 +     FileSystem.getLocal(hadoopConfiguration)
996996

997997
// A Hadoop configuration can be about 10 KB, which is pretty big, so broadcast it.
998998
val confBroadcast = broadcast(new SerializableConfiguration(hadoopConfiguration))
@@ -1081,7 +1081,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
10811081

10821082
// This is a hack to enforce loading hdfs-site.xml.
10831083
// See SPARK-11227 for details.
1084 -     FileSystem.get(new URI(path), hadoopConfiguration)
1084 +     FileSystem.getLocal(hadoopConfiguration)
10851085

10861086
// The call to NewHadoopJob automatically adds security credentials to conf,
10871087
// so we don't need to explicitly add them ourselves

0 commit comments

Comments
 (0)