25 changes: 24 additions & 1 deletion .github/workflows/master.yml
@@ -50,7 +50,7 @@ jobs:

lint:
runs-on: ubuntu-latest
name: Linters
name: Linters (Java/Scala/Python), licenses, dependencies
steps:
- uses: actions/checkout@master
- uses: actions/setup-java@v1
@@ -72,3 +72,26 @@
run: ./dev/check-license
- name: Dependencies
run: ./dev/test-dependencies.sh

lintr:
runs-on: ubuntu-latest
name: Linter (R)
steps:
- uses: actions/checkout@master
- uses: actions/setup-java@v1
with:
java-version: '11'
- name: install R
run: |
echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran35/' | sudo tee -a /etc/apt/sources.list
sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9
sudo apt-get update
sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev
- name: install R packages
run: |
sudo Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2', 'e1071', 'survival'), repos='https://cloud.r-project.org/')"
sudo Rscript -e "devtools::install_github('jimhester/lintr')"
- name: package and install SparkR
run: ./R/install-dev.sh
- name: lint-r
run: ./dev/lint-r
2 changes: 1 addition & 1 deletion R/pkg/.lintr
@@ -1,2 +1,2 @@
linters: with_defaults(line_length_linter(100), multiple_dots_linter = NULL, object_name_linter = NULL, camel_case_linter = NULL, open_curly_linter(allow_single_line = TRUE), closed_curly_linter(allow_single_line = TRUE))
linters: with_defaults(line_length_linter(100), multiple_dots_linter = NULL, object_name_linter = NULL, camel_case_linter = NULL, open_curly_linter(allow_single_line = TRUE), closed_curly_linter(allow_single_line = TRUE), object_usage_linter = NULL)
@dongjoon-hyun (Member, Author) commented on Nov 18, 2019:

object_usage_linter has a limitation: it cannot detect the shell function, which is used only in the Windows environment. Also, nolint does not work for this rule. This is a known limitation of lintr itself.

exclusions: list("inst/profile/general.R" = 1, "inst/profile/shell.R")
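
For context, a minimal hypothetical sketch (not part of the PR) of the behavior described in the comment above. It relies only on lintr's lint(), with_defaults(), and object_usage_linter; the launch() helper and the temporary file are made up for illustration.

# shell() exists in base R only on Windows, so on a Linux host
# object_usage_linter reports it as an undefined symbol, and (per the
# comment above) an inline "# nolint" on that call does not suppress it.
library(lintr)

tmp <- tempfile(fileext = ".R")
writeLines(c(
  "launch <- function(cmd) {",
  "  if (.Platform$OS.type == \"windows\") {",
  "    shell(cmd)  # nolint",
  "  } else {",
  "    system2(cmd)",
  "  }",
  "}"
), tmp)

# With the default linters this reports something like
# "no visible global function definition for 'shell'" on non-Windows hosts.
print(lint(tmp))

# Disabling the rule, as the updated R/pkg/.lintr now does, removes that warning.
print(lint(tmp, linters = with_defaults(object_usage_linter = NULL)))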
14 changes: 7 additions & 7 deletions R/pkg/R/DataFrame.R
@@ -1174,7 +1174,7 @@ setMethod("dim",
#' firstName <- names(collected)[1]
#' }
#' @note collect since 1.4.0
setMethod("collect",
setMethod("collect", # nolint
signature(x = "SparkDataFrame"),
function(x, stringsAsFactors = FALSE) {
connectionTimeout <- as.numeric(Sys.getenv("SPARKR_BACKEND_CONNECTION_TIMEOUT", "6000"))
@@ -2252,7 +2252,7 @@ setMethod("mutate",

# The last column of the same name in the specific columns takes effect
deDupCols <- list()
for (i in 1:length(cols)) {
for (i in seq_len(length(cols))) {
deDupCols[[ns[[i]]]] <- alias(cols[[i]], ns[[i]])
}
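
As an aside (not from the PR), a small sketch of why the loop above, and the similar loops below, switch from 1:length(x) to seq_len(length(x)):

# With an empty input, 1:length(x) evaluates to c(1, 0), so the loop body runs
# twice with out-of-range indices; seq_len(0) is integer(0), so the loop is
# skipped entirely. seq_along(x) is an equivalent shorthand.
cols <- list()
for (i in 1:length(cols)) print(i)         # prints 1, then 0
for (i in seq_len(length(cols))) print(i)  # prints nothing
for (i in seq_along(cols)) print(i)        # prints nothing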

@@ -2416,7 +2416,7 @@ setMethod("arrange",
# builds a list of columns of type Column
# example: [[1]] Column Species ASC
# [[2]] Column Petal_Length DESC
jcols <- lapply(seq_len(length(decreasing)), function(i){
jcols <- lapply(seq_len(length(decreasing)), function(i) {
if (decreasing[[i]]) {
desc(getColumn(x, by[[i]]))
} else {
@@ -2653,7 +2653,7 @@ setMethod("crossJoin",
#' merge(df1, df2, by = NULL) # Performs a Cartesian join
#' }
#' @note merge since 1.5.0
setMethod("merge",
setMethod("merge", # nolint
signature(x = "SparkDataFrame", y = "SparkDataFrame"),
function(x, y, by = intersect(names(x), names(y)), by.x = by, by.y = by,
all = FALSE, all.x = all, all.y = all,
@@ -2749,7 +2749,7 @@ genAliasesForIntersectedCols <- function(x, intersectedColNames, suffix) {
col <- getColumn(x, colName)
if (colName %in% intersectedColNames) {
newJoin <- paste(colName, suffix, sep = "")
if (newJoin %in% allColNames){
if (newJoin %in% allColNames) {
stop("The following column name: ", newJoin, " occurs more than once in the 'DataFrame'.",
"Please use different suffixes for the intersected columns.")
}
@@ -3475,7 +3475,7 @@ setMethod("str",
cat(paste0("'", class(object), "': ", length(names), " variables:\n"))

if (nrow(localDF) > 0) {
for (i in 1 : ncol(localDF)) {
for (i in seq_len(ncol(localDF))) {
# Get the first elements for each column

firstElements <- if (types[i] == "character") {
@@ -3865,7 +3865,7 @@ setMethod("isStreaming",
#' }
#' @note write.stream since 2.2.0
#' @note experimental
setMethod("write.stream",
setMethod("write.stream", # nolint
signature(df = "SparkDataFrame"),
function(df, source = NULL, outputMode = NULL, partitionBy = NULL,
trigger.processingTime = NULL, trigger.once = NULL, ...) {
4 changes: 2 additions & 2 deletions R/pkg/R/SQLContext.R
@@ -197,7 +197,7 @@ getSchema <- function(schema, firstRow = NULL, rdd = NULL) {
as.list(schema)
}
if (is.null(names)) {
names <- lapply(1:length(firstRow), function(x) {
names <- lapply(seq_len(length(firstRow)), function(x) {
paste0("_", as.character(x))
})
}
@@ -213,7 +213,7 @@ getSchema <- function(schema, firstRow = NULL, rdd = NULL) {
})

types <- lapply(firstRow, infer_type)
fields <- lapply(1:length(firstRow), function(i) {
fields <- lapply(seq_len(length(firstRow)), function(i) {
structField(names[[i]], types[[i]], TRUE)
})
schema <- do.call(structType, fields)
2 changes: 1 addition & 1 deletion R/pkg/R/context.R
@@ -416,7 +416,7 @@ spark.getSparkFiles <- function(fileName) {
#' @examples
#'\dontrun{
#' sparkR.session()
#' doubled <- spark.lapply(1:10, function(x){2 * x})
#' doubled <- spark.lapply(1:10, function(x) {2 * x})
#'}
#' @note spark.lapply since 2.0.0
spark.lapply <- function(list, func) {
2 changes: 1 addition & 1 deletion R/pkg/R/group.R
@@ -162,7 +162,7 @@ methods <- c("avg", "max", "mean", "min", "sum")
#' @note pivot since 2.0.0
setMethod("pivot",
signature(x = "GroupedData", colname = "character"),
function(x, colname, values = list()){
function(x, colname, values = list()) {
stopifnot(length(colname) == 1)
if (length(values) == 0) {
result <- callJMethod(x@sgd, "pivot", colname)
2 changes: 1 addition & 1 deletion R/pkg/R/install.R
@@ -65,7 +65,7 @@
#' @note install.spark since 2.1.0
#' @seealso See available Hadoop versions:
#' \href{http://spark.apache.org/downloads.html}{Apache Spark}
install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL,
install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL, # nolint
localDir = NULL, overwrite = FALSE) {
sparkHome <- Sys.getenv("SPARK_HOME")
if (isSparkRShell()) {
2 changes: 1 addition & 1 deletion R/pkg/R/mllib_classification.R
@@ -290,7 +290,7 @@ function(object, path, overwrite = FALSE) {
#' lowerBoundsOnIntercepts = lbi)
#' }
#' @note spark.logit since 2.1.0
setMethod("spark.logit", signature(data = "SparkDataFrame", formula = "formula"),
setMethod("spark.logit", signature(data = "SparkDataFrame", formula = "formula"), # nolint
function(data, formula, regParam = 0.0, elasticNetParam = 0.0, maxIter = 100,
tol = 1E-6, family = "auto", standardization = TRUE,
thresholds = 0.5, weightCol = NULL, aggregationDepth = 2,
2 changes: 1 addition & 1 deletion R/pkg/R/mllib_utils.R
@@ -80,7 +80,7 @@ predict_internal <- function(object, newData) {
#' model <- read.ml(path)
#' }
#' @note read.ml since 2.0.0
read.ml <- function(path) {
read.ml <- function(path) { # nolint
path <- suppressWarnings(normalizePath(path))
sparkSession <- getSparkSession()
callJStatic("org.apache.spark.ml.r.RWrappers", "session", sparkSession)
2 changes: 1 addition & 1 deletion R/pkg/R/pairRDD.R
@@ -899,7 +899,7 @@ setMethod("subtractByKey",
#' @rdname sampleByKey
#' @aliases sampleByKey,RDD-method
#' @noRd
setMethod("sampleByKey",
setMethod("sampleByKey", # nolint
signature(x = "RDD", withReplacement = "logical",
fractions = "vector", seed = "integer"),
function(x, withReplacement, fractions, seed) {
2 changes: 1 addition & 1 deletion R/pkg/R/schema.R
@@ -143,7 +143,7 @@ structField.jobj <- function(x, ...) {
obj
}

checkType <- function(type) {
checkType <- function(type) { # nolint
if (!is.null(PRIMITIVE_TYPES[[type]])) {
return()
} else {
2 changes: 1 addition & 1 deletion R/pkg/R/sparkR.R
@@ -89,7 +89,7 @@ sparkR.stop <- function() {
}

# Internal function to handle creating the SparkContext.
sparkR.sparkContext <- function(
sparkR.sparkContext <- function( # nolint
master = "",
appName = "SparkR",
sparkHome = Sys.getenv("SPARK_HOME"),
12 changes: 6 additions & 6 deletions R/pkg/R/utils.R
@@ -131,7 +131,7 @@ hashCode <- function(key) {
} else {
asciiVals <- sapply(charToRaw(key), function(x) { strtoi(x, 16L) })
hashC <- 0
for (k in 1:length(asciiVals)) {
for (k in seq_len(length(asciiVals))) {
hashC <- mult31AndAdd(hashC, asciiVals[k])
}
as.integer(hashC)
@@ -335,7 +335,7 @@ varargsToEnv <- function(...) {

# Utility function to capture the varargs into environment object but all values are converted
# into string.
varargsToStrEnv <- function(...) {
varargsToStrEnv <- function(...) { # nolint
pairs <- list(...)
nameList <- names(pairs)
env <- new.env()
@@ -404,7 +404,7 @@ getStorageLevel <- function(newLevel = c("DISK_ONLY",
"OFF_HEAP" = callJStatic(storageLevelClass, "OFF_HEAP"))
}

storageLevelToString <- function(levelObj) {
storageLevelToString <- function(levelObj) { # nolint
useDisk <- callJMethod(levelObj, "useDisk")
useMemory <- callJMethod(levelObj, "useMemory")
useOffHeap <- callJMethod(levelObj, "useOffHeap")
@@ -465,7 +465,7 @@ numToInt <- function(num) {
# checkedFunc An environment of function objects examined during cleanClosure. It can
# be considered as a "name"-to-"list of functions" mapping.
# newEnv A new function environment to store necessary function dependencies, an output argument.
processClosure <- function(node, oldEnv, defVars, checkedFuncs, newEnv) {
processClosure <- function(node, oldEnv, defVars, checkedFuncs, newEnv) { # nolint
nodeLen <- length(node)

if (nodeLen > 1 && typeof(node) == "language") {
@@ -724,7 +724,7 @@ assignNewEnv <- function(data) {
stopifnot(length(cols) > 0)

env <- new.env()
for (i in 1:length(cols)) {
for (i in seq_len(length(cols))) {
assign(x = cols[i], value = data[, cols[i], drop = F], envir = env)
}
env
@@ -750,7 +750,7 @@ launchScript <- function(script, combinedArgs, wait = FALSE, stdout = "", stderr
if (.Platform$OS.type == "windows") {
scriptWithArgs <- paste(script, combinedArgs, sep = " ")
# on Windows, intern = F seems to mean output to the console. (documentation on this is missing)
shell(scriptWithArgs, translate = TRUE, wait = wait, intern = wait) # nolint
shell(scriptWithArgs, translate = TRUE, wait = wait, intern = wait)
} else {
# http://stat.ethz.ch/R-manual/R-devel/library/base/html/system2.html
# stdout = F means discard output
4 changes: 2 additions & 2 deletions R/pkg/inst/worker/worker.R
@@ -160,7 +160,7 @@ isEmpty <- SparkR:::readInt(inputCon)
computeInputElapsDiff <- 0
outputComputeElapsDiff <- 0

if (isEmpty != 0) {
if (isEmpty != 0) { # nolint
if (numPartitions == -1) {
if (deserializer == "byte") {
# Now read as many characters as described in funcLen
@@ -194,7 +194,7 @@ if (isEmpty != 0) {
} else {
# gapply mode
outputs <- list()
for (i in 1:length(data)) {
for (i in seq_len(length(data))) {
# Timing reading input data for execution
inputElap <- elapsedSecs()
output <- compute(mode, partition, serializer, deserializer, keys[[i]],
2 changes: 1 addition & 1 deletion R/pkg/tests/fulltests/test_rdd.R
@@ -194,7 +194,7 @@ test_that("sampleRDD() on RDDs", {
expect_equal(unlist(collectRDD(sampleRDD(rdd, FALSE, 1.0, 2014L))), nums)
})

test_that("takeSample() on RDDs", {
test_that("takeSample() on RDDs", { # nolint
# ported from RDDSuite.scala, modified seeds
data <- parallelize(sc, 1:100, 2L)
for (seed in 4:5) {
4 changes: 2 additions & 2 deletions R/pkg/tests/fulltests/test_sparkSQL.R
@@ -172,7 +172,7 @@ test_that("structField type strings", {
typeList <- c(primitiveTypes, complexTypes)
typeStrings <- names(typeList)

for (i in seq_along(typeStrings)){
for (i in seq_along(typeStrings)) {
typeString <- typeStrings[i]
expected <- typeList[[i]]
testField <- structField("_col", typeString)
@@ -203,7 +203,7 @@ test_that("structField type strings", {
errorList <- c(primitiveErrors, complexErrors)
typeStrings <- names(errorList)

for (i in seq_along(typeStrings)){
for (i in seq_along(typeStrings)) {
typeString <- typeStrings[i]
expected <- paste0("Unsupported type for SparkDataframe: ", errorList[[i]])
expect_error(structField("_col", typeString), expected)