
Commit feacda0

Author: jose.cambronero
Commit message: resolved merge conflicts for 2 sample ks
Merge: 2 parents, 6a27682 + f68d024

File tree

913 files changed: 45,868 additions, 14,523 deletions


.rat-excludes

Lines changed: 2 additions & 0 deletions
@@ -93,3 +93,5 @@ INDEX
 .lintr
 gen-java.*
 .*avpr
+org.apache.spark.sql.sources.DataSourceRegister
+.*parquet

R/install-dev.bat

Lines changed: 0 additions & 5 deletions
@@ -25,8 +25,3 @@ set SPARK_HOME=%~dp0..
 MKDIR %SPARK_HOME%\R\lib
 
 R.exe CMD INSTALL --library="%SPARK_HOME%\R\lib" %SPARK_HOME%\R\pkg\
-
-rem Zip the SparkR package so that it can be distributed to worker nodes on YARN
-pushd %SPARK_HOME%\R\lib
-%JAVA_HOME%\bin\jar.exe cfM "%SPARK_HOME%\R\lib\sparkr.zip" SparkR
-popd

R/install-dev.sh

Lines changed: 0 additions & 4 deletions
@@ -42,8 +42,4 @@ Rscript -e ' if("devtools" %in% rownames(installed.packages())) { library(devtoo
 # Install SparkR to $LIB_DIR
 R CMD INSTALL --library=$LIB_DIR $FWDIR/pkg/
 
-# Zip the SparkR package so that it can be distributed to worker nodes on YARN
-cd $LIB_DIR
-jar cfM "$LIB_DIR/sparkr.zip" SparkR
-
 popd > /dev/null

R/pkg/DESCRIPTION

Lines changed: 1 addition & 0 deletions
@@ -29,6 +29,7 @@ Collate:
     'client.R'
     'context.R'
     'deserialize.R'
+    'functions.R'
     'mllib.R'
     'serialize.R'
     'sparkR.R'

R/pkg/NAMESPACE

Lines changed: 12 additions & 0 deletions
@@ -29,6 +29,7 @@ exportMethods("arrange",
               "count",
               "crosstab",
               "describe",
+              "dim",
               "distinct",
               "dropna",
               "dtypes",
@@ -45,11 +46,16 @@ exportMethods("arrange",
               "isLocal",
               "join",
               "limit",
+              "merge",
+              "names",
+              "ncol",
+              "nrow",
               "orderBy",
               "mutate",
               "names",
               "persist",
               "printSchema",
+              "rbind",
               "registerTempTable",
               "rename",
               "repartition",
@@ -64,8 +70,10 @@ exportMethods("arrange",
               "show",
               "showDF",
               "summarize",
+              "summary",
               "take",
               "unionAll",
+              "unique",
               "unpersist",
               "where",
               "withColumn",
@@ -90,6 +98,7 @@ exportMethods("abs",
               "contains",
               "cos",
               "cosh",
+              "concat",
               "countDistinct",
               "desc",
               "endsWith",
@@ -98,10 +107,13 @@ exportMethods("abs",
               "floor",
               "getField",
               "getItem",
+              "greatest",
               "hypot",
               "isNotNull",
               "isNull",
+              "lit",
               "last",
+              "least",
               "like",
               "log",
               "log10",

R/pkg/R/DataFrame.R

Lines changed: 116 additions & 0 deletions
@@ -255,6 +255,16 @@ setMethod("names",
             columns(x)
           })
 
+#' @rdname columns
+setMethod("names<-",
+          signature(x = "DataFrame"),
+          function(x, value) {
+            if (!is.null(value)) {
+              sdf <- callJMethod(x@sdf, "toDF", listToSeq(as.list(value)))
+              dataFrame(sdf)
+            }
+          })
+
 #' Register Temporary Table
 #'
 #' Registers a DataFrame as a Temporary Table in the SQLContext
@@ -473,6 +483,18 @@ setMethod("distinct",
             dataFrame(sdf)
           })
 
+#' @title Distinct rows in a DataFrame
+#'
+#' @description Returns a new DataFrame containing distinct rows in this DataFrame
+#'
+#' @rdname unique
+#' @aliases unique
+setMethod("unique",
+          signature(x = "DataFrame"),
+          function(x) {
+            distinct(x)
+          })
+
 #' Sample
 #'
 #' Return a sampled subset of this DataFrame using a random seed.
@@ -534,6 +556,58 @@ setMethod("count",
             callJMethod(x@sdf, "count")
           })
 
+#' @title Number of rows for a DataFrame
+#' @description Returns the number of rows in a DataFrame
+#'
+#' @name nrow
+#'
+#' @rdname nrow
+#' @aliases count
+setMethod("nrow",
+          signature(x = "DataFrame"),
+          function(x) {
+            count(x)
+          })
+
+#' Returns the number of columns in a DataFrame
+#'
+#' @param x a SparkSQL DataFrame
+#'
+#' @rdname ncol
+#' @export
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' sqlContext <- sparkRSQL.init(sc)
+#' path <- "path/to/file.json"
+#' df <- jsonFile(sqlContext, path)
+#' ncol(df)
+#' }
+setMethod("ncol",
+          signature(x = "DataFrame"),
+          function(x) {
+            length(columns(x))
+          })
+
+#' Returns the dimensions (number of rows and columns) of a DataFrame
+#' @param x a SparkSQL DataFrame
+#'
+#' @rdname dim
+#' @export
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' sqlContext <- sparkRSQL.init(sc)
+#' path <- "path/to/file.json"
+#' df <- jsonFile(sqlContext, path)
+#' dim(df)
+#' }
+setMethod("dim",
+          signature(x = "DataFrame"),
+          function(x) {
+            c(count(x), ncol(x))
+          })
+
 #' Collects all the elements of a Spark DataFrame and coerces them into an R data.frame.
 #'
 #' @param x A SparkSQL DataFrame
@@ -1205,6 +1279,15 @@ setMethod("join",
             dataFrame(sdf)
           })
 
+#' @rdname merge
+#' @aliases join
+setMethod("merge",
+          signature(x = "DataFrame", y = "DataFrame"),
+          function(x, y, joinExpr = NULL, joinType = NULL, ...) {
+            join(x, y, joinExpr, joinType)
+          })
+
+
 #' UnionAll
 #'
 #' Return a new DataFrame containing the union of rows in this DataFrame
@@ -1231,6 +1314,22 @@ setMethod("unionAll",
             dataFrame(unioned)
           })
 
+#' @title Union two or more DataFrames
+#'
+#' @description Returns a new DataFrame containing rows of all parameters.
+#'
+#' @rdname rbind
+#' @aliases unionAll
+setMethod("rbind",
+          signature(... = "DataFrame"),
+          function(x, ..., deparse.level = 1) {
+            if (nargs() == 3) {
+              unionAll(x, ...)
+            } else {
+              unionAll(x, Recall(..., deparse.level = 1))
+            }
+          })
+
 #' Intersect
 #'
 #' Return a new DataFrame containing rows only in both this DataFrame
@@ -1322,9 +1421,11 @@ setMethod("write.df",
                                  "org.apache.spark.sql.parquet")
             }
             allModes <- c("append", "overwrite", "error", "ignore")
+            # nolint start
             if (!(mode %in% allModes)) {
               stop('mode should be one of "append", "overwrite", "error", "ignore"')
             }
+            # nolint end
             jmode <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "saveMode", mode)
             options <- varargsToEnv(...)
             if (!is.null(path)) {
@@ -1384,9 +1485,11 @@ setMethod("saveAsTable",
                                  "org.apache.spark.sql.parquet")
             }
             allModes <- c("append", "overwrite", "error", "ignore")
+            # nolint start
             if (!(mode %in% allModes)) {
               stop('mode should be one of "append", "overwrite", "error", "ignore"')
             }
+            # nolint end
             jmode <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "saveMode", mode)
             options <- varargsToEnv(...)
             callJMethod(df@sdf, "saveAsTable", tableName, source, jmode, options)
@@ -1430,6 +1533,19 @@ setMethod("describe",
             dataFrame(sdf)
           })
 
+#' @title Summary
+#'
+#' @description Computes statistics for numeric columns of the DataFrame
+#'
+#' @rdname summary
+#' @aliases describe
+setMethod("summary",
+          signature(x = "DataFrame"),
+          function(x) {
+            describe(x)
+          })
+
+
 #' dropna
 #'
 #' Returns a new DataFrame omitting rows with null values.
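
Taken together, the DataFrame.R additions let familiar base-R idioms dispatch to their SparkR equivalents. An illustrative sketch, where df, df2, and df3 are hypothetical DataFrames from the same SQLContext and the merge call assumes a shared key column a:

dim(df)                   # c(count(df), length(columns(df)))
names(df) <- c("a", "b")  # rename columns via toDF (hypothetical two-column df)
u <- unique(df)           # dispatches to distinct(df)
s <- summary(df)          # dispatches to describe(df)
m <- merge(df, df2, df$a == df2$a, "inner")  # thin wrapper over join
r <- rbind(df, df2, df3)  # recursive unionAll over all arguments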

R/pkg/R/RDD.R

Lines changed: 8 additions & 5 deletions
@@ -85,7 +85,9 @@ setMethod("initialize", "PipelinedRDD", function(.Object, prev, func, jrdd_val)
 
   isPipelinable <- function(rdd) {
     e <- rdd@env
+    # nolint start
     !(e$isCached || e$isCheckpointed)
+    # nolint end
   }
 
   if (!inherits(prev, "PipelinedRDD") || !isPipelinable(prev)) {
@@ -97,7 +99,8 @@ setMethod("initialize", "PipelinedRDD", function(.Object, prev, func, jrdd_val)
     # prev_serializedMode is used during the delayed computation of JRDD in getJRDD
   } else {
     pipelinedFunc <- function(partIndex, part) {
-      func(partIndex, prev@func(partIndex, part))
+      f <- prev@func
+      func(partIndex, f(partIndex, part))
     }
     .Object@func <- cleanClosure(pipelinedFunc)
     .Object@prev_jrdd <- prev@prev_jrdd # maintain the pipeline
@@ -841,7 +844,7 @@ setMethod("sampleRDD",
         if (withReplacement) {
           count <- rpois(1, fraction)
           if (count > 0) {
-            res[(len + 1):(len + count)] <- rep(list(elem), count)
+            res[ (len + 1) : (len + count) ] <- rep(list(elem), count)
             len <- len + count
           }
         } else {
@@ -1261,12 +1264,12 @@ setMethod("pipeRDD",
           signature(x = "RDD", command = "character"),
           function(x, command, env = list()) {
             func <- function(part) {
-              trim.trailing.func <- function(x) {
+              trim_trailing_func <- function(x) {
                 sub("[\r\n]*$", "", toString(x))
               }
-              input <- unlist(lapply(part, trim.trailing.func))
+              input <- unlist(lapply(part, trim_trailing_func))
               res <- system2(command, stdout = TRUE, input = input, env = env)
-              lapply(res, trim.trailing.func)
+              lapply(res, trim_trailing_func)
             }
             lapplyPartition(x, func)
           })
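
For context, pipeRDD streams each partition through an external command via system2, with trim_trailing_func stripping trailing newlines from both input and output. A hedged usage sketch, assuming a local SparkR context sc (in this era the RDD API may require the SparkR::: prefix):

rdd <- parallelize(sc, list("a", "b", "c"))
piped <- pipeRDD(rdd, "cat")  # identity pipe: each element becomes one stdin line
collect(piped)                # list("a", "b", "c"), trailing newlines trimmed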
