1 change: 1 addition & 0 deletions R/pkg/NAMESPACE
@@ -94,6 +94,7 @@ exportMethods("arrange",
"freqItems",
"gapply",
"gapplyCollect",
"getNumPartitions",
"group_by",
"groupBy",
"head",
25 changes: 25 additions & 0 deletions R/pkg/R/DataFrame.R
@@ -3406,3 +3406,28 @@ setMethod("randomSplit",
}
sapply(sdfs, dataFrame)
})

#' getNumPartitions
#'
#' Return the number of partitions
#' Note: in order to compute the number of partitions, the SparkDataFrame has to be
#' temporarily converted into an RDD internally.
#'
#' @param x A SparkDataFrame
#' @family SparkDataFrame functions
#' @aliases getNumPartitions,SparkDataFrame-method
#' @rdname getNumPartitions
#' @name getNumPartitions
#' @export
#' @examples
#' \dontrun{
#' sparkR.session()
#' df <- createDataFrame(cars, numPartitions = 2)
#' getNumPartitions(df)
#' }
#' @note getNumPartitions since 2.1.1
Member: @felixcheung, should this be since 2.2.0? Just curious.

Member Author: I debated about this quite a bit - generally it should, but we merged createDataFrame(..., numPartitions) to 2.1, and it felt important to have getNumPartitions in the same release too.

setMethod("getNumPartitions",
signature(x = "SparkDataFrame"),
function(x) {
getNumPartitions(toRDD(x))
})
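
For reference, a minimal usage sketch of the new method (not part of the patch; it assumes a local Spark session, uses the built-in 50-row cars dataset, and the capping behavior is taken from the tests below):

library(SparkR)
sparkR.session()

# The partition count set at creation time is read back by the new method.
df <- createDataFrame(cars, numPartitions = 2)
getNumPartitions(df)   # 2

# Requesting more partitions than rows is capped at the row count (cars has 50 rows).
df <- createDataFrame(cars, numPartitions = 60)
getNumPartitions(df)   # 50

sparkR.session.stop()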
10 changes: 5 additions & 5 deletions R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -196,18 +196,18 @@ test_that("create DataFrame from RDD", {
expect_equal(dtypes(df), list(c("name", "string"), c("age", "int"), c("height", "float")))
expect_equal(as.list(collect(where(df, df$name == "John"))),
list(name = "John", age = 19L, height = 176.5))
expect_equal(getNumPartitions(toRDD(df)), 1)
expect_equal(getNumPartitions(df), 1)

df <- as.DataFrame(cars, numPartitions = 2)
expect_equal(getNumPartitions(toRDD(df)), 2)
expect_equal(getNumPartitions(df), 2)
df <- createDataFrame(cars, numPartitions = 3)
expect_equal(getNumPartitions(toRDD(df)), 3)
expect_equal(getNumPartitions(df), 3)
# validate limit by num of rows
df <- createDataFrame(cars, numPartitions = 60)
expect_equal(getNumPartitions(toRDD(df)), 50)
expect_equal(getNumPartitions(df), 50)
# validate when 1 < (length(coll) / numSlices) << length(coll)
df <- createDataFrame(cars, numPartitions = 20)
expect_equal(getNumPartitions(toRDD(df)), 20)
expect_equal(getNumPartitions(df), 20)

df <- as.DataFrame(data.frame(0))
expect_is(df, "SparkDataFrame")