@@ -687,7 +687,7 @@ setMethod("storageLevel",
 #' @rdname coalesce
 #' @name coalesce
 #' @aliases coalesce,SparkDataFrame-method
-#' @seealso \link{repartition}
+#' @seealso \link{repartition}, \link{repartitionByRange}
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -723,7 +723,7 @@ setMethod("coalesce",
 #' @rdname repartition
 #' @name repartition
 #' @aliases repartition,SparkDataFrame-method
-#' @seealso \link{coalesce}
+#' @seealso \link{coalesce}, \link{repartitionByRange}
 #' @examples
 #'\dontrun{
 #' sparkR.session()
@@ -759,6 +759,67 @@ setMethod("repartition",
             dataFrame(sdf)
           })
 
+
+#' Repartition by range
+#'
+#' The following options for repartition by range are possible:
+#' \itemize{
+#'  \item{1.} {Return a new SparkDataFrame range partitioned by
+#'             the given columns into \code{numPartitions}.}
+#'  \item{2.} {Return a new SparkDataFrame range partitioned by the given column(s),
+#'             using \code{spark.sql.shuffle.partitions} as the number of partitions.}
+#'}
+#'
+#' @param x a SparkDataFrame.
+#' @param numPartitions the number of partitions to use.
+#' @param col the column by which the range partitioning will be performed.
+#' @param ... additional column(s) to be used in the range partitioning.
+#'
+#' @family SparkDataFrame functions
+#' @rdname repartitionByRange
+#' @name repartitionByRange
+#' @aliases repartitionByRange,SparkDataFrame-method
+#' @seealso \link{repartition}, \link{coalesce}
+#' @examples
+#'\dontrun{
+#' sparkR.session()
+#' path <- "path/to/file.json"
+#' df <- read.json(path)
+#' newDF <- repartitionByRange(df, col = df$col1, df$col2)
+#' newDF <- repartitionByRange(df, 3L, col = df$col1, df$col2)
+#'}
+#' @note repartitionByRange since 2.4.0
+setMethod("repartitionByRange",
+          signature(x = "SparkDataFrame"),
+          function(x, numPartitions = NULL, col = NULL, ...) {
+            if (!is.null(numPartitions) && !is.null(col)) {
+              # both the number of partitions and the columns are specified
+              if (is.numeric(numPartitions) && class(col) == "Column") {
+                cols <- list(col, ...)
+                jcol <- lapply(cols, function(c) { c@jc })
+                sdf <- callJMethod(x@sdf, "repartitionByRange", numToInt(numPartitions), jcol)
+              } else {
+                stop(paste("numPartitions and col must be numeric and Column; however, got",
+                           class(numPartitions), "and", class(col)))
+              }
+            } else if (!is.null(col)) {
+              # only columns are specified
+              if (class(col) == "Column") {
+                cols <- list(col, ...)
+                jcol <- lapply(cols, function(c) { c@jc })
+                sdf <- callJMethod(x@sdf, "repartitionByRange", jcol)
+              } else {
+                stop(paste("col must be Column; however, got", class(col)))
+              }
+            } else if (!is.null(numPartitions)) {
+              # numPartitions alone is not enough for range partitioning
+              stop("At least one partition-by column must be specified.")
+            } else {
+              stop("Please specify a column(s), or the number of partitions with a column(s)")
+            }
+            dataFrame(sdf)
+          })
+
 #' toJSON
 #'
 #' Converts a SparkDataFrame into a SparkDataFrame of JSON string.
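
A minimal end-to-end sketch for trying the new method locally (assuming this patch is applied to a SparkR build; the toy data, column name, and partition count below are illustrative and not part of the patch):

library(SparkR)
sparkR.session()

# Build a small SparkDataFrame in-process, so no external JSON file is needed.
df <- createDataFrame(data.frame(key = c(3, 1, 2, 5, 4), value = letters[1:5]))

# Range-partition by one column; spark.sql.shuffle.partitions decides the count.
byKey <- repartitionByRange(df, col = df$key)

# Range-partition into an explicit number of partitions.
threeParts <- repartitionByRange(df, 3L, col = df$key)
getNumPartitions(threeParts)  # expected: 3

Unlike repartition, which hash-partitions, repartitionByRange sorts rows into contiguous key ranges, which is why the method refuses a bare numPartitions with no partition-by column.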