@@ -1294,6 +1294,79 @@ setMethod("sortBy",
12941294 values(sortByKey(keyBy(rdd , func ), ascending , numPartitions ))
12951295 })
12961296
# Helper function to get the first N elements of an RDD in the specified order.
# Param:
#   rdd An RDD.
#   num Number of elements to return.
#   ascending A flag to indicate whether the sorting is ascending or descending.
# Return:
#   A list of the first N elements from the RDD in the specified order;
#   an empty list when num is not positive.
#
takeOrderedElem <- function(rdd, num, ascending = TRUE) {
  # Nothing to fetch: return early without touching the RDD.
  if (num <= 0L) {
    return(list())
  }

  # Trims one partition down to its num leading elements (in the requested
  # order) so that at most num elements per partition reach the sort below.
  partitionFunc <- function(part) {
    if (num < length(part)) {
      # R limitation: order works only on primitive types!
      ord <- order(unlist(part, recursive = FALSE), decreasing = !ascending)
      part[ord[seq_len(num)]]
    } else {
      # The partition holds no more than num elements: keep it whole.
      part
    }
  }

  newRdd <- mapPartitions(rdd, partitionFunc)
  take(sortBy(newRdd, function(x) { x }, ascending = ascending), num)
}
1323+
#' Returns the first N elements from an RDD in ascending order.
#'
#' @param rdd An RDD.
#' @param num Number of elements to return, given as an integer (e.g. 6L).
#' @return The first N elements from the RDD in ascending order.
#' @rdname takeOrdered
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' rdd <- parallelize(sc, list(10, 1, 2, 9, 3, 4, 5, 6, 7))
#' takeOrdered(rdd, 6L) # list(1, 2, 3, 4, 5, 6)
#'}
setGeneric("takeOrdered", function(rdd, num) { standardGeneric("takeOrdered") })

#' @rdname takeOrdered
#' @aliases takeOrdered,RDD,integer-method
setMethod("takeOrdered",
          signature(rdd = "RDD", num = "integer"),
          function(rdd, num) {
            # Ascending order is the default of the shared helper.
            takeOrderedElem(rdd, num)
          })
1346+
#' Returns the top N elements from an RDD.
#'
#' @param rdd An RDD.
#' @param num Number of elements to return, given as an integer (e.g. 6L).
#' @return The top N elements from the RDD, i.e. the first N in descending
#'   order.
#' @rdname top
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' rdd <- parallelize(sc, list(10, 1, 2, 9, 3, 4, 5, 6, 7))
#' top(rdd, 6L) # list(10, 9, 7, 6, 5, 4)
#'}
setGeneric("top", function(rdd, num) { standardGeneric("top") })

#' @rdname top
#' @aliases top,RDD,integer-method
setMethod("top",
          signature(rdd = "RDD", num = "integer"),
          function(rdd, num) {
            # ascending = FALSE makes the shared helper sort in descending order.
            takeOrderedElem(rdd, num, FALSE)
          })
1369+
12971370# ########### Shuffle Functions ############
12981371
12991372# ' Partition an RDD by key
0 commit comments