@@ -468,6 +468,62 @@ test_that("zipRDD() on RDDs", {
468468 unlink(fileName )
469469})
470470
test_that("subtract() on RDDs", {
  # Numeric input that contains duplicate elements.
  l <- list(1, 1, 2, 2, 3, 4)
  rdd1 <- parallelize(sc, l)

  # Subtracting an RDD from itself must leave nothing behind.
  actual <- collect(subtract(rdd1, rdd1))
  expect_equal(actual, list())

  # Subtracting an empty RDD must keep every element. The collected values
  # are sorted before comparing so the check does not rely on collect() order.
  rdd2 <- parallelize(sc, list())
  actual <- collect(subtract(rdd1, rdd2))
  expect_equal(as.list(sort(as.vector(actual, mode = "integer"))),
               l)

  # Every occurrence of 2 and 4 is expected to be removed, while the
  # duplicated 1 is expected to survive twice.
  rdd2 <- parallelize(sc, list(2, 4))
  actual <- collect(subtract(rdd1, rdd2))
  expect_equal(as.list(sort(as.vector(actual, mode = "integer"))),
               list(1, 1, 3))

  # Same scenario with character elements.
  l <- list("a", "a", "b", "b", "c", "d")
  rdd1 <- parallelize(sc, l)
  rdd2 <- parallelize(sc, list("b", "d"))
  actual <- collect(subtract(rdd1, rdd2))
  expect_equal(as.list(sort(as.vector(actual, mode = "character"))),
               list("a", "a", "c"))
})
497+
test_that("subtractByKey() on pairwise RDDs", {
  # Key-value pairs with character keys; each key appears twice.
  l <- list(list("a", 1), list("b", 4),
            list("b", 5), list("a", 2))
  rdd1 <- parallelize(sc, l)

  # Subtracting an RDD from itself must leave nothing behind.
  actual <- collect(subtractByKey(rdd1, rdd1))
  expect_equal(actual, list())

  # Subtracting an empty RDD must keep every pair. Both sides go through
  # sortKeyValueList() before being compared.
  rdd2 <- parallelize(sc, list())
  actual <- collect(subtractByKey(rdd1, rdd2))
  expect_equal(sortKeyValueList(actual),
               sortKeyValueList(l))

  # Key "a" occurs in rdd2 (with a different value), so both "a" pairs are
  # expected to be dropped; key "c" does not occur in rdd1 at all.
  # NOTE(review): this comparison (and the numeric one below) is sensitive to
  # the order returned by collect() -- confirm that order is guaranteed here.
  rdd2 <- parallelize(sc, list(list("a", 3), list("c", 1)))
  actual <- collect(subtractByKey(rdd1, rdd2))
  expect_equal(actual,
               list(list("b", 4), list("b", 5)))

  # Same scenario with numeric keys.
  l <- list(list(1, 1), list(2, 4),
            list(2, 5), list(1, 2))
  rdd1 <- parallelize(sc, l)
  rdd2 <- parallelize(sc, list(list(1, 3), list(3, 1)))
  actual <- collect(subtractByKey(rdd1, rdd2))
  expect_equal(actual,
               list(list(2, 4), list(2, 5)))
})
526+
471527test_that(" intersection() on RDDs" , {
472528 # intersection with self
473529 actual <- collect(intersection(rdd , rdd ))
0 commit comments