@@ -520,6 +520,22 @@ setMethod("isNaN",
520520 column(jc )
521521 })
522522
#' kurtosis
#'
#' Aggregate function: returns the kurtosis of the values in a group.
#'
#' @param x The Column to compute on.
#' @return A Column built by \code{column()} from the JVM-side result.
#' @rdname kurtosis
#' @name kurtosis
#' @family agg_funcs
#' @export
#' @examples \dontrun{kurtosis(df$c)}
setMethod("kurtosis",
          signature(x = "Column"),
          function(x) {
            # Delegate to the static method of the same name on the JVM side.
            jc <- callJStatic("org.apache.spark.sql.functions", "kurtosis", x@jc)
            column(jc)
          })
538+
523539# ' last
524540# '
525541# ' Aggregate function: returns the last value in a group.
@@ -861,6 +877,28 @@ setMethod("rtrim",
861877 column(jc )
862878 })
863879
#' sd
#'
#' Aggregate function: alias for \link{stddev_samp}
#'
#' @param x The Column to compute on.
#' @param na.rm Accepted but not used by this method (presumably kept so the
#'              signature lines up with base R's \code{sd()}).
#' @return Whatever \code{stddev_samp(x)} returns.
#' @rdname sd
#' @name sd
#' @family agg_funcs
#' @seealso \link{stddev_pop}, \link{stddev_samp}
#' @export
#' @examples
#'\dontrun{
#'stddev(df$c)
#'select(df, stddev(df$age))
#'agg(df, sd(df$age))
#'}
setMethod("sd",
          signature(x = "Column"),
          function(x, na.rm = FALSE) {
            # In R, sample standard deviation is calculated with the sd() function,
            # so this method forwards to the sample (not population) variant.
            stddev_samp(x)
          })
901+
864902# ' second
865903# '
866904# ' Extracts the seconds as an integer from a given date/timestamp/string.
@@ -958,6 +996,22 @@ setMethod("size",
958996 column(jc )
959997 })
960998
#' skewness
#'
#' Aggregate function: returns the skewness of the values in a group.
#'
#' @param x The Column to compute on.
#' @return A Column built by \code{column()} from the JVM-side result.
#' @rdname skewness
#' @name skewness
#' @family agg_funcs
#' @export
#' @examples \dontrun{skewness(df$c)}
setMethod("skewness",
          signature(x = "Column"),
          function(x) {
            # Delegate to the static method of the same name on the JVM side.
            jc <- callJStatic("org.apache.spark.sql.functions", "skewness", x@jc)
            column(jc)
          })
1014+
9611015# ' soundex
9621016# '
9631017# ' Return the soundex code for the specified expression.
@@ -974,6 +1028,49 @@ setMethod("soundex",
9741028 column(jc )
9751029 })
9761030
#' @rdname sd
#' @name stddev
setMethod("stddev",
          signature(x = "Column"),
          function(x) {
            # Calls the JVM-side "stddev" function directly; documented on the
            # same help page as sd().
            jc <- callJStatic("org.apache.spark.sql.functions", "stddev", x@jc)
            column(jc)
          })
1039+
#' stddev_pop
#'
#' Aggregate function: returns the population standard deviation of the expression in a group.
#'
#' @param x The Column to compute on.
#' @return A Column built by \code{column()} from the JVM-side result.
#' @rdname stddev_pop
#' @name stddev_pop
#' @family agg_funcs
#' @seealso \link{sd}, \link{stddev_samp}
#' @export
#' @examples \dontrun{stddev_pop(df$c)}
setMethod("stddev_pop",
          signature(x = "Column"),
          function(x) {
            # Delegate to the static method of the same name on the JVM side.
            jc <- callJStatic("org.apache.spark.sql.functions", "stddev_pop", x@jc)
            column(jc)
          })
1056+
#' stddev_samp
#'
#' Aggregate function: returns the unbiased sample standard deviation of the expression in a group.
#'
#' @param x The Column to compute on.
#' @return A Column built by \code{column()} from the JVM-side result.
#' @rdname stddev_samp
#' @name stddev_samp
#' @family agg_funcs
#' @seealso \link{stddev_pop}, \link{sd}
#' @export
#' @examples \dontrun{stddev_samp(df$c)}
setMethod("stddev_samp",
          signature(x = "Column"),
          function(x) {
            # Delegate to the static method of the same name on the JVM side.
            jc <- callJStatic("org.apache.spark.sql.functions", "stddev_samp", x@jc)
            column(jc)
          })
1073+
9771074# ' sqrt
9781075# '
9791076# ' Computes the square root of the specified float value.
@@ -1168,6 +1265,71 @@ setMethod("upper",
11681265 column(jc )
11691266 })
11701267
#' var
#'
#' Aggregate function: alias for \link{var_samp}.
#'
#' @param x The Column to compute on.
#' @param y Accepted but not used by this method.
#' @param na.rm Accepted but not used by this method.
#' @param use Accepted but not used by this method. These three arguments are
#'            presumably kept so the signature lines up with base R's \code{var()}.
#' @return Whatever \code{var_samp(x)} returns.
#' @rdname var
#' @name var
#' @family agg_funcs
#' @seealso \link{var_pop}, \link{var_samp}
#' @export
#' @examples
#'\dontrun{
#'variance(df$c)
#'select(df, var_pop(df$age))
#'agg(df, var(df$age))
#'}
setMethod("var",
          signature(x = "Column"),
          function(x, y = NULL, na.rm = FALSE, use) {
            # In R, sample variance is calculated with the var() function,
            # so this method forwards to the sample (not population) variant.
            var_samp(x)
          })
1289+
#' @rdname var
#' @name variance
setMethod("variance",
          signature(x = "Column"),
          function(x) {
            # Calls the JVM-side "variance" function directly; documented on the
            # same help page as var().
            jc <- callJStatic("org.apache.spark.sql.functions", "variance", x@jc)
            column(jc)
          })
1298+
#' var_pop
#'
#' Aggregate function: returns the population variance of the values in a group.
#'
#' @param x The Column to compute on.
#' @return A Column built by \code{column()} from the JVM-side result.
#' @rdname var_pop
#' @name var_pop
#' @family agg_funcs
#' @seealso \link{var}, \link{var_samp}
#' @export
#' @examples \dontrun{var_pop(df$c)}
setMethod("var_pop",
          signature(x = "Column"),
          function(x) {
            # Delegate to the static method of the same name on the JVM side.
            jc <- callJStatic("org.apache.spark.sql.functions", "var_pop", x@jc)
            column(jc)
          })
1315+
#' var_samp
#'
#' Aggregate function: returns the unbiased variance of the values in a group.
#'
#' @param x The Column to compute on.
#' @return A Column built by \code{column()} from the JVM-side result.
#' @rdname var_samp
#' @name var_samp
#' @family agg_funcs
#' @seealso \link{var_pop}, \link{var}
#' @export
#' @examples \dontrun{var_samp(df$c)}
setMethod("var_samp",
          signature(x = "Column"),
          function(x) {
            # Delegate to the static method of the same name on the JVM side.
            jc <- callJStatic("org.apache.spark.sql.functions", "var_samp", x@jc)
            column(jc)
          })
1332+
11711333# ' weekofyear
11721334# '
11731335# ' Extracts the week number as an integer from a given date/timestamp/string.
@@ -2020,10 +2182,10 @@ setMethod("ifelse",
20202182# '
20212183# ' Window function: returns the cumulative distribution of values within a window partition,
20222184# ' i.e. the fraction of rows that are below the current row.
2023- # '
2185+ # '
20242186# ' N = total number of rows in the partition
20252187# ' cumeDist(x) = number of values before (and including) x / N
2026- # '
2188+ # '
20272189# ' This is equivalent to the CUME_DIST function in SQL.
20282190# '
20292191# ' @rdname cumeDist
@@ -2039,13 +2201,13 @@ setMethod("cumeDist",
20392201 })
20402202
20412203# ' denseRank
2042- # '
2204+ # '
20432205# ' Window function: returns the rank of rows within a window partition, without any gaps.
20442206# ' The difference between rank and denseRank is that denseRank leaves no gaps in ranking
20452207# ' sequence when there are ties. That is, if you were ranking a competition using denseRank
20462208# ' and had three people tie for second place, you would say that all three were in second
20472209# ' place and that the next person came in third.
2048- # '
2210+ # '
20492211# ' This is equivalent to the DENSE_RANK function in SQL.
20502212# '
20512213# ' @rdname denseRank
@@ -2065,7 +2227,7 @@ setMethod("denseRank",
20652227# ' Window function: returns the value that is `offset` rows before the current row, and
20662228# ' `defaultValue` if there are fewer than `offset` rows before the current row. For example,
20672229# ' an `offset` of one will return the previous row at any given point in the window partition.
2068- # '
2230+ # '
20692231# ' This is equivalent to the LAG function in SQL.
20702232# '
20712233# ' @rdname lag
@@ -2092,7 +2254,7 @@ setMethod("lag",
20922254# ' Window function: returns the value that is `offset` rows after the current row, and
20932255# ' `null` if there are fewer than `offset` rows after the current row. For example,
20942256# ' an `offset` of one will return the next row at any given point in the window partition.
2095- # '
2257+ # '
20962258# ' This is equivalent to the LEAD function in SQL.
20972259# '
20982260# ' @rdname lead
@@ -2119,7 +2281,7 @@ setMethod("lead",
21192281# ' Window function: returns the ntile group id (from 1 to `n` inclusive) in an ordered window
21202282# ' partition. For example, if `n` is 4, the first quarter of the rows will get value 1, the second
21212283# ' quarter will get 2, the third quarter will get 3, and the last quarter will get 4.
2122- # '
2284+ # '
21232285# ' This is equivalent to the NTILE function in SQL.
21242286# '
21252287# ' @rdname ntile
@@ -2137,9 +2299,9 @@ setMethod("ntile",
21372299# ' percentRank
21382300# '
21392301# ' Window function: returns the relative rank (i.e. percentile) of rows within a window partition.
2140- # '
2302+ # '
21412303# ' This is computed by:
2142- # '
2304+ # '
21432305# ' (rank of row in its partition - 1) / (number of rows in the partition - 1)
21442306# '
21452307# ' This is equivalent to the PERCENT_RANK function in SQL.
@@ -2159,12 +2321,12 @@ setMethod("percentRank",
21592321# ' rank
21602322# '
21612323# ' Window function: returns the rank of rows within a window partition.
2162- # '
2324+ # '
21632325# ' The difference between rank and denseRank is that denseRank leaves no gaps in ranking
21642326# ' sequence when there are ties. That is, if you were ranking a competition using denseRank
21652327# ' and had three people tie for second place, you would say that all three were in second
21662328# ' place and that the next person came in third.
2167- # '
2329+ # '
21682330# ' This is equivalent to the RANK function in SQL.
21692331# '
21702332# ' @rdname rank
@@ -2189,7 +2351,7 @@ setMethod("rank",
21892351# ' rowNumber
21902352# '
21912353# ' Window function: returns a sequential number starting at 1 within a window partition.
2192- # '
2354+ # '
21932355# ' This is equivalent to the ROW_NUMBER function in SQL.
21942356# '
21952357# ' @rdname rowNumber
0 commit comments