@@ -2131,6 +2131,69 @@ setMethod("from_unixtime", signature(x = "Column"),
21312131 column(jc )
21322132 })
21332133
#' window
#'
#' Bucketize rows into one or more time windows given a timestamp specifying column. Window
#' starts are inclusive but the window ends are exclusive, e.g. 12:05 will be in the window
#' [12:05,12:10) but not in [12:00,12:05). Windows can support microsecond precision. Windows in
#' the order of months are not supported.
#'
#' The time column must be of TimestampType.
#'
#' Durations are provided as strings, e.g. '1 second', '1 day 12 hours', '2 minutes'. Valid
#' interval strings are 'week', 'day', 'hour', 'minute', 'second', 'millisecond', 'microsecond'.
#' If the \code{slideDuration} is not provided, the windows will be tumbling windows.
#'
#' The startTime is the offset with respect to 1970-01-01 00:00:00 UTC with which to start
#' window intervals. For example, in order to have hourly tumbling windows that start 15 minutes
#' past the hour, e.g. 12:15-13:15, 13:15-14:15... provide \code{startTime} as \code{15 minutes}.
#'
#' The output column will be a struct called 'window' by default with the nested columns 'start'
#' and 'end'.
#'
#' @param x a time Column. Must be of TimestampType.
#' @param windowDuration a string specifying the width of the window, e.g. '1 second',
#'                       '1 day 12 hours', '2 minutes'.
#' @param slideDuration an optional string specifying the sliding interval of the window, in the
#'                      same format as \code{windowDuration}. When \code{NULL} (the default) the
#'                      windows are tumbling, i.e. the slide equals \code{windowDuration}.
#' @param startTime an optional string giving the offset with respect to
#'                  1970-01-01 00:00:00 UTC with which to start window intervals. When
#'                  \code{NULL} (the default) no offset is passed.
#' @return A Column holding the window struct described above.
#'
#' @family datetime_funcs
#' @rdname window
#' @name window
#' @export
#' @examples
#' \dontrun{
#'   # One minute windows every 15 seconds 10 seconds after the minute, e.g. 09:00:10-09:01:10,
#'   # 09:00:25-09:01:25, 09:00:40-09:01:40, ...
#'   window(df$time, "1 minute", "15 seconds", "10 seconds")
#'
#'   # One minute tumbling windows 15 seconds after the minute, e.g. 09:00:15-09:01:15,
#'   # 09:01:15-09:02:15...
#'   window(df$time, "1 minute", startTime = "15 seconds")
#'
#'   # Thirty second windows every 10 seconds, e.g. 09:00:00-09:00:30, 09:00:10-09:00:40, ...
#'   window(df$time, "30 seconds", "10 seconds")
#' }
setMethod("window", signature(x = "Column"),
          function(x, windowDuration, slideDuration = NULL, startTime = NULL) {
            # Validate all duration arguments up front; the optional ones may stay NULL.
            stopifnot(is.character(windowDuration))
            stopifnot(is.null(slideDuration) || is.character(slideDuration))
            stopifnot(is.null(startTime) || is.character(startTime))

            # Class and method names must be exact: any stray whitespace makes the
            # static lookup on the JVM side fail.
            functionsClass <- "org.apache.spark.sql.functions"

            jc <- if (!is.null(startTime)) {
              # The call that carries a start offset also takes a slide duration, so a
              # tumbling window with an offset passes the window duration as its slide.
              slide <- if (is.null(slideDuration)) windowDuration else slideDuration
              callJStatic(functionsClass, "window", x@jc, windowDuration, slide, startTime)
            } else if (!is.null(slideDuration)) {
              callJStatic(functionsClass, "window", x@jc, windowDuration, slideDuration)
            } else {
              callJStatic(functionsClass, "window", x@jc, windowDuration)
            }
            column(jc)
          })
2196+
21342197# ' locate
21352198# '
21362199# ' Locate the position of the first occurrence of substr.
0 commit comments