
Commit 964ee4b

Merge branch 'master' into sc-2813
2 parents 2542d01 + 3e29e37

224 files changed: +7626 / -3204 lines

R/pkg/NAMESPACE

Lines changed: 1 addition & 0 deletions
@@ -265,6 +265,7 @@ exportMethods("%in%",
               "var_samp",
               "weekofyear",
               "when",
+              "window",
               "year")
 
 exportClasses("GroupedData")

R/pkg/R/functions.R

Lines changed: 63 additions & 0 deletions
@@ -2131,6 +2131,69 @@ setMethod("from_unixtime", signature(x = "Column"),
             column(jc)
           })
 
+#' window
+#'
+#' Bucketize rows into one or more time windows given a timestamp specifying column. Window
+#' starts are inclusive but the window ends are exclusive, e.g. 12:05 will be in the window
+#' [12:05,12:10) but not in [12:00,12:05). Windows can support microsecond precision. Windows in
+#' the order of months are not supported.
+#'
+#' The time column must be of TimestampType.
+#'
+#' Durations are provided as strings, e.g. '1 second', '1 day 12 hours', '2 minutes'. Valid
+#' interval strings are 'week', 'day', 'hour', 'minute', 'second', 'millisecond', 'microsecond'.
+#' If the `slideDuration` is not provided, the windows will be tumbling windows.
+#'
+#' The startTime is the offset with respect to 1970-01-01 00:00:00 UTC with which to start
+#' window intervals. For example, in order to have hourly tumbling windows that start 15 minutes
+#' past the hour, e.g. 12:15-13:15, 13:15-14:15... provide `startTime` as `15 minutes`.
+#'
+#' The output column will be a struct called 'window' by default with the nested columns 'start'
+#' and 'end'.
+#'
+#' @family datetime_funcs
+#' @rdname window
+#' @name window
+#' @export
+#' @examples
+#'\dontrun{
+#' # One minute windows every 15 seconds 10 seconds after the minute, e.g. 09:00:10-09:01:10,
+#' # 09:00:25-09:01:25, 09:00:40-09:01:40, ...
+#' window(df$time, "1 minute", "15 seconds", "10 seconds")
+#'
+#' # One minute tumbling windows 15 seconds after the minute, e.g. 09:00:15-09:01:15,
+#' # 09:01:15-09:02:15...
+#' window(df$time, "1 minute", startTime = "15 seconds")
+#'
+#' # Thirty second windows every 10 seconds, e.g. 09:00:00-09:00:30, 09:00:10-09:00:40, ...
+#' window(df$time, "30 seconds", "10 seconds")
+#'}
+setMethod("window", signature(x = "Column"),
+          function(x, windowDuration, slideDuration = NULL, startTime = NULL) {
+            stopifnot(is.character(windowDuration))
+            if (!is.null(slideDuration) && !is.null(startTime)) {
+              stopifnot(is.character(slideDuration) && is.character(startTime))
+              jc <- callJStatic("org.apache.spark.sql.functions",
+                                "window",
+                                x@jc, windowDuration, slideDuration, startTime)
+            } else if (!is.null(slideDuration)) {
+              stopifnot(is.character(slideDuration))
+              jc <- callJStatic("org.apache.spark.sql.functions",
+                                "window",
+                                x@jc, windowDuration, slideDuration)
+            } else if (!is.null(startTime)) {
+              stopifnot(is.character(startTime))
+              jc <- callJStatic("org.apache.spark.sql.functions",
+                                "window",
+                                x@jc, windowDuration, windowDuration, startTime)
+            } else {
+              jc <- callJStatic("org.apache.spark.sql.functions",
+                                "window",
+                                x@jc, windowDuration)
+            }
+            column(jc)
+          })
+
 #' locate
 #'
 #' Locate the position of the first occurrence of substr.
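
Not part of the commit, but for orientation: the typical way the new column is consumed is as a grouping key. A minimal sketch, mirroring the setup used in the new tests further down and assuming a SparkR session with `sqlContext` in scope; the sample data is illustrative.

df <- createDataFrame(sqlContext,
                      data.frame(t = c("2016-03-11 09:00:02", "2016-03-11 09:00:07"),
                                 v = c(1, 2)))
# Tumbling five-second windows; window() yields a struct column named
# 'window' with nested 'start' and 'end' fields, which groupBy() accepts.
counts <- count(groupBy(df, window(df$t, "5 seconds")))
head(counts)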

R/pkg/R/generics.R

Lines changed: 4 additions & 0 deletions
@@ -1152,6 +1152,10 @@ setGeneric("var_samp", function(x) { standardGeneric("var_samp") })
 #' @export
 setGeneric("weekofyear", function(x) { standardGeneric("weekofyear") })
 
+#' @rdname window
+#' @export
+setGeneric("window", function(x, ...) { standardGeneric("window") })
+
 #' @rdname year
 #' @export
 setGeneric("year", function(x) { standardGeneric("year") })

R/pkg/inst/tests/testthat/test_context.R

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@ test_that("Check masked functions", {
   maskedBySparkR <- masked[funcSparkROrEmpty]
   namesOfMasked <- c("describe", "cov", "filter", "lag", "na.omit", "predict", "sd", "var",
                      "colnames", "colnames<-", "intersect", "rank", "rbind", "sample", "subset",
-                     "summary", "transform", "drop")
+                     "summary", "transform", "drop", "window")
   expect_equal(length(maskedBySparkR), length(namesOfMasked))
   expect_equal(sort(maskedBySparkR), sort(namesOfMasked))
   # above are those reported as masked when `library(SparkR)`
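
The test update above exists because exporting `window` from SparkR masks `stats::window` once the package is attached. Base R's time-series version remains reachable with a namespace-qualified call, e.g. `stats::window(ldeaths, start = 1974)`.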

R/pkg/inst/tests/testthat/test_sparkSQL.R

Lines changed: 36 additions & 0 deletions
@@ -1204,6 +1204,42 @@ test_that("greatest() and least() on a DataFrame", {
   expect_equal(collect(select(df, least(df$a, df$b)))[, 1], c(1, 3))
 })
 
+test_that("time windowing (window()) with all inputs", {
+  df <- createDataFrame(sqlContext, data.frame(t = c("2016-03-11 09:00:07"), v = c(1)))
+  df$window <- window(df$t, "5 seconds", "5 seconds", "0 seconds")
+  local <- collect(df)$v
+  # Not checking time windows because of possible time zone issues. Just checking that the function
+  # works
+  expect_equal(local, c(1))
+})
+
+test_that("time windowing (window()) with slide duration", {
+  df <- createDataFrame(sqlContext, data.frame(t = c("2016-03-11 09:00:07"), v = c(1)))
+  df$window <- window(df$t, "5 seconds", "2 seconds")
+  local <- collect(df)$v
+  # Not checking time windows because of possible time zone issues. Just checking that the function
+  # works
+  expect_equal(local, c(1, 1))
+})
+
+test_that("time windowing (window()) with start time", {
+  df <- createDataFrame(sqlContext, data.frame(t = c("2016-03-11 09:00:07"), v = c(1)))
+  df$window <- window(df$t, "5 seconds", startTime = "2 seconds")
+  local <- collect(df)$v
+  # Not checking time windows because of possible time zone issues. Just checking that the function
+  # works
+  expect_equal(local, c(1))
+})
+
+test_that("time windowing (window()) with just window duration", {
+  df <- createDataFrame(sqlContext, data.frame(t = c("2016-03-11 09:00:07"), v = c(1)))
+  df$window <- window(df$t, "5 seconds")
+  local <- collect(df)$v
+  # Not checking time windows because of possible time zone issues. Just checking that the function
+  # works
+  expect_equal(local, c(1))
+})
+
 test_that("when(), otherwise() and ifelse() on a DataFrame", {
   l <- list(list(a = 1, b = 2), list(a = 3, b = 4))
   df <- createDataFrame(sqlContext, l)
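
A note on the expected values, since the tests deliberately avoid asserting on the window bounds themselves: with a 5-second window sliding every 2 seconds, the event at 09:00:07 falls into the two overlapping windows [09:00:04, 09:00:09) and [09:00:06, 09:00:11), so the single input row is emitted once per containing window, hence `c(1, 1)`. The other three configurations are tumbling, so each row lands in exactly one window.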

bin/spark-class

Lines changed: 1 addition & 1 deletion
@@ -44,7 +44,7 @@ fi
 
 if [ ! -d "$SPARK_JARS_DIR" ] && [ -z "$SPARK_TESTING$SPARK_SQL_TESTING" ]; then
   echo "Failed to find Spark jars directory ($SPARK_JARS_DIR)." 1>&2
-  echo "You need to build Spark before running this program." 1>&2
+  echo "You need to build Spark with the target \"package\" before running this program." 1>&2
   exit 1
 else
   LAUNCH_CLASSPATH="$SPARK_JARS_DIR/*"
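
The reworded message names the Maven lifecycle phase that actually populates the jars directory; a typical invocation (illustrative, any build that runs the "package" phase works) is `build/mvn -DskipTests clean package`.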

build/mvn

Lines changed: 3 additions & 3 deletions
@@ -72,7 +72,7 @@ install_mvn() {
   local MVN_VERSION="3.3.9"
 
   install_app \
-    "http://archive.apache.org/dist/maven/maven-3/${MVN_VERSION}/binaries" \
+    "https://archive.apache.org/dist/maven/maven-3/${MVN_VERSION}/binaries" \
     "apache-maven-${MVN_VERSION}-bin.tar.gz" \
     "apache-maven-${MVN_VERSION}/bin/mvn"
 
@@ -84,7 +84,7 @@ install_zinc() {
   local zinc_path="zinc-0.3.9/bin/zinc"
   [ ! -f "${_DIR}/${zinc_path}" ] && ZINC_INSTALL_FLAG=1
   install_app \
-    "http://downloads.typesafe.com/zinc/0.3.9" \
+    "https://downloads.typesafe.com/zinc/0.3.9" \
     "zinc-0.3.9.tgz" \
     "${zinc_path}"
   ZINC_BIN="${_DIR}/${zinc_path}"
@@ -100,7 +100,7 @@ install_scala() {
   local scala_bin="${_DIR}/scala-${scala_version}/bin/scala"
 
   install_app \
-    "http://downloads.typesafe.com/scala/${scala_version}" \
+    "https://downloads.typesafe.com/scala/${scala_version}" \
     "scala-${scala_version}.tgz" \
     "scala-${scala_version}/bin/scala"
 

common/network-common/src/main/java/org/apache/spark/network/protocol/MessageWithHeader.java

Lines changed: 29 additions & 1 deletion
@@ -18,6 +18,7 @@
 package org.apache.spark.network.protocol;
 
 import java.io.IOException;
+import java.nio.ByteBuffer;
 import java.nio.channels.WritableByteChannel;
 import javax.annotation.Nullable;
 
@@ -43,6 +44,14 @@ class MessageWithHeader extends AbstractReferenceCounted implements FileRegion {
   private final long bodyLength;
   private long totalBytesTransferred;
 
+  /**
+   * When the write buffer size is larger than this limit, I/O will be done in chunks of this size.
+   * The size should not be too large as it will waste underlying memory copy. e.g. If network
+   * available buffer is smaller than this limit, the data cannot be sent within one single write
+   * operation while it still will make memory copy with this size.
+   */
+  private static final int NIO_BUFFER_LIMIT = 256 * 1024;
+
   /**
    * Construct a new MessageWithHeader.
    *
@@ -128,8 +137,27 @@ protected void deallocate() {
   }
 
   private int copyByteBuf(ByteBuf buf, WritableByteChannel target) throws IOException {
-    int written = target.write(buf.nioBuffer());
+    ByteBuffer buffer = buf.nioBuffer();
+    int written = (buffer.remaining() <= NIO_BUFFER_LIMIT) ?
+      target.write(buffer) : writeNioBuffer(target, buffer);
     buf.skipBytes(written);
     return written;
   }
+
+  private int writeNioBuffer(
+      WritableByteChannel writeCh,
+      ByteBuffer buf) throws IOException {
+    int originalLimit = buf.limit();
+    int ret = 0;
+
+    try {
+      int ioSize = Math.min(buf.remaining(), NIO_BUFFER_LIMIT);
+      buf.limit(buf.position() + ioSize);
+      ret = writeCh.write(buf);
+    } finally {
+      buf.limit(originalLimit);
+    }
+
+    return ret;
+  }
 }
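
For context on the 256 KB cap (the usual rationale for this pattern, not stated in the diff): when a heap buffer larger than the socket can accept is handed to a channel write, the JDK's write path first copies the entire remaining region into a temporary direct buffer, so an oversized buffer pays an oversized copy on every partial write. Capping each call at NIO_BUFFER_LIMIT bounds that per-call copy, while Netty keeps invoking transferTo() until copyByteBuf() has drained the buffer.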

common/network-yarn/pom.xml

Lines changed: 2 additions & 2 deletions
@@ -36,7 +36,7 @@
     <!-- Make sure all Hadoop dependencies are provided to avoid repackaging. -->
     <hadoop.deps.scope>provided</hadoop.deps.scope>
     <shuffle.jar>${project.build.directory}/scala-${scala.binary.version}/spark-${project.version}-yarn-shuffle.jar</shuffle.jar>
-    <shade>org/spark-project/</shade>
+    <shade>org/spark_project/</shade>
   </properties>
 
   <dependencies>
@@ -91,7 +91,7 @@
       <relocations>
         <relocation>
           <pattern>com.fasterxml.jackson</pattern>
-          <shadedPattern>org.spark-project.com.fasterxml.jackson</shadedPattern>
+          <shadedPattern>${spark.shade.packageName}.com.fasterxml.jackson</shadedPattern>
           <includes>
             <include>com.fasterxml.jackson.**</include>
           </includes>
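
Two related fixes here: hyphens are not legal characters in Java package names, so the shaded prefix moves from org/spark-project/ to org/spark_project/, and the previously hard-coded Jackson relocation now reuses the ${spark.shade.packageName} property (presumably defined once in the parent POM) so the prefix lives in a single place. The log4j template change below tracks the same rename.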

conf/log4j.properties.template

Lines changed: 2 additions & 2 deletions
@@ -28,8 +28,8 @@ log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}:
 log4j.logger.org.apache.spark.repl.Main=WARN
 
 # Settings to quiet third party logs that are too verbose
-log4j.logger.org.spark-project.jetty=WARN
-log4j.logger.org.spark-project.jetty.util.component.AbstractLifeCycle=ERROR
+log4j.logger.org.spark_project.jetty=WARN
+log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR
 log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
 log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
 log4j.logger.org.apache.parquet=ERROR
