
Commit 5c7e871

Merge pull request #3 from apache/master
merge
2 parents: 33a8b0e + 4e7a4cd

364 files changed: +21878 −3471 lines


R/README.md

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@ export R_HOME=/home/username/R
 
 Build Spark with [Maven](https://spark.apache.org/docs/latest/building-spark.html#buildmvn) and include the `-Psparkr` profile to build the R package. For example to use the default Hadoop versions you can run
 
 ```bash
-build/mvn -DskipTests -Psparkr package
+./build/mvn -DskipTests -Psparkr package
 ```
 
 #### Running sparkR

R/pkg/R/functions.R

Lines changed: 2 additions & 2 deletions
@@ -2741,7 +2741,7 @@ setMethod("format_string", signature(format = "character", x = "Column"),
 #' head(tmp)}
 #' @note from_unixtime since 1.5.0
 setMethod("from_unixtime", signature(x = "Column"),
-          function(x, format = "yyyy-MM-dd HH:mm:ss") {
+          function(x, format = "uuuu-MM-dd HH:mm:ss") {
             jc <- callJStatic("org.apache.spark.sql.functions",
                               "from_unixtime",
                               x@jc, format)
@@ -3029,7 +3029,7 @@ setMethod("unix_timestamp", signature(x = "Column", format = "missing"),
 #' @aliases unix_timestamp,Column,character-method
 #' @note unix_timestamp(Column, character) since 1.5.0
 setMethod("unix_timestamp", signature(x = "Column", format = "character"),
-          function(x, format = "yyyy-MM-dd HH:mm:ss") {
+          function(x, format = "uuuu-MM-dd HH:mm:ss") {
             jc <- callJStatic("org.apache.spark.sql.functions", "unix_timestamp", x@jc, format)
             column(jc)
 })
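Context for the "yyyy" → "uuuu" swap above: the default formats are passed straight through to org.apache.spark.sql.functions, and the new pattern letters presumably follow java.time.format.DateTimeFormatter semantics, where u is the signed proleptic year and y is the year-of-era; the two agree for all common-era dates and diverge only before year 1. A minimal standalone Java sketch of the difference (the class name is illustrative):

```java
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;

public class YearPatternDemo {
  public static void main(String[] args) {
    // Proleptic year -44 is 45 BCE; for such dates 'u' and 'y' differ.
    LocalDate idesOfMarch = LocalDate.of(-44, 3, 15);

    // 'u' prints the signed proleptic year.
    System.out.println(DateTimeFormatter.ofPattern("uuuu-MM-dd").format(idesOfMarch)); // -0044-03-15

    // 'y' prints the (unsigned) year-of-era.
    System.out.println(DateTimeFormatter.ofPattern("yyyy-MM-dd").format(idesOfMarch)); // 0045-03-15

    // For common-era dates the two patterns produce identical output.
    LocalDate modern = LocalDate.of(2019, 7, 1);
    System.out.println(DateTimeFormatter.ofPattern("uuuu-MM-dd").format(modern)); // 2019-07-01
    System.out.println(DateTimeFormatter.ofPattern("yyyy-MM-dd").format(modern)); // 2019-07-01
  }
}
```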

README.md

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ This README file only contains basic setup instructions.
 Spark is built using [Apache Maven](https://maven.apache.org/).
 To build Spark and its example programs, run:
 
-    build/mvn -DskipTests clean package
+    ./build/mvn -DskipTests clean package
 
 (You do not need to do this if you downloaded a pre-built package.)

bin/spark-class

Lines changed: 15 additions & 3 deletions
@@ -68,15 +68,27 @@ fi
 # The exit code of the launcher is appended to the output, so the parent shell removes it from the
 # command array and checks the value to see if the launcher succeeded.
 build_command() {
-  "$RUNNER" -Xmx128m -cp "$LAUNCH_CLASSPATH" org.apache.spark.launcher.Main "$@"
+  "$RUNNER" -Xmx128m $SPARK_LAUNCHER_OPTS -cp "$LAUNCH_CLASSPATH" org.apache.spark.launcher.Main "$@"
   printf "%d\0" $?
 }
 
 # Turn off posix mode since it does not allow process substitution
 set +o posix
 CMD=()
-while IFS= read -d '' -r ARG; do
-  CMD+=("$ARG")
+DELIM=$'\n'
+CMD_START_FLAG="false"
+while IFS= read -d "$DELIM" -r ARG; do
+  if [ "$CMD_START_FLAG" == "true" ]; then
+    CMD+=("$ARG")
+  else
+    if [ "$ARG" == $'\0' ]; then
+      # After NULL character is consumed, change the delimiter and consume command string.
+      DELIM=''
+      CMD_START_FLAG="true"
+    elif [ "$ARG" != "" ]; then
+      echo "$ARG"
+    fi
+  fi
 done < <(build_command "$@")
 
 COUNT=${#CMD[@]}

conf/spark-env.sh.template

Lines changed: 3 additions & 0 deletions
@@ -56,6 +56,9 @@
 # - SPARK_DAEMON_CLASSPATH, to set the classpath for all daemons
 # - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers
 
+# Options for launcher
+# - SPARK_LAUNCHER_OPTS, to set config properties and Java options for the launcher (e.g. "-Dx=y")
+
 # Generic options for the daemons used in the standalone deploy mode
 # - SPARK_CONF_DIR      Alternate conf dir. (Default: ${SPARK_HOME}/conf)
 # - SPARK_LOG_DIR       Where log files are stored. (Default: ${SPARK_HOME}/logs)

core/pom.xml

Lines changed: 4 additions & 0 deletions
@@ -172,6 +172,10 @@
       <groupId>org.apache.commons</groupId>
       <artifactId>commons-math3</artifactId>
     </dependency>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-text</artifactId>
+    </dependency>
     <dependency>
       <groupId>com.google.code.findbugs</groupId>
       <artifactId>jsr305</artifactId>
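This excerpt doesn't show where core consumes the new commons-text dependency, so the snippet below is only a hedged illustration of two utilities the library provides, not a claim about this commit's call sites; the class name CommonsTextDemo is invented.

```java
import java.util.Map;

import org.apache.commons.text.StringSubstitutor;
import org.apache.commons.text.similarity.LevenshteinDistance;

public class CommonsTextDemo {
  public static void main(String[] args) {
    // Named-variable interpolation.
    String greeting = StringSubstitutor.replace("Hello ${name}", Map.of("name", "Spark"));
    System.out.println(greeting); // Hello Spark

    // Edit distance between two strings.
    int distance = LevenshteinDistance.getDefaultInstance().apply("shuffle", "shuffel");
    System.out.println(distance); // 2
  }
}
```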
ShuffleDataIO.java (new file)

Lines changed: 49 additions & 0 deletions

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.shuffle.api;

import org.apache.spark.annotation.Private;

/**
 * :: Private ::
 * An interface for plugging in modules for storing and reading temporary shuffle data.
 * <p>
 * This is the root of a plugin system for storing shuffle bytes to arbitrary storage
 * backends in the sort-based shuffle algorithm implemented by the
 * {@link org.apache.spark.shuffle.sort.SortShuffleManager}. If another shuffle algorithm is
 * needed instead of sort-based shuffle, one should implement
 * {@link org.apache.spark.shuffle.ShuffleManager} instead.
 * <p>
 * A single instance of this module is loaded per process in the Spark application.
 * The default implementation reads and writes shuffle data from the local disks of
 * the executor, and is the implementation of shuffle file storage that has remained
 * consistent throughout most of Spark's history.
 * <p>
 * Alternative implementations of shuffle data storage can be loaded via setting
 * <code>spark.shuffle.sort.io.plugin.class</code>.
 * @since 3.0.0
 */
@Private
public interface ShuffleDataIO {

  /**
   * Called once on executor processes to bootstrap the shuffle data storage modules that
   * are only invoked on the executors.
   */
  ShuffleExecutorComponents executor();
}
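To make the plugin contract concrete, here is a minimal hypothetical entry point. Everything named My* and the package com.example.shuffle are invented for illustration; per the Javadoc above, Spark would load such a class when spark.shuffle.sort.io.plugin.class points at it. The MyShuffleExecutorComponents it returns is sketched after the next interface.

```java
package com.example.shuffle;

import org.apache.spark.shuffle.api.ShuffleDataIO;
import org.apache.spark.shuffle.api.ShuffleExecutorComponents;

// Hypothetical root of a custom shuffle storage plugin; a single instance
// is loaded per process when spark.shuffle.sort.io.plugin.class names it.
public final class MyShuffleDataIO implements ShuffleDataIO {

  @Override
  public ShuffleExecutorComponents executor() {
    // Hand back the executor-side components (sketched below).
    return new MyShuffleExecutorComponents();
  }
}
```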
ShuffleExecutorComponents.java (new file)

Lines changed: 55 additions & 0 deletions

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.shuffle.api;

import java.io.IOException;

import org.apache.spark.annotation.Private;

/**
 * :: Private ::
 * An interface for building shuffle support for Executors.
 *
 * @since 3.0.0
 */
@Private
public interface ShuffleExecutorComponents {

  /**
   * Called once per executor to bootstrap this module with state that is specific to
   * that executor, specifically the application ID and executor ID.
   */
  void initializeExecutor(String appId, String execId);

  /**
   * Called once per map task to create a writer that will be responsible for persisting all the
   * partitioned bytes written by that map task.
   * @param shuffleId Unique identifier for the shuffle the map task is a part of
   * @param mapId Within the shuffle, the identifier of the map task
   * @param mapTaskAttemptId Identifier of the task attempt. Multiple attempts of the same map task
   *                         with the same (shuffleId, mapId) pair can be distinguished by the
   *                         different values of mapTaskAttemptId.
   * @param numPartitions The number of partitions that will be written by the map task. Some of
   *                      these partitions may be empty.
   */
  ShuffleMapOutputWriter createMapOutputWriter(
      int shuffleId,
      int mapId,
      long mapTaskAttemptId,
      int numPartitions) throws IOException;
}
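A matching hypothetical implementation of this interface, continuing the sketch above: it records the executor identity and hands each map task its own writer. MyShuffleMapOutputWriter is sketched after the next interface.

```java
package com.example.shuffle;

import java.io.IOException;

import org.apache.spark.shuffle.api.ShuffleExecutorComponents;
import org.apache.spark.shuffle.api.ShuffleMapOutputWriter;

// Hypothetical executor-side components for the sketch plugin.
public final class MyShuffleExecutorComponents implements ShuffleExecutorComponents {

  private String appId;
  private String execId;

  @Override
  public void initializeExecutor(String appId, String execId) {
    // Capture per-executor identity; a real backend might also open
    // connections to its storage service here.
    this.appId = appId;
    this.execId = execId;
  }

  @Override
  public ShuffleMapOutputWriter createMapOutputWriter(
      int shuffleId,
      int mapId,
      long mapTaskAttemptId,
      int numPartitions) throws IOException {
    // One writer per map task attempt (sketched after the next interface).
    return new MyShuffleMapOutputWriter(
        appId, execId, shuffleId, mapId, mapTaskAttemptId, numPartitions);
  }
}
```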
ShuffleMapOutputWriter.java (new file)

Lines changed: 71 additions & 0 deletions

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.shuffle.api;

import java.io.IOException;

import org.apache.spark.annotation.Private;

/**
 * :: Private ::
 * A top-level writer that returns child writers for persisting the output of a map task,
 * and then commits all of the writes as one atomic operation.
 *
 * @since 3.0.0
 */
@Private
public interface ShuffleMapOutputWriter {

  /**
   * Creates a writer that can open an output stream to persist bytes targeted for a given reduce
   * partition id.
   * <p>
   * The chunk corresponds to bytes in the given reduce partition. This will not be called twice
   * for the same partition within any given map task. The partition identifier will be in the
   * range of precisely 0 (inclusive) to numPartitions (exclusive), where numPartitions was
   * provided upon the creation of this map output writer via
   * {@link ShuffleExecutorComponents#createMapOutputWriter(int, int, long, int)}.
   * <p>
   * Calls to this method will be invoked with monotonically increasing reducePartitionIds; each
   * call to this method will be called with a reducePartitionId that is strictly greater than
   * the reducePartitionIds given to any previous call to this method. This method is not
   * guaranteed to be called for every partition id in the above described range. In particular,
   * no guarantees are made as to whether or not this method will be called for empty partitions.
   */
  ShufflePartitionWriter getPartitionWriter(int reducePartitionId) throws IOException;

  /**
   * Commits the writes done by all partition writers returned by all calls to this object's
   * {@link #getPartitionWriter(int)}.
   * <p>
   * This should ensure that the writes conducted by this module's partition writers are
   * available to downstream reduce tasks. If this method throws any exception, this module's
   * {@link #abort(Throwable)} method will be invoked before propagating the exception.
   * <p>
   * This can also close any resources and clean up temporary state if necessary.
   */
  void commitAllPartitions() throws IOException;

  /**
   * Abort all of the writes done by any writers returned by {@link #getPartitionWriter(int)}.
   * <p>
   * This should invalidate the results of writing bytes. This can also close any resources and
   * clean up temporary state if necessary.
   */
  void abort(Throwable error) throws IOException;
}
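Continuing the hypothetical sketch: a map output writer that stores each reduce partition in its own local file, flushes everything on commit, and deletes everything on abort. All names remain invented, and a real implementation must honor the ordering and atomicity notes in the Javadoc above; MyShufflePartitionWriter is sketched after the next interface.

```java
package com.example.shuffle;

import java.io.File;
import java.io.IOException;

import org.apache.spark.shuffle.api.ShuffleMapOutputWriter;
import org.apache.spark.shuffle.api.ShufflePartitionWriter;

// Hypothetical per-map-task writer backed by one local file per partition.
public final class MyShuffleMapOutputWriter implements ShuffleMapOutputWriter {

  private final File dir;
  private final MyShufflePartitionWriter[] partitionWriters;

  MyShuffleMapOutputWriter(
      String appId,
      String execId,
      int shuffleId,
      int mapId,
      long mapTaskAttemptId,
      int numPartitions) throws IOException {
    this.dir = new File(System.getProperty("java.io.tmpdir"),
        appId + "-" + execId + "-" + shuffleId + "-" + mapId + "-" + mapTaskAttemptId);
    if (!dir.mkdirs() && !dir.isDirectory()) {
      throw new IOException("Could not create shuffle output dir " + dir);
    }
    this.partitionWriters = new MyShufflePartitionWriter[numPartitions];
  }

  @Override
  public ShufflePartitionWriter getPartitionWriter(int reducePartitionId) throws IOException {
    // Called at most once per partition, with strictly increasing ids.
    MyShufflePartitionWriter writer =
        new MyShufflePartitionWriter(new File(dir, "part-" + reducePartitionId));
    partitionWriters[reducePartitionId] = writer;
    return writer;
  }

  @Override
  public void commitAllPartitions() throws IOException {
    // The map task normally closes each partition stream itself; closing
    // again here is a defensive flush before the files become visible.
    for (MyShufflePartitionWriter writer : partitionWriters) {
      if (writer != null) {
        writer.closeUnderlyingStream();
      }
    }
  }

  @Override
  public void abort(Throwable error) throws IOException {
    // Best-effort invalidation: remove everything written so far.
    File[] files = dir.listFiles();
    if (files != null) {
      for (File file : files) {
        file.delete();
      }
    }
    dir.delete();
  }
}
```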
ShufflePartitionWriter.java (new file)

Lines changed: 98 additions & 0 deletions

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.shuffle.api;

import java.io.IOException;
import java.util.Optional;
import java.io.OutputStream;

import org.apache.spark.annotation.Private;

/**
 * :: Private ::
 * An interface for opening streams to persist partition bytes to a backing data store.
 * <p>
 * This writer stores bytes for one (mapper, reducer) pair, corresponding to one shuffle
 * block.
 *
 * @since 3.0.0
 */
@Private
public interface ShufflePartitionWriter {

  /**
   * Open and return an {@link OutputStream} that can write bytes to the underlying
   * data store.
   * <p>
   * This method will only be called once on this partition writer in the map task, to write the
   * bytes to the partition. The output stream will only be used to write the bytes for this
   * partition. The map task closes this output stream upon writing all the bytes for this
   * block, or if the write fails for any reason.
   * <p>
   * Implementations that intend on combining the bytes for all the partitions written by this
   * map task should reuse the same OutputStream instance across all the partition writers provided
   * by the parent {@link ShuffleMapOutputWriter}. If one does so, ensure that
   * {@link OutputStream#close()} does not close the resource, since it will be reused across
   * partition writes. The underlying resources should be cleaned up in
   * {@link ShuffleMapOutputWriter#commitAllPartitions()} and
   * {@link ShuffleMapOutputWriter#abort(Throwable)}.
   */
  OutputStream openStream() throws IOException;

  /**
   * Opens and returns a {@link WritableByteChannelWrapper} for transferring bytes from
   * input byte channels to the underlying shuffle data store.
   * <p>
   * This method will only be called once on this partition writer in the map task, to write the
   * bytes to the partition. The channel will only be used to write the bytes for this
   * partition. The map task closes this channel upon writing all the bytes for this
   * block, or if the write fails for any reason.
   * <p>
   * Implementations that intend on combining the bytes for all the partitions written by this
   * map task should reuse the same channel instance across all the partition writers provided
   * by the parent {@link ShuffleMapOutputWriter}. If one does so, ensure that
   * {@link WritableByteChannelWrapper#close()} does not close the resource, since the channel
   * will be reused across partition writes. The underlying resources should be cleaned up in
   * {@link ShuffleMapOutputWriter#commitAllPartitions()} and
   * {@link ShuffleMapOutputWriter#abort(Throwable)}.
   * <p>
   * This method is primarily for advanced optimizations where bytes can be copied from the input
   * spill files to the output channel without copying data into memory. If such optimizations are
   * not supported, the implementation should return {@link Optional#empty()}. By default, the
   * implementation returns {@link Optional#empty()}.
   * <p>
   * Note that the returned {@link WritableByteChannelWrapper} itself is closed, but not the
   * underlying channel that is returned by {@link WritableByteChannelWrapper#channel()}. Ensure
   * that the underlying channel is cleaned up in {@link WritableByteChannelWrapper#close()},
   * {@link ShuffleMapOutputWriter#commitAllPartitions()}, or
   * {@link ShuffleMapOutputWriter#abort(Throwable)}.
   */
  default Optional<WritableByteChannelWrapper> openChannelWrapper() throws IOException {
    return Optional.empty();
  }

  /**
   * Returns the number of bytes written either by this writer's output stream opened by
   * {@link #openStream()} or the byte channel opened by {@link #openChannelWrapper()}.
   * <p>
   * This can be different from the number of bytes given by the caller. For example, the
   * stream might compress or encrypt the bytes before persisting the data to the backing
   * data store.
   */
  long getNumBytesWritten();
}
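The last piece of the hypothetical sketch: a partition writer that counts bytes as they pass through to the file, so getNumBytesWritten() reports what actually reached the store. It keeps the default openChannelWrapper(), i.e. Optional.empty(), so only the stream path is supported.

```java
package com.example.shuffle;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.OutputStream;

import org.apache.spark.shuffle.api.ShufflePartitionWriter;

// Hypothetical partition writer for the sketch plugin.
final class MyShufflePartitionWriter implements ShufflePartitionWriter {

  private final File file;
  private CountingOutputStream stream;

  MyShufflePartitionWriter(File file) {
    this.file = file;
  }

  @Override
  public OutputStream openStream() throws IOException {
    // Called at most once per map task (see the Javadoc above).
    stream = new CountingOutputStream(
        new BufferedOutputStream(new FileOutputStream(file)));
    return stream;
  }

  @Override
  public long getNumBytesWritten() {
    return stream == null ? 0L : stream.count;
  }

  void closeUnderlyingStream() throws IOException {
    // Defensive close used by MyShuffleMapOutputWriter.commitAllPartitions();
    // closing the stream a second time is harmless here.
    if (stream != null) {
      stream.close();
    }
  }

  // Minimal byte-counting decorator around the real file stream.
  private static final class CountingOutputStream extends FilterOutputStream {
    long count;

    CountingOutputStream(OutputStream out) {
      super(out);
    }

    @Override
    public void write(int b) throws IOException {
      out.write(b);
      count++;
    }

    @Override
    public void write(byte[] b, int off, int len) throws IOException {
      out.write(b, off, len);
      count += len;
    }
  }
}
```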
