8 changes: 8 additions & 0 deletions python/pyspark/sql/tests/test_arrow.py
@@ -183,6 +183,14 @@ def test_filtered_frame(self):
         self.assertEqual(pdf.columns[0], "i")
         self.assertTrue(pdf.empty)
 
+    def test_no_partition_frame(self):
+        schema = StructType([StructField("field1", StringType(), True)])
+        df = self.spark.createDataFrame(self.sc.emptyRDD(), schema)
+        pdf = df.toPandas()
+        self.assertEqual(len(pdf.columns), 1)
+        self.assertEqual(pdf.columns[0], "field1")
+        self.assertTrue(pdf.empty)
+
     def _createDataFrame_toggle(self, pdf, schema=None):
         with self.sql_conf({"spark.sql.execution.arrow.enabled": False}):
             df_no_arrow = self.spark.createDataFrame(pdf, schema=schema)
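Note (not part of the diff): the case this new test covers is a DataFrame whose underlying RDD has zero partitions. A minimal Scala sketch of the same scenario, assuming only an active SparkSession named spark:

import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{StringType, StructField, StructType}

// Sketch only: an RDD built with emptyRDD has zero partitions, so the Arrow
// collect path receives no partition results at all (see the review discussion below).
val schema = StructType(Seq(StructField("field1", StringType, nullable = true)))
val df = spark.createDataFrame(spark.sparkContext.emptyRDD[Row], schema)
assert(df.rdd.getNumPartitions == 0)  // expected to hold for this construction
assert(df.collect().isEmpty)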
26 changes: 17 additions & 9 deletions sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -3310,23 +3310,31 @@ class Dataset[T] private[sql](

           // After last batch, end the stream and write batch order indices
           if (partitionCount == numPartitions) {
-            batchWriter.end()
-            out.writeInt(batchOrder.length)
-            // Sort by (index of partition, batch index in that partition) tuple to get the
-            // overall_batch_index from 0 to N-1 batches, which can be used to put the
-            // transferred batches in the correct order
-            batchOrder.zipWithIndex.sortBy(_._1).foreach { case (_, overallBatchIndex) =>
-              out.writeInt(overallBatchIndex)
-            }
-            out.flush()
+            doAfterLastPartition()
           }
         }
 
+        def doAfterLastPartition(): Unit = {
+          batchWriter.end()
+          out.writeInt(batchOrder.length)
+          // Sort by (index of partition, batch index in that partition) tuple to get the
+          // overall_batch_index from 0 to N-1 batches, which can be used to put the
+          // transferred batches in the correct order
+          batchOrder.zipWithIndex.sortBy(_._1).foreach { case (_, overallBatchIndex) =>
+            out.writeInt(overallBatchIndex)
+          }
+          out.flush()
+        }
+
         sparkSession.sparkContext.runJob(
           arrowBatchRdd,
           (ctx: TaskContext, it: Iterator[Array[Byte]]) => it.toArray,
           0 until numPartitions,
           handlePartitionBatches)
+
+        if (numPartitions == 0) {

This method is well-commented. Can you add another comment noting that we should end the stream when the partitions are empty?

Also, I would do:

val partitions = 0 until numPartitions
sparkSession.sparkContext.runJob(
  arrowBatchRdd,
  (ctx: TaskContext, it: Iterator[Array[Byte]]) => it.toArray,
  partitions,
  handlePartitionBatches)

if (partitions.isEmpty) {
  // Currently the result handler is not called when the given partitions are empty.
  // Therefore, we should end the stream here.
  doAfterLastPartition()
}
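For context, a small standalone illustration (not from this PR) of the behavior the comment above relies on: when runJob is given an empty partition list, no tasks are launched and the result handler is never invoked, so the stream has to be ended separately. Assumes an active SparkSession named spark.

import org.apache.spark.TaskContext
import org.apache.spark.rdd.RDD

// Sketch only: the handler below is never called because the partition list is empty.
val rdd: RDD[Int] = spark.sparkContext.parallelize(Seq(1, 2, 3), numSlices = 2)
spark.sparkContext.runJob(
  rdd,
  (ctx: TaskContext, it: Iterator[Int]) => it.toArray,
  Seq.empty[Int],                                   // no partitions to run
  (index: Int, result: Array[Int]) => println(s"handler called for partition $index"))
// Nothing is printed: zero partitions means zero result-handler calls.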

+          doAfterLastPartition()
+        }
       }
     }
   }
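Aside (not part of the diff): the reordering scheme described in the comment inside doAfterLastPartition can be sketched with plain collections. Batches are streamed to Python in arrival order, and the indices written afterwards tell the reader where each logical (partition index, batch index) batch sits in the stream. The arrival order below is hypothetical.

import scala.collection.mutable.ArrayBuffer

// Sketch only: batchOrder records (partition index, batch index in that partition)
// in the order the batches were transferred.
val batchOrder = ArrayBuffer((2, 0), (0, 0), (0, 1), (1, 0))

// zipWithIndex attaches each batch's stream position; sorting by the (partition, batch)
// key and keeping those positions yields, for the i-th logical batch, its position in
// the transferred stream.
val overallOrder = batchOrder.zipWithIndex.sortBy(_._1).map(_._2)
// overallOrder == ArrayBuffer(1, 2, 3, 0): e.g. the first batch transferred, (2, 0),
// is logically last.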