diff --git a/core/src/main/scala/org/apache/spark/rdd/ReliableCheckpointRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ReliableCheckpointRDD.scala
index fddb9353018a..63f271740973 100644
--- a/core/src/main/scala/org/apache/spark/rdd/ReliableCheckpointRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/ReliableCheckpointRDD.scala
@@ -243,7 +243,16 @@ private[spark] object ReliableCheckpointRDD extends Logging {
     if (fs.exists(partitionerFilePath)) {
       val fileInputStream = fs.open(partitionerFilePath, bufferSize)
       val serializer = SparkEnv.get.serializer.newInstance()
-      val deserializeStream = serializer.deserializeStream(fileInputStream)
+      // make sure that the file is closed if an error occurs during deserialization
+      val deserializeStream =
+        try {
+          serializer.deserializeStream(fileInputStream)
+        } catch {
+          case ex: Throwable =>
+            fileInputStream.close()
+            throw ex
+        }
+
       val partitioner = Utils.tryWithSafeFinally[Partitioner] {
         deserializeStream.readObject[Partitioner]
       } {
diff --git a/core/src/test/scala/org/apache/spark/FileSuite.scala b/core/src/test/scala/org/apache/spark/FileSuite.scala
index 993834f8d7d4..26fbd0ca224f 100644
--- a/core/src/test/scala/org/apache/spark/FileSuite.scala
+++ b/core/src/test/scala/org/apache/spark/FileSuite.scala
@@ -56,10 +56,15 @@ class FileSuite extends SparkFunSuite with LocalSparkContext {
     nums.saveAsTextFile(outputDir)
     // Read the plain text file and check it's OK
     val outputFile = new File(outputDir, "part-00000")
-    val content = Source.fromFile(outputFile).mkString
-    assert(content === "1\n2\n3\n4\n")
-    // Also try reading it in as a text file RDD
-    assert(sc.textFile(outputDir).collect().toList === List("1", "2", "3", "4"))
+    val bufferSrc = Source.fromFile(outputFile)
+    try {
+      val content = bufferSrc.mkString
+      assert(content === "1\n2\n3\n4\n")
+      // Also try reading it in as a text file RDD
+      assert(sc.textFile(outputDir).collect().toList === List("1", "2", "3", "4"))
+    } finally {
+      bufferSrc.close()
+    }
   }

   test("text files (compressed)") {
diff --git a/core/src/test/scala/org/apache/spark/deploy/RPackageUtilsSuite.scala b/core/src/test/scala/org/apache/spark/deploy/RPackageUtilsSuite.scala
index 13cba94578a6..bd426c778d6d 100644
--- a/core/src/test/scala/org/apache/spark/deploy/RPackageUtilsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/RPackageUtilsSuite.scala
@@ -77,6 +77,7 @@ class RPackageUtilsSuite
       assert(RPackageUtils.checkManifestForR(jars(0)), "should have R code")
       assert(!RPackageUtils.checkManifestForR(jars(1)), "should not have R code")
       assert(!RPackageUtils.checkManifestForR(jars(2)), "should not have R code")
+      jars.foreach(_.close())
     }
   }

@@ -144,13 +145,15 @@ class RPackageUtilsSuite
       IvyTestUtils.writeFile(fakePackageDir, "DESCRIPTION", "abc")
       val finalZip = RPackageUtils.zipRLibraries(tempDir, "sparkr.zip")
       assert(finalZip.exists())
-      val entries = new ZipFile(finalZip).entries().asScala.map(_.getName).toSeq
+      val zipFile = new ZipFile(finalZip)
+      val entries = zipFile.entries().asScala.map(_.getName).toSeq
       assert(entries.contains("/test.R"))
       assert(entries.contains("/SparkR/abc.R"))
       assert(entries.contains("/SparkR/DESCRIPTION"))
       assert(!entries.contains("/package.zip"))
       assert(entries.contains("/packageTest/def.R"))
       assert(entries.contains("/packageTest/DESCRIPTION"))
+      zipFile.close()
     } finally {
       FileUtils.deleteDirectory(tempDir)
     }
diff --git a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala
index 39c5857b1345..327b33aef005 100644
--- a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala
@@ -415,8 +415,14 @@ class FsHistoryProviderSuite extends SparkFunSuite with BeforeAndAfter with Matc
     val cstream = codec.map(_.compressedOutputStream(fstream)).getOrElse(fstream)
     val bstream = new BufferedOutputStream(cstream)
     if (isNewFormat) {
-      EventLoggingListener.initEventLog(new FileOutputStream(file))
+      val newFormatStream = new FileOutputStream(file)
+      Utils.tryWithSafeFinally {
+        EventLoggingListener.initEventLog(newFormatStream)
+      } {
+        newFormatStream.close()
+      }
     }
+
     val writer = new OutputStreamWriter(bstream, StandardCharsets.UTF_8)
     Utils.tryWithSafeFinally {
       events.foreach(e => writer.write(compact(render(JsonProtocol.sparkEventToJson(e))) + "\n"))
diff --git a/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala
index 176d8930aad1..c7e607076309 100644
--- a/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala
@@ -202,8 +202,6 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit

     // Make sure expected events exist in the log file.
     val logData = EventLoggingListener.openEventLog(new Path(eventLogger.logPath), fileSystem)
-    val logStart = SparkListenerLogStart(SPARK_VERSION)
-    val lines = readLines(logData)
     val eventSet = mutable.Set(
       SparkListenerApplicationStart,
       SparkListenerBlockManagerAdded,
@@ -216,19 +214,25 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit
       SparkListenerTaskStart,
       SparkListenerTaskEnd,
       SparkListenerApplicationEnd).map(Utils.getFormattedClassName)
-    lines.foreach { line =>
-      eventSet.foreach { event =>
-        if (line.contains(event)) {
-          val parsedEvent = JsonProtocol.sparkEventFromJson(parse(line))
-          val eventType = Utils.getFormattedClassName(parsedEvent)
-          if (eventType == event) {
-            eventSet.remove(event)
+    try {
+      val logStart = SparkListenerLogStart(SPARK_VERSION)
+      val lines = readLines(logData)
+      lines.foreach { line =>
+        eventSet.foreach { event =>
+          if (line.contains(event)) {
+            val parsedEvent = JsonProtocol.sparkEventFromJson(parse(line))
+            val eventType = Utils.getFormattedClassName(parsedEvent)
+            if (eventType == event) {
+              eventSet.remove(event)
+            }
           }
         }
       }
+      assert(JsonProtocol.sparkEventFromJson(parse(lines(0))) === logStart)
+      assert(eventSet.isEmpty, "The following events are missing: " + eventSet.toSeq)
+    } finally {
+      logData.close()
     }
-    assert(JsonProtocol.sparkEventFromJson(parse(lines(0))) === logStart)
-    assert(eventSet.isEmpty, "The following events are missing: " + eventSet.toSeq)
   }

   private def readLines(in: InputStream): Seq[String] = {
diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskResultGetterSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskResultGetterSuite.scala
index b5385c11a926..8a811751e318 100644
--- a/core/src/test/scala/org/apache/spark/scheduler/TaskResultGetterSuite.scala
+++ b/core/src/test/scala/org/apache/spark/scheduler/TaskResultGetterSuite.scala
@@ -183,9 +183,9 @@ class TaskResultGetterSuite extends SparkFunSuite with BeforeAndAfter with Local

     // ensure we reset the classloader after the test completes
     val originalClassLoader = Thread.currentThread.getContextClassLoader
+    val loader = new MutableURLClassLoader(new Array[URL](0), originalClassLoader)
     try {
       // load the exception from the jar
-      val loader = new MutableURLClassLoader(new Array[URL](0), originalClassLoader)
       loader.addURL(jarFile.toURI.toURL)
       Thread.currentThread().setContextClassLoader(loader)
       val excClass: Class[_] = Utils.classForName("repro.MyException")
@@ -210,6 +210,7 @@ class TaskResultGetterSuite extends SparkFunSuite with BeforeAndAfter with Local
       assert(expectedFailure.findFirstMatchIn(exceptionMessage).isDefined)
       assert(unknownFailure.findFirstMatchIn(exceptionMessage).isEmpty)
     } finally {
+      loader.close()
       Thread.currentThread.setContextClassLoader(originalClassLoader)
     }
   }
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala
index 0c6aabf1926e..2aab018d13b0 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala
@@ -151,12 +151,14 @@ class MLUtilsSuite extends SparkFunSuite with MLlibTestSparkContext {
     val tempDir = Utils.createTempDir()
     val outputDir = new File(tempDir, "output")
     MLUtils.saveAsLibSVMFile(examples, outputDir.toURI.toString)
-    val lines = outputDir.listFiles()
+    val sources = outputDir.listFiles()
       .filter(_.getName.startsWith("part-"))
-      .flatMap(Source.fromFile(_).getLines())
-      .toSet
+      .map(Source.fromFile)
+
+    val lines = sources.flatMap(_.getLines()).toSet
     val expected = Set("1.1 1:1.23 3:4.56", "0.0 1:1.01 2:2.02 3:3.03")
     assert(lines === expected)
+    sources.foreach(_.close())
     Utils.deleteRecursively(tempDir)
   }

diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala
index 9aa2f0bbb995..4e2d6594c523 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala
@@ -194,10 +194,13 @@ class ReceiverTracker(ssc: StreamingContext, skipReceiverLaunch: Boolean = false
       // Finally, stop the endpoint
       ssc.env.rpcEnv.stop(endpoint)
       endpoint = null
-      receivedBlockTracker.stop()
       logInfo("ReceiverTracker stopped")
       trackerState = Stopped
     }
+
+    // Note that the output writer is created at construction time; we have to close it
+    // even if the tracker has not been started.
+    receivedBlockTracker.stop()
   }

   /** Allocate all unallocated blocks to the given batch. */
diff --git a/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java b/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java
index 01f0c4de9e3c..a47b2faa8b2f 100644
--- a/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java
+++ b/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java
@@ -1805,6 +1805,7 @@ public Integer call(String s) {
     // will be re-processed after recovery
     List<List<Integer>> finalResult = JavaCheckpointTestUtils.runStreams(ssc, 2, 3);
     assertOrderInvariantEquals(expectedFinal, finalResult.subList(1, 3));
+    ssc.stop();
     Utils.deleteRecursively(tempDir);
   }

diff --git a/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala
index bdbac64b9bc7..5bf430c02b5e 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala
@@ -640,16 +640,18 @@ class CheckpointSuite extends TestSuiteBase with DStreamCheckpointTester
     val fileStream = ssc.textFileStream(testDir.toString)
     // Make value 3 take a large time to process, to ensure that the driver
     // shuts down in the middle of processing the 3rd batch
-    CheckpointSuite.batchThreeShouldBlockIndefinitely = true
-    val mappedStream = fileStream.map(s => {
+    CheckpointSuite.batchThreeShouldBlockALongTime = true
+    val mappedStream = fileStream.map { s =>
       val i = s.toInt
       if (i == 3) {
-        while (CheckpointSuite.batchThreeShouldBlockIndefinitely) {
-          Thread.sleep(Long.MaxValue)
+        if (CheckpointSuite.batchThreeShouldBlockALongTime) {
+          // It's not a good idea to let the thread run forever
+          // as resources won't be correctly released
+          Thread.sleep(6000)
         }
       }
       i
-    })
+    }

     // Reducing over a large window to ensure that recovery from driver failure
     // requires reprocessing of all the files seen before the failure
@@ -689,7 +691,7 @@ class CheckpointSuite extends TestSuiteBase with DStreamCheckpointTester
     }

     // The original StreamingContext has now been stopped.
-    CheckpointSuite.batchThreeShouldBlockIndefinitely = false
+    CheckpointSuite.batchThreeShouldBlockALongTime = false

     // Create files while the streaming driver is down
     for (i <- Seq(4, 5, 6)) {
@@ -926,5 +928,5 @@ class CheckpointSuite extends TestSuiteBase with DStreamCheckpointTester
 }

 private object CheckpointSuite extends Serializable {
-  var batchThreeShouldBlockIndefinitely: Boolean = true
+  var batchThreeShouldBlockALongTime: Boolean = true
 }
diff --git a/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala
index 3b662ec1833a..f8a329a4c075 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala
@@ -38,19 +38,16 @@ class MapWithStateSuite extends SparkFunSuite
   protected val batchDuration = Seconds(1)

   before {
-    StreamingContext.getActive().foreach { _.stop(stopSparkContext = false) }
-    checkpointDir = Utils.createTempDir("checkpoint")
+    StreamingContext.getActive().foreach(_.stop(stopSparkContext = false))
   }

   after {
-    StreamingContext.getActive().foreach { _.stop(stopSparkContext = false) }
-    if (checkpointDir != null) {
-      Utils.deleteRecursively(checkpointDir)
-    }
+    StreamingContext.getActive().foreach(_.stop(stopSparkContext = false))
   }

   override def beforeAll(): Unit = {
     super.beforeAll()
+    checkpointDir = Utils.createTempDir("checkpoint")
     val conf = new SparkConf().setMaster("local").setAppName("MapWithStateSuite")
     conf.set("spark.streaming.clock", classOf[ManualClock].getName())
     sc = new SparkContext(conf)
@@ -63,6 +60,7 @@ class MapWithStateSuite extends SparkFunSuite
       }
     } finally {
       super.afterAll()
+      Utils.deleteRecursively(checkpointDir)
     }
   }

diff --git a/streaming/src/test/scala/org/apache/spark/streaming/MasterFailureTest.scala b/streaming/src/test/scala/org/apache/spark/streaming/MasterFailureTest.scala
index 60c8e702352c..fff2d6fbace3 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/MasterFailureTest.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/MasterFailureTest.scala
@@ -164,6 +164,7 @@ object MasterFailureTest extends Logging {
     val mergedOutput = runStreams(ssc, lastExpectedOutput, maxTimeToRun)

     fileGeneratingThread.join()
+    ssc.stop()
     fs.delete(checkpointDir, true)
     fs.delete(testDir, true)
     logInfo("Finished test after " + killCount + " failures")
diff --git a/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockTrackerSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockTrackerSuite.scala
index 851013bb1e84..107c3f5dcc08 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockTrackerSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockTrackerSuite.scala
@@ -134,6 +134,7 @@ class ReceivedBlockTrackerSuite
     val expectedWrittenData1 = blockInfos1.map(BlockAdditionEvent)
     getWrittenLogData() shouldEqual expectedWrittenData1
     getWriteAheadLogFiles() should have size 1
+    tracker1.stop()

     incrementTime()

@@ -141,6 +142,7 @@ class ReceivedBlockTrackerSuite
     val tracker1_ = createTracker(clock = manualClock, recoverFromWriteAheadLog = false)
     tracker1_.getUnallocatedBlocks(streamId) shouldBe empty
     tracker1_.hasUnallocatedReceivedBlocks should be (false)
+    tracker1_.stop()

     // Restart tracker and verify recovered list of unallocated blocks
     val tracker2 = createTracker(clock = manualClock, recoverFromWriteAheadLog = true)
@@ -163,6 +165,7 @@ class ReceivedBlockTrackerSuite
     val blockInfos2 = addBlockInfos(tracker2)
     tracker2.allocateBlocksToBatch(batchTime2)
     tracker2.getBlocksOfBatchAndStream(batchTime2, streamId) shouldEqual blockInfos2
+    tracker2.stop()

     // Verify whether log has correct contents
     val expectedWrittenData2 = expectedWrittenData1 ++
@@ -192,6 +195,7 @@ class ReceivedBlockTrackerSuite
       getWriteAheadLogFiles() should not contain oldestLogFile
     }
     printLogFiles("After clean")
+    tracker3.stop()

     // Restart tracker and verify recovered state, specifically whether info about the first
     // batch has been removed, but not the second batch
@@ -200,6 +204,7 @@ class ReceivedBlockTrackerSuite
     tracker4.getUnallocatedBlocks(streamId) shouldBe empty
     tracker4.getBlocksOfBatchAndStream(batchTime1, streamId) shouldBe empty // should be cleaned
     tracker4.getBlocksOfBatchAndStream(batchTime2, streamId) shouldEqual blockInfos2
+    tracker4.stop()
   }

   test("disable write ahead log when checkpoint directory is not set") {
diff --git a/streaming/src/test/scala/org/apache/spark/streaming/util/WriteAheadLogSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/util/WriteAheadLogSuite.scala
index 24cb5afee33c..4bec52b9fe4f 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/util/WriteAheadLogSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/util/WriteAheadLogSuite.scala
@@ -139,6 +139,7 @@ abstract class CommonWriteAheadLogTests(
         assert(getLogFilesInDirectory(testDir).size < logFiles.size)
       }
     }
+    writeAheadLog.close()
   }

   test(testPrefix + "handling file errors while reading rotating logs") {
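
For reference, below is a minimal, self-contained sketch of the cleanup pattern these hunks keep applying: do the work, close the resource in a finally block, and do not let a failure from close() hide an exception thrown by the work itself. It is illustrative only; ResourceCleanupSketch and closeAfterUse are hypothetical names for this note, not part of the patch and not Spark's Utils.tryWithSafeFinally. The same shape appears throughout the patch, whether the resource is a Source, a stream, a ZipFile, a classloader, a tracker, or a StreamingContext.

import java.io.Closeable
import java.nio.file.Files
import scala.io.Source

object ResourceCleanupSketch {

  // Run `body` with `resource` and always close the resource afterwards; a failure
  // thrown by close() must not mask an exception already thrown by `body`.
  def closeAfterUse[R <: Closeable, T](resource: R)(body: R => T): T = {
    var bodyFailed = false
    try {
      body(resource)
    } catch {
      case e: Throwable =>
        bodyFailed = true
        throw e
    } finally {
      try {
        resource.close()
      } catch {
        // If the body already failed, swallow the close() error so the original
        // exception propagates; otherwise let the close() error surface.
        case _: Throwable if bodyFailed => ()
      }
    }
  }

  def main(args: Array[String]): Unit = {
    val file = Files.createTempFile("cleanup-sketch", ".txt")
    Files.write(file, "1\n2\n3\n".getBytes("UTF-8"))
    // Same shape as the try/finally blocks added in FileSuite and MLUtilsSuite:
    // the Source is closed whether or not the body succeeds.
    val lines = closeAfterUse(Source.fromFile(file.toFile))(_.getLines().toList)
    assert(lines == List("1", "2", "3"))
    Files.delete(file)
  }
}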