|
18 | 18 | package org.apache.spark.broadcast |
19 | 19 |
|
20 | 20 | import java.io._ |
21 | | -import java.lang.ref.SoftReference |
| 21 | +import java.lang.ref.{Reference, SoftReference, WeakReference} |
22 | 22 | import java.nio.ByteBuffer |
23 | 23 | import java.util.zip.Adler32 |
24 | 24 |
|
@@ -65,9 +65,10 @@ private[spark] class TorrentBroadcast[T: ClassTag](obj: T, id: Long, serializedO |
65 | 65 | * |
66 | 66 | * On the driver, if the value is required, it is read lazily from the block manager. We hold |
67 | 67 | * a soft reference so that it can be garbage collected if required, as we can always reconstruct |
68 | | - * in the future. |
| 68 | + * in the future. For internal broadcast variables where `serializedOnly = true`, we hold a |
| 69 | + * WeakReference to allow the value to be reclaimed more aggressively. |
69 | 70 | */ |
70 | | - @transient private var _value: SoftReference[T] = _ |
| 71 | + @transient private var _value: Reference[T] = _ |
71 | 72 |
|
72 | 73 | /** The compression codec to use, or None if compression is disabled */ |
73 | 74 | @transient private var compressionCodec: Option[CompressionCodec] = _ |
@@ -106,7 +107,11 @@ private[spark] class TorrentBroadcast[T: ClassTag](obj: T, id: Long, serializedO |
106 | 107 | memoized |
107 | 108 | } else { |
108 | 109 | val newlyRead = readBroadcastBlock() |
109 | | - _value = new SoftReference[T](newlyRead) |
| 110 | + _value = if (serializedOnly) { |
| 111 | + new WeakReference[T](newlyRead) |
| 112 | + } else { |
| 113 | + new SoftReference[T](newlyRead) |
| 114 | + } |
110 | 115 | newlyRead |
111 | 116 | } |
112 | 117 | } |
@@ -140,9 +145,9 @@ private[spark] class TorrentBroadcast[T: ClassTag](obj: T, id: Long, serializedO |
140 | 145 | // skipping the store reduces driver memory pressure because we don't add a long-lived |
141 | 146 | // reference to the broadcasted object. However, this optimization cannot be applied for |
142 | 147 | // local mode (since tasks might run on the driver). To guard against performance |
143 | | - // regressions if an internal broadcast is accessed on the driver, we store a soft |
| 148 | + // regressions if an internal broadcast is accessed on the driver, we store a weak |
144 | 149 | // reference to the broadcasted value: |
145 | | - _value = new SoftReference[T](value) |
| 150 | + _value = new WeakReference[T](value) |
146 | 151 | } else { |
147 | 152 | // Store a copy of the broadcast variable in the driver so that tasks run on the driver |
148 | 153 | // do not create a duplicate copy of the broadcast variable's value. |
|
0 commit comments