-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-24257][SQL]LongToUnsafeRowMap calculate the new size may be wrong #21311
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
d9d8e62
22a2767
6fe1dd0
f3916e7
d7da8ae
b8b6324
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -568,13 +568,16 @@ private[execution] final class LongToUnsafeRowMap(val mm: TaskMemoryManager, cap | |
| } | ||
|
|
||
| // There is 8 bytes for the pointer to next value | ||
| if (cursor + 8 + row.getSizeInBytes > page.length * 8L + Platform.LONG_ARRAY_OFFSET) { | ||
| val needSize = cursor + 8 + row.getSizeInBytes | ||
| val nowSize = page.length * 8L + Platform.LONG_ARRAY_OFFSET | ||
| if (needSize > nowSize) { | ||
| val used = page.length | ||
| if (used >= (1 << 30)) { | ||
| sys.error("Can not build a HashedRelation that is larger than 8G") | ||
| } | ||
| ensureAcquireMemory(used * 8L * 2) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Doubling the size when growing is very typical, seems what you want to address is when the memory is enough for the requsted size but not enough for doubling the size. I'd suggest we should double the size most of the time, as long as there is enough memory.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok . Doubling the size when growing. |
||
| val newPage = new Array[Long](used * 2) | ||
| val multiples = math.max(math.ceil(needSize.toDouble / (used * 8L)).toInt, 2) | ||
| ensureAcquireMemory(used * 8L * multiples) | ||
|
||
| val newPage = new Array[Long](used * multiples) | ||
| Platform.copyMemory(page, Platform.LONG_ARRAY_OFFSET, newPage, Platform.LONG_ARRAY_OFFSET, | ||
| cursor - Platform.LONG_ARRAY_OFFSET) | ||
| page = newPage | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -254,6 +254,30 @@ class HashedRelationSuite extends SparkFunSuite with SharedSQLContext { | |
| map.free() | ||
| } | ||
|
|
||
| test("LongToUnsafeRowMap with big values") { | ||
| val taskMemoryManager = new TaskMemoryManager( | ||
| new StaticMemoryManager( | ||
| new SparkConf().set(MEMORY_OFFHEAP_ENABLED.key, "false"), | ||
| Long.MaxValue, | ||
| Long.MaxValue, | ||
| 1), | ||
| 0) | ||
| val unsafeProj = UnsafeProjection.create(Seq(BoundReference(0, StringType, false))) | ||
|
||
| val keys = Seq(0L) | ||
| val map = new LongToUnsafeRowMap(taskMemoryManager, 1) | ||
| val bigStr = UTF8String.fromString("x" * 1024 * 1024 * 2) | ||
|
||
| keys.foreach { k => | ||
|
||
| map.append(k, unsafeProj(InternalRow(bigStr))) | ||
| } | ||
| map.optimize() | ||
| val row = unsafeProj(InternalRow(bigStr)).copy() | ||
|
||
| keys.foreach { k => | ||
| assert(map.getValue(k, row) eq row) | ||
| assert(row.getUTF8String(0) === bigStr) | ||
| } | ||
| map.free() | ||
| } | ||
|
|
||
| test("Spark-14521") { | ||
| val ser = new KryoSerializer( | ||
| (new SparkConf).set("spark.kryo.referenceTracking", "false")).newInstance() | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is not related to this pr though,
sys.errorinstead ofUnsupportedOperationException?spark/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UTF8StringBuilder.java
Line 45 in b6c50d7
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok. sys.error instead of UnsupportedOperationException