-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-10399][SPARK-23879][CORE][SQL] Introduce multiple MemoryBlocks to choose several types of memory block #19222
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 43 commits
f427ca2
5d7ccdb
251fa09
790bbe7
93a792e
0beab03
fcf764c
d2d2e50
f5e10bb
1905e8c
7778e58
4f96c82
d1d6ae9
914dcd1
336e4b7
5be9ccb
43e6b57
05f024e
9071cf6
37ee9fa
d0b5d59
5cdad44
91028fa
0210bd1
df6dad3
1fa47a8
01f9c8e
2ed8f82
5e3afd1
95fbdee
9b4975b
9e2697c
8cd4853
1bed048
c79585f
77cdb81
ee5a798
c9f401a
cf2d532
eb0cc6d
6f57994
3a93d61
abf6ba0
4567781
95ffd0f
9cbb876
291203c
a62770b
f9bc4d6
c6d45ea
5284593
b1750a1
38ddf48
4e46a18
511d58d
1939fda
cb4b30b
c53b6b8
45aa736
8690e43
59fd393
b69cb64
94c9648
e4a7016
3d03f92
50326ca
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,6 +18,7 @@ | |
| package org.apache.spark.unsafe.array; | ||
|
|
||
| import org.apache.spark.unsafe.Platform; | ||
| import org.apache.spark.unsafe.memory.MemoryBlock; | ||
|
|
||
| public class ByteArrayMethods { | ||
|
|
||
|
|
@@ -48,6 +49,16 @@ public static int roundNumberOfBytesToNearestWord(int numBytes) { | |
| public static int MAX_ROUNDED_ARRAY_LENGTH = Integer.MAX_VALUE - 15; | ||
|
|
||
| private static final boolean unaligned = Platform.unaligned(); | ||
| /** | ||
| * MemoryBlock equality check for MemoryBlocks. | ||
| * @return true if the arrays are equal, false otherwise | ||
| */ | ||
| public static boolean arrayEqualsBlock( | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It should have new doc instead of using
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no one is calling it.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This method is called from
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this only works for |
||
| MemoryBlock leftBase, long leftOffset, MemoryBlock rightBase, long rightOffset, final long length) { | ||
|
||
| return arrayEquals(leftBase.getBaseObject(), leftBase.getBaseOffset() + leftOffset, | ||
| rightBase.getBaseObject(), rightBase.getBaseOffset() + rightOffset, length); | ||
| } | ||
|
|
||
| /** | ||
| * Optimized byte array equality check for byte arrays. | ||
| * @return true if the arrays are equal, false otherwise | ||
|
|
@@ -56,7 +67,7 @@ public static boolean arrayEquals( | |
| Object leftBase, long leftOffset, Object rightBase, long rightOffset, final long length) { | ||
| int i = 0; | ||
|
|
||
| // check if stars align and we can get both offsets to be aligned | ||
| // check if starts align and we can get both offsets to be aligned | ||
| if ((leftOffset % 8) == (rightOffset % 8)) { | ||
| while ((leftOffset + i) % 8 != 0 && i < length) { | ||
| if (Platform.getByte(leftBase, leftOffset + i) != | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,7 +17,6 @@ | |
|
|
||
| package org.apache.spark.unsafe.array; | ||
|
|
||
| import org.apache.spark.unsafe.Platform; | ||
| import org.apache.spark.unsafe.memory.MemoryBlock; | ||
|
|
||
| /** | ||
|
|
@@ -33,16 +32,12 @@ public final class LongArray { | |
| private static final long WIDTH = 8; | ||
|
|
||
| private final MemoryBlock memory; | ||
| private final Object baseObj; | ||
| private final long baseOffset; | ||
|
|
||
| private final long length; | ||
|
|
||
| public LongArray(MemoryBlock memory) { | ||
| assert memory.size() < (long) Integer.MAX_VALUE * 8: "Array size >= Integer.MAX_VALUE elements"; | ||
| this.memory = memory; | ||
| this.baseObj = memory.getBaseObject(); | ||
| this.baseOffset = memory.getBaseOffset(); | ||
| this.length = memory.size() / WIDTH; | ||
| } | ||
|
|
||
|
|
@@ -51,11 +46,11 @@ public MemoryBlock memoryBlock() { | |
| } | ||
|
|
||
| public Object getBaseObject() { | ||
| return baseObj; | ||
| return memory.getBaseObject(); | ||
| } | ||
|
|
||
| public long getBaseOffset() { | ||
| return baseOffset; | ||
| return memory.getBaseOffset(); | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -69,8 +64,9 @@ public long size() { | |
| * Fill this all with 0L. | ||
| */ | ||
| public void zeroOut() { | ||
| long baseOffset = memory.getBaseOffset(); | ||
| for (long off = baseOffset; off < baseOffset + length * WIDTH; off += WIDTH) { | ||
|
||
| Platform.putLong(baseObj, off, 0); | ||
| memory.putLong(off, 0); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -80,7 +76,7 @@ public void zeroOut() { | |
| public void set(int index, long value) { | ||
| assert index >= 0 : "index (" + index + ") should >= 0"; | ||
| assert index < length : "index (" + index + ") should < length (" + length + ")"; | ||
| Platform.putLong(baseObj, baseOffset + index * WIDTH, value); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. update it to use 0-based offset. |
||
| memory.putLong(memory.getBaseOffset() + index * WIDTH, value); | ||
|
||
| } | ||
|
|
||
| /** | ||
|
|
@@ -89,6 +85,6 @@ public void set(int index, long value) { | |
| public long get(int index) { | ||
| assert index >= 0 : "index (" + index + ") should >= 0"; | ||
| assert index < length : "index (" + index + ") should < length (" + length + ")"; | ||
| return Platform.getLong(baseObj, baseOffset + index * WIDTH); | ||
| return memory.getLong(memory.getBaseOffset() + index * WIDTH); | ||
|
||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,6 +18,7 @@ | |
| package org.apache.spark.unsafe.bitset; | ||
|
|
||
| import org.apache.spark.unsafe.Platform; | ||
| import org.apache.spark.unsafe.memory.MemoryBlock; | ||
|
||
|
|
||
| /** | ||
| * Methods for working with fixed-size uncompressed bitsets. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,6 +18,7 @@ | |
| package org.apache.spark.unsafe.hash; | ||
|
|
||
| import org.apache.spark.unsafe.Platform; | ||
| import org.apache.spark.unsafe.memory.MemoryBlock; | ||
|
|
||
| /** | ||
| * 32-bit Murmur3 hasher. This is based on Guava's Murmur3_32HashFunction. | ||
|
|
@@ -49,55 +50,81 @@ public static int hashInt(int input, int seed) { | |
| } | ||
|
|
||
| public int hashUnsafeWords(Object base, long offset, int lengthInBytes) { | ||
| return hashUnsafeWords(base, offset, lengthInBytes, seed); | ||
| return hashUnsafeWordsBlock(MemoryBlock.allocateFromObject(base, offset, lengthInBytes), seed); | ||
| } | ||
|
|
||
| public static int hashUnsafeWordsBlock(MemoryBlock base, int seed) { | ||
| // This is based on Guava's `Murmur32_Hasher.processRemaining(ByteBuffer)` method. | ||
| int lengthInBytes = (int)base.size(); | ||
|
||
| assert (lengthInBytes % 8 == 0): "lengthInBytes must be a multiple of 8 (word-aligned)"; | ||
| int h1 = hashBytesByIntBlock(base, seed); | ||
| return fmix(h1, lengthInBytes); | ||
| } | ||
|
|
||
| public static int hashUnsafeWords(Object base, long offset, int lengthInBytes, int seed) { | ||
| // This is based on Guava's `Murmur32_Hasher.processRemaining(ByteBuffer)` method. | ||
| assert (lengthInBytes % 8 == 0): "lengthInBytes must be a multiple of 8 (word-aligned)"; | ||
|
||
| int h1 = hashBytesByInt(base, offset, lengthInBytes, seed); | ||
| int h1 = hashBytesByIntBlock(MemoryBlock.allocateFromObject(base, offset, lengthInBytes), seed); | ||
|
||
| return fmix(h1, lengthInBytes); | ||
|
||
| } | ||
|
|
||
| public static int hashUnsafeBytes(Object base, long offset, int lengthInBytes, int seed) { | ||
| public static int hashUnsafeBytesBlock(MemoryBlock base, int seed) { | ||
| // This is not compatible with original and another implementations. | ||
| // But remain it for backward compatibility for the components existing before 2.3. | ||
| long offset = base.getBaseOffset(); | ||
|
||
| int lengthInBytes = (int)base.size(); | ||
| assert (lengthInBytes >= 0): "lengthInBytes cannot be negative"; | ||
| int lengthAligned = lengthInBytes - lengthInBytes % 4; | ||
| int h1 = hashBytesByInt(base, offset, lengthAligned, seed); | ||
| int h1 = hashBytesByIntBlock(base.subBlock(0, lengthAligned), seed); | ||
| for (int i = lengthAligned; i < lengthInBytes; i++) { | ||
| int halfWord = Platform.getByte(base, offset + i); | ||
| int halfWord = base.getByte(offset + i); | ||
| int k1 = mixK1(halfWord); | ||
| h1 = mixH1(h1, k1); | ||
| } | ||
| return fmix(h1, lengthInBytes); | ||
| } | ||
|
|
||
| public static int hashUnsafeBytes(Object base, long offset, int lengthInBytes, int seed) { | ||
| return hashUnsafeBytesBlock(MemoryBlock.allocateFromObject(base, offset, lengthInBytes), seed); | ||
| } | ||
|
|
||
| public static int hashUnsafeBytes2(Object base, long offset, int lengthInBytes, int seed) { | ||
| return hashUnsafeBytes2Block(MemoryBlock.allocateFromObject(base, offset, lengthInBytes), seed); | ||
| } | ||
|
|
||
| public static int hashUnsafeBytes2Block(MemoryBlock base, int seed) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can't we just call
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ditto |
||
| // This is compatible with original and another implementations. | ||
| // Use this method for new components after Spark 2.3. | ||
| assert (lengthInBytes >= 0): "lengthInBytes cannot be negative"; | ||
| long offset = base.getBaseOffset(); | ||
| int lengthInBytes = (int)base.size(); | ||
| assert (lengthInBytes >= 0) : "lengthInBytes cannot be negative"; | ||
| int lengthAligned = lengthInBytes - lengthInBytes % 4; | ||
| int h1 = hashBytesByInt(base, offset, lengthAligned, seed); | ||
| int h1 = hashBytesByIntBlock(base.subBlock(0, lengthAligned), seed); | ||
| int k1 = 0; | ||
| for (int i = lengthAligned, shift = 0; i < lengthInBytes; i++, shift += 8) { | ||
| k1 ^= (Platform.getByte(base, offset + i) & 0xFF) << shift; | ||
| k1 ^= (base.getByte(offset + i) & 0xFF) << shift; | ||
| } | ||
| h1 ^= mixK1(k1); | ||
| return fmix(h1, lengthInBytes); | ||
| } | ||
|
|
||
| private static int hashBytesByInt(Object base, long offset, int lengthInBytes, int seed) { | ||
| private static int hashBytesByIntBlock(MemoryBlock base, int seed) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can't we just call
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ditto |
||
| long offset = base.getBaseOffset(); | ||
| int lengthInBytes = (int)base.size(); | ||
| assert (lengthInBytes % 4 == 0); | ||
| int h1 = seed; | ||
| for (int i = 0; i < lengthInBytes; i += 4) { | ||
| int halfWord = Platform.getInt(base, offset + i); | ||
| int halfWord = base.getInt(offset + i); | ||
| int k1 = mixK1(halfWord); | ||
| h1 = mixH1(h1, k1); | ||
| } | ||
| return h1; | ||
| } | ||
|
|
||
| private static int hashBytesByInt(Object base, long offset, int lengthInBytes, int seed) { | ||
| return hashBytesByIntBlock(MemoryBlock.allocateFromObject(base, offset, lengthInBytes), seed); | ||
| } | ||
|
|
||
| public int hashLong(long input) { | ||
| return hashLong(input, seed); | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,121 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.unsafe.memory; | ||
|
|
||
| import org.apache.spark.unsafe.Platform; | ||
|
|
||
| /** | ||
| * A consecutive block of memory with a byte array on Java heap. | ||
| */ | ||
| public final class ByteArrayMemoryBlock extends MemoryBlock { | ||
|
|
||
| private final byte[] array; | ||
|
|
||
| public ByteArrayMemoryBlock(byte[] obj, long offset, long length) { | ||
|
||
| super(obj, offset, (long)length); | ||
| this.array = obj; | ||
| } | ||
|
|
||
| public ByteArrayMemoryBlock(long length) { | ||
| this(new byte[(int)length], Platform.BYTE_ARRAY_OFFSET, length); | ||
|
||
| } | ||
|
|
||
| @Override | ||
| public MemoryBlock subBlock(long offset, long size) { | ||
| return new ByteArrayMemoryBlock(array, this.offset + offset, size); | ||
| } | ||
|
|
||
| public byte[] getByteArray() { return array; } | ||
|
|
||
| /** | ||
| * Creates a memory block pointing to the memory used by the byte array. | ||
| */ | ||
| public static ByteArrayMemoryBlock fromArray(final byte[] array) { | ||
| return new ByteArrayMemoryBlock(array, Platform.BYTE_ARRAY_OFFSET, array.length); | ||
| } | ||
|
|
||
| @Override | ||
| public final int getInt(long offset) { | ||
| return Platform.getInt(array, offset); | ||
| } | ||
|
|
||
| @Override | ||
| public final void putInt(long offset, int value) { | ||
| Platform.putInt(array, offset, value); | ||
| } | ||
|
|
||
| @Override | ||
| public final boolean getBoolean(long offset) { | ||
| return Platform.getBoolean(array, offset); | ||
| } | ||
|
|
||
| @Override | ||
| public final void putBoolean(long offset, boolean value) { | ||
| Platform.putBoolean(array, offset, value); | ||
| } | ||
|
|
||
| @Override | ||
| public final byte getByte(long offset) { | ||
| return array[(int)(offset - Platform.BYTE_ARRAY_OFFSET)]; | ||
| } | ||
|
|
||
| @Override | ||
| public final void putByte(long offset, byte value) { | ||
| array[(int)(offset - Platform.BYTE_ARRAY_OFFSET)] = value; | ||
| } | ||
|
|
||
| @Override | ||
| public final short getShort(long offset) { | ||
| return Platform.getShort(array, offset); | ||
| } | ||
|
|
||
| @Override | ||
| public final void putShort(long offset, short value) { | ||
| Platform.putShort(array, offset, value); | ||
| } | ||
|
|
||
| @Override | ||
| public final long getLong(long offset) { | ||
| return Platform.getLong(array, offset); | ||
| } | ||
|
|
||
| @Override | ||
| public final void putLong(long offset, long value) { | ||
| Platform.putLong(array, offset, value); | ||
| } | ||
|
|
||
| @Override | ||
| public final float getFloat(long offset) { | ||
| return Platform.getFloat(array, offset); | ||
| } | ||
|
|
||
| @Override | ||
| public final void putFloat(long offset, float value) { | ||
| Platform.putFloat(array, offset, value); | ||
| } | ||
|
|
||
| @Override | ||
| public final double getDouble(long offset) { | ||
| return Platform.getDouble(array, offset); | ||
| } | ||
|
|
||
| @Override | ||
| public final void putDouble(long offset, double value) { | ||
| Platform.putDouble(array, offset, value); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
overflow check?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
or we just use long here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let us use
long.