[SPARK-19112][CORE] Support for ZStandard codec #18805
Changes from 1 commit
core/src/main/scala/org/apache/spark/io/CompressionCodec.scala

@@ -20,6 +20,7 @@ package org.apache.spark.io
 import java.io._
 import java.util.Locale
 
+import com.github.luben.zstd.{ZstdInputStream, ZstdOutputStream}
 import com.ning.compress.lzf.{LZFInputStream, LZFOutputStream}
 import net.jpountz.lz4.LZ4BlockOutputStream
 import org.xerial.snappy.{Snappy, SnappyInputStream, SnappyOutputStream}

@@ -50,13 +51,14 @@ private[spark] object CompressionCodec {
   private[spark] def supportsConcatenationOfSerializedStreams(codec: CompressionCodec): Boolean = {
     (codec.isInstanceOf[SnappyCompressionCodec] || codec.isInstanceOf[LZFCompressionCodec]
-      || codec.isInstanceOf[LZ4CompressionCodec])
+      || codec.isInstanceOf[LZ4CompressionCodec] || codec.isInstanceOf[ZStandardCompressionCodec])
   }
 
   private val shortCompressionCodecNames = Map(
     "lz4" -> classOf[LZ4CompressionCodec].getName,
     "lzf" -> classOf[LZFCompressionCodec].getName,
-    "snappy" -> classOf[SnappyCompressionCodec].getName)
+    "snappy" -> classOf[SnappyCompressionCodec].getName,
+    "zstd" -> classOf[SnappyCompressionCodec].getName)
 
   def getCodecName(conf: SparkConf): String = {
     conf.get(configKey, DEFAULT_COMPRESSION_CODEC)

@@ -216,3 +218,30 @@ private final class SnappyOutputStreamWrapper(os: SnappyOutputStream) extends OutputStream {
     }
   }
 }
+
+/**
+ * :: DeveloperApi ::
+ * ZStandard implementation of [[org.apache.spark.io.CompressionCodec]].
+ *
+ * @note The wire protocol for this codec is not guaranteed to be compatible across versions
+ * of Spark. This is intended for use as an internal compression utility within a single Spark
+ * application.
+ */
+@DeveloperApi
+class ZStandardCompressionCodec(conf: SparkConf) extends CompressionCodec {
+
+  override def compressedOutputStream(s: OutputStream): OutputStream = {
+    val level = conf.getSizeAsBytes("spark.io.compression.zstandard.level", "1").toInt
+    val compressionBuffer = conf.getSizeAsBytes("spark.io.compression.lz4.blockSize", "32k").toInt
+    // Wrap the zstd output stream in a buffered output stream, so that we can
+    // avoid the excessive overhead of JNI calls when compressing small amounts of data.
+    new BufferedOutputStream(new ZstdOutputStream(s, level), compressionBuffer)
+  }
+
+  override def compressedInputStream(s: InputStream): InputStream = {
+    val compressionBuffer = conf.getSizeAsBytes("spark.io.compression.lz4.blockSize", "32k").toInt
+    // Wrap the zstd input stream in a buffered input stream so that we can
+    // avoid the excessive overhead of JNI calls when decompressing small amounts of data.
+    new BufferedInputStream(new ZstdInputStream(s), compressionBuffer)
+  }
+}
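For context, here is a minimal round-trip sketch of how the new codec would be exercised. It assumes the zstd-jni dependency is on the classpath and instantiates ZStandardCompressionCodec directly (the CompressionCodec factory object itself is private[spark]); the payload and the object name ZstdRoundTrip are made up for illustration.

```scala
import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import org.apache.spark.SparkConf
import org.apache.spark.io.ZStandardCompressionCodec

// Hypothetical round-trip check: compress a payload and read it back.
object ZstdRoundTrip {
  def main(args: Array[String]): Unit = {
    // Compression level key as defined in this commit; "1" is also its default.
    val conf = new SparkConf().set("spark.io.compression.zstandard.level", "1")
    val codec = new ZStandardCompressionCodec(conf)

    val payload = Array.fill[Byte](1 << 16)(42.toByte)

    // Compress through the buffered zstd output stream.
    val compressed = new ByteArrayOutputStream()
    val out = codec.compressedOutputStream(compressed)
    out.write(payload)
    out.close() // closing flushes the buffer and finishes the zstd frame

    // Decompress and verify the bytes survive the round trip.
    val in = codec.compressedInputStream(new ByteArrayInputStream(compressed.toByteArray))
    val restored = new Array[Byte](payload.length)
    var offset = 0
    while (offset < restored.length) {
      val n = in.read(restored, offset, restored.length - offset)
      require(n > 0, "unexpected end of compressed stream")
      offset += n
    }
    in.close()

    assert(java.util.Arrays.equals(payload, restored))
    println(s"round trip ok, compressed size = ${compressed.size()} bytes")
  }
}
```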
Review comment (on the "zstd" -> classOf[SnappyCompressionCodec].getName line):
you mean ZStandardCompressionCodec?

Author's reply:
Ah, my bad. Fixed it.
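Based on that exchange, the corrected entry presumably registers the zstd short name against the new class rather than the Snappy one, along these lines:

```scala
private val shortCompressionCodecNames = Map(
  "lz4" -> classOf[LZ4CompressionCodec].getName,
  "lzf" -> classOf[LZFCompressionCodec].getName,
  "snappy" -> classOf[SnappyCompressionCodec].getName,
  "zstd" -> classOf[ZStandardCompressionCodec].getName)
```

With that mapping in place, setting spark.io.compression.codec to "zstd" resolves to ZStandardCompressionCodec by its short name via getCodecName, instead of silently falling back to the Snappy implementation.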