|
3 | 3 | package s2 |
4 | 4 |
|
5 | 5 | import ( |
| 6 | + "bytes" |
6 | 7 | "math/bits" |
7 | | - |
8 | | - "github.com/klauspost/compress/snappy" |
9 | 8 | ) |
10 | 9 |
|
11 | | -// EncodeSnappy returns the encoded form of src. The returned slice may be a sub- |
12 | | -// slice of dst if dst was large enough to hold the entire encoded block. |
13 | | -// Otherwise, a newly allocated slice will be returned. |
14 | | -// |
15 | | -// The output is Snappy compatible and will likely decompress faster. |
16 | | -// |
17 | | -// The dst and src must not overlap. It is valid to pass a nil dst. |
18 | | -// |
19 | | -// The blocks will require the same amount of memory to decode as encoding, |
20 | | -// and does not make for concurrent decoding. |
21 | | -// Also note that blocks do not contain CRC information, so corruption may be undetected. |
22 | | -// |
23 | | -// If you need to encode larger amounts of data, consider using |
24 | | -// the streaming interface which gives all of these features. |
25 | | -func EncodeSnappy(dst, src []byte) []byte { |
26 | | - return snappy.Encode(dst, src) |
27 | | -} |
28 | | - |
29 | 10 | // encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It |
30 | 11 | // assumes that the varint-encoded length of the decompressed bytes has already |
31 | 12 | // been written. |
@@ -188,6 +169,65 @@ func emitCopy(dst []byte, offset, length int) int { |
188 | 169 | return 2 |
189 | 170 | } |
190 | 171 |
|
| 172 | +// emitCopyNoRepeat writes a copy chunk and returns the number of bytes written. |
| 173 | +// |
| 174 | +// It assumes that: |
| 175 | +// dst is long enough to hold the encoded bytes |
| 176 | +// 1 <= offset && offset <= math.MaxUint32 |
| 177 | +// 4 <= length && length <= 1 << 24 |
| 178 | +func emitCopyNoRepeat(dst []byte, offset, length int) int { |
| 179 | + if offset >= 65536 { |
| 180 | + i := 0 |
| 181 | + if length > 64 { |
| 182 | + // Emit a length 64 copy, encoded as 5 bytes. |
| 183 | + dst[4] = uint8(offset >> 24) |
| 184 | + dst[3] = uint8(offset >> 16) |
| 185 | + dst[2] = uint8(offset >> 8) |
| 186 | + dst[1] = uint8(offset) |
| 187 | + dst[0] = 63<<2 | tagCopy4 |
| 188 | + length -= 64 |
| 189 | + if length >= 4 { |
| 190 | + // Emit remaining as repeats |
| 191 | + return 5 + emitCopyNoRepeat(dst[5:], offset, length) |
| 192 | + } |
| 193 | + i = 5 |
| 194 | + } |
| 195 | + if length == 0 { |
| 196 | + return i |
| 197 | + } |
| 198 | + // Emit a copy, offset encoded as 4 bytes. |
| 199 | + dst[i+0] = uint8(length-1)<<2 | tagCopy4 |
| 200 | + dst[i+1] = uint8(offset) |
| 201 | + dst[i+2] = uint8(offset >> 8) |
| 202 | + dst[i+3] = uint8(offset >> 16) |
| 203 | + dst[i+4] = uint8(offset >> 24) |
| 204 | + return i + 5 |
| 205 | + } |
| 206 | + |
| 207 | + // Offset no more than 2 bytes. |
| 208 | + if length > 64 { |
| 209 | + // Emit a length 60 copy, encoded as 3 bytes. |
| 210 | + // Emit remaining as repeat value (minimum 4 bytes). |
| 211 | + dst[2] = uint8(offset >> 8) |
| 212 | + dst[1] = uint8(offset) |
| 213 | + dst[0] = 59<<2 | tagCopy2 |
| 214 | + length -= 60 |
| 215 | + // Emit remaining as repeats, at least 4 bytes remain. |
| 216 | + return 3 + emitCopyNoRepeat(dst[3:], offset, length) |
| 217 | + } |
| 218 | + if length >= 12 || offset >= 2048 { |
| 219 | + // Emit the remaining copy, encoded as 3 bytes. |
| 220 | + dst[2] = uint8(offset >> 8) |
| 221 | + dst[1] = uint8(offset) |
| 222 | + dst[0] = uint8(length-1)<<2 | tagCopy2 |
| 223 | + return 3 |
| 224 | + } |
| 225 | + // Emit the remaining copy, encoded as 2 bytes. |
| 226 | + dst[1] = uint8(offset) |
| 227 | + dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1 |
| 228 | + return 2 |
| 229 | +} |
| 230 | + |
191 | 231 | // matchLen returns how many bytes match in a and b |
192 | 232 | // |
193 | 233 | // It assumes that: |
@@ -223,3 +263,188 @@ func matchLen(a []byte, b []byte) int { |
223 | 263 | } |
224 | 264 | return len(a) + checked |
225 | 265 | } |
| 266 | + |
| 267 | +func encodeBlockSnappy(dst, src []byte) (d int) { |
| 268 | + // Initialize the hash table. |
| 269 | + const ( |
| 270 | + tableBits = 14 |
| 271 | + maxTableSize = 1 << tableBits |
| 272 | + ) |
| 273 | + |
| 274 | + var table [maxTableSize]uint32 |
| 275 | + |
| 276 | + // sLimit is when to stop looking for offset/length copies. The inputMargin |
| 277 | + // lets us use a fast path for emitLiteral in the main loop, while we are |
| 278 | + // looking for copies. |
| 279 | + sLimit := len(src) - inputMargin |
| 280 | + |
| 281 | + // Bail if we can't compress to at least this. |
| 282 | + dstLimit := len(src) - len(src)>>5 - 5 |
| 283 | + |
| 284 | + // nextEmit is where in src the next emitLiteral should start from. |
| 285 | + nextEmit := 0 |
| 286 | + |
| 287 | + // The encoded form must start with a literal, as there are no previous |
| 288 | + // bytes to copy, so we start looking for hash matches at s == 1. |
| 289 | + s := 1 |
| 290 | + cv := load64(src, s) |
| 291 | + |
| 292 | + // We search for a repeat at -1, but don't output repeats when nextEmit == 0 |
| 293 | + repeat := 1 |
| 294 | + |
| 295 | + for { |
| 296 | + candidate := 0 |
| 297 | + for { |
| 298 | + // Next src position to check |
| 299 | + nextS := s + (s-nextEmit)>>6 + 4 |
| 300 | + if nextS > sLimit { |
| 301 | + goto emitRemainder |
| 302 | + } |
| 303 | + hash0 := hash6(cv, tableBits) |
| 304 | + hash1 := hash6(cv>>8, tableBits) |
| 305 | + candidate = int(table[hash0]) |
| 306 | + candidate2 := int(table[hash1]) |
| 307 | + table[hash0] = uint32(s) |
| 308 | + table[hash1] = uint32(s + 1) |
| 309 | + hash2 := hash6(cv>>16, tableBits) |
| 310 | + |
| 311 | + // Check repeat at offset checkRep. |
| 312 | + const checkRep = 1 |
| 313 | + if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) { |
| 314 | + base := s + checkRep |
| 315 | + // Extend back |
| 316 | + for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; { |
| 317 | + i-- |
| 318 | + base-- |
| 319 | + } |
| 320 | + d += emitLiteral(dst[d:], src[nextEmit:base]) |
| 321 | + |
| 322 | + // Extend forward |
| 323 | + candidate := s - repeat + 4 + checkRep |
| 324 | + s += 4 + checkRep |
| 325 | + for s <= sLimit { |
| 326 | + if diff := load64(src, s) ^ load64(src, candidate); diff != 0 { |
| 327 | + s += bits.TrailingZeros64(diff) >> 3 |
| 328 | + break |
| 329 | + } |
| 330 | + s += 8 |
| 331 | + candidate += 8 |
| 332 | + } |
| 333 | + |
| 334 | + d += emitCopyNoRepeat(dst[d:], repeat, s-base) |
| 335 | + nextEmit = s |
| 336 | + if s >= sLimit { |
| 337 | + goto emitRemainder |
| 338 | + } |
| 339 | + |
| 340 | + cv = load64(src, s) |
| 341 | + continue |
| 342 | + } |
| 343 | + |
| 344 | + if uint32(cv) == load32(src, candidate) { |
| 345 | + break |
| 346 | + } |
| 347 | + candidate = int(table[hash2]) |
| 348 | + if uint32(cv>>8) == load32(src, candidate2) { |
| 349 | + table[hash2] = uint32(s + 2) |
| 350 | + candidate = candidate2 |
| 351 | + s++ |
| 352 | + break |
| 353 | + } |
| 354 | + table[hash2] = uint32(s + 2) |
| 355 | + if uint32(cv>>16) == load32(src, candidate) { |
| 356 | + s += 2 |
| 357 | + break |
| 358 | + } |
| 359 | + |
| 360 | + cv = load64(src, nextS) |
| 361 | + s = nextS |
| 362 | + } |
| 363 | + |
| 364 | + // Extend backwards |
| 365 | + for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] { |
| 366 | + candidate-- |
| 367 | + s-- |
| 368 | + } |
| 369 | + |
| 370 | + // Bail if we exceed the maximum size. |
| 371 | + if d+(s-nextEmit) > dstLimit { |
| 372 | + return 0 |
| 373 | + } |
| 374 | + |
| 375 | + // A 4-byte match has been found. We'll later see if more than 4 bytes |
| 376 | + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit |
| 377 | + // them as literal bytes. |
| 378 | + |
| 379 | + d += emitLiteral(dst[d:], src[nextEmit:s]) |
| 380 | + |
| 381 | + // Call emitCopy, and then see if another emitCopy could be our next |
| 382 | + // move. Repeat until we find no match for the input immediately after |
| 383 | + // what was consumed by the last emitCopy call. |
| 384 | + // |
| 385 | + // If we exit this loop normally then we need to call emitLiteral next, |
| 386 | + // though we don't yet know how big the literal will be. We handle that |
| 387 | + // by proceeding to the next iteration of the main loop. We also can |
| 388 | + // exit this loop via goto if we get close to exhausting the input. |
| 389 | + for { |
| 390 | + // Invariant: we have a 4-byte match at s, and no need to emit any |
| 391 | + // literal bytes prior to s. |
| 392 | + base := s |
| 393 | + repeat = base - candidate |
| 394 | + |
| 395 | + // Extend the 4-byte match as long as possible. |
| 396 | + s += 4 |
| 397 | + candidate += 4 |
| 398 | + for s <= len(src)-8 { |
| 399 | + if diff := load64(src, s) ^ load64(src, candidate); diff != 0 { |
| 400 | + s += bits.TrailingZeros64(diff) >> 3 |
| 401 | + break |
| 402 | + } |
| 403 | + s += 8 |
| 404 | + candidate += 8 |
| 405 | + } |
| 406 | + |
| 407 | + d += emitCopyNoRepeat(dst[d:], repeat, s-base) |
| 408 | + if false { |
| 409 | + // Validate match. |
| 410 | + a := src[base:s] |
| 411 | + b := src[base-repeat : base-repeat+(s-base)] |
| 412 | + if !bytes.Equal(a, b) { |
| 413 | + panic("mismatch") |
| 414 | + } |
| 415 | + } |
| 416 | + |
| 417 | + nextEmit = s |
| 418 | + if s >= sLimit { |
| 419 | + goto emitRemainder |
| 420 | + } |
| 421 | + |
| 422 | + if d > dstLimit { |
| 423 | + // Do we have space for more, if not bail. |
| 424 | + return 0 |
| 425 | + } |
| 426 | + // Check for an immediate match, otherwise start search at s+1 |
| 427 | + x := load64(src, s-2) |
| 428 | + m2Hash := hash6(x, tableBits) |
| 429 | + currHash := hash6(x>>16, tableBits) |
| 430 | + candidate = int(table[currHash]) |
| 431 | + table[m2Hash] = uint32(s - 2) |
| 432 | + table[currHash] = uint32(s) |
| 433 | + if uint32(x>>16) != load32(src, candidate) { |
| 434 | + cv = load64(src, s+1) |
| 435 | + s++ |
| 436 | + break |
| 437 | + } |
| 438 | + } |
| 439 | + } |
| 440 | + |
| 441 | +emitRemainder: |
| 442 | + if nextEmit < len(src) { |
| 443 | + // Bail if we exceed the maximum size. |
| 444 | + if d+len(src)-nextEmit > dstLimit { |
| 445 | + return 0 |
| 446 | + } |
| 447 | + d += emitLiteral(dst[d:], src[nextEmit:]) |
| 448 | + } |
| 449 | + return d |
| 450 | +} |
0 commit comments