-
-
Notifications
You must be signed in to change notification settings - Fork 34.2k
buffer: optimize hex parsing. #7602
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,27 @@ | ||
| 'use strict'; | ||
|
|
||
| const common = require('../common.js'); | ||
|
|
||
| const bench = common.createBenchmark(main, { | ||
| len: [0, 1, 64, 1024], | ||
| n: [1e7] | ||
| }); | ||
|
|
||
| function main(conf) { | ||
| const len = conf.len | 0; | ||
| const n = conf.n | 0; | ||
| const buf = Buffer.alloc(len); | ||
| var i, b; | ||
|
|
||
| for (i = 0; i < buf.length; i++) | ||
| buf[i] = i & 0xff; | ||
|
|
||
| const hex = buf.toString('hex'); | ||
|
|
||
| bench.start(); | ||
|
|
||
| for (i = 0; i < n; i += 1) | ||
| b = Buffer.from(hex, 'hex'); | ||
|
|
||
| bench.end(n); | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -143,16 +143,27 @@ const int8_t unbase64_table[256] = | |
| }; | ||
|
|
||
|
|
||
| template <typename TypeName> | ||
| unsigned hex2bin(TypeName c) { | ||
| if (c >= '0' && c <= '9') | ||
| return c - '0'; | ||
| if (c >= 'A' && c <= 'F') | ||
| return 10 + (c - 'A'); | ||
| if (c >= 'a' && c <= 'f') | ||
| return 10 + (c - 'a'); | ||
| return static_cast<unsigned>(-1); | ||
| } | ||
| const int8_t unhex_table[256] = | ||
| { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | ||
| -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | ||
| -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | ||
| 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, | ||
| -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, | ||
| -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | ||
| -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, | ||
| -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | ||
| -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | ||
| -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | ||
| -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | ||
| -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | ||
| -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | ||
| -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | ||
| -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | ||
| -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 | ||
| }; | ||
|
|
||
| #define unhex(x) \ | ||
| static_cast<uint8_t>(unhex_table[static_cast<uint8_t>(x)]) | ||
|
|
||
|
|
||
| template <typename TypeName> | ||
|
|
@@ -162,11 +173,11 @@ size_t hex_decode(char* buf, | |
| const size_t srcLen) { | ||
| size_t i; | ||
| for (i = 0; i < len && i * 2 + 1 < srcLen; ++i) { | ||
| unsigned a = hex2bin(src[i * 2 + 0]); | ||
| unsigned b = hex2bin(src[i * 2 + 1]); | ||
| unsigned a = unhex(src[i * 2 + 0]); | ||
| unsigned b = unhex(src[i * 2 + 1]); | ||
| if (!~a || !~b) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this still work with (aka we need more tests to cover these cases?)
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @addaleax, good catch. I didn't notice the type difference. It looks like you're right. Applying this patch: diff --git a/src/string_bytes.cc b/src/string_bytes.cc
index c216c5d..cb0e78a 100644
--- a/src/string_bytes.cc
+++ b/src/string_bytes.cc
@@ -173,9 +173,9 @@ size_t hex_decode(char* buf,
const size_t srcLen) {
size_t i;
for (i = 0; i < len && i * 2 + 1 < srcLen; ++i) {
- unsigned a = unhex(src[i * 2 + 0]);
- unsigned b = unhex(src[i * 2 + 1]);
- if (!~a || !~b)
+ uint8_t a = unhex(src[i * 2 + 0]);
+ uint8_t b = unhex(src[i * 2 + 1]);
+ if ((a | b) & 0x80)
return i;
buf[i] = (a << 4) | b;
}Kills some of the gained perf, but large hex strings are still faster: I'll see what else I can do, and probably add a test for bad hex strings. |
||
| return i; | ||
| buf[i] = a * 16 + b; | ||
| buf[i] = (a << 4) | b; | ||
| } | ||
|
|
||
| return i; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
static const int8_t?