Skip to content

Commit 63092af

Browse files
committed
src: improve buffer.transcode performance
1 parent 20aff2b commit 63092af

File tree

1 file changed

+33
-42
lines changed

1 file changed

+33
-42
lines changed

src/node_i18n.cc

Lines changed: 33 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,7 @@
4242

4343
#include "node_i18n.h"
4444
#include "node_external_reference.h"
45+
#include "simdutf.h"
4546

4647
#if defined(NODE_HAVE_I18N_SUPPORT)
4748

@@ -199,8 +200,7 @@ MaybeLocal<Object> TranscodeFromUcs2(Environment* env,
199200
MaybeLocal<Object> ret;
200201
Converter to(toEncoding);
201202

202-
size_t sublen = ucnv_getMinCharSize(to.conv());
203-
std::string sub(sublen, '?');
203+
std::string sub(to.min_char_size(), '?');
204204
to.set_subst_chars(sub.c_str());
205205

206206
const size_t length_in_chars = source_length / sizeof(UChar);
@@ -222,25 +222,20 @@ MaybeLocal<Object> TranscodeUcs2FromUtf8(Environment* env,
222222
const size_t source_length,
223223
UErrorCode* status) {
224224
*status = U_ZERO_ERROR;
225-
MaybeStackBuffer<UChar> destbuf;
226-
int32_t result_length;
227-
u_strFromUTF8(*destbuf, destbuf.capacity(), &result_length,
228-
source, source_length, status);
229-
MaybeLocal<Object> ret;
230-
if (U_SUCCESS(*status)) {
231-
destbuf.SetLength(result_length);
232-
ret = ToBufferEndian(env, &destbuf);
233-
} else if (*status == U_BUFFER_OVERFLOW_ERROR) {
234-
*status = U_ZERO_ERROR;
235-
destbuf.AllocateSufficientStorage(result_length);
236-
u_strFromUTF8(*destbuf, result_length, &result_length,
237-
source, source_length, status);
238-
if (U_SUCCESS(*status)) {
239-
destbuf.SetLength(result_length);
240-
ret = ToBufferEndian(env, &destbuf);
241-
}
225+
size_t expected_utf16_length =
226+
simdutf::utf16_length_from_utf8(source, source_length);
227+
MaybeStackBuffer<UChar> destbuf(expected_utf16_length);
228+
auto actual_length =
229+
simdutf::convert_utf8_to_utf16(source, source_length, destbuf.out());
230+
231+
if (actual_length == 0) {
232+
*status = U_INVALID_CHAR_FOUND;
233+
return {};
242234
}
243-
return ret;
235+
236+
CHECK_EQ(actual_length, expected_utf16_length);
237+
238+
return Buffer::New(env, &destbuf);
244239
}
245240

246241
MaybeLocal<Object> TranscodeUtf8FromUcs2(Environment* env,
@@ -250,31 +245,27 @@ MaybeLocal<Object> TranscodeUtf8FromUcs2(Environment* env,
250245
const size_t source_length,
251246
UErrorCode* status) {
252247
*status = U_ZERO_ERROR;
253-
MaybeLocal<Object> ret;
254248
const size_t length_in_chars = source_length / sizeof(UChar);
255-
int32_t result_length;
256-
MaybeStackBuffer<UChar> sourcebuf;
257-
MaybeStackBuffer<char> destbuf;
258-
CopySourceBuffer(&sourcebuf, source, source_length, length_in_chars);
259-
u_strToUTF8(*destbuf, destbuf.capacity(), &result_length,
260-
*sourcebuf, length_in_chars, status);
261-
if (U_SUCCESS(*status)) {
262-
destbuf.SetLength(result_length);
263-
ret = ToBufferEndian(env, &destbuf);
264-
} else if (*status == U_BUFFER_OVERFLOW_ERROR) {
265-
*status = U_ZERO_ERROR;
266-
destbuf.AllocateSufficientStorage(result_length);
267-
u_strToUTF8(*destbuf, result_length, &result_length, *sourcebuf,
268-
length_in_chars, status);
269-
if (U_SUCCESS(*status)) {
270-
destbuf.SetLength(result_length);
271-
ret = ToBufferEndian(env, &destbuf);
272-
}
249+
size_t expected_utf8_length = simdutf::utf8_length_from_utf16(
250+
reinterpret_cast<const char16_t*>(source), length_in_chars);
251+
252+
MaybeStackBuffer<char> destbuf(expected_utf8_length);
253+
auto actual_length =
254+
simdutf::convert_utf16_to_utf8(reinterpret_cast<const char16_t*>(source),
255+
length_in_chars,
256+
destbuf.out());
257+
258+
if (actual_length == 0) {
259+
*status = U_INVALID_CHAR_FOUND;
260+
return {};
273261
}
274-
return ret;
262+
263+
CHECK_EQ(actual_length, expected_utf8_length);
264+
265+
return Buffer::New(env, &destbuf);
275266
}
276267

277-
const char* EncodingName(const enum encoding encoding) {
268+
constexpr const char* EncodingName(const enum encoding encoding) {
278269
switch (encoding) {
279270
case ASCII: return "us-ascii";
280271
case LATIN1: return "iso8859-1";
@@ -284,7 +275,7 @@ const char* EncodingName(const enum encoding encoding) {
284275
}
285276
}
286277

287-
bool SupportedEncoding(const enum encoding encoding) {
278+
constexpr bool SupportedEncoding(const enum encoding encoding) {
288279
switch (encoding) {
289280
case ASCII:
290281
case LATIN1:

0 commit comments

Comments
 (0)