From 511a59edeb77f297ce3fa81a1a0268e28b022bd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Mon, 20 Oct 2025 21:28:42 +0300 Subject: [PATCH 01/20] Implement -s SINGLE_FILE_BINARY_ENCODE=1 option to embed Wasm binary as binary-encoded form instead of base64 form in SINGLE_FILE mode. Continuation of #21478. --- src/preamble.js | 29 +++++++++++++++++++++-------- src/settings.js | 7 +++++++ test/test_codesize.py | 3 +++ test/test_other.py | 7 +++++-- tools/link.py | 41 ++++++++++++++++++++++++++++++++++++++++- 5 files changed, 76 insertions(+), 11 deletions(-) diff --git a/src/preamble.js b/src/preamble.js index bd6aec1cf9924..e9bb82af6fac5 100644 --- a/src/preamble.js +++ b/src/preamble.js @@ -410,6 +410,10 @@ function instrumentWasmTableWithAbort() { } #endif +#if SINGLE_FILE && SINGLE_FILE_BINARY_ENCODE && !WASM2JS +#include "binaryDecode.js" +#endif + #if !SOURCE_PHASE_IMPORTS && !WASM_ESM_INTEGRATION var wasmBinaryFile; @@ -423,27 +427,35 @@ function getWasmBinary(file) {} #else function findWasmBinary() { -#if SINGLE_FILE +#if SINGLE_FILE && SINGLE_FILE_BINARY_ENCODE && !WASM2JS + return binaryDecode('<<< WASM_BINARY_DATA >>>'); +#elif SINGLE_FILE return base64Decode('<<< WASM_BINARY_DATA >>>'); +#elif AUDIO_WORKLET || !EXPORT_ES6 + // For an Audio Worklet, we cannot use `new URL()`. + return locateFile('{{{ WASM_BINARY_FILE }}}'); #else -#if EXPORT_ES6 && !AUDIO_WORKLET - if (Module['locateFile']) { -#endif - return locateFile('{{{ WASM_BINARY_FILE }}}'); -#if EXPORT_ES6 && !AUDIO_WORKLET // For an Audio Worklet, we cannot use `new URL()`. - } + #if ENVIRONMENT_MAY_BE_SHELL if (ENVIRONMENT_IS_SHELL) { return '{{{ WASM_BINARY_FILE }}}'; } #endif + + if (Module['locateFile']) { + return locateFile('{{{ WASM_BINARY_FILE }}}'); + } + // Use bundler-friendly `new URL(..., import.meta.url)` pattern; works in browsers too. 
return new URL('{{{ WASM_BINARY_FILE }}}', import.meta.url).href; -#endif + #endif } function getBinarySync(file) { +#if SINGLE_FILE && SINGLE_FILE_BINARY_ENCODE + return file; +#else #if SINGLE_FILE if (ArrayBuffer.isView(file)) { return file; @@ -464,6 +476,7 @@ function getBinarySync(file) { #else throw 'sync fetching of the wasm failed: you can preload it to Module["wasmBinary"] manually, or emcc.py will do that for you when generating HTML (but not JS)'; #endif +#endif } async function getWasmBinary(binaryFile) { diff --git a/src/settings.js b/src/settings.js index 7624d4a25fdc1..665838ef67125 100644 --- a/src/settings.js +++ b/src/settings.js @@ -1853,6 +1853,13 @@ var WASMFS = false; // [link] var SINGLE_FILE = false; +// If true, binary Wasm content is encoded using a custom UTF-8 embedding +// instead of base64. This generates a smaller binary that compresses well. +// Set this to false to revert back to earlier base64 encoding if you run into +// issues with the binary encoding. (and please let us know of any such issues) +// [link] +var SINGLE_FILE_BINARY_ENCODE = true; + // If set to 1, all JS libraries will be automatically available at link time. 
// This gets set to 0 in STRICT mode (or with MINIMAL_RUNTIME) which mean you // need to explicitly specify -lfoo.js in at link time in order to access diff --git a/test/test_codesize.py b/test/test_codesize.py index 7ca92e364236d..66350df60f596 100644 --- a/test/test_codesize.py +++ b/test/test_codesize.py @@ -43,6 +43,7 @@ class codesize(RunnerCore): 'random_printf_wasm2js': ('random_printf', True), 'hello_webgl_wasm': ('hello_webgl', False), 'hello_webgl_wasm2js': ('hello_webgl', True), + 'hello_webgl2_wasm_singlefile': ('hello_webgl2_wasm_singlefile', False), 'hello_webgl2_wasm': ('hello_webgl2', False), 'hello_webgl2_wasm2js': ('hello_webgl2', True), 'math': ('math', False), @@ -87,6 +88,7 @@ def test_minimal_runtime_code_size(self, test_name, wasm2js, compare_js_output=F '-lGL', '-sMODULARIZE'] hello_webgl2_sources = hello_webgl_sources + ['-sMAX_WEBGL_VERSION=2'] + hello_webgl2_wasm_singlefile_sources = hello_webgl2_sources + ['-sSINGLE_FILE'] hello_wasm_worker_sources = [test_file('wasm_worker/wasm_worker_code_size.c'), '-sWASM_WORKERS', '-sENVIRONMENT=web'] audio_worklet_sources = [test_file('webaudio/audioworklet.c'), '-sWASM_WORKERS', '-sAUDIO_WORKLET', '-sENVIRONMENT=web', '-sTEXTDECODER=1'] embind_hello_sources = [test_file('codesize/embind_hello_world.cpp'), '-lembind'] @@ -98,6 +100,7 @@ def test_minimal_runtime_code_size(self, test_name, wasm2js, compare_js_output=F 'hello_webgl': hello_webgl_sources, 'math': math_sources, 'hello_webgl2': hello_webgl2_sources, + 'hello_webgl2_wasm_singlefile': hello_webgl2_wasm_singlefile_sources, 'hello_wasm_worker': hello_wasm_worker_sources, 'audio_worklet': audio_worklet_sources, 'embind_val': embind_val_sources, diff --git a/test/test_other.py b/test/test_other.py index 04be3622feefa..393a27fb813c9 100644 --- a/test/test_other.py +++ b/test/test_other.py @@ -9475,8 +9475,9 @@ def test_standalone_system_headers(self, prefix): 'closure': (False, True), }) @parameterized({ - '': (True,), - 'disabled': (False,), + 
'': (1,), + 'disabled': (0,), + 'binary_encode': (2,), }) def test_single_file(self, debug_enabled, closure_enabled, single_file_enabled): cmd = [EMCC, test_file('hello_world.c')] + self.get_cflags() @@ -9487,6 +9488,8 @@ def test_single_file(self, debug_enabled, closure_enabled, single_file_enabled): else: expect_wasm = self.is_wasm() + cmd += [f'-sSINGLE_FILE_BINARY_ENCODE={int(single_file_enabled == 2)}'] + if debug_enabled: cmd += ['-g'] if closure_enabled: diff --git a/tools/link.py b/tools/link.py index 181adf252807a..00cafd558bc57 100644 --- a/tools/link.py +++ b/tools/link.py @@ -163,6 +163,10 @@ def base64_encode(filename): return b64.decode('ascii') +def base64_or_binary_encode(b): + return binary_encode(b) if settings.SINGLE_FILE and settings.SINGLE_FILE_BINARY_ENCODE else base64_encode(b) + + def align_to_wasm_page_boundary(address): page_size = webassembly.WASM_PAGE_SIZE return ((address + (page_size - 1)) // page_size) * page_size @@ -2435,7 +2439,7 @@ def phase_binaryen(target, options, wasm_target): if final_js and settings.SINGLE_FILE and not settings.WASM2JS: js = read_file(final_js) - js = do_replace(js, '<<< WASM_BINARY_DATA >>>', base64_encode(wasm_target)) + js = do_replace(js, '<<< WASM_BINARY_DATA >>>', base64_or_binary_encode(wasm_target)) delete_file(wasm_target) write_file(final_js, js) @@ -2945,9 +2949,44 @@ def move_file(src, dst): shutil.move(src, dst) +def binary_encode(filename): + """This function encodes the given binary byte array to a UTF-8 string, by + first adding +1 to all the bytes [0, 255] to form values [1, 256], and then + encoding each of those values as UTF-8, except for specific byte values that + are escaped as two bytes. This kind of encoding results in a string that will + compress well by both gzip and brotli, unlike base64 encoding binary data + would do, and avoids emitting the null byte inside a string. 
+ """ + + data = utils.read_binary(filename) + + out = bytearray(len(data) * 2) # Size output buffer conservatively + i = 0 + for d in data: + d += 1 # Offset all bytes up by +1 to make zero (a very common value) be encoded with only one byte as 0x01. This is possible since we can encode 255 as 0x100 in UTF-8. + if d == ord("'"): + buf = [ord('\\'), d] # Escape single quote ' character with a backspace since we are writing a string inside single quotes. (' -> 2 bytes) + elif d == ord('"'): + buf = [ord('\\'), d] # Escape double quote " character with a backspace since optimizer may turn the string into being delimited with double quotes. (" -> 2 bytes) + elif d == ord('\r'): + buf = [ord('\\'), ord('r')] # Escape carriage return 0x0D as \r -> 2 bytes + elif d == ord('\n'): + buf = [ord('\\'), ord('n')] # Escape newline 0x0A as \n -> 2 bytes + elif d == ord('\\'): + buf = [ord('\\'), ord('\\')] # Escape backslash \ as \\ -> 2 bytes + else: + buf = chr(d).encode('utf-8') # Otherwise write the original value encoded in UTF-8 (1 or 2 bytes). 
+ for b in buf: # Write the bytes to output buffer + out[i] = b + i += 1 + return out[0:i].decode('utf-8') # Crop output buffer to the actual used size + + # Returns the subresource location for run-time access def get_subresource_location(path, mimetype='application/octet-stream'): if settings.SINGLE_FILE: + if settings.SINGLE_FILE_BINARY_ENCODE: + return binary_encode(utils.read_binary(path)) return f'data:{mimetype};base64,{base64_encode(path)}' else: return os.path.basename(path) From e577792229dab78e38f14bc44ce6ddc064ea6967 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Mon, 20 Oct 2025 21:32:32 +0300 Subject: [PATCH 02/20] docs --- .../docs/tools_reference/settings_reference.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/site/source/docs/tools_reference/settings_reference.rst b/site/source/docs/tools_reference/settings_reference.rst index 85f053734d054..a68c4a5983295 100644 --- a/site/source/docs/tools_reference/settings_reference.rst +++ b/site/source/docs/tools_reference/settings_reference.rst @@ -2823,6 +2823,18 @@ then you can safely ignore this warning. Default value: false +.. _single_file_binary_encode: + +SINGLE_FILE_BINARY_ENCODE +========================= + +If true, binary Wasm content is encoded using a custom UTF-8 embedding +instead of base64. This generates a smaller binary that compresses well. +Set this to false to revert back to earlier base64 encoding if you run into +issues with the binary encoding. (and please let us know of any such issues) + +Default value: true + .. 
_auto_js_libraries: AUTO_JS_LIBRARIES From 88f3570fa130e3fd728d75c81c8738f3e240a574 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Mon, 20 Oct 2025 23:20:52 +0300 Subject: [PATCH 03/20] Add test --- test/test_codesize.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/test_codesize.py b/test/test_codesize.py index 66350df60f596..695c1d869f300 100644 --- a/test/test_codesize.py +++ b/test/test_codesize.py @@ -414,3 +414,13 @@ def test_codesize_file_preload(self): def test_small_js_flags(self): self.emcc('browser_test_hello_world.c', ['-O3', '--closure=1', '-sINCOMING_MODULE_JS_API=[]', '-sENVIRONMENT=web', '--output-eol=linux']) self.check_output_sizes('a.out.js') + + # This test verifies that gzipped binary-encoded a SINGLE_FILE build results in a smaller size + # than gzipped base64-encoded version. + def test_binary_encode_is_smaller_than_base64_encode(self): + self.emcc('hello_world.c', ['-O2', '-sSINGLE_FILE', '-sSINGLE_FILE_BINARY_ENCODE']) + size_binary_encode = len(gzip.compress(read_binary('a.out.js'))) + self.emcc('hello_world.c', ['-O2', '-sSINGLE_FILE', '-sSINGLE_FILE_BINARY_ENCODE=0']) + size_base64 = len(gzip.compress(read_binary('a.out.js'))) + print(f'Binary encoded file size: {size_binary_encode}, base64 encoded file size: {size_base64}') + self.assertLess(size_binary_encode, size_base64) From 543554ed866438c2154ca816a7c724d4f0c20a26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Mon, 20 Oct 2025 23:22:15 +0300 Subject: [PATCH 04/20] Add missing file --- src/binaryDecode.js | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 src/binaryDecode.js diff --git a/src/binaryDecode.js b/src/binaryDecode.js new file mode 100644 index 0000000000000..a35a737781998 --- /dev/null +++ b/src/binaryDecode.js @@ -0,0 +1,10 @@ +// Prevent Closure from minifying the binaryDecode() function, or otherwise +// Closure may analyze through the WASM_BINARY_DATA placeholder string into this +// 
function, leading into incorrect results. +/** @noinline */ +function binaryDecode(bin) { + for(var i = 0, l = bin.length, o = new Uint8Array(l); i < l; ++i) { + o[i] = bin.charCodeAt(i) - 1; + } + return o; +} From 0f8fce35fdf44d9009c575ea370a7c12ce7300c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Mon, 20 Oct 2025 23:24:00 +0300 Subject: [PATCH 05/20] Use booleans --- test/test_other.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/test_other.py b/test/test_other.py index 393a27fb813c9..7ed31f1d1e804 100644 --- a/test/test_other.py +++ b/test/test_other.py @@ -9475,11 +9475,11 @@ def test_standalone_system_headers(self, prefix): 'closure': (False, True), }) @parameterized({ - '': (1,), - 'disabled': (0,), - 'binary_encode': (2,), + '': (True,False), + 'disabled': (False,False), + 'binary_encode': (True,True), }) - def test_single_file(self, debug_enabled, closure_enabled, single_file_enabled): + def test_single_file(self, debug_enabled, closure_enabled, single_file_enabled, single_file_binary_encoded): cmd = [EMCC, test_file('hello_world.c')] + self.get_cflags() if single_file_enabled: @@ -9488,7 +9488,7 @@ def test_single_file(self, debug_enabled, closure_enabled, single_file_enabled): else: expect_wasm = self.is_wasm() - cmd += [f'-sSINGLE_FILE_BINARY_ENCODE={int(single_file_enabled == 2)}'] + cmd += [f'-sSINGLE_FILE_BINARY_ENCODE={int(single_file_binary_encoded)}'] if debug_enabled: cmd += ['-g'] From 091f2070df52208762c2b7c881e8cd0d69340d34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Tue, 21 Oct 2025 00:24:10 +0300 Subject: [PATCH 06/20] Add code size file. 
--- ...inimal_runtime_code_size_hello_webgl2_wasm_singlefile.json | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 test/codesize/test_minimal_runtime_code_size_hello_webgl2_wasm_singlefile.json diff --git a/test/codesize/test_minimal_runtime_code_size_hello_webgl2_wasm_singlefile.json b/test/codesize/test_minimal_runtime_code_size_hello_webgl2_wasm_singlefile.json new file mode 100644 index 0000000000000..9db742f95a315 --- /dev/null +++ b/test/codesize/test_minimal_runtime_code_size_hello_webgl2_wasm_singlefile.json @@ -0,0 +1,4 @@ +{ + "a.html": 15321, + "a.html.gz": 9207 +} From a8e4cbfda6bca585b154627f046dc3ad273e3f27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Tue, 21 Oct 2025 00:26:35 +0300 Subject: [PATCH 07/20] Add comment about option becoming permanent in the future --- site/source/docs/tools_reference/settings_reference.rst | 2 ++ src/settings.js | 2 ++ 2 files changed, 4 insertions(+) diff --git a/site/source/docs/tools_reference/settings_reference.rst b/site/source/docs/tools_reference/settings_reference.rst index a68c4a5983295..c55d47022860a 100644 --- a/site/source/docs/tools_reference/settings_reference.rst +++ b/site/source/docs/tools_reference/settings_reference.rst @@ -2832,6 +2832,8 @@ If true, binary Wasm content is encoded using a custom UTF-8 embedding instead of base64. This generates a smaller binary that compresses well. Set this to false to revert back to earlier base64 encoding if you run into issues with the binary encoding. (and please let us know of any such issues) +If no issues arise, this option will permanently become the default in the +future. Default value: true diff --git a/src/settings.js b/src/settings.js index 665838ef67125..a6019134fe63d 100644 --- a/src/settings.js +++ b/src/settings.js @@ -1857,6 +1857,8 @@ var SINGLE_FILE = false; // instead of base64. This generates a smaller binary that compresses well. 
// Set this to false to revert back to earlier base64 encoding if you run into // issues with the binary encoding. (and please let us know of any such issues) +// If no issues arise, this option will permanently become the default in the +// future. // [link] var SINGLE_FILE_BINARY_ENCODE = true; From c3515f30d7235a75a3ff3766a156ad6c7e9f5a8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Tue, 21 Oct 2025 00:26:50 +0300 Subject: [PATCH 08/20] whitespace --- src/binaryDecode.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/binaryDecode.js b/src/binaryDecode.js index a35a737781998..5706354a5a85b 100644 --- a/src/binaryDecode.js +++ b/src/binaryDecode.js @@ -3,7 +3,7 @@ // function, leading into incorrect results. /** @noinline */ function binaryDecode(bin) { - for(var i = 0, l = bin.length, o = new Uint8Array(l); i < l; ++i) { + for (var i = 0, l = bin.length, o = new Uint8Array(l); i < l; ++i) { o[i] = bin.charCodeAt(i) - 1; } return o; From 046c5ba69772dff982fce31f79142a4f3f4616b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Tue, 21 Oct 2025 00:32:21 +0300 Subject: [PATCH 09/20] Add binary encoding to minimal runtime. 
--- src/postamble_minimal.js | 4 +++- src/preamble.js | 4 ---- src/runtime_common.js | 4 ++++ test/test_other.py | 1 + 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/postamble_minimal.js b/src/postamble_minimal.js index 8a76698156e0c..f1d71f9337900 100644 --- a/src/postamble_minimal.js +++ b/src/postamble_minimal.js @@ -149,7 +149,9 @@ function initRuntime(wasmExports) { // Initialize wasm (asynchronous) -#if SINGLE_FILE && WASM == 1 && !WASM2JS +#if SINGLE_FILE && SINGLE_FILE_BINARY_ENCODE && !WASM2JS +Module['wasm'] = binaryDecode('<<< WASM_BINARY_DATA >>>'); +#elif SINGLE_FILE && WASM == 1 && !WASM2JS Module['wasm'] = base64Decode('<<< WASM_BINARY_DATA >>>'); #endif diff --git a/src/preamble.js b/src/preamble.js index e9bb82af6fac5..ac283097ae209 100644 --- a/src/preamble.js +++ b/src/preamble.js @@ -410,10 +410,6 @@ function instrumentWasmTableWithAbort() { } #endif -#if SINGLE_FILE && SINGLE_FILE_BINARY_ENCODE && !WASM2JS -#include "binaryDecode.js" -#endif - #if !SOURCE_PHASE_IMPORTS && !WASM_ESM_INTEGRATION var wasmBinaryFile; diff --git a/src/runtime_common.js b/src/runtime_common.js index 6110d611ab6e8..93523d4e12ecd 100644 --- a/src/runtime_common.js +++ b/src/runtime_common.js @@ -27,6 +27,10 @@ function growMemViews() { #include "runtime_asan.js" #endif +#if SINGLE_FILE && SINGLE_FILE_BINARY_ENCODE && !WASM2JS +#include "binaryDecode.js" +#endif + #if MODULARIZE var readyPromiseResolve, readyPromiseReject; #endif diff --git a/test/test_other.py b/test/test_other.py index 7ed31f1d1e804..559db3a9f23de 100644 --- a/test/test_other.py +++ b/test/test_other.py @@ -9469,6 +9469,7 @@ def test_standalone_system_headers(self, prefix): @is_slow_test @also_with_wasm2js + @also_with_minimal_runtime @parameterized({ '': (False, False), 'debug': (True, False), From e9c0d0e9f2c8f7333abcb443ef2b2de70ba9cdbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Tue, 21 Oct 2025 00:35:00 +0300 Subject: [PATCH 10/20] eslint --- 
eslint.config.mjs | 1 + 1 file changed, 1 insertion(+) diff --git a/eslint.config.mjs b/eslint.config.mjs index fa52f28f45ce5..9cdd1157b2c66 100644 --- a/eslint.config.mjs +++ b/eslint.config.mjs @@ -27,6 +27,7 @@ export default [{ '**/test/', 'src/polyfill/', 'src/lib/', + 'src/binaryDecode.js', 'src/minimum_runtime_check.js', 'src/runtime_*.js', 'src/shell*.js', From dbb0ed7b9420fd7af37eda3648396ba470cd4d90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Tue, 21 Oct 2025 00:47:03 +0300 Subject: [PATCH 11/20] Optimize encoding. --- src/postamble_minimal.js | 2 +- src/preamble.js | 2 +- tools/link.py | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/postamble_minimal.js b/src/postamble_minimal.js index f1d71f9337900..9472ec752e9f8 100644 --- a/src/postamble_minimal.js +++ b/src/postamble_minimal.js @@ -150,7 +150,7 @@ function initRuntime(wasmExports) { // Initialize wasm (asynchronous) #if SINGLE_FILE && SINGLE_FILE_BINARY_ENCODE && !WASM2JS -Module['wasm'] = binaryDecode('<<< WASM_BINARY_DATA >>>'); +Module['wasm'] = binaryDecode("<<< WASM_BINARY_DATA >>>"); #elif SINGLE_FILE && WASM == 1 && !WASM2JS Module['wasm'] = base64Decode('<<< WASM_BINARY_DATA >>>'); #endif diff --git a/src/preamble.js b/src/preamble.js index ac283097ae209..abccc0e0e2751 100644 --- a/src/preamble.js +++ b/src/preamble.js @@ -424,7 +424,7 @@ function getWasmBinary(file) {} function findWasmBinary() { #if SINGLE_FILE && SINGLE_FILE_BINARY_ENCODE && !WASM2JS - return binaryDecode('<<< WASM_BINARY_DATA >>>'); + return binaryDecode("<<< WASM_BINARY_DATA >>>"); #elif SINGLE_FILE return base64Decode('<<< WASM_BINARY_DATA >>>'); #elif AUDIO_WORKLET || !EXPORT_ES6 diff --git a/tools/link.py b/tools/link.py index 00cafd558bc57..63947624708a7 100644 --- a/tools/link.py +++ b/tools/link.py @@ -2964,10 +2964,10 @@ def binary_encode(filename): i = 0 for d in data: d += 1 # Offset all bytes up by +1 to make zero (a very common value) be encoded with 
only one byte as 0x01. This is possible since we can encode 255 as 0x100 in UTF-8. - if d == ord("'"): - buf = [ord('\\'), d] # Escape single quote ' character with a backspace since we are writing a string inside single quotes. (' -> 2 bytes) - elif d == ord('"'): - buf = [ord('\\'), d] # Escape double quote " character with a backspace since optimizer may turn the string into being delimited with double quotes. (" -> 2 bytes) + if d == ord('"'): + # Escape double quote " character with a backspace since we are writing the binary string inside double quotes. + # Also closure optimizer will turn the string into being delimited with double quotes, even if it were single quotes to start with. (" -> 2 bytes) + buf = [ord('\\'), d] elif d == ord('\r'): buf = [ord('\\'), ord('r')] # Escape carriage return 0x0D as \r -> 2 bytes elif d == ord('\n'): From 0783a62a5544075bb5aeca2a7b2e96007c24bef7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Tue, 21 Oct 2025 00:49:30 +0300 Subject: [PATCH 12/20] Review --- tools/link.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/link.py b/tools/link.py index 63947624708a7..1a1e47bf9126b 100644 --- a/tools/link.py +++ b/tools/link.py @@ -164,7 +164,7 @@ def base64_encode(filename): def base64_or_binary_encode(b): - return binary_encode(b) if settings.SINGLE_FILE and settings.SINGLE_FILE_BINARY_ENCODE else base64_encode(b) + return binary_encode(b) if settings.SINGLE_FILE_BINARY_ENCODE else base64_encode(b) def align_to_wasm_page_boundary(address): From 5f8df5f7ff7d3dc46e2f7ea2c0c417d6fb2b6419 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Tue, 21 Oct 2025 00:51:46 +0300 Subject: [PATCH 13/20] Inline binaryDecode.js --- src/binaryDecode.js | 10 ---------- src/runtime_common.js | 11 ++++++++++- 2 files changed, 10 insertions(+), 11 deletions(-) delete mode 100644 src/binaryDecode.js diff --git a/src/binaryDecode.js b/src/binaryDecode.js deleted file mode 100644 index 
5706354a5a85b..0000000000000 --- a/src/binaryDecode.js +++ /dev/null @@ -1,10 +0,0 @@ -// Prevent Closure from minifying the binaryDecode() function, or otherwise -// Closure may analyze through the WASM_BINARY_DATA placeholder string into this -// function, leading into incorrect results. -/** @noinline */ -function binaryDecode(bin) { - for (var i = 0, l = bin.length, o = new Uint8Array(l); i < l; ++i) { - o[i] = bin.charCodeAt(i) - 1; - } - return o; -} diff --git a/src/runtime_common.js b/src/runtime_common.js index 93523d4e12ecd..26f02185719d4 100644 --- a/src/runtime_common.js +++ b/src/runtime_common.js @@ -28,7 +28,16 @@ function growMemViews() { #endif #if SINGLE_FILE && SINGLE_FILE_BINARY_ENCODE && !WASM2JS -#include "binaryDecode.js" +// Prevent Closure from minifying the binaryDecode() function, or otherwise +// Closure may analyze through the WASM_BINARY_DATA placeholder string into this +// function, leading into incorrect results. +/** @noinline */ +function binaryDecode(bin) { + for (var i = 0, l = bin.length, o = new Uint8Array(l); i < l; ++i) { + o[i] = bin.charCodeAt(i) - 1; + } + return o; +} #endif #if MODULARIZE From f65117890dba30f748c831dedc16c03783356d9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Tue, 21 Oct 2025 01:13:16 +0300 Subject: [PATCH 14/20] Rebaseline code size tests --- .../test_codesize_file_preload.expected.js | 1 + .../test_codesize_hello_single_file.json | 4 +-- .../test_codesize_minimal_O0.expected.js | 3 +- ...ode_size_hello_webgl2_wasm_singlefile.json | 4 +-- ..._runtime_code_size_random_printf_wasm.json | 4 +-- test/codesize/test_unoptimized_code_size.json | 28 +++++++++---------- 6 files changed, 23 insertions(+), 21 deletions(-) diff --git a/test/codesize/test_codesize_file_preload.expected.js b/test/codesize/test_codesize_file_preload.expected.js index c4e879be679a1..5e7c1b52b32aa 100644 --- a/test/codesize/test_codesize_file_preload.expected.js +++ 
b/test/codesize/test_codesize_file_preload.expected.js @@ -391,6 +391,7 @@ function postRun() {} var wasmBinaryFile; function findWasmBinary() { + // For an Audio Worklet, we cannot use `new URL()`. return locateFile("a.out.wasm"); } diff --git a/test/codesize/test_codesize_hello_single_file.json b/test/codesize/test_codesize_hello_single_file.json index b216a17b1a986..8ff0ea14ecf13 100644 --- a/test/codesize/test_codesize_hello_single_file.json +++ b/test/codesize/test_codesize_hello_single_file.json @@ -1,6 +1,6 @@ { - "a.out.js": 6497, - "a.out.js.gz": 3579, + "a.out.js": 5394, + "a.out.js.gz": 2992, "sent": [ "a (fd_write)" ] diff --git a/test/codesize/test_codesize_minimal_O0.expected.js b/test/codesize/test_codesize_minimal_O0.expected.js index 52577c13e659f..ded5d518ed430 100644 --- a/test/codesize/test_codesize_minimal_O0.expected.js +++ b/test/codesize/test_codesize_minimal_O0.expected.js @@ -585,7 +585,8 @@ function createExportWrapper(name, nargs) { var wasmBinaryFile; function findWasmBinary() { - return locateFile('a.out.wasm'); + // For an Audio Worklet, we cannot use `new URL()`. 
+ return locateFile('a.out.wasm'); } function getBinarySync(file) { diff --git a/test/codesize/test_minimal_runtime_code_size_hello_webgl2_wasm_singlefile.json b/test/codesize/test_minimal_runtime_code_size_hello_webgl2_wasm_singlefile.json index 9db742f95a315..cc29e72f2cb69 100644 --- a/test/codesize/test_minimal_runtime_code_size_hello_webgl2_wasm_singlefile.json +++ b/test/codesize/test_minimal_runtime_code_size_hello_webgl2_wasm_singlefile.json @@ -1,4 +1,4 @@ { - "a.html": 15321, - "a.html.gz": 9207 + "a.html": 15176, + "a.html.gz": 9126 } diff --git a/test/codesize/test_minimal_runtime_code_size_random_printf_wasm.json b/test/codesize/test_minimal_runtime_code_size_random_printf_wasm.json index 3576098483a87..776ec42e8b6f9 100644 --- a/test/codesize/test_minimal_runtime_code_size_random_printf_wasm.json +++ b/test/codesize/test_minimal_runtime_code_size_random_printf_wasm.json @@ -1,4 +1,4 @@ { - "a.html": 12507, - "a.html.gz": 6823 + "a.html": 10998, + "a.html.gz": 5758 } diff --git a/test/codesize/test_unoptimized_code_size.json b/test/codesize/test_unoptimized_code_size.json index ae60057550c37..c43770e166873 100644 --- a/test/codesize/test_unoptimized_code_size.json +++ b/test/codesize/test_unoptimized_code_size.json @@ -1,16 +1,16 @@ { - "hello_world.js": 56592, - "hello_world.js.gz": 17608, - "hello_world.wasm": 15119, - "hello_world.wasm.gz": 7444, - "no_asserts.js": 26634, - "no_asserts.js.gz": 8883, - "no_asserts.wasm": 12219, - "no_asserts.wasm.gz": 6005, - "strict.js": 54607, - "strict.js.gz": 16955, - "strict.wasm": 15119, - "strict.wasm.gz": 7442, - "total": 180290, - "total_gz": 64337 + "hello_world.js": 56644, + "hello_world.js.gz": 17635, + "hello_world.wasm": 15127, + "hello_world.wasm.gz": 7450, + "no_asserts.js": 26686, + "no_asserts.js.gz": 8912, + "no_asserts.wasm": 12227, + "no_asserts.wasm.gz": 6010, + "strict.js": 54659, + "strict.js.gz": 16981, + "strict.wasm": 15127, + "strict.wasm.gz": 7447, + "total": 180470, + "total_gz": 64435 } 
From 1c26859c1b4c4e4b668174588a48e0c36425652c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Tue, 21 Oct 2025 01:17:44 +0300 Subject: [PATCH 15/20] code size --- test/codesize/test_unoptimized_code_size.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/codesize/test_unoptimized_code_size.json b/test/codesize/test_unoptimized_code_size.json index c43770e166873..238652ec3a337 100644 --- a/test/codesize/test_unoptimized_code_size.json +++ b/test/codesize/test_unoptimized_code_size.json @@ -1,16 +1,16 @@ { - "hello_world.js": 56644, - "hello_world.js.gz": 17635, + "hello_world.js": 56908, + "hello_world.js.gz": 17654, "hello_world.wasm": 15127, "hello_world.wasm.gz": 7450, "no_asserts.js": 26686, "no_asserts.js.gz": 8912, "no_asserts.wasm": 12227, "no_asserts.wasm.gz": 6010, - "strict.js": 54659, - "strict.js.gz": 16981, + "strict.js": 54923, + "strict.js.gz": 16995, "strict.wasm": 15127, "strict.wasm.gz": 7447, - "total": 180470, - "total_gz": 64435 + "total": 180998, + "total_gz": 64468 } From 88cd3e86c4d6e867b5a3537346d34ed2707aab16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Tue, 21 Oct 2025 01:22:27 +0300 Subject: [PATCH 16/20] Move comment back to end of line --- src/preamble.js | 3 +-- test/codesize/test_unoptimized_code_size.json | 16 ++++++++-------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/preamble.js b/src/preamble.js index abccc0e0e2751..c9002009f05d2 100644 --- a/src/preamble.js +++ b/src/preamble.js @@ -427,8 +427,7 @@ function findWasmBinary() { return binaryDecode("<<< WASM_BINARY_DATA >>>"); #elif SINGLE_FILE return base64Decode('<<< WASM_BINARY_DATA >>>'); -#elif AUDIO_WORKLET || !EXPORT_ES6 - // For an Audio Worklet, we cannot use `new URL()`. +#elif AUDIO_WORKLET || !EXPORT_ES6 // For an Audio Worklet, we cannot use `new URL()`. 
return locateFile('{{{ WASM_BINARY_FILE }}}'); #else diff --git a/test/codesize/test_unoptimized_code_size.json b/test/codesize/test_unoptimized_code_size.json index 238652ec3a337..ab225faab6899 100644 --- a/test/codesize/test_unoptimized_code_size.json +++ b/test/codesize/test_unoptimized_code_size.json @@ -1,16 +1,16 @@ { - "hello_world.js": 56908, - "hello_world.js.gz": 17654, + "hello_world.js": 56854, + "hello_world.js.gz": 17626, "hello_world.wasm": 15127, "hello_world.wasm.gz": 7450, - "no_asserts.js": 26686, - "no_asserts.js.gz": 8912, + "no_asserts.js": 26632, + "no_asserts.js.gz": 8884, "no_asserts.wasm": 12227, "no_asserts.wasm.gz": 6010, - "strict.js": 54923, - "strict.js.gz": 16995, + "strict.js": 54869, + "strict.js.gz": 16968, "strict.wasm": 15127, "strict.wasm.gz": 7447, - "total": 180998, - "total_gz": 64468 + "total": 180836, + "total_gz": 64385 } From 8a279103e94c5d43d9e1dca7e0f9c550b2769c5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Tue, 21 Oct 2025 01:58:15 +0300 Subject: [PATCH 17/20] code size --- test/codesize/test_codesize_file_preload.expected.js | 1 - test/codesize/test_codesize_minimal_O0.expected.js | 1 - 2 files changed, 2 deletions(-) diff --git a/test/codesize/test_codesize_file_preload.expected.js b/test/codesize/test_codesize_file_preload.expected.js index 5e7c1b52b32aa..c4e879be679a1 100644 --- a/test/codesize/test_codesize_file_preload.expected.js +++ b/test/codesize/test_codesize_file_preload.expected.js @@ -391,7 +391,6 @@ function postRun() {} var wasmBinaryFile; function findWasmBinary() { - // For an Audio Worklet, we cannot use `new URL()`. 
return locateFile("a.out.wasm"); } diff --git a/test/codesize/test_codesize_minimal_O0.expected.js b/test/codesize/test_codesize_minimal_O0.expected.js index 107f5563e1884..16279522e87cb 100644 --- a/test/codesize/test_codesize_minimal_O0.expected.js +++ b/test/codesize/test_codesize_minimal_O0.expected.js @@ -585,7 +585,6 @@ function createExportWrapper(name, nargs) { var wasmBinaryFile; function findWasmBinary() { - // For an Audio Worklet, we cannot use `new URL()`. return locateFile('a.out.wasm'); } From 6ddaa5693fe3a961466feb9da973530592025ed4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Tue, 21 Oct 2025 02:22:01 +0300 Subject: [PATCH 18/20] code size --- test/codesize/test_unoptimized_code_size.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/codesize/test_unoptimized_code_size.json b/test/codesize/test_unoptimized_code_size.json index ab225faab6899..edcbe6ecd75bd 100644 --- a/test/codesize/test_unoptimized_code_size.json +++ b/test/codesize/test_unoptimized_code_size.json @@ -11,6 +11,6 @@ "strict.js.gz": 16968, "strict.wasm": 15127, "strict.wasm.gz": 7447, - "total": 180836, + "total": 180812, "total_gz": 64385 } From bddafdf0d3b029794892a03a17df990eabcb5677 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Tue, 21 Oct 2025 03:14:26 +0300 Subject: [PATCH 19/20] Automatic rebaseline of codesize expectations. NFC This is an automatic change generated by tools/maint/rebaseline_tests.py. 
The following (1) test expectation files were updated by running the tests with `--rebaseline`: ``` codesize/test_unoptimized_code_size.json: 180812 => 180812 [+0 bytes / +0.00%] Average change: +0.00% (+0.00% - +0.00%) ``` --- test/codesize/test_unoptimized_code_size.json | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/test/codesize/test_unoptimized_code_size.json b/test/codesize/test_unoptimized_code_size.json index edcbe6ecd75bd..64aaf7fad2324 100644 --- a/test/codesize/test_unoptimized_code_size.json +++ b/test/codesize/test_unoptimized_code_size.json @@ -1,16 +1,16 @@ { "hello_world.js": 56854, "hello_world.js.gz": 17626, - "hello_world.wasm": 15127, - "hello_world.wasm.gz": 7450, + "hello_world.wasm": 15119, + "hello_world.wasm.gz": 7444, "no_asserts.js": 26632, "no_asserts.js.gz": 8884, - "no_asserts.wasm": 12227, - "no_asserts.wasm.gz": 6010, + "no_asserts.wasm": 12219, + "no_asserts.wasm.gz": 6005, "strict.js": 54869, "strict.js.gz": 16968, - "strict.wasm": 15127, - "strict.wasm.gz": 7447, + "strict.wasm": 15119, + "strict.wasm.gz": 7442, "total": 180812, - "total_gz": 64385 + "total_gz": 64369 } From a4144de47753b04ca2992f99f43243ea209c6823 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Tue, 21 Oct 2025 15:39:40 +0300 Subject: [PATCH 20/20] Fix proxy_to_worker --- src/binaryDecode.js | 10 ++++++++++ src/proxyClient.js | 13 ++++++++++++- src/runtime_common.js | 11 +---------- tools/link.py | 2 +- 4 files changed, 24 insertions(+), 12 deletions(-) create mode 100644 src/binaryDecode.js diff --git a/src/binaryDecode.js b/src/binaryDecode.js new file mode 100644 index 0000000000000..5706354a5a85b --- /dev/null +++ b/src/binaryDecode.js @@ -0,0 +1,10 @@ +// Prevent Closure from inlining the binaryDecode() function; otherwise +// Closure may analyze through the WASM_BINARY_DATA placeholder string into this +// function, leading to incorrect results.
+/** @noinline */ +function binaryDecode(bin) { + for (var i = 0, l = bin.length, o = new Uint8Array(l); i < l; ++i) { + o[i] = bin.charCodeAt(i) - 1; + } + return o; +} diff --git a/src/proxyClient.js b/src/proxyClient.js index 0a3049e2c4f7e..3d14a4ba06960 100644 --- a/src/proxyClient.js +++ b/src/proxyClient.js @@ -132,7 +132,18 @@ var frameId = 0; // Worker -var filename = '<<< filename >>>'; +var filename = "<<< filename >>>"; + +#if SINGLE_FILE && SINGLE_FILE_BINARY_ENCODE +#include "binaryDecode.js" + +#if ENVIRONMENT_MAY_BE_NODE +if (ENVIRONMENT_IS_NODE) filename = "data:text/javascript;base64," + Buffer.from(binaryDecode(filename)).toString('base64'); +else +#endif + filename = URL.createObjectURL(new Blob([binaryDecode(filename)], {type: 'application/javascript'})); + +#endif var worker = new Worker(filename); diff --git a/src/runtime_common.js b/src/runtime_common.js index 26f02185719d4..93523d4e12ecd 100644 --- a/src/runtime_common.js +++ b/src/runtime_common.js @@ -28,16 +28,7 @@ function growMemViews() { #endif #if SINGLE_FILE && SINGLE_FILE_BINARY_ENCODE && !WASM2JS -// Prevent Closure from minifying the binaryDecode() function, or otherwise -// Closure may analyze through the WASM_BINARY_DATA placeholder string into this -// function, leading into incorrect results. 
-/** @noinline */ -function binaryDecode(bin) { - for (var i = 0, l = bin.length, o = new Uint8Array(l); i < l; ++i) { - o[i] = bin.charCodeAt(i) - 1; - } - return o; -} +#include "binaryDecode.js" #endif #if MODULARIZE diff --git a/tools/link.py b/tools/link.py index 1a1e47bf9126b..542e7dbd052e4 100644 --- a/tools/link.py +++ b/tools/link.py @@ -2986,7 +2986,7 @@ def binary_encode(filename): def get_subresource_location(path, mimetype='application/octet-stream'): if settings.SINGLE_FILE: if settings.SINGLE_FILE_BINARY_ENCODE: - return binary_encode(utils.read_binary(path)) + return binary_encode(path) return f'data:{mimetype};base64,{base64_encode(path)}' else: return os.path.basename(path)