From 99f79ba70a69bf7bcee822c0e950d867beba99f0 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 14 Mar 2024 16:18:05 -0700 Subject: [PATCH 1/5] work --- src/wasm-interpreter.h | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h index 46e5c175288..551e0094644 100644 --- a/src/wasm-interpreter.h +++ b/src/wasm-interpreter.h @@ -2039,7 +2039,44 @@ class ExpressionRunner : public OverriddenVisitor { return Flow(NONCONSTANT_FLOW); } Flow visitStringSliceWTF(StringSliceWTF* curr) { - return Flow(NONCONSTANT_FLOW); + // For now we only support JS-style strings. + if (curr->op != StringSliceWTF16) { + return Flow(NONCONSTANT_FLOW); + } + + Flow ref = visit(curr->ref); + if (ref.breaking()) { + return ref; + } + Flow start = visit(curr->start); + if (start.breaking()) { + return start; + } + Flow end = visit(curr->end); + if (end.breaking()) { + return end; + } + + auto refData = ref.getSingleValue().getGCData(); + if (!refData) { + trap("null ref"); + } + auto& refValues = refData->values; + auto startVal = start.getSingleValue().getUnsigned(); + auto endVal = end.getSingleValue().getUnsigned(); + if (endVal > refValues.size()) { + trap("array oob"); + } + Literals contents; + if (endVal > startVal) { + contents.reserve(endVal - startVal); + for (size_t i = startVal; i < endVal; i++) { + if (i < refValues.size()) { + contents.push_back(refValues[i]); + } + } + } + return makeGCData(contents, curr->type); } Flow visitStringSliceIter(StringSliceIter* curr) { return Flow(NONCONSTANT_FLOW); From 4ef0c05ee9a6542d5ba9e59541bdaa4089323511 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 14 Mar 2024 16:33:47 -0700 Subject: [PATCH 2/5] test --- test/lit/exec/strings.wast | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/test/lit/exec/strings.wast b/test/lit/exec/strings.wast index 706602b1ce5..5ff8ce47a87 100644 --- a/test/lit/exec/strings.wast +++ 
b/test/lit/exec/strings.wast @@ -244,6 +244,17 @@ ) ) ) + + ;; CHECK: [fuzz-exec] calling slice + ;; CHECK-NEXT: [fuzz-exec] note result: slice => string("def") + (func $slice (export "slice") (result (ref string)) + ;; Slicing [3:6] here should definitely output "def". + (stringview_wtf16.slice + (string.const "abcdefgh") + (i32.const 3) + (i32.const 6) + ) + ) ) ;; CHECK: [fuzz-exec] calling new_wtf16_array ;; CHECK-NEXT: [fuzz-exec] note result: new_wtf16_array => string("ello") @@ -309,6 +320,9 @@ ;; CHECK-NEXT: [LoggingExternalInterface logging 98] ;; CHECK-NEXT: [LoggingExternalInterface logging 99] ;; CHECK-NEXT: [LoggingExternalInterface logging 0] + +;; CHECK: [fuzz-exec] calling slice +;; CHECK-NEXT: [fuzz-exec] note result: slice => string("def") ;; CHECK-NEXT: [fuzz-exec] comparing compare.1 ;; CHECK-NEXT: [fuzz-exec] comparing compare.10 ;; CHECK-NEXT: [fuzz-exec] comparing compare.2 @@ -329,3 +343,4 @@ ;; CHECK-NEXT: [fuzz-exec] comparing get_codeunit ;; CHECK-NEXT: [fuzz-exec] comparing get_length ;; CHECK-NEXT: [fuzz-exec] comparing new_wtf16_array +;; CHECK-NEXT: [fuzz-exec] comparing slice From fd52789951acd9078353e38897fe79abc49d97f2 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 19 Mar 2024 09:57:24 -0700 Subject: [PATCH 3/5] fix --- src/wasm-interpreter.h | 14 +++++++++++--- test/lit/passes/precompute-strings.wast | 19 +++++++++++++++++++ 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h index 0ec7cd89093..ccb56aba81c 100644 --- a/src/wasm-interpreter.h +++ b/src/wasm-interpreter.h @@ -1903,7 +1903,12 @@ class ExpressionRunner : public OverriddenVisitor { return Literal(curr->string.toString()); } - bool hasNonAsciiUpTo(const Literals& values, Index end) { + // Returns if there is a non-ascii character in a list of values, looking only + // up to an index that is provided (not inclusive). If the index is not + // provided we look in the entire list. 
+ bool hasNonAsciiUpTo(const Literals& values, + std::optional<Index> maybeEnd = std::nullopt) { + Index end = maybeEnd ? *maybeEnd : values.size(); for (Index i = 0; i < end; ++i) { if (uint32_t(values[i].geti32()) > 127) { return true; @@ -1930,7 +1935,7 @@ class ExpressionRunner : public OverriddenVisitor { // This is only correct if all the bytes stored in `values` correspond to // single unicode code points. See `visitStringWTF16Get` for details. - if (hasNonAsciiUpTo(data->values, data->values.size())) { + if (hasNonAsciiUpTo(data->values)) { return Flow(NONCONSTANT_FLOW); } @@ -1998,7 +2003,7 @@ class ExpressionRunner : public OverriddenVisitor { } // We don't handle non-ascii code points correctly yet. - if (hasNonAsciiUpTo(refValues, refValues.size())) { + if (hasNonAsciiUpTo(refValues)) { return Flow(NONCONSTANT_FLOW); } @@ -2166,6 +2171,9 @@ class ExpressionRunner : public OverriddenVisitor { if (endVal > refValues.size()) { trap("array oob"); } + if (hasNonAsciiUpTo(refValues, endVal)) { + return Flow(NONCONSTANT_FLOW); + } Literals contents; if (endVal > startVal) { contents.reserve(endVal - startVal); diff --git a/test/lit/passes/precompute-strings.wast b/test/lit/passes/precompute-strings.wast index aa138b289c0..6868336eb8b 100644 --- a/test/lit/passes/precompute-strings.wast +++ b/test/lit/passes/precompute-strings.wast @@ -129,4 +129,23 @@ (i32.const 0) ) ) + + (func $slice (export "slice") (result (ref string)) + ;; Slicing [3:6] here should definitely output "def". + (stringview_wtf16.slice + (string.const "abcdefgh") + (i32.const 3) + (i32.const 6) + ) + ) + + (func $slice-bad (export "slice") (result (ref string)) + ;; This slice contains non-ascii, so we do not optimize. 
+ (stringview_wtf16.slice + ;; abcd£fgh + (string.const "abcd\C2\A3fgh") + (i32.const 3) + (i32.const 6) + ) + ) ) From bc1aafbdfac41ff47fd91ff33086926f5a301097 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 19 Mar 2024 10:00:52 -0700 Subject: [PATCH 4/5] test --- test/lit/passes/precompute-strings.wast | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/test/lit/passes/precompute-strings.wast b/test/lit/passes/precompute-strings.wast index 6868336eb8b..f704306481a 100644 --- a/test/lit/passes/precompute-strings.wast +++ b/test/lit/passes/precompute-strings.wast @@ -130,6 +130,9 @@ ) ) + ;; CHECK: (func $slice (type $1) (result (ref string)) + ;; CHECK-NEXT: (string.const "def") + ;; CHECK-NEXT: ) (func $slice (export "slice") (result (ref string)) ;; Slicing [3:6] here should definitely output "def". (stringview_wtf16.slice @@ -139,7 +142,14 @@ ) ) - (func $slice-bad (export "slice") (result (ref string)) + ;; CHECK: (func $slice-bad (type $1) (result (ref string)) + ;; CHECK-NEXT: (stringview_wtf16.slice + ;; CHECK-NEXT: (string.const "abcd\c2\a3fgh") + ;; CHECK-NEXT: (i32.const 3) + ;; CHECK-NEXT: (i32.const 6) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $slice-bad (export "slice-bad") (result (ref string)) ;; This slice contains non-ascii, so we do not optimize. 
(stringview_wtf16.slice ;; abcd£fgh From 695badded77c74a9f620c74b2a9bc600ab9b61a4 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 19 Mar 2024 11:37:30 -0700 Subject: [PATCH 5/5] fuzzer.skip --- scripts/fuzz_opt.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 8bef4e98c8c..683c2bed718 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -332,8 +332,9 @@ def is_git_repo(): 'exception-handling.wast', 'translate-eh-old-to-new.wast', 'rse-eh.wast', - # Non-UTF8 strings trap in V8 + # Non-UTF8 strings trap in V8, and have limitations in our interpreter 'string-lowering.wast', + 'precompute-strings.wast', ]