Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ Current Trunk
- Add a new `BinaryenModuleReadWithFeatures` function to the C API that allows
configuring which features to enable in the parser.
- The build-time option to use legacy WasmGC opcodes is removed.
- The strings in `string.const` instructions must now be valid WTF-8.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this not a limitation for anyone we know?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It shouldn't be, no. Before this change, most invalid WTF-8 would end up littered with replacement characters in the output anyway, and certainly no one wants that.


v117
----
Expand Down
3 changes: 0 additions & 3 deletions scripts/fuzz_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,9 +333,6 @@ def is_git_repo():
'exception-handling.wast',
'translate-to-new-eh.wast',
'rse-eh.wast',
# Non-UTF8 strings trap in V8, and have limitations in our interpreter
'string-lowering.wast',
'precompute-strings.wast',
]


Expand Down
8 changes: 7 additions & 1 deletion src/binaryen-c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "pass.h"
#include "shell-interface.h"
#include "support/colors.h"
#include "support/string.h"
#include "wasm-binary.h"
#include "wasm-builder.h"
#include "wasm-interpreter.h"
Expand Down Expand Up @@ -1895,8 +1896,13 @@ BinaryenExpressionRef BinaryenStringNew(BinaryenModuleRef module,
}
BinaryenExpressionRef BinaryenStringConst(BinaryenModuleRef module,
const char* name) {
// Re-encode from WTF-8 to WTF-16.
std::stringstream wtf16;
[[maybe_unused]] bool valid = String::convertWTF8ToWTF16(wtf16, name);
assert(valid);
// TODO: Use wtf16.view() once we have C++20.
return static_cast<Expression*>(
Builder(*(Module*)module).makeStringConst(name));
Builder(*(Module*)module).makeStringConst(wtf16.str()));
}
BinaryenExpressionRef BinaryenStringMeasure(BinaryenModuleRef module,
BinaryenOp op,
Expand Down
2 changes: 1 addition & 1 deletion src/literal.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ class Literal {
assert(type.isSignature());
}
explicit Literal(std::shared_ptr<GCData> gcData, HeapType type);
explicit Literal(std::string string);
explicit Literal(std::string_view string);
Literal(const Literal& other);
Literal& operator=(const Literal& other);
~Literal();
Expand Down
9 changes: 8 additions & 1 deletion src/parser/contexts.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "lexer.h"
#include "support/name.h"
#include "support/result.h"
#include "support/string.h"
#include "wasm-builder.h"
#include "wasm-ir-builder.h"
#include "wasm.h"
Expand Down Expand Up @@ -2491,7 +2492,13 @@ struct ParseDefsCtx : TypeParserCtx<ParseDefsCtx> {
Result<> makeStringConst(Index pos,
const std::vector<Annotation>& annotations,
std::string_view str) {
return withLoc(pos, irBuilder.makeStringConst(Name(str)));
// Re-encode from WTF-8 to WTF-16.
std::stringstream wtf16;
if (!String::convertWTF8ToWTF16(wtf16, str)) {
return in.err(pos, "invalid string constant");
}
// TODO: Use wtf16.view() once we have C++20.
return withLoc(pos, irBuilder.makeStringConst(wtf16.str()));
}

Result<> makeStringMeasure(Index pos,
Expand Down
21 changes: 2 additions & 19 deletions src/parser/lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <variant>

#include "lexer.h"
#include "support/string.h"

using namespace std::string_view_literals;

Expand Down Expand Up @@ -308,25 +309,7 @@ struct LexStrCtx : LexCtx {
if ((0xd800 <= u && u < 0xe000) || 0x110000 <= u) {
return false;
}
if (u < 0x80) {
// 0xxxxxxx
*escapeBuilder << uint8_t(u);
} else if (u < 0x800) {
// 110xxxxx 10xxxxxx
*escapeBuilder << uint8_t(0b11000000 | ((u >> 6) & 0b00011111));
*escapeBuilder << uint8_t(0b10000000 | ((u >> 0) & 0b00111111));
} else if (u < 0x10000) {
// 1110xxxx 10xxxxxx 10xxxxxx
*escapeBuilder << uint8_t(0b11100000 | ((u >> 12) & 0b00001111));
*escapeBuilder << uint8_t(0b10000000 | ((u >> 6) & 0b00111111));
*escapeBuilder << uint8_t(0b10000000 | ((u >> 0) & 0b00111111));
} else {
// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
*escapeBuilder << uint8_t(0b11110000 | ((u >> 18) & 0b00000111));
*escapeBuilder << uint8_t(0b10000000 | ((u >> 12) & 0b00111111));
*escapeBuilder << uint8_t(0b10000000 | ((u >> 6) & 0b00111111));
*escapeBuilder << uint8_t(0b10000000 | ((u >> 0) & 0b00111111));
}
String::writeWTF8CodePoint(*escapeBuilder, u);
return true;
}
};
Expand Down
8 changes: 7 additions & 1 deletion src/passes/Print.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2232,7 +2232,13 @@ struct PrintExpressionContents
}
void visitStringConst(StringConst* curr) {
printMedium(o, "string.const ");
String::printEscaped(o, curr->string.str);
// Re-encode from WTF-16 to WTF-8.
std::stringstream wtf8;
[[maybe_unused]] bool valid =
String::convertWTF16ToWTF8(wtf8, curr->string.str);
assert(valid);
// TODO: Use wtf8.view() once we have C++20.
String::printEscaped(o, wtf8.str());
}
void visitStringMeasure(StringMeasure* curr) {
switch (curr->op) {
Expand Down
8 changes: 7 additions & 1 deletion src/passes/StringLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,14 @@ struct StringGathering : public Pass {
}

auto& string = strings[i];
// Re-encode from WTF-16 to WTF-8 to make the name easier to read.
std::stringstream wtf8;
[[maybe_unused]] bool valid =
String::convertWTF16ToWTF8(wtf8, string.str);
assert(valid);
// TODO: Use wtf8.view() once we have C++20.
auto name = Names::getValidGlobalName(
*module, std::string("string.const_") + std::string(string.str));
*module, std::string("string.const_") + std::string(wtf8.str()));
globalName = name;
newNames.insert(name);
auto* stringConst = builder.makeStringConst(string);
Expand Down
7 changes: 6 additions & 1 deletion src/support/json.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,12 @@ namespace json {

void Value::stringify(std::ostream& os, bool pretty) {
if (isString()) {
wasm::String::printEscapedJSON(os, getCString());
std::stringstream wtf16;
[[maybe_unused]] bool valid =
wasm::String::convertWTF8ToWTF16(wtf16, getIString().str);
assert(valid);
// TODO: Use wtf16.view() once we have C++20.
wasm::String::printEscapedJSON(os, wtf16.str());
} else if (isArray()) {
os << '[';
auto first = true;
Expand Down
Loading