-
Notifications
You must be signed in to change notification settings - Fork 5.3k
[RyuJIT Wasm] Initial implementation of cast operations #122301
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 7 commits
9288c02
85f2b80
7dbe5ff
124c8dd
eb34147
e3d59bc
71eb61b
3988828
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -275,6 +275,10 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) | |
| genCodeForConstant(treeNode); | ||
| break; | ||
|
|
||
| case GT_CAST: | ||
| genCodeForCast(treeNode->AsOp()); | ||
| break; | ||
|
|
||
| default: | ||
| #ifdef DEBUG | ||
| NYIRAW(GenTree::OpName(treeNode->OperGet())); | ||
|
|
@@ -373,22 +377,82 @@ static constexpr uint32_t PackOperAndType(genTreeOps oper, var_types type) | |
| { | ||
| type = TYP_I_IMPL; | ||
| } | ||
| static_assert((ssize_t)GT_COUNT > (ssize_t)TYP_COUNT); | ||
| return ((uint32_t)oper << (ConstLog2<GT_COUNT>::value + 1)) | ((uint32_t)type); | ||
| const int shift1 = ConstLog2<TYP_COUNT>::value + 1; | ||
| return ((uint32_t)oper << shift1) | ((uint32_t)type); | ||
| } | ||
|
|
||
| //------------------------------------------------------------------------ | ||
| // PackOperAndType: Pack a GenTreeOp* into a uint32_t | ||
| // PackOperAndType: Pack a genTreeOps and two var_types into a uint32_t | ||
| // | ||
| // Arguments: | ||
| // treeNode - a GenTreeOp to extract oper and type from | ||
| // oper - a genTreeOps to pack | ||
| // toType - a var_types to pack | ||
| // fromType - a var_types to pack | ||
| // | ||
| // Return Value: | ||
| // the node's oper and type packed into an integer that can be used as a switch value | ||
| // oper and the types packed into an integer that can be used as a switch value/case | ||
| // | ||
| static uint32_t PackOperAndType(GenTreeOp* treeNode) | ||
| static constexpr uint32_t PackOperAndType(genTreeOps oper, var_types toType, var_types fromType) | ||
| { | ||
| return PackOperAndType(treeNode->OperGet(), treeNode->TypeGet()); | ||
| if (fromType == TYP_BYREF) | ||
| { | ||
| fromType = TYP_I_IMPL; | ||
| } | ||
| if (toType == TYP_BYREF) | ||
| { | ||
| toType = TYP_I_IMPL; | ||
| } | ||
| const int shift1 = ConstLog2<TYP_COUNT>::value + 1; | ||
| const int shift2 = shift1 + ConstLog2<GT_COUNT>::value + 1; | ||
| return ((uint32_t)oper << shift1) | ((uint32_t)fromType) | ((uint32_t)toType << shift2); | ||
| } | ||
|
|
||
| //------------------------------------------------------------------------ | ||
| // genCodeForCastr: Generate code for a binary arithmetic operator | ||
| // | ||
| // Arguments: | ||
| // tree - The binary operation for which we are generating code. | ||
kg marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
kg marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| // | ||
| void CodeGen::genCodeForCast(GenTreeOp* tree) | ||
| { | ||
| genConsumeOperands(tree); | ||
|
|
||
| instruction ins; | ||
| switch (PackOperAndType(tree->OperGet(), /* toType */ tree->TypeGet(), /* fromType */ tree->gtOp1->TypeGet())) | ||
| { | ||
| // NOTE: For this, RyuJIT seems to just generate an i32 load of the i64 operand instead of a GT_CAST. | ||
| // I suspect once we implement use of wasm locals instead of the linear stack, GT_CAST will appear. | ||
|
Comment on lines
+423
to
+424
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This part needs to actually be fixed in |
||
| case PackOperAndType(GT_CAST, TYP_INT, TYP_LONG): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since this is
Nit: I think it would look better if we swapping the from-type and to-type. That way it can read like so: And so on instead of: (I've always found it a bit peculiar that in the dumps we also use this
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I prefer your suggestion too, but I wanted to match the ordering of the wasm opcode names. |
||
| if (tree->gtOverflow()) | ||
| NYI_WASM("Overflow checks"); | ||
|
Comment on lines
+426
to
+427
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You can move this to the top of the method I think, to avoid adding it into each case. |
||
| ins = INS_i32_wrap_i64; | ||
| break; | ||
|
|
||
| case PackOperAndType(GT_CAST, TYP_LONG, TYP_INT): | ||
| // FIXME: Use extend8/extend16 as appropriate | ||
| ins = tree->IsUnsigned() ? INS_i64_extend_u_i32 : INS_i64_extend_s_i32; | ||
| break; | ||
|
|
||
| case PackOperAndType(GT_CAST, TYP_DOUBLE, TYP_FLOAT): | ||
| // NOTE: This name is wrong in the spec. | ||
kg marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| ins = INS_f64_promote_f32; | ||
| break; | ||
|
|
||
| case PackOperAndType(GT_CAST, TYP_FLOAT, TYP_DOUBLE): | ||
| ins = INS_f32_demote_f64; | ||
| break; | ||
|
|
||
| // TODO: Floating point conversions - we need to figure out where semantics require a helper and where they | ||
| // don't. | ||
|
Comment on lines
+445
to
+446
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We need to decide what our "baseline ISA" is going to be and write that down somewhere. If we're going with 1.0/MVP as the baseline, this will either need inline flow emitted, or a helper call (I think both options are fine?). If we include saturating-fp into the baseline, we can emit these as simple instructions. In terms of reach, saturating-FP is pretty widespread in larger engines (browsers/wasmtime), but there are also engines out in the wild which implement just the bare minimum. We've had at least one such user in NAOT-LLVM.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was discussing this with Adam and I think our best starting point is to use helpers for everything that we know have the right semantics, and then switch over to generating "native" wasm for our baseline ISA as needed as an optimization. IIRC the code size of the right semantics in native wasm can be quite significant.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So you're saying to adopt 1.0 as the baseline? This decision affects not just RyuJit but some higher-level code as well. It means we need to saturating-fp to the list of "instructions sets" in CG2 and such. As for semantics, I am pretty sure the saturating-fp instructions (
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we use something like
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yes and no. We will use
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have nothing against using 2.0 or 3.0 as the baseline, I just meant that as a starting point we shouldn't try to generate complex code in order to get the semantics right if we could just use a helper instead. So if i.e. the saturating truncate does what we want, we can use it - but jiterp had to generate complex code for some of our conversions just like clang does.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Per @lewing baseline can be whatever is supported by the 3 major browsers.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have opened #122309 about this. We can revisit it when implementing FP -> int.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for opening that. I think we can probably use the support guide here: https://webassembly.org/features to decide? For example, I see non-trapping conversions as currently implemented in Chrome, Firefox, and Safari.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Let's discuss it in #122311. |
||
|
|
||
| default: | ||
| ins = INS_none; | ||
| NYI_WASM("genCodeForCast"); | ||
| break; | ||
| } | ||
|
|
||
| GetEmitter()->emitIns(ins); | ||
| genProduceReg(tree); | ||
| } | ||
|
|
||
| //------------------------------------------------------------------------ | ||
|
|
@@ -402,7 +466,7 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode) | |
| genConsumeOperands(treeNode); | ||
|
|
||
| instruction ins; | ||
| switch (PackOperAndType(treeNode)) | ||
| switch (PackOperAndType(treeNode->OperGet(), treeNode->TypeGet())) | ||
| { | ||
| case PackOperAndType(GT_ADD, TYP_INT): | ||
| if (treeNode->gtOverflow()) | ||
|
|
@@ -497,7 +561,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* treeNode) | |
| genConsumeOperands(treeNode); | ||
|
|
||
| instruction ins; | ||
| switch (PackOperAndType(treeNode)) | ||
| switch (PackOperAndType(treeNode->OperGet(), treeNode->TypeGet())) | ||
| { | ||
| case PackOperAndType(GT_DIV, TYP_INT): | ||
| ins = INS_i32_div_s; | ||
|
|
@@ -615,7 +679,7 @@ void CodeGen::genCodeForShift(GenTree* tree) | |
| // for both the shift and shiftee. So the shift may need to be extended (zero-extended) for TYP_LONG. | ||
|
|
||
| instruction ins; | ||
| switch (PackOperAndType(treeNode)) | ||
| switch (PackOperAndType(treeNode->OperGet(), treeNode->TypeGet())) | ||
| { | ||
| case PackOperAndType(GT_LSH, TYP_INT): | ||
| ins = INS_i32_shl; | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -5445,10 +5445,11 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) | |||||||||
| costEx = 1; | ||||||||||
| costSz = 4; | ||||||||||
| #elif defined(TARGET_WASM) | ||||||||||
| // TODO-WASM: Better estimate of costs for these opcodes. Most of them are one op on x64 but may be | ||||||||||
| // multiple uops. | ||||||||||
| costEx = 2; | ||||||||||
|
Comment on lines
+5448
to
+5450
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
We can just be approximately right, there is no need to make it more complex I think. We can make floats a bit more expensive like arm64, but it's not necessary. And the vast majority of casts are not FP-related, so they shouldn't be costed as |
||||||||||
| // TODO-WASM: 1 byte opcodes except for the int->fp saturating casts which are 2 bytes. | ||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||
| NYI_WASM("Cast costing"); | ||||||||||
| costEx = 0; | ||||||||||
| costSz = 0; | ||||||||||
| costSz = 1; | ||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Let's fix it for good while we're here. |
||||||||||
| #else | ||||||||||
| #error "Unknown TARGET" | ||||||||||
| #endif | ||||||||||
|
|
||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -151,6 +151,39 @@ INST(f64_div, "f64.div", 0, IF_OPCODE, 0xA3) | |
| INST(f64_min, "f64.min", 0, IF_OPCODE, 0xA4) | ||
| INST(f64_max, "f64.max", 0, IF_OPCODE, 0xA5) | ||
| INST(f64_copysign,"f64.copysign",0, IF_OPCODE, 0xA6) | ||
| // Unary operations | ||
| INST(i32_wrap_i64, "i32.wrap_i64", 0, IF_OPCODE, 0xA7) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The formatting (alignment) is off.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I thought jitformat would fix it, but it didn't.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (We do explicitly disable jit-format for these kinds of lists via |
||
| INST(i32_trunc_s_f32,"i32.trunc_s_f32",0, IF_OPCODE, 0xA8) | ||
| INST(i32_trunc_u_f32,"i32.trunc_u_f32",0, IF_OPCODE, 0xA9) | ||
| INST(i32_trunc_s_f64,"i32.trunc_s_f64",0, IF_OPCODE, 0xAA) | ||
| INST(i32_trunc_u_f64,"i32.trunc_u_f64",0, IF_OPCODE, 0xAB) | ||
| INST(i64_extend_s_i32,"i32.extend_s_i32",0, IF_OPCODE, 0xAC) | ||
| INST(i64_extend_u_i32,"i32.extend_u_i32",0, IF_OPCODE, 0xAD) | ||
kg marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| INST(i64_trunc_s_f32,"i64.trunc_s_f32",0, IF_OPCODE, 0xAE) | ||
| INST(i64_trunc_u_f32,"i64.trunc_u_f32",0, IF_OPCODE, 0xAF) | ||
| INST(i64_trunc_s_f64,"i64.trunc_s_f64",0, IF_OPCODE, 0xB0) | ||
| INST(i64_trunc_u_f64,"i64.trunc_u_f64",0, IF_OPCODE, 0xB1) | ||
| INST(f32_convert_s_i32,"f32.convert_s_i32",0, IF_OPCODE, 0xB2) | ||
| INST(f32_convert_u_i32,"f32.convert_u_i32",0, IF_OPCODE, 0xB3) | ||
| INST(f32_convert_s_i64,"f32.convert_s_i64",0, IF_OPCODE, 0xB4) | ||
| INST(f32_convert_u_i64,"f32.convert_u_i64",0, IF_OPCODE, 0xB5) | ||
| INST(f32_demote_f64,"f32.demote_f64",0, IF_OPCODE, 0xB6) | ||
| INST(f64_convert_s_i32,"f64.convert_s_i32",0, IF_OPCODE, 0xB7) | ||
| INST(f64_convert_u_i32,"f64.convert_u_i32",0, IF_OPCODE, 0xB8) | ||
| INST(f64_convert_s_i64,"f64.convert_s_i64",0, IF_OPCODE, 0xB9) | ||
| INST(f64_convert_u_i64,"f64.convert_u_i64",0, IF_OPCODE, 0xBA) | ||
| // NOTE: This is named f32_promote_f64 in the spec, which is wrong. | ||
kg marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| INST(f64_promote_f32,"f64.promote_f32",0, IF_OPCODE, 0xBB) | ||
| INST(i32_reinterpret_f32,"i32.reinterpret_f32",0, IF_OPCODE, 0xBC) | ||
| INST(i64_reinterpret_f64,"i64.reinterpret_f64",0, IF_OPCODE, 0xBD) | ||
| INST(f32_reinterpret_i32,"f32.reinterpret_i32",0, IF_OPCODE, 0xBE) | ||
| INST(f64_reinterpret_i64,"f64.reinterpret_i64",0, IF_OPCODE, 0xBF) | ||
| INST(i32_extend8_s,"i32.extend8_s",0, IF_OPCODE, 0xC0) | ||
| INST(i32_extend16_s,"i32.extend16_s",0, IF_OPCODE, 0xC1) | ||
| INST(i64_extend8_s,"i64.extend8_s",0, IF_OPCODE, 0xC2) | ||
| INST(i64_extend16_s,"i64.extend16_s",0, IF_OPCODE, 0xC3) | ||
| INST(i64_extend32_s,"i64.extend32_s",0, IF_OPCODE, 0xC4) | ||
|
|
||
| // clang-format on | ||
|
|
||
| #undef INST | ||
Uh oh!
There was an error while loading. Please reload this page.