Skip to content
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 74 additions & 10 deletions src/coreclr/jit/codegenwasm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,10 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
genCodeForConstant(treeNode);
break;

case GT_CAST:
genCodeForCast(treeNode->AsOp());
break;

default:
#ifdef DEBUG
NYIRAW(GenTree::OpName(treeNode->OperGet()));
Expand Down Expand Up @@ -373,22 +377,82 @@ static constexpr uint32_t PackOperAndType(genTreeOps oper, var_types type)
{
type = TYP_I_IMPL;
}
static_assert((ssize_t)GT_COUNT > (ssize_t)TYP_COUNT);
return ((uint32_t)oper << (ConstLog2<GT_COUNT>::value + 1)) | ((uint32_t)type);
const int shift1 = ConstLog2<TYP_COUNT>::value + 1;
return ((uint32_t)oper << shift1) | ((uint32_t)type);
}

//------------------------------------------------------------------------
// PackOperAndType: Pack a GenTreeOp* into a uint32_t
// PackOperAndType: Pack a genTreeOps and two var_types into a uint32_t
//
// Arguments:
// treeNode - a GenTreeOp to extract oper and type from
// oper - a genTreeOps to pack
// toType - a var_types to pack
// fromType - a var_types to pack
//
// Return Value:
// the node's oper and type packed into an integer that can be used as a switch value
// oper and the types packed into an integer that can be used as a switch value/case
//
static uint32_t PackOperAndType(GenTreeOp* treeNode)
static constexpr uint32_t PackOperAndType(genTreeOps oper, var_types toType, var_types fromType)
{
return PackOperAndType(treeNode->OperGet(), treeNode->TypeGet());
if (fromType == TYP_BYREF)
{
fromType = TYP_I_IMPL;
}
if (toType == TYP_BYREF)
{
toType = TYP_I_IMPL;
}
const int shift1 = ConstLog2<TYP_COUNT>::value + 1;
const int shift2 = shift1 + ConstLog2<GT_COUNT>::value + 1;
return ((uint32_t)oper << shift1) | ((uint32_t)fromType) | ((uint32_t)toType << shift2);
}

//------------------------------------------------------------------------
// genCodeForCast: Generate code for a cast
//
// Arguments:
// tree - The cast node for which we are generating code.
//
void CodeGen::genCodeForCast(GenTreeOp* tree)
{
genConsumeOperands(tree);

instruction ins;
switch (PackOperAndType(tree->OperGet(), /* toType */ tree->TypeGet(), /* fromType */ tree->gtOp1->TypeGet()))
{
// NOTE: For this, RyuJIT seems to just generate an i32 load of the i64 operand instead of a GT_CAST.
// I suspect once we implement use of wasm locals instead of the linear stack, GT_CAST will appear.
Comment on lines +423 to +424
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This part needs to actually be fixed in genCodeForLclVar, under the genIsValidReg(varDsc->GetRegNum()) case, we need to insert INS_i32_wrap_i64 if (tree->TypeIs(TYP_INT) && (varDsc->TypeGet() == TYP_LONG)).

case PackOperAndType(GT_CAST, TYP_INT, TYP_LONG):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this is genCodeForCast (and we can have another method for bitcasts, which are a bit different), this opcode (GT_CAST) field is redundant.

fromType needs to be genActualType(tree->CastOp()). In general, it is almost always a mistake to use the raw node type except when dealing with indirections and you know you want the indirected (small) type instead of the produced node type.

Nit: I think it would look better if we swapped the from-type and to-type. That way it can read like so:

case (TYP_INT, TYP_SHORT)
case (TYP_INT, TYP_USHORT)
...

And so on instead of:

case (TYP_SHORT, TYP_INT)
case (TYP_USHORT, TYP_INT)

(I've always found it a bit peculiar that in the dumps we also use this to-type <- from-type notation, but in dumps there is typically only one cast)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I prefer your suggestion too, but I wanted to match the ordering of the wasm opcode names.

if (tree->gtOverflow())
NYI_WASM("Overflow checks");
Comment on lines +426 to +427
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can move this to the top of the method I think, to avoid adding it into each case.

ins = INS_i32_wrap_i64;
break;

case PackOperAndType(GT_CAST, TYP_LONG, TYP_INT):
// FIXME: Use extend8/extend16 as appropriate
ins = tree->IsUnsigned() ? INS_i64_extend_u_i32 : INS_i64_extend_s_i32;
break;

case PackOperAndType(GT_CAST, TYP_DOUBLE, TYP_FLOAT):
// NOTE: This name is wrong in the spec.
ins = INS_f64_promote_f32;
break;

case PackOperAndType(GT_CAST, TYP_FLOAT, TYP_DOUBLE):
ins = INS_f32_demote_f64;
break;

// TODO: Floating point conversions - we need to figure out where semantics require a helper and where they
// don't.
Comment on lines +445 to +446
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to decide what our "baseline ISA" is going to be and write that down somewhere.

If we're going with 1.0/MVP as the baseline, this will either need inline flow emitted, or a helper call (I think both options are fine?). If we include saturating-fp into the baseline, we can emit these as simple instructions.

In terms of reach, saturating-FP is pretty widespread in larger engines (browsers/wasmtime), but there are also engines out in the wild which implement just the bare minimum. We've had at least one such user in NAOT-LLVM.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was discussing this with Adam and I think our best starting point is to use helpers for everything that we know have the right semantics, and then switch over to generating "native" wasm for our baseline ISA as needed as an optimization. IIRC the code size of the right semantics in native wasm can be quite significant.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So you're saying to adopt 1.0 as the baseline?

This decision affects not just RyuJit but some higher-level code as well. It means we need to add saturating-fp to the list of "instruction sets" in CG2 and such.

As for semantics, I am pretty sure the saturating-fp instructions (*trunc_sat*) are exactly what we need for fp->int conversions.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we use something like wasm-feature-detect and opportunistically emit code like we do for other archs https://github.com/dotnet/runtime/pull/115335/files, i.e. expose a feature bitmap via API in src/native/minipal/cpufeatures.h. Then crossgen2/ilc can also expose the supported ISA levels via their existing --instruction-set arg.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and opportunistically emit code like we do for other archs

Yes and no. We will use --instruction-set for not-in-the-baseline extensions, but we should only emit one version of the code since you'll fail to validate if the extension is not supported so there is no point in emitting a runtime check.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have nothing against using 2.0 or 3.0 as the baseline, I just meant that as a starting point we shouldn't try to generate complex code in order to get the semantics right if we could just use a helper instead. So if, e.g., the saturating truncate does what we want, we can use it - but jiterp had to generate complex code for some of our conversions just like clang does.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Per @lewing baseline can be whatever is supported by the 3 major browsers.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have opened #122309 about this. We can revisit it when implementing FP -> int.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for opening that. I think we can probably use the support guide here: https://webassembly.org/features to decide? For example, I see non-trapping conversions as currently implemented in Chrome, Firefox, and Safari.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for opening that. I think we can probably use the support guide here: https://webassembly.org/features to decide? For example, I see non-trapping conversions as currently implemented in Chrome, Firefox, and Safari.

Let's discuss it in #122311.


default:
ins = INS_none;
NYI_WASM("genCodeForCast");
break;
}

GetEmitter()->emitIns(ins);
genProduceReg(tree);
}

//------------------------------------------------------------------------
Expand All @@ -402,7 +466,7 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode)
genConsumeOperands(treeNode);

instruction ins;
switch (PackOperAndType(treeNode))
switch (PackOperAndType(treeNode->OperGet(), treeNode->TypeGet()))
{
case PackOperAndType(GT_ADD, TYP_INT):
if (treeNode->gtOverflow())
Expand Down Expand Up @@ -497,7 +561,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* treeNode)
genConsumeOperands(treeNode);

instruction ins;
switch (PackOperAndType(treeNode))
switch (PackOperAndType(treeNode->OperGet(), treeNode->TypeGet()))
{
case PackOperAndType(GT_DIV, TYP_INT):
ins = INS_i32_div_s;
Expand Down Expand Up @@ -615,7 +679,7 @@ void CodeGen::genCodeForShift(GenTree* tree)
// for both the shift and shiftee. So the shift may need to be extended (zero-extended) for TYP_LONG.

instruction ins;
switch (PackOperAndType(treeNode))
switch (PackOperAndType(treeNode->OperGet(), treeNode->TypeGet()))
{
case PackOperAndType(GT_LSH, TYP_INT):
ins = INS_i32_shl;
Expand Down
7 changes: 4 additions & 3 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5445,10 +5445,11 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
costEx = 1;
costSz = 4;
#elif defined(TARGET_WASM)
// TODO-WASM: Better estimate of costs for these opcodes. Most of them are one op on x64 but may be
// multiple uops.
costEx = 2;
Comment on lines +5448 to +5450
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// TODO-WASM: Better estimate of costs for these opcodes. Most of them are one op on x64 but may be
// multiple uops.
costEx = 2;
costEx = 1;

We can just be approximately right, there is no need to make it more complex I think. We can make floats a bit more expensive like arm64, but it's not necessary. And the vast majority of casts are not FP-related, so they shouldn't be costed as 2.

// TODO-WASM: 1 byte opcodes except for the int->fp saturating casts which are 2 bytes.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// TODO-WASM: 1 byte opcodes except for the int->fp saturating casts which are 2 bytes.

NYI_WASM("Cast costing");
costEx = 0;
costSz = 0;
costSz = 1;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
costSz = 1;
costSz = varTypeIsFloating(op1) && !varTypeIsFloating(tree) ? 2 : 1;

Let's fix it for good while we're here.

#else
#error "Unknown TARGET"
#endif
Expand Down
33 changes: 33 additions & 0 deletions src/coreclr/jit/instrswasm.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,39 @@ INST(f64_div, "f64.div", 0, IF_OPCODE, 0xA3)
INST(f64_min, "f64.min", 0, IF_OPCODE, 0xA4)
INST(f64_max, "f64.max", 0, IF_OPCODE, 0xA5)
INST(f64_copysign,"f64.copysign",0, IF_OPCODE, 0xA6)
// Unary operations
INST(i32_wrap_i64, "i32.wrap_i64", 0, IF_OPCODE, 0xA7)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The formatting (alignment) is off.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought jitformat would fix it, but it didn't.

Copy link
Contributor

@SingleAccretion SingleAccretion Dec 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(We do explicitly disable jit-format for these kinds of lists via // clang-format off/on, since it doesn't format them sensibly)

// Conversion, promotion/demotion and reinterpretation operators (opcodes 0xA8-0xBF).
// Each entry's display mnemonic carries the same result-type prefix as its identifier.
INST(i32_trunc_s_f32,"i32.trunc_s_f32",0, IF_OPCODE, 0xA8)
INST(i32_trunc_u_f32,"i32.trunc_u_f32",0, IF_OPCODE, 0xA9)
INST(i32_trunc_s_f64,"i32.trunc_s_f64",0, IF_OPCODE, 0xAA)
INST(i32_trunc_u_f64,"i32.trunc_u_f64",0, IF_OPCODE, 0xAB)
INST(i64_extend_s_i32,"i64.extend_s_i32",0, IF_OPCODE, 0xAC)
INST(i64_extend_u_i32,"i64.extend_u_i32",0, IF_OPCODE, 0xAD)
INST(i64_trunc_s_f32,"i64.trunc_s_f32",0, IF_OPCODE, 0xAE)
INST(i64_trunc_u_f32,"i64.trunc_u_f32",0, IF_OPCODE, 0xAF)
INST(i64_trunc_s_f64,"i64.trunc_s_f64",0, IF_OPCODE, 0xB0)
INST(i64_trunc_u_f64,"i64.trunc_u_f64",0, IF_OPCODE, 0xB1)
INST(f32_convert_s_i32,"f32.convert_s_i32",0, IF_OPCODE, 0xB2)
INST(f32_convert_u_i32,"f32.convert_u_i32",0, IF_OPCODE, 0xB3)
INST(f32_convert_s_i64,"f32.convert_s_i64",0, IF_OPCODE, 0xB4)
INST(f32_convert_u_i64,"f32.convert_u_i64",0, IF_OPCODE, 0xB5)
INST(f32_demote_f64,"f32.demote_f64",0, IF_OPCODE, 0xB6)
INST(f64_convert_s_i32,"f64.convert_s_i32",0, IF_OPCODE, 0xB7)
INST(f64_convert_u_i32,"f64.convert_u_i32",0, IF_OPCODE, 0xB8)
INST(f64_convert_s_i64,"f64.convert_s_i64",0, IF_OPCODE, 0xB9)
INST(f64_convert_u_i64,"f64.convert_u_i64",0, IF_OPCODE, 0xBA)
// NOTE: This is named f32_promote_f64 in the spec, which is wrong.
INST(f64_promote_f32,"f64.promote_f32",0, IF_OPCODE, 0xBB)
INST(i32_reinterpret_f32,"i32.reinterpret_f32",0, IF_OPCODE, 0xBC)
INST(i64_reinterpret_f64,"i64.reinterpret_f64",0, IF_OPCODE, 0xBD)
INST(f32_reinterpret_i32,"f32.reinterpret_i32",0, IF_OPCODE, 0xBE)
INST(f64_reinterpret_i64,"f64.reinterpret_i64",0, IF_OPCODE, 0xBF)
// Sign-extension operators (opcodes 0xC0-0xC4).
INST(i32_extend8_s,"i32.extend8_s",0, IF_OPCODE, 0xC0)
INST(i32_extend16_s,"i32.extend16_s",0, IF_OPCODE, 0xC1)
INST(i64_extend8_s,"i64.extend8_s",0, IF_OPCODE, 0xC2)
INST(i64_extend16_s,"i64.extend16_s",0, IF_OPCODE, 0xC3)
INST(i64_extend32_s,"i64.extend32_s",0, IF_OPCODE, 0xC4)

// clang-format on

#undef INST
2 changes: 1 addition & 1 deletion src/coreclr/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9680,7 +9680,7 @@ void Lowering::ContainCheckRet(GenTreeUnOp* ret)
{
assert(ret->OperIs(GT_RETURN, GT_SWIFT_ERROR_RET));

#if !defined(TARGET_64BIT)
#if !defined(TARGET_64BIT) && !defined(TARGET_WASM)
if (ret->TypeIs(TYP_LONG))
{
GenTree* op1 = ret->AsOp()->GetReturnValue();
Expand Down
Loading