Skip to content

Commit fbac55b

Browse files
authored
[AArch64] Optimize vector fmul(sitofp/uitofp, 1/2^N) -> scvtf/ucvtf (llvm#141480)
When a vector integer-to-float conversion is followed by a multiply with a reciprocal power-of-two constant, we can fold both operations into a single SCVTF or UCVTF instruction with a fixed-point shift operand. For example, `fmul(sitofp(v2i32 x), <0.5, 0.5>)` becomes `scvtf.2s v0, v0, #1`.

This is a reworked version with several improvements over the original submission:

- Rewrite the C++ operand matcher to share implementation with the existing `SelectCVTFixedPointVec` (MOVIshift, FMOV, and DUP handling with correct truncation for f16)
- Add `uitofp`/`ucvtf` patterns via a `CVTFRecipPat` multiclass
- Add full GlobalISel support (`GIComplexOperandMatcher` + renderer)

Supported vector types: `v2f32`, `v4f32`, `v2f64`, `v4f16`, `v8f16`.

Fixes llvm#94909
1 parent 7b43dcd commit fbac55b

4 files changed

Lines changed: 591 additions & 16 deletions

File tree

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,14 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
496496
bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
497497
unsigned Width);
498498

499+
template <unsigned FloatWidth>
500+
bool SelectCVTFixedPosRecipOperandVec(SDValue N, SDValue &FixedPos) {
501+
return SelectCVTFixedPosRecipOperandVec(N, FixedPos, FloatWidth);
502+
}
503+
504+
bool SelectCVTFixedPosRecipOperandVec(SDValue N, SDValue &FixedPos,
505+
unsigned Width);
506+
499507
bool SelectCMP_SWAP(SDNode *N);
500508

501509
bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
@@ -4147,14 +4155,11 @@ static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
41474155
return false;
41484156
}
41494157

4150-
bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
4151-
unsigned RegWidth) {
4152-
return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4153-
/*isReciprocal*/ false);
4154-
}
4155-
4156-
bool AArch64DAGToDAGISel::SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos,
4157-
unsigned RegWidth) {
4158+
static bool checkCVTFixedPointOperandWithFBitsForVectors(SelectionDAG *CurDAG,
4159+
SDValue N,
4160+
SDValue &FixedPos,
4161+
unsigned RegWidth,
4162+
bool isReciprocal) {
41584163
if ((N.getOpcode() == AArch64ISD::NVCAST || N.getOpcode() == ISD::BITCAST) &&
41594164
N.getValueType().getScalarSizeInBits() ==
41604165
N.getOperand(0).getValueType().getScalarSizeInBits())
@@ -4192,15 +4197,34 @@ bool AArch64DAGToDAGISel::SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos,
41924197
return false;
41934198
}
41944199

4195-
if (unsigned FBits = CheckFixedPointOperandConstant(FVal, RegWidth,
4196-
/*isReciprocal*/ false)) {
4200+
if (unsigned FBits =
4201+
CheckFixedPointOperandConstant(FVal, RegWidth, isReciprocal)) {
41974202
FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
41984203
return true;
41994204
}
42004205

42014206
return false;
42024207
}
42034208

4209+
bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
4210+
unsigned RegWidth) {
4211+
return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4212+
/*isReciprocal*/ false);
4213+
}
4214+
4215+
bool AArch64DAGToDAGISel::SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos,
4216+
unsigned RegWidth) {
4217+
return checkCVTFixedPointOperandWithFBitsForVectors(
4218+
CurDAG, N, FixedPos, RegWidth, /*isReciprocal*/ false);
4219+
}
4220+
4221+
bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperandVec(SDValue N,
4222+
SDValue &FixedPos,
4223+
unsigned RegWidth) {
4224+
return checkCVTFixedPointOperandWithFBitsForVectors(
4225+
CurDAG, N, FixedPos, RegWidth, /*isReciprocal*/ true);
4226+
}
4227+
42044228
bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
42054229
SDValue &FixedPos,
42064230
unsigned RegWidth) {

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9296,6 +9296,53 @@ defm : FCVTPat<v4f16, v4i16, i16, V64, fixedpoint_v4f16>;
92969296
defm : FCVTPat<v8f16, v8i16, i16, V128, fixedpoint_v8f16>;
92979297
}
92989298

9299+
// fmul(sitofp(x), 1/2^N) -> scvtf(x, N), fmul(uitofp(x), 1/2^N) -> ucvtf(x, N)
9300+
class fixedpoint_recip_vec_f64<ValueType FloatVT>
9301+
: ComplexPattern<FloatVT, 1, "SelectCVTFixedPosRecipOperandVec<64>">;
9302+
class fixedpoint_recip_vec_f32<ValueType FloatVT>
9303+
: ComplexPattern<FloatVT, 1, "SelectCVTFixedPosRecipOperandVec<32>">;
9304+
class fixedpoint_recip_vec_f16<ValueType FloatVT>
9305+
: ComplexPattern<FloatVT, 1, "SelectCVTFixedPosRecipOperandVec<16>">;
9306+
def fixedpoint_recip_vec_xform : SDNodeXForm<timm, [{
9307+
(void)N;
9308+
return V;
9309+
}]>;
9310+
def gi_fixedpoint_recip_vec_xform
9311+
: GICustomOperandRenderer<"renderFixedPointRecipXForm">,
9312+
GISDNodeXFormEquiv<fixedpoint_recip_vec_xform>;
9313+
9314+
def fixedpoint_recip_v2f64 : fixedpoint_recip_vec_f64<v2f64>;
9315+
def fixedpoint_recip_v2f32 : fixedpoint_recip_vec_f32<v2f32>;
9316+
def fixedpoint_recip_v4f32 : fixedpoint_recip_vec_f32<v4f32>;
9317+
def fixedpoint_recip_v4f16 : fixedpoint_recip_vec_f16<v4f16>;
9318+
def fixedpoint_recip_v8f16 : fixedpoint_recip_vec_f16<v8f16>;
9319+
9320+
def gi_fixedpoint_recip_v2f64
9321+
: GIComplexOperandMatcher<v2s64, "selectCVTFixedPosRecipOperandVec">,
9322+
GIComplexPatternEquiv<fixedpoint_recip_v2f64>;
9323+
def gi_fixedpoint_recip_v2f32
9324+
: GIComplexOperandMatcher<v2s32, "selectCVTFixedPosRecipOperandVec">,
9325+
GIComplexPatternEquiv<fixedpoint_recip_v2f32>;
9326+
def gi_fixedpoint_recip_v4f32
9327+
: GIComplexOperandMatcher<v4s32, "selectCVTFixedPosRecipOperandVec">,
9328+
GIComplexPatternEquiv<fixedpoint_recip_v4f32>;
9329+
def gi_fixedpoint_recip_v4f16
9330+
: GIComplexOperandMatcher<v4s16, "selectCVTFixedPosRecipOperandVec">,
9331+
GIComplexPatternEquiv<fixedpoint_recip_v4f16>;
9332+
def gi_fixedpoint_recip_v8f16
9333+
: GIComplexOperandMatcher<v8s16, "selectCVTFixedPosRecipOperandVec">,
9334+
GIComplexPatternEquiv<fixedpoint_recip_v8f16>;
9335+
9336+
multiclass CVTFRecipPat<ValueType FVT, ValueType IVT,
9337+
RegisterOperand RC, ComplexPattern fixedpoint> {
9338+
def : Pat<(FVT (fmul (sint_to_fp (IVT RC:$Rn)), fixedpoint:$scale)),
9339+
(!cast<Instruction>("SCVTF"#IVT#"_shift") RC:$Rn,
9340+
(fixedpoint_recip_vec_xform fixedpoint:$scale))>;
9341+
def : Pat<(FVT (fmul (uint_to_fp (IVT RC:$Rn)), fixedpoint:$scale)),
9342+
(!cast<Instruction>("UCVTF"#IVT#"_shift") RC:$Rn,
9343+
(fixedpoint_recip_vec_xform fixedpoint:$scale))>;
9344+
}
9345+
92999346
// X << 1 ==> X + X
93009347
class SHLToADDPat<ValueType ty, RegisterClass regtype>
93019348
: Pat<(ty (AArch64vshl (ty regtype:$Rn), (i32 1))),
@@ -9350,6 +9397,16 @@ defm USHR : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>;
93509397
defm USRA : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",
93519398
TriOpFrag<(add_like node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >;
93529399

9400+
let Predicates = [HasNEON] in {
9401+
defm : CVTFRecipPat<v2f64, v2i64, V128, fixedpoint_recip_v2f64>;
9402+
defm : CVTFRecipPat<v2f32, v2i32, V64, fixedpoint_recip_v2f32>;
9403+
defm : CVTFRecipPat<v4f32, v4i32, V128, fixedpoint_recip_v4f32>;
9404+
}
9405+
let Predicates = [HasNEON, HasFullFP16] in {
9406+
defm : CVTFRecipPat<v4f16, v4i16, V64, fixedpoint_recip_v4f16>;
9407+
defm : CVTFRecipPat<v8f16, v8i16, V128, fixedpoint_recip_v8f16>;
9408+
}
9409+
93539410
def VImm0080: PatLeaf<(AArch64movi_shift (i32 128), (i32 0))>;
93549411
def VImm00008000: PatLeaf<(AArch64movi_shift (i32 128), (i32 8))>;
93559412
def VImm0000000080000000: PatLeaf<(AArch64NvCast (v2f64 (fneg (AArch64NvCast (v4i32 (AArch64movi_shift (i32 128), (i32 24)))))))>;

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -484,9 +484,14 @@ class AArch64InstructionSelector : public InstructionSelector {
484484

485485
ComplexRendererFns selectCVTFixedPointVec(MachineOperand &Root) const;
486486
ComplexRendererFns
487-
selectCVTFixedPointVecBase(const MachineOperand &Root) const;
487+
selectCVTFixedPosRecipOperandVec(MachineOperand &Root) const;
488+
ComplexRendererFns
489+
selectCVTFixedPointVecBase(const MachineOperand &Root,
490+
bool isReciprocal = false) const;
488491
void renderFixedPointXForm(MachineInstrBuilder &MIB, const MachineInstr &MI,
489492
int OpIdx = -1) const;
493+
void renderFixedPointRecipXForm(MachineInstrBuilder &MIB,
494+
const MachineInstr &MI, int OpIdx = -1) const;
490495

491496
void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
492497
int OpIdx = -1) const;
@@ -7863,7 +7868,7 @@ AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
78637868

78647869
InstructionSelector::ComplexRendererFns
78657870
AArch64InstructionSelector::selectCVTFixedPointVecBase(
7866-
const MachineOperand &Root) const {
7871+
const MachineOperand &Root, bool isReciprocal) const {
78677872
if (!Root.isReg())
78687873
return std::nullopt;
78697874
const MachineRegisterInfo &MRI =
@@ -7892,16 +7897,22 @@ AArch64InstructionSelector::selectCVTFixedPointVecBase(
78927897
default:
78937898
return std::nullopt;
78947899
};
7895-
if (unsigned FBits = CheckFixedPointOperandConstant(FVal, RegWidth,
7896-
/*isReciprocal*/ false))
7900+
if (unsigned FBits =
7901+
CheckFixedPointOperandConstant(FVal, RegWidth, isReciprocal))
78977902
return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(FBits); }}};
78987903

78997904
return std::nullopt;
79007905
}
79017906

79027907
InstructionSelector::ComplexRendererFns
79037908
AArch64InstructionSelector::selectCVTFixedPointVec(MachineOperand &Root) const {
7904-
return selectCVTFixedPointVecBase(Root);
7909+
return selectCVTFixedPointVecBase(Root, /*isReciprocal*/ false);
7910+
}
7911+
7912+
InstructionSelector::ComplexRendererFns
7913+
AArch64InstructionSelector::selectCVTFixedPosRecipOperandVec(
7914+
MachineOperand &Root) const {
7915+
return selectCVTFixedPointVecBase(Root, /*isReciprocal*/ true);
79057916
}
79067917

79077918
void AArch64InstructionSelector::renderFixedPointXForm(MachineInstrBuilder &MIB,
@@ -7911,12 +7922,21 @@ void AArch64InstructionSelector::renderFixedPointXForm(MachineInstrBuilder &MIB,
79117922
// should be able to reuse the Renderers already calculated by
79127923
// selectCVTFixedPointVecBase.
79137924
InstructionSelector::ComplexRendererFns Renderer =
7914-
selectCVTFixedPointVecBase(MI.getOperand(2));
7925+
selectCVTFixedPointVecBase(MI.getOperand(2), /*isReciprocal*/ false);
79157926
assert((Renderer && Renderer->size() == 1) &&
79167927
"Expected selectCVTFixedPointVec to provide a function\n");
79177928
(Renderer->front())(MIB);
79187929
}
79197930

7931+
void AArch64InstructionSelector::renderFixedPointRecipXForm(
7932+
MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
7933+
InstructionSelector::ComplexRendererFns Renderer =
7934+
selectCVTFixedPointVecBase(MI.getOperand(2), /*isReciprocal*/ true);
7935+
assert((Renderer && Renderer->size() == 1) &&
7936+
"Expected selectCVTFixedPosRecipOperandVec to provide a function\n");
7937+
(Renderer->front())(MIB);
7938+
}
7939+
79207940
void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
79217941
const MachineInstr &MI,
79227942
int OpIdx) const {

0 commit comments

Comments
 (0)