Skip to content

Commit d67b6d1

Browse files
uweigand authored and bongjunj committed
s390x: Refactor hardware facility detection (bytecodealliance#11220)
This patch implements a number of changes relating to s390x HW facilities and facility detection:
- Fix a misnamed facility: the z15 (arch13) CPU introduced the Miscellaneous-Instruction-Extensions Facility *3* (not 2). Rename "mie2" to "mie3" throughout the code base.
- Now that we can use inline asm, use the STORE FACILITY LIST EXTENDED instruction rather than HWCAP to detect facilities at run time. This eliminates the libc crate dependency, and allows for more fine-grained feature detection.
- Add support for the z16 (arch14) CPU names (these do not provide any facilities that would be relevant to cranelift, but it should be possible to use these names as synonyms to z15 at least).
- Add support for the z17 (arch15) CPU names, and two new facilities provided at this level: the Miscellaneous-Instruction-Extensions Facility 4 and the Vector-Enhancements Facility 3. (Note that no code to exploit these facilities is present in this patch; that will be provided later.)
1 parent de20fb5 commit d67b6d1

File tree

17 files changed

+138
-70
lines changed

17 files changed

+138
-70
lines changed

cranelift/codegen/meta/src/isa/s390x.rs

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ pub(crate) fn define() -> TargetIsa {
88
// so we list only facilities of later processors here.
99

1010
// z15 (arch13) facilities
11-
let has_mie2 = settings.add_bool(
12-
"has_mie2",
13-
"Has Miscellaneous-Instruction-Extensions Facility 2 support.",
11+
let has_mie3 = settings.add_bool(
12+
"has_mie3",
13+
"Has Miscellaneous-Instruction-Extensions Facility 3 support.",
1414
"",
1515
false,
1616
);
@@ -21,18 +21,54 @@ pub(crate) fn define() -> TargetIsa {
2121
false,
2222
);
2323

24+
// z16 (arch14) has no new facilities that can be exploited by cranelift
25+
26+
// z17 (arch15) facilities
27+
let has_mie4 = settings.add_bool(
28+
"has_mie4",
29+
"Has Miscellaneous-Instruction-Extensions Facility 4 support.",
30+
"",
31+
false,
32+
);
33+
let has_vxrs_ext3 = settings.add_bool(
34+
"has_vxrs_ext3",
35+
"Has Vector-Enhancements Facility 3 support.",
36+
"",
37+
false,
38+
);
39+
2440
// Architecture level presets
2541
settings.add_preset(
2642
"arch13",
2743
"Thirteenth Edition of the z/Architecture.",
28-
preset!(has_mie2 && has_vxrs_ext2),
44+
preset!(has_mie3 && has_vxrs_ext2),
45+
);
46+
settings.add_preset(
47+
"arch14",
48+
"Fourteenth Edition of the z/Architecture.",
49+
preset!(has_mie3 && has_vxrs_ext2),
50+
);
51+
settings.add_preset(
52+
"arch15",
53+
"Fifteenth Edition of the z/Architecture.",
54+
preset!(has_mie3 && has_mie4 && has_vxrs_ext2 && has_vxrs_ext3),
2955
);
3056

3157
// Processor presets
3258
settings.add_preset(
3359
"z15",
3460
"IBM z15 processor.",
35-
preset!(has_mie2 && has_vxrs_ext2),
61+
preset!(has_mie3 && has_vxrs_ext2),
62+
);
63+
settings.add_preset(
64+
"z16",
65+
"IBM z16 processor.",
66+
preset!(has_mie3 && has_vxrs_ext2),
67+
);
68+
settings.add_preset(
69+
"z17",
70+
"IBM z17 processor.",
71+
preset!(has_mie3 && has_mie4 && has_vxrs_ext2 && has_vxrs_ext3),
3672
);
3773

3874
TargetIsa::new("s390x", settings.build())

cranelift/codegen/src/isa/s390x/inst.isle

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1482,10 +1482,10 @@
14821482

14831483
;; Helpers for querying enabled ISA extensions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14841484

1485-
(decl mie2_enabled () Type)
1486-
(extern extractor mie2_enabled mie2_enabled)
1487-
(decl mie2_disabled () Type)
1488-
(extern extractor mie2_disabled mie2_disabled)
1485+
(decl mie3_enabled () Type)
1486+
(extern extractor mie3_enabled mie3_enabled)
1487+
(decl mie3_disabled () Type)
1488+
(extern extractor mie3_disabled mie3_disabled)
14891489

14901490
(decl vxrs_ext2_enabled () Type)
14911491
(extern extractor vxrs_ext2_enabled vxrs_ext2_enabled)

cranelift/codegen/src/isa/s390x/inst/emit.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1435,8 +1435,8 @@ impl Inst {
14351435
match iset_requirement {
14361436
// Baseline ISA is z14
14371437
InstructionSet::Base => true,
1438-
// Miscellaneous-Instruction-Extensions Facility 2 (z15)
1439-
InstructionSet::MIE2 => emit_info.isa_flags.has_mie2(),
1438+
// Miscellaneous-Instruction-Extensions Facility 3 (z15)
1439+
InstructionSet::MIE3 => emit_info.isa_flags.has_mie3(),
14401440
// Vector-Enhancements Facility 2 (z15)
14411441
InstructionSet::VXRS_EXT2 => emit_info.isa_flags.has_vxrs_ext2(),
14421442
}

cranelift/codegen/src/isa/s390x/inst/mod.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,8 @@ impl WritableRegPair {
8787
pub(crate) enum InstructionSet {
8888
/// Baseline ISA for cranelift is z14.
8989
Base,
90-
/// Miscellaneous-Instruction-Extensions Facility 2 (z15)
91-
MIE2,
90+
/// Miscellaneous-Instruction-Extensions Facility 3 (z15)
91+
MIE3,
9292
/// Vector-Enhancements Facility 2 (z15)
9393
VXRS_EXT2,
9494
}
@@ -242,15 +242,15 @@ impl Inst {
242242

243243
// These depend on the opcode
244244
Inst::AluRRR { alu_op, .. } => match alu_op {
245-
ALUOp::NotAnd32 | ALUOp::NotAnd64 => InstructionSet::MIE2,
246-
ALUOp::NotOrr32 | ALUOp::NotOrr64 => InstructionSet::MIE2,
247-
ALUOp::NotXor32 | ALUOp::NotXor64 => InstructionSet::MIE2,
248-
ALUOp::AndNot32 | ALUOp::AndNot64 => InstructionSet::MIE2,
249-
ALUOp::OrrNot32 | ALUOp::OrrNot64 => InstructionSet::MIE2,
245+
ALUOp::NotAnd32 | ALUOp::NotAnd64 => InstructionSet::MIE3,
246+
ALUOp::NotOrr32 | ALUOp::NotOrr64 => InstructionSet::MIE3,
247+
ALUOp::NotXor32 | ALUOp::NotXor64 => InstructionSet::MIE3,
248+
ALUOp::AndNot32 | ALUOp::AndNot64 => InstructionSet::MIE3,
249+
ALUOp::OrrNot32 | ALUOp::OrrNot64 => InstructionSet::MIE3,
250250
_ => InstructionSet::Base,
251251
},
252252
Inst::UnaryRR { op, .. } => match op {
253-
UnaryOp::PopcntReg => InstructionSet::MIE2,
253+
UnaryOp::PopcntReg => InstructionSet::MIE3,
254254
_ => InstructionSet::Base,
255255
},
256256
Inst::FpuRound { op, .. } => match op {

cranelift/codegen/src/isa/s390x/lower.isle

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -985,12 +985,12 @@
985985
;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
986986

987987
;; z15 version using a single instruction (NOR).
988-
(rule 2 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (bnot x)))
988+
(rule 2 (lower (has_type (and (mie3_enabled) (fits_in_64 ty)) (bnot x)))
989989
(let ((rx Reg x))
990990
(not_or_reg ty rx rx)))
991991

992992
;; z14 version using XOR with -1.
993-
(rule 1 (lower (has_type (and (mie2_disabled) (fits_in_64 ty)) (bnot x)))
993+
(rule 1 (lower (has_type (and (mie3_disabled) (fits_in_64 ty)) (bnot x)))
994994
(not_reg ty x))
995995

996996
;; Vector version using vector NOR.
@@ -999,7 +999,7 @@
999999

10001000
;; With z15 (bnot (bxor ...)) can be a single instruction, similar to the
10011001
;; (bxor _ (bnot _)) lowering.
1002-
(rule 3 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (bnot (bxor x y))))
1002+
(rule 3 (lower (has_type (and (mie3_enabled) (fits_in_64 ty)) (bnot (bxor x y))))
10031003
(not_xor_reg ty x y))
10041004

10051005
;; Combine a not/xor operation of vector types into one.
@@ -1038,9 +1038,9 @@
10381038
;; forms early on.
10391039

10401040
;; z15 version using a single instruction.
1041-
(rule 7 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (band x (bnot y))))
1041+
(rule 7 (lower (has_type (and (mie3_enabled) (fits_in_64 ty)) (band x (bnot y))))
10421042
(and_not_reg ty x y))
1043-
(rule 8 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (band (bnot y) x)))
1043+
(rule 8 (lower (has_type (and (mie3_enabled) (fits_in_64 ty)) (band (bnot y) x)))
10441044
(and_not_reg ty x y))
10451045

10461046
;; And-not two vector registers.
@@ -1080,9 +1080,9 @@
10801080
;; forms early on.
10811081

10821082
;; z15 version using a single instruction.
1083-
(rule 7 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (bor x (bnot y))))
1083+
(rule 7 (lower (has_type (and (mie3_enabled) (fits_in_64 ty)) (bor x (bnot y))))
10841084
(or_not_reg ty x y))
1085-
(rule 8 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (bor (bnot y) x)))
1085+
(rule 8 (lower (has_type (and (mie3_enabled) (fits_in_64 ty)) (bor (bnot y) x)))
10861086
(or_not_reg ty x y))
10871087

10881088
;; Or-not two vector registers.
@@ -1119,9 +1119,9 @@
11191119
;; forms early on.
11201120

11211121
;; z15 version using a single instruction.
1122-
(rule 5 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (bxor x (bnot y))))
1122+
(rule 5 (lower (has_type (and (mie3_enabled) (fits_in_64 ty)) (bxor x (bnot y))))
11231123
(not_xor_reg ty x y))
1124-
(rule 6 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (bxor (bnot y) x)))
1124+
(rule 6 (lower (has_type (and (mie3_enabled) (fits_in_64 ty)) (bxor (bnot y) x)))
11251125
(not_xor_reg ty x y))
11261126

11271127
;; Xor-not two vector registers.
@@ -1134,14 +1134,14 @@
11341134
;;;; Rules for `bitselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11351135

11361136
;; z15 version using an and-with-complement (AND-NOT) instruction.
1137-
(rule 2 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (bitselect x y z)))
1137+
(rule 2 (lower (has_type (and (mie3_enabled) (fits_in_64 ty)) (bitselect x y z)))
11381138
(let ((rx Reg x)
11391139
(if_true Reg (and_reg ty y rx))
11401140
(if_false Reg (and_not_reg ty z rx)))
11411141
(or_reg ty if_false if_true)))
11421142

11431143
;; z14 version using XOR with -1.
1144-
(rule 1 (lower (has_type (and (mie2_disabled) (fits_in_64 ty)) (bitselect x y z)))
1144+
(rule 1 (lower (has_type (and (mie3_disabled) (fits_in_64 ty)) (bitselect x y z)))
11451145
(let ((rx Reg x)
11461146
(if_true Reg (and_reg ty y rx))
11471147
(if_false Reg (and_reg ty z (not_reg ty rx))))
@@ -1329,7 +1329,7 @@
13291329

13301330
;; On z15, the POPCNT instruction has a variant to compute a full 64-bit
13311331
;; population count, which we also use for 16- and 32-bit types.
1332-
(rule -1 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (popcnt x)))
1332+
(rule -1 (lower (has_type (and (mie3_enabled) (fits_in_64 ty)) (popcnt x)))
13331333
(popcnt_reg (put_in_reg_zext64 x)))
13341334

13351335
;; On z14, we use the regular POPCNT, which computes the population count
@@ -1340,18 +1340,18 @@
13401340
;; $I16, where we instead accumulate in the low byte and clear high bits
13411341
;; via an explicit and operation.)
13421342

1343-
(rule (lower (has_type (and (mie2_disabled) $I16) (popcnt x)))
1343+
(rule (lower (has_type (and (mie3_disabled) $I16) (popcnt x)))
13441344
(let ((cnt2 Reg (popcnt_byte x))
13451345
(cnt1 Reg (add_reg $I32 cnt2 (lshr_imm $I32 cnt2 8))))
13461346
(and_uimm16shifted $I32 cnt1 (uimm16shifted 255 0))))
13471347

1348-
(rule (lower (has_type (and (mie2_disabled) $I32) (popcnt x)))
1348+
(rule (lower (has_type (and (mie3_disabled) $I32) (popcnt x)))
13491349
(let ((cnt4 Reg (popcnt_byte x))
13501350
(cnt2 Reg (add_reg $I32 cnt4 (lshl_imm $I32 cnt4 16)))
13511351
(cnt1 Reg (add_reg $I32 cnt2 (lshl_imm $I32 cnt2 8))))
13521352
(lshr_imm $I32 cnt1 24)))
13531353

1354-
(rule (lower (has_type (and (mie2_disabled) $I64) (popcnt x)))
1354+
(rule (lower (has_type (and (mie3_disabled) $I64) (popcnt x)))
13551355
(let ((cnt8 Reg (popcnt_byte x))
13561356
(cnt4 Reg (add_reg $I64 cnt8 (lshl_imm $I64 cnt8 32)))
13571357
(cnt2 Reg (add_reg $I64 cnt4 (lshl_imm $I64 cnt4 16)))
@@ -3054,17 +3054,17 @@
30543054
;; On z15 this can use the NN(G)RK instruction. On z14, perform an And
30553055
;; operation and invert the result. In the little-endian case, we can
30563056
;; simply byte-swap the source operand.
3057-
(rule 4 (atomic_rmw_body ib (and (mie2_enabled) (ty_32_or_64 ty)) (bigendian)
3057+
(rule 4 (atomic_rmw_body ib (and (mie3_enabled) (ty_32_or_64 ty)) (bigendian)
30583058
(AtomicRmwOp.Nand) tmp val src)
30593059
(push_alu_reg ib (aluop_not_and ty) tmp val src))
3060-
(rule 3 (atomic_rmw_body ib (and (mie2_enabled) (ty_32_or_64 ty)) (littleendian)
3060+
(rule 3 (atomic_rmw_body ib (and (mie3_enabled) (ty_32_or_64 ty)) (littleendian)
30613061
(AtomicRmwOp.Nand) tmp val src)
30623062
(push_alu_reg ib (aluop_not_and ty) tmp val (bswap_reg ty src)))
3063-
(rule 2 (atomic_rmw_body ib (and (mie2_disabled) (ty_32_or_64 ty)) (bigendian)
3063+
(rule 2 (atomic_rmw_body ib (and (mie3_disabled) (ty_32_or_64 ty)) (bigendian)
30643064
(AtomicRmwOp.Nand) tmp val src)
30653065
(push_not_reg ib ty tmp
30663066
(push_alu_reg ib (aluop_and ty) tmp val src)))
3067-
(rule 1 (atomic_rmw_body ib (and (mie2_disabled) (ty_32_or_64 ty)) (littleendian)
3067+
(rule 1 (atomic_rmw_body ib (and (mie3_disabled) (ty_32_or_64 ty)) (littleendian)
30683068
(AtomicRmwOp.Nand) tmp val src)
30693069
(push_not_reg ib ty tmp
30703070
(push_alu_reg ib (aluop_and ty) tmp val (bswap_reg ty src))))

cranelift/codegen/src/isa/s390x/lower/isle.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -211,17 +211,17 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, S390xBackend> {
211211
}
212212

213213
#[inline]
214-
fn mie2_enabled(&mut self, _: Type) -> Option<()> {
215-
if self.backend.isa_flags.has_mie2() {
214+
fn mie3_enabled(&mut self, _: Type) -> Option<()> {
215+
if self.backend.isa_flags.has_mie3() {
216216
Some(())
217217
} else {
218218
None
219219
}
220220
}
221221

222222
#[inline]
223-
fn mie2_disabled(&mut self, _: Type) -> Option<()> {
224-
if !self.backend.isa_flags.has_mie2() {
223+
fn mie3_disabled(&mut self, _: Type) -> Option<()> {
224+
if !self.backend.isa_flags.has_mie3() {
225225
Some(())
226226
} else {
227227
None

cranelift/filetests/filetests/isa/s390x/bitops-optimized.clif

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
test compile precise-output
22
set opt_level=speed
3-
target s390x has_mie2
3+
target s390x has_mie3
44

55
function %band_not_i32(i32, i32) -> i32 {
66
block0(v0: i32, v1: i32):

cranelift/filetests/filetests/runtests/atomic-rmw-little.clif

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
test interpret
22
test run
33
target s390x
4-
target s390x has_mie2
4+
target s390x has_mie3
55
target aarch64
66
target aarch64 has_lse
77
target x86_64

cranelift/filetests/filetests/runtests/atomic-rmw-subword-big.clif

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
test interpret
22
test run
33
target s390x
4-
target s390x has_mie2
4+
target s390x has_mie3
55

66
; We can't test that these instructions are right regarding atomicity, but we can
77
; test if they perform their operation correctly

cranelift/filetests/filetests/runtests/atomic-rmw-subword-little.clif

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
test interpret
22
test run
33
target s390x
4-
target s390x has_mie2
4+
target s390x has_mie3
55
target aarch64
66
target aarch64 has_lse
77
target x86_64

0 commit comments

Comments
 (0)