|
985 | 985 | ;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
986 | 986 |
|
987 | 987 | ;; z15 version using a single instruction (NOR). |
988 | | -(rule 2 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (bnot x))) |
| 988 | +(rule 2 (lower (has_type (and (mie3_enabled) (fits_in_64 ty)) (bnot x))) |
989 | 989 | (let ((rx Reg x)) |
990 | 990 | (not_or_reg ty rx rx))) |
991 | 991 |
|
992 | 992 | ;; z14 version using XOR with -1. |
993 | | -(rule 1 (lower (has_type (and (mie2_disabled) (fits_in_64 ty)) (bnot x))) |
| 993 | +(rule 1 (lower (has_type (and (mie3_disabled) (fits_in_64 ty)) (bnot x))) |
994 | 994 | (not_reg ty x)) |
995 | 995 |
|
996 | 996 | ;; Vector version using vector NOR. |
|
999 | 999 |
|
1000 | 1000 | ;; With z15, (bnot (bxor ...)) can be a single instruction, similar to the |
1001 | 1001 | ;; (bxor _ (bnot _)) lowering. |
1002 | | -(rule 3 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (bnot (bxor x y)))) |
| 1002 | +(rule 3 (lower (has_type (and (mie3_enabled) (fits_in_64 ty)) (bnot (bxor x y)))) |
1003 | 1003 | (not_xor_reg ty x y)) |
1004 | 1004 |
|
1005 | 1005 | ;; Combine a not/xor operation of vector types into one. |
|
1038 | 1038 | ;; forms early on. |
1039 | 1039 |
|
1040 | 1040 | ;; z15 version using a single instruction. |
1041 | | -(rule 7 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (band x (bnot y)))) |
| 1041 | +(rule 7 (lower (has_type (and (mie3_enabled) (fits_in_64 ty)) (band x (bnot y)))) |
1042 | 1042 | (and_not_reg ty x y)) |
1043 | | -(rule 8 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (band (bnot y) x))) |
| 1043 | +(rule 8 (lower (has_type (and (mie3_enabled) (fits_in_64 ty)) (band (bnot y) x))) |
1044 | 1044 | (and_not_reg ty x y)) |
1045 | 1045 |
|
1046 | 1046 | ;; And-not two vector registers. |
|
1080 | 1080 | ;; forms early on. |
1081 | 1081 |
|
1082 | 1082 | ;; z15 version using a single instruction. |
1083 | | -(rule 7 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (bor x (bnot y)))) |
| 1083 | +(rule 7 (lower (has_type (and (mie3_enabled) (fits_in_64 ty)) (bor x (bnot y)))) |
1084 | 1084 | (or_not_reg ty x y)) |
1085 | | -(rule 8 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (bor (bnot y) x))) |
| 1085 | +(rule 8 (lower (has_type (and (mie3_enabled) (fits_in_64 ty)) (bor (bnot y) x))) |
1086 | 1086 | (or_not_reg ty x y)) |
1087 | 1087 |
|
1088 | 1088 | ;; Or-not two vector registers. |
|
1119 | 1119 | ;; forms early on. |
1120 | 1120 |
|
1121 | 1121 | ;; z15 version using a single instruction. |
1122 | | -(rule 5 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (bxor x (bnot y)))) |
| 1122 | +(rule 5 (lower (has_type (and (mie3_enabled) (fits_in_64 ty)) (bxor x (bnot y)))) |
1123 | 1123 | (not_xor_reg ty x y)) |
1124 | | -(rule 6 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (bxor (bnot y) x))) |
| 1124 | +(rule 6 (lower (has_type (and (mie3_enabled) (fits_in_64 ty)) (bxor (bnot y) x))) |
1125 | 1125 | (not_xor_reg ty x y)) |
1126 | 1126 |
|
1127 | 1127 | ;; Xor-not two vector registers. |
|
1134 | 1134 | ;;;; Rules for `bitselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
1135 | 1135 |
|
1136 | 1136 | ;; z15 version using an and-not (AND WITH COMPLEMENT) instruction. |
1137 | | -(rule 2 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (bitselect x y z))) |
| 1137 | +(rule 2 (lower (has_type (and (mie3_enabled) (fits_in_64 ty)) (bitselect x y z))) |
1138 | 1138 | (let ((rx Reg x) |
1139 | 1139 | (if_true Reg (and_reg ty y rx)) |
1140 | 1140 | (if_false Reg (and_not_reg ty z rx))) |
1141 | 1141 | (or_reg ty if_false if_true))) |
1142 | 1142 |
|
1143 | 1143 | ;; z14 version using XOR with -1. |
1144 | | -(rule 1 (lower (has_type (and (mie2_disabled) (fits_in_64 ty)) (bitselect x y z))) |
| 1144 | +(rule 1 (lower (has_type (and (mie3_disabled) (fits_in_64 ty)) (bitselect x y z))) |
1145 | 1145 | (let ((rx Reg x) |
1146 | 1146 | (if_true Reg (and_reg ty y rx)) |
1147 | 1147 | (if_false Reg (and_reg ty z (not_reg ty rx)))) |
|
1329 | 1329 |
|
1330 | 1330 | ;; On z15, the POPCNT instruction has a variant to compute a full 64-bit |
1331 | 1331 | ;; population count, which we also use for 16- and 32-bit types. |
1332 | | -(rule -1 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (popcnt x))) |
| 1332 | +(rule -1 (lower (has_type (and (mie3_enabled) (fits_in_64 ty)) (popcnt x))) |
1333 | 1333 | (popcnt_reg (put_in_reg_zext64 x))) |
1334 | 1334 |
|
1335 | 1335 | ;; On z14, we use the regular POPCNT, which computes the population count |
|
1340 | 1340 | ;; $I16, where we instead accumulate in the low byte and clear high bits |
1341 | 1341 | ;; via an explicit and operation.) |
1342 | 1342 |
|
1343 | | -(rule (lower (has_type (and (mie2_disabled) $I16) (popcnt x))) |
| 1343 | +(rule (lower (has_type (and (mie3_disabled) $I16) (popcnt x))) |
1344 | 1344 | (let ((cnt2 Reg (popcnt_byte x)) |
1345 | 1345 | (cnt1 Reg (add_reg $I32 cnt2 (lshr_imm $I32 cnt2 8)))) |
1346 | 1346 | (and_uimm16shifted $I32 cnt1 (uimm16shifted 255 0)))) |
1347 | 1347 |
|
1348 | | -(rule (lower (has_type (and (mie2_disabled) $I32) (popcnt x))) |
| 1348 | +(rule (lower (has_type (and (mie3_disabled) $I32) (popcnt x))) |
1349 | 1349 | (let ((cnt4 Reg (popcnt_byte x)) |
1350 | 1350 | (cnt2 Reg (add_reg $I32 cnt4 (lshl_imm $I32 cnt4 16))) |
1351 | 1351 | (cnt1 Reg (add_reg $I32 cnt2 (lshl_imm $I32 cnt2 8)))) |
1352 | 1352 | (lshr_imm $I32 cnt1 24))) |
1353 | 1353 |
|
1354 | | -(rule (lower (has_type (and (mie2_disabled) $I64) (popcnt x))) |
| 1354 | +(rule (lower (has_type (and (mie3_disabled) $I64) (popcnt x))) |
1355 | 1355 | (let ((cnt8 Reg (popcnt_byte x)) |
1356 | 1356 | (cnt4 Reg (add_reg $I64 cnt8 (lshl_imm $I64 cnt8 32))) |
1357 | 1357 | (cnt2 Reg (add_reg $I64 cnt4 (lshl_imm $I64 cnt4 16))) |
|
3054 | 3054 | ;; On z15 this can use the NN(G)RK instruction. On z14, perform an And |
3055 | 3055 | ;; operation and invert the result. In the little-endian case, we can |
3056 | 3056 | ;; simply byte-swap the source operand. |
3057 | | -(rule 4 (atomic_rmw_body ib (and (mie2_enabled) (ty_32_or_64 ty)) (bigendian) |
| 3057 | +(rule 4 (atomic_rmw_body ib (and (mie3_enabled) (ty_32_or_64 ty)) (bigendian) |
3058 | 3058 | (AtomicRmwOp.Nand) tmp val src) |
3059 | 3059 | (push_alu_reg ib (aluop_not_and ty) tmp val src)) |
3060 | | -(rule 3 (atomic_rmw_body ib (and (mie2_enabled) (ty_32_or_64 ty)) (littleendian) |
| 3060 | +(rule 3 (atomic_rmw_body ib (and (mie3_enabled) (ty_32_or_64 ty)) (littleendian) |
3061 | 3061 | (AtomicRmwOp.Nand) tmp val src) |
3062 | 3062 | (push_alu_reg ib (aluop_not_and ty) tmp val (bswap_reg ty src))) |
3063 | | -(rule 2 (atomic_rmw_body ib (and (mie2_disabled) (ty_32_or_64 ty)) (bigendian) |
| 3063 | +(rule 2 (atomic_rmw_body ib (and (mie3_disabled) (ty_32_or_64 ty)) (bigendian) |
3064 | 3064 | (AtomicRmwOp.Nand) tmp val src) |
3065 | 3065 | (push_not_reg ib ty tmp |
3066 | 3066 | (push_alu_reg ib (aluop_and ty) tmp val src))) |
3067 | | -(rule 1 (atomic_rmw_body ib (and (mie2_disabled) (ty_32_or_64 ty)) (littleendian) |
| 3067 | +(rule 1 (atomic_rmw_body ib (and (mie3_disabled) (ty_32_or_64 ty)) (littleendian) |
3068 | 3068 | (AtomicRmwOp.Nand) tmp val src) |
3069 | 3069 | (push_not_reg ib ty tmp |
3070 | 3070 | (push_alu_reg ib (aluop_and ty) tmp val (bswap_reg ty src)))) |
|
0 commit comments