Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/.cspell/project-dictionary.txt
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@ lcgr
ldar
ldaxp
ldclrp
ldfadd
ldfmaxnm
ldfminnm
ldiapp
ldrexd
ldsetp
Expand Down Expand Up @@ -182,6 +185,7 @@ versatilepb
virt
vmlinux
vmovdqa
vreg
vtable
vtables
wfxt
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ See the [`atomic128` module's readme](https://github.com/taiki-e/portable-atomic
- When unstable `--cfg portable_atomic_unstable_f128` is also enabled, `AtomicF128` for [unstable `f128`](https://github.com/rust-lang/rust/issues/116909) is also provided.

Note:
- Most of `fetch_*` operations of atomic floats are implemented using CAS loops, which can be slower than equivalent operations of atomic integers. (AArch64 with FEAT_LSFE and GPU targets have atomic instructions for float, [so we plan to use these instructions for them in the future.](https://github.com/taiki-e/portable-atomic/issues/34))
- Atomic float's `fetch_{add,sub,min,max}` are usually implemented using CAS loops, which can be slower than equivalent operations of atomic integers. As an exception, AArch64 with FEAT_LSFE and GPU targets have atomic float instructions, and we use them on AArch64 when the `lsfe` target feature is available at compile-time. We [plan to use atomic float instructions for GPU targets as well in the future](https://github.com/taiki-e/portable-atomic/issues/34).
- Unstable cfgs are outside of the normal semver guarantees and minor or patch versions of portable-atomic may make breaking changes to them at any time.

- **`std`**<br>
Expand Down
7 changes: 5 additions & 2 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ fn main() {

if version.minor >= 80 {
println!(
r#"cargo:rustc-check-cfg=cfg(target_feature,values("fast-serialization","load-store-on-cond","distinct-ops","miscellaneous-extensions-3"))"#
r#"cargo:rustc-check-cfg=cfg(target_feature,values("lsfe","fast-serialization","load-store-on-cond","distinct-ops","miscellaneous-extensions-3"))"#
);

// Custom cfgs set by build script. Not public API.
Expand All @@ -59,7 +59,7 @@ fn main() {
// TODO: handle multi-line target_feature_fallback
// grep -F 'target_feature_fallback("' build.rs | grep -Ev '^ *//' | sed -E 's/^.*target_feature_fallback\(//; s/",.*$/"/' | LC_ALL=C sort -u | tr '\n' ',' | sed -E 's/,$/\n/'
println!(
r#"cargo:rustc-check-cfg=cfg(portable_atomic_target_feature,values("cmpxchg16b","distinct-ops","fast-serialization","load-store-on-cond","lse","lse128","lse2","mclass","miscellaneous-extensions-3","quadword-atomics","rcpc3","v6","zaamo","zabha","zacas"))"#
r#"cargo:rustc-check-cfg=cfg(portable_atomic_target_feature,values("cmpxchg16b","distinct-ops","fast-serialization","load-store-on-cond","lse","lse128","lse2","lsfe","mclass","miscellaneous-extensions-3","quadword-atomics","rcpc3","v6","zaamo","zabha","zacas"))"#
);
}

Expand Down Expand Up @@ -286,6 +286,9 @@ fn main() {
target_feature_fallback("lse", lse);
}
}
// As of rustc 1.85, target_feature "lsfe" is not available on rustc side:
// https://github.com/rust-lang/rust/blob/1.85.0/compiler/rustc_target/src/target_features.rs
target_feature_fallback("lsfe", false);

// As of Apple M1/M1 Pro, on Apple hardware, CAS-loop-based RMW is much slower than
// LL/SC-loop-based RMW: https://github.com/taiki-e/portable-atomic/pull/89
Expand Down
8 changes: 4 additions & 4 deletions src/imp/atomic128/aarch64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -434,10 +434,10 @@ macro_rules! atomic_rmw_inst {
};
($op:ident, $order:ident, write = $write:ident) => {
match $order {
Ordering::Relaxed => $op!("2", ""),
Ordering::Acquire => $op!("a", ""),
Ordering::Release => $op!("6", ""),
Ordering::AcqRel => $op!("e", ""),
Ordering::Relaxed => $op!("2", ""), // ""
Ordering::Acquire => $op!("a", ""), // "a"
Ordering::Release => $op!("6", ""), // "l"
Ordering::AcqRel => $op!("e", ""), // "al"
// In MSVC environments, SeqCst stores/writes need fences after writes.
// https://reviews.llvm.org/D141748
#[cfg(target_env = "msvc")]
Expand Down
75 changes: 68 additions & 7 deletions src/imp/detect/aarch64_aa64reg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,20 @@ include!("common.rs");
// Raw values of the AArch64 ID registers that `_detect` inspects to decide
// which CPU features to report. Each platform-specific `imp::aa64reg()` fills
// this in; fields a platform cannot read are set to 0.
struct AA64Reg {
// ID_AA64ISAR0_EL1, AArch64 Instruction Set Attribute Register 0.
aa64isar0: u64,
// ID_AA64ISAR1_EL1; `_detect` checks bits [23:20] of this value for rcpc3.
aa64isar1: u64,
// ID_AA64ISAR3_EL1; `_detect` checks the LSFE field, bits [19:16].
// Only present in test builds, matching the test-only `lsfe` detection.
#[cfg(test)]
aa64isar3: u64,
// ID_AA64MMFR2_EL1, AArch64 Memory Model Feature Register 2.
aa64mmfr2: u64,
}

#[cold]
fn _detect(info: &mut CpuInfo) {
let AA64Reg { aa64isar0, aa64isar1, aa64mmfr2 } = imp::aa64reg();
let AA64Reg {
aa64isar0,
aa64isar1,
#[cfg(test)]
aa64isar3,
aa64mmfr2,
} = imp::aa64reg();

// ID_AA64ISAR0_EL1, AArch64 Instruction Set Attribute Register 0
// https://developer.arm.com/documentation/ddi0601/2024-12/AArch64-Registers/ID-AA64ISAR0-EL1--AArch64-Instruction-Set-Attribute-Register-0
Expand All @@ -75,6 +83,14 @@ fn _detect(info: &mut CpuInfo) {
if extract(aa64isar1, 23, 20) >= 0b0011 {
info.set(CpuInfoFlag::rcpc3);
}
// ID_AA64ISAR3_EL1, AArch64 Instruction Set Attribute Register 3
// https://developer.arm.com/documentation/ddi0601/2024-12/AArch64-Registers/ID-AA64ISAR3-EL1--AArch64-Instruction-Set-Attribute-Register-3
// LSFE, bits [19:16]
// > FEAT_LSFE implements the functionality identified by the value 0b0001
#[cfg(test)]
if extract(aa64isar3, 19, 16) >= 0b0001 {
info.set(CpuInfoFlag::lsfe);
}
// ID_AA64MMFR2_EL1, AArch64 Memory Model Feature Register 2
// https://developer.arm.com/documentation/ddi0601/2024-12/AArch64-Registers/ID-AA64MMFR2-EL1--AArch64-Memory-Model-Feature-Register-2
// AT, bits [35:32]
Expand Down Expand Up @@ -115,13 +131,35 @@ mod imp {
out(reg) aa64isar1,
options(pure, nomem, nostack, preserves_flags),
);
#[cfg(test)]
#[cfg(not(portable_atomic_pre_llvm_18))]
let aa64isar3: u64;
// ID_AA64ISAR3_EL1 is only recognized on LLVM 18+.
// https://github.com/llvm/llvm-project/commit/17baba9fa2728b1b1134f9dccb9318debd5a9a1b
#[cfg(test)]
#[cfg(not(portable_atomic_pre_llvm_18))]
asm!(
"mrs {0}, ID_AA64ISAR3_EL1",
out(reg) aa64isar3,
options(pure, nomem, nostack, preserves_flags),
);
let aa64mmfr2: u64;
asm!(
"mrs {0}, ID_AA64MMFR2_EL1",
out(reg) aa64mmfr2,
options(pure, nomem, nostack, preserves_flags),
);
AA64Reg { aa64isar0, aa64isar1, aa64mmfr2 }
AA64Reg {
aa64isar0,
aa64isar1,
#[cfg(test)]
#[cfg(not(portable_atomic_pre_llvm_18))]
aa64isar3,
#[cfg(test)]
#[cfg(portable_atomic_pre_llvm_18)]
aa64isar3: 0,
aa64mmfr2,
}
}
}
}
Expand Down Expand Up @@ -213,6 +251,8 @@ mod imp {
Some(AA64Reg {
aa64isar0: buf.ac_aa64isar0,
aa64isar1: buf.ac_aa64isar1,
#[cfg(test)]
aa64isar3: 0,
aa64mmfr2: buf.ac_aa64mmfr2,
})
}
Expand All @@ -226,7 +266,13 @@ mod imp {
// https://github.com/golang/sys/commit/ef9fd89ba245e184bdd308f7f2b4f3c551fa5b0f
match sysctl_cpu_id(c!("machdep.cpu0.cpu_id")) {
Some(cpu_id) => cpu_id,
None => AA64Reg { aa64isar0: 0, aa64isar1: 0, aa64mmfr2: 0 },
None => AA64Reg {
aa64isar0: 0,
aa64isar1: 0,
#[cfg(test)]
aa64isar3: 0,
aa64mmfr2: 0,
},
}
}
}
Expand Down Expand Up @@ -285,7 +331,13 @@ mod imp {
let aa64isar0 = sysctl64(&[ffi::CTL_MACHDEP, ffi::CPU_ID_AA64ISAR0]).unwrap_or(0);
let aa64isar1 = sysctl64(&[ffi::CTL_MACHDEP, ffi::CPU_ID_AA64ISAR1]).unwrap_or(0);
let aa64mmfr2 = sysctl64(&[ffi::CTL_MACHDEP, ffi::CPU_ID_AA64MMFR2]).unwrap_or(0);
AA64Reg { aa64isar0, aa64isar1, aa64mmfr2 }
AA64Reg {
aa64isar0,
aa64isar1,
#[cfg(test)]
aa64isar3: 0,
aa64mmfr2,
}
}

fn sysctl64(mib: &[ffi::c_int]) -> Option<u64> {
Expand Down Expand Up @@ -330,11 +382,12 @@ mod tests {
#[test]
#[cfg_attr(portable_atomic_test_detect_false, ignore)]
fn test_aa64reg() {
let AA64Reg { aa64isar0, aa64isar1, aa64mmfr2 } = imp::aa64reg();
let AA64Reg { aa64isar0, aa64isar1, aa64isar3, aa64mmfr2 } = imp::aa64reg();
test_helper::eprintln_nocapture!(
"aa64isar0={},aa64isar1={},aa64mmfr2={}",
"aa64isar0={},aa64isar1={},aa64isar3={},aa64mmfr2={}",
aa64isar0,
aa64isar1,
aa64isar3,
aa64mmfr2,
);
let atomic = extract(aa64isar0, 23, 20);
Expand All @@ -353,6 +406,12 @@ mod tests {
} else {
assert!(lrcpc < 0b0011, "{}", lrcpc);
}
let lsfe = extract(aa64isar3, 19, 16);
if detect().lsfe() {
assert_eq!(lsfe, 0b0001);
} else {
assert_eq!(lsfe, 0b0000);
}
let at = extract(aa64mmfr2, 35, 32);
if detect().lse2() {
assert_eq!(at, 0b0001);
Expand Down Expand Up @@ -484,6 +543,7 @@ mod tests {
Ok(AA64Reg {
aa64isar0: buf.ac_aa64isar0,
aa64isar1: buf.ac_aa64isar1,
aa64isar3: 0,
aa64mmfr2: buf.ac_aa64mmfr2,
})
}
Expand Down Expand Up @@ -520,10 +580,11 @@ mod tests {
}
}

let AA64Reg { aa64isar0, aa64isar1, aa64mmfr2 } = imp::aa64reg();
let AA64Reg { aa64isar0, aa64isar1, aa64isar3, aa64mmfr2 } = imp::aa64reg();
let sysctl_output = SysctlMachdepOutput::new();
assert_eq!(aa64isar0, sysctl_output.field("machdep.id_aa64isar0").unwrap_or(0));
assert_eq!(aa64isar1, sysctl_output.field("machdep.id_aa64isar1").unwrap_or(0));
assert_eq!(aa64isar3, sysctl_output.field("machdep.id_aa64isar3").unwrap_or(0));
assert_eq!(aa64mmfr2, sysctl_output.field("machdep.id_aa64mmfr2").unwrap_or(0));
}
}
3 changes: 3 additions & 0 deletions src/imp/detect/aarch64_apple.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ fn _detect(info: &mut CpuInfo) {
check!(lse, "hw.optional.arm.FEAT_LSE" || "hw.optional.armv8_1_atomics");
check!(lse2, "hw.optional.arm.FEAT_LSE2");
check!(lse128, "hw.optional.arm.FEAT_LSE128");
#[cfg(test)]
check!(lsfe, "hw.optional.arm.FEAT_LSFE");
check!(rcpc3, "hw.optional.arm.FEAT_LRCPC3");
}

Expand Down Expand Up @@ -257,6 +259,7 @@ mod tests {
(c!("hw.optional.armv8_1_atomics"), Some(1)),
(c!("hw.optional.arm.FEAT_LSE2"), Some(1)),
(c!("hw.optional.arm.FEAT_LSE128"), None),
(c!("hw.optional.arm.FEAT_LSFE"), None),
(c!("hw.optional.arm.FEAT_LRCPC"), Some(1)),
(c!("hw.optional.arm.FEAT_LRCPC2"), Some(1)),
(c!("hw.optional.arm.FEAT_LRCPC3"), None),
Expand Down
7 changes: 7 additions & 0 deletions src/imp/detect/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,13 @@ flags! {
// > If FEAT_LSE128 is implemented, then FEAT_LSE is implemented.
#[cfg_attr(not(test), allow(dead_code))]
lse128("lse128", any(target_feature /* nightly */, portable_atomic_target_feature)),
// FEAT_LSFE, Large System Float Extension
// https://developer.arm.com/documentation/109697/2024_12/Feature-descriptions/The-Armv9-6-architecture-extension
// > This feature is supported in AArch64 state only.
// > FEAT_LSFE is OPTIONAL from Armv9.3.
// > If FEAT_LSFE is implemented, then FEAT_FP is implemented.
#[cfg(test)]
lsfe("lsfe", any(target_feature /* N/A */, portable_atomic_target_feature)),
}

// LLVM definitions: https://github.com/llvm/llvm-project/blob/llvmorg-20.1.0/llvm/lib/Target/PowerPC/PPC.td
Expand Down
Loading
Loading