Skip to content

Commit 7d8b329

Browse files
Move creating gpu blobs to m1n1.
Instead of doing GPU initialization partially in m1n1 and partially in the kernel, build all the blobs in m1n1 and put them in the device tree to be picked up by the kernel. Most of the code here is copied and pasted kernel code, with some fixups.

Signed-off-by: Asahi Lina <[email protected]>
Co-authored-by: Sasha Finkelstein <[email protected]>
Signed-off-by: Sasha Finkelstein <[email protected]>
1 parent 7ff0705 commit 7d8b329

21 files changed

Lines changed: 3832 additions & 9 deletions

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,5 @@ __pycache__
1212
/m1n1-*\.src\.rpm
1313
/m1n1-*\.tar\.gz
1414
/rust-fatfs-*\.tar\.gz
15+
target
16+
.idea

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ FP_OBJECTS := \
171171

172172
BUILD_OBJS := $(patsubst %,build/%,$(OBJECTS))
173173
BUILD_FP_OBJS := $(patsubst %,build/%,$(FP_OBJECTS))
174-
BUILD_ALL_OBJS := $(BUILD_OBJS) $(BUILD_FP_OBJS)
174+
BUILD_ALL_OBJS := $(BUILD_FP_OBJS) $(BUILD_OBJS)
175175
NAME := m1n1
176176
TARGET := m1n1.macho
177177
TARGET_RAW := m1n1.bin
@@ -191,7 +191,7 @@ rustfmt:
191191
rustfmt-check:
192192
cd rust && cargo fmt --check
193193

194-
build/$(RUST_LIB): rust/src/* rust/*
194+
build/$(RUST_LIB): rust/src/* rust/* rust/src/gpu/* rust/src/gpu/hw/*
195195
$(QUIET)echo " RS $@"
196196
$(QUIET)mkdir -p $(DEPDIR)
197197
$(QUIET)mkdir -p "$(dir $@)"

rust/Cargo.lock

Lines changed: 6 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

rust/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ crate-type = [ "staticlib" ]
1313
[dependencies]
1414
fatfs = { path = "vendor/rust-fatfs", default-features = false, features = ["lfn", "alloc"] }
1515
uuid = { version = "1.7.0", default-features = false }
16+
versions = { path = "./versions" }
1617

1718
[patch.crates-io]
1819
uuid = { path = "vendor/uuid" }

rust/src/float.rs

Lines changed: 345 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,345 @@
1+
// SPDX-License-Identifier: GPL-2.0-only OR MIT
2+
3+
//! Basic soft floating-point support
4+
//!
5+
//! The GPU firmware requires a large number of power-related configuration values, many of which
6+
//! are IEEE 754 32-bit floating point values. These values change not only between GPU/SoC
7+
//! variants, but also between specific hardware platforms using these SoCs, so they must be
8+
//! derived from device tree properties. There are many redundant values computed from the same
9+
//! inputs with simple add/sub/mul/div calculations, plus a few values that are actually specific
10+
//! to each individual device depending on its binning and fused voltage configuration, so it
11+
//! doesn't make sense to store the final values to be passed to the firmware in the device tree.
12+
//!
13+
//! Therefore, we need a way to perform floating-point calculations in the kernel.
14+
//!
15+
//! Using the actual FPU from kernel mode is asking for trouble, since there is no way to bound
16+
//! the execution of FPU instructions to a controlled section of code without outright putting it
17+
//! in its own compilation unit, which is quite painful for Rust. Since these calculations only
18+
//! have to happen at initialization time and there is no need for performance, let's use a simple
19+
//! software float implementation instead.
20+
//!
21+
//! This implementation makes no attempt to be fully IEEE754 compliant, but it's good enough and
22+
//! gives bit-identical results to macOS in the vast majority of cases, with one or two exceptions
23+
//! related to slightly non-compliant rounding.
24+
25+
use core::ops;
26+
27+
/// A 32-bit floating point number whose arithmetic is carried out in software.
///
/// The wrapped `u32` holds the raw IEEE 754 binary32 bit pattern (sign in bit 31, biased exponent
/// in bits 30..=23, mantissa in bits 22..=0). `#[repr(transparent)]` gives the wrapper the same
/// layout as that `u32`.
#[repr(transparent)]
#[derive(Clone, Copy, Debug, Default)]
pub struct F32(u32);
31+
32+
/// Unpacked working representation used while computing on an `F32`.
#[derive(Clone, Copy, Debug, Default)]
struct F32U {
    /// `true` when the value is negative.
    sign: bool,
    /// Unbiased binary exponent.
    exp: i32,
    /// Fixed-point significand; once normalized, a non-zero value has its highest set bit at
    /// bit 32.
    frac: i64,
}
38+
39+
impl F32 {
40+
#[allow(dead_code)]
41+
/// Convert a raw 32-bit representation into an F32
42+
pub(crate) const fn from_bits(u: u32) -> F32 {
43+
F32(u)
44+
}
45+
46+
// Convert a `f32` value into an F32
47+
//
48+
// This must ONLY be used in const context. Use the `f32!{}` macro to do it safely.
49+
#[doc(hidden)]
50+
pub(crate) const fn from_f32(v: f32) -> F32 {
51+
// Replace with to_bits() after kernel Rust minreq is >= 1.83.0
52+
#[allow(clippy::transmute_float_to_int)]
53+
// SAFETY: Transmuting f32 to u32 is always safe
54+
F32(unsafe { core::mem::transmute::<f32, u32>(v) })
55+
}
56+
57+
// Convert an F32 into a `f32` value
58+
//
59+
// For testing only.
60+
#[doc(hidden)]
61+
#[cfg(test)]
62+
pub(crate) fn to_f32(self) -> f32 {
63+
f32::from_bits(self.0)
64+
}
65+
66+
const fn unpack(&self) -> F32U {
67+
F32U {
68+
sign: self.0 & (1 << 31) != 0,
69+
exp: ((self.0 >> 23) & 0xff) as i32 - 127,
70+
frac: (((self.0 & 0x7fffff) | 0x800000) as i64) << 9,
71+
}
72+
.norm()
73+
}
74+
}
75+
76+
/// Build an `F32` (or an array of them) from constant floating-point expressions.
///
/// Each value is converted inside a `const` item, so the conversion is evaluated at compile time
/// and no floating point operations are emitted.
///
/// `f32!(1.5)` yields a single `F32`; `f32!([1.0, 2.0])` yields an array of `F32`.
#[macro_export]
macro_rules! f32 {
    ([$($elem:expr),*]) => {{
        [$(f32!($elem)),*]
    }};
    ($value:expr) => {{
        const _CONVERTED: $crate::float::F32 = $crate::float::F32::from_f32($value);
        _CONVERTED
    }};
}
90+
91+
impl ops::Neg for F32 {
92+
type Output = F32;
93+
94+
fn neg(self) -> F32 {
95+
F32(self.0 ^ (1 << 31))
96+
}
97+
}
98+
99+
/// Addition: unpack both operands, add them in unpacked form, and repack the sum.
impl ops::Add<F32> for F32 {
    type Output = F32;

    fn add(self, rhs: F32) -> F32 {
        let lhs = self.unpack();
        let sum = lhs.add(rhs.unpack());
        sum.pack()
    }
}
106+
107+
/// Subtraction, implemented as addition of the negated right-hand side.
impl ops::Sub<F32> for F32 {
    type Output = F32;

    fn sub(self, rhs: F32) -> F32 {
        let negated = -rhs;
        let difference = self.unpack().add(negated.unpack());
        difference.pack()
    }
}
114+
115+
/// Multiplication: unpack both operands, multiply in unpacked form, and repack the product.
impl ops::Mul<F32> for F32 {
    type Output = F32;

    fn mul(self, rhs: F32) -> F32 {
        let lhs = self.unpack();
        let product = lhs.mul(rhs.unpack());
        product.pack()
    }
}
122+
123+
/// Division: unpack both operands, divide in unpacked form, and repack the quotient.
impl ops::Div<F32> for F32 {
    type Output = F32;

    fn div(self, rhs: F32) -> F32 {
        let dividend = self.unpack();
        let quotient = dividend.div(rhs.unpack());
        quotient.pack()
    }
}
130+
131+
macro_rules! from_ints {
132+
($u:ty, $i:ty) => {
133+
impl From<$i> for F32 {
134+
fn from(v: $i) -> F32 {
135+
F32U::from_i64(v as i64).pack()
136+
}
137+
}
138+
impl From<$u> for F32 {
139+
fn from(v: $u) -> F32 {
140+
F32U::from_u64(v as u64).pack()
141+
}
142+
}
143+
};
144+
}
145+
146+
from_ints!(u8, i8);
147+
from_ints!(u16, i16);
148+
from_ints!(u32, i32);
149+
from_ints!(u64, i64);
150+
151+
impl F32U {
152+
const INFINITY: F32U = f32!(f32::INFINITY).unpack();
153+
const NEG_INFINITY: F32U = f32!(f32::NEG_INFINITY).unpack();
154+
155+
fn from_i64(v: i64) -> F32U {
156+
F32U {
157+
sign: v < 0,
158+
exp: 32,
159+
frac: v.abs(),
160+
}
161+
.norm()
162+
}
163+
164+
fn from_u64(mut v: u64) -> F32U {
165+
let mut exp = 32;
166+
if v >= (1 << 63) {
167+
exp = 31;
168+
v >>= 1;
169+
}
170+
F32U {
171+
sign: false,
172+
exp,
173+
frac: v as i64,
174+
}
175+
.norm()
176+
}
177+
178+
fn shr(&mut self, shift: i32) {
179+
if shift > 63 {
180+
self.exp = 0;
181+
self.frac = 0;
182+
} else {
183+
self.frac >>= shift;
184+
}
185+
}
186+
187+
fn align(a: &mut F32U, b: &mut F32U) {
188+
if a.exp > b.exp {
189+
b.shr(a.exp - b.exp);
190+
b.exp = a.exp;
191+
} else {
192+
a.shr(b.exp - a.exp);
193+
a.exp = b.exp;
194+
}
195+
}
196+
197+
fn mul(self, other: F32U) -> F32U {
198+
F32U {
199+
sign: self.sign != other.sign,
200+
exp: self.exp + other.exp,
201+
frac: ((self.frac >> 8) * (other.frac >> 8)) >> 16,
202+
}
203+
}
204+
205+
fn div(self, other: F32U) -> F32U {
206+
if other.frac == 0 || self.is_inf() {
207+
if self.sign {
208+
F32U::NEG_INFINITY
209+
} else {
210+
F32U::INFINITY
211+
}
212+
} else {
213+
F32U {
214+
sign: self.sign != other.sign,
215+
exp: self.exp - other.exp,
216+
frac: ((self.frac << 24) / (other.frac >> 8)),
217+
}
218+
}
219+
}
220+
221+
fn add(mut self, mut other: F32U) -> F32U {
222+
F32U::align(&mut self, &mut other);
223+
if self.sign == other.sign {
224+
self.frac += other.frac;
225+
} else {
226+
self.frac -= other.frac;
227+
}
228+
if self.frac < 0 {
229+
self.sign = !self.sign;
230+
self.frac = -self.frac;
231+
}
232+
self
233+
}
234+
235+
const fn norm(mut self) -> F32U {
236+
let lz = self.frac.leading_zeros() as i32;
237+
if lz > 31 {
238+
self.frac <<= lz - 31;
239+
self.exp -= lz - 31;
240+
} else if lz < 31 {
241+
self.frac >>= 31 - lz;
242+
self.exp += 31 - lz;
243+
}
244+
245+
if self.is_zero() {
246+
return F32U {
247+
sign: self.sign,
248+
frac: 0,
249+
exp: 0,
250+
};
251+
}
252+
self
253+
}
254+
255+
const fn is_zero(&self) -> bool {
256+
self.frac == 0 || self.exp < -126
257+
}
258+
259+
const fn is_inf(&self) -> bool {
260+
self.exp > 127
261+
}
262+
263+
const fn pack(mut self) -> F32 {
264+
self = self.norm();
265+
if !self.is_zero() {
266+
self.frac += 0x100;
267+
self = self.norm();
268+
}
269+
270+
if self.is_inf() {
271+
if self.sign {
272+
return f32!(f32::NEG_INFINITY);
273+
} else {
274+
return f32!(f32::INFINITY);
275+
}
276+
} else if self.is_zero() {
277+
if self.sign {
278+
return f32!(-0.0);
279+
} else {
280+
return f32!(0.0);
281+
}
282+
}
283+
284+
F32(if self.sign { 1u32 << 31 } else { 0u32 }
285+
| ((self.exp + 127) as u32) << 23
286+
| ((self.frac >> 9) & 0x7fffff) as u32)
287+
}
288+
}
289+
290+
// TODO: Fix failing cases
#[cfg(test)]
mod tests {
    use super::*;

    /// Compare all four software operators against the native `f32` result for one operand pair.
    fn check_pair(a: f32, b: f32) {
        let (x, y) = (F32::from_f32(a), F32::from_f32(b));
        assert_eq!((x + y).to_f32(), a + b);
        assert_eq!((x - y).to_f32(), a - b);
        assert_eq!((x * y).to_f32(), a * b);
        assert_eq!((x / y).to_f32(), a / b);
    }

    #[test]
    fn test_all() {
        // Operand pairs, checked in order. Commented-out entries are the known failing cases.
        const CASES: &[(f32, f32)] = &[
            (1.123, 7.567),
            (1.123, 1.456),
            (7.567, 1.123),
            (1.123, -7.567),
            (1.123, -1.456),
            (7.567, -1.123),
            (-1.123, -7.567),
            (-1.123, -1.456),
            (-7.567, -1.123),
            (1000.123, 0.001),
            (1000.123, 0.0000001),
            (0.0012, 1000.123),
            (0.0000001, 1000.123),
            //(0., 0.),
            (0., 1.),
            (1., 0.),
            (1., 1.),
            (2., f32::INFINITY),
            (2., f32::NEG_INFINITY),
            (f32::INFINITY, 2.),
            (f32::NEG_INFINITY, 2.),
            (f32::NEG_INFINITY, 2.),
            (f32::MAX, 2.),
            (f32::MIN, 2.),
            //(f32::MIN_POSITIVE, 2.),
            //(2., f32::MAX),
            //(2., f32::MIN),
            (2., f32::MIN_POSITIVE),
        ];

        for &(a, b) in CASES {
            check_pair(a, b);
        }
    }
}

0 commit comments

Comments
 (0)