Skip to content

Commit 84fc09a

Browse files
committed
perf: optimized msb
1 parent 8a1a651 commit 84fc09a

File tree

5 files changed

+178
-175
lines changed

5 files changed

+178
-175
lines changed

sqrt-gas-usage.log

Lines changed: 36 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,54 +1,54 @@
11
No files changed, compilation skipped
22
Script ran successfully.
3-
Gas used: 392949170
3+
Gas used: 383299292
44

55
== Logs ==
66
===== all 8-bit numbers =====
7-
total gas used = 104192 / 256
8-
minimum = 407, 407, ...
9-
quartiles = ..., 407.0, ..., 407.0, ..., 407.0, ...
10-
average = 407.0 ± 0.0
11-
maximum = ..., 407, 407
7+
total gas used = 98816 / 256
8+
minimum = 386, 386, ...
9+
quartiles = ..., 386.0, ..., 386.0, ..., 386.0, ...
10+
average = 386.0 ± 0.0
11+
maximum = ..., 386, 386
1212

1313
===== all 16-bit numbers =====
14-
total gas used = 26673152 / 65536
15-
minimum = 407, 407, ...
16-
quartiles = ..., 407.0, ..., 407.0, ..., 407.0, ...
17-
average = 407.0 ± 0.0
18-
maximum = ..., 407, 407
14+
total gas used = 25296896 / 65536
15+
minimum = 386, 386, ...
16+
quartiles = ..., 386.0, ..., 386.0, ..., 386.0, ...
17+
average = 386.0 ± 0.0
18+
maximum = ..., 386, 386
1919

2020
===== all powers of two =====
21-
total gas used = 104192 / 256
22-
minimum = 407, 407, ...
23-
quartiles = ..., 407.0, ..., 407.0, ..., 407.0, ...
24-
average = 407.0 ± 0.0
25-
maximum = ..., 407, 407
21+
total gas used = 98816 / 256
22+
minimum = 386, 386, ...
23+
quartiles = ..., 386.0, ..., 386.0, ..., 386.0, ...
24+
average = 386.0 ± 0.0
25+
maximum = ..., 386, 386
2626

2727
===== almost powers of two =====
28-
total gas used = 104192 / 256
29-
minimum = 407, 407, ...
30-
quartiles = ..., 407.0, ..., 407.0, ..., 407.0, ...
31-
average = 407.0 ± 0.0
32-
maximum = ..., 407, 407
28+
total gas used = 98816 / 256
29+
minimum = 386, 386, ...
30+
quartiles = ..., 386.0, ..., 386.0, ..., 386.0, ...
31+
average = 386.0 ± 0.0
32+
maximum = ..., 386, 386
3333

3434
===== randomized values =====
35-
total gas used = 106692608 / 262144
36-
minimum = 407, 407, ...
37-
quartiles = ..., 407.0, ..., 407.0, ..., 407.0, ...
38-
average = 407.0 ± 0.0
39-
maximum = ..., 407, 407
35+
total gas used = 101187584 / 262144
36+
minimum = 386, 386, ...
37+
quartiles = ..., 386.0, ..., 386.0, ..., 386.0, ...
38+
average = 386.0 ± 0.0
39+
maximum = ..., 386, 386
4040

4141
===== uint16 * 1e18 =====
42-
total gas used = 26672745 / 65535
43-
minimum = 407, 407, ...
44-
quartiles = ..., 407.0, ..., 407.0, ..., 407.0, ...
45-
average = 407.0 ± 0.0
46-
maximum = ..., 407, 407
42+
total gas used = 25296510 / 65535
43+
minimum = 386, 386, ...
44+
quartiles = ..., 386.0, ..., 386.0, ..., 386.0, ...
45+
average = 386.0 ± 0.0
46+
maximum = ..., 386, 386
4747

4848
===== uint16 * 1e18 * 1e18 =====
49-
total gas used = 26672745 / 65535
50-
minimum = 407, 407, ...
51-
quartiles = ..., 407.0, ..., 407.0, ..., 407.0, ...
52-
average = 407.0 ± 0.0
53-
maximum = ..., 407, 407
49+
total gas used = 25296510 / 65535
50+
minimum = 386, 386, ...
51+
quartiles = ..., 386.0, ..., 386.0, ..., 386.0, ...
52+
average = 386.0 ± 0.0
53+
maximum = ..., 386, 386
5454

src/Common.sol

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -589,10 +589,25 @@ function sqrt(uint256 x) pure returns (uint256 result) {
589589
// $$
590590
//
591591
// Consequently, $2^{log_2(x)/2}$ is a good first approximation of sqrt(x) with at least one correct bit.
592-
unchecked {
593-
// ideally, we should use arithmetic operators, but solc is not smart enough to optimize `2**(msb(x)/2)`
594-
/// forge-lint: disable-next-line(incorrect-shift)
595-
result = 1 << (msb(x) >> 1);
592+
assembly ("memory-safe") {
593+
// 2^128
594+
result := shl(7, lt(0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF, x))
595+
// 2^64
596+
result := or(result, shl(6, lt(0xFFFFFFFFFFFFFFFF, shr(result, x))))
597+
// 2^32
598+
result := or(result, shl(5, lt(0xFFFFFFFF, shr(result, x))))
599+
// 2^16
600+
result := or(result, shl(4, lt(0xFFFF, shr(result, x))))
601+
// 2^8
602+
result := or(result, shl(3, lt(0xFF, shr(result, x))))
603+
// 2^4
604+
result := or(result, shl(2, lt(0xf, shr(result, x))))
605+
// 2^2
606+
result := or(result, shl(1, lt(0x3, shr(result, x))))
607+
// 2^1
608+
result := or(result, lt(0x1, shr(result, x)))
609+
610+
result := shl(shr(1, result), 1)
596611
}
597612

598613
// At this point, `result` is an estimation with at least one bit of precision. We know the true value has at

z-gas-snapshot.log

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -123,8 +123,8 @@ Frac_Unit_Test:test_Frac() (gas: 481713)
123123
Frac_Unit_Test:test_Frac_Negative() (gas: 481714)
124124
Frac_Unit_Test:test_Frac_Zero() (gas: 3527)
125125
Frac_Unit_Test:test_Frac_Zero() (gas: 3645)
126-
Gm_Unit_Test:test_Gm() (gas: 1409807)
127-
Gm_Unit_Test:test_Gm() (gas: 785107)
126+
Gm_Unit_Test:test_Gm() (gas: 1409351)
127+
Gm_Unit_Test:test_Gm() (gas: 784897)
128128
Gm_Unit_Test:test_Gm_OneOperandZero() (gas: 109685)
129129
Gm_Unit_Test:test_Gm_OneOperandZero() (gas: 109708)
130130
Gm_Unit_Test:test_RevertWhen_ProductNegative_A() (gas: 3881)
@@ -254,10 +254,10 @@ SD59x18_Helpers_Fuzz_Test:testFuzz_Xor(int256,int256) (runs: 256, μ: 4580, ~: 4
254254
Sqrt_Unit_Test:test_RevertWhen_GtMaxPermitted() (gas: 3725)
255255
Sqrt_Unit_Test:test_RevertWhen_GtMaxPermitted() (gas: 3929)
256256
Sqrt_Unit_Test:test_RevertWhen_Negative() (gas: 3611)
257-
Sqrt_Unit_Test:test_Sqrt() (gas: 811902)
258-
Sqrt_Unit_Test:test_Sqrt() (gas: 813634)
259-
Sqrt_Unit_Test:test_Sqrt_Zero() (gas: 4020)
260-
Sqrt_Unit_Test:test_Sqrt_Zero() (gas: 4138)
257+
Sqrt_Unit_Test:test_Sqrt() (gas: 811722)
258+
Sqrt_Unit_Test:test_Sqrt() (gas: 813349)
259+
Sqrt_Unit_Test:test_Sqrt_Zero() (gas: 4008)
260+
Sqrt_Unit_Test:test_Sqrt_Zero() (gas: 4119)
261261
UD21x18_Casting_Fuzz_Test:testFuzz_IntoSD59x18(uint128) (runs: 256, μ: 3703, ~: 3703)
262262
UD21x18_Casting_Fuzz_Test:testFuzz_IntoUD60x18(uint128) (runs: 256, μ: 3659, ~: 3659)
263263
UD21x18_Casting_Fuzz_Test:testFuzz_IntoUint256(uint128) (runs: 256, μ: 3621, ~: 3621)

zasm/Msb.asm

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* "src/Common.sol":24492:24606 contract Msb {... */
1+
/* "src/Common.sol":24985:25099 contract Msb {... */
22
mstore(0x40, 0x80)
33
callvalue
44
dup1
@@ -18,7 +18,7 @@ tag_1:
1818
stop
1919

2020
sub_0: assembly {
21-
/* "src/Common.sol":24492:24606 contract Msb {... */
21+
/* "src/Common.sol":24985:25099 contract Msb {... */
2222
mstore(0x40, 0x80)
2323
callvalue
2424
dup1
@@ -37,7 +37,7 @@ sub_0: assembly {
3737
jumpi
3838
tag_2:
3939
revert(0x00, 0x00)
40-
/* "src/Common.sol":24511:24604 function run(uint256 x) external pure returns (uint256 result) {... */
40+
/* "src/Common.sol":25004:25097 function run(uint256 x) external pure returns (uint256 result) {... */
4141
tag_3:
4242
tag_4
4343
tag_5
@@ -151,7 +151,7 @@ sub_0: assembly {
151151
/* "src/Common.sol":13378:13413 or(result, lt(0x1, shr(result, x))) */
152152
or
153153
swap1
154-
/* "src/Common.sol":24511:24604 function run(uint256 x) external pure returns (uint256 result) {... */
154+
/* "src/Common.sol":25004:25097 function run(uint256 x) external pure returns (uint256 result) {... */
155155
jump
156156
tag_4:
157157
mload(0x40)
@@ -163,7 +163,7 @@ sub_0: assembly {
163163
0x20
164164
/* "#utility.yul":318:336 */
165165
add
166-
/* "src/Common.sol":24511:24604 function run(uint256 x) external pure returns (uint256 result) {... */
166+
/* "src/Common.sol":25004:25097 function run(uint256 x) external pure returns (uint256 result) {... */
167167
mload(0x40)
168168
dup1
169169
swap2
@@ -205,5 +205,5 @@ sub_0: assembly {
205205
pop
206206
jump // out
207207

208-
auxdata: 0xa2646970667358221220d9458d50c811b782a3198de871d62d3a270b5eec3b6beb7e8056825f739e407664736f6c634300081e0033
208+
auxdata: 0xa26469706673582212207cd130062af9b1632f028dfccf1d1c9e3f15c8035fd2385b832f365e4e950dd264736f6c634300081e0033
209209
}

0 commit comments

Comments
 (0)