Skip to content
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
b2eedbe
Strings: add toUint, toInt and hexToUint
Amxx Aug 28, 2024
efd2f30
codespell
Amxx Aug 28, 2024
bc42b25
Update contracts/utils/Strings.sol
Amxx Aug 29, 2024
07f4b44
Update .changeset/eighty-hounds-promise.md
Amxx Sep 2, 2024
40ba631
Update contracts/utils/Strings.sol
Amxx Sep 3, 2024
07ec518
Update Strings.sol
Amxx Sep 3, 2024
95fb0db
Apply suggestions from code review
Amxx Sep 3, 2024
f263819
Update contracts/utils/Strings.sol
Amxx Sep 3, 2024
f51fbe6
Update Strings.sol
Amxx Sep 3, 2024
52a301b
Fix value variable
cairoeth Sep 3, 2024
027859e
make return explicit
Amxx Sep 4, 2024
a91a999
branchless
Amxx Sep 4, 2024
86abf5a
Update contracts/utils/Strings.sol
Amxx Sep 5, 2024
6dca3cb
Update contracts/utils/Strings.sol
Amxx Sep 5, 2024
a7a6e9e
add try variants + use for governor proposal parsing
Amxx Sep 9, 2024
ec9a659
parseAddress
Amxx Sep 11, 2024
568dc7b
use string literal for 0x
Amxx Sep 17, 2024
0292c31
Apply suggestions from code review
Amxx Sep 17, 2024
aea4a14
add support for + prefix in parseInt
Amxx Sep 17, 2024
cf78a9f
Remove invalid "memory-safe" annotation.
Amxx Sep 17, 2024
26cec97
Merge branch 'master' into feature/parse-strings
Amxx Sep 18, 2024
3a7f904
Merge branch 'master' into feature/parse-strings
Amxx Oct 11, 2024
4d18729
Add Bytes.sol
Amxx Oct 11, 2024
c7a7c94
codespell
Amxx Oct 11, 2024
d6319e8
cleanup
Amxx Oct 11, 2024
b3bf461
Update .changeset/eighty-hounds-promise.md
Amxx Oct 11, 2024
2ab63b7
Update .changeset/rude-cougars-look.md
Amxx Oct 11, 2024
231b93b
optimization
Amxx Oct 11, 2024
24f1490
optimization
Amxx Oct 11, 2024
43f0dc1
testing
Amxx Oct 11, 2024
7b7c1fd
comment update
Amxx Oct 11, 2024
2abfa49
Update contracts/utils/Strings.sol
Amxx Oct 11, 2024
f433e6d
making unsafeReadBytesOffset private
Amxx Oct 11, 2024
27c7c0d
optimize
Amxx Oct 11, 2024
75e1e4c
Update contracts/utils/README.adoc
Amxx Oct 11, 2024
4f48757
Update contracts/governance/Governor.sol
Amxx Oct 11, 2024
1ec1e3f
rename parseHex to parseHexUint
Amxx Oct 11, 2024
53d72d7
fix tests
Amxx Oct 11, 2024
c5790f8
optimize
Amxx Oct 14, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/eighty-hounds-promise.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'openzeppelin-solidity': minor
---

`Strings`: Add `parseUint`, `parseInt`, `parseHex` and `parseAddress` to parse strings into numbers and addresses. Also provide variant of these function that parse substrings, and `tryXxx` variants that do not revert on invalid input.
61 changes: 9 additions & 52 deletions contracts/governance/Governor.sol
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import {DoubleEndedQueue} from "../utils/structs/DoubleEndedQueue.sol";
import {Address} from "../utils/Address.sol";
import {Context} from "../utils/Context.sol";
import {Nonces} from "../utils/Nonces.sol";
import {Strings} from "../utils/Strings.sol";
import {IGovernor, IERC6372} from "./IGovernor.sol";

/**
Expand Down Expand Up @@ -760,68 +761,24 @@ abstract contract Governor is Context, ERC165, EIP712, Nonces, IGovernor, IERC72
address proposer,
string memory description
) internal view virtual returns (bool) {
uint256 len = bytes(description).length;
uint256 length = bytes(description).length;

// Length is too short to contain a valid proposer suffix
if (len < 52) {
if (length < 52) {
return true;
}

// Extract what would be the `#proposer=0x` marker beginning the suffix
bytes12 marker;
assembly {
// - Start of the string contents in memory = description + 32
// - First character of the marker = len - 52
// - Length of "#proposer=0x0000000000000000000000000000000000000000" = 52
// - We read the memory word starting at the first character of the marker:
// - (description + 32) + (len - 52) = description + (len - 20)
// - Note: Solidity will ignore anything past the first 12 bytes
marker := mload(add(description, sub(len, 20)))
}
// Extract what would be the `#proposer=` marker beginning the suffix
bytes10 marker = bytes10(Strings.unsafeReadBytesOffset(bytes(description), length - 52));

// If the marker is not found, there is no proposer suffix to check
if (marker != bytes12("#proposer=0x")) {
if (marker != bytes10("#proposer=")) {
return true;
}

// Parse the 40 characters following the marker as uint160
uint160 recovered = 0;
for (uint256 i = len - 40; i < len; ++i) {
(bool isHex, uint8 value) = _tryHexToUint(bytes(description)[i]);
// If any of the characters is not a hex digit, ignore the suffix entirely
if (!isHex) {
return true;
}
recovered = (recovered << 4) | value;
}

return recovered == uint160(proposer);
}

/**
* @dev Try to parse a character from a string as a hex value. Returns `(true, value)` if the char is in
* `[0-9a-fA-F]` and `(false, 0)` otherwise. Value is guaranteed to be in the range `0 <= value < 16`
*/
function _tryHexToUint(bytes1 char) private pure returns (bool, uint8) {
uint8 c = uint8(char);
unchecked {
// Case 0-9
if (47 < c && c < 58) {
return (true, c - 48);
}
// Case A-F
else if (64 < c && c < 71) {
return (true, c - 55);
}
// Case a-f
else if (96 < c && c < 103) {
return (true, c - 87);
}
// Else: not a hex char
else {
return (false, 0);
}
}
// Check that the last 42 characters (after the marker) is a properly formated address.
(bool success, address recovered) = Strings.tryParseAddress(description, length - 42, length);
return !success || recovered == proposer;
}

/**
Expand Down
256 changes: 256 additions & 0 deletions contracts/utils/Strings.sol
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@
pragma solidity ^0.8.20;

import {Math} from "./math/Math.sol";
import {SafeCast} from "./math/SafeCast.sol";
import {SignedMath} from "./math/SignedMath.sol";

/**
* @dev String operations.
*/
library Strings {
using SafeCast for *;

bytes16 private constant HEX_DIGITS = "0123456789abcdef";
uint8 private constant ADDRESS_LENGTH = 20;

Expand All @@ -18,6 +21,16 @@ library Strings {
*/
error StringsInsufficientHexLength(uint256 value, uint256 length);

/**
* @dev The string being parsed contains characters that are not in scope of the given base.
*/
error StringsInvalidChar();

/**
* @dev The string being parsed is not a properly formated address.
*/
error StringsInvalidAddressFormat();

/**
* @dev Converts a `uint256` to its ASCII `string` decimal representation.
*/
Expand Down Expand Up @@ -115,4 +128,247 @@ library Strings {
function equal(string memory a, string memory b) internal pure returns (bool) {
return bytes(a).length == bytes(b).length && keccak256(bytes(a)) == keccak256(bytes(b));
}

/**
* @dev Parse a decimal string and returns the value as a `uint256`.
*
* This function will revert if:
* - the string contains any character that is not in [0-9].
* - the result does not fit in a `uint256`.
*/
function parseUint(string memory input) internal pure returns (uint256) {
return parseUint(input, 0, bytes(input).length);
}

/**
* @dev Variant of {parseUint} that parses a substring of `input` located between position `begin` (included) and
* `end` (excluded).
*/
function parseUint(string memory input, uint256 begin, uint256 end) internal pure returns (uint256) {
(bool success, uint256 value) = tryParseUint(input, begin, end);
if (!success) revert StringsInvalidChar();
return value;
}

/**
* @dev Variant of {parseUint-string} that returns false if the parsing fails because of an invalid character.
*
* This function will still revert if the result does not fit in a `uint256`
*/
function tryParseUint(string memory input) internal pure returns (bool success, uint256 value) {
return tryParseUint(input, 0, bytes(input).length);
}

/**
* @dev Variant of {parseUint-string-uint256-uint256} that returns false if the parsing fails because of an invalid
* character.
*
* This function will still revert if the result does not fit in a `uint256`
*/
function tryParseUint(
string memory input,
uint256 begin,
uint256 end
) internal pure returns (bool success, uint256 value) {
bytes memory buffer = bytes(input);

uint256 result = 0;
for (uint256 i = begin; i < end; ++i) {
uint8 chr = _tryParseChr(buffer[i]);
if (chr > 9) return (false, 0);
result *= 10;
result += chr;
}
return (true, result);
}

/**
* @dev Parse a decimal string and returns the value as a `int256`.
*
* This function will revert if:
* - the string contains any character (outside the prefix) that is not in [0-9].
* - the result does not fit in a `int256`.
*/
function parseInt(string memory input) internal pure returns (int256) {
return parseInt(input, 0, bytes(input).length);
}

/**
* @dev Variant of {parseInt-string} that parses a substring of `input` located between position `begin` (included) and
* `end` (excluded).
*/
function parseInt(string memory input, uint256 begin, uint256 end) internal pure returns (int256) {
(bool success, int256 value) = tryParseInt(input, begin, end);
if (!success) revert StringsInvalidChar();
return value;
}

/**
* @dev Variant of {parseInt-string} that returns false if the parsing fails because of an invalid character.
*
* This function will still revert if the result does not fit in a `int256`
*/
function tryParseInt(string memory input) internal pure returns (bool success, int256 value) {
return tryParseInt(input, 0, bytes(input).length);
}

/**
* @dev Variant of {parseInt-string-uint256-uint256} that returns false if the parsing fails because of an invalid
* character.
*
* This function will still revert if the result does not fit in a `int256`
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this is a try function, shouldn't it return success == false?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is consistent with all the other try functions added. I agree that it is expected to return success == false instead of revert.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The revert is caused by

result *= 10;
result += factor * int8(chr);

overflowing.

Note that this is done in a loop. Any branch that you add here will be multiplied by the number of characteres in the substring.

If you can find a nice and cheap way to return false, please feel free to change that

Copy link
Contributor

@cairoeth cairoeth Oct 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the revert could also happen for tryParseUint, no?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it affect all variants (3) of this function. (uint, int, hex)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks all for your comments. I think the contract is currently well-documented anyway, and updating to a version that doesn't revert (in the future) is the kind of breaking change that'd be fine in a minor version.

I wouldn't block merging

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alternatively, we can check that the length of the string in tryParseUint is >= 78, and check for overflow use Math.tryAdd in the last digit if the string's length == 78. (78 digits are in 2*256)

Similarly for tryParseHex but with 64 digits instead.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ernestognw zero prefix would create false positives. I'm not sure we can rule them out.

*/
function tryParseInt(
string memory input,
uint256 begin,
uint256 end
) internal pure returns (bool success, int256 value) {
bytes memory buffer = bytes(input);

// check presence of a negative sign.
bool isNegative = bytes1(unsafeReadBytesOffset(buffer, begin)) == 0x2d;
int8 factor = isNegative ? int8(-1) : int8(1);
uint256 offset = isNegative.toUint();

int256 result = 0;
for (uint256 i = begin + offset; i < end; ++i) {
uint8 chr = _tryParseChr(buffer[i]);
if (chr > 9) return (false, 0);
result *= 10;
result += factor * int8(chr);
}
return (true, result);
}

/**
* @dev Parse a hexadecimal string (with or without "0x" prefix), and returns the value as a `uint256`.
*
* This function will revert if:
* - the string contains any character (outside the prefix) that is not in [0-9a-fA-F].
* - the result does not fit in a `uint256`.
*/
function parseHex(string memory input) internal pure returns (uint256) {
return parseHex(input, 0, bytes(input).length);
}

/**
* @dev Variant of {parseHex} that parses a substring of `input` located between position `begin` (included) and
* `end` (excluded).
*/
function parseHex(string memory input, uint256 begin, uint256 end) internal pure returns (uint256) {
(bool success, uint256 value) = tryParseHex(input, begin, end);
if (!success) revert StringsInvalidChar();
return value;
}

/**
* @dev Variant of {parseHex-string} that returns false if the parsing fails because of an invalid character.
*
* This function will still revert if the result does not fit in a `uint256`
*/
function tryParseHex(string memory input) internal pure returns (bool success, uint256 value) {
return tryParseHex(input, 0, bytes(input).length);
}

/**
* @dev Variant of {parseHex-string-uint256-uint256} that returns false if the parsing fails because of an
* invalid character.
*
* This function will still revert if the result does not fit in a `uint256`
*/
function tryParseHex(
string memory input,
uint256 begin,
uint256 end
) internal pure returns (bool success, uint256 value) {
bytes memory buffer = bytes(input);

// skip 0x prefix if present
bool hasPrefix = bytes2(unsafeReadBytesOffset(buffer, begin)) == 0x3078;
uint256 offset = hasPrefix.toUint() * 2;

uint256 result = 0;
for (uint256 i = begin + offset; i < end; ++i) {
uint8 chr = _tryParseChr(buffer[i]);
if (chr > 15) return (false, 0);
result *= 16;
result += chr;
}
return (true, result);
}

/**
* @dev Parse a hexadecimal string (with or without "0x" prefix), and returns the value as an `address`.
*
* This function will revert if:
* - the string is not formated as `(0x)?[0-9a-fA-F]{40}`
*/
function parseAddress(string memory input) internal pure returns (address) {
return parseAddress(input, 0, bytes(input).length);
}

/**
* @dev Variant of {parseAddress} that parses a substring of `input` located between position `begin` (included) and
* `end` (excluded).
*/
function parseAddress(string memory input, uint256 begin, uint256 end) internal pure returns (address) {
(bool success, address value) = tryParseAddress(input, begin, end);
if (!success) revert StringsInvalidAddressFormat();
return value;
}

/**
* @dev Variant of {parseAddress-string} that returns false if the parsing fails because input is not a properly
* formated address.
*/
function tryParseAddress(string memory input) internal pure returns (bool success, address value) {
return tryParseAddress(input, 0, bytes(input).length);
}

/**
* @dev Variant of {parseAddress-string-uint256-uint256} that returns false if the parsing fails because input is not a properly
* formated address.
*/
function tryParseAddress(
string memory input,
uint256 begin,
uint256 end
) internal pure returns (bool success, address value) {
// check that input is the correct length
bool hasPrefix = bytes2(unsafeReadBytesOffset(bytes(input), begin)) == 0x3078;
uint256 expectedLength = 40 + hasPrefix.toUint() * 2;

if (end - begin == expectedLength) {
// length garantees that this does not overflow, and value2 is at most type(uint160).max
(bool s, uint256 v) = tryParseHex(input, begin, end);
return (s, address(uint160(v)));
} else {
return (false, address(0));
}
}

// TODO: documentation.
function unsafeReadBytesOffset(bytes memory buffer, uint256 offset) internal pure returns (bytes32 value) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a bytes operation so it shouldn't be in the Strings library.

Copy link
Collaborator Author

@Amxx Amxx Sep 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where should that go ? A Bytes library ?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that the same function exists in RSA.sol

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe this function was present in other places but always private.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. private, and marked as memory safe, because the private calls are all enforcing the safety.

But we are starting to use it in many places ... so having 3 private implementation of the same functions (with different names?) doesn't feel right.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. private, and marked as memory safe, because the private calls are all enforcing the safety.

Right... That was the issue. That using memory-unsafe functions disables some optimizations globally, so they were important to avoid.

Copy link
Collaborator Author

@Amxx Amxx Sep 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So do we want to have:

  • multiple private (and memory safe) implementations
    • code duplication
  • one internal, none memory safe implementation
    • negative impact on optimisation ?
  • one internal, memory safe (checks memory bound) implementation
    • we lose the whole point of the function because we duplicate mloads

?

Copy link
Collaborator Author

@Amxx Amxx Oct 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Extra option is:

  • one internal, that is not actually memory safe, but that we mark as "memory-safe". And we document it should only be used with input that are known to be within the bounds
    • if user don't give bad input, they may have unexpected (unsafe) behavior ...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure that the last option is a safe recommendation. I'm too stupid regarding how compiler optimizations work so feel free to disregard, but I wonder if guaranteeing the safetiness with a private function will have a negative effect on the so-called "‘stack-to-memory’ mover"

assembly ("memory-safe") {
value := mload(add(buffer, add(0x20, offset)))
}
}

function _tryParseChr(bytes1 chr) private pure returns (uint8) {
uint8 value = uint8(chr);

// Try to parse `chr`:
// - Case 1: [0-9]
// - Case 2: [a-f]
// - Case 2: [A-F]
// - otherwise not supported
unchecked {
if (value > 47 && value < 58) value -= 48;
else if (value > 96 && value < 103) value -= 87;
else if (value > 64 && value < 71) value -= 55;
else return type(uint8).max;
}

return value;
}
}
27 changes: 27 additions & 0 deletions test/utils/Strings.t.sol
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// SPDX-License-Identifier: MIT

pragma solidity ^0.8.20;

import {Test} from "forge-std/Test.sol";

import {Strings} from "@openzeppelin/contracts/utils/Strings.sol";

contract StringsTest is Test {
using Strings for *;

function testParse(uint256 value) external {
assertEq(value, value.toString().parseUint());
}

function testParseSigned(int256 value) external {
assertEq(value, value.toStringSigned().parseInt());
}

function testParseHex(uint256 value) external {
assertEq(value, value.toHexString().parseHex());
}

function testParseChecksumHex(address value) external {
assertEq(value, value.toChecksumHexString().parseAddress());
}
}
Loading