diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index e5bd811..efeb672 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -23,7 +23,7 @@ jobs:
       - uses: actions/checkout@v2
       - uses: actions/setup-python@v2
         with:
-          python-version: "3.7"
+          python-version: "3.8"
       - run: pip install tox
       - run: tox -e ${{ matrix.toxenv }}

@@ -31,7 +31,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "pypy-3.7"]
+        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "pypy-3.7"]

     steps:
       - uses: actions/checkout@v2
@@ -39,7 +39,7 @@ jobs:
         with:
           python-version: ${{ matrix.python-version }}
       - run: pip install tox
-      - run: tox -e py
+      - run: tox

   packaging:
     runs-on: ${{ matrix.os }}
@@ -57,7 +57,7 @@ jobs:
     runs-on: macos-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]

     steps:
       - uses: actions/checkout@v2
@@ -65,5 +65,4 @@ jobs:
         with:
           python-version: ${{ matrix.python-version }}
       - run: pip install tox
-      - run: tox -e py
-
+      - run: tox
diff --git a/pyproject.toml b/pyproject.toml
index d8c65f4..edee9e3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,8 @@
 [tool.mypy]
 show_error_codes = true
 strict = true
+# specifically, this is because there are no typing stubs for immutabledict
+ignore_missing_imports = true

 files = ["."]

diff --git a/setup.cfg b/setup.cfg
index dd55872..685a63a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -28,6 +28,10 @@ packages =
 [options.package_data]
 canonicaljson = py.typed

+[options.extras_require]
+orjson =
+    orjson; python_version>"3.8"
+
 [flake8]
 # see https://pycodestyle.readthedocs.io/en/latest/intro.html#error-codes
 # for error codes. The ones we ignore are:
diff --git a/src/canonicaljson/__init__.py b/src/canonicaljson/__init__.py
index 3873974..cab7b20 100644
--- a/src/canonicaljson/__init__.py
+++ b/src/canonicaljson/__init__.py
@@ -15,8 +15,19 @@
 # limitations under the License.
 import functools
 import json
+import math
 from typing import Callable, Generator, Type, TypeVar

+use_orjson = False
+try:
+    import orjson
+
+    use_orjson = True
+
+except ImportError:
+    orjson = None  # type: ignore [assignment]
+
+
 __version__ = "2.0.0"


@@ -74,12 +85,40 @@ def register_preserialisation_callback(
     )


+def check_for_nan_and_inf(data: object) -> None:
+    """Reject NaN and +/-Infinity found anywhere inside `data`.
+
+    orjson encodes non-finite floats as `null` rather than refusing them, so
+    the orjson code paths run this check on their input before encoding.
+
+    Dicts, lists and tuples are walked recursively. Values that only appear
+    once a preserialisation callback has run are not inspected here.
+
+    Raises:
+        ValueError: if a NaN or infinite float is found.
+    """
+    if isinstance(data, dict):
+        for value in data.values():
+            check_for_nan_and_inf(value)
+    elif isinstance(data, (list, tuple)):
+        for item in data:
+            check_for_nan_and_inf(item)
+    elif isinstance(data, float) and not math.isfinite(data):
+        raise ValueError("Out of range float values are not JSON compliant")
+
+
 def encode_canonical_json(data: object) -> bytes:
     """Encodes the given `data` as a UTF-8 canonical JSON bytestring.

     This encoding is the shortest possible. Dictionary keys are
     lexicographically sorted by unicode code point.
     """
+    if use_orjson:
+        check_for_nan_and_inf(data)
+        return orjson.dumps(
+            data, default=_preprocess_for_serialisation, option=orjson.OPT_SORT_KEYS
+        )
+
     s = _canonical_encoder.encode(data)
     return s.encode("utf-8")

@@ -101,6 +140,17 @@ def iterencode_canonical_json(data: object) -> Generator[bytes, None, None]:

 def encode_pretty_printed_json(data: object) -> bytes:
     """Encodes the given `data` as a UTF-8 human-readable JSON bytestring."""
+    if use_orjson:
+        # orjson only offers a fixed two-space indent, so the layout differs
+        # slightly from the stdlib encoder's. Keys are sorted so the result
+        # does not depend on dict insertion order.
+        check_for_nan_and_inf(data)
+        return orjson.dumps(
+            data,
+            default=_preprocess_for_serialisation,
+            option=orjson.OPT_INDENT_2 | orjson.OPT_SORT_KEYS,
+        )
+
     return _pretty_encoder.encode(data).encode("utf-8")


diff --git a/tests/test_canonicaljson.py b/tests/test_canonicaljson.py
index 2ae6f7f..9481533 100644
--- a/tests/test_canonicaljson.py
+++ b/tests/test_canonicaljson.py
@@ -15,8 +15,11 @@
 # limitations under the License.
 import unittest
 from math import inf, nan
+from typing import Any, Union
 from unittest.mock import Mock

+from immutabledict import immutabledict
+
 from canonicaljson import (
     encode_canonical_json,
     encode_pretty_printed_json,
@@ -25,6 +28,12 @@
     register_preserialisation_callback,
 )

+try:
+    import orjson
+
+except ImportError:
+    orjson = None  # type: ignore [assignment]
+

 class TestCanonicalJson(unittest.TestCase):
     def test_encode_canonical(self) -> None:
@@ -98,10 +107,16 @@ def test_encode_pretty_printed(self) -> None:
         self.assertEqual(encode_pretty_printed_json({}), b"{}")
         self.assertEqual(list(iterencode_pretty_printed_json({})), [b"{}"])

+        if orjson is not None:
+            # orjson's pretty print style is a flag option and is hardcoded to "2",
+            # so this will be slightly different.
+            comparison = b'{\n  "la merde amus\xc3\xa9e": "\xF0\x9F\x92\xA9"\n}'
+        else:
+            comparison = b'{\n    "la merde amus\xc3\xa9e": "\xF0\x9F\x92\xA9"\n}'
         # non-ascii should come out utf8-encoded.
         self.assertEqual(
             encode_pretty_printed_json({"la merde amusée": "💩"}),
-            b'{\n    "la merde amus\xc3\xa9e": "\xF0\x9F\x92\xA9"\n}',
+            comparison,
         )

     def test_unknown_type(self) -> None:
@@ -136,6 +151,40 @@ def test_invalid_float_values(self) -> None:
         with self.assertRaises(ValueError):
             encode_pretty_printed_json(nan)

+    def test_invalid_nested_float_values(self) -> None:
+        """Infinity/-Infinity/NaN are not allowed in canonicaljson."""
+        for bad_value in (inf, -inf, nan):
+            nested_cases = (
+                {"a": 1, "b": bad_value},
+                {"a": [1, bad_value]},
+                {"a": (1, bad_value)},
+            )
+            for data in nested_cases:
+                with self.subTest(data=data):
+                    with self.assertRaises(ValueError):
+                        encode_canonical_json(data)
+
+                    with self.assertRaises(ValueError):
+                        encode_pretty_printed_json(data)
+
+    def test_immutable_dict_handling(self) -> None:
+        im_d: immutabledict[str, Union[str, int]] = immutabledict(
+            {"key1": "value1", "key2": 42}
+        )
+
+        # Lifted from Synapse's __init__.py
+        def _immutabledict_cb(d: immutabledict[str, Any]) -> Any:
+            try:
+                return d._dict
+            except Exception:
+                # Paranoia: fall back to a `dict()` call, in case a future version of
+                # immutabledict removes `_dict` from the implementation.
+                return dict(d)
+
+        register_preserialisation_callback(immutabledict, _immutabledict_cb)
+        encoded = encode_canonical_json(im_d)
+        self.assertEqual(encoded, b'{"key1":"value1","key2":42}')
+
     def test_encode_unknown_class_raises(self) -> None:
         class C:
             pass
diff --git a/tox.ini b/tox.ini
index 32606d7..bc00fef 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,34 +1,57 @@
 [tox]
-envlist = packaging, pep8, black, py37, py38, py39, py310, pypy3, mypy, isort
+envlist = packaging, pep8, black, py{38,39,310,311,312,313}-base, py{38,39,310,311,312,313}-orjson, pypy3, mypy, isort, coverage
 isolated_build = True


 [testenv:py]
 deps =
     coverage
+    immutabledict
+
+[testenv:coverage]
+; Use this after the other coverage steps have run, to combine and summarize the results
+skip_install = True
+deps =
+    {[testenv:py]deps}
 commands =
-    coverage run --source canonicaljson -m unittest
+    coverage combine
     coverage report -m --fail-under 100

+[testenv:py{38,39,310,311,312,313}-base]
+deps =
+    {[testenv:py]deps}
+commands =
+    coverage run -p --source canonicaljson -m unittest
+
+[testenv:py{38,39,310,311,312,313}-orjson]
+deps =
+    {[testenv:py]deps}
+
+extras =
+    orjson
+
+commands =
+    coverage run -p --source canonicaljson -m unittest
+
 [testenv:packaging]
 deps =
     check-manifest
 commands = check-manifest

 [testenv:pep8]
-basepython = python3.7
+basepython = python3.8
 deps =
     flake8
 commands = flake8 src tests

 [testenv:isort]
-basepython = python3.7
+basepython = python3.8
 deps =
     isort
 commands = isort --check src tests

 [testenv:black]
-basepython = python3.7
+basepython = python3.8
 deps =
     black==23.1.0
 commands = python -m black --check --diff src tests
@@ -37,4 +60,5 @@ commands = python -m black --check --diff src tests
 deps =
     mypy==1.0
     types-setuptools==57.4.14
+    types-orjson==3.6.0
 commands = mypy src tests