Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,23 +23,23 @@ jobs:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: "3.7"
python-version: "3.8"
- run: pip install tox
- run: tox -e ${{ matrix.toxenv }}

unittest:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "pypy-3.7"]
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "pypy-3.7"]

steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- run: pip install tox
- run: tox -e py
- run: tox

packaging:
runs-on: ${{ matrix.os }}
Expand All @@ -57,13 +57,12 @@ jobs:
runs-on: macos-latest
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11"]
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]

steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- run: pip install tox
- run: tox -e py

- run: tox
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
[tool.mypy]
show_error_codes = true
strict = true
# Specifically, this is needed because there are no typing stubs for immutabledict.
ignore_missing_imports = true

files = ["."]

Expand Down
4 changes: 4 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ packages =
[options.package_data]
canonicaljson = py.typed

[options.extras_require]
orjson =
orjson; python_version>"3.8"

[flake8]
# see https://pycodestyle.readthedocs.io/en/latest/intro.html#error-codes
# for error codes. The ones we ignore are:
Expand Down
40 changes: 40 additions & 0 deletions src/canonicaljson/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,19 @@
# limitations under the License.
import functools
import json
import math
from typing import Callable, Generator, Type, TypeVar

use_orjson = False
try:
import orjson

use_orjson = True

except ImportError:
orjson = None # type: ignore [assignment]


__version__ = "2.0.0"


Expand Down Expand Up @@ -74,12 +85,34 @@ def register_preserialisation_callback(
)


def check_for_nan_and_inf(data: object) -> None:
    """Reject NaN and infinite floats anywhere inside `data`.

    Dictionaries (their values only), lists and tuples are walked
    recursively. Any other kind of object is left unexamined.

    Args:
        data: The object that is about to be serialised.

    Raises:
        ValueError: If a float that is NaN, Infinity or -Infinity is found.
    """
    if isinstance(data, float):
        if not math.isfinite(data):
            raise ValueError(f"Out of range float value: {data!r}")
    elif isinstance(data, dict):
        # Only the values need inspecting, so iterate over those directly.
        for value in data.values():
            check_for_nan_and_inf(value)
    elif isinstance(data, (list, tuple)):
        # Tuples are serialised as JSON arrays just like lists, so they must
        # be checked too or a nested NaN/Inf would slip through.
        for item in data:
            check_for_nan_and_inf(item)


def encode_canonical_json(data: object) -> bytes:
    """Serialise `data` to canonical JSON, returned as UTF-8 bytes.

    The output is the shortest possible encoding, with dictionary keys
    sorted lexicographically by unicode code point.
    """
    if not use_orjson:
        # orjson is unavailable: use the module's own canonical encoder.
        canonical_text = _canonical_encoder.encode(data)
        return canonical_text.encode("utf-8")

    # Validate floats ourselves before handing the data to orjson.
    # NOTE(review): orjson is understood to emit `null` for NaN/Infinity
    # rather than raising - confirm against the orjson documentation.
    check_for_nan_and_inf(data)
    return orjson.dumps(
        data,
        default=_preprocess_for_serialisation,
        option=orjson.OPT_SORT_KEYS,
    )

Expand All @@ -101,6 +134,13 @@ def iterencode_canonical_json(data: object) -> Generator[bytes, None, None]:
def encode_pretty_printed_json(data: object) -> bytes:
    """Serialise `data` to human-readable JSON, returned as UTF-8 bytes."""
    if not use_orjson:
        # orjson is unavailable: use the module's own pretty encoder.
        pretty_text = _pretty_encoder.encode(data)
        return pretty_text.encode("utf-8")

    # Unfortunately, orjson decided to hardcode their indent to 2
    check_for_nan_and_inf(data)
    return orjson.dumps(
        data,
        default=_preprocess_for_serialisation,
        option=orjson.OPT_INDENT_2,
    )


Expand Down
79 changes: 78 additions & 1 deletion tests/test_canonicaljson.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,11 @@
# limitations under the License.
import unittest
from math import inf, nan
from typing import Any, Union
from unittest.mock import Mock

from immutabledict import immutabledict

from canonicaljson import (
encode_canonical_json,
encode_pretty_printed_json,
Expand All @@ -25,6 +28,12 @@
register_preserialisation_callback,
)

try:
import orjson

except ImportError:
orjson = None # type: ignore [assignment]


class TestCanonicalJson(unittest.TestCase):
def test_encode_canonical(self) -> None:
Expand Down Expand Up @@ -98,10 +107,16 @@ def test_encode_pretty_printed(self) -> None:
self.assertEqual(encode_pretty_printed_json({}), b"{}")
self.assertEqual(list(iterencode_pretty_printed_json({})), [b"{}"])

if orjson is not None:
# orjson only offers pretty-printing through the OPT_INDENT_2 flag, i.e. a fixed
# two-space indent, so the expected output differs slightly when orjson is in use.
Comment on lines +111 to +112
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be worth proposing an OPT_INDENT_4 to upstream?

It's not ideal that we'll be changing the output of encode_pretty_printed_json (in case it breaks downstream test cases), but it's possible with a version bump/note in the changelog.

comparison = b'{\n "la merde amus\xc3\xa9e": "\xF0\x9F\x92\xA9"\n}'
else:
comparison = b'{\n "la merde amus\xc3\xa9e": "\xF0\x9F\x92\xA9"\n}'
# non-ascii should come out utf8-encoded.
self.assertEqual(
encode_pretty_printed_json({"la merde amusée": "💩"}),
b'{\n "la merde amus\xc3\xa9e": "\xF0\x9F\x92\xA9"\n}',
comparison,
)

def test_unknown_type(self) -> None:
Expand Down Expand Up @@ -136,6 +151,68 @@ def test_invalid_float_values(self) -> None:
with self.assertRaises(ValueError):
encode_pretty_printed_json(nan)

def test_invalid_nested_float_values(self) -> None:
    """Infinity/-Infinity/NaN are not allowed in canonicaljson."""
    # Each entry nests an invalid float either directly in a dict or inside
    # a list held by a dict.
    test_cases = {
        "data_with_inf": {"a": 1, "b": float("inf")},
        "data_with_neg_inf": {"a": 1, "b": -float("inf")},
        "data_with_nan": {"a": 1, "b": float("nan")},
        "list_with_inf": {"a": [1, float("inf")]},
        "list_with_neg_inf": {"a": [1, -float("inf")]},
        "list_with_nan": {"a": [1, float("nan")]},
    }
    encoders = (encode_canonical_json, encode_pretty_printed_json)

    for name, test_case in test_cases.items():
        for encoder in encoders:
            # subTest labels a failure with the offending case and encoder,
            # and lets the remaining combinations still run.
            with self.subTest(case=name, encoder=encoder.__name__):
                with self.assertRaises(ValueError):
                    encoder(test_case)
Comment on lines +156 to +197
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would be more readable if the test cases were looped over, i.e.:

test_cases = {
    "data_with_inf": {"a": 1, "b": float("inf")},
    ...
}

for name, test_case in test_cases.items():
    with self.assertRaises(ValueError, f"Passing test case '{name}' to encode_canonical_json failed to raise a ValueError"):
        encode_canonical_json(test_case)

    with self.assertRaises(ValueError,  f"Passing test case '{name}' to encode_pretty_printed_json failed to raise a ValueError"):
        encode_pretty_printed_json(test_case)

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I like that. Yes, will do 🫡


def test_immutable_dict_handling(self) -> None:
    """A registered callback lets an immutabledict be encoded."""
    im_d: immutabledict[str, Union[str, int]] = immutabledict(
        {"key1": "value1", "key2": 42}
    )

    # Lifted from Synapse's __init__.py
    def _immutabledict_cb(d: immutabledict[str, Any]) -> Any:
        try:
            return d._dict
        except Exception:
            # Paranoia: fall back to a `dict()` call, in case a future version of
            # immutabledict removes `_dict` from the implementation.
            return dict(d)

    register_preserialisation_callback(immutabledict, _immutabledict_cb)

    # Assert on the encoded bytes so the test verifies the output, not merely
    # that encoding does not raise.
    self.assertEqual(
        encode_canonical_json(im_d),
        b'{"key1":"value1","key2":42}',
    )

def test_encode_unknown_class_raises(self) -> None:
class C:
pass
Expand Down
34 changes: 29 additions & 5 deletions tox.ini
Original file line number Diff line number Diff line change
@@ -1,34 +1,57 @@
[tox]
envlist = packaging, pep8, black, py37, py38, py39, py310, pypy3, mypy, isort
envlist = packaging, pep8, black, py{38,39,310,311,312,313}-base, py{38,39,310,311,312,313}-orjson, pypy3, mypy, isort, coverage
isolated_build = True

[testenv:py]
deps =
coverage
immutabledict

[testenv:coverage]
; Use this after the other coverage steps have run, to combine and summarize the results
skip_install = True
deps =
{[testenv:py]deps}

commands =
coverage run --source canonicaljson -m unittest
coverage combine
coverage report -m --fail-under 100

[testenv:py{37,38,39,310,311,312,313}-base]
deps =
{[testenv:py]deps}
commands =
coverage run -p --source canonicaljson -m unittest

[testenv:py{37,38,39,310,311,312,313}-orjson]
deps =
{[testenv:py]deps}

extras =
orjson

commands =
coverage run -p --source canonicaljson -m unittest

[testenv:packaging]
deps =
check-manifest
commands = check-manifest

[testenv:pep8]
basepython = python3.7
basepython = python3.8
deps =
flake8
commands = flake8 src tests

[testenv:isort]
basepython = python3.7
basepython = python3.8
deps =
isort
commands = isort --check src tests

[testenv:black]
basepython = python3.7
basepython = python3.8
deps =
black==23.1.0
commands = python -m black --check --diff src tests
Expand All @@ -37,4 +60,5 @@ commands = python -m black --check --diff src tests
deps =
mypy==1.0
types-setuptools==57.4.14
types-orjson==3.6.0
commands = mypy src tests
Loading