diff --git a/README.md b/README.md index 8262f1d..f1c60dd 100644 --- a/README.md +++ b/README.md @@ -102,6 +102,28 @@ If the JSON string is malformed, the `parse` function will throw an error: loads("wrong") # MalformedJSON: Malformed node or string on line 1 ``` +### Handling text around JSON + +Sometimes JSON might be embedded in other text. You can use `PREFIX` and `POSTFIX` options to handle this: + +```python +from partial_json_parser import loads, PREFIX, POSTFIX + +# Handle text before JSON +result = loads('This is your JSON: {"key": "value"}', PREFIX) +print(result) # Outputs: {'key': 'value'} + +# Handle text after JSON +result = loads('{"key": "value"} - end of JSON', POSTFIX) +print(result) # Outputs: {'key': 'value'} + +# Handle both +result = loads('Start of JSON: {"key": "value"} - end of JSON', PREFIX | POSTFIX) +print(result) # Outputs: {'key': 'value'} +``` + +Note that `PREFIX` looks for the first `{` or `[` character and `POSTFIX` looks for the last `}` or `]` character to determine the JSON boundaries. Any such character in the surrounding text is therefore treated as a boundary, so the text before and after the JSON must not contain braces or brackets. + ## API Reference ### loads(json_string, [allow_partial], [parser]) @@ -149,7 +171,9 @@ Enum class that specifies what kind of partialness is allowed during JSON parsin - `SPECIAL`: Allow all special values. - `ATOM`: Allow all atomic values. - `COLLECTION`: Allow all collection values. -- `ALL`: Allow all values. +- `PREFIX`: Allow text before the JSON string starts (e.g. `This is your JSON: {"key": "value"}`). +- `POSTFIX`: Allow text after the JSON string ends (e.g. `{"key": "value"} - end of JSON`). 
+- `ALL`: Allow all values. ## Testing diff --git a/src/partial_json_parser/core/myelin.py b/src/partial_json_parser/core/myelin.py index 963867c..e2a7d18 100644 --- a/src/partial_json_parser/core/myelin.py +++ b/src/partial_json_parser/core/myelin.py @@ -20,6 +20,78 @@ def join_closing_tokens(stack: List[Tuple[int, str]]): def fix_fast(json_string: str, allow_partial: Union[Allow, int] = ALL): allow = Allow(allow_partial) + + # Handle PREFIX by finding first { or [ + if PREFIX in allow: + first_brace = json_string.find('{') + first_bracket = json_string.find('[') + + if first_brace != -1 and (first_bracket == -1 or first_brace < first_bracket): + json_string = json_string[first_brace:] + elif first_bracket != -1: + json_string = json_string[first_bracket:] + + # Handle POSTFIX by finding last } or ] + if POSTFIX in allow: + last_brace = json_string.rfind('}') + last_bracket = json_string.rfind(']') + + if last_brace != -1 and (last_bracket == -1 or last_brace > last_bracket): + json_string = json_string[:last_brace + 1] + elif last_bracket != -1: + json_string = json_string[:last_bracket + 1] + + # Always enable STR when handling PREFIX/POSTFIX + if PREFIX in allow or POSTFIX in allow: + allow = Allow(allow | STR) + + return _fix(json_string, allow, True) + + +def fix_fast_old(json_string: str, allow_partial: Union[Allow, int] = ALL): + allow = Allow(allow_partial) + original_allow = allow + + # Handle PREFIX by finding first { or [ + if PREFIX in allow: + first_brace = json_string.find('{') + first_bracket = json_string.find('[') + + if first_brace != -1 and (first_bracket == -1 or first_brace < first_bracket): + json_string = json_string[first_brace:] + elif first_bracket != -1: + json_string = json_string[first_bracket:] + + # Handle POSTFIX by finding matching closing brace/bracket + if POSTFIX in allow: + # Find opening token + first_char = json_string[0] if json_string else '' + if first_char not in '{[': + # No valid JSON start found + return 
_fix(json_string, original_allow, True) + + # Find matching closing token + closing_char = '}' if first_char == '{' else ']' + stack = [] + in_string = False + + for i, char in enumerate(json_string): + if char == '"' and (i == 0 or json_string[i-1] != '\\'): + in_string = not in_string + elif not in_string: + if char in '{[': + stack.append(char) + elif char in ']}': + if not stack: + break + if (char == '}' and stack[-1] == '{') or (char == ']' and stack[-1] == '['): + stack.pop() + if not stack: # Found matching closing token + json_string = json_string[:i+1] + break + + # Remove PREFIX/POSTFIX from allow since we've handled them + allow = Allow(allow & ~(PREFIX | POSTFIX)) def is_escaped(index: int): text_before = json_string[:index] diff --git a/src/partial_json_parser/core/options.py b/src/partial_json_parser/core/options.py index 4563bff..cb4b6be 100644 --- a/src/partial_json_parser/core/options.py +++ b/src/partial_json_parser/core/options.py @@ -13,12 +13,14 @@ class Allow(IntFlag): NAN = auto() INFINITY = auto() _INFINITY = auto() + PREFIX = auto() + POSTFIX = auto() INF = INFINITY | _INFINITY SPECIAL = NULL | BOOL | INF | NAN ATOM = STR | NUM | SPECIAL COLLECTION = ARR | OBJ - ALL = ATOM | COLLECTION + ALL = ATOM | COLLECTION | PREFIX | POSTFIX STR = Allow.STR @@ -35,6 +37,8 @@ class Allow(IntFlag): ATOM = Allow.ATOM COLLECTION = Allow.COLLECTION ALL = Allow.ALL +PREFIX = Allow.PREFIX +POSTFIX = Allow.POSTFIX __all__ = [ @@ -53,4 +57,6 @@ class Allow(IntFlag): "ATOM", "COLLECTION", "ALL", + "PREFIX", + "POSTFIX", ]