Skip to content

Commit d98d45e

Browse files
committed
fix(dsl): add JSON double-quotes to string literals inside containers
String literal values from Literal[] and Enum, when used inside container types (List, Tuple, Dict), were emitted as bare words in the regex (e.g. `Paris` instead of `"Paris"`). This made the generated regex inconsistent with List[str], which correctly produces quoted JSON strings.

Add an _ensure_json_quoted() helper that wraps bare String terms in double-quote delimiters. It is applied in _handle_list, _handle_tuple and _handle_dict so that only container contexts receive quoting; standalone Literal usage is unaffected.

Before: List[Literal['Paris','London']] → \[(Paris|London)...\]
After:  List[Literal['Paris','London']] → \[("Paris"|"London")...\]

Fixes #1630
1 parent d248188 commit d98d45e

2 files changed

Lines changed: 233 additions & 1992 deletions

File tree

outlines/types/dsl.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -812,6 +812,22 @@ def _handle_literal(args: tuple) -> Alternatives:
812812
return Alternatives([python_types_to_terms(arg) for arg in args])
813813

814814

815+
def _ensure_json_quoted(term: Term) -> Term:
    """Return ``term`` with double quotes added around bare ``String`` terms.

    Used for terms placed inside container types (``List``, ``Tuple``,
    ``Dict``), where string literal values coming from ``Literal`` or
    ``Enum`` need to be JSON-quoted.

    - A ``String`` is wrapped as ``Sequence([String('"'), term, String('"')])``.
    - An ``Alternatives`` is rebuilt with every branch processed recursively.
    - Any other term is returned unchanged. The original note states that
      ``Regex``-based terms (e.g. ``types.string``) already include their
      own quotes.
    """
    if isinstance(term, String):
        opening, closing = String('"'), String('"')
        return Sequence([opening, term, closing])

    if not isinstance(term, Alternatives):
        # Not a bare string and not a choice between terms: nothing to quote.
        return term

    # Quote each branch on its own so that String branches get delimiters
    # while non-String branches pass through untouched.
    return Alternatives([_ensure_json_quoted(branch) for branch in term.terms])
829+
830+
815831
def _handle_union(args: tuple, recursion_depth: int) -> Alternatives:
816832
# Handle the Optional[T] type
817833
if len(args) == 2 and (type(None) in args or None in args):
@@ -833,7 +849,7 @@ def _handle_list(args: tuple, recursion_depth: int) -> Sequence:
833849
"Only homogeneous lists are supported. You should provide exactly "
834850
+ "one argument to `List`, got {args}."
835851
)
836-
item_type = python_types_to_terms(args[0], recursion_depth + 1)
852+
item_type = _ensure_json_quoted(python_types_to_terms(args[0], recursion_depth + 1))
837853
return Sequence(
838854
[
839855
String("["),
@@ -848,7 +864,7 @@ def _handle_tuple(args: tuple, recursion_depth: int) -> Union[Sequence, String]:
848864
if len(args) == 0 or args == ((),):
849865
return String("()")
850866
elif len(args) == 2 and args[1] is Ellipsis:
851-
item_term = python_types_to_terms(args[0], recursion_depth + 1)
867+
item_term = _ensure_json_quoted(python_types_to_terms(args[0], recursion_depth + 1))
852868
return Sequence(
853869
[
854870
String("("),
@@ -858,7 +874,7 @@ def _handle_tuple(args: tuple, recursion_depth: int) -> Union[Sequence, String]:
858874
]
859875
)
860876
else:
861-
items = [python_types_to_terms(arg, recursion_depth + 1) for arg in args]
877+
items = [_ensure_json_quoted(python_types_to_terms(arg, recursion_depth + 1)) for arg in args]
862878
separator = String(", ")
863879
elements = []
864880
for i, item in enumerate(items):
@@ -872,8 +888,8 @@ def _handle_dict(args: tuple, recursion_depth: int) -> Sequence:
872888
if args is None or len(args) != 2:
873889
raise TypeError(f"Dict must have exactly two type arguments. Got {args}.")
874890
# Add dict support with key:value pairs
875-
key_type = python_types_to_terms(args[0], recursion_depth + 1)
876-
value_type = python_types_to_terms(args[1], recursion_depth + 1)
891+
key_type = _ensure_json_quoted(python_types_to_terms(args[0], recursion_depth + 1))
892+
value_type = _ensure_json_quoted(python_types_to_terms(args[1], recursion_depth + 1))
877893
return Sequence(
878894
[
879895
String("{"),

0 commit comments

Comments
 (0)