From 4a7f905e28140310f1b93b0002e3a936ad2731c9 Mon Sep 17 00:00:00 2001 From: Alde Rojas Date: Sat, 6 Sep 2025 02:17:42 -0500 Subject: [PATCH 1/2] json : support enum values within allOf --- common/json-schema-to-grammar.cpp | 10 +++++++++- examples/json_schema_to_grammar.py | 10 +++++++++- tests/test-json-schema-to-grammar.cpp | 20 +++++++++++++++++++ .../public_legacy/json-schema-to-grammar.mjs | 13 +++++++++++- 4 files changed, 50 insertions(+), 3 deletions(-) diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp index 637891f5069..ccd359a0f49 100644 --- a/common/json-schema-to-grammar.cpp +++ b/common/json-schema-to-grammar.cpp @@ -843,9 +843,10 @@ class SchemaConverter { _build_object_rule( properties, required, name, schema.contains("additionalProperties") ? schema["additionalProperties"] : json())); - } else if ((schema_type.is_null() || schema_type == "object") && schema.contains("allOf")) { + } else if ((schema_type.is_null() || schema_type == "object" || schema_type == "string") && schema.contains("allOf")) { std::unordered_set required; std::vector> properties; + std::vector enum_values; std::string hybrid_name = name; std::function add_component = [&](const json & comp_schema, bool is_required) { if (comp_schema.contains("$ref")) { @@ -857,6 +858,10 @@ class SchemaConverter { required.insert(prop.key()); } } + } else if (comp_schema.contains("enum")) { + for (const auto & v : comp_schema["enum"]) { + enum_values.push_back(_generate_constant_rule(v)); + } } else { // todo warning } @@ -870,6 +875,9 @@ class SchemaConverter { add_component(t, true); } } + if (!enum_values.empty()) { + return _add_rule(rule_name, "(" + string_join(enum_values, " | ") + ") space"); + } return _add_rule(rule_name, _build_object_rule(properties, required, hybrid_name, json())); } else if ((schema_type.is_null() || schema_type == "array") && (schema.contains("items") || schema.contains("prefixItems"))) { json items = schema.contains("items") ? schema["items"] : schema["prefixItems"]; diff --git a/examples/json_schema_to_grammar.py b/examples/json_schema_to_grammar.py index ed379585546..4c7871e27c6 100755 --- a/examples/json_schema_to_grammar.py +++ b/examples/json_schema_to_grammar.py @@ -586,9 +586,10 @@ def visit(self, schema, name): properties = list(schema.get('properties', {}).items()) return self._add_rule(rule_name, self._build_object_rule(properties, required, name, schema.get('additionalProperties'))) - elif schema_type in (None, 'object') and 'allOf' in schema: + elif schema_type in (None, 'object', 'string') and 'allOf' in schema: required = set() properties = [] + enum_values = [] hybrid_name = name def add_component(comp_schema, is_required): if (ref := comp_schema.get('$ref')) is not None: @@ -600,6 +601,9 @@ def add_component(comp_schema, is_required): if is_required: required.add(prop_name) + if 'enum' in comp_schema: + enum_values.extend(comp_schema['enum']) + for t in schema['allOf']: if 'anyOf' in t: for tt in t['anyOf']: @@ -607,6 +611,10 @@ def add_component(comp_schema, is_required): else: add_component(t, is_required=True) + if enum_values: + rule = '(' + ' | '.join((self._generate_constant_rule(v) for v in enum_values)) + ') space' + return self._add_rule(rule_name, rule) + return self._add_rule(rule_name, self._build_object_rule(properties, required, hybrid_name, additional_properties=None)) elif schema_type in (None, 'array') and ('items' in schema or 'prefixItems' in schema): diff --git a/tests/test-json-schema-to-grammar.cpp b/tests/test-json-schema-to-grammar.cpp index 78ee55e246f..af171e933f0 100755 --- a/tests/test-json-schema-to-grammar.cpp +++ b/tests/test-json-schema-to-grammar.cpp @@ -1209,6 +1209,26 @@ static void test_all(const std::string & lang, std::function { const ref = compSchema.$ref; if (ref !== undefined) { @@ -648,6 +649,12 @@ export class SchemaConverter { } } } + + if ('enum' in compSchema) { + for (const v of compSchema.enum) { + enum_values.push(v); + } + } }; for (const t of schema.allOf) { @@ -660,6 +667,10 @@ export class SchemaConverter { } } + if (enum_values.length > 0) { + const rule = '(' + enum_values.map(v => this._generateConstantRule(v)).join(' | ') + ') space'; + return this._addRule(ruleName, rule); + } return this._addRule(ruleName, this._buildObjectRule(properties, required, name, null)); } else if ((schemaType === undefined || schemaType === 'array') && ('items' in schema || 'prefixItems' in schema)) { const items = schema.items ?? schema.prefixItems; From 121d92cc2b6e793a092e6d5e5d1db5a9bbac4657 Mon Sep 17 00:00:00 2001 From: Alde Rojas Date: Sat, 6 Sep 2025 17:18:27 -0500 Subject: [PATCH 2/2] fix : only allow the intersection of enums --- common/json-schema-to-grammar.cpp | 18 ++++++++++--- examples/json_schema_to_grammar.py | 15 +++++++---- tests/test-json-schema-to-grammar.cpp | 27 ++++++++++++++++++- .../public_legacy/json-schema-to-grammar.mjs | 16 ++++++----- 4 files changed, 60 insertions(+), 16 deletions(-) diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp index ccd359a0f49..182c787544f 100644 --- a/common/json-schema-to-grammar.cpp +++ b/common/json-schema-to-grammar.cpp @@ -846,7 +846,7 @@ class SchemaConverter { } else if ((schema_type.is_null() || schema_type == "object" || schema_type == "string") && schema.contains("allOf")) { std::unordered_set required; std::vector> properties; - std::vector enum_values; + std::map enum_values; std::string hybrid_name = name; std::function add_component = [&](const json & comp_schema, bool is_required) { if (comp_schema.contains("$ref")) { @@ -860,7 +860,11 @@ class SchemaConverter { } } else if (comp_schema.contains("enum")) { for (const auto & v : comp_schema["enum"]) { - enum_values.push_back(_generate_constant_rule(v)); + const auto rule = _generate_constant_rule(v); + if (enum_values.find(rule) == enum_values.end()) { + enum_values[rule] = 0; + } + enum_values[rule] += 1; } } else { // todo warning @@ -876,7 +880,15 @@ class SchemaConverter { } } if (!enum_values.empty()) { - return _add_rule(rule_name, "(" + string_join(enum_values, " | ") + ") space"); + std::vector enum_intersection; + for (const auto & p : enum_values) { + if (p.second == schema["allOf"].size()) { + enum_intersection.push_back(p.first); + } + } + if (!enum_intersection.empty()) { + return _add_rule(rule_name, "(" + string_join(enum_intersection, " | ") + ") space"); + } } return _add_rule(rule_name, _build_object_rule(properties, required, hybrid_name, json())); } else if ((schema_type.is_null() || schema_type == "array") && (schema.contains("items") || schema.contains("prefixItems"))) { diff --git a/examples/json_schema_to_grammar.py b/examples/json_schema_to_grammar.py index 4c7871e27c6..2d57549046b 100755 --- a/examples/json_schema_to_grammar.py +++ b/examples/json_schema_to_grammar.py @@ -589,7 +589,7 @@ def visit(self, schema, name): elif schema_type in (None, 'object', 'string') and 'allOf' in schema: required = set() properties = [] - enum_values = [] + enum_sets = [] hybrid_name = name def add_component(comp_schema, is_required): if (ref := comp_schema.get('$ref')) is not None: @@ -602,7 +602,7 @@ def add_component(comp_schema, is_required): required.add(prop_name) if 'enum' in comp_schema: - enum_values.extend(comp_schema['enum']) + enum_sets.append(set(comp_schema['enum'])) for t in schema['allOf']: if 'anyOf' in t: @@ -611,9 +611,14 @@ def add_component(comp_schema, is_required): else: add_component(t, is_required=True) - if enum_values: - rule = '(' + ' | '.join((self._generate_constant_rule(v) for v in enum_values)) + ') space' - return self._add_rule(rule_name, rule) + if enum_sets: + enum_intersection = enum_sets[0] + for s in enum_sets[1:]: + enum_intersection &= s + + if enum_intersection: + rule = '(' + ' | '.join((self._generate_constant_rule(v) for v in sorted(enum_intersection))) + ') space' + return self._add_rule(rule_name, rule) return self._add_rule(rule_name, self._build_object_rule(properties, required, hybrid_name, additional_properties=None)) diff --git a/tests/test-json-schema-to-grammar.cpp b/tests/test-json-schema-to-grammar.cpp index af171e933f0..67df240c6fe 100755 --- a/tests/test-json-schema-to-grammar.cpp +++ b/tests/test-json-schema-to-grammar.cpp @@ -1211,7 +1211,7 @@ static void test_all(const std::string & lang, std::function { const ref = compSchema.$ref; if (ref !== undefined) { @@ -651,9 +651,7 @@ export class SchemaConverter { } if ('enum' in compSchema) { - for (const v of compSchema.enum) { - enum_values.push(v); - } + enumSets.push(new Set(compSchema.enum || [])); } }; @@ -667,9 +665,13 @@ export class SchemaConverter { } } - if (enum_values.length > 0) { - const rule = '(' + enum_values.map(v => this._generateConstantRule(v)).join(' | ') + ') space'; - return this._addRule(ruleName, rule); + if (enumSets.length > 0) { + const enumIntersection = new Set([...enumSets[0]].filter(v => enumSets.every(s => s.has(v)))); + if (enumIntersection.size > 0) { + const sortedEnums = [...enumIntersection].sort((a, b) => a.localeCompare(b)); + const rule = '(' + sortedEnums.map(v => this._generateConstantRule(v)).join(' | ') + ') space'; + return this._addRule(ruleName, rule); + } } return this._addRule(ruleName, this._buildObjectRule(properties, required, name, null)); } else if ((schemaType === undefined || schemaType === 'array') && ('items' in schema || 'prefixItems' in schema)) {