diff --git a/cpp/json_schema_converter.cc b/cpp/json_schema_converter.cc index 5cd0eafb..000b7cde 100644 --- a/cpp/json_schema_converter.cc +++ b/cpp/json_schema_converter.cc @@ -3255,13 +3255,60 @@ std::string JSONSchemaConverter::VisitObject( } indentManager_->StartIndent(); - if (object_spec.pattern_properties.size() > 0 || - !object_spec.property_names.is()) { - // Case 1: patternProperties or propertyNames is difined + if (object_spec.properties.size() > 0) { + // Case 1: properties are defined + // Notes: patternProperties or propertyNames is ignored when properties are defined + result += " " + GetPartialRuleForProperties( + object_spec.properties, + object_spec.required_properties, + additional_property, + rule_name, + additional_suffix, + object_spec.min_properties, + object_spec.max_properties, + json_format + ); + could_be_empty = object_spec.required_properties.empty() && object_spec.min_properties == 0; + } else if (!additional_property.is() && + (!additional_property.is() || additional_property.get())) { + // Case 2: no properties are defined and additional properties are allowed + // Notes: patternProperties or propertyNames is ignored when additional properties are allowed + if (object_spec.max_properties != 0) { + std::string other_property_pattern; + switch (json_format) { + case (JSONFormat::kJSON): { + other_property_pattern += GetOtherPropertyPattern( + kBasicString, additional_property, rule_name, additional_suffix + ); + result += " " + NextSeparator() + " " + other_property_pattern + " "; + break; + } + case (JSONFormat::kXML): { + other_property_pattern += GetOtherPropertyPattern( + kXMLVariableName, additional_property, rule_name, additional_suffix, JSONFormat::kXML + ); + result += " " + other_property_pattern + " "; + break; + } + } + if (object_spec.max_properties != 0) { + result += GetPropertyWithNumberConstrains( + NextSeparator() + " " + other_property_pattern, + object_spec.min_properties, + object_spec.max_properties, + 1 + ) + + 
" " + NextSeparator(true); + } + } + could_be_empty = object_spec.min_properties == 0; + } else if (object_spec.pattern_properties.size() > 0 || + !object_spec.property_names.is()) { + // Case 3: patternProperties or propertyNames is difined // TODO: Here we only handle the case that additionalProperties=False - // TODO: The coexistence of properties, required, etc. has not been addressed yet, - // as it may cause schema conflicts // TODO: The situation of duplicate keys has not been resolved yet + // Notes: The priority of patternProperties is higher than propertyNames, i.e., + // if patternProperties is defined, propertyNames is ignored // Initialize the beginning sequence of a property. std::string beg_seq; @@ -3341,51 +3388,6 @@ std::string JSONSchemaConverter::VisitObject( } could_be_empty = object_spec.min_properties == 0; } - } else if (object_spec.properties.size() > 0) { - // Case 2: properties are defined - result += " " + GetPartialRuleForProperties( - object_spec.properties, - object_spec.required_properties, - additional_property, - rule_name, - additional_suffix, - object_spec.min_properties, - object_spec.max_properties, - json_format - ); - could_be_empty = object_spec.required_properties.empty() && object_spec.min_properties == 0; - } else if (!additional_property.is() && - (!additional_property.is() || additional_property.get())) { - // Case 3: no properties are defined and additional properties are allowed - if (object_spec.max_properties != 0) { - std::string other_property_pattern; - switch (json_format) { - case (JSONFormat::kJSON): { - other_property_pattern += GetOtherPropertyPattern( - kBasicString, additional_property, rule_name, additional_suffix - ); - result += " " + NextSeparator() + " " + other_property_pattern + " "; - break; - } - case (JSONFormat::kXML): { - other_property_pattern += GetOtherPropertyPattern( - kXMLVariableName, additional_property, rule_name, additional_suffix, JSONFormat::kXML - ); - result += " " + 
other_property_pattern + " "; - break; - } - } - if (object_spec.max_properties != 0) { - result += GetPropertyWithNumberConstrains( - NextSeparator() + " " + other_property_pattern, - object_spec.min_properties, - object_spec.max_properties, - 1 - ) + - " " + NextSeparator(true); - } - } - could_be_empty = object_spec.min_properties == 0; } indentManager_->EndIndent(); diff --git a/docs/xgrammar_features/json_schema.md b/docs/xgrammar_features/json_schema.md new file mode 100644 index 00000000..4d976d24 --- /dev/null +++ b/docs/xgrammar_features/json_schema.md @@ -0,0 +1,68 @@ +# Supported JSON Schema + +XGrammar provides common and robust support for converting JSON Schema into EBNF grammars, enabling precise, constraint-based generation and validation of JSON data. This document outlines the supported features. For more detailed information about JSON Schema, please refer to [JSON Schema](https://json-schema.org). + +## Generic Keywords + +* `type`: All JSON Schema types are supported, including `string`, `number`, `integer`, `object`, `array`, `boolean`, and `null`. + +* `enum` + +* `const` + +## Applicators +* `anyOf`: Fully supported. + +* `oneOf`: Treated as `anyOf`. + +* `allOf`: Only one schema is currently supported. + +## Referencing +* `$ref`: Supports internal schema references. E.g. `"#/$defs/user"` refers to the `"user"` field of the `"$defs"` field of the global JSON Schema object. + +## String + +* `minLength` / `maxLength` + +* `pattern`: Regular expressions are supported. Lookahead and lookbehind zero-length assertions are ignored. + +* `format`: All formats are supported, including: + - `date`, `time`, `date-time`, `duration`, `email`, `ipv4`, `ipv6`, `hostname`, `uuid`, `uri`, `uri-reference`, `uri-template`, `json-pointer`, `relative-json-pointer`. + +## Integer + +* `minimum` / `maximum` + +* `exclusiveMinimum` / `exclusiveMaximum` + +## Number + +* `minimum` / `maximum`: The precision is $10^{-6}$. 
+ +* `exclusiveMinimum` / `exclusiveMaximum`: The precision is $10^{-6}$. + +## Array + +* `items` + +* `prefixItems` + +* `minItems` / `maxItems` + +* `unevaluatedItems` + +## Object + +* `properties` + +* `required` + +* `additionalProperties` + +* `unevaluatedProperties` + +* `minProperties` / `maxProperties` + +* `patternProperties`: Only supported when `additionalProperties=False`; ignored when `properties` or `required` is set. + +* `propertyNames`: Only supported when `additionalProperties=False`; ignored when `properties`, `required` or `patternProperties` is set.