Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 18 additions & 3 deletions common/json-schema-to-grammar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -416,15 +416,30 @@ class common_schema_converter {
i++;
} else if (c == '(') {
i++;
if (i < length) {
if (sub_pattern[i] == '?') {
if (i < length && sub_pattern[i] == '?') {
if (i + 1 < length && sub_pattern[i + 1] == ':') {
i += 2; // skip "?:" for non-capturing group, treat as regular group
} else {
// lookahead/lookbehind (?=, ?!, ?<=, ?<!) - not supported
_warnings.push_back("Unsupported pattern syntax");
// skip to matching ')' to avoid UB on empty seq
int depth = 1;
while (i < length && depth > 0) {
if (sub_pattern[i] == '\\' && i + 1 < length) {
i += 2; // skip escaped character
} else {
if (sub_pattern[i] == '(') depth++;
else if (sub_pattern[i] == ')') depth--;
i++;
}
}
continue;
}
}
seq.emplace_back("(" + to_rule(transform()) + ")", false);
} else if (c == ')') {
i++;
if (start > 0 && sub_pattern[start - 1] != '(') {
if (start > 0 && sub_pattern[start - 1] != '(' && (start < 2 || sub_pattern[start - 2] != '?' || sub_pattern[start - 1] != ':')) {
_errors.push_back("Unbalanced parentheses");
}
return join_seq();
Expand Down
41 changes: 41 additions & 0 deletions tests/test-json-schema-to-grammar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1525,6 +1525,47 @@ int main() {
}
});

// C++ only tests (features not yet supported in JS/Python implementations)
{
fprintf(stderr, "#\n# Testing C++ only features\n#\n");
auto run = [](const TestCase & tc) {
fprintf(stderr, "- %s\n", tc.name.c_str());
try {
tc.verify(json_schema_to_grammar(nlohmann::ordered_json::parse(tc.schema), true));
tc.verify_status(SUCCESS);
} catch (const std::invalid_argument & ex) {
fprintf(stderr, "Error: %s\n", ex.what());
tc.verify_status(FAILURE);
}
};

run({
SUCCESS,
"regexp with non-capturing group",
R"""({
"type": "string",
"pattern": "^(?:foo|bar)baz$"
})""",
R"""(
root ::= "\"" (("foo" | "bar") "baz") "\"" space
space ::= | " " | "\n"{1,2} [ \t]{0,20}
)""",
});

run({
SUCCESS,
"regexp with nested non-capturing groups",
R"""({
"type": "string",
"pattern": "^(?:(?:ab)+c)?d$"
})""",
R"""(
root ::= "\"" ((("ab")+ "c")? "d") "\"" space
space ::= | " " | "\n"{1,2} [ \t]{0,20}
)""",
});
}

if (getenv("LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR")) {
fprintf(stderr, "\033[33mWARNING: Skipping slow tests on emulator.\n\033[0m");
} else {
Expand Down
Loading