From 9cd07c2f9d575096cd68d3cfee5bc84b25803163 Mon Sep 17 00:00:00 2001 From: Clint Herron Date: Fri, 12 Apr 2024 16:17:07 -0400 Subject: [PATCH 01/10] Cleaning up integration tests to share code between tests and make it simpler to add new tests. --- tests/test-grammar-integration.cpp | 113 +++++++++-------------------- 1 file changed, 33 insertions(+), 80 deletions(-) diff --git a/tests/test-grammar-integration.cpp b/tests/test-grammar-integration.cpp index 2d8f228e376..26e39eefa84 100644 --- a/tests/test-grammar-integration.cpp +++ b/tests/test-grammar-integration.cpp @@ -11,14 +11,8 @@ #include #include -static void test_simple_grammar() { - // Test case for a simple grammar - const std::string grammar_str = R"""(root ::= expr -expr ::= term ("+" term)* -term ::= number -number ::= [0-9]+)"""; - - grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str()); +static llama_grammar* build_grammar(const std::string & grammar_str) { + auto parsed_grammar = grammar_parser::parse(grammar_str.c_str()); // Ensure we parsed correctly assert(!parsed_grammar.rules.empty()); @@ -30,8 +24,10 @@ number ::= [0-9]+)"""; llama_grammar* grammar = llama_grammar_init( grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root")); - std::string input = "123+456"; + return grammar; +} +static bool match_string(const std::string & input, llama_grammar* grammar) { auto decoded = decode_utf8(input, {}); const auto & code_points = decoded.first; @@ -39,19 +35,34 @@ number ::= [0-9]+)"""; for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) { auto prev_stacks = grammar->stacks; llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks); - assert(!grammar->stacks.empty()); + if (grammar->stacks.empty()) { + // no stacks means that the grammar failed to match at this point + return false; + } } - bool completed_grammar = false; - for (const auto & stack : grammar->stacks) { if (stack.empty()) { - completed_grammar = true; - break; + // An empty stack means that the grammar has been completed + return true; } } - assert(completed_grammar); + return false; +} + +static void test_simple_grammar() { + // Test case for a simple grammar + const std::string grammar_str = R"""(root ::= expr +expr ::= term ("+" term)* +term ::= number +number ::= [0-9]+)"""; + + auto grammar = build_grammar(grammar_str); + + bool matched = match_string("123+456", grammar); + + assert(matched); // Clean up allocated memory llama_grammar_free(grammar); @@ -68,17 +79,7 @@ variable ::= [a-zA-Z_][a-zA-Z0-9_]* function-call ::= variable ws "(" (expression ("," ws expression)*)? ")" ws ::= [ \t\n\r]?)"""; - grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str()); - - // Ensure we parsed correctly - assert(!parsed_grammar.rules.empty()); - - // Ensure we have a root node - assert(!(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end())); - - std::vector grammar_rules(parsed_grammar.c_rules()); - llama_grammar* grammar = llama_grammar_init( - grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root")); + auto grammar = build_grammar(grammar_str); // Save the original grammar stacks so that we can reset after every new string we want to test auto original_stacks = grammar->stacks; @@ -130,35 +131,9 @@ ws ::= [ \t\n\r]?)"""; // Passing strings for (const auto & test_string : test_strings_pass) { - auto decoded = decode_utf8(test_string, {}); - - const auto & code_points = decoded.first; - - int pos = 0; - for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) { - ++pos; - auto prev_stacks = grammar->stacks; - llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks); - - // Expect that each code point will not cause the grammar to fail - if (grammar->stacks.empty()) { - fprintf(stdout, "Error at position %d\n", pos); - fprintf(stderr, "Unexpected character '%s'\n", unicode_cpt_to_utf8(*it).c_str()); - fprintf(stderr, "Input string is %s:\n", test_string.c_str()); - } - assert(!grammar->stacks.empty()); - } - - bool completed_grammar = false; - - for (const auto & stack : grammar->stacks) { - if (stack.empty()) { - completed_grammar = true; - break; - } - } + bool matched = match_string(test_string, grammar); - assert(completed_grammar); + assert(matched); // Reset the grammar stacks grammar->stacks = original_stacks; @@ -166,32 +141,9 @@ ws ::= [ \t\n\r]?)"""; // Failing strings for (const auto & test_string : test_strings_fail) { - auto decoded = decode_utf8(test_string, {}); - - const auto & code_points = decoded.first; - bool parse_failed = false; - - for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) { - auto prev_stacks = grammar->stacks; - llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks); - if (grammar->stacks.empty()) { - parse_failed = true; - break; - } - assert(!grammar->stacks.empty()); - } - - bool completed_grammar = false; - - for (const auto & stack : grammar->stacks) { - if (stack.empty()) { - completed_grammar = true; - break; - } - } + bool matched = match_string(test_string, grammar); - // Ensure that the grammar is not completed, or that each string failed to match as-expected - assert((!completed_grammar) || parse_failed); + assert(!matched); // Reset the grammar stacks grammar->stacks = original_stacks; @@ -231,7 +183,7 @@ number ::= [0-9]+)"""; // Ensure we did NOT parsed correctly assert(parsed_grammar.rules.empty()); - fprintf(stderr, "End of expected error. Test successful.\n"); + fprintf(stderr, "End of expected error.\n"); } int main() { @@ -239,5 +191,6 @@ int main() { test_complex_grammar(); test_failure_missing_root(); test_failure_missing_reference(); + fprintf(stdout, "All tests passed.\n"); return 0; } From ab606c030f9d32d45829a97a9263cad3025f6e3a Mon Sep 17 00:00:00 2001 From: Clint Herron Date: Fri, 12 Apr 2024 16:35:10 -0400 Subject: [PATCH 02/10] Add tests around quantifiers to ensure both matching and non-matching compliance. --- tests/test-grammar-integration.cpp | 110 +++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) diff --git a/tests/test-grammar-integration.cpp b/tests/test-grammar-integration.cpp index 26e39eefa84..f6ec8258720 100644 --- a/tests/test-grammar-integration.cpp +++ b/tests/test-grammar-integration.cpp @@ -153,6 +153,115 @@ ws ::= [ \t\n\r]?)"""; llama_grammar_free(grammar); } +static void test_quantifiers() { + // Populate test data with grammar strings and their associated collections of expected passing and failing strings + const std::vector< + std::tuple< + std::string, + std::vector, + std::vector>> + test_data = { + { + // Grammar + R"""(root ::= "a"*)""", + // Passing strings + { + "", + "a", + "aaaaa", + "aaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + }, + // Failing strings + { + "b", + "ab", + "aab", + "ba", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab" + } + }, + { + // Grammar + R"""(root ::= "a"+)""", + // Passing strings + { + "a", + "aaaaa", + "aaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + }, + // Failing strings + { + "", + "b", + "ab", + "aab", + "ba", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab" + } + }, + { + // Grammar + R"""(root ::= "a"?)""", + // Passing strings + { + "", + "a" + }, + // Failing strings + { + "b", + "ab", + "aa", + "ba", + } + } + }; + + for (const auto & test_datum : test_data) { + const auto & [grammar_str, passing_strings, failing_strings] = test_datum; + + auto grammar = build_grammar(grammar_str); + + // Save the original grammar stacks so that we can reset after every new string we want to test + auto original_stacks = grammar->stacks; + + // Passing strings + for (const auto & test_string : passing_strings) { + bool matched = match_string(test_string, grammar); + + if (!matched) { + fprintf(stderr, "Against grammar: %s\n", grammar_str.c_str()); + fprintf(stderr, "Failed to match string: %s\n", test_string.c_str()); + } + + assert(matched); + + // Reset the grammar stacks + grammar->stacks = original_stacks; + } + + // Failing strings + for (const auto & test_string : failing_strings) { + bool matched = match_string(test_string, grammar); + + if (matched) { + fprintf(stderr, "Against grammar: %s\n", grammar_str.c_str()); + fprintf(stderr, "Improperly matched string: %s\n", test_string.c_str()); + } + + assert(!matched); + + // Reset the grammar stacks + grammar->stacks = original_stacks; + } + + // Clean up allocated memory + llama_grammar_free(grammar); + } +} + static void test_failure_missing_root() { // Test case for a grammar that is missing a root rule const std::string grammar_str = R"""(rot ::= expr @@ -189,6 +298,7 @@ number ::= [0-9]+)"""; int main() { test_simple_grammar(); test_complex_grammar(); + test_quantifiers(); test_failure_missing_root(); test_failure_missing_reference(); fprintf(stdout, "All tests passed.\n"); From 8f17815ca8bc9566f8746d76895becbbe3a33301 Mon Sep 17 00:00:00 2001 From: Clint Herron Date: Fri, 12 Apr 2024 16:49:05 -0400 Subject: [PATCH 03/10] Add slightly more complex grammar with quantifiers to test references with quantifiers. --- tests/test-grammar-integration.cpp | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/test-grammar-integration.cpp b/tests/test-grammar-integration.cpp index f6ec8258720..28e8992d32d 100644 --- a/tests/test-grammar-integration.cpp +++ b/tests/test-grammar-integration.cpp @@ -216,6 +216,30 @@ static void test_quantifiers() { "aa", "ba", } + }, + { + // Grammar + R"""( +root ::= cons+ vowel* cons? (vowel cons)* +vowel ::= [aeiouy] +cons ::= [bcdfghjklmnpqrstvwxyz] +)""", + // Passing strings + { + "yes", + "no", + "noyes", + "crwth", + "four", + "bryyyy", + }, + // Failing strings + { + "yess", + "yesno", + "forty", + "catyyy", + } } }; From 7b04c215ebc6c741cc8245c2ba32a61288f9c369 Mon Sep 17 00:00:00 2001 From: Clint Herron Date: Fri, 12 Apr 2024 22:04:45 -0400 Subject: [PATCH 04/10] Fixing build when C++17 is not present. --- tests/test-grammar-integration.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test-grammar-integration.cpp b/tests/test-grammar-integration.cpp index 28e8992d32d..93586486c94 100644 --- a/tests/test-grammar-integration.cpp +++ b/tests/test-grammar-integration.cpp @@ -10,6 +10,7 @@ #include "unicode.h" #include #include +#include static llama_grammar* build_grammar(const std::string & grammar_str) { auto parsed_grammar = grammar_parser::parse(grammar_str.c_str()); @@ -244,7 +245,9 @@ cons ::= [bcdfghjklmnpqrstvwxyz] }; for (const auto & test_datum : test_data) { - const auto & [grammar_str, passing_strings, failing_strings] = test_datum; + const auto & grammar_str = std::get<0>(test_datum); + const auto & passing_strings = std::get<1>(test_datum); + const auto & failing_strings = std::get<2>(test_datum); auto grammar = build_grammar(grammar_str); From b7f24dba00bb6936b8d7e459c6a40f19fb3d486d Mon Sep 17 00:00:00 2001 From: Clint Herron Date: Sat, 27 Apr 2024 09:38:45 -0400 Subject: [PATCH 05/10] Separating test calls to give more helpful stack traces on failure. Adding verbose messages to give visibility for what is being tested. --- tests/test-grammar-integration.cpp | 371 ++++++++++++++--------------- 1 file changed, 173 insertions(+), 198 deletions(-) diff --git a/tests/test-grammar-integration.cpp b/tests/test-grammar-integration.cpp index 93586486c94..cc326e4aabe 100644 --- a/tests/test-grammar-integration.cpp +++ b/tests/test-grammar-integration.cpp @@ -52,88 +52,24 @@ static bool match_string(const std::string & input, llama_grammar* grammar) { return false; } -static void test_simple_grammar() { - // Test case for a simple grammar - const std::string grammar_str = R"""(root ::= expr -expr ::= term ("+" term)* -term ::= number -number ::= [0-9]+)"""; - - auto grammar = build_grammar(grammar_str); - - bool matched = match_string("123+456", grammar); - - assert(matched); - - // Clean up allocated memory - llama_grammar_free(grammar); -} - -static void test_complex_grammar() { - // Test case for a more complex grammar, with both failure strings and success strings - const std::string grammar_str = R"""(root ::= expression -expression ::= term ws (("+"|"-") ws term)* -term ::= factor ws (("*"|"/") ws factor)* -factor ::= number | variable | "(" expression ")" | function-call -number ::= [0-9]+ -variable ::= [a-zA-Z_][a-zA-Z0-9_]* -function-call ::= variable ws "(" (expression ("," ws expression)*)? ")" -ws ::= [ \t\n\r]?)"""; +static void test_grammar(const std::string & grammar_str, const std::vector & passing_strings, const std::vector & failing_strings) { + fprintf(stderr, "🟢 Testing grammar: %s\n", grammar_str.c_str()); auto grammar = build_grammar(grammar_str); // Save the original grammar stacks so that we can reset after every new string we want to test auto original_stacks = grammar->stacks; - // Test a few strings - std::vector test_strings_pass = { - "42", - "1*2*3*4*5", - "x", - "x+10", - "x1+y2", - "(a+b)*(c-d)", - "func()", - "func(x,y+2)", - "a*(b+c)-d/e", - "f(g(x),h(y,z))", - "x + 10", - "x1 + y2", - "(a + b) * (c - d)", - "func()", - "func(x, y + 2)", - "a * (b + c) - d / e", - "f(g(x), h(y, z))", - "123+456", - "123*456*789-123/456+789*123", - "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456" - }; - - std::vector test_strings_fail = { - "+", - "/ 3x", - "x + + y", - "a * / b", - "func(,)", - "func(x y)", - "(a + b", - "x + y)", - "a + b * (c - d", - "42 +", - "x +", - "x + 10 +", - "(a + b) * (c - d", - "func(", - "func(x, y + 2", - "a * (b + c) - d /", - "f(g(x), h(y, z)", - "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456/", - }; - // Passing strings - for (const auto & test_string : test_strings_pass) { + for (const auto & test_string : passing_strings) { bool matched = match_string(test_string, grammar); + if (!matched) { + fprintf(stderr, " ❌ Failed to match string: %s\n", test_string.c_str()); + } else { + fprintf(stdout, " ✅︎ Matched string: %s\n", test_string.c_str()); + } + assert(matched); // Reset the grammar stacks @@ -141,9 +77,14 @@ ws ::= [ \t\n\r]?)"""; } // Failing strings - for (const auto & test_string : test_strings_fail) { + for (const auto & test_string : failing_strings) { bool matched = match_string(test_string, grammar); + if (matched) { + fprintf(stderr, " ❌ Improperly matched string: %s\n", test_string.c_str()); + } else { + fprintf(stdout, " ✅︎ Correctly did not match string: %s\n", test_string.c_str()); + } assert(!matched); // Reset the grammar stacks @@ -154,142 +95,170 @@ ws ::= [ \t\n\r]?)"""; llama_grammar_free(grammar); } -static void test_quantifiers() { - // Populate test data with grammar strings and their associated collections of expected passing and failing strings - const std::vector< - std::tuple< - std::string, - std::vector, - std::vector>> - test_data = { +static void test_simple_grammar() { + // Test case for a simple grammar + const std::string grammar_str = R"""(root ::= expr +expr ::= term ("+" term)* +term ::= number +number ::= [0-9]+)"""; + + auto grammar = build_grammar(grammar_str); + + bool matched = match_string("123+456", grammar); + + assert(matched); + + // Clean up allocated memory + llama_grammar_free(grammar); +} + +static void test_complex_grammar() { + // Test case for a more complex grammar, with both failure strings and success strings + test_grammar( + // Grammar + R"""( + root ::= expression + expression ::= term ws (("+"|"-") ws term)* + term ::= factor ws (("*"|"/") ws factor)* + factor ::= number | variable | "(" expression ")" | function-call + number ::= [0-9]+ + variable ::= [a-zA-Z_][a-zA-Z0-9_]* + function-call ::= variable ws "(" (expression ("," ws expression)*)? ")" + ws ::= [ \t\n\r]?)""", + // Passing strings { - // Grammar - R"""(root ::= "a"*)""", - // Passing strings - { - "", - "a", - "aaaaa", - "aaaaaaaaaaaaaaaaaa", - "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" - }, - // Failing strings - { - "b", - "ab", - "aab", - "ba", - "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab" - } + "42", + "1*2*3*4*5", + "x", + "x+10", + "x1+y2", + "(a+b)*(c-d)", + "func()", + "func(x,y+2)", + "a*(b+c)-d/e", + "f(g(x),h(y,z))", + "x + 10", + "x1 + y2", + "(a + b) * (c - d)", + "func()", + "func(x, y + 2)", + "a * (b + c) - d / e", + "f(g(x), h(y, z))", + "123+456", + "123*456*789-123/456+789*123", + "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456" }, + // Failing strings + { + "+", + "/ 3x", + "x + + y", + "a * / b", + "func(,)", + "func(x y)", + "(a + b", + "x + y)", + "a + b * (c - d", + "42 +", + "x +", + "x + 10 +", + "(a + b) * (c - d", + "func(", + "func(x, y + 2", + "a * (b + c) - d /", + "f(g(x), h(y, z)", + "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456/", + } + ); +} + +static void test_quantifiers() { + // A collection of tests to exercise * + and ? quantifiers + + test_grammar( + // Grammar + R"""(root ::= "a"*)""", + // Passing strings { - // Grammar - R"""(root ::= "a"+)""", - // Passing strings - { - "a", - "aaaaa", - "aaaaaaaaaaaaaaaaaa", - "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" - }, - // Failing strings - { - "", - "b", - "ab", - "aab", - "ba", - "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab" - } + "", + "a", + "aaaaa", + "aaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" }, + // Failing strings + { + "b", + "ab", + "aab", + "ba", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab" + } + ); + test_grammar( + // Grammar + R"""(root ::= "a"+)""", + // Passing strings { - // Grammar - R"""(root ::= "a"?)""", - // Passing strings - { - "", - "a" - }, - // Failing strings - { - "b", - "ab", - "aa", - "ba", - } + "a", + "aaaaa", + "aaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" }, + // Failing strings { - // Grammar - R"""( -root ::= cons+ vowel* cons? (vowel cons)* -vowel ::= [aeiouy] -cons ::= [bcdfghjklmnpqrstvwxyz] -)""", - // Passing strings - { - "yes", - "no", - "noyes", - "crwth", - "four", - "bryyyy", - }, - // Failing strings - { - "yess", - "yesno", - "forty", - "catyyy", - } + "", + "b", + "ab", + "aab", + "ba", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab" } - }; - - for (const auto & test_datum : test_data) { - const auto & grammar_str = std::get<0>(test_datum); - const auto & passing_strings = std::get<1>(test_datum); - const auto & failing_strings = std::get<2>(test_datum); - - auto grammar = build_grammar(grammar_str); - - // Save the original grammar stacks so that we can reset after every new string we want to test - auto original_stacks = grammar->stacks; - + ); + test_grammar( + // Grammar + R"""(root ::= "a"?)""", // Passing strings - for (const auto & test_string : passing_strings) { - bool matched = match_string(test_string, grammar); - - if (!matched) { - fprintf(stderr, "Against grammar: %s\n", grammar_str.c_str()); - fprintf(stderr, "Failed to match string: %s\n", test_string.c_str()); - } - - assert(matched); - - // Reset the grammar stacks - grammar->stacks = original_stacks; + { + "", + "a" + }, + // Failing strings + { + "b", + "ab", + "aa", + "ba", } - + ); + test_grammar( + // Grammar + R"""( + root ::= cons+ vowel* cons? (vowel cons)* + vowel ::= [aeiouy] + cons ::= [bcdfghjklmnpqrstvwxyz] + )""", + // Passing strings + { + "yes", + "no", + "noyes", + "crwth", + "four", + "bryyyy", + }, // Failing strings - for (const auto & test_string : failing_strings) { - bool matched = match_string(test_string, grammar); - - if (matched) { - fprintf(stderr, "Against grammar: %s\n", grammar_str.c_str()); - fprintf(stderr, "Improperly matched string: %s\n", test_string.c_str()); - } - - assert(!matched); - - // Reset the grammar stacks - grammar->stacks = original_stacks; + { + "yess", + "yesno", + "forty", + "catyyy", } - - // Clean up allocated memory - llama_grammar_free(grammar); - } + ); } static void test_failure_missing_root() { + fprintf(stderr, "🟢 Testing for missing root node:\n"); // Test case for a grammar that is missing a root rule const std::string grammar_str = R"""(rot ::= expr expr ::= term ("+" term)* @@ -303,26 +272,32 @@ number ::= [0-9]+)"""; // Ensure we do NOT have a root node assert(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end()); + fprintf(stderr, " ✅︎ Passed\n"); } static void test_failure_missing_reference() { + fprintf(stderr, "🟢 Testing for missing reference node:\n"); + // Test case for a grammar that is missing a referenced rule - const std::string grammar_str = R"""(root ::= expr + const std::string grammar_str = +R"""(root ::= expr expr ::= term ("+" term)* term ::= numero number ::= [0-9]+)"""; - fprintf(stderr, "Expected error: "); + fprintf(stderr, " Expected error: "); grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str()); // Ensure we did NOT parsed correctly assert(parsed_grammar.rules.empty()); - fprintf(stderr, "End of expected error.\n"); + fprintf(stderr, " End of expected error.\n"); + fprintf(stderr, " ✅︎ Passed\n"); } int main() { + fprintf(stdout, "Running grammar integration tests...\n"); test_simple_grammar(); test_complex_grammar(); test_quantifiers(); From e3218de5a5f0ae0de7fc0fc1aba597fc2c27a3e9 Mon Sep 17 00:00:00 2001 From: Clint Herron Date: Sat, 27 Apr 2024 09:45:53 -0400 Subject: [PATCH 06/10] Adding quotes around strings to explicitly show whitespace --- tests/test-grammar-integration.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test-grammar-integration.cpp b/tests/test-grammar-integration.cpp index cc326e4aabe..44e43b91a0a 100644 --- a/tests/test-grammar-integration.cpp +++ b/tests/test-grammar-integration.cpp @@ -65,9 +65,9 @@ static void test_grammar(const std::string & grammar_str, const std::vector Date: Sat, 27 Apr 2024 09:47:54 -0400 Subject: [PATCH 07/10] Removing trailing whitespace. --- tests/test-grammar-integration.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test-grammar-integration.cpp b/tests/test-grammar-integration.cpp index 44e43b91a0a..fe43ad552fc 100644 --- a/tests/test-grammar-integration.cpp +++ b/tests/test-grammar-integration.cpp @@ -279,7 +279,7 @@ static void test_failure_missing_reference() { fprintf(stderr, "🟢 Testing for missing reference node:\n"); // Test case for a grammar that is missing a referenced rule - const std::string grammar_str = + const std::string grammar_str = R"""(root ::= expr expr ::= term ("+" term)* term ::= numero From 7fe2fb3fed23ff2122790c50394e9c717859032e Mon Sep 17 00:00:00 2001 From: Clint Herron Date: Mon, 29 Apr 2024 14:17:59 -0400 Subject: [PATCH 08/10] Implementing suggestions from @ochafik -- grammars and test strings now print and flush before tests to aid in debugging segfaults and whatnot. --- tests/test-grammar-integration.cpp | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/tests/test-grammar-integration.cpp b/tests/test-grammar-integration.cpp index fe43ad552fc..51e59f05a84 100644 --- a/tests/test-grammar-integration.cpp +++ b/tests/test-grammar-integration.cpp @@ -53,21 +53,27 @@ static bool match_string(const std::string & input, llama_grammar* grammar) { } static void test_grammar(const std::string & grammar_str, const std::vector & passing_strings, const std::vector & failing_strings) { - fprintf(stderr, "🟢 Testing grammar: %s\n", grammar_str.c_str()); + fprintf(stderr, "⚪ Testing grammar: %s\n", grammar_str.c_str()); + fflush(stderr); auto grammar = build_grammar(grammar_str); // Save the original grammar stacks so that we can reset after every new string we want to test auto original_stacks = grammar->stacks; + fprintf(stderr, " Checking valid strings:\n"); + // Passing strings for (const auto & test_string : passing_strings) { + fprintf(stderr, " \"%s\" ", test_string.c_str()); + fflush(stderr); + bool matched = match_string(test_string, grammar); if (!matched) { - fprintf(stderr, " ❌ Failed to match string: \"%s\"\n", test_string.c_str()); + fprintf(stderr, "❌ (failed to match)\n"); } else { - fprintf(stdout, " ✅︎ Matched string: \"%s\"\n", test_string.c_str()); + fprintf(stdout, "✅︎\n"); } assert(matched); @@ -76,14 +82,19 @@ static void test_grammar(const std::string & grammar_str, const std::vectorstacks = original_stacks; } + fprintf(stderr, " Checking invalid strings:\n"); + // Failing strings for (const auto & test_string : failing_strings) { + fprintf(stderr, " \"%s\" ", test_string.c_str()); + fflush(stderr); + bool matched = match_string(test_string, grammar); if (matched) { - fprintf(stderr, " ❌ Improperly matched string: \"%s\"\n", test_string.c_str()); + fprintf(stderr, "❌ (incorrectly matched)\n"); } else { - fprintf(stdout, " ✅︎ Correctly did not match string: \"%s\"\n", test_string.c_str()); + fprintf(stdout, "✅︎\n"); } assert(!matched); From b712ae4047e0d77e4d9b579b13b6c053c821b55d Mon Sep 17 00:00:00 2001 From: Clint Herron Date: Mon, 29 Apr 2024 14:29:42 -0400 Subject: [PATCH 09/10] Cleaning up forgotten symbols. Modifying simple test to use test harness. Added comments for more verbose descriptions of what each test is accomplishing. --- tests/test-grammar-integration.cpp | 51 +++++++++++++++++++----------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/tests/test-grammar-integration.cpp b/tests/test-grammar-integration.cpp index 51e59f05a84..39434fcf4e3 100644 --- a/tests/test-grammar-integration.cpp +++ b/tests/test-grammar-integration.cpp @@ -52,8 +52,8 @@ static bool match_string(const std::string & input, llama_grammar* grammar) { return false; } -static void test_grammar(const std::string & grammar_str, const std::vector & passing_strings, const std::vector & failing_strings) { - fprintf(stderr, "⚪ Testing grammar: %s\n", grammar_str.c_str()); +static void test_grammar(const std::string & test_desc, const std::string & grammar_str, const std::vector & passing_strings, const std::vector & failing_strings) { + fprintf(stderr, "⚪ Testing %s. Grammar: %s\n", test_desc.c_str(), grammar_str.c_str()); fflush(stderr); auto grammar = build_grammar(grammar_str); @@ -61,7 +61,7 @@ static void test_grammar(const std::string & grammar_str, const std::vectorstacks; - fprintf(stderr, " Checking valid strings:\n"); + fprintf(stderr, " Valid strings:\n"); // Passing strings for (const auto & test_string : passing_strings) { @@ -82,7 +82,7 @@ static void test_grammar(const std::string & grammar_str, const std::vectorstacks = original_stacks; } - fprintf(stderr, " Checking invalid strings:\n"); + fprintf(stderr, " Invalid strings:\n"); // Failing strings for (const auto & test_string : failing_strings) { @@ -108,24 +108,33 @@ static void test_grammar(const std::string & grammar_str, const std::vector Date: Mon, 29 Apr 2024 14:38:06 -0400 Subject: [PATCH 10/10] Unicode symbol modifications to hopefully make log easier to parse visually. --- tests/test-grammar-integration.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test-grammar-integration.cpp b/tests/test-grammar-integration.cpp index 39434fcf4e3..1a4004e2ab1 100644 --- a/tests/test-grammar-integration.cpp +++ b/tests/test-grammar-integration.cpp @@ -53,7 +53,7 @@ static bool match_string(const std::string & input, llama_grammar* grammar) { } static void test_grammar(const std::string & test_desc, const std::string & grammar_str, const std::vector & passing_strings, const std::vector & failing_strings) { - fprintf(stderr, "⚪ Testing %s. Grammar: %s\n", test_desc.c_str(), grammar_str.c_str()); + fprintf(stderr, "⚫ Testing %s. Grammar: %s\n", test_desc.c_str(), grammar_str.c_str()); fflush(stderr); auto grammar = build_grammar(grammar_str); @@ -61,7 +61,7 @@ static void test_grammar(const std::string & test_desc, const std::string & gram // Save the original grammar stacks so that we can reset after every new string we want to test auto original_stacks = grammar->stacks; - fprintf(stderr, " Valid strings:\n"); + fprintf(stderr, " 🔵 Valid strings:\n"); // Passing strings for (const auto & test_string : passing_strings) { @@ -82,7 +82,7 @@ static void test_grammar(const std::string & test_desc, const std::string & gram grammar->stacks = original_stacks; } - fprintf(stderr, " Invalid strings:\n"); + fprintf(stderr, " 🟠 Invalid strings:\n"); // Failing strings for (const auto & test_string : failing_strings) { @@ -282,7 +282,7 @@ static void test_quantifiers() { } static void test_failure_missing_root() { - fprintf(stderr, "⚪ Testing missing root node:\n"); + fprintf(stderr, "⚫ Testing missing root node:\n"); // Test case for a grammar that is missing a root rule const std::string grammar_str = R"""(rot ::= expr expr ::= term ("+" term)* @@ -300,7 +300,7 @@ number ::= [0-9]+)"""; } static void test_failure_missing_reference() { - fprintf(stderr, "⚪ Testing missing reference node:\n"); + fprintf(stderr, "⚫ Testing missing reference node:\n"); // Test case for a grammar that is missing a referenced rule const std::string grammar_str =