Skip to content
Merged
Show file tree
Hide file tree
Changes from 45 commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
1f4b310
Kinda working.
SharafMohamed Nov 14, 2025
dfa411e
Merge branch 'main' into log-event-boundary-new
SharafMohamed Nov 28, 2025
3ad08ea
Fix edge cases; Update tests.
SharafMohamed Dec 3, 2025
2cc83c9
Update test docstring.
SharafMohamed Dec 3, 2025
3a21375
Fix examples/common.cpp.
SharafMohamed Dec 3, 2025
fc520fd
Cleanup.
SharafMohamed Dec 3, 2025
caca5bc
Add clarifying comment.
SharafMohamed Dec 3, 2025
dd8e45e
Fix accessors for timestamp in output buffer.
SharafMohamed Dec 3, 2025
db78cd6
Fix caching.
SharafMohamed Dec 3, 2025
1351a71
reset m_has_timestamp.
SharafMohamed Dec 3, 2025
1f11aa2
Always get capture_string on wrap around.
SharafMohamed Dec 3, 2025
25b2874
Remove m_has_timestamp and replace it with optional m_timestamp.
SharafMohamed Dec 15, 2025
4bd25cb
Update reader parser to use header check.
SharafMohamed Dec 15, 2025
80376bc
Compile examples.
SharafMohamed Dec 15, 2025
db50803
Replace macros.
SharafMohamed Dec 15, 2025
3d02572
Have get_reg_ids_from_capture throw instead of returning optional.
SharafMohamed Dec 15, 2025
0dfd168
Update readme.
SharafMohamed Dec 15, 2025
82bfd86
Update example schema.
SharafMohamed Dec 15, 2025
a64e9b5
Update readme again.
SharafMohamed Dec 15, 2025
39ebb8f
Improve example schema.
SharafMohamed Dec 15, 2025
67d6e72
Improve example schema.
SharafMohamed Dec 15, 2025
df74af5
Remove extra : in example schema.
SharafMohamed Dec 15, 2025
494f8d1
Fix wording.
SharafMohamed Dec 15, 2025
725a71e
Return captures as tokens to prevent invalidating cache during nested…
SharafMohamed Dec 16, 2025
482df96
Merge branch 'main' into log-event-boundary-new
SharafMohamed Dec 16, 2025
919b598
Update readme to use as $txt$ placeholders and add examples to make i…
SharafMohamed Dec 17, 2025
bd0f4fd
Rename get_capture_token to get_sub_token.
SharafMohamed Dec 17, 2025
23d466c
Add newline.
SharafMohamed Dec 17, 2025
6cb38ea
Update get_sub_token.
SharafMohamed Dec 17, 2025
5c17c15
Allow for non escaped hyphens outside of ranges.
SharafMohamed Dec 17, 2025
7b4d618
Remove unused header.
SharafMohamed Dec 18, 2025
aa61693
Remove unused header.
SharafMohamed Dec 18, 2025
6771a4f
Replace txt with plaintext.
SharafMohamed Dec 18, 2025
48ea212
Merge branch 'log-event-boundary-new' into hyphen_fix
SharafMohamed Dec 18, 2025
c1e4390
Fix cmakelists.
SharafMohamed Dec 18, 2025
791e83e
Fix cmakelists.
SharafMohamed Dec 18, 2025
a47ab26
Merge branch 'main' into log-event-boundary-new
SharafMohamed Dec 19, 2025
351bd18
Merge branch 'main' into log-event-boundary-new
davidlion Dec 19, 2025
096ca4b
Merge branch 'main' into log-event-boundary-new
SharafMohamed Dec 19, 2025
9e538b7
Merge branch 'log-event-boundary-new' into hyphen_fix
SharafMohamed Dec 19, 2025
9a7d974
Merge branch 'log-event-boundary-new' of https://github.com/SharafMoh…
SharafMohamed Dec 19, 2025
3b4dd08
Lint.
SharafMohamed Dec 19, 2025
9002260
Merge branch 'log-event-boundary-new' into hyphen_fix
SharafMohamed Dec 19, 2025
fdc5178
Merge branch 'main' into hyphen_fix
SharafMohamed Dec 19, 2025
95aa1d2
Remove escaped hyphens.
SharafMohamed Dec 19, 2025
2a61ca7
update schema.md for hyphens.
SharafMohamed Dec 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions docs/schema.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,11 +112,11 @@ header:Log (?<pid>\d+) (?<timestamp>\[\d{8}\-\d{2}:\d{2}:\d{2}\]){0,1}
delimiters: \t\r\n:,!;%

// Keywords
header:(?<timestamp>\d{4}\-\d{2}\-\d{2} \d{2}:\d{2}:\d{2}(\.\d{3}){0,1})
header:Log (?<pid>\d+) (?<timestamp>\[\d{8}\-\d{2}:\d{2}:\d{2}\]){0,1}
header:(?<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}(\.\d{3}){0,1})
header:Log (?<pid>\d+) (?<timestamp>\[\d{8}-\d{2}:\d{2}:\d{2}\]){0,1}
header:--- Log:
int:\-{0,1}\d+
float:\-{0,1}\d+\.\d+
int:-{0,1}\d+
float:-{0,1}\d+\.\d+

// Custom variables
hex:[a-fA-F]+
Expand Down
18 changes: 1 addition & 17 deletions src/log_surgeon/SchemaParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -540,7 +540,6 @@ auto SchemaParser::add_lexical_rules() -> void {
}

auto SchemaParser::add_productions() -> void {
// add_production("Schema", {}, new_schema_rule);
add_production("Schema", {"Comment"}, new_schema_rule);
add_production("Schema", {"SchemaVar"}, new_schema_rule_with_var);
add_production(
Expand Down Expand Up @@ -617,6 +616,7 @@ auto SchemaParser::add_productions() -> void {
add_production("CompleteGroup", {"Wildcard"}, regex_identity_rule);
add_production("CompleteGroup", {"Shorthand"}, regex_identity_rule);
add_production("CompleteGroup", {"Literal"}, regex_identity_rule);
add_production("CompleteGroup", {"Dash"}, regex_literal_rule);
add_production(
"IncompleteGroup",
{"IncompleteGroup", "LiteralRange"},
Expand Down Expand Up @@ -645,31 +645,15 @@ auto SchemaParser::add_productions() -> void {
add_production("Literal", {"Percent"}, regex_literal_rule);
add_production("Literal", {"Ampersand"}, regex_literal_rule);
add_production("Literal", {"Apostrophe"}, regex_literal_rule);
add_production("Literal", {"Backslash", "Lparen"}, regex_cancel_literal_rule);
add_production("Literal", {"Backslash", "Rparen"}, regex_cancel_literal_rule);
add_production("Literal", {"Backslash", "Star"}, regex_cancel_literal_rule);
add_production("Literal", {"Backslash", "Plus"}, regex_cancel_literal_rule);
add_production("Literal", {"Comma"}, regex_literal_rule);
add_production("Literal", {"Backslash", "Dash"}, regex_cancel_literal_rule);
add_production("Literal", {"Backslash", "Dot"}, regex_cancel_literal_rule);
add_production("Literal", {"ForwardSlash"}, regex_literal_rule);
add_production("Literal", {"AlphaNumeric"}, regex_literal_rule);
add_production("Literal", {"Colon"}, regex_literal_rule);
add_production("Literal", {"SemiColon"}, regex_literal_rule);
add_production("Literal", {"Equal"}, regex_literal_rule);
add_production("Literal", {"At"}, regex_literal_rule);
add_production("Literal", {"Backslash", "Lbracket"}, regex_cancel_literal_rule);
add_production("Literal", {"Backslash", "Backslash"}, regex_cancel_literal_rule);
add_production("Literal", {"Backslash", "Rbracket"}, regex_cancel_literal_rule);
add_production("Literal", {"Backslash", "Hat"}, regex_cancel_literal_rule);
add_production("Literal", {"Underscore"}, regex_literal_rule);
add_production("Literal", {"Backtick"}, regex_literal_rule);
add_production("Literal", {"Backslash", "Lbrace"}, regex_cancel_literal_rule);
add_production("Literal", {"Backslash", "Vbar"}, regex_cancel_literal_rule);
add_production("Literal", {"Backslash", "Rbrace"}, regex_cancel_literal_rule);
add_production("Literal", {"Backslash", "Langle"}, regex_cancel_literal_rule);
add_production("Literal", {"Backslash", "Rangle"}, regex_cancel_literal_rule);
add_production("Literal", {"Backslash", "QuestionMark"}, regex_cancel_literal_rule);
add_production("Literal", {"Tilde"}, regex_literal_rule);
add_production(
"Literal",
Expand Down
36 changes: 18 additions & 18 deletions tests/test-buffer-parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -424,10 +424,10 @@ TEST_CASE("single_line_with_optional_capture", "[BufferParser]") {
TEST_CASE("single_line_with_clp_default_vars", "[BufferParser]") {
constexpr string_view cDelimitersSchema{R"(delimiters: \n\r[:,)"};
constexpr string_view cVarSchema1{
R"(header:(?<timestamp>(\d{4}\-\d{2}\-\d{2} \d{2}:\d{2}:\d{2}[,\.]\d{0,3})))"
R"(header:(?<timestamp>(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}[,\.]\d{0,3})))"
};
constexpr string_view cVarSchema2{R"(int:\-{0,1}[0-9]+)"};
constexpr string_view cVarSchema3{R"(float:\-{0,1}[0-9]+\.[0-9]+)"};
constexpr string_view cVarSchema2{R"(int:-{0,1}[0-9]+)"};
constexpr string_view cVarSchema3{R"(float:-{0,1}[0-9]+\.[0-9]+)"};
constexpr string_view cVarSchema4{R"(hex:[a-fA-F]+)"};
constexpr string_view cVarSchema5{
R"(keyValuePair:[^ \r\n=]+=(?<val>[^ \r\n]*[A-Za-z0-9][^ \r\n]*))"
Expand Down Expand Up @@ -492,7 +492,7 @@ TEST_CASE("single_line_with_clp_default_vars", "[BufferParser]") {
* ### Schema Definition
* @code
* delimiters: \n\r[:,
* int: \-{0,1}[0-9]+
* int: -{0,1}[0-9]+
* @endcode
*
* ### Test Input
Expand All @@ -516,7 +516,7 @@ TEST_CASE("single_line_with_clp_default_vars", "[BufferParser]") {
*/
TEST_CASE("multi_line_with_newline_static_var_sequence", "[BufferParser]") {
constexpr string_view cDelimitersSchema{R"(delimiters: \n\r[:,)"};
constexpr string_view cVarSchema{R"(int:\-{0,1}[0-9]+)"};
constexpr string_view cVarSchema{R"(int:-{0,1}[0-9]+)"};
constexpr string_view cInput{"1234567\nText 1234567"};
ExpectedEvent const expected_event1{
.m_logtype{R"(<int><newLine>)"},
Expand Down Expand Up @@ -549,7 +549,7 @@ TEST_CASE("multi_line_with_newline_static_var_sequence", "[BufferParser]") {
* ### Schema Definition
* @code
* delimiters: \n\r[:,
* int: \-{0,1}[0-9]+
* int: -{0,1}[0-9]+
* @endcode
*
* ### Test Input
Expand All @@ -574,7 +574,7 @@ TEST_CASE("multi_line_with_newline_static_var_sequence", "[BufferParser]") {
*/
TEST_CASE("multi_line_with_static_newline_static_var_sequence", "[BufferParser]") {
constexpr string_view cDelimitersSchema{R"(delimiters: \n\r[:,)"};
constexpr string_view cVarSchema{R"(int:\-{0,1}[0-9]+)"};
constexpr string_view cVarSchema{R"(int:-{0,1}[0-9]+)"};
constexpr string_view cInput{"1234567 abc\nText 1234567"};
ExpectedEvent const expected_event1{
.m_logtype{R"(<int> abc<newLine>)"},
Expand Down Expand Up @@ -605,7 +605,7 @@ TEST_CASE("multi_line_with_static_newline_static_var_sequence", "[BufferParser]"
* ### Schema Definition
* @code
* delimiters: \n\r[:,
* int: \-{0,1}[0-9]+
* int: -{0,1}[0-9]+
* @endcode
*
* ### Test Input
Expand All @@ -629,7 +629,7 @@ TEST_CASE("multi_line_with_static_newline_static_var_sequence", "[BufferParser]"
*/
TEST_CASE("multi_line_with_static_newline_var_sequence", "[BufferParser]") {
constexpr string_view cDelimitersSchema{R"(delimiters: \n\r[:,)"};
constexpr string_view cVarSchema{R"(int:\-{0,1}[0-9]+)"};
constexpr string_view cVarSchema{R"(int:-{0,1}[0-9]+)"};
constexpr string_view cInput{"1234567 abc\n1234567"};
ExpectedEvent const expected_event1{
.m_logtype{"<int> abc\n"},
Expand Down Expand Up @@ -662,7 +662,7 @@ TEST_CASE("multi_line_with_static_newline_var_sequence", "[BufferParser]") {
* ### Schema Definition
* @code
* delimiters: \n\r[:,
* int: \-{0,1}[0-9]+
* int: -{0,1}[0-9]+
* @endcode
*
* ### Test Input
Expand All @@ -687,7 +687,7 @@ TEST_CASE("multi_line_with_static_newline_var_sequence", "[BufferParser]") {
*/
TEST_CASE("multi_line_with_static_newline_var_newline_sequence", "[BufferParser]") {
constexpr string_view cDelimitersSchema{R"(delimiters: \n\r[:,)"};
constexpr string_view cVarSchema{R"(int:\-{0,1}[0-9]+)"};
constexpr string_view cVarSchema{R"(int:-{0,1}[0-9]+)"};
constexpr string_view cInput{"1234567 abc\n1234567\n"};
ExpectedEvent const expected_event1{
.m_logtype{"<int> abc\n"},
Expand Down Expand Up @@ -720,7 +720,7 @@ TEST_CASE("multi_line_with_static_newline_var_newline_sequence", "[BufferParser]
* ### Schema Definition
* @code
* delimiters: \n\r[:,
* int: \-{0,1}[0-9]+
* int: -{0,1}[0-9]+
* @endcode
*
* ### Input Example
Expand All @@ -744,7 +744,7 @@ TEST_CASE("multi_line_with_static_newline_var_newline_sequence", "[BufferParser]
*/
TEST_CASE("multi_line_with_delim_newline_var_sequence", "[BufferParser]") {
constexpr string_view cDelimitersSchema{R"(delimiters: \n\r[:,)"};
constexpr string_view cRule{R"(int:\-{0,1}[0-9]+)"};
constexpr string_view cRule{R"(int:-{0,1}[0-9]+)"};
constexpr string_view cInput{"1234567 \n1234567"};
ExpectedEvent const expected_event1{
.m_logtype{"<int> \n"},
Expand Down Expand Up @@ -923,7 +923,7 @@ TEST_CASE("multi_line_with_delimited_vars", "[BufferParser]") {
* ### Schema Definition
* @code
* delimiters: \n\r[:,
* header:(?<timestamp>\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}) (?<PID>\d{4}) (?<TID>\d{4})
* header:(?<timestamp>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}) (?<PID>\d{4}) (?<TID>\d{4})
* \
* (?<LogLevel>I|D|E|W)
* @endcode
Expand All @@ -948,7 +948,7 @@ TEST_CASE("multi_line_with_delimited_vars", "[BufferParser]") {
*/
TEST_CASE("multi_capture_one", "[BufferParser]") {
constexpr string_view cDelimitersSchema{R"(delimiters: \n\r[:,)"};
constexpr string_view cTime{R"((?<timestamp>\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}))"};
constexpr string_view cTime{R"((?<timestamp>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}))"};
constexpr string_view cPid{R"((?<PID>\d{4}))"};
constexpr string_view cTid{R"((?<TID>\d{4}))"};
constexpr string_view cLogLevel{R"((?<LogLevel>I|D|E|W))"};
Expand Down Expand Up @@ -995,7 +995,7 @@ TEST_CASE("multi_capture_one", "[BufferParser]") {
* @code
* delimiters: \n\r[:,
* header:(?<timestamp>[A-Za-z]{3} \d{2} \d{2}:\d{2}:\d{2})
* ip\-(?<IP>\d{3}\-\d{2}\-\d{2}\-\d{2}) \
* ip-(?<IP>\d{3}-\d{2}-\d{2}-\d{2}) \
* ku\[(?<PID>\d{4})\]: (?<LogLevel>I|D|E|W)(?<LID>\d{4}) \
* (?<LTime>\d{2}:\d{2}:\d{2}\.\d{4}) (?<TID>\d{4})
* @endcode
Expand All @@ -1020,7 +1020,7 @@ TEST_CASE("multi_capture_one", "[BufferParser]") {
TEST_CASE("multi_capture_two", "[BufferParser]") {
constexpr string_view cDelimitersSchema{R"(delimiters: \n\r[:,)"};
constexpr string_view cTime{R"((?<timestamp>[A-Za-z]{3} \d{2} \d{2}:\d{2}:\d{2}))"};
constexpr string_view cIp{R"((?<IP>\d{3}\-\d{2}\-\d{2}\-\d{2}))"};
constexpr string_view cIp{R"((?<IP>\d{3}-\d{2}-\d{2}-\d{2}))"};
constexpr string_view cPid{R"((?<PID>\d{4}))"};
constexpr string_view cLogLevel{R"((?<LogLevel>I|D|E|W))"};
constexpr string_view cLid{R"((?<LID>\d{4}))"};
Expand All @@ -1030,7 +1030,7 @@ TEST_CASE("multi_capture_two", "[BufferParser]") {
" 1111 Y failed"};

string const header_rule{fmt::format(
R"(header:{} ip\-{} ku\[{}\]: {}{} {} {})",
R"(header:{} ip-{} ku\[{}\]: {}{} {} {})",
cTime,
cIp,
cPid,
Expand Down
2 changes: 1 addition & 1 deletion tests/test-dfa.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ TEST_CASE("multi_valued_capture_containing_repetition", "[DFA]") {
* @brief Create a DFA for matching an integer.
*/
TEST_CASE("int_var", "[DFA]") {
string const var_schema{"int:\\-{0,1}\\d+"};
string const var_schema{"int:-{0,1}\\d+"};
string const expected_serialized_dfa{
"0:byte_transitions={--()->1,0-()->2,1-()->2,2-()->2,3-()->2,4-()->2,5-()->2,6-()->2,7-"
"()->2,8-()->2,9-()->2}\n"
Expand Down
2 changes: 1 addition & 1 deletion tests/test-nfa.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ TEST_CASE("multi_valued_capture_containing_repetition", "[NFA]") {
* @brief Create a NFA for matching an integer.
*/
TEST_CASE("int_var", "[NFA]") {
string const var_schema{"int:\\-{0,1}\\d+"};
string const var_schema{"int:-{0,1}\\d+"};
string const expected_serialized_nfa{
"0:byte_transitions={--->1},spontaneous_transition={1[]}\n"
"1:byte_transitions={0-->2,1-->2,2-->2,3-->2,4-->2,5-->2,6-->2,7-->2,8-->2,9-->2},"
Expand Down
39 changes: 36 additions & 3 deletions tests/test-schema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,8 @@ TEST_CASE("add_invalid_vars", "[Schema]") {
*/
TEST_CASE("add_invalid_var_priorities", "[Schema]") {
constexpr string_view cVarString1{"uId:userID=123"};
constexpr string_view cVarString2{R"(int:\-{0,1}\d+)"};
constexpr string_view cVarString3{R"(float:\-{0,1}\d+\.\d+)"};
constexpr string_view cVarString2{R"(int:-{0,1}\d+)"};
constexpr string_view cVarString3{R"(float:-{0,1}\d+\.\d+)"};
constexpr int32_t invalidPos1{3};
constexpr int32_t invalidPos2{-2};

Expand Down Expand Up @@ -191,11 +191,12 @@ TEST_CASE("add_underscore_name", "[Schema]") {
* @brief Create a schema, adding non-unique capture group names.
*/
TEST_CASE("non_unique_capture_names", "[Schema]") {
Schema schema;
vector<string> const var_names{"var_name1", "var_name2", "var_name3"};
string const cap_name{"cap_name"};

for (auto const& var_name : var_names) {
CAPTURE(var_name);
Schema schema;
string const var_schema{var_name + string(":a(?<") + cap_name + string(">_)b")};
schema.add_variable(string_view(var_schema), -1);
auto const schema_ast = schema.release_schema_ast_ptr();
Expand All @@ -216,3 +217,35 @@ TEST_CASE("non_unique_capture_names", "[Schema]") {
REQUIRE(cap_name == captures.at(0)->get_name());
}
}

/**
 * @ingroup unit_tests_schema
 * @brief Create a schema from variables containing hyphens and check the parsed regex AST types.
 *
 * `v1` and `v2` use unescaped hyphens outside of a bracket expression and are each expected to
 * parse into a `RegexASTCatByte`. `v3` uses an escaped hyphen inside a bracket expression and is
 * expected to parse into a `RegexASTGroupByte`.
 */
TEST_CASE("non_escaped_hyphens", "[Schema]") {
    vector<string> const var_schemas{
            R"(v1:ID-(?<id>\d{4}))",
            R"(v2:\d{4}-\d{4}-\d{4})",
            R"(v3:[a\-z])"
    };

    Schema schema;
    for (auto const& var_schema : var_schemas) {
        schema.add_variable(string_view(var_schema), -1);
    }

    // Releasing the AST hands over every variable; a second release must yield an empty AST.
    auto const schema_ast = schema.release_schema_ast_ptr();
    REQUIRE(schema_ast->m_schema_vars.size() == 3);
    REQUIRE(schema.release_schema_ast_ptr()->m_schema_vars.empty());

    auto& parsed_vars = schema_ast->m_schema_vars;
    REQUIRE(nullptr != parsed_vars.at(0));
    REQUIRE(nullptr != parsed_vars.at(1));
    REQUIRE(nullptr != parsed_vars.at(2));

    auto& hyphen_before_capture_var = dynamic_cast<SchemaVarAST&>(*parsed_vars.at(0));
    auto& hyphen_between_digits_var = dynamic_cast<SchemaVarAST&>(*parsed_vars.at(1));
    auto& escaped_hyphen_in_group_var = dynamic_cast<SchemaVarAST&>(*parsed_vars.at(2));

    // A reference `dynamic_cast` throws on a type mismatch, so each check below fails the test
    // if the root regex node is not of the expected type.
    REQUIRE_NOTHROW(
            std::ignore = dynamic_cast<RegexASTCatByte&>(*hyphen_before_capture_var.m_regex_ptr)
    );
    REQUIRE_NOTHROW(
            std::ignore = dynamic_cast<RegexASTCatByte&>(*hyphen_between_digits_var.m_regex_ptr)
    );
    REQUIRE_NOTHROW(
            std::ignore
            = dynamic_cast<RegexASTGroupByte&>(*escaped_hyphen_in_group_var.m_regex_ptr)
    );
}
Loading