From 40c99c065c583b8926dc5b74ebab6c9813cc1f94 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 29 Jul 2025 06:13:08 -0400 Subject: [PATCH 001/168] Add QueryInterpretation to log-surgeon with major modification to improve documentation, code quality, and be archive independent. --- CMakeLists.txt | 2 + .../query_parser/QueryInterpretation.cpp | 120 ++++++++++ .../query_parser/QueryInterpretation.hpp | 223 ++++++++++++++++++ 3 files changed, 345 insertions(+) create mode 100644 src/log_surgeon/query_parser/QueryInterpretation.cpp create mode 100644 src/log_surgeon/query_parser/QueryInterpretation.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 2a6b558b..78c6cb85 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -82,6 +82,8 @@ set(SOURCE_FILES src/log_surgeon/finite_automata/TagOperation.hpp src/log_surgeon/finite_automata/UnicodeIntervalTree.hpp src/log_surgeon/finite_automata/UnicodeIntervalTree.tpp + src/log_surgeon/query_parser/QueryInterpretation.cpp + src/log_surgeon/query_parser/QueryInterpretation.hpp src/log_surgeon/Lalr1Parser.hpp src/log_surgeon/Lalr1Parser.tpp src/log_surgeon/Lexer.hpp diff --git a/src/log_surgeon/query_parser/QueryInterpretation.cpp b/src/log_surgeon/query_parser/QueryInterpretation.cpp new file mode 100644 index 00000000..97376438 --- /dev/null +++ b/src/log_surgeon/query_parser/QueryInterpretation.cpp @@ -0,0 +1,120 @@ +#include "QueryInterpretation.hpp" + +#include +#include +#include +#include + +#include + +#include + +using log_surgeon::lexers::ByteLexer; +using std::string; +using std::vector; + +namespace log_surgeon::query_parser { +auto VariableQueryToken::operator<(VariableQueryToken const& rhs) const -> bool { + if (m_variable_type < rhs.m_variable_type) { + return true; + } + if (m_variable_type > rhs.m_variable_type) { + return false; + } + if (m_query_substring < rhs.m_query_substring) { + return true; + } + if (m_query_substring > rhs.m_query_substring) { + return false; + } + if (m_has_wildcard != 
rhs.m_has_wildcard) { + return rhs.m_has_wildcard; + } + if (m_is_encoded != rhs.m_is_encoded) { + return rhs.m_is_encoded; + } + return false; +} + +auto VariableQueryToken::operator>(VariableQueryToken const& rhs) const -> bool { + if (m_variable_type > rhs.m_variable_type) { + return true; + } + if (m_variable_type < rhs.m_variable_type) { + return false; + } + if (m_query_substring > rhs.m_query_substring) { + return true; + } + if (m_query_substring < rhs.m_query_substring) { + return false; + } + if (m_has_wildcard != rhs.m_has_wildcard) { + return m_has_wildcard; + } + if (m_is_encoded != rhs.m_is_encoded) { + return m_is_encoded; + } + return false; +} + +void QueryInterpretation::append_logtype(QueryInterpretation& suffix) { + auto const& first_new_token = suffix.m_logtype[0]; + if (auto& prev_token = m_logtype.back(); + false == m_logtype.empty() && std::holds_alternative(prev_token) + && false == suffix.m_logtype.empty() + && std::holds_alternative(first_new_token)) + { + std::get(prev_token).append(std::get(first_new_token)); + m_logtype.insert(m_logtype.end(), suffix.m_logtype.begin() + 1, suffix.m_logtype.end()); + } else { + m_logtype.insert(m_logtype.end(), suffix.m_logtype.begin(), suffix.m_logtype.end()); + } +} + +auto QueryInterpretation::operator<(QueryInterpretation const& rhs) const -> bool { + if (m_logtype.size() < rhs.m_logtype.size()) { + return true; + } + if (m_logtype.size() > rhs.m_logtype.size()) { + return false; + } + for (uint32_t i{0}; i < m_logtype.size(); ++i) { + if (m_logtype[i] < rhs.m_logtype[i]) { + return true; + } + if (m_logtype[i] > rhs.m_logtype[i]) { + return false; + } + } + return false; +} + +auto QueryInterpretation::serialize() const -> string { + vector token_strings; + vector has_wildcard_strings; + vector is_encoded_strings; + + for (auto const& token : m_logtype) { + if (std::holds_alternative(token)) { + token_strings.emplace_back(std::get(token).get_query_substring()); + 
has_wildcard_strings.emplace_back("0"); + is_encoded_strings.emplace_back("0"); + } else { + auto const& var = std::get(token); + token_strings.emplace_back( + fmt::format("<{}>({})", var.get_variable_type(), var.get_query_substring()) + ); + has_wildcard_strings.emplace_back(var.get_has_wildcard() ? "1" : "0"); + is_encoded_strings.emplace_back(var.get_is_encoded_with_wildcard() ? "1" : "0"); + } + } + + return fmt::format( + "logtype='{}', has_wildcard='{}', is_encoded_with_wildcard='{}'", + fmt::join(token_strings, ""), + fmt::join(has_wildcard_strings, ""), + fmt::join(is_encoded_strings, "") + ); +} +} // namespace log_surgeon::query_parser diff --git a/src/log_surgeon/query_parser/QueryInterpretation.hpp b/src/log_surgeon/query_parser/QueryInterpretation.hpp new file mode 100644 index 00000000..cbc1ea9d --- /dev/null +++ b/src/log_surgeon/query_parser/QueryInterpretation.hpp @@ -0,0 +1,223 @@ +#ifndef LOG_SURGEON_QUERY_PARSER_QUERY_INTERPRETATION_HPP +#define LOG_SURGEON_QUERY_PARSER_QUERY_INTERPRETATION_HPP + +#include +#include +#include +#include +#include +#include + +#include + +namespace log_surgeon::query_parser { +/** + * Represents static-text in the query as a token. + * + * Stores the raw text as a string and provides comparison operations. 
+ */ +class StaticQueryToken { +public: + explicit StaticQueryToken(std::string query_substring) + : m_query_substring(std::move(query_substring)) {} + + auto operator==(StaticQueryToken const& rhs) const -> bool = default; + + auto operator!=(StaticQueryToken const& rhs) const -> bool = default; + + auto operator<(StaticQueryToken const& rhs) const -> bool { + return m_query_substring < rhs.m_query_substring; + } + + auto operator>(StaticQueryToken const& rhs) const -> bool { + return m_query_substring > rhs.m_query_substring; + } + + auto append(StaticQueryToken const& rhs) -> void { + m_query_substring += rhs.get_query_substring(); + } + + [[nodiscard]] auto get_query_substring() const -> std::string const& { + return m_query_substring; + } + +private: + std::string m_query_substring; +}; + +/** + * Represents a variable in the query as a token. + * + * Stores the raw text as a string, as well as metadata specifying: + * - if the variable contains a wildcard, + * - the length of the variable. + * Also provides comparison operations. + */ +class VariableQueryToken { +public: + VariableQueryToken( + uint32_t const variable_type, + std::string query_substring, + bool const has_wildcard, + bool const is_encoded + ) + : m_variable_type(variable_type), + m_query_substring(std::move(query_substring)), + m_has_wildcard(has_wildcard), + m_is_encoded(is_encoded) {} + + auto operator==(VariableQueryToken const& rhs) const -> bool = default; + + auto operator!=(VariableQueryToken const& rhs) const -> bool = default; + + /** + * Lexicographical less-than comparison. + * + * Compares member variables in the following order: + * 1. `m_variable_type` + * 2. `m_query_substring` + * 3. `m_has_wildcard` (`false` < `true`) + * 4. `m_is_encoded` (`false` < `true`) + * + * @param rhs The `VariableQueryToken` to compare against. + * @return true if this object is considered less than rhs, false otherwise. 
+ */ + auto operator<(VariableQueryToken const& rhs) const -> bool; + + /** + * Lexicographical greater-than comparison. + * + * Compares member variables in the following order: + * 1. `m_variable_type` + * 2. `m_query_substring` + * 3. `m_has_wildcard` (`true` > `false`) + * 4. `m_is_encoded` (`true` > `false`) + * + * @param rhs The `VariableQueryToken` to compare against. + * @return true if this object is considered greater than rhs, false otherwise. + */ + auto operator>(VariableQueryToken const& rhs) const -> bool; + + [[nodiscard]] auto get_variable_type() const -> uint32_t { return m_variable_type; } + + [[nodiscard]] auto get_query_substring() const -> std::string const& { + return m_query_substring; + } + + [[nodiscard]] auto get_has_wildcard() const -> bool { return m_has_wildcard; } + + [[nodiscard]] auto get_is_encoded_with_wildcard() const -> bool { + return m_is_encoded && m_has_wildcard; + } + +private: + uint32_t m_variable_type; + std::string m_query_substring; + bool m_has_wildcard{false}; + bool m_is_encoded{false}; +}; + +/** + * Represents a query as a sequence of static-text and variable tokens. + * + * The token sequence is stored in a canonicalized form - e.g., adjacent static tokens are merged - + * to ensure a unique internal representation for accurate comparison. + */ +class QueryInterpretation { +public: + QueryInterpretation() = default; + + explicit QueryInterpretation(std::string const& query_substring) { + append_static_token(query_substring); + } + + QueryInterpretation( + uint32_t const variable_type, + std::string query_substring, + bool const contains_wildcard, + bool const is_encoded + ) { + append_variable_token( + variable_type, + std::move(query_substring), + contains_wildcard, + is_encoded + ); + } + + auto operator==(QueryInterpretation const& rhs) const -> bool { + return m_logtype == rhs.m_logtype; + } + + /** + * Lexicographical less-than comparison. + * + * Comparison is performed in the following order: + * 1. 
By number of tokens in the logtype (shorter logtypes are considered less). + * 2. By lexicographical ordering of individual tokens (based on their `<` and `>` operators). + * + * @param rhs The `QueryInterpretation` to compare against. + * @return true if this object is considered less than rhs, false otherwise. + */ + auto operator<(QueryInterpretation const& rhs) const -> bool; + + auto clear() -> void { m_logtype.clear(); } + + /** + * Appends the logtype of another `QueryInterpretation` to this one. + * + * If the last token in this logtype and the first token in the suffix are both + * `StaticQueryToken`, they are merged to avoid unnecessary token boundaries. The merged token + * replaces the last token of this logtype, and the remaining suffix tokens are appended as-is. + * + * This merging behavior ensures a canonical internal representation, which is essential for + * maintaining consistent comparison semantics. + * + * @param suffix The `QueryInterpretation` to append. + */ + auto append_logtype(QueryInterpretation& suffix) -> void; + + auto append_static_token(std::string const& query_substring) -> void { + StaticQueryToken static_query_token(query_substring); + if (auto& prev_token = m_logtype.back(); + false == m_logtype.empty() && std::holds_alternative(prev_token)) + { + std::get(prev_token).append(static_query_token); + } else { + m_logtype.emplace_back(static_query_token); + } + } + + auto append_variable_token( + uint32_t const variable_type, + std::string query_substring, + bool const contains_wildcard, + bool const is_encoded + ) -> void { + m_logtype.emplace_back(VariableQueryToken( + variable_type, + std::move(query_substring), + contains_wildcard, + is_encoded + )); + } + + [[nodiscard]] auto get_logtype() const + -> std::vector> { + return m_logtype; + } + + /** + * @return A string representation of the QueryInterpretation. 
+ */ + [[nodiscard]] auto serialize() const -> std::string; + + static constexpr std::string_view cIntVarName = "int"; + static constexpr std::string_view cFloatVarName = "float"; + +private: + std::vector> m_logtype; +}; +} // namespace log_surgeon::query_parser + +#endif // LOG_SURGEON_QUERY_PARSER_QUERY_INTERPRETATION_HPP From 43f9730f8add1bd6ca7eb5f169d0c06b0720bde2 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 29 Jul 2025 06:50:04 -0400 Subject: [PATCH 002/168] Remove is_encoded; Remove constexpr of unused vars; In append_logtype safely return if empty. --- .../query_parser/QueryInterpretation.cpp | 17 +++----- .../query_parser/QueryInterpretation.hpp | 43 ++++++------------- 2 files changed, 17 insertions(+), 43 deletions(-) diff --git a/src/log_surgeon/query_parser/QueryInterpretation.cpp b/src/log_surgeon/query_parser/QueryInterpretation.cpp index 97376438..355b2198 100644 --- a/src/log_surgeon/query_parser/QueryInterpretation.cpp +++ b/src/log_surgeon/query_parser/QueryInterpretation.cpp @@ -30,9 +30,6 @@ auto VariableQueryToken::operator<(VariableQueryToken const& rhs) const -> bool if (m_has_wildcard != rhs.m_has_wildcard) { return rhs.m_has_wildcard; } - if (m_is_encoded != rhs.m_is_encoded) { - return rhs.m_is_encoded; - } return false; } @@ -52,13 +49,13 @@ auto VariableQueryToken::operator>(VariableQueryToken const& rhs) const -> bool if (m_has_wildcard != rhs.m_has_wildcard) { return m_has_wildcard; } - if (m_is_encoded != rhs.m_is_encoded) { - return m_is_encoded; - } return false; } void QueryInterpretation::append_logtype(QueryInterpretation& suffix) { + if(suffix.m_logtype.empty()) { + return; + } auto const& first_new_token = suffix.m_logtype[0]; if (auto& prev_token = m_logtype.back(); false == m_logtype.empty() && std::holds_alternative(prev_token) @@ -93,28 +90,24 @@ auto QueryInterpretation::operator<(QueryInterpretation const& rhs) const -> boo auto QueryInterpretation::serialize() const -> string { vector token_strings; vector 
has_wildcard_strings; - vector is_encoded_strings; for (auto const& token : m_logtype) { if (std::holds_alternative(token)) { token_strings.emplace_back(std::get(token).get_query_substring()); has_wildcard_strings.emplace_back("0"); - is_encoded_strings.emplace_back("0"); } else { auto const& var = std::get(token); token_strings.emplace_back( fmt::format("<{}>({})", var.get_variable_type(), var.get_query_substring()) ); has_wildcard_strings.emplace_back(var.get_has_wildcard() ? "1" : "0"); - is_encoded_strings.emplace_back(var.get_is_encoded_with_wildcard() ? "1" : "0"); } } return fmt::format( - "logtype='{}', has_wildcard='{}', is_encoded_with_wildcard='{}'", + "logtype='{}', has_wildcard='{}'", fmt::join(token_strings, ""), - fmt::join(has_wildcard_strings, ""), - fmt::join(is_encoded_strings, "") + fmt::join(has_wildcard_strings, "") ); } } // namespace log_surgeon::query_parser diff --git a/src/log_surgeon/query_parser/QueryInterpretation.hpp b/src/log_surgeon/query_parser/QueryInterpretation.hpp index cbc1ea9d..0ea2a531 100644 --- a/src/log_surgeon/query_parser/QueryInterpretation.hpp +++ b/src/log_surgeon/query_parser/QueryInterpretation.hpp @@ -14,7 +14,7 @@ namespace log_surgeon::query_parser { /** * Represents static-text in the query as a token. * - * Stores the raw text as a string and provides comparison operations. + * Stores the raw log as a string. */ class StaticQueryToken { public: @@ -48,23 +48,20 @@ class StaticQueryToken { /** * Represents a variable in the query as a token. * - * Stores the raw text as a string, as well as metadata specifying: - * - if the variable contains a wildcard, - * - the length of the variable. - * Also provides comparison operations. + * Stores the raw log as a string with metadata specifying: + * 1. The variable type. + * 2. If the variable contains a wildcard. 
*/ class VariableQueryToken { public: VariableQueryToken( uint32_t const variable_type, std::string query_substring, - bool const has_wildcard, - bool const is_encoded + bool const has_wildcard ) : m_variable_type(variable_type), m_query_substring(std::move(query_substring)), - m_has_wildcard(has_wildcard), - m_is_encoded(is_encoded) {} + m_has_wildcard(has_wildcard) {} auto operator==(VariableQueryToken const& rhs) const -> bool = default; @@ -77,7 +74,6 @@ class VariableQueryToken { * 1. `m_variable_type` * 2. `m_query_substring` * 3. `m_has_wildcard` (`false` < `true`) - * 4. `m_is_encoded` (`false` < `true`) * * @param rhs The `VariableQueryToken` to compare against. * @return true if this object is considered less than rhs, false otherwise. @@ -91,7 +87,6 @@ class VariableQueryToken { * 1. `m_variable_type` * 2. `m_query_substring` * 3. `m_has_wildcard` (`true` > `false`) - * 4. `m_is_encoded` (`true` > `false`) * * @param rhs The `VariableQueryToken` to compare against. * @return true if this object is considered greater than rhs, false otherwise. 
@@ -106,15 +101,10 @@ class VariableQueryToken { [[nodiscard]] auto get_has_wildcard() const -> bool { return m_has_wildcard; } - [[nodiscard]] auto get_is_encoded_with_wildcard() const -> bool { - return m_is_encoded && m_has_wildcard; - } - private: uint32_t m_variable_type; std::string m_query_substring; bool m_has_wildcard{false}; - bool m_is_encoded{false}; }; /** @@ -134,14 +124,12 @@ class QueryInterpretation { QueryInterpretation( uint32_t const variable_type, std::string query_substring, - bool const contains_wildcard, - bool const is_encoded + bool const contains_wildcard ) { append_variable_token( variable_type, std::move(query_substring), - contains_wildcard, - is_encoded + contains_wildcard ); } @@ -191,15 +179,11 @@ class QueryInterpretation { auto append_variable_token( uint32_t const variable_type, std::string query_substring, - bool const contains_wildcard, - bool const is_encoded + bool const contains_wildcard ) -> void { - m_logtype.emplace_back(VariableQueryToken( - variable_type, - std::move(query_substring), - contains_wildcard, - is_encoded - )); + m_logtype.emplace_back( + VariableQueryToken(variable_type, std::move(query_substring), contains_wildcard) + ); } [[nodiscard]] auto get_logtype() const @@ -212,9 +196,6 @@ class QueryInterpretation { */ [[nodiscard]] auto serialize() const -> std::string; - static constexpr std::string_view cIntVarName = "int"; - static constexpr std::string_view cFloatVarName = "float"; - private: std::vector> m_logtype; }; From 45978ce48abe450e5a1078d603ac7b4b88e36585 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 29 Jul 2025 06:56:50 -0400 Subject: [PATCH 003/168] Guard against empty strings; Rename for clarity; Lint. 
--- .../query_parser/QueryInterpretation.cpp | 15 ++++++++++----- .../query_parser/QueryInterpretation.hpp | 6 +----- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/log_surgeon/query_parser/QueryInterpretation.cpp b/src/log_surgeon/query_parser/QueryInterpretation.cpp index 355b2198..37332952 100644 --- a/src/log_surgeon/query_parser/QueryInterpretation.cpp +++ b/src/log_surgeon/query_parser/QueryInterpretation.cpp @@ -53,16 +53,21 @@ auto VariableQueryToken::operator>(VariableQueryToken const& rhs) const -> bool } void QueryInterpretation::append_logtype(QueryInterpretation& suffix) { - if(suffix.m_logtype.empty()) { + if (suffix.m_logtype.empty()) { return; } + if (m_logtype.empty()) { + m_logtype = suffix.m_logtype; + return; + } + + auto& last_old_token = m_logtype.back(); auto const& first_new_token = suffix.m_logtype[0]; - if (auto& prev_token = m_logtype.back(); - false == m_logtype.empty() && std::holds_alternative(prev_token) - && false == suffix.m_logtype.empty() + if (std::holds_alternative(last_old_token) && std::holds_alternative(first_new_token)) { - std::get(prev_token).append(std::get(first_new_token)); + std::get(last_old_token) + .append(std::get(first_new_token)); m_logtype.insert(m_logtype.end(), suffix.m_logtype.begin() + 1, suffix.m_logtype.end()); } else { m_logtype.insert(m_logtype.end(), suffix.m_logtype.begin(), suffix.m_logtype.end()); diff --git a/src/log_surgeon/query_parser/QueryInterpretation.hpp b/src/log_surgeon/query_parser/QueryInterpretation.hpp index 0ea2a531..71fb0c7b 100644 --- a/src/log_surgeon/query_parser/QueryInterpretation.hpp +++ b/src/log_surgeon/query_parser/QueryInterpretation.hpp @@ -126,11 +126,7 @@ class QueryInterpretation { std::string query_substring, bool const contains_wildcard ) { - append_variable_token( - variable_type, - std::move(query_substring), - contains_wildcard - ); + append_variable_token(variable_type, std::move(query_substring), contains_wildcard); } auto 
operator==(QueryInterpretation const& rhs) const -> bool { From 33664c7bcc8a17d1420ac31bdfadc9abfc540e2f Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 31 Jul 2025 06:17:36 -0400 Subject: [PATCH 004/168] Add unit-tests; Rename m_logtype to m_tokens; Rename append_logtype to append_query_interpretation; Handle empty case in append static token; Set operator== to be default --- docs/doxygen/mainpage.dox | 3 + .../query_parser/QueryInterpretation.cpp | 28 +-- .../query_parser/QueryInterpretation.hpp | 24 ++- tests/CMakeLists.txt | 3 + tests/test-query-interpretation.cpp | 199 ++++++++++++++++++ tests/test-static-query-token.cpp | 45 ++++ tests/test-variable-query-token.cpp | 97 +++++++++ 7 files changed, 374 insertions(+), 25 deletions(-) create mode 100644 tests/test-query-interpretation.cpp create mode 100644 tests/test-static-query-token.cpp create mode 100644 tests/test-variable-query-token.cpp diff --git a/docs/doxygen/mainpage.dox b/docs/doxygen/mainpage.dox index aab958ce..3a1cf2c4 100644 --- a/docs/doxygen/mainpage.dox +++ b/docs/doxygen/mainpage.dox @@ -14,7 +14,10 @@ * - @ref unit_tests_dfa "DFA" * - @ref unit_tests_nfa "NFA" * - @ref unit_tests_prefix_tree "Prefix tree" + * - @ref unit_tests_query_interpretation "Query Interpretation" * - @ref unit_tests_regex_ast "Regex AST" * - @ref unit_tests_register_handler "Register handler" * - @ref unit_tests_schema "Schema" + * - @ref unit_tests_static_query_token "Static Query Token" + * - @ref unit_tests_variable_query_token "Variable Query Token" */ diff --git a/src/log_surgeon/query_parser/QueryInterpretation.cpp b/src/log_surgeon/query_parser/QueryInterpretation.cpp index 37332952..615225dd 100644 --- a/src/log_surgeon/query_parser/QueryInterpretation.cpp +++ b/src/log_surgeon/query_parser/QueryInterpretation.cpp @@ -52,40 +52,40 @@ auto VariableQueryToken::operator>(VariableQueryToken const& rhs) const -> bool return false; } -void QueryInterpretation::append_logtype(QueryInterpretation& suffix) { - 
if (suffix.m_logtype.empty()) { +void QueryInterpretation::append_query_interpretation(QueryInterpretation& suffix) { + if (suffix.m_tokens.empty()) { return; } - if (m_logtype.empty()) { - m_logtype = suffix.m_logtype; + if (m_tokens.empty()) { + m_tokens = suffix.m_tokens; return; } - auto& last_old_token = m_logtype.back(); - auto const& first_new_token = suffix.m_logtype[0]; + auto& last_old_token = m_tokens.back(); + auto const& first_new_token = suffix.m_tokens[0]; if (std::holds_alternative(last_old_token) && std::holds_alternative(first_new_token)) { std::get(last_old_token) .append(std::get(first_new_token)); - m_logtype.insert(m_logtype.end(), suffix.m_logtype.begin() + 1, suffix.m_logtype.end()); + m_tokens.insert(m_tokens.end(), suffix.m_tokens.begin() + 1, suffix.m_tokens.end()); } else { - m_logtype.insert(m_logtype.end(), suffix.m_logtype.begin(), suffix.m_logtype.end()); + m_tokens.insert(m_tokens.end(), suffix.m_tokens.begin(), suffix.m_tokens.end()); } } auto QueryInterpretation::operator<(QueryInterpretation const& rhs) const -> bool { - if (m_logtype.size() < rhs.m_logtype.size()) { + if (m_tokens.size() < rhs.m_tokens.size()) { return true; } - if (m_logtype.size() > rhs.m_logtype.size()) { + if (m_tokens.size() > rhs.m_tokens.size()) { return false; } - for (uint32_t i{0}; i < m_logtype.size(); ++i) { - if (m_logtype[i] < rhs.m_logtype[i]) { + for (uint32_t i{0}; i < m_tokens.size(); ++i) { + if (m_tokens[i] < rhs.m_tokens[i]) { return true; } - if (m_logtype[i] > rhs.m_logtype[i]) { + if (m_tokens[i] > rhs.m_tokens[i]) { return false; } } @@ -96,7 +96,7 @@ auto QueryInterpretation::serialize() const -> string { vector token_strings; vector has_wildcard_strings; - for (auto const& token : m_logtype) { + for (auto const& token : m_tokens) { if (std::holds_alternative(token)) { token_strings.emplace_back(std::get(token).get_query_substring()); has_wildcard_strings.emplace_back("0"); diff --git 
a/src/log_surgeon/query_parser/QueryInterpretation.hpp b/src/log_surgeon/query_parser/QueryInterpretation.hpp index 71fb0c7b..4a86035f 100644 --- a/src/log_surgeon/query_parser/QueryInterpretation.hpp +++ b/src/log_surgeon/query_parser/QueryInterpretation.hpp @@ -129,9 +129,7 @@ class QueryInterpretation { append_variable_token(variable_type, std::move(query_substring), contains_wildcard); } - auto operator==(QueryInterpretation const& rhs) const -> bool { - return m_logtype == rhs.m_logtype; - } + auto operator==(QueryInterpretation const& rhs) const -> bool = default; /** * Lexicographical less-than comparison. @@ -145,7 +143,7 @@ class QueryInterpretation { */ auto operator<(QueryInterpretation const& rhs) const -> bool; - auto clear() -> void { m_logtype.clear(); } + auto clear() -> void { m_tokens.clear(); } /** * Appends the logtype of another `QueryInterpretation` to this one. @@ -159,16 +157,20 @@ class QueryInterpretation { * * @param suffix The `QueryInterpretation` to append. 
*/ - auto append_logtype(QueryInterpretation& suffix) -> void; + auto append_query_interpretation(QueryInterpretation& suffix) -> void; auto append_static_token(std::string const& query_substring) -> void { + if (query_substring.empty()) { + return; + } + StaticQueryToken static_query_token(query_substring); - if (auto& prev_token = m_logtype.back(); - false == m_logtype.empty() && std::holds_alternative(prev_token)) + if (auto& prev_token = m_tokens.back(); + false == m_tokens.empty() && std::holds_alternative(prev_token)) { std::get(prev_token).append(static_query_token); } else { - m_logtype.emplace_back(static_query_token); + m_tokens.emplace_back(static_query_token); } } @@ -177,14 +179,14 @@ class QueryInterpretation { std::string query_substring, bool const contains_wildcard ) -> void { - m_logtype.emplace_back( + m_tokens.emplace_back( VariableQueryToken(variable_type, std::move(query_substring), contains_wildcard) ); } [[nodiscard]] auto get_logtype() const -> std::vector> { - return m_logtype; + return m_tokens; } /** @@ -193,7 +195,7 @@ class QueryInterpretation { [[nodiscard]] auto serialize() const -> std::string; private: - std::vector> m_logtype; + std::vector> m_tokens; }; } // namespace log_surgeon::query_parser diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e54a47ab..b2f36083 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -6,9 +6,12 @@ target_sources(unit-test test-dfa.cpp test-nfa.cpp test-prefix-tree.cpp + test-query-interpretation.cpp test-regex-ast.cpp test-register-handler.cpp test-schema.cpp + test-static-query-token.cpp + test-variable-query-token.cpp ) target_link_libraries(unit-test PRIVATE Catch2::Catch2WithMain log_surgeon::log_surgeon) diff --git a/tests/test-query-interpretation.cpp b/tests/test-query-interpretation.cpp new file mode 100644 index 00000000..216935e7 --- /dev/null +++ b/tests/test-query-interpretation.cpp @@ -0,0 +1,199 @@ +#include +#include + +#include + +#include + +/** + * @defgroup 
unit_tests_query_interpretation `QueryInterpretation` unit tests. + * @brief Unit tests for `QueryInterpretation` construction, mutation, and comparison. + + * These unit tests contain the `QueryInterpretation` tag. + */ + +using log_surgeon::query_parser::QueryInterpretation; +using std::string_view; + +/** + * @ingroup unit_tests_query_interpretation + * @brief Creates an empty `QueryInterpretation` and tests serialization. + */ +TEST_CASE("empty_query_interpretation", "[QueryInterpretation]") { + constexpr string_view cExpectedSerialization{"logtype='', has_wildcard=''"}; + + QueryInterpretation const qi; + REQUIRE(qi.serialize() == cExpectedSerialization); +} + +/** + * @ingroup unit_tests_query_interpretation + * @brief Creates a `QueryInterpretation` with only static-text and tests serialization. + */ +TEST_CASE("static_text_query_interpretation", "[QueryInterpretation]") { + constexpr string_view cExpectedSerialization{"logtype='Static text', has_wildcard='0'"}; + + QueryInterpretation const qi{"Static text"}; + REQUIRE(qi.serialize() == cExpectedSerialization); +} + +/** + * @ingroup unit_tests_query_interpretation + * @brief Creates a `QueryInterpretation` with a variable and tests serialization. + */ +TEST_CASE("variable_query_interpretation", "[QueryInterpretation]") { + constexpr uint32_t cHasNumberId{7}; + constexpr string_view cExpectedSerialization{"logtype='<7>(var123)', has_wildcard='0'"}; + + QueryInterpretation const qi{cHasNumberId, "var123", false}; + REQUIRE(qi.serialize() == cExpectedSerialization); +} + +/** + * @ingroup unit_tests_query_interpretation + * @brief Creates a `QueryInterpretation` with a wildcard variable and tests serialization. 
+ */ +TEST_CASE("wildcard_variable_query_interpretation", "[QueryInterpretation]") { + constexpr uint32_t cFloatId{1}; + constexpr string_view cExpectedSerialization{"logtype='<1>(123.123*)', has_wildcard='1'"}; + + QueryInterpretation const qi{cFloatId, "123.123*", true}; + REQUIRE(qi.serialize() == cExpectedSerialization); +} + +/** + * @ingroup unit_tests_query_interpretation + * @brief Appends empty static-text to a `QueryInterpretation` and tests serialization. + */ +TEST_CASE("append_empty_static_text", "[QueryInterpretation]") { + constexpr string_view cExpectedSerialization{"logtype='', has_wildcard=''"}; + + QueryInterpretation qi; + qi.append_static_token(""); + REQUIRE(qi.serialize() == cExpectedSerialization); +} + +/** + * @ingroup unit_tests_query_interpretation + * @brief Appends an empty variable to a `QueryInterpretation` and tests serialization. + */ +TEST_CASE("append_empty_variable", "[QueryInterpretation]") { + constexpr uint32_t cEmptyId{0}; + constexpr string_view cExpectedSerialization{"logtype='<0>()', has_wildcard='0'"}; + + QueryInterpretation qi; + qi.append_variable_token(cEmptyId, "", false); + REQUIRE(qi.serialize() == cExpectedSerialization); +} + +/** + * @ingroup unit_tests_query_interpretation + * @brief Appends a sequence of static and variable tokens and tests serialization. 
+ */ +TEST_CASE("append_tokens", "[QueryInterpretation]") { + constexpr uint32_t cFloatId{1}; + constexpr uint32_t cIntId{2}; + constexpr string_view cExpectedSerialization{ + "logtype='start <2>(*123*) middle <1>(12.3) end', has_wildcard='01000'" + }; + + QueryInterpretation qi; + qi.append_static_token("start "); + qi.append_variable_token(cIntId, "*123*", true); + qi.append_static_token(" middle "); + qi.append_variable_token(cFloatId, "12.3", false); + qi.append_static_token(" end"); + REQUIRE(qi.serialize() == cExpectedSerialization); +} + +/** + * @ingroup unit_tests_query_interpretation + * @brief Tests whether adjacent static-text tokens are merged for canonicalization. + */ +TEST_CASE("append_canonicalization", "[QueryInterpretation]") { + constexpr string_view cExpectedSerialization{"logtype='ab', has_wildcard='0'"}; + + QueryInterpretation qi; + qi.append_static_token("a"); + qi.append_static_token("b"); + REQUIRE(qi.serialize() == cExpectedSerialization); +} + +/** + * @ingroup unit_tests_query_interpretation + * @brief Appends a `QueryInterpretation` to another and tests serialization and canonicalization. + */ +TEST_CASE("append_query_interpretation", "[QueryInterpretation]") { + constexpr string_view cExpectedSerialization{"logtype='foobar', has_wildcard='0'"}; + + QueryInterpretation prefix{"foo"}; + QueryInterpretation suffix{"bar"}; + prefix.append_query_interpretation(suffix); + REQUIRE(prefix.serialize() == cExpectedSerialization); +} + +/** + * @ingroup unit_tests_query_interpretation + * @brief Tests `operator<` with various token types and orders. 
+ */ +TEST_CASE("less_than_operator", "[QueryInterpretation]") { + constexpr uint32_t cFloatId{1}; + constexpr uint32_t cIntId{2}; + constexpr uint32_t cHasNumberId{7}; + + QueryInterpretation qi1; + QueryInterpretation qi2; + + SECTION("different_length_logtype") { + qi1.append_static_token("a"); + qi2.append_static_token("a"); + qi2.append_variable_token(cFloatId, "1.1", false); + + REQUIRE(qi1 < qi2); + REQUIRE_FALSE(qi2 < qi1); + } + + SECTION("different_static_content") { + qi1.append_static_token("a"); + qi2.append_static_token("b"); + + REQUIRE(qi1 < qi2); + REQUIRE_FALSE(qi2 < qi1); + } + + SECTION("different_var_types") { + qi1.append_variable_token(cIntId, "123", false); + qi2.append_variable_token(cHasNumberId, "123", false); + + REQUIRE(qi1 < qi2); + REQUIRE_FALSE(qi2 < qi1); + } + + SECTION("different_var_values") { + qi1.append_variable_token(cIntId, "123", false); + qi2.append_variable_token(cIntId, "456", false); + + REQUIRE(qi1 < qi2); + REQUIRE_FALSE(qi2 < qi1); + } + + SECTION("token_order") { + qi1.append_static_token("hello"); + qi1.append_variable_token(cIntId, "123", false); + qi2.append_variable_token(cIntId, "123", false); + qi2.append_static_token("hello"); + + // `StaticQueryToken` is a lower index in the variant so is considered less than + // `VariableQueryToken`. + REQUIRE(qi1 < qi2); + REQUIRE_FALSE(qi2 < qi1); + } + + SECTION("identical_tokens") { + qi1.append_variable_token(cIntId, "123", false); + qi2.append_variable_token(cIntId, "123", false); + + REQUIRE_FALSE(qi1 < qi2); + REQUIRE_FALSE(qi2 < qi1); + } +} diff --git a/tests/test-static-query-token.cpp b/tests/test-static-query-token.cpp new file mode 100644 index 00000000..b646b1bf --- /dev/null +++ b/tests/test-static-query-token.cpp @@ -0,0 +1,45 @@ +#include + +#include + +/** + * @defgroup unit_tests_static_query_token `StaticQueryToken` unit tests. + * @brief Unit tests for `StaticQueryToken` construction, mutation, and comparison. 
+ + * These unit tests contain the `StaticQueryToken` tag. + */ + +using log_surgeon::query_parser::StaticQueryToken; + +/** + * @ingroup unit_tests_static_query_token + * @brief Tests `operator<` and `operator>`. + */ +TEST_CASE("comparison_operators", "[StaticQueryToken]") { + StaticQueryToken empty_token{""}; + StaticQueryToken token_abc{"abc"}; + StaticQueryToken token_def{"def"}; + StaticQueryToken another_token_abc{"abc"}; + + SECTION("less_than_operator") { + REQUIRE(empty_token < token_abc); + REQUIRE(empty_token < token_def); + REQUIRE(token_abc < token_def); + REQUIRE_FALSE(token_abc < empty_token); + REQUIRE_FALSE(token_def < empty_token); + REQUIRE_FALSE(token_def < token_abc); + // False for same value + REQUIRE_FALSE(token_abc < another_token_abc); + } + + SECTION("greater_than_operator") { + REQUIRE(token_abc > empty_token); + REQUIRE(token_def > empty_token); + REQUIRE(token_def > token_abc); + REQUIRE_FALSE(empty_token > token_abc); + REQUIRE_FALSE(empty_token > token_def); + REQUIRE_FALSE(token_abc > token_def); + // False for same value + REQUIRE_FALSE(token_abc > another_token_abc); + } +} diff --git a/tests/test-variable-query-token.cpp b/tests/test-variable-query-token.cpp new file mode 100644 index 00000000..a764f374 --- /dev/null +++ b/tests/test-variable-query-token.cpp @@ -0,0 +1,97 @@ +#include + +#include + +#include + +/** + * @defgroup unit_tests_variable_query_token `VariableQueryToken` unit tests. + * @brief Unit tests for `VariableQueryToken` construction and comparison. + + * These unit tests contain the `VariableQueryToken` tag. + */ + +using log_surgeon::query_parser::VariableQueryToken; + +/** + * @ingroup unit_tests_variable_query_token + * @brief Tests `operator<` and `operator>`. 
+ */ +TEST_CASE("comparison_operators", "[VariableQueryToken]") { + constexpr uint32_t cEmptyId{0}; + constexpr uint32_t cIntId{2}; + constexpr uint32_t cHasNumberId{7}; + + VariableQueryToken empty_token{cEmptyId, "", false}; + VariableQueryToken token_int_123{cIntId, "123", false}; + VariableQueryToken token_int_456{cIntId, "456", false}; + VariableQueryToken token_has_number_123{cHasNumberId, "123", false}; + VariableQueryToken token_has_number_user123_wildcard{cHasNumberId, "user123*", true}; + VariableQueryToken another_token_int_123{cIntId, "123", false}; + + SECTION("less_than_operator") { + // empty token + REQUIRE(empty_token < token_int_123); + REQUIRE(empty_token < token_int_456); + REQUIRE(empty_token < token_has_number_123); + REQUIRE(empty_token < token_has_number_user123_wildcard); + REQUIRE_FALSE(token_int_123 < empty_token); + REQUIRE_FALSE(token_int_456 < empty_token); + REQUIRE_FALSE(token_has_number_123 < empty_token); + REQUIRE_FALSE(token_has_number_user123_wildcard < empty_token); + + // token_int_123 + REQUIRE(token_int_123 < token_int_456); + REQUIRE(token_int_123 < token_has_number_123); + REQUIRE(token_int_123 < token_has_number_user123_wildcard); + REQUIRE_FALSE(token_int_456 < token_int_123); + REQUIRE_FALSE(token_has_number_123 < token_int_123); + REQUIRE_FALSE(token_has_number_user123_wildcard < token_int_123); + + // token_int_456 + REQUIRE(token_int_456 < token_has_number_123); + REQUIRE(token_int_456 < token_has_number_user123_wildcard); + REQUIRE_FALSE(token_has_number_123 < token_int_456); + REQUIRE_FALSE(token_has_number_user123_wildcard < token_int_456); + + // token_has_number_123 + REQUIRE(token_has_number_123 < token_has_number_user123_wildcard); + REQUIRE_FALSE(token_has_number_user123_wildcard < token_has_number_123); + + // False for same value + REQUIRE_FALSE(token_int_123 < another_token_int_123); + } + + SECTION("greater_than_operator") { + // empty token + REQUIRE(token_int_123 > empty_token); + REQUIRE(token_int_456 
> empty_token); + REQUIRE(token_has_number_123 > empty_token); + REQUIRE(token_has_number_user123_wildcard > empty_token); + REQUIRE_FALSE(empty_token > token_int_123); + REQUIRE_FALSE(empty_token > token_int_456); + REQUIRE_FALSE(empty_token > token_has_number_123); + REQUIRE_FALSE(empty_token > token_has_number_user123_wildcard); + + // token_int_123 + REQUIRE(token_int_456 > token_int_123); + REQUIRE(token_has_number_123 > token_int_123); + REQUIRE(token_has_number_user123_wildcard > token_int_123); + REQUIRE_FALSE(token_int_123 > token_int_456); + REQUIRE_FALSE(token_int_123 > token_has_number_123); + REQUIRE_FALSE(token_int_123 > token_has_number_user123_wildcard); + + // token_int_456 + REQUIRE(token_has_number_123 > token_int_456); + REQUIRE(token_has_number_user123_wildcard > token_int_456); + REQUIRE_FALSE(token_int_456 > token_has_number_123); + REQUIRE_FALSE(token_int_456 > token_has_number_user123_wildcard); + + // token_has_number_123 + REQUIRE(token_has_number_user123_wildcard > token_has_number_123); + REQUIRE_FALSE(token_has_number_123 > token_has_number_user123_wildcard); + + // False for same value + REQUIRE_FALSE(token_int_123 > another_token_int_123); + } +} From 3854c276ee2b29eec63d8b8f3de2bce315a05e03 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 31 Jul 2025 06:25:17 -0400 Subject: [PATCH 005/168] Add unit-test for appending an empty query interpretation. --- tests/test-query-interpretation.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/test-query-interpretation.cpp b/tests/test-query-interpretation.cpp index 216935e7..d3e93634 100644 --- a/tests/test-query-interpretation.cpp +++ b/tests/test-query-interpretation.cpp @@ -86,6 +86,19 @@ TEST_CASE("append_empty_variable", "[QueryInterpretation]") { REQUIRE(qi.serialize() == cExpectedSerialization); } +/** + * @ingroup unit_tests_query_interpretation + * @brief Appends an empty `QueryInterpretation` to another and tests serialization. 
+ */ +TEST_CASE("append_empty_query_interpretation", "[QueryInterpretation]") { + constexpr string_view cExpectedSerialization{"logtype='hello', has_wildcard='0'"}; + + QueryInterpretation query_interpretation{"hello"}; + QueryInterpretation empty_query_interpretation; + query_interpretation.append_query_interpretation(empty_query_interpretation); + REQUIRE(query_interpretation.serialize() == cExpectedSerialization); +} + /** * @ingroup unit_tests_query_interpretation * @brief Appends a sequence of static and variable tokens and tests serialization. From 7aec1734602dfedff42aa22bc784a0a6d40d114e Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 31 Jul 2025 06:31:24 -0400 Subject: [PATCH 006/168] rename qi to query_interpretation. --- tests/test-query-interpretation.cpp | 108 ++++++++++++++-------------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/tests/test-query-interpretation.cpp b/tests/test-query-interpretation.cpp index d3e93634..f7992f1e 100644 --- a/tests/test-query-interpretation.cpp +++ b/tests/test-query-interpretation.cpp @@ -22,8 +22,8 @@ using std::string_view; TEST_CASE("empty_query_interpretation", "[QueryInterpretation]") { constexpr string_view cExpectedSerialization{"logtype='', has_wildcard=''"}; - QueryInterpretation const qi; - REQUIRE(qi.serialize() == cExpectedSerialization); + QueryInterpretation const query_interpretation; + REQUIRE(query_interpretation.serialize() == cExpectedSerialization); } /** @@ -33,8 +33,8 @@ TEST_CASE("empty_query_interpretation", "[QueryInterpretation]") { TEST_CASE("static_text_query_interpretation", "[QueryInterpretation]") { constexpr string_view cExpectedSerialization{"logtype='Static text', has_wildcard='0'"}; - QueryInterpretation const qi{"Static text"}; - REQUIRE(qi.serialize() == cExpectedSerialization); + QueryInterpretation const query_interpretation{"Static text"}; + REQUIRE(query_interpretation.serialize() == cExpectedSerialization); } /** @@ -45,8 +45,8 @@ 
TEST_CASE("variable_query_interpretation", "[QueryInterpretation]") { constexpr uint32_t cHasNumberId{7}; constexpr string_view cExpectedSerialization{"logtype='<7>(var123)', has_wildcard='0'"}; - QueryInterpretation const qi{cHasNumberId, "var123", false}; - REQUIRE(qi.serialize() == cExpectedSerialization); + QueryInterpretation const query_interpretation{cHasNumberId, "var123", false}; + REQUIRE(query_interpretation.serialize() == cExpectedSerialization); } /** @@ -57,8 +57,8 @@ TEST_CASE("wildcard_variable_query_interpretation", "[QueryInterpretation]") { constexpr uint32_t cFloatId{1}; constexpr string_view cExpectedSerialization{"logtype='<1>(123.123*)', has_wildcard='1'"}; - QueryInterpretation const qi{cFloatId, "123.123*", true}; - REQUIRE(qi.serialize() == cExpectedSerialization); + QueryInterpretation const query_interpretation{cFloatId, "123.123*", true}; + REQUIRE(query_interpretation.serialize() == cExpectedSerialization); } /** @@ -68,9 +68,9 @@ TEST_CASE("wildcard_variable_query_interpretation", "[QueryInterpretation]") { TEST_CASE("append_empty_static_text", "[QueryInterpretation]") { constexpr string_view cExpectedSerialization{"logtype='', has_wildcard=''"}; - QueryInterpretation qi; - qi.append_static_token(""); - REQUIRE(qi.serialize() == cExpectedSerialization); + QueryInterpretation query_interpretation; + query_interpretation.append_static_token(""); + REQUIRE(query_interpretation.serialize() == cExpectedSerialization); } /** @@ -81,9 +81,9 @@ TEST_CASE("append_empty_variable", "[QueryInterpretation]") { constexpr uint32_t cEmptyId{0}; constexpr string_view cExpectedSerialization{"logtype='<0>()', has_wildcard='0'"}; - QueryInterpretation qi; - qi.append_variable_token(cEmptyId, "", false); - REQUIRE(qi.serialize() == cExpectedSerialization); + QueryInterpretation query_interpretation; + query_interpretation.append_variable_token(cEmptyId, "", false); + REQUIRE(query_interpretation.serialize() == cExpectedSerialization); } /** @@ -110,13 
+110,13 @@ TEST_CASE("append_tokens", "[QueryInterpretation]") { "logtype='start <2>(*123*) middle <1>(12.3) end', has_wildcard='01000'" }; - QueryInterpretation qi; - qi.append_static_token("start "); - qi.append_variable_token(cIntId, "*123*", true); - qi.append_static_token(" middle "); - qi.append_variable_token(cFloatId, "12.3", false); - qi.append_static_token(" end"); - REQUIRE(qi.serialize() == cExpectedSerialization); + QueryInterpretation query_interpretation; + query_interpretation.append_static_token("start "); + query_interpretation.append_variable_token(cIntId, "*123*", true); + query_interpretation.append_static_token(" middle "); + query_interpretation.append_variable_token(cFloatId, "12.3", false); + query_interpretation.append_static_token(" end"); + REQUIRE(query_interpretation.serialize() == cExpectedSerialization); } /** @@ -126,10 +126,10 @@ TEST_CASE("append_tokens", "[QueryInterpretation]") { TEST_CASE("append_canonicalization", "[QueryInterpretation]") { constexpr string_view cExpectedSerialization{"logtype='ab', has_wildcard='0'"}; - QueryInterpretation qi; - qi.append_static_token("a"); - qi.append_static_token("b"); - REQUIRE(qi.serialize() == cExpectedSerialization); + QueryInterpretation query_interpretation; + query_interpretation.append_static_token("a"); + query_interpretation.append_static_token("b"); + REQUIRE(query_interpretation.serialize() == cExpectedSerialization); } /** @@ -154,59 +154,59 @@ TEST_CASE("less_than_operator", "[QueryInterpretation]") { constexpr uint32_t cIntId{2}; constexpr uint32_t cHasNumberId{7}; - QueryInterpretation qi1; - QueryInterpretation qi2; + QueryInterpretation query_interpretation1; + QueryInterpretation query_interpretation2; SECTION("different_length_logtype") { - qi1.append_static_token("a"); - qi2.append_static_token("a"); - qi2.append_variable_token(cFloatId, "1.1", false); + query_interpretation1.append_static_token("a"); + query_interpretation2.append_static_token("a"); + 
query_interpretation2.append_variable_token(cFloatId, "1.1", false); - REQUIRE(qi1 < qi2); - REQUIRE_FALSE(qi2 < qi1); + REQUIRE(query_interpretation1 < query_interpretation2); + REQUIRE_FALSE(query_interpretation2 < query_interpretation1); } SECTION("different_static_content") { - qi1.append_static_token("a"); - qi2.append_static_token("b"); + query_interpretation1.append_static_token("a"); + query_interpretation2.append_static_token("b"); - REQUIRE(qi1 < qi2); - REQUIRE_FALSE(qi2 < qi1); + REQUIRE(query_interpretation1 < query_interpretation2); + REQUIRE_FALSE(query_interpretation2 < query_interpretation1); } SECTION("different_var_types") { - qi1.append_variable_token(cIntId, "123", false); - qi2.append_variable_token(cHasNumberId, "123", false); + query_interpretation1.append_variable_token(cIntId, "123", false); + query_interpretation2.append_variable_token(cHasNumberId, "123", false); - REQUIRE(qi1 < qi2); - REQUIRE_FALSE(qi2 < qi1); + REQUIRE(query_interpretation1 < query_interpretation2); + REQUIRE_FALSE(query_interpretation2 < query_interpretation1); } SECTION("different_var_values") { - qi1.append_variable_token(cIntId, "123", false); - qi2.append_variable_token(cIntId, "456", false); + query_interpretation1.append_variable_token(cIntId, "123", false); + query_interpretation2.append_variable_token(cIntId, "456", false); - REQUIRE(qi1 < qi2); - REQUIRE_FALSE(qi2 < qi1); + REQUIRE(query_interpretation1 < query_interpretation2); + REQUIRE_FALSE(query_interpretation2 < query_interpretation1); } SECTION("token_order") { - qi1.append_static_token("hello"); - qi1.append_variable_token(cIntId, "123", false); - qi2.append_variable_token(cIntId, "123", false); - qi2.append_static_token("hello"); + query_interpretation1.append_static_token("hello"); + query_interpretation1.append_variable_token(cIntId, "123", false); + query_interpretation2.append_variable_token(cIntId, "123", false); + query_interpretation2.append_static_token("hello"); // `StaticQueryToken` is a 
lower index in the variant so is considered less than // `VariableQueryToken`. - REQUIRE(qi1 < qi2); - REQUIRE_FALSE(qi2 < qi1); + REQUIRE(query_interpretation1 < query_interpretation2); + REQUIRE_FALSE(query_interpretation2 < query_interpretation1); } SECTION("identical_tokens") { - qi1.append_variable_token(cIntId, "123", false); - qi2.append_variable_token(cIntId, "123", false); + query_interpretation1.append_variable_token(cIntId, "123", false); + query_interpretation2.append_variable_token(cIntId, "123", false); - REQUIRE_FALSE(qi1 < qi2); - REQUIRE_FALSE(qi2 < qi1); + REQUIRE_FALSE(query_interpretation1 < query_interpretation2); + REQUIRE_FALSE(query_interpretation2 < query_interpretation1); } } From beeb54e54eef2af3e660ae7e1c96e211618f36f9 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 4 Aug 2025 10:42:30 -0400 Subject: [PATCH 007/168] Add initial WildcardExpression code. --- CMakeLists.txt | 2 + .../query_parser/WildcardExpression.cpp | 117 ++++++++++++++++ .../query_parser/WildcardExpression.hpp | 125 ++++++++++++++++++ 3 files changed, 244 insertions(+) create mode 100644 src/log_surgeon/query_parser/WildcardExpression.cpp create mode 100644 src/log_surgeon/query_parser/WildcardExpression.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 78c6cb85..ff16e5a2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -84,6 +84,8 @@ set(SOURCE_FILES src/log_surgeon/finite_automata/UnicodeIntervalTree.tpp src/log_surgeon/query_parser/QueryInterpretation.cpp src/log_surgeon/query_parser/QueryInterpretation.hpp + src/log_surgeon/query_parser/WildcardExpression.cpp + src/log_surgeon/query_parser/WildcardExpression.hpp src/log_surgeon/Lalr1Parser.hpp src/log_surgeon/Lalr1Parser.tpp src/log_surgeon/Lexer.hpp diff --git a/src/log_surgeon/query_parser/WildcardExpression.cpp b/src/log_surgeon/query_parser/WildcardExpression.cpp new file mode 100644 index 00000000..29489e7d --- /dev/null +++ b/src/log_surgeon/query_parser/WildcardExpression.cpp @@ -0,0 +1,117 @@ 
+#include "WildcardExpression.hpp" + +#include +#include +#include +#include + +#include + +namespace log_surgeon::query_parser { +WildcardExpression::WildcardExpression(std::string processed_search_string) + : m_processed_search_string(std::move(processed_search_string)) { + m_is_greedy_wildcard.reserve(m_processed_search_string.size()); + m_is_non_greedy_wildcard.reserve(m_processed_search_string.size()); + m_is_escape.reserve(m_processed_search_string.size()); + bool is_escaped = false; + for (auto const& c : m_processed_search_string) { + if (is_escaped) { + m_is_greedy_wildcard.push_back(false); + m_is_non_greedy_wildcard.push_back(false); + m_is_escape.push_back(false); + is_escaped = false; + } else { + if ('\\' == c) { + m_is_greedy_wildcard.push_back(false); + m_is_non_greedy_wildcard.push_back(false); + m_is_escape.push_back(true); + is_escaped = true; + } else if ('*' == c) { + m_is_greedy_wildcard.push_back(true); + m_is_non_greedy_wildcard.push_back(false); + m_is_escape.push_back(false); + } else if ('?' 
== c) { + m_is_greedy_wildcard.push_back(false); + m_is_non_greedy_wildcard.push_back(true); + m_is_escape.push_back(false); + } else { + m_is_greedy_wildcard.push_back(false); + m_is_non_greedy_wildcard.push_back(false); + m_is_escape.push_back(false); + } + } + } +} + +WildcardExpressionView::WildcardExpressionView( + WildcardExpression const& wildcard_expression, + size_t const begin_idx, + size_t const end_idx +) + : m_expression{&wildcard_expression}, + m_begin_idx{begin_idx}, + m_end_idx{end_idx} { + m_end_idx = std::min(m_end_idx, wildcard_expression.length()); + m_begin_idx = std::min(m_begin_idx, m_end_idx); +} + +auto WildcardExpressionView::extend_to_adjacent_greedy_wildcards() const -> WildcardExpressionView { + auto extended_view{*this}; + bool const prev_char_is_greedy_wildcard{ + m_begin_idx > 0 && m_expression->char_is_greedy_wildcard(m_begin_idx - 1) + }; + if (prev_char_is_greedy_wildcard) { + --extended_view.m_begin_idx; + } + bool const next_char_is_greedy_wildcard{ + m_end_idx < m_expression->length() && m_expression->char_is_greedy_wildcard(m_end_idx) + }; + if (next_char_is_greedy_wildcard) { + ++extended_view.m_end_idx; + } + return extended_view; +} + +auto WildcardExpressionView::surrounded_by_delims_or_wildcards(lexers::ByteLexer const& lexer) const + -> bool { + bool has_preceding_delim{}; + if (0 == m_begin_idx) { + has_preceding_delim = true; + } else { + bool const preceded_by_greedy_wildcard{ + m_expression->char_is_greedy_wildcard(m_begin_idx - 1) + }; + bool const preceded_by_non_greedy_wildcard{ + m_expression->char_is_non_greedy_wildcard(m_begin_idx - 1) + }; + bool const preceded_by_delimiter{ + lexer.is_delimiter(m_expression->get_char(m_begin_idx - 1)) + }; + has_preceding_delim = preceded_by_greedy_wildcard || preceded_by_non_greedy_wildcard + || preceded_by_delimiter; + } + + bool has_succeeding_delim{}; + if (m_expression->length() == m_end_idx) { + has_succeeding_delim = true; + } else { + bool const 
succeeded_by_greedy_wildcard{m_expression->char_is_greedy_wildcard(m_end_idx)}; + bool const succeeded_by_non_greedy_wildcard{ + m_expression->char_is_non_greedy_wildcard(m_end_idx) + }; + // E.g. "foo:", where ':' is a delimiter + bool const succeeded_by_unescaped_delim{ + false == m_expression->char_is_escape(m_end_idx) + && lexer.is_delimiter(m_expression->get_char(m_end_idx)) + }; + // E.g. "foo\\", where '\' is a delimiter + bool const succeeded_by_escaped_delim{ + m_expression->char_is_escape(m_end_idx) + && lexer.is_delimiter(m_expression->get_char(m_end_idx + 1)) + }; + has_succeeding_delim = succeeded_by_greedy_wildcard || succeeded_by_non_greedy_wildcard + || succeeded_by_unescaped_delim || succeeded_by_escaped_delim; + } + return has_preceding_delim && has_succeeding_delim; +} +} // namespace log_surgeon::query_parser diff --git a/src/log_surgeon/query_parser/WildcardExpression.hpp b/src/log_surgeon/query_parser/WildcardExpression.hpp new file mode 100644 index 00000000..fc1d8c02 --- /dev/null +++ b/src/log_surgeon/query_parser/WildcardExpression.hpp @@ -0,0 +1,125 @@ +#ifndef LOG_SURGEON_QUERY_PARSER_WILDCARD_EXPRESSION_HPP +#define LOG_SURGEON_QUERY_PARSER_WILDCARD_EXPRESSION_HPP + +#include +#include +#include + +#include + +namespace log_surgeon::query_parser { +/** + * A pattern for matching strings. The pattern supports two types of wildcards: + * - '*' matches zero or more characters + * - '?' matches any single character + * + * To match a literal '*' or '?', the pattern should escape it with a backslash (`\`). 
+ */ +class WildcardExpression { +public: + explicit WildcardExpression(std::string processed_search_string); + + [[nodiscard]] auto substr(size_t const begin_idx, size_t const length) const -> std::string { + return m_processed_search_string.substr(begin_idx, length); + } + + [[nodiscard]] auto length() const -> size_t { return m_processed_search_string.size(); } + + [[nodiscard]] auto char_is_greedy_wildcard(size_t const idx) const -> bool { + return m_is_greedy_wildcard[idx]; + } + + [[nodiscard]] auto char_is_non_greedy_wildcard(size_t const idx) const -> bool { + return m_is_non_greedy_wildcard[idx]; + } + + [[nodiscard]] auto char_is_escape(size_t const idx) const -> bool { return m_is_escape[idx]; } + + [[nodiscard]] auto get_char(size_t const idx) const -> char { + return m_processed_search_string[idx]; + } + +private: + std::vector m_is_greedy_wildcard; + std::vector m_is_non_greedy_wildcard; + std::vector m_is_escape; + std::string m_processed_search_string; +}; + +/** + * A view of a WildcardExpression. + */ +class WildcardExpressionView { +public: + /** + * Creates a view of the range [begin_idx, end_idx) in the given wildcard expression. + * + * NOTE: To ensure validity, end_idx is limited to wildcard_expression.length(), and then + * begin_idx is limited to end_idx. + * @param wildcard_expression + * @param begin_idx + * @param end_idx + */ + WildcardExpressionView( + WildcardExpression const& wildcard_expression, + size_t begin_idx, + size_t end_idx + ); + + /** + * @return A copy of this view, but extended to include adjacent greedy wildcards. 
+ */ + [[nodiscard]] auto extend_to_adjacent_greedy_wildcards() const -> WildcardExpressionView; + + [[nodiscard]] auto is_greedy_wildcard() const -> bool { + return 1 == length() && m_expression->char_is_greedy_wildcard(m_begin_idx); + } + + [[nodiscard]] auto is_non_greedy_wildcard() const -> bool { + return 1 == length() && m_expression->char_is_non_greedy_wildcard(m_begin_idx); + } + + [[nodiscard]] auto starts_or_ends_with_greedy_wildcard() const -> bool { + return length() > 0 + && (m_expression->char_is_greedy_wildcard(m_begin_idx) + || m_expression->char_is_greedy_wildcard(m_end_idx - 1)); + } + + /** + * @param lexer + * @return Whether the substring in view is surrounded by delimiters or unescaped wildcards. + * NOTE: This method assumes that the viewed string is preceded and succeeded by a delimiter. + */ + [[nodiscard]] auto surrounded_by_delims_or_wildcards(lexers::ByteLexer const& lexer) const + -> bool; + + [[nodiscard]] auto length() const -> size_t { return m_end_idx - m_begin_idx; } + + [[nodiscard]] auto char_is_greedy_wildcard(size_t const idx) const -> bool { + return m_expression->char_is_greedy_wildcard(m_begin_idx + idx); + } + + [[nodiscard]] auto char_is_non_greedy_wildcard(size_t const idx) const -> bool { + return m_expression->char_is_non_greedy_wildcard(m_begin_idx + idx); + } + + [[nodiscard]] auto char_is_escape(size_t const idx) const -> bool { + return m_expression->char_is_escape(m_begin_idx + idx); + } + + [[nodiscard]] auto get_char(size_t const idx) const -> char { + return m_expression->get_char(m_begin_idx + idx); + } + + [[nodiscard]] auto get_value() const -> std::string { + return m_expression->substr(m_begin_idx, m_end_idx - m_begin_idx); + } + +private: + WildcardExpression const* m_expression; + size_t m_begin_idx; + size_t m_end_idx; +}; +} // namespace log_surgeon::query_parser + +#endif // LOG_SURGEON_QUERY_PARSER_WILDCARD_EXPRESSION_HPP From e8fe2e8a5eb77b09027ee7cace78583d72855a9a Mon Sep 17 00:00:00 2001 From: 
SharafMohamed Date: Mon, 4 Aug 2025 11:11:48 -0400 Subject: [PATCH 008/168] Split classes into seperate files. --- CMakeLists.txt | 3 + .../query_parser/QueryInterpretation.cpp | 38 ------- .../query_parser/QueryInterpretation.hpp | 98 +------------------ .../query_parser/StaticQueryToken.hpp | 43 ++++++++ .../query_parser/VariableQueryToken.cpp | 41 ++++++++ .../query_parser/VariableQueryToken.hpp | 75 ++++++++++++++ 6 files changed, 164 insertions(+), 134 deletions(-) create mode 100644 src/log_surgeon/query_parser/StaticQueryToken.hpp create mode 100644 src/log_surgeon/query_parser/VariableQueryToken.cpp create mode 100644 src/log_surgeon/query_parser/VariableQueryToken.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 78c6cb85..9ed7cb11 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -84,6 +84,9 @@ set(SOURCE_FILES src/log_surgeon/finite_automata/UnicodeIntervalTree.tpp src/log_surgeon/query_parser/QueryInterpretation.cpp src/log_surgeon/query_parser/QueryInterpretation.hpp + src/log_surgeon/query_parser/StaticQueryToken.hpp + src/log_surgeon/query_parser/VariableQueryToken.cpp + src/log_surgeon/query_parser/VariableQueryToken.hpp src/log_surgeon/Lalr1Parser.hpp src/log_surgeon/Lalr1Parser.tpp src/log_surgeon/Lexer.hpp diff --git a/src/log_surgeon/query_parser/QueryInterpretation.cpp b/src/log_surgeon/query_parser/QueryInterpretation.cpp index 615225dd..8acda7be 100644 --- a/src/log_surgeon/query_parser/QueryInterpretation.cpp +++ b/src/log_surgeon/query_parser/QueryInterpretation.cpp @@ -14,44 +14,6 @@ using std::string; using std::vector; namespace log_surgeon::query_parser { -auto VariableQueryToken::operator<(VariableQueryToken const& rhs) const -> bool { - if (m_variable_type < rhs.m_variable_type) { - return true; - } - if (m_variable_type > rhs.m_variable_type) { - return false; - } - if (m_query_substring < rhs.m_query_substring) { - return true; - } - if (m_query_substring > rhs.m_query_substring) { - return false; - } - if (m_has_wildcard != 
rhs.m_has_wildcard) { - return rhs.m_has_wildcard; - } - return false; -} - -auto VariableQueryToken::operator>(VariableQueryToken const& rhs) const -> bool { - if (m_variable_type > rhs.m_variable_type) { - return true; - } - if (m_variable_type < rhs.m_variable_type) { - return false; - } - if (m_query_substring > rhs.m_query_substring) { - return true; - } - if (m_query_substring < rhs.m_query_substring) { - return false; - } - if (m_has_wildcard != rhs.m_has_wildcard) { - return m_has_wildcard; - } - return false; -} - void QueryInterpretation::append_query_interpretation(QueryInterpretation& suffix) { if (suffix.m_tokens.empty()) { return; diff --git a/src/log_surgeon/query_parser/QueryInterpretation.hpp b/src/log_surgeon/query_parser/QueryInterpretation.hpp index 4a86035f..f8f58f13 100644 --- a/src/log_surgeon/query_parser/QueryInterpretation.hpp +++ b/src/log_surgeon/query_parser/QueryInterpretation.hpp @@ -9,104 +9,10 @@ #include #include +#include +#include namespace log_surgeon::query_parser { -/** - * Represents static-text in the query as a token. - * - * Stores the raw log as a string. - */ -class StaticQueryToken { -public: - explicit StaticQueryToken(std::string query_substring) - : m_query_substring(std::move(query_substring)) {} - - auto operator==(StaticQueryToken const& rhs) const -> bool = default; - - auto operator!=(StaticQueryToken const& rhs) const -> bool = default; - - auto operator<(StaticQueryToken const& rhs) const -> bool { - return m_query_substring < rhs.m_query_substring; - } - - auto operator>(StaticQueryToken const& rhs) const -> bool { - return m_query_substring > rhs.m_query_substring; - } - - auto append(StaticQueryToken const& rhs) -> void { - m_query_substring += rhs.get_query_substring(); - } - - [[nodiscard]] auto get_query_substring() const -> std::string const& { - return m_query_substring; - } - -private: - std::string m_query_substring; -}; - -/** - * Represents a variable in the query as a token. 
- * - * Stores the raw log as a string with metadata specifying: - * 1. The variable type. - * 2. If the variable contains a wildcard. - */ -class VariableQueryToken { -public: - VariableQueryToken( - uint32_t const variable_type, - std::string query_substring, - bool const has_wildcard - ) - : m_variable_type(variable_type), - m_query_substring(std::move(query_substring)), - m_has_wildcard(has_wildcard) {} - - auto operator==(VariableQueryToken const& rhs) const -> bool = default; - - auto operator!=(VariableQueryToken const& rhs) const -> bool = default; - - /** - * Lexicographical less-than comparison. - * - * Compares member variables in the following order: - * 1. `m_variable_type` - * 2. `m_query_substring` - * 3. `m_has_wildcard` (`false` < `true`) - * - * @param rhs The `VariableQueryToken` to compare against. - * @return true if this object is considered less than rhs, false otherwise. - */ - auto operator<(VariableQueryToken const& rhs) const -> bool; - - /** - * Lexicographical greater-than comparison. - * - * Compares member variables in the following order: - * 1. `m_variable_type` - * 2. `m_query_substring` - * 3. `m_has_wildcard` (`true` > `false`) - * - * @param rhs The `VariableQueryToken` to compare against. - * @return true if this object is considered greater than rhs, false otherwise. - */ - auto operator>(VariableQueryToken const& rhs) const -> bool; - - [[nodiscard]] auto get_variable_type() const -> uint32_t { return m_variable_type; } - - [[nodiscard]] auto get_query_substring() const -> std::string const& { - return m_query_substring; - } - - [[nodiscard]] auto get_has_wildcard() const -> bool { return m_has_wildcard; } - -private: - uint32_t m_variable_type; - std::string m_query_substring; - bool m_has_wildcard{false}; -}; - /** * Represents a query as a sequence of static-text and variable tokens. 
* diff --git a/src/log_surgeon/query_parser/StaticQueryToken.hpp b/src/log_surgeon/query_parser/StaticQueryToken.hpp new file mode 100644 index 00000000..3597b7fd --- /dev/null +++ b/src/log_surgeon/query_parser/StaticQueryToken.hpp @@ -0,0 +1,43 @@ +#ifndef LOG_SURGEON_QUERY_PARSER_STATIC_QUERY_TOKEN_HPP +#define LOG_SURGEON_QUERY_PARSER_STATIC_QUERY_TOKEN_HPP + +#include +#include + +namespace log_surgeon::query_parser { +/** + * Represents static-text in the query as a token. + * + * Stores the raw log as a string. + */ +class StaticQueryToken { +public: + explicit StaticQueryToken(std::string query_substring) + : m_query_substring(std::move(query_substring)) {} + + auto operator==(StaticQueryToken const& rhs) const -> bool = default; + + auto operator!=(StaticQueryToken const& rhs) const -> bool = default; + + auto operator<(StaticQueryToken const& rhs) const -> bool { + return m_query_substring < rhs.m_query_substring; + } + + auto operator>(StaticQueryToken const& rhs) const -> bool { + return m_query_substring > rhs.m_query_substring; + } + + auto append(StaticQueryToken const& rhs) -> void { + m_query_substring += rhs.get_query_substring(); + } + + [[nodiscard]] auto get_query_substring() const -> std::string const& { + return m_query_substring; + } + +private: + std::string m_query_substring; +}; +} // namespace log_surgeon::query_parser + +#endif // LOG_SURGEON_QUERY_PARSER_STATIC_QUERY_TOKEN_HPP diff --git a/src/log_surgeon/query_parser/VariableQueryToken.cpp b/src/log_surgeon/query_parser/VariableQueryToken.cpp new file mode 100644 index 00000000..d6d461e8 --- /dev/null +++ b/src/log_surgeon/query_parser/VariableQueryToken.cpp @@ -0,0 +1,41 @@ +#include + +namespace log_surgeon::query_parser { +auto VariableQueryToken::operator<(VariableQueryToken const& rhs) const -> bool { + if (m_variable_type < rhs.m_variable_type) { + return true; + } + if (m_variable_type > rhs.m_variable_type) { + return false; + } + if (m_query_substring < rhs.m_query_substring) 
{ + return true; + } + if (m_query_substring > rhs.m_query_substring) { + return false; + } + if (m_has_wildcard != rhs.m_has_wildcard) { + return rhs.m_has_wildcard; + } + return false; +} + +auto VariableQueryToken::operator>(VariableQueryToken const& rhs) const -> bool { + if (m_variable_type > rhs.m_variable_type) { + return true; + } + if (m_variable_type < rhs.m_variable_type) { + return false; + } + if (m_query_substring > rhs.m_query_substring) { + return true; + } + if (m_query_substring < rhs.m_query_substring) { + return false; + } + if (m_has_wildcard != rhs.m_has_wildcard) { + return m_has_wildcard; + } + return false; +} +} // namespace log_surgeon::query_parser diff --git a/src/log_surgeon/query_parser/VariableQueryToken.hpp b/src/log_surgeon/query_parser/VariableQueryToken.hpp new file mode 100644 index 00000000..a9a10947 --- /dev/null +++ b/src/log_surgeon/query_parser/VariableQueryToken.hpp @@ -0,0 +1,75 @@ +#ifndef LOG_SURGEON_QUERY_PARSER_VARIABLE_QUERY_TOKEN_HPP +#define LOG_SURGEON_QUERY_PARSER_VARIABLE_QUERY_TOKEN_HPP + +#include +#include +#include +#include + +#include + +namespace log_surgeon::query_parser { +/** + * Represents a variable in the query as a token. + * + * Stores the raw log as a string with metadata specifying: + * 1. The variable type. + * 2. If the variable contains a wildcard. + */ +class VariableQueryToken { +public: + VariableQueryToken( + uint32_t const variable_type, + std::string query_substring, + bool const has_wildcard + ) + : m_variable_type(variable_type), + m_query_substring(std::move(query_substring)), + m_has_wildcard(has_wildcard) {} + + auto operator==(VariableQueryToken const& rhs) const -> bool = default; + + auto operator!=(VariableQueryToken const& rhs) const -> bool = default; + + /** + * Lexicographical less-than comparison. + * + * Compares member variables in the following order: + * 1. `m_variable_type` + * 2. `m_query_substring` + * 3. 
`m_has_wildcard` (`false` < `true`) + * + * @param rhs The `VariableQueryToken` to compare against. + * @return true if this object is considered less than rhs, false otherwise. + */ + auto operator<(VariableQueryToken const& rhs) const -> bool; + + /** + * Lexicographical greater-than comparison. + * + * Compares member variables in the following order: + * 1. `m_variable_type` + * 2. `m_query_substring` + * 3. `m_has_wildcard` (`true` > `false`) + * + * @param rhs The `VariableQueryToken` to compare against. + * @return true if this object is considered greater than rhs, false otherwise. + */ + auto operator>(VariableQueryToken const& rhs) const -> bool; + + [[nodiscard]] auto get_variable_type() const -> uint32_t { return m_variable_type; } + + [[nodiscard]] auto get_query_substring() const -> std::string const& { + return m_query_substring; + } + + [[nodiscard]] auto get_has_wildcard() const -> bool { return m_has_wildcard; } + +private: + uint32_t m_variable_type; + std::string m_query_substring; + bool m_has_wildcard{false}; +}; +} // namespace log_surgeon::query_parser + +#endif // LOG_SURGEON_QUERY_PARSER_VARIABLE_QUERY_TOKEN_HPP From 86732548e173e5e95a4fcb3cb0147c3464e0ac94 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 4 Aug 2025 19:53:53 -0400 Subject: [PATCH 009/168] Pass clang-tidy on new tests. 
--- src/log_surgeon/query_parser/QueryInterpretation.cpp | 5 ++++- src/log_surgeon/query_parser/QueryInterpretation.hpp | 2 -- src/log_surgeon/query_parser/VariableQueryToken.cpp | 2 +- src/log_surgeon/query_parser/VariableQueryToken.hpp | 3 --- tests/test-static-query-token.cpp | 2 +- tests/test-variable-query-token.cpp | 2 +- 6 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/log_surgeon/query_parser/QueryInterpretation.cpp b/src/log_surgeon/query_parser/QueryInterpretation.cpp index 8acda7be..ee4d2f1b 100644 --- a/src/log_surgeon/query_parser/QueryInterpretation.cpp +++ b/src/log_surgeon/query_parser/QueryInterpretation.cpp @@ -2,11 +2,14 @@ #include #include -#include #include +#include #include +#include +#include +#include #include using log_surgeon::lexers::ByteLexer; diff --git a/src/log_surgeon/query_parser/QueryInterpretation.hpp b/src/log_surgeon/query_parser/QueryInterpretation.hpp index f8f58f13..536c0697 100644 --- a/src/log_surgeon/query_parser/QueryInterpretation.hpp +++ b/src/log_surgeon/query_parser/QueryInterpretation.hpp @@ -3,12 +3,10 @@ #include #include -#include #include #include #include -#include #include #include diff --git a/src/log_surgeon/query_parser/VariableQueryToken.cpp b/src/log_surgeon/query_parser/VariableQueryToken.cpp index d6d461e8..59a41b9a 100644 --- a/src/log_surgeon/query_parser/VariableQueryToken.cpp +++ b/src/log_surgeon/query_parser/VariableQueryToken.cpp @@ -1,4 +1,4 @@ -#include +#include "VariableQueryToken.hpp" namespace log_surgeon::query_parser { auto VariableQueryToken::operator<(VariableQueryToken const& rhs) const -> bool { diff --git a/src/log_surgeon/query_parser/VariableQueryToken.hpp b/src/log_surgeon/query_parser/VariableQueryToken.hpp index a9a10947..b774e689 100644 --- a/src/log_surgeon/query_parser/VariableQueryToken.hpp +++ b/src/log_surgeon/query_parser/VariableQueryToken.hpp @@ -3,11 +3,8 @@ #include #include -#include #include -#include - namespace log_surgeon::query_parser { /** * 
Represents a variable in the query as a token. diff --git a/tests/test-static-query-token.cpp b/tests/test-static-query-token.cpp index b646b1bf..ab68fe12 100644 --- a/tests/test-static-query-token.cpp +++ b/tests/test-static-query-token.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/tests/test-variable-query-token.cpp b/tests/test-variable-query-token.cpp index a764f374..cbebafc8 100644 --- a/tests/test-variable-query-token.cpp +++ b/tests/test-variable-query-token.cpp @@ -1,6 +1,6 @@ #include -#include +#include #include From d99ce893a4fdcd5575602d811d34995ecc13c96a Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 6 Aug 2025 13:10:52 -0400 Subject: [PATCH 010/168] Rename query_parser to wildcard_query_parser. --- CMakeLists.txt | 10 +++++----- .../QueryInterpretation.cpp | 8 ++++---- .../QueryInterpretation.hpp | 14 +++++++------- .../StaticQueryToken.hpp | 10 +++++----- .../VariableQueryToken.cpp | 4 ++-- .../VariableQueryToken.hpp | 10 +++++----- tests/test-query-interpretation.cpp | 4 ++-- tests/test-static-query-token.cpp | 4 ++-- tests/test-variable-query-token.cpp | 4 ++-- 9 files changed, 34 insertions(+), 34 deletions(-) rename src/log_surgeon/{query_parser => wildcard_query_parser}/QueryInterpretation.cpp (91%) rename src/log_surgeon/{query_parser => wildcard_query_parser}/QueryInterpretation.hpp (88%) rename src/log_surgeon/{query_parser => wildcard_query_parser}/StaticQueryToken.hpp (76%) rename src/log_surgeon/{query_parser => wildcard_query_parser}/VariableQueryToken.cpp (91%) rename src/log_surgeon/{query_parser => wildcard_query_parser}/VariableQueryToken.hpp (87%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9ed7cb11..f9618343 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -82,11 +82,11 @@ set(SOURCE_FILES src/log_surgeon/finite_automata/TagOperation.hpp src/log_surgeon/finite_automata/UnicodeIntervalTree.hpp src/log_surgeon/finite_automata/UnicodeIntervalTree.tpp - 
src/log_surgeon/query_parser/QueryInterpretation.cpp - src/log_surgeon/query_parser/QueryInterpretation.hpp - src/log_surgeon/query_parser/StaticQueryToken.hpp - src/log_surgeon/query_parser/VariableQueryToken.cpp - src/log_surgeon/query_parser/VariableQueryToken.hpp + src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp + src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp + src/log_surgeon/wildcard_query_parser/StaticQueryToken.hpp + src/log_surgeon/wildcard_query_parser/VariableQueryToken.cpp + src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp src/log_surgeon/Lalr1Parser.hpp src/log_surgeon/Lalr1Parser.tpp src/log_surgeon/Lexer.hpp diff --git a/src/log_surgeon/query_parser/QueryInterpretation.cpp b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp similarity index 91% rename from src/log_surgeon/query_parser/QueryInterpretation.cpp rename to src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp index ee4d2f1b..8b91b87f 100644 --- a/src/log_surgeon/query_parser/QueryInterpretation.cpp +++ b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp @@ -6,8 +6,8 @@ #include #include -#include -#include +#include +#include #include #include @@ -16,7 +16,7 @@ using log_surgeon::lexers::ByteLexer; using std::string; using std::vector; -namespace log_surgeon::query_parser { +namespace log_surgeon::wildcard_query_parser { void QueryInterpretation::append_query_interpretation(QueryInterpretation& suffix) { if (suffix.m_tokens.empty()) { return; @@ -80,4 +80,4 @@ auto QueryInterpretation::serialize() const -> string { fmt::join(has_wildcard_strings, "") ); } -} // namespace log_surgeon::query_parser +} // namespace log_surgeon::wildcard_query_parser diff --git a/src/log_surgeon/query_parser/QueryInterpretation.hpp b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp similarity index 88% rename from src/log_surgeon/query_parser/QueryInterpretation.hpp rename to 
src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp index 536c0697..f497753e 100644 --- a/src/log_surgeon/query_parser/QueryInterpretation.hpp +++ b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp @@ -1,5 +1,5 @@ -#ifndef LOG_SURGEON_QUERY_PARSER_QUERY_INTERPRETATION_HPP -#define LOG_SURGEON_QUERY_PARSER_QUERY_INTERPRETATION_HPP +#ifndef LOG_SURGEON_WILDCARD_QUERY_PARSER_QUERY_INTERPRETATION_HPP +#define LOG_SURGEON_WILDCARD_QUERY_PARSER_QUERY_INTERPRETATION_HPP #include #include @@ -7,10 +7,10 @@ #include #include -#include -#include +#include +#include -namespace log_surgeon::query_parser { +namespace log_surgeon::wildcard_query_parser { /** * Represents a query as a sequence of static-text and variable tokens. * @@ -101,6 +101,6 @@ class QueryInterpretation { private: std::vector> m_tokens; }; -} // namespace log_surgeon::query_parser +} // namespace log_surgeon::wildcard_query_parser -#endif // LOG_SURGEON_QUERY_PARSER_QUERY_INTERPRETATION_HPP +#endif // LOG_SURGEON_WILDCARD_QUERY_PARSER_QUERY_INTERPRETATION_HPP diff --git a/src/log_surgeon/query_parser/StaticQueryToken.hpp b/src/log_surgeon/wildcard_query_parser/StaticQueryToken.hpp similarity index 76% rename from src/log_surgeon/query_parser/StaticQueryToken.hpp rename to src/log_surgeon/wildcard_query_parser/StaticQueryToken.hpp index 3597b7fd..29bfe153 100644 --- a/src/log_surgeon/query_parser/StaticQueryToken.hpp +++ b/src/log_surgeon/wildcard_query_parser/StaticQueryToken.hpp @@ -1,10 +1,10 @@ -#ifndef LOG_SURGEON_QUERY_PARSER_STATIC_QUERY_TOKEN_HPP -#define LOG_SURGEON_QUERY_PARSER_STATIC_QUERY_TOKEN_HPP +#ifndef LOG_SURGEON_WILDCARD_QUERY_PARSER_STATIC_QUERY_TOKEN_HPP +#define LOG_SURGEON_WILDCARD_QUERY_PARSER_STATIC_QUERY_TOKEN_HPP #include #include -namespace log_surgeon::query_parser { +namespace log_surgeon::wildcard_query_parser { /** * Represents static-text in the query as a token. 
* @@ -38,6 +38,6 @@ class StaticQueryToken { private: std::string m_query_substring; }; -} // namespace log_surgeon::query_parser +} // namespace log_surgeon::wildcard_query_parser -#endif // LOG_SURGEON_QUERY_PARSER_STATIC_QUERY_TOKEN_HPP +#endif // LOG_SURGEON_WILDCARD_QUERY_PARSER_STATIC_QUERY_TOKEN_HPP diff --git a/src/log_surgeon/query_parser/VariableQueryToken.cpp b/src/log_surgeon/wildcard_query_parser/VariableQueryToken.cpp similarity index 91% rename from src/log_surgeon/query_parser/VariableQueryToken.cpp rename to src/log_surgeon/wildcard_query_parser/VariableQueryToken.cpp index 59a41b9a..74369757 100644 --- a/src/log_surgeon/query_parser/VariableQueryToken.cpp +++ b/src/log_surgeon/wildcard_query_parser/VariableQueryToken.cpp @@ -1,6 +1,6 @@ #include "VariableQueryToken.hpp" -namespace log_surgeon::query_parser { +namespace log_surgeon::wildcard_query_parser { auto VariableQueryToken::operator<(VariableQueryToken const& rhs) const -> bool { if (m_variable_type < rhs.m_variable_type) { return true; @@ -38,4 +38,4 @@ auto VariableQueryToken::operator>(VariableQueryToken const& rhs) const -> bool } return false; } -} // namespace log_surgeon::query_parser +} // namespace log_surgeon::wildcard_query_parser diff --git a/src/log_surgeon/query_parser/VariableQueryToken.hpp b/src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp similarity index 87% rename from src/log_surgeon/query_parser/VariableQueryToken.hpp rename to src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp index b774e689..417d5e62 100644 --- a/src/log_surgeon/query_parser/VariableQueryToken.hpp +++ b/src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp @@ -1,11 +1,11 @@ -#ifndef LOG_SURGEON_QUERY_PARSER_VARIABLE_QUERY_TOKEN_HPP -#define LOG_SURGEON_QUERY_PARSER_VARIABLE_QUERY_TOKEN_HPP +#ifndef LOG_SURGEON_WILDCARD_QUERY_PARSER_VARIABLE_QUERY_TOKEN_HPP +#define LOG_SURGEON_WILDCARD_QUERY_PARSER_VARIABLE_QUERY_TOKEN_HPP #include #include #include -namespace 
log_surgeon::query_parser { +namespace log_surgeon::wildcard_query_parser { /** * Represents a variable in the query as a token. * @@ -67,6 +67,6 @@ class VariableQueryToken { std::string m_query_substring; bool m_has_wildcard{false}; }; -} // namespace log_surgeon::query_parser +} // namespace log_surgeon::wildcard_query_parser -#endif // LOG_SURGEON_QUERY_PARSER_VARIABLE_QUERY_TOKEN_HPP +#endif // LOG_SURGEON_WILDCARD_QUERY_PARSER_VARIABLE_QUERY_TOKEN_HPP diff --git a/tests/test-query-interpretation.cpp b/tests/test-query-interpretation.cpp index f7992f1e..6c016009 100644 --- a/tests/test-query-interpretation.cpp +++ b/tests/test-query-interpretation.cpp @@ -1,7 +1,7 @@ #include #include -#include +#include #include @@ -12,7 +12,7 @@ * These unit tests contain the `QueryInterpretation` tag. */ -using log_surgeon::query_parser::QueryInterpretation; +using log_surgeon::wildcard_query_parser::QueryInterpretation; using std::string_view; /** diff --git a/tests/test-static-query-token.cpp b/tests/test-static-query-token.cpp index ab68fe12..467dfa77 100644 --- a/tests/test-static-query-token.cpp +++ b/tests/test-static-query-token.cpp @@ -1,4 +1,4 @@ -#include +#include #include @@ -9,7 +9,7 @@ * These unit tests contain the `StaticQueryToken` tag. */ -using log_surgeon::query_parser::StaticQueryToken; +using log_surgeon::wildcard_query_parser::StaticQueryToken; /** * @ingroup unit_tests_static_query_token diff --git a/tests/test-variable-query-token.cpp b/tests/test-variable-query-token.cpp index cbebafc8..eaadb61a 100644 --- a/tests/test-variable-query-token.cpp +++ b/tests/test-variable-query-token.cpp @@ -1,6 +1,6 @@ #include -#include +#include #include @@ -11,7 +11,7 @@ * These unit tests contain the `VariableQueryToken` tag. 
*/ -using log_surgeon::query_parser::VariableQueryToken; +using log_surgeon::wildcard_query_parser::VariableQueryToken; /** * @ingroup unit_tests_variable_query_token From b4d3f1e17504d599e1bcb8aac782459ffe746695 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 6 Aug 2025 14:01:52 -0400 Subject: [PATCH 011/168] Use three-way comparitor in StaticQuery class. --- .../wildcard_query_parser/StaticQueryToken.hpp | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/StaticQueryToken.hpp b/src/log_surgeon/wildcard_query_parser/StaticQueryToken.hpp index 29bfe153..be36ced3 100644 --- a/src/log_surgeon/wildcard_query_parser/StaticQueryToken.hpp +++ b/src/log_surgeon/wildcard_query_parser/StaticQueryToken.hpp @@ -1,6 +1,7 @@ #ifndef LOG_SURGEON_WILDCARD_QUERY_PARSER_STATIC_QUERY_TOKEN_HPP #define LOG_SURGEON_WILDCARD_QUERY_PARSER_STATIC_QUERY_TOKEN_HPP +#include #include #include @@ -15,17 +16,7 @@ class StaticQueryToken { explicit StaticQueryToken(std::string query_substring) : m_query_substring(std::move(query_substring)) {} - auto operator==(StaticQueryToken const& rhs) const -> bool = default; - - auto operator!=(StaticQueryToken const& rhs) const -> bool = default; - - auto operator<(StaticQueryToken const& rhs) const -> bool { - return m_query_substring < rhs.m_query_substring; - } - - auto operator>(StaticQueryToken const& rhs) const -> bool { - return m_query_substring > rhs.m_query_substring; - } + auto operator<=>(StaticQueryToken const& rhs) const -> std::strong_ordering = default; auto append(StaticQueryToken const& rhs) -> void { m_query_substring += rhs.get_query_substring(); From ebc51a16b75095d91e381822e9084abd41516ed5 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 6 Aug 2025 14:04:36 -0400 Subject: [PATCH 012/168] Use three-way comparitor in VariableQueryToken class. 
--- .../VariableQueryToken.cpp | 47 ++++++------------- .../VariableQueryToken.hpp | 25 ++-------- 2 files changed, 19 insertions(+), 53 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/VariableQueryToken.cpp b/src/log_surgeon/wildcard_query_parser/VariableQueryToken.cpp index 74369757..49ac99bd 100644 --- a/src/log_surgeon/wildcard_query_parser/VariableQueryToken.cpp +++ b/src/log_surgeon/wildcard_query_parser/VariableQueryToken.cpp @@ -1,41 +1,22 @@ #include "VariableQueryToken.hpp" +#include + +using std::strong_ordering; + namespace log_surgeon::wildcard_query_parser { -auto VariableQueryToken::operator<(VariableQueryToken const& rhs) const -> bool { - if (m_variable_type < rhs.m_variable_type) { - return true; - } - if (m_variable_type > rhs.m_variable_type) { - return false; - } - if (m_query_substring < rhs.m_query_substring) { - return true; - } - if (m_query_substring > rhs.m_query_substring) { - return false; - } - if (m_has_wildcard != rhs.m_has_wildcard) { - return rhs.m_has_wildcard; +auto VariableQueryToken::operator<=>(VariableQueryToken const& rhs) const -> strong_ordering { + auto const variable_type_cmp{m_variable_type <=> rhs.m_variable_type}; + if (std::strong_ordering::equal != variable_type_cmp) { + return variable_type_cmp; } - return false; -} -auto VariableQueryToken::operator>(VariableQueryToken const& rhs) const -> bool { - if (m_variable_type > rhs.m_variable_type) { - return true; - } - if (m_variable_type < rhs.m_variable_type) { - return false; - } - if (m_query_substring > rhs.m_query_substring) { - return true; + auto const query_substring_cmp{m_query_substring <=> rhs.m_query_substring}; + if (std::strong_ordering::equal != query_substring_cmp) { + return query_substring_cmp; } - if (m_query_substring < rhs.m_query_substring) { - return false; - } - if (m_has_wildcard != rhs.m_has_wildcard) { - return m_has_wildcard; - } - return false; + + // bool does not have a <=> operator, so we have to manual order it: + return 
static_cast(m_has_wildcard) <=> static_cast(rhs.m_has_wildcard); } } // namespace log_surgeon::wildcard_query_parser diff --git a/src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp b/src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp index 417d5e62..ad2780ab 100644 --- a/src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp +++ b/src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp @@ -1,6 +1,7 @@ #ifndef LOG_SURGEON_WILDCARD_QUERY_PARSER_VARIABLE_QUERY_TOKEN_HPP #define LOG_SURGEON_WILDCARD_QUERY_PARSER_VARIABLE_QUERY_TOKEN_HPP +#include #include #include #include @@ -24,35 +25,19 @@ class VariableQueryToken { m_query_substring(std::move(query_substring)), m_has_wildcard(has_wildcard) {} - auto operator==(VariableQueryToken const& rhs) const -> bool = default; - - auto operator!=(VariableQueryToken const& rhs) const -> bool = default; - /** - * Lexicographical less-than comparison. + * Lexicographical three-way comparison operator. * * Compares member variables in the following order: * 1. `m_variable_type` * 2. `m_query_substring` - * 3. `m_has_wildcard` (`false` < `true`) + * 3. `m_has_wildcard` (with `false` considered less than `true`) * * @param rhs The `VariableQueryToken` to compare against. - * @return true if this object is considered less than rhs, false otherwise. + * @return The relative ordering of `this` with respect to `rhs`. */ - auto operator<(VariableQueryToken const& rhs) const -> bool; + auto operator<=>(VariableQueryToken const& rhs) const -> std::strong_ordering; - /** - * Lexicographical greater-than comparison. - * - * Compares member variables in the following order: - * 1. `m_variable_type` - * 2. `m_query_substring` - * 3. `m_has_wildcard` (`true` > `false`) - * - * @param rhs The `VariableQueryToken` to compare against. - * @return true if this object is considered greater than rhs, false otherwise. 
- */ - auto operator>(VariableQueryToken const& rhs) const -> bool; [[nodiscard]] auto get_variable_type() const -> uint32_t { return m_variable_type; } From b60f628eb66675463a99b6fab2af80b13335a6d4 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 6 Aug 2025 16:20:22 -0400 Subject: [PATCH 013/168] Use three-way comparitor in QueryInterpretation class. --- .../QueryInterpretation.cpp | 45 ++++++++++++------- .../QueryInterpretation.hpp | 13 +++--- 2 files changed, 35 insertions(+), 23 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp index 8b91b87f..f179c67c 100644 --- a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp +++ b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp @@ -1,6 +1,7 @@ #include "QueryInterpretation.hpp" -#include +#include +#include #include #include #include @@ -13,8 +14,14 @@ #include using log_surgeon::lexers::ByteLexer; +using std::declval; +using std::lexicographical_compare_three_way; +using std::same_as; using std::string; +using std::strong_ordering; +using std::variant; using std::vector; +using std::weak_ordering; namespace log_surgeon::wildcard_query_parser { void QueryInterpretation::append_query_interpretation(QueryInterpretation& suffix) { @@ -39,22 +46,30 @@ void QueryInterpretation::append_query_interpretation(QueryInterpretation& suffi } } -auto QueryInterpretation::operator<(QueryInterpretation const& rhs) const -> bool { - if (m_tokens.size() < rhs.m_tokens.size()) { - return true; - } - if (m_tokens.size() > rhs.m_tokens.size()) { - return false; +// Helper to ensure variant is strongly ordered. 
+template struct IsStronglyOrderedVariant; + +template struct IsStronglyOrderedVariant> { + static constexpr bool cValue{(same_as() <=> declval()),strong_ordering> + && ...)}; +}; + +auto QueryInterpretation::operator<=>(QueryInterpretation const& rhs) const -> strong_ordering { + // Make sure the variants types are strongly ordered. + static_assert( + IsStronglyOrderedVariant::cValue, + "All variant types in `m_tokens` must have `operator<=>` returning `std::strong_ordering`." + ); + + // Can't return `<=>` directly as `variant` is weakly ordered regardless of its types. + auto const tokens_weak_cmp{m_tokens <=> rhs.m_tokens}; + if (weak_ordering::less == tokens_weak_cmp) { + return strong_ordering::less; } - for (uint32_t i{0}; i < m_tokens.size(); ++i) { - if (m_tokens[i] < rhs.m_tokens[i]) { - return true; - } - if (m_tokens[i] > rhs.m_tokens[i]) { - return false; - } + if (weak_ordering::greater == tokens_weak_cmp) { + return strong_ordering::greater; } - return false; + return strong_ordering::equal; } auto QueryInterpretation::serialize() const -> string { diff --git a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp index f497753e..1f9c55a9 100644 --- a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp +++ b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp @@ -1,6 +1,7 @@ #ifndef LOG_SURGEON_WILDCARD_QUERY_PARSER_QUERY_INTERPRETATION_HPP #define LOG_SURGEON_WILDCARD_QUERY_PARSER_QUERY_INTERPRETATION_HPP +#include #include #include #include @@ -33,19 +34,15 @@ class QueryInterpretation { append_variable_token(variable_type, std::move(query_substring), contains_wildcard); } - auto operator==(QueryInterpretation const& rhs) const -> bool = default; - /** - * Lexicographical less-than comparison. + * Lexicographical three-way comparison operator. * - * Comparison is performed in the following order: - * 1. 
By number of tokens in the logtype (shorter logtypes are considered less). - * 2. By lexicographical ordering of individual tokens (based on their `<` and `>` operators). + * Compares `m_tokens` lexicographically using their three-way comparison. * * @param rhs The `QueryInterpretation` to compare against. - * @return true if this object is considered less than rhs, false otherwise. + * @return The relative ordering of `this` with respect to `rhs`. */ - auto operator<(QueryInterpretation const& rhs) const -> bool; + auto operator<=>(QueryInterpretation const& rhs) const -> std::strong_ordering; auto clear() -> void { m_tokens.clear(); } From 5c3425585d7b1f7cd6653b8eae1d4f6ca8e10be3 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 6 Aug 2025 19:03:14 -0400 Subject: [PATCH 014/168] Fix format errors. --- .../QueryInterpretation.cpp | 16 ++++++++++------ .../wildcard_query_parser/VariableQueryToken.hpp | 1 - 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp index f179c67c..ddd6aafc 100644 --- a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp +++ b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp @@ -47,18 +47,22 @@ void QueryInterpretation::append_query_interpretation(QueryInterpretation& suffi } // Helper to ensure variant is strongly ordered. -template struct IsStronglyOrderedVariant; +template +struct IsStronglyOrderedVariant; -template struct IsStronglyOrderedVariant> { - static constexpr bool cValue{(same_as() <=> declval()),strong_ordering> - && ...)}; +template +struct IsStronglyOrderedVariant> { + static constexpr bool cValue{ + (same_as() <=> declval()), strong_ordering> && ...) + }; }; auto QueryInterpretation::operator<=>(QueryInterpretation const& rhs) const -> strong_ordering { // Make sure the variants types are strongly ordered. 
static_assert( - IsStronglyOrderedVariant::cValue, - "All variant types in `m_tokens` must have `operator<=>` returning `std::strong_ordering`." + IsStronglyOrderedVariant::cValue, + "All variant types in `m_tokens` must have `operator<=>` returning " + "`std::strong_ordering`." ); // Can't return `<=>` directly as `variant` is weakly ordered regardless of its types. diff --git a/src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp b/src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp index ad2780ab..63a4eb04 100644 --- a/src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp +++ b/src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp @@ -38,7 +38,6 @@ class VariableQueryToken { */ auto operator<=>(VariableQueryToken const& rhs) const -> std::strong_ordering; - [[nodiscard]] auto get_variable_type() const -> uint32_t { return m_variable_type; } [[nodiscard]] auto get_query_substring() const -> std::string const& { From 169c5afe3d26b07f2bc1f4db3fe00c6a9f0db77d Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 6 Aug 2025 19:11:48 -0400 Subject: [PATCH 015/168] Fix tidy errors. 
--- src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp index ddd6aafc..b74f6ffd 100644 --- a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp +++ b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -15,7 +16,6 @@ using log_surgeon::lexers::ByteLexer; using std::declval; -using std::lexicographical_compare_three_way; using std::same_as; using std::string; using std::strong_ordering; From fad4702bdf052d0ba7595af27b703d13de78da69 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 6 Aug 2025 20:54:13 -0400 Subject: [PATCH 016/168] Update test-static-query-token.cpp for three-way operator. --- tests/test-static-query-token.cpp | 125 +++++++++++++++++++++++------- 1 file changed, 98 insertions(+), 27 deletions(-) diff --git a/tests/test-static-query-token.cpp b/tests/test-static-query-token.cpp index 467dfa77..e573bfba 100644 --- a/tests/test-static-query-token.cpp +++ b/tests/test-static-query-token.cpp @@ -1,3 +1,5 @@ +#include + #include #include @@ -10,36 +12,105 @@ */ using log_surgeon::wildcard_query_parser::StaticQueryToken; +using std::strong_ordering; + +namespace { +/** + * Tests comparison operators when `lhs` == `rhs`. + * @param lhs `StaticQueryToken` on the lhs of the operator. + * @param rhs `StaticQueryToken` on the rhs of the operator. + */ +auto test_equal(StaticQueryToken const& lhs, StaticQueryToken const& rhs) -> void; + +/** + * Tests comparison operators when `lhs` > `rhs`. + * @param lhs `StaticQueryToken` on the lhs of the operator. + * @param rhs `StaticQueryToken` on the rhs of the operator. + */ +auto test_greater_than(StaticQueryToken const& lhs, StaticQueryToken const& rhs) -> void; + +/** + * Tests comparison operators when `lhs` < `rhs`. 
+ * @param lhs `StaticQueryToken` on the lhs of the operator. + * @param rhs `StaticQueryToken` on the rhs of the operator. + */ +auto test_less_than(StaticQueryToken const& lhs, StaticQueryToken const& rhs) -> void; + +auto test_equal(StaticQueryToken const& lhs, StaticQueryToken const& rhs) -> void { + REQUIRE((lhs <=> rhs) == strong_ordering::equal); + REQUIRE(lhs == rhs); + REQUIRE(lhs <= rhs); + REQUIRE(lhs >= rhs); + REQUIRE(rhs == lhs); + REQUIRE(rhs <= lhs); + REQUIRE(rhs >= lhs); + + REQUIRE_FALSE(lhs != rhs); + REQUIRE_FALSE(lhs < rhs); + REQUIRE_FALSE(lhs > rhs); + REQUIRE_FALSE(rhs != lhs); + REQUIRE_FALSE(rhs < lhs); + REQUIRE_FALSE(rhs > lhs); +} + +auto test_greater_than(StaticQueryToken const& lhs, StaticQueryToken const& rhs) -> void { + REQUIRE((lhs <=> rhs) == strong_ordering::greater); + REQUIRE(lhs != rhs); + REQUIRE(lhs >= rhs); + REQUIRE(lhs > rhs); + REQUIRE(rhs != lhs); + REQUIRE(rhs <= lhs); + REQUIRE(rhs < lhs); + + REQUIRE_FALSE(lhs == rhs); + REQUIRE_FALSE(lhs <= rhs); + REQUIRE_FALSE(lhs < rhs); + REQUIRE_FALSE(rhs == lhs); + REQUIRE_FALSE(rhs >= lhs); + REQUIRE_FALSE(rhs > lhs); +} + +auto test_less_than(StaticQueryToken const& lhs, StaticQueryToken const& rhs) -> void { + REQUIRE((lhs <=> rhs) == strong_ordering::less); + REQUIRE(lhs != rhs); + REQUIRE(lhs <= rhs); + REQUIRE(lhs < rhs); + REQUIRE(rhs != lhs); + REQUIRE(rhs >= lhs); + REQUIRE(rhs > lhs); + + REQUIRE_FALSE(lhs == rhs); + REQUIRE_FALSE(lhs >= rhs); + REQUIRE_FALSE(lhs > rhs); + REQUIRE_FALSE(rhs == lhs); + REQUIRE_FALSE(rhs <= lhs); + REQUIRE_FALSE(rhs < lhs); +} +} // namespace /** * @ingroup unit_tests_static_query_token * @brief Tests `operator<` and `operator>`. 
*/ -TEST_CASE("comparison_operators", "[StaticQueryToken]") { - StaticQueryToken empty_token{""}; - StaticQueryToken token_abc{"abc"}; - StaticQueryToken token_def{"def"}; - StaticQueryToken another_token_abc{"abc"}; - - SECTION("less_than_operator") { - REQUIRE(empty_token < token_abc); - REQUIRE(empty_token < token_def); - REQUIRE(token_abc < token_def); - REQUIRE_FALSE(token_abc < empty_token); - REQUIRE_FALSE(token_def < empty_token); - REQUIRE_FALSE(token_def < token_abc); - // False for same value - REQUIRE_FALSE(token_abc < another_token_abc); - } - - SECTION("greater_than_operator") { - REQUIRE(token_abc > empty_token); - REQUIRE(token_def > empty_token); - REQUIRE(token_def > token_abc); - REQUIRE_FALSE(empty_token > token_abc); - REQUIRE_FALSE(empty_token > token_def); - REQUIRE_FALSE(token_abc > token_def); - // False for same value - REQUIRE_FALSE(token_abc > another_token_abc); - } +TEST_CASE("three_way_and_derived_comparisons", "[StaticQueryToken]") { + StaticQueryToken const empty_token{""}; + StaticQueryToken const token_abc{"abc"}; + StaticQueryToken const token_def{"def"}; + StaticQueryToken const another_token_abc{"abc"}; + + // empty_token + test_equal(empty_token, empty_token); + test_less_than(empty_token, token_abc); + test_less_than(empty_token, token_def); + + // token_abc + test_greater_than(token_abc, empty_token); + test_equal(token_abc, token_abc); + test_less_than(token_abc, token_def); + test_equal(token_abc, another_token_abc); + + // token_def + test_greater_than(token_def, empty_token); + test_greater_than(token_def, token_abc); + test_equal(token_def, token_def); } From e331aea752db43ff0e7be7039b8c92f38a600d55 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 6 Aug 2025 21:06:45 -0400 Subject: [PATCH 017/168] Move comparison helpers into its own file. 
--- tests/CMakeLists.txt | 1 + tests/comparison_test_utils.hpp | 90 +++++++++++++++++++++++++++++++ tests/test-static-query-token.cpp | 80 ++------------------------- 3 files changed, 95 insertions(+), 76 deletions(-) create mode 100644 tests/comparison_test_utils.hpp diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index b2f36083..56ab8700 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,6 +1,7 @@ add_executable(unit-test) target_sources(unit-test PRIVATE + comparison_test_utils.hpp test-buffer-parser.cpp test-capture.cpp test-dfa.cpp diff --git a/tests/comparison_test_utils.hpp b/tests/comparison_test_utils.hpp new file mode 100644 index 00000000..a70c46b6 --- /dev/null +++ b/tests/comparison_test_utils.hpp @@ -0,0 +1,90 @@ +#ifndef LOG_SURGEON_TESTS_COMPARISON_TEST_UTILS_HPP +#define LOG_SURGEON_TESTS_COMPARISON_TEST_UTILS_HPP + +#include + +#include + +using std::strong_ordering; + +namespace log_surgeon::tests { +/** + * Tests comparison operators when `lhs` == `rhs`. + * @param lhs Value on the lhs of the operator. + * @param rhs Value on the rhs of the operator. + */ +template +auto test_equal(T const& lhs, T const& rhs) -> void; + +/** + * Tests comparison operators when `lhs` > `rhs`. + * @param lhs Value on the lhs of the operator. + * @param rhs Value on the rhs of the operator. + */ +template +auto test_greater_than(T const& lhs, T const& rhs) -> void; + +/** + * Tests comparison operators when `lhs` < `rhs`. + * @param lhs Value on the lhs of the operator. + * @param rhs Value on the rhs of the operator. 
+ */ +template +auto test_less_than(T const& lhs, T const& rhs) -> void; + +template +auto test_equal(T const& lhs, T const& rhs) -> void { + REQUIRE((lhs <=> rhs) == strong_ordering::equal); + REQUIRE(lhs == rhs); + REQUIRE(lhs <= rhs); + REQUIRE(lhs >= rhs); + REQUIRE(rhs == lhs); + REQUIRE(rhs <= lhs); + REQUIRE(rhs >= lhs); + + REQUIRE_FALSE(lhs != rhs); + REQUIRE_FALSE(lhs < rhs); + REQUIRE_FALSE(lhs > rhs); + REQUIRE_FALSE(rhs != lhs); + REQUIRE_FALSE(rhs < lhs); + REQUIRE_FALSE(rhs > lhs); +} + +template +auto test_greater_than(T const& lhs, T const& rhs) -> void { + REQUIRE((lhs <=> rhs) == strong_ordering::greater); + REQUIRE(lhs != rhs); + REQUIRE(lhs >= rhs); + REQUIRE(lhs > rhs); + REQUIRE(rhs != lhs); + REQUIRE(rhs <= lhs); + REQUIRE(rhs < lhs); + + REQUIRE_FALSE(lhs == rhs); + REQUIRE_FALSE(lhs <= rhs); + REQUIRE_FALSE(lhs < rhs); + REQUIRE_FALSE(rhs == lhs); + REQUIRE_FALSE(rhs >= lhs); + REQUIRE_FALSE(rhs > lhs); +} + +template +auto test_less_than(T const& lhs, T const& rhs) -> void { + REQUIRE((lhs <=> rhs) == strong_ordering::less); + REQUIRE(lhs != rhs); + REQUIRE(lhs <= rhs); + REQUIRE(lhs < rhs); + REQUIRE(rhs != lhs); + REQUIRE(rhs >= lhs); + REQUIRE(rhs > lhs); + + REQUIRE_FALSE(lhs == rhs); + REQUIRE_FALSE(lhs >= rhs); + REQUIRE_FALSE(lhs > rhs); + REQUIRE_FALSE(rhs == lhs); + REQUIRE_FALSE(rhs <= lhs); + REQUIRE_FALSE(rhs < lhs); +} +} // namespace log_surgeon::tests + +#endif // LOG_SURGEON_TESTS_COMPARISON_TEST_UTILS_HPP diff --git a/tests/test-static-query-token.cpp b/tests/test-static-query-token.cpp index e573bfba..8d20365d 100644 --- a/tests/test-static-query-token.cpp +++ b/tests/test-static-query-token.cpp @@ -1,4 +1,4 @@ -#include +#include "comparison_test_utils.hpp" #include @@ -11,82 +11,10 @@ * These unit tests contain the `StaticQueryToken` tag. 
*/ +using log_surgeon::tests::test_equal; +using log_surgeon::tests::test_greater_than; +using log_surgeon::tests::test_less_than; using log_surgeon::wildcard_query_parser::StaticQueryToken; -using std::strong_ordering; - -namespace { -/** - * Tests comparison operators when `lhs` == `rhs`. - * @param lhs `StaticQueryToken` on the lhs of the operator. - * @param rhs `StaticQueryToken` on the rhs of the operator. - */ -auto test_equal(StaticQueryToken const& lhs, StaticQueryToken const& rhs) -> void; - -/** - * Tests comparison operators when `lhs` > `rhs`. - * @param lhs `StaticQueryToken` on the lhs of the operator. - * @param rhs `StaticQueryToken` on the rhs of the operator. - */ -auto test_greater_than(StaticQueryToken const& lhs, StaticQueryToken const& rhs) -> void; - -/** - * Tests comparison operators when `lhs` < `rhs`. - * @param lhs `StaticQueryToken` on the lhs of the operator. - * @param rhs `StaticQueryToken` on the rhs of the operator. - */ -auto test_less_than(StaticQueryToken const& lhs, StaticQueryToken const& rhs) -> void; - -auto test_equal(StaticQueryToken const& lhs, StaticQueryToken const& rhs) -> void { - REQUIRE((lhs <=> rhs) == strong_ordering::equal); - REQUIRE(lhs == rhs); - REQUIRE(lhs <= rhs); - REQUIRE(lhs >= rhs); - REQUIRE(rhs == lhs); - REQUIRE(rhs <= lhs); - REQUIRE(rhs >= lhs); - - REQUIRE_FALSE(lhs != rhs); - REQUIRE_FALSE(lhs < rhs); - REQUIRE_FALSE(lhs > rhs); - REQUIRE_FALSE(rhs != lhs); - REQUIRE_FALSE(rhs < lhs); - REQUIRE_FALSE(rhs > lhs); -} - -auto test_greater_than(StaticQueryToken const& lhs, StaticQueryToken const& rhs) -> void { - REQUIRE((lhs <=> rhs) == strong_ordering::greater); - REQUIRE(lhs != rhs); - REQUIRE(lhs >= rhs); - REQUIRE(lhs > rhs); - REQUIRE(rhs != lhs); - REQUIRE(rhs <= lhs); - REQUIRE(rhs < lhs); - - REQUIRE_FALSE(lhs == rhs); - REQUIRE_FALSE(lhs <= rhs); - REQUIRE_FALSE(lhs < rhs); - REQUIRE_FALSE(rhs == lhs); - REQUIRE_FALSE(rhs >= lhs); - REQUIRE_FALSE(rhs > lhs); -} - -auto 
test_less_than(StaticQueryToken const& lhs, StaticQueryToken const& rhs) -> void { - REQUIRE((lhs <=> rhs) == strong_ordering::less); - REQUIRE(lhs != rhs); - REQUIRE(lhs <= rhs); - REQUIRE(lhs < rhs); - REQUIRE(rhs != lhs); - REQUIRE(rhs >= lhs); - REQUIRE(rhs > lhs); - - REQUIRE_FALSE(lhs == rhs); - REQUIRE_FALSE(lhs >= rhs); - REQUIRE_FALSE(lhs > rhs); - REQUIRE_FALSE(rhs == lhs); - REQUIRE_FALSE(rhs <= lhs); - REQUIRE_FALSE(rhs < lhs); -} -} // namespace /** * @ingroup unit_tests_static_query_token From 5d34f849b41671b26268a69b2c7e49d0590aa9ee Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 6 Aug 2025 21:12:10 -0400 Subject: [PATCH 018/168] Use a duplicate of the last token so it looks nicer. --- tests/test-static-query-token.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test-static-query-token.cpp b/tests/test-static-query-token.cpp index 8d20365d..82e2cd56 100644 --- a/tests/test-static-query-token.cpp +++ b/tests/test-static-query-token.cpp @@ -24,7 +24,7 @@ TEST_CASE("three_way_and_derived_comparisons", "[StaticQueryToken]") { StaticQueryToken const empty_token{""}; StaticQueryToken const token_abc{"abc"}; StaticQueryToken const token_def{"def"}; - StaticQueryToken const another_token_abc{"abc"}; + StaticQueryToken const another_token_def{"def"}; // empty_token test_equal(empty_token, empty_token); @@ -35,10 +35,10 @@ TEST_CASE("three_way_and_derived_comparisons", "[StaticQueryToken]") { test_greater_than(token_abc, empty_token); test_equal(token_abc, token_abc); test_less_than(token_abc, token_def); - test_equal(token_abc, another_token_abc); // token_def test_greater_than(token_def, empty_token); test_greater_than(token_def, token_abc); test_equal(token_def, token_def); + test_equal(token_def, another_token_def); } From ecd985f6b5ba4bf6de78ddcff27aecde07eb3752 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 6 Aug 2025 21:23:26 -0400 Subject: [PATCH 019/168] Update test-variable-query-token.cpp for 
three-way operator. --- tests/test-variable-query-token.cpp | 123 +++++++++++----------------- 1 file changed, 49 insertions(+), 74 deletions(-) diff --git a/tests/test-variable-query-token.cpp b/tests/test-variable-query-token.cpp index eaadb61a..1e089c99 100644 --- a/tests/test-variable-query-token.cpp +++ b/tests/test-variable-query-token.cpp @@ -1,3 +1,5 @@ +#include "comparison_test_utils.hpp" + #include #include @@ -11,6 +13,9 @@ * These unit tests contain the `VariableQueryToken` tag. */ +using log_surgeon::tests::test_equal; +using log_surgeon::tests::test_greater_than; +using log_surgeon::tests::test_less_than; using log_surgeon::wildcard_query_parser::VariableQueryToken; /** @@ -20,78 +25,48 @@ using log_surgeon::wildcard_query_parser::VariableQueryToken; TEST_CASE("comparison_operators", "[VariableQueryToken]") { constexpr uint32_t cEmptyId{0}; constexpr uint32_t cIntId{2}; - constexpr uint32_t cHasNumberId{7}; - - VariableQueryToken empty_token{cEmptyId, "", false}; - VariableQueryToken token_int_123{cIntId, "123", false}; - VariableQueryToken token_int_456{cIntId, "456", false}; - VariableQueryToken token_has_number_123{cHasNumberId, "123", false}; - VariableQueryToken token_has_number_user123_wildcard{cHasNumberId, "user123*", true}; - VariableQueryToken another_token_int_123{cIntId, "123", false}; - - SECTION("less_than_operator") { - // empty token - REQUIRE(empty_token < token_int_123); - REQUIRE(empty_token < token_int_456); - REQUIRE(empty_token < token_has_number_123); - REQUIRE(empty_token < token_has_number_user123_wildcard); - REQUIRE_FALSE(token_int_123 < empty_token); - REQUIRE_FALSE(token_int_456 < empty_token); - REQUIRE_FALSE(token_has_number_123 < empty_token); - REQUIRE_FALSE(token_has_number_user123_wildcard < empty_token); - - // token_int_123 - REQUIRE(token_int_123 < token_int_456); - REQUIRE(token_int_123 < token_has_number_123); - REQUIRE(token_int_123 < token_has_number_user123_wildcard); - REQUIRE_FALSE(token_int_456 < 
token_int_123); - REQUIRE_FALSE(token_has_number_123 < token_int_123); - REQUIRE_FALSE(token_has_number_user123_wildcard < token_int_123); - - // token_int_456 - REQUIRE(token_int_456 < token_has_number_123); - REQUIRE(token_int_456 < token_has_number_user123_wildcard); - REQUIRE_FALSE(token_has_number_123 < token_int_456); - REQUIRE_FALSE(token_has_number_user123_wildcard < token_int_456); - - // token_has_number_123 - REQUIRE(token_has_number_123 < token_has_number_user123_wildcard); - REQUIRE_FALSE(token_has_number_user123_wildcard < token_has_number_123); - - // False for same value - REQUIRE_FALSE(token_int_123 < another_token_int_123); - } - - SECTION("greater_than_operator") { - // empty token - REQUIRE(token_int_123 > empty_token); - REQUIRE(token_int_456 > empty_token); - REQUIRE(token_has_number_123 > empty_token); - REQUIRE(token_has_number_user123_wildcard > empty_token); - REQUIRE_FALSE(empty_token > token_int_123); - REQUIRE_FALSE(empty_token > token_int_456); - REQUIRE_FALSE(empty_token > token_has_number_123); - REQUIRE_FALSE(empty_token > token_has_number_user123_wildcard); - - // token_int_123 - REQUIRE(token_int_456 > token_int_123); - REQUIRE(token_has_number_123 > token_int_123); - REQUIRE(token_has_number_user123_wildcard > token_int_123); - REQUIRE_FALSE(token_int_123 > token_int_456); - REQUIRE_FALSE(token_int_123 > token_has_number_123); - REQUIRE_FALSE(token_int_123 > token_has_number_user123_wildcard); - - // token_int_456 - REQUIRE(token_has_number_123 > token_int_456); - REQUIRE(token_has_number_user123_wildcard > token_int_456); - REQUIRE_FALSE(token_int_456 > token_has_number_123); - REQUIRE_FALSE(token_int_456 > token_has_number_user123_wildcard); - - // token_has_number_123 - REQUIRE(token_has_number_user123_wildcard > token_has_number_123); - REQUIRE_FALSE(token_has_number_123 > token_has_number_user123_wildcard); - - // False for same value - REQUIRE_FALSE(token_int_123 > another_token_int_123); - } + constexpr uint32_t 
cHasNumId{7}; + + VariableQueryToken const empty_token{cEmptyId, "", false}; + VariableQueryToken const token_int_123{cIntId, "123", false}; + VariableQueryToken const token_int_456{cIntId, "456", false}; + VariableQueryToken const token_has_number_123{cHasNumId, "123", false}; + VariableQueryToken const token_has_number_user123_wildcard{cHasNumId, "user123*", true}; + VariableQueryToken const another_token_has_number_user123_wildcard{cHasNumId, "user123*", true}; + + // empty_token + test_equal(empty_token, empty_token); + test_less_than(empty_token, token_int_123); + test_less_than(empty_token, token_int_456); + test_less_than(empty_token, token_has_number_123); + test_less_than(empty_token, token_has_number_user123_wildcard); + + // token_int_123 + test_greater_than(token_int_123, empty_token); + test_equal(token_int_123, token_int_123); + test_less_than(token_int_123, token_int_456); + test_less_than(token_int_123, token_has_number_123); + test_less_than(token_int_123, token_has_number_user123_wildcard); + + // token_int_456 + test_greater_than(token_int_456, empty_token); + test_greater_than(token_int_456, token_int_123); + test_equal(token_int_456, token_int_456); + test_less_than(token_int_456, token_has_number_123); + test_less_than(token_int_456, token_has_number_user123_wildcard); + + // token_has_number_123 + test_greater_than(token_has_number_123, empty_token); + test_greater_than(token_has_number_123, token_int_123); + test_greater_than(token_has_number_123, token_int_456); + test_equal(token_has_number_123, token_has_number_123); + test_less_than(token_has_number_123, token_has_number_user123_wildcard); + + // token_has_number_user123_wildcard + test_greater_than(token_has_number_user123_wildcard, empty_token); + test_greater_than(token_has_number_user123_wildcard, token_int_123); + test_greater_than(token_has_number_user123_wildcard, token_int_456); + test_greater_than(token_has_number_user123_wildcard, token_has_number_123); + 
test_equal(token_has_number_user123_wildcard, token_has_number_user123_wildcard); + test_equal(token_has_number_user123_wildcard, another_token_has_number_user123_wildcard); } From ea9c9cd229884bf7d2aaae4ffe5d28958a835e63 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 6 Aug 2025 21:24:06 -0400 Subject: [PATCH 020/168] Fix format errors. --- tests/test-static-query-token.cpp | 4 ++-- tests/test-variable-query-token.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test-static-query-token.cpp b/tests/test-static-query-token.cpp index 82e2cd56..35f659da 100644 --- a/tests/test-static-query-token.cpp +++ b/tests/test-static-query-token.cpp @@ -1,9 +1,9 @@ -#include "comparison_test_utils.hpp" - #include #include +#include "comparison_test_utils.hpp" + /** * @defgroup unit_tests_static_query_token `StaticQueryToken` unit tests. * @brief Unit tests for `StaticQueryToken` construction, mutation, and comparison. diff --git a/tests/test-variable-query-token.cpp b/tests/test-variable-query-token.cpp index 1e089c99..f19e1c54 100644 --- a/tests/test-variable-query-token.cpp +++ b/tests/test-variable-query-token.cpp @@ -1,11 +1,11 @@ -#include "comparison_test_utils.hpp" - #include #include #include +#include "comparison_test_utils.hpp" + /** * @defgroup unit_tests_variable_query_token `VariableQueryToken` unit tests. * @brief Unit tests for `VariableQueryToken` construction and comparison. From 6574f803f442a94505303a845056198826d206ec Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 6 Aug 2025 21:30:54 -0400 Subject: [PATCH 021/168] Add operator== for VariableQueryToken. 
--- src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp b/src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp index 63a4eb04..526f8175 100644 --- a/src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp +++ b/src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp @@ -25,6 +25,11 @@ class VariableQueryToken { m_query_substring(std::move(query_substring)), m_has_wildcard(has_wildcard) {} + // Must be defined if `operator<=>` is not defaulted. + auto operator==(VariableQueryToken const& rhs) const -> bool { + return (*this <=> rhs) == std::strong_ordering::equal; + } + /** * Lexicographical three-way comparison operator. * From b6a3c3bee5e75f14df77937add85c2385a647f7b Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 6 Aug 2025 21:32:47 -0400 Subject: [PATCH 022/168] Add operator== for QueryInterpretation. --- .../wildcard_query_parser/QueryInterpretation.hpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp index 1f9c55a9..0e717345 100644 --- a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp +++ b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp @@ -34,6 +34,11 @@ class QueryInterpretation { append_variable_token(variable_type, std::move(query_substring), contains_wildcard); } + // Must be defined if `operator<=>` is not defaulted. + auto operator==(QueryInterpretation const& rhs) const -> bool { + return (*this <=> rhs) == std::strong_ordering::equal; + } + /** * Lexicographical three-way comparison operator. * From 79a749dee8405c157a99f76237ed49e3e89a59b6 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 7 Aug 2025 01:41:16 -0400 Subject: [PATCH 023/168] Update docstring and name of comparison unit tests. 
--- tests/test-static-query-token.cpp | 4 ++-- tests/test-variable-query-token.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test-static-query-token.cpp b/tests/test-static-query-token.cpp index 35f659da..f931dfa5 100644 --- a/tests/test-static-query-token.cpp +++ b/tests/test-static-query-token.cpp @@ -18,9 +18,9 @@ using log_surgeon::wildcard_query_parser::StaticQueryToken; /** * @ingroup unit_tests_static_query_token - * @brief Tests `operator<` and `operator>`. + * @brief Tests `operator<=>` and all derived operators. */ -TEST_CASE("three_way_and_derived_comparisons", "[StaticQueryToken]") { +TEST_CASE("comparison_operators", "[StaticQueryToken]") { StaticQueryToken const empty_token{""}; StaticQueryToken const token_abc{"abc"}; StaticQueryToken const token_def{"def"}; diff --git a/tests/test-variable-query-token.cpp b/tests/test-variable-query-token.cpp index f19e1c54..087bb968 100644 --- a/tests/test-variable-query-token.cpp +++ b/tests/test-variable-query-token.cpp @@ -20,7 +20,7 @@ using log_surgeon::wildcard_query_parser::VariableQueryToken; /** * @ingroup unit_tests_variable_query_token - * @brief Tests `operator<` and `operator>`. + * @brief Tests `operator==`, `operator<=>`, and all derived operators. */ TEST_CASE("comparison_operators", "[VariableQueryToken]") { constexpr uint32_t cEmptyId{0}; From cfef20424014139a1a7488a66a37b492d6161e50 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 7 Aug 2025 01:44:25 -0400 Subject: [PATCH 024/168] Rename token variables to end in the word token. 
--- tests/test-static-query-token.cpp | 32 +++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/test-static-query-token.cpp b/tests/test-static-query-token.cpp index f931dfa5..b8397334 100644 --- a/tests/test-static-query-token.cpp +++ b/tests/test-static-query-token.cpp @@ -22,23 +22,23 @@ using log_surgeon::wildcard_query_parser::StaticQueryToken; */ TEST_CASE("comparison_operators", "[StaticQueryToken]") { StaticQueryToken const empty_token{""}; - StaticQueryToken const token_abc{"abc"}; - StaticQueryToken const token_def{"def"}; - StaticQueryToken const another_token_def{"def"}; + StaticQueryToken const abc_token{"abc"}; + StaticQueryToken const def_token{"def"}; + StaticQueryToken const another_def_token{"def"}; // empty_token test_equal(empty_token, empty_token); - test_less_than(empty_token, token_abc); - test_less_than(empty_token, token_def); - - // token_abc - test_greater_than(token_abc, empty_token); - test_equal(token_abc, token_abc); - test_less_than(token_abc, token_def); - - // token_def - test_greater_than(token_def, empty_token); - test_greater_than(token_def, token_abc); - test_equal(token_def, token_def); - test_equal(token_def, another_token_def); + test_less_than(empty_token, abc_token); + test_less_than(empty_token, def_token); + + // abc_token + test_greater_than(abc_token, empty_token); + test_equal(abc_token, abc_token); + test_less_than(abc_token, def_token); + + // def_token + test_greater_than(def_token, empty_token); + test_greater_than(def_token, abc_token); + test_equal(def_token, def_token); + test_equal(def_token, another_def_token); } From bd297e279824097ffbfb69547c4a0b57b3914a2a Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 7 Aug 2025 01:47:36 -0400 Subject: [PATCH 025/168] Rename token variables to end in the word token for VariableQueryToken too. 
--- tests/test-variable-query-token.cpp | 68 ++++++++++++++--------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/tests/test-variable-query-token.cpp b/tests/test-variable-query-token.cpp index 087bb968..ce793abb 100644 --- a/tests/test-variable-query-token.cpp +++ b/tests/test-variable-query-token.cpp @@ -28,45 +28,45 @@ TEST_CASE("comparison_operators", "[VariableQueryToken]") { constexpr uint32_t cHasNumId{7}; VariableQueryToken const empty_token{cEmptyId, "", false}; - VariableQueryToken const token_int_123{cIntId, "123", false}; - VariableQueryToken const token_int_456{cIntId, "456", false}; - VariableQueryToken const token_has_number_123{cHasNumId, "123", false}; - VariableQueryToken const token_has_number_user123_wildcard{cHasNumId, "user123*", true}; - VariableQueryToken const another_token_has_number_user123_wildcard{cHasNumId, "user123*", true}; + VariableQueryToken const int_123_token{cIntId, "123", false}; + VariableQueryToken const int_456_token{cIntId, "456", false}; + VariableQueryToken const has_number_123_token{cHasNumId, "123", false}; + VariableQueryToken const has_number_user123_wildcard_token{cHasNumId, "user123*", true}; + VariableQueryToken const another_has_number_user123_wildcard_token{cHasNumId, "user123*", true}; // empty_token test_equal(empty_token, empty_token); - test_less_than(empty_token, token_int_123); - test_less_than(empty_token, token_int_456); - test_less_than(empty_token, token_has_number_123); - test_less_than(empty_token, token_has_number_user123_wildcard); + test_less_than(empty_token, int_123_token); + test_less_than(empty_token, int_456_token); + test_less_than(empty_token, has_number_123_token); + test_less_than(empty_token, has_number_user123_wildcard_token); - // token_int_123 - test_greater_than(token_int_123, empty_token); - test_equal(token_int_123, token_int_123); - test_less_than(token_int_123, token_int_456); - test_less_than(token_int_123, token_has_number_123); - 
test_less_than(token_int_123, token_has_number_user123_wildcard); + // int_123_token + test_greater_than(int_123_token, empty_token); + test_equal(int_123_token, int_123_token); + test_less_than(int_123_token, int_456_token); + test_less_than(int_123_token, has_number_123_token); + test_less_than(int_123_token, has_number_user123_wildcard_token); - // token_int_456 - test_greater_than(token_int_456, empty_token); - test_greater_than(token_int_456, token_int_123); - test_equal(token_int_456, token_int_456); - test_less_than(token_int_456, token_has_number_123); - test_less_than(token_int_456, token_has_number_user123_wildcard); + // int_456_token + test_greater_than(int_456_token, empty_token); + test_greater_than(int_456_token, int_123_token); + test_equal(int_456_token, int_456_token); + test_less_than(int_456_token, has_number_123_token); + test_less_than(int_456_token, has_number_user123_wildcard_token); - // token_has_number_123 - test_greater_than(token_has_number_123, empty_token); - test_greater_than(token_has_number_123, token_int_123); - test_greater_than(token_has_number_123, token_int_456); - test_equal(token_has_number_123, token_has_number_123); - test_less_than(token_has_number_123, token_has_number_user123_wildcard); + // has_number_123_token + test_greater_than(has_number_123_token, empty_token); + test_greater_than(has_number_123_token, int_123_token); + test_greater_than(has_number_123_token, int_456_token); + test_equal(has_number_123_token, has_number_123_token); + test_less_than(has_number_123_token, has_number_user123_wildcard_token); - // token_has_number_user123_wildcard - test_greater_than(token_has_number_user123_wildcard, empty_token); - test_greater_than(token_has_number_user123_wildcard, token_int_123); - test_greater_than(token_has_number_user123_wildcard, token_int_456); - test_greater_than(token_has_number_user123_wildcard, token_has_number_123); - test_equal(token_has_number_user123_wildcard, token_has_number_user123_wildcard); - 
test_equal(token_has_number_user123_wildcard, another_token_has_number_user123_wildcard); + // has_number_user123_wildcard_token + test_greater_than(has_number_user123_wildcard_token, empty_token); + test_greater_than(has_number_user123_wildcard_token, int_123_token); + test_greater_than(has_number_user123_wildcard_token, int_456_token); + test_greater_than(has_number_user123_wildcard_token, has_number_123_token); + test_equal(has_number_user123_wildcard_token, has_number_user123_wildcard_token); + test_equal(has_number_user123_wildcard_token, another_has_number_user123_wildcard_token); } From 0cb92175f8af794c2341b07c10fa143d2f6b2b8e Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 7 Aug 2025 02:47:04 -0400 Subject: [PATCH 026/168] Update test-query-interpretation.cpp for three-way comparator. --- tests/test-query-interpretation.cpp | 106 +++++++++++++--------------- 1 file changed, 49 insertions(+), 57 deletions(-) diff --git a/tests/test-query-interpretation.cpp b/tests/test-query-interpretation.cpp index 6c016009..90d87497 100644 --- a/tests/test-query-interpretation.cpp +++ b/tests/test-query-interpretation.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -5,6 +6,8 @@ #include +#include "comparison_test_utils.hpp" + /** * @defgroup unit_tests_query_interpretation `QueryInterpretation` unit tests. * @brief Unit tests for `QueryInterpretation` construction, mutation, and comparison. @@ -15,6 +18,11 @@ using log_surgeon::wildcard_query_parser::QueryInterpretation; using std::string_view; +using log_surgeon::tests::test_equal; +using log_surgeon::tests::test_greater_than; +using log_surgeon::tests::test_less_than; +using log_surgeon::wildcard_query_parser::VariableQueryToken; + /** * @ingroup unit_tests_query_interpretation * @brief Creates an empty `QueryInterpretation` and tests serialization.
@@ -147,66 +155,50 @@ TEST_CASE("append_query_interpretation", "[QueryInterpretation]") { /** * @ingroup unit_tests_query_interpretation - * @brief Tests `operator<` with various token types and orders. + * @brief Tests `operator==`, `operator<=>`, and all derived operators. */ -TEST_CASE("less_than_operator", "[QueryInterpretation]") { - constexpr uint32_t cFloatId{1}; +TEST_CASE("comparison_operators", "[QueryInterpretation]") { constexpr uint32_t cIntId{2}; constexpr uint32_t cHasNumberId{7}; - QueryInterpretation query_interpretation1; - QueryInterpretation query_interpretation2; - - SECTION("different_length_logtype") { - query_interpretation1.append_static_token("a"); - query_interpretation2.append_static_token("a"); - query_interpretation2.append_variable_token(cFloatId, "1.1", false); - - REQUIRE(query_interpretation1 < query_interpretation2); - REQUIRE_FALSE(query_interpretation2 < query_interpretation1); - } - - SECTION("different_static_content") { - query_interpretation1.append_static_token("a"); - query_interpretation2.append_static_token("b"); - - REQUIRE(query_interpretation1 < query_interpretation2); - REQUIRE_FALSE(query_interpretation2 < query_interpretation1); - } - - SECTION("different_var_types") { - query_interpretation1.append_variable_token(cIntId, "123", false); - query_interpretation2.append_variable_token(cHasNumberId, "123", false); - - REQUIRE(query_interpretation1 < query_interpretation2); - REQUIRE_FALSE(query_interpretation2 < query_interpretation1); - } - - SECTION("different_var_values") { - query_interpretation1.append_variable_token(cIntId, "123", false); - query_interpretation2.append_variable_token(cIntId, "456", false); - - REQUIRE(query_interpretation1 < query_interpretation2); - REQUIRE_FALSE(query_interpretation2 < query_interpretation1); - } - - SECTION("token_order") { - query_interpretation1.append_static_token("hello"); - query_interpretation1.append_variable_token(cIntId, "123", false); - 
query_interpretation2.append_variable_token(cIntId, "123", false); - query_interpretation2.append_static_token("hello"); - - // `StaticQueryToken` is a lower index in the variant so is considered less than - // `VariableQueryToken`. - REQUIRE(query_interpretation1 < query_interpretation2); - REQUIRE_FALSE(query_interpretation2 < query_interpretation1); - } - - SECTION("identical_tokens") { - query_interpretation1.append_variable_token(cIntId, "123", false); - query_interpretation2.append_variable_token(cIntId, "123", false); - - REQUIRE_FALSE(query_interpretation1 < query_interpretation2); - REQUIRE_FALSE(query_interpretation2 < query_interpretation1); + std::vector ordered_interpretations; + // a + ordered_interpretations.emplace_back("a"); + // a(123) + ordered_interpretations.emplace_back("a"); + ordered_interpretations.back().append_variable_token(cIntId, "123", false); + // b + ordered_interpretations.emplace_back("b"); + // (123) + ordered_interpretations.emplace_back(cIntId, "123", false); + // (123)a + ordered_interpretations.emplace_back(cIntId, "123", false); + ordered_interpretations.back().append_static_token("a"); + // (123*) + ordered_interpretations.emplace_back(cIntId, "123*", true); + // (1234) + ordered_interpretations.emplace_back(cIntId, "1234", false); + // (456) + ordered_interpretations.emplace_back(cIntId, "456", false); + // (123) + ordered_interpretations.emplace_back(cHasNumberId, "123", false); + + // (abc*123) + QueryInterpretation const interpretation(cHasNumberId, "abc*123", true); + QueryInterpretation const duplicate_interpretation(cHasNumberId, "abc*123", true); + + for (size_t i{0}; i < ordered_interpretations.size(); i++) { + CAPTURE(i); + for (size_t j{0}; j < ordered_interpretations.size(); j++) { + CAPTURE(j); + if (i < j) { + test_less_than(ordered_interpretations[i], ordered_interpretations[j]); + } else if (i == j) { + test_equal(ordered_interpretations[i], ordered_interpretations[j]); + } else { + 
test_greater_than(ordered_interpretations[i], ordered_interpretations[j]); + } + } } + test_equal(interpretation, duplicate_interpretation); } From d620be6836de4d2cd0e25c66c14cff83ba7622dc Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 7 Aug 2025 02:57:29 -0400 Subject: [PATCH 027/168] Fix tidy errors. --- tests/test-query-interpretation.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test-query-interpretation.cpp b/tests/test-query-interpretation.cpp index 90d87497..e66eb208 100644 --- a/tests/test-query-interpretation.cpp +++ b/tests/test-query-interpretation.cpp @@ -1,9 +1,11 @@ #include #include #include +#include #include +#include #include #include "comparison_test_utils.hpp" @@ -21,7 +23,6 @@ using std::string_view; using log_surgeon::tests::test_equal; using log_surgeon::tests::test_greater_than; using log_surgeon::tests::test_less_than; -using log_surgeon::wildcard_query_parser::VariableQueryToken; /** * @ingroup unit_tests_query_interpretation From db7c86a7ec5786457b0d1bfa3f8fae76678c897e Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 7 Aug 2025 02:59:14 -0400 Subject: [PATCH 028/168] Fix some naming. 
--- tests/test-query-interpretation.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/test-query-interpretation.cpp b/tests/test-query-interpretation.cpp index e66eb208..cb14f50c 100644 --- a/tests/test-query-interpretation.cpp +++ b/tests/test-query-interpretation.cpp @@ -160,7 +160,7 @@ TEST_CASE("append_query_interpretation", "[QueryInterpretation]") { */ TEST_CASE("comparison_operators", "[QueryInterpretation]") { constexpr uint32_t cIntId{2}; - constexpr uint32_t cHasNumberId{7}; + constexpr uint32_t cHasNumId{7}; std::vector ordered_interpretations; // a @@ -181,12 +181,12 @@ TEST_CASE("comparison_operators", "[QueryInterpretation]") { ordered_interpretations.emplace_back(cIntId, "1234", false); // (456) ordered_interpretations.emplace_back(cIntId, "456", false); - // (123) - ordered_interpretations.emplace_back(cHasNumberId, "123", false); + // (123) + ordered_interpretations.emplace_back(cHasNumId, "123", false); - // (abc*123) - QueryInterpretation const interpretation(cHasNumberId, "abc*123", true); - QueryInterpretation const duplicate_interpretation(cHasNumberId, "abc*123", true); + // (abc*123) + QueryInterpretation const interpretation(cHasNumId, "abc*123", true); + QueryInterpretation const duplicate_interpretation(cHasNumId, "abc*123", true); for (size_t i{0}; i < ordered_interpretations.size(); i++) { CAPTURE(i); From 410adba86672e9a7f3c504704749e589c5c89528 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 7 Aug 2025 19:09:47 -0400 Subject: [PATCH 029/168] Clean up variable query tokens unit-tests. 
--- tests/test-variable-query-token.cpp | 67 +++++++++++------------------ 1 file changed, 25 insertions(+), 42 deletions(-) diff --git a/tests/test-variable-query-token.cpp b/tests/test-variable-query-token.cpp index ce793abb..75258b0d 100644 --- a/tests/test-variable-query-token.cpp +++ b/tests/test-variable-query-token.cpp @@ -2,6 +2,7 @@ #include +#include #include #include "comparison_test_utils.hpp" @@ -27,46 +28,28 @@ TEST_CASE("comparison_operators", "[VariableQueryToken]") { constexpr uint32_t cIntId{2}; constexpr uint32_t cHasNumId{7}; - VariableQueryToken const empty_token{cEmptyId, "", false}; - VariableQueryToken const int_123_token{cIntId, "123", false}; - VariableQueryToken const int_456_token{cIntId, "456", false}; - VariableQueryToken const has_number_123_token{cHasNumId, "123", false}; - VariableQueryToken const has_number_user123_wildcard_token{cHasNumId, "user123*", true}; - VariableQueryToken const another_has_number_user123_wildcard_token{cHasNumId, "user123*", true}; - - // empty_token - test_equal(empty_token, empty_token); - test_less_than(empty_token, int_123_token); - test_less_than(empty_token, int_456_token); - test_less_than(empty_token, has_number_123_token); - test_less_than(empty_token, has_number_user123_wildcard_token); - - // int_123_token - test_greater_than(int_123_token, empty_token); - test_equal(int_123_token, int_123_token); - test_less_than(int_123_token, int_456_token); - test_less_than(int_123_token, has_number_123_token); - test_less_than(int_123_token, has_number_user123_wildcard_token); - - // int_456_token - test_greater_than(int_456_token, empty_token); - test_greater_than(int_456_token, int_123_token); - test_equal(int_456_token, int_456_token); - test_less_than(int_456_token, has_number_123_token); - test_less_than(int_456_token, has_number_user123_wildcard_token); - - // has_number_123_token - test_greater_than(has_number_123_token, empty_token); - test_greater_than(has_number_123_token, int_123_token); - 
test_greater_than(has_number_123_token, int_456_token); - test_equal(has_number_123_token, has_number_123_token); - test_less_than(has_number_123_token, has_number_user123_wildcard_token); - - // has_number_user123_wildcard_token - test_greater_than(has_number_user123_wildcard_token, empty_token); - test_greater_than(has_number_user123_wildcard_token, int_123_token); - test_greater_than(has_number_user123_wildcard_token, int_456_token); - test_greater_than(has_number_user123_wildcard_token, has_number_123_token); - test_equal(has_number_user123_wildcard_token, has_number_user123_wildcard_token); - test_equal(has_number_user123_wildcard_token, another_has_number_user123_wildcard_token); + std::vector ordered_tokens{ + {cEmptyId, "", false}, + {cIntId, "123", false}, + {cIntId, "456", false}, + {cHasNumId, "123", false}, + {cHasNumId, "user123*", true} + }; + VariableQueryToken const token{cHasNumId, "abc*123", true}; + VariableQueryToken const duplicate_token{cHasNumId, "abc*123", true}; + + for (size_t i{0}; i < ordered_tokens.size(); i++) { + CAPTURE(i); + for (size_t j{0}; j < ordered_tokens.size(); j++) { + CAPTURE(j); + if (i < j) { + test_less_than(ordered_tokens[i], ordered_tokens[j]); + } else if (i == j) { + test_equal(ordered_tokens[i], ordered_tokens[j]); + } else { + test_greater_than(ordered_tokens[i], ordered_tokens[j]); + } + } + } + test_equal(token, duplicate_token); } From 200b977757c26c0b8697cb1d5ce75024e8656048 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 7 Aug 2025 19:11:20 -0400 Subject: [PATCH 030/168] Fix interpretation initialization that accidentally used parentheses instead of braces.
--- tests/test-query-interpretation.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test-query-interpretation.cpp b/tests/test-query-interpretation.cpp index cb14f50c..ea833903 100644 --- a/tests/test-query-interpretation.cpp +++ b/tests/test-query-interpretation.cpp @@ -185,8 +185,8 @@ TEST_CASE("comparison_operators", "[QueryInterpretation]") { ordered_interpretations.emplace_back(cHasNumId, "123", false); // (abc*123) - QueryInterpretation const interpretation(cHasNumId, "abc*123", true); - QueryInterpretation const duplicate_interpretation(cHasNumId, "abc*123", true); + QueryInterpretation const interpretation{cHasNumId, "abc*123", true}; + QueryInterpretation const duplicate_interpretation{cHasNumId, "abc*123", true}; for (size_t i{0}; i < ordered_interpretations.size(); i++) { CAPTURE(i); From bc31dd7a3d4b0a5b01dae5d8547ac638d656aa51 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 7 Aug 2025 19:17:04 -0400 Subject: [PATCH 031/168] Add missing include. --- tests/test-variable-query-token.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test-variable-query-token.cpp b/tests/test-variable-query-token.cpp index 75258b0d..4995be37 100644 --- a/tests/test-variable-query-token.cpp +++ b/tests/test-variable-query-token.cpp @@ -1,4 +1,5 @@ #include +#include #include From 5d4fd8275c7a3950ab8a1e872cf94f8a303f3e89 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 7 Aug 2025 19:17:49 -0400 Subject: [PATCH 032/168] Add const. 
--- tests/test-variable-query-token.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test-variable-query-token.cpp b/tests/test-variable-query-token.cpp index 4995be37..e01c7df0 100644 --- a/tests/test-variable-query-token.cpp +++ b/tests/test-variable-query-token.cpp @@ -29,7 +29,7 @@ TEST_CASE("comparison_operators", "[VariableQueryToken]") { constexpr uint32_t cIntId{2}; constexpr uint32_t cHasNumId{7}; - std::vector ordered_tokens{ + std::vector const ordered_tokens{ {cEmptyId, "", false}, {cIntId, "123", false}, {cIntId, "456", false}, From f4e3b638490ef8e36c7f49297f6bcf9f76a7750d Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 7 Aug 2025 19:24:33 -0400 Subject: [PATCH 033/168] Clean up static query token's unit-tests. --- tests/test-static-query-token.cpp | 45 +++++++++++++++++-------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/tests/test-static-query-token.cpp b/tests/test-static-query-token.cpp index b8397334..fff89c04 100644 --- a/tests/test-static-query-token.cpp +++ b/tests/test-static-query-token.cpp @@ -1,5 +1,8 @@ +#include + #include +#include #include #include "comparison_test_utils.hpp" @@ -21,24 +24,26 @@ using log_surgeon::wildcard_query_parser::StaticQueryToken; * @brief Tests `operator<=>` and all derived operators. 
*/ TEST_CASE("comparison_operators", "[StaticQueryToken]") { - StaticQueryToken const empty_token{""}; - StaticQueryToken const abc_token{"abc"}; - StaticQueryToken const def_token{"def"}; - StaticQueryToken const another_def_token{"def"}; - - // empty_token - test_equal(empty_token, empty_token); - test_less_than(empty_token, abc_token); - test_less_than(empty_token, def_token); - - // abc_token - test_greater_than(abc_token, empty_token); - test_equal(abc_token, abc_token); - test_less_than(abc_token, def_token); - - // def_token - test_greater_than(def_token, empty_token); - test_greater_than(def_token, abc_token); - test_equal(def_token, def_token); - test_equal(def_token, another_def_token); + std::vector const ordered_tokens{ + StaticQueryToken{""}, + StaticQueryToken{"abc"}, + StaticQueryToken{"def"} + }; + StaticQueryToken const token{"ghi"}; + StaticQueryToken const duplicate_token{"ghi"}; + + for (size_t i{0}; i < ordered_tokens.size(); i++) { + CAPTURE(i); + for (size_t j{0}; j < ordered_tokens.size(); j++) { + CAPTURE(j); + if (i < j) { + test_less_than(ordered_tokens[i], ordered_tokens[j]); + } else if (i == j) { + test_equal(ordered_tokens[i], ordered_tokens[j]); + } else { + test_greater_than(ordered_tokens[i], ordered_tokens[j]); + } + } + } + test_equal(token, duplicate_token); } From f13251e0b5620ff6d3da2176620392d7245e8ab2 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 7 Aug 2025 20:18:53 -0400 Subject: [PATCH 034/168] Deduplicate test code by moving it into utils file. 
--- tests/comparison_test_utils.hpp | 35 ++++++++++++++++++++++++----- tests/test-query-interpretation.cpp | 21 +++-------------- tests/test-static-query-token.cpp | 20 +++-------------- tests/test-variable-query-token.cpp | 18 ++------------- 4 files changed, 38 insertions(+), 56 deletions(-) diff --git a/tests/comparison_test_utils.hpp b/tests/comparison_test_utils.hpp index a70c46b6..6e3f536d 100644 --- a/tests/comparison_test_utils.hpp +++ b/tests/comparison_test_utils.hpp @@ -2,11 +2,11 @@ #define LOG_SURGEON_TESTS_COMPARISON_TEST_UTILS_HPP #include +#include +#include #include -using std::strong_ordering; - namespace log_surgeon::tests { /** * Tests comparison operators when `lhs` == `rhs`. @@ -32,9 +32,16 @@ auto test_greater_than(T const& lhs, T const& rhs) -> void; template auto test_less_than(T const& lhs, T const& rhs) -> void; +/** + * Tests operators `<=>`, `==`, `!=`, `<`, `<=`, `>`, `>=` for every pair of elements in the vector. + * @param ordered_vector Vector where elements are ordered to be strictly ascending. 
+ */ +template +auto pairwise_comparison_of_strictly_ascending_vector(std::vector const& ordered_vector) -> void; + template auto test_equal(T const& lhs, T const& rhs) -> void { - REQUIRE((lhs <=> rhs) == strong_ordering::equal); + REQUIRE((lhs <=> rhs) == std::strong_ordering::equal); REQUIRE(lhs == rhs); REQUIRE(lhs <= rhs); REQUIRE(lhs >= rhs); @@ -52,7 +59,7 @@ auto test_equal(T const& lhs, T const& rhs) -> void { template auto test_greater_than(T const& lhs, T const& rhs) -> void { - REQUIRE((lhs <=> rhs) == strong_ordering::greater); + REQUIRE((lhs <=> rhs) == std::strong_ordering::greater); REQUIRE(lhs != rhs); REQUIRE(lhs >= rhs); REQUIRE(lhs > rhs); @@ -70,7 +77,7 @@ auto test_greater_than(T const& lhs, T const& rhs) -> void { template auto test_less_than(T const& lhs, T const& rhs) -> void { - REQUIRE((lhs <=> rhs) == strong_ordering::less); + REQUIRE((lhs <=> rhs) == std::strong_ordering::less); REQUIRE(lhs != rhs); REQUIRE(lhs <= rhs); REQUIRE(lhs < rhs); @@ -85,6 +92,24 @@ auto test_less_than(T const& lhs, T const& rhs) -> void { REQUIRE_FALSE(rhs <= lhs); REQUIRE_FALSE(rhs < lhs); } + +template +auto pairwise_comparison_of_strictly_ascending_vector(std::vector const& ordered_vector) + -> void { + for (size_t i{0}; i < ordered_vector.size(); i++) { + CAPTURE(i); + for (size_t j{0}; j < ordered_vector.size(); j++) { + CAPTURE(j); + if (i < j) { + test_less_than(ordered_vector[i], ordered_vector[j]); + } else if (i == j) { + test_equal(ordered_vector[i], ordered_vector[j]); + } else { + test_greater_than(ordered_vector[i], ordered_vector[j]); + } + } + } +} } // namespace log_surgeon::tests #endif // LOG_SURGEON_TESTS_COMPARISON_TEST_UTILS_HPP diff --git a/tests/test-query-interpretation.cpp b/tests/test-query-interpretation.cpp index ea833903..7aafbd25 100644 --- a/tests/test-query-interpretation.cpp +++ b/tests/test-query-interpretation.cpp @@ -5,7 +5,6 @@ #include -#include #include #include "comparison_test_utils.hpp" @@ -17,13 +16,11 @@ * These 
unit tests contain the `QueryInterpretation` tag. */ +using log_surgeon::tests::pairwise_comparison_of_strictly_ascending_vector; +using log_surgeon::tests::test_equal; using log_surgeon::wildcard_query_parser::QueryInterpretation; using std::string_view; -using log_surgeon::tests::test_equal; -using log_surgeon::tests::test_greater_than; -using log_surgeon::tests::test_less_than; - /** * @ingroup unit_tests_query_interpretation * @brief Creates an empty `QueryInterpretation` and tests serialization. @@ -188,18 +185,6 @@ TEST_CASE("comparison_operators", "[QueryInterpretation]") { QueryInterpretation const interpretation{cHasNumId, "abc*123", true}; QueryInterpretation const duplicate_interpretation{cHasNumId, "abc*123", true}; - for (size_t i{0}; i < ordered_interpretations.size(); i++) { - CAPTURE(i); - for (size_t j{0}; j < ordered_interpretations.size(); j++) { - CAPTURE(j); - if (i < j) { - test_less_than(ordered_interpretations[i], ordered_interpretations[j]); - } else if (i == j) { - test_equal(ordered_interpretations[i], ordered_interpretations[j]); - } else { - test_greater_than(ordered_interpretations[i], ordered_interpretations[j]); - } - } - } + pairwise_comparison_of_strictly_ascending_vector(ordered_interpretations); test_equal(interpretation, duplicate_interpretation); } diff --git a/tests/test-static-query-token.cpp b/tests/test-static-query-token.cpp index fff89c04..88878a02 100644 --- a/tests/test-static-query-token.cpp +++ b/tests/test-static-query-token.cpp @@ -2,7 +2,6 @@ #include -#include #include #include "comparison_test_utils.hpp" @@ -14,9 +13,8 @@ * These unit tests contain the `StaticQueryToken` tag. 
*/ +using log_surgeon::tests::pairwise_comparison_of_strictly_ascending_vector; using log_surgeon::tests::test_equal; -using log_surgeon::tests::test_greater_than; -using log_surgeon::tests::test_less_than; using log_surgeon::wildcard_query_parser::StaticQueryToken; /** @@ -24,7 +22,7 @@ using log_surgeon::wildcard_query_parser::StaticQueryToken; * @brief Tests `operator<=>` and all derived operators. */ TEST_CASE("comparison_operators", "[StaticQueryToken]") { - std::vector const ordered_tokens{ + std::vector const ordered_tokens{ StaticQueryToken{""}, StaticQueryToken{"abc"}, StaticQueryToken{"def"} @@ -32,18 +30,6 @@ TEST_CASE("comparison_operators", "[StaticQueryToken]") { StaticQueryToken const token{"ghi"}; StaticQueryToken const duplicate_token{"ghi"}; - for (size_t i{0}; i < ordered_tokens.size(); i++) { - CAPTURE(i); - for (size_t j{0}; j < ordered_tokens.size(); j++) { - CAPTURE(j); - if (i < j) { - test_less_than(ordered_tokens[i], ordered_tokens[j]); - } else if (i == j) { - test_equal(ordered_tokens[i], ordered_tokens[j]); - } else { - test_greater_than(ordered_tokens[i], ordered_tokens[j]); - } - } - } + pairwise_comparison_of_strictly_ascending_vector(ordered_tokens); test_equal(token, duplicate_token); } diff --git a/tests/test-variable-query-token.cpp b/tests/test-variable-query-token.cpp index e01c7df0..27d67b54 100644 --- a/tests/test-variable-query-token.cpp +++ b/tests/test-variable-query-token.cpp @@ -3,7 +3,6 @@ #include -#include #include #include "comparison_test_utils.hpp" @@ -15,9 +14,8 @@ * These unit tests contain the `VariableQueryToken` tag. 
*/ +using log_surgeon::tests::pairwise_comparison_of_strictly_ascending_vector; using log_surgeon::tests::test_equal; -using log_surgeon::tests::test_greater_than; -using log_surgeon::tests::test_less_than; using log_surgeon::wildcard_query_parser::VariableQueryToken; /** @@ -39,18 +37,6 @@ TEST_CASE("comparison_operators", "[VariableQueryToken]") { VariableQueryToken const token{cHasNumId, "abc*123", true}; VariableQueryToken const duplicate_token{cHasNumId, "abc*123", true}; - for (size_t i{0}; i < ordered_tokens.size(); i++) { - CAPTURE(i); - for (size_t j{0}; j < ordered_tokens.size(); j++) { - CAPTURE(j); - if (i < j) { - test_less_than(ordered_tokens[i], ordered_tokens[j]); - } else if (i == j) { - test_equal(ordered_tokens[i], ordered_tokens[j]); - } else { - test_greater_than(ordered_tokens[i], ordered_tokens[j]); - } - } - } + pairwise_comparison_of_strictly_ascending_vector(ordered_tokens); test_equal(token, duplicate_token); } From 47a64f1927ecd3e0fa0161b2d19a7e63b139218e Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 8 Aug 2025 05:00:17 -0400 Subject: [PATCH 035/168] Fix docstrings to refer to the query instead of logs. --- src/log_surgeon/wildcard_query_parser/StaticQueryToken.hpp | 2 +- src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/StaticQueryToken.hpp b/src/log_surgeon/wildcard_query_parser/StaticQueryToken.hpp index be36ced3..1ea87837 100644 --- a/src/log_surgeon/wildcard_query_parser/StaticQueryToken.hpp +++ b/src/log_surgeon/wildcard_query_parser/StaticQueryToken.hpp @@ -9,7 +9,7 @@ namespace log_surgeon::wildcard_query_parser { /** * Represents static-text in the query as a token. * - * Stores the raw log as a string. + * Stores a substring from the query as a string. 
*/ class StaticQueryToken { public: diff --git a/src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp b/src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp index 526f8175..761fd4af 100644 --- a/src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp +++ b/src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp @@ -10,7 +10,7 @@ namespace log_surgeon::wildcard_query_parser { /** * Represents a variable in the query as a token. * - * Stores the raw log as a string with metadata specifying: + * Stores a substring from the query as a string with metadata specifying: * 1. The variable type. * 2. If the variable contains a wildcard. */ From ffe0171238d421802c6ee703fe0b19a38c365800 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 8 Aug 2025 05:02:06 -0400 Subject: [PATCH 036/168] Remove redundancy in descriptions. --- src/log_surgeon/wildcard_query_parser/StaticQueryToken.hpp | 2 +- src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/StaticQueryToken.hpp b/src/log_surgeon/wildcard_query_parser/StaticQueryToken.hpp index 1ea87837..9018ec41 100644 --- a/src/log_surgeon/wildcard_query_parser/StaticQueryToken.hpp +++ b/src/log_surgeon/wildcard_query_parser/StaticQueryToken.hpp @@ -9,7 +9,7 @@ namespace log_surgeon::wildcard_query_parser { /** * Represents static-text in the query as a token. * - * Stores a substring from the query as a string. + * Stores a substring from the query. 
*/ class StaticQueryToken { public: diff --git a/src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp b/src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp index 761fd4af..adc09fdc 100644 --- a/src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp +++ b/src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp @@ -10,7 +10,7 @@ namespace log_surgeon::wildcard_query_parser { /** * Represents a variable in the query as a token. * - * Stores a substring from the query as a string with metadata specifying: + * Stores a substring from the query with metadata specifying: * 1. The variable type. * 2. If the variable contains a wildcard. */ From 6e3684fbd68bd5d54f3b64864056c75a59e7f0ee Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 8 Aug 2025 05:39:05 -0400 Subject: [PATCH 037/168] Prevent accessing back of empty vector. --- .../wildcard_query_parser/QueryInterpretation.hpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp index 0e717345..73d17897 100644 --- a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp +++ b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp @@ -71,12 +71,16 @@ class QueryInterpretation { } StaticQueryToken static_query_token(query_substring); - if (auto& prev_token = m_tokens.back(); - false == m_tokens.empty() && std::holds_alternative(prev_token)) - { + if (m_tokens.empty()) { + m_tokens.emplace_back(std::move(static_query_token)); + return; + } + + auto& prev_token = m_tokens.back(); + if (std::holds_alternative(prev_token)) { std::get(prev_token).append(static_query_token); } else { - m_tokens.emplace_back(static_query_token); + m_tokens.emplace_back(std::move(static_query_token)); } } From 34043bf23230901145f0c5ebff038bbdb7db5ee0 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 8 Aug 2025 05:44:44 -0400 Subject: [PATCH 
038/168] Change has_wildcard to contains_wildcard for consistency. --- .../wildcard_query_parser/VariableQueryToken.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp b/src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp index adc09fdc..f4cb8930 100644 --- a/src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp +++ b/src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp @@ -19,11 +19,11 @@ class VariableQueryToken { VariableQueryToken( uint32_t const variable_type, std::string query_substring, - bool const has_wildcard + bool const contains_wildcard ) : m_variable_type(variable_type), m_query_substring(std::move(query_substring)), - m_has_wildcard(has_wildcard) {} + m_contains_wildcard(contains_wildcard) {} // Must be defined if `operator<=>` is not defaulted. auto operator==(VariableQueryToken const& rhs) const -> bool { @@ -36,7 +36,7 @@ class VariableQueryToken { * Compares member variables in the following order: * 1. `m_variable_type` * 2. `m_query_substring` - * 3. `m_has_wildcard` (with `false` considered less than `true`) + * 3. `m_contains_wildcard` (with `false` considered less than `true`) * * @param rhs The `VariableQueryToken` to compare against. * @return The relative ordering of `this` with respect to `rhs`. 
@@ -49,12 +49,12 @@ class VariableQueryToken { return m_query_substring; } - [[nodiscard]] auto get_has_wildcard() const -> bool { return m_has_wildcard; } + [[nodiscard]] auto get_contains_wildcard() const -> bool { return m_contains_wildcard; } private: uint32_t m_variable_type; std::string m_query_substring; - bool m_has_wildcard{false}; + bool m_contains_wildcard{false}; }; } // namespace log_surgeon::wildcard_query_parser From 1f75b0a58f74eedc3d1d6787d583803bf9c60410 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 8 Aug 2025 05:50:33 -0400 Subject: [PATCH 039/168] Change has_wildcard to contains_wildcard in more places for consistency. --- .../QueryInterpretation.cpp | 10 +++++----- .../VariableQueryToken.cpp | 2 +- tests/test-query-interpretation.cpp | 20 +++++++++---------- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp index b74f6ffd..753ca6d2 100644 --- a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp +++ b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp @@ -78,25 +78,25 @@ auto QueryInterpretation::operator<=>(QueryInterpretation const& rhs) const -> s auto QueryInterpretation::serialize() const -> string { vector token_strings; - vector has_wildcard_strings; + vector contains_wildcard_strings; for (auto const& token : m_tokens) { if (std::holds_alternative(token)) { token_strings.emplace_back(std::get(token).get_query_substring()); - has_wildcard_strings.emplace_back("0"); + contains_wildcard_strings.emplace_back("0"); } else { auto const& var = std::get(token); token_strings.emplace_back( fmt::format("<{}>({})", var.get_variable_type(), var.get_query_substring()) ); - has_wildcard_strings.emplace_back(var.get_has_wildcard() ? "1" : "0"); + contains_wildcard_strings.emplace_back(var.get_contains_wildcard() ? 
"1" : "0"); } } return fmt::format( - "logtype='{}', has_wildcard='{}'", + "logtype='{}', contains_wildcard='{}'", fmt::join(token_strings, ""), - fmt::join(has_wildcard_strings, "") + fmt::join(contains_wildcard_strings, "") ); } } // namespace log_surgeon::wildcard_query_parser diff --git a/src/log_surgeon/wildcard_query_parser/VariableQueryToken.cpp b/src/log_surgeon/wildcard_query_parser/VariableQueryToken.cpp index 49ac99bd..5717d5f2 100644 --- a/src/log_surgeon/wildcard_query_parser/VariableQueryToken.cpp +++ b/src/log_surgeon/wildcard_query_parser/VariableQueryToken.cpp @@ -17,6 +17,6 @@ auto VariableQueryToken::operator<=>(VariableQueryToken const& rhs) const -> str } // bool does not have a <=> operator, so we have to manual order it: - return static_cast(m_has_wildcard) <=> static_cast(rhs.m_has_wildcard); + return static_cast(m_contains_wildcard) <=> static_cast(rhs.m_contains_wildcard); } } // namespace log_surgeon::wildcard_query_parser diff --git a/tests/test-query-interpretation.cpp b/tests/test-query-interpretation.cpp index 7aafbd25..bc4f2782 100644 --- a/tests/test-query-interpretation.cpp +++ b/tests/test-query-interpretation.cpp @@ -26,7 +26,7 @@ using std::string_view; * @brief Creates an empty `QueryInterpretation` and tests serialization. */ TEST_CASE("empty_query_interpretation", "[QueryInterpretation]") { - constexpr string_view cExpectedSerialization{"logtype='', has_wildcard=''"}; + constexpr string_view cExpectedSerialization{"logtype='', contains_wildcard=''"}; QueryInterpretation const query_interpretation; REQUIRE(query_interpretation.serialize() == cExpectedSerialization); @@ -37,7 +37,7 @@ TEST_CASE("empty_query_interpretation", "[QueryInterpretation]") { * @brief Creates a `QueryInterpretation` with only static-text and tests serialization. 
*/ TEST_CASE("static_text_query_interpretation", "[QueryInterpretation]") { - constexpr string_view cExpectedSerialization{"logtype='Static text', has_wildcard='0'"}; + constexpr string_view cExpectedSerialization{"logtype='Static text', contains_wildcard='0'"}; QueryInterpretation const query_interpretation{"Static text"}; REQUIRE(query_interpretation.serialize() == cExpectedSerialization); @@ -49,7 +49,7 @@ TEST_CASE("static_text_query_interpretation", "[QueryInterpretation]") { */ TEST_CASE("variable_query_interpretation", "[QueryInterpretation]") { constexpr uint32_t cHasNumberId{7}; - constexpr string_view cExpectedSerialization{"logtype='<7>(var123)', has_wildcard='0'"}; + constexpr string_view cExpectedSerialization{"logtype='<7>(var123)', contains_wildcard='0'"}; QueryInterpretation const query_interpretation{cHasNumberId, "var123", false}; REQUIRE(query_interpretation.serialize() == cExpectedSerialization); @@ -61,7 +61,7 @@ TEST_CASE("variable_query_interpretation", "[QueryInterpretation]") { */ TEST_CASE("wildcard_variable_query_interpretation", "[QueryInterpretation]") { constexpr uint32_t cFloatId{1}; - constexpr string_view cExpectedSerialization{"logtype='<1>(123.123*)', has_wildcard='1'"}; + constexpr string_view cExpectedSerialization{"logtype='<1>(123.123*)', contains_wildcard='1'"}; QueryInterpretation const query_interpretation{cFloatId, "123.123*", true}; REQUIRE(query_interpretation.serialize() == cExpectedSerialization); @@ -72,7 +72,7 @@ TEST_CASE("wildcard_variable_query_interpretation", "[QueryInterpretation]") { * @brief Appends empty static-text to a `QueryInterpretation` and tests serialization. 
*/ TEST_CASE("append_empty_static_text", "[QueryInterpretation]") { - constexpr string_view cExpectedSerialization{"logtype='', has_wildcard=''"}; + constexpr string_view cExpectedSerialization{"logtype='', contains_wildcard=''"}; QueryInterpretation query_interpretation; query_interpretation.append_static_token(""); @@ -85,7 +85,7 @@ TEST_CASE("append_empty_static_text", "[QueryInterpretation]") { */ TEST_CASE("append_empty_variable", "[QueryInterpretation]") { constexpr uint32_t cEmptyId{0}; - constexpr string_view cExpectedSerialization{"logtype='<0>()', has_wildcard='0'"}; + constexpr string_view cExpectedSerialization{"logtype='<0>()', contains_wildcard='0'"}; QueryInterpretation query_interpretation; query_interpretation.append_variable_token(cEmptyId, "", false); @@ -97,7 +97,7 @@ TEST_CASE("append_empty_variable", "[QueryInterpretation]") { * @brief Appends an empty `QueryInterpretation` to another and tests serialization. */ TEST_CASE("append_empty_query_interpretation", "[QueryInterpretation]") { - constexpr string_view cExpectedSerialization{"logtype='hello', has_wildcard='0'"}; + constexpr string_view cExpectedSerialization{"logtype='hello', contains_wildcard='0'"}; QueryInterpretation query_interpretation{"hello"}; QueryInterpretation empty_query_interpretation; @@ -113,7 +113,7 @@ TEST_CASE("append_tokens", "[QueryInterpretation]") { constexpr uint32_t cFloatId{1}; constexpr uint32_t cIntId{2}; constexpr string_view cExpectedSerialization{ - "logtype='start <2>(*123*) middle <1>(12.3) end', has_wildcard='01000'" + "logtype='start <2>(*123*) middle <1>(12.3) end', contains_wildcard='01000'" }; QueryInterpretation query_interpretation; @@ -130,7 +130,7 @@ TEST_CASE("append_tokens", "[QueryInterpretation]") { * @brief Tests whether adjacent static-text tokens are merged for canonicalization. 
*/ TEST_CASE("append_canonicalization", "[QueryInterpretation]") { - constexpr string_view cExpectedSerialization{"logtype='ab', has_wildcard='0'"}; + constexpr string_view cExpectedSerialization{"logtype='ab', contains_wildcard='0'"}; QueryInterpretation query_interpretation; query_interpretation.append_static_token("a"); @@ -143,7 +143,7 @@ TEST_CASE("append_canonicalization", "[QueryInterpretation]") { * @brief Appends a `QueryInterpretation` to another and tests serialization and canonicalization. */ TEST_CASE("append_query_interpretation", "[QueryInterpretation]") { - constexpr string_view cExpectedSerialization{"logtype='foobar', has_wildcard='0'"}; + constexpr string_view cExpectedSerialization{"logtype='foobar', contains_wildcard='0'"}; QueryInterpretation prefix{"foo"}; QueryInterpretation suffix{"bar"}; From 5017b447b3ca1f8d99babe98218cdbe13018aa2d Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 8 Aug 2025 06:01:23 -0400 Subject: [PATCH 040/168] Define the operator first in the cpp. 
--- .../QueryInterpretation.cpp | 43 ++++++++++--------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp index 753ca6d2..b23b8846 100644 --- a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp +++ b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp @@ -24,27 +24,6 @@ using std::vector; using std::weak_ordering; namespace log_surgeon::wildcard_query_parser { -void QueryInterpretation::append_query_interpretation(QueryInterpretation& suffix) { - if (suffix.m_tokens.empty()) { - return; - } - if (m_tokens.empty()) { - m_tokens = suffix.m_tokens; - return; - } - - auto& last_old_token = m_tokens.back(); - auto const& first_new_token = suffix.m_tokens[0]; - if (std::holds_alternative(last_old_token) - && std::holds_alternative(first_new_token)) - { - std::get(last_old_token) - .append(std::get(first_new_token)); - m_tokens.insert(m_tokens.end(), suffix.m_tokens.begin() + 1, suffix.m_tokens.end()); - } else { - m_tokens.insert(m_tokens.end(), suffix.m_tokens.begin(), suffix.m_tokens.end()); - } -} // Helper to ensure variant is strongly ordered. 
template @@ -76,6 +55,28 @@ auto QueryInterpretation::operator<=>(QueryInterpretation const& rhs) const -> s return strong_ordering::equal; } +void QueryInterpretation::append_query_interpretation(QueryInterpretation& suffix) { + if (suffix.m_tokens.empty()) { + return; + } + if (m_tokens.empty()) { + m_tokens = suffix.m_tokens; + return; + } + + auto& last_old_token = m_tokens.back(); + auto const& first_new_token = suffix.m_tokens[0]; + if (std::holds_alternative(last_old_token) + && std::holds_alternative(first_new_token)) + { + std::get(last_old_token) + .append(std::get(first_new_token)); + m_tokens.insert(m_tokens.end(), suffix.m_tokens.begin() + 1, suffix.m_tokens.end()); + } else { + m_tokens.insert(m_tokens.end(), suffix.m_tokens.begin(), suffix.m_tokens.end()); + } +} + auto QueryInterpretation::serialize() const -> string { vector token_strings; vector contains_wildcard_strings; From e7247b75e47e0c86fac37b5b1e5a2b5adc2d67dc Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 8 Aug 2025 06:03:44 -0400 Subject: [PATCH 041/168] Move append_static_token into cpp. 
--- .../QueryInterpretation.cpp | 19 +++++++++++++++++++ .../QueryInterpretation.hpp | 19 +------------------ 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp index b23b8846..2310b84e 100644 --- a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp +++ b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp @@ -77,6 +77,25 @@ void QueryInterpretation::append_query_interpretation(QueryInterpretation& suffi } } +auto QueryInterpretation::append_static_token(std::string const& query_substring) -> void { + if (query_substring.empty()) { + return; + } + + StaticQueryToken static_query_token(query_substring); + if (m_tokens.empty()) { + m_tokens.emplace_back(std::move(static_query_token)); + return; + } + + auto& prev_token = m_tokens.back(); + if (std::holds_alternative(prev_token)) { + std::get(prev_token).append(static_query_token); + } else { + m_tokens.emplace_back(std::move(static_query_token)); + } +} + auto QueryInterpretation::serialize() const -> string { vector token_strings; vector contains_wildcard_strings; diff --git a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp index 73d17897..ba3659bd 100644 --- a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp +++ b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp @@ -65,24 +65,7 @@ class QueryInterpretation { */ auto append_query_interpretation(QueryInterpretation& suffix) -> void; - auto append_static_token(std::string const& query_substring) -> void { - if (query_substring.empty()) { - return; - } - - StaticQueryToken static_query_token(query_substring); - if (m_tokens.empty()) { - m_tokens.emplace_back(std::move(static_query_token)); - return; - } - - auto& prev_token = m_tokens.back(); - if (std::holds_alternative(prev_token)) { - 
std::get(prev_token).append(static_query_token); - } else { - m_tokens.emplace_back(std::move(static_query_token)); - } - } + auto append_static_token(std::string const& query_substring) -> void; auto append_variable_token( uint32_t const variable_type, From 2c376ea5319de2648a151525d30716fe699e35e0 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 8 Aug 2025 06:04:45 -0400 Subject: [PATCH 042/168] Remove extra newline. --- src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp index 2310b84e..6355464c 100644 --- a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp +++ b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp @@ -24,7 +24,6 @@ using std::vector; using std::weak_ordering; namespace log_surgeon::wildcard_query_parser { - // Helper to ensure variant is strongly ordered. template struct IsStronglyOrderedVariant; From 230f0dc074d0635122ce08224fe6c25c6ea3925a Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 8 Aug 2025 06:09:59 -0400 Subject: [PATCH 043/168] Added docstring for append_static_token. --- .../wildcard_query_parser/QueryInterpretation.hpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp index ba3659bd..2b49d4f5 100644 --- a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp +++ b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp @@ -65,6 +65,16 @@ class QueryInterpretation { */ auto append_query_interpretation(QueryInterpretation& suffix) -> void; + /** + * Appends a static query substring to the current interpretation. + * + * If the input string is empty, the function returns immediately. 
Otherwise, it attempts to + * merge the new static substring into the last token if the last token is a + * `StaticQueryToken`. If merging is not possible, a new `StaticQueryToken` is created from the + * input substring and it is added as a new entry in the token vector. + * + * @param query_substring The static portion of the query to append. + */ auto append_static_token(std::string const& query_substring) -> void; auto append_variable_token( From 6318ad343a637e55418f86e21203c40c89e37527 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 8 Aug 2025 06:12:58 -0400 Subject: [PATCH 044/168] Remove space. --- src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp index 2b49d4f5..c0ac70df 100644 --- a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp +++ b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp @@ -72,7 +72,7 @@ class QueryInterpretation { * merge the new static substring into the last token if the last token is a * `StaticQueryToken`. If merging is not possible, a new `StaticQueryToken` is created from the * input substring and it is added as a new entry in the token vector. - * + * * @param query_substring The static portion of the query to append. */ auto append_static_token(std::string const& query_substring) -> void; From 267c26a1742af95486ba62ce82633a63571b843c Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 8 Aug 2025 06:24:44 -0400 Subject: [PATCH 045/168] Since append_query_interpretations doesn't modify suffix, make it const. 
--- src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp | 2 +- src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp index 6355464c..02a932d2 100644 --- a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp +++ b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp @@ -54,7 +54,7 @@ auto QueryInterpretation::operator<=>(QueryInterpretation const& rhs) const -> s return strong_ordering::equal; } -void QueryInterpretation::append_query_interpretation(QueryInterpretation& suffix) { +void QueryInterpretation::append_query_interpretation(QueryInterpretation const& suffix) { if (suffix.m_tokens.empty()) { return; } diff --git a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp index c0ac70df..e97f6286 100644 --- a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp +++ b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp @@ -63,7 +63,7 @@ class QueryInterpretation { * * @param suffix The `QueryInterpretation` to append. */ - auto append_query_interpretation(QueryInterpretation& suffix) -> void; + auto append_query_interpretation(QueryInterpretation const& suffix) -> void; /** * Appends a static query substring to the current interpretation. From b4728e7985b8ff5198627687c6e3e8eebc3d19a9 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 8 Aug 2025 06:39:40 -0400 Subject: [PATCH 046/168] Fix tidy warnnings. 
--- tests/test-query-interpretation.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/test-query-interpretation.cpp b/tests/test-query-interpretation.cpp index bc4f2782..6dc9b20d 100644 --- a/tests/test-query-interpretation.cpp +++ b/tests/test-query-interpretation.cpp @@ -1,4 +1,3 @@ -#include #include #include #include @@ -100,7 +99,7 @@ TEST_CASE("append_empty_query_interpretation", "[QueryInterpretation]") { constexpr string_view cExpectedSerialization{"logtype='hello', contains_wildcard='0'"}; QueryInterpretation query_interpretation{"hello"}; - QueryInterpretation empty_query_interpretation; + QueryInterpretation const empty_query_interpretation; query_interpretation.append_query_interpretation(empty_query_interpretation); REQUIRE(query_interpretation.serialize() == cExpectedSerialization); } @@ -146,7 +145,7 @@ TEST_CASE("append_query_interpretation", "[QueryInterpretation]") { constexpr string_view cExpectedSerialization{"logtype='foobar', contains_wildcard='0'"}; QueryInterpretation prefix{"foo"}; - QueryInterpretation suffix{"bar"}; + QueryInterpretation const suffix{"bar"}; prefix.append_query_interpretation(suffix); REQUIRE(prefix.serialize() == cExpectedSerialization); } From 8efa83aa470bfeee3a0e6aa6f6f074a843d91454 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 8 Aug 2025 07:14:06 -0400 Subject: [PATCH 047/168] Fix tidy warnings. 
--- src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp | 2 +- tests/comparison_test_utils.hpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp index 02a932d2..76529368 100644 --- a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp +++ b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp @@ -1,9 +1,9 @@ #include "QueryInterpretation.hpp" -#include #include #include #include +#include #include #include diff --git a/tests/comparison_test_utils.hpp b/tests/comparison_test_utils.hpp index 6e3f536d..a0c4b847 100644 --- a/tests/comparison_test_utils.hpp +++ b/tests/comparison_test_utils.hpp @@ -2,6 +2,7 @@ #define LOG_SURGEON_TESTS_COMPARISON_TEST_UTILS_HPP #include +#include #include #include From dc2e7c912cfdb52f164e21bfa79959bdd19035ee Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 8 Aug 2025 08:10:07 -0400 Subject: [PATCH 048/168] Move static_assert into the QueryInterpretations header. --- .../QueryInterpretation.cpp | 24 +++---------------- .../QueryInterpretation.hpp | 23 ++++++++++++++++++ 2 files changed, 26 insertions(+), 21 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp index 76529368..121b4df0 100644 --- a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp +++ b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp @@ -15,35 +15,17 @@ #include using log_surgeon::lexers::ByteLexer; -using std::declval; using std::same_as; using std::string; using std::strong_ordering; -using std::variant; using std::vector; using std::weak_ordering; namespace log_surgeon::wildcard_query_parser { -// Helper to ensure variant is strongly ordered. 
-template -struct IsStronglyOrderedVariant; - -template -struct IsStronglyOrderedVariant> { - static constexpr bool cValue{ - (same_as() <=> declval()), strong_ordering> && ...) - }; -}; - auto QueryInterpretation::operator<=>(QueryInterpretation const& rhs) const -> strong_ordering { - // Make sure the variants types are strongly ordered. - static_assert( - IsStronglyOrderedVariant::cValue, - "All variant types in `m_tokens` must have `operator<=>` returning " - "`std::strong_ordering`." - ); - - // Can't return `<=>` directly as `variant` is weakly ordered regardless of its types. + // `<=>` for a `variant` returns a `weak_ordering`. However, we statically assert the types used + // in `m_tokens` have `<=>` which returns `strong_ordering`. Therefore, we can convert the + // result of `<=>` between two `m_tokens` from `weak_ordering` to `strong_ordering`. auto const tokens_weak_cmp{m_tokens <=> rhs.m_tokens}; if (weak_ordering::less == tokens_weak_cmp) { return strong_ordering::less; diff --git a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp index e97f6286..ba69e38f 100644 --- a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp +++ b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp @@ -2,6 +2,7 @@ #define LOG_SURGEON_WILDCARD_QUERY_PARSER_QUERY_INTERPRETATION_HPP #include +#include #include #include #include @@ -12,6 +13,23 @@ #include namespace log_surgeon::wildcard_query_parser { +// Concepts and structs to ensure the `variant` used by `QueryInterpretation` is strongly ordered. 
+template +concept StronglyOrdered = requires(T a, T b) { + {a <=> b} -> std::same_as; +}; + +template +concept StronglyOrderedVariant = (StronglyOrdered && ...); + +template +struct IsStronglyOrderedVariant; + +template +struct IsStronglyOrderedVariant> { + static constexpr bool cValue{StronglyOrderedVariant}; +}; + /** * Represents a query as a sequence of static-text and variable tokens. * @@ -99,6 +117,11 @@ class QueryInterpretation { private: std::vector> m_tokens; + static_assert( + IsStronglyOrderedVariant::cValue, + "All variant types in `m_tokens` must have `operator<=>` returning " + "`std::strong_ordering`." + ); }; } // namespace log_surgeon::wildcard_query_parser From aaf52f7459576f0326a8b791825b52261329e05f Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 8 Aug 2025 08:10:57 -0400 Subject: [PATCH 049/168] Format. --- src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp index ba69e38f..9e6661ec 100644 --- a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp +++ b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp @@ -16,7 +16,9 @@ namespace log_surgeon::wildcard_query_parser { // Concepts and structs to ensure the `variant` used by `QueryInterpretation` is strongly ordered. template concept StronglyOrdered = requires(T a, T b) { - {a <=> b} -> std::same_as; + { + a <=> b + } -> std::same_as; }; template From 8cdb975fafcafda72566929f186e90e3ac766148 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 8 Aug 2025 09:05:46 -0400 Subject: [PATCH 050/168] Use std::three_way_comparable to simplify concept. 
--- .../QueryInterpretation.hpp | 21 +++++++------------ 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp index 9e6661ec..db6338fe 100644 --- a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp +++ b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp @@ -2,7 +2,6 @@ #define LOG_SURGEON_WILDCARD_QUERY_PARSER_QUERY_INTERPRETATION_HPP #include -#include #include #include #include @@ -13,23 +12,17 @@ #include namespace log_surgeon::wildcard_query_parser { -// Concepts and structs to ensure the `variant` used by `QueryInterpretation` is strongly ordered. -template -concept StronglyOrdered = requires(T a, T b) { - { - a <=> b - } -> std::same_as; -}; - +// Concepts and structs to enforce the `variant` used by `QueryInterpretation` is strongly three way +// comparable. template -concept StronglyOrderedVariant = (StronglyOrdered && ...); +concept StronglyThreeWayComparable = (std::three_way_comparable && ...); template -struct IsStronglyOrderedVariant; +struct IsStronglyThreeWayComparableVariant; template -struct IsStronglyOrderedVariant> { - static constexpr bool cValue{StronglyOrderedVariant}; +struct IsStronglyThreeWayComparableVariant> { + static constexpr bool cValue{StronglyThreeWayComparable}; }; /** @@ -120,7 +113,7 @@ class QueryInterpretation { private: std::vector> m_tokens; static_assert( - IsStronglyOrderedVariant::cValue, + IsStronglyThreeWayComparable::cValue, "All variant types in `m_tokens` must have `operator<=>` returning " "`std::strong_ordering`." ); From 90ff8b98436ec8ea026f302c3307e44ed1c80fc3 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 8 Aug 2025 09:09:09 -0400 Subject: [PATCH 051/168] Fix compiler error from previous commit. 
--- src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp index db6338fe..dd9953fc 100644 --- a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp +++ b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp @@ -113,7 +113,7 @@ class QueryInterpretation { private: std::vector> m_tokens; static_assert( - IsStronglyThreeWayComparable::cValue, + IsStronglyThreeWayComparableVariant::cValue, "All variant types in `m_tokens` must have `operator<=>` returning " "`std::strong_ordering`." ); From 0124d64e0c4e10fa5ea4d0b83b10d5c8a8ab021b Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 8 Aug 2025 09:15:18 -0400 Subject: [PATCH 052/168] Remove unused headers and using declarations. --- src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp index 121b4df0..2dd61ede 100644 --- a/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp +++ b/src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp @@ -1,21 +1,17 @@ #include "QueryInterpretation.hpp" #include -#include #include #include #include #include -#include #include #include #include #include -using log_surgeon::lexers::ByteLexer; -using std::same_as; using std::string; using std::strong_ordering; using std::vector; From 8e4b4843161842679e59b9221a5bfa18b4d60910 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 8 Aug 2025 09:25:36 -0400 Subject: [PATCH 053/168] Use concept to enforce template type in comparison utils. 
--- tests/comparison_test_utils.hpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/tests/comparison_test_utils.hpp b/tests/comparison_test_utils.hpp index a0c4b847..41742f1f 100644 --- a/tests/comparison_test_utils.hpp +++ b/tests/comparison_test_utils.hpp @@ -9,12 +9,15 @@ #include namespace log_surgeon::tests { +template +concept StronglyThreeWayComparable = std::three_way_comparable; + /** * Tests comparison operators when `lhs` == `rhs`. * @param lhs Value on the lhs of the operator. * @param rhs Value on the rhs of the operator. */ -template +template auto test_equal(T const& lhs, T const& rhs) -> void; /** @@ -22,7 +25,7 @@ auto test_equal(T const& lhs, T const& rhs) -> void; * @param lhs Value on the lhs of the operator. * @param rhs Value on the rhs of the operator. */ -template +template auto test_greater_than(T const& lhs, T const& rhs) -> void; /** @@ -30,17 +33,17 @@ auto test_greater_than(T const& lhs, T const& rhs) -> void; * @param lhs Value on the lhs of the operator. * @param rhs Value on the rhs of the operator. */ -template +template auto test_less_than(T const& lhs, T const& rhs) -> void; /** * Tests operators `<=>`, `==`, `!=`, `<`, `<=`, `>`, `>=` for every pair of elements in the vector. * @param ordered_vector Vector where elements are ordered to be strictly ascending. 
*/ -template +template auto pairwise_comparison_of_strictly_ascending_vector(std::vector const& ordered_vector) -> void; -template +template auto test_equal(T const& lhs, T const& rhs) -> void { REQUIRE((lhs <=> rhs) == std::strong_ordering::equal); REQUIRE(lhs == rhs); @@ -58,7 +61,7 @@ auto test_equal(T const& lhs, T const& rhs) -> void { REQUIRE_FALSE(rhs > lhs); } -template +template auto test_greater_than(T const& lhs, T const& rhs) -> void { REQUIRE((lhs <=> rhs) == std::strong_ordering::greater); REQUIRE(lhs != rhs); @@ -76,7 +79,7 @@ auto test_greater_than(T const& lhs, T const& rhs) -> void { REQUIRE_FALSE(rhs > lhs); } -template +template auto test_less_than(T const& lhs, T const& rhs) -> void { REQUIRE((lhs <=> rhs) == std::strong_ordering::less); REQUIRE(lhs != rhs); @@ -94,7 +97,7 @@ auto test_less_than(T const& lhs, T const& rhs) -> void { REQUIRE_FALSE(rhs < lhs); } -template +template auto pairwise_comparison_of_strictly_ascending_vector(std::vector const& ordered_vector) -> void { for (size_t i{0}; i < ordered_vector.size(); i++) { From 21f77bf5add3dfde91eaa56234e6a317b4aa498b Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 12 Aug 2025 10:46:50 -0400 Subject: [PATCH 054/168] Complete refactor of the WildcardExpression class. 
--- CMakeLists.txt | 2 + .../WIldcardCharacter.hpp | 31 +++++ .../WildcardExpression.cpp | 118 +++--------------- .../WildcardExpression.hpp | 111 ++-------------- .../WildcardExpressionView.cpp | 61 +++++++++ .../WildcardExpressionView.hpp | 71 +++++++++++ 6 files changed, 190 insertions(+), 204 deletions(-) create mode 100644 src/log_surgeon/wildcard_query_parser/WIldcardCharacter.hpp create mode 100644 src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp create mode 100644 src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index bd1990b9..cfe1dd21 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -91,6 +91,8 @@ set(SOURCE_FILES src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp + src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp + src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp src/log_surgeon/Lalr1Parser.hpp src/log_surgeon/Lalr1Parser.tpp src/log_surgeon/Lexer.hpp diff --git a/src/log_surgeon/wildcard_query_parser/WIldcardCharacter.hpp b/src/log_surgeon/wildcard_query_parser/WIldcardCharacter.hpp new file mode 100644 index 00000000..26bac742 --- /dev/null +++ b/src/log_surgeon/wildcard_query_parser/WIldcardCharacter.hpp @@ -0,0 +1,31 @@ +#ifndef LOG_SURGEON_QUERY_PARSER_WILDCARD_CHARACTER_HPP +#define LOG_SURGEON_QUERY_PARSER_WILDCARD_CHARACTER_HPP + +#include + +namespace log_surgeon::wildcard_query_parser { +enum class CharType : uint8_t {Normal, GreedyWildcard, NonGreedyWildcard, Escape}; + +class WildcardCharacter { +public: + WildcardCharacter(char const value, CharType const type) : m_value{value}, m_type{type} {} + + [[nodiscard]] auto value() const -> char { return m_value; } + + [[nodiscard]] auto is_greedy_wildcard() const -> bool { + return CharType::GreedyWildcard == m_type; + } + + [[nodiscard]] auto 
is_non_greedy_wildcard() const -> bool { + return CharType::NonGreedyWildcard == m_type; + } + + [[nodiscard]] auto is_escape() const -> bool { return CharType::Escape == m_type; } + +private: + char m_value; + CharType m_type; +}; +} // log_surgeon::wildcard_query_parser + +#endif // LOG_SURGEON_QUERY_PARSER_WILDCARD_CHARACTER_HPP \ No newline at end of file diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp b/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp index 29489e7d..98cd3dc8 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp @@ -1,117 +1,27 @@ #include "WildcardExpression.hpp" #include -#include #include #include -#include - -namespace log_surgeon::query_parser { +namespace log_surgeon::wildcard_query_parser { WildcardExpression::WildcardExpression(std::string processed_search_string) : m_processed_search_string(std::move(processed_search_string)) { - m_is_greedy_wildcard.reserve(m_processed_search_string.size()); - m_is_non_greedy_wildcard.reserve(m_processed_search_string.size()); - m_is_escape.reserve(m_processed_search_string.size()); - bool is_escaped = false; + m_chars.reserve(m_processed_search_string.size()); for (auto const& c : m_processed_search_string) { - if (is_escaped) { - m_is_greedy_wildcard.push_back(false); - m_is_non_greedy_wildcard.push_back(false); - m_is_escape.push_back(false); - is_escaped = false; - } else { - if ('\\' == c) { - m_is_greedy_wildcard.push_back(false); - m_is_non_greedy_wildcard.push_back(false); - m_is_escape.push_back(true); - is_escaped = true; - } else if ('*' == c) { - m_is_greedy_wildcard.push_back(true); - m_is_non_greedy_wildcard.push_back(false); - m_is_escape.push_back(false); + auto type{CharType::Normal}; + if (m_chars.empty() || false == m_chars.back().is_escape()) { + if ('*' == c) { + type = CharType::GreedyWildcard; + m_contains_wildcard = true; } else if ('?' 
== c) { - m_is_greedy_wildcard.push_back(false); - m_is_non_greedy_wildcard.push_back(true); - m_is_escape.push_back(false); - } else { - m_is_greedy_wildcard.push_back(false); - m_is_non_greedy_wildcard.push_back(false); - m_is_escape.push_back(false); - } + type = CharType::NonGreedyWildcard; + m_contains_wildcard = true; + } else if ('\\' == c) { + type = CharType::Escape; + } } + m_chars.emplace_back(c, type); } } - -WildcardExpressionView::WildcardExpressionView( - WildcardExpression const& wildcard_expression, - size_t const begin_idx, - size_t const end_idx -) - : m_expression{&wildcard_expression}, - m_begin_idx{begin_idx}, - m_end_idx{end_idx} { - m_end_idx = std::min(m_end_idx, wildcard_expression.length()); - m_begin_idx = std::min(m_begin_idx, m_end_idx); -} - -auto WildcardExpressionView::extend_to_adjacent_greedy_wildcards() const -> WildcardExpressionView { - auto extended_view{*this}; - bool const prev_char_is_greedy_wildcard{ - m_begin_idx > 0 && m_expression->char_is_greedy_wildcard(m_begin_idx - 1) - }; - if (prev_char_is_greedy_wildcard) { - --extended_view.m_begin_idx; - } - bool const next_char_is_greedy_wildcard{ - m_end_idx < m_expression->length() && m_expression->char_is_greedy_wildcard(m_end_idx) - }; - if (next_char_is_greedy_wildcard) { - ++extended_view.m_end_idx; - } - return extended_view; -} - -auto WildcardExpressionView::surrounded_by_delims_or_wildcards(lexers::ByteLexer const& lexer) const - -> bool { - bool has_preceding_delim{}; - if (0 == m_begin_idx) { - has_preceding_delim = true; - } else { - bool const preceded_by_greedy_wildcard{ - m_expression->char_is_greedy_wildcard(m_begin_idx - 1) - }; - bool const preceded_by_non_greedy_wildcard{ - m_expression->char_is_non_greedy_wildcard(m_begin_idx - 1) - }; - bool const preceded_by_delimiter{ - lexer.is_delimiter(m_expression->get_char(m_begin_idx - 1)) - }; - has_preceding_delim = preceded_by_greedy_wildcard || preceded_by_non_greedy_wildcard - || preceded_by_delimiter; - } - 
- bool has_succeeding_delim{}; - if (m_expression->length() == m_end_idx) { - has_succeeding_delim = true; - } else { - bool const succeeded_by_greedy_wildcard{m_expression->char_is_greedy_wildcard(m_end_idx)}; - bool const succeeded_by_non_greedy_wildcard{ - m_expression->char_is_non_greedy_wildcard(m_end_idx) - }; - // E.g. "foo:", where ':' is a delimiter - bool const succeeded_by_unescaped_delim{ - false == m_expression->char_is_escape(m_end_idx) - && lexer.is_delimiter(m_expression->get_char(m_end_idx)) - }; - // E.g. "foo\\", where '\' is a delimiter - bool const succeeded_by_escaped_delim{ - m_expression->char_is_escape(m_end_idx) - && lexer.is_delimiter(m_expression->get_char(m_end_idx + 1)) - }; - has_succeeding_delim = succeeded_by_greedy_wildcard || succeeded_by_non_greedy_wildcard - || succeeded_by_unescaped_delim || succeeded_by_escaped_delim; - } - return has_preceding_delim && has_succeeding_delim; -} -} // namespace log_surgeon::query_parser +} // namespace log_surgeon::wildcard_query_parser diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp b/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp index fc1d8c02..7c3751de 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp @@ -1,125 +1,36 @@ #ifndef LOG_SURGEON_QUERY_PARSER_WILDCARD_EXPRESSION_HPP #define LOG_SURGEON_QUERY_PARSER_WILDCARD_EXPRESSION_HPP -#include #include #include -#include +#include -namespace log_surgeon::query_parser { +namespace log_surgeon::wildcard_query_parser { /** - * A pattern for matching strings. The pattern supports two types of wildcards: + * An expression for matching strings. The expression supports two types of wildcards: * - '*' matches zero or more characters * - '?' matches any single character * - * To match a literal '*' or '?', the pattern should escape it with a backslash (`\`). 
+ * To match a literal '*' or '?', the expression should escape it with a backslash (`\`). */ class WildcardExpression { public: explicit WildcardExpression(std::string processed_search_string); - [[nodiscard]] auto substr(size_t const begin_idx, size_t const length) const -> std::string { - return m_processed_search_string.substr(begin_idx, length); + [[nodiscard]] auto get_chars() const -> std::vector const& { + return m_chars; } - [[nodiscard]] auto length() const -> size_t { return m_processed_search_string.size(); } - - [[nodiscard]] auto char_is_greedy_wildcard(size_t const idx) const -> bool { - return m_is_greedy_wildcard[idx]; - } - - [[nodiscard]] auto char_is_non_greedy_wildcard(size_t const idx) const -> bool { - return m_is_non_greedy_wildcard[idx]; - } - - [[nodiscard]] auto char_is_escape(size_t const idx) const -> bool { return m_is_escape[idx]; } - - [[nodiscard]] auto get_char(size_t const idx) const -> char { - return m_processed_search_string[idx]; + [[nodiscard]] auto get_string() const -> std::string const& { + return m_processed_search_string; } private: - std::vector m_is_greedy_wildcard; - std::vector m_is_non_greedy_wildcard; - std::vector m_is_escape; + bool m_contains_wildcard; + std::vector m_chars; std::string m_processed_search_string; }; - -/** - * A view of a WildcardExpression. - */ -class WildcardExpressionView { -public: - /** - * Creates a view of the range [begin_idx, end_idx) in the given wildcard expression. - * - * NOTE: To ensure validity, end_idx is limited to wildcard_expression.length(), and then - * begin_idx is limited to end_idx. - * @param wildcard_expression - * @param begin_idx - * @param end_idx - */ - WildcardExpressionView( - WildcardExpression const& wildcard_expression, - size_t begin_idx, - size_t end_idx - ); - - /** - * @return A copy of this view, but extended to include adjacent greedy wildcards. 
- */ - [[nodiscard]] auto extend_to_adjacent_greedy_wildcards() const -> WildcardExpressionView; - - [[nodiscard]] auto is_greedy_wildcard() const -> bool { - return 1 == length() && m_expression->char_is_greedy_wildcard(m_begin_idx); - } - - [[nodiscard]] auto is_non_greedy_wildcard() const -> bool { - return 1 == length() && m_expression->char_is_non_greedy_wildcard(m_begin_idx); - } - - [[nodiscard]] auto starts_or_ends_with_greedy_wildcard() const -> bool { - return length() > 0 - && (m_expression->char_is_greedy_wildcard(m_begin_idx) - || m_expression->char_is_greedy_wildcard(m_end_idx - 1)); - } - - /** - * @param lexer - * @return Whether the substring in view is surrounded by delimiters or unescaped wildcards. - * NOTE: This method assumes that the viewed string is preceded and succeeded by a delimiter. - */ - [[nodiscard]] auto surrounded_by_delims_or_wildcards(lexers::ByteLexer const& lexer) const - -> bool; - - [[nodiscard]] auto length() const -> size_t { return m_end_idx - m_begin_idx; } - - [[nodiscard]] auto char_is_greedy_wildcard(size_t const idx) const -> bool { - return m_expression->char_is_greedy_wildcard(m_begin_idx + idx); - } - - [[nodiscard]] auto char_is_non_greedy_wildcard(size_t const idx) const -> bool { - return m_expression->char_is_non_greedy_wildcard(m_begin_idx + idx); - } - - [[nodiscard]] auto char_is_escape(size_t const idx) const -> bool { - return m_expression->char_is_escape(m_begin_idx + idx); - } - - [[nodiscard]] auto get_char(size_t const idx) const -> char { - return m_expression->get_char(m_begin_idx + idx); - } - - [[nodiscard]] auto get_value() const -> std::string { - return m_expression->substr(m_begin_idx, m_end_idx - m_begin_idx); - } - -private: - WildcardExpression const* m_expression; - size_t m_begin_idx; - size_t m_end_idx; -}; -} // namespace log_surgeon::query_parser +} // namespace log_surgeon::wildcard_query_parser #endif // LOG_SURGEON_QUERY_PARSER_WILDCARD_EXPRESSION_HPP diff --git 
a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp new file mode 100644 index 00000000..1c5fdb3a --- /dev/null +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp @@ -0,0 +1,61 @@ +#include "WildcardExpressionView.hpp" + +#include +#include +#include +#include + +#include + +using std::string; + +namespace log_surgeon::wildcard_query_parser { +WildcardExpressionView::WildcardExpressionView( + WildcardExpression const& expression, + size_t begin_idx, + size_t end_idx +) + : m_expression{&expression} { + std::span const full_span{m_expression->get_chars()}; + end_idx = std::min(end_idx, full_span.size()); + begin_idx = std::min(begin_idx, end_idx); + m_chars = full_span.subspan(begin_idx, end_idx - begin_idx); + std::string_view const full_view{m_expression->get_string()}; + m_search_string = full_view.substr(begin_idx, end_idx - begin_idx); +} + +auto WildcardExpressionView::extend_to_adjacent_greedy_wildcards() const -> WildcardExpressionView { + auto [begin_idx, end_idx]{get_indicies()}; + + std::span const full_span{m_expression->get_chars()}; + + if (begin_idx > 0 && full_span[begin_idx - 1].is_greedy_wildcard()) { + --begin_idx; + } + if (end_idx < full_span.size() && full_span[end_idx].is_greedy_wildcard()) { + ++end_idx; + } + return {*m_expression, begin_idx, end_idx}; +} + +auto WildcardExpressionView::generate_regex_string() const -> string { + string regex_string; + for (auto const& wildcard_char : m_chars) { + if (wildcard_char.is_escape()) { + continue; + } + auto const& value{wildcard_char.value()}; + if (wildcard_char.is_greedy_wildcard()) { + regex_string += ".*"; + } else if (wildcard_char.is_non_greedy_wildcard()) { + regex_string += "."; + } else if (SchemaParser::get_special_regex_characters().contains(value)) { + regex_string += "\\"; + regex_string += value; + } else { + regex_string += value; + } + } + return regex_string; +} +} // namespace 
log_surgeon::wildcard_query_parser diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp new file mode 100644 index 00000000..15395281 --- /dev/null +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp @@ -0,0 +1,71 @@ +#ifndef LOG_SURGEON_QUERY_PARSER_WILDCARD_EXPRESSION_VIEW_HPP +#define LOG_SURGEON_QUERY_PARSER_WILDCARD_EXPRESSION_VIEW_HPP + +#include +#include +#include +#include + +#include + +namespace log_surgeon::wildcard_query_parser { +/** + * A lightweight, non-owning view into a contiguous subrange of a WildcardExpression. + * + * This class provides a span to the underlying character vector and a view into the corresponding + * search string. It ensures that these are always valid by clamping the provided indices to the + * expression's length. + * + * Utilities include: + * - Generating a regex string for the view. + * - Checking if the view starts or ends with a greedy wildcard. + * - Extending the view to include adjacent greedy wildcards. + */ +class WildcardExpressionView { +public: + /** + * Creates a view of the range [`begin_idx`, `end_idx`) in the given wildcard expression. + * + * NOTE: To ensure validity, `end_idx` is limited to `wildcard_expression.length()`, and then + * `begin_idx` is limited to `end_idx`. + * @param expression + * @param begin_idx + * @param end_idx + */ + WildcardExpressionView( + WildcardExpression const& expression, + size_t begin_idx, + size_t end_idx + ); + + /** + * @return A copy of this view, but extended to include adjacent greedy wildcards. 
+ */ + [[nodiscard]] auto extend_to_adjacent_greedy_wildcards() const -> WildcardExpressionView; + + [[nodiscard]] auto starts_or_ends_with_greedy_wildcard() const -> bool { + return false == m_chars.empty() + && (m_chars[0].is_greedy_wildcard() || m_chars.back().is_greedy_wildcard()); + } + + [[nodiscard]] auto generate_regex_string() const -> std::string; + + [[nodiscard]] auto get_string() const -> std::string_view { + return m_search_string; + } + +private: + [[nodiscard]] auto get_indicies() const -> std::pair { + auto const& full_chars{m_expression->get_chars()}; + auto const begin_ptr{m_chars.data()}; + auto begin_idx{static_cast(begin_ptr - full_chars.data())}; + return {begin_idx, begin_idx + m_chars.size()}; + } + + WildcardExpression const* m_expression; + std::span m_chars; + std::string_view m_search_string; +}; +} // namespace log_surgeon::wildcard_query_parser + +#endif // LOG_SURGEON_QUERY_PARSER_WILDCARD_EXPRESSION_VIEW_HPP From 59432d469308ae6ef079e6d0378678fcff3d6773 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 12 Aug 2025 10:48:28 -0400 Subject: [PATCH 055/168] Format. 
--- .../wildcard_query_parser/WIldcardCharacter.hpp | 13 +++++++++---- .../wildcard_query_parser/WildcardExpression.cpp | 6 +++--- .../WildcardExpressionView.hpp | 16 +++++----------- 3 files changed, 17 insertions(+), 18 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/WIldcardCharacter.hpp b/src/log_surgeon/wildcard_query_parser/WIldcardCharacter.hpp index 26bac742..6fcbe212 100644 --- a/src/log_surgeon/wildcard_query_parser/WIldcardCharacter.hpp +++ b/src/log_surgeon/wildcard_query_parser/WIldcardCharacter.hpp @@ -4,7 +4,12 @@ #include namespace log_surgeon::wildcard_query_parser { -enum class CharType : uint8_t {Normal, GreedyWildcard, NonGreedyWildcard, Escape}; +enum class CharType : uint8_t { + Normal, + GreedyWildcard, + NonGreedyWildcard, + Escape +}; class WildcardCharacter { public: @@ -21,11 +26,11 @@ class WildcardCharacter { } [[nodiscard]] auto is_escape() const -> bool { return CharType::Escape == m_type; } - + private: char m_value; CharType m_type; }; -} // log_surgeon::wildcard_query_parser +} // namespace log_surgeon::wildcard_query_parser -#endif // LOG_SURGEON_QUERY_PARSER_WILDCARD_CHARACTER_HPP \ No newline at end of file +#endif // LOG_SURGEON_QUERY_PARSER_WILDCARD_CHARACTER_HPP diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp b/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp index 98cd3dc8..1b3fc787 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp @@ -15,11 +15,11 @@ WildcardExpression::WildcardExpression(std::string processed_search_string) type = CharType::GreedyWildcard; m_contains_wildcard = true; } else if ('?' 
== c) { - type = CharType::NonGreedyWildcard; + type = CharType::NonGreedyWildcard; m_contains_wildcard = true; } else if ('\\' == c) { - type = CharType::Escape; - } + type = CharType::Escape; + } } m_chars.emplace_back(c, type); } diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp index 15395281..fae4f953 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp @@ -32,11 +32,7 @@ class WildcardExpressionView { * @param begin_idx * @param end_idx */ - WildcardExpressionView( - WildcardExpression const& expression, - size_t begin_idx, - size_t end_idx - ); + WildcardExpressionView(WildcardExpression const& expression, size_t begin_idx, size_t end_idx); /** * @return A copy of this view, but extended to include adjacent greedy wildcards. @@ -49,10 +45,8 @@ class WildcardExpressionView { } [[nodiscard]] auto generate_regex_string() const -> std::string; - - [[nodiscard]] auto get_string() const -> std::string_view { - return m_search_string; - } + + [[nodiscard]] auto get_string() const -> std::string_view { return m_search_string; } private: [[nodiscard]] auto get_indicies() const -> std::pair { @@ -61,9 +55,9 @@ class WildcardExpressionView { auto begin_idx{static_cast(begin_ptr - full_chars.data())}; return {begin_idx, begin_idx + m_chars.size()}; } - + WildcardExpression const* m_expression; - std::span m_chars; + std::span m_chars; std::string_view m_search_string; }; } // namespace log_surgeon::wildcard_query_parser From c5ea9a8088cbd0411bf95a58076eb98f6f223633 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 12 Aug 2025 10:51:22 -0400 Subject: [PATCH 056/168] Remove redundant docstring. 
--- .../wildcard_query_parser/WildcardExpressionView.hpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp index fae4f953..fca27ebb 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp @@ -23,15 +23,6 @@ namespace log_surgeon::wildcard_query_parser { */ class WildcardExpressionView { public: - /** - * Creates a view of the range [`begin_idx`, `end_idx`) in the given wildcard expression. - * - * NOTE: To ensure validity, `end_idx` is limited to `wildcard_expression.length()`, and then - * `begin_idx` is limited to `end_idx`. - * @param expression - * @param begin_idx - * @param end_idx - */ WildcardExpressionView(WildcardExpression const& expression, size_t begin_idx, size_t end_idx); /** From be8d931e8288f0fbf9ed6af24f4be0e47e6f8cdd Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 12 Aug 2025 11:08:58 -0400 Subject: [PATCH 057/168] Add is_well_formed check. --- .../WildcardExpressionView.cpp | 17 +++++++++++++++++ .../WildcardExpressionView.hpp | 16 ++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp index 1c5fdb3a..f109b9d9 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp @@ -38,6 +38,23 @@ auto WildcardExpressionView::extend_to_adjacent_greedy_wildcards() const -> Wild return {*m_expression, begin_idx, end_idx}; } +auto WildcardExpressionView::is_well_formed() const -> bool { + if (m_chars.empty()) { + // Empty substring is trivially well-formed as it has no characters to violate requirements. 
+ return true; + } + auto const [begin_idx, end_idx]{get_indicies()}; + if (begin_idx > 0 && m_expression->get_chars()[begin_idx - 1].is_escape()) { + // Substring starting immediately after an escape char is invalid. + return false; + } + if (m_chars.back().is_escape()) { + // Substring ending on an escape char is invalid. + return false; + } + return true; +} + auto WildcardExpressionView::generate_regex_string() const -> string { string regex_string; for (auto const& wildcard_char : m_chars) { diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp index fca27ebb..78ea96e9 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp @@ -35,6 +35,22 @@ class WildcardExpressionView { && (m_chars[0].is_greedy_wildcard() || m_chars.back().is_greedy_wildcard()); } + /** + * Checks whether the `WildcardExpressionView` is a well-formed subrange. + * + * A subrange is considered well-formed if: + * - It does not start immediately after an escaped character in the original expression. + * - It does not end on an escape character. + * + * This helps to avoid invalid substrings that are not consistent with the original intention + * of the WildcardExpression. For example take the search query "* \*text\* *": + * - The substring "*text" would incorrectly indicate a literal wildcard. + * - The substring "text\" would have no clear meaning. + * + * @return `true` if the substring is well-formed, `false` otherwise. 
+ */ + [[nodiscard]] auto is_well_formed() const -> bool; + [[nodiscard]] auto generate_regex_string() const -> std::string; [[nodiscard]] auto get_string() const -> std::string_view { return m_search_string; } From 715479822a27349ba87909f36a801721d80f62f0 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 13 Aug 2025 07:29:52 -0400 Subject: [PATCH 058/168] Fix typo in WildcardCharacter.hpp file name and add it to CMakeLists.txt. --- CMakeLists.txt | 1 + .../{WIldcardCharacter.hpp => WildcardCharacter.hpp} | 0 src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp | 2 +- 3 files changed, 2 insertions(+), 1 deletion(-) rename src/log_surgeon/wildcard_query_parser/{WIldcardCharacter.hpp => WildcardCharacter.hpp} (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index cfe1dd21..3f1cb5b2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -89,6 +89,7 @@ set(SOURCE_FILES src/log_surgeon/wildcard_query_parser/StaticQueryToken.hpp src/log_surgeon/wildcard_query_parser/VariableQueryToken.cpp src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp + src/log_surgeon/wildcard_query_parser/WildcardCharacter.hpp src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp diff --git a/src/log_surgeon/wildcard_query_parser/WIldcardCharacter.hpp b/src/log_surgeon/wildcard_query_parser/WildcardCharacter.hpp similarity index 100% rename from src/log_surgeon/wildcard_query_parser/WIldcardCharacter.hpp rename to src/log_surgeon/wildcard_query_parser/WildcardCharacter.hpp diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp b/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp index 7c3751de..c8ce6c51 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp @@ -4,7 +4,7 @@ #include #include -#include +#include namespace
log_surgeon::wildcard_query_parser { /** From 0bcd93b6713ff9e966bb0a2faa6e824a6ac605e5 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 13 Aug 2025 07:59:22 -0400 Subject: [PATCH 059/168] Move contains_wildcard into the view generate_regex_string method; Add docstring for this method as well. --- .../wildcard_query_parser/WildcardExpression.cpp | 2 -- .../wildcard_query_parser/WildcardExpression.hpp | 1 - .../WildcardExpressionView.cpp | 9 +++++++-- .../WildcardExpressionView.hpp | 16 +++++++++++++++- 4 files changed, 22 insertions(+), 6 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp b/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp index 1b3fc787..8bd004c3 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp @@ -13,10 +13,8 @@ WildcardExpression::WildcardExpression(std::string processed_search_string) if (m_chars.empty() || false == m_chars.back().is_escape()) { if ('*' == c) { type = CharType::GreedyWildcard; - m_contains_wildcard = true; } else if ('?' 
== c) { type = CharType::NonGreedyWildcard; - m_contains_wildcard = true; } else if ('\\' == c) { type = CharType::Escape; } diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp b/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp index c8ce6c51..3c477952 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp @@ -27,7 +27,6 @@ class WildcardExpression { } private: - bool m_contains_wildcard; std::vector m_chars; std::string m_processed_search_string; }; diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp index f109b9d9..0f21ac8d 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -55,8 +56,10 @@ auto WildcardExpressionView::is_well_formed() const -> bool { return true; } -auto WildcardExpressionView::generate_regex_string() const -> string { +auto WildcardExpressionView::generate_regex_string() const -> std::pair { string regex_string; + bool regex_contains_wildcard{false}; + for (auto const& wildcard_char : m_chars) { if (wildcard_char.is_escape()) { continue; @@ -64,8 +67,10 @@ auto WildcardExpressionView::generate_regex_string() const -> string { auto const& value{wildcard_char.value()}; if (wildcard_char.is_greedy_wildcard()) { regex_string += ".*"; + regex_contains_wildcard = true; } else if (wildcard_char.is_non_greedy_wildcard()) { regex_string += "."; + regex_contains_wildcard = true; } else if (SchemaParser::get_special_regex_characters().contains(value)) { regex_string += "\\"; regex_string += value; @@ -73,6 +78,6 @@ auto WildcardExpressionView::generate_regex_string() const -> string { regex_string += value; } } - return regex_string; + return {regex_string, regex_contains_wildcard}; } } 
// namespace log_surgeon::wildcard_query_parser diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp index 78ea96e9..60ab124b 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp @@ -5,6 +5,7 @@ #include #include #include +#include #include @@ -51,7 +52,20 @@ class WildcardExpressionView { */ [[nodiscard]] auto is_well_formed() const -> bool; - [[nodiscard]] auto generate_regex_string() const -> std::string; + /** + * Builds a regex string representing this view. + * + * Converts: + * - Greedy wildcards (`*`) -> `.*`. + * - Non-greedy wildcards (`?`) -> `.`. + * - Escaped wildcards (`\*`, `\?`) -> literal (`*`, `?`). + * - Regex special characters (e.g., `.`) -> escaped literal (e.g., `\.`). + * + * @return a pair containing: + * - `std::string` storing the regex string. + * - `bool` indicating whether the regex string contains any wildcards. + */ + [[nodiscard]] auto generate_regex_string() const -> std::pair; [[nodiscard]] auto get_string() const -> std::string_view { return m_search_string; } From db937eb44b786d37728da2999b85953204d60147 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 13 Aug 2025 08:00:31 -0400 Subject: [PATCH 060/168] Add missing include. 
--- src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp index 0f21ac8d..bebdfa9c 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include From 12308df617940588fac11ed01195b6f24318eaa4 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 13 Aug 2025 08:00:46 -0400 Subject: [PATCH 061/168] Fix typo in add missing include. --- .../wildcard_query_parser/WildcardExpressionView.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp index bebdfa9c..35de4321 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include From 56460c964f74b426d9c935cbeecea1121bdfb3be Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 13 Aug 2025 08:01:52 -0400 Subject: [PATCH 062/168] Fix spelling. 
--- .../wildcard_query_parser/WildcardExpressionView.cpp | 4 ++-- .../wildcard_query_parser/WildcardExpressionView.hpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp index 35de4321..d9edbfef 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp @@ -27,7 +27,7 @@ WildcardExpressionView::WildcardExpressionView( } auto WildcardExpressionView::extend_to_adjacent_greedy_wildcards() const -> WildcardExpressionView { - auto [begin_idx, end_idx]{get_indicies()}; + auto [begin_idx, end_idx]{get_indices()}; std::span const full_span{m_expression->get_chars()}; @@ -45,7 +45,7 @@ auto WildcardExpressionView::is_well_formed() const -> bool { // Empty substring is trivially well-formed as it has no characters to violate requirements. return true; } - auto const [begin_idx, end_idx]{get_indicies()}; + auto const [begin_idx, end_idx]{get_indices()}; if (begin_idx > 0 && m_expression->get_chars()[begin_idx - 1].is_escape()) { // Substring starting immediately after an escape char is invalid. 
return false; diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp index 60ab124b..80e118a9 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp @@ -70,7 +70,7 @@ class WildcardExpressionView { [[nodiscard]] auto get_string() const -> std::string_view { return m_search_string; } private: - [[nodiscard]] auto get_indicies() const -> std::pair { + [[nodiscard]] auto get_indices() const -> std::pair { auto const& full_chars{m_expression->get_chars()}; auto const begin_ptr{m_chars.data()}; auto begin_idx{static_cast(begin_ptr - full_chars.data())}; From b1838d49938b375a391a6202cdc221b3654306c0 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 13 Aug 2025 08:12:22 -0400 Subject: [PATCH 063/168] Reserve regex string size. --- src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp index d9edbfef..4e063a1d 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp @@ -59,6 +59,7 @@ auto WildcardExpressionView::is_well_formed() const -> bool { auto WildcardExpressionView::generate_regex_string() const -> std::pair { string regex_string; + regex_string.reserve(m_chars.size() * 2); bool regex_contains_wildcard{false}; for (auto const& wildcard_char : m_chars) { From 248f746bfc6687144d0cca37493b2407d4a5b565 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 13 Aug 2025 08:18:56 -0400 Subject: [PATCH 064/168] Remove unused header; Remove reference to char. 
--- src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp b/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp index 8bd004c3..2030ae20 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp @@ -1,6 +1,5 @@ #include "WildcardExpression.hpp" -#include #include #include @@ -8,7 +7,7 @@ namespace log_surgeon::wildcard_query_parser { WildcardExpression::WildcardExpression(std::string processed_search_string) : m_processed_search_string(std::move(processed_search_string)) { m_chars.reserve(m_processed_search_string.size()); - for (auto const& c : m_processed_search_string) { + for (auto const c : m_processed_search_string) { auto type{CharType::Normal}; if (m_chars.empty() || false == m_chars.back().is_escape()) { if ('*' == c) { From 5bb3fa50af57a5343334fdda48e07dbd059df6bf Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 13 Aug 2025 08:25:39 -0400 Subject: [PATCH 065/168] Fix tidy warnings. 
--- src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp | 2 ++ .../wildcard_query_parser/WildcardExpressionView.cpp | 1 + .../wildcard_query_parser/WildcardExpressionView.hpp | 3 ++- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp b/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp index 2030ae20..891b1d42 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp @@ -3,6 +3,8 @@ #include #include +#include + namespace log_surgeon::wildcard_query_parser { WildcardExpression::WildcardExpression(std::string processed_search_string) : m_processed_search_string(std::move(processed_search_string)) { diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp index 4e063a1d..9b5422b5 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp @@ -8,6 +8,7 @@ #include #include +#include using std::string; diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp index 80e118a9..ca0f4da5 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp @@ -7,6 +7,7 @@ #include #include +#include #include namespace log_surgeon::wildcard_query_parser { @@ -72,7 +73,7 @@ class WildcardExpressionView { private: [[nodiscard]] auto get_indices() const -> std::pair { auto const& full_chars{m_expression->get_chars()}; - auto const begin_ptr{m_chars.data()}; + auto const* begin_ptr{m_chars.data()}; auto begin_idx{static_cast(begin_ptr - full_chars.data())}; return {begin_idx, begin_idx + m_chars.size()}; } From ddd22da0e454f772f429398299d9cc98b5ccb19f Mon Sep 17 00:00:00 
2001 From: SharafMohamed Date: Thu, 14 Aug 2025 10:00:36 -0400 Subject: [PATCH 066/168] Improve docstring. --- .../wildcard_query_parser/WildcardExpressionView.hpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp index ca0f4da5..826800e2 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp @@ -56,11 +56,10 @@ class WildcardExpressionView { /** * Builds a regex string representing this view. * - * Converts: - * - Greedy wildcards (`*`) -> `.*`. - * - Non-greedy wildcards (`?`) -> `.`. - * - Escaped wildcards (`\*`, `\?`) -> literal (`*`, `?`). - * - Regex special characters (e.g., `.`) -> escaped literal (e.g., `\.`). + * Transformations: + * - Greedy wildcard (`*`) -> `.*`. + * - Non-greedy wildcard (`?`) -> `.`. + * - All other characters preserved literally, escaping (with `\`) as needed for regex syntax. * * @return a pair containing: * - `std::string` storing the regex string. From f2caabecb992d27fd80872c76dc1ea79ea04f499 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 14 Aug 2025 10:11:47 -0400 Subject: [PATCH 067/168] Remove doc comments from cpp and move them into hpp docstring. 
--- .../WildcardExpressionView.cpp | 3 --- .../WildcardExpressionView.hpp | 14 ++++++++------ 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp index 9b5422b5..1852aae8 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp @@ -43,16 +43,13 @@ auto WildcardExpressionView::extend_to_adjacent_greedy_wildcards() const -> Wild auto WildcardExpressionView::is_well_formed() const -> bool { if (m_chars.empty()) { - // Empty substring is trivially well-formed as it has no characters to violate requirements. return true; } auto const [begin_idx, end_idx]{get_indices()}; if (begin_idx > 0 && m_expression->get_chars()[begin_idx - 1].is_escape()) { - // Substring starting immediately after an escape char is invalid. return false; } if (m_chars.back().is_escape()) { - // Substring ending on an escape char is invalid. return false; } return true; diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp index 826800e2..c89b5382 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp @@ -38,16 +38,18 @@ class WildcardExpressionView { } /** - * Checks whether the `WildcardExpressionView` is a well-formed subrange. + * Checks whether this `WildcardExpressionView` represents a well-formed subrange. * - * A subrange is considered well-formed if: + * A subrange is well-formed if: * - It does not start immediately after an escaped character in the original expression. * - It does not end on an escape character. * - * This helps to avoid invalid substrings that are not consistent with the original intention - * of the WildcardExpression. 
For example take the search query "* \*text\* *": - * - The substring "*text" would incorrectly indicate a literal wildcard. - * - The substring "text\" would have no clear meaning. + * By these rules, an empty substring is always well-formed. + * + * These constraints ensure well-formed substrings are consistent with the original intention of + * the WildcardExpression. For example, given the search query "* \*text\* *": + * - The substring "*text" is not well-formed, as it incorrectly indicates a literal wildcard. + * - The substring "text\" is not well-formed, as a single `\` has no clear meaning. * * @return `true` if the substring is well-formed, `false` otherwise. */ From c105a2908b1069940e6c8ced3a0af413fa1a2e55 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 14 Aug 2025 10:14:11 -0400 Subject: [PATCH 068/168] Make CharType nested within WildcardCharacter. --- .../wildcard_query_parser/WildcardCharacter.hpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/WildcardCharacter.hpp b/src/log_surgeon/wildcard_query_parser/WildcardCharacter.hpp index 6fcbe212..29bbfba9 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardCharacter.hpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardCharacter.hpp @@ -4,13 +4,6 @@ #include namespace log_surgeon::wildcard_query_parser { -enum class CharType : uint8_t { - Normal, - GreedyWildcard, - NonGreedyWildcard, - Escape -}; - class WildcardCharacter { public: WildcardCharacter(char const value, CharType const type) : m_value{value}, m_type{type} {} @@ -28,6 +21,13 @@ class WildcardCharacter { [[nodiscard]] auto is_escape() const -> bool { return CharType::Escape == m_type; } private: + enum class CharType : uint8_t { + Normal, + GreedyWildcard, + NonGreedyWildcard, + Escape + }; + char m_value; CharType m_type; }; From 9e676793ddcc42af19c27e16f9ffd66482babdb5 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 14 Aug 2025 10:18:39 -0400 
Subject: [PATCH 069/168] Rename CharType to Type; Move Type to be public; Use WildcardCharacter::Type in WildcardExpression. --- .../WildcardCharacter.hpp | 24 +++++++++---------- .../WildcardExpression.cpp | 8 +++---- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/WildcardCharacter.hpp b/src/log_surgeon/wildcard_query_parser/WildcardCharacter.hpp index 29bbfba9..df2aefe4 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardCharacter.hpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardCharacter.hpp @@ -6,30 +6,30 @@ namespace log_surgeon::wildcard_query_parser { class WildcardCharacter { public: - WildcardCharacter(char const value, CharType const type) : m_value{value}, m_type{type} {} + enum class Type : uint8_t { + Normal, + GreedyWildcard, + NonGreedyWildcard, + Escape + }; + + WildcardCharacter(char const value, Type const type) : m_value{value}, m_type{type} {} [[nodiscard]] auto value() const -> char { return m_value; } [[nodiscard]] auto is_greedy_wildcard() const -> bool { - return CharType::GreedyWildcard == m_type; + return Type::GreedyWildcard == m_type; } [[nodiscard]] auto is_non_greedy_wildcard() const -> bool { - return CharType::NonGreedyWildcard == m_type; + return Type::NonGreedyWildcard == m_type; } - [[nodiscard]] auto is_escape() const -> bool { return CharType::Escape == m_type; } + [[nodiscard]] auto is_escape() const -> bool { return Type::Escape == m_type; } private: - enum class CharType : uint8_t { - Normal, - GreedyWildcard, - NonGreedyWildcard, - Escape - }; - char m_value; - CharType m_type; + Type m_type; }; } // namespace log_surgeon::wildcard_query_parser diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp b/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp index 891b1d42..f86213da 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp @@ -10,14 
+10,14 @@ WildcardExpression::WildcardExpression(std::string processed_search_string) : m_processed_search_string(std::move(processed_search_string)) { m_chars.reserve(m_processed_search_string.size()); for (auto const c : m_processed_search_string) { - auto type{CharType::Normal}; + auto type{WildcardCharacter::Type::Normal}; if (m_chars.empty() || false == m_chars.back().is_escape()) { if ('*' == c) { - type = CharType::GreedyWildcard; + type = WildcardCharacter::Type::GreedyWildcard; } else if ('?' == c) { - type = CharType::NonGreedyWildcard; + type = WildcardCharacter::Type::NonGreedyWildcard; } else if ('\\' == c) { - type = CharType::Escape; + type = WildcardCharacter::Type::Escape; } } m_chars.emplace_back(c, type); From 7a91cf62e70a74a4e430b41c722f21a201eb9fb8 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 14 Aug 2025 10:22:27 -0400 Subject: [PATCH 070/168] Rename m_processed_search_string to m_search_string. --- .../wildcard_query_parser/WildcardExpression.cpp | 8 ++++---- .../wildcard_query_parser/WildcardExpression.hpp | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp b/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp index f86213da..2ddd4bd8 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp @@ -6,10 +6,10 @@ #include namespace log_surgeon::wildcard_query_parser { -WildcardExpression::WildcardExpression(std::string processed_search_string) - : m_processed_search_string(std::move(processed_search_string)) { - m_chars.reserve(m_processed_search_string.size()); - for (auto const c : m_processed_search_string) { +WildcardExpression::WildcardExpression(std::string search_string) + : m_search_string(std::move(search_string)) { + m_chars.reserve(m_search_string.size()); + for (auto const c : m_search_string) { auto type{WildcardCharacter::Type::Normal}; if 
(m_chars.empty() || false == m_chars.back().is_escape()) { if ('*' == c) { diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp b/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp index 3c477952..250ae57e 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp @@ -16,19 +16,19 @@ namespace log_surgeon::wildcard_query_parser { */ class WildcardExpression { public: - explicit WildcardExpression(std::string processed_search_string); + explicit WildcardExpression(std::string search_string); [[nodiscard]] auto get_chars() const -> std::vector const& { return m_chars; } [[nodiscard]] auto get_string() const -> std::string const& { - return m_processed_search_string; + return m_search_string; } private: std::vector m_chars; - std::string m_processed_search_string; + std::string m_search_string; }; } // namespace log_surgeon::wildcard_query_parser From 2d0fdd9d37bfd3938371b2178afd3c4e2693425f Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 14 Aug 2025 10:25:25 -0400 Subject: [PATCH 071/168] Rename get_string to get_search_string. 
--- src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp | 2 +- .../wildcard_query_parser/WildcardExpressionView.cpp | 2 +- .../wildcard_query_parser/WildcardExpressionView.hpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp b/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp index 250ae57e..f9401ab0 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp @@ -22,7 +22,7 @@ class WildcardExpression { return m_chars; } - [[nodiscard]] auto get_string() const -> std::string const& { + [[nodiscard]] auto get_search_string() const -> std::string const& { return m_search_string; } diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp index 1852aae8..38ced6a1 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp @@ -23,7 +23,7 @@ WildcardExpressionView::WildcardExpressionView( end_idx = std::min(end_idx, full_span.size()); begin_idx = std::min(begin_idx, end_idx); m_chars = full_span.subspan(begin_idx, end_idx - begin_idx); - std::string_view const full_view{m_expression->get_string()}; + std::string_view const full_view{m_expression->get_search_string()}; m_search_string = full_view.substr(begin_idx, end_idx - begin_idx); } diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp index c89b5382..0ee5306b 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp @@ -69,7 +69,7 @@ class WildcardExpressionView { */ [[nodiscard]] auto generate_regex_string() const -> std::pair; - [[nodiscard]] auto get_string() const -> 
std::string_view { return m_search_string; } + [[nodiscard]] auto get_search_string() const -> std::string_view { return m_search_string; } private: [[nodiscard]] auto get_indices() const -> std::pair { From e20cf92c69a5eb60649693afa52480930014921a Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 14 Aug 2025 10:26:23 -0400 Subject: [PATCH 072/168] Format. --- src/log_surgeon/wildcard_query_parser/WildcardCharacter.hpp | 4 +--- src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/WildcardCharacter.hpp b/src/log_surgeon/wildcard_query_parser/WildcardCharacter.hpp index df2aefe4..6f29272e 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardCharacter.hpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardCharacter.hpp @@ -17,9 +17,7 @@ class WildcardCharacter { [[nodiscard]] auto value() const -> char { return m_value; } - [[nodiscard]] auto is_greedy_wildcard() const -> bool { - return Type::GreedyWildcard == m_type; - } + [[nodiscard]] auto is_greedy_wildcard() const -> bool { return Type::GreedyWildcard == m_type; } [[nodiscard]] auto is_non_greedy_wildcard() const -> bool { return Type::NonGreedyWildcard == m_type; diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp b/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp index f9401ab0..c9ab98eb 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp @@ -22,9 +22,7 @@ class WildcardExpression { return m_chars; } - [[nodiscard]] auto get_search_string() const -> std::string const& { - return m_search_string; - } + [[nodiscard]] auto get_search_string() const -> std::string const& { return m_search_string; } private: std::vector m_chars; From 7104a624d310afd02a46c9a4e1550849aa00d5da Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 14 Aug 2025 10:46:46 -0400 Subject: 
[PATCH 073/168] Add WildcardCharacter unit-tests. --- docs/doxygen/mainpage.dox | 1 + tests/CMakeLists.txt | 1 + tests/test-wildcard-character.cpp | 61 +++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+) create mode 100644 tests/test-wildcard-character.cpp diff --git a/docs/doxygen/mainpage.dox b/docs/doxygen/mainpage.dox index 3a1cf2c4..7dbe04b0 100644 --- a/docs/doxygen/mainpage.dox +++ b/docs/doxygen/mainpage.dox @@ -20,4 +20,5 @@ * - @ref unit_tests_schema "Schema" * - @ref unit_tests_static_query_token "Static Query Token" * - @ref unit_tests_variable_query_token "Variable Query Token" + * - @ref unit_tests_wildcard_character "Wildcard Character" */ diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index c158e866..36d6aae0 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -14,6 +14,7 @@ target_sources( test-schema.cpp test-static-query-token.cpp test-variable-query-token.cpp + test-wildcard-character.cpp ) target_link_libraries( diff --git a/tests/test-wildcard-character.cpp b/tests/test-wildcard-character.cpp new file mode 100644 index 00000000..7a2fc526 --- /dev/null +++ b/tests/test-wildcard-character.cpp @@ -0,0 +1,61 @@ +#include + +#include + + +/** + * @defgroup unit_tests_wildcard_character `WildcardCharacter` unit tests. + * @brief Unit tests for `WildcardCharacter` to verify storage and type predicate methods. + + * These unit tests contain the `WildcardCharacter` tag. + */ + +using log_surgeon::wildcard_query_parser::WildcardCharacter; + +/** + * @ingroup unit_tests_wildcard_character + * @brief Tests a `WildcardCharacter` that stores a normal character. 
+ */ +TEST_CASE("normal", "[WildcardCharacter]") { + WildcardCharacter const wildcard_character{'a', WildcardCharacter::Type::Normal}; + REQUIRE('a' == wildcard_character.value()); + REQUIRE_FALSE(wildcard_character.is_greedy_wildcard()); + REQUIRE_FALSE(wildcard_character.is_non_greedy_wildcard()); + REQUIRE_FALSE(wildcard_character.is_escape()); +} + +/** + * @ingroup unit_tests_wildcard_character + * @brief Tests a `WildcardCharacter` that stores a greedy wildcard. + */ +TEST_CASE("greedy_wildcard", "[WildcardCharacter]") { + WildcardCharacter const wildcard_character{'*', WildcardCharacter::Type::GreedyWildcard}; + REQUIRE('*' == wildcard_character.value()); + REQUIRE(wildcard_character.is_greedy_wildcard()); + REQUIRE_FALSE(wildcard_character.is_non_greedy_wildcard()); + REQUIRE_FALSE(wildcard_character.is_escape()); +} + +/** + * @ingroup unit_tests_wildcard_character + * @brief Tests a `WildcardCharacter` that stores a non-greedy wildcard. + */ +TEST_CASE("non_greedy_wildcard", "[WildcardCharacter]") { + WildcardCharacter const wildcard_character{'?', WildcardCharacter::Type::NonGreedyWildcard}; + REQUIRE('?' == wildcard_character.value()); + REQUIRE_FALSE(wildcard_character.is_greedy_wildcard()); + REQUIRE(wildcard_character.is_non_greedy_wildcard()); + REQUIRE_FALSE(wildcard_character.is_escape()); +} + +/** + * @ingroup unit_tests_wildcard_character + * @brief Tests a `WildcardCharacter` that stores an escape. + */ +TEST_CASE("escape", "[WildcardCharacter]") { + WildcardCharacter const wildcard_character{'\\', WildcardCharacter::Type::Escape}; + REQUIRE('\\' == wildcard_character.value()); + REQUIRE_FALSE(wildcard_character.is_greedy_wildcard()); + REQUIRE_FALSE(wildcard_character.is_non_greedy_wildcard()); + REQUIRE(wildcard_character.is_escape()); +} From 0e4b768ffede936e3ad2d3786de427704fb53125 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 14 Aug 2025 10:47:47 -0400 Subject: [PATCH 074/168] Format. 
--- tests/test-wildcard-character.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/test-wildcard-character.cpp b/tests/test-wildcard-character.cpp index 7a2fc526..4f630dff 100644 --- a/tests/test-wildcard-character.cpp +++ b/tests/test-wildcard-character.cpp @@ -2,7 +2,6 @@ #include - /** * @defgroup unit_tests_wildcard_character `WildcardCharacter` unit tests. * @brief Unit tests for `WildcardCharacter` to verify storage and type predicate methods. @@ -17,11 +16,11 @@ using log_surgeon::wildcard_query_parser::WildcardCharacter; * @brief Tests a `WildcardCharacter` that stores a normal character. */ TEST_CASE("normal", "[WildcardCharacter]") { - WildcardCharacter const wildcard_character{'a', WildcardCharacter::Type::Normal}; - REQUIRE('a' == wildcard_character.value()); - REQUIRE_FALSE(wildcard_character.is_greedy_wildcard()); - REQUIRE_FALSE(wildcard_character.is_non_greedy_wildcard()); - REQUIRE_FALSE(wildcard_character.is_escape()); + WildcardCharacter const wildcard_character{'a', WildcardCharacter::Type::Normal}; + REQUIRE('a' == wildcard_character.value()); + REQUIRE_FALSE(wildcard_character.is_greedy_wildcard()); + REQUIRE_FALSE(wildcard_character.is_non_greedy_wildcard()); + REQUIRE_FALSE(wildcard_character.is_escape()); } /** From 9adf7e9501d88d23f0e508a0647ad3c09cbaeaaa Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 14 Aug 2025 11:01:56 -0400 Subject: [PATCH 075/168] Improve naming of wildcard character test cases. 
--- docs/doxygen/mainpage.dox | 1 + tests/CMakeLists.txt | 1 + tests/test-wildcard-character.cpp | 8 ++++---- tests/test-wildcard-expression.cpp | 18 ++++++++++++++++++ 4 files changed, 24 insertions(+), 4 deletions(-) create mode 100644 tests/test-wildcard-expression.cpp diff --git a/docs/doxygen/mainpage.dox b/docs/doxygen/mainpage.dox index 7dbe04b0..50b58515 100644 --- a/docs/doxygen/mainpage.dox +++ b/docs/doxygen/mainpage.dox @@ -21,4 +21,5 @@ * - @ref unit_tests_static_query_token "Static Query Token" * - @ref unit_tests_variable_query_token "Variable Query Token" * - @ref unit_tests_wildcard_character "Wildcard Character" + * - @ref unit_tests_wildcard_expression "Wildcard Expression" */ diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 36d6aae0..b7c5fc63 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -15,6 +15,7 @@ target_sources( test-static-query-token.cpp test-variable-query-token.cpp test-wildcard-character.cpp + test-wildcard-expression.cpp ) target_link_libraries( diff --git a/tests/test-wildcard-character.cpp b/tests/test-wildcard-character.cpp index 4f630dff..9a8f54ac 100644 --- a/tests/test-wildcard-character.cpp +++ b/tests/test-wildcard-character.cpp @@ -15,7 +15,7 @@ using log_surgeon::wildcard_query_parser::WildcardCharacter; * @ingroup unit_tests_wildcard_character * @brief Tests a `WildcardCharacter` that stores a normal character. */ -TEST_CASE("normal", "[WildcardCharacter]") { +TEST_CASE("normal_expression_character", "[WildcardCharacter]") { WildcardCharacter const wildcard_character{'a', WildcardCharacter::Type::Normal}; REQUIRE('a' == wildcard_character.value()); REQUIRE_FALSE(wildcard_character.is_greedy_wildcard()); @@ -27,7 +27,7 @@ TEST_CASE("normal", "[WildcardCharacter]") { * @ingroup unit_tests_wildcard_character * @brief Tests a `WildcardCharacter` that stores a greedy wildcard. 
*/ -TEST_CASE("greedy_wildcard", "[WildcardCharacter]") { +TEST_CASE("greedy_wildcard_expression_character", "[WildcardCharacter]") { WildcardCharacter const wildcard_character{'*', WildcardCharacter::Type::GreedyWildcard}; REQUIRE('*' == wildcard_character.value()); REQUIRE(wildcard_character.is_greedy_wildcard()); @@ -39,7 +39,7 @@ TEST_CASE("greedy_wildcard", "[WildcardCharacter]") { * @ingroup unit_tests_wildcard_character * @brief Tests a `WildcardCharacter` that stores a non-greedy wildcard. */ -TEST_CASE("non_greedy_wildcard", "[WildcardCharacter]") { +TEST_CASE("non_greedy_wildcard_expression_character", "[WildcardCharacter]") { WildcardCharacter const wildcard_character{'?', WildcardCharacter::Type::NonGreedyWildcard}; REQUIRE('?' == wildcard_character.value()); REQUIRE_FALSE(wildcard_character.is_greedy_wildcard()); @@ -51,7 +51,7 @@ TEST_CASE("non_greedy_wildcard", "[WildcardCharacter]") { * @ingroup unit_tests_wildcard_character * @brief Tests a `WildcardCharacter` that stores an escape. */ -TEST_CASE("escape", "[WildcardCharacter]") { +TEST_CASE("escape_expression_character", "[WildcardCharacter]") { WildcardCharacter const wildcard_character{'\\', WildcardCharacter::Type::Escape}; REQUIRE('\\' == wildcard_character.value()); REQUIRE_FALSE(wildcard_character.is_greedy_wildcard()); diff --git a/tests/test-wildcard-expression.cpp b/tests/test-wildcard-expression.cpp new file mode 100644 index 00000000..35adea4c --- /dev/null +++ b/tests/test-wildcard-expression.cpp @@ -0,0 +1,18 @@ +#include + +#include + +/** + * @defgroup unit_tests_wildcard_expression `WildcardExpression` unit tests. + * @brief Unit tests for `WildcardExpression` to verify storage and type predicate methods. + + * These unit tests contain the `WildcardExpression` tag. + */ + +/** + * @ingroup unit_tests_wildcard_expression + * @brief Tests an empty `WildcardExpression`. 
+ */ +TEST_CASE("empty", "[WildcardExpression]") { + +} \ No newline at end of file From 2a039a0f535bd2f9f40e75719de7f583279d2a76 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 14 Aug 2025 11:22:19 -0400 Subject: [PATCH 076/168] Rename WildcardCharacter to ExpressionCharacter. --- CMakeLists.txt | 2 +- docs/doxygen/mainpage.dox | 2 +- ...dCharacter.hpp => ExpressionCharacter.hpp} | 10 ++-- .../WildcardExpression.cpp | 10 ++-- .../WildcardExpression.hpp | 6 +- .../WildcardExpressionView.cpp | 10 ++-- .../WildcardExpressionView.hpp | 4 +- tests/CMakeLists.txt | 2 +- tests/test-expression-character.cpp | 60 +++++++++++++++++++ tests/test-wildcard-character.cpp | 60 ------------------- 10 files changed, 83 insertions(+), 83 deletions(-) rename src/log_surgeon/wildcard_query_parser/{WildcardCharacter.hpp => ExpressionCharacter.hpp} (69%) create mode 100644 tests/test-expression-character.cpp delete mode 100644 tests/test-wildcard-character.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 3f1cb5b2..d73e9737 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -84,12 +84,12 @@ set(SOURCE_FILES src/log_surgeon/finite_automata/TagOperation.hpp src/log_surgeon/finite_automata/UnicodeIntervalTree.hpp src/log_surgeon/finite_automata/UnicodeIntervalTree.tpp + src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp src/log_surgeon/wildcard_query_parser/StaticQueryToken.hpp src/log_surgeon/wildcard_query_parser/VariableQueryToken.cpp src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp - src/log_surgeon/wildcard_query_parser/WildcardCharacter.hpp src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp diff --git a/docs/doxygen/mainpage.dox b/docs/doxygen/mainpage.dox index 50b58515..a46dedb4 100644 --- 
a/docs/doxygen/mainpage.dox +++ b/docs/doxygen/mainpage.dox @@ -12,6 +12,7 @@ * * - @ref unit_tests_capture "Capture" * - @ref unit_tests_dfa "DFA" + * - @ref unit_tests_expression_character "Expression Character" * - @ref unit_tests_nfa "NFA" * - @ref unit_tests_prefix_tree "Prefix tree" * - @ref unit_tests_query_interpretation "Query Interpretation" @@ -20,6 +21,5 @@ * - @ref unit_tests_schema "Schema" * - @ref unit_tests_static_query_token "Static Query Token" * - @ref unit_tests_variable_query_token "Variable Query Token" - * - @ref unit_tests_wildcard_character "Wildcard Character" * - @ref unit_tests_wildcard_expression "Wildcard Expression" */ diff --git a/src/log_surgeon/wildcard_query_parser/WildcardCharacter.hpp b/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp similarity index 69% rename from src/log_surgeon/wildcard_query_parser/WildcardCharacter.hpp rename to src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp index 6f29272e..9d43dec4 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardCharacter.hpp +++ b/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp @@ -1,10 +1,10 @@ -#ifndef LOG_SURGEON_QUERY_PARSER_WILDCARD_CHARACTER_HPP -#define LOG_SURGEON_QUERY_PARSER_WILDCARD_CHARACTER_HPP +#ifndef LOG_SURGEON_QUERY_PARSER_EXPRESSION_CHARACTER_HPP +#define LOG_SURGEON_QUERY_PARSER_EXPRESSION_CHARACTER_HPP #include namespace log_surgeon::wildcard_query_parser { -class WildcardCharacter { +class ExpressionCharacter { public: enum class Type : uint8_t { Normal, @@ -13,7 +13,7 @@ class WildcardCharacter { Escape }; - WildcardCharacter(char const value, Type const type) : m_value{value}, m_type{type} {} + ExpressionCharacter(char const value, Type const type) : m_value{value}, m_type{type} {} [[nodiscard]] auto value() const -> char { return m_value; } @@ -31,4 +31,4 @@ class WildcardCharacter { }; } // namespace log_surgeon::wildcard_query_parser -#endif // LOG_SURGEON_QUERY_PARSER_WILDCARD_CHARACTER_HPP +#endif // 
LOG_SURGEON_QUERY_PARSER_EXPRESSION_CHARACTER_HPP diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp b/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp index 2ddd4bd8..d32b6fc1 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp @@ -3,21 +3,21 @@ #include #include -#include +#include namespace log_surgeon::wildcard_query_parser { WildcardExpression::WildcardExpression(std::string search_string) : m_search_string(std::move(search_string)) { m_chars.reserve(m_search_string.size()); for (auto const c : m_search_string) { - auto type{WildcardCharacter::Type::Normal}; + auto type{ExpressionCharacter::Type::Normal}; if (m_chars.empty() || false == m_chars.back().is_escape()) { if ('*' == c) { - type = WildcardCharacter::Type::GreedyWildcard; + type = ExpressionCharacter::Type::GreedyWildcard; } else if ('?' == c) { - type = WildcardCharacter::Type::NonGreedyWildcard; + type = ExpressionCharacter::Type::NonGreedyWildcard; } else if ('\\' == c) { - type = WildcardCharacter::Type::Escape; + type = ExpressionCharacter::Type::Escape; } } m_chars.emplace_back(c, type); diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp b/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp index c9ab98eb..94cf96ba 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp @@ -4,7 +4,7 @@ #include #include -#include +#include namespace log_surgeon::wildcard_query_parser { /** @@ -18,14 +18,14 @@ class WildcardExpression { public: explicit WildcardExpression(std::string search_string); - [[nodiscard]] auto get_chars() const -> std::vector const& { + [[nodiscard]] auto get_chars() const -> std::vector const& { return m_chars; } [[nodiscard]] auto get_search_string() const -> std::string const& { return m_search_string; } private: - std::vector m_chars; + 
std::vector m_chars; std::string m_search_string; }; } // namespace log_surgeon::wildcard_query_parser diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp index 38ced6a1..303901af 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp @@ -60,15 +60,15 @@ auto WildcardExpressionView::generate_regex_string() const -> std::pair #include -#include +#include #include namespace log_surgeon::wildcard_query_parser { @@ -80,7 +80,7 @@ class WildcardExpressionView { } WildcardExpression const* m_expression; - std::span m_chars; + std::span m_chars; std::string_view m_search_string; }; } // namespace log_surgeon::wildcard_query_parser diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index b7c5fc63..c528be05 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -6,6 +6,7 @@ target_sources( test-buffer-parser.cpp test-capture.cpp test-dfa.cpp + test-expression-character.cpp test-nfa.cpp test-prefix-tree.cpp test-query-interpretation.cpp @@ -14,7 +15,6 @@ target_sources( test-schema.cpp test-static-query-token.cpp test-variable-query-token.cpp - test-wildcard-character.cpp test-wildcard-expression.cpp ) diff --git a/tests/test-expression-character.cpp b/tests/test-expression-character.cpp new file mode 100644 index 00000000..a1747fb5 --- /dev/null +++ b/tests/test-expression-character.cpp @@ -0,0 +1,60 @@ +#include + +#include + +/** + * @defgroup unit_tests_expression_character `ExpressionCharacter` unit tests. + * @brief Unit tests for `ExpressionCharacter` to verify storage and type predicate methods. + + * These unit tests contain the `ExpressionCharacter` tag. + */ + +using log_surgeon::wildcard_query_parser::ExpressionCharacter; + +/** + * @ingroup unit_tests_expression_character + * @brief Tests a `ExpressionCharacter` that stores a normal character. 
+ */ +TEST_CASE("normal_expression_character", "[ExpressionCharacter]") { + ExpressionCharacter const expression_character{'a', ExpressionCharacter::Type::Normal}; + REQUIRE('a' == expression_character.value()); + REQUIRE_FALSE(expression_character.is_greedy_wildcard()); + REQUIRE_FALSE(expression_character.is_non_greedy_wildcard()); + REQUIRE_FALSE(expression_character.is_escape()); +} + +/** + * @ingroup unit_tests_expression_character + * @brief Tests a `ExpressionCharacter` that stores a greedy wildcard. + */ +TEST_CASE("greedy_wildcard_expression_character", "[ExpressionCharacter]") { + ExpressionCharacter const expression_character{'*', ExpressionCharacter::Type::GreedyWildcard}; + REQUIRE('*' == expression_character.value()); + REQUIRE(expression_character.is_greedy_wildcard()); + REQUIRE_FALSE(expression_character.is_non_greedy_wildcard()); + REQUIRE_FALSE(expression_character.is_escape()); +} + +/** + * @ingroup unit_tests_expression_character + * @brief Tests a `ExpressionCharacter` that stores a non-greedy wildcard. + */ +TEST_CASE("non_greedy_wildcard_expression_character", "[ExpressionCharacter]") { + ExpressionCharacter const expression_character{'?', ExpressionCharacter::Type::NonGreedyWildcard}; + REQUIRE('?' == expression_character.value()); + REQUIRE_FALSE(expression_character.is_greedy_wildcard()); + REQUIRE(expression_character.is_non_greedy_wildcard()); + REQUIRE_FALSE(expression_character.is_escape()); +} + +/** + * @ingroup unit_tests_expression_character + * @brief Tests a `ExpressionCharacter` that stores an escape. 
+ */ +TEST_CASE("escape_expression_character", "[ExpressionCharacter]") { + ExpressionCharacter const expression_character{'\\', ExpressionCharacter::Type::Escape}; + REQUIRE('\\' == expression_character.value()); + REQUIRE_FALSE(expression_character.is_greedy_wildcard()); + REQUIRE_FALSE(expression_character.is_non_greedy_wildcard()); + REQUIRE(expression_character.is_escape()); +} diff --git a/tests/test-wildcard-character.cpp b/tests/test-wildcard-character.cpp deleted file mode 100644 index 9a8f54ac..00000000 --- a/tests/test-wildcard-character.cpp +++ /dev/null @@ -1,60 +0,0 @@ -#include - -#include - -/** - * @defgroup unit_tests_wildcard_character `WildcardCharacter` unit tests. - * @brief Unit tests for `WildcardCharacter` to verify storage and type predicate methods. - - * These unit tests contain the `WildcardCharacter` tag. - */ - -using log_surgeon::wildcard_query_parser::WildcardCharacter; - -/** - * @ingroup unit_tests_wildcard_character - * @brief Tests a `WildcardCharacter` that stores a normal character. - */ -TEST_CASE("normal_expression_character", "[WildcardCharacter]") { - WildcardCharacter const wildcard_character{'a', WildcardCharacter::Type::Normal}; - REQUIRE('a' == wildcard_character.value()); - REQUIRE_FALSE(wildcard_character.is_greedy_wildcard()); - REQUIRE_FALSE(wildcard_character.is_non_greedy_wildcard()); - REQUIRE_FALSE(wildcard_character.is_escape()); -} - -/** - * @ingroup unit_tests_wildcard_character - * @brief Tests a `WildcardCharacter` that stores a greedy wildcard. 
- */ -TEST_CASE("greedy_wildcard_expression_character", "[WildcardCharacter]") { - WildcardCharacter const wildcard_character{'*', WildcardCharacter::Type::GreedyWildcard}; - REQUIRE('*' == wildcard_character.value()); - REQUIRE(wildcard_character.is_greedy_wildcard()); - REQUIRE_FALSE(wildcard_character.is_non_greedy_wildcard()); - REQUIRE_FALSE(wildcard_character.is_escape()); -} - -/** - * @ingroup unit_tests_wildcard_character - * @brief Tests a `WildcardCharacter` that stores a non-greedy wildcard. - */ -TEST_CASE("non_greedy_wildcard_expression_character", "[WildcardCharacter]") { - WildcardCharacter const wildcard_character{'?', WildcardCharacter::Type::NonGreedyWildcard}; - REQUIRE('?' == wildcard_character.value()); - REQUIRE_FALSE(wildcard_character.is_greedy_wildcard()); - REQUIRE(wildcard_character.is_non_greedy_wildcard()); - REQUIRE_FALSE(wildcard_character.is_escape()); -} - -/** - * @ingroup unit_tests_wildcard_character - * @brief Tests a `WildcardCharacter` that stores an escape. - */ -TEST_CASE("escape_expression_character", "[WildcardCharacter]") { - WildcardCharacter const wildcard_character{'\\', WildcardCharacter::Type::Escape}; - REQUIRE('\\' == wildcard_character.value()); - REQUIRE_FALSE(wildcard_character.is_greedy_wildcard()); - REQUIRE_FALSE(wildcard_character.is_non_greedy_wildcard()); - REQUIRE(wildcard_character.is_escape()); -} From 109c1980024f2ff9bf2172a912dd63532b5ff04a Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 14 Aug 2025 11:52:58 -0400 Subject: [PATCH 077/168] Add unit-tests for WildcardExpression. 
--- tests/test-wildcard-expression.cpp | 96 +++++++++++++++++++++++++++++- 1 file changed, 93 insertions(+), 3 deletions(-) diff --git a/tests/test-wildcard-expression.cpp b/tests/test-wildcard-expression.cpp index 35adea4c..99d784d1 100644 --- a/tests/test-wildcard-expression.cpp +++ b/tests/test-wildcard-expression.cpp @@ -1,3 +1,5 @@ +#include + #include #include @@ -9,10 +11,98 @@ * These unit tests contain the `WildcardExpression` tag. */ +using log_surgeon::wildcard_query_parser::WildcardExpression; +using std::string; + /** * @ingroup unit_tests_wildcard_expression * @brief Tests an empty `WildcardExpression`. */ -TEST_CASE("empty", "[WildcardExpression]") { - -} \ No newline at end of file +TEST_CASE("empty_wildcard_expression", "[WildcardExpression]") { + WildcardExpression const expression{""}; + REQUIRE(expression.get_search_string().empty()); + REQUIRE(expression.get_chars().empty()); +} + +/** + * @ingroup unit_tests_wildcard_expression + * @brief Tests a `WildcardExpression` with only normal characters. + */ +TEST_CASE("normal_character_wildcard_expression", "[WildcardExpression]") { + string const input{"abc"}; + + WildcardExpression const expression{input}; + REQUIRE(input == expression.get_search_string()); + + auto const& expression_chars{expression.get_chars()}; + REQUIRE(input.size() == expression_chars.size()); + + for (auto const& expression_char : expression_chars) { + REQUIRE_FALSE(expression_char.is_greedy_wildcard()); + REQUIRE_FALSE(expression_char.is_non_greedy_wildcard()); + REQUIRE_FALSE(expression_char.is_escape()); + } +} + +/** + * @ingroup unit_tests_wildcard_expression + * @brief Tests a `WildcardExpression` with mixed normal and wildcard characters. 
+ */ +TEST_CASE("normal_and_wildcard_character_wildcard_expression", "[WildcardExpression]") { + string const input{"a*b?c"}; + + WildcardExpression const expression{input}; + REQUIRE(input == expression.get_search_string()); + + auto const& expression_chars{expression.get_chars()}; + REQUIRE(input.size() == expression_chars.size()); + + for (size_t i{0}; i < expression_chars.size(); i++) { + auto const& expression_char{expression_chars[i]}; + REQUIRE(input[i] == expression_char.value()); + if (0 == i || 2 == i || 4 == i) { + REQUIRE_FALSE(expression_char.is_greedy_wildcard()); + REQUIRE_FALSE(expression_char.is_non_greedy_wildcard()); + REQUIRE_FALSE(expression_char.is_escape()); + } else if (1 == i) { + REQUIRE(expression_char.is_greedy_wildcard()); + REQUIRE_FALSE(expression_char.is_non_greedy_wildcard()); + REQUIRE_FALSE(expression_char.is_escape()); + } else { + REQUIRE_FALSE(expression_char.is_greedy_wildcard()); + REQUIRE(expression_char.is_non_greedy_wildcard()); + REQUIRE_FALSE(expression_char.is_escape()); + } + } +} + +/** + * @ingroup unit_tests_wildcard_expression + * @brief Tests a `WildcardExpression` with mixed normal and escape characters. 
+ */ +TEST_CASE("normal_and_escape_character_wildcard_expression", "[WildcardExpression]") { + string const input{R"(a\*b\?c\\)"}; + constexpr size_t cFirstEscapePos{1}; + constexpr size_t cSecondEscapePos{4}; + constexpr size_t cLastEscapePos{7}; + + WildcardExpression const expression{input}; + REQUIRE(input == expression.get_search_string()); + + auto const& expression_chars{expression.get_chars()}; + REQUIRE(input.size() == expression_chars.size()); + + for (size_t i{0}; i < expression_chars.size(); i++) { + auto const& expression_char{expression_chars[i]}; + REQUIRE(input[i] == expression_char.value()); + if (cFirstEscapePos == i || cSecondEscapePos == i || cLastEscapePos == i) { + REQUIRE_FALSE(expression_char.is_greedy_wildcard()); + REQUIRE_FALSE(expression_char.is_non_greedy_wildcard()); + REQUIRE(expression_char.is_escape()); + } else { + REQUIRE_FALSE(expression_char.is_greedy_wildcard()); + REQUIRE_FALSE(expression_char.is_non_greedy_wildcard()); + REQUIRE_FALSE(expression_char.is_escape()); + } + } +} From bf4ad21db7ce63c9c9aba330b6b4846436c0bda7 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 14 Aug 2025 11:54:40 -0400 Subject: [PATCH 078/168] Format. --- tests/test-expression-character.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test-expression-character.cpp b/tests/test-expression-character.cpp index a1747fb5..d114b064 100644 --- a/tests/test-expression-character.cpp +++ b/tests/test-expression-character.cpp @@ -40,7 +40,10 @@ TEST_CASE("greedy_wildcard_expression_character", "[ExpressionCharacter]") { * @brief Tests a `ExpressionCharacter` that stores a non-greedy wildcard. */ TEST_CASE("non_greedy_wildcard_expression_character", "[ExpressionCharacter]") { - ExpressionCharacter const expression_character{'?', ExpressionCharacter::Type::NonGreedyWildcard}; + ExpressionCharacter const expression_character{ + '?', + ExpressionCharacter::Type::NonGreedyWildcard + }; REQUIRE('?' 
== expression_character.value()); REQUIRE_FALSE(expression_character.is_greedy_wildcard()); REQUIRE(expression_character.is_non_greedy_wildcard()); From 2f87ae10625a27f97f5fb4548e649a08c4d47fd9 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 14 Aug 2025 19:31:46 -0400 Subject: [PATCH 079/168] Update extend_to_adjacent_wildcards method to run a success flag; Add first view unit-test. --- docs/doxygen/mainpage.dox | 1 + .../WildcardExpressionView.cpp | 8 +++-- .../WildcardExpressionView.hpp | 7 ++-- tests/CMakeLists.txt | 1 + tests/test-wildcard-expression-view.cpp | 35 +++++++++++++++++++ 5 files changed, 48 insertions(+), 4 deletions(-) create mode 100644 tests/test-wildcard-expression-view.cpp diff --git a/docs/doxygen/mainpage.dox b/docs/doxygen/mainpage.dox index a46dedb4..2c8803f2 100644 --- a/docs/doxygen/mainpage.dox +++ b/docs/doxygen/mainpage.dox @@ -22,4 +22,5 @@ * - @ref unit_tests_static_query_token "Static Query Token" * - @ref unit_tests_variable_query_token "Variable Query Token" * - @ref unit_tests_wildcard_expression "Wildcard Expression" + * - @ref unit_tests_wildcard_expression_view "Wildcard Expression View" */ diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp index 303901af..9cfad589 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp @@ -27,18 +27,22 @@ WildcardExpressionView::WildcardExpressionView( m_search_string = full_view.substr(begin_idx, end_idx - begin_idx); } -auto WildcardExpressionView::extend_to_adjacent_greedy_wildcards() const -> WildcardExpressionView { +auto WildcardExpressionView::extend_to_adjacent_greedy_wildcards() const -> std::pair { auto [begin_idx, end_idx]{get_indices()}; + bool is_extended{false}; std::span const full_span{m_expression->get_chars()}; if (begin_idx > 0 && full_span[begin_idx - 1].is_greedy_wildcard()) { 
--begin_idx; + is_extended = true; } if (end_idx < full_span.size() && full_span[end_idx].is_greedy_wildcard()) { ++end_idx; + is_extended = true; } - return {*m_expression, begin_idx, end_idx}; + WildcardExpressionView wildcard_expression_view{*m_expression, begin_idx, end_idx}; + return {is_extended, wildcard_expression_view}; } auto WildcardExpressionView::is_well_formed() const -> bool { diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp index 588439f1..47ba5044 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp @@ -28,9 +28,12 @@ class WildcardExpressionView { WildcardExpressionView(WildcardExpression const& expression, size_t begin_idx, size_t end_idx); /** - * @return A copy of this view, but extended to include adjacent greedy wildcards. + * Tries to extends the view to include adjacent greedy wildcards from the original expression. + * @return A pair containing: + * - True if there exists adjacent greedy wildcards in the original expression, false otherwise. + * - A copy of this view, with any greedy wildcard extensions that could be made. 
*/ - [[nodiscard]] auto extend_to_adjacent_greedy_wildcards() const -> WildcardExpressionView; + [[nodiscard]] auto extend_to_adjacent_greedy_wildcards() const -> std::pair; [[nodiscard]] auto starts_or_ends_with_greedy_wildcard() const -> bool { return false == m_chars.empty() diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index c528be05..06dfac7f 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -16,6 +16,7 @@ target_sources( test-static-query-token.cpp test-variable-query-token.cpp test-wildcard-expression.cpp + test-wildcard-expression-view.cpp ) target_link_libraries( diff --git a/tests/test-wildcard-expression-view.cpp b/tests/test-wildcard-expression-view.cpp new file mode 100644 index 00000000..255aef30 --- /dev/null +++ b/tests/test-wildcard-expression-view.cpp @@ -0,0 +1,35 @@ +#include +#include + +#include + +/** + * @defgroup unit_tests_wildcard_expression_view `WildcardExpressionView` unit tests. + * @brief Unit tests for `WildcardExpressionView` to ... . + + * These unit tests contain the `WildcardExpressionView` tag. + */ + +using log_surgeon::wildcard_query_parser::WildcardExpression; +using log_surgeon::wildcard_query_parser::WildcardExpressionView; + +/** + * @ingroup unit_tests_wildcard_expression_view + * @brief Tests an empty `WildcardExpressionView`. 
+ */ +TEST_CASE("empty_wildcard_expression_view", "[WildcardExpressionView]") { + WildcardExpression const expression{""}; + WildcardExpressionView const view{expression, 0, 0}; + + REQUIRE(view.is_well_formed()); + REQUIRE(view.get_search_string().empty()); + REQUIRE_FALSE(view.starts_or_ends_with_greedy_wildcard()); + + auto const [regex_string, contains_wildcard]{view.generate_regex_string()}; + REQUIRE(regex_string.empty()); + REQUIRE_FALSE(contains_wildcard); + + auto const [is_extended, extended_view]{view.extend_to_adjacent_greedy_wildcards()}; + REQUIRE_FALSE(is_extended); + REQUIRE(view.get_search_string() == extended_view.get_search_string()); +} From e631ec3b56e999dad3444930de957342b6752346 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 14 Aug 2025 19:33:04 -0400 Subject: [PATCH 080/168] Improve consistency in expression unit-tests by checking values in normal char test. --- tests/test-wildcard-expression.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test-wildcard-expression.cpp b/tests/test-wildcard-expression.cpp index 99d784d1..95cc5410 100644 --- a/tests/test-wildcard-expression.cpp +++ b/tests/test-wildcard-expression.cpp @@ -37,7 +37,9 @@ TEST_CASE("normal_character_wildcard_expression", "[WildcardExpression]") { auto const& expression_chars{expression.get_chars()}; REQUIRE(input.size() == expression_chars.size()); - for (auto const& expression_char : expression_chars) { + for (size_t i{0}; i < expression_chars.size(); i++) { + auto const& expression_char{expression_chars[i]}; + REQUIRE(input[i] == expression_char.value()); REQUIRE_FALSE(expression_char.is_greedy_wildcard()); REQUIRE_FALSE(expression_char.is_non_greedy_wildcard()); REQUIRE_FALSE(expression_char.is_escape()); From 89b4bd4f30ab3b6f3df291b5e4f9bd68852c0232 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 14 Aug 2025 19:38:54 -0400 Subject: [PATCH 081/168] Fix naming in header guards. 
--- .../wildcard_query_parser/ExpressionCharacter.hpp | 6 +++--- .../wildcard_query_parser/WildcardExpression.hpp | 6 +++--- .../wildcard_query_parser/WildcardExpressionView.hpp | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp b/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp index 9d43dec4..37f6e387 100644 --- a/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp +++ b/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp @@ -1,5 +1,5 @@ -#ifndef LOG_SURGEON_QUERY_PARSER_EXPRESSION_CHARACTER_HPP -#define LOG_SURGEON_QUERY_PARSER_EXPRESSION_CHARACTER_HPP +#ifndef LOG_SURGEON_WILDCARD_QUERY_PARSER_EXPRESSION_CHARACTER_HPP +#define LOG_SURGEON_WILDCARD_QUERY_PARSER_EXPRESSION_CHARACTER_HPP #include @@ -31,4 +31,4 @@ class ExpressionCharacter { }; } // namespace log_surgeon::wildcard_query_parser -#endif // LOG_SURGEON_QUERY_PARSER_EXPRESSION_CHARACTER_HPP +#endif // LOG_SURGEON_WILDCARD_QUERY_PARSER_EXPRESSION_CHARACTER_HPP diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp b/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp index 94cf96ba..0c362509 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp @@ -1,5 +1,5 @@ -#ifndef LOG_SURGEON_QUERY_PARSER_WILDCARD_EXPRESSION_HPP -#define LOG_SURGEON_QUERY_PARSER_WILDCARD_EXPRESSION_HPP +#ifndef LOG_SURGEON_WILDCARD_QUERY_PARSER_WILDCARD_EXPRESSION_HPP +#define LOG_SURGEON_WILDCARD_QUERY_PARSER_WILDCARD_EXPRESSION_HPP #include #include @@ -30,4 +30,4 @@ class WildcardExpression { }; } // namespace log_surgeon::wildcard_query_parser -#endif // LOG_SURGEON_QUERY_PARSER_WILDCARD_EXPRESSION_HPP +#endif // LOG_SURGEON_WILDCARD_QUERY_PARSER_WILDCARD_EXPRESSION_HPP diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp 
b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp index 47ba5044..96e86db1 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp @@ -1,5 +1,5 @@ -#ifndef LOG_SURGEON_QUERY_PARSER_WILDCARD_EXPRESSION_VIEW_HPP -#define LOG_SURGEON_QUERY_PARSER_WILDCARD_EXPRESSION_VIEW_HPP +#ifndef LOG_SURGEON_WILDCARD_QUERY_PARSER_WILDCARD_EXPRESSION_VIEW_HPP +#define LOG_SURGEON_WILDCARD_QUERY_PARSER_WILDCARD_EXPRESSION_VIEW_HPP #include #include @@ -88,4 +88,4 @@ class WildcardExpressionView { }; } // namespace log_surgeon::wildcard_query_parser -#endif // LOG_SURGEON_QUERY_PARSER_WILDCARD_EXPRESSION_VIEW_HPP +#endif // LOG_SURGEON_WILDCARD_QUERY_PARSER_WILDCARD_EXPRESSION_VIEW_HPP From aee3c62c3911d9b11e500b67b942666f2aab8eb4 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 14 Aug 2025 20:01:41 -0400 Subject: [PATCH 082/168] Rename WildcardExpression to Expression. --- CMakeLists.txt | 4 +- docs/doxygen/mainpage.dox | 2 +- ...{WildcardExpression.cpp => Expression.cpp} | 4 +- ...{WildcardExpression.hpp => Expression.hpp} | 10 ++--- .../WildcardExpressionView.cpp | 4 +- .../WildcardExpressionView.hpp | 10 ++--- tests/CMakeLists.txt | 2 +- ...ard-expression.cpp => test-expression.cpp} | 42 +++++++++---------- tests/test-wildcard-expression-view.cpp | 6 +-- 9 files changed, 42 insertions(+), 42 deletions(-) rename src/log_surgeon/wildcard_query_parser/{WildcardExpression.cpp => Expression.cpp} (89%) rename src/log_surgeon/wildcard_query_parser/{WildcardExpression.hpp => Expression.hpp} (73%) rename tests/{test-wildcard-expression.cpp => test-expression.cpp} (68%) diff --git a/CMakeLists.txt b/CMakeLists.txt index d73e9737..b2ba4aa5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -84,14 +84,14 @@ set(SOURCE_FILES src/log_surgeon/finite_automata/TagOperation.hpp src/log_surgeon/finite_automata/UnicodeIntervalTree.hpp 
src/log_surgeon/finite_automata/UnicodeIntervalTree.tpp + src/log_surgeon/wildcard_query_parser/Expression.cpp + src/log_surgeon/wildcard_query_parser/Expression.hpp src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp src/log_surgeon/wildcard_query_parser/StaticQueryToken.hpp src/log_surgeon/wildcard_query_parser/VariableQueryToken.cpp src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp - src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp - src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp src/log_surgeon/Lalr1Parser.hpp diff --git a/docs/doxygen/mainpage.dox b/docs/doxygen/mainpage.dox index 2c8803f2..a8e3c582 100644 --- a/docs/doxygen/mainpage.dox +++ b/docs/doxygen/mainpage.dox @@ -12,6 +12,7 @@ * * - @ref unit_tests_capture "Capture" * - @ref unit_tests_dfa "DFA" + * - @ref unit_tests_expression "Expression" * - @ref unit_tests_expression_character "Expression Character" * - @ref unit_tests_nfa "NFA" * - @ref unit_tests_prefix_tree "Prefix tree" @@ -21,6 +22,5 @@ * - @ref unit_tests_schema "Schema" * - @ref unit_tests_static_query_token "Static Query Token" * - @ref unit_tests_variable_query_token "Variable Query Token" - * - @ref unit_tests_wildcard_expression "Wildcard Expression" * - @ref unit_tests_wildcard_expression_view "Wildcard Expression View" */ diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp b/src/log_surgeon/wildcard_query_parser/Expression.cpp similarity index 89% rename from src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp rename to src/log_surgeon/wildcard_query_parser/Expression.cpp index d32b6fc1..65fad81d 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpression.cpp +++ 
b/src/log_surgeon/wildcard_query_parser/Expression.cpp @@ -1,4 +1,4 @@ -#include "WildcardExpression.hpp" +#include "Expression.hpp" #include #include @@ -6,7 +6,7 @@ #include namespace log_surgeon::wildcard_query_parser { -WildcardExpression::WildcardExpression(std::string search_string) +Expression::Expression(std::string search_string) : m_search_string(std::move(search_string)) { m_chars.reserve(m_search_string.size()); for (auto const c : m_search_string) { diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp b/src/log_surgeon/wildcard_query_parser/Expression.hpp similarity index 73% rename from src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp rename to src/log_surgeon/wildcard_query_parser/Expression.hpp index 0c362509..cac314fa 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpression.hpp +++ b/src/log_surgeon/wildcard_query_parser/Expression.hpp @@ -1,5 +1,5 @@ -#ifndef LOG_SURGEON_WILDCARD_QUERY_PARSER_WILDCARD_EXPRESSION_HPP -#define LOG_SURGEON_WILDCARD_QUERY_PARSER_WILDCARD_EXPRESSION_HPP +#ifndef LOG_SURGEON_WILDCARD_QUERY_PARSER_EXPRESSION_HPP +#define LOG_SURGEON_WILDCARD_QUERY_PARSER_EXPRESSION_HPP #include #include @@ -14,9 +14,9 @@ namespace log_surgeon::wildcard_query_parser { * * To match a literal '*' or '?', the expression should escape it with a backslash (`\`). 
*/ -class WildcardExpression { +class Expression { public: - explicit WildcardExpression(std::string search_string); + explicit Expression(std::string search_string); [[nodiscard]] auto get_chars() const -> std::vector const& { return m_chars; @@ -30,4 +30,4 @@ class WildcardExpression { }; } // namespace log_surgeon::wildcard_query_parser -#endif // LOG_SURGEON_WILDCARD_QUERY_PARSER_WILDCARD_EXPRESSION_HPP +#endif // LOG_SURGEON_WILDCARD_QUERY_PARSER_EXPRESSION_HPP diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp index 9cfad589..38018869 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp @@ -8,13 +8,13 @@ #include #include -#include +#include using std::string; namespace log_surgeon::wildcard_query_parser { WildcardExpressionView::WildcardExpressionView( - WildcardExpression const& expression, + Expression const& expression, size_t begin_idx, size_t end_idx ) diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp index 96e86db1..de4372a7 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp +++ b/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp @@ -8,11 +8,11 @@ #include #include -#include +#include namespace log_surgeon::wildcard_query_parser { /** - * A lightweight, non-owning view into a contiguous subrange of a WildcardExpression. + * A lightweight, non-owning view into a contiguous subrange of an `Expression`. * * This class provides a span to the underlying character vector and a view into the corresponding * search string. 
It ensures that these are always valid by clamping the provided indices to the @@ -25,7 +25,7 @@ namespace log_surgeon::wildcard_query_parser { */ class WildcardExpressionView { public: - WildcardExpressionView(WildcardExpression const& expression, size_t begin_idx, size_t end_idx); + WildcardExpressionView(Expression const& expression, size_t begin_idx, size_t end_idx); /** * Tries to extends the view to include adjacent greedy wildcards from the original expression. @@ -50,7 +50,7 @@ class WildcardExpressionView { * By these rules, an empty substring is always well-formed. * * These constraints ensure well-formed substrings are consistent with the original intention of - * the WildcardExpression. For example, given the search query "* \*text\* *": + * the `Expression`. For example, given the search query "* \*text\* *": * - The substring "*text" is not well-formed, as it incorrectly indicates a literal wildcard. * - The substring "text\" is not well-formed, as a single `\` has no clear meaning. 
* @@ -82,7 +82,7 @@ class WildcardExpressionView { return {begin_idx, begin_idx + m_chars.size()}; } - WildcardExpression const* m_expression; + Expression const* m_expression; std::span m_chars; std::string_view m_search_string; }; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 06dfac7f..28813d76 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -6,6 +6,7 @@ target_sources( test-buffer-parser.cpp test-capture.cpp test-dfa.cpp + test-expression.cpp test-expression-character.cpp test-nfa.cpp test-prefix-tree.cpp @@ -15,7 +16,6 @@ target_sources( test-schema.cpp test-static-query-token.cpp test-variable-query-token.cpp - test-wildcard-expression.cpp test-wildcard-expression-view.cpp ) diff --git a/tests/test-wildcard-expression.cpp b/tests/test-expression.cpp similarity index 68% rename from tests/test-wildcard-expression.cpp rename to tests/test-expression.cpp index 95cc5410..10b3e53d 100644 --- a/tests/test-wildcard-expression.cpp +++ b/tests/test-expression.cpp @@ -1,37 +1,37 @@ #include -#include +#include #include /** - * @defgroup unit_tests_wildcard_expression `WildcardExpression` unit tests. - * @brief Unit tests for `WildcardExpression` to verify storage and type predicate methods. + * @defgroup unit_tests_expression `Expression` unit tests. + * @brief Unit tests for `Expression` to verify storage and type predicate methods. - * These unit tests contain the `WildcardExpression` tag. + * These unit tests contain the `Expression` tag. */ -using log_surgeon::wildcard_query_parser::WildcardExpression; +using log_surgeon::wildcard_query_parser::Expression; using std::string; /** - * @ingroup unit_tests_wildcard_expression - * @brief Tests an empty `WildcardExpression`. + * @ingroup unit_tests_expression + * @brief Tests an empty `Expression`. 
*/ -TEST_CASE("empty_wildcard_expression", "[WildcardExpression]") { - WildcardExpression const expression{""}; +TEST_CASE("empty_expression", "[Expression]") { + Expression const expression{""}; REQUIRE(expression.get_search_string().empty()); REQUIRE(expression.get_chars().empty()); } /** - * @ingroup unit_tests_wildcard_expression - * @brief Tests a `WildcardExpression` with only normal characters. + * @ingroup unit_tests_expression + * @brief Tests a `Expression` with only normal characters. */ -TEST_CASE("normal_character_wildcard_expression", "[WildcardExpression]") { +TEST_CASE("normal_character_expression", "[Expression]") { string const input{"abc"}; - WildcardExpression const expression{input}; + Expression const expression{input}; REQUIRE(input == expression.get_search_string()); auto const& expression_chars{expression.get_chars()}; @@ -47,13 +47,13 @@ TEST_CASE("normal_character_wildcard_expression", "[WildcardExpression]") { } /** - * @ingroup unit_tests_wildcard_expression - * @brief Tests a `WildcardExpression` with mixed normal and wildcard characters. + * @ingroup unit_tests_expression + * @brief Tests a `Expression` with mixed normal and wildcard characters. */ -TEST_CASE("normal_and_wildcard_character_wildcard_expression", "[WildcardExpression]") { +TEST_CASE("normal_and_wildcard_character_expression", "[Expression]") { string const input{"a*b?c"}; - WildcardExpression const expression{input}; + Expression const expression{input}; REQUIRE(input == expression.get_search_string()); auto const& expression_chars{expression.get_chars()}; @@ -79,16 +79,16 @@ TEST_CASE("normal_and_wildcard_character_wildcard_expression", "[WildcardExpress } /** - * @ingroup unit_tests_wildcard_expression - * @brief Tests a `WildcardExpression` with mixed normal and escape characters. + * @ingroup unit_tests_expression + * @brief Tests a `Expression` with mixed normal and escape characters. 
*/ -TEST_CASE("normal_and_escape_character_wildcard_expression", "[WildcardExpression]") { +TEST_CASE("normal_and_escape_character_expression", "[Expression]") { string const input{R"(a\*b\?c\\)"}; constexpr size_t cFirstEscapePos{1}; constexpr size_t cSecondEscapePos{4}; constexpr size_t cLastEscapePos{7}; - WildcardExpression const expression{input}; + Expression const expression{input}; REQUIRE(input == expression.get_search_string()); auto const& expression_chars{expression.get_chars()}; diff --git a/tests/test-wildcard-expression-view.cpp b/tests/test-wildcard-expression-view.cpp index 255aef30..a6632b9d 100644 --- a/tests/test-wildcard-expression-view.cpp +++ b/tests/test-wildcard-expression-view.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -10,7 +10,7 @@ * These unit tests contain the `WildcardExpressionView` tag. */ -using log_surgeon::wildcard_query_parser::WildcardExpression; +using log_surgeon::wildcard_query_parser::Expression; using log_surgeon::wildcard_query_parser::WildcardExpressionView; /** @@ -18,7 +18,7 @@ using log_surgeon::wildcard_query_parser::WildcardExpressionView; * @brief Tests an empty `WildcardExpressionView`. */ TEST_CASE("empty_wildcard_expression_view", "[WildcardExpressionView]") { - WildcardExpression const expression{""}; + Expression const expression{""}; WildcardExpressionView const view{expression, 0, 0}; REQUIRE(view.is_well_formed()); From b8ee95bfea8a47d875b62608ef484b9cf69505f8 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 14 Aug 2025 20:11:13 -0400 Subject: [PATCH 083/168] Rename WildcardExpressionView to ExpressionView. 
--- CMakeLists.txt | 4 ++-- docs/doxygen/mainpage.dox | 2 +- ...rdExpressionView.cpp => ExpressionView.cpp} | 12 ++++++------ ...rdExpressionView.hpp => ExpressionView.hpp} | 14 +++++++------- tests/CMakeLists.txt | 2 +- ...ssion-view.cpp => test-expression-view.cpp} | 18 +++++++++--------- 6 files changed, 26 insertions(+), 26 deletions(-) rename src/log_surgeon/wildcard_query_parser/{WildcardExpressionView.cpp => ExpressionView.cpp} (84%) rename src/log_surgeon/wildcard_query_parser/{WildcardExpressionView.hpp => ExpressionView.hpp} (87%) rename tests/{test-wildcard-expression-view.cpp => test-expression-view.cpp} (55%) diff --git a/CMakeLists.txt b/CMakeLists.txt index b2ba4aa5..5d883e85 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -87,13 +87,13 @@ set(SOURCE_FILES src/log_surgeon/wildcard_query_parser/Expression.cpp src/log_surgeon/wildcard_query_parser/Expression.hpp src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp + src/log_surgeon/wildcard_query_parser/ExpressionView.cpp + src/log_surgeon/wildcard_query_parser/ExpressionView.hpp src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp src/log_surgeon/wildcard_query_parser/StaticQueryToken.hpp src/log_surgeon/wildcard_query_parser/VariableQueryToken.cpp src/log_surgeon/wildcard_query_parser/VariableQueryToken.hpp - src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp - src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp src/log_surgeon/Lalr1Parser.hpp src/log_surgeon/Lalr1Parser.tpp src/log_surgeon/Lexer.hpp diff --git a/docs/doxygen/mainpage.dox b/docs/doxygen/mainpage.dox index a8e3c582..ab640c7f 100644 --- a/docs/doxygen/mainpage.dox +++ b/docs/doxygen/mainpage.dox @@ -14,6 +14,7 @@ * - @ref unit_tests_dfa "DFA" * - @ref unit_tests_expression "Expression" * - @ref unit_tests_expression_character "Expression Character" + * - @ref unit_tests_expression_view "Expression View" * - @ref unit_tests_nfa 
"NFA" * - @ref unit_tests_prefix_tree "Prefix tree" * - @ref unit_tests_query_interpretation "Query Interpretation" @@ -22,5 +23,4 @@ * - @ref unit_tests_schema "Schema" * - @ref unit_tests_static_query_token "Static Query Token" * - @ref unit_tests_variable_query_token "Variable Query Token" - * - @ref unit_tests_wildcard_expression_view "Wildcard Expression View" */ diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp b/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp similarity index 84% rename from src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp rename to src/log_surgeon/wildcard_query_parser/ExpressionView.cpp index 38018869..1b04edce 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.cpp +++ b/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp @@ -1,4 +1,4 @@ -#include "WildcardExpressionView.hpp" +#include "ExpressionView.hpp" #include #include @@ -13,7 +13,7 @@ using std::string; namespace log_surgeon::wildcard_query_parser { -WildcardExpressionView::WildcardExpressionView( +ExpressionView::ExpressionView( Expression const& expression, size_t begin_idx, size_t end_idx @@ -27,7 +27,7 @@ WildcardExpressionView::WildcardExpressionView( m_search_string = full_view.substr(begin_idx, end_idx - begin_idx); } -auto WildcardExpressionView::extend_to_adjacent_greedy_wildcards() const -> std::pair { +auto ExpressionView::extend_to_adjacent_greedy_wildcards() const -> std::pair { auto [begin_idx, end_idx]{get_indices()}; bool is_extended{false}; @@ -41,11 +41,11 @@ auto WildcardExpressionView::extend_to_adjacent_greedy_wildcards() const -> std: ++end_idx; is_extended = true; } - WildcardExpressionView wildcard_expression_view{*m_expression, begin_idx, end_idx}; + ExpressionView wildcard_expression_view{*m_expression, begin_idx, end_idx}; return {is_extended, wildcard_expression_view}; } -auto WildcardExpressionView::is_well_formed() const -> bool { +auto ExpressionView::is_well_formed() 
const -> bool { if (m_chars.empty()) { return true; } @@ -59,7 +59,7 @@ auto WildcardExpressionView::is_well_formed() const -> bool { return true; } -auto WildcardExpressionView::generate_regex_string() const -> std::pair { +auto ExpressionView::generate_regex_string() const -> std::pair { string regex_string; regex_string.reserve(m_chars.size() * 2); bool regex_contains_wildcard{false}; diff --git a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp b/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp similarity index 87% rename from src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp rename to src/log_surgeon/wildcard_query_parser/ExpressionView.hpp index de4372a7..071586b5 100644 --- a/src/log_surgeon/wildcard_query_parser/WildcardExpressionView.hpp +++ b/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp @@ -1,5 +1,5 @@ -#ifndef LOG_SURGEON_WILDCARD_QUERY_PARSER_WILDCARD_EXPRESSION_VIEW_HPP -#define LOG_SURGEON_WILDCARD_QUERY_PARSER_WILDCARD_EXPRESSION_VIEW_HPP +#ifndef LOG_SURGEON_WILDCARD_QUERY_PARSER_EXPRESSION_VIEW_HPP +#define LOG_SURGEON_WILDCARD_QUERY_PARSER_EXPRESSION_VIEW_HPP #include #include @@ -23,9 +23,9 @@ namespace log_surgeon::wildcard_query_parser { * - Checking if the view starts or ends with a greedy wildcard. * - Extending the view to include adjacent greedy wildcards. */ -class WildcardExpressionView { +class ExpressionView { public: - WildcardExpressionView(Expression const& expression, size_t begin_idx, size_t end_idx); + ExpressionView(Expression const& expression, size_t begin_idx, size_t end_idx); /** * Tries to extends the view to include adjacent greedy wildcards from the original expression. @@ -33,7 +33,7 @@ class WildcardExpressionView { * - True if there exists adjacent greedy wildcards in the original expression, false otherwise. * - A copy of this view, with any greedy wildcard extensions that could be made. 
*/ - [[nodiscard]] auto extend_to_adjacent_greedy_wildcards() const -> std::pair; + [[nodiscard]] auto extend_to_adjacent_greedy_wildcards() const -> std::pair; [[nodiscard]] auto starts_or_ends_with_greedy_wildcard() const -> bool { return false == m_chars.empty() @@ -41,7 +41,7 @@ class WildcardExpressionView { } /** - * Checks whether this `WildcardExpressionView` represents a well-formed subrange. + * Checks whether this `ExpressionView` represents a well-formed subrange. * * A subrange is well-formed if: * - It does not start immediately after an escaped character in the original expression. @@ -88,4 +88,4 @@ class WildcardExpressionView { }; } // namespace log_surgeon::wildcard_query_parser -#endif // LOG_SURGEON_WILDCARD_QUERY_PARSER_WILDCARD_EXPRESSION_VIEW_HPP +#endif // LOG_SURGEON_WILDCARD_QUERY_PARSER_EXPRESSION_VIEW_HPP diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 28813d76..2838d8a4 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -8,6 +8,7 @@ target_sources( test-dfa.cpp test-expression.cpp test-expression-character.cpp + test-expression-view.cpp test-nfa.cpp test-prefix-tree.cpp test-query-interpretation.cpp @@ -16,7 +17,6 @@ target_sources( test-schema.cpp test-static-query-token.cpp test-variable-query-token.cpp - test-wildcard-expression-view.cpp ) target_link_libraries( diff --git a/tests/test-wildcard-expression-view.cpp b/tests/test-expression-view.cpp similarity index 55% rename from tests/test-wildcard-expression-view.cpp rename to tests/test-expression-view.cpp index a6632b9d..f4bb9554 100644 --- a/tests/test-wildcard-expression-view.cpp +++ b/tests/test-expression-view.cpp @@ -1,25 +1,25 @@ #include -#include +#include #include /** - * @defgroup unit_tests_wildcard_expression_view `WildcardExpressionView` unit tests. - * @brief Unit tests for `WildcardExpressionView` to ... . + * @defgroup unit_tests_expression_view `ExpressionView` unit tests. + * @brief Unit tests for `ExpressionView` to ... . 
- * These unit tests contain the `WildcardExpressionView` tag. + * These unit tests contain the `ExpressionView` tag. */ using log_surgeon::wildcard_query_parser::Expression; -using log_surgeon::wildcard_query_parser::WildcardExpressionView; +using log_surgeon::wildcard_query_parser::ExpressionView; /** - * @ingroup unit_tests_wildcard_expression_view - * @brief Tests an empty `WildcardExpressionView`. + * @ingroup unit_tests_expression_view + * @brief Tests an empty `ExpressionView`. */ -TEST_CASE("empty_wildcard_expression_view", "[WildcardExpressionView]") { +TEST_CASE("empty_expression_view", "[ExpressionView]") { Expression const expression{""}; - WildcardExpressionView const view{expression, 0, 0}; + ExpressionView const view{expression, 0, 0}; REQUIRE(view.is_well_formed()); REQUIRE(view.get_search_string().empty()); From 66b92a795347b1cbb3c5d924a5e430b30155f32d Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 14 Aug 2025 20:16:03 -0400 Subject: [PATCH 084/168] Format. --- src/log_surgeon/wildcard_query_parser/Expression.cpp | 3 +-- src/log_surgeon/wildcard_query_parser/ExpressionView.cpp | 9 +++------ src/log_surgeon/wildcard_query_parser/ExpressionView.hpp | 5 +++-- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/Expression.cpp b/src/log_surgeon/wildcard_query_parser/Expression.cpp index 65fad81d..c3e9dd78 100644 --- a/src/log_surgeon/wildcard_query_parser/Expression.cpp +++ b/src/log_surgeon/wildcard_query_parser/Expression.cpp @@ -6,8 +6,7 @@ #include namespace log_surgeon::wildcard_query_parser { -Expression::Expression(std::string search_string) - : m_search_string(std::move(search_string)) { +Expression::Expression(std::string search_string) : m_search_string(std::move(search_string)) { m_chars.reserve(m_search_string.size()); for (auto const c : m_search_string) { auto type{ExpressionCharacter::Type::Normal}; diff --git a/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp 
b/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp index 1b04edce..0dc9d3ed 100644 --- a/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp +++ b/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp @@ -13,11 +13,7 @@ using std::string; namespace log_surgeon::wildcard_query_parser { -ExpressionView::ExpressionView( - Expression const& expression, - size_t begin_idx, - size_t end_idx -) +ExpressionView::ExpressionView(Expression const& expression, size_t begin_idx, size_t end_idx) : m_expression{&expression} { std::span const full_span{m_expression->get_chars()}; end_idx = std::min(end_idx, full_span.size()); @@ -27,7 +23,8 @@ ExpressionView::ExpressionView( m_search_string = full_view.substr(begin_idx, end_idx - begin_idx); } -auto ExpressionView::extend_to_adjacent_greedy_wildcards() const -> std::pair { +auto ExpressionView::extend_to_adjacent_greedy_wildcards() const + -> std::pair { auto [begin_idx, end_idx]{get_indices()}; bool is_extended{false}; diff --git a/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp b/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp index 071586b5..d18bea9b 100644 --- a/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp +++ b/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp @@ -7,8 +7,8 @@ #include #include -#include #include +#include namespace log_surgeon::wildcard_query_parser { /** @@ -33,7 +33,8 @@ class ExpressionView { * - True if there exists adjacent greedy wildcards in the original expression, false otherwise. * - A copy of this view, with any greedy wildcard extensions that could be made. 
*/ - [[nodiscard]] auto extend_to_adjacent_greedy_wildcards() const -> std::pair; + [[nodiscard]] auto extend_to_adjacent_greedy_wildcards() const + -> std::pair; [[nodiscard]] auto starts_or_ends_with_greedy_wildcard() const -> bool { return false == m_chars.empty() From 883b31ec1aa57ffb5c819ed6d2a725feeb0c366c Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 14 Aug 2025 20:54:25 -0400 Subject: [PATCH 085/168] Most of view tests are added now. --- tests/test-expression-view.cpp | 109 +++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) diff --git a/tests/test-expression-view.cpp b/tests/test-expression-view.cpp index f4bb9554..f4c5c08b 100644 --- a/tests/test-expression-view.cpp +++ b/tests/test-expression-view.cpp @@ -1,6 +1,9 @@ +#include + #include #include +#include #include /** @@ -12,6 +15,7 @@ using log_surgeon::wildcard_query_parser::Expression; using log_surgeon::wildcard_query_parser::ExpressionView; +using std::string; /** * @ingroup unit_tests_expression_view @@ -33,3 +37,108 @@ TEST_CASE("empty_expression_view", "[ExpressionView]") { REQUIRE_FALSE(is_extended); REQUIRE(view.get_search_string() == extended_view.get_search_string()); } + +/** + * @ingroup unit_tests_expression_view + * @brief Tests an `ExpressionView` that captures the entire `Expression`. 
+ */ +TEST_CASE("full_expression_view", "[ExpressionView]") { + string const input{"abc"}; + + Expression const expression{input}; + ExpressionView const view{expression, 0, 3}; + + REQUIRE(view.is_well_formed()); + REQUIRE(input == view.get_search_string()); + REQUIRE_FALSE(view.starts_or_ends_with_greedy_wildcard()); + + auto const [regex_string, contains_wildcard]{view.generate_regex_string()}; + REQUIRE(input == regex_string); + REQUIRE_FALSE(contains_wildcard); + + auto const [is_extended, extended_view]{view.extend_to_adjacent_greedy_wildcards()}; + REQUIRE_FALSE(is_extended); + REQUIRE(view.get_search_string() == extended_view.get_search_string()); +} + +/** + * @ingroup unit_tests_expression_view + * @brief Tests an `ExpressionView` that captures a subrange of `Expression` with wildcards. + */ +TEST_CASE("wildcard_subrange_expression_view", "[ExpressionView]") { + string const input{"a*b?c"}; + + size_t constexpr cBeginPos{1}; + size_t constexpr cEndPos{4}; + string const expected_search_string{"*b?"}; + string const expected_regex_string{".*b."}; + + Expression const expression{input}; + ExpressionView const view{expression, cBeginPos, cEndPos}; + + REQUIRE(view.is_well_formed()); + REQUIRE(expected_search_string == view.get_search_string()); + REQUIRE(view.starts_or_ends_with_greedy_wildcard()); + + auto const [regex_string, contains_wildcard]{view.generate_regex_string()}; + REQUIRE(expected_regex_string == regex_string); + REQUIRE(contains_wildcard); + + auto const [is_extended, extended_view]{view.extend_to_adjacent_greedy_wildcards()}; + REQUIRE_FALSE(is_extended); + REQUIRE(view.get_search_string() == extended_view.get_search_string()); +} + +/** + * @ingroup unit_tests_expression_view + * @brief Tests an `ExpressionView` that captures a subrange of `Expression` with escaped literals. 
+ */ +TEST_CASE("escape_subrange_expression_view", "[ExpressionView]") { + string const input{R"(a\*b\?c)"}; + + size_t constexpr cBeginPos{1}; + size_t constexpr cEndPos{6}; + string const expected_search_string{R"(\*b\?)"}; + string const expected_regex_string{R"(\*b\?)"}; + + Expression const expression{input}; + ExpressionView const view{expression, cBeginPos, cEndPos}; + + REQUIRE(view.is_well_formed()); + REQUIRE(expected_search_string == view.get_search_string()); + REQUIRE_FALSE(view.starts_or_ends_with_greedy_wildcard()); + + auto const [regex_string, contains_wildcard]{view.generate_regex_string()}; + REQUIRE(expected_regex_string == regex_string); + REQUIRE_FALSE(contains_wildcard); + + auto const [is_extended, extended_view]{view.extend_to_adjacent_greedy_wildcards()}; + REQUIRE_FALSE(is_extended); + REQUIRE(view.get_search_string() == extended_view.get_search_string()); +} + +/** + * @ingroup unit_tests_expression_view + * @brief Tests `ExpressionView`s for well-formedness. + */ +TEST_CASE("well_formed_expression_view", "[ExpressionView]") { + string const input{R"(a\*b\?c)"}; + size_t escape_pos1{1}; + size_t escape_pos2{4}; + + Expression const expression{input}; + for (size_t start_pos{0}; start_pos < input.size(); ++start_pos) { + for (size_t end_pos{start_pos + 1}; end_pos <= input.size(); ++end_pos) { + ExpressionView const view{expression, start_pos, end_pos}; + CAPTURE(start_pos); + CAPTURE(end_pos); + if (start_pos == escape_pos1 + 1 || start_pos == escape_pos2 + 1) { + REQUIRE_FALSE(view.is_well_formed()); + } else if (end_pos == escape_pos1 + 1 || end_pos == escape_pos2 + 1) { + REQUIRE_FALSE(view.is_well_formed()); + } else { + REQUIRE(view.is_well_formed()); + } + } + } +} From 6fec901569a3017d420fce6e870c8500a408f952 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 14 Aug 2025 20:54:52 -0400 Subject: [PATCH 086/168] Format. 
--- tests/test-expression-view.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test-expression-view.cpp b/tests/test-expression-view.cpp index f4c5c08b..6649bc87 100644 --- a/tests/test-expression-view.cpp +++ b/tests/test-expression-view.cpp @@ -68,8 +68,8 @@ TEST_CASE("full_expression_view", "[ExpressionView]") { TEST_CASE("wildcard_subrange_expression_view", "[ExpressionView]") { string const input{"a*b?c"}; - size_t constexpr cBeginPos{1}; - size_t constexpr cEndPos{4}; + constexpr size_t cBeginPos{1}; + constexpr size_t cEndPos{4}; string const expected_search_string{"*b?"}; string const expected_regex_string{".*b."}; @@ -96,8 +96,8 @@ TEST_CASE("wildcard_subrange_expression_view", "[ExpressionView]") { TEST_CASE("escape_subrange_expression_view", "[ExpressionView]") { string const input{R"(a\*b\?c)"}; - size_t constexpr cBeginPos{1}; - size_t constexpr cEndPos{6}; + constexpr size_t cBeginPos{1}; + constexpr size_t cEndPos{6}; string const expected_search_string{R"(\*b\?)"}; string const expected_regex_string{R"(\*b\?)"}; From fe2ded3dc9570193ba850b4503f0e5f8d18aacb4 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 14 Aug 2025 20:59:59 -0400 Subject: [PATCH 087/168] Tidy. 
--- src/log_surgeon/wildcard_query_parser/ExpressionView.cpp | 2 +- tests/test-expression-view.cpp | 9 +++++---- tests/test-expression.cpp | 1 + 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp b/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp index 0dc9d3ed..15e5339d 100644 --- a/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp +++ b/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp @@ -38,7 +38,7 @@ auto ExpressionView::extend_to_adjacent_greedy_wildcards() const ++end_idx; is_extended = true; } - ExpressionView wildcard_expression_view{*m_expression, begin_idx, end_idx}; + ExpressionView const wildcard_expression_view{*m_expression, begin_idx, end_idx}; return {is_extended, wildcard_expression_view}; } diff --git a/tests/test-expression-view.cpp b/tests/test-expression-view.cpp index 6649bc87..5b8bb63c 100644 --- a/tests/test-expression-view.cpp +++ b/tests/test-expression-view.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -123,8 +124,8 @@ TEST_CASE("escape_subrange_expression_view", "[ExpressionView]") { */ TEST_CASE("well_formed_expression_view", "[ExpressionView]") { string const input{R"(a\*b\?c)"}; - size_t escape_pos1{1}; - size_t escape_pos2{4}; + constexpr size_t cEscapePos1{1}; + constexpr size_t cEscapePos2{4}; Expression const expression{input}; for (size_t start_pos{0}; start_pos < input.size(); ++start_pos) { @@ -132,9 +133,9 @@ TEST_CASE("well_formed_expression_view", "[ExpressionView]") { ExpressionView const view{expression, start_pos, end_pos}; CAPTURE(start_pos); CAPTURE(end_pos); - if (start_pos == escape_pos1 + 1 || start_pos == escape_pos2 + 1) { + if (start_pos == cEscapePos1 + 1 || start_pos == cEscapePos2 + 1) { REQUIRE_FALSE(view.is_well_formed()); - } else if (end_pos == escape_pos1 + 1 || end_pos == escape_pos2 + 1) { + } else if (end_pos == cEscapePos1 + 1 || end_pos == cEscapePos2 + 1) { REQUIRE_FALSE(view.is_well_formed()); } else { 
REQUIRE(view.is_well_formed()); diff --git a/tests/test-expression.cpp b/tests/test-expression.cpp index 10b3e53d..63afd0a9 100644 --- a/tests/test-expression.cpp +++ b/tests/test-expression.cpp @@ -1,3 +1,4 @@ +#include #include #include From adacad79f3d3541e7e300c92e612a3ff928cf549 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Thu, 14 Aug 2025 21:02:05 -0400 Subject: [PATCH 088/168] Fix typo. --- src/log_surgeon/wildcard_query_parser/ExpressionView.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp b/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp index d18bea9b..cf228643 100644 --- a/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp +++ b/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp @@ -28,7 +28,7 @@ class ExpressionView { ExpressionView(Expression const& expression, size_t begin_idx, size_t end_idx); /** - * Tries to extends the view to include adjacent greedy wildcards from the original expression. + * Tries to extend the view to include adjacent greedy wildcards from the original expression. * @return A pair containing: * - True if there exists adjacent greedy wildcards in the original expression, false otherwise. * - A copy of this view, with any greedy wildcard extensions that could be made. From 91e7049f13728d217c78d6f967e50737e6ef081f Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 15 Aug 2025 06:47:09 -0400 Subject: [PATCH 089/168] Grammar. 
--- tests/test-expression-character.cpp | 8 ++++---- tests/test-expression.cpp | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test-expression-character.cpp b/tests/test-expression-character.cpp index d114b064..303cb607 100644 --- a/tests/test-expression-character.cpp +++ b/tests/test-expression-character.cpp @@ -13,7 +13,7 @@ using log_surgeon::wildcard_query_parser::ExpressionCharacter; /** * @ingroup unit_tests_expression_character - * @brief Tests a `ExpressionCharacter` that stores a normal character. + * @brief Tests an `ExpressionCharacter` that stores a normal character. */ TEST_CASE("normal_expression_character", "[ExpressionCharacter]") { ExpressionCharacter const expression_character{'a', ExpressionCharacter::Type::Normal}; @@ -25,7 +25,7 @@ TEST_CASE("normal_expression_character", "[ExpressionCharacter]") { /** * @ingroup unit_tests_expression_character - * @brief Tests a `ExpressionCharacter` that stores a greedy wildcard. + * @brief Tests an `ExpressionCharacter` that stores a greedy wildcard. */ TEST_CASE("greedy_wildcard_expression_character", "[ExpressionCharacter]") { ExpressionCharacter const expression_character{'*', ExpressionCharacter::Type::GreedyWildcard}; @@ -37,7 +37,7 @@ TEST_CASE("greedy_wildcard_expression_character", "[ExpressionCharacter]") { /** * @ingroup unit_tests_expression_character - * @brief Tests a `ExpressionCharacter` that stores a non-greedy wildcard. + * @brief Tests an `ExpressionCharacter` that stores a non-greedy wildcard. */ TEST_CASE("non_greedy_wildcard_expression_character", "[ExpressionCharacter]") { ExpressionCharacter const expression_character{ @@ -52,7 +52,7 @@ TEST_CASE("non_greedy_wildcard_expression_character", "[ExpressionCharacter]") { /** * @ingroup unit_tests_expression_character - * @brief Tests a `ExpressionCharacter` that stores an escape. + * @brief Tests an `ExpressionCharacter` that stores an escape. 
*/ TEST_CASE("escape_expression_character", "[ExpressionCharacter]") { ExpressionCharacter const expression_character{'\\', ExpressionCharacter::Type::Escape}; diff --git a/tests/test-expression.cpp b/tests/test-expression.cpp index 63afd0a9..31bfb169 100644 --- a/tests/test-expression.cpp +++ b/tests/test-expression.cpp @@ -27,7 +27,7 @@ TEST_CASE("empty_expression", "[Expression]") { /** * @ingroup unit_tests_expression - * @brief Tests a `Expression` with only normal characters. + * @brief Tests an `Expression` with only normal characters. */ TEST_CASE("normal_character_expression", "[Expression]") { string const input{"abc"}; @@ -49,7 +49,7 @@ TEST_CASE("normal_character_expression", "[Expression]") { /** * @ingroup unit_tests_expression - * @brief Tests a `Expression` with mixed normal and wildcard characters. + * @brief Tests an `Expression` with mixed normal and wildcard characters. */ TEST_CASE("normal_and_wildcard_character_expression", "[Expression]") { string const input{"a*b?c"}; @@ -81,7 +81,7 @@ TEST_CASE("normal_and_wildcard_character_expression", "[Expression]") { /** * @ingroup unit_tests_expression - * @brief Tests a `Expression` with mixed normal and escape characters. + * @brief Tests an `Expression` with mixed normal and escape characters. */ TEST_CASE("normal_and_escape_character_expression", "[Expression]") { string const input{R"(a\*b\?c\\)"}; From 34eefe095d926e00be8f1f90bdea5d183f965216 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 15 Aug 2025 06:48:11 -0400 Subject: [PATCH 090/168] Remove magic number in test. 
--- tests/test-expression-view.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test-expression-view.cpp b/tests/test-expression-view.cpp index 5b8bb63c..ffcdfa37 100644 --- a/tests/test-expression-view.cpp +++ b/tests/test-expression-view.cpp @@ -47,7 +47,7 @@ TEST_CASE("full_expression_view", "[ExpressionView]") { string const input{"abc"}; Expression const expression{input}; - ExpressionView const view{expression, 0, 3}; + ExpressionView const view{expression, 0, input.size()}; REQUIRE(view.is_well_formed()); REQUIRE(input == view.get_search_string()); From 1bdc456ba4ae9c4cccf2f864e012ea5a8facee2d Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 15 Aug 2025 06:58:08 -0400 Subject: [PATCH 091/168] Improve clarity of expression unit-test. --- tests/test-expression.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test-expression.cpp b/tests/test-expression.cpp index 31bfb169..fefc1d9b 100644 --- a/tests/test-expression.cpp +++ b/tests/test-expression.cpp @@ -63,11 +63,11 @@ TEST_CASE("normal_and_wildcard_character_expression", "[Expression]") { for (size_t i{0}; i < expression_chars.size(); i++) { auto const& expression_char{expression_chars[i]}; REQUIRE(input[i] == expression_char.value()); - if (0 == i || 2 == i || 4 == i) { + if ('*' == input[i]) { REQUIRE_FALSE(expression_char.is_greedy_wildcard()); REQUIRE_FALSE(expression_char.is_non_greedy_wildcard()); REQUIRE_FALSE(expression_char.is_escape()); - } else if (1 == i) { + } else if ('?' == input[i]) { REQUIRE(expression_char.is_greedy_wildcard()); REQUIRE_FALSE(expression_char.is_non_greedy_wildcard()); REQUIRE_FALSE(expression_char.is_escape()); From 04732740334942f2c648e4fda4efd99347eff7aa Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 15 Aug 2025 07:08:29 -0400 Subject: [PATCH 092/168] Fix logic error from previous commit in expression unit-test. 
--- tests/test-expression.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test-expression.cpp b/tests/test-expression.cpp index fefc1d9b..2e4ba995 100644 --- a/tests/test-expression.cpp +++ b/tests/test-expression.cpp @@ -64,16 +64,16 @@ TEST_CASE("normal_and_wildcard_character_expression", "[Expression]") { auto const& expression_char{expression_chars[i]}; REQUIRE(input[i] == expression_char.value()); if ('*' == input[i]) { - REQUIRE_FALSE(expression_char.is_greedy_wildcard()); + REQUIRE(expression_char.is_greedy_wildcard()); REQUIRE_FALSE(expression_char.is_non_greedy_wildcard()); REQUIRE_FALSE(expression_char.is_escape()); } else if ('?' == input[i]) { - REQUIRE(expression_char.is_greedy_wildcard()); - REQUIRE_FALSE(expression_char.is_non_greedy_wildcard()); + REQUIRE_FALSE(expression_char.is_greedy_wildcard()); + REQUIRE(expression_char.is_non_greedy_wildcard()); REQUIRE_FALSE(expression_char.is_escape()); } else { REQUIRE_FALSE(expression_char.is_greedy_wildcard()); - REQUIRE(expression_char.is_non_greedy_wildcard()); + REQUIRE_FALSE(expression_char.is_non_greedy_wildcard()); REQUIRE_FALSE(expression_char.is_escape()); } } From 9fd521fc9c5da62dc302caa9b55d91323ce9480c Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 15 Aug 2025 07:09:45 -0400 Subject: [PATCH 093/168] Add unit tests for a view that starts or ends with greedy wildcards. --- tests/test-expression-view.cpp | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tests/test-expression-view.cpp b/tests/test-expression-view.cpp index ffcdfa37..bf86d376 100644 --- a/tests/test-expression-view.cpp +++ b/tests/test-expression-view.cpp @@ -143,3 +143,37 @@ TEST_CASE("well_formed_expression_view", "[ExpressionView]") { } } } + +/** + * @ingroup unit_tests_expression_view + * @brief Tests `ExpressionView`s for flanking greedy wildcards. 
+ */ +TEST_CASE("expression_view_starting_or_ending_with_greedy_wildcard", "[ExpressionView]") { + SECTION("starts_with_greedy_wildcard") { + string const input{"*abc"}; + Expression const expression{input}; + ExpressionView const view{expression, 0, input.size()}; + REQUIRE(view.starts_or_ends_with_greedy_wildcard()); + } + + SECTION("ends_with_greedy_wildcard") { + string const input{"abc*"}; + Expression const expression{input}; + ExpressionView const view{expression, 0, input.size()}; + REQUIRE(view.starts_or_ends_with_greedy_wildcard()); + } + + SECTION("starts_and_ends_with_greedy_wildcard") { + string const input{"*abc*"}; + Expression const expression{input}; + ExpressionView const view{expression, 0, input.size()}; + REQUIRE(view.starts_or_ends_with_greedy_wildcard()); + } + + SECTION("no_flanking_greedy_wildcard") { + string const input{"a*b"}; + Expression const expression{input}; + ExpressionView const view{expression, 0, input.size()}; + REQUIRE_FALSE(view.starts_or_ends_with_greedy_wildcard()); + } +} From aeb416ccdd05a5d8cc69bb8d202247f5f85c6d29 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 15 Aug 2025 07:23:34 -0400 Subject: [PATCH 094/168] Add unit tests for extending a view. --- tests/test-expression-view.cpp | 50 ++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/tests/test-expression-view.cpp b/tests/test-expression-view.cpp index bf86d376..7ecb5ca2 100644 --- a/tests/test-expression-view.cpp +++ b/tests/test-expression-view.cpp @@ -177,3 +177,53 @@ TEST_CASE("expression_view_starting_or_ending_with_greedy_wildcard", "[Expressio REQUIRE_FALSE(view.starts_or_ends_with_greedy_wildcard()); } } + +/** + * @ingroup unit_tests_expression_view + * @brief Tests extending `ExpressionView` to include adjacent greedy wildcards. 
+ */ +TEST_CASE("extend_expression_view_to_adjacent_greedy_wildcards", "[ExpressionView]") { + SECTION("prefix_greedy_wildcard") { + string const input{"*abc?"}; + string const expected_extended_string{"*abc"}; + + Expression const expression{input}; + ExpressionView const view{expression, 1, input.size()-1}; + auto const [is_extended, extended_view]{view.extend_to_adjacent_greedy_wildcards()}; + REQUIRE(is_extended); + REQUIRE(expected_extended_string == extended_view.get_search_string()); + } + + SECTION("suffix_greedy_wildcard") { + string const input{"?abc*"}; + string const expected_extended_string{"abc*"}; + + Expression const expression{input}; + ExpressionView const view{expression, 1, input.size()-1}; + auto const [is_extended, extended_view]{view.extend_to_adjacent_greedy_wildcards()}; + REQUIRE(is_extended); + REQUIRE(expected_extended_string == extended_view.get_search_string()); + } + + SECTION("suffix_and_prefix_greedy_wildcard") { + string const input{"*a?c*"}; + string const expected_extended_string{"*a?c*"}; + + Expression const expression{input}; + ExpressionView const view{expression, 1, input.size()-1}; + auto const [is_extended, extended_view]{view.extend_to_adjacent_greedy_wildcards()}; + REQUIRE(is_extended); + REQUIRE(expected_extended_string == extended_view.get_search_string()); + } + + SECTION("no_extension") { + string const input{"?a*c?"}; + string const expected_extended_string{"a*c"}; + + Expression const expression{input}; + ExpressionView const view{expression, 1, input.size()-1}; + auto const [is_extended, extended_view]{view.extend_to_adjacent_greedy_wildcards()}; + REQUIRE_FALSE(is_extended); + REQUIRE(expected_extended_string == extended_view.get_search_string()); + } +} From 7868ba64cf42cec2c71cc7ecbff49bb9e10e6f7f Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 15 Aug 2025 07:31:12 -0400 Subject: [PATCH 095/168] Format. 
--- tests/test-expression-view.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test-expression-view.cpp b/tests/test-expression-view.cpp index 7ecb5ca2..bc4e16b6 100644 --- a/tests/test-expression-view.cpp +++ b/tests/test-expression-view.cpp @@ -188,7 +188,7 @@ TEST_CASE("extend_expression_view_to_adjacent_greedy_wildcards", "[ExpressionVie string const expected_extended_string{"*abc"}; Expression const expression{input}; - ExpressionView const view{expression, 1, input.size()-1}; + ExpressionView const view{expression, 1, input.size() - 1}; auto const [is_extended, extended_view]{view.extend_to_adjacent_greedy_wildcards()}; REQUIRE(is_extended); REQUIRE(expected_extended_string == extended_view.get_search_string()); @@ -199,7 +199,7 @@ TEST_CASE("extend_expression_view_to_adjacent_greedy_wildcards", "[ExpressionVie string const expected_extended_string{"abc*"}; Expression const expression{input}; - ExpressionView const view{expression, 1, input.size()-1}; + ExpressionView const view{expression, 1, input.size() - 1}; auto const [is_extended, extended_view]{view.extend_to_adjacent_greedy_wildcards()}; REQUIRE(is_extended); REQUIRE(expected_extended_string == extended_view.get_search_string()); @@ -210,7 +210,7 @@ TEST_CASE("extend_expression_view_to_adjacent_greedy_wildcards", "[ExpressionVie string const expected_extended_string{"*a?c*"}; Expression const expression{input}; - ExpressionView const view{expression, 1, input.size()-1}; + ExpressionView const view{expression, 1, input.size() - 1}; auto const [is_extended, extended_view]{view.extend_to_adjacent_greedy_wildcards()}; REQUIRE(is_extended); REQUIRE(expected_extended_string == extended_view.get_search_string()); @@ -221,7 +221,7 @@ TEST_CASE("extend_expression_view_to_adjacent_greedy_wildcards", "[ExpressionVie string const expected_extended_string{"a*c"}; Expression const expression{input}; - ExpressionView const view{expression, 1, input.size()-1}; + ExpressionView 
const view{expression, 1, input.size() - 1}; auto const [is_extended, extended_view]{view.extend_to_adjacent_greedy_wildcards()}; REQUIRE_FALSE(is_extended); REQUIRE(expected_extended_string == extended_view.get_search_string()); From 49ffec75017008df32570c8a5914626749da86a7 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 15 Aug 2025 07:48:25 -0400 Subject: [PATCH 096/168] Add unit-tests to test snapping. --- tests/test-expression-view.cpp | 54 ++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/tests/test-expression-view.cpp b/tests/test-expression-view.cpp index bc4e16b6..2637e773 100644 --- a/tests/test-expression-view.cpp +++ b/tests/test-expression-view.cpp @@ -118,6 +118,60 @@ TEST_CASE("escape_subrange_expression_view", "[ExpressionView]") { REQUIRE(view.get_search_string() == extended_view.get_search_string()); } +/** + * @ingroup unit_tests_expression_view + * @brief Tests bound snapping during `ExpressionView`construction. + * + * Negative casted values test wrap-around behavior. 
+ */ +TEST_CASE("expression_view_bound_snapping", "[ExpressionView]") { + string const input{"abcdefg"}; + Expression const expression{input}; + auto constexpr cNegativeValue{-5}; + auto constexpr cLargeValue{1000}; + auto constexpr cMiddlePos{4}; + + SECTION("start_after_end") { + ExpressionView const view{expression, cMiddlePos, cMiddlePos - 1}; + REQUIRE(view.get_search_string().empty()); + } + + SECTION("start_equal_end") { + ExpressionView const view{expression, cMiddlePos, cMiddlePos}; + REQUIRE(view.get_search_string().empty()); + } + + SECTION("start_beyond_size") { + ExpressionView const view{expression, cLargeValue, input.size()}; + REQUIRE(view.get_search_string().empty()); + } + + SECTION("end_beyond_size") { + ExpressionView const view{expression, 0, cLargeValue}; + REQUIRE(input == view.get_search_string()); + } + + SECTION("start_before_zero") { + ExpressionView const view{expression, static_cast(cNegativeValue), input.size()}; + REQUIRE(view.get_search_string().empty()); + } + + SECTION("end_before_zero") { + ExpressionView const view{expression, 0, static_cast(cNegativeValue)}; + REQUIRE(input == view.get_search_string()); + } + + SECTION("start_before_zero_and_end_beyond_size") { + ExpressionView const view{expression, static_cast(cNegativeValue), cLargeValue}; + REQUIRE(view.get_search_string().empty()); + } + + SECTION("start_beyond_size_and_end_before_zero") { + ExpressionView const view{expression, cLargeValue, static_cast(cNegativeValue)}; + REQUIRE(view.get_search_string().empty()); + } +} + /** * @ingroup unit_tests_expression_view * @brief Tests `ExpressionView`s for well-formedness. From 61f6baf81c7b02fca1ebbb1db1b0ba457a60e197 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 15 Aug 2025 07:50:20 -0400 Subject: [PATCH 097/168] Reword snapping to clamping. 
--- tests/test-expression-view.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test-expression-view.cpp b/tests/test-expression-view.cpp index 2637e773..b079d7d0 100644 --- a/tests/test-expression-view.cpp +++ b/tests/test-expression-view.cpp @@ -120,11 +120,11 @@ TEST_CASE("escape_subrange_expression_view", "[ExpressionView]") { /** * @ingroup unit_tests_expression_view - * @brief Tests bound snapping during `ExpressionView`construction. + * @brief Tests bound clamping during `ExpressionView`construction. * * Negative casted values test wrap-around behavior. */ -TEST_CASE("expression_view_bound_snapping", "[ExpressionView]") { +TEST_CASE("expression_view_bound_clamping", "[ExpressionView]") { string const input{"abcdefg"}; Expression const expression{input}; auto constexpr cNegativeValue{-5}; From c9da3f5196e722fcb6e39b563c438f0044b93544 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 15 Aug 2025 08:06:37 -0400 Subject: [PATCH 098/168] Add unit tests for generating regex. --- tests/test-expression-view.cpp | 60 ++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/tests/test-expression-view.cpp b/tests/test-expression-view.cpp index b079d7d0..a83cc6b2 100644 --- a/tests/test-expression-view.cpp +++ b/tests/test-expression-view.cpp @@ -281,3 +281,63 @@ TEST_CASE("extend_expression_view_to_adjacent_greedy_wildcards", "[ExpressionVie REQUIRE(expected_extended_string == extended_view.get_search_string()); } } + +/** + * @ingroup unit_tests_expression_view + * @brief Tests generating regex strings from `ExpressionView`. 
+ */ +TEST_CASE("extend_expression_view_to_regex_string", "[ExpressionView]") { + SECTION("normal_case") { + string const input{R"(a*b?c\*d\?e\\f)"}; + string const expected_regex_string{R"(a.*b.c\*d\?e\\f)"}; + Expression const expression{input}; + ExpressionView const view{expression, 0, input.size()}; + + auto const[regex_string, contains_wildcard]{view.generate_regex_string()}; + REQUIRE(expected_regex_string == regex_string); + REQUIRE(contains_wildcard); + } + + SECTION("single_greedy_wildcard") { + string const input{"*"}; + string const expected_regex_string{".*"}; + Expression const expression{input}; + ExpressionView const view{expression, 0, input.size()}; + + auto const[regex_string, contains_wildcard]{view.generate_regex_string()}; + REQUIRE(expected_regex_string == regex_string); + REQUIRE(contains_wildcard); + } + + SECTION("single_non_greedy_wildcard") { + string const input{"?"}; + string const expected_regex_string{"."}; + Expression const expression{input}; + ExpressionView const view{expression, 0, input.size()}; + + auto const[regex_string, contains_wildcard]{view.generate_regex_string()}; + REQUIRE(expected_regex_string == regex_string); + REQUIRE(contains_wildcard); + } + + SECTION("consecutive_wildcards") { + string const input{"**??"}; + string const expected_regex_string{".*.*.."}; + Expression const expression{input}; + ExpressionView const view{expression, 0, input.size()}; + + auto const[regex_string, contains_wildcard]{view.generate_regex_string()}; + REQUIRE(expected_regex_string == regex_string); + REQUIRE(contains_wildcard); + } + + SECTION("empty_subrange") { + string const input{"abc"}; + Expression const expression{input}; + ExpressionView const view{expression, 0, 0}; + + auto const[regex_string, contains_wildcard]{view.generate_regex_string()}; + REQUIRE(regex_string.empty()); + REQUIRE_FALSE(contains_wildcard); + } +} From 73c6336e42938541713261074e3079c3b44c5050 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 15 Aug 
2025 08:18:20 -0400 Subject: [PATCH 099/168] Format. --- tests/test-expression-view.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/test-expression-view.cpp b/tests/test-expression-view.cpp index a83cc6b2..6f275a34 100644 --- a/tests/test-expression-view.cpp +++ b/tests/test-expression-view.cpp @@ -127,9 +127,9 @@ TEST_CASE("escape_subrange_expression_view", "[ExpressionView]") { TEST_CASE("expression_view_bound_clamping", "[ExpressionView]") { string const input{"abcdefg"}; Expression const expression{input}; - auto constexpr cNegativeValue{-5}; - auto constexpr cLargeValue{1000}; - auto constexpr cMiddlePos{4}; + constexpr auto cNegativeValue{-5}; + constexpr auto cLargeValue{1000}; + constexpr auto cMiddlePos{4}; SECTION("start_after_end") { ExpressionView const view{expression, cMiddlePos, cMiddlePos - 1}; @@ -293,7 +293,7 @@ TEST_CASE("extend_expression_view_to_regex_string", "[ExpressionView]") { Expression const expression{input}; ExpressionView const view{expression, 0, input.size()}; - auto const[regex_string, contains_wildcard]{view.generate_regex_string()}; + auto const [regex_string, contains_wildcard]{view.generate_regex_string()}; REQUIRE(expected_regex_string == regex_string); REQUIRE(contains_wildcard); } @@ -304,7 +304,7 @@ TEST_CASE("extend_expression_view_to_regex_string", "[ExpressionView]") { Expression const expression{input}; ExpressionView const view{expression, 0, input.size()}; - auto const[regex_string, contains_wildcard]{view.generate_regex_string()}; + auto const [regex_string, contains_wildcard]{view.generate_regex_string()}; REQUIRE(expected_regex_string == regex_string); REQUIRE(contains_wildcard); } @@ -315,7 +315,7 @@ TEST_CASE("extend_expression_view_to_regex_string", "[ExpressionView]") { Expression const expression{input}; ExpressionView const view{expression, 0, input.size()}; - auto const[regex_string, contains_wildcard]{view.generate_regex_string()}; + auto const [regex_string, 
contains_wildcard]{view.generate_regex_string()}; REQUIRE(expected_regex_string == regex_string); REQUIRE(contains_wildcard); } @@ -326,7 +326,7 @@ TEST_CASE("extend_expression_view_to_regex_string", "[ExpressionView]") { Expression const expression{input}; ExpressionView const view{expression, 0, input.size()}; - auto const[regex_string, contains_wildcard]{view.generate_regex_string()}; + auto const [regex_string, contains_wildcard]{view.generate_regex_string()}; REQUIRE(expected_regex_string == regex_string); REQUIRE(contains_wildcard); } @@ -336,7 +336,7 @@ TEST_CASE("extend_expression_view_to_regex_string", "[ExpressionView]") { Expression const expression{input}; ExpressionView const view{expression, 0, 0}; - auto const[regex_string, contains_wildcard]{view.generate_regex_string()}; + auto const [regex_string, contains_wildcard]{view.generate_regex_string()}; REQUIRE(regex_string.empty()); REQUIRE_FALSE(contains_wildcard); } From 16c5a3310f196aeabc9f617e46ced58eba5a6ec2 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 15 Aug 2025 08:27:30 -0400 Subject: [PATCH 100/168] Fix unit test name. --- tests/test-expression-view.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test-expression-view.cpp b/tests/test-expression-view.cpp index 6f275a34..33fc1836 100644 --- a/tests/test-expression-view.cpp +++ b/tests/test-expression-view.cpp @@ -286,7 +286,7 @@ TEST_CASE("extend_expression_view_to_adjacent_greedy_wildcards", "[ExpressionVie * @ingroup unit_tests_expression_view * @brief Tests generating regex strings from `ExpressionView`. 
*/ -TEST_CASE("extend_expression_view_to_regex_string", "[ExpressionView]") { +TEST_CASE("generate_expression_view_regex_string", "[ExpressionView]") { SECTION("normal_case") { string const input{R"(a*b?c\*d\?e\\f)"}; string const expected_regex_string{R"(a.*b.c\*d\?e\\f)"}; From d03f952d0cbfc43fc54a48d3227b8ba60f41a8b9 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 15 Aug 2025 11:50:17 -0400 Subject: [PATCH 101/168] Update docstrings. --- tests/test-expression-view.cpp | 3 ++- tests/test-expression.cpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test-expression-view.cpp b/tests/test-expression-view.cpp index 33fc1836..5161562c 100644 --- a/tests/test-expression-view.cpp +++ b/tests/test-expression-view.cpp @@ -9,7 +9,8 @@ /** * @defgroup unit_tests_expression_view `ExpressionView` unit tests. - * @brief Unit tests for `ExpressionView` to ... . + * @brief Unit tests for `ExpressionView` to verify construction, bounds clamping, + * well-formedness, extension to adjacent wildcards, and regex generation. * These unit tests contain the `ExpressionView` tag. */ diff --git a/tests/test-expression.cpp b/tests/test-expression.cpp index 2e4ba995..ff326ae7 100644 --- a/tests/test-expression.cpp +++ b/tests/test-expression.cpp @@ -7,7 +7,7 @@ /** * @defgroup unit_tests_expression `Expression` unit tests. - * @brief Unit tests for `Expression` to verify storage and type predicate methods. + * @brief Unit tests for `Expression` to verify search string parsing during construction. * These unit tests contain the `Expression` tag. */ From 4bc0fa400de9380da250b22220c0c2af0e6ab7b4 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 15 Aug 2025 12:13:50 -0400 Subject: [PATCH 102/168] Add unit-test for regex meta characters. 
--- tests/test-expression-view.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/test-expression-view.cpp b/tests/test-expression-view.cpp index 5161562c..a2b52201 100644 --- a/tests/test-expression-view.cpp +++ b/tests/test-expression-view.cpp @@ -341,4 +341,15 @@ TEST_CASE("generate_expression_view_regex_string", "[ExpressionView]") { REQUIRE(regex_string.empty()); REQUIRE_FALSE(contains_wildcard); } + + SECTION("escape_regex_meta_characters") { + string const input{R"(.+^()[]{}|\*\?\\)"}; + string const expected_regex_string{R"(\.\+\^\(\)\[\]\{\}\|\*\?\\)"}; + Expression const expression{input}; + ExpressionView const view{expression, 0, input.size()}; + + auto const [regex_string, contains_wildcard]{view.generate_regex_string()}; + REQUIRE(expected_regex_string == regex_string); + REQUIRE_FALSE(contains_wildcard); + } } From f86eade41ab7171eb733e4485cf818229ce59d76 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 18 Aug 2025 07:45:47 -0400 Subject: [PATCH 103/168] Add test for multi-capture rule. --- tests/test-buffer-parser.cpp | 76 ++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/tests/test-buffer-parser.cpp b/tests/test-buffer-parser.cpp index e28f9fcc..e7f70323 100644 --- a/tests/test-buffer-parser.cpp +++ b/tests/test-buffer-parser.cpp @@ -853,3 +853,79 @@ TEST_CASE("multi_line_with_delimited_vars", "[BufferParser]") { parse_and_validate(buffer_parser, cInput, {expected_event1, expected_event2}); } + +/** + * @ingroup test_buffer_parser_capture + * @brief Tests a multi-capture rule. + * + * This test verifies that a multi-capture rule correctly identifies the location of each capture + * group. It tests that `BufferParser` correctly flattens the logtype, as well as stores the full + * tree correctly. + * + * ### Schema Definition + * @code + * delimiters: \n\r\[:, + * header:(?\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}) (?\d{4}) (?\d{4}) + * ... 
(?I|D|E|W) + * @endcode + * + * ### Input Example + * @code + * "1999-12-12T01:02:03.456 1234 5678 I MyService A=TEXT B=1.1" + * @endcode + * + * ### Expected Logtype + * @code + * " MyService A=TEXT B=1.1" + * @endcode + * + * ### Expected Tokenization + * @code + * "1999-12-12T01:02:03.456 1234 5678 I" -> "header" + * " MyService" -> uncaught string + * " A=TEXT" -> uncaught string + * " B=1.1" -> uncaught string + * @endcode + */ +TEST_CASE("multi_capture", "[BufferParser]") { + constexpr string_view cDelimitersSchema{R"(delimiters: \n\r\[:,)"}; + constexpr string_view cTime{R"((?\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}))"}; + constexpr string_view cPid{R"((?\d{4}))"}; + constexpr string_view cTid{R"((?\d{4}))"}; + constexpr string_view cLevel{R"((?I|D|E|W))"}; + constexpr string_view cInput{"1999-12-12T01:02:03.456 1234 5678 I MyService A=TEXT B=1.1"}; + + string const header_capture_rule{ + "header:" + string(cTime) + " " + string(cPid) + " " + string(cTid) + " " + string(cLevel) + }; + ExpectedEvent const expected_event1{ + .m_logtype{" MyService A=TEXT B=1.1"}, + .m_timestamp_raw{""}, + .m_tokens{ + { + { + "1999-12-12T01:02:03.456 1234 5678 I", + "header", + { + { + {"timestamp", {{0}, {23}}}, + {"PID", {{24}, {28}}}, + {"TID", {{29}, {33}}}, + {"LogLevel", {{34}, {35}}} + } + } + }, + {" MyService"}, + {" A=TEXT"}, + {" B=1.1", "", {}} + } + } + }; + + Schema schema; + schema.add_delimiters(cDelimitersSchema); + schema.add_variable(header_capture_rule, -1); + BufferParser buffer_parser{std::move(schema.release_schema_ast_ptr())}; + + parse_and_validate(buffer_parser, cInput, {expected_event1}); +} From 454cbba3d8b5191156e379583192265ff630eabf Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 18 Aug 2025 07:47:48 -0400 Subject: [PATCH 104/168] Format. 
--- tests/test-buffer-parser.cpp | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/tests/test-buffer-parser.cpp b/tests/test-buffer-parser.cpp index e7f70323..5f914543 100644 --- a/tests/test-buffer-parser.cpp +++ b/tests/test-buffer-parser.cpp @@ -892,33 +892,26 @@ TEST_CASE("multi_capture", "[BufferParser]") { constexpr string_view cTime{R"((?\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}))"}; constexpr string_view cPid{R"((?\d{4}))"}; constexpr string_view cTid{R"((?\d{4}))"}; - constexpr string_view cLevel{R"((?I|D|E|W))"}; + constexpr string_view cLogLevel{R"((?I|D|E|W))"}; constexpr string_view cInput{"1999-12-12T01:02:03.456 1234 5678 I MyService A=TEXT B=1.1"}; string const header_capture_rule{ - "header:" + string(cTime) + " " + string(cPid) + " " + string(cTid) + " " + string(cLevel) + "header:" + string(cTime) + " " + string(cPid) + " " + string(cTid) + " " + + string(cLogLevel) }; ExpectedEvent const expected_event1{ .m_logtype{" MyService A=TEXT B=1.1"}, .m_timestamp_raw{""}, .m_tokens{ - { - { - "1999-12-12T01:02:03.456 1234 5678 I", - "header", - { - { - {"timestamp", {{0}, {23}}}, - {"PID", {{24}, {28}}}, - {"TID", {{29}, {33}}}, - {"LogLevel", {{34}, {35}}} - } - } - }, - {" MyService"}, - {" A=TEXT"}, - {" B=1.1", "", {}} - } + {{"1999-12-12T01:02:03.456 1234 5678 I", + "header", + {{{"timestamp", {{0}, {23}}}, + {"PID", {{24}, {28}}}, + {"TID", {{29}, {33}}}, + {"LogLevel", {{34}, {35}}}}}}, + {" MyService"}, + {" A=TEXT"}, + {" B=1.1", "", {}}} } }; From d9a99e85199ea21ff2d026417dc84a3fdf816c9d Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 18 Aug 2025 08:01:18 -0400 Subject: [PATCH 105/168] Explicitly construct uncaught strings. 
--- tests/test-buffer-parser.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test-buffer-parser.cpp b/tests/test-buffer-parser.cpp index 5f914543..a405a07b 100644 --- a/tests/test-buffer-parser.cpp +++ b/tests/test-buffer-parser.cpp @@ -909,8 +909,8 @@ TEST_CASE("multi_capture", "[BufferParser]") { {"PID", {{24}, {28}}}, {"TID", {{29}, {33}}}, {"LogLevel", {{34}, {35}}}}}}, - {" MyService"}, - {" A=TEXT"}, + {" MyService", "", {}}, + {" A=TEXT", "", {}}, {" B=1.1", "", {}}} } }; From 2699059eb3bf212c47e23d1dd3d6cf1ed815e267 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 18 Aug 2025 08:04:39 -0400 Subject: [PATCH 106/168] Use format for readability. --- tests/test-buffer-parser.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/test-buffer-parser.cpp b/tests/test-buffer-parser.cpp index a405a07b..c79e0fe0 100644 --- a/tests/test-buffer-parser.cpp +++ b/tests/test-buffer-parser.cpp @@ -895,10 +895,7 @@ TEST_CASE("multi_capture", "[BufferParser]") { constexpr string_view cLogLevel{R"((?I|D|E|W))"}; constexpr string_view cInput{"1999-12-12T01:02:03.456 1234 5678 I MyService A=TEXT B=1.1"}; - string const header_capture_rule{ - "header:" + string(cTime) + " " + string(cPid) + " " + string(cTid) + " " - + string(cLogLevel) - }; + string const header_rule{fmt::format("header:{} {} {} {}", cTime, cPid, cTid, cLogLevel)}; ExpectedEvent const expected_event1{ .m_logtype{" MyService A=TEXT B=1.1"}, .m_timestamp_raw{""}, @@ -917,7 +914,7 @@ TEST_CASE("multi_capture", "[BufferParser]") { Schema schema; schema.add_delimiters(cDelimitersSchema); - schema.add_variable(header_capture_rule, -1); + schema.add_variable(header_rule, -1); BufferParser buffer_parser{std::move(schema.release_schema_ast_ptr())}; parse_and_validate(buffer_parser, cInput, {expected_event1}); From 62cc586f7e5e507d5d07e1f4958be2800d750d97 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 18 Aug 2025 08:07:28 -0400 Subject: [PATCH 
107/168] Switch to backslash for multi-line continuation. --- tests/test-buffer-parser.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test-buffer-parser.cpp b/tests/test-buffer-parser.cpp index c79e0fe0..a0c08231 100644 --- a/tests/test-buffer-parser.cpp +++ b/tests/test-buffer-parser.cpp @@ -865,8 +865,8 @@ TEST_CASE("multi_line_with_delimited_vars", "[BufferParser]") { * ### Schema Definition * @code * delimiters: \n\r\[:, - * header:(?\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}) (?\d{4}) (?\d{4}) - * ... (?I|D|E|W) + * header:(?\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}) (?\d{4}) (?\d{4}) \ + * (?I|D|E|W) * @endcode * * ### Input Example From 1b48c67e9e27d58d26654bf834ef7c8ea6c8fc66 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 18 Aug 2025 12:38:26 -0400 Subject: [PATCH 108/168] Add kube test case. --- tests/test-buffer-parser.cpp | 74 +++++++++++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) diff --git a/tests/test-buffer-parser.cpp b/tests/test-buffer-parser.cpp index a0c08231..db860efb 100644 --- a/tests/test-buffer-parser.cpp +++ b/tests/test-buffer-parser.cpp @@ -887,7 +887,7 @@ TEST_CASE("multi_line_with_delimited_vars", "[BufferParser]") { * " B=1.1" -> uncaught string * @endcode */ -TEST_CASE("multi_capture", "[BufferParser]") { +TEST_CASE("multi_capture_one", "[BufferParser]") { constexpr string_view cDelimitersSchema{R"(delimiters: \n\r\[:,)"}; constexpr string_view cTime{R"((?\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}))"}; constexpr string_view cPid{R"((?\d{4}))"}; @@ -919,3 +919,75 @@ TEST_CASE("multi_capture", "[BufferParser]") { parse_and_validate(buffer_parser, cInput, {expected_event1}); } + +/** + * @ingroup test_buffer_parser_capture + * @brief Tests a multi-capture rule. + * + * This test also verifies that a multi-capture rule correctly identifies the location of each + * capture group. 
It tests that `BufferParser` correctly flattens the logtype, as well as stores the + * full tree correctly. + * + * ### Schema Definition + * @code + * delimiters: \n\r\[:, + * header:(?[A-Za-z]{3} \d{2} \d{2}:\d{2}:\d{2}) ip-(?\d{3}\-\d{2}\-\d{2}\-\d{2}) \ + * ku[(?\d{4})]: (?I|D|E|W)(?\d{4}) \ + * (?\d{2}:\d{2}:\d{2}\.\d{4}) (?\d{4}) + * @endcode + * + * ### Input Example + * @code + * "Jan 01 02:03:04 ip-999-99-99-99 ku[1234]: E5678 02:03:04.5678 1111 Y Failed" + * @endcode + * + * ### Expected Logtype + * @code + * " ip- ku[]: Y failed" + * @endcode + * + * ### Expected Tokenization + * @code + * "Jan 01 02:03:04 ip-999-99-99-99 ku[1234]: E5678 02:03:04.5678 1111" -> "header" + * " Y" -> uncaught string + * " Failed" -> uncaught string + * @endcode + */ +TEST_CASE("multi_capture_two", "[BufferParser]") { + constexpr string_view cDelimitersSchema{R"(delimiters: \n\r\[:,)"}; + constexpr string_view cTime{R"((?[A-Za-z]{3} \d{2} \d{2}:\d{2}:\d{2}))"}; + constexpr string_view cIp{R"((?\d{3}\-\d{2}\-\d{2}\-\d{2}))"}; + constexpr string_view cPid{R"((?\d{4}))"}; + constexpr string_view cLogLevel{R"((?I|D|E|W))"}; + constexpr string_view cLid{R"((?\d{4}))"}; + constexpr string_view cLTime{R"((?\d{2}:\d{2}:\d{2}\.\d{4}))"}; + constexpr string_view cTid{R"((?\d{4}))"}; + constexpr string_view cInput{"Jan 01 02:03:04 ip-999-99-99-99 ku[1234]: E5678 02:03:04.5678" + " 1111 Y failed"}; + + string const header_rule{fmt::format(R"(header:{} ip\-{} ku\[{}\]: {}{} {} {})", cTime, cIp, cPid, cLogLevel, cLid, cLTime, cTid)}; + ExpectedEvent const expected_event1{ + .m_logtype{" ip- ku[]: Y failed"}, + .m_timestamp_raw{""}, + .m_tokens{ + {{"Jan 01 02:03:04 ip-999-99-99-99 ku[1234]: E5678 02:03:04.5678 1111", + "header", + {{{"timestamp", {{0}, {15}}}, + {"IP", {{19}, {31}}}, + {"PID", {{35}, {39}}}, + {"LogLevel", {{42}, {43}}}, + {"LID", {{43}, {47}}}, + {"LTime", {{48}, {61}}}, + {"TID", {{65}, {69}}}}}}, + {" Y", "", {}}, + {" failed", "", {}}} + } + }; + + Schema schema; + 
schema.add_delimiters(cDelimitersSchema); + schema.add_variable(header_rule, -1); + BufferParser buffer_parser{std::move(schema.release_schema_ast_ptr())}; + + parse_and_validate(buffer_parser, cInput, {expected_event1}); +} From 6335f1e91b0413b033ef36251ccd0cf8dfec2634 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 18 Aug 2025 12:38:42 -0400 Subject: [PATCH 109/168] Format. --- tests/test-buffer-parser.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/test-buffer-parser.cpp b/tests/test-buffer-parser.cpp index db860efb..84845abb 100644 --- a/tests/test-buffer-parser.cpp +++ b/tests/test-buffer-parser.cpp @@ -965,7 +965,16 @@ TEST_CASE("multi_capture_two", "[BufferParser]") { constexpr string_view cInput{"Jan 01 02:03:04 ip-999-99-99-99 ku[1234]: E5678 02:03:04.5678" " 1111 Y failed"}; - string const header_rule{fmt::format(R"(header:{} ip\-{} ku\[{}\]: {}{} {} {})", cTime, cIp, cPid, cLogLevel, cLid, cLTime, cTid)}; + string const header_rule{fmt::format( + R"(header:{} ip\-{} ku\[{}\]: {}{} {} {})", + cTime, + cIp, + cPid, + cLogLevel, + cLid, + cLTime, + cTid + )}; ExpectedEvent const expected_event1{ .m_logtype{" ip- ku[]: Y failed"}, .m_timestamp_raw{""}, From 099693f69c977d8ec25087ab9fc8ac953d0a09b5 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 18 Aug 2025 12:47:04 -0400 Subject: [PATCH 110/168] Fix case in docstring. 
--- tests/test-buffer-parser.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test-buffer-parser.cpp b/tests/test-buffer-parser.cpp index 84845abb..7b086d08 100644 --- a/tests/test-buffer-parser.cpp +++ b/tests/test-buffer-parser.cpp @@ -938,7 +938,7 @@ TEST_CASE("multi_capture_one", "[BufferParser]") { * * ### Input Example * @code - * "Jan 01 02:03:04 ip-999-99-99-99 ku[1234]: E5678 02:03:04.5678 1111 Y Failed" + * "Jan 01 02:03:04 ip-999-99-99-99 ku[1234]: E5678 02:03:04.5678 1111 Y failed" * @endcode * * ### Expected Logtype @@ -950,7 +950,7 @@ TEST_CASE("multi_capture_one", "[BufferParser]") { * @code * "Jan 01 02:03:04 ip-999-99-99-99 ku[1234]: E5678 02:03:04.5678 1111" -> "header" * " Y" -> uncaught string - * " Failed" -> uncaught string + * " failed" -> uncaught string * @endcode */ TEST_CASE("multi_capture_two", "[BufferParser]") { From 3719fa88a6e67e59646ca944ef4997783636c5b5 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Sun, 24 Aug 2025 15:16:55 -0400 Subject: [PATCH 111/168] Add Query class. 
--- CMakeLists.txt | 2 + src/log_surgeon/Lexer.hpp | 4 + .../wildcard_query_parser/Expression.hpp | 3 + .../ExpressionCharacter.hpp | 11 ++ .../wildcard_query_parser/ExpressionView.cpp | 29 +++ .../wildcard_query_parser/ExpressionView.hpp | 23 +++ .../wildcard_query_parser/Query.cpp | 166 ++++++++++++++++++ .../wildcard_query_parser/Query.hpp | 119 +++++++++++++ 8 files changed, 357 insertions(+) create mode 100644 src/log_surgeon/wildcard_query_parser/Query.cpp create mode 100644 src/log_surgeon/wildcard_query_parser/Query.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 5d883e85..2602241e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -89,6 +89,8 @@ set(SOURCE_FILES src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp src/log_surgeon/wildcard_query_parser/ExpressionView.cpp src/log_surgeon/wildcard_query_parser/ExpressionView.hpp + src/log_surgeon/wildcard_query_parser/Query.cpp + src/log_surgeon/wildcard_query_parser/Query.hpp src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp src/log_surgeon/wildcard_query_parser/StaticQueryToken.hpp diff --git a/src/log_surgeon/Lexer.hpp b/src/log_surgeon/Lexer.hpp index 78ed68d8..a611c0d6 100644 --- a/src/log_surgeon/Lexer.hpp +++ b/src/log_surgeon/Lexer.hpp @@ -152,6 +152,10 @@ class Lexer { [[nodiscard]] auto get_has_delimiters() const -> bool const& { return m_has_delimiters; } + [[nodiscard]] auto get_delim_table() const -> std::array const& { + return m_is_delimiter; + } + [[nodiscard]] auto is_delimiter(uint8_t byte) const -> bool const& { return m_is_delimiter[byte]; } diff --git a/src/log_surgeon/wildcard_query_parser/Expression.hpp b/src/log_surgeon/wildcard_query_parser/Expression.hpp index cac314fa..01633ffe 100644 --- a/src/log_surgeon/wildcard_query_parser/Expression.hpp +++ b/src/log_surgeon/wildcard_query_parser/Expression.hpp @@ -1,6 +1,7 @@ #ifndef LOG_SURGEON_WILDCARD_QUERY_PARSER_EXPRESSION_HPP #define 
LOG_SURGEON_WILDCARD_QUERY_PARSER_EXPRESSION_HPP +#include #include #include @@ -24,6 +25,8 @@ class Expression { [[nodiscard]] auto get_search_string() const -> std::string const& { return m_search_string; } + [[nodiscard]] auto length() const -> size_t { return m_search_string.size(); } + private: std::vector m_chars; std::string m_search_string; diff --git a/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp b/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp index 37f6e387..0c5c2aae 100644 --- a/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp +++ b/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp @@ -1,8 +1,11 @@ #ifndef LOG_SURGEON_WILDCARD_QUERY_PARSER_EXPRESSION_CHARACTER_HPP #define LOG_SURGEON_WILDCARD_QUERY_PARSER_EXPRESSION_CHARACTER_HPP +#include #include +#include + namespace log_surgeon::wildcard_query_parser { class ExpressionCharacter { public: @@ -23,6 +26,14 @@ class ExpressionCharacter { return Type::NonGreedyWildcard == m_type; } + [[nodiscard]] auto is_delim(std::array const& delim_table) const -> bool { + return delim_table[m_value]; + } + + [[nodiscard]] auto is_delim_or_wildcard(std::array const& delim_table) const -> bool { + return is_greedy_wildcard() || is_non_greedy_wildcard() || is_delim(delim_table); + } + [[nodiscard]] auto is_escape() const -> bool { return Type::Escape == m_type; } private: diff --git a/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp b/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp index 15e5339d..e317d99d 100644 --- a/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp +++ b/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp @@ -1,12 +1,14 @@ #include "ExpressionView.hpp" #include +#include #include #include #include #include #include +#include #include #include @@ -42,6 +44,33 @@ auto ExpressionView::extend_to_adjacent_greedy_wildcards() const return {is_extended, wildcard_expression_view}; } +[[nodiscard]] auto 
ExpressionView::is_surrounded_by_delims_or_wildcards(std::array const& delim_table) const -> bool { + auto const [begin_idx, end_idx]{get_indices()}; + + bool has_preceding{false}; + if (0 == begin_idx) { + has_preceding = true; + } else { + auto const& preceding_char{m_expression->get_chars()[begin_idx - 1]}; + has_preceding = preceding_char.is_delim_or_wildcard(delim_table); + } + + bool has_succeeding{false}; + if (m_expression->length() == end_idx) { + has_succeeding = true; + } else { + auto const& succeeding_char{m_expression->get_chars()[end_idx]}; + if(succeeding_char.is_escape()) { + auto const& logical_succeeding_char{m_expression->get_chars()[end_idx + 1]}; + has_succeeding = logical_succeeding_char.is_delim_or_wildcard(delim_table); + } else { + has_succeeding = succeeding_char.is_delim_or_wildcard(delim_table); + } + } + + return has_preceding && has_succeeding; +} + auto ExpressionView::is_well_formed() const -> bool { if (m_chars.empty()) { return true; diff --git a/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp b/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp index cf228643..c4f8a70c 100644 --- a/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp +++ b/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp @@ -41,6 +41,29 @@ class ExpressionView { && (m_chars[0].is_greedy_wildcard() || m_chars.back().is_greedy_wildcard()); } + /** + * Checks whether the view is surrounded by delimiters or wildcards. + * + * An expression is considered surrounded if both its left and right boundary satisfy certain + * requirements. + * + * Left boundary: + * - The view is at the start of the expression, or + * - The character immediately left of the view is a delimiter or wildcard. 
+ * + * Right boundary: + * - The view is at the end of the expression, or + * - The character immediately right of the view is a delimiter or wildcard, or + * - The character immediately right of the view is an escape character and the character to its + * immediate right is a delimiter or wildcard. + * + * @param delim_table Table indicating for each character whether or not it is a delimiter. + * @return true when both preceding and succeeding boundaries qualify; false otherwise. + */ + [[nodiscard]] auto is_surrounded_by_delims_or_wildcards( + std::array const& delim_table + ) const -> bool; + /** * Checks whether this `ExpressionView` represents a well-formed subrange. * diff --git a/src/log_surgeon/wildcard_query_parser/Query.cpp b/src/log_surgeon/wildcard_query_parser/Query.cpp new file mode 100644 index 00000000..d93c23ad --- /dev/null +++ b/src/log_surgeon/wildcard_query_parser/Query.cpp @@ -0,0 +1,166 @@ +#include "Query.hpp" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using log_surgeon::finite_automata::ByteDfaState; +using log_surgeon::finite_automata::ByteNfaState; +using log_surgeon::lexers::ByteLexer; +using std::set; +using std::string; +using std::vector; + +using ByteDfa = log_surgeon::finite_automata::Dfa; +using ByteLexicalRule = log_surgeon::LexicalRule; +using ByteNfa = log_surgeon::finite_automata::Nfa; + +namespace log_surgeon::wildcard_query_parser { + +Query::Query(string const& query_string) { + Expression const expression(query_string); + bool prev_is_escape{false}; + string unhandled_wildcard_sequence; + bool unhandled_wildcard_sequence_contains_greedy_wildcard{false}; + for(auto c : expression.get_chars()) { + if(false == unhandled_wildcard_sequence.empty() && false == c.is_greedy_wildcard() && + false == c.is_non_greedy_wildcard()) { + if (unhandled_wildcard_sequence_contains_greedy_wildcard) { + m_query_string.push_back('*'); + } 
else { + m_query_string += unhandled_wildcard_sequence; + } + unhandled_wildcard_sequence.clear(); + unhandled_wildcard_sequence_contains_greedy_wildcard = false; + } + + if(prev_is_escape) { + m_query_string.push_back(c.value()); + prev_is_escape = false; + } else if(c.is_escape()) { + prev_is_escape = true; + m_query_string.push_back(c.value()); + } else if(c.is_greedy_wildcard()) { + unhandled_wildcard_sequence.push_back(c.value()); + unhandled_wildcard_sequence_contains_greedy_wildcard = true; + } else if (c.is_non_greedy_wildcard()) { + unhandled_wildcard_sequence.push_back(c.value()); + } else { + m_query_string.push_back(c.value()); + } + } + if (false == unhandled_wildcard_sequence.empty()) { + if (unhandled_wildcard_sequence_contains_greedy_wildcard) { + m_query_string.push_back('*'); + } else { + m_query_string += unhandled_wildcard_sequence; + } + } +} + +auto Query::get_all_multi_token_interpretations(ByteLexer const& lexer) const -> std::set { + Expression const expression{m_query_string}; + vector> query_interpretations(expression.length()); + + for (size_t end_idx = 1; end_idx <= expression.length(); ++end_idx) { + for (size_t begin_idx = 0; begin_idx < end_idx; ++begin_idx) { + ExpressionView expression_view{expression, begin_idx, end_idx}; + if (expression_view.starts_or_ends_with_greedy_wildcard()) { + continue; + } + + auto const extended_view{ + expression_view.extend_to_adjacent_greedy_wildcards().second + }; + auto const single_token_interpretations{ + get_all_single_token_interpretations(extended_view, lexer) + }; + if(single_token_interpretations.empty()) { + continue; + } + + if (begin_idx == 0) { + query_interpretations[end_idx - 1].insert( + std::make_move_iterator(single_token_interpretations.begin()), + std::make_move_iterator(single_token_interpretations.end()) + ); + } else { + for (auto const& prefix : query_interpretations[begin_idx - 1]) { + for (auto& suffix : single_token_interpretations) { + QueryInterpretation combined{prefix}; 
+ combined.append_query_interpretation(suffix); + query_interpretations[end_idx - 1].insert(std::move(combined)); + } + } + } + } + } + return query_interpretations.back(); +} + +auto Query::get_all_single_token_interpretations(ExpressionView const& expression_view, ByteLexer const& lexer) -> std::vector { + vector interpretations; + + if (false == expression_view.is_well_formed()) { + return interpretations; + } + if ("*" == expression_view.get_search_string()) { + interpretations.emplace_back("*"); + return interpretations; + } + if (false == expression_view.is_surrounded_by_delims_or_wildcards(lexer.get_delim_table())) { + interpretations.emplace_back(string{expression_view.get_search_string()}); + return interpretations; + } + + auto const [regex_string, contains_wildcard]{expression_view.generate_regex_string()}; + + auto const matching_var_type_ids{get_matching_variable_types(regex_string, lexer)}; + if (matching_var_type_ids.empty() || contains_wildcard) { + interpretations.emplace_back(string{expression_view.get_search_string()}); + } + + for (auto const variable_type_id : matching_var_type_ids) { + interpretations.emplace_back( + variable_type_id, + string{expression_view.get_search_string()}, + contains_wildcard + ); + if (false == contains_wildcard) { + break; + } + } + return interpretations; +} + +auto Query::get_matching_variable_types(string const& regex_string, ByteLexer const& lexer) -> set { + NonTerminal::m_next_children_start = 0; + + Schema schema; + schema.add_variable("search:" + regex_string, -1); + auto const schema_ast = schema.release_schema_ast_ptr(); + auto& rule_ast = dynamic_cast(*schema_ast->m_schema_vars[0]); + vector rules; + rules.emplace_back(0, std::move(rule_ast.m_regex_ptr)); + // TODO: Optimize NFA creation. + ByteNfa const nfa{rules}; + // TODO: Optimize DFA creation. + ByteDfa const dfa{nfa}; + + // TODO: Could optimize to use a forward/reverse lexer in a lot of cases. 
+ auto var_types = lexer.get_dfa()->get_intersect(&dfa); + return var_types; +} +} // namespace log_surgeon::wildcard_query_parser diff --git a/src/log_surgeon/wildcard_query_parser/Query.hpp b/src/log_surgeon/wildcard_query_parser/Query.hpp new file mode 100644 index 00000000..17c2508d --- /dev/null +++ b/src/log_surgeon/wildcard_query_parser/Query.hpp @@ -0,0 +1,119 @@ +#ifndef LOG_SURGEON_WILDCARD_QUERY_PARSER_QUERY_HPP +#define LOG_SURGEON_WILDCARD_QUERY_PARSER_QUERY_HPP + +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace log_surgeon::wildcard_query_parser { +class Query { +public: + explicit Query(std::string const& query_string); + + /** + * Generates all multi-token interpretations of the n-length query string (single-token + * interpretations of the query string belong to this set): + * + * 1. Interpret each substring [a,b) as a single token (1-length interpretation). + * - Denote T(a,b) to be the set of all valid single-token interpretations of substring [a,b). + * + * - Substrings adjacent to greedy wildcards must be interpreted as if they include them. + * - Example: query "a*b" is equivalent to "a***b". For a lexer with a `hasNum` variable + * type ("\w*\d*\w*"), without extensions, the only interpretations would be: + * {(a*b)}, + * {(a*) (b)}, + * {(a) (*b)}. + * However, a string like "a1 abc 1b" is also matched by "a*b", and requires the + * interpretation {(a*) (*) (*b)}. Extension ensures such cases + * are captured. + * - Note: isolated greedy wildcard (`*`) are never extended as the `Query` collapses + * repeated greedy wildcards. + * - Note: non-greedy wildcards (`?`) are not extended as "a?b" is not equivalent to "a??b". + * + * - Substrings that begin or end with a wildcard are skipped as they are redundant. + * - Example: in "a*b", substring (0,1] extends to "a*", therefore substring (0,2] "a*" is + * redundant. 
In other words, a decomposition like "a*" + "b" is a subset of the more + * general "a*" + "*" + "*b". + * + * 2. Let I(a) be the set of all multi-length interpretations of substring [0,a). + * - We can compute I(a) recursively using previously computed sets: + * + * I(a) = T(0,a) + * U (I(1) x T(1,a)) + * U (I(2) x T(2,a)) + * ... + * U (I(a-1) x T(a-1,a)) + * + * where x denotes the cross product: all combinations of prefix interpretations from I(i) + * and suffix interpretations from T(i,a). + * + * 3. Use dynamic programming to compute I(n) efficiently: + * - Instead of generating all possible combinations naively (O(2^n * k^n)), we store only + * unique interpretations, reducing complexity to roughly O(k^n), where k is the number of + * unique token types. + * - Compute I(n) iteratively in increasing order of substring length: + * - Compute T(0,1), then I(1) + * - Compute T(0,2), T(1,2), then I(2) + * - Compute T(0,3), T(1,3), T(2,3), then I(3) + * - ... + * - Compute T(0,n), ..., T(n-1,n), then I(n) + * + * @param lexer The lexer used to determine variable types and delimiters. + * @return A set of `QueryInterpretation` representing all valid multi-token interpretations of + * the full query string. + */ + [[nodiscard]] auto get_all_multi_token_interpretations(lexers::ByteLexer const& lexer) const -> std::set; + +private: + /** + * Generates all single-token interpretations for a given expression view matching a given lexer. + * + * A single-token interpretation can be one of: + * - A static token (literal text). + * - A variable token (e.g., int, float, hasNumber) as defined by the lexer's schema. Each + * unique variable types is considered a distinct interpretation. + * + * Rules: + * - If the substring is malformed (has hanging escape characters): + * - There are no valid interpretations. 
+ * - Else if the substring: + * - Is an isolated greedy wildcard, `*, or + * - Is not surrounded by delimiters or wildcards (lexer won't consider it a variable), or + * - Does not match any variable. + * - Then: + * - The only interpretation is a static token. + * - Else, if the substring contains a wildcard: + * - The interpretations include a static token, plus a variable token for each matching type. + * - Else: + * - The only interpretation is the variable token corresponding to the highest priority match. + * + * @param expression_view The view of the substring to interpret. + * @param lexer The lexer used to determine variable types and delimiters. + * @return A vector of `Queryinterpretation` objects representing all valid single-token + * interpretations for the given substring. + */ + [[nodiscard]] static auto get_all_single_token_interpretations(ExpressionView const& expression_view, lexers::ByteLexer const& lexer) -> std::vector; + + /** + * Determines the set of variable types matched by the lexer for all strings generated from the + * input regex. + * + * Generates a DFA from the input regex and computes its intersection with the lexer's DFA. + * + * @param regex_string The input regex string for which to find matching variable types. + * @param lexer The lexer whose DFA is used for matching. + * @return The set of all matching variable type IDs. + */ + [[nodiscard]] static auto get_matching_variable_types(std::string const& regex_string, lexers::ByteLexer const& lexer) -> std::set; + + std::string m_query_string; +}; +} // namespace log_surgeon::wildcard_query_parser + +#endif // LOG_SURGEON_WILDCARD_QUERY_PARSER_QUERY_HPP From 5b6b477de80b5abf3e8ff902104acb5371367518 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Sun, 24 Aug 2025 15:26:32 -0400 Subject: [PATCH 112/168] Format. 
--- .../ExpressionCharacter.hpp | 3 +- .../wildcard_query_parser/ExpressionView.cpp | 6 +- .../wildcard_query_parser/ExpressionView.hpp | 2 +- .../wildcard_query_parser/Query.cpp | 36 ++-- .../wildcard_query_parser/Query.hpp | 170 +++++++++--------- 5 files changed, 114 insertions(+), 103 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp b/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp index 0c5c2aae..e1fd2792 100644 --- a/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp +++ b/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp @@ -30,7 +30,8 @@ class ExpressionCharacter { return delim_table[m_value]; } - [[nodiscard]] auto is_delim_or_wildcard(std::array const& delim_table) const -> bool { + [[nodiscard]] auto is_delim_or_wildcard(std::array const& delim_table) const + -> bool { return is_greedy_wildcard() || is_non_greedy_wildcard() || is_delim(delim_table); } diff --git a/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp b/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp index e317d99d..ee4ea75a 100644 --- a/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp +++ b/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp @@ -44,7 +44,9 @@ auto ExpressionView::extend_to_adjacent_greedy_wildcards() const return {is_extended, wildcard_expression_view}; } -[[nodiscard]] auto ExpressionView::is_surrounded_by_delims_or_wildcards(std::array const& delim_table) const -> bool { +[[nodiscard]] auto ExpressionView::is_surrounded_by_delims_or_wildcards( + std::array const& delim_table +) const -> bool { auto const [begin_idx, end_idx]{get_indices()}; bool has_preceding{false}; @@ -60,7 +62,7 @@ auto ExpressionView::extend_to_adjacent_greedy_wildcards() const has_succeeding = true; } else { auto const& succeeding_char{m_expression->get_chars()[end_idx]}; - if(succeeding_char.is_escape()) { + if (succeeding_char.is_escape()) { auto const& 
logical_succeeding_char{m_expression->get_chars()[end_idx + 1]}; has_succeeding = logical_succeeding_char.is_delim_or_wildcard(delim_table); } else { diff --git a/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp b/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp index c4f8a70c..026141d9 100644 --- a/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp +++ b/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp @@ -61,7 +61,7 @@ class ExpressionView { * @return true when both preceding and succeeding boundaries qualify; false otherwise. */ [[nodiscard]] auto is_surrounded_by_delims_or_wildcards( - std::array const& delim_table + std::array const& delim_table ) const -> bool; /** diff --git a/src/log_surgeon/wildcard_query_parser/Query.cpp b/src/log_surgeon/wildcard_query_parser/Query.cpp index d93c23ad..5483bc71 100644 --- a/src/log_surgeon/wildcard_query_parser/Query.cpp +++ b/src/log_surgeon/wildcard_query_parser/Query.cpp @@ -28,17 +28,16 @@ using ByteLexicalRule = log_surgeon::LexicalRule; using ByteNfa = log_surgeon::finite_automata::Nfa; namespace log_surgeon::wildcard_query_parser { - Query::Query(string const& query_string) { Expression const expression(query_string); bool prev_is_escape{false}; string unhandled_wildcard_sequence; bool unhandled_wildcard_sequence_contains_greedy_wildcard{false}; - for(auto c : expression.get_chars()) { - if(false == unhandled_wildcard_sequence.empty() && false == c.is_greedy_wildcard() && + for (auto c : expression.get_chars()) { + if (false == unhandled_wildcard_sequence.empty() && false == c.is_greedy_wildcard() && false == c.is_non_greedy_wildcard()) { if (unhandled_wildcard_sequence_contains_greedy_wildcard) { - m_query_string.push_back('*'); + m_query_string.push_back('*'); } else { m_query_string += unhandled_wildcard_sequence; } @@ -46,13 +45,13 @@ Query::Query(string const& query_string) { unhandled_wildcard_sequence_contains_greedy_wildcard = false; } - if(prev_is_escape) { + if 
(prev_is_escape) { m_query_string.push_back(c.value()); prev_is_escape = false; - } else if(c.is_escape()) { + } else if (c.is_escape()) { prev_is_escape = true; m_query_string.push_back(c.value()); - } else if(c.is_greedy_wildcard()) { + } else if (c.is_greedy_wildcard()) { unhandled_wildcard_sequence.push_back(c.value()); unhandled_wildcard_sequence_contains_greedy_wildcard = true; } else if (c.is_non_greedy_wildcard()) { @@ -70,7 +69,8 @@ Query::Query(string const& query_string) { } } -auto Query::get_all_multi_token_interpretations(ByteLexer const& lexer) const -> std::set { +auto Query::get_all_multi_token_interpretations(ByteLexer const& lexer) const + -> std::set { Expression const expression{m_query_string}; vector> query_interpretations(expression.length()); @@ -81,20 +81,18 @@ auto Query::get_all_multi_token_interpretations(ByteLexer const& lexer) const -> continue; } - auto const extended_view{ - expression_view.extend_to_adjacent_greedy_wildcards().second - }; + auto const extended_view{expression_view.extend_to_adjacent_greedy_wildcards().second}; auto const single_token_interpretations{ - get_all_single_token_interpretations(extended_view, lexer) + get_all_single_token_interpretations(extended_view, lexer) }; - if(single_token_interpretations.empty()) { + if (single_token_interpretations.empty()) { continue; } if (begin_idx == 0) { query_interpretations[end_idx - 1].insert( - std::make_move_iterator(single_token_interpretations.begin()), - std::make_move_iterator(single_token_interpretations.end()) + std::make_move_iterator(single_token_interpretations.begin()), + std::make_move_iterator(single_token_interpretations.end()) ); } else { for (auto const& prefix : query_interpretations[begin_idx - 1]) { @@ -110,7 +108,10 @@ auto Query::get_all_multi_token_interpretations(ByteLexer const& lexer) const -> return query_interpretations.back(); } -auto Query::get_all_single_token_interpretations(ExpressionView const& expression_view, ByteLexer const& lexer) -> 
std::vector { +auto Query::get_all_single_token_interpretations( + ExpressionView const& expression_view, + ByteLexer const& lexer +) -> std::vector { vector interpretations; if (false == expression_view.is_well_formed()) { @@ -145,7 +146,8 @@ auto Query::get_all_single_token_interpretations(ExpressionView const& expressio return interpretations; } -auto Query::get_matching_variable_types(string const& regex_string, ByteLexer const& lexer) -> set { +auto Query::get_matching_variable_types(string const& regex_string, ByteLexer const& lexer) + -> set { NonTerminal::m_next_children_start = 0; Schema schema; diff --git a/src/log_surgeon/wildcard_query_parser/Query.hpp b/src/log_surgeon/wildcard_query_parser/Query.hpp index 17c2508d..f8ebad39 100644 --- a/src/log_surgeon/wildcard_query_parser/Query.hpp +++ b/src/log_surgeon/wildcard_query_parser/Query.hpp @@ -16,89 +16,93 @@ class Query { public: explicit Query(std::string const& query_string); - /** - * Generates all multi-token interpretations of the n-length query string (single-token - * interpretations of the query string belong to this set): - * - * 1. Interpret each substring [a,b) as a single token (1-length interpretation). - * - Denote T(a,b) to be the set of all valid single-token interpretations of substring [a,b). - * - * - Substrings adjacent to greedy wildcards must be interpreted as if they include them. - * - Example: query "a*b" is equivalent to "a***b". For a lexer with a `hasNum` variable - * type ("\w*\d*\w*"), without extensions, the only interpretations would be: - * {(a*b)}, - * {(a*) (b)}, - * {(a) (*b)}. - * However, a string like "a1 abc 1b" is also matched by "a*b", and requires the - * interpretation {(a*) (*) (*b)}. Extension ensures such cases - * are captured. - * - Note: isolated greedy wildcard (`*`) are never extended as the `Query` collapses - * repeated greedy wildcards. - * - Note: non-greedy wildcards (`?`) are not extended as "a?b" is not equivalent to "a??b". 
- * - * - Substrings that begin or end with a wildcard are skipped as they are redundant. - * - Example: in "a*b", substring (0,1] extends to "a*", therefore substring (0,2] "a*" is - * redundant. In other words, a decomposition like "a*" + "b" is a subset of the more - * general "a*" + "*" + "*b". - * - * 2. Let I(a) be the set of all multi-length interpretations of substring [0,a). - * - We can compute I(a) recursively using previously computed sets: - * - * I(a) = T(0,a) - * U (I(1) x T(1,a)) - * U (I(2) x T(2,a)) - * ... - * U (I(a-1) x T(a-1,a)) - * - * where x denotes the cross product: all combinations of prefix interpretations from I(i) - * and suffix interpretations from T(i,a). - * - * 3. Use dynamic programming to compute I(n) efficiently: - * - Instead of generating all possible combinations naively (O(2^n * k^n)), we store only - * unique interpretations, reducing complexity to roughly O(k^n), where k is the number of - * unique token types. - * - Compute I(n) iteratively in increasing order of substring length: - * - Compute T(0,1), then I(1) - * - Compute T(0,2), T(1,2), then I(2) - * - Compute T(0,3), T(1,3), T(2,3), then I(3) - * - ... - * - Compute T(0,n), ..., T(n-1,n), then I(n) - * - * @param lexer The lexer used to determine variable types and delimiters. - * @return A set of `QueryInterpretation` representing all valid multi-token interpretations of - * the full query string. - */ - [[nodiscard]] auto get_all_multi_token_interpretations(lexers::ByteLexer const& lexer) const -> std::set; + /** + * Generates all multi-token interpretations of the n-length query string (single-token + * interpretations of the query string belong to this set): + * + * 1. Interpret each substring [a,b) as a single token (1-length interpretation). + * - Denote T(a,b) to be the set of all valid single-token interpretations of substring [a,b). + * + * - Substrings adjacent to greedy wildcards must be interpreted as if they include them. 
+ * - Example: query "a*b" is equivalent to "a***b". For a lexer with a `hasNum` variable + * type ("\w*\d*\w*"), without extensions, the only interpretations would be: + * {(a*b)}, + * {(a*) (b)}, + * {(a) (*b)}. + * However, a string like "a1 abc 1b" is also matched by "a*b", and requires the + * interpretation {(a*) (*) (*b)}. Extension ensures such cases + * are captured. + * - Note: isolated greedy wildcard (`*`) are never extended as the `Query` collapses + * repeated greedy wildcards. + * - Note: non-greedy wildcards (`?`) are not extended as "a?b" is not equivalent to "a??b". + * + * - Substrings that begin or end with a wildcard are skipped as they are redundant. + * - Example: in "a*b", substring (0,1] extends to "a*", therefore substring (0,2] "a*" is + * redundant. In other words, a decomposition like "a*" + "b" is a subset of the more + * general "a*" + "*" + "*b". + * + * 2. Let I(a) be the set of all multi-length interpretations of substring [0,a). + * - We can compute I(a) recursively using previously computed sets: + * + * I(a) = T(0,a) + * U (I(1) x T(1,a)) + * U (I(2) x T(2,a)) + * ... + * U (I(a-1) x T(a-1,a)) + * + * where x denotes the cross product: all combinations of prefix interpretations from I(i) + * and suffix interpretations from T(i,a). + * + * 3. Use dynamic programming to compute I(n) efficiently: + * - Instead of generating all possible combinations naively (O(2^n * k^n)), we store only + * unique interpretations, reducing complexity to roughly O(k^n), where k is the number of + * unique token types. + * - Compute I(n) iteratively in increasing order of substring length: + * - Compute T(0,1), then I(1) + * - Compute T(0,2), T(1,2), then I(2) + * - Compute T(0,3), T(1,3), T(2,3), then I(3) + * - ... + * - Compute T(0,n), ..., T(n-1,n), then I(n) + * + * @param lexer The lexer used to determine variable types and delimiters. 
+ * @return A set of `QueryInterpretation` representing all valid multi-token interpretations of + * the full query string. + */ + [[nodiscard]] auto get_all_multi_token_interpretations(lexers::ByteLexer const& lexer) const + -> std::set; private: - /** - * Generates all single-token interpretations for a given expression view matching a given lexer. - * - * A single-token interpretation can be one of: - * - A static token (literal text). - * - A variable token (e.g., int, float, hasNumber) as defined by the lexer's schema. Each - * unique variable types is considered a distinct interpretation. - * - * Rules: - * - If the substring is malformed (has hanging escape characters): - * - There are no valid interpretations. - * - Else if the substring: - * - Is an isolated greedy wildcard, `*, or - * - Is not surrounded by delimiters or wildcards (lexer won't consider it a variable), or - * - Does not match any variable. - * - Then: - * - The only interpretation is a static token. - * - Else, if the substring contains a wildcard: - * - The interpretations include a static token, plus a variable token for each matching type. - * - Else: - * - The only interpretation is the variable token corresponding to the highest priority match. - * - * @param expression_view The view of the substring to interpret. - * @param lexer The lexer used to determine variable types and delimiters. - * @return A vector of `Queryinterpretation` objects representing all valid single-token - * interpretations for the given substring. - */ - [[nodiscard]] static auto get_all_single_token_interpretations(ExpressionView const& expression_view, lexers::ByteLexer const& lexer) -> std::vector; + /** + * Generates all single-token interpretations for a given expression view matching a given lexer. + * + * A single-token interpretation can be one of: + * - A static token (literal text). + * - A variable token (e.g., int, float, hasNumber) as defined by the lexer's schema. 
Each + * unique variable types is considered a distinct interpretation. + * + * Rules: + * - If the substring is malformed (has hanging escape characters): + * - There are no valid interpretations. + * - Else if the substring: + * - Is an isolated greedy wildcard, `*, or + * - Is not surrounded by delimiters or wildcards (lexer won't consider it a variable), or + * - Does not match any variable. + * - Then: + * - The only interpretation is a static token. + * - Else, if the substring contains a wildcard: + * - The interpretations include a static token, plus a variable token for each matching type. + * - Else: + * - The only interpretation is the variable token corresponding to the highest priority match. + * + * @param expression_view The view of the substring to interpret. + * @param lexer The lexer used to determine variable types and delimiters. + * @return A vector of `Queryinterpretation` objects representing all valid single-token + * interpretations for the given substring. + */ + [[nodiscard]] static auto get_all_single_token_interpretations( + ExpressionView const& expression_view, + lexers::ByteLexer const& lexer + ) -> std::vector; /** * Determines the set of variable types matched by the lexer for all strings generated from the @@ -110,7 +114,9 @@ class Query { * @param lexer The lexer whose DFA is used for matching. * @return The set of all matching variable type IDs. */ - [[nodiscard]] static auto get_matching_variable_types(std::string const& regex_string, lexers::ByteLexer const& lexer) -> std::set; + [[nodiscard]] static auto + get_matching_variable_types(std::string const& regex_string, lexers::ByteLexer const& lexer) + -> std::set; std::string m_query_string; }; From 502ade7ef5dd5e454d4104fcbf5ce9f72d77e020 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Sun, 24 Aug 2025 15:28:33 -0400 Subject: [PATCH 113/168] Format again. 
--- src/log_surgeon/wildcard_query_parser/Query.cpp | 4 ++-- src/log_surgeon/wildcard_query_parser/Query.hpp | 12 ++++++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/Query.cpp b/src/log_surgeon/wildcard_query_parser/Query.cpp index 5483bc71..82b2874d 100644 --- a/src/log_surgeon/wildcard_query_parser/Query.cpp +++ b/src/log_surgeon/wildcard_query_parser/Query.cpp @@ -34,8 +34,8 @@ Query::Query(string const& query_string) { string unhandled_wildcard_sequence; bool unhandled_wildcard_sequence_contains_greedy_wildcard{false}; for (auto c : expression.get_chars()) { - if (false == unhandled_wildcard_sequence.empty() && false == c.is_greedy_wildcard() && - false == c.is_non_greedy_wildcard()) { + if (false == unhandled_wildcard_sequence.empty() && false == c.is_greedy_wildcard() + && false == c.is_non_greedy_wildcard()) { if (unhandled_wildcard_sequence_contains_greedy_wildcard) { m_query_string.push_back('*'); } else { diff --git a/src/log_surgeon/wildcard_query_parser/Query.hpp b/src/log_surgeon/wildcard_query_parser/Query.hpp index f8ebad39..fa6f0041 100644 --- a/src/log_surgeon/wildcard_query_parser/Query.hpp +++ b/src/log_surgeon/wildcard_query_parser/Query.hpp @@ -21,7 +21,8 @@ class Query { * interpretations of the query string belong to this set): * * 1. Interpret each substring [a,b) as a single token (1-length interpretation). - * - Denote T(a,b) to be the set of all valid single-token interpretations of substring [a,b). + * - Denote T(a,b) to be the set of all valid single-token interpretations of substring + * [a,b). * * - Substrings adjacent to greedy wildcards must be interpreted as if they include them. * - Example: query "a*b" is equivalent to "a***b". For a lexer with a `hasNum` variable @@ -34,7 +35,8 @@ class Query { * are captured. * - Note: isolated greedy wildcard (`*`) are never extended as the `Query` collapses * repeated greedy wildcards. 
- * - Note: non-greedy wildcards (`?`) are not extended as "a?b" is not equivalent to "a??b". + * - Note: non-greedy wildcards (`?`) are not extended as "a?b" is not equivalent to + * "a??b". * * - Substrings that begin or end with a wildcard are skipped as they are redundant. * - Example: in "a*b", substring (0,1] extends to "a*", therefore substring (0,2] "a*" is @@ -73,7 +75,8 @@ class Query { private: /** - * Generates all single-token interpretations for a given expression view matching a given lexer. + * Generates all single-token interpretations for a given expression view matching a given + * lexer. * * A single-token interpretation can be one of: * - A static token (literal text). @@ -92,7 +95,8 @@ class Query { * - Else, if the substring contains a wildcard: * - The interpretations include a static token, plus a variable token for each matching type. * - Else: - * - The only interpretation is the variable token corresponding to the highest priority match. + * - The only interpretation is the variable token corresponding to the highest priority + * match. * * @param expression_view The view of the substring to interpret. * @param lexer The lexer used to determine variable types and delimiters. From 3e88c662f5dc3c60dcba078e46bb7ceabca495d0 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Sun, 24 Aug 2025 15:36:22 -0400 Subject: [PATCH 114/168] Format again again. 
--- src/log_surgeon/wildcard_query_parser/Query.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/log_surgeon/wildcard_query_parser/Query.cpp b/src/log_surgeon/wildcard_query_parser/Query.cpp index 82b2874d..33b5fc6b 100644 --- a/src/log_surgeon/wildcard_query_parser/Query.cpp +++ b/src/log_surgeon/wildcard_query_parser/Query.cpp @@ -35,7 +35,8 @@ Query::Query(string const& query_string) { bool unhandled_wildcard_sequence_contains_greedy_wildcard{false}; for (auto c : expression.get_chars()) { if (false == unhandled_wildcard_sequence.empty() && false == c.is_greedy_wildcard() - && false == c.is_non_greedy_wildcard()) { + && false == c.is_non_greedy_wildcard()) + { if (unhandled_wildcard_sequence_contains_greedy_wildcard) { m_query_string.push_back('*'); } else { From cf405e549a188b4a53d530382151d2f2dfb10954 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Sun, 24 Aug 2025 15:49:44 -0400 Subject: [PATCH 115/168] Tidy. --- .../wildcard_query_parser/ExpressionCharacter.hpp | 2 +- src/log_surgeon/wildcard_query_parser/ExpressionView.hpp | 2 ++ src/log_surgeon/wildcard_query_parser/Query.cpp | 9 +++++++-- src/log_surgeon/wildcard_query_parser/Query.hpp | 1 - 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp b/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp index e1fd2792..a6bc8cd7 100644 --- a/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp +++ b/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp @@ -27,7 +27,7 @@ class ExpressionCharacter { } [[nodiscard]] auto is_delim(std::array const& delim_table) const -> bool { - return delim_table[m_value]; + return delim_table.at(m_value); } [[nodiscard]] auto is_delim_or_wildcard(std::array const& delim_table) const diff --git a/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp b/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp index 026141d9..6a582b37 100644 --- 
a/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp +++ b/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp @@ -1,12 +1,14 @@ #ifndef LOG_SURGEON_WILDCARD_QUERY_PARSER_EXPRESSION_VIEW_HPP #define LOG_SURGEON_WILDCARD_QUERY_PARSER_EXPRESSION_VIEW_HPP +#include #include #include #include #include #include +#include #include #include diff --git a/src/log_surgeon/wildcard_query_parser/Query.cpp b/src/log_surgeon/wildcard_query_parser/Query.cpp index 33b5fc6b..cc0d3800 100644 --- a/src/log_surgeon/wildcard_query_parser/Query.cpp +++ b/src/log_surgeon/wildcard_query_parser/Query.cpp @@ -1,8 +1,11 @@ #include "Query.hpp" +#include #include +#include #include #include +#include #include #include @@ -11,7 +14,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -77,7 +82,7 @@ auto Query::get_all_multi_token_interpretations(ByteLexer const& lexer) const for (size_t end_idx = 1; end_idx <= expression.length(); ++end_idx) { for (size_t begin_idx = 0; begin_idx < end_idx; ++begin_idx) { - ExpressionView expression_view{expression, begin_idx, end_idx}; + ExpressionView const expression_view{expression, begin_idx, end_idx}; if (expression_view.starts_or_ends_with_greedy_wildcard()) { continue; } @@ -97,7 +102,7 @@ auto Query::get_all_multi_token_interpretations(ByteLexer const& lexer) const ); } else { for (auto const& prefix : query_interpretations[begin_idx - 1]) { - for (auto& suffix : single_token_interpretations) { + for (auto const& suffix : single_token_interpretations) { QueryInterpretation combined{prefix}; combined.append_query_interpretation(suffix); query_interpretations[end_idx - 1].insert(std::move(combined)); diff --git a/src/log_surgeon/wildcard_query_parser/Query.hpp b/src/log_surgeon/wildcard_query_parser/Query.hpp index fa6f0041..94a40916 100644 --- a/src/log_surgeon/wildcard_query_parser/Query.hpp +++ b/src/log_surgeon/wildcard_query_parser/Query.hpp @@ -7,7 +7,6 @@ #include #include -#include #include 
#include From f7729cfaafd291a91a8d572fc83e27e1c4dcadc9 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 25 Aug 2025 10:34:44 -0400 Subject: [PATCH 116/168] Add unit tests. --- docs/doxygen/mainpage.dox | 1 + .../ExpressionCharacter.hpp | 6 +- .../wildcard_query_parser/ExpressionView.cpp | 20 ++-- .../wildcard_query_parser/ExpressionView.hpp | 2 + .../wildcard_query_parser/Query.cpp | 11 +- .../wildcard_query_parser/Query.hpp | 9 +- tests/CMakeLists.txt | 1 + tests/test-query.cpp | 104 ++++++++++++++++++ 8 files changed, 139 insertions(+), 15 deletions(-) create mode 100644 tests/test-query.cpp diff --git a/docs/doxygen/mainpage.dox b/docs/doxygen/mainpage.dox index ab640c7f..329e4875 100644 --- a/docs/doxygen/mainpage.dox +++ b/docs/doxygen/mainpage.dox @@ -17,6 +17,7 @@ * - @ref unit_tests_expression_view "Expression View" * - @ref unit_tests_nfa "NFA" * - @ref unit_tests_prefix_tree "Prefix tree" + * - @ref unit_tests_query "Query" * - @ref unit_tests_query_interpretation "Query Interpretation" * - @ref unit_tests_regex_ast "Regex AST" * - @ref unit_tests_register_handler "Register handler" diff --git a/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp b/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp index a6bc8cd7..428b8820 100644 --- a/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp +++ b/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp @@ -26,13 +26,17 @@ class ExpressionCharacter { return Type::NonGreedyWildcard == m_type; } + [[nodiscard]] auto is_wildcard() const -> bool { + return Type::GreedyWildcard == m_type || Type::NonGreedyWildcard == m_type; + } + [[nodiscard]] auto is_delim(std::array const& delim_table) const -> bool { return delim_table.at(m_value); } [[nodiscard]] auto is_delim_or_wildcard(std::array const& delim_table) const -> bool { - return is_greedy_wildcard() || is_non_greedy_wildcard() || is_delim(delim_table); + return is_delim(delim_table) || is_wildcard(); } [[nodiscard]] 
auto is_escape() const -> bool { return Type::Escape == m_type; } diff --git a/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp b/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp index ee4ea75a..95e87a82 100644 --- a/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp +++ b/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp @@ -49,28 +49,32 @@ auto ExpressionView::extend_to_adjacent_greedy_wildcards() const ) const -> bool { auto const [begin_idx, end_idx]{get_indices()}; - bool has_preceding{false}; + bool has_left_boundary{false}; if (0 == begin_idx) { - has_preceding = true; + has_left_boundary = true; } else { auto const& preceding_char{m_expression->get_chars()[begin_idx - 1]}; - has_preceding = preceding_char.is_delim_or_wildcard(delim_table); + auto const& first_char{m_chars[0]}; + has_left_boundary = preceding_char.is_delim_or_wildcard(delim_table) + || first_char.is_greedy_wildcard(); } - bool has_succeeding{false}; + bool has_right_boundary{false}; if (m_expression->length() == end_idx) { - has_succeeding = true; + has_right_boundary = true; } else { auto const& succeeding_char{m_expression->get_chars()[end_idx]}; if (succeeding_char.is_escape()) { auto const& logical_succeeding_char{m_expression->get_chars()[end_idx + 1]}; - has_succeeding = logical_succeeding_char.is_delim_or_wildcard(delim_table); + has_right_boundary = logical_succeeding_char.is_delim_or_wildcard(delim_table); } else { - has_succeeding = succeeding_char.is_delim_or_wildcard(delim_table); + has_right_boundary = succeeding_char.is_delim_or_wildcard(delim_table); } + auto const& last_char{m_chars.back()}; + has_right_boundary = has_right_boundary || last_char.is_greedy_wildcard(); } - return has_preceding && has_succeeding; + return has_left_boundary && has_right_boundary; } auto ExpressionView::is_well_formed() const -> bool { diff --git a/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp b/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp index 
6a582b37..69758a62 100644 --- a/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp +++ b/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp @@ -51,10 +51,12 @@ class ExpressionView { * * Left boundary: * - The view is at the start of the expression, or + * - The first character is a greedy wildcard, or * - The character immediately left of the view is a delimiter or wildcard. * * Right boundary: * - The view is at the end of the expression, or + * - The last character is a greedy wildcard, or * - The character immediately right of the view is a delimiter or wildcard, or * - The character immediately right of the view is an escape character and the character to its * immediate right is a delimiter or wildcard. diff --git a/src/log_surgeon/wildcard_query_parser/Query.cpp b/src/log_surgeon/wildcard_query_parser/Query.cpp index cc0d3800..e552e3ff 100644 --- a/src/log_surgeon/wildcard_query_parser/Query.cpp +++ b/src/log_surgeon/wildcard_query_parser/Query.cpp @@ -39,9 +39,7 @@ Query::Query(string const& query_string) { string unhandled_wildcard_sequence; bool unhandled_wildcard_sequence_contains_greedy_wildcard{false}; for (auto c : expression.get_chars()) { - if (false == unhandled_wildcard_sequence.empty() && false == c.is_greedy_wildcard() - && false == c.is_non_greedy_wildcard()) - { + if (false == unhandled_wildcard_sequence.empty() && false == c.is_wildcard()) { if (unhandled_wildcard_sequence_contains_greedy_wildcard) { m_query_string.push_back('*'); } else { @@ -80,10 +78,15 @@ auto Query::get_all_multi_token_interpretations(ByteLexer const& lexer) const Expression const expression{m_query_string}; vector> query_interpretations(expression.length()); + if (m_query_string.empty()) { + return {}; + } + for (size_t end_idx = 1; end_idx <= expression.length(); ++end_idx) { for (size_t begin_idx = 0; begin_idx < end_idx; ++begin_idx) { ExpressionView const expression_view{expression, begin_idx, end_idx}; - if 
(expression_view.starts_or_ends_with_greedy_wildcard()) { + if ("*" != expression_view.get_search_string() + && expression_view.starts_or_ends_with_greedy_wildcard()) { continue; } diff --git a/src/log_surgeon/wildcard_query_parser/Query.hpp b/src/log_surgeon/wildcard_query_parser/Query.hpp index 94a40916..59b8f336 100644 --- a/src/log_surgeon/wildcard_query_parser/Query.hpp +++ b/src/log_surgeon/wildcard_query_parser/Query.hpp @@ -25,7 +25,7 @@ class Query { * * - Substrings adjacent to greedy wildcards must be interpreted as if they include them. * - Example: query "a*b" is equivalent to "a***b". For a lexer with a `hasNum` variable - * type ("\w*\d*\w*"), without extensions, the only interpretations would be: + * type ("\w*\d+\w*"), without extensions, the only interpretations would be: * {(a*b)}, * {(a*) (b)}, * {(a) (*b)}. @@ -40,7 +40,8 @@ class Query { * - Substrings that begin or end with a wildcard are skipped as they are redundant. * - Example: in "a*b", substring (0,1] extends to "a*", therefore substring (0,2] "a*" is * redundant. In other words, a decomposition like "a*" + "b" is a subset of the more - * general "a*" + "*" + "*b". + * general "a*" + "*" + "*b". However, an isolated "*" must not be skipped as it is not + * captured by any other substring extension. * * 2. Let I(a) be the set of all multi-length interpretations of substring [0,a). 
* - We can compute I(a) recursively using previously computed sets: @@ -72,6 +73,10 @@ class Query { [[nodiscard]] auto get_all_multi_token_interpretations(lexers::ByteLexer const& lexer) const -> std::set; + [[nodiscard]] auto get_processed_query_string() const -> std::string { + return m_query_string; + } + private: /** * Generates all single-token interpretations for a given expression view matching a given diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 2838d8a4..2121195d 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -11,6 +11,7 @@ target_sources( test-expression-view.cpp test-nfa.cpp test-prefix-tree.cpp + test-query.cpp test-query-interpretation.cpp test-regex-ast.cpp test-register-handler.cpp diff --git a/tests/test-query.cpp b/tests/test-query.cpp new file mode 100644 index 00000000..84c069df --- /dev/null +++ b/tests/test-query.cpp @@ -0,0 +1,104 @@ +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +/** + * @defgroup unit_tests_query `Query` unit tests. + * @brief Unit tests for `Query` construction, mutation, and comparison. + + * These unit tests contain the `Query` tag. + */ + +using std::set; +using std::string; +using std::string_view; +using std::vector; + +using log_surgeon::BufferParser; +using log_surgeon::Schema; +using log_surgeon::wildcard_query_parser::Query; + +namespace { +/** + * Creates a query from the given query string and tests that its processed query string and + * interpretations matche the expeced values. + * + * @param raw_query_string The search query. + * @param expected_processed_query_string The processed search query. + * @param expected_serialized_interpretations The expected set of serialized interpretations. 
+ */ +auto test_query( + string_view raw_query_string, + string_view expected_processed_query_string, + set const& expected_serialized_interpretations +) -> void; + +/** + * Initializes a `BufferParser` with delimiters "\n\r\[:" and variable "myVar:userID=(?123)". + * + * @result The initialized `BufferParser`. + */ +auto make_test_buffer() -> BufferParser; + +auto test_query( + string_view raw_query_string, + string_view expected_processed_query_string, + set const& expected_serialized_interpretations +) -> void { + auto const& buffer_parser{make_test_buffer()}; + auto const& lexer{buffer_parser.get_log_parser().m_lexer}; + + Query const query{string(raw_query_string)}; + REQUIRE(expected_processed_query_string == query.get_processed_query_string()); + + auto const interpretations{query.get_all_multi_token_interpretations(lexer)}; + set serialized_interpretations; + for (auto const& interpretation : interpretations) { + serialized_interpretations.insert(interpretation.serialize()); + } + + CAPTURE(expected_serialized_interpretations); + CAPTURE(serialized_interpretations); + REQUIRE(expected_serialized_interpretations.size() == serialized_interpretations.size()); + REQUIRE(expected_serialized_interpretations == serialized_interpretations); +} + +auto make_test_buffer() -> BufferParser { + Schema schema; + schema.add_delimiters(R"(delimiters: \n\r\[:,)"); + schema.add_variable(R"(hasNumber:[A-Za-z]*\d+[A-Za-z]*)", -1); + return BufferParser(std::move(schema.release_schema_ast_ptr())); +} +} // namespace + +/** + * @ingroup unit_tests_query + * @brief Creates and tests an empty `Query`. + */ +TEST_CASE("empty_query", "[Query]") { + constexpr string_view raw_query_string{""}; + constexpr string_view processed_query_string{""}; + set const expected_serialized_interpretations; + + test_query(raw_query_string, processed_query_string, expected_serialized_interpretations); +} + +/** + * @ingroup unit_tests_query + * @brief Creates and tests a greedy wildcard `Query`. 
+ */ +TEST_CASE("greedy_wildcard_query", "[Query]") { + constexpr string_view raw_query_string{"*"}; + constexpr string_view processed_query_string{"*"}; + set const expected_serialized_interpretations{"logtype='*', contains_wildcard='0'"}; + + test_query(raw_query_string, processed_query_string, expected_serialized_interpretations); +} From ed29b6dbf2de65e32b245b8dbaec00934b4e3c64 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 25 Aug 2025 10:45:40 -0400 Subject: [PATCH 117/168] Format. --- src/log_surgeon/wildcard_query_parser/ExpressionView.cpp | 2 +- src/log_surgeon/wildcard_query_parser/Query.cpp | 3 ++- src/log_surgeon/wildcard_query_parser/Query.hpp | 4 +--- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp b/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp index 95e87a82..91f64b37 100644 --- a/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp +++ b/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp @@ -56,7 +56,7 @@ auto ExpressionView::extend_to_adjacent_greedy_wildcards() const auto const& preceding_char{m_expression->get_chars()[begin_idx - 1]}; auto const& first_char{m_chars[0]}; has_left_boundary = preceding_char.is_delim_or_wildcard(delim_table) - || first_char.is_greedy_wildcard(); + || first_char.is_greedy_wildcard(); } bool has_right_boundary{false}; diff --git a/src/log_surgeon/wildcard_query_parser/Query.cpp b/src/log_surgeon/wildcard_query_parser/Query.cpp index e552e3ff..6954a6d5 100644 --- a/src/log_surgeon/wildcard_query_parser/Query.cpp +++ b/src/log_surgeon/wildcard_query_parser/Query.cpp @@ -86,7 +86,8 @@ auto Query::get_all_multi_token_interpretations(ByteLexer const& lexer) const for (size_t begin_idx = 0; begin_idx < end_idx; ++begin_idx) { ExpressionView const expression_view{expression, begin_idx, end_idx}; if ("*" != expression_view.get_search_string() - && expression_view.starts_or_ends_with_greedy_wildcard()) { + && 
expression_view.starts_or_ends_with_greedy_wildcard()) + { continue; } diff --git a/src/log_surgeon/wildcard_query_parser/Query.hpp b/src/log_surgeon/wildcard_query_parser/Query.hpp index 59b8f336..5ad261cc 100644 --- a/src/log_surgeon/wildcard_query_parser/Query.hpp +++ b/src/log_surgeon/wildcard_query_parser/Query.hpp @@ -73,9 +73,7 @@ class Query { [[nodiscard]] auto get_all_multi_token_interpretations(lexers::ByteLexer const& lexer) const -> std::set; - [[nodiscard]] auto get_processed_query_string() const -> std::string { - return m_query_string; - } + [[nodiscard]] auto get_processed_query_string() const -> std::string { return m_query_string; } private: /** From 2bc577ddd6a5d7b6c7b7e45957bbd90a49826246 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 25 Aug 2025 11:29:43 -0400 Subject: [PATCH 118/168] Tidy. --- tests/test-query.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tests/test-query.cpp b/tests/test-query.cpp index 84c069df..b7399487 100644 --- a/tests/test-query.cpp +++ b/tests/test-query.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include @@ -20,7 +20,6 @@ using std::set; using std::string; using std::string_view; -using std::vector; using log_surgeon::BufferParser; using log_surgeon::Schema; @@ -84,11 +83,11 @@ auto make_test_buffer() -> BufferParser { * @brief Creates and tests an empty `Query`. */ TEST_CASE("empty_query", "[Query]") { - constexpr string_view raw_query_string{""}; - constexpr string_view processed_query_string{""}; + constexpr string_view cRawQueryString; + constexpr string_view cProcessedQueryString; set const expected_serialized_interpretations; - test_query(raw_query_string, processed_query_string, expected_serialized_interpretations); + test_query(cRawQueryString, cProcessedQueryString, expected_serialized_interpretations); } /** @@ -96,9 +95,9 @@ TEST_CASE("empty_query", "[Query]") { * @brief Creates and tests a greedy wildcard `Query`. 
*/ TEST_CASE("greedy_wildcard_query", "[Query]") { - constexpr string_view raw_query_string{"*"}; - constexpr string_view processed_query_string{"*"}; + constexpr string_view cRawQueryString{"*"}; + constexpr string_view cProcessedQueryString{"*"}; set const expected_serialized_interpretations{"logtype='*', contains_wildcard='0'"}; - test_query(raw_query_string, processed_query_string, expected_serialized_interpretations); + test_query(cRawQueryString, cProcessedQueryString, expected_serialized_interpretations); } From cb82695f920f0141447d1c79d33edefb3bf6d554 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 25 Aug 2025 14:17:23 -0400 Subject: [PATCH 119/168] Fixed unit-tests. --- tests/test-query.cpp | 172 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 154 insertions(+), 18 deletions(-) diff --git a/tests/test-query.cpp b/tests/test-query.cpp index b7399487..324ee213 100644 --- a/tests/test-query.cpp +++ b/tests/test-query.cpp @@ -3,11 +3,11 @@ #include #include -#include +#include #include +#include #include -#include #include /** @@ -17,14 +17,14 @@ * These unit tests contain the `Query` tag. */ +using log_surgeon::lexers::ByteLexer; +using log_surgeon::Schema; +using log_surgeon::SchemaVarAST; +using log_surgeon::wildcard_query_parser::Query; using std::set; using std::string; using std::string_view; -using log_surgeon::BufferParser; -using log_surgeon::Schema; -using log_surgeon::wildcard_query_parser::Query; - namespace { /** * Creates a query from the given query string and tests that its processed query string and @@ -41,19 +41,18 @@ auto test_query( ) -> void; /** - * Initializes a `BufferParser` with delimiters "\n\r\[:" and variable "myVar:userID=(?123)". + * Initializes a `ByteLexer` with delimiters "\n\r\[:" and variable "myVar:userID=(?123)". * - * @result The initialized `BufferParser`. + * @result The initialized `ByteLexer`. 
*/ -auto make_test_buffer() -> BufferParser; +auto make_test_lexer() -> ByteLexer; auto test_query( - string_view raw_query_string, - string_view expected_processed_query_string, + string_view const raw_query_string, + string_view const expected_processed_query_string, set const& expected_serialized_interpretations ) -> void { - auto const& buffer_parser{make_test_buffer()}; - auto const& lexer{buffer_parser.get_log_parser().m_lexer}; + auto const& lexer{make_test_lexer()}; Query const query{string(raw_query_string)}; REQUIRE(expected_processed_query_string == query.get_processed_query_string()); @@ -64,17 +63,24 @@ auto test_query( serialized_interpretations.insert(interpretation.serialize()); } - CAPTURE(expected_serialized_interpretations); - CAPTURE(serialized_interpretations); - REQUIRE(expected_serialized_interpretations.size() == serialized_interpretations.size()); REQUIRE(expected_serialized_interpretations == serialized_interpretations); } -auto make_test_buffer() -> BufferParser { +auto make_test_lexer() -> ByteLexer { Schema schema; schema.add_delimiters(R"(delimiters: \n\r\[:,)"); schema.add_variable(R"(hasNumber:[A-Za-z]*\d+[A-Za-z]*)", -1); - return BufferParser(std::move(schema.release_schema_ast_ptr())); + + ByteLexer lexer; + lexer.m_symbol_id["hasNumber"] = 0; + lexer.m_id_symbol[0] = "hasNumber"; + + auto const schema_ast = schema.release_schema_ast_ptr(); + auto& capture_rule_ast = dynamic_cast(*schema_ast->m_schema_vars[0]); + lexer.add_rule(lexer.m_symbol_id["hasNumber"], std::move(capture_rule_ast.m_regex_ptr)); + + lexer.generate(); + return lexer; } } // namespace @@ -101,3 +107,133 @@ TEST_CASE("greedy_wildcard_query", "[Query]") { test_query(cRawQueryString, cProcessedQueryString, expected_serialized_interpretations); } + +/** + * @ingroup unit_tests_query + * @brief Creates and tests a query with repeated greedy wildcards. 
+ */ +TEST_CASE("repeated_greedy_wildcard_query", "[Query]") { + constexpr string_view cRawQueryString{"a**b"}; + constexpr string_view cProcessedQueryString{"a*b"}; + set const expected_serialized_interpretations{ + {"logtype='a*b', contains_wildcard='0'"}, + {"logtype='a***b', contains_wildcard='0'"}, + {"logtype='<0>(a*)**b', contains_wildcard='10'"}, + {"logtype='<0>(a*)*<0>(*b)', contains_wildcard='101'"}, + {"logtype='<0>(a*b)', contains_wildcard='1'"}, + {"logtype='a**<0>(*b)', contains_wildcard='01'"} + }; + + test_query(cRawQueryString, cProcessedQueryString, expected_serialized_interpretations); +} + +/** + * @ingroup unit_tests_query + * @brief Creates and tests a query with a non-greedy wildcard followed by a greedy wildcard. + */ +TEST_CASE("short_wildcard_sequence_query", "[Query]") { + constexpr string_view cRawQueryString{"a?*b"}; + constexpr string_view cProcessedQueryString{"a*b"}; + set const expected_serialized_interpretations{ + {"logtype='a*b', contains_wildcard='0'"}, + {"logtype='a***b', contains_wildcard='0'"}, + {"logtype='<0>(a*)**b', contains_wildcard='10'"}, + {"logtype='<0>(a*)*<0>(*b)', contains_wildcard='101'"}, + {"logtype='<0>(a*b)', contains_wildcard='1'"}, + {"logtype='a**<0>(*b)', contains_wildcard='01'"} + }; + + test_query(cRawQueryString, cProcessedQueryString, expected_serialized_interpretations); +} + +/** + * @ingroup unit_tests_query + * @brief Creates and tests a query with a long mixed wildcard sequence. 
+ */ +TEST_CASE("long_mixed_wildcard_sequence_query", "[Query]") { + constexpr string_view cRawQueryString{"a?*?*?*?b"}; + constexpr string_view cProcessedQueryString{"a*b"}; + set const expected_serialized_interpretations{ + {"logtype='a*b', contains_wildcard='0'"}, + {"logtype='a***b', contains_wildcard='0'"}, + {"logtype='<0>(a*)**b', contains_wildcard='10'"}, + {"logtype='<0>(a*)*<0>(*b)', contains_wildcard='101'"}, + {"logtype='<0>(a*b)', contains_wildcard='1'"}, + {"logtype='a**<0>(*b)', contains_wildcard='01'"} + }; + + test_query(cRawQueryString, cProcessedQueryString, expected_serialized_interpretations); +} + +/** + * @ingroup unit_tests_query + * @brief Creates and tests a query with a long non-greedy wildcard sequence. + */ +TEST_CASE("long_non_greedy_wildcard_sequence_query", "[Query]") { + constexpr string_view cRawQueryString{"a????b"}; + constexpr string_view cProcessedQueryString{"a????b"}; + set const expected_serialized_interpretations{ + {R"(logtype='a????b', contains_wildcard='0')"}, + + {R"(logtype='<0>(a?)???b', contains_wildcard='10')"}, + {R"(logtype='<0>(a??)??b', contains_wildcard='10')"}, + {R"(logtype='<0>(a???)?b', contains_wildcard='10')"}, + {R"(logtype='<0>(a????b)', contains_wildcard='1')"}, + + {R"(logtype='a?<0>(?)??b', contains_wildcard='010')"}, + {R"(logtype='a?<0>(??)?b', contains_wildcard='010')"}, + {R"(logtype='a?<0>(???b)', contains_wildcard='01')"}, + {R"(logtype='a?<0>(?)?<0>(?b)', contains_wildcard='0101')"}, + + {R"(logtype='a??<0>(?)?b', contains_wildcard='010')"}, + {R"(logtype='a??<0>(??b)', contains_wildcard='01')"}, + + {R"(logtype='a???<0>(?b)', contains_wildcard='01')"}, + + {R"(logtype='<0>(a?)?<0>(?)?b', contains_wildcard='1010')"}, + {R"(logtype='<0>(a?)?<0>(??b)', contains_wildcard='101')"}, + {R"(logtype='<0>(a?)??<0>(?b)', contains_wildcard='101')"}, + + {R"(logtype='<0>(a??)?<0>(?b)', contains_wildcard='101')"}, + + // Double dipping on delimiters + {R"(logtype='<0>(a?)<0>(?)??b', 
contains_wildcard='110')"}, + {R"(logtype='<0>(a?)<0>(??)?b', contains_wildcard='110')"}, + {R"(logtype='<0>(a?)<0>(???b)', contains_wildcard='11')"}, + {R"(logtype='<0>(a?)<0>(?)?<0>(?b)', contains_wildcard='1101')"}, + {R"(logtype='<0>(a?)?<0>(?)<0>(?b)', contains_wildcard='1011')"}, + + {R"(logtype='<0>(a??)<0>(?)?b', contains_wildcard='110')"}, + {R"(logtype='<0>(a??)<0>(??b)', contains_wildcard='11')"}, + + {R"(logtype='<0>(a???)<0>(?b)', contains_wildcard='11')"}, + + {R"(logtype='a?<0>(?)<0>(?)?b', contains_wildcard='0110')"}, + {R"(logtype='a?<0>(?)<0>(??b)', contains_wildcard='011')"}, + + {R"(logtype='a?<0>(??)<0>(?b)', contains_wildcard='011')"}, + {R"(logtype='a??<0>(?)<0>(?b)', contains_wildcard='011')"}, + + {R"(logtype='<0>(a?)<0>(?)<0>(?)?b', contains_wildcard='1110')"}, + {R"(logtype='<0>(a?)<0>(?)<0>(??b)', contains_wildcard='111')"}, + {R"(logtype='<0>(a?)<0>(??)<0>(?b)', contains_wildcard='111')"}, + {R"(logtype='<0>(a??)<0>(?)<0>(?b)', contains_wildcard='111')"}, + {R"(logtype='a?<0>(?)<0>(?)<0>(?b)', contains_wildcard='0111')"}, + + {R"(logtype='<0>(a?)<0>(?)<0>(?)<0>(?b)', contains_wildcard='1111')"} + }; + + test_query(cRawQueryString, cProcessedQueryString, expected_serialized_interpretations); +} + +/** + * @ingroup unit_tests_query + * @brief Creates and tests a query with an escaped '*' character. + */ +TEST_CASE("escaped_star_query", "[Query]") { + constexpr string_view cRawQueryString{"*"}; + constexpr string_view cProcessedQueryString{"*"}; + set const expected_serialized_interpretations{"logtype='*', contains_wildcard='0'"}; + + test_query(cRawQueryString, cProcessedQueryString, expected_serialized_interpretations); +} From 9d8724829673665ee8455402de2725ccf6206e6a Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 25 Aug 2025 14:19:19 -0400 Subject: [PATCH 120/168] Fix typo. 
--- tests/test-query.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test-query.cpp b/tests/test-query.cpp index 324ee213..729e0a63 100644 --- a/tests/test-query.cpp +++ b/tests/test-query.cpp @@ -43,7 +43,7 @@ auto test_query( /** * Initializes a `ByteLexer` with delimiters "\n\r\[:" and variable "myVar:userID=(?123)". * - * @result The initialized `ByteLexer`. + * @return The initialized `ByteLexer`. */ auto make_test_lexer() -> ByteLexer; From ad58be5c20c52f423399a2764a6502238d9e616c Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 25 Aug 2025 14:20:10 -0400 Subject: [PATCH 121/168] Fix typo. --- tests/test-query.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test-query.cpp b/tests/test-query.cpp index 729e0a63..068e6c6c 100644 --- a/tests/test-query.cpp +++ b/tests/test-query.cpp @@ -28,7 +28,7 @@ using std::string_view; namespace { /** * Creates a query from the given query string and tests that its processed query string and - * interpretations matche the expeced values. + * interpretations match the expeced values. * * @param raw_query_string The search query. * @param expected_processed_query_string The processed search query. From fbe4e1689a330d222f720f7a5e0c5d6741859d82 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 25 Aug 2025 14:20:26 -0400 Subject: [PATCH 122/168] Fix typo. --- tests/test-query.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test-query.cpp b/tests/test-query.cpp index 068e6c6c..e04dd9ed 100644 --- a/tests/test-query.cpp +++ b/tests/test-query.cpp @@ -28,7 +28,7 @@ using std::string_view; namespace { /** * Creates a query from the given query string and tests that its processed query string and - * interpretations match the expeced values. + * interpretations match the expected values. * * @param raw_query_string The search query. * @param expected_processed_query_string The processed search query. 
From bb799d910c1c2c24156fd9ba2377adbede0344ee Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 25 Aug 2025 14:26:02 -0400 Subject: [PATCH 123/168] Fix docstring. --- tests/test-query.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test-query.cpp b/tests/test-query.cpp index e04dd9ed..f28b3ee7 100644 --- a/tests/test-query.cpp +++ b/tests/test-query.cpp @@ -12,7 +12,7 @@ /** * @defgroup unit_tests_query `Query` unit tests. - * @brief Unit tests for `Query` construction, mutation, and comparison. + * @brief Unit tests for `Query` construction and interpretation. * These unit tests contain the `Query` tag. */ From 468ab3106ee95a60c4cdec980787c8a5b6edea74 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 25 Aug 2025 14:29:33 -0400 Subject: [PATCH 124/168] Fix typos. --- src/log_surgeon/wildcard_query_parser/Query.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/Query.hpp b/src/log_surgeon/wildcard_query_parser/Query.hpp index 5ad261cc..8eea699d 100644 --- a/src/log_surgeon/wildcard_query_parser/Query.hpp +++ b/src/log_surgeon/wildcard_query_parser/Query.hpp @@ -83,13 +83,13 @@ class Query { * A single-token interpretation can be one of: * - A static token (literal text). * - A variable token (e.g., int, float, hasNumber) as defined by the lexer's schema. Each - * unique variable types is considered a distinct interpretation. + * unique variable type is considered a distinct interpretation. * * Rules: * - If the substring is malformed (has hanging escape characters): * - There are no valid interpretations. * - Else if the substring: - * - Is an isolated greedy wildcard, `*, or + * - Is an isolated greedy wildcard, "*", or * - Is not surrounded by delimiters or wildcards (lexer won't consider it a variable), or * - Does not match any variable. * - Then: @@ -102,7 +102,7 @@ class Query { * * @param expression_view The view of the substring to interpret. 
* @param lexer The lexer used to determine variable types and delimiters. - * @return A vector of `Queryinterpretation` objects representing all valid single-token + * @return A vector of `QueryInterpretation` objects representing all valid single-token * interpretations for the given substring. */ [[nodiscard]] static auto get_all_single_token_interpretations( From af30e98a6b7812bc713a3c984369a813ec945456 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 25 Aug 2025 16:41:17 -0400 Subject: [PATCH 125/168] Fix docstring. --- src/log_surgeon/wildcard_query_parser/Query.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/wildcard_query_parser/Query.hpp b/src/log_surgeon/wildcard_query_parser/Query.hpp index 8eea699d..0a7fba9a 100644 --- a/src/log_surgeon/wildcard_query_parser/Query.hpp +++ b/src/log_surgeon/wildcard_query_parser/Query.hpp @@ -37,7 +37,7 @@ class Query { * - Note: non-greedy wildcards (`?`) are not extended as "a?b" is not equivalent to * "a??b". * - * - Substrings that begin or end with a wildcard are skipped as they are redundant. + * - Substrings that begin or end with a greedy wildcard are skipped as they are redundant. * - Example: in "a*b", substring (0,1] extends to "a*", therefore substring (0,2] "a*" is * redundant. In other words, a decomposition like "a*" + "b" is a subset of the more * general "a*" + "*" + "*b". However, an isolated "*" must not be skipped as it is not From da8337760b2321139beee030206e0c8dcf2b200d Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Mon, 25 Aug 2025 16:52:56 -0400 Subject: [PATCH 126/168] Retype to unsigned char. 
--- .../wildcard_query_parser/ExpressionCharacter.hpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp b/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp index 428b8820..15557bc3 100644 --- a/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp +++ b/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp @@ -16,9 +16,11 @@ class ExpressionCharacter { Escape }; - ExpressionCharacter(char const value, Type const type) : m_value{value}, m_type{type} {} + ExpressionCharacter(unsigned char const value, Type const type) + : m_value{value}, + m_type{type} {} - [[nodiscard]] auto value() const -> char { return m_value; } + [[nodiscard]] auto value() const -> unsigned char { return m_value; } [[nodiscard]] auto is_greedy_wildcard() const -> bool { return Type::GreedyWildcard == m_type; } @@ -42,7 +44,7 @@ class ExpressionCharacter { [[nodiscard]] auto is_escape() const -> bool { return Type::Escape == m_type; } private: - char m_value; + unsigned char m_value; Type m_type; }; } // namespace log_surgeon::wildcard_query_parser From b4dc1e9051be49d7348fd06f87e74cb1ddab1765 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 26 Aug 2025 05:27:23 -0400 Subject: [PATCH 127/168] Fix UB. 
--- src/log_surgeon/wildcard_query_parser/ExpressionView.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp b/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp index 91f64b37..b90386c1 100644 --- a/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp +++ b/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp @@ -44,7 +44,7 @@ auto ExpressionView::extend_to_adjacent_greedy_wildcards() const return {is_extended, wildcard_expression_view}; } -[[nodiscard]] auto ExpressionView::is_surrounded_by_delims_or_wildcards( +auto ExpressionView::is_surrounded_by_delims_or_wildcards( std::array const& delim_table ) const -> bool { auto const [begin_idx, end_idx]{get_indices()}; @@ -54,9 +54,8 @@ auto ExpressionView::extend_to_adjacent_greedy_wildcards() const has_left_boundary = true; } else { auto const& preceding_char{m_expression->get_chars()[begin_idx - 1]}; - auto const& first_char{m_chars[0]}; has_left_boundary = preceding_char.is_delim_or_wildcard(delim_table) - || first_char.is_greedy_wildcard(); + || false == m_chars.empty() && m_chars[0].is_greedy_wildcard(); } bool has_right_boundary{false}; @@ -70,8 +69,8 @@ auto ExpressionView::extend_to_adjacent_greedy_wildcards() const } else { has_right_boundary = succeeding_char.is_delim_or_wildcard(delim_table); } - auto const& last_char{m_chars.back()}; - has_right_boundary = has_right_boundary || last_char.is_greedy_wildcard(); + has_right_boundary = has_right_boundary + || false == m_chars.empty() && m_chars.back().is_greedy_wildcard(); } return has_left_boundary && has_right_boundary; From 07f82d1d4afc41abc0a1a441f683082f651566d4 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 26 Aug 2025 05:33:08 -0400 Subject: [PATCH 128/168] Reserve query string size. 
--- src/log_surgeon/wildcard_query_parser/Query.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/log_surgeon/wildcard_query_parser/Query.cpp b/src/log_surgeon/wildcard_query_parser/Query.cpp index 6954a6d5..bea4f38a 100644 --- a/src/log_surgeon/wildcard_query_parser/Query.cpp +++ b/src/log_surgeon/wildcard_query_parser/Query.cpp @@ -34,7 +34,9 @@ using ByteNfa = log_surgeon::finite_automata::Nfa; namespace log_surgeon::wildcard_query_parser { Query::Query(string const& query_string) { + m_query_string.reserve(query_string.size()); Expression const expression(query_string); + bool prev_is_escape{false}; string unhandled_wildcard_sequence; bool unhandled_wildcard_sequence_contains_greedy_wildcard{false}; From 9929151fc849d26c075b1f19859ef23e3a105b76 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 26 Aug 2025 05:44:26 -0400 Subject: [PATCH 129/168] Remove complexity claims. --- src/log_surgeon/wildcard_query_parser/Query.hpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/Query.hpp b/src/log_surgeon/wildcard_query_parser/Query.hpp index 0a7fba9a..746b409f 100644 --- a/src/log_surgeon/wildcard_query_parser/Query.hpp +++ b/src/log_surgeon/wildcard_query_parser/Query.hpp @@ -56,9 +56,8 @@ class Query { * and suffix interpretations from T(i,a). * * 3. Use dynamic programming to compute I(n) efficiently: - * - Instead of generating all possible combinations naively (O(2^n * k^n)), we store only - * unique interpretations, reducing complexity to roughly O(k^n), where k is the number of - * unique token types. + * - Instead of generating all possible combinations naively, we store only unique + * interpretations by recurisvely building up the combinations as shown below. 
* - Compute I(n) iteratively in increasing order of substring length: * - Compute T(0,1), then I(1) * - Compute T(0,2), T(1,2), then I(2) From bf678eca33c5886d67938fb156c82d36d5f667df Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 26 Aug 2025 05:46:59 -0400 Subject: [PATCH 130/168] Return const reference to avoid copy. --- src/log_surgeon/wildcard_query_parser/Query.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/log_surgeon/wildcard_query_parser/Query.hpp b/src/log_surgeon/wildcard_query_parser/Query.hpp index 746b409f..69324800 100644 --- a/src/log_surgeon/wildcard_query_parser/Query.hpp +++ b/src/log_surgeon/wildcard_query_parser/Query.hpp @@ -72,7 +72,9 @@ class Query { [[nodiscard]] auto get_all_multi_token_interpretations(lexers::ByteLexer const& lexer) const -> std::set; - [[nodiscard]] auto get_processed_query_string() const -> std::string { return m_query_string; } + [[nodiscard]] auto get_processed_query_string() const -> std::string const& { + return m_query_string; + } private: /** From 270330ba028c602280d8a3bc333a36cc9400027f Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 26 Aug 2025 05:52:13 -0400 Subject: [PATCH 131/168] Fix docstring. --- tests/test-query.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test-query.cpp b/tests/test-query.cpp index f28b3ee7..5fe93a2d 100644 --- a/tests/test-query.cpp +++ b/tests/test-query.cpp @@ -41,7 +41,8 @@ auto test_query( ) -> void; /** - * Initializes a `ByteLexer` with delimiters "\n\r\[:" and variable "myVar:userID=(?123)". + * Initializes a `ByteLexer` with delimiters "\n\r\[:" and variable + * "hasNumber:[A-Za-z]*\d+[A-Za-z]*". * * @return The initialized `ByteLexer`. */ From 9addd94ed9b8b258f4d44490e7d231fd439456ba Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 26 Aug 2025 05:54:58 -0400 Subject: [PATCH 132/168] Remove accidental reference. 
--- tests/test-query.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test-query.cpp b/tests/test-query.cpp index 5fe93a2d..4297c5c6 100644 --- a/tests/test-query.cpp +++ b/tests/test-query.cpp @@ -53,7 +53,7 @@ auto test_query( string_view const expected_processed_query_string, set const& expected_serialized_interpretations ) -> void { - auto const& lexer{make_test_lexer()}; + auto const lexer{make_test_lexer()}; Query const query{string(raw_query_string)}; REQUIRE(expected_processed_query_string == query.get_processed_query_string()); From aab46596093481436f4d2b99feb224ceaa129f0f Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 26 Aug 2025 05:57:59 -0400 Subject: [PATCH 133/168] Add some checks that enforce test schema changes to be followed through. --- tests/test-query.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test-query.cpp b/tests/test-query.cpp index 4297c5c6..8e2c1931 100644 --- a/tests/test-query.cpp +++ b/tests/test-query.cpp @@ -77,6 +77,9 @@ auto make_test_lexer() -> ByteLexer { lexer.m_id_symbol[0] = "hasNumber"; auto const schema_ast = schema.release_schema_ast_ptr(); + REQUIRE(nullptr != schema_ast); + REQUIRE(1 == schema_ast->m_schema_vars.size()); + REQUIRE(nullptr != schema_ast->m_schema_vars[0]); auto& capture_rule_ast = dynamic_cast(*schema_ast->m_schema_vars[0]); lexer.add_rule(lexer.m_symbol_id["hasNumber"], std::move(capture_rule_ast.m_regex_ptr)); From 8349c76e9f109d8c1501ba65acb708598c04af54 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 26 Aug 2025 06:01:34 -0400 Subject: [PATCH 134/168] Remove unneeded braces in set initialization. 
--- tests/test-query.cpp | 104 +++++++++++++++++++++---------------------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/tests/test-query.cpp b/tests/test-query.cpp index 8e2c1931..d0fd3977 100644 --- a/tests/test-query.cpp +++ b/tests/test-query.cpp @@ -120,12 +120,12 @@ TEST_CASE("repeated_greedy_wildcard_query", "[Query]") { constexpr string_view cRawQueryString{"a**b"}; constexpr string_view cProcessedQueryString{"a*b"}; set const expected_serialized_interpretations{ - {"logtype='a*b', contains_wildcard='0'"}, - {"logtype='a***b', contains_wildcard='0'"}, - {"logtype='<0>(a*)**b', contains_wildcard='10'"}, - {"logtype='<0>(a*)*<0>(*b)', contains_wildcard='101'"}, - {"logtype='<0>(a*b)', contains_wildcard='1'"}, - {"logtype='a**<0>(*b)', contains_wildcard='01'"} + "logtype='a*b', contains_wildcard='0'", + "logtype='a***b', contains_wildcard='0'", + "logtype='<0>(a*)**b', contains_wildcard='10'", + "logtype='<0>(a*)*<0>(*b)', contains_wildcard='101'", + "logtype='<0>(a*b)', contains_wildcard='1'", + "logtype='a**<0>(*b)', contains_wildcard='01'" }; test_query(cRawQueryString, cProcessedQueryString, expected_serialized_interpretations); @@ -139,12 +139,12 @@ TEST_CASE("short_wildcard_sequence_query", "[Query]") { constexpr string_view cRawQueryString{"a?*b"}; constexpr string_view cProcessedQueryString{"a*b"}; set const expected_serialized_interpretations{ - {"logtype='a*b', contains_wildcard='0'"}, - {"logtype='a***b', contains_wildcard='0'"}, - {"logtype='<0>(a*)**b', contains_wildcard='10'"}, - {"logtype='<0>(a*)*<0>(*b)', contains_wildcard='101'"}, - {"logtype='<0>(a*b)', contains_wildcard='1'"}, - {"logtype='a**<0>(*b)', contains_wildcard='01'"} + "logtype='a*b', contains_wildcard='0'", + "logtype='a***b', contains_wildcard='0'", + "logtype='<0>(a*)**b', contains_wildcard='10'", + "logtype='<0>(a*)*<0>(*b)', contains_wildcard='101'", + "logtype='<0>(a*b)', contains_wildcard='1'", + "logtype='a**<0>(*b)', contains_wildcard='01'" }; 
test_query(cRawQueryString, cProcessedQueryString, expected_serialized_interpretations); @@ -158,12 +158,12 @@ TEST_CASE("long_mixed_wildcard_sequence_query", "[Query]") { constexpr string_view cRawQueryString{"a?*?*?*?b"}; constexpr string_view cProcessedQueryString{"a*b"}; set const expected_serialized_interpretations{ - {"logtype='a*b', contains_wildcard='0'"}, - {"logtype='a***b', contains_wildcard='0'"}, - {"logtype='<0>(a*)**b', contains_wildcard='10'"}, - {"logtype='<0>(a*)*<0>(*b)', contains_wildcard='101'"}, - {"logtype='<0>(a*b)', contains_wildcard='1'"}, - {"logtype='a**<0>(*b)', contains_wildcard='01'"} + "logtype='a*b', contains_wildcard='0'", + "logtype='a***b', contains_wildcard='0'", + "logtype='<0>(a*)**b', contains_wildcard='10'", + "logtype='<0>(a*)*<0>(*b)', contains_wildcard='101'", + "logtype='<0>(a*b)', contains_wildcard='1'", + "logtype='a**<0>(*b)', contains_wildcard='01'" }; test_query(cRawQueryString, cProcessedQueryString, expected_serialized_interpretations); @@ -177,54 +177,54 @@ TEST_CASE("long_non_greedy_wildcard_sequence_query", "[Query]") { constexpr string_view cRawQueryString{"a????b"}; constexpr string_view cProcessedQueryString{"a????b"}; set const expected_serialized_interpretations{ - {R"(logtype='a????b', contains_wildcard='0')"}, + R"(logtype='a????b', contains_wildcard='0')", - {R"(logtype='<0>(a?)???b', contains_wildcard='10')"}, - {R"(logtype='<0>(a??)??b', contains_wildcard='10')"}, - {R"(logtype='<0>(a???)?b', contains_wildcard='10')"}, - {R"(logtype='<0>(a????b)', contains_wildcard='1')"}, + R"(logtype='<0>(a?)???b', contains_wildcard='10')", + R"(logtype='<0>(a??)??b', contains_wildcard='10')", + R"(logtype='<0>(a???)?b', contains_wildcard='10')", + R"(logtype='<0>(a????b)', contains_wildcard='1')", - {R"(logtype='a?<0>(?)??b', contains_wildcard='010')"}, - {R"(logtype='a?<0>(??)?b', contains_wildcard='010')"}, - {R"(logtype='a?<0>(???b)', contains_wildcard='01')"}, - {R"(logtype='a?<0>(?)?<0>(?b)', 
contains_wildcard='0101')"}, + R"(logtype='a?<0>(?)??b', contains_wildcard='010')", + R"(logtype='a?<0>(??)?b', contains_wildcard='010')", + R"(logtype='a?<0>(???b)', contains_wildcard='01')", + R"(logtype='a?<0>(?)?<0>(?b)', contains_wildcard='0101')", - {R"(logtype='a??<0>(?)?b', contains_wildcard='010')"}, - {R"(logtype='a??<0>(??b)', contains_wildcard='01')"}, + R"(logtype='a??<0>(?)?b', contains_wildcard='010')", + R"(logtype='a??<0>(??b)', contains_wildcard='01')", - {R"(logtype='a???<0>(?b)', contains_wildcard='01')"}, + R"(logtype='a???<0>(?b)', contains_wildcard='01')", - {R"(logtype='<0>(a?)?<0>(?)?b', contains_wildcard='1010')"}, - {R"(logtype='<0>(a?)?<0>(??b)', contains_wildcard='101')"}, - {R"(logtype='<0>(a?)??<0>(?b)', contains_wildcard='101')"}, + R"(logtype='<0>(a?)?<0>(?)?b', contains_wildcard='1010')", + R"(logtype='<0>(a?)?<0>(??b)', contains_wildcard='101')", + R"(logtype='<0>(a?)??<0>(?b)', contains_wildcard='101')", - {R"(logtype='<0>(a??)?<0>(?b)', contains_wildcard='101')"}, + R"(logtype='<0>(a??)?<0>(?b)', contains_wildcard='101')", // Double dipping on delimiters - {R"(logtype='<0>(a?)<0>(?)??b', contains_wildcard='110')"}, - {R"(logtype='<0>(a?)<0>(??)?b', contains_wildcard='110')"}, - {R"(logtype='<0>(a?)<0>(???b)', contains_wildcard='11')"}, - {R"(logtype='<0>(a?)<0>(?)?<0>(?b)', contains_wildcard='1101')"}, - {R"(logtype='<0>(a?)?<0>(?)<0>(?b)', contains_wildcard='1011')"}, + R"(logtype='<0>(a?)<0>(?)??b', contains_wildcard='110')", + R"(logtype='<0>(a?)<0>(??)?b', contains_wildcard='110')", + R"(logtype='<0>(a?)<0>(???b)', contains_wildcard='11')", + R"(logtype='<0>(a?)<0>(?)?<0>(?b)', contains_wildcard='1101')", + R"(logtype='<0>(a?)?<0>(?)<0>(?b)', contains_wildcard='1011')", - {R"(logtype='<0>(a??)<0>(?)?b', contains_wildcard='110')"}, - {R"(logtype='<0>(a??)<0>(??b)', contains_wildcard='11')"}, + R"(logtype='<0>(a??)<0>(?)?b', contains_wildcard='110')", + R"(logtype='<0>(a??)<0>(??b)', contains_wildcard='11')", - 
{R"(logtype='<0>(a???)<0>(?b)', contains_wildcard='11')"}, + R"(logtype='<0>(a???)<0>(?b)', contains_wildcard='11')", - {R"(logtype='a?<0>(?)<0>(?)?b', contains_wildcard='0110')"}, - {R"(logtype='a?<0>(?)<0>(??b)', contains_wildcard='011')"}, + R"(logtype='a?<0>(?)<0>(?)?b', contains_wildcard='0110')", + R"(logtype='a?<0>(?)<0>(??b)', contains_wildcard='011')", - {R"(logtype='a?<0>(??)<0>(?b)', contains_wildcard='011')"}, - {R"(logtype='a??<0>(?)<0>(?b)', contains_wildcard='011')"}, + R"(logtype='a?<0>(??)<0>(?b)', contains_wildcard='011')", + R"(logtype='a??<0>(?)<0>(?b)', contains_wildcard='011')", - {R"(logtype='<0>(a?)<0>(?)<0>(?)?b', contains_wildcard='1110')"}, - {R"(logtype='<0>(a?)<0>(?)<0>(??b)', contains_wildcard='111')"}, - {R"(logtype='<0>(a?)<0>(??)<0>(?b)', contains_wildcard='111')"}, - {R"(logtype='<0>(a??)<0>(?)<0>(?b)', contains_wildcard='111')"}, - {R"(logtype='a?<0>(?)<0>(?)<0>(?b)', contains_wildcard='0111')"}, + R"(logtype='<0>(a?)<0>(?)<0>(?)?b', contains_wildcard='1110')", + R"(logtype='<0>(a?)<0>(?)<0>(??b)', contains_wildcard='111')", + R"(logtype='<0>(a?)<0>(??)<0>(?b)', contains_wildcard='111')", + R"(logtype='<0>(a??)<0>(?)<0>(?b)', contains_wildcard='111')", + R"(logtype='a?<0>(?)<0>(?)<0>(?b)', contains_wildcard='0111')", - {R"(logtype='<0>(a?)<0>(?)<0>(?)<0>(?b)', contains_wildcard='1111')"} + R"(logtype='<0>(a?)<0>(?)<0>(?)<0>(?b)', contains_wildcard='1111')" }; test_query(cRawQueryString, cProcessedQueryString, expected_serialized_interpretations); From a772ea20a92ffabeb978e19abea562e28ffc108a Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 26 Aug 2025 06:04:22 -0400 Subject: [PATCH 135/168] Use front() in place of [0] and add () around if check. 
--- src/log_surgeon/wildcard_query_parser/ExpressionView.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp b/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp index b90386c1..75a92801 100644 --- a/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp +++ b/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp @@ -55,7 +55,7 @@ auto ExpressionView::is_surrounded_by_delims_or_wildcards( } else { auto const& preceding_char{m_expression->get_chars()[begin_idx - 1]}; has_left_boundary = preceding_char.is_delim_or_wildcard(delim_table) - || false == m_chars.empty() && m_chars[0].is_greedy_wildcard(); + || (false == m_chars.empty() && m_chars.front().is_greedy_wildcard()); } bool has_right_boundary{false}; @@ -70,7 +70,7 @@ auto ExpressionView::is_surrounded_by_delims_or_wildcards( has_right_boundary = succeeding_char.is_delim_or_wildcard(delim_table); } has_right_boundary = has_right_boundary - || false == m_chars.empty() && m_chars.back().is_greedy_wildcard(); + || (false == m_chars.empty() && m_chars.back().is_greedy_wildcard()); } return has_left_boundary && has_right_boundary; From acdab722003f37ed59ab010e6257a576d10b6181 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 26 Aug 2025 06:08:17 -0400 Subject: [PATCH 136/168] Fix docstring. --- src/log_surgeon/wildcard_query_parser/Query.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/Query.hpp b/src/log_surgeon/wildcard_query_parser/Query.hpp index 69324800..55c3857c 100644 --- a/src/log_surgeon/wildcard_query_parser/Query.hpp +++ b/src/log_surgeon/wildcard_query_parser/Query.hpp @@ -29,11 +29,11 @@ class Query { * {(a*b)}, * {(a*) (b)}, * {(a) (*b)}. - * However, a string like "a1 abc 1b" is also matched by "a*b", and requires the + * However, a string like "a1 abc 1b" is also matched by "a*b", and requires the * interpretation {(a*) (*) (*b)}. 
Extension ensures such cases * are captured. - * - Note: isolated greedy wildcard (`*`) are never extended as the `Query` collapses - * repeated greedy wildcards. + * - Note: isolated greedy wildcards (`*`) are never extended as the `Query` collapses + * repeated greedy wildcards during preprocessing. * - Note: non-greedy wildcards (`?`) are not extended as "a?b" is not equivalent to * "a??b". * From 3563e040efb5f25eda07c13d2777d197120b314a Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 26 Aug 2025 06:08:52 -0400 Subject: [PATCH 137/168] Fix docstring. --- src/log_surgeon/wildcard_query_parser/Query.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/wildcard_query_parser/Query.hpp b/src/log_surgeon/wildcard_query_parser/Query.hpp index 55c3857c..38e07159 100644 --- a/src/log_surgeon/wildcard_query_parser/Query.hpp +++ b/src/log_surgeon/wildcard_query_parser/Query.hpp @@ -38,7 +38,7 @@ class Query { * "a??b". * * - Substrings that begin or end with a greedy wildcard are skipped as they are redundant. - * - Example: in "a*b", substring (0,1] extends to "a*", therefore substring (0,2] "a*" is + * - Example: in "a*b", substring (0,1] extends to "a*", therefore substring [0,2) "a*" is * redundant. In other words, a decomposition like "a*" + "b" is a subset of the more * general "a*" + "*" + "*b". However, an isolated "*" must not be skipped as it is not * captured by any other substring extension. From c1a646621090c7ffbc334be118978dcdd6b1e5af Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 26 Aug 2025 06:09:33 -0400 Subject: [PATCH 138/168] Fix typo. 
--- src/log_surgeon/wildcard_query_parser/Query.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/wildcard_query_parser/Query.hpp b/src/log_surgeon/wildcard_query_parser/Query.hpp index 38e07159..f575a346 100644 --- a/src/log_surgeon/wildcard_query_parser/Query.hpp +++ b/src/log_surgeon/wildcard_query_parser/Query.hpp @@ -57,7 +57,7 @@ class Query { * * 3. Use dynamic programming to compute I(n) efficiently: * - Instead of generating all possible combinations naively, we store only unique - * interpretations by recurisvely building up the combinations as shown below. + * interpretations by recursively building up the combinations as shown below. * - Compute I(n) iteratively in increasing order of substring length: * - Compute T(0,1), then I(1) * - Compute T(0,2), T(1,2), then I(2) From 30e2ee38594b9214cfb3f49ad7fb5ab1217cf84e Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 26 Aug 2025 06:30:10 -0400 Subject: [PATCH 139/168] Switch type back to char and cast when needed. 
--- .../wildcard_query_parser/ExpressionCharacter.hpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp b/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp index 15557bc3..e5e907bb 100644 --- a/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp +++ b/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp @@ -16,11 +16,9 @@ class ExpressionCharacter { Escape }; - ExpressionCharacter(unsigned char const value, Type const type) - : m_value{value}, - m_type{type} {} + ExpressionCharacter(char const value, Type const type) : m_value{value}, m_type{type} {} - [[nodiscard]] auto value() const -> unsigned char { return m_value; } + [[nodiscard]] auto value() const -> char { return m_value; } [[nodiscard]] auto is_greedy_wildcard() const -> bool { return Type::GreedyWildcard == m_type; } @@ -33,7 +31,7 @@ class ExpressionCharacter { } [[nodiscard]] auto is_delim(std::array const& delim_table) const -> bool { - return delim_table.at(m_value); + return delim_table.at(static_cast(m_value)); } [[nodiscard]] auto is_delim_or_wildcard(std::array const& delim_table) const @@ -44,7 +42,7 @@ class ExpressionCharacter { [[nodiscard]] auto is_escape() const -> bool { return Type::Escape == m_type; } private: - unsigned char m_value; + char m_value; Type m_type; }; } // namespace log_surgeon::wildcard_query_parser From cf86fdc481bdc4c57204b58eb9bea5d04b2eb460 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 26 Aug 2025 20:34:47 -0400 Subject: [PATCH 140/168] Fix docstring. 
--- tests/test-buffer-parser.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test-buffer-parser.cpp b/tests/test-buffer-parser.cpp index 59c93250..b7191ebc 100644 --- a/tests/test-buffer-parser.cpp +++ b/tests/test-buffer-parser.cpp @@ -931,8 +931,8 @@ TEST_CASE("multi_capture_one", "[BufferParser]") { * ### Schema Definition * @code * delimiters: \n\r\[:, - * header:(?[A-Za-z]{3} \d{2} \d{2}:\d{2}:\d{2}) ip-(?\d{3}\-\d{2}\-\d{2}\-\d{2}) \ - * ku[(?\d{4})]: (?I|D|E|W)(?\d{4}) \ + * header:(?[A-Za-z]{3} \d{2} \d{2}:\d{2}:\d{2}) ip\-(?\d{3}\-\d{2}\-\d{2}\-\d{2}) \ + * ku\[(?\d{4})\]: (?I|D|E|W)(?\d{4}) \ * (?\d{2}:\d{2}:\d{2}\.\d{4}) (?\d{4}) * @endcode * From fb9c2d043017e43bc49c868cbe220d2c2040d484 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 26 Aug 2025 20:41:22 -0400 Subject: [PATCH 141/168] Update docstrings to include log type. --- tests/test-buffer-parser.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test-buffer-parser.cpp b/tests/test-buffer-parser.cpp index b7191ebc..0428756a 100644 --- a/tests/test-buffer-parser.cpp +++ b/tests/test-buffer-parser.cpp @@ -856,7 +856,7 @@ TEST_CASE("multi_line_with_delimited_vars", "[BufferParser]") { /** * @ingroup test_buffer_parser_capture - * @brief Tests a multi-capture rule. + * @brief Tests a multi-capture rule parsing an Android log. * * This test verifies that a multi-capture rule correctly identifies the location of each capture * group. It tests that `BufferParser` correctly flattens the logtype, as well as stores the full @@ -922,7 +922,7 @@ TEST_CASE("multi_capture_one", "[BufferParser]") { /** * @ingroup test_buffer_parser_capture - * @brief Tests a multi-capture rule. + * @brief Tests a multi-capture rule parsing a Kubernetes log. * * This test also verifies that a multi-capture rule correctly identifies the location of each * capture group. 
It tests that `BufferParser` correctly flattens the logtype, as well as stores the From 326b242c00e885457ebbe50c725a9298339db1cd Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 26 Aug 2025 20:43:45 -0400 Subject: [PATCH 142/168] Changed expected_event1 to expected_event. --- tests/test-buffer-parser.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test-buffer-parser.cpp b/tests/test-buffer-parser.cpp index 0428756a..ceb1ab8c 100644 --- a/tests/test-buffer-parser.cpp +++ b/tests/test-buffer-parser.cpp @@ -896,7 +896,7 @@ TEST_CASE("multi_capture_one", "[BufferParser]") { constexpr string_view cInput{"1999-12-12T01:02:03.456 1234 5678 I MyService A=TEXT B=1.1"}; string const header_rule{fmt::format("header:{} {} {} {}", cTime, cPid, cTid, cLogLevel)}; - ExpectedEvent const expected_event1{ + ExpectedEvent const expected_event{ .m_logtype{" MyService A=TEXT B=1.1"}, .m_timestamp_raw{""}, .m_tokens{ @@ -917,7 +917,7 @@ TEST_CASE("multi_capture_one", "[BufferParser]") { schema.add_variable(header_rule, -1); BufferParser buffer_parser{std::move(schema.release_schema_ast_ptr())}; - parse_and_validate(buffer_parser, cInput, {expected_event1}); + parse_and_validate(buffer_parser, cInput, {expected_event}); } /** @@ -975,7 +975,7 @@ TEST_CASE("multi_capture_two", "[BufferParser]") { cLTime, cTid )}; - ExpectedEvent const expected_event1{ + ExpectedEvent const expected_event{ .m_logtype{" ip- ku[]: Y failed"}, .m_timestamp_raw{""}, .m_tokens{ @@ -998,5 +998,5 @@ TEST_CASE("multi_capture_two", "[BufferParser]") { schema.add_variable(header_rule, -1); BufferParser buffer_parser{std::move(schema.release_schema_ast_ptr())}; - parse_and_validate(buffer_parser, cInput, {expected_event1}); + parse_and_validate(buffer_parser, cInput, {expected_event}); } From 11bcee15515a98ce811d51cf674f4f6c953119f2 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 27 Aug 2025 06:35:33 -0400 Subject: [PATCH 143/168] Rename m_query_string to 
m_processed_query_string. --- .../wildcard_query_parser/Query.cpp | 20 +++++++++---------- .../wildcard_query_parser/Query.hpp | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/Query.cpp b/src/log_surgeon/wildcard_query_parser/Query.cpp index bea4f38a..e04f305e 100644 --- a/src/log_surgeon/wildcard_query_parser/Query.cpp +++ b/src/log_surgeon/wildcard_query_parser/Query.cpp @@ -34,7 +34,7 @@ using ByteNfa = log_surgeon::finite_automata::Nfa; namespace log_surgeon::wildcard_query_parser { Query::Query(string const& query_string) { - m_query_string.reserve(query_string.size()); + m_processed_query_string.reserve(query_string.size()); Expression const expression(query_string); bool prev_is_escape{false}; @@ -43,44 +43,44 @@ Query::Query(string const& query_string) { for (auto c : expression.get_chars()) { if (false == unhandled_wildcard_sequence.empty() && false == c.is_wildcard()) { if (unhandled_wildcard_sequence_contains_greedy_wildcard) { - m_query_string.push_back('*'); + m_processed_query_string.push_back('*'); } else { - m_query_string += unhandled_wildcard_sequence; + m_processed_query_string += unhandled_wildcard_sequence; } unhandled_wildcard_sequence.clear(); unhandled_wildcard_sequence_contains_greedy_wildcard = false; } if (prev_is_escape) { - m_query_string.push_back(c.value()); + m_processed_query_string.push_back(c.value()); prev_is_escape = false; } else if (c.is_escape()) { prev_is_escape = true; - m_query_string.push_back(c.value()); + m_processed_query_string.push_back(c.value()); } else if (c.is_greedy_wildcard()) { unhandled_wildcard_sequence.push_back(c.value()); unhandled_wildcard_sequence_contains_greedy_wildcard = true; } else if (c.is_non_greedy_wildcard()) { unhandled_wildcard_sequence.push_back(c.value()); } else { - m_query_string.push_back(c.value()); + m_processed_query_string.push_back(c.value()); } } if (false == unhandled_wildcard_sequence.empty()) { if 
(unhandled_wildcard_sequence_contains_greedy_wildcard) { - m_query_string.push_back('*'); + m_processed_query_string.push_back('*'); } else { - m_query_string += unhandled_wildcard_sequence; + m_processed_query_string += unhandled_wildcard_sequence; } } } auto Query::get_all_multi_token_interpretations(ByteLexer const& lexer) const -> std::set { - Expression const expression{m_query_string}; + Expression const expression{m_processed_query_string}; vector> query_interpretations(expression.length()); - if (m_query_string.empty()) { + if (m_processed_query_string.empty()) { return {}; } diff --git a/src/log_surgeon/wildcard_query_parser/Query.hpp b/src/log_surgeon/wildcard_query_parser/Query.hpp index f575a346..515d5a42 100644 --- a/src/log_surgeon/wildcard_query_parser/Query.hpp +++ b/src/log_surgeon/wildcard_query_parser/Query.hpp @@ -73,7 +73,7 @@ class Query { -> std::set; [[nodiscard]] auto get_processed_query_string() const -> std::string const& { - return m_query_string; + return m_processed_query_string; } private: @@ -125,7 +125,7 @@ class Query { get_matching_variable_types(std::string const& regex_string, lexers::ByteLexer const& lexer) -> std::set; - std::string m_query_string; + std::string m_processed_query_string; }; } // namespace log_surgeon::wildcard_query_parser From fbfa02605279d6d3f684045e773790b4f1888cdb Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 27 Aug 2025 06:38:52 -0400 Subject: [PATCH 144/168] Fix escaped star test case. --- tests/test-query.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test-query.cpp b/tests/test-query.cpp index d0fd3977..97b36bbc 100644 --- a/tests/test-query.cpp +++ b/tests/test-query.cpp @@ -235,9 +235,9 @@ TEST_CASE("long_non_greedy_wildcard_sequence_query", "[Query]") { * @brief Creates and tests a query with an escaped '*' character. 
*/ TEST_CASE("escaped_star_query", "[Query]") { - constexpr string_view cRawQueryString{"*"}; - constexpr string_view cProcessedQueryString{"*"}; - set const expected_serialized_interpretations{"logtype='*', contains_wildcard='0'"}; + constexpr string_view cRawQueryString{R"(a\*b)"}; + constexpr string_view cProcessedQueryString{R"(a\*b)"}; + set const expected_serialized_interpretations{R"(logtype='a\*b', contains_wildcard='0')"}; test_query(cRawQueryString, cProcessedQueryString, expected_serialized_interpretations); } From 46344b422f5c1f0aded1f2a399dd1b201874eab4 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 27 Aug 2025 06:44:20 -0400 Subject: [PATCH 145/168] Format. --- tests/test-query.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test-query.cpp b/tests/test-query.cpp index 97b36bbc..16075a22 100644 --- a/tests/test-query.cpp +++ b/tests/test-query.cpp @@ -237,7 +237,9 @@ TEST_CASE("long_non_greedy_wildcard_sequence_query", "[Query]") { TEST_CASE("escaped_star_query", "[Query]") { constexpr string_view cRawQueryString{R"(a\*b)"}; constexpr string_view cProcessedQueryString{R"(a\*b)"}; - set const expected_serialized_interpretations{R"(logtype='a\*b', contains_wildcard='0')"}; + set const expected_serialized_interpretations{ + R"(logtype='a\*b', contains_wildcard='0')" + }; test_query(cRawQueryString, cProcessedQueryString, expected_serialized_interpretations); } From 80bb255e79247cfbc78bca4d4f4540e691760ab2 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Fri, 29 Aug 2025 05:12:00 -0400 Subject: [PATCH 146/168] Improve naming and docstring for is_surrounded_by_delimiters. 
--- .../wildcard_query_parser/ExpressionView.cpp | 2 +- .../wildcard_query_parser/ExpressionView.hpp | 16 +++++++++------- src/log_surgeon/wildcard_query_parser/Query.cpp | 2 +- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp b/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp index 75a92801..cc903a6d 100644 --- a/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp +++ b/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp @@ -44,7 +44,7 @@ auto ExpressionView::extend_to_adjacent_greedy_wildcards() const return {is_extended, wildcard_expression_view}; } -auto ExpressionView::is_surrounded_by_delims_or_wildcards( +auto ExpressionView::is_surrounded_by_delims( std::array const& delim_table ) const -> bool { auto const [begin_idx, end_idx]{get_indices()}; diff --git a/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp b/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp index 69758a62..009f5ff2 100644 --- a/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp +++ b/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp @@ -44,27 +44,29 @@ class ExpressionView { } /** - * Checks whether the view is surrounded by delimiters or wildcards. + * Checks whether the view is surrounded by delimiters. The start and end of an expression are + * always considered a delimiter. A greedy wildcard may represent a string that includes a + * flanking delimiter. * - * An expression is considered surrounded if both its left and right boundary satisfy certain + * A view is considered bounded if both its left and right boundary satisfy certain * requirements. * * Left boundary: * - The view is at the start of the expression, or * - The first character is a greedy wildcard, or - * - The character immediately left of the view is a delimiter or wildcard. + * - Immediately left of the view is a delimiter or wildcard. 
* * Right boundary: * - The view is at the end of the expression, or * - The last character is a greedy wildcard, or - * - The character immediately right of the view is a delimiter or wildcard, or - * - The character immediately right of the view is an escape character and the character to its + * - Immediately right of the view is a delimiter or wildcard, or + * - Immediately right of the view is an escape character and the character to its * immediate right is a delimiter or wildcard. * * @param delim_table Table indicating for each character whether or not it is a delimiter. - * @return true when both preceding and succeeding boundaries qualify; false otherwise. + * @return true when both left and right boundaries qualify; false otherwise. */ - [[nodiscard]] auto is_surrounded_by_delims_or_wildcards( + [[nodiscard]] auto is_surrounded_by_delims( std::array const& delim_table ) const -> bool; diff --git a/src/log_surgeon/wildcard_query_parser/Query.cpp b/src/log_surgeon/wildcard_query_parser/Query.cpp index e04f305e..8252d173 100644 --- a/src/log_surgeon/wildcard_query_parser/Query.cpp +++ b/src/log_surgeon/wildcard_query_parser/Query.cpp @@ -133,7 +133,7 @@ auto Query::get_all_single_token_interpretations( interpretations.emplace_back("*"); return interpretations; } - if (false == expression_view.is_surrounded_by_delims_or_wildcards(lexer.get_delim_table())) { + if (false == expression_view.is_surrounded_by_delims(lexer.get_delim_table())) { interpretations.emplace_back(string{expression_view.get_search_string()}); return interpretations; } From 79d0114a30e5fb57b06d965167975d43793de64f Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 2 Sep 2025 10:03:43 -0400 Subject: [PATCH 147/168] Add multi-variable tests. 
--- tests/test-query.cpp | 216 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 194 insertions(+), 22 deletions(-) diff --git a/tests/test-query.cpp b/tests/test-query.cpp index 16075a22..d402f053 100644 --- a/tests/test-query.cpp +++ b/tests/test-query.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -24,6 +25,7 @@ using log_surgeon::wildcard_query_parser::Query; using std::set; using std::string; using std::string_view; +using std::vector; namespace { /** @@ -32,28 +34,31 @@ namespace { * * @param raw_query_string The search query. * @param expected_processed_query_string The processed search query. + * @param schema_rules A vector of strings, each string representing a schema rule. * @param expected_serialized_interpretations The expected set of serialized interpretations. */ auto test_query( string_view raw_query_string, string_view expected_processed_query_string, + vector const& schema_rules, set const& expected_serialized_interpretations ) -> void; /** - * Initializes a `ByteLexer` with delimiters "\n\r\[:" and variable - * "hasNumber:[A-Za-z]*\d+[A-Za-z]*". + * Initializes a `ByteLexer` with space as a delimiter and the given `schema_rules`. * + * @param schema_rules A vector of strings, each string representing a schema rule. * @return The initialized `ByteLexer`. 
*/ -auto make_test_lexer() -> ByteLexer; +auto make_test_lexer(vector const& schema_rules) -> ByteLexer; auto test_query( string_view const raw_query_string, string_view const expected_processed_query_string, + vector const& schema_rules, set const& expected_serialized_interpretations ) -> void { - auto const lexer{make_test_lexer()}; + auto const lexer{make_test_lexer(schema_rules)}; Query const query{string(raw_query_string)}; REQUIRE(expected_processed_query_string == query.get_processed_query_string()); @@ -67,21 +72,25 @@ auto test_query( REQUIRE(expected_serialized_interpretations == serialized_interpretations); } -auto make_test_lexer() -> ByteLexer { - Schema schema; - schema.add_delimiters(R"(delimiters: \n\r\[:,)"); - schema.add_variable(R"(hasNumber:[A-Za-z]*\d+[A-Za-z]*)", -1); - +auto make_test_lexer(vector const& schema_rules) -> ByteLexer { ByteLexer lexer; - lexer.m_symbol_id["hasNumber"] = 0; - lexer.m_id_symbol[0] = "hasNumber"; + lexer.set_delimiters({' '}); + + Schema schema; + size_t symbol_id{0}; + for (auto const& schema_rule : schema_rules) { + schema.add_variable(schema_rule, -1); + ++symbol_id; + } auto const schema_ast = schema.release_schema_ast_ptr(); REQUIRE(nullptr != schema_ast); - REQUIRE(1 == schema_ast->m_schema_vars.size()); - REQUIRE(nullptr != schema_ast->m_schema_vars[0]); - auto& capture_rule_ast = dynamic_cast(*schema_ast->m_schema_vars[0]); - lexer.add_rule(lexer.m_symbol_id["hasNumber"], std::move(capture_rule_ast.m_regex_ptr)); + REQUIRE(schema_rules.size() == schema_ast->m_schema_vars.size()); + for (size_t i{0}; i < schema_ast->m_schema_vars.size(); ++i) { + REQUIRE(nullptr != schema_ast->m_schema_vars[i]); + auto& capture_rule_ast{dynamic_cast(*schema_ast->m_schema_vars[i])}; + lexer.add_rule(i, std::move(capture_rule_ast.m_regex_ptr)); + } lexer.generate(); return lexer; @@ -95,9 +104,15 @@ auto make_test_lexer() -> ByteLexer { TEST_CASE("empty_query", "[Query]") { constexpr string_view cRawQueryString; constexpr 
string_view cProcessedQueryString; + vector const schema_rules{{R"(hasNumber:[A-Za-z]*\d+[A-Za-z]*)"}}; set const expected_serialized_interpretations; - test_query(cRawQueryString, cProcessedQueryString, expected_serialized_interpretations); + test_query( + cRawQueryString, + cProcessedQueryString, + schema_rules, + expected_serialized_interpretations + ); } /** @@ -107,9 +122,15 @@ TEST_CASE("empty_query", "[Query]") { TEST_CASE("greedy_wildcard_query", "[Query]") { constexpr string_view cRawQueryString{"*"}; constexpr string_view cProcessedQueryString{"*"}; + vector const schema_rules{{R"(hasNumber:[A-Za-z]*\d+[A-Za-z]*)"}}; set const expected_serialized_interpretations{"logtype='*', contains_wildcard='0'"}; - test_query(cRawQueryString, cProcessedQueryString, expected_serialized_interpretations); + test_query( + cRawQueryString, + cProcessedQueryString, + schema_rules, + expected_serialized_interpretations + ); } /** @@ -119,6 +140,7 @@ TEST_CASE("greedy_wildcard_query", "[Query]") { TEST_CASE("repeated_greedy_wildcard_query", "[Query]") { constexpr string_view cRawQueryString{"a**b"}; constexpr string_view cProcessedQueryString{"a*b"}; + vector const schema_rules{{R"(hasNumber:[A-Za-z]*\d+[A-Za-z]*)"}}; set const expected_serialized_interpretations{ "logtype='a*b', contains_wildcard='0'", "logtype='a***b', contains_wildcard='0'", @@ -128,7 +150,12 @@ TEST_CASE("repeated_greedy_wildcard_query", "[Query]") { "logtype='a**<0>(*b)', contains_wildcard='01'" }; - test_query(cRawQueryString, cProcessedQueryString, expected_serialized_interpretations); + test_query( + cRawQueryString, + cProcessedQueryString, + schema_rules, + expected_serialized_interpretations + ); } /** @@ -138,6 +165,7 @@ TEST_CASE("repeated_greedy_wildcard_query", "[Query]") { TEST_CASE("short_wildcard_sequence_query", "[Query]") { constexpr string_view cRawQueryString{"a?*b"}; constexpr string_view cProcessedQueryString{"a*b"}; + vector const schema_rules{{R"(hasNumber:[A-Za-z]*\d+[A-Za-z]*)"}}; 
set const expected_serialized_interpretations{ "logtype='a*b', contains_wildcard='0'", "logtype='a***b', contains_wildcard='0'", @@ -147,7 +175,12 @@ TEST_CASE("short_wildcard_sequence_query", "[Query]") { "logtype='a**<0>(*b)', contains_wildcard='01'" }; - test_query(cRawQueryString, cProcessedQueryString, expected_serialized_interpretations); + test_query( + cRawQueryString, + cProcessedQueryString, + schema_rules, + expected_serialized_interpretations + ); } /** @@ -157,6 +190,7 @@ TEST_CASE("short_wildcard_sequence_query", "[Query]") { TEST_CASE("long_mixed_wildcard_sequence_query", "[Query]") { constexpr string_view cRawQueryString{"a?*?*?*?b"}; constexpr string_view cProcessedQueryString{"a*b"}; + vector const schema_rules{{R"(hasNumber:[A-Za-z]*\d+[A-Za-z]*)"}}; set const expected_serialized_interpretations{ "logtype='a*b', contains_wildcard='0'", "logtype='a***b', contains_wildcard='0'", @@ -166,7 +200,12 @@ TEST_CASE("long_mixed_wildcard_sequence_query", "[Query]") { "logtype='a**<0>(*b)', contains_wildcard='01'" }; - test_query(cRawQueryString, cProcessedQueryString, expected_serialized_interpretations); + test_query( + cRawQueryString, + cProcessedQueryString, + schema_rules, + expected_serialized_interpretations + ); } /** @@ -176,6 +215,7 @@ TEST_CASE("long_mixed_wildcard_sequence_query", "[Query]") { TEST_CASE("long_non_greedy_wildcard_sequence_query", "[Query]") { constexpr string_view cRawQueryString{"a????b"}; constexpr string_view cProcessedQueryString{"a????b"}; + vector const schema_rules{{R"(hasNumber:[A-Za-z]*\d+[A-Za-z]*)"}}; set const expected_serialized_interpretations{ R"(logtype='a????b', contains_wildcard='0')", @@ -227,7 +267,12 @@ TEST_CASE("long_non_greedy_wildcard_sequence_query", "[Query]") { R"(logtype='<0>(a?)<0>(?)<0>(?)<0>(?b)', contains_wildcard='1111')" }; - test_query(cRawQueryString, cProcessedQueryString, expected_serialized_interpretations); + test_query( + cRawQueryString, + cProcessedQueryString, + schema_rules, + 
expected_serialized_interpretations + ); } /** @@ -237,9 +282,136 @@ TEST_CASE("long_non_greedy_wildcard_sequence_query", "[Query]") { TEST_CASE("escaped_star_query", "[Query]") { constexpr string_view cRawQueryString{R"(a\*b)"}; constexpr string_view cProcessedQueryString{R"(a\*b)"}; + vector const schema_rules{{R"(hasNumber:[A-Za-z]*\d+[A-Za-z]*)"}}; set const expected_serialized_interpretations{ R"(logtype='a\*b', contains_wildcard='0')" }; - test_query(cRawQueryString, cProcessedQueryString, expected_serialized_interpretations); + test_query( + cRawQueryString, + cProcessedQueryString, + schema_rules, + expected_serialized_interpretations + ); +} + +/** + * @ingroup unit_tests_query + * @brief Creates and tests a query consisting of a single integer. + * + * NOTE: This has a static-text case as strings "1", "2", and "3" in isolation aren't surrounded by + * delimiters. These tokens then build up the interpretation "123". Although additional + * interpretations don't impact correctness, they may impact performance. We can optimize these out, + * but it'll make the code messy. Instead, we should eventually remove the explicit tracking of + * static-tokens, in favor of only tracking variable tokens. + */ +TEST_CASE("int_query", "[Query]") { + constexpr string_view cRawQueryString{"123"}; + constexpr string_view cProcessedQueryString{"123"}; + vector const schema_rules{{R"(int:\d+)"}}; + set const expected_serialized_interpretations{ + R"(logtype='123', contains_wildcard='0')", + R"(logtype='<0>(123)', contains_wildcard='0')" + }; + + test_query( + cRawQueryString, + cProcessedQueryString, + schema_rules, + expected_serialized_interpretations + ); +} + +/** + * @ingroup unit_tests_query + * @brief Creates and tests a query with multiple variable types. + * + * This test ensures that each non-wildcard token is assigned to the highest priority variable.
+ * + * NOTE: Similar to the above `int_query` test there are unneeded intepretations due to aggresively + * generating static-text tokens. + */ +TEST_CASE("non_wildcard_multi_variable_query", "[Query]") { + constexpr string_view cRawQueryString{"abc123 123"}; + constexpr string_view cProcessedQueryString{"abc123 123"}; + + SECTION("int_priority") { + vector const schema_rules{ + {R"(int:(\d+))"}, + {R"(hasNumber:[A-Za-z]*\d+[A-Za-z]*)"} + }; + set const expected_serialized_interpretations{ + R"(logtype='abc123 123', contains_wildcard='0')", + R"(logtype='abc123 <0>(123)', contains_wildcard='00')", + R"(logtype='<1>(abc123) 123', contains_wildcard='00')", + R"(logtype='<1>(abc123) <0>(123)', contains_wildcard='000')" + }; + + test_query( + cRawQueryString, + cProcessedQueryString, + schema_rules, + expected_serialized_interpretations + ); + } + + SECTION("has_number_priority") { + vector const schema_rules{ + {R"(hasNumber:[A-Za-z]*\d+[A-Za-z]*)"}, + {R"(int:(\d+))"} + }; + set const expected_serialized_interpretations{ + R"(logtype='abc123 123', contains_wildcard='0')", + R"(logtype='abc123 <0>(123)', contains_wildcard='00')", + R"(logtype='<0>(abc123) 123', contains_wildcard='00')", + R"(logtype='<0>(abc123) <0>(123)', contains_wildcard='000')" + }; + + test_query( + cRawQueryString, + cProcessedQueryString, + schema_rules, + expected_serialized_interpretations + ); + } +} + +/** + * @ingroup unit_tests_query + * @brief Creates and tests a query with multiple variable types. + * + * This test ensures that each greedy wildcard token is identified as all correct token types. + * + * NOTE: Similar to the above `int_query` test there are unneeded intepretations due to aggresively + * generating static-text tokens. This same issue causes interpretations with redundant wildcards. 
+ */ +TEST_CASE("wildcard_multi_variable_query", "[Query]") { + constexpr string_view cRawQueryString{"abc123* *123"}; + constexpr string_view cProcessedQueryString{"abc123* *123"}; + + vector const schema_rules{ + {R"(int:(\d+))"}, + {R"(hasNumber:[A-Za-z]*\d+[A-Za-z]*)"} + }; + set const expected_serialized_interpretations{ + R"(logtype='abc123* *123', contains_wildcard='0')", + R"(logtype='abc123*** *123', contains_wildcard='0')", + R"(logtype='abc123* ***123', contains_wildcard='0')", + R"(logtype='abc123*** ***123', contains_wildcard='0')", + R"(logtype='abc123* **<0>(*123)', contains_wildcard='01')", + R"(logtype='abc123*** **<0>(*123)', contains_wildcard='01')", + R"(logtype='abc123* **<1>(*123)', contains_wildcard='01')", + R"(logtype='abc123*** **<1>(*123)', contains_wildcard='01')", + R"(logtype='<1>(abc123*)** *123', contains_wildcard='10')", + R"(logtype='<1>(abc123*)** ***123', contains_wildcard='10')", + R"(logtype='<1>(abc123*)** **<0>(*123)', contains_wildcard='101')", + R"(logtype='<1>(abc123*)** **<1>(*123)', contains_wildcard='101')" + }; + + test_query( + cRawQueryString, + cProcessedQueryString, + schema_rules, + expected_serialized_interpretations + ); } From 2091a02b09ebe58805c5b07db29475e812bd85d8 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 2 Sep 2025 10:06:54 -0400 Subject: [PATCH 148/168] Add missing header; Remove unused var. 
--- tests/test-query.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test-query.cpp b/tests/test-query.cpp index d402f053..78cac92b 100644 --- a/tests/test-query.cpp +++ b/tests/test-query.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -77,10 +78,8 @@ auto make_test_lexer(vector const& schema_rules) -> ByteLexer { lexer.set_delimiters({' '}); Schema schema; - size_t symbol_id{0}; for (auto const& schema_rule : schema_rules) { schema.add_variable(schema_rule, -1); - ++symbol_id; } auto const schema_ast = schema.release_schema_ast_ptr(); From 9eebe55a947fe12ab98e5ebf3cca28891ea57499 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 2 Sep 2025 10:07:37 -0400 Subject: [PATCH 149/168] Format. --- .../wildcard_query_parser/ExpressionView.cpp | 5 ++-- tests/test-query.cpp | 23 ++++++------------- 2 files changed, 9 insertions(+), 19 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp b/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp index cc903a6d..e0a7e294 100644 --- a/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp +++ b/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp @@ -44,9 +44,8 @@ auto ExpressionView::extend_to_adjacent_greedy_wildcards() const return {is_extended, wildcard_expression_view}; } -auto ExpressionView::is_surrounded_by_delims( - std::array const& delim_table -) const -> bool { +auto ExpressionView::is_surrounded_by_delims(std::array const& delim_table) const + -> bool { auto const [begin_idx, end_idx]{get_indices()}; bool has_left_boundary{false}; diff --git a/tests/test-query.cpp b/tests/test-query.cpp index 78cac92b..01fb614c 100644 --- a/tests/test-query.cpp +++ b/tests/test-query.cpp @@ -267,10 +267,10 @@ TEST_CASE("long_non_greedy_wildcard_sequence_query", "[Query]") { }; test_query( - cRawQueryString, - cProcessedQueryString, - schema_rules, - expected_serialized_interpretations + cRawQueryString, + cProcessedQueryString, + schema_rules, + 
expected_serialized_interpretations ); } @@ -335,10 +335,7 @@ TEST_CASE("non_wildcard_multi_variable_query", "[Query]") { constexpr string_view cProcessedQueryString{"abc123 123"}; SECTION("int_priority") { - vector const schema_rules{ - {R"(int:(\d+))"}, - {R"(hasNumber:[A-Za-z]*\d+[A-Za-z]*)"} - }; + vector const schema_rules{{R"(int:(\d+))"}, {R"(hasNumber:[A-Za-z]*\d+[A-Za-z]*)"}}; set const expected_serialized_interpretations{ R"(logtype='abc123 123', contains_wildcard='0')", R"(logtype='abc123 <0>(123)', contains_wildcard='00')", @@ -355,10 +352,7 @@ TEST_CASE("non_wildcard_multi_variable_query", "[Query]") { } SECTION("has_number_priority") { - vector const schema_rules{ - {R"(hasNumber:[A-Za-z]*\d+[A-Za-z]*)"}, - {R"(int:(\d+))"} - }; + vector const schema_rules{{R"(hasNumber:[A-Za-z]*\d+[A-Za-z]*)"}, {R"(int:(\d+))"}}; set const expected_serialized_interpretations{ R"(logtype='abc123 123', contains_wildcard='0')", R"(logtype='abc123 <0>(123)', contains_wildcard='00')", @@ -388,10 +382,7 @@ TEST_CASE("wildcard_multi_variable_query", "[Query]") { constexpr string_view cRawQueryString{"abc123* *123"}; constexpr string_view cProcessedQueryString{"abc123* *123"}; - vector const schema_rules{ - {R"(int:(\d+))"}, - {R"(hasNumber:[A-Za-z]*\d+[A-Za-z]*)"} - }; + vector const schema_rules{{R"(int:(\d+))"}, {R"(hasNumber:[A-Za-z]*\d+[A-Za-z]*)"}}; set const expected_serialized_interpretations{ R"(logtype='abc123* *123', contains_wildcard='0')", R"(logtype='abc123*** *123', contains_wildcard='0')", From d094f45ad3d6fbed12e204cc04fd70ce2d9e6adb Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 2 Sep 2025 10:18:44 -0400 Subject: [PATCH 150/168] Move TODOs into git issues. 
--- src/log_surgeon/wildcard_query_parser/Query.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/Query.cpp b/src/log_surgeon/wildcard_query_parser/Query.cpp index 8252d173..d9e4ba36 100644 --- a/src/log_surgeon/wildcard_query_parser/Query.cpp +++ b/src/log_surgeon/wildcard_query_parser/Query.cpp @@ -168,12 +168,9 @@ auto Query::get_matching_variable_types(string const& regex_string, ByteLexer co auto& rule_ast = dynamic_cast(*schema_ast->m_schema_vars[0]); vector rules; rules.emplace_back(0, std::move(rule_ast.m_regex_ptr)); - // TODO: Optimize NFA creation. ByteNfa const nfa{rules}; - // TODO: Optimize DFA creation. ByteDfa const dfa{nfa}; - // TODO: Could optimize to use a forward/reverse lexer in a lot of cases. auto var_types = lexer.get_dfa()->get_intersect(&dfa); return var_types; } From 9ca5b454dfd2e1cbae2d9ba60df8e65c2330ad18 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 2 Sep 2025 10:22:15 -0400 Subject: [PATCH 151/168] Improve docstring indentation and remove comma for consistency. --- src/log_surgeon/wildcard_query_parser/Query.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/Query.hpp b/src/log_surgeon/wildcard_query_parser/Query.hpp index 515d5a42..eba1e566 100644 --- a/src/log_surgeon/wildcard_query_parser/Query.hpp +++ b/src/log_surgeon/wildcard_query_parser/Query.hpp @@ -93,9 +93,9 @@ class Query { * - Is an isolated greedy wildcard, "*", or * - Is not surrounded by delimiters or wildcards (lexer won't consider it a variable), or * - Does not match any variable. - * - Then: - * - The only interpretation is a static token. - * - Else, if the substring contains a wildcard: + * - Then: + * - The only interpretation is a static token. + * - Else if the substring contains a wildcard: * - The interpretations include a static token, plus a variable token for each matching type. 
* - Else: * - The only interpretation is the variable token corresponding to the highest priority From 953e6bbd2b9e304bb7ceb643cc4830668194fb8b Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 2 Sep 2025 10:30:23 -0400 Subject: [PATCH 152/168] Move T(a,b) definition to relevant section; Indent equation for better grouping. --- src/log_surgeon/wildcard_query_parser/Query.hpp | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/Query.hpp b/src/log_surgeon/wildcard_query_parser/Query.hpp index eba1e566..ecd68de9 100644 --- a/src/log_surgeon/wildcard_query_parser/Query.hpp +++ b/src/log_surgeon/wildcard_query_parser/Query.hpp @@ -20,9 +20,6 @@ class Query { * interpretations of the query string belong to this set): * * 1. Interpret each substring [a,b) as a single token (1-length interpretation). - * - Denote T(a,b) to be the set of all valid single-token interpretations of substring - * [a,b). - * * - Substrings adjacent to greedy wildcards must be interpreted as if they include them. * - Example: query "a*b" is equivalent to "a***b". For a lexer with a `hasNum` variable * type ("\w*\d+\w*"), without extensions, the only interpretations would be: @@ -44,13 +41,14 @@ class Query { * captured by any other substring extension. * * 2. Let I(a) be the set of all multi-length interpretations of substring [0,a). - * - We can compute I(a) recursively using previously computed sets: + * - Let T(a,b) to be the set of all valid single-token interpretations of substring [a,b). + * - We can then compute I(a) recursively: * - * I(a) = T(0,a) - * U (I(1) x T(1,a)) - * U (I(2) x T(2,a)) - * ... - * U (I(a-1) x T(a-1,a)) + * I(a) = T(0,a) + * U (I(1) x T(1,a)) + * U (I(2) x T(2,a)) + * ... + * U (I(a-1) x T(a-1,a)) * * where x denotes the cross product: all combinations of prefix interpretations from I(i) * and suffix interpretations from T(i,a).
From f6a0121420332391488030ef041909dfac210ef4 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 2 Sep 2025 10:32:08 -0400 Subject: [PATCH 153/168] Fix typo (0,1] to [0,1). --- src/log_surgeon/wildcard_query_parser/Query.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/wildcard_query_parser/Query.hpp b/src/log_surgeon/wildcard_query_parser/Query.hpp index ecd68de9..d69f6def 100644 --- a/src/log_surgeon/wildcard_query_parser/Query.hpp +++ b/src/log_surgeon/wildcard_query_parser/Query.hpp @@ -35,7 +35,7 @@ class Query { * "a??b". * * - Substrings that begin or end with a greedy wildcard are skipped as they are redundant. - * - Example: in "a*b", substring (0,1] extends to "a*", therefore substring [0,2) "a*" is + * - Example: in "a*b", substring [0,1) extends to "a*", therefore substring [0,2) "a*" is * redundant. In other words, a decomposition like "a*" + "b" is a subset of the more * general "a*" + "*" + "*b". However, an isolated "*" must not be skipped as it is not * captured by any other substring extension. From b7146769b24a21571a7b14e7aef30b50330dd477 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 2 Sep 2025 14:31:58 -0400 Subject: [PATCH 154/168] Update docstring. --- .../wildcard_query_parser/Query.hpp | 44 +++++++++++-------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/Query.hpp b/src/log_surgeon/wildcard_query_parser/Query.hpp index d69f6def..39e1eda1 100644 --- a/src/log_surgeon/wildcard_query_parser/Query.hpp +++ b/src/log_surgeon/wildcard_query_parser/Query.hpp @@ -20,25 +20,33 @@ class Query { * interpretations of the query string belong to this set): * * 1. Interpret each substring [a,b) as a single token (1-length interpretation). - * - Substrings adjacent to greedy wildcards must be interpreted as if they include them. - * - Example: query "a*b" is equivalent to "a***b". 
For a lexer with a `hasNum` variable - * type ("\w*\d+\w*"), without extensions, the only interpretations would be: - * {(a*b)}, - * {(a*) (b)}, - * {(a) (*b)}. - * However, a string like "a1 abc 1b" is also matched by "a*b", and requires the - * interpretation {(a*) (*) (*b)}. Extension ensures such cases - * are captured. - * - Note: isolated greedy wildcards (`*`) are never extended as the `Query` collapses - * repeated greedy wildcards during preprocessing. - * - Note: non-greedy wildcards (`?`) are not extended as "a?b" is not equivalent to - * "a??b". - * - * - Substrings that begin or end with a greedy wildcard are skipped as they are redundant. + * - Substrings adjacent to greedy wildcards must be interpreted as if they include them. To + * implement this, we extend all substrings to include adjacent wildcards. + * - Example: consider query "a*b" and variable type `hasNum` ("\w*\d+\w*"): + * - Without extension: + * - "a" -> static-text + * - "b" -> static-text + * - "a*" -> (a*) + * - "*b" -> (*b) + * - Multi-token interpretations (via step 2 below): + * - {a*b}, + * - {(a*)b}, + * - {a(*b)}. + * - None of these match a string like "a1 c 1b", which has interpretation + * {(a1) c (1b)}. By interpreting "a" as "a*" and "b" as "*b", the '*' + * is preserved allowing for interpretation {(a*)*(*b)}, which matches + * {(a1) c (1b)}. + * - Special cases: + * - Single-character greedy wildcards ("*") are not extended as they have no adjacent + * greedy wildcards (repeated wildcards are collapsed during preprocessing). + * - Substrings are not extended to non-greedy wildcards (`?`) as "a?b" =/= "a??b". + * - Substrings of length >= 2 that begin or end with a greedy wildcard are skipped as they + * are redundant. * - Example: in "a*b", substring [0,1) extends to "a*", therefore substring [0,2) "a*" is - * redundant. In other words, a decomposition like "a*" + "b" is a subset of the more - * general "a*" + "*" + "*b". 
However, an isolated "*" must not be skipped as it is not - * captured by any other substring extension. + * redundant. This avoids producing interpretation {(a*)b}, which is a subset of + * {(a*)*b}. + * - Note: The length >= 2 requirement avoids skipping 1-length greedy substrings ("*") + * as they are never redundant (i.e., no 0-length substring exists to extend). * * 2. Let I(a) be the set of all multi-length interpretations of substring [0,a). * - Let T(a,b) to be the set of all valid single-token interpretations of substring [a,b). From 8057ede4243624f35bccfbbd7ad40e865aa298ef Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 2 Sep 2025 14:33:07 -0400 Subject: [PATCH 155/168] Fix spacing. --- src/log_surgeon/wildcard_query_parser/Query.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/Query.hpp b/src/log_surgeon/wildcard_query_parser/Query.hpp index 39e1eda1..39c92f68 100644 --- a/src/log_surgeon/wildcard_query_parser/Query.hpp +++ b/src/log_surgeon/wildcard_query_parser/Query.hpp @@ -45,8 +45,8 @@ class Query { * - Example: in "a*b", substring [0,1) extends to "a*", therefore substring [0,2) "a*" is * redundant. This avoids producing interpretation {(a*)b}, which is a subset of * {(a*)*b}. - * - Note: The length >= 2 requirement avoids skipping 1-length greedy substrings ("*") - * as they are never redundant (i.e., no 0-length substring exists to extend). + * - Note: The length >= 2 requirement avoids skipping 1-length greedy substrings ("*") as + * they are never redundant (i.e., no 0-length substring exists to extend). * * 2. Let I(a) be the set of all multi-length interpretations of substring [0,a). * - Let T(a,b) to be the set of all valid single-token interpretations of substring [a,b). 
From 55cad11c3b9ca5858a2e1edcaa1ce473c3a5ef3e Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 2 Sep 2025 14:40:24 -0400 Subject: [PATCH 156/168] Remove the concept of an escaped wildcard from the docstring. --- src/log_surgeon/wildcard_query_parser/ExpressionView.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp b/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp index 009f5ff2..93ddf017 100644 --- a/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp +++ b/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp @@ -61,7 +61,7 @@ class ExpressionView { * - The last character is a greedy wildcard, or * - Immediately right of the view is a delimiter or wildcard, or * - Immediately right of the view is an escape character and the character to its - * immediate right is a delimiter or wildcard. + * immediate right is a delimiter. * * @param delim_table Table indicating for each character whether or not it is a delimiter. * @return true when both left and right boundaries qualify; false otherwise. From 797b3768dbe11fff610e072ccaa6b82ff4a93fc0 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Tue, 2 Sep 2025 14:42:28 -0400 Subject: [PATCH 157/168] Move short circuit to top of method. 
--- src/log_surgeon/wildcard_query_parser/Query.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/Query.cpp b/src/log_surgeon/wildcard_query_parser/Query.cpp index d9e4ba36..5a730eda 100644 --- a/src/log_surgeon/wildcard_query_parser/Query.cpp +++ b/src/log_surgeon/wildcard_query_parser/Query.cpp @@ -77,13 +77,12 @@ Query::Query(string const& query_string) { auto Query::get_all_multi_token_interpretations(ByteLexer const& lexer) const -> std::set { - Expression const expression{m_processed_query_string}; - vector> query_interpretations(expression.length()); - if (m_processed_query_string.empty()) { return {}; } + Expression const expression{m_processed_query_string}; + vector> query_interpretations(expression.length()); for (size_t end_idx = 1; end_idx <= expression.length(); ++end_idx) { for (size_t begin_idx = 0; begin_idx < end_idx; ++begin_idx) { ExpressionView const expression_view{expression, begin_idx, end_idx}; From b82aea5194f804aed634369218cef91384fc4aea Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 3 Sep 2025 10:21:16 -0400 Subject: [PATCH 158/168] Clarify in docstring interpretation length refers to tokens, query length refers to characters. 
--- src/log_surgeon/wildcard_query_parser/Query.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/Query.hpp b/src/log_surgeon/wildcard_query_parser/Query.hpp index 39c92f68..10ac0d62 100644 --- a/src/log_surgeon/wildcard_query_parser/Query.hpp +++ b/src/log_surgeon/wildcard_query_parser/Query.hpp @@ -16,10 +16,10 @@ class Query { explicit Query(std::string const& query_string); /** - * Generates all multi-token interpretations of the n-length query string (single-token - * interpretations of the query string belong to this set): + * Generates all k-length interpretations of the n-length query string, where k is the number of + * tokens in the intepretation, n is the number of characters in the query, and 1 <= k < n. * - * 1. Interpret each substring [a,b) as a single token (1-length interpretation). + * 1. Interpret each substring [a,b) as a single token (k=1). * - Substrings adjacent to greedy wildcards must be interpreted as if they include them. To * implement this, we extend all substrings to include adjacent wildcards. * - Example: consider query "a*b" and variable type `hasNum` ("\w*\d+\w*"): @@ -48,7 +48,7 @@ class Query { * - Note: The length >= 2 requirement avoids skipping 1-length greedy substrings ("*") as * they are never redundant (i.e., no 0-length substring exists to extend). * - * 2. Let I(a) be the set of all multi-length interpretations of substring [0,a). + * 2. Let I(a) be the set of all k-length interpretations of substring [0,a), where 1 <= k < a. * - Let T(a,b) to be the set of all valid single-token interpretations of substring [a,b). * - We can then compute I(a) recursively: * From 73845f87e71fa30ca40d2e1be802a06d85bae899 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 3 Sep 2025 10:26:27 -0400 Subject: [PATCH 159/168] Discuss empty string case in docstring. 
--- src/log_surgeon/wildcard_query_parser/ExpressionView.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp b/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp index 93ddf017..a69575c5 100644 --- a/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp +++ b/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp @@ -53,12 +53,12 @@ class ExpressionView { * * Left boundary: * - The view is at the start of the expression, or - * - The first character is a greedy wildcard, or + * - The first character is a greedy wildcard (if non-empty), or * - Immediately left of the view is a delimiter or wildcard. * * Right boundary: * - The view is at the end of the expression, or - * - The last character is a greedy wildcard, or + * - The last character is a greedy wildcard (if non-empty), or * - Immediately right of the view is a delimiter or wildcard, or * - Immediately right of the view is an escape character and the character to its * immediate right is a delimiter. From 9261531f18ae97c7f444fae919d88047d88cab45 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 3 Sep 2025 10:38:10 -0400 Subject: [PATCH 160/168] Add docstring for caching. --- src/log_surgeon/Lexer.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/log_surgeon/Lexer.hpp b/src/log_surgeon/Lexer.hpp index a611c0d6..fe0e7a24 100644 --- a/src/log_surgeon/Lexer.hpp +++ b/src/log_surgeon/Lexer.hpp @@ -256,6 +256,7 @@ class Lexer { std::array m_is_first_char_of_a_variable{false}; std::vector> m_rules; uint32_t m_line{0}; + // `m_has_delimiters` is cached for performance bool m_has_delimiters{false}; std::unique_ptr> m_dfa; std::optional m_first_delimiter_pos{std::nullopt}; bool m_asked_for_more_data{false}; From a3bcc6ba435cd7c4bce98067b5b58b73659db1db Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 3 Sep 2025 11:03:31 -0400 Subject: [PATCH 161/168] Check if escaped character is a delim only, remove check for wildcard as that's impossible.
--- src/log_surgeon/wildcard_query_parser/ExpressionView.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp b/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp index e0a7e294..e6a5a6b8 100644 --- a/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp +++ b/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp @@ -64,7 +64,7 @@ auto ExpressionView::is_surrounded_by_delims(std::array const auto const& succeeding_char{m_expression->get_chars()[end_idx]}; if (succeeding_char.is_escape()) { auto const& logical_succeeding_char{m_expression->get_chars()[end_idx + 1]}; - has_right_boundary = logical_succeeding_char.is_delim_or_wildcard(delim_table); + has_right_boundary = logical_succeeding_char.is_delim(delim_table); } else { has_right_boundary = succeeding_char.is_delim_or_wildcard(delim_table); } From de71fc729ab1b80581aca43f1c20478546b93380 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 3 Sep 2025 12:20:44 -0400 Subject: [PATCH 162/168] Add bounds check. 
--- src/log_surgeon/wildcard_query_parser/ExpressionView.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp b/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp index e6a5a6b8..f055c453 100644 --- a/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp +++ b/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp @@ -63,8 +63,10 @@ auto ExpressionView::is_surrounded_by_delims(std::array const } else { auto const& succeeding_char{m_expression->get_chars()[end_idx]}; if (succeeding_char.is_escape()) { - auto const& logical_succeeding_char{m_expression->get_chars()[end_idx + 1]}; - has_right_boundary = logical_succeeding_char.is_delim(delim_table); + if (m_expression->length() > end_idx + 1) { + auto const& logical_succeeding_char{m_expression->get_chars()[end_idx + 1]}; + has_right_boundary = logical_succeeding_char.is_delim(delim_table); + } } else { has_right_boundary = succeeding_char.is_delim_or_wildcard(delim_table); } From 736557275f21ff8d1ad269a83145037fae0f2eeb Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 3 Sep 2025 12:24:20 -0400 Subject: [PATCH 163/168] Dynamic cast to ptr and check it's not null to avoid throwing bad_cast on failure.
--- tests/test-query.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test-query.cpp b/tests/test-query.cpp index 01fb614c..57e8bfb8 100644 --- a/tests/test-query.cpp +++ b/tests/test-query.cpp @@ -87,8 +87,9 @@ auto make_test_lexer(vector const& schema_rules) -> ByteLexer { REQUIRE(schema_rules.size() == schema_ast->m_schema_vars.size()); for (size_t i{0}; i < schema_ast->m_schema_vars.size(); ++i) { REQUIRE(nullptr != schema_ast->m_schema_vars[i]); - auto& capture_rule_ast{dynamic_cast(*schema_ast->m_schema_vars[i])}; - lexer.add_rule(i, std::move(capture_rule_ast.m_regex_ptr)); + auto* capture_rule_ast{dynamic_cast(schema_ast->m_schema_vars[i].get())}; + REQUIRE(nullptr != capture_rule_ast); + lexer.add_rule(i, std::move(capture_rule_ast->m_regex_ptr)); } lexer.generate(); From f7399720dc9ac99dc79e5a5ce17426a7f2feda3b Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 3 Sep 2025 12:26:05 -0400 Subject: [PATCH 164/168] Fix typos. --- tests/test-query.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test-query.cpp b/tests/test-query.cpp index 57e8bfb8..19687c6b 100644 --- a/tests/test-query.cpp +++ b/tests/test-query.cpp @@ -299,7 +299,7 @@ TEST_CASE("escaped_star_query", "[Query]") { * @ingroup unit_tests_query * @brief Creates and tests a query with an escaped '*' character. * - * NOTE: This has a static-text case as strings "1", "2', and "3" in isolation aren't surrounded by + * NOTE: This has a static-text case as strings "1", "2", and "3" in isolation aren't surrounded by * delimiters. These tokens then build up the interpretation "123". Although additional * interpretations don't impact correctness, they may impact performance. We can optimize these out, * but it'll make the code messy. 
Instead, we should eventually remove the explicit tracking of @@ -328,7 +328,7 @@ TEST_CASE("int_query", "[Query]") { * * This test ensures that each non-wildcard token is assigned to the highest priority variable. * - * NOTE: Similar to the above `int_query` test there are unneeded intepretations due to aggresively + * NOTE: Similar to the above `int_query` test there are unneeded interpretations due to aggresively * generating static-text tokens. */ TEST_CASE("non_wildcard_multi_variable_query", "[Query]") { @@ -376,7 +376,7 @@ TEST_CASE("non_wildcard_multi_variable_query", "[Query]") { * * This test ensures that each greedy wildcard token is identified as all correct token types. * - * NOTE: Similar to the above `int_query` test there are unneeded intepretations due to aggresively + * NOTE: Similar to the above `int_query` test there are unneeded interpretations due to aggresively * generating static-text tokens. This same issue causes interpretations with redundant wildcards. */ TEST_CASE("wildcard_multi_variable_query", "[Query]") { From f8db5dac71f341ebaa7f9ecc10790d2e7afaa727 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 3 Sep 2025 12:29:58 -0400 Subject: [PATCH 165/168] Update docstring for clarity. --- src/log_surgeon/wildcard_query_parser/Query.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/Query.hpp b/src/log_surgeon/wildcard_query_parser/Query.hpp index 10ac0d62..81db8ee6 100644 --- a/src/log_surgeon/wildcard_query_parser/Query.hpp +++ b/src/log_surgeon/wildcard_query_parser/Query.hpp @@ -16,8 +16,7 @@ class Query { explicit Query(std::string const& query_string); /** - * Generates all k-length interpretations of the n-length query string, where k is the number of - * tokens in the intepretation, n is the number of characters in the query, and 1 <= k < n. + * Generates all k-token interpretations of the n-character query string, where 1 <= k < n. * * 1. 
Interpret each substring [a,b) as a single token (k=1). * - Substrings adjacent to greedy wildcards must be interpreted as if they include them. To From 1fd40670aa3922f0b6d1e3ccaad04b48456d52a5 Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 3 Sep 2025 12:32:40 -0400 Subject: [PATCH 166/168] Specify greedy wildcards to be accurate. --- src/log_surgeon/wildcard_query_parser/Query.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log_surgeon/wildcard_query_parser/Query.hpp b/src/log_surgeon/wildcard_query_parser/Query.hpp index 81db8ee6..0fab4ddf 100644 --- a/src/log_surgeon/wildcard_query_parser/Query.hpp +++ b/src/log_surgeon/wildcard_query_parser/Query.hpp @@ -20,7 +20,7 @@ class Query { * * 1. Interpret each substring [a,b) as a single token (k=1). * - Substrings adjacent to greedy wildcards must be interpreted as if they include them. To - * implement this, we extend all substrings to include adjacent wildcards. + * implement this, we extend all substrings to include adjacent greedy wildcards. * - Example: consider query "a*b" and variable type `hasNum` ("\w*\d+\w*"): * - Without extension: * - "a" -> static-text From 9e9fb7cea9ea97d39bca0664b5b5b63875d6b8fd Mon Sep 17 00:00:00 2001 From: SharafMohamed Date: Wed, 3 Sep 2025 12:34:34 -0400 Subject: [PATCH 167/168] Fix grammar in docstring. Fix consistency of docstring. --- src/log_surgeon/wildcard_query_parser/Query.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/log_surgeon/wildcard_query_parser/Query.hpp b/src/log_surgeon/wildcard_query_parser/Query.hpp index 0fab4ddf..915077dd 100644 --- a/src/log_surgeon/wildcard_query_parser/Query.hpp +++ b/src/log_surgeon/wildcard_query_parser/Query.hpp @@ -47,8 +47,8 @@ class Query { * - Note: The length >= 2 requirement avoids skipping 1-length greedy substrings ("*") as * they are never redundant (i.e., no 0-length substring exists to extend). * - * 2. 
Let I(a) be the set of all k-length interpretations of substring [0,a), where 1 <= k < a. - * - Let T(a,b) to be the set of all valid single-token interpretations of substring [a,b). + * 2. Let I(a) be the set of all k-token interpretations of substring [0,a), where 1 <= k < a. + * - Let T(a,b) be the set of all valid single-token interpretations of substring [a,b). * - We can then compute I(a) recursively: * * I(a) = T(0,a) From 27d87b66a8e4c5032f4c921670c0982f6b851df6 Mon Sep 17 00:00:00 2001 From: davidlion Date: Thu, 4 Sep 2025 10:47:03 -0400 Subject: [PATCH 168/168] Tweak has_delim comment. --- src/log_surgeon/Lexer.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/log_surgeon/Lexer.hpp b/src/log_surgeon/Lexer.hpp index fe0e7a24..9503f681 100644 --- a/src/log_surgeon/Lexer.hpp +++ b/src/log_surgeon/Lexer.hpp @@ -256,8 +256,10 @@ class Lexer { std::array m_is_first_char_of_a_variable{false}; std::vector> m_rules; uint32_t m_line{0}; - // `m_has_delimiters` is cached for performance + + // For performance, `m_has_delimiters` caches whether any element in `m_is_delimiter` is true. bool m_has_delimiters{false}; + std::unique_ptr> m_dfa; std::optional m_first_delimiter_pos{std::nullopt}; bool m_asked_for_more_data{false};