diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7af8b272..891a4565 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -88,6 +88,8 @@ set(SOURCE_FILES
     src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp
     src/log_surgeon/wildcard_query_parser/ExpressionView.cpp
     src/log_surgeon/wildcard_query_parser/ExpressionView.hpp
+    src/log_surgeon/wildcard_query_parser/Query.cpp
+    src/log_surgeon/wildcard_query_parser/Query.hpp
     src/log_surgeon/wildcard_query_parser/QueryInterpretation.cpp
     src/log_surgeon/wildcard_query_parser/QueryInterpretation.hpp
     src/log_surgeon/wildcard_query_parser/StaticQueryToken.hpp
diff --git a/docs/doxygen/mainpage.dox b/docs/doxygen/mainpage.dox
index ab640c7f..329e4875 100644
--- a/docs/doxygen/mainpage.dox
+++ b/docs/doxygen/mainpage.dox
@@ -17,6 +17,7 @@
  * - @ref unit_tests_expression_view "Expression View"
  * - @ref unit_tests_nfa "NFA"
  * - @ref unit_tests_prefix_tree "Prefix tree"
+ * - @ref unit_tests_query "Query"
  * - @ref unit_tests_query_interpretation "Query Interpretation"
  * - @ref unit_tests_regex_ast "Regex AST"
  * - @ref unit_tests_register_handler "Register handler"
diff --git a/src/log_surgeon/Lexer.hpp b/src/log_surgeon/Lexer.hpp
index 78ed68d8..9503f681 100644
--- a/src/log_surgeon/Lexer.hpp
+++ b/src/log_surgeon/Lexer.hpp
@@ -152,6 +152,10 @@ class Lexer {
 
     [[nodiscard]] auto get_has_delimiters() const -> bool const& { return m_has_delimiters; }
 
+    [[nodiscard]] auto get_delim_table() const -> std::array<bool, cSizeOfByte> const& {
+        return m_is_delimiter;  // Indexed by byte value; `true` marks a delimiter byte.
+    }
+
     [[nodiscard]] auto is_delimiter(uint8_t byte) const -> bool const& {
         return m_is_delimiter[byte];
     }
@@ -252,7 +256,10 @@ class Lexer {
     std::array<bool, cSizeOfByte> m_is_first_char_of_a_variable{false};
     std::vector<LexicalRule<TypedNfaState>> m_rules;
     uint32_t m_line{0};
+
+    // For performance, `m_has_delimiters` caches whether any element in `m_is_delimiter` is true.
     bool m_has_delimiters{false};
+
     std::unique_ptr<finite_automata::Dfa<TypedDfaState, TypedNfaState>> m_dfa;
     std::optional<uint32_t> m_first_delimiter_pos{std::nullopt};
     bool m_asked_for_more_data{false};
diff --git a/src/log_surgeon/wildcard_query_parser/Expression.hpp b/src/log_surgeon/wildcard_query_parser/Expression.hpp
index cac314fa..01633ffe 100644
--- a/src/log_surgeon/wildcard_query_parser/Expression.hpp
+++ b/src/log_surgeon/wildcard_query_parser/Expression.hpp
@@ -1,6 +1,7 @@
 #ifndef LOG_SURGEON_WILDCARD_QUERY_PARSER_EXPRESSION_HPP
 #define LOG_SURGEON_WILDCARD_QUERY_PARSER_EXPRESSION_HPP
 
+#include <cstddef>
 #include <string>
 #include <vector>
 
@@ -24,6 +25,8 @@ class Expression {
 
     [[nodiscard]] auto get_search_string() const -> std::string const& { return m_search_string; }
 
+    [[nodiscard]] auto length() const -> size_t { return m_search_string.size(); }  // Char count.
+
 private:
     std::vector<ExpressionCharacter> m_chars;
     std::string m_search_string;
diff --git a/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp b/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp
index 37f6e387..e5e907bb 100644
--- a/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp
+++ b/src/log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp
@@ -1,8 +1,11 @@
 #ifndef LOG_SURGEON_WILDCARD_QUERY_PARSER_EXPRESSION_CHARACTER_HPP
 #define LOG_SURGEON_WILDCARD_QUERY_PARSER_EXPRESSION_CHARACTER_HPP
 
+#include <array>
 #include <cstdint>
 
+#include <log_surgeon/Constants.hpp>
+
 namespace log_surgeon::wildcard_query_parser {
 class ExpressionCharacter {
 public:
@@ -23,6 +26,19 @@ class ExpressionCharacter {
         return Type::NonGreedyWildcard == m_type;
     }
 
+    [[nodiscard]] auto is_wildcard() const -> bool {
+        return Type::NonGreedyWildcard == m_type || Type::GreedyWildcard == m_type;  // `?` or `*`.
+    }
+
+    [[nodiscard]] auto is_delim(std::array<bool, cSizeOfByte> const& delim_table) const -> bool {
+        return delim_table.at(static_cast<uint8_t>(m_value));  // Index is the byte value.
+    }
+
+    [[nodiscard]] auto is_delim_or_wildcard(std::array<bool, cSizeOfByte> const& delim_table) const
+            -> bool {
+        return is_delim(delim_table) || is_wildcard();  // Either one can bound a token.
+    }
+
     [[nodiscard]] auto is_escape() const -> bool { return Type::Escape == m_type; }
 
 private:
diff --git a/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp b/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp
index 15e5339d..f055c453 100644
--- a/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp
+++ b/src/log_surgeon/wildcard_query_parser/ExpressionView.cpp
@@ -1,12 +1,14 @@
 #include "ExpressionView.hpp"
 
 #include <algorithm>
+#include <array>
 #include <cstddef>
 #include <span>
 #include <string>
 #include <string_view>
 #include <utility>
 
+#include <log_surgeon/Constants.hpp>
 #include <log_surgeon/SchemaParser.hpp>
 #include <log_surgeon/wildcard_query_parser/Expression.hpp>
 
@@ -42,6 +44,39 @@ auto ExpressionView::extend_to_adjacent_greedy_wildcards() const
     return {is_extended, wildcard_expression_view};
 }
 
+auto ExpressionView::is_surrounded_by_delims(std::array<bool, cSizeOfByte> const& delim_table) const
+        -> bool {
+    auto const [begin_idx, end_idx]{get_indices()};
+    auto const& expression_chars{m_expression->get_chars()};
+    auto const expression_length{m_expression->length()};
+    bool const is_non_empty{false == m_chars.empty()};
+
+    // Left side: the start of the expression, a greedy wildcard at the front of the view, or a
+    // delimiter/wildcard directly before the view each qualify as a boundary.
+    if (0 != begin_idx) {
+        bool const starts_greedy{is_non_empty && m_chars.front().is_greedy_wildcard()};
+        auto const& prev_char{expression_chars[begin_idx - 1]};
+        if (false == starts_greedy && false == prev_char.is_delim_or_wildcard(delim_table)) {
+            return false;
+        }
+    }
+
+    // Right side: the end of the expression or a greedy wildcard at the back of the view qualify.
+    if (expression_length == end_idx || (is_non_empty && m_chars.back().is_greedy_wildcard())) {
+        return true;
+    }
+
+    // Otherwise the character after the view decides. An escape character defers to the character
+    // that follows it, which must itself be a delimiter.
+    auto const& next_char{expression_chars[end_idx]};
+    if (false == next_char.is_escape()) {
+        return next_char.is_delim_or_wildcard(delim_table);
+    }
+
+    bool const has_escaped_char{expression_length > end_idx + 1};
+    return has_escaped_char && expression_chars[end_idx + 1].is_delim(delim_table);
+}
+
 auto ExpressionView::is_well_formed() const -> bool {
     if (m_chars.empty()) {
         return true;
diff --git a/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp b/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp
index cf228643..a69575c5 100644
--- a/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp
+++ b/src/log_surgeon/wildcard_query_parser/ExpressionView.hpp
@@ -1,12 +1,14 @@
 #ifndef LOG_SURGEON_WILDCARD_QUERY_PARSER_EXPRESSION_VIEW_HPP
 #define LOG_SURGEON_WILDCARD_QUERY_PARSER_EXPRESSION_VIEW_HPP
 
+#include <array>
 #include <cstddef>
 #include <span>
 #include <string>
 #include <string_view>
 #include <utility>
 
+#include <log_surgeon/Constants.hpp>
 #include <log_surgeon/wildcard_query_parser/Expression.hpp>
 #include <log_surgeon/wildcard_query_parser/ExpressionCharacter.hpp>
 
@@ -41,6 +43,33 @@ class ExpressionView {
                && (m_chars[0].is_greedy_wildcard() || m_chars.back().is_greedy_wildcard());
     }
 
+    /**
+     * Checks whether the view is surrounded by delimiters. The start and end of an expression are
+     * always treated as delimiters, and a greedy wildcard also qualifies because it may represent
+     * a string that includes a flanking delimiter.
+     *
+     * A view is considered surrounded only if both its left and right boundaries satisfy at least
+     * one of the conditions listed for that side.
+     *
+     * Left boundary:
+     * - The view is at the start of the expression, or
+     * - The first character of the view is a greedy wildcard (if the view is non-empty), or
+     * - The character immediately left of the view is a delimiter or a wildcard.
+     *
+     * Right boundary:
+     * - The view is at the end of the expression, or
+     * - The last character of the view is a greedy wildcard (if the view is non-empty), or
+     * - The character immediately right of the view is a delimiter or a wildcard, or
+     * - The character immediately right of the view is an escape character and the character
+     *   after it is a delimiter (a wildcard is not accepted in this position).
+     *
+     * @param delim_table Table indexed by byte value; `true` marks a delimiter.
+     * @return true when both left and right boundaries qualify; false otherwise.
+     */
+    [[nodiscard]] auto is_surrounded_by_delims(
+            std::array<bool, cSizeOfByte> const& delim_table
+    ) const -> bool;
+
     /**
      * Checks whether this `ExpressionView` represents a well-formed subrange.
      *
diff --git a/src/log_surgeon/wildcard_query_parser/Query.cpp b/src/log_surgeon/wildcard_query_parser/Query.cpp
new file mode 100644
index 00000000..5a730eda
--- /dev/null
+++ b/src/log_surgeon/wildcard_query_parser/Query.cpp
@@ -0,0 +1,176 @@
+#include "Query.hpp"
+
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <log_surgeon/finite_automata/Dfa.hpp>
+#include <log_surgeon/finite_automata/DfaState.hpp>
+#include <log_surgeon/finite_automata/Nfa.hpp>
+#include <log_surgeon/finite_automata/NfaState.hpp>
+#include <log_surgeon/Lexer.hpp>
+#include <log_surgeon/LexicalRule.hpp>
+#include <log_surgeon/parser_types.hpp>
+#include <log_surgeon/Schema.hpp>
+#include <log_surgeon/SchemaParser.hpp>
+#include <log_surgeon/wildcard_query_parser/Expression.hpp>
+#include <log_surgeon/wildcard_query_parser/ExpressionView.hpp>
+#include <log_surgeon/wildcard_query_parser/QueryInterpretation.hpp>
+
+using log_surgeon::finite_automata::ByteDfaState;
+using log_surgeon::finite_automata::ByteNfaState;
+using log_surgeon::lexers::ByteLexer;
+using std::set;
+using std::string;
+using std::vector;
+
+using ByteDfa = log_surgeon::finite_automata::Dfa<ByteDfaState, ByteNfaState>;
+using ByteLexicalRule = log_surgeon::LexicalRule<ByteNfaState>;
+using ByteNfa = log_surgeon::finite_automata::Nfa<ByteNfaState>;
+
+namespace log_surgeon::wildcard_query_parser {
+Query::Query(string const& query_string) {
+    m_processed_query_string.reserve(query_string.size());
+    Expression const expression(query_string);
+
+    // Run of consecutive unescaped wildcards that has been read but not yet emitted.
+    string pending_wildcards;
+    bool pending_has_greedy_wildcard{false};
+
+    // Emits the pending run: a run with a greedy wildcard collapses to a single '*'; a run of
+    // only non-greedy wildcards is emitted unchanged. An empty run emits nothing.
+    auto const flush_pending_wildcards = [&]() -> void {
+        if (pending_has_greedy_wildcard) {
+            m_processed_query_string.push_back('*');
+        } else {
+            m_processed_query_string += pending_wildcards;
+        }
+        pending_wildcards.clear();
+        pending_has_greedy_wildcard = false;
+    };
+
+    bool prev_is_escape{false};
+    for (auto const& c : expression.get_chars()) {
+        if (false == c.is_wildcard()) {
+            flush_pending_wildcards();
+        }
+
+        if (prev_is_escape) {
+            m_processed_query_string.push_back(c.value());
+            prev_is_escape = false;
+        } else if (c.is_escape()) {
+            m_processed_query_string.push_back(c.value());
+            prev_is_escape = true;
+        } else if (c.is_greedy_wildcard() || c.is_non_greedy_wildcard()) {
+            pending_wildcards.push_back(c.value());
+            pending_has_greedy_wildcard = pending_has_greedy_wildcard || c.is_greedy_wildcard();
+        } else {
+            m_processed_query_string.push_back(c.value());
+        }
+    }
+    flush_pending_wildcards();
+}
+
+auto Query::get_all_multi_token_interpretations(ByteLexer const& lexer) const
+        -> std::set<QueryInterpretation> {
+    if (m_processed_query_string.empty()) {
+        return {};
+    }
+
+    Expression const expression{m_processed_query_string};
+    vector<set<QueryInterpretation>> query_interpretations(expression.length());
+    for (size_t end_idx{1}; end_idx <= expression.length(); ++end_idx) {  // Fills slot end_idx - 1.
+        for (size_t begin_idx{0}; begin_idx < end_idx; ++begin_idx) {
+            ExpressionView const expression_view{expression, begin_idx, end_idx};
+            if ("*" != expression_view.get_search_string()
+                && expression_view.starts_or_ends_with_greedy_wildcard())
+            {
+                continue;  // Redundant: a shorter substring already extends to cover this one.
+            }
+
+            auto const extended_view{expression_view.extend_to_adjacent_greedy_wildcards().second};
+            auto single_token_interpretations{  // Non-const so its elements can be moved from.
+                    get_all_single_token_interpretations(extended_view, lexer)
+            };
+            if (single_token_interpretations.empty()) {
+                continue;
+            }
+
+            if (0 == begin_idx) {
+                query_interpretations[end_idx - 1].insert(
+                        std::make_move_iterator(single_token_interpretations.begin()),
+                        std::make_move_iterator(single_token_interpretations.end())
+                );
+            } else {
+                for (auto const& prefix : query_interpretations[begin_idx - 1]) {
+                    for (auto const& suffix : single_token_interpretations) {
+                        QueryInterpretation combined{prefix};
+                        combined.append_query_interpretation(suffix);
+                        query_interpretations[end_idx - 1].insert(std::move(combined));
+                    }
+                }
+            }
+        }
+    }
+    return std::move(query_interpretations.back());  // Local vector: move out, don't copy.
+}
+
+auto Query::get_all_single_token_interpretations(
+        ExpressionView const& expression_view,
+        ByteLexer const& lexer
+) -> std::vector<QueryInterpretation> {
+    // Malformed substrings (e.g., hanging escape characters) have no valid interpretation.
+    if (false == expression_view.is_well_formed()) {
+        return {};
+    }
+
+    auto const search_string{expression_view.get_search_string()};
+    vector<QueryInterpretation> result;
+    // A lone "*" and text not bounded by delimiters or wildcards can only be static text.
+    if ("*" == search_string) {
+        result.emplace_back("*");
+        return result;
+    }
+    if (false == expression_view.is_surrounded_by_delims(lexer.get_delim_table())) {
+        result.emplace_back(string{search_string});
+        return result;
+    }
+
+    auto const [regex_string, contains_wildcard]{expression_view.generate_regex_string()};
+    auto const type_ids{get_matching_variable_types(regex_string, lexer)};
+    if (contains_wildcard) {
+        // With wildcards: keep the static reading plus one variable reading per matching type.
+        result.emplace_back(string{search_string});
+        for (auto const type_id : type_ids) {
+            result.emplace_back(type_id, string{search_string}, contains_wildcard);
+        }
+    } else if (type_ids.empty()) {
+        result.emplace_back(string{search_string});
+    } else {
+        // Without wildcards only the first (lowest) matching type ID is kept.
+        result.emplace_back(*type_ids.begin(), string{search_string}, contains_wildcard);
+    }
+    return result;
+}
+
+auto Query::get_matching_variable_types(string const& regex_string, ByteLexer const& lexer)
+        -> set<uint32_t> {
+    NonTerminal::m_next_children_start = 0;  // Static parser state, reset before parsing.
+
+    // Wrap the regex in a single-rule schema named "search" to obtain its regex AST.
+    Schema search_schema;
+    search_schema.add_variable("search:" + regex_string, -1);
+    auto const search_schema_ast{search_schema.release_schema_ast_ptr()};
+    auto& search_var_ast{dynamic_cast<SchemaVarAST&>(*search_schema_ast->m_schema_vars[0])};
+
+    vector<ByteLexicalRule> search_rules;
+    search_rules.emplace_back(0, std::move(search_var_ast.m_regex_ptr));
+    ByteNfa const search_nfa{search_rules};
+    ByteDfa const search_dfa{search_nfa};
+    return lexer.get_dfa()->get_intersect(&search_dfa);
+}
+}  // namespace log_surgeon::wildcard_query_parser
diff --git a/src/log_surgeon/wildcard_query_parser/Query.hpp b/src/log_surgeon/wildcard_query_parser/Query.hpp
new file mode 100644
index 00000000..915077dd
--- /dev/null
+++ b/src/log_surgeon/wildcard_query_parser/Query.hpp
@@ -0,0 +1,137 @@
+#ifndef LOG_SURGEON_WILDCARD_QUERY_PARSER_QUERY_HPP
+#define LOG_SURGEON_WILDCARD_QUERY_PARSER_QUERY_HPP
+
+#include <cstdint>
+#include <set>
+#include <string>
+#include <vector>
+
+#include <log_surgeon/Lexer.hpp>
+#include <log_surgeon/wildcard_query_parser/ExpressionView.hpp>
+#include <log_surgeon/wildcard_query_parser/QueryInterpretation.hpp>
+
+namespace log_surgeon::wildcard_query_parser {
+class Query {
+public:
+    explicit Query(std::string const& query_string);  // Collapses wildcard runs containing '*'.
+
+    /**
+     * Generates all k-token interpretations of the n-character query string, where 1 <= k <= n.
+     *
+     * 1. Interpret each substring [a,b) as a single token (k=1).
+     *    - Substrings adjacent to greedy wildcards must be interpreted as if they include them. To
+     *      implement this, we extend all substrings to include adjacent greedy wildcards.
+     *      - Example: consider query "a*b" and variable type `hasNum` ("\w*\d+\w*"):
+     *        - Without extension:
+     *          - "a" -> static-text
+     *          - "b" -> static-text
+     *          - "a*" -> <hasNum>(a*)
+     *          - "*b" -> <hasNum>(*b)
+     *        - Multi-token interpretations (via step 2 below):
+     *          - {a*b},
+     *          - {<hasNum>(a*)b},
+     *          - {a<hasNum>(*b)}.
+     *        - None of these match a string like "a1 c 1b", which has interpretation
+     *          {<hasNum>(a1) c <hasNum>(1b)}. By interpreting "a" as "a*" and "b" as "*b", the '*'
+     *          is preserved allowing for interpretation {<hasNum>(a*)*<hasNum>(*b)}, which matches
+     *          {<hasNum>(a1) c <hasNum>(1b)}.
+     *    - Special cases:
+     *      - Single-character greedy wildcards ("*") are not extended as they have no adjacent
+     *        greedy wildcards (repeated wildcards are collapsed during preprocessing).
+     *      - Substrings are not extended to non-greedy wildcards (`?`) as "a?b" =/= "a??b".
+     *    - Substrings of length >= 2 that begin or end with a greedy wildcard are skipped as they
+     *      are redundant.
+     *      - Example: in "a*b", substring [0,1) extends to "a*", therefore substring [0,2) "a*" is
+     *        redundant. This avoids producing interpretation {<hasNum>(a*)b}, which is a subset of
+     *        {<hasNum>(a*)*b}.
+     *      - Note: The length >= 2 requirement avoids skipping 1-length greedy substrings ("*") as
+     *        they are never redundant (i.e., no 0-length substring exists to extend).
+     *
+     * 2. Let I(a) be the set of all k-token interpretations of substring [0,a), where 1 <= k <= a.
+     *    - Let T(a,b) be the set of all valid single-token interpretations of substring [a,b).
+     *    - We can then compute I(a) recursively:
+     *
+     *        I(a) = T(0,a)
+     *               U (I(1) x T(1,a))
+     *               U (I(2) x T(2,a))
+     *               ...
+     *               U (I(a-1) x T(a-1,a))
+     *
+     *      where x denotes the cross product: all combinations of prefix interpretations from I(i)
+     *      and suffix interpretations from T(i,a).
+     *
+     * 3. Use dynamic programming to compute I(n) efficiently:
+     *    - Instead of generating all possible combinations naively, we store only unique
+     *      interpretations by recursively building up the combinations as shown below.
+     *    - Compute I(n) iteratively in increasing order of substring length:
+     *      - Compute T(0,1), then I(1)
+     *      - Compute T(0,2), T(1,2), then I(2)
+     *      - Compute T(0,3), T(1,3), T(2,3), then I(3)
+     *      - ...
+     *      - Compute T(0,n), ..., T(n-1,n), then I(n)
+     *
+     * @param lexer The lexer used to determine variable types and delimiters.
+     * @return A set of `QueryInterpretation` representing all valid multi-token interpretations of
+     * the full query string.
+     */
+    [[nodiscard]] auto get_all_multi_token_interpretations(lexers::ByteLexer const& lexer) const
+            -> std::set<QueryInterpretation>;
+
+    [[nodiscard]] auto get_processed_query_string() const -> std::string const& {
+        return m_processed_query_string;
+    }
+
+private:
+    /**
+     * Generates all single-token interpretations for a given expression view matching a given
+     * lexer.
+     *
+     * A single-token interpretation can be one of:
+     * - A static token (literal text).
+     * - A variable token (e.g., int, float, hasNumber) as defined by the lexer's schema. Each
+     *   unique variable type is considered a distinct interpretation.
+     *
+     * Rules:
+     * - If the substring is malformed (has hanging escape characters):
+     *   - There are no valid interpretations.
+     * - Else if the substring:
+     *   - Is an isolated greedy wildcard, "*", or
+     *   - Is not surrounded by delimiters or wildcards (lexer won't consider it a variable), or
+     *   - Does not match any variable.
+     *   - Then:
+     *     - The only interpretation is a static token.
+     * - Else if the substring contains a wildcard:
+     *   - The interpretations include a static token, plus a variable token for each matching type.
+     * - Else:
+     *   - The only interpretation is the variable token corresponding to the highest priority
+     *     match.
+     *
+     * @param expression_view The view of the substring to interpret.
+     * @param lexer The lexer used to determine variable types and delimiters.
+     * @return A vector of `QueryInterpretation` objects representing all valid single-token
+     * interpretations for the given substring.
+     */
+    [[nodiscard]] static auto get_all_single_token_interpretations(
+            ExpressionView const& expression_view,
+            lexers::ByteLexer const& lexer
+    ) -> std::vector<QueryInterpretation>;
+
+    /**
+     * Determines the set of variable types matched by the lexer for all strings generated from the
+     * input regex.
+     *
+     * Generates a DFA from the input regex and computes its intersection with the lexer's DFA.
+     *
+     * @param regex_string The input regex string for which to find matching variable types.
+     * @param lexer The lexer whose DFA is used for matching.
+     * @return The set of all matching variable type IDs.
+     */
+    [[nodiscard]] static auto
+    get_matching_variable_types(std::string const& regex_string, lexers::ByteLexer const& lexer)
+            -> std::set<uint32_t>;
+
+    std::string m_processed_query_string;
+};
+}  // namespace log_surgeon::wildcard_query_parser
+
+#endif  // LOG_SURGEON_WILDCARD_QUERY_PARSER_QUERY_HPP
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 2838d8a4..2121195d 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -11,6 +11,7 @@ target_sources(
         test-expression-view.cpp
         test-nfa.cpp
         test-prefix-tree.cpp
+        test-query.cpp
         test-query-interpretation.cpp
         test-regex-ast.cpp
         test-register-handler.cpp
diff --git a/tests/test-query.cpp b/tests/test-query.cpp
new file mode 100644
index 00000000..19687c6b
--- /dev/null
+++ b/tests/test-query.cpp
@@ -0,0 +1,408 @@
+#include <cstddef>
+#include <set>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include <log_surgeon/Lexer.hpp>
+#include <log_surgeon/Schema.hpp>
+#include <log_surgeon/SchemaParser.hpp>
+#include <log_surgeon/wildcard_query_parser/Query.hpp>
+
+#include <catch2/catch_test_macros.hpp>
+
+/**
+ * @defgroup unit_tests_query `Query` unit tests.
+ * @brief Unit tests for `Query` construction and interpretation.
+ *
+ * These unit tests contain the `Query` tag.
+ */
+
+using log_surgeon::lexers::ByteLexer;
+using log_surgeon::Schema;
+using log_surgeon::SchemaVarAST;
+using log_surgeon::wildcard_query_parser::Query;
+using std::set;
+using std::string;
+using std::string_view;
+using std::vector;
+
+namespace {
+/**
+ * Creates a query from the given query string and tests that its processed query string and
+ * interpretations match the expected values.
+ *
+ * @param raw_query_string The search query.
+ * @param expected_processed_query_string The processed search query.
+ * @param schema_rules A vector of strings, each string representing a schema rule.
+ * @param expected_serialized_interpretations  The expected set of serialized interpretations.
+ */
+auto test_query(
+        string_view raw_query_string,
+        string_view expected_processed_query_string,
+        vector<string> const& schema_rules,
+        set<string> const& expected_serialized_interpretations
+) -> void;
+
+/**
+ * Initializes a `ByteLexer` with space as a delimiter and the given `schema_rules`.
+ *
+ * @param schema_rules A vector of strings, each string representing a schema rule.
+ * @return The initialized `ByteLexer`.
+ */
+auto make_test_lexer(vector<string> const& schema_rules) -> ByteLexer;
+
+auto test_query(
+        string_view const raw_query_string,
+        string_view const expected_processed_query_string,
+        vector<string> const& schema_rules,
+        set<string> const& expected_serialized_interpretations
+) -> void {
+    auto const lexer{make_test_lexer(schema_rules)};
+
+    Query const query{string(raw_query_string)};  // Preprocessing happens in the constructor.
+    REQUIRE(expected_processed_query_string == query.get_processed_query_string());
+
+    auto const interpretations{query.get_all_multi_token_interpretations(lexer)};
+    set<string> serialized_interpretations;  // Serialized forms are compared, not the objects.
+    for (auto const& interpretation : interpretations) {
+        serialized_interpretations.insert(interpretation.serialize());
+    }
+
+    REQUIRE(expected_serialized_interpretations == serialized_interpretations);
+}
+
+auto make_test_lexer(vector<string> const& schema_rules) -> ByteLexer {
+    ByteLexer lexer;
+    lexer.set_delimiters({' '});  // Space is the only delimiter.
+
+    Schema schema;
+    for (auto const& schema_rule : schema_rules) {
+        schema.add_variable(schema_rule, -1);
+    }
+
+    auto const schema_ast = schema.release_schema_ast_ptr();
+    REQUIRE(nullptr != schema_ast);
+    REQUIRE(schema_rules.size() == schema_ast->m_schema_vars.size());
+    for (size_t i{0}; i < schema_ast->m_schema_vars.size(); ++i) {
+        REQUIRE(nullptr != schema_ast->m_schema_vars[i]);
+        auto* capture_rule_ast{dynamic_cast<SchemaVarAST*>(schema_ast->m_schema_vars[i].get())};
+        REQUIRE(nullptr != capture_rule_ast);
+        lexer.add_rule(i, std::move(capture_rule_ast->m_regex_ptr));  // Registered as rule `i`.
+    }
+
+    lexer.generate();
+    return lexer;
+}
+}  // namespace
+
+/**
+ * @ingroup unit_tests_query
+ * @brief Creates and tests an empty `Query`.
+ */
+TEST_CASE("empty_query", "[Query]") {
+    constexpr string_view cRawQueryString;
+    constexpr string_view cProcessedQueryString;
+    vector<string> const schema_rules{{R"(hasNumber:[A-Za-z]*\d+[A-Za-z]*)"}};
+    set<string> const expected_serialized_interpretations;  // Empty query: no interpretations.
+
+    test_query(
+            cRawQueryString,
+            cProcessedQueryString,
+            schema_rules,
+            expected_serialized_interpretations
+    );
+}
+
+/**
+ * @ingroup unit_tests_query
+ * @brief Creates and tests a greedy wildcard `Query`.
+ */
+TEST_CASE("greedy_wildcard_query", "[Query]") {
+    constexpr string_view cRawQueryString{"*"};  // A lone "*" is only read as static text.
+    constexpr string_view cProcessedQueryString{"*"};
+    vector<string> const schema_rules{{R"(hasNumber:[A-Za-z]*\d+[A-Za-z]*)"}};
+    set<string> const expected_serialized_interpretations{"logtype='*', contains_wildcard='0'"};
+
+    test_query(
+            cRawQueryString,
+            cProcessedQueryString,
+            schema_rules,
+            expected_serialized_interpretations
+    );
+}
+
+/**
+ * @ingroup unit_tests_query
+ * @brief Creates and tests a query with repeated greedy wildcards.
+ */
+TEST_CASE("repeated_greedy_wildcard_query", "[Query]") {
+    constexpr string_view cRawQueryString{"a**b"};  // "**" collapses to "*".
+    constexpr string_view cProcessedQueryString{"a*b"};
+    vector<string> const schema_rules{{R"(hasNumber:[A-Za-z]*\d+[A-Za-z]*)"}};
+    set<string> const expected_serialized_interpretations{
+            "logtype='a*b', contains_wildcard='0'",  // Whole query as static text.
+            "logtype='a***b', contains_wildcard='0'",  // "a*" + "*" + "*b", all static.
+            "logtype='<0>(a*)**b', contains_wildcard='10'",
+            "logtype='<0>(a*)*<0>(*b)', contains_wildcard='101'",
+            "logtype='<0>(a*b)', contains_wildcard='1'",
+            "logtype='a**<0>(*b)', contains_wildcard='01'"
+    };
+
+    test_query(
+            cRawQueryString,
+            cProcessedQueryString,
+            schema_rules,
+            expected_serialized_interpretations
+    );
+}
+
+/**
+ * @ingroup unit_tests_query
+ * @brief Creates and tests a query with a non-greedy wildcard followed by a greedy wildcard.
+ */
+TEST_CASE("short_wildcard_sequence_query", "[Query]") {
+    constexpr string_view cRawQueryString{"a?*b"};  // A run with a "*" collapses to "*".
+    constexpr string_view cProcessedQueryString{"a*b"};
+    vector<string> const schema_rules{{R"(hasNumber:[A-Za-z]*\d+[A-Za-z]*)"}};
+    set<string> const expected_serialized_interpretations{
+            "logtype='a*b', contains_wildcard='0'",  // Whole query as static text.
+            "logtype='a***b', contains_wildcard='0'",  // "a*" + "*" + "*b", all static.
+            "logtype='<0>(a*)**b', contains_wildcard='10'",
+            "logtype='<0>(a*)*<0>(*b)', contains_wildcard='101'",
+            "logtype='<0>(a*b)', contains_wildcard='1'",
+            "logtype='a**<0>(*b)', contains_wildcard='01'"
+    };
+
+    test_query(
+            cRawQueryString,
+            cProcessedQueryString,
+            schema_rules,
+            expected_serialized_interpretations
+    );
+}
+
+/**
+ * @ingroup unit_tests_query
+ * @brief Creates and tests a query with a long mixed wildcard sequence.
+ */
+TEST_CASE("long_mixed_wildcard_sequence_query", "[Query]") {
+    constexpr string_view cRawQueryString{"a?*?*?*?b"};  // A run with a "*" collapses to "*".
+    constexpr string_view cProcessedQueryString{"a*b"};
+    vector<string> const schema_rules{{R"(hasNumber:[A-Za-z]*\d+[A-Za-z]*)"}};
+    set<string> const expected_serialized_interpretations{
+            "logtype='a*b', contains_wildcard='0'",  // Whole query as static text.
+            "logtype='a***b', contains_wildcard='0'",  // "a*" + "*" + "*b", all static.
+            "logtype='<0>(a*)**b', contains_wildcard='10'",
+            "logtype='<0>(a*)*<0>(*b)', contains_wildcard='101'",
+            "logtype='<0>(a*b)', contains_wildcard='1'",
+            "logtype='a**<0>(*b)', contains_wildcard='01'"
+    };
+
+    test_query(
+            cRawQueryString,
+            cProcessedQueryString,
+            schema_rules,
+            expected_serialized_interpretations
+    );
+}
+
+/**
+ * @ingroup unit_tests_query
+ * @brief Creates and tests a query with a long non-greedy wildcard sequence.
+ */
+TEST_CASE("long_non_greedy_wildcard_sequence_query", "[Query]") {
+    constexpr string_view cRawQueryString{"a????b"};  // Runs of only "?" are kept as-is.
+    constexpr string_view cProcessedQueryString{"a????b"};
+    vector<string> const schema_rules{{R"(hasNumber:[A-Za-z]*\d+[A-Za-z]*)"}};
+    set<string> const expected_serialized_interpretations{
+            R"(logtype='a????b', contains_wildcard='0')",
+
+            R"(logtype='<0>(a?)???b', contains_wildcard='10')",
+            R"(logtype='<0>(a??)??b', contains_wildcard='10')",
+            R"(logtype='<0>(a???)?b', contains_wildcard='10')",
+            R"(logtype='<0>(a????b)', contains_wildcard='1')",
+
+            R"(logtype='a?<0>(?)??b', contains_wildcard='010')",
+            R"(logtype='a?<0>(??)?b', contains_wildcard='010')",
+            R"(logtype='a?<0>(???b)', contains_wildcard='01')",
+            R"(logtype='a?<0>(?)?<0>(?b)', contains_wildcard='0101')",
+
+            R"(logtype='a??<0>(?)?b', contains_wildcard='010')",
+            R"(logtype='a??<0>(??b)', contains_wildcard='01')",
+
+            R"(logtype='a???<0>(?b)', contains_wildcard='01')",
+
+            R"(logtype='<0>(a?)?<0>(?)?b', contains_wildcard='1010')",
+            R"(logtype='<0>(a?)?<0>(??b)', contains_wildcard='101')",
+            R"(logtype='<0>(a?)??<0>(?b)', contains_wildcard='101')",
+
+            R"(logtype='<0>(a??)?<0>(?b)', contains_wildcard='101')",
+
+            // Double dipping on delimiters
+            R"(logtype='<0>(a?)<0>(?)??b', contains_wildcard='110')",
+            R"(logtype='<0>(a?)<0>(??)?b', contains_wildcard='110')",
+            R"(logtype='<0>(a?)<0>(???b)', contains_wildcard='11')",
+            R"(logtype='<0>(a?)<0>(?)?<0>(?b)', contains_wildcard='1101')",
+            R"(logtype='<0>(a?)?<0>(?)<0>(?b)', contains_wildcard='1011')",
+
+            R"(logtype='<0>(a??)<0>(?)?b', contains_wildcard='110')",
+            R"(logtype='<0>(a??)<0>(??b)', contains_wildcard='11')",
+
+            R"(logtype='<0>(a???)<0>(?b)', contains_wildcard='11')",
+
+            R"(logtype='a?<0>(?)<0>(?)?b', contains_wildcard='0110')",
+            R"(logtype='a?<0>(?)<0>(??b)', contains_wildcard='011')",
+
+            R"(logtype='a?<0>(??)<0>(?b)', contains_wildcard='011')",
+            R"(logtype='a??<0>(?)<0>(?b)', contains_wildcard='011')",
+
+            R"(logtype='<0>(a?)<0>(?)<0>(?)?b', contains_wildcard='1110')",
+            R"(logtype='<0>(a?)<0>(?)<0>(??b)', contains_wildcard='111')",
+            R"(logtype='<0>(a?)<0>(??)<0>(?b)', contains_wildcard='111')",
+            R"(logtype='<0>(a??)<0>(?)<0>(?b)', contains_wildcard='111')",
+            R"(logtype='a?<0>(?)<0>(?)<0>(?b)', contains_wildcard='0111')",
+
+            R"(logtype='<0>(a?)<0>(?)<0>(?)<0>(?b)', contains_wildcard='1111')"
+    };
+
+    test_query(
+            cRawQueryString,
+            cProcessedQueryString,
+            schema_rules,
+            expected_serialized_interpretations
+    );
+}
+
+/**
+ * @ingroup unit_tests_query
+ * @brief Creates and tests a query with an escaped '*' character.
+ */
+TEST_CASE("escaped_star_query", "[Query]") {
+    // The '*' is preceded by a backslash, and the processed query passed to `test_query` is the
+    // same string as the raw one.
+    constexpr string_view cRawQuery{R"(a\*b)"};
+    constexpr string_view cProcessedQuery{R"(a\*b)"};
+
+    // Only one schema variable is defined.
+    vector<string> const rules{R"(hasNumber:[A-Za-z]*\d+[A-Za-z]*)"};
+
+    // A single interpretation is expected, and its logtype contains no `<0>(...)` variable
+    // token.
+    set<string> const expected{R"(logtype='a\*b', contains_wildcard='0')"};
+
+    test_query(cRawQuery, cProcessedQuery, rules, expected);
+}
+
+/**
+ * @ingroup unit_tests_query
+ * @brief Creates and tests a query consisting solely of digits.
+ *
+ * NOTE: This has a static-text case as strings "1", "2", and "3" in isolation aren't surrounded by
+ * delimiters. These tokens then build up the interpretation "123". Although additional
+ * interpretations don't impact correctness, they may impact performance. We can optimize these out,
+ * but it'll make the code messy. Instead, we should eventually remove the explicit tracking of
+ * static-tokens, in favor of only tracking variable tokens.
+ */
+TEST_CASE("int_query", "[Query]") {
+    // The query is made up of digits only, and its processed form is the same string.
+    constexpr string_view cRawQuery{"123"};
+    constexpr string_view cProcessedQuery{"123"};
+
+    vector<string> const rules{R"(int:\d+)"};
+
+    // Two interpretations are expected: one whose logtype has no variable token, and one where
+    // the full query is wrapped as variable `<0>`.
+    set<string> const expected{
+            R"(logtype='123', contains_wildcard='0')",
+            R"(logtype='<0>(123)', contains_wildcard='0')"
+    };
+
+    test_query(cRawQuery, cProcessedQuery, rules, expected);
+}
+
+/**
+ * @ingroup unit_tests_query
+ * @brief Creates and tests a query with multiple variable types.
+ *
+ * This test ensures that each non-wildcard token is assigned to the highest priority variable.
+ *
+ * NOTE: Similar to the `int_query` test, there are unneeded interpretations due to aggressively
+ * generating static-text tokens.
+ */
+TEST_CASE("non_wildcard_multi_variable_query", "[Query]") {
+    // Both sections run the same wildcard-free query; they differ only in the order of the
+    // schema rules.
+    constexpr string_view cRawQuery{"abc123 123"};
+    constexpr string_view cProcessedQuery{"abc123 123"};
+
+    SECTION("int_priority") {
+        // `int` is listed first, so in the expectations "123" is tagged `<0>` and "abc123" is
+        // tagged `<1>`.
+        vector<string> const rules{R"(int:(\d+))", R"(hasNumber:[A-Za-z]*\d+[A-Za-z]*)"};
+        // Each of the two tokens is expected both with and without a variable tag, giving four
+        // combinations.
+        set<string> const expected{
+                R"(logtype='abc123 123', contains_wildcard='0')",
+                R"(logtype='abc123 <0>(123)', contains_wildcard='00')",
+                R"(logtype='<1>(abc123) 123', contains_wildcard='00')",
+                R"(logtype='<1>(abc123) <0>(123)', contains_wildcard='000')"
+        };
+
+        test_query(cRawQuery, cProcessedQuery, rules, expected);
+    }
+
+    SECTION("has_number_priority") {
+        // `hasNumber` is listed first, so the expectations tag both "abc123" and "123" as `<0>`;
+        // `<1>` never appears.
+        vector<string> const rules{R"(hasNumber:[A-Za-z]*\d+[A-Za-z]*)", R"(int:(\d+))"};
+        // Each of the two tokens is expected both with and without a variable tag, giving four
+        // combinations.
+        set<string> const expected{
+                R"(logtype='abc123 123', contains_wildcard='0')",
+                R"(logtype='abc123 <0>(123)', contains_wildcard='00')",
+                R"(logtype='<0>(abc123) 123', contains_wildcard='00')",
+                R"(logtype='<0>(abc123) <0>(123)', contains_wildcard='000')"
+        };
+
+        test_query(cRawQuery, cProcessedQuery, rules, expected);
+    }
+}
+
+/**
+ * @ingroup unit_tests_query
+ * @brief Creates and tests a query with multiple variable types.
+ *
+ * This test ensures that each greedy wildcard token is identified as all correct token types.
+ *
+ * NOTE: Similar to the `int_query` test, there are unneeded interpretations due to aggressively
+ * generating static-text tokens. This same issue causes interpretations with redundant wildcards.
+ */
+TEST_CASE("wildcard_multi_variable_query", "[Query]") {
+    // Each of the two space-separated tokens touches a '*'; the processed query passed to
+    // `test_query` is the same string as the raw one.
+    constexpr string_view cRawQuery{"abc123* *123"};
+    constexpr string_view cProcessedQuery{"abc123* *123"};
+
+    // `int` is listed first, so it is variable `<0>` and `hasNumber` is `<1>` in the logtypes.
+    vector<string> const rules{R"(int:(\d+))", R"(hasNumber:[A-Za-z]*\d+[A-Za-z]*)"};
+    // The trailing "*123" is expected as either variable type, whereas "abc123*" is only ever
+    // expected as `<1>`.
+    set<string> const expected{
+            R"(logtype='abc123* *123', contains_wildcard='0')",
+            R"(logtype='abc123*** *123', contains_wildcard='0')",
+            R"(logtype='abc123* ***123', contains_wildcard='0')",
+            R"(logtype='abc123*** ***123', contains_wildcard='0')",
+            R"(logtype='abc123* **<0>(*123)', contains_wildcard='01')",
+            R"(logtype='abc123*** **<0>(*123)', contains_wildcard='01')",
+            R"(logtype='abc123* **<1>(*123)', contains_wildcard='01')",
+            R"(logtype='abc123*** **<1>(*123)', contains_wildcard='01')",
+            R"(logtype='<1>(abc123*)** *123', contains_wildcard='10')",
+            R"(logtype='<1>(abc123*)** ***123', contains_wildcard='10')",
+            R"(logtype='<1>(abc123*)** **<0>(*123)', contains_wildcard='101')",
+            R"(logtype='<1>(abc123*)** **<1>(*123)', contains_wildcard='101')"
+    };
+
+    test_query(cRawQuery, cProcessedQuery, rules, expected);
+}