Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 21 additions & 19 deletions src/log_surgeon/Lexer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

#include <array>
#include <cstdint>
#include <map>
#include <memory>
#include <optional>
#include <set>
Expand All @@ -13,6 +12,7 @@
#include <vector>

#include <log_surgeon/Constants.hpp>
#include <log_surgeon/finite_automata/Capture.hpp>
#include <log_surgeon/finite_automata/Dfa.hpp>
#include <log_surgeon/finite_automata/DfaState.hpp>
#include <log_surgeon/finite_automata/NfaState.hpp>
Expand Down Expand Up @@ -166,29 +166,29 @@ class Lexer {
}

/**
* Retrieves a list of capture IDs for a given rule ID.
* These capture IDs correspond to the captures in the rule that were matched during lexing.
* Retrieves a list of capture pointers for a given rule ID.
* These pointers correspond to the captures in the rule that were matched during lexing.
* @param rule_id The ID of the rule to search for captures.
* @return A vector of capture IDs if the rule contains captures;
* @return A vector of capture pointers if the rule contains captures;
* @return std::nullopt if no captures are found for the rule.
*/
[[nodiscard]] auto get_capture_ids_from_rule_id(rule_id_t const rule_id) const
-> std::optional<std::vector<capture_id_t>> {
if (m_rule_id_to_capture_ids.contains(rule_id)) {
return m_rule_id_to_capture_ids.at(rule_id);
[[nodiscard]] auto get_captures_from_rule_id(rule_id_t const rule_id) const
        -> std::optional<std::vector<finite_automata::Capture const*>> {
    // Look the rule up once with `find` rather than `contains` followed by `at`, which would
    // hash and probe the map twice for every hit.
    auto const rule_it{m_rule_id_to_capture.find(rule_id)};
    if (m_rule_id_to_capture.end() == rule_it) {
        // No entry for this rule ID.
        return std::nullopt;
    }
    // Returns a copy of the stored vector of capture pointers, as the original `at()` path did.
    return rule_it->second;
}

/**
* @param capture_id ID associated with a capture within a rule.
* @param capture Pointer to the capture.
* @return The start and end tag of the capture on success.
* @return std::nullopt if no capture is associated with the given capture ID.
* @return std::nullopt if no tags are associated with the given capture.
*/
[[nodiscard]] auto get_tag_id_pair_from_capture_id(capture_id_t const capture_id) const
[[nodiscard]] auto get_tag_id_pair_from_capture(finite_automata::Capture const* const capture) const
-> std::optional<std::pair<tag_id_t, tag_id_t>> {
if (m_capture_id_to_tag_id_pair.contains(capture_id)) {
return m_capture_id_to_tag_id_pair.at(capture_id);
if (m_capture_to_tag_id_pair.contains(capture)) {
return m_capture_to_tag_id_pair.at(capture);
}
return std::nullopt;
}
Expand All @@ -209,14 +209,14 @@ class Lexer {
}

/**
* Retrieves the register IDs for the start and end tags associated with a given capture ID.
* @param capture_id The ID of the capture to search for.
* Retrieves the register IDs for the start and end tags associated with a given capture.
* @param capture Pointer to the capture to search for.
* @return A pair of register IDs corresponding to the start and end tags of the capture.
* @return std::nullopt if no such capture is found.
*/
[[nodiscard]] auto get_reg_ids_from_capture_id(capture_id_t const capture_id) const
[[nodiscard]] auto get_reg_ids_from_capture(finite_automata::Capture const* const capture) const
-> std::optional<std::pair<reg_id_t, reg_id_t>> {
auto const optional_tag_id_pair{get_tag_id_pair_from_capture_id(capture_id)};
auto const optional_tag_id_pair{get_tag_id_pair_from_capture(capture)};
if (false == optional_tag_id_pair.has_value()) {
return std::nullopt;
}
Expand Down Expand Up @@ -265,8 +265,10 @@ class Lexer {
bool m_asked_for_more_data{false};
TypedDfaState const* m_prev_state{nullptr};
TypedDfaState const* m_state{nullptr};
std::unordered_map<rule_id_t, std::vector<capture_id_t>> m_rule_id_to_capture_ids;
std::unordered_map<capture_id_t, std::pair<tag_id_t, tag_id_t>> m_capture_id_to_tag_id_pair;
std::unordered_map<rule_id_t, std::vector<finite_automata::Capture const*>>
m_rule_id_to_capture;
std::unordered_map<finite_automata::Capture const*, std::pair<tag_id_t, tag_id_t>>
m_capture_to_tag_id_pair;
};

namespace lexers {
Expand Down
23 changes: 6 additions & 17 deletions src/log_surgeon/Lexer.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
#include <cassert>
#include <memory>
#include <stack>
#include <stdexcept>
#include <string>
#include <vector>

Expand Down Expand Up @@ -438,28 +437,18 @@ auto Lexer<TypedNfaState, TypedDfaState>::get_highest_priority_rule(rule_id_t co
template <typename TypedNfaState, typename TypedDfaState>
void Lexer<TypedNfaState, TypedDfaState>::generate() {
for (auto const& rule : m_rules) {
for (auto const* capture : rule.get_captures()) {
std::string const capture_name{capture->get_name()};
if (m_symbol_id.contains(capture_name)) {
throw std::invalid_argument(
"`m_rules` contains capture names that are not unique."
);
auto const rule_id{rule.get_variable_id()};
if (false == rule.get_captures().empty()) {
m_rule_id_to_capture.try_emplace(rule_id);
for (auto const* capture : rule.get_captures()) {
m_rule_id_to_capture.at(rule_id).push_back(capture);
}
auto const capture_id{m_symbol_id.size()};
m_symbol_id.emplace(capture_name, capture_id);
m_id_symbol.emplace(capture_id, capture_name);

auto const rule_id{rule.get_variable_id()};
m_rule_id_to_capture_ids.try_emplace(rule_id);
m_rule_id_to_capture_ids.at(rule_id).push_back(capture_id);
}
}

finite_automata::Nfa<TypedNfaState> nfa{m_rules};
for (auto const& [capture, tag_id_pair] : nfa.get_capture_to_tag_id_pair()) {
std::string const capture_name{capture->get_name()};
auto const capture_id{m_symbol_id.at(capture_name)};
m_capture_id_to_tag_id_pair.emplace(capture_id, tag_id_pair);
m_capture_to_tag_id_pair.emplace(capture, tag_id_pair);
}

m_dfa = std::make_unique<finite_automata::Dfa<TypedDfaState, TypedNfaState>>(nfa);
Expand Down
14 changes: 7 additions & 7 deletions src/log_surgeon/LogEvent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,16 +65,16 @@ auto LogEventView::get_logtype() const -> std::string {
{
logtype += token_view.release_delimiter();
}
if (auto const& optional_capture_ids{
m_log_parser.m_lexer.get_capture_ids_from_rule_id(rule_id)
if (auto const& optional_captures{
m_log_parser.m_lexer.get_captures_from_rule_id(rule_id)
};
optional_capture_ids.has_value())
optional_captures.has_value())
{
auto capture_view{token_view};
auto const& capture_ids{optional_capture_ids.value()};
for (auto const capture_id : capture_ids) {
auto const& captures{optional_captures.value()};
for (auto const capture : captures) {
auto const& optional_reg_id_pair{
m_log_parser.m_lexer.get_reg_ids_from_capture_id(capture_id)
m_log_parser.m_lexer.get_reg_ids_from_capture(capture)
};
if (false == optional_reg_id_pair.has_value()) {
continue;
Expand All @@ -86,7 +86,7 @@ auto LogEventView::get_logtype() const -> std::string {
capture_view.get_reversed_reg_positions(optional_reg_id_pair->second)
};

auto capture_name{m_log_parser.get_id_symbol(capture_id)};
auto capture_name{capture->get_name()};
if (false == start_positions.empty() && -1 < start_positions[0]
&& false == end_positions.empty() && -1 < end_positions[0])
{
Expand Down
9 changes: 8 additions & 1 deletion src/log_surgeon/finite_automata/Capture.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,17 @@ class Capture {
public:
explicit Capture(std::string name) : m_name{std::move(name)} {}

[[nodiscard]] auto get_name() const -> std::string_view { return m_name; }
/**
 * Stores the given rule name and position on this capture.
 * @param rule_name Rule name to store in `m_rule_name`; taken by value and moved into place.
 * @param pos Position to store in `m_pos`.
 */
auto set_context(std::string rule_name, uint32_t pos) -> void {
    m_rule_name = std::move(rule_name);
    m_pos = pos;
}

/**
 * @return A const reference to the name stored in `m_name`.
 */
[[nodiscard]] auto get_name() const -> std::string const& { return m_name; }

private:
std::string m_name;
std::string m_rule_name;
uint32_t m_pos{0};
};
} // namespace log_surgeon::finite_automata

Expand Down
38 changes: 37 additions & 1 deletion src/log_surgeon/finite_automata/Nfa.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#ifndef LOG_SURGEON_FINITE_AUTOMATA_NFA_HPP
#define LOG_SURGEON_FINITE_AUTOMATA_NFA_HPP

#include <cstddef>
#include <cstdint>
#include <memory>
#include <optional>
Expand All @@ -24,6 +23,42 @@
#include <log_surgeon/UniqueIdGenerator.hpp>

namespace log_surgeon::finite_automata {
/**
 * Holds the traversal context needed while building the NFA so that capture ASTs can be given
 * the information they require: the name of the rule currently being processed and the current
 * position within it. This is what distinguishes capture groups that share the same name.
 *
 * Note: This context is not available at AST construction time because capture productions are
 * resolved before variable productions, so it has to be filled in during NFA construction.
 */
class NfaContext {
public:
    /**
     * @return The currently stored rule name.
     */
    [[nodiscard]] auto get_curr_rule_name() const -> std::string const& { return m_curr_rule_name; }

    /**
     * @return The currently stored position.
     */
    [[nodiscard]] auto get_curr_pos() const -> uint32_t { return m_curr_pos; }

    /**
     * @param curr_rule_name The rule name to store; taken by value and moved into place.
     */
    auto set_curr_rule_name(std::string curr_rule_name) -> void {
        m_curr_rule_name = std::move(curr_rule_name);
    }

    /**
     * @param curr_pos The position to store.
     */
    auto set_curr_pos(uint32_t curr_pos) -> void { m_curr_pos = curr_pos; }

    /**
     * Restores the default state: an empty rule name and position 0.
     */
    auto reset() -> void {
        m_curr_pos = 0;
        m_curr_rule_name.clear();
    }

private:
    std::string m_curr_rule_name;
    uint32_t m_curr_pos{0};
};

/**
* Represents a Non-Deterministic Finite Automaton (NFA) designed to recognize a language based on
* a set of rules provided during initialization. This class serves as an intermediate
Expand Down Expand Up @@ -124,6 +159,7 @@ class Nfa {
TypedNfaState* m_root;
UniqueIdGenerator m_state_id_generator;
UniqueIdGenerator m_tag_id_generator;
NfaContext context;
};

template <typename TypedNfaState>
Expand Down
1 change: 0 additions & 1 deletion src/log_surgeon/types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
#include <cstdint>

namespace log_surgeon {
using capture_id_t = uint32_t;
using reg_id_t = uint32_t;
using rule_id_t = uint32_t;
using tag_id_t = uint32_t;
Expand Down
Loading
Loading