Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/log_surgeon/Lexer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include <array>
#include <cstdint>
#include <map>
Comment thread
SharafMohamed marked this conversation as resolved.
Outdated
#include <memory>
#include <optional>
#include <string>
Expand Down
34 changes: 17 additions & 17 deletions src/log_surgeon/Lexer.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,19 +84,19 @@ auto Lexer<TypedNfaState, TypedDfaState>::scan(ParserInputBuffer& input_buffer,
m_match_pos = prev_byte_buf_pos;
m_match_line = m_line;
}
auto* next = state->next(next_char);
auto* dest_state = state->get_dest_state(next_char);
if (next_char == '\n') {
m_line++;
if (m_has_delimiters && !m_match) {
next = m_dfa->get_root()->next(next_char);
dest_state = m_dfa->get_root()->get_dest_state(next_char);
m_match = true;
m_type_ids = &(next->get_matching_variable_ids());
m_type_ids = &(dest_state->get_matching_variable_ids());
m_start_pos = prev_byte_buf_pos;
m_match_pos = input_buffer.storage().pos();
m_match_line = m_line;
}
}
if (input_buffer.log_fully_consumed() || next == nullptr) {
if (input_buffer.log_fully_consumed() || nullptr == dest_state) {
if (m_match) {
input_buffer.set_log_fully_consumed(false);
input_buffer.set_pos(m_match_pos);
Expand Down Expand Up @@ -165,7 +165,7 @@ auto Lexer<TypedNfaState, TypedDfaState>::scan(ParserInputBuffer& input_buffer,
state = m_dfa->get_root();
continue;
}
state = next;
state = dest_state;
}
}

Expand Down Expand Up @@ -215,19 +215,19 @@ auto Lexer<TypedNfaState, TypedDfaState>::scan_with_wildcard(
m_match_pos = prev_byte_buf_pos;
m_match_line = m_line;
}
TypedDfaState const* next = state->next(next_char);
TypedDfaState const* dest_state{state->get_dest_state(next_char)};
if (next_char == '\n') {
m_line++;
if (m_has_delimiters && !m_match) {
next = m_dfa->get_root()->next(next_char);
dest_state = m_dfa->get_root()->get_dest_state(next_char);
m_match = true;
m_type_ids = &(next->get_matching_variable_ids());
m_type_ids = &(dest_state->get_matching_variable_ids());
m_start_pos = prev_byte_buf_pos;
m_match_pos = input_buffer.storage().pos();
m_match_line = m_line;
}
}
if (input_buffer.log_fully_consumed() || next == nullptr) {
if (input_buffer.log_fully_consumed() || nullptr == dest_state) {
assert(input_buffer.log_fully_consumed());
if (!m_match || (m_match && m_match_pos != input_buffer.storage().pos())) {
token
Expand All @@ -243,8 +243,8 @@ auto Lexer<TypedNfaState, TypedDfaState>::scan_with_wildcard(
// BFS (keep track of m_type_ids)
if (wildcard == '?') {
for (uint32_t byte = 0; byte < cSizeOfByte; byte++) {
auto* next_state = state->next(byte);
if (next_state->is_accepting() == false) {
auto* dest_state{state->get_dest_state(byte)};
if (false == dest_state->is_accepting()) {
token
= Token{m_last_match_pos,
input_buffer.storage().pos(),
Expand Down Expand Up @@ -277,9 +277,9 @@ auto Lexer<TypedNfaState, TypedDfaState>::scan_with_wildcard(
if (m_is_delimiter[byte]) {
continue;
}
TypedDfaState const* next_state = current_state->next(byte);
if (visited_states.find(next_state) == visited_states.end()) {
unvisited_states.push(next_state);
TypedDfaState const* dest_state{current_state->get_dest_state(byte)};
if (false == visited_states.contains(dest_state)) {
unvisited_states.push(dest_state);
}
}
}
Expand All @@ -299,7 +299,7 @@ auto Lexer<TypedNfaState, TypedDfaState>::scan_with_wildcard(
return ErrorCode::Success;
}
}
state = next;
state = dest_state;
}
}

Expand Down Expand Up @@ -337,7 +337,7 @@ void Lexer<TypedNfaState, TypedDfaState>::reset() {
template <typename TypedNfaState, typename TypedDfaState>
void Lexer<TypedNfaState, TypedDfaState>::prepend_start_of_file_char(ParserInputBuffer& input_buffer
) {
m_prev_state = m_dfa->get_root()->next(utf8::cCharStartOfFile);
m_prev_state = m_dfa->get_root()->get_dest_state(utf8::cCharStartOfFile);
m_asked_for_more_data = true;
m_start_pos = input_buffer.storage().pos();
m_match_pos = input_buffer.storage().pos();
Expand Down Expand Up @@ -407,7 +407,7 @@ void Lexer<TypedNfaState, TypedDfaState>::generate() {
m_dfa = std::make_unique<finite_automata::Dfa<TypedDfaState>>(std::move(nfa));
auto const* state = m_dfa->get_root();
for (uint32_t i = 0; i < cSizeOfByte; i++) {
if (state->next(i) != nullptr) {
if (nullptr != state->get_dest_state(i)) {
m_is_first_char[i] = true;
} else {
m_is_first_char[i] = false;
Expand Down
2 changes: 2 additions & 0 deletions src/log_surgeon/LexicalRule.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
#define LOG_SURGEON_LEXICAL_RULE_HPP
#include <cstdint>
#include <memory>
#include <vector>

#include <log_surgeon/finite_automata/Capture.hpp>
#include <log_surgeon/finite_automata/RegexAST.hpp>

namespace log_surgeon {
Expand Down
9 changes: 8 additions & 1 deletion src/log_surgeon/SchemaParser.cpp
Original file line number Diff line number Diff line change
@@ -1,18 +1,25 @@
#include "SchemaParser.hpp"

#include <cerrno>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <span>
#include <stdexcept>
#include <string>
#include <string_view>
#include <tuple>
#include <type_traits>

#include <log_surgeon/Constants.hpp>
#include <log_surgeon/FileReader.hpp>
#include <log_surgeon/finite_automata/Capture.hpp>
#include <log_surgeon/finite_automata/NfaState.hpp>
#include <log_surgeon/finite_automata/RegexAST.hpp>
#include <log_surgeon/Lalr1Parser.hpp>
#include <log_surgeon/Lexer.hpp>
#include <log_surgeon/Reader.hpp>
#include <log_surgeon/utils.hpp>

using ParserValueRegex = log_surgeon::ParserValue<std::unique_ptr<
Expand Down
4 changes: 2 additions & 2 deletions src/log_surgeon/finite_automata/DfaState.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class DfaState {
* @param character The character (byte or utf8) to transition on.
* @return A pointer to the DFA state reached after transitioning on `character`.
*/
[[nodiscard]] auto next(uint32_t character) const -> DfaState*;
[[nodiscard]] auto get_dest_state(uint32_t character) const -> DfaState const*;

private:
std::vector<uint32_t> m_matching_variable_ids;
Expand All @@ -57,7 +57,7 @@ class DfaState {
};

template <StateType state_type>
auto DfaState<state_type>::next(uint32_t character) const -> DfaState* {
auto DfaState<state_type>::get_dest_state(uint32_t character) const -> DfaState const* {
if constexpr (StateType::Byte == state_type) {
return m_bytes_transition[character];
} else {
Expand Down
10 changes: 5 additions & 5 deletions src/log_surgeon/finite_automata/DfaStatePair.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,11 @@ auto DfaStatePair<TypedDfaState>::get_reachable_pairs(
) const -> void {
// TODO: Handle UTF-8 (multi-byte transitions) as well
for (uint32_t i = 0; i < cSizeOfByte; i++) {
auto next_state1 = m_state1->next(i);
auto next_state2 = m_state2->next(i);
if (next_state1 != nullptr && next_state2 != nullptr) {
DfaStatePair const reachable_pair{next_state1, next_state2};
if (visited_pairs.count(reachable_pair) == 0) {
auto const& dest_state1{m_state1->get_dest_state(i)};
auto const& dest_state2{m_state2->get_dest_state(i)};
if (nullptr != dest_state1 && nullptr != dest_state2) {
DfaStatePair const reachable_pair{dest_state1, dest_state2};
if (false == visited_pairs.contains(reachable_pair)) {
unvisited_pairs.insert(reachable_pair);
}
}
Expand Down
3 changes: 2 additions & 1 deletion src/log_surgeon/finite_automata/Nfa.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#ifndef LOG_SURGEON_FINITE_AUTOMATA_NFA_HPP
#define LOG_SURGEON_FINITE_AUTOMATA_NFA_HPP

#include <cstddef>
#include <cstdint>
#include <memory>
#include <optional>
Expand Down Expand Up @@ -86,7 +87,7 @@ class Nfa {

auto set_root(TypedNfaState* root) -> void { m_root = root; }

auto get_root() -> TypedNfaState* { return m_root; }
auto get_root() const -> TypedNfaState* { return m_root; }

[[nodiscard]] auto get_capture_to_tag_id_pair(
) const -> std::unordered_map<Capture const*, std::pair<tag_id_t, tag_id_t>> const& {
Expand Down
2 changes: 1 addition & 1 deletion tests/test-register-handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ TEST_CASE("`RegisterHandler` tests", "[RegisterHandler]") {
constexpr size_t cRegId2{1};

SECTION("Initial state is empty") {
RegisterHandler empty_handler{handler_init(0)};
RegisterHandler const empty_handler{handler_init(0)};
REQUIRE_THROWS_AS(empty_handler.get_reversed_positions(cRegId1), std::out_of_range);
}

Expand Down