Skip to content

Commit 2d596b7

Browse files
committed
Merge branch 'fixed-tagged-dfa' into add-register-to-dfa
2 parents efb7932 + 139b097 commit 2d596b7

File tree

10 files changed

+266
-106
lines changed

10 files changed

+266
-106
lines changed

src/log_surgeon/Lalr1Parser.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ class NonTerminal {
7777
*/
7878
[[nodiscard]] auto token_cast(uint32_t i) const -> Token* {
7979
assert(i < cSizeOfAllChildren);
80-
return &std::get<Token>(NonTerminal::m_all_children[m_children_start + i]);
80+
return &std::get<Token>(m_all_children[m_children_start + i]);
8181
}
8282

8383
/**
@@ -89,7 +89,7 @@ class NonTerminal {
8989
*/
9090
[[nodiscard]] auto non_terminal_cast(uint32_t i) const -> NonTerminal* {
9191
assert(i < cSizeOfAllChildren);
92-
return &std::get<NonTerminal>(NonTerminal::m_all_children[m_children_start + i]);
92+
return &std::get<NonTerminal>(m_all_children[m_children_start + i]);
9393
}
9494

9595
/**

src/log_surgeon/Lexer.tpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,6 @@ auto Lexer<TypedNfaState, TypedDfaState>::get_rule(uint32_t const variable_id
378378
template <typename TypedNfaState, typename TypedDfaState>
379379
void Lexer<TypedNfaState, TypedDfaState>::generate() {
380380
finite_automata::Nfa<TypedNfaState> nfa{std::move(m_rules)};
381-
// TODO: DFA ignores tags. E.g., treats "capture:user=(?<user_id>\d+)" as "capture:user=\d+"
382381
m_dfa = std::make_unique<finite_automata::Dfa<TypedDfaState>>(std::move(nfa));
383382
auto const* state = m_dfa->get_root();
384383
for (uint32_t i = 0; i < cSizeOfByte; i++) {

src/log_surgeon/finite_automata/Dfa.hpp

Lines changed: 48 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -11,22 +11,25 @@
1111
#include <log_surgeon/Constants.hpp>
1212
#include <log_surgeon/finite_automata/DfaStatePair.hpp>
1313
#include <log_surgeon/finite_automata/Nfa.hpp>
14-
#include <log_surgeon/finite_automata/RegisterHandler.hpp>
14+
#include <log_surgeon/finite_automata/Register.hpp>
1515

1616
namespace log_surgeon::finite_automata {
17+
template <typename TypedNfaState>
18+
using NfaStateSet = std::set<RegOpNfaStatePair<TypedNfaState>>;
19+
1720
template <typename TypedDfaState>
1821
class Dfa {
1922
public:
20-
template <typename NfaStateType>
21-
explicit Dfa(Nfa<NfaStateType> nfa);
23+
template <typename TypedNfaState>
24+
explicit Dfa(Nfa<TypedNfaState> nfa);
2225

2326
/**
2427
* Creates a new DFA state based on a set of NFA states and adds it to `m_states`.
2528
* @param nfa_state_set The set of NFA states represented by this DFA state.
2629
* @return A pointer to the new DFA state.
2730
*/
2831
template <typename TypedNfaState>
29-
auto new_state(std::set<TypedNfaState*> const& nfa_state_set) -> TypedDfaState*;
32+
auto new_state(NfaStateSet<TypedNfaState> const& nfa_state_set) -> TypedDfaState*;
3033

3134
auto get_root() const -> TypedDfaState const* { return m_states.at(0).get(); }
3235

@@ -42,81 +45,76 @@ class Dfa {
4245

4346
private:
4447
std::vector<std::unique_ptr<TypedDfaState>> m_states;
48+
<<<<<<< HEAD
4549
RegisterHandler m_register_handler;
50+
=======
51+
std::vector<std::unique_ptr<Register>> m_registers;
52+
>>>>>>> fixed-tagged-dfa
4653
};
4754

55+
// TODO: Add utf8 case
4856
template <typename TypedDfaState>
4957
template <typename TypedNfaState>
5058
Dfa<TypedDfaState>::Dfa(Nfa<TypedNfaState> nfa) {
51-
typedef std::set<TypedNfaState const*> StateSet;
52-
53-
std::map<StateSet, TypedDfaState*> dfa_states;
54-
std::stack<StateSet> unmarked_sets;
55-
auto create_dfa_state
56-
= [this, &dfa_states, &unmarked_sets](StateSet const& set) -> TypedDfaState* {
57-
auto* state = new_state(set);
58-
dfa_states[set] = state;
59-
unmarked_sets.push(set);
60-
return state;
59+
std::map<NfaStateSet<TypedNfaState>, TypedDfaState*> dfa_states;
60+
std::stack<NfaStateSet<TypedNfaState>> unvisited_nfa_sets;
61+
auto create_dfa_state = [this, &dfa_states, &unvisited_nfa_sets](
62+
NfaStateSet<TypedNfaState> const& nfa_state_set
63+
) -> TypedDfaState* {
64+
TypedDfaState* dfa_state = new_state(nfa_state_set);
65+
dfa_states[nfa_state_set] = dfa_state;
66+
unvisited_nfa_sets.push(nfa_state_set);
67+
return dfa_state;
6168
};
6269

63-
auto start_set = nfa.get_root()->epsilon_closure();
64-
create_dfa_state(start_set);
65-
while (false == unmarked_sets.empty()) {
66-
auto set = unmarked_sets.top();
67-
unmarked_sets.pop();
68-
auto* dfa_state = dfa_states.at(set);
69-
std::map<uint32_t, StateSet> ascii_transitions_map;
70-
// map<Interval, StateSet> transitions_map;
71-
for (auto const* s0 : set) {
70+
NfaStateSet<TypedNfaState> const initial_nfa_set = nfa.get_root()->epsilon_closure(m_registers);
71+
create_dfa_state(initial_nfa_set);
72+
while (!unvisited_nfa_sets.empty()) {
73+
NfaStateSet<TypedNfaState> current_nfa_set = unvisited_nfa_sets.top();
74+
unvisited_nfa_sets.pop();
75+
TypedDfaState* dfa_state = dfa_states.at(current_nfa_set);
76+
77+
std::map<uint32_t, NfaStateSet<TypedNfaState>> ascii_transitions_map;
78+
for (auto const& register_nfa_state_pair : current_nfa_set) {
7279
for (uint32_t i = 0; i < cSizeOfByte; i++) {
73-
for (auto* const s1 : s0->get_byte_transitions(i)) {
74-
StateSet closure = s1->epsilon_closure();
80+
for (auto const* s1 : register_nfa_state_pair.get_state()->get_byte_transitions(i))
81+
{
82+
NfaStateSet<TypedNfaState> closure = s1->epsilon_closure(m_registers);
7583
ascii_transitions_map[i].insert(closure.begin(), closure.end());
7684
}
7785
}
78-
// TODO: add this for the utf8 case
79-
/*
80-
for (auto const& data : s0->get_tree_transitions().all()) {
81-
for (auto* const s1 : data.m_value) {
82-
StateSet closure = s1->epsilon_closure();
83-
transitions_map[data.m_interval].insert(closure.begin(), closure.end());
84-
}
85-
}
86-
*/
8786
}
87+
<<<<<<< HEAD
8888
auto next_dfa_state
8989
= [&dfa_states, &create_dfa_state](StateSet const& set) -> TypedDfaState* {
9090
TypedDfaState* state{nullptr};
9191
auto it = dfa_states.find(set);
92+
=======
93+
94+
for (typename std::map<uint32_t, NfaStateSet<TypedNfaState>>::value_type const& kv :
95+
ascii_transitions_map)
96+
{
97+
auto const& dest_nfa_state_set = kv.second;
98+
TypedDfaState* dest_state;
99+
auto it = dfa_states.find(dest_nfa_state_set);
100+
>>>>>>> fixed-tagged-dfa
92101
if (it == dfa_states.end()) {
93-
state = create_dfa_state(set);
102+
dest_state = create_dfa_state(dest_nfa_state_set);
94103
} else {
95-
state = it->second;
104+
dest_state = it->second;
96105
}
97-
return state;
98-
};
99-
for (auto const& kv : ascii_transitions_map) {
100-
auto* dest_state = next_dfa_state(kv.second);
101106
dfa_state->add_byte_transition(kv.first, dest_state);
102107
}
103-
// TODO: add this for the utf8 case
104-
/*
105-
for (auto& kv : transitions_map) {
106-
TypedDfaState* dest_state = next_dfa_state(kv.second);
107-
dfa_state->add_tree_transition(kv.first, dest_state);
108-
}
109-
*/
110108
}
111109
}
112110

113111
template <typename TypedDfaState>
114112
template <typename TypedNfaState>
115-
auto Dfa<TypedDfaState>::new_state(std::set<TypedNfaState*> const& nfa_state_set
116-
) -> TypedDfaState* {
113+
auto Dfa<TypedDfaState>::new_state(NfaStateSet<TypedNfaState> const& nfa_state_set) -> TypedDfaState* {
117114
m_states.emplace_back(std::make_unique<TypedDfaState>());
118115
auto* dfa_state = m_states.back().get();
119-
for (auto const* nfa_state : nfa_state_set) {
116+
for (auto const& register_nfa_state_pair : nfa_state_set) {
117+
auto const* nfa_state = register_nfa_state_pair.get_state();
120118
if (nfa_state->is_accepting()) {
121119
dfa_state->add_matching_variable_id(nfa_state->get_matching_variable_id());
122120
}

src/log_surgeon/finite_automata/DfaState.hpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,18 @@
88
#include <type_traits>
99
#include <vector>
1010

11+
#include <log_surgeon/finite_automata/StateType.hpp>
1112
#include <log_surgeon/Constants.hpp>
12-
#include <log_surgeon/finite_automata/DfaStateType.hpp>
1313
#include <log_surgeon/finite_automata/UnicodeIntervalTree.hpp>
1414

1515
namespace log_surgeon::finite_automata {
16-
template <DfaStateType state_type>
16+
template <StateType state_type>
1717
class DfaState;
1818

19-
using ByteDfaState = DfaState<DfaStateType::Byte>;
20-
using Utf8DfaState = DfaState<DfaStateType::Utf8>;
19+
using DfaByteState = DfaState<StateType::Byte>;
20+
using DfaUtf8State = DfaState<StateType::Utf8>;
2121

22-
template <DfaStateType state_type>
22+
template <StateType state_type>
2323
class DfaState {
2424
public:
2525
using Tree = UnicodeIntervalTree<DfaState*>;
@@ -51,14 +51,14 @@ class DfaState {
5151
private:
5252
std::vector<uint32_t> m_matching_variable_ids;
5353
DfaState* m_bytes_transition[cSizeOfByte];
54-
// NOTE: We don't need m_tree_transitions for the `state_type == DfaStateType::Byte` case, so we
55-
// use an empty class (`std::tuple<>`) in that case.
56-
std::conditional_t<state_type == DfaStateType::Utf8, Tree, std::tuple<>> m_tree_transitions;
54+
// NOTE: We don't need m_tree_transitions for the `state_type == StateType::Byte` case, so we use
55+
// an empty class (`std::tuple<>`) in that case.
56+
std::conditional_t<state_type == StateType::Utf8, Tree, std::tuple<>> m_tree_transitions;
5757
};
5858

59-
template <DfaStateType state_type>
59+
template <StateType state_type>
6060
auto DfaState<state_type>::next(uint32_t character) const -> DfaState* {
61-
if constexpr (DfaStateType::Byte == state_type) {
61+
if constexpr (StateType::Byte == state_type) {
6262
return m_bytes_transition[character];
6363
} else {
6464
if (character < cSizeOfByte) {

src/log_surgeon/finite_automata/DfaStatePair.hpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ class DfaStatePair {
2727
m_state2(state2) {};
2828

2929
/**
30-
* Used for ordering in a set by considering the states' addresses.
3130
* @param rhs
3231
* @return Whether `m_state1` in lhs has a lower address than in rhs, or if they're equal,
3332
* whether `m_state2` in lhs has a lower address than in rhs.
@@ -73,7 +72,7 @@ auto DfaStatePair<TypedDfaState>::get_reachable_pairs(
7372
auto next_state1 = m_state1->next(i);
7473
auto next_state2 = m_state2->next(i);
7574
if (next_state1 != nullptr && next_state2 != nullptr) {
76-
DfaStatePair reachable_pair{next_state1, next_state2};
75+
DfaStatePair const reachable_pair{next_state1, next_state2};
7776
if (visited_pairs.count(reachable_pair) == 0) {
7877
unvisited_pairs.insert(reachable_pair);
7978
}

0 commit comments

Comments
 (0)