-
Notifications
You must be signed in to change notification settings - Fork 10
feat: Add RegisterOperation for TDFA into DFA transitions.
#89
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 10 commits
c42a214
228e8b7
765598f
34a7f5f
2d64cc5
0e41cfb
5f7dce1
aa1ce5f
8eef207
415be20
939f0ab
7cbc7ee
3da3300
d70d4ef
1da6391
345655d
12e06d9
45dd3a4
8f8da2a
37aa5f8
a97103e
30db002
82e2195
59292da
32ad10d
0586f90
03ae78e
ffa78b1
e5b1cce
3b1dafc
0b397e2
11cb2a7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,6 +8,10 @@ | |
| #include <stack> | ||
| #include <vector> | ||
|
|
||
| #include <fmt/core.h> | ||
| #include <fmt/format.h> | ||
|
|
||
| #include <log_surgeon/Constants.hpp> | ||
| #include <log_surgeon/finite_automata/DfaStatePair.hpp> | ||
| #include <log_surgeon/finite_automata/Nfa.hpp> | ||
|
|
||
|
|
@@ -17,6 +21,11 @@ class Dfa { | |
| public: | ||
| explicit Dfa(Nfa<TypedNfaState> const& nfa); | ||
|
|
||
| /** | ||
| * @return A string representation of the DFA. | ||
| */ | ||
| [[nodiscard]] auto serialize() const -> std::string; | ||
|
|
||
| /** | ||
| * Creates a new DFA state based on a set of NFA states and adds it to `m_states`. | ||
| * @param nfa_state_set The set of NFA states represented by this DFA state. | ||
|
|
@@ -38,6 +47,12 @@ class Dfa { | |
| [[nodiscard]] auto get_intersect(Dfa const* dfa_in) const -> std::set<uint32_t>; | ||
|
|
||
| private: | ||
| /** | ||
| * @return A vector representing the traversal order of the DFA states using breadth-first | ||
| * search (BFS). | ||
| */ | ||
| [[nodiscard]] auto get_bfs_traversal_order() const -> std::vector<TypedDfaState const*>; | ||
|
|
||
| std::vector<std::unique_ptr<TypedDfaState>> m_states; | ||
| }; | ||
|
|
||
|
|
@@ -61,10 +76,10 @@ Dfa<TypedDfaState, TypedNfaState>::Dfa(Nfa<TypedNfaState> const& nfa) { | |
| auto set = unmarked_sets.top(); | ||
| unmarked_sets.pop(); | ||
| auto* dfa_state = dfa_states.at(set); | ||
| std::map<uint32_t, StateSet> ascii_transitions_map; | ||
| std::map<uint8_t, StateSet> ascii_transitions_map; | ||
| // map<Interval, StateSet> transitions_map; | ||
| for (auto const* s0 : set) { | ||
| for (uint32_t i = 0; i < cSizeOfByte; i++) { | ||
| for (uint16_t i{0}; i < cSizeOfByte; ++i) { | ||
| for (auto* const s1 : s0->get_byte_transitions(i)) { | ||
| StateSet closure = s1->epsilon_closure(); | ||
| ascii_transitions_map[i].insert(closure.begin(), closure.end()); | ||
|
|
@@ -83,9 +98,9 @@ Dfa<TypedDfaState, TypedNfaState>::Dfa(Nfa<TypedNfaState> const& nfa) { | |
| } | ||
| return state; | ||
| }; | ||
| for (auto const& kv : ascii_transitions_map) { | ||
| auto* dest_state = next_dfa_state(kv.second); | ||
| dfa_state->add_byte_transition(kv.first, dest_state); | ||
| for (auto const& [byte, nfa_state_set] : ascii_transitions_map) { | ||
| auto* dest_state{next_dfa_state(nfa_state_set)}; | ||
| dfa_state->add_byte_transition(byte, {{}, dest_state}); | ||
| } | ||
| // TODO: add this for the utf8 case | ||
| } | ||
|
|
@@ -125,6 +140,55 @@ auto Dfa<TypedDfaState, TypedNfaState>::get_intersect(Dfa const* dfa_in | |
| } | ||
| return schema_types; | ||
| } | ||
|
|
||
// Collects the DFA states reachable from `get_root()` in breadth-first order.
// Each state appears at most once in the returned vector, in the order it was dequeued.
// Only the `cSizeOfByte` byte-indexed transitions of each state are followed (see the TODO below).
| template <typename TypedDfaState, typename TypedNfaState> | ||
| auto Dfa<TypedDfaState, TypedNfaState>::get_bfs_traversal_order( | ||
| ) const -> std::vector<TypedDfaState const*> { | ||
| std::queue<TypedDfaState const*> state_queue; | ||
| std::unordered_set<TypedDfaState const*> visited_states; | ||
| std::vector<TypedDfaState const*> visited_order; | ||
| visited_states.reserve(m_states.size()); | ||
| visited_order.reserve(m_states.size()); | ||
|
|
||
// Enqueues `dest_state` only the first time it is seen; `insert(...).second` is false for a
// state that is already in `visited_states`.
| auto try_add_to_queue_and_visited | ||
| = [&state_queue, &visited_states](TypedDfaState const* dest_state) { | ||
| if (visited_states.insert(dest_state).second) { | ||
| state_queue.push(dest_state); | ||
| } | ||
| }; | ||
|
|
||
// The root is passed in without a null check.
// NOTE(review): `get_root()` is defined outside this view; presumably it never returns nullptr
// for a constructed DFA -- confirm.
| try_add_to_queue_and_visited(get_root()); | ||
| while (false == state_queue.empty()) { | ||
| auto const* current_state = state_queue.front(); | ||
| visited_order.push_back(current_state); | ||
| state_queue.pop(); | ||
| // TODO: Handle the utf8 case | ||
| for (uint32_t idx{0}; idx < cSizeOfByte; ++idx) { | ||
|
Comment on lines
+167
to
+168
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 💡 Verification agent 🧩 Analysis chainTODO comment should be addressed. The TODO comment about handling the UTF-8 case should be addressed in this implementation since the PR focuses on DFA transitions. 🏁 Script executed: #!/bin/bash
# Check if there are other TODO comments related to UTF-8 in DFA-related files
rg "TODO.*utf8" src/log_surgeon/finite_automata/Length of output: 378 Action Required: Address UTF-8 Handling TODOs The DFA transition implementation still contains several TODO comments related to UTF-8 support. In particular, note these instances:
Given that this PR focuses on DFA transitions, please either implement the actual UTF-8 handling logic or provide a clear plan (e.g. a follow-up ticket) to address these comments consistently. This will help ensure that incomplete logic does not propagate in the codebase. |
||
// A null destination is skipped rather than enqueued.
| auto const dest_state{current_state->get_dest_state(idx)}; | ||
| if (nullptr != dest_state) { | ||
| try_add_to_queue_and_visited(dest_state); | ||
| } | ||
| } | ||
| } | ||
| return visited_order; | ||
| } | ||
|
|
||
// Builds a string representation of the DFA.
// States are numbered by their position in the BFS traversal order (the first state gets ID 0),
// each state is serialized with that ID map, and the per-state strings are joined with '\n'
// and followed by one trailing '\n'.
| template <typename TypedDfaState, typename TypedNfaState> | ||
| auto Dfa<TypedDfaState, TypedNfaState>::serialize() const -> std::string { | ||
| auto const traversal_order = get_bfs_traversal_order(); | ||
|
|
||
| std::unordered_map<TypedDfaState const*, uint32_t> state_ids; | ||
SharafMohamed marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| state_ids.reserve(traversal_order.size()); | ||
| for (auto const* state : traversal_order) { | ||
SharafMohamed marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// `state_ids.size()` is read before the insertion, so IDs are 0, 1, 2, ... in BFS order.
| state_ids.emplace(state, state_ids.size()); | ||
| } | ||
|
|
||
// NOTE(review): the return type of `state->serialize(state_ids)` is not visible here; it is
// assumed to be usable to construct a std::string -- confirm against the DfaState definition.
| std::vector<std::string> serialized_states; | ||
| for (auto const* state : traversal_order) { | ||
| serialized_states.emplace_back(state->serialize(state_ids)); | ||
| } | ||
| return fmt::format("{}\n", fmt::join(serialized_states, "\n")); | ||
| } | ||
| } // namespace log_surgeon::finite_automata | ||
|
|
||
| #endif // LOG_SURGEON_FINITE_AUTOMATA_DFA_HPP | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,70 @@ | ||
| #ifndef LOG_SURGEON_FINITE_AUTOMATA_DFATRANSITION_HPP | ||
| #define LOG_SURGEON_FINITE_AUTOMATA_DFATRANSITION_HPP | ||
|
|
||
| #include <cstdint> | ||
| #include <optional> | ||
| #include <string> | ||
| #include <unordered_map> | ||
| #include <utility> | ||
| #include <vector> | ||
|
|
||
| #include <fmt/core.h> | ||
| #include <fmt/format.h> | ||
|
|
||
| #include <log_surgeon/finite_automata/RegisterOperation.hpp> | ||
| #include <log_surgeon/finite_automata/StateType.hpp> | ||
|
|
||
| namespace log_surgeon::finite_automata { | ||
| template <StateType state_type> | ||
| class DfaState; | ||
|
|
||
// Represents one DFA transition: a list of register operations paired with a non-owning
// pointer to the destination `DfaState`. A default-constructed transition has no register
// operations and a null destination.
| template <StateType state_type> | ||
| class DfaTransition { | ||
SharafMohamed marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| public: | ||
| DfaTransition() = default; | ||
SharafMohamed marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
// `reg_ops` is taken by value and moved into the member; `dest_state` is stored as-is.
| DfaTransition(std::vector<RegisterOperation> reg_ops, DfaState<state_type>* dest_state) | ||
| : m_reg_ops{std::move(reg_ops)}, | ||
| m_dest_state{dest_state} {} | ||
|
|
||
// NOTE(review): returns the vector by value, so every call copies `m_reg_ops`.
| [[nodiscard]] auto get_reg_ops() const -> std::vector<RegisterOperation> { return m_reg_ops; } | ||
SharafMohamed marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
// Returns the stored destination pointer; it is nullptr for a default-constructed transition.
| [[nodiscard]] auto get_dest_state() const -> DfaState<state_type>* { return m_dest_state; } | ||
|
|
||
| /** | ||
| * @param state_ids A map of states to their unique identifiers. | ||
| * @return A string representation of the DFA transition on success. | ||
| * @return Forwards `RegisterOperation::serialize`'s return value (std::nullopt) on failure. | ||
| * @return std::nullopt if `m_dest_state` is not in `state_ids`. | ||
| */ | ||
| [[nodiscard]] auto serialize( | ||
| std::unordered_map<DfaState<state_type> const*, uint32_t> const& state_ids | ||
| ) const -> std::optional<std::string>; | ||
|
|
||
| private: | ||
| std::vector<RegisterOperation> m_reg_ops; | ||
| DfaState<state_type>* m_dest_state{nullptr}; | ||
SharafMohamed marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| }; | ||
|
|
||
// Serializes this transition as "-(<op>,<op>,...)->{id}", where each <op> is the serialized form
// of one register operation and {id} is the destination state's entry in `state_ids`.
// Returns std::nullopt if the destination state has no entry in `state_ids`, or if any register
// operation fails to serialize.
| template <StateType state_type> | ||
| auto DfaTransition<state_type>::serialize( | ||
| std::unordered_map<DfaState<state_type> const*, uint32_t> const& state_ids | ||
| ) const -> std::optional<std::string> { | ||
// Checked up front so the `state_ids.at(...)` call in the final return cannot throw.
| if (false == state_ids.contains(m_dest_state)) { | ||
| return std::nullopt; | ||
| } | ||
|
|
||
| std::vector<std::string> transformed_ops; | ||
| for (auto const& reg_op : m_reg_ops) { | ||
| auto const optional_serialized_op{reg_op.serialize()}; | ||
// One failed operation aborts the whole transition's serialization.
| if (false == optional_serialized_op.has_value()) { | ||
| return std::nullopt; | ||
| } | ||
| transformed_ops.push_back(optional_serialized_op.value()); | ||
SharafMohamed marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| } | ||
|
|
||
// With no register operations the output is "-()->{id}".
| return fmt::format("-({})->{}", fmt::join(transformed_ops, ","), state_ids.at(m_dest_state)); | ||
| } | ||
| } // namespace log_surgeon::finite_automata | ||
|
|
||
| #endif // LOG_SURGEON_FINITE_AUTOMATA_DFATRANSITION_HPP | ||
Uh oh!
There was an error while loading. Please reload this page.