44#include < cassert>
55#include < cstdint>
66#include < memory>
7+ #include < string>
78#include < tuple>
89#include < type_traits>
10+ #include < unordered_map>
911#include < vector>
1012
13+ #include < fmt/core.h>
14+ #include < fmt/format.h>
15+
1116#include < log_surgeon/Constants.hpp>
17+ #include < log_surgeon/finite_automata/DfaTransition.hpp>
18+ #include < log_surgeon/finite_automata/RegisterOperation.hpp>
1219#include < log_surgeon/finite_automata/StateType.hpp>
1320#include < log_surgeon/finite_automata/UnicodeIntervalTree.hpp>
1421
@@ -24,7 +31,11 @@ class DfaState {
2431public:
2532 using Tree = UnicodeIntervalTree<DfaState*>;
2633
27- DfaState () { std::fill (std::begin (m_bytes_transition), std::end (m_bytes_transition), nullptr ); }
34+ DfaState () {
35+ for (auto & transition : m_bytes_transition) {
36+ transition = DfaTransition<state_type>{{}, nullptr };
37+ }
38+ }
2839
2940 auto add_matching_variable_id (uint32_t const variable_id) -> void {
3041 m_matching_variable_ids.push_back (variable_id);
@@ -38,19 +49,32 @@ class DfaState {
3849 return false == m_matching_variable_ids.empty ();
3950 }
4051
41- auto add_byte_transition (uint8_t const & byte, DfaState* dest_state) -> void {
42- m_bytes_transition[byte] = dest_state;
52+ auto
53+ add_byte_transition (uint8_t const & byte, DfaTransition<state_type> dfa_transition) -> void {
54+ m_bytes_transition[byte] = dfa_transition;
55+ }
56+
57+ auto add_accepting_op (RegisterOperation const reg_op) -> void {
58+ m_accepting_ops.push_back (reg_op);
4359 }
4460
61+ /**
 * Builds a one-line textual description of this state: its ID, then (only when the state is
 * accepting) its matching variable IDs and its accepting operations, then its byte transitions.
 * Only the byte transitions are included; tree transitions are not serialized.
 * @param state_ids A map of states to their unique identifiers. Must contain this state, since
 * the ID is looked up with `at`.
 * @return A string representation of the DFA state.
 */
65+ [[nodiscard]] auto serialize (std::unordered_map<DfaState const *, uint32_t > const & state_ids
66+ ) const -> std::string;
67+
4568 /**
 * Looks up the state reached from this state on `character`. Values below `cSizeOfByte` are
 * resolved through the byte transition table; for UTF-8 states, larger values are resolved
 * through the interval tree.
 * @param character The character (byte or utf8) to transition on.
 * @return The destination DFA state reached after transitioning on `character`. For UTF-8
 * states, `nullptr` when `character` is at least `cSizeOfByte` and no tree interval contains it.
 */
4972 [[nodiscard]] auto get_dest_state (uint32_t character) const -> DfaState const *;
5073
5174private:
5275 std::vector<uint32_t > m_matching_variable_ids;
53- DfaState* m_bytes_transition[cSizeOfByte];
76+ std::vector<RegisterOperation> m_accepting_ops;
77+ DfaTransition<state_type> m_bytes_transition[cSizeOfByte];
5478 // NOTE: We don't need m_tree_transitions for the `state_type == StateType::Byte` case, so we
5579 // use an empty class (`std::tuple<>`) in that case.
5680 std::conditional_t <state_type == StateType::Utf8, Tree, std::tuple<>> m_tree_transitions;
@@ -59,20 +83,62 @@ class DfaState {
5983template <StateType state_type>
6084auto DfaState<state_type>::get_dest_state(uint32_t character) const -> DfaState const * {
6185 if constexpr (StateType::Byte == state_type) {
62- return m_bytes_transition[character];
86+ return m_bytes_transition[character]. get_dest_state () ;
6387 } else {
6488 if (character < cSizeOfByte) {
65- return m_bytes_transition[character];
89+ return m_bytes_transition[character]. get_dest_state () ;
6690 }
6791 std::unique_ptr<std::vector<typename Tree::Data>> result
6892 = m_tree_transitions.find (Interval (character, character));
6993 assert (result->size () <= 1 );
7094 if (false == result->empty ()) {
71- return result->front ().m_value ;
95+ return result->front ().m_value . get_dest_state () ;
7296 }
7397 return nullptr ;
7498 }
7599}
100+
101+ template <StateType state_type>
102+ auto DfaState<state_type>::serialize(std::unordered_map<DfaState const *, uint32_t > const & state_ids
103+ ) const -> std::string {
104+ auto const accepting_tags_string = is_accepting ()
105+ ? fmt::format (
106+ " accepting_tags={{{}}}," ,
107+ fmt::join (m_matching_variable_ids, " ," )
108+ )
109+ : " " ;
110+
111+ std::vector<std::string> accepting_op_strings;
112+ for (auto const & accepting_op : m_accepting_ops) {
113+ auto serialized_accepting_op{accepting_op.serialize ()};
114+ if (serialized_accepting_op.has_value ()) {
115+ accepting_op_strings.push_back (serialized_accepting_op.value ());
116+ }
117+ }
118+ auto const accepting_ops_string = is_accepting () ? fmt::format (
119+ " accepting_operations={{{}}}," ,
120+ fmt::join (accepting_op_strings, " ," )
121+ )
122+ : " " ;
123+
124+ std::vector<std::string> transition_strings;
125+ for (uint32_t idx{0 }; idx < cSizeOfByte; ++idx) {
126+ auto const byte_transition_string{m_bytes_transition[idx].serialize (state_ids)};
127+ if (byte_transition_string.has_value ()) {
128+ transition_strings.push_back (
129+ fmt::format (" {}{}" , static_cast <char >(idx), byte_transition_string.value ())
130+ );
131+ }
132+ }
133+
134+ return fmt::format (
135+ " {}:{}{}byte_transitions={{{}}}" ,
136+ state_ids.at (this ),
137+ accepting_tags_string,
138+ accepting_ops_string,
139+ fmt::join (transition_strings, " ," )
140+ );
141+ }
76142} // namespace log_surgeon::finite_automata
77143
78144#endif // LOG_SURGEON_FINITE_AUTOMATA_DFA_STATE
0 commit comments