|
1 | 1 | #include "LogEvent.hpp" |
2 | 2 |
|
| 3 | +#include <cstddef> |
3 | 4 | #include <cstdint> |
| 5 | +#include <iterator> |
4 | 6 | #include <memory> |
5 | 7 | #include <optional> |
| 8 | +#include <set> |
| 9 | +#include <stdexcept> |
6 | 10 | #include <string> |
7 | 11 | #include <vector> |
8 | 12 |
|
| 13 | +#include <ystdlib/error_handling/ErrorCode.hpp> |
| 14 | +#include <ystdlib/error_handling/Result.hpp> |
| 15 | + |
9 | 16 | #include <log_surgeon/Constants.hpp> |
| 17 | +#include <log_surgeon/finite_automata/Capture.hpp> |
10 | 18 | #include <log_surgeon/LogParser.hpp> |
11 | 19 | #include <log_surgeon/LogParserOutputBuffer.hpp> |
12 | 20 | #include <log_surgeon/Token.hpp> |
@@ -57,51 +65,104 @@ auto LogEventView::get_logtype() const -> std::string { |
57 | 65 | auto token_view{m_log_output_buffer->get_mutable_token(i)}; |
58 | 66 | auto const rule_id{token_view.get_type_ids()->at(0)}; |
59 | 67 | if (static_cast<uint32_t>(SymbolId::TokenUncaughtString) == rule_id) { |
60 | | - logtype += token_view.to_string_view(); |
| 68 | + logtype.append(token_view.to_string_view()); |
| 69 | + continue; |
| 70 | + } |
| 71 | + |
| 72 | + bool is_first_token{}; |
| 73 | + if (m_log_output_buffer->has_header()) { |
| 74 | + is_first_token = 0 == i; |
61 | 75 | } else { |
62 | | - bool is_first_token; |
63 | | - if (m_log_output_buffer->has_header()) { |
64 | | - is_first_token = 0 == i; |
65 | | - } else { |
66 | | - is_first_token = 1 == i; |
67 | | - } |
68 | | - if (static_cast<uint32_t>(SymbolId::TokenNewline) != rule_id && false == is_first_token) |
69 | | - { |
70 | | - logtype += token_view.release_delimiter(); |
71 | | - } |
72 | | - auto const& optional_captures{m_log_parser.m_lexer.get_captures_from_rule_id(rule_id)}; |
73 | | - if (optional_captures.has_value()) { |
74 | | - auto capture_view{token_view}; |
75 | | - auto const& captures{optional_captures.value()}; |
76 | | - for (auto const capture : captures) { |
77 | | - auto const [reg_start_id, reg_end_id]{ |
78 | | - m_log_parser.m_lexer.get_reg_ids_from_capture(capture) |
79 | | - }; |
80 | | - auto const start_positions{ |
81 | | - capture_view.get_reversed_reg_positions(reg_start_id) |
82 | | - }; |
83 | | - auto const end_positions{capture_view.get_reversed_reg_positions(reg_end_id)}; |
84 | | - |
85 | | - auto const& capture_name{capture->get_name()}; |
86 | | - if (false == start_positions.empty() && -1 < start_positions[0] |
87 | | - && false == end_positions.empty() && -1 < end_positions[0]) |
88 | | - { |
89 | | - capture_view.set_end_pos(start_positions[0]); |
90 | | - logtype.append(capture_view.to_string_view()); |
91 | | - logtype.append("<" + capture_name + ">"); |
92 | | - capture_view.set_start_pos(end_positions[0]); |
93 | | - } |
94 | | - } |
95 | | - capture_view.set_end_pos(token_view.get_end_pos()); |
96 | | - logtype.append(capture_view.to_string_view()); |
97 | | - } else { |
98 | | - logtype += "<" + m_log_parser.get_id_symbol(rule_id) + ">"; |
| 76 | + is_first_token = 1 == i; |
| 77 | + } |
| 78 | + if (static_cast<uint32_t>(SymbolId::TokenNewline) != rule_id && false == is_first_token) { |
| 79 | + logtype += token_view.release_delimiter(); |
| 80 | + } |
| 81 | + |
| 82 | + auto const matches{get_capture_matches(token_view)}; |
| 83 | + if (matches.has_error()) { |
| 84 | + logtype.append("<" + m_log_parser.get_id_symbol(rule_id) + ">"); |
| 85 | + continue; |
| 86 | + } |
| 87 | + auto prev_end_pos{token_view.get_start_pos()}; |
| 88 | + for (auto const& match : matches.value()) { |
| 89 | + if (match.m_leaf) { |
| 90 | + logtype.append( |
| 91 | + token_view.get_sub_token(prev_end_pos, match.m_pos.m_start).to_string_view() |
| 92 | + ); |
| 93 | + logtype.append("<" + match.m_capture->get_name() + ">"); |
| 94 | + prev_end_pos = match.m_pos.m_end; |
99 | 95 | } |
100 | 96 | } |
| 97 | + logtype.append( |
| 98 | + token_view.get_sub_token(prev_end_pos, token_view.get_end_pos()).to_string_view() |
| 99 | + ); |
101 | 100 | } |
102 | 101 | return logtype; |
103 | 102 | } |
104 | 103 |
|
| 104 | +auto LogEventView::get_capture_matches(Token const& root_var) const |
| 105 | + -> ystdlib::error_handling::Result<std::vector<Token::CaptureMatch>> { |
| 106 | + auto captures{ |
| 107 | + get_log_parser().m_lexer.get_captures_from_rule_id(root_var.get_type_ids()->at(0)) |
| 108 | + }; |
| 109 | + if (false == captures.has_value()) { |
| 110 | + return LogEventErrorCode{LogEventErrorCodeEnum::NoCaptureGroups}; |
| 111 | + } |
| 112 | + |
| 113 | + auto cmp{[](Token::CaptureMatch const& a, Token::CaptureMatch const& b) -> bool { |
| 114 | + if (a.m_pos.m_start != b.m_pos.m_start) { |
| 115 | + return a.m_pos.m_start < b.m_pos.m_start; |
| 116 | + } |
| 117 | + return a.m_pos.m_end > b.m_pos.m_end; |
| 118 | + }}; |
| 119 | + std::set<Token::CaptureMatch, decltype(cmp)> ordered_matches; |
| 120 | + for (auto const* const capture : captures.value()) { |
| 121 | + auto position{get_capture_position(root_var, capture)}; |
| 122 | + if (position.has_error()) { |
| 123 | + if (LogEventErrorCode{LogEventErrorCodeEnum::NoCaptureGroupMatch} == position.error()) { |
| 124 | + continue; |
| 125 | + } |
| 126 | + return position.error(); |
| 127 | + } |
| 128 | + ordered_matches.emplace(capture, position.value(), true); |
| 129 | + } |
| 130 | + if (ordered_matches.empty()) { |
| 131 | + return {{}}; |
| 132 | + } |
| 133 | + |
| 134 | + std::vector<Token::CaptureMatch> matches; |
| 135 | + matches.reserve(ordered_matches.size()); |
| 136 | + auto const last_match{std::prev(ordered_matches.end())}; |
| 137 | + for (auto match{ordered_matches.begin()}; match != last_match; ++match) { |
| 138 | + auto next_match{std::next(match)}; |
| 139 | + auto leaf{false}; |
| 140 | + if (match->m_pos.m_end <= next_match->m_pos.m_start) { |
| 141 | + leaf = true; |
| 142 | + } |
| 143 | + matches.emplace_back(match->m_capture, match->m_pos, leaf); |
| 144 | + } |
| 145 | + matches.emplace_back(last_match->m_capture, last_match->m_pos, true); |
| 146 | + return matches; |
| 147 | +} |
| 148 | + |
| 149 | +auto LogEventView::get_capture_position( |
| 150 | + Token const& root_var, |
| 151 | + finite_automata::Capture const* const& capture |
| 152 | +) const -> ystdlib::error_handling::Result<Token::CaptureMatchPosition> { |
| 153 | + auto const [start_reg_id, end_reg_id]{ |
| 154 | + get_log_parser().m_lexer.get_reg_ids_from_capture(capture) |
| 155 | + }; |
| 156 | + auto const start_positions{root_var.get_reversed_reg_positions(start_reg_id)}; |
| 157 | + auto const end_positions{root_var.get_reversed_reg_positions(end_reg_id)}; |
| 158 | + if (start_positions.empty() || 0 > start_positions[0] || end_positions.empty() |
| 159 | + || 0 > end_positions[0]) |
| 160 | + { |
| 161 | + return LogEventErrorCode{LogEventErrorCodeEnum::NoCaptureGroupMatch}; |
| 162 | + } |
| 163 | + return {start_positions[0], end_positions[0]}; |
| 164 | +} |
| 165 | + |
105 | 166 | LogEvent::LogEvent(LogEventView const& src) : LogEventView{src.get_log_parser()} { |
106 | 167 | set_multiline(src.is_multiline()); |
107 | 168 | m_log_output_buffer->set_has_header(src.m_log_output_buffer->has_header()); |
@@ -147,3 +208,24 @@ LogEvent::LogEvent(LogEventView const& src) : LogEventView{src.get_log_parser()} |
147 | 208 | } |
148 | 209 | } |
149 | 210 | } // namespace log_surgeon |
| 211 | + |
| 212 | +using log_surgeon::LogEventErrorCodeEnum; |
| 213 | + |
| 214 | +using LogEventErrorCategory = ystdlib::error_handling::ErrorCategory<LogEventErrorCodeEnum>; |
| 215 | + |
| 216 | +template <> |
| 217 | +auto LogEventErrorCategory::name() const noexcept -> char const* { |
| 218 | + return "log_surgeon::LogEvent"; |
| 219 | +} |
| 220 | + |
| 221 | +template <> |
| 222 | +auto LogEventErrorCategory::message(LogEventErrorCodeEnum error_enum) const -> std::string { |
| 223 | + switch (error_enum) { |
| 224 | + case LogEventErrorCodeEnum::NoCaptureGroups: |
| 225 | + return "LogEvent NoCaptureGroups"; |
| 226 | + case LogEventErrorCodeEnum::NoCaptureGroupMatch: |
| 227 | + return "LogEvent NoCaptureGroupMatch"; |
| 228 | + default: |
| 229 | + return "Unrecognized LogEventErrorCode"; |
| 230 | + } |
| 231 | +} |
0 commit comments