Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ message(STATUS "Found fmt ${fmt_VERSION}.")
find_package(Microsoft.GSL 4.0.0 REQUIRED)
message(STATUS "Found Microsoft.GSL ${Microsoft.GSL_VERSION}.")

find_package(ystdlib 0.1.0 REQUIRED)
message(STATUS "Found ystdlib ${ystdlib_VERSION}.")

if(log_surgeon_ENABLE_TESTS)
find_package(Catch2 3.8.1 REQUIRED)
message(STATUS "Found Catch2 ${Catch2_VERSION}.")
Expand Down Expand Up @@ -137,6 +140,7 @@ target_link_libraries(
PUBLIC
fmt::fmt
Microsoft.GSL::GSL
ystdlib::error_handling
)

target_include_directories(
Expand Down
8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ while (false == parser.done()) {
if (ErrorCode err{parser.parse_next_event()}; ErrorCode::Success != err) {
throw runtime_error("Parsing Failed");
}
LogEventView const& event{parser.get_log_parser().get_log_event_view()};

// Get and print the timestamp
Token* timestamp{event.get_timestamp()};
Expand All @@ -63,7 +64,7 @@ while (false == parser.done()) {
}

// Get and print the log-level
auto const& loglevels = event.get_variables(*loglevel_id);
auto const& loglevels{event.get_variables(*loglevel_id)};
if (false == loglevels.empty()) {
// In case there are multiple matches, just get the first one
cout << "loglevel:" << loglevels[0]->to_string_view() << endl;
Expand All @@ -72,8 +73,7 @@ while (false == parser.done()) {
// Other analysis...

// Print the entire event
LogEventView const& event = parser.get_log_parser().get_log_event_view();
cout << event->to_string() << endl;
cout << event.to_string() << endl;
}
```

Expand All @@ -91,6 +91,7 @@ Requirements:
* [GSL] >= 4.0.0
* [Task] >= 3.38
* [uv] >= 0.7.10
* [ystdlib-cpp] >= 0.1.0

To build and install the project to `$HOME/.local`:

Expand Down Expand Up @@ -193,3 +194,4 @@ The following are issues we're aware of and working on:
[GSL]: https://github.com/microsoft/GSL
[Task]: https://taskfile.dev/
[uv]: https://docs.astral.sh/uv
[ystdlib-cpp]: https://github.com/y-scope/ystdlib-cpp
4 changes: 4 additions & 0 deletions cmake/log_surgeon-config.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ if(@Microsoft.GSL_FOUND@)
find_dependency(Microsoft.GSL)
endif()

if(@ystdlib_FOUND@)
find_dependency(ystdlib)
endif()

set_and_check(log_surgeon_INCLUDE_DIR "@PACKAGE_LOG_SURGEON_INSTALL_INCLUDE_DIR@")

check_required_components(log_surgeon)
Expand Down
2 changes: 1 addition & 1 deletion src/.clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ IncludeCategories:
Priority: 4

# External library headers. Update when adding new libraries.
- Regex: "^<(fmt|gsl)"
- Regex: "^<(fmt|gsl|ystdlib)"
Priority: 3

# C system headers
Expand Down
158 changes: 120 additions & 38 deletions src/log_surgeon/LogEvent.cpp
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@
#include "LogEvent.hpp"

#include <cstddef>
#include <cstdint>
#include <iterator>
#include <memory>
#include <optional>
#include <set>
#include <stdexcept>
#include <string>
#include <vector>

#include <ystdlib/error_handling/ErrorCode.hpp>
#include <ystdlib/error_handling/Result.hpp>

#include <log_surgeon/Constants.hpp>
#include <log_surgeon/finite_automata/Capture.hpp>
#include <log_surgeon/LogParser.hpp>
#include <log_surgeon/LogParserOutputBuffer.hpp>
#include <log_surgeon/Token.hpp>
Expand Down Expand Up @@ -57,51 +65,104 @@ auto LogEventView::get_logtype() const -> std::string {
auto token_view{m_log_output_buffer->get_mutable_token(i)};
auto const rule_id{token_view.get_type_ids()->at(0)};
if (static_cast<uint32_t>(SymbolId::TokenUncaughtString) == rule_id) {
logtype += token_view.to_string_view();
logtype.append(token_view.to_string_view());
continue;
}

bool is_first_token{};
if (m_log_output_buffer->has_header()) {
is_first_token = 0 == i;
} else {
bool is_first_token;
if (m_log_output_buffer->has_header()) {
is_first_token = 0 == i;
} else {
is_first_token = 1 == i;
}
if (static_cast<uint32_t>(SymbolId::TokenNewline) != rule_id && false == is_first_token)
{
logtype += token_view.release_delimiter();
}
auto const& optional_captures{m_log_parser.m_lexer.get_captures_from_rule_id(rule_id)};
if (optional_captures.has_value()) {
auto capture_view{token_view};
auto const& captures{optional_captures.value()};
for (auto const capture : captures) {
auto const [reg_start_id, reg_end_id]{
m_log_parser.m_lexer.get_reg_ids_from_capture(capture)
};
auto const start_positions{
capture_view.get_reversed_reg_positions(reg_start_id)
};
auto const end_positions{capture_view.get_reversed_reg_positions(reg_end_id)};

auto const& capture_name{capture->get_name()};
if (false == start_positions.empty() && -1 < start_positions[0]
&& false == end_positions.empty() && -1 < end_positions[0])
{
capture_view.set_end_pos(start_positions[0]);
logtype.append(capture_view.to_string_view());
logtype.append("<" + capture_name + ">");
capture_view.set_start_pos(end_positions[0]);
}
}
capture_view.set_end_pos(token_view.get_end_pos());
logtype.append(capture_view.to_string_view());
} else {
logtype += "<" + m_log_parser.get_id_symbol(rule_id) + ">";
is_first_token = 1 == i;
}
if (static_cast<uint32_t>(SymbolId::TokenNewline) != rule_id && false == is_first_token) {
logtype += token_view.release_delimiter();
}

auto const matches{get_capture_matches(token_view)};
if (matches.has_error()) {
logtype.append("<" + m_log_parser.get_id_symbol(rule_id) + ">");
continue;
}
auto prev_end_pos{token_view.get_start_pos()};
for (auto const& match : matches.value()) {
if (match.m_leaf) {
logtype.append(
token_view.get_sub_token(prev_end_pos, match.m_pos.m_start).to_string_view()
);
logtype.append("<" + match.m_capture->get_name() + ">");
prev_end_pos = match.m_pos.m_end;
}
}
logtype.append(
token_view.get_sub_token(prev_end_pos, token_view.get_end_pos()).to_string_view()
);
}
return logtype;
}

// Collects the matches of every capture group registered for the rule of `root_var`, orders them
// by increasing start position (ties broken by decreasing end position), and flags each one as a
// leaf or non-leaf.
//
// Returns:
// - NoCaptureGroups if the lexer reports no captures for the first type id of `root_var`.
// - Any error other than NoCaptureGroupMatch produced by `get_capture_position`.
// - Otherwise the ordered matches, which may be an empty vector.
auto LogEventView::get_capture_matches(Token const& root_var) const
-> ystdlib::error_handling::Result<std::vector<Token::CaptureMatch>> {
// The captures are looked up using the first type id of the token.
auto captures{
get_log_parser().m_lexer.get_captures_from_rule_id(root_var.get_type_ids()->at(0))
};
if (false == captures.has_value()) {
return LogEventErrorCode{LogEventErrorCodeEnum::NoCaptureGroups};
}

// Strict weak ordering: earlier start first; for equal starts, the longer span (larger end) first.
// NOTE(review): two matches with identical start and end compare as equivalent, so the set below
// keeps only the first one inserted. Confirm that distinct captures can never share a span.
auto cmp{[](Token::CaptureMatch const& a, Token::CaptureMatch const& b) -> bool {
if (a.m_pos.m_start != b.m_pos.m_start) {
return a.m_pos.m_start < b.m_pos.m_start;
}
return a.m_pos.m_end > b.m_pos.m_end;
}};
std::set<Token::CaptureMatch, decltype(cmp)> ordered_matches;
for (auto const* const capture : captures.value()) {
auto position{get_capture_position(root_var, capture)};
if (position.has_error()) {
// A capture without a match is skipped; every other error aborts the whole query.
if (LogEventErrorCode{LogEventErrorCodeEnum::NoCaptureGroupMatch} == position.error()) {
continue;
}
return position.error();
Copy link
Contributor

@SharafMohamed SharafMohamed Jan 14, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This case is never currently reached right? Even if it was reachable, It seems like for it to be reached something has to be broken, but currently the code will just continue treating it as a non-capture var.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ya currently get_capture_position can only return NoCaptureGroupMatch, but this error handling is more future proof since any other error should cause get_capture_matches to fail.

Inside log surgeon get_capture_matches is only used in get_logtype which doesn't handle any errors, but the clp code that uses get_capture_matches would error. We could make get_logtype return a Result if we don't want it to always return something.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The main question is what should happen if get_capture_matches fails with any other type of error. In the case of NoCaptureGroupMatch its clear that this is just a non-capture group, so get_logtype will just treat it as a non-capture and continue. But, with other error types it'll do the same which is probably wrong.

That being said, as of now the behavior is correct, so this could just be left as is and addressed in the future if a case does come up.

}
// The third argument is a provisional leaf flag; the real value is computed further down when
// the output vector is built.
ordered_matches.emplace(capture, position.value(), true);
}
// Guard: the loop below calls std::prev on end(), which needs at least one element.
if (ordered_matches.empty()) {
return {{}};
}

std::vector<Token::CaptureMatch> matches;
matches.reserve(ordered_matches.size());
auto const last_match{std::prev(ordered_matches.end())};
// Visit every match except the last and compare it against its successor in the ordering.
for (auto match{ordered_matches.begin()}; match != last_match; ++match) {
auto next_match{std::next(match)};
auto leaf{false};
// A match is flagged as a leaf when it ends at or before the start of the following match.
if (match->m_pos.m_end <= next_match->m_pos.m_start) {
leaf = true;
}
matches.emplace_back(match->m_capture, match->m_pos, leaf);
}
// The last match in the ordering has no successor and is always flagged as a leaf.
matches.emplace_back(last_match->m_capture, last_match->m_pos, true);
return matches;
}

// Resolves the start/end register ids of `capture` through the lexer, reads the recorded positions
// for both registers from `root_var`, and returns the first entry of each as the match position.
// Yields NoCaptureGroupMatch when either position list is empty or its first entry is negative.
auto LogEventView::get_capture_position(
        Token const& root_var,
        finite_automata::Capture const* const& capture
) const -> ystdlib::error_handling::Result<Token::CaptureMatchPosition> {
    auto const [begin_reg_id, finish_reg_id]{
            get_log_parser().m_lexer.get_reg_ids_from_capture(capture)
    };
    auto const begin_positions{root_var.get_reversed_reg_positions(begin_reg_id)};
    auto const finish_positions{root_var.get_reversed_reg_positions(finish_reg_id)};

    // A position list is usable only if it is non-empty and its first entry is non-negative.
    auto const has_valid_front{[](auto const& positions) -> bool {
        return false == positions.empty() && 0 <= positions[0];
    }};

    if (has_valid_front(begin_positions) && has_valid_front(finish_positions)) {
        return {begin_positions[0], finish_positions[0]};
    }
    return LogEventErrorCode{LogEventErrorCodeEnum::NoCaptureGroupMatch};
}

LogEvent::LogEvent(LogEventView const& src) : LogEventView{src.get_log_parser()} {
set_multiline(src.is_multiline());
m_log_output_buffer->set_has_header(src.m_log_output_buffer->has_header());
Expand Down Expand Up @@ -147,3 +208,24 @@ LogEvent::LogEvent(LogEventView const& src) : LogEventView{src.get_log_parser()}
}
}
} // namespace log_surgeon

using log_surgeon::LogEventErrorCodeEnum;

using LogEventErrorCategory = ystdlib::error_handling::ErrorCategory<LogEventErrorCodeEnum>;

// Returns the fixed name identifying the `LogEventErrorCodeEnum` error category.
template <>
auto LogEventErrorCategory::name() const noexcept -> char const* {
    static constexpr char const* cCategoryName{"log_surgeon::LogEvent"};
    return cCategoryName;
}

// Maps each `LogEventErrorCodeEnum` value to its message string; values outside the known
// enumerators map to a generic fallback message.
template <>
auto LogEventErrorCategory::message(LogEventErrorCodeEnum error_enum) const -> std::string {
    if (LogEventErrorCodeEnum::NoCaptureGroups == error_enum) {
        return "LogEvent NoCaptureGroups";
    }
    if (LogEventErrorCodeEnum::NoCaptureGroupMatch == error_enum) {
        return "LogEvent NoCaptureGroupMatch";
    }
    return "Unrecognized LogEventErrorCode";
}
60 changes: 55 additions & 5 deletions src/log_surgeon/LogEvent.hpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
#ifndef LOG_SURGEON_LOG_EVENT_HPP
#define LOG_SURGEON_LOG_EVENT_HPP

#include <cstddef>
#include <cstdint>
#include <memory>
#include <optional>
#include <string>
#include <vector>

#include <ystdlib/error_handling/ErrorCode.hpp>
#include <ystdlib/error_handling/Result.hpp>

#include <log_surgeon/finite_automata/Capture.hpp>
#include <log_surgeon/LogParserOutputBuffer.hpp>
#include <log_surgeon/Token.hpp>

Expand Down Expand Up @@ -93,13 +100,16 @@ class LogEventView {
[[nodiscard]] auto to_string() const -> std::string;

/**
* Constructs a user friendly/readable representation of the log event's
* logtype. A logtype is essentially the static text of a log event with the
* variable components replaced with their name. Therefore, two separate log
* events from the same logging source code may have the same logtype.
* Constructs a user friendly/readable representation of the log event's logtype. A logtype is
* essentially the static text of a log event with the variable components replaced with their
* name. Therefore, two separate log events from the same logging source code may have the same
* logtype.
*
* If a schema variable may contain capture groups, any leaf capture group matches will be
* replaced with their name while all other text is treated as static text.
* @return The logtype of the log.
*/
auto get_logtype() const -> std::string;
[[nodiscard]] auto get_logtype() const -> std::string;

/**
* Adds a Token to the array of tokens of a particular token type.
Expand All @@ -115,6 +125,37 @@ class LogEventView {
m_log_var_occurrences[token_type_id].push_back(token_ptr);
}

/**
* Retrieves the position of the match for `capture` within `root_var`. `root_var` must be the
* root parent variable containing `capture`.
* @param root_var The parent log surgeon schema variable for `capture`.
* @param capture The capture group type.
* @return A result containing a `Token::CaptureMatchPosition` on success, or an error code
* indicating the failure:
* - LogEventErrorCodeEnum::NoCaptureGroupMatch if `root_var` contains no valid match positions
* for `capture`.
*/
[[nodiscard]] auto get_capture_position(
Token const& root_var,
finite_automata::Capture const* const& capture
) const -> ystdlib::error_handling::Result<Token::CaptureMatchPosition>;

/**
* Returns the capture group matches within `root_var` sorted by their appearance within the
* text, with parent capture groups appearing before their children. More formally, they are
* sorted by increasing start position and then by decreasing end position.
*
* Since capture groups can only overlap when nested and cannot span across the boundary of
* another group, a capture group is a leaf if its end position is less than or equal to the
* start position of the next capture group (or it is the last capture group).
* @param root_var The root variable to get the capture groups from.
* @return A result containing the sorted capture group matches (empty if no matches were
* found), or an error code indicating the failure:
* - LogEventErrorCodeEnum::NoCaptureGroups if no capture groups exist for `root_var`.
* - Forwards any error from `get_capture_position` other than
* LogEventErrorCodeEnum::NoCaptureGroupMatch.
*/
[[nodiscard]] auto get_capture_matches(log_surgeon::Token const& root_var) const
-> ystdlib::error_handling::Result<std::vector<Token::CaptureMatch>>;

// TODO: have LogParser own the output buffer as a LogEventView is already
// tied to a single log parser
std::unique_ptr<LogParserOutputBuffer> m_log_output_buffer;
Expand Down Expand Up @@ -143,6 +184,15 @@ class LogEvent : public LogEventView {
private:
std::vector<char> m_buffer;
};

/**
 * Error codes produced by the capture group queries of `LogEventView`.
 */
enum class LogEventErrorCodeEnum : uint8_t {
    // The rule of the queried variable has no capture groups.
    NoCaptureGroups = 0,
    // The queried capture group has no valid match position in the variable.
    NoCaptureGroupMatch = 1,
};

using LogEventErrorCode = ystdlib::error_handling::ErrorCode<LogEventErrorCodeEnum>;
} // namespace log_surgeon

YSTDLIB_ERROR_HANDLING_MARK_AS_ERROR_CODE_ENUM(log_surgeon::LogEventErrorCodeEnum);

#endif // LOG_SURGEON_LOG_EVENT_HPP
Loading
Loading