Skip to content
Merged
Show file tree
Hide file tree
Changes from 40 commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
402fda4
Add stub for log converter binary.
gibber9809 Oct 16, 2025
5e06d36
Implement stub which uses log-surgeon to extract timestamps from ever…
gibber9809 Oct 17, 2025
c21bdd8
Fix stub implementation.
gibber9809 Oct 20, 2025
9b3badc
Fix move constructor and move assignment operator in clp_s::FileWriter.
gibber9809 Oct 20, 2025
818826e
Apply lint fix to FileWriter
gibber9809 Oct 20, 2025
46e2d21
Convert parsed logs into kv-ir.
gibber9809 Oct 20, 2025
675bd44
Add output-dir command line argument.
gibber9809 Oct 20, 2025
83d7102
Update timestamp schema to accept month names
gibber9809 Oct 20, 2025
57714f3
Separate out classes into different files.
gibber9809 Oct 21, 2025
68467a4
Add docstrings to classes and methods.
gibber9809 Oct 21, 2025
46e5d92
Add log-converter to core build.
gibber9809 Oct 21, 2025
413b1f0
Fix clang-tidy errors in command line arguments helper class.
gibber9809 Oct 21, 2025
7f3a2e0
Fix almost all clang-tidy warnings in LogSerializer
gibber9809 Oct 21, 2025
2e7ae46
Fix clang-tidy warnings in LogConverter
gibber9809 Oct 21, 2025
99b66b3
Fix clang-tidy errors in log_converter
gibber9809 Oct 21, 2025
3c0d369
Add missing newline
gibber9809 Oct 21, 2025
26449d8
Fix bug where timestamp isn't parsed in first log message.
gibber9809 Oct 21, 2025
49452ce
Address rabbit comments.
gibber9809 Oct 21, 2025
55fd3bf
Apply suggestions from code review
gibber9809 Oct 21, 2025
1a6f780
Update taskfile to include log-converter in package build.
gibber9809 Oct 21, 2025
1729a75
Apply code review comments.
gibber9809 Oct 21, 2025
871b16a
Update test for clp core binaries.
gibber9809 Oct 21, 2025
7171829
Merge remote-tracking branch 'upstream/main' into log-converter
gibber9809 Oct 21, 2025
f0a5fb7
Fix up nodiscards
gibber9809 Oct 21, 2025
e4cc3db
Fix bugs introduced during refactoring.
gibber9809 Oct 22, 2025
daa81ba
Merge branch 'main' into log-converter
gibber9809 Oct 22, 2025
b12ef31
Merge branch 'main' into log-converter
kirkrodrigues Oct 23, 2025
b3e7c4b
Apply suggestions from code review
gibber9809 Oct 23, 2025
bae33e0
Address review comments.
gibber9809 Oct 23, 2025
3d3bc4c
Merge remote-tracking branch 'upstream/main' into log-converter
gibber9809 Oct 23, 2025
3c784c3
Remove unnecessary includes.
gibber9809 Oct 23, 2025
babad69
Apply suggestions from code review
gibber9809 Oct 23, 2025
1e1de58
Address review comments
gibber9809 Oct 23, 2025
a3c899e
Fix error introduced in rebase
gibber9809 Oct 23, 2025
0cafbcf
Address rabbit comments
gibber9809 Oct 23, 2025
b598862
Merge remote-tracking branch 'upstream/main' into log-converter
gibber9809 Oct 24, 2025
b1cfead
Update log-surgeon dependency to 840f262 pull in fix.
gibber9809 Oct 24, 2025
bc156d5
Update expected log-surgeon error message in unit tests.
gibber9809 Oct 24, 2025
9d4b53c
Apply suggestions from code review
gibber9809 Oct 24, 2025
653e451
Address review comments.
gibber9809 Oct 24, 2025
a3e28c9
Fix docstrings.
gibber9809 Oct 24, 2025
2488622
Rename files-from argument to inputs-from.
gibber9809 Oct 24, 2025
e5274db
Improve comment in LogSerializer to indicate why we serialize timesta…
gibber9809 Oct 24, 2025
1d17501
Merge branch 'main' into log-converter
gibber9809 Oct 24, 2025
dc06942
Nit clang-tidy fix.
LinZhihao-723 Oct 25, 2025
1778251
Small docstring fix.
LinZhihao-723 Oct 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions components/core/src/clp_s/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
add_subdirectory(indexer)
add_subdirectory(log_converter)
add_subdirectory(search)

set(
Expand Down
19 changes: 19 additions & 0 deletions components/core/src/clp_s/FileWriter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

#include <cstdio>
#include <string>
#include <utility>

#include "ErrorCode.hpp"
#include "TraceableException.hpp"
Expand All @@ -29,6 +30,24 @@ class FileWriter {
// Constructors
// Creates a writer that holds no file: m_file is null and m_fd is -1.
FileWriter() : m_file(nullptr), m_fd(-1) {}

// Delete copy constructor and assignment operator
FileWriter(FileWriter const&) = delete;
auto operator=(FileWriter const&) -> FileWriter& = delete;

// Define custom move constructor and assignment operator
/**
 * Move constructor. Takes over `writer`'s file handle and descriptor and resets `writer` to hold
 * no file (m_file == nullptr, m_fd == -1).
 *
 * Declared noexcept since it only exchanges a pointer and a descriptor; this lets standard
 * containers move instances instead of falling back to (deleted) copies.
 * @param writer The writer to move from.
 */
FileWriter(FileWriter&& writer) noexcept
        : m_file{std::exchange(writer.m_file, nullptr)},
          m_fd{std::exchange(writer.m_fd, -1)} {}

/**
 * Move assignment. Takes over `writer`'s file handle and descriptor and resets `writer` to hold
 * no file (m_file == nullptr, m_fd == -1). Self-assignment is a no-op.
 *
 * NOTE(review): any handle this writer held before the assignment is overwritten here without
 * being closed; presumably callers are expected to close the writer first -- confirm.
 * @param writer The writer to move from.
 * @return A reference to this writer.
 */
auto operator=(FileWriter&& writer) noexcept -> FileWriter& {
    if (this != &writer) {
        m_file = std::exchange(writer.m_file, nullptr);
        m_fd = std::exchange(writer.m_fd, -1);
    }
    return *this;
}

// Destructor
~FileWriter();

Expand Down
37 changes: 37 additions & 0 deletions components/core/src/clp_s/log_converter/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Sources and headers compiled into the log-converter executable, excluding its entry point
# (log_converter.cpp), which is added directly to the target below.
set(
CLP_S_LOG_CONVERTER_SOURCES
CommandLineArguments.cpp
CommandLineArguments.hpp
LogConverter.cpp
LogConverter.hpp
LogSerializer.cpp
LogSerializer.hpp
)

# The log-converter binary is only defined when CLP_BUILD_EXECUTABLES is enabled.
if(CLP_BUILD_EXECUTABLES)
add_executable(
log-converter
log_converter.cpp
${CLP_S_LOG_CONVERTER_SOURCES}
)
# Require C++20 for this target.
target_compile_features(log-converter PRIVATE cxx_std_20)
target_link_libraries(
log-converter
PRIVATE
Boost::program_options
clp_s::clp_dependencies
clp_s::io
fmt::fmt
log_surgeon::log_surgeon
msgpack-cxx
nlohmann_json::nlohmann_json
spdlog::spdlog
ystdlib::containers
ystdlib::error_handling
)
# Emit the binary directly into ${PROJECT_BINARY_DIR}.
set_target_properties(
log-converter
PROPERTIES
RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}"
)
endif()
202 changes: 202 additions & 0 deletions components/core/src/clp_s/log_converter/CommandLineArguments.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
#include "CommandLineArguments.hpp"

#include <exception>
#include <iostream>
#include <stdexcept>
#include <string>
#include <string_view>
#include <vector>

#include <boost/program_options/options_description.hpp>
#include <boost/program_options/parsers.hpp>
#include <boost/program_options/positional_options.hpp>
#include <boost/program_options/value_semantic.hpp>
#include <boost/program_options/variables_map.hpp>
#include <fmt/format.h>
#include <spdlog/spdlog.h>

#include "../ErrorCode.hpp"
#include "../FileReader.hpp"
#include "../InputConfig.hpp"

namespace po = boost::program_options;

namespace clp_s::log_converter {
namespace {
// Authorization method constants: the values accepted by the `--auth` option. `cNoAuth` is the
// option's default; `cS3Auth` selects S3 authentication.
constexpr std::string_view cNoAuth{"none"};
constexpr std::string_view cS3Auth{"s3"};

/**
 * Reads newline-delimited paths from a file and appends every non-empty line to
 * `path_destination`.
 * @param input_path_list_file_path Path to the file containing the list of paths.
 * @param path_destination The vector that the paths are pushed into. If reading fails part-way
 * through, paths read before the failure remain in the vector.
 * @return true if the file was opened and read through to end-of-file, false otherwise.
 */
[[nodiscard]] auto read_paths_from_file(
std::string const& input_path_list_file_path,
std::vector<std::string>& path_destination
) -> bool;

auto read_paths_from_file(
std::string const& input_path_list_file_path,
std::vector<std::string>& path_destination
) -> bool {
FileReader reader;
auto error_code = reader.try_open(input_path_list_file_path);
if (ErrorCodeFileNotFound == error_code) {
SPDLOG_ERROR(
"Failed to open input path list file {} - file not found",
input_path_list_file_path
);
return false;
}
if (ErrorCodeSuccess != error_code) {
SPDLOG_ERROR("Error opening input path list file {}", input_path_list_file_path);
return false;
}

std::string line;
while (true) {
error_code = reader.try_read_to_delimiter('\n', false, false, line);
if (ErrorCodeSuccess != error_code) {
break;
}
if (false == line.empty()) {
path_destination.push_back(line);
}
}

if (ErrorCodeEndOfFile != error_code) {
return false;
}
return true;
}

/**
 * Checks the requested authorization method and records it in the network authorization options.
 * @param auth_method The authorization method name; "s3" and "none" are accepted.
 * @param auth The options to populate. Its method is set to `AuthMethod::S3PresignedUrlV4` for
 * "s3" and is left untouched for "none".
 * @throws std::invalid_argument if `auth_method` is not one of the accepted values
 */
void validate_network_auth(std::string_view auth_method, NetworkAuthOption& auth) {
    if (cNoAuth == auth_method) {
        // Nothing to configure when no authorization is requested.
        return;
    }
    if (cS3Auth != auth_method) {
        throw std::invalid_argument(fmt::format("Invalid authentication type \"{}\"", auth_method));
    }
    auth.method = AuthMethod::S3PresignedUrlV4;
}
} // namespace

/**
 * Parses the command line and populates the output directory, the resolved input paths, and the
 * network authorization options.
 *
 * Input paths are gathered from the positional arguments and, if `--files-from` is given, from
 * the newline-delimited file it names; each one is then resolved through
 * `get_input_files_for_raw_path`.
 * @param argc Number of entries in `argv`.
 * @param argv The command line arguments, with the program name in the first entry.
 * @return ParsingResult::InfoCommand if `--help` was requested and the help text was printed.
 * @return ParsingResult::Failure if no arguments were given, the path list file could not be
 * read, or any exception (including invalid paths, no input paths, or an invalid authorization
 * method) was raised while parsing; the error is logged and usage is printed where applicable.
 * @return ParsingResult::Success otherwise.
 */
auto CommandLineArguments::parse_arguments(int argc, char const** argv)
        -> CommandLineArguments::ParsingResult {
    if (1 == argc) {
        print_basic_usage();
        return ParsingResult::Failure;
    }

    try {
        po::variables_map parsed_command_line_options;

        po::options_description general_options("General options");
        general_options.add_options()("help,h", "Print help");

        // Positional arguments are registered as a hidden option so they are parsed but left out
        // of the help output below.
        po::options_description conversion_positional_options;
        std::vector<std::string> input_paths;
        // clang-format off
        conversion_positional_options.add_options()(
                "input-paths",
                po::value<std::vector<std::string>>(&input_paths)->value_name("PATHS"),
                "input paths"
        );
        // clang-format on

        po::options_description conversion_options("Conversion options");
        std::string input_path_list_file_path;
        std::string auth{cNoAuth};
        // clang-format off
        conversion_options.add_options()(
                "files-from,f",
                po::value<std::string>(&input_path_list_file_path)
                        ->value_name("FILE")
                        ->default_value(input_path_list_file_path),
                "Convert files specified in FILE."
        )(
                "output-dir",
                po::value<std::string>(&m_output_dir)
                        ->value_name("OUTPUT_DIR")
                        ->default_value(m_output_dir),
                "Output directory for converted files."
        )(
                "auth",
                po::value<std::string>(&auth)
                        ->value_name("AUTH_METHOD")
                        ->default_value(auth),
                "Type of authentication required for network requests (s3 | none). Authentication"
                " with s3 requires the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment"
                " variables, and optionally the AWS_SESSION_TOKEN environment variable."
        );
        // clang-format on

        // Every positional argument is treated as an input path.
        po::positional_options_description positional_options;
        positional_options.add("input-paths", -1);

        po::options_description all_conversion_options;
        all_conversion_options.add(general_options);
        all_conversion_options.add(conversion_options);
        all_conversion_options.add(conversion_positional_options);

        po::store(
                po::command_line_parser(argc, argv)
                        .options(all_conversion_options)
                        .positional(positional_options)
                        .run(),
                parsed_command_line_options
        );
        po::notify(parsed_command_line_options);

        if (parsed_command_line_options.contains("help")) {
            if (argc > 2) {
                SPDLOG_WARN("Ignoring all options besides --help.");
            }

            print_basic_usage();
            po::options_description visible_options;
            visible_options.add(general_options);
            visible_options.add(conversion_options);
            std::cerr << visible_options << '\n';
            return ParsingResult::InfoCommand;
        }

        // Paths listed in the file are appended after any positional paths.
        if (false == input_path_list_file_path.empty()) {
            if (false == read_paths_from_file(input_path_list_file_path, input_paths)) {
                SPDLOG_ERROR("Failed to read paths from {}", input_path_list_file_path);
                return ParsingResult::Failure;
            }
        }

        for (auto const& path : input_paths) {
            if (false == get_input_files_for_raw_path(path, m_input_paths)) {
                throw std::invalid_argument(fmt::format("Invalid input path \"{}\".", path));
            }
        }

        if (m_input_paths.empty()) {
            throw std::invalid_argument("No input paths specified.");
        }

        validate_network_auth(auth, m_network_auth);
    } catch (std::exception const& e) {
        // The exception is only inspected, so it is caught by const reference.
        SPDLOG_ERROR("{}", e.what());
        print_basic_usage();
        std::cerr << "Try " << get_program_name() << " --help for detailed usage instructions\n";
        return ParsingResult::Failure;
    }

    return ParsingResult::Success;
}

void CommandLineArguments::print_basic_usage() const {
std::cerr << "Usage: " << get_program_name() << " [INPUT_PATHS] [OPTIONS]\n";
}
} // namespace clp_s::log_converter
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// The guard name includes the `log_converter` component so that it cannot clash with a guard for
// a same-named header elsewhere under `clp_s`.
#ifndef CLP_S_LOG_CONVERTER_COMMANDLINEARGUMENTS_HPP
#define CLP_S_LOG_CONVERTER_COMMANDLINEARGUMENTS_HPP

#include <cstdint>
#include <string>
#include <string_view>
#include <vector>

#include "../InputConfig.hpp"

namespace clp_s::log_converter {
/**
 * Parses and stores the command line arguments of the log-converter program: the input paths, the
 * output directory, and the network authorization options.
 */
class CommandLineArguments {
public:
    // Types
    /**
     * Outcome of `parse_arguments`.
     */
    enum class ParsingResult : uint8_t {
        Success = 0,
        InfoCommand,
        Failure
    };

    // Constructors
    /**
     * @param program_name The program name shown in usage messages.
     */
    explicit CommandLineArguments(std::string_view program_name) : m_program_name{program_name} {}

    // Methods
    /**
     * Parses the given command line and stores the resulting options in this object.
     * @param argc Number of entries in `argv`.
     * @param argv The command line arguments.
     * @return The outcome of parsing.
     */
    [[nodiscard]] auto parse_arguments(int argc, char const** argv) -> ParsingResult;

    [[nodiscard]] auto get_program_name() const -> std::string const& { return m_program_name; }

    [[nodiscard]] auto get_input_paths() const -> std::vector<Path> const& { return m_input_paths; }

    [[nodiscard]] auto get_network_auth() const -> NetworkAuthOption const& {
        return m_network_auth;
    }

    [[nodiscard]] auto get_output_dir() const -> std::string const& { return m_output_dir; }

private:
    // Methods
    /**
     * Prints a one-line usage synopsis.
     */
    void print_basic_usage() const;

    // Variables
    std::string m_program_name;
    std::vector<Path> m_input_paths;
    NetworkAuthOption m_network_auth{};
    // Defaults to the current directory.
    std::string m_output_dir{"./"};
};
}  // namespace clp_s::log_converter

#endif // CLP_S_LOG_CONVERTER_COMMANDLINEARGUMENTS_HPP
Loading
Loading