Skip to content

Commit 96c3ef9

Browse files
committed
A bit of refactoring and corrections recommended by coderabbitai
1 parent 303d4cb commit 96c3ef9

File tree

5 files changed

+48
-19
lines changed

5 files changed

+48
-19
lines changed

components/core/src/clp_s/CommandLineArguments.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,11 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
426426
po::value<size_t>(&m_max_document_size)->value_name("DOC_SIZE")->
427427
default_value(m_max_document_size),
428428
"Maximum allowed size (B) for a single document before ir generation fails."
429+
)(
430+
"max-ir-buffer-size",
431+
po::value<size_t>(&m_max_ir_buffer_size)->value_name("BUFFER_SIZE")->
432+
default_value(m_max_ir_buffer_size),
433+
"Maximum allowed size (B) for an in-memory IR buffer before being written to file."
429434
)(
430435
"encoding-type",
431436
po::value<int>(&m_encoding_type)->value_name("ENCODING_TYPE")->
@@ -1040,11 +1045,10 @@ void CommandLineArguments::print_search_usage() const {
10401045
}
10411046

10421047
void CommandLineArguments::print_json_to_ir_usage() const {
1043-
std::cerr << "Usage: " << m_program_name << " r [OPTIONS] IRS_DIR [FILE/DIR ...]" << std::endl;
1048+
std::cerr << "Usage: " << m_program_name << " r [OPTIONS] IRS_DIR [FILE/DIR ...]\n";
10441049
}
10451050

10461051
void CommandLineArguments::print_ir_compression_usage() const {
1047-
std::cerr << "Usage: " << m_program_name << " i [OPTIONS] ARCHIVES_DIR [FILE/DIR ...]"
1048-
<< std::endl;
1052+
std::cerr << "Usage: " << m_program_name << " i [OPTIONS] ARCHIVES_DIR [FILE/DIR ...]\n";
10491053
}
10501054
} // namespace clp_s

components/core/src/clp_s/CommandLineArguments.hpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,9 @@ class CommandLineArguments {
6262

6363
size_t get_max_document_size() const { return m_max_document_size; }
6464

65-
int get_encoding_type() const { return m_encoding_type; }
65+
[[nodiscard]] auto get_max_ir_buffer_size() const -> size_t { return m_max_ir_buffer_size; }
66+
67+
[[nodiscard]] auto get_encoding_type() const -> int { return m_encoding_type; }
6668

6769
[[nodiscard]] bool print_archive_stats() const { return m_print_archive_stats; }
6870

@@ -182,6 +184,7 @@ class CommandLineArguments {
182184
bool m_ordered_decompression{false};
183185
size_t m_ordered_chunk_size{0};
184186
int m_encoding_type{8};
187+
size_t m_max_ir_buffer_size{512ULL * 1024 * 1024};
185188
// Metadata db variables
186189
std::optional<clp::GlobalMetadataDBConfig> m_metadata_db_config;
187190

components/core/src/clp_s/JsonParser.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "DictionaryWriter.hpp"
1919
#include "JsonFileIterator.hpp"
2020
#include "ParsedMessage.hpp"
21+
#include "SchemaTree.hpp"
2122

2223
using namespace simdjson;
2324

components/core/src/clp_s/JsonParser.hpp

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "ArchiveWriter.hpp"
2222
#include "ParsedMessage.hpp"
2323
#include "Schema.hpp"
24+
#include "SchemaTree.hpp"
2425

2526
using clp::BufferReader;
2627
using clp::ffi::ir_stream::Deserializer;
@@ -44,6 +45,7 @@ struct JsonToIRParserOption {
4445
std::vector<std::string> file_paths;
4546
std::string irs_dir;
4647
size_t max_document_size;
48+
size_t max_ir_buffer_size;
4749
int compression_level;
4850
int encoding;
4951
};
@@ -93,11 +95,11 @@ class JsonParser {
9395
void parse_line(ondemand::value line, int32_t parent_node_id, std::string const& key);
9496

9597
/**
96-
* Compresses the input files specified by the command line arguments into an archive.
98+
* Determines the archive node type based on the IR node type and value.
9799
* @param ir_node_type schema node type from the IR stream
98-
* @param node_has_value Boolean that say whether or not the node has value.
99-
* @param node_value The ir schema node value if the node has value
100-
* @return The clp-s archive Node Type that shoudl be used for the archive node
100+
* @param node_has_value Boolean that says whether or not the node has value.
101+
* @param node_value The IR schema node value if the node has value
102+
* @return The clp-s archive Node Type that should be used for the archive node
101103
*/
102104
static auto get_archive_node_type(
103105
clp::ffi::SchemaTreeNode::Type ir_node_type,
@@ -109,9 +111,9 @@ class JsonParser {
109111
* Get archive node id for ir node
110112
* @param ir_node_to_archive_node_unordered_map cache of node id conversions between
111113
* deserializer schema tree nodes and archive schema tree nodes
112-
* @param irNodeID
113-
* @param irType
114-
* @param irTree
114+
* @param ir_node_id ID of the IR node
115+
* @param archive_node_type Type of the archive node
116+
* @param ir_tree The IR schema tree
115117
*/
116118
auto get_archive_node_id(
117119
std::unordered_map<int32_t, std::vector<std::pair<NodeType, int32_t>>>&

components/core/src/clp_s/clp-s.cpp

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,6 @@ using clp_s::CommandLineArguments;
4848

4949
namespace {
5050

51-
size_t max_ir_buffer_size = 1'000'000'000;
52-
5351
/**
5452
* Compresses the input files specified by the command line arguments into an archive.
5553
* @param command_line_arguments
@@ -87,6 +85,17 @@ auto run_serializer(clp_s::JsonToIRParserOption const& option, std::string path)
8785
*/
8886
auto generate_ir(CommandLineArguments const& command_line_arguments) -> bool;
8987

88+
/**
89+
* Fill in JsonParserOption instance based on command line user input
90+
* @param command_line_arguments
91+
* @param option
92+
* @return Whether setup was successful
93+
*/
94+
auto setup_compression_options(
95+
CommandLineArguments const& command_line_arguments,
96+
clp_s::JsonParserOption& option
97+
) -> bool;
98+
9099
/**
91100
* Compresses the input IR files specified by the command line arguments into an archive.
92101
* @param command_line_arguments
@@ -238,7 +247,7 @@ auto run_serializer(clp_s::JsonToIRParserOption const& option, std::string path)
238247
return false;
239248
}
240249
flush_and_clear_serializer_buffer(serializer, ir_buf);
241-
if (ir_buf.size() >= max_ir_buffer_size) {
250+
if (ir_buf.size() >= option.max_ir_buffer_size) {
242251
total_size = total_size + ir_buf.size();
243252
zc.write(reinterpret_cast<char*>(ir_buf.data()), ir_buf.size());
244253
zc.flush();
@@ -278,11 +287,13 @@ auto generate_ir(CommandLineArguments const& command_line_arguments) -> bool {
278287
option.file_paths = command_line_arguments.get_file_paths();
279288
option.irs_dir = irs_dir.string();
280289
option.max_document_size = command_line_arguments.get_max_document_size();
290+
option.max_ir_buffer_size = command_line_arguments.get_max_ir_buffer_size();
281291
option.compression_level = command_line_arguments.get_compression_level();
282292
option.encoding = command_line_arguments.get_encoding_type();
283293

284294
if (false == clp_s::FileUtils::validate_path(option.file_paths)) {
285-
exit(1);
295+
SPDLOG_ERROR("Invalid file path(s) provided");
296+
return false;
286297
}
287298

288299
std::vector<std::string> all_file_paths;
@@ -304,9 +315,11 @@ auto generate_ir(CommandLineArguments const& command_line_arguments) -> bool {
304315
return true;
305316
}
306317

307-
auto ir_compress(CommandLineArguments const& command_line_arguments) -> bool {
318+
auto setup_compression_options(
319+
CommandLineArguments const& command_line_arguments,
320+
clp_s::JsonParserOption& option
321+
) -> bool {
308322
auto archives_dir = std::filesystem::path(command_line_arguments.get_archives_dir());
309-
310323
// Create output directory in case it doesn't exist
311324
try {
312325
std::filesystem::create_directory(archives_dir.string());
@@ -318,8 +331,6 @@ auto ir_compress(CommandLineArguments const& command_line_arguments) -> bool {
318331
);
319332
return false;
320333
}
321-
322-
clp_s::JsonParserOption option{};
323334
option.file_paths = command_line_arguments.get_file_paths();
324335
option.archives_dir = archives_dir.string();
325336
option.target_encoded_size = command_line_arguments.get_target_encoded_size();
@@ -340,6 +351,14 @@ auto ir_compress(CommandLineArguments const& command_line_arguments) -> bool {
340351
db_config.get_metadata_table_prefix()
341352
);
342353
}
354+
return true;
355+
}
356+
357+
auto ir_compress(CommandLineArguments const& command_line_arguments) -> bool {
358+
clp_s::JsonParserOption option{};
359+
if (false == setup_compression_options(command_line_arguments, option)) {
360+
return false;
361+
}
343362

344363
clp_s::JsonParser parser(option);
345364
if (false == parser.parse_from_ir()) {

0 commit comments

Comments
 (0)