Skip to content

Commit 303d4cb

Browse files
committed
map converted to unordered_map and various documentation, linting, and clang-tidying
1 parent 2facdd0 commit 303d4cb

File tree

6 files changed

+172
-111
lines changed

6 files changed

+172
-111
lines changed

components/core/src/clp_s/CMakeLists.txt

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4,32 +4,12 @@ set(
44
CLP_SOURCES
55
../clp/BufferReader.cpp
66
../clp/BufferReader.hpp
7-
../clp/Defs.h
8-
../clp/ErrorCode.hpp
9-
../clp/FileDescriptor.cpp
10-
../clp/FileDescriptor.hpp
11-
../clp/GlobalMetadataDB.hpp
12-
../clp/GlobalMetadataDBConfig.cpp
13-
../clp/GlobalMetadataDBConfig.hpp
14-
../clp/GlobalMySQLMetadataDB.cpp
15-
../clp/GlobalMySQLMetadataDB.hpp
16-
../clp/MySQLDB.cpp
17-
../clp/MySQLDB.hpp
18-
../clp/MySQLParamBindings.cpp
19-
../clp/MySQLParamBindings.hpp
20-
../clp/MySQLPreparedStatement.cpp
21-
../clp/MySQLPreparedStatement.hpp
22-
../clp/ReadOnlyMemoryMappedFile.cpp
23-
../clp/ReadOnlyMemoryMappedFile.hpp
24-
../clp/ReaderInterface.cpp
25-
../clp/ReaderInterface.hpp
26-
../clp/TraceableException.hpp
27-
../clp/WriterInterface.cpp
28-
../clp/WriterInterface.hpp
297
../clp/cli_utils.cpp
308
../clp/cli_utils.hpp
319
../clp/database_utils.cpp
3210
../clp/database_utils.hpp
11+
../clp/Defs.h
12+
../clp/ErrorCode.hpp
3313
../clp/ffi/KeyValuePairLogEvent.cpp
3414
../clp/ffi/KeyValuePairLogEvent.hpp
3515
../clp/ffi/SchemaTree.cpp
@@ -51,21 +31,41 @@ set(
5131
../clp/ffi/ir_stream/utils.hpp
5232
../clp/ffi/utils.cpp
5333
../clp/ffi/utils.hpp
34+
../clp/FileDescriptor.cpp
35+
../clp/FileDescriptor.hpp
36+
../clp/GlobalMetadataDB.hpp
37+
../clp/GlobalMetadataDBConfig.cpp
38+
../clp/GlobalMetadataDBConfig.hpp
39+
../clp/GlobalMySQLMetadataDB.cpp
40+
../clp/GlobalMySQLMetadataDB.hpp
5441
../clp/ir/EncodedTextAst.cpp
5542
../clp/ir/EncodedTextAst.hpp
5643
../clp/ir/parsing.cpp
5744
../clp/ir/parsing.hpp
5845
../clp/ir/types.hpp
46+
../clp/MySQLDB.cpp
47+
../clp/MySQLDB.hpp
48+
../clp/MySQLParamBindings.cpp
49+
../clp/MySQLParamBindings.hpp
50+
../clp/MySQLPreparedStatement.cpp
51+
../clp/MySQLPreparedStatement.hpp
5952
../clp/networking/socket_utils.cpp
6053
../clp/networking/socket_utils.hpp
54+
../clp/ReadOnlyMemoryMappedFile.cpp
55+
../clp/ReadOnlyMemoryMappedFile.hpp
56+
../clp/ReaderInterface.cpp
57+
../clp/ReaderInterface.hpp
6158
../clp/streaming_archive/ArchiveMetadata.cpp
6259
../clp/streaming_archive/ArchiveMetadata.hpp
6360
../clp/streaming_compression/zstd/Decompressor.cpp
6461
../clp/streaming_compression/zstd/Decompressor.hpp
6562
../clp/time_types.hpp
63+
../clp/TraceableException.hpp
6664
../clp/type_utils.hpp
6765
../clp/utf8_utils.cpp
6866
../clp/utf8_utils.hpp
67+
../clp/WriterInterface.cpp
68+
../clp/WriterInterface.hpp
6969
)
7070

7171
set(

components/core/src/clp_s/CommandLineArguments.cpp

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -129,8 +129,8 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
129129
case (char)Command::Compress:
130130
case (char)Command::Extract:
131131
case (char)Command::Search:
132-
case (char)Command::Json_To_IR:
133-
case (char)Command::IR_Compress:
132+
case (char)Command::JsonToIr:
133+
case (char)Command::IrCompress:
134134
m_command = (Command)command_input;
135135
break;
136136
default:
@@ -270,7 +270,7 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
270270

271271
m_metadata_db_config = std::move(metadata_db_config);
272272
}
273-
} else if (Command::IR_Compress == m_command) {
273+
} else if (Command::IrCompress == m_command) {
274274
po::options_description compression_positional_options;
275275
// clang-format off
276276
compression_positional_options.add_options()(
@@ -348,11 +348,11 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
348348
po::notify(parsed_command_line_options);
349349

350350
if (parsed_command_line_options.count("help")) {
351-
print_IR_compression_usage();
351+
print_ir_compression_usage();
352352

353-
std::cerr << "Examples:" << std::endl;
354-
std::cerr << " # Compress file1.ir and dir1 into archives-dir" << std::endl;
355-
std::cerr << " " << m_program_name << " i archives-dir file1.ir dir1" << std::endl;
353+
std::cerr << "Examples:\n";
354+
std::cerr << " # Compress file1.ir and dir1 into archives-dir\n";
355+
std::cerr << " " << m_program_name << " i archives-dir file1.ir dir1\n";
356356

357357
po::options_description visible_options;
358358
visible_options.add(general_options);
@@ -398,7 +398,7 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
398398

399399
m_metadata_db_config = std::move(metadata_db_config);
400400
}
401-
} else if ((char)Command::Json_To_IR == command_input) {
401+
} else if ((char)Command::JsonToIr == command_input) {
402402
po::options_description compression_positional_options;
403403
// clang-format off
404404
compression_positional_options.add_options()(
@@ -466,11 +466,11 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
466466
po::notify(parsed_command_line_options);
467467

468468
if (parsed_command_line_options.count("help")) {
469-
print_json_to_IR_usage();
469+
print_json_to_ir_usage();
470470

471-
std::cerr << "Examples:" << std::endl;
472-
std::cerr << " # Parse file1.json and dir1 into irs-dir" << std::endl;
473-
std::cerr << " " << m_program_name << " r irs-dir file1.json dir1" << std::endl;
471+
std::cerr << "Examples:\n";
472+
std::cerr << " # Parse file1.json and dir1 into irs-dir\n";
473+
std::cerr << " " << m_program_name << " r irs-dir file1.json dir1\n";
474474

475475
po::options_description visible_options;
476476
visible_options.add(general_options);
@@ -1039,11 +1039,11 @@ void CommandLineArguments::print_search_usage() const {
10391039
<< std::endl;
10401040
}
10411041

1042-
void CommandLineArguments::print_json_to_IR_usage() const {
1042+
void CommandLineArguments::print_json_to_ir_usage() const {
10431043
std::cerr << "Usage: " << m_program_name << " r [OPTIONS] IRS_DIR [FILE/DIR ...]" << std::endl;
10441044
}
10451045

1046-
void CommandLineArguments::print_IR_compression_usage() const {
1046+
void CommandLineArguments::print_ir_compression_usage() const {
10471047
std::cerr << "Usage: " << m_program_name << " i [OPTIONS] ARCHIVES_DIR [FILE/DIR ...]"
10481048
<< std::endl;
10491049
}

components/core/src/clp_s/CommandLineArguments.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@ class CommandLineArguments {
2727
Compress = 'c',
2828
Extract = 'x',
2929
Search = 's',
30-
Json_To_IR = 'r',
31-
IR_Compress = 'i'
30+
JsonToIr = 'r',
31+
IrCompress = 'i'
3232
};
3333

3434
enum class OutputHandlerType : uint8_t {
@@ -161,9 +161,9 @@ class CommandLineArguments {
161161

162162
void print_search_usage() const;
163163

164-
void print_json_to_IR_usage() const;
164+
void print_json_to_ir_usage() const;
165165

166-
void print_IR_compression_usage() const;
166+
void print_ir_compression_usage() const;
167167

168168
// Variables
169169
std::string m_program_name;

components/core/src/clp_s/JsonParser.cpp

Lines changed: 51 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,28 @@
11
#include "JsonParser.hpp"
22

3-
#include <iostream>
3+
#include <cstdint>
4+
#include <cstdlib>
5+
#include <optional>
46
#include <stack>
7+
#include <unordered_map>
58

69
#include <simdjson.h>
710
#include <spdlog/spdlog.h>
811

9-
#include "archive_constants.hpp"
12+
#include "../clp/ffi/SchemaTree.hpp"
13+
#include "../clp/ffi/SchemaTreeNode.hpp"
14+
#include "../clp/ffi/utils.hpp"
15+
#include "../clp/ffi/Value.hpp"
16+
#include "../clp/ir/types.hpp"
17+
#include "../clp/streaming_compression/zstd/Decompressor.hpp"
18+
#include "DictionaryWriter.hpp"
1019
#include "JsonFileIterator.hpp"
20+
#include "ParsedMessage.hpp"
21+
22+
using namespace simdjson;
1123

1224
namespace clp_s {
25+
1326
JsonParser::JsonParser(JsonParserOption const& option)
1427
: m_num_messages(0),
1528
m_target_encoded_size(option.target_encoded_size),
@@ -520,13 +533,13 @@ bool JsonParser::parse() {
520533
return true;
521534
}
522535

523-
NodeType get_archive_node_type(
536+
auto JsonParser::get_archive_node_type(
524537
clp::ffi::SchemaTreeNode::Type ir_node_type,
525538
bool node_has_value,
526539
std::optional<clp::ffi::Value> const& node_value
527-
) {
540+
) -> NodeType {
528541
// figure out what type the node is in archive node type
529-
NodeType archive_node_type;
542+
NodeType archive_node_type = NodeType::Unknown;
530543
switch (ir_node_type) {
531544
case clp::ffi::SchemaTreeNode::Type::Int:
532545
archive_node_type = NodeType::Integer;
@@ -559,29 +572,33 @@ NodeType get_archive_node_type(
559572
}
560573
break;
561574
default:
562-
archive_node_type = NodeType::Unknown;
563575
break;
564576
}
565577
return archive_node_type;
566578
}
567579

568-
//
569-
int JsonParser::get_archive_node_id(
570-
std::map<std::tuple<int32_t, NodeType>, int32_t>& ir_node_to_archive_node_map,
571-
int ir_node_id,
580+
auto JsonParser::get_archive_node_id(
581+
std::unordered_map<int32_t, std::vector<std::pair<NodeType, int32_t>>>&
582+
ir_node_to_archive_node_unordered_map,
583+
int32_t ir_node_id,
572584
NodeType archive_node_type,
573585
clp::ffi::SchemaTree const& ir_tree
574-
) {
575-
auto key = std::make_tuple(ir_node_id, archive_node_type);
576-
auto map_location = ir_node_to_archive_node_map.find(key);
577-
if (ir_node_to_archive_node_map.end() != map_location) {
578-
return map_location->second;
586+
) -> int {
587+
auto unordered_map_location = ir_node_to_archive_node_unordered_map.find(ir_node_id);
588+
if (ir_node_to_archive_node_unordered_map.end() != unordered_map_location) {
589+
auto translation_vector = unordered_map_location->second;
590+
for (int i = 0; i < translation_vector.size(); i++) {
591+
if (translation_vector[i].first == archive_node_type) {
592+
return translation_vector[i].second;
593+
}
594+
}
579595
}
580-
auto& curr_node = ir_tree.get_node(ir_node_id);
596+
597+
auto const& curr_node = ir_tree.get_node(ir_node_id);
581598
int32_t parent_node_id{-1};
582599
if (ir_node_id != curr_node.get_parent_id()) {
583600
parent_node_id = get_archive_node_id(
584-
ir_node_to_archive_node_map,
601+
ir_node_to_archive_node_unordered_map,
585602
curr_node.get_parent_id(),
586603
NodeType::Object,
587604
ir_tree
@@ -597,16 +614,23 @@ int JsonParser::get_archive_node_id(
597614
}
598615
int curr_node_archive_id
599616
= m_archive_writer->add_node(parent_node_id, archive_node_type, node_key);
600-
ir_node_to_archive_node_map.emplace(std::move(key), curr_node_archive_id);
617+
auto p = std::make_pair(archive_node_type, curr_node_archive_id);
618+
if (ir_node_to_archive_node_unordered_map.end() != unordered_map_location) {
619+
unordered_map_location->second.push_back(p);
620+
} else {
621+
std::vector<std::pair<NodeType, int32_t>> v;
622+
v.push_back(p);
623+
ir_node_to_archive_node_unordered_map.emplace(ir_node_id, v);
624+
}
601625
return curr_node_archive_id;
602626
}
603627

604628
void JsonParser::parse_kv_log_event(
605629
KeyValuePairLogEvent const& kv,
606-
std::map<std::tuple<int32_t, NodeType>, int32_t>& ir_node_to_archive_node_map
630+
std::unordered_map<int32_t, std::vector<std::pair<NodeType, int32_t>>>&
631+
ir_node_to_archive_node_unordered_map
607632
) {
608633
clp::ffi::SchemaTree const& tree = kv.get_schema_tree();
609-
610634
for (auto const& pair : kv.get_node_id_value_pairs()) {
611635
clp::ffi::SchemaTreeNode const& tree_node = tree.get_node(pair.first);
612636
clp::ffi::SchemaTreeNode::Type ir_node_type = tree_node.get_type();
@@ -621,7 +645,7 @@ void JsonParser::parse_kv_log_event(
621645
int node_id;
622646
try {
623647
node_id = get_archive_node_id(
624-
ir_node_to_archive_node_map,
648+
ir_node_to_archive_node_unordered_map,
625649
pair.first,
626650
archive_node_type,
627651
tree
@@ -705,11 +729,11 @@ void JsonParser::parse_kv_log_event(
705729
int32_t current_schema_id = m_archive_writer->add_schema(m_current_schema);
706730
m_current_parsed_message.set_id(current_schema_id);
707731
m_archive_writer->append_message(current_schema_id, m_current_schema, m_current_parsed_message);
708-
return;
709732
}
710733

711-
bool JsonParser::parse_from_IR() {
712-
std::map<std::tuple<int32_t, NodeType>, int32_t> ir_node_to_archive_node_map;
734+
auto JsonParser::parse_from_ir() -> bool {
735+
std::unordered_map<int32_t, std::vector<std::pair<NodeType, int32_t>>>
736+
ir_node_to_archive_node_unordered_map;
713737

714738
for (auto& file_path : m_file_paths) {
715739
int fsize = std::filesystem::file_size(file_path);
@@ -742,7 +766,7 @@ bool JsonParser::parse_from_IR() {
742766
m_current_schema.clear();
743767
auto const& kv_log_event = kv_log_event_result.value();
744768
try {
745-
parse_kv_log_event(kv_log_event, ir_node_to_archive_node_map);
769+
parse_kv_log_event(kv_log_event, ir_node_to_archive_node_unordered_map);
746770
} catch (std::string msg) {
747771
SPDLOG_ERROR("ERROR: {}" + msg);
748772
zd.close();
@@ -754,14 +778,14 @@ bool JsonParser::parse_from_IR() {
754778
}
755779
m_num_messages++;
756780
if (m_archive_writer->get_data_size() >= m_target_encoded_size) {
757-
ir_node_to_archive_node_map.clear();
781+
ir_node_to_archive_node_unordered_map.clear();
758782
split_archive();
759783
}
760784

761785
m_current_parsed_message.clear();
762786

763787
} while (true);
764-
ir_node_to_archive_node_map.clear();
788+
ir_node_to_archive_node_unordered_map.clear();
765789
zd.close();
766790
}
767791
return true;

0 commit comments

Comments
 (0)