Skip to content

Commit 1899c61

Browse files
committed
updates from first round of review, and linting
1 parent 9f7277b commit 1899c61

File tree

5 files changed

+123
-205
lines changed

5 files changed

+123
-205
lines changed

components/core/CMakeLists.txt

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -278,12 +278,6 @@ set(SOURCE_FILES_clp_s_unitTest
278278
src/clp_s/TimestampPattern.hpp
279279
src/clp_s/Utils.cpp
280280
src/clp_s/Utils.hpp
281-
src/clp_s/ZstdCompressor.hpp
282-
src/clp_s/ZstdCompressor.cpp
283-
src/clp_s/ZstdDecompressor.hpp
284-
src/clp_s/ZstdDecompressor.cpp
285-
src/clp_s/FileWriter.cpp
286-
src/clp_s/FileReader.cpp
287281
)
288282

289283
set(SOURCE_FILES_unitTest

components/core/src/clp_s/CMakeLists.txt

Lines changed: 40 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,13 @@ add_subdirectory(search/kql)
22

33
set(
44
CLP_SOURCES
5-
../clp/cli_utils.cpp
6-
../clp/cli_utils.hpp
7-
../clp/database_utils.cpp
8-
../clp/database_utils.hpp
5+
../clp/BufferReader.cpp
6+
../clp/BufferReader.hpp
97
../clp/Defs.h
108
../clp/ErrorCode.hpp
9+
../clp/ErrorCode.hpp
10+
../clp/FileDescriptor.cpp
11+
../clp/FileDescriptor.hpp
1112
../clp/GlobalMetadataDB.hpp
1213
../clp/GlobalMetadataDBConfig.cpp
1314
../clp/GlobalMetadataDBConfig.hpp
@@ -19,56 +20,55 @@ set(
1920
../clp/MySQLParamBindings.hpp
2021
../clp/MySQLPreparedStatement.cpp
2122
../clp/MySQLPreparedStatement.hpp
22-
../clp/networking/socket_utils.cpp
23-
../clp/networking/socket_utils.hpp
23+
../clp/ReadOnlyMemoryMappedFile.cpp
24+
../clp/ReadOnlyMemoryMappedFile.hpp
25+
../clp/ReaderInterface.cpp
2426
../clp/ReaderInterface.cpp
2527
../clp/ReaderInterface.hpp
26-
../clp/streaming_archive/ArchiveMetadata.cpp
27-
../clp/streaming_archive/ArchiveMetadata.hpp
28+
../clp/ReaderInterface.hpp
2829
../clp/TraceableException.hpp
2930
../clp/WriterInterface.cpp
3031
../clp/WriterInterface.hpp
31-
../clp/ffi/ir_stream/Deserializer.hpp
32-
../clp/ffi/ir_stream/Deserializer.cpp
33-
../clp/ffi/ir_stream/Serializer.hpp
34-
../clp/ffi/ir_stream/Serializer.cpp
35-
../clp/ffi/ir_stream/utils.hpp
36-
../clp/ffi/ir_stream/utils.cpp
37-
../clp/BufferReader.hpp
38-
../clp/BufferReader.cpp
39-
../clp/type_utils.hpp
40-
../clp/ffi/Value.hpp
41-
../clp/ErrorCode.hpp
42-
../clp/ir/EncodedTextAst.hpp
43-
../clp/ir/EncodedTextAst.cpp
44-
../clp/ir/types.hpp
45-
../clp/ReaderInterface.hpp
46-
../clp/ReaderInterface.cpp
47-
../clp/time_types.hpp
48-
../clp/type_utils.hpp
49-
../clp/ffi/KeyValuePairLogEvent.hpp
32+
../clp/cli_utils.cpp
33+
../clp/cli_utils.hpp
34+
../clp/database_utils.cpp
35+
../clp/database_utils.hpp
5036
../clp/ffi/KeyValuePairLogEvent.cpp
51-
../clp/ffi/SchemaTree.hpp
37+
../clp/ffi/KeyValuePairLogEvent.hpp
5238
../clp/ffi/SchemaTree.cpp
39+
../clp/ffi/SchemaTree.hpp
5340
../clp/ffi/SchemaTreeNode.hpp
5441
../clp/ffi/Value.hpp
55-
../clp/ffi/ir_stream/decoding_methods.hpp
42+
../clp/ffi/Value.hpp
43+
../clp/ffi/ir_stream/Deserializer.cpp
44+
../clp/ffi/ir_stream/Deserializer.hpp
45+
../clp/ffi/ir_stream/Serializer.cpp
46+
../clp/ffi/ir_stream/Serializer.hpp
5647
../clp/ffi/ir_stream/decoding_methods.cpp
57-
../clp/ffi/ir_stream/encoding_methods.hpp
48+
../clp/ffi/ir_stream/decoding_methods.hpp
5849
../clp/ffi/ir_stream/encoding_methods.cpp
59-
../clp/ir/parsing.hpp
60-
../clp/ir/parsing.cpp
50+
../clp/ffi/ir_stream/encoding_methods.hpp
6151
../clp/ffi/ir_stream/protocol_constants.hpp
62-
../clp/ffi/utils.hpp
52+
../clp/ffi/ir_stream/utils.cpp
53+
../clp/ffi/ir_stream/utils.hpp
6354
../clp/ffi/utils.cpp
64-
../clp/utf8_utils.hpp
65-
../clp/utf8_utils.cpp
66-
../clp/streaming_compression/zstd/Decompressor.hpp
55+
../clp/ffi/utils.hpp
56+
../clp/ir/EncodedTextAst.cpp
57+
../clp/ir/EncodedTextAst.hpp
58+
../clp/ir/parsing.cpp
59+
../clp/ir/parsing.hpp
60+
../clp/ir/types.hpp
61+
../clp/networking/socket_utils.cpp
62+
../clp/networking/socket_utils.hpp
63+
../clp/streaming_archive/ArchiveMetadata.cpp
64+
../clp/streaming_archive/ArchiveMetadata.hpp
6765
../clp/streaming_compression/zstd/Decompressor.cpp
68-
../clp/ReadOnlyMemoryMappedFile.hpp
69-
../clp/ReadOnlyMemoryMappedFile.cpp
70-
../clp/FileDescriptor.hpp
71-
../clp/FileDescriptor.cpp
66+
../clp/streaming_compression/zstd/Decompressor.hpp
67+
../clp/time_types.hpp
68+
../clp/type_utils.hpp
69+
../clp/type_utils.hpp
70+
../clp/utf8_utils.cpp
71+
../clp/utf8_utils.hpp
7272
)
7373

7474
set(

components/core/src/clp_s/JsonParser.cpp

Lines changed: 67 additions & 132 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
#include "JsonParser.hpp"
22

3-
#include <fstream>
43
#include <iostream>
54
#include <stack>
65

@@ -522,170 +521,109 @@ bool JsonParser::parse() {
522521
}
523522

524523
NodeType get_archive_node_type(
525-
clp::ffi::SchemaTreeNode const& node,
526-
std::pair<clp::ffi::SchemaTreeNode::id_t, std::optional<clp::ffi::Value>> p
524+
clp::ffi::SchemaTreeNode::Type ir_node_type,
525+
bool node_has_value,
526+
std::optional<clp::ffi::Value> const& node_value
527527
) {
528-
auto const node_type = node.get_type();
529528
// figure out what type the node is in archive node type
530-
NodeType archiveNodeType;
531-
switch (node_type) {
529+
NodeType archive_node_type;
530+
switch (ir_node_type) {
532531
case clp::ffi::SchemaTreeNode::Type::Int:
533-
archiveNodeType = NodeType::Integer;
532+
archive_node_type = NodeType::Integer;
534533
break;
535534
case clp::ffi::SchemaTreeNode::Type::Float:
536-
archiveNodeType = NodeType::Float;
535+
archive_node_type = NodeType::Float;
537536
break;
538537
case clp::ffi::SchemaTreeNode::Type::Bool:
539-
archiveNodeType = NodeType::Boolean;
538+
archive_node_type = NodeType::Boolean;
540539
break;
541540
case clp::ffi::SchemaTreeNode::Type::UnstructuredArray:
542-
archiveNodeType = NodeType::UnstructuredArray;
541+
archive_node_type = NodeType::UnstructuredArray;
543542
break;
544543
case clp::ffi::SchemaTreeNode::Type::Str:
545-
// std::cerr << "In str\n";
546-
if (p.second.value().is<std::string>()) {
547-
// maybe special case for date string
548-
archiveNodeType = NodeType::VarString;
544+
if (node_value->is<std::string>()) {
545+
archive_node_type = NodeType::VarString;
549546
} else {
550-
archiveNodeType = NodeType::ClpString;
547+
archive_node_type = NodeType::ClpString;
551548
}
552549
break;
553550
case clp::ffi::SchemaTreeNode::Type::Obj:
554-
// std::cerr << "In obj\n";
555-
if (p.second.has_value()) {
556-
if (p.second.value().is_null()) {
557-
// std::cout << "Found Null\n";
558-
archiveNodeType = NodeType::NullValue;
551+
if (node_has_value) {
552+
if (node_value->is_null()) {
553+
archive_node_type = NodeType::NullValue;
559554
} else {
560-
archiveNodeType = NodeType::Object;
555+
archive_node_type = NodeType::Object;
561556
}
562557
} else {
563-
archiveNodeType = NodeType::Object;
558+
archive_node_type = NodeType::Object;
564559
}
565560
break;
566561
default:
567-
archiveNodeType = NodeType::Unknown;
562+
archive_node_type = NodeType::Unknown;
568563
break;
569564
}
570-
return archiveNodeType;
565+
return archive_node_type;
571566
}
572567

573568
//
574569
int JsonParser::get_archive_node_id(
575-
std::map<std::tuple<int, NodeType>, int>& cache,
576-
int irNodeID,
577-
NodeType archiveNodeType,
578-
clp::ffi::SchemaTree const& irTree
570+
std::map<std::tuple<int32_t, NodeType>, int32_t>& ir_node_to_archive_node_map,
571+
int ir_node_id,
572+
NodeType archive_node_type,
573+
clp::ffi::SchemaTree const& ir_tree
579574
) {
580-
std::tuple<int, NodeType> key(irNodeID, archiveNodeType);
581-
if (cache.find(key) != cache.end()) {
582-
return cache[key];
575+
auto key = std::make_tuple(ir_node_id, archive_node_type);
576+
auto map_location = ir_node_to_archive_node_map.find(key);
577+
if (ir_node_to_archive_node_map.end() != map_location) {
578+
return map_location->second;
583579
}
584-
auto& currNode = irTree.get_node(irNodeID);
585-
int parent_node_id;
586-
// Found the root
587-
if (currNode.get_parent_id() == 0) {
588-
parent_node_id = 0;
589-
} else {
590-
parent_node_id
591-
= get_archive_node_id(cache, currNode.get_parent_id(), NodeType::Object, irTree);
580+
auto& curr_node = ir_tree.get_node(ir_node_id);
581+
int32_t parent_node_id{0};
582+
if (0 != curr_node.get_parent_id()) {
583+
parent_node_id = get_archive_node_id(
584+
ir_node_to_archive_node_map,
585+
curr_node.get_parent_id(),
586+
NodeType::Object,
587+
ir_tree
588+
);
592589
}
593-
std::string nodeKey
594-
= clp::ffi::validate_and_escape_utf8_string(currNode.get_key_name()).value();
595-
int curr_node_archive_id = m_archive_writer->add_node(parent_node_id, archiveNodeType, nodeKey);
596-
cache[key] = curr_node_archive_id;
597-
return curr_node_archive_id;
598-
}
599-
600-
void print_kv_log_event(KeyValuePairLogEvent const& kv) {
601-
auto const num_kv_pairs = kv.get_node_id_value_pairs().size();
602-
std::cout << "number of kv pairs: " << num_kv_pairs << std::endl;
603-
auto const& tree = kv.get_schema_tree();
604-
for (auto const& pair : kv.get_node_id_value_pairs()) {
605-
auto const& tree_node = tree.get_node(pair.first);
606-
auto const node_type = tree_node.get_type();
607-
switch (node_type) {
608-
case clp::ffi::SchemaTreeNode::Type::Int:
609-
std::cout << "Int" << std::endl;
610-
break;
611-
case clp::ffi::SchemaTreeNode::Type::Float:
612-
std::cout << "Float" << std::endl;
613-
break;
614-
case clp::ffi::SchemaTreeNode::Type::Bool:
615-
std::cout << "Bool" << std::endl;
616-
break;
617-
case clp::ffi::SchemaTreeNode::Type::Str:
618-
std::cout << "Str" << std::endl;
619-
break;
620-
case clp::ffi::SchemaTreeNode::Type::UnstructuredArray:
621-
std::cout << "UArray" << std::endl;
622-
break;
623-
case clp::ffi::SchemaTreeNode::Type::Obj:
624-
std::cout << "Obj" << std::endl;
625-
break;
626-
default:
627-
std::cout << "???" << std::endl;
628-
break;
629-
}
630-
631-
if (!pair.second.has_value()) {
632-
std::cout << "{??:\t" << pair.first << ": Node doesn't have Value ... EMPTY OBJ}\n";
633-
continue;
634-
}
635-
if (pair.second.value().is<clp::ffi::value_int_t>()) {
636-
std::cout << "{INT:\t" << pair.first << ": "
637-
<< pair.second.value().get_immutable_view<clp::ffi::value_int_t>() << "}\n";
638-
} else if (pair.second.value().is<clp::ffi::value_float_t>()) {
639-
std::cout << "{FLOAT:\t" << pair.first << ": "
640-
<< pair.second.value().get_immutable_view<clp::ffi::value_float_t>() << "}\n";
641-
} else if (pair.second.value().is<clp::ffi::value_bool_t>()) {
642-
std::cout << "{BOOL:\t" << pair.first << ": "
643-
<< pair.second.value().get_immutable_view<clp::ffi::value_bool_t>() << "}\n";
644-
} else if (pair.second.value().is<std::string>()) {
645-
std::cout << "{STRING:\t" << pair.first << ": "
646-
<< pair.second.value().get_immutable_view<std::string>() << "}\n";
647-
} else if (pair.second.value().is<clp::ir::EightByteEncodedTextAst>()) {
648-
std::cout << "{EIGHTByte:\t" << pair.first << ": \n";
649-
auto decoded = pair.second.value()
650-
.get_immutable_view<clp::ir::EightByteEncodedTextAst>()
651-
.decode_and_unparse();
652-
if (std::nullopt != decoded) {
653-
std::cout << "\t Decoded & Unparsed: " << decoded.value() << std::endl;
654-
} else {
655-
std::cout << "\tNULL\n";
656-
}
657-
std::cout << "}\n";
658-
} else if (pair.second.value().is<clp::ir::FourByteEncodedTextAst>()) {
659-
std::cout << "{FOURByte:\t" << pair.first << ": \n";
660-
auto decoded = pair.second.value()
661-
.get_immutable_view<clp::ir::FourByteEncodedTextAst>()
662-
.decode_and_unparse();
663-
if (std::nullopt != decoded) {
664-
std::cout << "\tDecoded & Unparsed: " << decoded.value() << std::endl;
665-
} else {
666-
std::cout << "\tNULL\n";
667-
}
668-
std::cout << "}\n";
669-
} else {
670-
std::cout << "Unknown Type:\t" << pair.first << "\n";
671-
}
590+
auto validated_escaped_key
591+
= clp::ffi::validate_and_escape_utf8_string(curr_node.get_key_name());
592+
std::string node_key = "";
593+
if (validated_escaped_key.has_value()) {
594+
node_key = validated_escaped_key.value();
672595
}
673-
std::cout << "after for loop\n\n\n";
596+
int curr_node_archive_id
597+
= m_archive_writer->add_node(parent_node_id, archive_node_type, node_key);
598+
ir_node_to_archive_node_map.emplace(std::move(key), curr_node_archive_id);
599+
return curr_node_archive_id;
674600
}
675601

676602
void JsonParser::parse_kv_log_event(
677603
KeyValuePairLogEvent const& kv,
678-
std::map<std::tuple<int, NodeType>, int>& cache
604+
std::map<std::tuple<int32_t, NodeType>, int32_t>& ir_node_to_archive_node_map
679605
) {
680-
auto const num_kv_pairs = kv.get_node_id_value_pairs().size();
681606
clp::ffi::SchemaTree const& tree = kv.get_schema_tree();
682607

683608
for (auto const& pair : kv.get_node_id_value_pairs()) {
684609
clp::ffi::SchemaTreeNode const& tree_node = tree.get_node(pair.first);
685-
NodeType archiveNodeType = get_archive_node_type(tree_node, pair);
686-
int node_id = get_archive_node_id(cache, pair.first, archiveNodeType, tree);
610+
clp::ffi::SchemaTreeNode::Type ir_node_type = tree_node.get_type();
611+
bool node_has_value = pair.second.has_value();
612+
NodeType archive_node_type = NodeType::Unknown;
613+
if (node_has_value) {
614+
archive_node_type
615+
= get_archive_node_type(ir_node_type, node_has_value, pair.second.value());
616+
} else {
617+
archive_node_type = get_archive_node_type(ir_node_type, node_has_value, {});
618+
}
619+
int node_id = get_archive_node_id(
620+
ir_node_to_archive_node_map,
621+
pair.first,
622+
archive_node_type,
623+
tree
624+
);
687625

688-
switch (archiveNodeType) {
626+
switch (archive_node_type) {
689627
case NodeType::Integer: {
690628
int64_t i64_value = pair.second.value().get_immutable_view<clp::ffi::value_int_t>();
691629
m_current_parsed_message.add_value(node_id, i64_value);
@@ -756,7 +694,7 @@ void JsonParser::parse_kv_log_event(
756694
}
757695

758696
bool JsonParser::parse_from_IR() {
759-
std::map<std::tuple<int, NodeType>, int> id_conversion_cache;
697+
std::map<std::tuple<int32_t, NodeType>, int32_t> ir_node_to_archive_node_map;
760698
m_archive_writer->add_node(-1, NodeType::Unknown, "root");
761699

762700
for (auto& file_path : m_file_paths) {
@@ -790,23 +728,20 @@ bool JsonParser::parse_from_IR() {
790728
m_current_schema.clear();
791729
auto const& kv_log_event = kv_log_event_result.value();
792730

793-
// print_kv_log_event(kv_log_event);
794-
parse_kv_log_event(kv_log_event, id_conversion_cache);
731+
parse_kv_log_event(kv_log_event, ir_node_to_archive_node_map);
795732

796733
m_num_messages++;
797734
if (m_archive_writer->get_data_size() >= m_target_encoded_size) {
798-
std::cerr << "Splitting Archive\n\n";
799-
id_conversion_cache.clear();
735+
ir_node_to_archive_node_map.clear();
800736
m_archive_writer->add_node(-1, NodeType::Unknown, "root");
801737
split_archive();
802738
}
803739

804740
m_current_parsed_message.clear();
805741

806-
} while (1);
807-
id_conversion_cache.clear();
742+
} while (true);
743+
ir_node_to_archive_node_map.clear();
808744
zd.close();
809-
//infile.close();
810745
}
811746
return true;
812747
}

0 commit comments

Comments
 (0)