|
1 | 1 | #include "JsonParser.hpp" |
2 | 2 |
|
3 | | -#include <fstream> |
4 | 3 | #include <iostream> |
5 | 4 | #include <stack> |
6 | 5 |
|
@@ -522,170 +521,109 @@ bool JsonParser::parse() { |
522 | 521 | } |
523 | 522 |
|
524 | 523 | NodeType get_archive_node_type( |
525 | | - clp::ffi::SchemaTreeNode const& node, |
526 | | - std::pair<clp::ffi::SchemaTreeNode::id_t, std::optional<clp::ffi::Value>> p |
| 524 | + clp::ffi::SchemaTreeNode::Type ir_node_type, |
| 525 | + bool node_has_value, |
| 526 | + std::optional<clp::ffi::Value> const& node_value |
527 | 527 | ) { |
528 | | - auto const node_type = node.get_type(); |
529 | 528 | // figure out what type the node is in archive node type |
530 | | - NodeType archiveNodeType; |
531 | | - switch (node_type) { |
| 529 | + NodeType archive_node_type; |
| 530 | + switch (ir_node_type) { |
532 | 531 | case clp::ffi::SchemaTreeNode::Type::Int: |
533 | | - archiveNodeType = NodeType::Integer; |
| 532 | + archive_node_type = NodeType::Integer; |
534 | 533 | break; |
535 | 534 | case clp::ffi::SchemaTreeNode::Type::Float: |
536 | | - archiveNodeType = NodeType::Float; |
| 535 | + archive_node_type = NodeType::Float; |
537 | 536 | break; |
538 | 537 | case clp::ffi::SchemaTreeNode::Type::Bool: |
539 | | - archiveNodeType = NodeType::Boolean; |
| 538 | + archive_node_type = NodeType::Boolean; |
540 | 539 | break; |
541 | 540 | case clp::ffi::SchemaTreeNode::Type::UnstructuredArray: |
542 | | - archiveNodeType = NodeType::UnstructuredArray; |
| 541 | + archive_node_type = NodeType::UnstructuredArray; |
543 | 542 | break; |
544 | 543 | case clp::ffi::SchemaTreeNode::Type::Str: |
545 | | - // std::cerr << "In str\n"; |
546 | | - if (p.second.value().is<std::string>()) { |
547 | | - // maybe special case for date string |
548 | | - archiveNodeType = NodeType::VarString; |
| 544 | + if (node_value->is<std::string>()) { |
| 545 | + archive_node_type = NodeType::VarString; |
549 | 546 | } else { |
550 | | - archiveNodeType = NodeType::ClpString; |
| 547 | + archive_node_type = NodeType::ClpString; |
551 | 548 | } |
552 | 549 | break; |
553 | 550 | case clp::ffi::SchemaTreeNode::Type::Obj: |
554 | | - // std::cerr << "In obj\n"; |
555 | | - if (p.second.has_value()) { |
556 | | - if (p.second.value().is_null()) { |
557 | | - // std::cout << "Found Null\n"; |
558 | | - archiveNodeType = NodeType::NullValue; |
| 551 | + if (node_has_value) { |
| 552 | + if (node_value->is_null()) { |
| 553 | + archive_node_type = NodeType::NullValue; |
559 | 554 | } else { |
560 | | - archiveNodeType = NodeType::Object; |
| 555 | + archive_node_type = NodeType::Object; |
561 | 556 | } |
562 | 557 | } else { |
563 | | - archiveNodeType = NodeType::Object; |
| 558 | + archive_node_type = NodeType::Object; |
564 | 559 | } |
565 | 560 | break; |
566 | 561 | default: |
567 | | - archiveNodeType = NodeType::Unknown; |
| 562 | + archive_node_type = NodeType::Unknown; |
568 | 563 | break; |
569 | 564 | } |
570 | | - return archiveNodeType; |
| 565 | + return archive_node_type; |
571 | 566 | } |
572 | 567 |
|
573 | 568 | // |
574 | 569 | int JsonParser::get_archive_node_id( |
575 | | - std::map<std::tuple<int, NodeType>, int>& cache, |
576 | | - int irNodeID, |
577 | | - NodeType archiveNodeType, |
578 | | - clp::ffi::SchemaTree const& irTree |
| 570 | + std::map<std::tuple<int32_t, NodeType>, int32_t>& ir_node_to_archive_node_map, |
| 571 | + int ir_node_id, |
| 572 | + NodeType archive_node_type, |
| 573 | + clp::ffi::SchemaTree const& ir_tree |
579 | 574 | ) { |
580 | | - std::tuple<int, NodeType> key(irNodeID, archiveNodeType); |
581 | | - if (cache.find(key) != cache.end()) { |
582 | | - return cache[key]; |
| 575 | + auto key = std::make_tuple(ir_node_id, archive_node_type); |
| 576 | + auto map_location = ir_node_to_archive_node_map.find(key); |
| 577 | + if (ir_node_to_archive_node_map.end() != map_location) { |
| 578 | + return map_location->second; |
583 | 579 | } |
584 | | - auto& currNode = irTree.get_node(irNodeID); |
585 | | - int parent_node_id; |
586 | | - // Found the root |
587 | | - if (currNode.get_parent_id() == 0) { |
588 | | - parent_node_id = 0; |
589 | | - } else { |
590 | | - parent_node_id |
591 | | - = get_archive_node_id(cache, currNode.get_parent_id(), NodeType::Object, irTree); |
| 580 | + auto& curr_node = ir_tree.get_node(ir_node_id); |
| 581 | + int32_t parent_node_id{0}; |
| 582 | + if (0 != curr_node.get_parent_id()) { |
| 583 | + parent_node_id = get_archive_node_id( |
| 584 | + ir_node_to_archive_node_map, |
| 585 | + curr_node.get_parent_id(), |
| 586 | + NodeType::Object, |
| 587 | + ir_tree |
| 588 | + ); |
592 | 589 | } |
593 | | - std::string nodeKey |
594 | | - = clp::ffi::validate_and_escape_utf8_string(currNode.get_key_name()).value(); |
595 | | - int curr_node_archive_id = m_archive_writer->add_node(parent_node_id, archiveNodeType, nodeKey); |
596 | | - cache[key] = curr_node_archive_id; |
597 | | - return curr_node_archive_id; |
598 | | -} |
599 | | - |
600 | | -void print_kv_log_event(KeyValuePairLogEvent const& kv) { |
601 | | - auto const num_kv_pairs = kv.get_node_id_value_pairs().size(); |
602 | | - std::cout << "number of kv pairs: " << num_kv_pairs << std::endl; |
603 | | - auto const& tree = kv.get_schema_tree(); |
604 | | - for (auto const& pair : kv.get_node_id_value_pairs()) { |
605 | | - auto const& tree_node = tree.get_node(pair.first); |
606 | | - auto const node_type = tree_node.get_type(); |
607 | | - switch (node_type) { |
608 | | - case clp::ffi::SchemaTreeNode::Type::Int: |
609 | | - std::cout << "Int" << std::endl; |
610 | | - break; |
611 | | - case clp::ffi::SchemaTreeNode::Type::Float: |
612 | | - std::cout << "Float" << std::endl; |
613 | | - break; |
614 | | - case clp::ffi::SchemaTreeNode::Type::Bool: |
615 | | - std::cout << "Bool" << std::endl; |
616 | | - break; |
617 | | - case clp::ffi::SchemaTreeNode::Type::Str: |
618 | | - std::cout << "Str" << std::endl; |
619 | | - break; |
620 | | - case clp::ffi::SchemaTreeNode::Type::UnstructuredArray: |
621 | | - std::cout << "UArray" << std::endl; |
622 | | - break; |
623 | | - case clp::ffi::SchemaTreeNode::Type::Obj: |
624 | | - std::cout << "Obj" << std::endl; |
625 | | - break; |
626 | | - default: |
627 | | - std::cout << "???" << std::endl; |
628 | | - break; |
629 | | - } |
630 | | - |
631 | | - if (!pair.second.has_value()) { |
632 | | - std::cout << "{??:\t" << pair.first << ": Node doesn't have Value ... EMPTY OBJ}\n"; |
633 | | - continue; |
634 | | - } |
635 | | - if (pair.second.value().is<clp::ffi::value_int_t>()) { |
636 | | - std::cout << "{INT:\t" << pair.first << ": " |
637 | | - << pair.second.value().get_immutable_view<clp::ffi::value_int_t>() << "}\n"; |
638 | | - } else if (pair.second.value().is<clp::ffi::value_float_t>()) { |
639 | | - std::cout << "{FLOAT:\t" << pair.first << ": " |
640 | | - << pair.second.value().get_immutable_view<clp::ffi::value_float_t>() << "}\n"; |
641 | | - } else if (pair.second.value().is<clp::ffi::value_bool_t>()) { |
642 | | - std::cout << "{BOOL:\t" << pair.first << ": " |
643 | | - << pair.second.value().get_immutable_view<clp::ffi::value_bool_t>() << "}\n"; |
644 | | - } else if (pair.second.value().is<std::string>()) { |
645 | | - std::cout << "{STRING:\t" << pair.first << ": " |
646 | | - << pair.second.value().get_immutable_view<std::string>() << "}\n"; |
647 | | - } else if (pair.second.value().is<clp::ir::EightByteEncodedTextAst>()) { |
648 | | - std::cout << "{EIGHTByte:\t" << pair.first << ": \n"; |
649 | | - auto decoded = pair.second.value() |
650 | | - .get_immutable_view<clp::ir::EightByteEncodedTextAst>() |
651 | | - .decode_and_unparse(); |
652 | | - if (std::nullopt != decoded) { |
653 | | - std::cout << "\t Decoded & Unparsed: " << decoded.value() << std::endl; |
654 | | - } else { |
655 | | - std::cout << "\tNULL\n"; |
656 | | - } |
657 | | - std::cout << "}\n"; |
658 | | - } else if (pair.second.value().is<clp::ir::FourByteEncodedTextAst>()) { |
659 | | - std::cout << "{FOURByte:\t" << pair.first << ": \n"; |
660 | | - auto decoded = pair.second.value() |
661 | | - .get_immutable_view<clp::ir::FourByteEncodedTextAst>() |
662 | | - .decode_and_unparse(); |
663 | | - if (std::nullopt != decoded) { |
664 | | - std::cout << "\tDecoded & Unparsed: " << decoded.value() << std::endl; |
665 | | - } else { |
666 | | - std::cout << "\tNULL\n"; |
667 | | - } |
668 | | - std::cout << "}\n"; |
669 | | - } else { |
670 | | - std::cout << "Unknown Type:\t" << pair.first << "\n"; |
671 | | - } |
| 590 | + auto validated_escaped_key |
| 591 | + = clp::ffi::validate_and_escape_utf8_string(curr_node.get_key_name()); |
| 592 | + std::string node_key = ""; |
| 593 | + if (validated_escaped_key.has_value()) { |
| 594 | + node_key = validated_escaped_key.value(); |
672 | 595 | } |
673 | | - std::cout << "after for loop\n\n\n"; |
| 596 | + int curr_node_archive_id |
| 597 | + = m_archive_writer->add_node(parent_node_id, archive_node_type, node_key); |
| 598 | + ir_node_to_archive_node_map.emplace(std::move(key), curr_node_archive_id); |
| 599 | + return curr_node_archive_id; |
674 | 600 | } |
675 | 601 |
|
676 | 602 | void JsonParser::parse_kv_log_event( |
677 | 603 | KeyValuePairLogEvent const& kv, |
678 | | - std::map<std::tuple<int, NodeType>, int>& cache |
| 604 | + std::map<std::tuple<int32_t, NodeType>, int32_t>& ir_node_to_archive_node_map |
679 | 605 | ) { |
680 | | - auto const num_kv_pairs = kv.get_node_id_value_pairs().size(); |
681 | 606 | clp::ffi::SchemaTree const& tree = kv.get_schema_tree(); |
682 | 607 |
|
683 | 608 | for (auto const& pair : kv.get_node_id_value_pairs()) { |
684 | 609 | clp::ffi::SchemaTreeNode const& tree_node = tree.get_node(pair.first); |
685 | | - NodeType archiveNodeType = get_archive_node_type(tree_node, pair); |
686 | | - int node_id = get_archive_node_id(cache, pair.first, archiveNodeType, tree); |
| 610 | + clp::ffi::SchemaTreeNode::Type ir_node_type = tree_node.get_type(); |
| 611 | + bool node_has_value = pair.second.has_value(); |
| 612 | + NodeType archive_node_type = NodeType::Unknown; |
| 613 | + if (node_has_value) { |
| 614 | + archive_node_type |
| 615 | + = get_archive_node_type(ir_node_type, node_has_value, pair.second.value()); |
| 616 | + } else { |
| 617 | + archive_node_type = get_archive_node_type(ir_node_type, node_has_value, {}); |
| 618 | + } |
| 619 | + int node_id = get_archive_node_id( |
| 620 | + ir_node_to_archive_node_map, |
| 621 | + pair.first, |
| 622 | + archive_node_type, |
| 623 | + tree |
| 624 | + ); |
687 | 625 |
|
688 | | - switch (archiveNodeType) { |
| 626 | + switch (archive_node_type) { |
689 | 627 | case NodeType::Integer: { |
690 | 628 | int64_t i64_value = pair.second.value().get_immutable_view<clp::ffi::value_int_t>(); |
691 | 629 | m_current_parsed_message.add_value(node_id, i64_value); |
@@ -756,7 +694,7 @@ void JsonParser::parse_kv_log_event( |
756 | 694 | } |
757 | 695 |
|
758 | 696 | bool JsonParser::parse_from_IR() { |
759 | | - std::map<std::tuple<int, NodeType>, int> id_conversion_cache; |
| 697 | + std::map<std::tuple<int32_t, NodeType>, int32_t> ir_node_to_archive_node_map; |
760 | 698 | m_archive_writer->add_node(-1, NodeType::Unknown, "root"); |
761 | 699 |
|
762 | 700 | for (auto& file_path : m_file_paths) { |
@@ -790,23 +728,20 @@ bool JsonParser::parse_from_IR() { |
790 | 728 | m_current_schema.clear(); |
791 | 729 | auto const& kv_log_event = kv_log_event_result.value(); |
792 | 730 |
|
793 | | - // print_kv_log_event(kv_log_event); |
794 | | - parse_kv_log_event(kv_log_event, id_conversion_cache); |
| 731 | + parse_kv_log_event(kv_log_event, ir_node_to_archive_node_map); |
795 | 732 |
|
796 | 733 | m_num_messages++; |
797 | 734 | if (m_archive_writer->get_data_size() >= m_target_encoded_size) { |
798 | | - std::cerr << "Splitting Archive\n\n"; |
799 | | - id_conversion_cache.clear(); |
| 735 | + ir_node_to_archive_node_map.clear(); |
800 | 736 | m_archive_writer->add_node(-1, NodeType::Unknown, "root"); |
801 | 737 | split_archive(); |
802 | 738 | } |
803 | 739 |
|
804 | 740 | m_current_parsed_message.clear(); |
805 | 741 |
|
806 | | - } while (1); |
807 | | - id_conversion_cache.clear(); |
| 742 | + } while (true); |
| 743 | + ir_node_to_archive_node_map.clear(); |
808 | 744 | zd.close(); |
809 | | - //infile.close(); |
810 | 745 | } |
811 | 746 | return true; |
812 | 747 | } |
|
0 commit comments