Skip to content

Commit 7931f0f

Browse files
authored
refactor(clp): Prepare Grep code for deduplication with clp-s: (#1143)
- Move `Grep::process_raw_query` and associated utilities into a new `GrepCore` class.
- Templatize dictionary-parameters in `GrepCore`.
- Use dictionary IDs instead of pointers to dictionary entries in subquery generation.
- Move segment matching out of `GrepCore::process_raw_query` flow.
- Make callers of `GrepCore::process_raw_query` explicitly specify all wildcards in the query string.
- Make callers of `GrepCore::process_raw_query` clean up the query string.
1 parent 51f1277 commit 7931f0f

File tree

16 files changed

+1202
-991
lines changed

16 files changed

+1202
-991
lines changed

components/core/CMakeLists.txt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -579,6 +579,8 @@ set(SOURCE_FILES_unitTest
579579
src/clp/GlobalSQLiteMetadataDB.hpp
580580
src/clp/Grep.cpp
581581
src/clp/Grep.hpp
582+
src/clp/GrepCore.cpp
583+
src/clp/GrepCore.hpp
582584
src/clp/hash_utils.cpp
583585
src/clp/hash_utils.hpp
584586
src/clp/ir/constants.hpp
@@ -627,6 +629,8 @@ set(SOURCE_FILES_unitTest
627629
src/clp/Profiler.hpp
628630
src/clp/Query.cpp
629631
src/clp/Query.hpp
632+
src/clp/QueryToken.cpp
633+
src/clp/QueryToken.hpp
630634
src/clp/ReaderInterface.cpp
631635
src/clp/ReaderInterface.hpp
632636
src/clp/ReadOnlyMemoryMappedFile.cpp
@@ -716,7 +720,7 @@ set(SOURCE_FILES_unitTest
716720
tests/test-ffi_KeyValuePairLogEvent.cpp
717721
tests/test-ffi_SchemaTree.cpp
718722
tests/test-FileDescriptorReader.cpp
719-
tests/test-Grep.cpp
723+
tests/test-GrepCore.cpp
720724
tests/test-hash_utils.cpp
721725
tests/test-ir_encoding_methods.cpp
722726
tests/test-ir_parsing.cpp

components/core/src/clp/EncodedVariableInterpreter.hpp

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,6 @@ class EncodedVariableInterpreter {
184184
* Encodes a string-form variable, and if it is a dictionary variable, searches for its ID in the
185185
* given variable dictionary.
186186
* @tparam VariableDictionaryReaderType
187-
* @tparam VariableDictionaryEntryType
188187
* @param var_str
189188
* @param var_dict
190189
* @param ignore_case
@@ -194,9 +193,7 @@ class EncodedVariableInterpreter {
194193
* dictionary
195194
* @return false otherwise
196195
*/
197-
template <
198-
typename VariableDictionaryReaderType,
199-
typename VariableDictionaryEntryType = typename VariableDictionaryReaderType::entry_t>
196+
template <typename VariableDictionaryReaderType>
200197
static bool encode_and_search_dictionary(
201198
std::string_view var_str,
202199
VariableDictionaryReaderType const& var_dict,
@@ -437,7 +434,7 @@ bool EncodedVariableInterpreter::decode_variables_into_message(
437434
return true;
438435
}
439436

440-
template <typename VariableDictionaryReaderType, typename VariableDictionaryEntryType>
437+
template <typename VariableDictionaryReaderType>
441438
bool EncodedVariableInterpreter::encode_and_search_dictionary(
442439
std::string_view var_str,
443440
VariableDictionaryReaderType const& var_dict,
@@ -468,20 +465,18 @@ bool EncodedVariableInterpreter::encode_and_search_dictionary(
468465

469466
if (entries.size() == 1) {
470467
auto const* entry = entries.at(0);
471-
sub_query.add_dict_var(encode_var_dict_id(entry->get_id()), entry);
468+
sub_query.add_dict_var(encode_var_dict_id(entry->get_id()), entry->get_id());
472469
return true;
473470
}
474471

475-
std::unordered_set<VariableDictionaryEntryType const*> const entries_set{
476-
entries.cbegin(),
477-
entries.cend()
478-
};
479472
std::unordered_set<encoded_variable_t> encoded_vars;
473+
std::unordered_set<variable_dictionary_id_t> var_dict_ids;
480474
encoded_vars.reserve(entries.size());
481475
for (auto const* entry : entries) {
482476
encoded_vars.emplace(encode_var_dict_id(entry->get_id()));
477+
var_dict_ids.emplace(entry->get_id());
483478
}
484-
sub_query.add_imprecise_dict_var(encoded_vars, entries_set);
479+
sub_query.add_imprecise_dict_var(encoded_vars, var_dict_ids);
485480
}
486481

487482
return true;
@@ -504,11 +499,13 @@ bool EncodedVariableInterpreter::wildcard_search_dictionary_and_get_encoded_matc
504499

505500
// Encode matches
506501
std::unordered_set<encoded_variable_t> encoded_vars;
502+
std::unordered_set<variable_dictionary_id_t> var_dict_ids;
507503
for (auto entry : var_dict_entries) {
508504
encoded_vars.emplace(encode_var_dict_id(entry->get_id()));
505+
var_dict_ids.emplace(entry->get_id());
509506
}
510507

511-
sub_query.add_imprecise_dict_var(encoded_vars, var_dict_entries);
508+
sub_query.add_imprecise_dict_var(encoded_vars, var_dict_ids);
512509

513510
return true;
514511
}

0 commit comments

Comments
 (0)