Skip to content

Commit 1958026

Browse files
committed
Merge remote-tracking branch 'upstream/main' into ordered-compression
2 parents efd9218 + 53c4f52 commit 1958026

File tree

15 files changed

+129
-25
lines changed

15 files changed

+129
-25
lines changed

.github/PULL_REQUEST_TEMPLATE.md

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
<!--
2-
Set the PR title to a meaningful commit message in imperative form. E.g.:
3-
4-
clp-s: Don't add implicit wildcards ('*') at the beginning and the end of a query (fixes #390).
2+
Set the PR title to a meaningful commit message that:
3+
- follows the Conventional Commits specification (https://www.conventionalcommits.org).
4+
- is in imperative form.
5+
Example:
6+
fix: Don't add implicit wildcards ('*') at the beginning and the end of a query (fixes #390).
57
-->
68

79
# Description
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
name: "clp-pr-title-checks"
2+
3+
on:
4+
pull_request_target:
5+
types: ["edited", "opened", "reopened"]
6+
branches: ["main"]
7+
8+
concurrency:
9+
group: "${{github.workflow}}-${{github.ref}}"
10+
11+
# Cancel in-progress jobs for efficiency
12+
cancel-in-progress: true
13+
14+
jobs:
15+
conventional-commits:
16+
permissions:
17+
# For amannn/action-semantic-pull-request
18+
pull-requests: "read"
19+
runs-on: "ubuntu-latest"
20+
steps:
21+
- uses: "amannn/action-semantic-pull-request@v5"
22+
env:
23+
GITHUB_TOKEN: "${{secrets.GITHUB_TOKEN}}"

components/core/src/clp/ffi/ir_stream/Serializer.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
namespace clp::ffi::ir_stream {
1616
/**
17-
* A work-in-progress class for serializing log events into the kv-pair IR format.
17+
* Class for serializing log events into the kv-pair IR format.
1818
*
1919
* This class:
2020
* - maintains all necessary internal data structures to track serialization state;

components/core/src/clp_s/JsonParser.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,12 @@ JsonParser::JsonParser(JsonParserOption const& option)
2121
}
2222

2323
if (false == m_timestamp_key.empty()) {
24-
clp_s::StringUtils::tokenize_column_descriptor(m_timestamp_key, m_timestamp_column);
24+
if (false
25+
== clp_s::StringUtils::tokenize_column_descriptor(m_timestamp_key, m_timestamp_column))
26+
{
27+
SPDLOG_ERROR("Can not parse invalid timestamp key: \"{}\"", m_timestamp_key);
28+
throw OperationFailed(ErrorCodeBadParam, __FILENAME__, __LINE__);
29+
}
2530
}
2631

2732
for (auto& file_path : option.file_paths) {

components/core/src/clp_s/TimestampDictionaryReader.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,9 @@ void TimestampDictionaryReader::read_new_entries() {
4444
TimestampEntry entry;
4545
std::vector<std::string> tokens;
4646
entry.try_read_from_file(m_dictionary_decompressor);
47-
StringUtils::tokenize_column_descriptor(entry.get_key_name(), tokens);
47+
if (false == StringUtils::tokenize_column_descriptor(entry.get_key_name(), tokens)) {
48+
throw OperationFailed(ErrorCodeCorrupt, __FILENAME__, __LINE__);
49+
}
4850
m_entries.emplace_back(std::move(entry));
4951

5052
// TODO: Currently, we only allow a single authoritative timestamp column at ingestion time,

components/core/src/clp_s/Utils.cpp

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -427,18 +427,34 @@ bool StringUtils::convert_string_to_double(std::string const& raw, double& conve
427427
return true;
428428
}
429429

430-
void StringUtils::tokenize_column_descriptor(
430+
bool StringUtils::tokenize_column_descriptor(
431431
std::string const& descriptor,
432432
std::vector<std::string>& tokens
433433
) {
434-
// TODO: handle escaped . correctly
435-
auto start = 0U;
436-
auto end = descriptor.find('.');
437-
while (end != std::string::npos) {
438-
tokens.push_back(descriptor.substr(start, end - start));
439-
start = end + 1;
440-
end = descriptor.find('.', start);
434+
// TODO: add support for Unicode escape sequences, e.g., \u263A
435+
std::string cur_tok;
436+
for (size_t cur = 0; cur < descriptor.size(); ++cur) {
437+
if ('\\' == descriptor[cur]) {
438+
++cur;
439+
if (cur >= descriptor.size()) {
440+
return false;
441+
}
442+
} else if ('.' == descriptor[cur]) {
443+
if (cur_tok.empty()) {
444+
return false;
445+
}
446+
tokens.push_back(cur_tok);
447+
cur_tok.clear();
448+
continue;
449+
}
450+
cur_tok.push_back(descriptor[cur]);
441451
}
442-
tokens.push_back(descriptor.substr(start));
452+
453+
if (cur_tok.empty()) {
454+
return false;
455+
}
456+
457+
tokens.push_back(cur_tok);
458+
return true;
443459
}
444460
} // namespace clp_s

components/core/src/clp_s/Utils.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -211,9 +211,9 @@ class StringUtils {
211211
* Converts a string column descriptor delimited by '.' into a list of tokens; a '\' escapes the character that follows it (e.g. "\." is a literal '.')
212212
* @param descriptor
213213
* @param tokens
214-
* @return the list of tokens pushed into the 'tokens' parameter
214+
* @return true if the descriptor was tokenized successfully, false if any token is empty (including an empty descriptor) or the descriptor ends with a dangling '\' escape
215215
*/
216-
static void
216+
[[nodiscard]] static bool
217217
tokenize_column_descriptor(std::string const& descriptor, std::vector<std::string>& tokens);
218218

219219
private:

components/core/src/clp_s/clp-s.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,10 @@ bool search_archive(
191191
try {
192192
for (auto const& column : command_line_arguments.get_projection_columns()) {
193193
std::vector<std::string> descriptor_tokens;
194-
StringUtils::tokenize_column_descriptor(column, descriptor_tokens);
194+
if (false == StringUtils::tokenize_column_descriptor(column, descriptor_tokens)) {
195+
SPDLOG_ERROR("Can not tokenize invalid column: \"{}\"", column);
196+
return false;
197+
}
195198
projection->add_column(ColumnDescriptor::create(descriptor_tokens));
196199
}
197200
} catch (clp_s::TraceableException& e) {

components/core/src/clp_s/search/kql/Kql.g4

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ fragment ESCAPED_SPACE
9696
;
9797
9898
fragment SPECIAL_CHARACTER
99-
: [\\():<>"*?{}]
99+
: [\\():<>"*?{}.]
100100
;
101101

102102

components/core/src/clp_s/search/kql/kql.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,10 @@ class ParseTreeVisitor : public KqlBaseVisitor {
112112
std::string column = unquote_string(ctx->LITERAL()->getText());
113113

114114
std::vector<std::string> descriptor_tokens;
115-
StringUtils::tokenize_column_descriptor(column, descriptor_tokens);
115+
if (false == StringUtils::tokenize_column_descriptor(column, descriptor_tokens)) {
116+
SPDLOG_ERROR("Can not tokenize invalid column: \"{}\"", column);
117+
return nullptr;
118+
}
116119

117120
return ColumnDescriptor::create(descriptor_tokens);
118121
}
@@ -248,6 +251,10 @@ std::shared_ptr<Expression> parse_kql_expression(std::istream& in) {
248251
}
249252

250253
ParseTreeVisitor visitor;
251-
return std::any_cast<std::shared_ptr<Expression>>(visitor.visitStart(tree));
254+
try {
255+
return std::any_cast<std::shared_ptr<Expression>>(visitor.visitStart(tree));
256+
} catch (std::exception& e) {
257+
return {};
258+
}
252259
}
253260
} // namespace clp_s::search::kql

0 commit comments

Comments
 (0)