Skip to content

Commit 5060441

Browse files
authored
ARROW-10392: [C++][Gandiva] Avoid string copy while evaluating IN expression (#19)
Closes apache#8530 from pprudhvi/stringalloc.

Authored-by: Prudhvi Porandla <[email protected]>
Signed-off-by: Pindikura Ravindra <[email protected]>
1 parent 0b7c6bf commit 5060441

File tree

2 files changed

+28
-1
lines changed

2 files changed

+28
-1
lines changed

cpp/src/gandiva/gdv_function_stubs.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ bool gdv_fn_in_expr_lookup_utf8(int64_t ptr, const char* data, int data_len,
8383
}
8484
gandiva::InHolder<std::string>* holder =
8585
reinterpret_cast<gandiva::InHolder<std::string>*>(ptr);
86-
return holder->HasValue(std::string(data, data_len));
86+
return holder->HasValue(arrow::util::string_view(data, data_len));
8787
}
8888

8989
int32_t gdv_fn_populate_varlen_vector(int64_t context_ptr, int8_t* data_ptr,

cpp/src/gandiva/in_holder.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <string>
2121
#include <unordered_set>
2222

23+
#include "arrow/util/hashing.h"
2324
#include "gandiva/arrow.h"
2425
#include "gandiva/gandiva_aliases.h"
2526

@@ -42,4 +43,30 @@ class InHolder {
4243
std::unordered_set<Type> values_;
4344
};
4445

46+
template <>
47+
class InHolder<std::string> {
48+
public:
49+
explicit InHolder(std::unordered_set<std::string> values) : values_(std::move(values)) {
50+
values_lookup_.max_load_factor(0.25f);
51+
for (const std::string& value : values_) {
52+
values_lookup_.emplace(value);
53+
}
54+
}
55+
56+
bool HasValue(arrow::util::string_view value) const {
57+
return values_lookup_.count(value) == 1;
58+
}
59+
60+
private:
61+
struct string_view_hash {
62+
public:
63+
std::size_t operator()(arrow::util::string_view v) const {
64+
return arrow::internal::ComputeStringHash<0>(v.data(), v.length());
65+
}
66+
};
67+
68+
std::unordered_set<arrow::util::string_view, string_view_hash> values_lookup_;
69+
const std::unordered_set<std::string> values_;
70+
};
71+
4572
} // namespace gandiva

0 commit comments

Comments
 (0)