Skip to content

Commit 739e084

Browse files
authored
GDV-31:[C++]Caching projectors and filters for re-use. (apache#83)
Introducing a cache to hold the projectors and filters for re-use. The cache is an LRU cache that can hold 100 entries.
1 parent 77e952c commit 739e084

25 files changed

Lines changed: 621 additions & 126 deletions

cpp/src/gandiva/include/gandiva/configuration.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ class Configuration {
3434
public:
3535
const std::string &byte_code_file_path() const { return byte_code_file_path_; }
3636
friend class ConfigurationBuilder;
37+
std::size_t Hash() const;
38+
bool operator==(const Configuration &other) const;
39+
bool operator!=(const Configuration &other) const;
3740

3841
private:
3942
explicit Configuration(const std::string byte_code_file_path)

cpp/src/gandiva/include/gandiva/projector.h

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -41,30 +41,30 @@ class Projector {
4141
///
4242
/// \param[in] : schema schema for the record batches, and the expressions.
4343
/// \param[in] : exprs vector of expressions.
44-
/// \param[in] : pool memory pool used to allocate output arrays (if required).
4544
/// \param[out]: projector the returned projector object
4645
static Status Make(SchemaPtr schema, const ExpressionVector &exprs,
47-
arrow::MemoryPool *pool, std::shared_ptr<Projector> *projector);
46+
std::shared_ptr<Projector> *projector);
4847

4948
/// Build a projector for the given schema to evaluate the vector of expressions.
5049
/// Customize the projector with runtime configuration.
5150
///
5251
/// \param[in] : schema schema for the record batches, and the expressions.
5352
/// \param[in] : exprs vector of expressions.
54-
/// \param[in] : pool memory pool used to allocate output arrays (if required).
5553
/// \param[in] : run time configuration.
5654
/// \param[out]: projector the returned projector object
5755
static Status Make(SchemaPtr schema, const ExpressionVector &exprs,
58-
arrow::MemoryPool *pool, std::shared_ptr<Configuration>,
56+
std::shared_ptr<Configuration>,
5957
std::shared_ptr<Projector> *projector);
6058

6159
/// Evaluate the specified record batch, and return the allocated and populated output
6260
/// arrays. The output arrays will be allocated from the memory pool 'pool', and added
6361
/// to the vector 'output'.
6462
///
6563
/// \param[in] : batch the record batch. schema should be the same as the one in 'Make'
64+
/// \param[in] : pool memory pool used to allocate output arrays (if required).
6665
/// \param[out]: output the vector of allocated/populated arrays.
67-
Status Evaluate(const arrow::RecordBatch &batch, arrow::ArrayVector *ouput);
66+
Status Evaluate(const arrow::RecordBatch &batch, arrow::MemoryPool *pool,
67+
arrow::ArrayVector *ouput);
6868

6969
/// Evaluate the specified record batch, and populate the output arrays. The output
7070
/// arrays of sufficient capacity must be allocated by the caller.
@@ -76,11 +76,11 @@ class Projector {
7676

7777
private:
7878
Projector(std::unique_ptr<LLVMGenerator> llvm_generator, SchemaPtr schema,
79-
const FieldVector &output_fields, arrow::MemoryPool *pool,
80-
std::shared_ptr<Configuration>);
79+
const FieldVector &output_fields, std::shared_ptr<Configuration>);
8180

8281
/// Allocate an ArrowData of length 'length'.
83-
Status AllocArrayData(const DataTypePtr &type, int length, ArrayDataPtr *array_data);
82+
Status AllocArrayData(const DataTypePtr &type, int length, arrow::MemoryPool *pool,
83+
ArrayDataPtr *array_data);
8484

8585
/// Validate that the ArrayData has sufficient capacity to accommodate 'num_records'.
8686
Status ValidateArrayDataCapacity(const arrow::ArrayData &array_data,
@@ -92,7 +92,6 @@ class Projector {
9292
const std::unique_ptr<LLVMGenerator> llvm_generator_;
9393
const SchemaPtr schema_;
9494
const FieldVector output_fields_;
95-
arrow::MemoryPool *pool_;
9695
const std::shared_ptr<Configuration> configuration_;
9796
};
9897

cpp/src/gandiva/integ/binary_test.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ TEST_F(TestBinary, TestSimple) {
5757

5858
// Build a projector for the expressions.
5959
std::shared_ptr<Projector> projector;
60-
Status status = Projector::Make(schema, {expr}, pool_, &projector);
60+
Status status = Projector::Make(schema, {expr}, &projector);
6161
EXPECT_TRUE(status.ok()) << status.message();
6262

6363
// Create a row-batch with some sample data
@@ -75,7 +75,7 @@ TEST_F(TestBinary, TestSimple) {
7575

7676
// Evaluate expression
7777
arrow::ArrayVector outputs;
78-
status = projector->Evaluate(*in_batch, &outputs);
78+
status = projector->Evaluate(*in_batch, pool_, &outputs);
7979
EXPECT_TRUE(status.ok());
8080

8181
// Validate results

cpp/src/gandiva/integ/boolean_expr_test.cc

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ TEST_F(TestBooleanExpr, SimpleAnd) {
5656

5757
// Build a projector for the expressions.
5858
std::shared_ptr<Projector> projector;
59-
Status status = Projector::Make(schema, {expr}, pool_, &projector);
59+
Status status = Projector::Make(schema, {expr}, &projector);
6060
EXPECT_TRUE(status.ok());
6161

6262
// FALSE_VALID && ? => FALSE_VALID
@@ -67,7 +67,7 @@ TEST_F(TestBooleanExpr, SimpleAnd) {
6767
auto in_batch = arrow::RecordBatch::Make(schema, num_records, {arraya, arrayb});
6868

6969
arrow::ArrayVector outputs;
70-
status = projector->Evaluate(*in_batch, &outputs);
70+
status = projector->Evaluate(*in_batch, pool_, &outputs);
7171
EXPECT_TRUE(status.ok());
7272
EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
7373

@@ -78,7 +78,7 @@ TEST_F(TestBooleanExpr, SimpleAnd) {
7878
exp = MakeArrowArrayBool({false, false, false, false}, {true, false, false, false});
7979
in_batch = arrow::RecordBatch::Make(schema, num_records, {arraya, arrayb});
8080
outputs.clear();
81-
projector->Evaluate(*in_batch, &outputs);
81+
projector->Evaluate(*in_batch, pool_, &outputs);
8282
EXPECT_TRUE(status.ok());
8383
EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
8484

@@ -89,7 +89,7 @@ TEST_F(TestBooleanExpr, SimpleAnd) {
8989
exp = MakeArrowArrayBool({false, false, true, false}, {true, false, true, false});
9090
in_batch = arrow::RecordBatch::Make(schema, num_records, {arraya, arrayb});
9191
outputs.clear();
92-
projector->Evaluate(*in_batch, &outputs);
92+
projector->Evaluate(*in_batch, pool_, &outputs);
9393
EXPECT_TRUE(status.ok());
9494
EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
9595

@@ -100,7 +100,7 @@ TEST_F(TestBooleanExpr, SimpleAnd) {
100100
exp = MakeArrowArrayBool({false, false, false, false}, {true, false, false, false});
101101
in_batch = arrow::RecordBatch::Make(schema, num_records, {arraya, arrayb});
102102
outputs.clear();
103-
projector->Evaluate(*in_batch, &outputs);
103+
projector->Evaluate(*in_batch, pool_, &outputs);
104104
EXPECT_TRUE(status.ok());
105105
EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
106106
}
@@ -129,7 +129,7 @@ TEST_F(TestBooleanExpr, SimpleOr) {
129129

130130
// Build a projector for the expressions.
131131
std::shared_ptr<Projector> projector;
132-
Status status = Projector::Make(schema, {expr}, pool_, &projector);
132+
Status status = Projector::Make(schema, {expr}, &projector);
133133
EXPECT_TRUE(status.ok());
134134

135135
// TRUE_VALID || ? => TRUE_VALID
@@ -140,7 +140,7 @@ TEST_F(TestBooleanExpr, SimpleOr) {
140140
auto in_batch = arrow::RecordBatch::Make(schema, num_records, {arraya, arrayb});
141141

142142
arrow::ArrayVector outputs;
143-
status = projector->Evaluate(*in_batch, &outputs);
143+
status = projector->Evaluate(*in_batch, pool_, &outputs);
144144
EXPECT_TRUE(status.ok());
145145
EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
146146

@@ -151,7 +151,7 @@ TEST_F(TestBooleanExpr, SimpleOr) {
151151
exp = MakeArrowArrayBool({false, false, true, false}, {false, false, true, false});
152152
in_batch = arrow::RecordBatch::Make(schema, num_records, {arraya, arrayb});
153153
outputs.clear();
154-
projector->Evaluate(*in_batch, &outputs);
154+
projector->Evaluate(*in_batch, pool_, &outputs);
155155
EXPECT_TRUE(status.ok());
156156
EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
157157

@@ -162,7 +162,7 @@ TEST_F(TestBooleanExpr, SimpleOr) {
162162
exp = MakeArrowArrayBool({false, false, true, false}, {true, false, true, false});
163163
in_batch = arrow::RecordBatch::Make(schema, num_records, {arraya, arrayb});
164164
outputs.clear();
165-
projector->Evaluate(*in_batch, &outputs);
165+
projector->Evaluate(*in_batch, pool_, &outputs);
166166
EXPECT_TRUE(status.ok());
167167
EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
168168

@@ -173,7 +173,7 @@ TEST_F(TestBooleanExpr, SimpleOr) {
173173
exp = MakeArrowArrayBool({false, false, true, false}, {false, false, true, false});
174174
in_batch = arrow::RecordBatch::Make(schema, num_records, {arraya, arrayb});
175175
outputs.clear();
176-
projector->Evaluate(*in_batch, &outputs);
176+
projector->Evaluate(*in_batch, pool_, &outputs);
177177
EXPECT_TRUE(status.ok());
178178
EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
179179
}
@@ -206,7 +206,7 @@ TEST_F(TestBooleanExpr, AndThree) {
206206

207207
// Build a projector for the expressions.
208208
std::shared_ptr<Projector> projector;
209-
Status status = Projector::Make(schema, {expr}, pool_, &projector);
209+
Status status = Projector::Make(schema, {expr}, &projector);
210210
EXPECT_TRUE(status.ok());
211211

212212
int num_records = 8;
@@ -220,7 +220,7 @@ TEST_F(TestBooleanExpr, AndThree) {
220220
auto in_batch = arrow::RecordBatch::Make(schema, num_records, {arraya, arrayb, arrayc});
221221

222222
arrow::ArrayVector outputs;
223-
status = projector->Evaluate(*in_batch, &outputs);
223+
status = projector->Evaluate(*in_batch, pool_, &outputs);
224224
EXPECT_TRUE(status.ok());
225225
EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
226226
}
@@ -253,7 +253,7 @@ TEST_F(TestBooleanExpr, OrThree) {
253253

254254
// Build a projector for the expressions.
255255
std::shared_ptr<Projector> projector;
256-
Status status = Projector::Make(schema, {expr}, pool_, &projector);
256+
Status status = Projector::Make(schema, {expr}, &projector);
257257
EXPECT_TRUE(status.ok());
258258

259259
int num_records = 8;
@@ -267,7 +267,7 @@ TEST_F(TestBooleanExpr, OrThree) {
267267
auto in_batch = arrow::RecordBatch::Make(schema, num_records, {arraya, arrayb, arrayc});
268268

269269
arrow::ArrayVector outputs;
270-
status = projector->Evaluate(*in_batch, &outputs);
270+
status = projector->Evaluate(*in_batch, pool_, &outputs);
271271
EXPECT_TRUE(status.ok());
272272
EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
273273
}
@@ -313,7 +313,7 @@ TEST_F(TestBooleanExpr, BooleanAndInsideIf) {
313313

314314
// Build a projector for the expressions.
315315
std::shared_ptr<Projector> projector;
316-
Status status = Projector::Make(schema, {expr}, pool_, &projector);
316+
Status status = Projector::Make(schema, {expr}, &projector);
317317
EXPECT_TRUE(status.ok());
318318

319319
int num_records = 4;
@@ -325,7 +325,7 @@ TEST_F(TestBooleanExpr, BooleanAndInsideIf) {
325325
auto in_batch = arrow::RecordBatch::Make(schema, num_records, {arraya, arrayb});
326326

327327
arrow::ArrayVector outputs;
328-
status = projector->Evaluate(*in_batch, &outputs);
328+
status = projector->Evaluate(*in_batch, pool_, &outputs);
329329
EXPECT_TRUE(status.ok());
330330
EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
331331
}
@@ -364,7 +364,7 @@ TEST_F(TestBooleanExpr, IfInsideBooleanAnd) {
364364

365365
// Build a projector for the expressions.
366366
std::shared_ptr<Projector> projector;
367-
Status status = Projector::Make(schema, {expr}, pool_, &projector);
367+
Status status = Projector::Make(schema, {expr}, &projector);
368368
EXPECT_TRUE(status.ok());
369369

370370
int num_records = 4;
@@ -376,7 +376,7 @@ TEST_F(TestBooleanExpr, IfInsideBooleanAnd) {
376376
auto in_batch = arrow::RecordBatch::Make(schema, num_records, {arraya, arrayb});
377377

378378
arrow::ArrayVector outputs;
379-
status = projector->Evaluate(*in_batch, &outputs);
379+
status = projector->Evaluate(*in_batch, pool_, &outputs);
380380
EXPECT_TRUE(status.ok());
381381
EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
382382
}

cpp/src/gandiva/integ/date_time_test.cc

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,7 @@ TEST_F(TestProjector, TestIsNull) {
6969
auto isnotnull_expr = TreeExprBuilder::MakeExpression("isnotnull", {t0}, b0);
7070

7171
std::shared_ptr<Projector> projector;
72-
Status status =
73-
Projector::Make(schema, {isnull_expr, isnotnull_expr}, pool_, &projector);
72+
Status status = Projector::Make(schema, {isnull_expr, isnotnull_expr}, &projector);
7473
ASSERT_TRUE(status.ok());
7574

7675
int num_records = 4;
@@ -92,7 +91,7 @@ TEST_F(TestProjector, TestIsNull) {
9291

9392
// Evaluate expression
9493
arrow::ArrayVector outputs;
95-
status = projector->Evaluate(*in_batch, &outputs);
94+
status = projector->Evaluate(*in_batch, pool_, &outputs);
9695
EXPECT_TRUE(status.ok());
9796

9897
// Validate results
@@ -124,8 +123,7 @@ TEST_F(TestProjector, TestDateTime) {
124123

125124
std::shared_ptr<Projector> projector;
126125
Status status = Projector::Make(
127-
schema, {date2year_expr, date2month_expr, ts2month_expr, ts2day_expr}, pool_,
128-
&projector);
126+
schema, {date2year_expr, date2month_expr, ts2month_expr, ts2day_expr}, &projector);
129127
ASSERT_TRUE(status.ok());
130128

131129
struct tm y1970 = {0};
@@ -169,7 +167,7 @@ TEST_F(TestProjector, TestDateTime) {
169167

170168
// Evaluate expression
171169
arrow::ArrayVector outputs;
172-
status = projector->Evaluate(*in_batch, &outputs);
170+
status = projector->Evaluate(*in_batch, pool_, &outputs);
173171
EXPECT_TRUE(status.ok());
174172

175173
// Validate results
@@ -193,8 +191,7 @@ TEST_F(TestProjector, TestTime) {
193191
TreeExprBuilder::MakeExpression("extractHour", {field0}, field_hour);
194192

195193
std::shared_ptr<Projector> projector;
196-
Status status =
197-
Projector::Make(schema, {time2min_expr, time2hour_expr}, pool_, &projector);
194+
Status status = Projector::Make(schema, {time2min_expr, time2hour_expr}, &projector);
198195
ASSERT_TRUE(status.ok());
199196

200197
// create input data
@@ -218,7 +215,7 @@ TEST_F(TestProjector, TestTime) {
218215

219216
// Evaluate expression
220217
arrow::ArrayVector outputs;
221-
status = projector->Evaluate(*in_batch, &outputs);
218+
status = projector->Evaluate(*in_batch, pool_, &outputs);
222219
EXPECT_TRUE(status.ok());
223220

224221
// Validate results
@@ -262,7 +259,7 @@ TEST_F(TestProjector, TestTimestampDiff) {
262259
std::shared_ptr<Projector> projector;
263260
auto exprs = {diff_secs_expr, diff_mins_expr, diff_hours_expr, diff_days_expr,
264261
diff_weeks_expr, diff_months_expr, diff_quarters_expr, diff_years_expr};
265-
Status status = Projector::Make(schema, exprs, pool_, &projector);
262+
Status status = Projector::Make(schema, exprs, &projector);
266263
ASSERT_TRUE(status.ok());
267264

268265
struct tm y1970 = {0};
@@ -313,7 +310,7 @@ TEST_F(TestProjector, TestTimestampDiff) {
313310

314311
// Evaluate expression
315312
arrow::ArrayVector outputs;
316-
status = projector->Evaluate(*in_batch, &outputs);
313+
status = projector->Evaluate(*in_batch, pool_, &outputs);
317314
EXPECT_TRUE(status.ok());
318315

319316
// Validate results

cpp/src/gandiva/integ/filter_test.cc

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,50 @@ class TestFilter : public ::testing::Test {
3232
arrow::MemoryPool* pool_;
3333
};
3434

35+
TEST_F(TestFilter, TestFilterCache) {
36+
// schema for input fields
37+
auto field0 = field("f0", int32());
38+
auto field1 = field("f1", int32());
39+
auto schema = arrow::schema({field0, field1});
40+
41+
// Build condition f0 + f1 < 10
42+
auto node_f0 = TreeExprBuilder::MakeField(field0);
43+
auto node_f1 = TreeExprBuilder::MakeField(field1);
44+
auto sum_func =
45+
TreeExprBuilder::MakeFunction("add", {node_f0, node_f1}, arrow::int32());
46+
auto literal_10 = TreeExprBuilder::MakeLiteral((int32_t)10);
47+
auto less_than_10 = TreeExprBuilder::MakeFunction("less_than", {sum_func, literal_10},
48+
arrow::boolean());
49+
auto condition = TreeExprBuilder::MakeCondition(less_than_10);
50+
51+
std::shared_ptr<Filter> filter;
52+
Status status = Filter::Make(schema, condition, &filter);
53+
EXPECT_TRUE(status.ok());
54+
55+
// same schema and condition, should return the same filter as above.
56+
std::shared_ptr<Filter> cached_filter;
57+
status = Filter::Make(schema, condition, &cached_filter);
58+
EXPECT_TRUE(status.ok());
59+
EXPECT_TRUE(cached_filter.get() == filter.get());
60+
61+
// schema is different, should return a new filter.
62+
auto field2 = field("f2", int32());
63+
auto different_schema = arrow::schema({field0, field1, field2});
64+
std::shared_ptr<Filter> should_be_new_filter;
65+
status = Filter::Make(different_schema, condition, &should_be_new_filter);
66+
EXPECT_TRUE(status.ok());
67+
EXPECT_TRUE(cached_filter.get() != should_be_new_filter.get());
68+
69+
// condition is different, should return a new filter.
70+
auto greater_than_10 = TreeExprBuilder::MakeFunction(
71+
"greater_than", {sum_func, literal_10}, arrow::boolean());
72+
auto new_condition = TreeExprBuilder::MakeCondition(greater_than_10);
73+
std::shared_ptr<Filter> should_be_new_filter1;
74+
status = Filter::Make(schema, new_condition, &should_be_new_filter1);
75+
EXPECT_TRUE(status.ok());
76+
EXPECT_TRUE(cached_filter.get() != should_be_new_filter1.get());
77+
}
78+
3579
TEST_F(TestFilter, TestSimple) {
3680
// schema for input fields
3781
auto field0 = field("f0", int32());

0 commit comments

Comments
 (0)