Skip to content

Commit ebb73ad

Browse files
authored
GDV-55: [C++] Added validation to projector build. (apache#33)
Validating the input schema and expressions during the projector build.
1 parent 8a266c1 commit ebb73ad

16 files changed

+561
-31
lines changed

src/gandiva/src/cpp/include/gandiva/status.h

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,16 @@
1919
#define GANDIVA_STATUS_H
2020

2121
#include <string>
22+
#include <sstream>
2223
#include <utility>
2324

2425
#define GANDIVA_RETURN_NOT_OK(status) \
2526
do { \
2627
Status _status = (status); \
2728
if (!_status.ok()) { \
2829
std::stringstream ss; \
29-
ss << __FILE__ << ":" << __LINE__ << " code: " << #status << "\n" << _status.message(); \
30+
ss << __FILE__ << ":" << __LINE__ << " code: " << _status.CodeAsString() \
31+
<< " \n " << _status.message(); \
3032
return Status(_status.code(), ss.str()); \
3133
} \
3234
} while (0)
@@ -36,7 +38,8 @@ do {
3638
if (!condition) { \
3739
Status _status = (status); \
3840
std::stringstream ss; \
39-
ss << __FILE__ << ":" << __LINE__ << " code: " << #status << "\n" << _status.message(); \
41+
ss << __FILE__ << ":" << __LINE__ << " code: " << _status.CodeAsString() \
42+
<< " \n " << _status.message(); \
4043
return Status(_status.code(), ss.str()); \
4144
} \
4245
} while (0)
@@ -56,6 +59,7 @@ enum class StatusCode : char {
5659
Invalid = 1,
5760
CodeGenError = 2,
5861
ArrowError = 3,
62+
ExpressionValidationError = 4,
5963
};
6064

6165
class Status {
@@ -92,11 +96,26 @@ class Status {
9296
return Status(StatusCode::Invalid, msg);
9397
}
9498

99+
static Status ArrowError(const std::string& msg) {
100+
return Status(StatusCode::ArrowError, msg);
101+
}
102+
103+
static Status ExpressionValidationError(const std::string& msg) {
104+
return Status(StatusCode::ExpressionValidationError, msg);
105+
}
106+
107+
95108
// Returns true if the status indicates success.
96109
bool ok() const { return (state_ == NULL); }
97110

98111
bool IsCodeGenError() const { return code() == StatusCode::CodeGenError; }
99112

113+
bool IsInvalid() const { return code() == StatusCode::Invalid; }
114+
115+
bool IsArrowError() const {return code() == StatusCode::ArrowError; }
116+
117+
bool IsExpressionValidationError() const {return code() == StatusCode::ExpressionValidationError; }
118+
100119
// Return a string representation of this status suitable for printing.
101120
// Returns the string "OK" for success.
102121
std::string ToString() const;
@@ -177,4 +196,4 @@ inline Status& Status::operator&=(Status&& s) {
177196
}
178197

179198
} // namespace gandiva
180-
#endif // GANDIVA_STATUS_H
199+
#endif // GANDIVA_STATUS_H

src/gandiva/src/cpp/include/gandiva/tree_expr_builder.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,25 +34,30 @@ class TreeExprBuilder {
3434
static NodePtr MakeLiteral(double value);
3535

3636
/// \brief create a node on arrow field.
37+
/// returns null if input is null.
3738
static NodePtr MakeField(FieldPtr field);
3839

3940
/// \brief create a node with a function.
41+
/// returns null if return_type is null
4042
static NodePtr MakeFunction(const std::string &name,
4143
const NodeVector &children,
4244
DataTypePtr return_type);
4345

44-
/// \brief Create a node with an if-else expression.
46+
/// \brief create a node with an if-else expression.
47+
/// returns null if any of the inputs is null.
4548
static NodePtr MakeIf(NodePtr condition,
4649
NodePtr this_node,
4750
NodePtr else_node,
4851
DataTypePtr result_type);
4952

5053
/// \brief create an expression with the specified root_node, and the
5154
/// result written to result_field.
55+
/// returns null if the result_field is null.
5256
static ExpressionPtr MakeExpression(NodePtr root_node,
5357
FieldPtr result_field);
5458

5559
/// \brief convenience function for simple function expressions.
60+
/// returns null if the out_field is null.
5661
static ExpressionPtr MakeExpression(const std::string &function,
5762
const FieldVector &in_fields,
5863
FieldPtr out_field);

src/gandiva/src/cpp/integ/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,4 @@ project(gandiva)
1717
add_gandiva_integ_test(projector_test.cc)
1818
add_gandiva_integ_test(if_expr_test.cc)
1919
add_gandiva_integ_test(literal_test.cc)
20+
add_gandiva_integ_test(projector_build_validation_test.cc)
Lines changed: 260 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,260 @@
1+
// Copyright (C) 2017-2018 Dremio Corporation
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include <gtest/gtest.h>
16+
#include "arrow/memory_pool.h"
17+
#include "integ/test_util.h"
18+
#include "gandiva/projector.h"
19+
#include "gandiva/tree_expr_builder.h"
20+
21+
namespace gandiva {
22+
23+
using arrow::int32;
24+
using arrow::float32;
25+
using arrow::boolean;
26+
27+
class TestProjector : public ::testing::Test {
28+
public:
29+
void SetUp() { pool_ = arrow::default_memory_pool(); }
30+
31+
protected:
32+
arrow::MemoryPool* pool_;
33+
};
34+
35+
// Building a projector for an expression that calls an unsupported function
// must fail with an expression validation error naming the function.
TEST_F(TestProjector, TestNonExistentFunction) {
  // Two float inputs and a boolean output.
  auto lhs_field = field("f0", float32());
  auto rhs_field = field("f2", float32());
  auto result_field = field("res", boolean());
  auto input_schema = arrow::schema({lhs_field, rhs_field});

  // Expression calling a function that is not supported.
  auto bad_expr = TreeExprBuilder::MakeExpression(
      "non_existent_function", {lhs_field, rhs_field}, result_field);

  // The build is expected to be rejected.
  std::shared_ptr<Projector> projector;
  Status status = Projector::Make(input_schema, {bad_expr}, pool_, &projector);
  EXPECT_TRUE(status.IsExpressionValidationError());

  const std::string expected_error =
      "Function bool non_existent_function(float, float) not supported yet.";
  const bool has_expected_error =
      status.message().find(expected_error) != std::string::npos;
  EXPECT_TRUE(has_expected_error);
}
56+
57+
// The type produced by the root node has to agree with the type of the field
// the result is written to; otherwise the build is rejected.
TEST_F(TestProjector, TestNotMatchingDataType) {
  // Single float input; the output is declared as boolean.
  auto input_field = field("f0", float32());
  auto result_field = field("res", boolean());
  auto input_schema = arrow::schema({input_field});

  // The root node is the float field itself, while the result field is bool.
  auto root_node = TreeExprBuilder::MakeField(input_field);
  auto mismatched_expr =
      TreeExprBuilder::MakeExpression(root_node, result_field);

  // The build is expected to be rejected.
  std::shared_ptr<Projector> projector;
  Status status =
      Projector::Make(input_schema, {mismatched_expr}, pool_, &projector);
  EXPECT_TRUE(status.IsExpressionValidationError());

  const std::string expected_error =
      "Return type of root node float does not match that of expression bool";
  const bool has_expected_error =
      status.message().find(expected_error) != std::string::npos;
  EXPECT_TRUE(has_expected_error);
}
77+
78+
// A field whose data type is a list must be reported as unsupported when the
// projector is built.
TEST_F(TestProjector, TestNotSupportedDataType) {
  // Input and output are both list<int32>.
  auto list_field = field("f0", list(int32()));
  auto result_field = field("res", list(int32()));
  auto input_schema = arrow::schema({list_field});

  // Expression that simply forwards the list field.
  auto list_node = TreeExprBuilder::MakeField(list_field);
  auto list_expr = TreeExprBuilder::MakeExpression(list_node, result_field);

  // The build is expected to be rejected.
  std::shared_ptr<Projector> projector;
  Status status = Projector::Make(input_schema, {list_expr}, pool_, &projector);
  EXPECT_TRUE(status.IsExpressionValidationError());

  const std::string expected_error = "Field f0 has unsupported data type list";
  const bool has_expected_error =
      status.message().find(expected_error) != std::string::npos;
  EXPECT_TRUE(has_expected_error);
}
97+
98+
// An expression that references a field absent from the schema must be
// rejected when the projector is built.
TEST_F(TestProjector, TestIncorrectSchemaMissingField) {
  // The expression uses f0 and f2, but the schema only declares f0.
  auto field0 = field("f0", float32());
  auto field1 = field("f2", float32());
  // The schema lists f0 exactly once. The previous {field0, field0} form made
  // the test depend on how a schema with duplicate names behaves, which is
  // unrelated to the missing-field case being checked here.
  auto schema = arrow::schema({field0});

  // output fields
  auto field_result = field("res", boolean());

  // Build expression: less_than(f0, f2), where f2 is not in the schema.
  auto lt_expr = TreeExprBuilder::MakeExpression("less_than",
                                                 {field0, field1}, field_result);

  // Build a projector for the expressions; validation is expected to fail.
  std::shared_ptr<Projector> projector;
  Status status = Projector::Make(schema, {lt_expr}, pool_, &projector);
  EXPECT_TRUE(status.IsExpressionValidationError());
  std::string expected_error = "Field f2 not in schema";
  EXPECT_TRUE(status.message().find(expected_error) != std::string::npos);
}
118+
119+
// A field used in an expression must have the same definition as the field
// of that name in the schema; a type mismatch is a validation error.
TEST_F(TestProjector, TestIncorrectSchemaTypeNotMatching) {
  // The schema declares f2 as int32, while the expression uses a float f2.
  auto field0 = field("f0", float32());
  auto field1 = field("f2", float32());
  auto field2 = field("f2", int32());
  auto schema = arrow::schema({field0, field2});

  // output fields
  auto field_result = field("res", boolean());

  // Build expression: less_than(f0, f2) using the float definition of f2.
  auto lt_expr = TreeExprBuilder::MakeExpression("less_than",
                                                 {field0, field1}, field_result);

  // Build a projector for the expressions; validation is expected to fail.
  // (The stray debug print of the status message was removed: it cluttered
  // the test output and relied on <iostream> without including it.)
  std::shared_ptr<Projector> projector;
  Status status = Projector::Make(schema, {lt_expr}, pool_, &projector);
  EXPECT_TRUE(status.IsExpressionValidationError());
  std::string expected_error =
      "Field definition in schema f2: int32 different from field in expression f2: float";
  EXPECT_TRUE(status.message().find(expected_error) != std::string::npos);
}
142+
143+
// An unsupported function used as the condition of an if-else expression
// must make the projector build fail with a validation error.
TEST_F(TestProjector, TestIfNotSupportedFunction) {
  // Two int32 inputs and an int32 output.
  auto a_field = field("a", int32());
  auto b_field = field("b", int32());
  auto result_field = field("res", int32());
  auto input_schema = arrow::schema({a_field, b_field});

  // Expression under test:
  //   if (non_existent_function(a, b))
  //     a
  //   else
  //     b
  auto a_node = TreeExprBuilder::MakeField(a_field);
  auto b_node = TreeExprBuilder::MakeField(b_field);
  auto bad_condition = TreeExprBuilder::MakeFunction(
      "non_existent_function", {a_node, b_node}, boolean());
  auto if_else_node =
      TreeExprBuilder::MakeIf(bad_condition, a_node, b_node, int32());
  auto if_else_expr =
      TreeExprBuilder::MakeExpression(if_else_node, result_field);

  // The build is expected to be rejected.
  std::shared_ptr<Projector> projector;
  Status status =
      Projector::Make(input_schema, {if_else_expr}, pool_, &projector);
  EXPECT_TRUE(status.IsExpressionValidationError());
}
171+
172+
// The declared result type of an if-else node has to match the type of its
// "then" branch; a mismatch is a validation error.
TEST_F(TestProjector, TestIfNotMatchingReturnType) {
  // Two int32 inputs and an int32 output.
  auto a_field = field("a", int32());
  auto b_field = field("b", int32());
  auto result_field = field("res", int32());
  auto input_schema = arrow::schema({a_field, b_field});

  // if (less_than(a, b)) a else b, with the if node declared as boolean
  // even though both branches are int32.
  auto a_node = TreeExprBuilder::MakeField(a_field);
  auto b_node = TreeExprBuilder::MakeField(b_field);
  auto less_than_node =
      TreeExprBuilder::MakeFunction("less_than", {a_node, b_node}, boolean());
  auto if_else_node =
      TreeExprBuilder::MakeIf(less_than_node, a_node, b_node, boolean());
  auto if_else_expr =
      TreeExprBuilder::MakeExpression(if_else_node, result_field);

  // The build is expected to be rejected.
  std::shared_ptr<Projector> projector;
  Status status =
      Projector::Make(input_schema, {if_else_expr}, pool_, &projector);
  EXPECT_TRUE(status.IsExpressionValidationError());

  const std::string expected_error =
      "Return type of if bool and then int32 not matching.";
  const bool has_expected_error =
      status.message().find(expected_error) != std::string::npos;
  EXPECT_TRUE(has_expected_error);
}
199+
200+
// The declared result type of an if-else node has to match the type of its
// "else" branch; a mismatch is a validation error.
TEST_F(TestProjector, TestElseNotMatchingReturnType) {
  // Inputs a and b are int32, c is boolean; the output is int32.
  auto a_field = field("a", int32());
  auto b_field = field("b", int32());
  auto c_field = field("c", boolean());
  auto result_field = field("res", int32());
  auto input_schema = arrow::schema({a_field, b_field, c_field});

  // if (less_than(a, b)) a else c, declared as int32 although the else
  // branch is boolean.
  auto a_node = TreeExprBuilder::MakeField(a_field);
  auto b_node = TreeExprBuilder::MakeField(b_field);
  auto c_node = TreeExprBuilder::MakeField(c_field);
  auto less_than_node =
      TreeExprBuilder::MakeFunction("less_than", {a_node, b_node}, boolean());
  auto if_else_node =
      TreeExprBuilder::MakeIf(less_than_node, a_node, c_node, int32());
  auto if_else_expr =
      TreeExprBuilder::MakeExpression(if_else_node, result_field);

  // The build is expected to be rejected.
  std::shared_ptr<Projector> projector;
  Status status =
      Projector::Make(input_schema, {if_else_expr}, pool_, &projector);
  EXPECT_TRUE(status.IsExpressionValidationError());

  const std::string expected_error =
      "Return type of if int32 and else bool not matching.";
  const bool has_expected_error =
      status.message().find(expected_error) != std::string::npos;
  EXPECT_TRUE(has_expected_error);
}
229+
230+
// A list-typed field used as the "else" branch of an if-else expression must
// be reported as having an unsupported data type.
TEST_F(TestProjector, TestElseNotSupportedType) {
  // Inputs a and b are int32 and c is list<int32>; the output is int32.
  // Note that c is not part of the schema.
  auto a_field = field("a", int32());
  auto b_field = field("b", int32());
  auto c_field = field("c", list(int32()));
  auto result_field = field("res", int32());
  auto input_schema = arrow::schema({a_field, b_field});

  // if (less_than(a, b)) a else c
  auto a_node = TreeExprBuilder::MakeField(a_field);
  auto b_node = TreeExprBuilder::MakeField(b_field);
  auto c_node = TreeExprBuilder::MakeField(c_field);
  auto less_than_node =
      TreeExprBuilder::MakeFunction("less_than", {a_node, b_node}, boolean());
  auto if_else_node =
      TreeExprBuilder::MakeIf(less_than_node, a_node, c_node, int32());
  auto if_else_expr =
      TreeExprBuilder::MakeExpression(if_else_node, result_field);

  // The build is expected to be rejected.
  std::shared_ptr<Projector> projector;
  Status status =
      Projector::Make(input_schema, {if_else_expr}, pool_, &projector);
  EXPECT_TRUE(status.IsExpressionValidationError());

  const std::string expected_error = "Field c has unsupported data type list";
  const bool has_expected_error =
      status.message().find(expected_error) != std::string::npos;
  EXPECT_TRUE(has_expected_error);
}
259+
260+
} // namespace gandiva

src/gandiva/src/cpp/integ/projector_test.cc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -433,5 +433,4 @@ TEST_F(TestProjector, TestZeroCopyNegative) {
433433
status = projector->Evaluate(*in_batch, {bad_array_data3});
434434
EXPECT_EQ(status.code(), StatusCode::Invalid);
435435
}
436-
437436
} // namespace gandiva

src/gandiva/src/cpp/src/codegen/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ add_library(gandiva SHARED
3232
projector.cc
3333
status.cc
3434
tree_expr_builder.cc
35+
expr_validator.cc
3536
${BC_FILE_PATH_CC})
3637

3738
# For users of gandiva library (including integ tests), include-dir is :

0 commit comments

Comments
 (0)