-
Notifications
You must be signed in to change notification settings - Fork 1.4k
fix: Fix partition filters with timestamp value #12368
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
Changes from all commits
bc9e9b3
bd13a09
e06a639
08fcbfb
cf9eb8e
d4fa061
3015b9d
82f5bef
b2e1dd3
239854f
e45c7ac
acbf15e
0f72f61
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -202,19 +202,8 @@ class TableScanTest : public virtual HiveConnectorTestBase { | |
| .planNode(); | ||
|
|
||
| std::string partitionValueStr; | ||
| if (partitionType->isTimestamp() && partitionValue.has_value()) { | ||
| auto t = util::fromTimestampString( | ||
| StringView(*partitionValue), | ||
| util::TimestampParseMode::kPrestoCast) | ||
| .thenOrThrow(folly::identity, [&](const Status& status) { | ||
| VELOX_USER_FAIL("{}", status.message()); | ||
| }); | ||
| t.toGMT(Timestamp::defaultTimezone()); | ||
| partitionValueStr = "'" + t.toString() + "'"; | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not entirely sure why this piece needs to be removed.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
This code was introduced by https://github.com/facebookincubator/velox/pull/11754/files#diff-c62590f29a2bf22e50e276f2665e0a7666968880de73a6b4db745cbc0cfde6feL202 and is only called by |
||
| } else { | ||
| partitionValueStr = | ||
| partitionValue.has_value() ? "'" + *partitionValue + "'" : "null"; | ||
| } | ||
| partitionValueStr = | ||
| partitionValue.has_value() ? "'" + *partitionValue + "'" : "null"; | ||
| assertQuery( | ||
| op, split, fmt::format("SELECT {}, * FROM tmp", partitionValueStr)); | ||
|
|
||
|
|
@@ -2001,48 +1990,187 @@ TEST_F(TableScanTest, partitionedTableTimestampKey) { | |
| writeToFile(filePath->getPath(), vectors); | ||
| createDuckDbTable(vectors); | ||
| const std::string partitionValue = "2023-10-27 00:12:35"; | ||
| testPartitionedTable(filePath->getPath(), TIMESTAMP(), partitionValue); | ||
|
|
||
| // Test partition filter on TIMESTAMP column. | ||
| auto partitionType = TIMESTAMP(); | ||
| // Test partition value is null. | ||
| testPartitionedTable(filePath->getPath(), partitionType, std::nullopt); | ||
|
|
||
| auto split = exec::test::HiveConnectorSplitBuilder(filePath->getPath()) | ||
| .partitionKey("pkey", partitionValue) | ||
| .build(); | ||
|
|
||
| ColumnHandleMap assignments = { | ||
| {"pkey", partitionKey("pkey", TIMESTAMP())}, | ||
| {"c0", regularColumn("c0", BIGINT())}, | ||
| {"c1", regularColumn("c1", DOUBLE())}}; | ||
|
|
||
| Timestamp ts = | ||
| util::fromTimestampString( | ||
| StringView(partitionValue), util::TimestampParseMode::kPrestoCast) | ||
| .thenOrThrow(folly::identity, [&](const Status& status) { | ||
| VELOX_USER_FAIL("{}", status.message()); | ||
| }); | ||
| // Read timestamp partition value as UTC. | ||
| std::string tsValue = "'" + ts.toString() + "'"; | ||
|
|
||
| Timestamp tsAsLocalTime = ts; | ||
| tsAsLocalTime.toGMT(Timestamp::defaultTimezone()); | ||
| // Read timestamp partition value as local time. | ||
| std::string tsValueAsLocal = "'" + tsAsLocalTime.toString() + "'"; | ||
|
|
||
| { | ||
| auto split = exec::test::HiveConnectorSplitBuilder(filePath->getPath()) | ||
| .partitionKey("pkey", partitionValue) | ||
| .build(); | ||
| auto outputType = | ||
| ROW({"pkey", "c0", "c1"}, {TIMESTAMP(), BIGINT(), DOUBLE()}); | ||
| ColumnHandleMap assignments = { | ||
| {"pkey", partitionKey("pkey", TIMESTAMP())}, | ||
| {"c0", regularColumn("c0", BIGINT())}, | ||
| {"c1", regularColumn("c1", DOUBLE())}}; | ||
| auto plan = | ||
| PlanBuilder() | ||
| .startTableScan() | ||
| .tableName("hive_table") | ||
| .outputType( | ||
| ROW({"pkey", "c0", "c1"}, {partitionType, BIGINT(), DOUBLE()})) | ||
| .assignments(assignments) | ||
| .endTableScan() | ||
| .planNode(); | ||
|
|
||
| auto expect = [&](bool asLocalTime) { | ||
| AssertQueryBuilder(plan, duckDbQueryRunner_) | ||
| .connectorSessionProperty( | ||
| kHiveConnectorId, | ||
| connector::hive::HiveConfig:: | ||
| kReadTimestampPartitionValueAsLocalTimeSession, | ||
| asLocalTime ? "true" : "false") | ||
| .splits({split}) | ||
| .assertResults(fmt::format( | ||
| "SELECT {}, * FROM tmp", asLocalTime ? tsValueAsLocal : tsValue)); | ||
| }; | ||
|
|
||
| common::SubfieldFilters filters; | ||
| // pkey = 2023-10-27 00:12:35. | ||
| auto lower = util::fromTimestampString( | ||
| StringView("2023-10-27 00:12:35"), | ||
| util::TimestampParseMode::kPrestoCast) | ||
| .value(); | ||
| lower.toGMT(Timestamp::defaultTimezone()); | ||
| filters[common::Subfield("pkey")] = | ||
| std::make_unique<common::TimestampRange>(lower, lower, false); | ||
| expect(true); | ||
| expect(false); | ||
| } | ||
|
|
||
| auto tableHandle = std::make_shared<HiveTableHandle>( | ||
| "test-hive", "hive_table", true, std::move(filters), nullptr, nullptr); | ||
| auto op = std::make_shared<TableScanNode>( | ||
| "0", | ||
| std::move(outputType), | ||
| std::move(tableHandle), | ||
| std::move(assignments)); | ||
| { | ||
| auto plan = | ||
| PlanBuilder() | ||
| .startTableScan() | ||
| .tableName("hive_table") | ||
| .outputType( | ||
| ROW({"c0", "pkey", "c1"}, {BIGINT(), partitionType, DOUBLE()})) | ||
| .assignments(assignments) | ||
| .endTableScan() | ||
| .planNode(); | ||
|
|
||
| auto expect = [&](bool asLocalTime) { | ||
| AssertQueryBuilder(plan, duckDbQueryRunner_) | ||
| .connectorSessionProperty( | ||
| kHiveConnectorId, | ||
| connector::hive::HiveConfig:: | ||
| kReadTimestampPartitionValueAsLocalTimeSession, | ||
| asLocalTime ? "true" : "false") | ||
| .splits({split}) | ||
| .assertResults(fmt::format( | ||
| "SELECT c0, {}, c1 FROM tmp", | ||
| asLocalTime ? tsValueAsLocal : tsValue)); | ||
| }; | ||
| expect(true); | ||
| expect(false); | ||
| } | ||
|
|
||
| auto t = | ||
| util::fromTimestampString( | ||
| StringView(partitionValue), util::TimestampParseMode::kPrestoCast) | ||
| .thenOrThrow(folly::identity, [&](const Status& status) { | ||
| VELOX_USER_FAIL("{}", status.message()); | ||
| }); | ||
| t.toGMT(Timestamp::defaultTimezone()); | ||
| std::string partitionValueStr = "'" + t.toString() + "'"; | ||
| assertQuery( | ||
| op, split, fmt::format("SELECT {}, * FROM tmp", partitionValueStr)); | ||
| { | ||
| auto plan = | ||
| PlanBuilder() | ||
| .startTableScan() | ||
| .tableName("hive_table") | ||
| .outputType( | ||
| ROW({"c0", "c1", "pkey"}, {BIGINT(), DOUBLE(), partitionType})) | ||
| .assignments(assignments) | ||
| .endTableScan() | ||
| .planNode(); | ||
|
|
||
| auto expect = [&](bool asLocalTime) { | ||
| AssertQueryBuilder(plan, duckDbQueryRunner_) | ||
| .connectorSessionProperty( | ||
| kHiveConnectorId, | ||
| connector::hive::HiveConfig:: | ||
| kReadTimestampPartitionValueAsLocalTimeSession, | ||
| asLocalTime ? "true" : "false") | ||
| .splits({split}) | ||
| .assertResults(fmt::format( | ||
| "SELECT c0, c1, {} FROM tmp", | ||
| asLocalTime ? tsValueAsLocal : tsValue)); | ||
| }; | ||
| expect(true); | ||
| expect(false); | ||
| } | ||
|
|
||
| { | ||
| // Select only partition key. | ||
| auto plan = | ||
| PlanBuilder() | ||
| .startTableScan() | ||
| .tableName("hive_table") | ||
| .outputType(ROW({"pkey"}, {partitionType})) | ||
| .assignments({{"pkey", partitionKey("pkey", partitionType)}}) | ||
| .endTableScan() | ||
| .planNode(); | ||
|
|
||
| auto expect = [&](bool asLocalTime) { | ||
| AssertQueryBuilder(plan, duckDbQueryRunner_) | ||
| .connectorSessionProperty( | ||
| kHiveConnectorId, | ||
| connector::hive::HiveConfig:: | ||
| kReadTimestampPartitionValueAsLocalTimeSession, | ||
| asLocalTime ? "true" : "false") | ||
| .splits({split}) | ||
| .assertResults(fmt::format( | ||
| "SELECT {} FROM tmp", asLocalTime ? tsValueAsLocal : tsValue)); | ||
| }; | ||
| expect(true); | ||
| expect(false); | ||
| } | ||
|
|
||
| // Test partition filter on TIMESTAMP column. | ||
| { | ||
| auto planWithSubfilter = [&](bool asLocalTime) { | ||
| auto outputType = | ||
| ROW({"pkey", "c0", "c1"}, {TIMESTAMP(), BIGINT(), DOUBLE()}); | ||
| common::SubfieldFilters filters; | ||
| // pkey = 2023-10-27 00:12:35. | ||
| auto lower = | ||
| util::fromTimestampString( | ||
| StringView(partitionValue), util::TimestampParseMode::kPrestoCast) | ||
| .value(); | ||
| if (asLocalTime) { | ||
| lower.toGMT(Timestamp::defaultTimezone()); | ||
| } | ||
| filters[common::Subfield("pkey")] = | ||
| std::make_unique<common::TimestampRange>(lower, lower, false); | ||
| auto tableHandle = std::make_shared<HiveTableHandle>( | ||
| "test-hive", | ||
| "hive_table", | ||
| true, | ||
| std::move(filters), | ||
| nullptr, | ||
| nullptr); | ||
|
|
||
| return PlanBuilder() | ||
| .startTableScan() | ||
| .tableHandle(tableHandle) | ||
| .outputType(outputType) | ||
| .assignments(assignments) | ||
| .endTableScan() | ||
| .planNode(); | ||
| }; | ||
|
|
||
| auto expect = [&](bool asLocalTime) { | ||
| AssertQueryBuilder(planWithSubfilter(asLocalTime), duckDbQueryRunner_) | ||
| .connectorSessionProperty( | ||
| kHiveConnectorId, | ||
| connector::hive::HiveConfig:: | ||
| kReadTimestampPartitionValueAsLocalTimeSession, | ||
| asLocalTime ? "true" : "false") | ||
| .splits({split}) | ||
| .assertResults(fmt::format( | ||
| "SELECT {}, * FROM tmp", asLocalTime ? tsValueAsLocal : tsValue)); | ||
| }; | ||
| expect(true); | ||
| expect(false); | ||
| } | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be nice if we could add test for this change on the iceberg read.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@kecookier are you planning to address this comment?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@rui-mo @majetideepak I am not familiar with Iceberg yet, and I need some time to add this test case. This may be done in a follow-up PR. Perhaps this PR can be merged first?