diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp index 61c6422de5aa..9e8340ef43da 100644 --- a/src/Processors/QueryPlan/PartsSplitter.cpp +++ b/src/Processors/QueryPlan/PartsSplitter.cpp @@ -14,6 +14,7 @@ #include #include #include +#include using namespace DB; @@ -78,7 +79,25 @@ std::pair, std::vector> split(RangesInDat RangeEnd, }; - [[ maybe_unused ]] bool operator<(const PartsRangesIterator & other) const { return std::tie(value, event) > std::tie(other.value, other.event); } + [[maybe_unused]] bool operator<(const PartsRangesIterator & other) const + { + /// Inverted comparison: yields true when `value > other.value`, checked element by element with the accurate Field visitors (presumably so that a max-heap pops the smallest value first; assumes both tuples have the same size; verify against caller). + for (size_t i = 0; i < value.size(); ++i) + { + if (applyVisitor(FieldVisitorAccurateLess(), value[i], other.value[i])) + return false; + + if (!applyVisitor(FieldVisitorAccurateEquals(), value[i], other.value[i])) + return true; + } + + /// All values are equal: fall back to mark numbers (range.begin for RangeStart, range.end otherwise), inverted as well. Within the same part we should process events in order of mark numbers, + /// because they are already ordered by value and range ends have greater mark numbers than the beginnings. + /// Otherwise we could get invalid ranges with the right bound that is less than the left bound. NOTE(review): the part index is not compared here, so marks of different parts are compared too; confirm this is intended. + const auto ev_mark = event == EventType::RangeStart ? 
other.range.begin : other.range.end; + return ev_mark > other_ev_mark; + } Values value; MarkRangeWithPartIdx range; diff --git a/tests/queries/0_stateless/02875_final_invalid_read_ranges_bug.reference b/tests/queries/0_stateless/02875_final_invalid_read_ranges_bug.reference new file mode 100644 index 000000000000..573541ac9702 --- /dev/null +++ b/tests/queries/0_stateless/02875_final_invalid_read_ranges_bug.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/02875_final_invalid_read_ranges_bug.sql b/tests/queries/0_stateless/02875_final_invalid_read_ranges_bug.sql new file mode 100644 index 000000000000..4e91c2e31676 --- /dev/null +++ b/tests/queries/0_stateless/02875_final_invalid_read_ranges_bug.sql @@ -0,0 +1,20 @@ +CREATE TABLE t +( + tid UInt64, + processed_at DateTime, + created_at DateTime, + amount Int64 +) +ENGINE = ReplacingMergeTree +PARTITION BY toStartOfQuarter(created_at) +PRIMARY KEY (toStartOfDay(created_at), toStartOfDay(processed_at)) +ORDER BY (toStartOfDay(created_at), toStartOfDay(processed_at), tid) +SETTINGS index_granularity = 1; + +INSERT INTO t VALUES (5879429,'2023-07-01 03:50:35','2023-07-01 03:50:35',-278) (5881397,'2023-07-01 06:22:26','2023-07-01 06:22:27',2807) (5925060,'2023-07-04 00:24:03','2023-07-04 00:24:02',-12) (5936591,'2023-07-04 07:37:19','2023-07-04 07:37:18',-12) (5940709,'2023-07-04 09:13:35','2023-07-04 09:13:35',2820) (5942342,'2023-07-04 09:58:00','2023-07-04 09:57:59',-12) (5952231,'2023-07-04 22:33:24','2023-07-04 22:33:24',1692) (5959449,'2023-07-05 04:32:55','2023-07-05 04:32:54',-12) (5963240,'2023-07-05 06:37:08','2023-07-05 06:37:09',1709) (5965742,'2023-07-05 07:27:01','2023-07-05 07:27:02',1709) (5969948,'2023-07-05 08:44:36','2023-07-05 08:44:37',2278) (5971673,'2023-07-05 09:14:09','2023-07-05 09:14:09',5695) (6012987,'2023-07-06 20:52:28','2023-07-06 20:52:27',-536); + +SELECT sum(amount) +FROM t FINAL +WHERE (processed_at >= '2023-09-19 00:00:00') AND (processed_at <= '2023-09-20 
01:00:00'); + +DROP TABLE t;