1515// specific language governing permissions and limitations
1616// under the License.
1717
18- //! Defines the sort preserving merge plan
18+ //! [`SortPreservingMergeExec`] merges multiple sorted streams into one sorted stream.
1919
2020use std:: any:: Any ;
2121use std:: sync:: Arc ;
@@ -38,10 +38,22 @@ use log::{debug, trace};
3838
3939/// Sort preserving merge execution plan
4040///
41- /// This takes an input execution plan and a list of sort expressions, and
42- /// provided each partition of the input plan is sorted with respect to
43- /// these sort expressions, this operator will yield a single partition
44- /// that is also sorted with respect to them
41+ /// # Overview
42+ ///
43+ /// This operator implements a K-way merge. It is used to merge multiple sorted
44+ /// streams into a single sorted stream and is highly optimized.
45+ ///
46+ /// ## Inputs:
47+ ///
48+ /// 1. A list of sort expressions
49+ /// 2. An input plan, where each partition is sorted with respect to
50+ /// these sort expressions.
51+ ///
52+ /// ## Output:
53+ ///
54+ /// 1. A single partition that is also sorted with respect to the expressions.
55+ ///
56+ /// ## Diagram
4557///
4658/// ```text
4759/// ┌─────────────────────────┐
@@ -55,12 +67,12 @@ use log::{debug, trace};
5567/// ┌─────────────────────────┐ │ └───────────────────┘ └─┬─────┴───────────────────────┘
5668/// │ ╔═══╦═══╗ │ │
5769/// │ ║ B ║ E ║ ... │──┘ │
58- /// │ ╚═══╩═══╝ │ Note Stable Sort: the merged stream
59- /// └─────────────────────────┘ places equal rows from stream 1
70+ /// │ ╚═══╩═══╝ │ Stable sort if `enable_round_robin_repartition=false`:
71+ /// └─────────────────────────┘ the merged stream places equal rows from stream 1
6072/// Stream 2
6173///
6274///
63- /// Input Streams Output stream
75+ /// Input Partitions Output Partition
6476/// (sorted) (sorted)
6577/// ```
6678///
@@ -70,7 +82,7 @@ use log::{debug, trace};
7082/// the output and inputs are not polled again.
7183#[ derive( Debug , Clone ) ]
7284pub struct SortPreservingMergeExec {
73- /// Input plan
85+ /// Input plan with sorted partitions
7486 input : Arc < dyn ExecutionPlan > ,
7587 /// Sort expressions
7688 expr : LexOrdering ,
@@ -80,7 +92,9 @@ pub struct SortPreservingMergeExec {
8092 fetch : Option < usize > ,
8193 /// Cache holding plan properties like equivalences, output partitioning etc.
8294 cache : PlanProperties ,
83- /// Configuration parameter to enable round-robin selection of tied winners of loser tree.
95+ /// Use round-robin selection of tied winners of the loser tree
96+ ///
97+ /// See [`Self::with_round_robin_repartition`] for more information.
8498 enable_round_robin_repartition : bool ,
8599}
86100
@@ -105,6 +119,14 @@ impl SortPreservingMergeExec {
105119 }
106120
107121 /// Sets the selection strategy of tied winners of the loser tree algorithm
122+ ///
123+ /// If true (the default), equal output rows are placed in the merged stream
124+ /// in round-robin fashion. This approach consumes input streams at more
125+ /// even rates when there are many rows with the same sort key.
126+ ///
127+ /// If false, equal output rows are always placed in the merged stream in
128+ /// the order of the inputs, resulting in potentially slower execution but a
129+ /// stable output order.
108130 pub fn with_round_robin_repartition (
109131 mut self ,
110132 enable_round_robin_repartition : bool ,
@@ -128,7 +150,8 @@ impl SortPreservingMergeExec {
128150 self . fetch
129151 }
130152
131- /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc.
153+ /// Creates the cache object that stores the plan properties
154+ /// such as schema, equivalence properties, ordering, partitioning, etc.
132155 fn compute_properties (
133156 input : & Arc < dyn ExecutionPlan > ,
134157 ordering : LexOrdering ,
0 commit comments