Skip to content

Commit c77dde2

Browse files
richabanker authored and k8s-publishing-bot committed
util/sort: Add MergePreservingRelativeOrder for topological sorting
Add a topological sort implementation that merges multiple ordered lists while preserving their relative ordering. This is used by peer-aggregated discovery to deterministically merge discovery documents from multiple API servers.

Part of KEP-4020: Unknown Version Interoperability Proxy

Kubernetes-commit: 3b89deb6b3734c7ce1ea14c41c6e8bd568fbbcb4
1 parent 729c13d commit c77dde2

File tree

2 files changed

+339
-0
lines changed

2 files changed

+339
-0
lines changed

pkg/util/sort/sort.go

Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package sort
18+
19+
import (
20+
"container/heap"
21+
"fmt"
22+
"sort"
23+
24+
"k8s.io/apimachinery/pkg/util/sets"
25+
)
26+
27+
// MergePreservingRelativeOrder performs a topological consensus sort of items from multiple sources.
28+
// It merges multiple lists of strings into a single list, preserving the relative order of
29+
// elements within each source list.
30+
//
31+
// For any two items, if one appears before the other in any of the input lists,
32+
// that relative order will be preserved in the output. If no relative ordering is
33+
// defined between two items, they are sorted lexicographically.
34+
//
35+
// The function uses Kahn's algorithm for topological sorting with a min-heap to ensure
36+
// deterministic output. Items with no dependencies are processed in lexicographic order,
37+
// guaranteeing consistent results across multiple invocations with the same input.
38+
//
39+
// This function contains a shortcut optimization that returns an input list directly
40+
// if it already contains all unique items. This provides O(n) performance in the best case.
41+
//
42+
// Example:
43+
// - Input: {{"a", "b", "c"}, {"b", "c"}} returns {"a", "b", "c"}
44+
// - Input: {{"a", "c"}, {"b", "c"}} returns {"a", "b", "c"} (lexicographic tie-breaking)
45+
// - Input: {{"a", "b"}, {"b", "a"}} returns error (cycle detected)
46+
//
47+
// Complexity: O(L*n + V*log(V) + E) where L is the number of lists, n is the average
48+
// list size, V is the number of unique items, and E is the number of precedence edges.
49+
//
50+
// This is useful for creating a stable, consistent ordering when merging data from
51+
// multiple sources that may have partial but not conflicting orderings.
52+
func MergePreservingRelativeOrder(inputLists [][]string) []string {
53+
if len(inputLists) == 0 {
54+
return nil
55+
}
56+
57+
// Build a directed graph of precedence relationships
58+
graph := make(map[string]*graphNode)
59+
for _, list := range inputLists {
60+
for i, item := range list {
61+
node := getOrCreateNode(graph, item)
62+
63+
// Add edge from current item to next item in list
64+
if i < len(list)-1 {
65+
nextItem := list[i+1]
66+
nextNode := getOrCreateNode(graph, nextItem)
67+
68+
// Only add edge if not already present (avoid incrementing in-degree multiple times)
69+
if !node.outEdges.Has(nextItem) {
70+
node.outEdges.Insert(nextItem)
71+
nextNode.inDegree++
72+
}
73+
}
74+
}
75+
}
76+
77+
// Shortcut: if any input list contains all items (no duplicates), use it
78+
allItems := sets.New[string]()
79+
for name := range graph {
80+
allItems.Insert(name)
81+
}
82+
for _, list := range inputLists {
83+
if len(list) == allItems.Len() && isUnique(list) {
84+
return list
85+
}
86+
}
87+
88+
// Perform topological sort using Kahn's algorithm with min-heap for determinism
89+
result, err := topologicalSort(graph)
90+
if err != nil {
91+
// This should not happen with valid input, but if it does,
92+
// fall back to lexicographic sort to provide some result
93+
items := make([]string, 0, len(graph))
94+
for name := range graph {
95+
items = append(items, name)
96+
}
97+
sort.Strings(items)
98+
return items
99+
}
100+
101+
return result
102+
}
103+
104+
// getOrCreateNode retrieves or creates a graph node for the given name
105+
func getOrCreateNode(graph map[string]*graphNode, name string) *graphNode {
106+
if graph[name] == nil {
107+
graph[name] = &graphNode{
108+
outEdges: sets.New[string](),
109+
inDegree: 0,
110+
}
111+
}
112+
return graph[name]
113+
}
114+
115+
// isUnique reports whether every element of list occurs exactly once.
// An empty or nil list is considered unique.
func isUnique(list []string) bool {
	visited := make(map[string]struct{}, len(list))
	for i := range list {
		if _, duplicate := visited[list[i]]; duplicate {
			return false
		}
		visited[list[i]] = struct{}{}
	}
	return true
}
126+
127+
// topologicalSort performs Kahn's algorithm with a min-heap for deterministic ordering
128+
func topologicalSort(graph map[string]*graphNode) ([]string, error) {
129+
// Initialize min-heap with all nodes that have no incoming edges
130+
pq := &stringMinHeap{}
131+
heap.Init(pq)
132+
133+
for name, node := range graph {
134+
if node.inDegree == 0 {
135+
heap.Push(pq, name)
136+
}
137+
}
138+
139+
result := make([]string, 0, len(graph))
140+
141+
for pq.Len() > 0 {
142+
// Pop item with lowest lexicographic value
143+
current := heap.Pop(pq).(string)
144+
result = append(result, current)
145+
146+
currentNode := graph[current]
147+
148+
// Reduce in-degree for all neighbors
149+
for neighbor := range currentNode.outEdges {
150+
neighborNode := graph[neighbor]
151+
neighborNode.inDegree--
152+
153+
// If in-degree becomes 0, add to heap
154+
if neighborNode.inDegree == 0 {
155+
heap.Push(pq, neighbor)
156+
}
157+
}
158+
}
159+
160+
// Check for cycles
161+
if len(result) != len(graph) {
162+
return nil, fmt.Errorf("cycle detected in precedence graph: sorted %d items but graph has %d items", len(result), len(graph))
163+
}
164+
165+
return result, nil
166+
}
167+
168+
// graphNode represents a node in the precedence graph
169+
type graphNode struct {
170+
// Items that should come after this item
171+
outEdges sets.Set[string]
172+
// Number of items that should come before this item
173+
inDegree int
174+
}
175+
176+
// stringMinHeap is a slice of strings implementing heap.Interface such that the
// lexicographically smallest string sits at the top of the heap.
type stringMinHeap []string

// Len returns the number of strings currently held.
func (h stringMinHeap) Len() int { return len(h) }

// Less reports whether the string at index i sorts before the one at index j.
func (h stringMinHeap) Less(i, j int) bool { return h[i] < h[j] }

// Swap exchanges the strings at indexes i and j.
func (h stringMinHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }

// Push appends x, which must be a string, to the end of the slice.
// It is meant to be called through heap.Push, which restores the heap order.
func (h *stringMinHeap) Push(x any) {
	s := x.(string)
	*h = append(*h, s)
}

// Pop removes and returns the last element of the slice.
// It is meant to be called through heap.Pop, which first moves the minimum there.
func (h *stringMinHeap) Pop() any {
	last := len(*h) - 1
	item := (*h)[last]
	*h = (*h)[:last]
	return item
}

pkg/util/sort/sort_test.go

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package sort
18+
19+
import (
20+
"testing"
21+
)
22+
23+
func TestSortDiscoveryGroupsTopo(t *testing.T) {
24+
cases := []struct {
25+
name string
26+
input [][]string
27+
want []string
28+
}{
29+
{
30+
name: "consensus ordering",
31+
input: [][]string{
32+
{"A", "B", "C", "D"},
33+
{"A", "B", "C", "D"},
34+
{"A", "X", "Z", "D"},
35+
{"Z", "Y"},
36+
{"Q"},
37+
},
38+
want: []string{"A", "B", "C", "Q", "X", "Z", "D", "Y"},
39+
},
40+
{
41+
name: "empty input",
42+
input: [][]string{},
43+
want: []string{},
44+
},
45+
{
46+
name: "single peer",
47+
input: [][]string{{"foo", "bar", "baz"}},
48+
want: []string{"foo", "bar", "baz"},
49+
},
50+
{
51+
name: "conflicting orderings",
52+
input: [][]string{{"A", "B"}, {"B", "A"}},
53+
want: []string{"A", "B"},
54+
},
55+
{
56+
name: "empty list merged with non-empty list",
57+
input: [][]string{{}, {"A", "B", "C"}},
58+
want: []string{"A", "B", "C"},
59+
},
60+
{
61+
name: "multiple empty lists merged",
62+
input: [][]string{{}, {}, {}},
63+
want: []string{},
64+
},
65+
{
66+
name: "lexical tiebreak at beginning",
67+
input: [][]string{
68+
{"C", "D", "E"},
69+
{"B", "D", "E"},
70+
{"A", "D", "E"},
71+
},
72+
// A, B, C have no precedence constraints, so lexical order
73+
want: []string{"A", "B", "C", "D", "E"},
74+
},
75+
{
76+
name: "lexical tiebreak in middle",
77+
input: [][]string{
78+
{"A", "D", "E"},
79+
{"A", "C", "E"},
80+
{"A", "B", "E"},
81+
},
82+
// A comes first (consensus), then B, C, D (lexical), then E (consensus)
83+
want: []string{"A", "B", "C", "D", "E"},
84+
},
85+
{
86+
name: "conflicting orderings of 3 lists",
87+
input: [][]string{
88+
{"A", "B", "C"},
89+
{"B", "C", "A"},
90+
{"C", "A", "B"},
91+
},
92+
// Creates cycle: A->B, B->C, C->A
93+
// Fallback to lexicographic sort
94+
want: []string{"A", "B", "C"},
95+
},
96+
{
97+
name: "conflicting ordering with different list lengths",
98+
input: [][]string{
99+
{"A", "B", "C", "D"},
100+
{"B", "A"},
101+
{"C", "D"},
102+
},
103+
// A->B->C->D from first list, but B->A from second
104+
// Creates cycle between A and B
105+
// Fallback to lexicographic sort
106+
want: []string{"A", "B", "C", "D"},
107+
},
108+
{
109+
name: "conflicting partial lists",
110+
input: [][]string{
111+
{"A", "B"},
112+
{"C", "D"},
113+
{"B", "A"},
114+
},
115+
// A->B from first, B->A from third (cycle)
116+
// C->D is independent
117+
// Fallback to lexicographic sort
118+
want: []string{"A", "B", "C", "D"},
119+
},
120+
{
121+
name: "cycle",
122+
input: [][]string{
123+
{"A", "B"},
124+
{"B", "C"},
125+
{"C", "A"},
126+
},
127+
// Creates cycle: A->B->C->A
128+
// Fallback to lexicographic sort
129+
want: []string{"A", "B", "C"},
130+
},
131+
}
132+
133+
for _, tc := range cases {
134+
t.Run(tc.name, func(t *testing.T) {
135+
got := MergePreservingRelativeOrder(tc.input)
136+
if len(got) != len(tc.want) {
137+
t.Errorf("length mismatch:\n got: %d\n want: %d", len(got), len(tc.want))
138+
return
139+
}
140+
for i := range got {
141+
if got[i] != tc.want[i] {
142+
t.Errorf("mismatch got: %v\n want: %v", got, tc.want)
143+
return
144+
}
145+
}
146+
})
147+
}
148+
}

0 commit comments

Comments
 (0)