|
| 1 | +/* |
| 2 | +Copyright 2025 The Kubernetes Authors. |
| 3 | +
|
| 4 | +Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | +you may not use this file except in compliance with the License. |
| 6 | +You may obtain a copy of the License at |
| 7 | +
|
| 8 | + http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | +
|
| 10 | +Unless required by applicable law or agreed to in writing, software |
| 11 | +distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | +See the License for the specific language governing permissions and |
| 14 | +limitations under the License. |
| 15 | +*/ |
| 16 | + |
| 17 | +package sort |
| 18 | + |
| 19 | +import ( |
| 20 | + "container/heap" |
| 21 | + "fmt" |
| 22 | + "sort" |
| 23 | + |
| 24 | + "k8s.io/apimachinery/pkg/util/sets" |
| 25 | +) |
| 26 | + |
| 27 | +// MergePreservingRelativeOrder performs a topological consensus sort of items from multiple sources. |
| 28 | +// It merges multiple lists of strings into a single list, preserving the relative order of |
| 29 | +// elements within each source list. |
| 30 | +// |
| 31 | +// For any two items, if one appears before the other in any of the input lists, |
| 32 | +// that relative order will be preserved in the output. If no relative ordering is |
| 33 | +// defined between two items, they are sorted lexicographically. |
| 34 | +// |
| 35 | +// The function uses Kahn's algorithm for topological sorting with a min-heap to ensure |
| 36 | +// deterministic output. Items with no dependencies are processed in lexicographic order, |
| 37 | +// guaranteeing consistent results across multiple invocations with the same input. |
| 38 | +// |
| 39 | +// This function contains a shortcut optimization that returns an input list directly |
| 40 | +// if it already contains all unique items. This provides O(n) performance in the best case. |
| 41 | +// |
| 42 | +// Example: |
| 43 | +// - Input: {{"a", "b", "c"}, {"b", "c"}} returns {"a", "b", "c"} |
| 44 | +// - Input: {{"a", "c"}, {"b", "c"}} returns {"a", "b", "c"} (lexicographic tie-breaking) |
| 45 | +// - Input: {{"a", "b"}, {"b", "a"}} returns error (cycle detected) |
| 46 | +// |
| 47 | +// Complexity: O(L*n + V*log(V) + E) where L is the number of lists, n is the average |
| 48 | +// list size, V is the number of unique items, and E is the number of precedence edges. |
| 49 | +// |
| 50 | +// This is useful for creating a stable, consistent ordering when merging data from |
| 51 | +// multiple sources that may have partial but not conflicting orderings. |
| 52 | +func MergePreservingRelativeOrder(inputLists [][]string) []string { |
| 53 | + if len(inputLists) == 0 { |
| 54 | + return nil |
| 55 | + } |
| 56 | + |
| 57 | + // Build a directed graph of precedence relationships |
| 58 | + graph := make(map[string]*graphNode) |
| 59 | + for _, list := range inputLists { |
| 60 | + for i, item := range list { |
| 61 | + node := getOrCreateNode(graph, item) |
| 62 | + |
| 63 | + // Add edge from current item to next item in list |
| 64 | + if i < len(list)-1 { |
| 65 | + nextItem := list[i+1] |
| 66 | + nextNode := getOrCreateNode(graph, nextItem) |
| 67 | + |
| 68 | + // Only add edge if not already present (avoid incrementing in-degree multiple times) |
| 69 | + if !node.outEdges.Has(nextItem) { |
| 70 | + node.outEdges.Insert(nextItem) |
| 71 | + nextNode.inDegree++ |
| 72 | + } |
| 73 | + } |
| 74 | + } |
| 75 | + } |
| 76 | + |
| 77 | + // Shortcut: if any input list contains all items (no duplicates), use it |
| 78 | + allItems := sets.New[string]() |
| 79 | + for name := range graph { |
| 80 | + allItems.Insert(name) |
| 81 | + } |
| 82 | + for _, list := range inputLists { |
| 83 | + if len(list) == allItems.Len() && isUnique(list) { |
| 84 | + return list |
| 85 | + } |
| 86 | + } |
| 87 | + |
| 88 | + // Perform topological sort using Kahn's algorithm with min-heap for determinism |
| 89 | + result, err := topologicalSort(graph) |
| 90 | + if err != nil { |
| 91 | + // This should not happen with valid input, but if it does, |
| 92 | + // fall back to lexicographic sort to provide some result |
| 93 | + items := make([]string, 0, len(graph)) |
| 94 | + for name := range graph { |
| 95 | + items = append(items, name) |
| 96 | + } |
| 97 | + sort.Strings(items) |
| 98 | + return items |
| 99 | + } |
| 100 | + |
| 101 | + return result |
| 102 | +} |
| 103 | + |
| 104 | +// getOrCreateNode retrieves or creates a graph node for the given name |
| 105 | +func getOrCreateNode(graph map[string]*graphNode, name string) *graphNode { |
| 106 | + if graph[name] == nil { |
| 107 | + graph[name] = &graphNode{ |
| 108 | + outEdges: sets.New[string](), |
| 109 | + inDegree: 0, |
| 110 | + } |
| 111 | + } |
| 112 | + return graph[name] |
| 113 | +} |
| 114 | + |
// isUnique reports whether every item of list occurs exactly once.
// An empty or nil list is considered unique.
func isUnique(list []string) bool {
	seen := make(map[string]struct{}, len(list))
	for _, item := range list {
		if _, dup := seen[item]; dup {
			return false
		}
		seen[item] = struct{}{}
	}
	return true
}
| 126 | + |
| 127 | +// topologicalSort performs Kahn's algorithm with a min-heap for deterministic ordering |
| 128 | +func topologicalSort(graph map[string]*graphNode) ([]string, error) { |
| 129 | + // Initialize min-heap with all nodes that have no incoming edges |
| 130 | + pq := &stringMinHeap{} |
| 131 | + heap.Init(pq) |
| 132 | + |
| 133 | + for name, node := range graph { |
| 134 | + if node.inDegree == 0 { |
| 135 | + heap.Push(pq, name) |
| 136 | + } |
| 137 | + } |
| 138 | + |
| 139 | + result := make([]string, 0, len(graph)) |
| 140 | + |
| 141 | + for pq.Len() > 0 { |
| 142 | + // Pop item with lowest lexicographic value |
| 143 | + current := heap.Pop(pq).(string) |
| 144 | + result = append(result, current) |
| 145 | + |
| 146 | + currentNode := graph[current] |
| 147 | + |
| 148 | + // Reduce in-degree for all neighbors |
| 149 | + for neighbor := range currentNode.outEdges { |
| 150 | + neighborNode := graph[neighbor] |
| 151 | + neighborNode.inDegree-- |
| 152 | + |
| 153 | + // If in-degree becomes 0, add to heap |
| 154 | + if neighborNode.inDegree == 0 { |
| 155 | + heap.Push(pq, neighbor) |
| 156 | + } |
| 157 | + } |
| 158 | + } |
| 159 | + |
| 160 | + // Check for cycles |
| 161 | + if len(result) != len(graph) { |
| 162 | + return nil, fmt.Errorf("cycle detected in precedence graph: sorted %d items but graph has %d items", len(result), len(graph)) |
| 163 | + } |
| 164 | + |
| 165 | + return result, nil |
| 166 | +} |
| 167 | + |
| 168 | +// graphNode represents a node in the precedence graph |
| 169 | +type graphNode struct { |
| 170 | + // Items that should come after this item |
| 171 | + outEdges sets.Set[string] |
| 172 | + // Number of items that should come before this item |
| 173 | + inDegree int |
| 174 | +} |
| 175 | + |
// stringMinHeap is a slice of strings implementing heap.Interface such that
// the lexicographically smallest string is the heap minimum.
type stringMinHeap []string

// Len returns the number of strings currently stored.
func (h stringMinHeap) Len() int {
	return len(h)
}

// Less reports whether the string at index i sorts before the one at index j.
func (h stringMinHeap) Less(i, j int) bool {
	return h[j] > h[i]
}

// Swap exchanges the strings at indexes i and j.
func (h stringMinHeap) Swap(i, j int) {
	tmp := h[i]
	h[i] = h[j]
	h[j] = tmp
}

// Push appends x, which must be a string, to the end of the underlying slice.
// It does not restore heap order by itself.
func (h *stringMinHeap) Push(x interface{}) {
	*h = append(*h, x.(string))
}

// Pop removes the last element of the underlying slice and returns it.
// It does not restore heap order by itself.
func (h *stringMinHeap) Pop() interface{} {
	items := *h
	last := len(items) - 1
	popped := items[last]
	*h = items[:last]
	return popped
}
0 commit comments