Skip to content

Commit 8210db3

Browse files
pquerna and ggreer authored
add benchmark for large scale cycles (#457)
* add benchmark for large scale cycles
* hack on scc
* Fix some linter errors.
* Use range in some places.
* Fix lint error.
* add actual benchmark
* Fix lint errors.

---------

Co-authored-by: Geoff Greer <[email protected]>
1 parent f16cc1c commit 8210db3

File tree

8 files changed

+1300
-118
lines changed

8 files changed

+1300
-118
lines changed

pkg/sync/expand/cycle.go

Lines changed: 62 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -1,114 +1,70 @@
11
package expand
22

33
import (
4-
mapset "github.com/deckarep/golang-set/v2"
5-
)
4+
"context"
65

7-
const (
8-
colorWhite uint8 = iota
9-
colorGray
10-
colorBlack
6+
"github.com/conductorone/baton-sdk/pkg/sync/expand/scc"
7+
mapset "github.com/deckarep/golang-set/v2"
118
)
129

13-
// cycleDetector encapsulates coloring state for cycle detection on an
14-
// EntitlementGraph. Node IDs are dense (1..NextNodeID), so slices are used for
15-
// O(1) access and zero per-op allocations.
16-
type cycleDetector struct {
17-
g *EntitlementGraph
18-
state []uint8
19-
parent []int
20-
}
21-
22-
func newCycleDetector(g *EntitlementGraph) *cycleDetector {
23-
cd := &cycleDetector{
24-
g: g,
25-
state: make([]uint8, g.NextNodeID+1),
26-
parent: make([]int, g.NextNodeID+1),
10+
// GetFirstCycle given an entitlements graph, return a cycle by node ID if it
11+
// exists. Returns nil if no cycle exists. If there is a single
12+
// node pointing to itself, that will count as a cycle.
13+
func (g *EntitlementGraph) GetFirstCycle(ctx context.Context) []int {
14+
if g.HasNoCycles {
15+
return nil
2716
}
28-
for i := range cd.parent {
29-
cd.parent[i] = -1
17+
comps := g.ComputeCyclicComponents(ctx)
18+
if len(comps) == 0 {
19+
return nil
3020
}
31-
return cd
21+
return comps[0]
3222
}
3323

34-
// dfs performs a coloring-based DFS from u, returning the first detected cycle
35-
// as a slice of node IDs or nil if no cycle is reachable from u.
36-
func (cd *cycleDetector) dfs(u int) ([]int, bool) {
37-
// Self-loop fast path.
38-
if nbrs, ok := cd.g.SourcesToDestinations[u]; ok {
39-
if _, ok := nbrs[u]; ok {
40-
return []int{u}, true
41-
}
42-
}
43-
44-
cd.state[u] = colorGray
45-
if nbrs, ok := cd.g.SourcesToDestinations[u]; ok {
46-
for v := range nbrs {
47-
switch cd.state[v] {
48-
case colorWhite:
49-
cd.parent[v] = u
50-
if cyc, ok := cd.dfs(v); ok {
51-
return cyc, true
52-
}
53-
case colorGray:
54-
// Back-edge to a node on the current recursion stack.
55-
// Reconstruct cycle by walking parents from u back to v (inclusive), then reverse.
56-
cycle := make([]int, 0, 8)
57-
for x := u; ; x = cd.parent[x] {
58-
cycle = append(cycle, x)
59-
if x == v || cd.parent[x] == -1 {
60-
break
61-
}
62-
}
63-
for i, j := 0, len(cycle)-1; i < j; i, j = i+1, j-1 {
64-
cycle[i], cycle[j] = cycle[j], cycle[i]
65-
}
66-
return cycle, true
67-
}
68-
}
24+
// HasCycles returns true if the graph contains any cycle.
25+
func (g *EntitlementGraph) HasCycles(ctx context.Context) bool {
26+
if g.HasNoCycles {
27+
return false
6928
}
70-
cd.state[u] = colorBlack
71-
return nil, false
29+
return len(g.ComputeCyclicComponents(ctx)) > 0
7230
}
7331

74-
// FindAny scans all nodes and returns the first detected cycle or nil if none exist.
75-
func (cd *cycleDetector) FindAny() []int {
76-
for nodeID := range cd.g.Nodes {
77-
if cd.state[nodeID] != colorWhite {
78-
continue
79-
}
80-
if cyc, ok := cd.dfs(nodeID); ok {
81-
return cyc
32+
func (g *EntitlementGraph) cycleDetectionHelper(
33+
nodeID int,
34+
) ([]int, bool) {
35+
reach := g.reachableFrom(nodeID)
36+
if len(reach) == 0 {
37+
return nil, false
38+
}
39+
adj := g.toAdjacency(reach)
40+
groups := scc.CondenseFWBWGroupsFromAdj(context.Background(), adj, scc.DefaultOptions())
41+
for _, comp := range groups {
42+
if len(comp) > 1 || (len(comp) == 1 && adj[comp[0]][comp[0]] != 0) {
43+
return comp, true
8244
}
8345
}
84-
return nil
46+
return nil, false
8547
}
8648

87-
// FindFrom starts cycle detection from a specific node and returns the first
88-
// cycle reachable from that node, or nil,false if none.
89-
func (cd *cycleDetector) FindFrom(start int) ([]int, bool) {
90-
return cd.dfs(start)
49+
func (g *EntitlementGraph) FixCycles(ctx context.Context) error {
50+
return g.FixCyclesFromComponents(ctx, g.ComputeCyclicComponents(ctx))
9151
}
9252

93-
// GetFirstCycle given an entitlements graph, return a cycle by node ID if it
94-
// exists. Returns nil if no cycle exists. If there is a single
95-
// node pointing to itself, that will count as a cycle.
96-
func (g *EntitlementGraph) GetFirstCycle() []int {
53+
// ComputeCyclicComponents runs SCC once and returns only cyclic components.
54+
// A component is cyclic if len>1 or a singleton with a self-loop.
55+
func (g *EntitlementGraph) ComputeCyclicComponents(ctx context.Context) [][]int {
9756
if g.HasNoCycles {
9857
return nil
9958
}
100-
cd := newCycleDetector(g)
101-
return cd.FindAny()
102-
}
103-
104-
func (g *EntitlementGraph) cycleDetectionHelper(
105-
nodeID int,
106-
) ([]int, bool) {
107-
// Thin wrapper around the coloring-based DFS, starting from a specific node.
108-
// The provided visited/currentCycle are ignored here; coloring provides the
109-
// necessary state for correctness and performance.
110-
cd := newCycleDetector(g)
111-
return cd.FindFrom(nodeID)
59+
adj := g.toAdjacency(nil)
60+
groups := scc.CondenseFWBWGroupsFromAdj(ctx, adj, scc.DefaultOptions())
61+
cyclic := make([][]int, 0)
62+
for _, comp := range groups {
63+
if len(comp) > 1 || (len(comp) == 1 && adj[comp[0]][comp[0]] != 0) {
64+
cyclic = append(cyclic, comp)
65+
}
66+
}
67+
return cyclic
11268
}
11369

11470
// removeNode obliterates a node and all incoming/outgoing edges.
@@ -145,30 +101,33 @@ func (g *EntitlementGraph) removeNode(nodeID int) {
145101
delete(g.SourcesToDestinations, nodeID)
146102
}
147103

148-
// FixCycles if any cycles of nodes exist, merge all nodes in that cycle into a
149-
// single node and then repeat. Iteration ends when there are no more cycles.
150-
func (g *EntitlementGraph) FixCycles() error {
104+
// FixCyclesFromComponents merges all provided cyclic components in one pass.
105+
func (g *EntitlementGraph) FixCyclesFromComponents(ctx context.Context, cyclic [][]int) error {
151106
if g.HasNoCycles {
152107
return nil
153108
}
154-
cycle := g.GetFirstCycle()
155-
if cycle == nil {
109+
if len(cyclic) == 0 {
156110
g.HasNoCycles = true
157111
return nil
158112
}
159-
160-
if err := g.fixCycle(cycle); err != nil {
161-
return err
113+
for _, comp := range cyclic {
114+
select {
115+
case <-ctx.Done():
116+
return ctx.Err()
117+
default:
118+
}
119+
if err := g.fixCycle(comp); err != nil {
120+
return err
121+
}
162122
}
163-
164-
// Recurse!
165-
return g.FixCycles()
123+
g.HasNoCycles = true
124+
return nil
166125
}
167126

168127
// fixCycle takes a list of Node IDs that form a cycle and merges them into a
169128
// single, new node.
170129
func (g *EntitlementGraph) fixCycle(nodeIDs []int) error {
171-
entitlementIDs := mapset.NewSet[string]()
130+
entitlementIDs := mapset.NewThreadUnsafeSet[string]()
172131
outgoingEdgesToResourceTypeIDs := map[int]mapset.Set[string]{}
173132
incomingEdgesToResourceTypeIDs := map[int]mapset.Set[string]{}
174133
for _, nodeID := range nodeIDs {
@@ -184,7 +143,7 @@ func (g *EntitlementGraph) fixCycle(nodeIDs []int) error {
184143
if edge, ok := g.Edges[edgeID]; ok {
185144
resourceTypeIDs, ok := incomingEdgesToResourceTypeIDs[sourceNodeID]
186145
if !ok {
187-
resourceTypeIDs = mapset.NewSet[string]()
146+
resourceTypeIDs = mapset.NewThreadUnsafeSet[string]()
188147
}
189148
for _, resourceTypeID := range edge.ResourceTypeIDs {
190149
resourceTypeIDs.Add(resourceTypeID)
@@ -200,7 +159,7 @@ func (g *EntitlementGraph) fixCycle(nodeIDs []int) error {
200159
if edge, ok := g.Edges[edgeID]; ok {
201160
resourceTypeIDs, ok := outgoingEdgesToResourceTypeIDs[destinationNodeID]
202161
if !ok {
203-
resourceTypeIDs = mapset.NewSet[string]()
162+
resourceTypeIDs = mapset.NewThreadUnsafeSet[string]()
204163
}
205164
for _, resourceTypeID := range edge.ResourceTypeIDs {
206165
resourceTypeIDs.Add(resourceTypeID)

pkg/sync/expand/cycle_benchmark_test.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,14 +151,16 @@ func BenchmarkCycleDetectionHelper(b *testing.B) {
151151
}
152152

153153
func BenchmarkGetFirstCycle(b *testing.B) {
154+
ctx, cancel := context.WithCancel(context.Background())
155+
defer cancel()
154156
sizes := []int{100, 1000}
155157

156158
for _, n := range sizes {
157159
b.Run(fmt.Sprintf("ring-%d", n), func(b *testing.B) {
158160
g := buildRing(b, n)
159161
b.ResetTimer()
160162
for i := 0; i < b.N; i++ {
161-
_ = g.GetFirstCycle()
163+
_ = g.GetFirstCycle(ctx)
162164
}
163165
})
164166
}
@@ -168,7 +170,7 @@ func BenchmarkGetFirstCycle(b *testing.B) {
168170
g := buildChain(b, n)
169171
b.ResetTimer()
170172
for i := 0; i < b.N; i++ {
171-
_ = g.GetFirstCycle()
173+
_ = g.GetFirstCycle(ctx)
172174
}
173175
})
174176
}

pkg/sync/expand/graph.go

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,3 +309,64 @@ func (g *EntitlementGraph) DeleteEdge(ctx context.Context, srcEntitlementID stri
309309
}
310310
return nil
311311
}
312+
313+
// toAdjacency builds an adjacency map for SCC. If nodesSubset is non-nil, only
314+
// include those nodes (and edges between them). Always include all nodes in the
315+
// subset as keys, even if they have zero outgoing edges.
316+
func (g *EntitlementGraph) toAdjacency(nodesSubset map[int]struct{}) map[int]map[int]int {
317+
adj := make(map[int]map[int]int, len(g.Nodes))
318+
include := func(id int) bool {
319+
if nodesSubset == nil {
320+
return true
321+
}
322+
_, ok := nodesSubset[id]
323+
return ok
324+
}
325+
326+
// Ensure keys for all included nodes.
327+
for id := range g.Nodes {
328+
if include(id) {
329+
adj[id] = make(map[int]int)
330+
}
331+
}
332+
333+
// Add edges where both endpoints are included.
334+
for src, dsts := range g.SourcesToDestinations {
335+
if !include(src) {
336+
continue
337+
}
338+
row := adj[src]
339+
for dst := range dsts {
340+
if include(dst) {
341+
row[dst] = 1
342+
}
343+
}
344+
}
345+
return adj
346+
}
347+
348+
// reachableFrom computes the set of node IDs reachable from start over
349+
// SourcesToDestinations using an iterative BFS.
350+
func (g *EntitlementGraph) reachableFrom(start int) map[int]struct{} {
351+
if _, ok := g.Nodes[start]; !ok {
352+
return nil
353+
}
354+
visited := make(map[int]struct{}, 16)
355+
queue := make([]int, 0, 16)
356+
queue = append(queue, start)
357+
visited[start] = struct{}{}
358+
for len(queue) > 0 {
359+
u := queue[0]
360+
queue = queue[1:]
361+
if nbrs, ok := g.SourcesToDestinations[u]; ok {
362+
for v := range nbrs {
363+
if _, seen := visited[v]; seen {
364+
continue
365+
}
366+
visited[v] = struct{}{}
367+
queue = append(queue, v)
368+
}
369+
}
370+
}
371+
return visited
372+
}

pkg/sync/expand/graph_test.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ func TestGetFirstCycle(t *testing.T) {
160160
for _, testCase := range testCases {
161161
t.Run(testCase.message, func(t *testing.T) {
162162
graph := parseExpression(t, ctx, testCase.expression)
163-
cycle := graph.GetFirstCycle()
163+
cycle := graph.GetFirstCycle(ctx)
164164
if testCase.expectedCycleSize == 0 {
165165
require.Nil(t, cycle)
166166
} else {
@@ -189,7 +189,7 @@ func TestHandleCycle(t *testing.T) {
189189

190190
graph := parseExpression(t, ctx, testCase.expression)
191191

192-
cycle := graph.GetFirstCycle()
192+
cycle := graph.GetFirstCycle(ctx)
193193
expectedCycles := createNodeIDList(testCase.expectedCycles)
194194
require.NotNil(t, cycle)
195195
found := false
@@ -201,11 +201,11 @@ func TestHandleCycle(t *testing.T) {
201201
}
202202
require.True(t, found)
203203

204-
err := graph.FixCycles()
204+
err := graph.FixCycles(ctx)
205205
require.NoError(t, err, graph.Str())
206206
err = graph.Validate()
207207
require.NoError(t, err)
208-
cycle = graph.GetFirstCycle()
208+
cycle = graph.GetFirstCycle(ctx)
209209
require.Nil(t, cycle)
210210
})
211211
}
@@ -221,7 +221,7 @@ func TestHandleComplexCycle(t *testing.T) {
221221
require.Equal(t, 4, len(graph.Edges))
222222
require.Equal(t, 3, len(graph.GetEntitlements()))
223223

224-
err := graph.FixCycles()
224+
err := graph.FixCycles(ctx)
225225
require.NoError(t, err, graph.Str())
226226
err = graph.Validate()
227227
require.NoError(t, err)
@@ -230,7 +230,7 @@ func TestHandleComplexCycle(t *testing.T) {
230230
require.Equal(t, 0, len(graph.Edges))
231231
require.Equal(t, 3, len(graph.GetEntitlements()))
232232

233-
cycle := graph.GetFirstCycle()
233+
cycle := graph.GetFirstCycle(ctx)
234234
require.Nil(t, cycle)
235235
}
236236

@@ -248,7 +248,7 @@ func TestHandleCliqueCycle(t *testing.T) {
248248
require.Equal(t, 6, len(graph.Edges))
249249
require.Equal(t, 3, len(graph.GetEntitlements()))
250250

251-
err := graph.FixCycles()
251+
err := graph.FixCycles(ctx)
252252
require.NoError(t, err, graph.Str())
253253
err = graph.Validate()
254254
require.NoError(t, err)
@@ -257,7 +257,7 @@ func TestHandleCliqueCycle(t *testing.T) {
257257
require.Equal(t, 0, len(graph.Edges))
258258
require.Equal(t, 3, len(graph.GetEntitlements()))
259259

260-
cycle := graph.GetFirstCycle()
260+
cycle := graph.GetFirstCycle(ctx)
261261
require.Nil(t, cycle)
262262
}
263263
}

0 commit comments

Comments
 (0)