Skip to content

Commit 5f3e248

Browse files
author
Monokaix
committed
fix node count reconcile
Signed-off-by: Monokaix <[email protected]>
1 parent aba5c55 commit 5f3e248

File tree

7 files changed

+154
-161
lines changed

7 files changed

+154
-161
lines changed

installer/helm/chart/volcano/templates/controllers.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,9 @@ rules:
8888
- apiGroups: ["topology.volcano.sh"]
8989
resources: ["hypernodes", "hypernodes/status"]
9090
verbs: ["list", "watch", "get", "create", "delete", "update", "patch"]
91+
- apiGroups: [ "" ]
92+
resources: [ "nodes" ]
93+
verbs: [ "list", "watch" ]
9194
---
9295
kind: ClusterRoleBinding
9396
apiVersion: rbac.authorization.k8s.io/v1

installer/volcano-development.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4493,6 +4493,9 @@ rules:
44934493
- apiGroups: ["topology.volcano.sh"]
44944494
resources: ["hypernodes", "hypernodes/status"]
44954495
verbs: ["list", "watch", "get", "create", "delete", "update", "patch"]
4496+
- apiGroups: [ "" ]
4497+
resources: [ "nodes" ]
4498+
verbs: [ "list", "watch" ]
44964499
---
44974500
# Source: volcano/templates/controllers.yaml
44984501
kind: ClusterRoleBinding

pkg/controllers/hypernode/hypernode_controller.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ type hyperNodeController struct {
5555
hyperNodeInformer topologyinformerv1alpha1.HyperNodeInformer
5656
hyperNodeLister topologylisterv1alpha1.HyperNodeLister
5757
hyperNodeQueue workqueue.TypedRateLimitingInterface[string]
58+
nodeLister listersv1.NodeLister
5859

5960
configMapInformer coreinformers.ConfigMapInformer
6061
configMapLister listersv1.ConfigMapLister
@@ -113,6 +114,7 @@ func (hn *hyperNodeController) Initialize(opt *framework.ControllerOption) error
113114
hn.hyperNodeInformer = hn.vcInformerFactory.Topology().V1alpha1().HyperNodes()
114115
hn.hyperNodeLister = hn.hyperNodeInformer.Lister()
115116
hn.hyperNodeQueue = workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[string]())
117+
hn.nodeLister = hn.informerFactory.Core().V1().Nodes().Lister()
116118

117119
hn.setConfigMapNamespaceAndName()
118120
hn.setupConfigMapInformer()

pkg/controllers/hypernode/hypernode_handler.go

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,13 @@ import (
2121

2222
"k8s.io/apimachinery/pkg/api/errors"
2323
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
24+
"k8s.io/apimachinery/pkg/labels"
25+
"k8s.io/apimachinery/pkg/util/sets"
2426
"k8s.io/client-go/tools/cache"
2527
"k8s.io/klog/v2"
2628

2729
topologyv1alpha1 "volcano.sh/apis/pkg/apis/topology/v1alpha1"
30+
"volcano.sh/volcano/pkg/scheduler/api"
2831
)
2932

3033
func (hn *hyperNodeController) addHyperNode(obj interface{}) {
@@ -98,7 +101,7 @@ func (hn *hyperNodeController) syncHyperNodeStatus(key string) error {
98101
return err
99102
}
100103

101-
nodeCount := len(hyperNode.Spec.Members)
104+
nodeCount := hn.actualNodeCnt(hyperNode)
102105
if hyperNode.Status.NodeCount != int64(nodeCount) {
103106
// Create a deep copy to avoid modifying cache objects
104107
hyperNodeCopy := hyperNode.DeepCopy()
@@ -114,3 +117,16 @@ func (hn *hyperNodeController) syncHyperNodeStatus(key string) error {
114117

115118
return nil
116119
}
120+
121+
// actualNodeCnt returns the number of distinct entries that the HyperNode's
// member selectors resolve to against the nodes currently known to
// hn.nodeLister, rather than the raw length of Spec.Members.
//
// Each member's Selector is resolved with api.GetMembers and the results are
// merged into a single string set, so an entry produced by more than one
// member is counted only once.
//
// If listing nodes fails, the error is logged and 0 is returned.
// NOTE(review): the caller cannot tell that 0 apart from a genuine zero count
// and compares the result with Status.NodeCount — confirm that writing 0 on a
// list failure is acceptable.
func (hn *hyperNodeController) actualNodeCnt(hyperNode *topologyv1alpha1.HyperNode) int {
122+
// List every node the lister knows about; labels.Everything() applies no filter.
nodes, err := hn.nodeLister.List(labels.Everything())
123+
if err != nil {
124+
klog.ErrorS(err, "Failed to list nodes", "name", hyperNode.Name)
125+
return 0
126+
}
127+
// A set is used so that overlapping selectors do not inflate the count.
members := sets.New[string]()
128+
for _, member := range hyperNode.Spec.Members {
129+
// Presumably GetMembers yields the names of nodes matching the selector — verify against pkg/scheduler/api.
members.Insert(api.GetMembers(member.Selector, nodes).UnsortedList()...)
130+
}
131+
return len(members)
132+
}

0 commit comments

Comments
 (0)