Skip to content

Commit d1f920b

Browse files
Merge pull request #3550 from bibibox/fix_bind_failed_r1.9
update pod status when bind error
2 parents b31bb71 + 5e5d871 commit d1f920b

File tree

4 files changed

+17
-7
lines changed

4 files changed

+17
-7
lines changed

pkg/scheduler/api/unschedule_info.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@ const (
2626
// PodReasonSchedulable reason in PodScheduled PodCondition means that the scheduler
2727
// can schedule the pod right now, but not bind yet
2828
PodReasonSchedulable = "Schedulable"
29+
// PodReasonSchedulerError reason in PodScheduled PodCondition means that the scheduler
30+
// tried to schedule the pod, but encountered an error while scheduling,
31+
// for example, when binding the pod returns an error.
32+
PodReasonSchedulerError = "SchedulerError"
2933
)
3034

3135
// FitErrors is a set of FitError on many nodes

pkg/scheduler/cache/cache.go

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -173,8 +173,9 @@ type DefaultBinder struct {
173173
}
174174

175175
// Bind will send bind request to api server
176-
func (db *DefaultBinder) Bind(kubeClient kubernetes.Interface, tasks []*schedulingapi.TaskInfo) ([]*schedulingapi.TaskInfo, error) {
176+
func (db *DefaultBinder) Bind(kubeClient kubernetes.Interface, tasks []*schedulingapi.TaskInfo) ([]*schedulingapi.TaskInfo, []error) {
177177
var errTasks []*schedulingapi.TaskInfo
178+
var errs []error
178179
for _, task := range tasks {
179180
p := task.Pod
180181
if err := db.kubeclient.CoreV1().Pods(p.Namespace).Bind(context.TODO(),
@@ -188,14 +189,15 @@ func (db *DefaultBinder) Bind(kubeClient kubernetes.Interface, tasks []*scheduli
188189
metav1.CreateOptions{}); err != nil {
189190
klog.Errorf("Failed to bind pod <%v/%v> to node %s : %#v", p.Namespace, p.Name, task.NodeName, err)
190191
errTasks = append(errTasks, task)
192+
errs = append(errs, err)
191193
} else {
192194
db.recorder.Eventf(task.Pod, v1.EventTypeNormal, "Scheduled", "Successfully assigned %v/%v to %v", task.Namespace, task.Name, task.NodeName)
193195
metrics.UpdateTaskScheduleDuration(metrics.Duration(p.CreationTimestamp.Time)) // update metrics as soon as pod is bind
194196
}
195197
}
196198

197199
if len(errTasks) > 0 {
198-
return errTasks, fmt.Errorf("failed to bind pods")
200+
return errTasks, errs
199201
}
200202

201203
return nil, nil
@@ -895,11 +897,15 @@ func (sc *SchedulerCache) Evict(taskInfo *schedulingapi.TaskInfo, reason string)
895897
// Bind binds task to the target host.
896898
func (sc *SchedulerCache) Bind(tasks []*schedulingapi.TaskInfo) {
897899
tmp := time.Now()
898-
errTasks, err := sc.Binder.Bind(sc.kubeClient, tasks)
899-
if err == nil {
900+
errTasks, errs := sc.Binder.Bind(sc.kubeClient, tasks)
901+
if errs == nil {
900902
klog.V(3).Infof("bind ok, latency %v", time.Since(tmp))
901903
} else {
902-
for _, task := range errTasks {
904+
for i, task := range errTasks {
905+
unschedulableMsg := fmt.Sprintf("failed to bind to node %s: %s", task.NodeName, errs[i])
906+
if err := sc.taskUnschedulable(task, schedulingapi.PodReasonSchedulerError, unschedulableMsg, ""); err != nil {
907+
klog.ErrorS(err, "Failed to update pod status when bind task error", "task", task.Name)
908+
}
903909
klog.V(2).Infof("resyncTask task %s", task.Name)
904910
sc.VolumeBinder.RevertVolumes(task, task.PodVolumes)
905911
sc.resyncTask(task)

pkg/scheduler/cache/interface.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ type VolumeBinder interface {
9999

100100
// Binder interface for binding task and hostname
101101
type Binder interface {
102-
Bind(kubeClient kubernetes.Interface, tasks []*api.TaskInfo) ([]*api.TaskInfo, error)
102+
Bind(kubeClient kubernetes.Interface, tasks []*api.TaskInfo) ([]*api.TaskInfo, []error)
103103
}
104104

105105
// Evictor interface for evict pods

pkg/scheduler/util/test_utils.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,7 @@ type FakeBinder struct {
262262
}
263263

264264
// Bind used by fake binder struct to bind pods
265-
func (fb *FakeBinder) Bind(kubeClient kubernetes.Interface, tasks []*api.TaskInfo) ([]*api.TaskInfo, error) {
265+
func (fb *FakeBinder) Bind(kubeClient kubernetes.Interface, tasks []*api.TaskInfo) ([]*api.TaskInfo, []error) {
266266
fb.Lock()
267267
defer fb.Unlock()
268268
for _, p := range tasks {

0 commit comments

Comments
 (0)