Skip to content

Commit

Permalink
Merge pull request #50028 from julia-stripe/fix-incorrect-scheduler-b…
Browse files Browse the repository at this point in the history
…ind-call

Automatic merge from submit-queue

Fix incorrect call to 'bind' in scheduler

I previously submitted #49661 -- I'm not sure if that PR is too big or what, but this is an attempt at a smaller PR that makes progress on the same issue and is easier to review.

**What this PR does / why we need it**:

In this refactor (ecb962e6585#diff-67f2b61521299ca8d8687b0933bbfb19R223) the scheduler code was refactored into separate `bind` and `assume` functions. When that happened, `bind` was called with `pod` as an argument. The argument to `bind` should be the assumed pod, not the original pod. Evidence that `assumedPod` is the correct argument bind and not `pod`: https://github.com/kubernetes/kubernetes/blob/80f26fa8a89ef5863cb19c71a620bb389d025166/plugin/pkg/scheduler/scheduler.go#L229-L234. (and it says `assumed` in the function signature for `bind`, even though it's not called with the assumed pod as an argument).

This is an issue (and causes #49314, where pods that fail to bind to a node get stuck indefinitely) in the following scenario:

1. The pod fails to bind to the node
2. `bind` calls `ForgetPod` with the `pod` argument
3. since `ForgetPod` is expecting the assumed pod as an argument (because that's what's in the scheduler cache), it fails with an error like `scheduler cache ForgetPod failed: pod test-677550-rc-edit-namespace/nginx-jvn09 state was assumed on a different node`
4. The pod gets lost forever because of some incomplete error handling (which I haven't addressed here in the interest of making a simpler PR)

In this PR I've fixed the call to `bind` and modified the tests to make sure that `ForgetPod` gets called with the correct argument (the assumed pod) when binding fails.

**Which issue this PR fixes**: fixes #49314

**Special notes for your reviewer**:

**Release note**:

```release-note
```
  • Loading branch information
Kubernetes Submit Queue authored Aug 4, 2017
2 parents 4a8d041 + d584bf4 commit 898b1b3
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 9 deletions.
16 changes: 9 additions & 7 deletions plugin/pkg/scheduler/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -185,14 +185,14 @@ func (sched *Scheduler) schedule(pod *v1.Pod) (string, error) {
}

// assume signals to the cache that a pod is already in the cache, so that binding can be asnychronous.
func (sched *Scheduler) assume(pod *v1.Pod, host string) error {
// assume modifies `assumed`.
func (sched *Scheduler) assume(assumed *v1.Pod, host string) error {
// Optimistically assume that the binding will succeed and send it to apiserver
// in the background.
// If the binding fails, scheduler will release resources allocated to assumed pod
// immediately.
assumed := *pod
assumed.Spec.NodeName = host
if err := sched.config.SchedulerCache.AssumePod(&assumed); err != nil {
if err := sched.config.SchedulerCache.AssumePod(assumed); err != nil {
glog.Errorf("scheduler cache AssumePod failed: %v", err)
// TODO: This means that a given pod is already in cache (which means it
// is either assumed or already added). This is most probably result of a
Expand All @@ -207,7 +207,7 @@ func (sched *Scheduler) assume(pod *v1.Pod, host string) error {
// predicates in equivalence cache.
// If the binding fails, these invalidated item will not break anything.
if sched.config.Ecache != nil {
sched.config.Ecache.InvalidateCachedPredicateItemForPodAdd(pod, host)
sched.config.Ecache.InvalidateCachedPredicateItemForPodAdd(assumed, host)
}
return nil
}
Expand Down Expand Up @@ -264,15 +264,17 @@ func (sched *Scheduler) scheduleOne() {

// Tell the cache to assume that a pod now is running on a given node, even though it hasn't been bound yet.
// This allows us to keep scheduling without waiting on binding to occur.
err = sched.assume(pod, suggestedHost)
assumedPod := *pod
// assume modifies `assumedPod` by setting NodeName=suggestedHost
err = sched.assume(&assumedPod, suggestedHost)
if err != nil {
return
}

// bind the pod to its host asynchronously (we can do this b/c of the assumption step above).
go func() {
err := sched.bind(pod, &v1.Binding{
ObjectMeta: metav1.ObjectMeta{Namespace: pod.Namespace, Name: pod.Name, UID: pod.UID},
err := sched.bind(&assumedPod, &v1.Binding{
ObjectMeta: metav1.ObjectMeta{Namespace: assumedPod.Namespace, Name: assumedPod.Name, UID: assumedPod.UID},
Target: v1.ObjectReference{
Kind: "Node",
Name: suggestedHost,
Expand Down
11 changes: 10 additions & 1 deletion plugin/pkg/scheduler/scheduler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ func TestScheduler(t *testing.T) {
sendPod *v1.Pod
algo algorithm.ScheduleAlgorithm
expectErrorPod *v1.Pod
expectForgetPod *v1.Pod
expectAssumedPod *v1.Pod
expectError error
expectBind *v1.Binding
Expand All @@ -139,7 +140,8 @@ func TestScheduler(t *testing.T) {
expectAssumedPod: podWithID("foo", testNode.Name),
injectBindError: errB,
expectError: errB,
expectErrorPod: podWithID("foo", ""),
expectErrorPod: podWithID("foo", testNode.Name),
expectForgetPod: podWithID("foo", testNode.Name),
eventReason: "FailedScheduling",
}, {
sendPod: deletingPod("foo"),
Expand All @@ -151,11 +153,15 @@ func TestScheduler(t *testing.T) {
for i, item := range table {
var gotError error
var gotPod *v1.Pod
var gotForgetPod *v1.Pod
var gotAssumedPod *v1.Pod
var gotBinding *v1.Binding
configurator := &FakeConfigurator{
Config: &Config{
SchedulerCache: &schedulertesting.FakeCache{
ForgetFunc: func(pod *v1.Pod) {
gotForgetPod = pod
},
AssumeFunc: func(pod *v1.Pod) {
gotAssumedPod = pod
},
Expand Down Expand Up @@ -196,6 +202,9 @@ func TestScheduler(t *testing.T) {
if e, a := item.expectErrorPod, gotPod; !reflect.DeepEqual(e, a) {
t.Errorf("%v: error pod: wanted %v, got %v", i, e, a)
}
if e, a := item.expectForgetPod, gotForgetPod; !reflect.DeepEqual(e, a) {
t.Errorf("%v: forget pod: wanted %v, got %v", i, e, a)
}
if e, a := item.expectError, gotError; !reflect.DeepEqual(e, a) {
t.Errorf("%v: error: wanted %v, got %v", i, e, a)
}
Expand Down
6 changes: 5 additions & 1 deletion plugin/pkg/scheduler/testing/fake_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
// FakeCache is used for testing
type FakeCache struct {
AssumeFunc func(*v1.Pod)
ForgetFunc func(*v1.Pod)
}

func (f *FakeCache) AssumePod(pod *v1.Pod) error {
Expand All @@ -34,7 +35,10 @@ func (f *FakeCache) AssumePod(pod *v1.Pod) error {

func (f *FakeCache) FinishBinding(pod *v1.Pod) error { return nil }

func (f *FakeCache) ForgetPod(pod *v1.Pod) error { return nil }
func (f *FakeCache) ForgetPod(pod *v1.Pod) error {
f.ForgetFunc(pod)
return nil
}

func (f *FakeCache) AddPod(pod *v1.Pod) error { return nil }

Expand Down

0 comments on commit 898b1b3

Please sign in to comment.