Skip to content

Commit

Permalink
Merge pull request juju#13139 from hmlanigan/action-txn
Browse files Browse the repository at this point in the history
juju#13139

No root cause on why the "state changing too soon" is still seen after the initial change, there is no exact reproducer. I came close with the QA steps below. The changes will improve the txn over all.

1) When enqueuing an operation, include the expected task count to start with. The operations task count may not be the expected count and can change while an operation is running. It's possible that the operation can be completed once the current task count matches the test count, which is not the same as the expected task count.

2) Add an assertion to check the current count is 1 less than expected count before marking an operation complete.


## QA steps

```console
$ juju bootstrap localhost testme
$ juju deploy ubuntu -n 15
$ juju deploy ntp

# Wait for the ubuntu units to settle.
$ juju add-relation ubuntu ntp

# Once the ntp units have started to install:
$ juju run --all -- "ps aux | grep jujud"

# Repeat a few times if at least one action failure is not seen.

# Should be no errors in the unit logs.
# The completed operation should have the same number of expected and completed tasks.

# Test upgrade after an operation has run via both upgrade and migration.
```

## Bug reference

https://bugs.launchpad.net/juju/+bug/1931567
  • Loading branch information
jujubot authored Jul 9, 2021
2 parents 801b6f3 + 1aadbc3 commit 739a258
Show file tree
Hide file tree
Showing 33 changed files with 360 additions and 106 deletions.
26 changes: 13 additions & 13 deletions apiserver/facades/agent/uniter/uniter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1159,7 +1159,7 @@ func (s *uniterSuite) TestWatchTrustConfigSettingsHash(c *gc.C) {
}

func (s *uniterSuite) TestLogActionMessage(c *gc.C) {
operationID, err := s.Model.EnqueueOperation("a test")
operationID, err := s.Model.EnqueueOperation("a test", 1)
c.Assert(err, jc.ErrorIsNil)
anAction, err := s.Model.AddAction(s.wordpressUnit, operationID, "fakeaction", nil)
c.Assert(err, jc.ErrorIsNil)
Expand Down Expand Up @@ -1193,7 +1193,7 @@ func (s *uniterSuite) TestLogActionMessage(c *gc.C) {
}

func (s *uniterSuite) TestLogActionMessageAborting(c *gc.C) {
operationID, err := s.Model.EnqueueOperation("a test")
operationID, err := s.Model.EnqueueOperation("a test", 1)
c.Assert(err, jc.ErrorIsNil)
anAction, err := s.Model.AddAction(s.wordpressUnit, operationID, "fakeaction", nil)
c.Assert(err, jc.ErrorIsNil)
Expand Down Expand Up @@ -1253,7 +1253,7 @@ func (s *uniterSuite) TestWatchActionNotifications(c *gc.C) {
wc := statetesting.NewStringsWatcherC(c, s.State, resource.(state.StringsWatcher))
wc.AssertNoChange()

operationID, err := s.Model.EnqueueOperation("a test")
operationID, err := s.Model.EnqueueOperation("a test", 1)
c.Assert(err, jc.ErrorIsNil)
addedAction, err := s.Model.AddAction(s.wordpressUnit, operationID, "fakeaction", nil)
c.Assert(err, jc.ErrorIsNil)
Expand All @@ -1276,7 +1276,7 @@ func (s *uniterSuite) TestWatchPreexistingActions(c *gc.C) {

c.Assert(s.resources.Count(), gc.Equals, 0)

operationID, err := s.Model.EnqueueOperation("a test")
operationID, err := s.Model.EnqueueOperation("a test", 1)
c.Assert(err, jc.ErrorIsNil)
action1, err := s.Model.AddAction(s.wordpressUnit, operationID, "fakeaction", nil)
c.Assert(err, jc.ErrorIsNil)
Expand Down Expand Up @@ -1336,7 +1336,7 @@ func (s *uniterSuite) TestWatchActionNotificationsMalformedUnitName(c *gc.C) {
}

func (s *uniterSuite) TestWatchActionNotificationsNotUnit(c *gc.C) {
operationID, err := s.Model.EnqueueOperation("a test")
operationID, err := s.Model.EnqueueOperation("a test", 1)
c.Assert(err, jc.ErrorIsNil)
action, err := s.Model.AddAction(s.mysqlUnit, operationID, "fakeaction", nil)
c.Assert(err, jc.ErrorIsNil)
Expand Down Expand Up @@ -1709,7 +1709,7 @@ func (s *uniterSuite) TestActions(c *gc.C) {
for i, actionTest := range actionTests {
c.Logf("test %d: %s", i, actionTest.description)

operationID, err := s.Model.EnqueueOperation("a test")
operationID, err := s.Model.EnqueueOperation("a test", 1)
c.Assert(err, jc.ErrorIsNil)
a, err := s.Model.AddAction(s.wordpressUnit,
operationID,
Expand Down Expand Up @@ -1759,7 +1759,7 @@ func (s *uniterSuite) TestActionsWrongUnit(c *gc.C) {
}
mysqlUnitFacade := s.newUniterAPI(c, s.State, mysqlUnitAuthorizer)

operationID, err := s.Model.EnqueueOperation("a test")
operationID, err := s.Model.EnqueueOperation("a test", 1)
c.Assert(err, jc.ErrorIsNil)
action, err := s.Model.AddAction(s.wordpressUnit, operationID, "fakeaction", nil)
c.Assert(err, jc.ErrorIsNil)
Expand All @@ -1775,7 +1775,7 @@ func (s *uniterSuite) TestActionsWrongUnit(c *gc.C) {
}

func (s *uniterSuite) TestActionsPermissionDenied(c *gc.C) {
operationID, err := s.Model.EnqueueOperation("a test")
operationID, err := s.Model.EnqueueOperation("a test", 1)
c.Assert(err, jc.ErrorIsNil)
action, err := s.Model.AddAction(s.mysqlUnit, operationID, "fakeaction", nil)
c.Assert(err, jc.ErrorIsNil)
Expand All @@ -1798,7 +1798,7 @@ func (s *uniterSuite) TestFinishActionsSuccess(c *gc.C) {
c.Assert(err, jc.ErrorIsNil)
c.Assert(results, gc.DeepEquals, ([]state.Action)(nil))

operationID, err := s.Model.EnqueueOperation("a test")
operationID, err := s.Model.EnqueueOperation("a test", 1)
c.Assert(err, jc.ErrorIsNil)
action, err := s.Model.AddAction(s.wordpressUnit, operationID, testName, nil)
c.Assert(err, jc.ErrorIsNil)
Expand Down Expand Up @@ -1832,7 +1832,7 @@ func (s *uniterSuite) TestFinishActionsFailure(c *gc.C) {
c.Assert(err, jc.ErrorIsNil)
c.Assert(results, gc.DeepEquals, ([]state.Action)(nil))

operationID, err := s.Model.EnqueueOperation("a test")
operationID, err := s.Model.EnqueueOperation("a test", 1)
c.Assert(err, jc.ErrorIsNil)
action, err := s.Model.AddAction(s.wordpressUnit, operationID, testName, nil)
c.Assert(err, jc.ErrorIsNil)
Expand Down Expand Up @@ -1860,7 +1860,7 @@ func (s *uniterSuite) TestFinishActionsFailure(c *gc.C) {
}

func (s *uniterSuite) TestFinishActionsAuthAccess(c *gc.C) {
operationID, err := s.Model.EnqueueOperation("a test")
operationID, err := s.Model.EnqueueOperation("a test", 2)
c.Assert(err, jc.ErrorIsNil)
good, err := s.Model.AddAction(s.wordpressUnit, operationID, "fakeaction", nil)
c.Assert(err, jc.ErrorIsNil)
Expand Down Expand Up @@ -1904,7 +1904,7 @@ func (s *uniterSuite) TestFinishActionsAuthAccess(c *gc.C) {

func (s *uniterSuite) TestBeginActions(c *gc.C) {
ten_seconds_ago := time.Now().Add(-10 * time.Second)
operationID, err := s.Model.EnqueueOperation("a test")
operationID, err := s.Model.EnqueueOperation("a test", 1)
c.Assert(err, jc.ErrorIsNil)
good, err := s.Model.AddAction(s.wordpressUnit, operationID, "fakeaction", nil)
c.Assert(err, jc.ErrorIsNil)
Expand Down Expand Up @@ -5724,7 +5724,7 @@ func (s *uniterV14Suite) TestWatchActionNotificationsLegacy(c *gc.C) {
wc := statetesting.NewStringsWatcherC(c, s.State, resource.(state.StringsWatcher))
wc.AssertNoChange()

operationID, err := s.Model.EnqueueOperation("a test")
operationID, err := s.Model.EnqueueOperation("a test", 1)
c.Assert(err, jc.ErrorIsNil)
addedAction, err := s.Model.AddAction(s.wordpressUnit, operationID, "fakeaction", nil)
c.Assert(err, jc.ErrorIsNil)
Expand Down
2 changes: 1 addition & 1 deletion apiserver/facades/client/action/action_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -669,7 +669,7 @@ func (s *actionSuite) TestWatchActionProgress(c *gc.C) {
c.Assert(err, jc.ErrorIsNil)
assertReadyToTest(c, unit)

operationID, err := s.Model.EnqueueOperation("a test")
operationID, err := s.Model.EnqueueOperation("a test", 1)
c.Assert(err, jc.ErrorIsNil)
added, err := s.Model.AddAction(unit, operationID, "fakeaction", nil)
c.Assert(err, jc.ErrorIsNil)
Expand Down
8 changes: 4 additions & 4 deletions apiserver/facades/client/action/legacy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ func (s *actionSuite) TestListAll(c *gc.C) {
c.Assert(err, jc.ErrorIsNil)
assertReadyToTest(c, unit)

operationID, err := s.Model.EnqueueOperation("a test")
operationID, err := s.Model.EnqueueOperation("a test", 1)
c.Assert(err, jc.ErrorIsNil)
// add each action from the test case.
for j, act := range group.Actions {
Expand Down Expand Up @@ -124,7 +124,7 @@ func (s *actionSuite) TestListPending(c *gc.C) {
c.Assert(err, jc.ErrorIsNil)
assertReadyToTest(c, unit)

operationID, err := s.Model.EnqueueOperation("a test")
operationID, err := s.Model.EnqueueOperation("a test", 1)
c.Assert(err, jc.ErrorIsNil)
// add each action from the test case.
for _, act := range group.Actions {
Expand Down Expand Up @@ -189,7 +189,7 @@ func (s *actionSuite) TestListRunning(c *gc.C) {
c.Assert(err, jc.ErrorIsNil)
assertReadyToTest(c, unit)

operationID, err := s.Model.EnqueueOperation("a test")
operationID, err := s.Model.EnqueueOperation("a test", 1)
c.Assert(err, jc.ErrorIsNil)
// add each action from the test case.
for _, act := range group.Actions {
Expand Down Expand Up @@ -250,7 +250,7 @@ func (s *actionSuite) TestListCompleted(c *gc.C) {
c.Assert(err, jc.ErrorIsNil)
assertReadyToTest(c, unit)

operationID, err := s.Model.EnqueueOperation("a test")
operationID, err := s.Model.EnqueueOperation("a test", 1)
c.Assert(err, jc.ErrorIsNil)
// add each action from the test case.
for _, act := range group.Actions {
Expand Down
2 changes: 1 addition & 1 deletion apiserver/facades/client/action/operation.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ func (a *ActionAPI) enqueue(arg params.Actions) (string, params.ActionResults, e
}
}
summary := fmt.Sprintf("%v run on %v", operationName, strings.Join(receivers, ","))
operationID, err := a.model.EnqueueOperation(summary)
operationID, err := a.model.EnqueueOperation(summary, len(receivers))
if err != nil {
return "", params.ActionResults{}, errors.Annotate(err, "creating operation for actions")
}
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ require (
github.com/juju/clock v0.0.0-20190205081909-9c5c9712527c
github.com/juju/cmd v0.0.0-20200108104440-8e43f3faa5c9
github.com/juju/collections v0.0.0-20200605021417-0d0ec82b7271
github.com/juju/description/v3 v3.0.0-20210507010228-732e570aa3b2
github.com/juju/description/v3 v3.0.0-20210709162012-5f861fa82eab
github.com/juju/errors v0.0.0-20200330140219-3fe23663418f
github.com/juju/featureflag v0.0.0-20200423045028-e2f9e1cb1611
github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -401,8 +401,8 @@ github.com/juju/collections v0.0.0-20180516022642-90152009b5f3/go.mod h1:Ep+c0vn
github.com/juju/collections v0.0.0-20180717171555-9be91dc79b7c/go.mod h1:Ep+c0vnxsgmmTtsMibPgEEleZyi0b4uVvyzJ+8ka9EI=
github.com/juju/collections v0.0.0-20200605021417-0d0ec82b7271 h1:4R626WTwa7pRYQFiIRLVPepMhm05eZMEx+wIurRnMLc=
github.com/juju/collections v0.0.0-20200605021417-0d0ec82b7271/go.mod h1:5XgO71dV1JClcOJE+4dzdn4HrI5LiyKd7PlVG6eZYhY=
github.com/juju/description/v3 v3.0.0-20210507010228-732e570aa3b2 h1:tukD7SKKPppBRcMJEktXJrOflQS3Dvei0sHBDmKuLOM=
github.com/juju/description/v3 v3.0.0-20210507010228-732e570aa3b2/go.mod h1:l94w+OBJ7b3BAfIInxoX6FHxzERxBtLh9FNABsGNO2U=
github.com/juju/description/v3 v3.0.0-20210709162012-5f861fa82eab h1:F6p6nvVCiS6V2Lbxg3bneKNmX1VVO0BEF6rnQyTILfI=
github.com/juju/description/v3 v3.0.0-20210709162012-5f861fa82eab/go.mod h1:l94w+OBJ7b3BAfIInxoX6FHxzERxBtLh9FNABsGNO2U=
github.com/juju/errors v0.0.0-20150916125642-1b5e39b83d18/go.mod h1:W54LbzXuIE0boCoNJfwqpmkKJ1O4TCTZMetAt6jGk7Q=
github.com/juju/errors v0.0.0-20180726005433-812b06ada177/go.mod h1:W54LbzXuIE0boCoNJfwqpmkKJ1O4TCTZMetAt6jGk7Q=
github.com/juju/errors v0.0.0-20190930114154-d42613fe1ab9/go.mod h1:W54LbzXuIE0boCoNJfwqpmkKJ1O4TCTZMetAt6jGk7Q=
Expand Down
32 changes: 22 additions & 10 deletions state/action.go
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,11 @@ func (a *action) Begin() (Action, error) {
return nil, errors.Trace(err)
}
}
if err == nil && parentOperation.(*operation).doc.Status == ActionPending {
parentOpDetails, ok := parentOperation.(*operation)
if !ok {
return nil, errors.Errorf("parentOperation must be of type operation")
}
if err == nil && parentOpDetails.doc.Status == ActionPending {
updateOperationOp = &txn.Op{
C: operationsC,
Id: a.st.docID(parentOperation.Id()),
Expand Down Expand Up @@ -424,13 +428,17 @@ func (a *action) removeAndLogBuildTxn(finalStatus ActionStatus, results map[stri
var updateOperationOp *txn.Op
var err error
if parentOperation != nil {
parentOpDetails, ok := parentOperation.(*operation)
if !ok {
return nil, errors.Errorf("parentOperation must be of type operation")
}
if attempt > 0 {
err = parentOperation.Refresh()
if err != nil && !errors.IsNotFound(err) {
return nil, errors.Trace(err)
}
}
tasks := parentOperation.(*operation).taskStatus
tasks := parentOpDetails.taskStatus
statusStats := set.NewStrings(string(finalStatus))
var numComplete int
for _, status := range tasks {
Expand All @@ -439,7 +447,8 @@ func (a *action) removeAndLogBuildTxn(finalStatus ActionStatus, results map[stri
numComplete++
}
}
if numComplete == len(tasks)-1 {
spawnedTaskCount := parentOpDetails.doc.SpawnedTaskCount
if numComplete == spawnedTaskCount-1 {
// Set the operation status based on the individual
// task status values. eg if any task is failed,
// the entire operation is considered failed.
Expand All @@ -451,20 +460,23 @@ func (a *action) removeAndLogBuildTxn(finalStatus ActionStatus, results map[stri
}
}
updateOperationOp = &txn.Op{
C: operationsC,
Id: a.st.docID(parentOperation.Id()),
Assert: assertNotComplete,
C: operationsC,
Id: a.st.docID(parentOperation.Id()),
Assert: append(assertNotComplete,
bson.DocElem{"complete-task-count", bson.D{{"$eq", spawnedTaskCount - 1}}}),
Update: bson.D{{"$set", bson.D{
{"status", finalOperationStatus},
{"completed", completedTime},
{"complete-task-count", numComplete + 1},
{"complete-task-count", spawnedTaskCount},
}}},
}
} else {
updateOperationOp = &txn.Op{
C: operationsC,
Id: a.st.docID(parentOperation.Id()),
Assert: bson.D{{"complete-task-count", bson.D{{"$lt", len(tasks) - 1}}}},
C: operationsC,
Id: a.st.docID(parentOperation.Id()),
Assert: append(assertNotComplete,
bson.DocElem{"complete-task-count",
bson.D{{"$lt", spawnedTaskCount - 1}}}),
Update: bson.D{{"$inc", bson.D{
{"complete-task-count", 1},
}}},
Expand Down
Loading

0 comments on commit 739a258

Please sign in to comment.