-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathenableha.go
382 lines (355 loc) · 11.5 KB
/
enableha.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
// Copyright 2018 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.
package state
import (
"github.com/juju/errors"
"github.com/juju/replicaset"
jujutxn "github.com/juju/txn"
"gopkg.in/juju/names.v2"
"gopkg.in/mgo.v2/bson"
"gopkg.in/mgo.v2/txn"
"github.com/juju/juju/constraints"
"github.com/juju/juju/instance"
)
// hasJob reports whether job is one of the entries in jobs.
func hasJob(jobs []MachineJob, job MachineJob) bool {
	for i := 0; i < len(jobs); i++ {
		if jobs[i] != job {
			continue
		}
		return true
	}
	return false
}
// errControllerNotAllowed is returned by maintainControllersOps when machine
// documents carrying JobManageModel are supplied without current controller
// info and they are not the single bootstrap machine with id "0".
var errControllerNotAllowed = errors.New("controller jobs specified but not allowed")
// getVotingMachineCount counts the machine documents that are listed in
// info.MachineIds, have JobManageModel among their jobs and have novote
// set to false.
func (st *State) getVotingMachineCount(info *ControllerInfo) (int, error) {
	coll, closeColl := st.db().GetCollection(machinesC)
	defer closeColl()

	query := bson.M{
		"_id":    bson.M{"$in": info.MachineIds},
		"jobs":   bson.M{"$in": []MachineJob{JobManageModel}},
		"novote": false,
	}
	return coll.Find(query).Count()
}
// maintainControllersOps builds the transaction operations that keep the
// controllers document in step when the given machine documents are added
// to the machines collection. Only documents that carry JobManageModel are
// considered; when there are none, no operations are returned.
//
// A nil currentInfo is the bootstrap special case: mdocs must then hold
// exactly one document with id "0", and no controller machines may be
// recorded yet.
func (st *State) maintainControllersOps(mdocs []*machineDoc, currentInfo *ControllerInfo) ([]txn.Op, error) {
	var controllerIds []string
	for _, mdoc := range mdocs {
		if hasJob(mdoc.Jobs, JobManageModel) {
			controllerIds = append(controllerIds, mdoc.Id)
		}
	}
	if len(controllerIds) == 0 {
		return nil, nil
	}

	if currentInfo == nil {
		// Only the bootstrap machine may be added without controller info.
		isBootstrap := len(mdocs) == 1 && mdocs[0].Id == "0"
		if !isBootstrap {
			return nil, errControllerNotAllowed
		}
		info, err := st.ControllerInfo()
		if err != nil {
			return nil, errors.Annotate(err, "cannot get controller info")
		}
		if len(info.MachineIds) > 0 {
			return nil, errors.New("controllers already exist")
		}
		currentInfo = info
	}

	// Assert that the number of recorded controller machines has not
	// changed since currentInfo was read, then add the new ids.
	sizeUnchanged := bson.D{
		{"machineids", bson.D{{"$size", len(currentInfo.MachineIds)}}},
	}
	addControllerIds := bson.D{
		{"$addToSet", bson.D{
			{"machineids", bson.D{{"$each", controllerIds}}},
		}},
	}
	return []txn.Op{{
		C:      controllersC,
		Id:     modelGlobalKey,
		Assert: sizeUnchanged,
		Update: addControllerIds,
	}}, nil
}
// EnableHA adds controller machines as necessary to make
// the number of live controllers equal to numControllers. The given
// constraints and series will be attached to any new machines.
// If placement is not empty, any new machines which may be required are started
// according to the specified placement directives until the placement list is
// exhausted; thereafter any new machines are started according to the constraints and series.
//
// numControllers must be non-negative, odd unless it is zero, and no larger
// than replicaset.MaxPeers. A value of zero means "pick a sensible default":
// the current voting machine count rounded up to an odd number, with a
// minimum of 3. Reducing the number of voting controllers is not supported
// and results in an error.
//
// The returned ControllersChanges describes the changes made by the
// transaction that was run; it is empty when nothing needed to change or
// when an error is returned.
func (st *State) EnableHA(
	numControllers int, cons constraints.Value, series string, placement []string,
) (ControllersChanges, error) {
	if numControllers < 0 || (numControllers != 0 && numControllers%2 != 1) {
		return ControllersChanges{}, errors.New("number of controllers must be odd and non-negative")
	}
	if numControllers > replicaset.MaxPeers {
		return ControllersChanges{}, errors.Errorf("controller count is too large (allowed %d)", replicaset.MaxPeers)
	}
	var change ControllersChanges
	buildTxn := func(attempt int) ([]txn.Op, error) {
		// Every attempt starts from a clean slate. Without this, changes
		// computed by an earlier attempt whose transaction was aborted
		// would be reported to the caller if a retry finds there is
		// nothing left to do.
		change = ControllersChanges{}

		currentInfo, err := st.ControllerInfo()
		if err != nil {
			return nil, errors.Trace(err)
		}
		desiredControllerCount := numControllers
		votingCount, err := st.getVotingMachineCount(currentInfo)
		if err != nil {
			return nil, errors.Trace(err)
		}
		if desiredControllerCount == 0 {
			// Make sure we go to an odd number of desired voters, even if
			// HA was currently at 2 desired voters.
			desiredControllerCount = votingCount + (votingCount+1)%2
			if desiredControllerCount <= 1 {
				desiredControllerCount = 3
			}
		}
		if votingCount > desiredControllerCount {
			return nil, errors.New("cannot reduce controller count")
		}

		intent, err := st.enableHAIntentions(currentInfo, placement)
		if err != nil {
			return nil, err
		}
		voteCount := 0
		for _, m := range intent.maintain {
			if m.WantsVote() {
				voteCount++
			}
		}
		if voteCount == desiredControllerCount {
			return nil, jujutxn.ErrNoOperations
		}
		// Promote as many machines as we can to fulfil the shortfall.
		if n := desiredControllerCount - voteCount; n < len(intent.promote) {
			intent.promote = intent.promote[:n]
		}
		voteCount += len(intent.promote)

		// Then convert machines named by placement directives, again only
		// as many as are still needed.
		if n := desiredControllerCount - voteCount; n < len(intent.convert) {
			intent.convert = intent.convert[:n]
		}
		voteCount += len(intent.convert)

		// Whatever shortfall remains is made up with new machines.
		intent.newCount = desiredControllerCount - voteCount

		logger.Infof("%d new machines; promoting %v; converting %v", intent.newCount, intent.promote, intent.convert)

		var ops []txn.Op
		ops, change, err = st.enableHAIntentionOps(intent, currentInfo, cons, series)
		return ops, err
	}
	if err := st.db().Run(buildTxn); err != nil {
		err = errors.Annotate(err, "failed to create new controller machines")
		return ControllersChanges{}, err
	}
	return change, nil
}
// ControllersChanges describes the change in controllers after the ensure
// availability txn has committed. Each field holds machine ids.
type ControllersChanges struct {
// Added holds the ids of machines newly created as controllers.
Added []string
// Removed is not populated by enableHAIntentionOps.
// NOTE(review): presumably ids of machines removed as controllers — confirm against callers.
Removed []string
// Maintained holds the ids of existing controllers that are kept as they are.
Maintained []string
// Promoted holds the ids of existing controller machines whose novote flag
// is cleared so that they become voters.
Promoted []string
// Demoted is not populated by enableHAIntentionOps.
// NOTE(review): presumably ids of controllers that lost their vote — confirm against callers.
Demoted []string
// Converted holds the ids of existing machines that are given JobManageModel.
Converted []string
}
// enableHAIntentionOps turns the given intent into transaction operations
// and a matching summary of changes: promotions, conversions, newly added
// controller machines and the update of the controllers document.
func (st *State) enableHAIntentionOps(
	intent *enableHAIntent,
	currentInfo *ControllerInfo,
	cons constraints.Value,
	series string,
) ([]txn.Op, ControllersChanges, error) {
	var (
		ops    []txn.Op
		change ControllersChanges
	)

	for _, machine := range intent.promote {
		ops = append(ops, promoteControllerOps(machine)...)
		change.Promoted = append(change.Promoted, machine.doc.Id)
	}
	for _, machine := range intent.convert {
		ops = append(ops, convertControllerOps(machine)...)
		change.Converted = append(change.Converted, machine.doc.Id)
	}

	// New machines consume the provided placement directives in order.
	// While a directive is available the constraints are ignored; once
	// the directives run out, the constraints apply with no placement.
	usedPlacements := 0
	newDocs := make([]*machineDoc, intent.newCount)
	for i := range newDocs {
		template := MachineTemplate{
			Series:      series,
			Jobs:        []MachineJob{JobHostUnits, JobManageModel},
			Constraints: cons,
		}
		if usedPlacements < len(intent.placement) {
			template.Placement = intent.placement[usedPlacements]
			template.Constraints = constraints.Value{}
			usedPlacements++
		}

		newDoc, machineOps, err := st.addMachineOps(template)
		if err != nil {
			return nil, ControllersChanges{}, err
		}
		newDocs[i] = newDoc
		ops = append(ops, machineOps...)
		change.Added = append(change.Added, newDoc.Id)
	}

	// Maintained machines need no operations; they are only reported.
	for _, machine := range intent.maintain {
		tag, err := names.ParseTag(machine.Tag().String())
		switch {
		case err != nil:
			return nil, ControllersChanges{}, errors.Annotate(err, "could not parse machine tag")
		case tag.Kind() != names.MachineTagKind:
			return nil, ControllersChanges{}, errors.Errorf("expected machine tag kind, got %s", tag.Kind())
		}
		change.Maintained = append(change.Maintained, tag.Id())
	}

	controllerOps, err := st.maintainControllersOps(newDocs, currentInfo)
	if err != nil {
		return nil, ControllersChanges{}, errors.Annotate(err, "cannot prepare machine add operations")
	}
	return append(ops, controllerOps...), change, nil
}
// enableHAIntent collects what EnableHA plans to do in one transaction attempt.
type enableHAIntent struct {
// newCount is the number of new controller machines to create.
newCount int
// placement holds the placement directives to hand out, in order, to
// newly created machines.
placement []string
// promote holds existing controller machines that do not want a vote,
// maintain holds existing controller machines that already want a vote,
// and convert holds non-controller machines named by machine-scoped
// placement directives.
promote, maintain, convert []*Machine
}
// enableHAIntentions works out what we would like to do to maintain the
// availability of the existing servers mentioned in the given info:
//   - machines named by machine-scoped placement directives are gathered
//     for conversion into controllers;
//   - existing controller machines that want a vote are maintained;
//   - existing controller machines that do not want a vote are gathered
//     as candidates for promotion.
//
// Unscoped placement directives are passed through untouched. Container
// placements, placements naming a machine that is already a controller and
// any other kind of directive are rejected with an error.
func (st *State) enableHAIntentions(info *ControllerInfo, placement []string) (*enableHAIntent, error) {
	intent := enableHAIntent{}

	for _, s := range placement {
		// TODO(natefinch): unscoped placements shouldn't ever get here (though
		// they do currently). We should fix up the CLI to always add a scope
		// to placements and then we can remove the need to deal with unscoped
		// placements.
		p, err := instance.ParsePlacement(s)
		switch {
		case err == instance.ErrPlacementScopeMissing:
			intent.placement = append(intent.placement, s)

		case err == nil && p.Scope == instance.MachineScope:
			if names.IsContainerMachine(p.Directive) {
				return nil, errors.New("container placement directives not supported")
			}
			target, err := st.Machine(p.Directive)
			if err != nil {
				return nil, errors.Annotatef(err, "can't find machine for placement directive %q", s)
			}
			if target.IsManager() {
				return nil, errors.Errorf("machine for placement directive %q is already a controller", s)
			}
			intent.convert = append(intent.convert, target)
			intent.placement = append(intent.placement, s)

		default:
			return nil, errors.Errorf("unsupported placement directive %q", s)
		}
	}

	for _, id := range info.MachineIds {
		controller, err := st.Machine(id)
		if err != nil {
			return nil, err
		}
		logger.Infof("machine %q, wants vote %v, has vote %v", controller, controller.WantsVote(), controller.HasVote())
		if !controller.WantsVote() {
			intent.promote = append(intent.promote, controller)
			continue
		}
		intent.maintain = append(intent.maintain, controller)
	}

	logger.Infof("initial intentions: promote %v; maintain %v; convert: %v",
		intent.promote, intent.maintain, intent.convert)
	return &intent, nil
}
// convertControllerOps returns the operations that turn the existing
// machine m into a controller: JobManageModel is added to its jobs and
// novote is set to false, provided it does not already have that job,
// and its id is added to the controllers document.
func convertControllerOps(m *Machine) []txn.Op {
	machineOp := txn.Op{
		C:      machinesC,
		Id:     m.doc.DocID,
		Assert: bson.D{{"jobs", bson.D{{"$nin", []MachineJob{JobManageModel}}}}},
		Update: bson.D{
			{"$addToSet", bson.D{{"jobs", JobManageModel}}},
			{"$set", bson.D{{"novote", false}}},
		},
	}
	controllersOp := txn.Op{
		C:  controllersC,
		Id: modelGlobalKey,
		Update: bson.D{
			{"$addToSet", bson.D{{"machineids", m.doc.Id}}},
		},
	}
	return []txn.Op{machineOp, controllersOp}
}
// promoteControllerOps returns the operation that sets novote to false on
// machine m, asserting that novote is currently true.
func promoteControllerOps(m *Machine) []txn.Op {
	currentlyNoVote := bson.D{{"novote", true}}
	clearNoVote := bson.D{{"$set", bson.D{{"novote", false}}}}
	promote := txn.Op{
		C:      machinesC,
		Id:     m.doc.DocID,
		Assert: currentlyNoVote,
		Update: clearNoVote,
	}
	return []txn.Op{promote}
}
// removeControllerOps returns the operations that stop machine m from being
// a controller. The machine operation asserts that novote is true and
// hasvote is false before pulling JobManageModel from the jobs; the
// controllers operation asserts that machineids still equals
// controllerInfo.MachineIds before pulling the machine's id from it.
func removeControllerOps(m *Machine, controllerInfo *ControllerInfo) []txn.Op {
	dropJob := txn.Op{
		C:  machinesC,
		Id: m.doc.DocID,
		Assert: bson.D{
			{"novote", true},
			{"hasvote", false},
		},
		Update: bson.D{
			{"$pull", bson.D{{"jobs", JobManageModel}}},
		},
	}
	dropMachineID := txn.Op{
		C:      controllersC,
		Id:     modelGlobalKey,
		Assert: bson.D{{"machineids", controllerInfo.MachineIds}},
		Update: bson.D{{"$pull", bson.D{{"machineids", m.doc.Id}}}},
	}
	return []txn.Op{dropJob, dropMachineID}
}
// RemoveControllerMachine will remove Machine from being part of the set of Controllers.
// It must not have or want to vote, and it must not be the last controller.
// An error is returned if any of those conditions does not hold, if the
// machine cannot be refreshed on a retry, or if the transaction fails.
func (st *State) RemoveControllerMachine(m *Machine) error {
	logger.Infof("removing controller machine %q", m.doc.Id)
	buildTxn := func(attempt int) ([]txn.Op, error) {
		if attempt != 0 {
			// Something changed, make sure we're still up to date.
			// A failed refresh must not be ignored: the checks below
			// would otherwise run against stale machine data.
			if err := m.Refresh(); err != nil {
				return nil, errors.Trace(err)
			}
		}
		if m.WantsVote() {
			return nil, errors.Errorf("machine %s cannot be removed as a controller as it still wants to vote", m.Id())
		}
		if m.HasVote() {
			return nil, errors.Errorf("machine %s cannot be removed as a controller as it still has a vote", m.Id())
		}
		controllerInfo, err := st.ControllerInfo()
		if err != nil {
			return nil, errors.Trace(err)
		}
		if len(controllerInfo.MachineIds) <= 1 {
			return nil, errors.Errorf("machine %s cannot be removed as it is the last controller", m.Id())
		}
		return removeControllerOps(m, controllerInfo), nil
	}
	if err := st.db().Run(buildTxn); err != nil {
		return errors.Trace(err)
	}
	return nil
}