Skip to content

Commit

Permalink
Increase socket timeout to 1 minute during bootstrap
Browse files Browse the repository at this point in the history
We increase the socket timeout during bootstrap to
cope with slow disk I/O during machine initialisation.
Empirical evidence has shown this to decrease the
bootstrap failure rate on Microsoft Azure.

Fixes https://bugs.launchpad.net/juju-core/+bug/1351101
  • Loading branch information
axw committed Aug 8, 2014
1 parent 9742c8d commit 14561dc
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 10 deletions.
4 changes: 2 additions & 2 deletions agent/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ type BootstrapMachineConfig struct {

const BootstrapMachineId = "0"

func InitializeState(c ConfigSetter, envCfg *config.Config, machineCfg BootstrapMachineConfig, timeout mongo.DialOpts, policy state.Policy) (_ *state.State, _ *state.Machine, resultErr error) {
func InitializeState(c ConfigSetter, envCfg *config.Config, machineCfg BootstrapMachineConfig, dialOpts mongo.DialOpts, policy state.Policy) (_ *state.State, _ *state.Machine, resultErr error) {
if c.Tag() != names.NewMachineTag(BootstrapMachineId) {
return nil, nil, fmt.Errorf("InitializeState not called with bootstrap machine's configuration")
}
Expand All @@ -85,7 +85,7 @@ func InitializeState(c ConfigSetter, envCfg *config.Config, machineCfg Bootstrap
info.Password = ""

logger.Debugf("initializing address %v", info.Addrs)
st, err := state.Initialize(info, envCfg, timeout, policy)
st, err := state.Initialize(info, envCfg, dialOpts, policy)
if err != nil {
return nil, nil, fmt.Errorf("failed to initialize state: %v", err)
}
Expand Down
11 changes: 10 additions & 1 deletion cmd/jujud/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,15 @@ func (c *BootstrapCommand) Run(_ *cmd.Context) error {
var m *state.Machine
err = c.ChangeConfig(func(agentConfig agent.ConfigSetter) error {
var stateErr error
dialOpts := mongo.DefaultDialOpts()

// Set a longer socket timeout than usual, as the machine
// will be starting up and disk I/O will be slower than usual.
// This has been known to cause timeouts in queries.
dialOpts.SocketTimeout = 1 * time.Minute

// We shouldn't attempt to dial peers until we have some.
dialOpts.Direct = true
st, m, stateErr = agent.InitializeState(
agentConfig,
envCfg,
Expand All @@ -146,7 +155,7 @@ func (c *BootstrapCommand) Run(_ *cmd.Context) error {
Characteristics: c.Hardware,
SharedSecret: sharedSecret,
},
mongo.DefaultDialOpts(),
dialOpts,
environs.NewStatePolicy(),
)
return stateErr
Expand Down
28 changes: 27 additions & 1 deletion mongo/open.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ type DialOpts struct {
// a state server.
Timeout time.Duration

// SocketTimeout is the amount of time to wait for a
// non-responding socket to the database before it is
// forcefully closed. If this is zero, Timeout will be
// used.
SocketTimeout time.Duration

// Direct informs whether to establish connections only with the
// specified seed servers, or to obtain information for the whole
// cluster and establish connections with further servers too.
Expand All @@ -45,7 +51,10 @@ type DialOpts struct {
// DefaultDialOpts returns a DialOpts representing the default
// parameters for contacting a state server.
func DefaultDialOpts() DialOpts {
return DialOpts{Timeout: defaultDialTimeout}
return DialOpts{
Timeout: defaultDialTimeout,
SocketTimeout: SocketTimeout,
}
}

// Info encapsulates information about cluster of
Expand Down Expand Up @@ -103,3 +112,20 @@ func DialInfo(info Info, opts DialOpts) (*mgo.DialInfo, error) {
Direct: opts.Direct,
}, nil
}

// DialWithInfo opens a new mgo session to the cluster described by info,
// using the dial parameters in opts.
//
// The mgo dial information is derived via DialInfo; any error from that
// step, or from the dial itself, is returned unchanged with a nil session.
// When opts.SocketTimeout is non-zero it is applied to the new session;
// otherwise the session's socket timeout is left as the dial configured it.
func DialWithInfo(info Info, opts DialOpts) (*mgo.Session, error) {
	mgoInfo, err := DialInfo(info, opts)
	if err != nil {
		return nil, err
	}

	sess, err := mgo.DialWithInfo(mgoInfo)
	if err != nil {
		return nil, err
	}

	// Only override the socket timeout when the caller asked for one.
	if timeout := opts.SocketTimeout; timeout != 0 {
		sess.SetSocketTimeout(timeout)
	}
	return sess, nil
}
7 changes: 1 addition & 6 deletions state/open.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,8 @@ import (
// Open returns unauthorizedError if access is unauthorized.
func Open(info *authentication.MongoInfo, opts mongo.DialOpts, policy Policy) (*State, error) {
logger.Infof("opening state, mongo addresses: %q; entity %q", info.Addrs, info.Tag)
di, err := mongo.DialInfo(info.Info, opts)
if err != nil {
return nil, err
}
logger.Debugf("dialing mongo")
session, err := mgo.DialWithInfo(di)
session, err := mongo.DialWithInfo(info.Info, opts)
if err != nil {
return nil, err
}
Expand All @@ -58,7 +54,6 @@ func Open(info *authentication.MongoInfo, opts mongo.DialOpts, policy Policy) (*
session.Close()
return nil, err
}
session.SetSocketTimeout(mongo.SocketTimeout)
return st, nil
}

Expand Down

0 comments on commit 14561dc

Please sign in to comment.