Skip to content

Commit

Permalink
Merge pull request #13650 from wallyworld/resource-download-limit
Browse files Browse the repository at this point in the history
#13650

This follows on from the work done in #13215 which throttled resource downloads from the store.
Here we introduce limits to resource downloads from the controller to the workload nodes (initiated by the unit agents calling `resource-get`).

There are 2 new controller config attributes:
- controller-resource-download-limit
- application-resource-download-limit

The first limits the total number of concurrent downloads across all applications hosted in any model on the controller.
The second limits the number of concurrent downloads per application per model.
The default value for both limits is 0, meaning no throttling; the new behaviour is opt-in.

## QA steps

I deployed the `dummy-resource` charm with slightly modified hooks; the line
`status-set maintenance $(cat $RES_PATH)`
was commented out.

First create a number of machines to run the charm
juju bootstrap 
juju controller-config controller-resource-download-limit=5
juju add-machine -n 20

Optionally turn on debug logging for `juju.resource.resourceadapters`
juju model-config logging-config="juju.resource.resourceadapters=DEBUG"

Let the machines start.
watch -c juju status --color

Then deploy the charm twice with different app names, using a large zip file
juju deploy /path/to/dummy-resource -n 10 --resource dummy=./dummy-resource.zip --to 1,2,3,4,5,6,7,8,9,10
juju deploy /path/to/dummy-resource -n 10 app2 --resource dummy=./dummy-resource.zip --to 11,12,13,14,15,16,17,18,19,20

`juju status` will show the message for each unit change to indicate that the resource has been delivered. Typically a few units change at once.
debug-log will show the resource download lock being acquired in batches.

ssh into the machines and check the zip file is present in the unit resources directory.

## Bug reference

https://bugs.launchpad.net/juju/+bug/1940219
  • Loading branch information
jujubot authored Jan 25, 2022
2 parents 0fa9255 + 1676091 commit b9f202e
Show file tree
Hide file tree
Showing 11 changed files with 783 additions and 115 deletions.
23 changes: 22 additions & 1 deletion apiserver/apiserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,10 @@ type Server struct {
agentRateLimitRate time.Duration
agentRateLimit *ratelimit.Bucket

// resourceLock is used to limit the number of
// concurrent resource downloads to units.
resourceLock resourceadapters.ResourceDownloadLock

// registerIntrospectionHandlers is a function that will
// call a function with (path, http.Handler) tuples. This
// is to support registering the handlers underneath the
Expand Down Expand Up @@ -331,6 +335,7 @@ func newServer(cfg ServerConfig) (_ *Server, err error) {
healthStatus: "starting",
}
srv.updateAgentRateLimiter(controllerConfig)
srv.updateResourceDownloadLimiters(controllerConfig)

// We are able to get the current controller config before subscribing to changes
// because the changes are only ever published in response to an API call,
Expand All @@ -343,6 +348,7 @@ func newServer(cfg ServerConfig) (_ *Server, err error) {
return
}
srv.updateAgentRateLimiter(data.Config)
srv.updateResourceDownloadLimiters(data.Config)
})
if err != nil {
logger.Criticalf("programming error in subscribe function: %v", err)
Expand Down Expand Up @@ -471,6 +477,20 @@ func (srv *Server) updateAgentRateLimiter(cfg controller.Config) {
}
}

// updateResourceDownloadLimiters replaces the server's resource download
// lock with a freshly constructed limiter, built from the controller-wide
// and per-application download limits held in cfg. The swap is done while
// holding srv.mu.
func (srv *Server) updateResourceDownloadLimiters(cfg controller.Config) {
	srv.mu.Lock()
	defer srv.mu.Unlock()

	// Arguments are evaluated left to right: controller-wide limit first,
	// then the per-application limit.
	srv.resourceLock = resourceadapters.NewResourceDownloadLimiter(
		cfg.ControllerResourceDownloadLimit(),
		cfg.ApplicationResourceDownloadLimit(),
	)
}

// getResourceDownloadLimiter returns the resource download lock currently
// installed on the server. The field is read while holding srv.mu, so the
// caller gets whichever limiter was most recently stored by
// updateResourceDownloadLimiters.
func (srv *Server) getResourceDownloadLimiter() resourceadapters.ResourceDownloadLock {
	srv.mu.Lock()
	current := srv.resourceLock
	srv.mu.Unlock()
	return current
}

// rateClock embeds a clock.Clock, so the embedded clock's methods are
// available directly on a rateClock value.
// NOTE(review): presumably this adapts the clock for the agent rate limiter;
// its own methods are not visible in this hunk — confirm.
type rateClock struct {
clock.Clock
}
Expand Down Expand Up @@ -712,7 +732,8 @@ func (srv *Server) endpoints() []apihttp.Endpoint {
if err != nil {
return nil, nil, errors.Trace(err)
}
opener, err := resourceadapters.NewResourceOpener(resourceadapters.NewResourceOpenerState(st.State), tag.Id())
opener, err := resourceadapters.NewResourceOpener(
resourceadapters.NewResourceOpenerState(st.State), srv.getResourceDownloadLimiter, tag.Id())
if err != nil {
return nil, nil, errors.Trace(err)
}
Expand Down
246 changes: 160 additions & 86 deletions controller/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,17 @@ const (
// ControllerName is the canonical name for the controller
ControllerName = "controller-name"

// ApplicationResourceDownloadLimit limits the number of concurrent resource download
// requests from unit agents which will be served. The limit is per application.
// Use a value of 0 to disable the limit.
ApplicationResourceDownloadLimit = "application-resource-download-limit"

// ControllerResourceDownloadLimit limits the number of concurrent resource download
// requests from unit agents which will be served. The limit is for the combined total
// of all applications on the controller.
// Use a value of 0 to disable the limit.
ControllerResourceDownloadLimit = "controller-resource-download-limit"

// AgentRateLimitMax is the maximum size of the token bucket used to
// ratelimit the agent connections.
AgentRateLimitMax = "agent-ratelimit-max"
Expand Down Expand Up @@ -248,6 +259,14 @@ const (

// Attribute Defaults

// DefaultApplicationResourceDownloadLimit allows unlimited
// resource download requests initiated by a unit agent per application.
DefaultApplicationResourceDownloadLimit = 0

// DefaultControllerResourceDownloadLimit allows unlimited concurrent resource
// download requests initiated by unit agents for any application on the controller.
DefaultControllerResourceDownloadLimit = 0

// DefaultAgentRateLimitMax allows the first 10 agents to connect without
// any issue. After that the rate limiting kicks in.
DefaultAgentRateLimitMax = 10
Expand Down Expand Up @@ -406,6 +425,8 @@ var (
MaxAgentStateSize,
NonSyncedWritesToRaftLog,
MigrationMinionWaitMax,
ApplicationResourceDownloadLimit,
ControllerResourceDownloadLimit,
}

// For backwards compatibility, we must include "anything", "juju-apiserver"
Expand Down Expand Up @@ -451,6 +472,8 @@ var (
MaxAgentStateSize,
NonSyncedWritesToRaftLog,
MigrationMinionWaitMax,
ApplicationResourceDownloadLimit,
ControllerResourceDownloadLimit,
)

// DefaultAuditLogExcludeMethods is the default list of methods to
Expand Down Expand Up @@ -601,6 +624,35 @@ func (c Config) ControllerAPIPort() int {
return value
}

// ApplicationResourceDownloadLimit returns the maximum number of concurrent
// resource download requests from unit agents which will be served for a
// single application. An int value is returned as is and a float64 value is
// converted to int. If the attribute is unset, or holds any other type,
// DefaultApplicationResourceDownloadLimit is returned.
func (c Config) ApplicationResourceDownloadLimit() int {
	raw := c[ApplicationResourceDownloadLimit]
	if limit, ok := raw.(float64); ok {
		return int(limit)
	}
	if limit, ok := raw.(int); ok {
		return limit
	}
	// A missing (nil) attribute, or one of an unexpected type, ends up here.
	return DefaultApplicationResourceDownloadLimit
}

// ControllerResourceDownloadLimit returns the maximum number of concurrent
// resource download requests from unit agents which will be served, counted
// as the combined total of all applications on the controller. An int value
// is returned as is and a float64 value is converted to int. If the
// attribute is unset, or holds any other type,
// DefaultControllerResourceDownloadLimit is returned.
func (c Config) ControllerResourceDownloadLimit() int {
	raw := c[ControllerResourceDownloadLimit]
	if limit, ok := raw.(float64); ok {
		return int(limit)
	}
	if limit, ok := raw.(int); ok {
		return limit
	}
	// A missing (nil) attribute, or one of an unexpected type, ends up here.
	return DefaultControllerResourceDownloadLimit
}

// AgentRateLimitMax is the initial size of the token bucket that is used to
// rate limit agent connections.
func (c Config) AgentRateLimitMax() int {
Expand Down Expand Up @@ -995,6 +1047,16 @@ func Validate(c Config) error {
return errors.Errorf("controller-uuid: expected UUID, got string(%q)", uuid)
}

if v, ok := c[ApplicationResourceDownloadLimit].(int); ok {
if v < 0 {
return errors.Errorf("negative %s (%d) not valid, use 0 to disable the limit", ApplicationResourceDownloadLimit, v)
}
}
if v, ok := c[ControllerResourceDownloadLimit].(int); ok {
if v < 0 {
return errors.Errorf("negative %s (%d) not valid, use 0 to disable the limit", ControllerResourceDownloadLimit, v)
}
}
if v, ok := c[AgentRateLimitMax].(int); ok {
if v < 0 {
return errors.NotValidf("negative %s (%d)", AgentRateLimitMax, v)
Expand Down Expand Up @@ -1237,98 +1299,110 @@ func (c Config) AsSpaceConstraints(spaces *[]string) *[]string {
}

var configChecker = schema.FieldMap(schema.Fields{
AgentRateLimitMax: schema.ForceInt(),
AgentRateLimitRate: schema.TimeDuration(),
AuditingEnabled: schema.Bool(),
AuditLogCaptureArgs: schema.Bool(),
AuditLogMaxSize: schema.String(),
AuditLogMaxBackups: schema.ForceInt(),
AuditLogExcludeMethods: schema.List(schema.String()),
APIPort: schema.ForceInt(),
APIPortOpenDelay: schema.String(),
ControllerAPIPort: schema.ForceInt(),
ControllerName: schema.String(),
StatePort: schema.ForceInt(),
IdentityURL: schema.String(),
IdentityPublicKey: schema.String(),
SetNUMAControlPolicyKey: schema.Bool(),
AutocertURLKey: schema.String(),
AutocertDNSNameKey: schema.String(),
AllowModelAccessKey: schema.Bool(),
MongoMemoryProfile: schema.String(),
JujuDBSnapChannel: schema.String(),
MaxDebugLogDuration: schema.TimeDuration(),
MaxTxnLogSize: schema.String(),
MaxPruneTxnBatchSize: schema.ForceInt(),
MaxPruneTxnPasses: schema.ForceInt(),
AgentLogfileMaxBackups: schema.ForceInt(),
AgentLogfileMaxSize: schema.String(),
ModelLogfileMaxBackups: schema.ForceInt(),
ModelLogfileMaxSize: schema.String(),
ModelLogsSize: schema.String(),
PruneTxnQueryCount: schema.ForceInt(),
PruneTxnSleepTime: schema.String(),
PublicDNSAddress: schema.String(),
JujuHASpace: schema.String(),
JujuManagementSpace: schema.String(),
CAASOperatorImagePath: schema.String(),
CAASImageRepo: schema.String(),
Features: schema.List(schema.String()),
CharmStoreURL: schema.String(),
MeteringURL: schema.String(),
MaxCharmStateSize: schema.ForceInt(),
MaxAgentStateSize: schema.ForceInt(),
NonSyncedWritesToRaftLog: schema.Bool(),
MigrationMinionWaitMax: schema.String(),
AgentRateLimitMax: schema.ForceInt(),
AgentRateLimitRate: schema.TimeDuration(),
AuditingEnabled: schema.Bool(),
AuditLogCaptureArgs: schema.Bool(),
AuditLogMaxSize: schema.String(),
AuditLogMaxBackups: schema.ForceInt(),
AuditLogExcludeMethods: schema.List(schema.String()),
APIPort: schema.ForceInt(),
APIPortOpenDelay: schema.String(),
ControllerAPIPort: schema.ForceInt(),
ControllerName: schema.String(),
StatePort: schema.ForceInt(),
IdentityURL: schema.String(),
IdentityPublicKey: schema.String(),
SetNUMAControlPolicyKey: schema.Bool(),
AutocertURLKey: schema.String(),
AutocertDNSNameKey: schema.String(),
AllowModelAccessKey: schema.Bool(),
MongoMemoryProfile: schema.String(),
JujuDBSnapChannel: schema.String(),
MaxDebugLogDuration: schema.TimeDuration(),
MaxTxnLogSize: schema.String(),
MaxPruneTxnBatchSize: schema.ForceInt(),
MaxPruneTxnPasses: schema.ForceInt(),
AgentLogfileMaxBackups: schema.ForceInt(),
AgentLogfileMaxSize: schema.String(),
ModelLogfileMaxBackups: schema.ForceInt(),
ModelLogfileMaxSize: schema.String(),
ModelLogsSize: schema.String(),
PruneTxnQueryCount: schema.ForceInt(),
PruneTxnSleepTime: schema.String(),
PublicDNSAddress: schema.String(),
JujuHASpace: schema.String(),
JujuManagementSpace: schema.String(),
CAASOperatorImagePath: schema.String(),
CAASImageRepo: schema.String(),
Features: schema.List(schema.String()),
CharmStoreURL: schema.String(),
MeteringURL: schema.String(),
MaxCharmStateSize: schema.ForceInt(),
MaxAgentStateSize: schema.ForceInt(),
NonSyncedWritesToRaftLog: schema.Bool(),
MigrationMinionWaitMax: schema.String(),
ApplicationResourceDownloadLimit: schema.ForceInt(),
ControllerResourceDownloadLimit: schema.ForceInt(),
}, schema.Defaults{
AgentRateLimitMax: schema.Omit,
AgentRateLimitRate: schema.Omit,
APIPort: DefaultAPIPort,
APIPortOpenDelay: DefaultAPIPortOpenDelay,
ControllerAPIPort: schema.Omit,
ControllerName: schema.Omit,
AuditingEnabled: DefaultAuditingEnabled,
AuditLogCaptureArgs: DefaultAuditLogCaptureArgs,
AuditLogMaxSize: fmt.Sprintf("%vM", DefaultAuditLogMaxSizeMB),
AuditLogMaxBackups: DefaultAuditLogMaxBackups,
AuditLogExcludeMethods: DefaultAuditLogExcludeMethods,
StatePort: DefaultStatePort,
IdentityURL: schema.Omit,
IdentityPublicKey: schema.Omit,
SetNUMAControlPolicyKey: DefaultNUMAControlPolicy,
AutocertURLKey: schema.Omit,
AutocertDNSNameKey: schema.Omit,
AllowModelAccessKey: schema.Omit,
MongoMemoryProfile: DefaultMongoMemoryProfile,
JujuDBSnapChannel: DefaultJujuDBSnapChannel,
MaxDebugLogDuration: DefaultMaxDebugLogDuration,
MaxTxnLogSize: fmt.Sprintf("%vM", DefaultMaxTxnLogCollectionMB),
MaxPruneTxnBatchSize: DefaultMaxPruneTxnBatchSize,
MaxPruneTxnPasses: DefaultMaxPruneTxnPasses,
AgentLogfileMaxBackups: DefaultAgentLogfileMaxBackups,
AgentLogfileMaxSize: fmt.Sprintf("%vM", DefaultAgentLogfileMaxSize),
ModelLogfileMaxBackups: DefaultModelLogfileMaxBackups,
ModelLogfileMaxSize: fmt.Sprintf("%vM", DefaultModelLogfileMaxSize),
ModelLogsSize: fmt.Sprintf("%vM", DefaultModelLogsSizeMB),
PruneTxnQueryCount: DefaultPruneTxnQueryCount,
PruneTxnSleepTime: DefaultPruneTxnSleepTime,
PublicDNSAddress: schema.Omit,
JujuHASpace: schema.Omit,
JujuManagementSpace: schema.Omit,
CAASOperatorImagePath: schema.Omit,
CAASImageRepo: schema.Omit,
Features: schema.Omit,
CharmStoreURL: csclient.ServerURL,
MeteringURL: romulus.DefaultAPIRoot,
MaxCharmStateSize: DefaultMaxCharmStateSize,
MaxAgentStateSize: DefaultMaxAgentStateSize,
NonSyncedWritesToRaftLog: DefaultNonSyncedWritesToRaftLog,
MigrationMinionWaitMax: DefaultMigrationMinionWaitMax,
AgentRateLimitMax: schema.Omit,
AgentRateLimitRate: schema.Omit,
APIPort: DefaultAPIPort,
APIPortOpenDelay: DefaultAPIPortOpenDelay,
ControllerAPIPort: schema.Omit,
ControllerName: schema.Omit,
AuditingEnabled: DefaultAuditingEnabled,
AuditLogCaptureArgs: DefaultAuditLogCaptureArgs,
AuditLogMaxSize: fmt.Sprintf("%vM", DefaultAuditLogMaxSizeMB),
AuditLogMaxBackups: DefaultAuditLogMaxBackups,
AuditLogExcludeMethods: DefaultAuditLogExcludeMethods,
StatePort: DefaultStatePort,
IdentityURL: schema.Omit,
IdentityPublicKey: schema.Omit,
SetNUMAControlPolicyKey: DefaultNUMAControlPolicy,
AutocertURLKey: schema.Omit,
AutocertDNSNameKey: schema.Omit,
AllowModelAccessKey: schema.Omit,
MongoMemoryProfile: DefaultMongoMemoryProfile,
JujuDBSnapChannel: DefaultJujuDBSnapChannel,
MaxDebugLogDuration: DefaultMaxDebugLogDuration,
MaxTxnLogSize: fmt.Sprintf("%vM", DefaultMaxTxnLogCollectionMB),
MaxPruneTxnBatchSize: DefaultMaxPruneTxnBatchSize,
MaxPruneTxnPasses: DefaultMaxPruneTxnPasses,
AgentLogfileMaxBackups: DefaultAgentLogfileMaxBackups,
AgentLogfileMaxSize: fmt.Sprintf("%vM", DefaultAgentLogfileMaxSize),
ModelLogfileMaxBackups: DefaultModelLogfileMaxBackups,
ModelLogfileMaxSize: fmt.Sprintf("%vM", DefaultModelLogfileMaxSize),
ModelLogsSize: fmt.Sprintf("%vM", DefaultModelLogsSizeMB),
PruneTxnQueryCount: DefaultPruneTxnQueryCount,
PruneTxnSleepTime: DefaultPruneTxnSleepTime,
PublicDNSAddress: schema.Omit,
JujuHASpace: schema.Omit,
JujuManagementSpace: schema.Omit,
CAASOperatorImagePath: schema.Omit,
CAASImageRepo: schema.Omit,
Features: schema.Omit,
CharmStoreURL: csclient.ServerURL,
MeteringURL: romulus.DefaultAPIRoot,
MaxCharmStateSize: DefaultMaxCharmStateSize,
MaxAgentStateSize: DefaultMaxAgentStateSize,
NonSyncedWritesToRaftLog: DefaultNonSyncedWritesToRaftLog,
MigrationMinionWaitMax: DefaultMigrationMinionWaitMax,
ApplicationResourceDownloadLimit: schema.Omit,
ControllerResourceDownloadLimit: schema.Omit,
})

// ConfigSchema holds information on all the fields defined by
// the config package.
var ConfigSchema = environschema.Fields{
ApplicationResourceDownloadLimit: {
Description: "The maximum number of concurrent resources downloads per application",
Type: environschema.Tint,
},
ControllerResourceDownloadLimit: {
Description: "The maximum number of concurrent resources downloads across all the applications on the controller",
Type: environschema.Tint,
},
AgentRateLimitMax: {
Description: "The maximum size of the token bucket used to ratelimit agent connections",
Type: environschema.Tint,
Expand Down
Loading

0 comments on commit b9f202e

Please sign in to comment.