Skip to content

Commit

Permalink
feat(cli): automatically roll back stacks if necessary (aws#31920)
Browse files Browse the repository at this point in the history
If a user is deploying with `--no-rollback`, and the stack contains replacements (or the `--no-rollback` flag is dropped), then a rollback needs to be performed before a regular deployment can happen again.

In this PR, we add a prompt where we ask the user to confirm that they are okay with performing a rollback and then a normal deployment.

The way this works is that `deployStack` detects a disallowed combination (replacement and no-rollback, or being in a stuck state and not being called with no-rollback), and returns a special status code. The driver of the calls, `CdkToolkit`, will see those special return codes, prompt the user, and retry.

Also get rid of a stray `Stack undefined` that gets printed to the console.

Closes aws#30546, Closes aws#31685

----

*By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license*
  • Loading branch information
rix0rrr authored Nov 5, 2024
1 parent b3de7e6 commit 2f9fb1e
Show file tree
Hide file tree
Showing 17 changed files with 500 additions and 106 deletions.
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
const cdk = require('aws-cdk-lib');
const lambda = require('aws-cdk-lib/aws-lambda');
const sqs = require('aws-cdk-lib/aws-sqs');
const cr = require('aws-cdk-lib/custom-resources');

/**
* This stack will be deployed in multiple phases, to achieve a very specific effect
*
* It contains resources r1 and r2, where r1 gets deployed first.
* It contains resources r1 and r2, and a queue q, where r1 gets deployed first.
*
* - PHASE = 1: both resources deploy regularly.
* - PHASE = 2a: r1 gets updated, r2 will fail to update
* - PHASE = 2b: r1 gets updated, r2 will fail to update, and r1 will fail its rollback.
* - PHASE = 3: q gets replaced w.r.t. phases 1 and 2
*
* To exercise this app:
*
Expand All @@ -22,7 +24,7 @@ const cr = require('aws-cdk-lib/custom-resources');
* # This will start a rollback that will fail because r1 fails its rollback
*
* env PHASE=2b npx cdk rollback --force
* # This will retry the rollabck and skip r1
* # This will retry the rollback and skip r1
* ```
*/
class RollbacktestStack extends cdk.Stack {
Expand All @@ -31,6 +33,7 @@ class RollbacktestStack extends cdk.Stack {

let r1props = {};
let r2props = {};
let fifo = false;

const phase = process.env.PHASE;
switch (phase) {
Expand All @@ -46,6 +49,9 @@ class RollbacktestStack extends cdk.Stack {
r1props.FailRollback = true;
r2props.FailUpdate = true;
break;
case '3':
fifo = true;
break;
}

const fn = new lambda.Function(this, 'Fun', {
Expand Down Expand Up @@ -76,6 +82,10 @@ class RollbacktestStack extends cdk.Stack {
properties: r2props,
});
r2.node.addDependency(r1);

new sqs.Queue(this, 'Queue', {
fifo,
});
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2450,6 +2450,103 @@ integTest(
}),
);

// Scenario: the stack is left paused by a failed `--no-rollback` update, and the
// next deployment contains a resource replacement while still passing `--no-rollback`.
integTest(
  'automatic rollback if paused and change contains a replacement',
  withSpecificFixture('rollback-test-app', async (fixture) => {
    const stackName = 'test-rollback';

    // PHASE=1 -- initial deployment, should succeed
    await fixture.cdkDeploy(stackName, {
      options: ['--no-rollback'],
      modEnv: { PHASE: '1' },
      verbose: false,
    });

    try {
      // PHASE=2a -- this update should fail; the non-zero exit is tolerated so
      // that the captured output can be inspected
      const failedDeployOutput = await fixture.cdkDeploy(stackName, {
        options: ['--no-rollback'],
        modEnv: { PHASE: '2a' },
        verbose: false,
        allowErrExit: true,
      });
      expect(failedDeployOutput).toContain('UPDATE_FAILED');

      // PHASE=3 -- deployment with a replacement and --force: this will roll back
      // first and then deploy normally
      await fixture.cdkDeploy(stackName, {
        options: ['--no-rollback', '--force'],
        modEnv: { PHASE: '3' },
        verbose: false,
      });
    } finally {
      // Always clean up the stack once the first deployment has gone through
      await fixture.cdkDestroy(stackName);
    }
  }),
);

// Scenario: the stack is left paused by a failed `--no-rollback` update, and the
// next deployment is started without the `--no-rollback` flag.
integTest(
  'automatic rollback if paused and --no-rollback is removed from flags',
  withSpecificFixture('rollback-test-app', async (fixture) => {
    const stackName = 'test-rollback';

    // PHASE=1 -- initial deployment, should succeed
    await fixture.cdkDeploy(stackName, {
      options: ['--no-rollback'],
      modEnv: { PHASE: '1' },
      verbose: false,
    });

    try {
      // PHASE=2a -- this update should fail; the non-zero exit is tolerated so
      // that the captured output can be inspected
      const failedDeployOutput = await fixture.cdkDeploy(stackName, {
        options: ['--no-rollback'],
        modEnv: { PHASE: '2a' },
        verbose: false,
        allowErrExit: true,
      });
      expect(failedDeployOutput).toContain('UPDATE_FAILED');

      // Back to PHASE=1, now without --no-rollback: this will roll back first
      // and then deploy normally
      await fixture.cdkDeploy(stackName, {
        options: ['--force'],
        modEnv: { PHASE: '1' },
        verbose: false,
      });
    } finally {
      // Always clean up the stack once the first deployment has gone through
      await fixture.cdkDestroy(stackName);
    }
  }),
);

// Scenario: a healthy stack deployed with `--no-rollback` receives a change that
// contains a replacement, deployed without the `--no-rollback` flag.
integTest(
  'automatic rollback if replacement and --no-rollback is removed from flags',
  withSpecificFixture('rollback-test-app', async (fixture) => {
    const stackName = 'test-rollback';

    // PHASE=1 -- initial deployment, should succeed
    await fixture.cdkDeploy(stackName, {
      options: ['--no-rollback'],
      modEnv: { PHASE: '1' },
      verbose: false,
    });

    try {
      // PHASE=3 -- deployment with a replacement and without --no-rollback:
      // this will do a regular rollback deploy
      await fixture.cdkDeploy(stackName, {
        options: ['--force'],
        modEnv: { PHASE: '3' },
        verbose: false,
      });
    } finally {
      // Always clean up the stack once the first deployment has gone through
      await fixture.cdkDestroy(stackName);
    }
  }),
);

integTest(
'test cdk rollback --force',
withSpecificFixture('rollback-test-app', async (fixture) => {
Expand Down
2 changes: 1 addition & 1 deletion packages/@aws-cdk/cloudformation-diff/lib/format.ts
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ export class Formatter {
const resourceType = diff.isRemoval ? diff.oldResourceType : diff.newResourceType;

// eslint-disable-next-line max-len
this.print(`${this.formatResourcePrefix(diff)} ${this.formatValue(resourceType, chalk.cyan)} ${this.formatLogicalId(logicalId)} ${this.formatImpact(diff.changeImpact)}`);
this.print(`${this.formatResourcePrefix(diff)} ${this.formatValue(resourceType, chalk.cyan)} ${this.formatLogicalId(logicalId)} ${this.formatImpact(diff.changeImpact)}`.trimEnd());

if (diff.isUpdate) {
const differenceCount = diff.differenceCount;
Expand Down
19 changes: 11 additions & 8 deletions packages/aws-cdk/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -205,11 +205,14 @@ $ cdk deploy -R
```

If a deployment fails you can update your code and immediately retry the
deployment from the point of failure. If you would like to explicitly roll back a failed, paused deployment,
use `cdk rollback`.
deployment from the point of failure. If you would like to explicitly roll back
a failed, paused deployment, use `cdk rollback`.

NOTE: you cannot use `--no-rollback` for any updates that would cause a resource replacement, only for updates
and creations of new resources.
`--no-rollback` deployments cannot contain resource replacements. If the CLI
detects that a resource is being replaced, it will prompt you to perform
a regular replacement instead. If the stack rollback is currently paused
and you are trying to perform a deployment that contains a replacement, you
will be prompted to roll back first.

#### Deploying multiple stacks

Expand Down Expand Up @@ -801,7 +804,7 @@ In practice this means for any resource in the provided template, for example,
}
```

There must not exist a resource of that type with the same identifier in the desired region. In this example that identfier
There must not exist a resource of that type with the same identifier in the desired region. In this example that identifier
would be "amzn-s3-demo-bucket"

##### **The provided template is not deployed to CloudFormation in the account/region, and there *is* overlap with existing resources in the account/region**
Expand Down Expand Up @@ -900,7 +903,7 @@ CDK Garbage Collection.
> API of feature might still change. Otherwise the feature is generally production
> ready and fully supported.
`cdk gc` garbage collects unused assets from your bootstrap bucket via the following mechanism:
`cdk gc` garbage collects unused assets from your bootstrap bucket via the following mechanism:

- for each object in the bootstrap S3 Bucket, check to see if it is referenced in any existing CloudFormation templates
- if not, it is treated as unused and gc will either tag it or delete it, depending on your configuration.
Expand Down Expand Up @@ -938,7 +941,7 @@ Found X objects to delete based off of the following criteria:
Delete this batch (yes/no/delete-all)?
```

Since it's quite possible that the bootstrap bucket has many objects, we work in batches of 1000 objects or 100 images.
Since it's quite possible that the bootstrap bucket has many objects, we work in batches of 1000 objects or 100 images.
To skip the prompt either reply with `delete-all`, or use the `--confirm=false` option.

```console
Expand All @@ -948,7 +951,7 @@ cdk gc --unstable=gc --confirm=false
If you are concerned about deleting assets too aggressively, there are multiple levers you can configure:

- rollback-buffer-days: this is the number of days an asset has to be marked as isolated before it is eligible for deletion.
- created-buffer-days: this is the amount of days an asset must live before it is elligible for deletion.
- created-buffer-days: this is the number of days an asset must live before it is eligible for deletion.

When using `rollback-buffer-days`, instead of deleting unused objects, `cdk gc` will tag them with
today's date instead. It will also check if any objects have been tagged by previous runs of `cdk gc`
Expand Down
10 changes: 5 additions & 5 deletions packages/aws-cdk/lib/api/bootstrap/bootstrap-environment.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import { warning } from '../../logging';
import { loadStructuredFile, serializeStructure } from '../../serialize';
import { rootDir } from '../../util/directories';
import { ISDK, Mode, SdkProvider } from '../aws-auth';
import { DeployStackResult } from '../deploy-stack';
import { SuccessfulDeployStackResult } from '../deploy-stack';

/* eslint-disable max-len */

Expand All @@ -21,7 +21,7 @@ export class Bootstrapper {
constructor(private readonly source: BootstrapSource) {
}

public bootstrapEnvironment(environment: cxapi.Environment, sdkProvider: SdkProvider, options: BootstrapEnvironmentOptions = {}): Promise<DeployStackResult> {
public bootstrapEnvironment(environment: cxapi.Environment, sdkProvider: SdkProvider, options: BootstrapEnvironmentOptions = {}): Promise<SuccessfulDeployStackResult> {
switch (this.source.source) {
case 'legacy':
return this.legacyBootstrap(environment, sdkProvider, options);
Expand All @@ -41,7 +41,7 @@ export class Bootstrapper {
* Deploy legacy bootstrap stack
*
*/
private async legacyBootstrap(environment: cxapi.Environment, sdkProvider: SdkProvider, options: BootstrapEnvironmentOptions = {}): Promise<DeployStackResult> {
private async legacyBootstrap(environment: cxapi.Environment, sdkProvider: SdkProvider, options: BootstrapEnvironmentOptions = {}): Promise<SuccessfulDeployStackResult> {
const params = options.parameters ?? {};

if (params.trustedAccounts?.length) {
Expand Down Expand Up @@ -71,7 +71,7 @@ export class Bootstrapper {
private async modernBootstrap(
environment: cxapi.Environment,
sdkProvider: SdkProvider,
options: BootstrapEnvironmentOptions = {}): Promise<DeployStackResult> {
options: BootstrapEnvironmentOptions = {}): Promise<SuccessfulDeployStackResult> {

const params = options.parameters ?? {};

Expand Down Expand Up @@ -291,7 +291,7 @@ export class Bootstrapper {
private async customBootstrap(
environment: cxapi.Environment,
sdkProvider: SdkProvider,
options: BootstrapEnvironmentOptions = {}): Promise<DeployStackResult> {
options: BootstrapEnvironmentOptions = {}): Promise<SuccessfulDeployStackResult> {

// Look at the template, decide whether it's most likely a legacy or modern bootstrap
// template, and use the right bootstrapper for that.
Expand Down
13 changes: 9 additions & 4 deletions packages/aws-cdk/lib/api/bootstrap/deploy-bootstrap.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import * as fs from 'fs-extra';
import { BOOTSTRAP_VERSION_OUTPUT, BootstrapEnvironmentOptions, BOOTSTRAP_VERSION_RESOURCE, BOOTSTRAP_VARIANT_PARAMETER, DEFAULT_BOOTSTRAP_VARIANT } from './bootstrap-props';
import * as logging from '../../logging';
import { Mode, SdkProvider, ISDK } from '../aws-auth';
import { deployStack, DeployStackResult } from '../deploy-stack';
import { assertIsSuccessfulDeployStackResult, deployStack, SuccessfulDeployStackResult } from '../deploy-stack';
import { NoBootstrapStackEnvironmentResources } from '../environment-resources';
import { DEFAULT_TOOLKIT_STACK_NAME, ToolkitInfo } from '../toolkit-info';

Expand Down Expand Up @@ -63,14 +63,15 @@ export class BootstrapStack {
template: any,
parameters: Record<string, string | undefined>,
options: Omit<BootstrapEnvironmentOptions, 'parameters'>,
): Promise<DeployStackResult> {
): Promise<SuccessfulDeployStackResult> {
if (this.currentToolkitInfo.found && !options.force) {
// Safety checks
const abortResponse = {
type: 'did-deploy-stack',
noOp: true,
outputs: {},
stackArn: this.currentToolkitInfo.bootstrapStack.stackId,
};
} satisfies SuccessfulDeployStackResult;

// Validate that the bootstrap stack we're trying to replace is from the same variant as the one we're trying to deploy
const currentVariant = this.currentToolkitInfo.variant;
Expand Down Expand Up @@ -110,7 +111,7 @@ export class BootstrapStack {

const assembly = builder.buildAssembly();

return deployStack({
const ret = await deployStack({
stack: assembly.getStackByName(this.toolkitStackName),
resolvedEnvironment: this.resolvedEnvironment,
sdk: this.sdk,
Expand All @@ -124,6 +125,10 @@ export class BootstrapStack {
// Obviously we can't need a bootstrap stack to deploy a bootstrap stack
envResources: new NoBootstrapStackEnvironmentResources(this.resolvedEnvironment, this.sdk),
});

assertIsSuccessfulDeployStackResult(ret);

return ret;
}
}

Expand Down
Loading

0 comments on commit 2f9fb1e

Please sign in to comment.