Skip to content

Commit e89b9ef

Browse files
authored
feat(redshift): column compression encodings and comments can now be customised (#23597)
This change adds support for compression encodings and comments on table columns. Both features are delivered together in one change to close #22506 ---- ### All Submissions: * [x] Have you followed the guidelines in our [Contributing guide?](https://github.com/aws/aws-cdk/blob/main/CONTRIBUTING.md) ### Adding new Construct Runtime Dependencies: * [ ] This PR adds new construct runtime dependencies following the process described [here](https://github.com/aws/aws-cdk/blob/main/CONTRIBUTING.md/#adding-construct-runtime-dependencies) ### New Features * [x] Have you added the new feature to an [integration test](https://github.com/aws/aws-cdk/blob/main/INTEGRATION_TESTS.md)? * [x] Did you use `yarn integ` to deploy the infrastructure and generate the snapshot (i.e. `yarn integ` without `--dry-run`)? *By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license*
1 parent 03a0f79 commit e89b9ef

File tree

34 files changed

+700
-241
lines changed

34 files changed

+700
-241
lines changed

packages/@aws-cdk/aws-redshift/README.md

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -200,17 +200,32 @@ new Table(this, 'Table', {
200200
});
201201
```
202202

203-
Tables can also be configured with a comment:
203+
Tables and their respective columns can be configured to contain comments:
204204

205205
```ts fixture=cluster
206206
new Table(this, 'Table', {
207207
tableColumns: [
208-
{ name: 'col1', dataType: 'varchar(4)' },
209-
{ name: 'col2', dataType: 'float' }
208+
{ name: 'col1', dataType: 'varchar(4)', comment: 'This is a column comment' },
209+
{ name: 'col2', dataType: 'float', comment: 'This is another column comment' }
210+
],
211+
cluster: cluster,
212+
databaseName: 'databaseName',
213+
tableComment: 'This is a table comment',
214+
});
215+
```
216+
217+
Table columns can be configured to use a specific compression encoding:
218+
219+
```ts fixture=cluster
220+
import { ColumnEncoding } from '@aws-cdk/aws-redshift';
221+
222+
new Table(this, 'Table', {
223+
tableColumns: [
224+
{ name: 'col1', dataType: 'varchar(4)', encoding: ColumnEncoding.TEXT32K },
225+
{ name: 'col2', dataType: 'float', encoding: ColumnEncoding.DELTA32K },
210226
],
211227
cluster: cluster,
212228
databaseName: 'databaseName',
213-
comment: 'This is a comment',
214229
});
215230
```
216231

@@ -417,14 +432,16 @@ Some Amazon Redshift features require Amazon Redshift to access other AWS servic
417432
When you create an IAM role and set it as the default for the cluster using the console, you don't have to provide the IAM role's Amazon Resource Name (ARN) to perform authentication and authorization.
418433

419434
```ts
435+
import * as ec2 from '@aws-cdk/aws-ec2';
436+
import * as iam from '@aws-cdk/aws-iam';
420437
declare const vpc: ec2.Vpc;
421438

422439
const defaultRole = new iam.Role(this, 'DefaultRole', {
423440
assumedBy: new iam.ServicePrincipal('redshift.amazonaws.com'),
424441
},
425442
);
426443

427-
new Cluster(stack, 'Redshift', {
444+
new Cluster(this, 'Redshift', {
428445
masterUser: {
429446
masterUsername: 'admin',
430447
},
@@ -437,14 +454,16 @@ new Cluster(stack, 'Redshift', {
437454
A default role can also be added to a cluster using the `addDefaultIamRole` method.
438455

439456
```ts
457+
import * as ec2 from '@aws-cdk/aws-ec2';
458+
import * as iam from '@aws-cdk/aws-iam';
440459
declare const vpc: ec2.Vpc;
441460

442461
const defaultRole = new iam.Role(this, 'DefaultRole', {
443462
assumedBy: new iam.ServicePrincipal('redshift.amazonaws.com'),
444463
},
445464
);
446465

447-
const redshiftCluster = new Cluster(stack, 'Redshift', {
466+
const redshiftCluster = new Cluster(this, 'Redshift', {
448467
masterUser: {
449468
masterUsername: 'admin',
450469
},

packages/@aws-cdk/aws-redshift/lib/private/database-query-provider/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,5 @@ export async function handler(event: AWSLambda.CloudFormationCustomResourceEvent
1818
}
1919
return subHandler(event.ResourceProperties, event);
2020
}
21+
22+
export { ColumnEncoding } from './types';

packages/@aws-cdk/aws-redshift/lib/private/database-query-provider/table.ts

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/* eslint-disable-next-line import/no-unresolved */
22
import * as AWSLambda from 'aws-lambda';
33
import { executeStatement } from './redshift-data';
4-
import { ClusterProps, TableAndClusterProps, TableSortStyle } from './types';
4+
import { ClusterProps, ColumnEncoding, TableAndClusterProps, TableSortStyle } from './types';
55
import { areColumnsEqual, getDistKeyColumn, getSortKeyColumns } from './util';
66
import { Column } from '../../table';
77

@@ -40,7 +40,7 @@ async function createTable(
4040
tableAndClusterProps: TableAndClusterProps,
4141
): Promise<string> {
4242
const tableName = tableNamePrefix + tableNameSuffix;
43-
const tableColumnsString = tableColumns.map(column => `${column.name} ${column.dataType}`).join();
43+
const tableColumnsString = tableColumns.map(column => `${column.name} ${column.dataType}${getEncodingColumnString(column)}`).join();
4444

4545
let statement = `CREATE TABLE ${tableName} (${tableColumnsString})`;
4646

@@ -61,6 +61,11 @@ async function createTable(
6161

6262
await executeStatement(statement, tableAndClusterProps);
6363

64+
for (const column of tableColumns) {
65+
if (column.comment) {
66+
await executeStatement(`COMMENT ON COLUMN ${tableName}.${column.name} IS '${column.comment}'`, tableAndClusterProps);
67+
}
68+
}
6469
if (tableAndClusterProps.tableComment) {
6570
await executeStatement(`COMMENT ON TABLE ${tableName} IS '${tableAndClusterProps.tableComment}'`, tableAndClusterProps);
6671
}
@@ -107,6 +112,20 @@ async function updateTable(
107112
alterationStatements.push(...columnAdditions.map(addition => `ALTER TABLE ${tableName} ${addition}`));
108113
}
109114

115+
const columnEncoding = tableColumns.filter(column => {
116+
return oldTableColumns.some(oldColumn => column.name === oldColumn.name && column.encoding !== oldColumn.encoding);
117+
}).map(column => `ALTER COLUMN ${column.name} ENCODE ${column.encoding || ColumnEncoding.AUTO}`);
118+
if (columnEncoding.length > 0) {
119+
alterationStatements.push(`ALTER TABLE ${tableName} ${columnEncoding.join(', ')}`);
120+
}
121+
122+
const columnComments = tableColumns.filter(column => {
123+
return oldTableColumns.some(oldColumn => column.name === oldColumn.name && column.comment !== oldColumn.comment);
124+
}).map(column => `COMMENT ON COLUMN ${tableName}.${column.name} IS ${column.comment ? `'${column.comment}'` : 'NULL'}`);
125+
if (columnComments.length > 0) {
126+
alterationStatements.push(...columnComments);
127+
}
128+
110129
const oldDistStyle = oldResourceProperties.distStyle;
111130
if ((!oldDistStyle && tableAndClusterProps.distStyle) ||
112131
(oldDistStyle && !tableAndClusterProps.distStyle)) {
@@ -162,3 +181,10 @@ async function updateTable(
162181
/**
 * Builds the column-name list for a sort key clause.
 *
 * @param sortKeyColumns the columns whose names should be listed
 * @returns the column names joined with `Array.prototype.join`'s default
 * separator (a comma, with no surrounding whitespace)
 */
function getSortKeyColumnsString(sortKeyColumns: Column[]) {
163182
return sortKeyColumns.map(column => column.name).join();
164183
}
184+
185+
/**
 * Builds the optional `ENCODE` suffix for a single column definition.
 *
 * The result is appended directly after the column's data type when the
 * `CREATE TABLE` column list is assembled, which is why it carries its own
 * leading space.
 *
 * @param column the column definition to inspect
 * @returns ` ENCODE <encoding>` when the column has an encoding set,
 * otherwise an empty string
 */
function getEncodingColumnString(column: Column): string {
186+
if (column.encoding) {
187+
return ` ENCODE ${column.encoding}`;
188+
}
189+
return '';
190+
}

packages/@aws-cdk/aws-redshift/lib/private/database-query-provider/types.ts

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,112 @@ export enum TableSortStyle {
2424
*/
2525
INTERLEAVED = 'INTERLEAVED',
2626
}
27+
28+
/**
29+
* The compression encoding of a column.
30+
*
31+
* @see https://docs.aws.amazon.com/redshift/latest/dg/c_Compression_encodings.html
32+
*/
33+
export enum ColumnEncoding {
34+
/**
35+
* Amazon Redshift assigns an optimal encoding based on the column data.
36+
* This is the default.
37+
*/
38+
AUTO = 'AUTO',
39+
40+
/**
41+
* The column is not compressed.
42+
*
43+
* @see https://docs.aws.amazon.com/redshift/latest/dg/c_Raw_encoding.html
44+
*/
45+
RAW = 'RAW',
46+
47+
/**
48+
* The column is compressed using the AZ64 algorithm.
49+
*
50+
* @see https://docs.aws.amazon.com/redshift/latest/dg/az64-encoding.html
51+
*/
52+
AZ64 = 'AZ64',
53+
54+
/**
55+
* The column is compressed using a separate dictionary of unique values for each block of column values on disk.
56+
*
57+
* @see https://docs.aws.amazon.com/redshift/latest/dg/c_Byte_dictionary_encoding.html
58+
*/
59+
BYTEDICT = 'BYTEDICT',
60+
61+
/**
62+
* The column is compressed based on the difference between values in the column.
63+
* This records differences as 1-byte values.
64+
*
65+
* @see https://docs.aws.amazon.com/redshift/latest/dg/c_Delta_encoding.html
66+
*/
67+
DELTA = 'DELTA',
68+
69+
/**
70+
* The column is compressed based on the difference between values in the column.
71+
* This records differences as 2-byte values.
72+
*
73+
* @see https://docs.aws.amazon.com/redshift/latest/dg/c_Delta_encoding.html
74+
*/
75+
DELTA32K = 'DELTA32K',
76+
77+
/**
78+
* The column is compressed using the LZO algorithm.
79+
*
80+
* @see https://docs.aws.amazon.com/redshift/latest/dg/lzo-encoding.html
81+
*/
82+
LZO = 'LZO',
83+
84+
/**
85+
* The column is compressed to a smaller storage size than the original data type.
86+
* The compressed storage size is 1 byte.
87+
*
88+
* @see https://docs.aws.amazon.com/redshift/latest/dg/c_MostlyN_encoding.html
89+
*/
90+
MOSTLY8 = 'MOSTLY8',
91+
92+
/**
93+
* The column is compressed to a smaller storage size than the original data type.
94+
* The compressed storage size is 2 bytes.
95+
*
96+
* @see https://docs.aws.amazon.com/redshift/latest/dg/c_MostlyN_encoding.html
97+
*/
98+
MOSTLY16 = 'MOSTLY16',
99+
100+
/**
101+
* The column is compressed to a smaller storage size than the original data type.
102+
* The compressed storage size is 4 bytes.
103+
*
104+
* @see https://docs.aws.amazon.com/redshift/latest/dg/c_MostlyN_encoding.html
105+
*/
106+
MOSTLY32 = 'MOSTLY32',
107+
108+
/**
109+
* The column is compressed by replacing each run of consecutively repeated values with the value and a count of its consecutive occurrences.
110+
*
111+
* @see https://docs.aws.amazon.com/redshift/latest/dg/c_Runlength_encoding.html
112+
*/
113+
RUNLENGTH = 'RUNLENGTH',
114+
115+
/**
116+
* The column is compressed by recording the first 245 unique words and then using a 1-byte index to represent each word.
117+
*
118+
* @see https://docs.aws.amazon.com/redshift/latest/dg/c_Text255_encoding.html
119+
*/
120+
TEXT255 = 'TEXT255',
121+
122+
/**
123+
* The column is compressed by indexing each unique word until the combined dictionary entries reach a length of 32K, and then using a 2-byte index to represent each word.
124+
*
125+
* @see https://docs.aws.amazon.com/redshift/latest/dg/c_Text255_encoding.html
126+
*/
127+
TEXT32K = 'TEXT32K',
128+
129+
/**
130+
* The column is compressed using the ZSTD algorithm.
131+
*
132+
* @see https://docs.aws.amazon.com/redshift/latest/dg/zstd-encoding.html
133+
*/
134+
ZSTD = 'ZSTD',
135+
}

packages/@aws-cdk/aws-redshift/lib/table.ts

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { Construct, IConstruct } from 'constructs';
33
import { ICluster } from './cluster';
44
import { DatabaseOptions } from './database-options';
55
import { DatabaseQuery } from './private/database-query';
6+
import { ColumnEncoding } from './private/database-query-provider';
67
import { HandlerName } from './private/database-query-provider/handler-name';
78
import { getDistKeyColumn, getSortKeyColumns } from './private/database-query-provider/util';
89
import { TableHandlerProps } from './private/handler-props';
@@ -79,6 +80,20 @@ export interface Column {
7980
* @default - column is not a SORTKEY
8081
*/
8182
readonly sortKey?: boolean;
83+
84+
/**
85+
* The encoding to use for the column.
86+
*
87+
* @default - Amazon Redshift determines the encoding based on the data type.
88+
*/
89+
readonly encoding?: ColumnEncoding;
90+
91+
/**
92+
* A comment to attach to the column.
93+
*
94+
* @default - no comment
95+
*/
96+
readonly comment?: string;
8297
}
8398

8499
/**
@@ -344,3 +359,5 @@ export enum TableSortStyle {
344359
*/
345360
INTERLEAVED = 'INTERLEAVED',
346361
}
362+
363+
export { ColumnEncoding } from './private/database-query-provider';

0 commit comments

Comments
 (0)