Skip to content

Commit 96695ed

Browse files
authored
fix(coderd/database): correct task pending status logic (#21886)
Previously, tasks with pending provisioner jobs (not yet picked up) were incorrectly reported as "initializing". Refs #21887
1 parent 90faf51 commit 96695ed

File tree

6 files changed

+294
-7
lines changed

6 files changed

+294
-7
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
err: WARN: Task is initializing. Showing last 1 message from snapshot.
1+
err: WARN: Task is pending. Showing last 1 message from snapshot.
22
err:
33
out: TYPE CONTENT
44
out: input Single message

coderd/aitasks_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -997,12 +997,12 @@ func TestTasks(t *testing.T) {
997997
wantErrStatusCode int
998998
}{
999999
{
1000-
name: "TaskStatusInitializing",
1000+
name: "TaskStatusPending",
10011001
// We want to disable the provisioner so that the task
1002-
// never gets provisioned (ensuring it stays in Initializing).
1002+
// never gets picked up (ensuring it stays in Pending).
10031003
disableProvisioner: true,
10041004
taskInput: "Valid prompt",
1005-
wantStatus: codersdk.TaskStatusInitializing,
1005+
wantStatus: codersdk.TaskStatusPending,
10061006
wantErr: "Unable to update",
10071007
wantErrStatusCode: http.StatusConflict,
10081008
},

coderd/database/dump.sql

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
-- Recreate the tasks_with_status view; in this version a pending start build maps to the 'initializing' task status.
2+
DROP VIEW IF EXISTS tasks_with_status;
3+
4+
CREATE VIEW
5+
tasks_with_status
6+
AS
7+
SELECT
8+
tasks.*,
9+
-- Combine component statuses with precedence: build -> agent -> app.
10+
CASE
11+
WHEN tasks.workspace_id IS NULL THEN 'pending'::task_status
12+
WHEN build_status.status != 'active' THEN build_status.status::task_status
13+
WHEN agent_status.status != 'active' THEN agent_status.status::task_status
14+
ELSE app_status.status::task_status
15+
END AS status,
16+
-- Attach debug information for troubleshooting status.
17+
jsonb_build_object(
18+
'build', jsonb_build_object(
19+
'transition', latest_build_raw.transition,
20+
'job_status', latest_build_raw.job_status,
21+
'computed', build_status.status
22+
),
23+
'agent', jsonb_build_object(
24+
'lifecycle_state', agent_raw.lifecycle_state,
25+
'computed', agent_status.status
26+
),
27+
'app', jsonb_build_object(
28+
'health', app_raw.health,
29+
'computed', app_status.status
30+
)
31+
) AS status_debug,
32+
task_app.*,
33+
agent_raw.lifecycle_state AS workspace_agent_lifecycle_state,
34+
app_raw.health AS workspace_app_health,
35+
task_owner.*
36+
FROM
37+
tasks
38+
CROSS JOIN LATERAL (
39+
SELECT
40+
vu.username AS owner_username,
41+
vu.name AS owner_name,
42+
vu.avatar_url AS owner_avatar_url
43+
FROM
44+
visible_users vu
45+
WHERE
46+
vu.id = tasks.owner_id
47+
) task_owner
48+
LEFT JOIN LATERAL (
49+
SELECT
50+
task_app.workspace_build_number,
51+
task_app.workspace_agent_id,
52+
task_app.workspace_app_id
53+
FROM
54+
task_workspace_apps task_app
55+
WHERE
56+
task_id = tasks.id
57+
ORDER BY
58+
task_app.workspace_build_number DESC
59+
LIMIT 1
60+
) task_app ON TRUE
61+
62+
-- Join the raw data for computing task status.
63+
LEFT JOIN LATERAL (
64+
SELECT
65+
workspace_build.transition,
66+
provisioner_job.job_status,
67+
workspace_build.job_id
68+
FROM
69+
workspace_builds workspace_build
70+
JOIN
71+
provisioner_jobs provisioner_job
72+
ON provisioner_job.id = workspace_build.job_id
73+
WHERE
74+
workspace_build.workspace_id = tasks.workspace_id
75+
AND workspace_build.build_number = task_app.workspace_build_number
76+
) latest_build_raw ON TRUE
77+
LEFT JOIN LATERAL (
78+
SELECT
79+
workspace_agent.lifecycle_state
80+
FROM
81+
workspace_agents workspace_agent
82+
WHERE
83+
workspace_agent.id = task_app.workspace_agent_id
84+
) agent_raw ON TRUE
85+
LEFT JOIN LATERAL (
86+
SELECT
87+
workspace_app.health
88+
FROM
89+
workspace_apps workspace_app
90+
WHERE
91+
workspace_app.id = task_app.workspace_app_id
92+
) app_raw ON TRUE
93+
94+
-- Compute the status for each component.
95+
CROSS JOIN LATERAL (
96+
SELECT
97+
CASE
98+
WHEN latest_build_raw.job_status IS NULL THEN 'pending'::task_status
99+
WHEN latest_build_raw.job_status IN ('failed', 'canceling', 'canceled') THEN 'error'::task_status
100+
WHEN
101+
latest_build_raw.transition IN ('stop', 'delete')
102+
AND latest_build_raw.job_status = 'succeeded' THEN 'paused'::task_status
103+
WHEN
104+
latest_build_raw.transition = 'start'
105+
AND latest_build_raw.job_status = 'pending' THEN 'initializing'::task_status
106+
-- Build is running or done, defer to agent/app status.
107+
WHEN
108+
latest_build_raw.transition = 'start'
109+
AND latest_build_raw.job_status IN ('running', 'succeeded') THEN 'active'::task_status
110+
ELSE 'unknown'::task_status
111+
END AS status
112+
) build_status
113+
CROSS JOIN LATERAL (
114+
SELECT
115+
CASE
116+
-- No agent or connecting.
117+
WHEN
118+
agent_raw.lifecycle_state IS NULL
119+
OR agent_raw.lifecycle_state IN ('created', 'starting') THEN 'initializing'::task_status
120+
-- Agent is running, defer to app status.
121+
-- NOTE(mafredri): The start_error/start_timeout states mean the agent connected, but some startup script failed.
122+
-- This may or may not affect the task status, but it has to be caught by the app health check.
123+
WHEN agent_raw.lifecycle_state IN ('ready', 'start_timeout', 'start_error') THEN 'active'::task_status
124+
-- If the agent is shutting down or turned off, this is an unknown state because we would expect a stop
125+
-- build to be running.
126+
-- This is essentially equal to: `IN ('shutting_down', 'shutdown_timeout', 'shutdown_error', 'off')`,
127+
-- but we cannot use them because the values were added in a migration.
128+
WHEN agent_raw.lifecycle_state NOT IN ('created', 'starting', 'ready', 'start_timeout', 'start_error') THEN 'unknown'::task_status
129+
ELSE 'unknown'::task_status
130+
END AS status
131+
) agent_status
132+
CROSS JOIN LATERAL (
133+
SELECT
134+
CASE
135+
WHEN app_raw.health = 'initializing' THEN 'initializing'::task_status
136+
WHEN app_raw.health = 'unhealthy' THEN 'error'::task_status
137+
WHEN app_raw.health IN ('healthy', 'disabled') THEN 'active'::task_status
138+
ELSE 'unknown'::task_status
139+
END AS status
140+
) app_status
141+
WHERE
142+
tasks.deleted_at IS NULL;
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
-- Fix task status logic: pending provisioner job should give pending task status, not initializing.
2+
-- A task is pending when the provisioner hasn't picked up the job yet.
3+
-- A task is initializing when the provisioner is actively running the job.
4+
DROP VIEW IF EXISTS tasks_with_status;
5+
6+
CREATE VIEW
7+
tasks_with_status
8+
AS
9+
SELECT
10+
tasks.*,
11+
-- Combine component statuses with precedence: build -> agent -> app.
12+
CASE
13+
WHEN tasks.workspace_id IS NULL THEN 'pending'::task_status
14+
WHEN build_status.status != 'active' THEN build_status.status::task_status
15+
WHEN agent_status.status != 'active' THEN agent_status.status::task_status
16+
ELSE app_status.status::task_status
17+
END AS status,
18+
-- Attach debug information for troubleshooting status.
19+
jsonb_build_object(
20+
'build', jsonb_build_object(
21+
'transition', latest_build_raw.transition,
22+
'job_status', latest_build_raw.job_status,
23+
'computed', build_status.status
24+
),
25+
'agent', jsonb_build_object(
26+
'lifecycle_state', agent_raw.lifecycle_state,
27+
'computed', agent_status.status
28+
),
29+
'app', jsonb_build_object(
30+
'health', app_raw.health,
31+
'computed', app_status.status
32+
)
33+
) AS status_debug,
34+
task_app.*,
35+
agent_raw.lifecycle_state AS workspace_agent_lifecycle_state,
36+
app_raw.health AS workspace_app_health,
37+
task_owner.*
38+
FROM
39+
tasks
40+
CROSS JOIN LATERAL (
41+
SELECT
42+
vu.username AS owner_username,
43+
vu.name AS owner_name,
44+
vu.avatar_url AS owner_avatar_url
45+
FROM
46+
visible_users vu
47+
WHERE
48+
vu.id = tasks.owner_id
49+
) task_owner
50+
LEFT JOIN LATERAL (
51+
SELECT
52+
task_app.workspace_build_number,
53+
task_app.workspace_agent_id,
54+
task_app.workspace_app_id
55+
FROM
56+
task_workspace_apps task_app
57+
WHERE
58+
task_id = tasks.id
59+
ORDER BY
60+
task_app.workspace_build_number DESC
61+
LIMIT 1
62+
) task_app ON TRUE
63+
64+
-- Join the raw data for computing task status.
65+
LEFT JOIN LATERAL (
66+
SELECT
67+
workspace_build.transition,
68+
provisioner_job.job_status,
69+
workspace_build.job_id
70+
FROM
71+
workspace_builds workspace_build
72+
JOIN
73+
provisioner_jobs provisioner_job
74+
ON provisioner_job.id = workspace_build.job_id
75+
WHERE
76+
workspace_build.workspace_id = tasks.workspace_id
77+
AND workspace_build.build_number = task_app.workspace_build_number
78+
) latest_build_raw ON TRUE
79+
LEFT JOIN LATERAL (
80+
SELECT
81+
workspace_agent.lifecycle_state
82+
FROM
83+
workspace_agents workspace_agent
84+
WHERE
85+
workspace_agent.id = task_app.workspace_agent_id
86+
) agent_raw ON TRUE
87+
LEFT JOIN LATERAL (
88+
SELECT
89+
workspace_app.health
90+
FROM
91+
workspace_apps workspace_app
92+
WHERE
93+
workspace_app.id = task_app.workspace_app_id
94+
) app_raw ON TRUE
95+
96+
-- Compute the status for each component.
97+
CROSS JOIN LATERAL (
98+
SELECT
99+
CASE
100+
WHEN latest_build_raw.job_status IS NULL THEN 'pending'::task_status
101+
WHEN latest_build_raw.job_status IN ('failed', 'canceling', 'canceled') THEN 'error'::task_status
102+
WHEN
103+
latest_build_raw.transition IN ('stop', 'delete')
104+
AND latest_build_raw.job_status = 'succeeded' THEN 'paused'::task_status
105+
-- Job is pending (not picked up by provisioner yet).
106+
WHEN
107+
latest_build_raw.transition = 'start'
108+
AND latest_build_raw.job_status = 'pending' THEN 'pending'::task_status
109+
-- Job is running or done, defer to agent/app status.
110+
WHEN
111+
latest_build_raw.transition = 'start'
112+
AND latest_build_raw.job_status IN ('running', 'succeeded') THEN 'active'::task_status
113+
ELSE 'unknown'::task_status
114+
END AS status
115+
) build_status
116+
CROSS JOIN LATERAL (
117+
SELECT
118+
CASE
119+
-- No agent or connecting.
120+
WHEN
121+
agent_raw.lifecycle_state IS NULL
122+
OR agent_raw.lifecycle_state IN ('created', 'starting') THEN 'initializing'::task_status
123+
-- Agent is running, defer to app status.
124+
-- NOTE(mafredri): The start_error/start_timeout states mean the agent connected, but some startup script failed.
125+
-- This may or may not affect the task status, but it has to be caught by the app health check.
126+
WHEN agent_raw.lifecycle_state IN ('ready', 'start_timeout', 'start_error') THEN 'active'::task_status
127+
-- If the agent is shutting down or turned off, this is an unknown state because we would expect a stop
128+
-- build to be running.
129+
-- This is essentially equal to: `IN ('shutting_down', 'shutdown_timeout', 'shutdown_error', 'off')`,
130+
-- but we cannot use them because the values were added in a migration.
131+
WHEN agent_raw.lifecycle_state NOT IN ('created', 'starting', 'ready', 'start_timeout', 'start_error') THEN 'unknown'::task_status
132+
ELSE 'unknown'::task_status
133+
END AS status
134+
) agent_status
135+
CROSS JOIN LATERAL (
136+
SELECT
137+
CASE
138+
WHEN app_raw.health = 'initializing' THEN 'initializing'::task_status
139+
WHEN app_raw.health = 'unhealthy' THEN 'error'::task_status
140+
WHEN app_raw.health IN ('healthy', 'disabled') THEN 'active'::task_status
141+
ELSE 'unknown'::task_status
142+
END AS status
143+
) app_status
144+
WHERE
145+
tasks.deleted_at IS NULL;

coderd/database/querier_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7093,8 +7093,8 @@ func TestTasksWithStatusView(t *testing.T) {
70937093
name: "PendingStart",
70947094
buildStatus: database.ProvisionerJobStatusPending,
70957095
buildTransition: database.WorkspaceTransitionStart,
7096-
expectedStatus: database.TaskStatusInitializing,
7097-
description: "Workspace build is starting (pending)",
7096+
expectedStatus: database.TaskStatusPending,
7097+
description: "Workspace build pending (not yet picked up by provisioner)",
70987098
expectBuildNumberValid: true,
70997099
expectBuildNumber: 1,
71007100
expectWorkspaceAgentValid: false,

0 commit comments

Comments
 (0)