-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathtest_scenario_builder.py
More file actions
287 lines (240 loc) · 13.8 KB
/
test_scenario_builder.py
File metadata and controls
287 lines (240 loc) · 13.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
"""Unit tests for ScenarioBuilder class."""
from __future__ import annotations
from unittest.mock import MagicMock
import pytest
from runloop_api_client.sdk import Snapshot, Blueprint, ScenarioBuilder, ScenarioPreview
from runloop_api_client.types.scoring_function_param import ScorerTestBasedScoringFunctionTestFile
class TestScenarioBuilder:
    """Tests for the synchronous ScenarioBuilder.

    All fixtures are built on a ``MagicMock`` client; within a single test the
    builder, blueprint and snapshot fixtures share that same mock instance.
    """

    # Absolute tolerance used when comparing normalized scorer weights.
    WEIGHT_TOLERANCE = 1e-4

    @pytest.fixture
    def mock_client(self) -> MagicMock:
        """Create a mock Runloop client."""
        return MagicMock()

    @pytest.fixture
    def mock_blueprint(self, mock_client: MagicMock) -> Blueprint:
        """Create a Blueprint with id ``bp-123`` bound to the mock client."""
        return Blueprint(mock_client, "bp-123")

    @pytest.fixture
    def mock_snapshot(self, mock_client: MagicMock) -> Snapshot:
        """Create a Snapshot with id ``snap-123`` bound to the mock client."""
        return Snapshot(mock_client, "snap-123")

    @pytest.fixture
    def mock_builder(self, mock_client: MagicMock) -> ScenarioBuilder:
        """Create a ScenarioBuilder named ``test-scenario`` using the mock client."""
        return ScenarioBuilder("test-scenario", mock_client)

    def test_instantiation(self, mock_client: MagicMock) -> None:
        """Test builder initialization and repr."""
        builder = ScenarioBuilder("my-scenario", mock_client)

        assert builder._client is mock_client
        assert builder._name == "my-scenario"
        assert builder.name == "my-scenario"
        assert repr(builder) == "<ScenarioBuilder name='my-scenario'>"

    def test_from_blueprint_and_snapshot(
        self,
        mock_builder: ScenarioBuilder,
        mock_blueprint: Blueprint,
        mock_snapshot: Snapshot,
    ) -> None:
        """Test blueprint/snapshot setting returns self and are mutually exclusive."""
        # from_blueprint returns self and sets blueprint
        result = mock_builder.from_blueprint(mock_blueprint)
        assert result is mock_builder
        assert mock_builder._blueprint is mock_blueprint
        assert mock_builder._snapshot is None

        # from_snapshot returns self, sets snapshot, and clears blueprint
        result = mock_builder.from_snapshot(mock_snapshot)
        assert result is mock_builder
        assert mock_builder._snapshot is mock_snapshot
        assert mock_builder._blueprint is None

        # from_blueprint clears snapshot
        mock_builder.from_blueprint(mock_blueprint)
        assert mock_builder._blueprint is mock_blueprint
        assert mock_builder._snapshot is None

    def test_scorers(self, mock_builder: ScenarioBuilder) -> None:
        """Test all scorer types, optional params, and multiple scorers.

        Scorers are added one after another on the same builder, so each
        section below checks the entry at the next index of ``_scorers``.
        """
        # Test scorer with test files
        test_files: list[ScorerTestBasedScoringFunctionTestFile] = [
            {"file_path": "test_main.py", "file_contents": "def test_foo(): pass"}
        ]
        result = mock_builder.add_test_command_scorer(
            "test-scorer", test_command="pytest", weight=2.0, test_files=test_files
        )
        assert result is mock_builder
        assert mock_builder._scorers[0]["name"] == "test-scorer"
        assert mock_builder._scorers[0]["weight"] == 2.0
        assert mock_builder._scorers[0]["scorer"]["type"] == "test_based_scorer"
        assert mock_builder._scorers[0]["scorer"].get("test_command") == "pytest"
        assert mock_builder._scorers[0]["scorer"].get("test_files") == test_files

        # Command scorer
        mock_builder.add_shell_command_scorer("cmd-scorer", command="./check.sh")
        assert mock_builder._scorers[1]["scorer"]["type"] == "command_scorer"
        assert mock_builder._scorers[1]["scorer"].get("command") == "./check.sh"

        # Bash scorer
        mock_builder.add_bash_script_scorer("bash-scorer", bash_script="echo 'score=1.0'")
        assert mock_builder._scorers[2]["scorer"]["type"] == "bash_script_scorer"
        assert mock_builder._scorers[2]["scorer"].get("bash_script") == "echo 'score=1.0'"

        # Python scorer with optional params
        mock_builder.add_python_script_scorer(
            "python-scorer",
            python_script="print('1.0')",
            python_version_constraint=">=3.10",
            requirements_contents="numpy",
        )
        assert mock_builder._scorers[3]["scorer"]["type"] == "python_script_scorer"
        assert mock_builder._scorers[3]["scorer"].get("python_version_constraint") == ">=3.10"
        assert mock_builder._scorers[3]["scorer"].get("requirements_contents") == "numpy"

        # AST grep scorer with optional lang
        mock_builder.add_ast_grep_scorer(
            "ast-scorer",
            pattern="$A.foo()",
            search_directory="/src",
            lang="python",
        )
        assert mock_builder._scorers[4]["scorer"]["type"] == "ast_grep_scorer"
        assert mock_builder._scorers[4]["scorer"].get("pattern") == "$A.foo()"
        assert mock_builder._scorers[4]["scorer"].get("lang") == "python"

        # Custom scorer with optional params
        mock_builder.add_custom_scorer(
            "custom-scorer", custom_scorer_type="my_scorer", scorer_params={"threshold": 0.5}
        )
        assert mock_builder._scorers[5]["scorer"]["type"] == "custom_scorer"
        assert mock_builder._scorers[5]["scorer"].get("custom_scorer_type") == "my_scorer"
        assert mock_builder._scorers[5]["scorer"].get("scorer_params") == {"threshold": 0.5}

        # Verify multiple scorers accumulated
        assert len(mock_builder._scorers) == 6

    def test_add_scorer_rejects_invalid_weight(self, mock_builder: ScenarioBuilder) -> None:
        """Test that adding a scorer with zero or negative weight raises ValueError."""
        with pytest.raises(ValueError, match="Scorer weight must be positive"):
            mock_builder.add_bash_script_scorer("bad", bash_script="echo 1", weight=0.0)

        with pytest.raises(ValueError, match="Scorer weight must be positive"):
            mock_builder.add_bash_script_scorer("bad", bash_script="echo 1", weight=-1.0)

    def test_build_validation(self, mock_builder: ScenarioBuilder) -> None:
        """Test build raises for missing required fields."""
        # Missing problem statement
        mock_builder.add_test_command_scorer("test", test_command="pytest")
        with pytest.raises(ValueError, match="Problem statement is required"):
            mock_builder.build()

        # Missing scorer (new builder, so the scorer added above is not present)
        builder2 = ScenarioBuilder("test2", mock_builder._client)
        builder2.with_problem_statement("Fix the bug")
        with pytest.raises(ValueError, match="At least one scorer is required"):
            builder2.build()

    def test_build_with_all_options(
        self, mock_builder: ScenarioBuilder, mock_blueprint: Blueprint
    ) -> None:
        """Test build with all optional fields set."""
        mock_builder.with_problem_statement("Fix the bug")
        mock_builder.with_additional_context({"hint": "line 42"})
        mock_builder.add_test_command_scorer("tests", test_command="pytest")
        mock_builder.from_blueprint(mock_blueprint)
        mock_builder.with_working_directory("/app")
        mock_builder.with_metadata({"team": "infra"})
        mock_builder.with_reference_output("diff content")
        mock_builder.with_required_env_vars(["API_KEY"])
        mock_builder.with_required_secrets(["db_pass"])
        mock_builder.with_validation_type("FORWARD")

        params = mock_builder.build()

        assert params["name"] == "test-scenario"
        assert params["input_context"]["problem_statement"] == "Fix the bug"
        assert params["input_context"].get("additional_context") == {"hint": "line 42"}

        env_params = params.get("environment_parameters")
        assert env_params is not None
        assert env_params.get("blueprint_id") == "bp-123"
        assert env_params.get("working_directory") == "/app"

        assert params.get("metadata") == {"team": "infra"}
        assert params.get("reference_output") == "diff content"
        assert params.get("required_environment_variables") == ["API_KEY"]
        assert params.get("required_secret_names") == ["db_pass"]
        assert params.get("validation_type") == "FORWARD"

    def test_build_normalizes_weights(self, mock_builder: ScenarioBuilder) -> None:
        """Test that build normalizes scorer weights to sum to 1.0."""
        mock_builder.with_problem_statement("Fix the bug")
        mock_builder.add_bash_script_scorer("scorer1", bash_script="echo 1", weight=1.0)
        mock_builder.add_bash_script_scorer("scorer2", bash_script="echo 2", weight=2.0)
        mock_builder.add_bash_script_scorer("scorer3", bash_script="echo 3", weight=3.0)

        params = mock_builder.build()
        scorers = list(params["scoring_contract"]["scoring_function_parameters"])

        # Weights 1, 2, 3 should normalize to 1/6, 2/6, 3/6
        tol = self.WEIGHT_TOLERANCE
        assert len(scorers) == 3
        assert scorers[0]["weight"] == pytest.approx(1 / 6, abs=tol)
        assert scorers[1]["weight"] == pytest.approx(2 / 6, abs=tol)
        assert scorers[2]["weight"] == pytest.approx(3 / 6, abs=tol)

        # Total should be 1.0
        total = sum(s["weight"] for s in scorers)
        assert total == pytest.approx(1.0, abs=tol)

    def test_push_calls_api_and_returns_scenario(
        self, mock_builder: ScenarioBuilder, mock_client: MagicMock
    ) -> None:
        """Test push() calls API with correct params and returns Scenario."""
        # The id of whatever scenarios.create returns is what the test expects
        # to see on the object handed back by push().
        mock_client.scenarios.create.return_value.id = "scn-new-123"
        mock_builder.with_problem_statement("Fix the bug")
        mock_builder.add_test_command_scorer("tests", test_command="pytest")

        scenario = mock_builder.push()

        mock_client.scenarios.create.assert_called_once()
        call_kwargs = mock_client.scenarios.create.call_args.kwargs
        assert call_kwargs["name"] == "test-scenario"
        assert call_kwargs["input_context"]["problem_statement"] == "Fix the bug"
        assert scenario.id == "scn-new-123"

    def test_fluent_chaining(
        self, mock_builder: ScenarioBuilder, mock_blueprint: Blueprint
    ) -> None:
        """Test that all builder methods can be chained fluently."""
        result = (
            mock_builder.from_blueprint(mock_blueprint)
            .with_working_directory("/app")
            .with_problem_statement("Fix the bug")
            .with_additional_context({"hint": "check main.py"})
            .add_test_command_scorer("tests", test_command="pytest")
            .with_metadata({"team": "infra"})
            .with_reference_output("diff content")
            .with_required_env_vars(["API_KEY"])
            .with_required_secrets(["secret"])
            .with_validation_type("FORWARD")
        )

        assert result is mock_builder
        assert mock_builder._blueprint is mock_blueprint
        assert mock_builder._working_directory == "/app"
        assert mock_builder._problem_statement == "Fix the bug"
        assert len(mock_builder._scorers) == 1

    def test_preview_with_no_config(self, mock_builder: ScenarioBuilder) -> None:
        """Test preview() works with no configuration (only name from constructor)."""
        preview = mock_builder.preview()

        assert isinstance(preview, ScenarioPreview)
        assert preview.name == "test-scenario"
        assert preview.input_context is not None
        assert preview.input_context.problem_statement is None
        assert preview.input_context.additional_context is None
        assert preview.scoring_contract is not None
        assert len(preview.scoring_contract.scoring_function_parameters) == 0
        assert preview.environment is None
        assert len(preview.metadata) == 0
        assert preview.reference_output is None
        assert preview.required_environment_variables is None
        assert preview.required_secret_names is None
        assert preview.validation_type is None

    def test_preview_with_full_config(
        self, mock_builder: ScenarioBuilder, mock_blueprint: Blueprint
    ) -> None:
        """Test preview() with all fields configured, including weight normalization."""
        mock_builder.with_problem_statement("Fix the bug")
        mock_builder.with_additional_context({"hint": "line 42"})
        mock_builder.from_blueprint(mock_blueprint)
        mock_builder.with_working_directory("/app")
        mock_builder.with_metadata({"team": "infra"})
        mock_builder.with_reference_output("diff content")
        mock_builder.with_required_env_vars(["API_KEY"])
        mock_builder.with_required_secrets(["db_pass"])
        mock_builder.with_validation_type("FORWARD")

        # Add multiple scorers with different weights to test normalization
        mock_builder.add_bash_script_scorer("scorer1", bash_script="echo 1", weight=1.0)
        mock_builder.add_bash_script_scorer("scorer2", bash_script="echo 2", weight=2.0)
        mock_builder.add_bash_script_scorer("scorer3", bash_script="echo 3", weight=3.0)

        preview = mock_builder.preview()

        # Verify it returns ScenarioPreview
        assert isinstance(preview, ScenarioPreview)

        # Verify all fields are populated
        assert preview.name == "test-scenario"
        assert preview.input_context is not None
        assert preview.input_context.problem_statement == "Fix the bug"
        assert preview.input_context.additional_context == {"hint": "line 42"}
        assert preview.environment is not None
        assert preview.environment.blueprint_id == "bp-123"
        assert preview.environment.working_directory == "/app"
        assert preview.metadata == {"team": "infra"}
        assert preview.reference_output == "diff content"
        assert preview.required_environment_variables == ["API_KEY"]
        assert preview.required_secret_names == ["db_pass"]
        assert preview.validation_type == "FORWARD"

        # Verify weights are normalized (1, 2, 3 -> 1/6, 2/6, 3/6)
        assert preview.scoring_contract is not None
        scorers = preview.scoring_contract.scoring_function_parameters
        tol = self.WEIGHT_TOLERANCE
        assert len(scorers) == 3
        assert scorers[0].weight == pytest.approx(1 / 6, abs=tol)
        assert scorers[1].weight == pytest.approx(2 / 6, abs=tol)
        assert scorers[2].weight == pytest.approx(3 / 6, abs=tol)
        assert sum(s.weight for s in scorers) == pytest.approx(1.0, abs=tol)