Update effort levels in documentation and code: add 'min' level, adjust validation and defaults

pelikhan · pelikhan · commit 283a3c713e9d · 2025-08-04T09:01:27.000Z
diff --git a/README.md b/README.md
@@ -130,7 +130,7 @@ graph TD
 You can customize the test generation process with various options:
 
 ```shell
-# Specify effort level (low, medium, high)
+# Specify effort level (min, low, medium, high)
 gh models generate --effort high my_prompt.prompt.yml
 
 # Use a specific model for groundtruth generation
@@ -148,6 +148,7 @@ gh models generate --instruction-intent "Focus on edge cases" my_prompt.prompt.y
 
 The `effort` flag controls a few flags in the test generation engine and is a tradeoff
 between how much tests you want generated and how much tokens/time you are willing to spend.
+- `min` is just enough to generate a few tests and make sure things are properly configured.
 - `low` should be used to do a quick try of the test generation. It limits the number of rules to `3`.
 - `medium` provides much better coverage
 - `high` spends more token per rule to generate tests, which typically leads to longer, more complex inputs
diff --git a/cmd/generate/effort.go b/cmd/generate/effort.go
@@ -2,35 +2,37 @@ package generate
 
 // EffortConfiguration defines the configuration for different effort levels
 type EffortConfiguration struct {
-	TestsPerRule              int
-	MaxRules                  int
-	MaxRulesPerTestGeneration int
-	RulesPerGen               int
+	MaxRules     int
+	TestsPerRule int
+	RulesPerGen  int
 }
 
 // GetEffortConfiguration returns the configuration for a given effort level
 // Based on the reference TypeScript implementation in constants.mts
 func GetEffortConfiguration(effort string) *EffortConfiguration {
 	switch effort {
+	case EffortMin:
+		return &EffortConfiguration{
+			MaxRules:     3,
+			TestsPerRule: 1,
+			RulesPerGen:  100,
+		}
 	case EffortLow:
 		return &EffortConfiguration{
-			MaxRules:                  3,
-			TestsPerRule:              2,
-			MaxRulesPerTestGeneration: 5,
-			RulesPerGen:               10,
+			MaxRules:     10,
+			TestsPerRule: 1,
+			RulesPerGen:  10,
 		}
 	case EffortMedium:
 		return &EffortConfiguration{
-			MaxRules:                  20,
-			TestsPerRule:              3,
-			MaxRulesPerTestGeneration: 5,
-			RulesPerGen:               5,
+			MaxRules:     20,
+			TestsPerRule: 3,
+			RulesPerGen:  5,
 		}
 	case EffortHigh:
 		return &EffortConfiguration{
-			MaxRules:                  50,
-			MaxRulesPerTestGeneration: 2,
-			RulesPerGen:               3,
+			TestsPerRule: 4,
+			RulesPerGen:  3,
 		}
 	default:
 		return nil
@@ -43,22 +45,18 @@ func ApplyEffortConfiguration(options *PromptPexOptions, effort string) {
 		return
 	}
 
-	config := GetEffortConfiguration(effort)
-	if config == nil {
+	effortConfig := GetEffortConfiguration(effort)
+	if effortConfig == nil {
 		return
 	}
-
-	// Apply configuration settings only if not already set
-	if options.TestsPerRule == 0 {
-		options.TestsPerRule = config.TestsPerRule
-	}
-	if options.MaxRules == 0 {
-		options.MaxRules = config.MaxRules
+	// Apply effort if set
+	if effortConfig.TestsPerRule != 0 {
+		options.TestsPerRule = effortConfig.TestsPerRule
 	}
-	if options.MaxRulesPerTestGen == 0 {
-		options.MaxRulesPerTestGen = config.MaxRulesPerTestGeneration
+	if effortConfig.MaxRules != 0 {
+		options.MaxRules = effortConfig.MaxRules
 	}
-	if options.RulesPerGen == 0 {
-		options.RulesPerGen = config.RulesPerGen
+	if effortConfig.RulesPerGen != 0 {
+		options.RulesPerGen = effortConfig.RulesPerGen
 	}
 }
diff --git a/cmd/generate/generate.go b/cmd/generate/generate.go
@@ -116,7 +116,7 @@ func NewGenerateCommand(cfg *command.Config) *cobra.Command {
 func AddCommandLineFlags(cmd *cobra.Command) {
 	flags := cmd.Flags()
 	flags.String("org", "", "Organization to attribute usage to")
-	flags.String("effort", "", "Effort level (low, medium, high)")
+	flags.String("effort", "", "Effort level (min, low, medium, high)")
 	flags.String("groundtruth-model", "", "Model to use for generating groundtruth outputs. Defaults to openai/gpt-4o. Use 'none' to disable groundtruth generation.")
 	flags.String("session-file", "", "Session file to load existing context from")
 	flags.StringArray("var", []string{}, "Template variables for prompt files (can be used multiple times: --var name=value)")
@@ -135,8 +135,8 @@ func ParseFlags(cmd *cobra.Command, options *PromptPexOptions) error {
 	// Parse effort first so it can set defaults
 	if effort, _ := flags.GetString("effort"); effort != "" {
 		// Validate effort value
-		if effort != EffortLow && effort != EffortMedium && effort != EffortHigh {
-			return fmt.Errorf("invalid effort level '%s': must be one of %s, %s, or %s", effort, EffortLow, EffortMedium, EffortHigh)
+		if effort != EffortMin && effort != EffortLow && effort != EffortMedium && effort != EffortHigh {
+			return fmt.Errorf("invalid effort level '%s': must be one of %s, %s, %s, or %s", effort, EffortMin, EffortLow, EffortMedium, EffortHigh)
 		}
 		options.Effort = effort
 	}
diff --git a/cmd/generate/generate_test.go b/cmd/generate/generate_test.go
@@ -181,7 +181,7 @@ func TestParseFlagsInvalidEffort(t *testing.T) {
 		{
 			name:        "invalid effort value",
 			effort:      "invalid",
-			expectedErr: "invalid effort level 'invalid': must be one of low, medium, or high",
+			expectedErr: "invalid effort level 'invalid': must be one of min, low, medium, or high",
 		},
 		{
 			name:        "empty effort value",
@@ -191,12 +191,12 @@ func TestParseFlagsInvalidEffort(t *testing.T) {
 		{
 			name:        "case sensitive effort",
 			effort:      "Low",
-			expectedErr: "invalid effort level 'Low': must be one of low, medium, or high",
+			expectedErr: "invalid effort level 'Low': must be one of min, low, medium, or high",
 		},
 		{
 			name:        "numeric effort",
 			effort:      "1",
-			expectedErr: "invalid effort level '1': must be one of low, medium, or high",
+			expectedErr: "invalid effort level '1': must be one of min, low, medium, or high",
 		},
 	}
 
diff --git a/cmd/generate/options.go b/cmd/generate/options.go
@@ -5,7 +5,6 @@ func GetDefaultOptions() *PromptPexOptions {
 	return &PromptPexOptions{
 		TestsPerRule:       3,
 		RulesPerGen:        3,
-		MaxRulesPerTestGen: 3,
 		Verbose:            false,
 		IntentMaxTokens:    100,
 		InputSpecMaxTokens: 500,
diff --git a/cmd/generate/pipeline.go b/cmd/generate/pipeline.go
@@ -176,7 +176,7 @@ Input Specification:`, RenderMessagesToString(context.Prompt.Messages))
 
 // generateOutputRules generates output rules for the prompt
 func (h *generateCommandHandler) generateOutputRules(context *PromptPexContext) error {
-	h.WriteStartBox("Output rules", "")
+	h.WriteStartBox("Output rules", fmt.Sprintf("max rules: %d", h.options.MaxRules))
 	if len(context.Rules) == 0 {
 		system := `Analyze the following prompt and generate a list of output rules.
 These rules should describe what makes a valid output from this prompt.
@@ -220,6 +220,10 @@ Output Rules:`, RenderMessagesToString(context.Prompt.Messages))
 			return fmt.Errorf("failed to parse output rules: %s", rules)
 		}
 
+		if h.options.MaxRules > 0 && len(parsed) > h.options.MaxRules {
+			parsed = parsed[:h.options.MaxRules]
+		}
+
 		context.Rules = parsed
 	}
 
@@ -284,12 +288,7 @@ Inverse Output Rules:`, strings.Join(context.Rules, "\n"))
 func (h *generateCommandHandler) generateTests(context *PromptPexContext) error {
 	h.WriteStartBox("Tests", fmt.Sprintf("%d rules x %d tests per rule", len(context.Rules)+len(context.InverseRules), h.options.TestsPerRule))
 	if len(context.Tests) == 0 {
-		defaultOptions := GetDefaultOptions()
-		testsPerRule := defaultOptions.TestsPerRule
-		if h.options.TestsPerRule != 0 {
-			testsPerRule = h.options.TestsPerRule
-		}
-
+		testsPerRule := h.options.TestsPerRule
 		allRules := append(context.Rules, context.InverseRules...)
 
 		// Generate tests iteratively for groups of rules
@@ -313,7 +312,7 @@ func (h *generateCommandHandler) generateTests(context *PromptPexContext) error
 			// render to terminal
 			for _, test := range groupTests {
 				h.WriteToLine(test.Input)
-				h.WriteToLine(fmt.Sprintf("    %s%s", BOX_END, test.Reasoning))
+				h.WriteToLine(fmt.Sprintf("  %s%s", BOX_END, test.Reasoning))
 			}
 
 			// Accumulate tests
@@ -531,7 +530,7 @@ func (h *generateCommandHandler) generateGroundtruth(context *PromptPexContext)
 				h.cfg.WriteToOut(fmt.Sprintf("Saving context failed: %v", err))
 			}
 		}
-		h.WriteToLine(fmt.Sprintf("    %s%s", BOX_END, test.Expected)) // Write groundtruth output
+		h.WriteToLine(fmt.Sprintf("  %s%s", BOX_END, test.Expected)) // Write groundtruth output
 	}
 
 	h.WriteEndBox(fmt.Sprintf("%d items", len(context.Tests)))
diff --git a/cmd/generate/types.go b/cmd/generate/types.go
@@ -27,7 +27,6 @@ type PromptPexOptions struct {
 	TestsPerRule       int                    `yaml:"testsPerRule,omitempty" json:"testsPerRule,omitempty"`
 	RulesPerGen        int                    `yaml:"rulesPerGen,omitempty" json:"rulesPerGen,omitempty"`
 	MaxRules           int                    `yaml:"maxRules,omitempty" json:"maxRules,omitempty"`
-	MaxRulesPerTestGen int                    `yaml:"maxRulesPerTestGen,omitempty" json:"maxRulesPerTestGen,omitempty"`
 	IntentMaxTokens    int                    `yaml:"intentMaxTokens,omitempty" json:"intentMaxTokens,omitempty"`
 	InputSpecMaxTokens int                    `yaml:"inputSpecMaxTokens,omitempty" json:"inputSpecMaxTokens,omitempty"`
 
@@ -63,6 +62,7 @@ type PromptPexTest struct {
 
 // Effort levels
 const (
+	EffortMin    = "min"
 	EffortLow    = "low"
 	EffortMedium = "medium"
 	EffortHigh   = "high"

Original file line number	Diff line number	Diff line change
`@@ -181,7 +181,7 @@ func TestParseFlagsInvalidEffort(t *testing.T) {`
`181`	`181`	`{`
`182`	`182`	`name: "invalid effort value",`
`183`	`183`	`effort: "invalid",`
`184`		`- expectedErr: "invalid effort level 'invalid': must be one of low, medium, or high",`
	`184`	`+ expectedErr: "invalid effort level 'invalid': must be one of min, low, medium, or high",`
`185`	`185`	`},`
`186`	`186`	`{`
`187`	`187`	`name: "empty effort value",`
`@@ -191,12 +191,12 @@ func TestParseFlagsInvalidEffort(t *testing.T) {`
`191`	`191`	`{`
`192`	`192`	`name: "case sensitive effort",`
`193`	`193`	`effort: "Low",`
`194`		`- expectedErr: "invalid effort level 'Low': must be one of low, medium, or high",`
	`194`	`+ expectedErr: "invalid effort level 'Low': must be one of min, low, medium, or high",`
`195`	`195`	`},`
`196`	`196`	`{`
`197`	`197`	`name: "numeric effort",`
`198`	`198`	`effort: "1",`
`199`		`- expectedErr: "invalid effort level '1': must be one of low, medium, or high",`
	`199`	`+ expectedErr: "invalid effort level '1': must be one of min, low, medium, or high",`
`200`	`200`	`},`
`201`	`201`	`}`
`202`	`202`