Skip to content

Commit 7fc3515

Browse files
authored
Display formatted table when run with gh models eval (#76)
2 parents b21bd7a + 347c6de commit 7fc3515

File tree

2 files changed

+47
-9
lines changed

2 files changed

+47
-9
lines changed

cmd/eval/eval.go

Lines changed: 45 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -10,13 +10,21 @@ import (
1010
"time"
1111

1212
"github.com/MakeNowJust/heredoc"
13+
"github.com/cli/go-gh/v2/pkg/tableprinter"
1314
"github.com/github/gh-models/internal/azuremodels"
1415
"github.com/github/gh-models/pkg/command"
1516
"github.com/github/gh-models/pkg/prompt"
1617
"github.com/github/gh-models/pkg/util"
18+
"github.com/mgutz/ansi"
1719
"github.com/spf13/cobra"
1820
)
1921

22+
var (
23+
lightGrayUnderline = ansi.ColorFunc("white+du")
24+
red = ansi.ColorFunc("red")
25+
green = ansi.ColorFunc("green")
26+
)
27+
2028
// EvaluationSummary represents the overall evaluation summary
2129
type EvaluationSummary struct {
2230
Name string `json:"name"`
@@ -167,6 +175,7 @@ func (h *evalCommandHandler) runEvaluation(ctx context.Context) error {
167175

168176
for i, testCase := range h.evalFile.TestData {
169177
if !h.jsonOutput {
178+
h.cfg.WriteToOut("-------------------------\n")
170179
h.cfg.WriteToOut(fmt.Sprintf("Running test case %d/%d...\n", i+1, totalTests))
171180
}
172181

@@ -235,30 +244,58 @@ func (h *evalCommandHandler) runEvaluation(ctx context.Context) error {
235244
}
236245

237246
func (h *evalCommandHandler) printTestResult(result TestResult, testPassed bool) {
247+
printer := h.cfg.NewTablePrinter()
238248
if testPassed {
239-
h.cfg.WriteToOut(" ✓ PASSED\n")
249+
printer.AddField("Result", tableprinter.WithColor(lightGrayUnderline))
250+
printer.AddField("✓ PASSED", tableprinter.WithColor(green))
251+
printer.EndRow()
240252
} else {
241-
h.cfg.WriteToOut(" ✗ FAILED\n")
253+
printer.AddField("Result", tableprinter.WithColor(lightGrayUnderline))
254+
printer.AddField("✗ FAILED", tableprinter.WithColor(red))
255+
printer.EndRow()
242256
// Show the first 100 characters of the model response when test fails
243257
preview := result.ModelResponse
244258
if len(preview) > 100 {
245259
preview = preview[:100] + "..."
246260
}
247-
h.cfg.WriteToOut(fmt.Sprintf(" Model Response: %s\n", preview))
261+
262+
printer.AddField("Model Response", tableprinter.WithColor(lightGrayUnderline))
263+
printer.AddField(preview)
264+
printer.EndRow()
248265
}
249266

267+
err := printer.Render()
268+
if err != nil {
269+
return
270+
}
271+
272+
h.cfg.WriteToOut("\n")
273+
274+
table := h.cfg.NewTablePrinter()
275+
table.AddHeader([]string{"EVALUATION", "RESULT", "SCORE", "CRITERIA"}, tableprinter.WithColor(lightGrayUnderline))
250276
// Show evaluation details
251277
for _, evalResult := range result.EvaluationResults {
252-
status := "✓"
278+
status, color := "✓", green
253279
if !evalResult.Passed {
254-
status = "✗"
280+
status, color = "✗", red
255281
}
256-
h.cfg.WriteToOut(fmt.Sprintf(" %s %s (score: %.2f)\n",
257-
status, evalResult.EvaluatorName, evalResult.Score))
282+
table.AddField(evalResult.EvaluatorName)
283+
table.AddField(status, tableprinter.WithColor(color))
284+
table.AddField(fmt.Sprintf("%.2f", evalResult.Score), tableprinter.WithColor(color))
285+
258286
if evalResult.Details != "" {
259-
h.cfg.WriteToOut(fmt.Sprintf(" %s\n", evalResult.Details))
287+
table.AddField(evalResult.Details)
288+
} else {
289+
table.AddField("")
260290
}
291+
table.EndRow()
292+
}
293+
294+
err = table.Render()
295+
if err != nil {
296+
return
261297
}
298+
262299
h.cfg.WriteToOut("\n")
263300
}
264301

cmd/eval/eval_test.go

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -312,7 +312,8 @@ evaluators:
312312
require.Contains(t, output, "Failing Test")
313313
require.Contains(t, output, "Running test case")
314314
require.Contains(t, output, "FAILED")
315-
require.Contains(t, output, "Model Response: actual model response")
315+
require.Contains(t, output, "Model Response")
316+
require.Contains(t, output, "actual model response")
316317
})
317318

318319
t.Run("json output format", func(t *testing.T) {

0 commit comments

Comments
 (0)