Skip to content

Commit 9b5673c

Browse files
committed
Add run derive subcommand
1 parent 09f1763 commit 9b5673c

12 files changed

Lines changed: 1307 additions & 195 deletions

File tree

tests/test_derive_handler.py

Lines changed: 476 additions & 0 deletions
Large diffs are not rendered by default.

tests/test_ui.py

Lines changed: 142 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -50,3 +50,145 @@ def test_plain_optimization_ui_best_respects_minimize_goal():
5050

5151
assert any("best so far: 1.5" in line for line in lines)
5252
assert any("Best metric value: 1.5" in line for line in lines)
53+
54+
55+
def _make_plain_ui_with_capture() -> tuple[PlainOptimizationUI, list[str]]:
56+
"""Construct a PlainOptimizationUI that records output to a list instead of stdout."""
57+
ui = PlainOptimizationUI(
58+
run_id="run-1",
59+
run_name="demo",
60+
total_steps=5,
61+
dashboard_url="https://example.com",
62+
model="gpt-4",
63+
metric_name="accuracy",
64+
)
65+
lines: list[str] = []
66+
ui._print = lines.append
67+
return ui, lines
68+
69+
70+
def test_plain_ui_on_init_prints_header():
71+
"""on_init prints the run banner exactly once. (Header used to be printed
72+
by __enter__; the move to on_init keeps the same observable behavior for
73+
non-derived runs.)"""
74+
ui, lines = _make_plain_ui_with_capture()
75+
76+
ui.on_init()
77+
78+
output = "\n".join(lines)
79+
assert "WECO OPTIMIZATION RUN" in output
80+
assert "Run ID: run-1" in output
81+
assert "Run Name: demo" in output
82+
assert "Dashboard: https://example.com" in output
83+
assert "Model: gpt-4" in output
84+
assert "Metric: accuracy" in output
85+
assert "Total Steps: 5" in output
86+
# Non-derived run: no "Derived from" line
87+
assert not any("Derived from" in line for line in lines)
88+
89+
90+
def test_plain_ui_on_init_includes_derived_from_line():
91+
ui, lines = _make_plain_ui_with_capture()
92+
93+
ui.on_init(derived_from={
94+
"run_id": "parent-uuid",
95+
"node_id": "node-uuid",
96+
"step": 7,
97+
"metric_value": 0.842,
98+
})
99+
100+
derived_lines = [line for line in lines if "Derived from" in line]
101+
assert len(derived_lines) == 1
102+
assert "parent-uuid" in derived_lines[0]
103+
assert "step 7" in derived_lines[0]
104+
assert "0.842" in derived_lines[0]
105+
106+
107+
def test_plain_ui_on_init_handles_derived_from_without_metric():
108+
"""A node with no metric_value (e.g., still pending eval) shouldn't crash
109+
the header rendering."""
110+
ui, lines = _make_plain_ui_with_capture()
111+
112+
ui.on_init(derived_from={
113+
"run_id": "parent-uuid",
114+
"node_id": "node-uuid",
115+
"step": 0,
116+
"metric_value": None,
117+
})
118+
119+
derived_lines = [line for line in lines if "Derived from" in line]
120+
assert len(derived_lines) == 1
121+
assert "parent-uuid" in derived_lines[0]
122+
assert "step 0" in derived_lines[0]
123+
# No "(metric: ...)" suffix when metric_value is None. Specific to the
124+
# suffix's literal form so the assertion is robust to metric_name values
125+
# that happen to contain the substring "metric".
126+
assert "(metric:" not in derived_lines[0]
127+
128+
129+
def test_plain_ui_enter_no_longer_prints_header():
130+
"""Header printing must happen via on_init now, not __enter__. This guards
131+
against accidentally re-introducing the auto-print and double-printing the
132+
header."""
133+
ui, lines = _make_plain_ui_with_capture()
134+
135+
with ui:
136+
pass
137+
138+
assert lines == []
139+
140+
141+
def test_live_ui_on_init_renders_derived_from_row():
142+
"""The Live panel grid gains a "From" row when derived_from is set."""
143+
ui = LiveOptimizationUI(
144+
console=Console(force_terminal=False, color_system=None),
145+
run_id="run-1",
146+
run_name="demo",
147+
total_steps=3,
148+
dashboard_url="https://example.com",
149+
metric_name="accuracy",
150+
)
151+
152+
ui.on_init(derived_from={
153+
"run_id": "parent-uuid",
154+
"node_id": "node-uuid",
155+
"step": 4,
156+
"metric_value": 0.91,
157+
})
158+
159+
text = _render_to_text(ui._render())
160+
assert "parent-uuid" in text
161+
assert "step 4" in text
162+
assert "0.91" in text
163+
164+
165+
def test_live_ui_on_init_without_derived_from_does_not_render_from_row():
166+
"""Negative cousin to test_live_ui_on_init_renders_derived_from_row.
167+
168+
Renders the same UI both with and without ``derived_from`` and asserts
169+
the parent reference (the marker added by the "From" row) appears only
170+
in the derived render. Comparing the two renders directly avoids
171+
fragile substring checks against unrelated panel chrome.
172+
"""
173+
174+
def render(derived_from):
175+
ui = LiveOptimizationUI(
176+
console=Console(force_terminal=False, color_system=None),
177+
run_id="run-1",
178+
run_name="demo",
179+
total_steps=3,
180+
dashboard_url="https://example.com",
181+
)
182+
ui.on_init(derived_from=derived_from)
183+
return _render_to_text(ui._render())
184+
185+
derived_text = render({"run_id": "parent-uuid", "node_id": "n", "step": 4, "metric_value": 0.91})
186+
plain_text = render(None)
187+
188+
# The parent reference is added only by the "From" row, so its presence
189+
# in one render and absence in the other proves the row is conditional.
190+
assert "parent-uuid" in derived_text
191+
assert "parent-uuid" not in plain_text
192+
# Belt and braces: the row label itself only appears in the derived
193+
# render (no other panel label contains "From" as a substring).
194+
assert "From" not in plain_text

weco/cli.py

Lines changed: 39 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -226,6 +226,30 @@ def _configure_run_subcommands(run_parser: argparse.ArgumentParser) -> None:
226226
revise_source.add_argument("-s", "--source", type=str, help="Path to a single source file")
227227
revise_source.add_argument("--sources", nargs="+", type=str, help="Paths to multiple source files")
228228

229+
# weco run derive <run-id>
230+
p = subs.add_parser("derive", help="Create a new run derived from an existing run's step")
231+
p.add_argument("run_id", type=str, help="Parent run UUID")
232+
p.add_argument(
233+
"--from-step", type=str, default="best", help="'best' (lineage-best, default), 'run-best', or a step number"
234+
)
235+
p.add_argument("-n", "--steps", type=int, default=None, help="Override step count for the derived run")
236+
p.add_argument(
237+
"-i",
238+
"--additional-instructions",
239+
type=str,
240+
default=None,
241+
help="Steering instructions for the new run (inline text or path to a file). "
242+
"If omitted, the parent run's instructions are inherited.",
243+
)
244+
p.add_argument("--api-key", nargs="+", type=str, default=None, help="API keys in provider=key format")
245+
p.add_argument(
246+
"--output",
247+
type=str,
248+
choices=["rich", "plain"],
249+
default="rich",
250+
help="Output mode: 'rich' for interactive UI, 'plain' for machine-readable output",
251+
)
252+
229253
# weco run submit <run-id> --node <id>
230254
p = subs.add_parser("submit", help="Submit a pending node for evaluation (review mode)")
231255
p.add_argument("run_id", type=str, help="Run UUID")
@@ -350,7 +374,7 @@ def configure_resume_parser(resume_parser: argparse.ArgumentParser) -> None:
350374

351375
def _dispatch_run_subcommand(sub: str, args: argparse.Namespace) -> None:
352376
"""Dispatch ``weco run <subcommand>`` to the appropriate handler."""
353-
from .commands.run import status, results, show, diff, stop, instruct, review, revise, submit
377+
from .commands.run import status, results, show, diff, stop, instruct, review, revise, submit, derive
354378

355379
def _collect_source_paths() -> list[str] | None:
356380
if getattr(args, "sources", None):
@@ -371,6 +395,15 @@ def _collect_source_paths() -> list[str] | None:
371395
),
372396
"show": lambda: show.handle(run_id=args.run_id, step=args.step, console=console),
373397
"diff": lambda: diff.handle(run_id=args.run_id, step=args.step, against=args.against, console=console),
398+
"derive": lambda: derive.handle(
399+
run_id=args.run_id,
400+
from_step=args.from_step,
401+
steps=args.steps,
402+
additional_instructions=args.additional_instructions,
403+
api_keys=parse_api_keys(args.api_key),
404+
output_mode=args.output,
405+
console=console,
406+
),
374407
"stop": lambda: stop.handle(run_id=args.run_id, console=console),
375408
"instruct": lambda: instruct.handle(run_id=args.run_id, instructions=args.instructions, console=console),
376409
"review": lambda: review.handle(run_id=args.run_id, console=console),
@@ -390,8 +423,10 @@ def _collect_source_paths() -> list[str] | None:
390423
if handler is None:
391424
console.print(f"[bold red]Unknown run subcommand: {sub}[/]")
392425
sys.exit(1)
393-
handler()
394-
sys.exit(0)
426+
# Handlers that drive an optimization loop (e.g. derive) return a bool to
427+
# signal success/failure. Read-only handlers return None and exit cleanly.
428+
result = handler()
429+
sys.exit(0 if result is not False else 1)
395430

396431

397432
def execute_run_command(args: argparse.Namespace) -> None:
@@ -526,7 +561,7 @@ def main() -> None:
526561
def _main() -> None:
527562
"""Internal main function containing the CLI logic."""
528563
parser = argparse.ArgumentParser(
529-
description="[bold cyan]Weco CLI[/]\nEnhance your code with AI-driven optimization.",
564+
description="Weco CLI\nEnhance your code with AI-driven optimization.",
530565
formatter_class=argparse.RawDescriptionHelpFormatter,
531566
)
532567

0 commit comments

Comments
 (0)