diff --git a/src/autoskillit/hooks/pretty_output.py b/src/autoskillit/hooks/pretty_output.py index 72f7ea72..12129b5d 100644 --- a/src/autoskillit/hooks/pretty_output.py +++ b/src/autoskillit/hooks/pretty_output.py @@ -114,11 +114,14 @@ def _fmt_run_skill(data: dict, pipeline: bool) -> str: token_usage = data.get("token_usage") if isinstance(token_usage, dict): lines.append("") - lines.append(f"tokens_in: {_fmt_tokens(token_usage.get('input_tokens'))}") + lines.append(f"tokens_uncached: {_fmt_tokens(token_usage.get('input_tokens'))}") lines.append(f"tokens_out: {_fmt_tokens(token_usage.get('output_tokens'))}") cr = token_usage.get("cache_read_input_tokens", 0) if cr: - lines.append(f"tokens_cached: {_fmt_tokens(cr)}") + lines.append(f"tokens_cache_read: {_fmt_tokens(cr)}") + cw = token_usage.get("cache_creation_input_tokens", 0) + if cw: + lines.append(f"tokens_cache_write: {_fmt_tokens(cw)}") result = data.get("result", "") if result: lines.extend(["", "### Result", result]) @@ -252,20 +255,21 @@ def _fmt_get_token_summary(data: dict, _pipeline: bool) -> str: count = step.get("invocation_count", 1) inp = _fmt_tokens(step.get("input_tokens", 0)) out = _fmt_tokens(step.get("output_tokens", 0)) - cached = _fmt_tokens( - step.get("cache_read_input_tokens", 0) + step.get("cache_creation_input_tokens", 0) - ) + cache_rd = _fmt_tokens(step.get("cache_read_input_tokens", 0)) + cache_wr = _fmt_tokens(step.get("cache_creation_input_tokens", 0)) wc = step.get("wall_clock_seconds", step.get("elapsed_seconds", 0.0)) - lines.append(f"{name} x{count} [in:{inp} out:{out} cached:{cached} t:{wc:.1f}s]") + lines.append( + f"{name} x{count} [uc:{inp} out:{out} cr:{cache_rd} cw:{cache_wr} t:{wc:.1f}s]" + ) total = data.get("total", {}) if total: lines.append("") - lines.append(f"total_in: {_fmt_tokens(total.get('input_tokens', 0))}") + lines.append(f"total_uncached: {_fmt_tokens(total.get('input_tokens', 0))}") lines.append(f"total_out: {_fmt_tokens(total.get('output_tokens', 0))}") - cache_tokens = total.get("cache_read_input_tokens", 0) + total.get( - "cache_creation_input_tokens", 0 + lines.append(f"total_cache_read: {_fmt_tokens(total.get('cache_read_input_tokens', 0))}") + lines.append( + f"total_cache_write: {_fmt_tokens(total.get('cache_creation_input_tokens', 0))}" ) - lines.append(f"total_cached: {_fmt_tokens(cache_tokens)}") mcp = data.get("mcp_responses", {}) mcp_total = mcp.get("total", {}) if mcp_total: diff --git a/src/autoskillit/hooks/token_summary_appender.py b/src/autoskillit/hooks/token_summary_appender.py index dff046b4..cb45faa6 100644 --- a/src/autoskillit/hooks/token_summary_appender.py +++ b/src/autoskillit/hooks/token_summary_appender.py @@ -242,36 +242,40 @@ def _format_table(aggregated: dict[str, dict[str, Any]]) -> str: lines = [ "## Token Usage Summary", "", - "| Step | input | output | cached | count | time |", - "|------|-------|--------|--------|-------|------|", + "| Step | uncached | output | cache_read | cache_write | count | time |", + "|------|----------|--------|------------|-------------|-------|------|", ] total_input = 0 total_output = 0 - total_cached = 0 + total_cache_rd = 0 + total_cache_wr = 0 total_time = 0.0 for entry in aggregated.values(): name = entry["step_name"] inp = entry["input_tokens"] out = entry["output_tokens"] - cached = entry["cache_read_input_tokens"] + entry["cache_creation_input_tokens"] + cache_rd = entry["cache_read_input_tokens"] + cache_wr = entry["cache_creation_input_tokens"] count = entry["invocation_count"] elapsed = entry["elapsed_seconds"] lines.append( - f"| {name} | {_humanize(inp)} | {_humanize(out)} | {_humanize(cached)}" - f" | {count} | {_fmt_duration(elapsed)} |" + f"| {name} | {_humanize(inp)} | {_humanize(out)} | {_humanize(cache_rd)}" + f" | {_humanize(cache_wr)} | {count} | {_fmt_duration(elapsed)} |" ) total_input += inp total_output += out - total_cached += cached + total_cache_rd += cache_rd + total_cache_wr += cache_wr total_time += elapsed lines.append( f"| **Total** | {_humanize(total_input)} | {_humanize(total_output)}" - f" | {_humanize(total_cached)} | | {_fmt_duration(total_time)} |" + f" | {_humanize(total_cache_rd)} | {_humanize(total_cache_wr)}" + f" | | {_fmt_duration(total_time)} |" ) return "\n".join(lines) diff --git a/src/autoskillit/pipeline/telemetry_fmt.py b/src/autoskillit/pipeline/telemetry_fmt.py index f7523634..fc4392f6 100644 --- a/src/autoskillit/pipeline/telemetry_fmt.py +++ b/src/autoskillit/pipeline/telemetry_fmt.py @@ -12,9 +12,10 @@ _TOKEN_COLUMNS = ( TerminalColumn("STEP", max_width=40, align="<"), - TerminalColumn("INPUT", max_width=10, align=">"), + TerminalColumn("UNCACHED", max_width=10, align=">"), TerminalColumn("OUTPUT", max_width=10, align=">"), - TerminalColumn("CACHED", max_width=10, align=">"), + TerminalColumn("CACHE_RD", max_width=10, align=">"), + TerminalColumn("CACHE_WR", max_width=10, align=">"), TerminalColumn("COUNT", max_width=7, align=">"), TerminalColumn("TIME", max_width=8, align=">"), ) @@ -64,28 +65,29 @@ def format_token_table(steps: list[dict], total: dict) -> str: lines = [ "## Token Usage Summary", "", - "| Step | input | output | cached | count | time |", - "|------|-------|--------|--------|-------|------|", + "| Step | uncached | output | cache_read | cache_write | count | time |", + "|------|----------|--------|------------|-------------|-------|------|", ] for step in steps: name = step.get("step_name", "?") inp = h(step.get("input_tokens", 0)) out = h(step.get("output_tokens", 0)) - cached = h( - step.get("cache_read_input_tokens", 0) + step.get("cache_creation_input_tokens", 0) - ) + cache_rd = h(step.get("cache_read_input_tokens", 0)) + cache_wr = h(step.get("cache_creation_input_tokens", 0)) count = step.get("invocation_count", 1) wc = step.get("wall_clock_seconds", step.get("elapsed_seconds", 0.0)) - lines.append(f"| {name} | {inp} | {out} | {cached} | {count} | {fmt_dur(wc)} |") + lines.append( + f"| {name} | {inp} | {out} | {cache_rd} | {cache_wr} | {count} | {fmt_dur(wc)} |" + ) total_in = h(total.get("input_tokens", 0)) total_out = h(total.get("output_tokens", 0)) - total_cached = h( - total.get("cache_read_input_tokens", 0) + total.get("cache_creation_input_tokens", 0) - ) + total_cache_rd = h(total.get("cache_read_input_tokens", 0)) + total_cache_wr = h(total.get("cache_creation_input_tokens", 0)) total_time = total.get("total_elapsed_seconds", 0.0) lines.append( - f"| **Total** | {total_in} | {total_out} | {total_cached} | | {fmt_dur(total_time)} |" + f"| **Total** | {total_in} | {total_out} | {total_cache_rd}" + f" | {total_cache_wr} | | {fmt_dur(total_time)} |" ) return "\n".join(lines) @@ -116,17 +118,15 @@ def format_token_table_terminal(steps: list[dict], total: dict) -> str: h = TelemetryFormatter._humanize fmt_dur = TelemetryFormatter._fmt_duration - rows: list[tuple[str, str, str, str, str, str]] = [] + rows: list[tuple[str, str, str, str, str, str, str]] = [] for step in steps: rows.append( ( step.get("step_name", "?"), h(step.get("input_tokens", 0)), h(step.get("output_tokens", 0)), - h( - step.get("cache_read_input_tokens", 0) - + step.get("cache_creation_input_tokens", 0) - ), + h(step.get("cache_read_input_tokens", 0)), + h(step.get("cache_creation_input_tokens", 0)), str(step.get("invocation_count", 1)), fmt_dur(step.get("wall_clock_seconds", step.get("elapsed_seconds", 0.0))), ) @@ -136,10 +136,8 @@ def format_token_table_terminal(steps: list[dict], total: dict) -> str: "Total", h(total.get("input_tokens", 0)), h(total.get("output_tokens", 0)), - h( - total.get("cache_read_input_tokens", 0) - + total.get("cache_creation_input_tokens", 0) - ), + h(total.get("cache_read_input_tokens", 0)), + h(total.get("cache_creation_input_tokens", 0)), "", fmt_dur(total.get("total_elapsed_seconds", 0.0)), ) @@ -177,19 +175,18 @@ def format_compact_kv( count = step.get("invocation_count", 1) inp = h(step.get("input_tokens", 0)) out = h(step.get("output_tokens", 0)) - cached = h( - step.get("cache_read_input_tokens", 0) + step.get("cache_creation_input_tokens", 0) - ) + cache_rd = h(step.get("cache_read_input_tokens", 0)) + cache_wr = h(step.get("cache_creation_input_tokens", 0)) wc = step.get("wall_clock_seconds", step.get("elapsed_seconds", 0.0)) - lines.append(f"{name} x{count} [in:{inp} out:{out} cached:{cached} t:{wc:.1f}s]") + lines.append( + f"{name} x{count} [uc:{inp} out:{out} cr:{cache_rd} cw:{cache_wr} t:{wc:.1f}s]" + ) if total: lines.append("") - lines.append(f"total_in: {h(total.get('input_tokens', 0))}") + lines.append(f"total_uncached: {h(total.get('input_tokens', 0))}") lines.append(f"total_out: {h(total.get('output_tokens', 0))}") - cache_tokens = total.get("cache_read_input_tokens", 0) + total.get( - "cache_creation_input_tokens", 0 - ) - lines.append(f"total_cached: {h(cache_tokens)}") + lines.append(f"total_cache_read: {h(total.get('cache_read_input_tokens', 0))}") + lines.append(f"total_cache_write: {h(total.get('cache_creation_input_tokens', 0))}") if mcp_responses: mcp_total = mcp_responses.get("total", {}) if mcp_total: diff --git a/tests/infra/test_pretty_output.py b/tests/infra/test_pretty_output.py index 552f6969..ae1b76d8 100644 --- a/tests/infra/test_pretty_output.py +++ b/tests/infra/test_pretty_output.py @@ -376,13 +376,14 @@ def test_format_get_token_summary_compact(): out, _ = _run_hook(event=event) text = json.loads(out)["hookSpecificOutput"]["updatedMCPToolOutput"] assert "token_summary" in text - # Assert specific compact line format: name xN [in:Xk out:Xk cached:XM t:Xs] - assert "investigate x1 [in:45.2k out:12.8k cached:1.2M t:0.0s]" in text - assert "make_plan x2 [in:30.0k out:8.0k cached:500.0k t:0.0s]" in text - assert "implement x1 [in:60.0k out:15.0k cached:2.0M t:0.0s]" in text - assert "total_in:" in text + # Assert specific compact line format: name xN [uc:Xk out:Xk cr:XM cw:XM t:Xs] + assert "investigate x1 [uc:45.2k out:12.8k cr:1.2M cw:0 t:0.0s]" in text + assert "make_plan x2 [uc:30.0k out:8.0k cr:0 cw:500.0k t:0.0s]" in text + assert "implement x1 [uc:60.0k out:15.0k cr:2.0M cw:0 t:0.0s]" in text + assert "total_uncached:" in text assert "total_out:" in text - assert "total_cached:" in text + assert "total_cache_read:" in text + assert "total_cache_write:" in text # PHK-16 @@ -1270,6 +1271,60 @@ def test_hook_token_summary_output_equivalent_to_canonical(): ) +def test_fmt_run_skill_interactive_shows_four_token_fields(): + """_fmt_run_skill interactive mode shows all 4 token fields.""" + data = { + "success": True, + "subtype": "COMPLETED", + "exit_code": 0, + "needs_retry": False, + "result": "done", + "token_usage": { + "input_tokens": 5000, + "output_tokens": 3000, + "cache_read_input_tokens": 200000, + "cache_creation_input_tokens": 8000, + }, + } + rendered = _format_response( + "mcp__plugin_autoskillit_autoskillit__run_skill", + json.dumps({"result": json.dumps(data)}), + pipeline=False, + ) + assert rendered is not None + assert "tokens_uncached:" in rendered + assert "tokens_out:" in rendered + assert "tokens_cache_read:" in rendered + assert "tokens_cache_write:" in rendered + + +def test_fmt_run_skill_suppresses_zero_cache_fields(): + """_fmt_run_skill suppresses tokens_cache_read and tokens_cache_write when both are 0.""" + data = { + "success": True, + "subtype": "COMPLETED", + "exit_code": 0, + "needs_retry": False, + "result": "done", + "token_usage": { + "input_tokens": 5000, + "output_tokens": 3000, + "cache_read_input_tokens": 0, + "cache_creation_input_tokens": 0, + }, + } + rendered = _format_response( + "mcp__plugin_autoskillit_autoskillit__run_skill", + json.dumps({"result": json.dumps(data)}), + pipeline=False, + ) + assert rendered is not None + assert "tokens_uncached:" in rendered + assert "tokens_out:" in rendered + assert "tokens_cache_read:" not in rendered + assert "tokens_cache_write:" not in rendered + + # --------------------------------------------------------------------------- # Timing summary dedicated formatter # --------------------------------------------------------------------------- diff --git a/tests/infra/test_token_summary_appender.py b/tests/infra/test_token_summary_appender.py index 44c8a49d..6abfc86f 100644 --- a/tests/infra/test_token_summary_appender.py +++ b/tests/infra/test_token_summary_appender.py @@ -271,6 +271,10 @@ def subprocess_side_effect(args: list[str], **kwargs: object) -> MagicMock: assert "open-pr" in body_content # Total row assert "**Total**" in body_content + # 4-column token headers + assert "uncached" in body_content + assert "cache_read" in body_content + assert "cache_write" in body_content # --------------------------------------------------------------------------- diff --git a/tests/pipeline/test_telemetry_formatter.py b/tests/pipeline/test_telemetry_formatter.py index 9e8e5a27..23b14e89 100644 --- a/tests/pipeline/test_telemetry_formatter.py +++ b/tests/pipeline/test_telemetry_formatter.py @@ -57,6 +57,9 @@ def test_produces_markdown_table(self) -> None: result = TelemetryFormatter.format_token_table(_STEPS, _TOTAL) assert "| Step |" in result assert "|---" in result + assert "| uncached |" in result + assert "| cache_read |" in result + assert "| cache_write |" in result assert "- input_tokens:" not in result assert "# Token Summary" not in result @@ -121,10 +124,10 @@ def test_snapshot(self) -> None: [ "## Token Usage Summary", "", - "| Step | input | output | cached | count | time |", - "|------|-------|--------|--------|-------|------|", - "| plan | 1.0k | 500 | 300 | 1 | 46s |", - "| **Total** | 1.0k | 500 | 300 | | 46s |", + "| Step | uncached | output | cache_read | cache_write | count | time |", + "|------|----------|--------|------------|-------------|-------|------|", + "| plan | 1.0k | 500 | 200 | 100 | 1 | 46s |", + "| **Total** | 1.0k | 500 | 200 | 100 | | 46s |", ] ) assert result == expected @@ -224,6 +227,29 @@ def test_format_timing_table_terminal_output_has_leading_indent() -> None: # --------------------------------------------------------------------------- +def test_terminal_table_has_four_token_columns() -> None: + """Terminal table must show UNCACHED, CACHE_RD, CACHE_WR column headers.""" + result = TelemetryFormatter.format_token_table_terminal(_STEPS, _TOTAL) + assert "UNCACHED" in result + assert "CACHE_RD" in result + assert "CACHE_WR" in result + + +def test_compact_kv_four_token_prefixes() -> None: + """Compact KV format must use uc:, cr:, cw: prefixes and split totals.""" + result = TelemetryFormatter.format_compact_kv(_STEPS, _TOTAL) + assert "uc:" in result + assert "cr:" in result + assert "cw:" in result + assert "total_uncached:" in result + assert "total_cache_read:" in result + assert "total_cache_write:" in result + assert "in:" not in result + assert " cached:" not in result + assert "total_in:" not in result + assert "total_cached:" not in result + + class TestFormatCompactKv: def test_produces_compact_lines(self) -> None: result = TelemetryFormatter.format_compact_kv(_STEPS, _TOTAL) @@ -233,14 +259,15 @@ def test_produces_compact_lines(self) -> None: def test_includes_humanized_tokens(self) -> None: result = TelemetryFormatter.format_compact_kv(_STEPS, _TOTAL) - assert "in:7.0k" in result + assert "uc:7.0k" in result assert "out:5.9k" in result def test_includes_total_lines(self) -> None: result = TelemetryFormatter.format_compact_kv(_STEPS, _TOTAL) - assert "total_in:" in result + assert "total_uncached:" in result assert "total_out:" in result - assert "total_cached:" in result + assert "total_cache_read:" in result + assert "total_cache_write:" in result def test_prefers_wall_clock_seconds(self) -> None: result = TelemetryFormatter.format_compact_kv(_STEPS, _TOTAL)