Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 14 additions & 10 deletions src/autoskillit/hooks/pretty_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,11 +114,14 @@ def _fmt_run_skill(data: dict, pipeline: bool) -> str:
token_usage = data.get("token_usage")
if isinstance(token_usage, dict):
lines.append("")
lines.append(f"tokens_in: {_fmt_tokens(token_usage.get('input_tokens'))}")
lines.append(f"tokens_uncached: {_fmt_tokens(token_usage.get('input_tokens'))}")
lines.append(f"tokens_out: {_fmt_tokens(token_usage.get('output_tokens'))}")
cr = token_usage.get("cache_read_input_tokens", 0)
if cr:
lines.append(f"tokens_cached: {_fmt_tokens(cr)}")
lines.append(f"tokens_cache_read: {_fmt_tokens(cr)}")
cw = token_usage.get("cache_creation_input_tokens", 0)
if cw:
lines.append(f"tokens_cache_write: {_fmt_tokens(cw)}")
result = data.get("result", "")
if result:
lines.extend(["", "### Result", result])
Expand Down Expand Up @@ -252,20 +255,21 @@ def _fmt_get_token_summary(data: dict, _pipeline: bool) -> str:
count = step.get("invocation_count", 1)
inp = _fmt_tokens(step.get("input_tokens", 0))
out = _fmt_tokens(step.get("output_tokens", 0))
cached = _fmt_tokens(
step.get("cache_read_input_tokens", 0) + step.get("cache_creation_input_tokens", 0)
)
cache_rd = _fmt_tokens(step.get("cache_read_input_tokens", 0))
cache_wr = _fmt_tokens(step.get("cache_creation_input_tokens", 0))
wc = step.get("wall_clock_seconds", step.get("elapsed_seconds", 0.0))
lines.append(f"{name} x{count} [in:{inp} out:{out} cached:{cached} t:{wc:.1f}s]")
lines.append(
f"{name} x{count} [uc:{inp} out:{out} cr:{cache_rd} cw:{cache_wr} t:{wc:.1f}s]"
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[info] cohesion: _fmt_run_skill uses full labels tokens_uncached, tokens_cache_read, tokens_cache_write while _fmt_get_token_summary uses abbreviated uc:, cr:, cw: for the same token categories in the same file. This creates an inconsistent user experience.

)
total = data.get("total", {})
if total:
lines.append("")
lines.append(f"total_in: {_fmt_tokens(total.get('input_tokens', 0))}")
lines.append(f"total_uncached: {_fmt_tokens(total.get('input_tokens', 0))}")
lines.append(f"total_out: {_fmt_tokens(total.get('output_tokens', 0))}")
cache_tokens = total.get("cache_read_input_tokens", 0) + total.get(
"cache_creation_input_tokens", 0
lines.append(f"total_cache_read: {_fmt_tokens(total.get('cache_read_input_tokens', 0))}")
lines.append(
f"total_cache_write: {_fmt_tokens(total.get('cache_creation_input_tokens', 0))}"
)
lines.append(f"total_cached: {_fmt_tokens(cache_tokens)}")
mcp = data.get("mcp_responses", {})
mcp_total = mcp.get("total", {})
if mcp_total:
Expand Down
20 changes: 12 additions & 8 deletions src/autoskillit/hooks/token_summary_appender.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,36 +242,40 @@ def _format_table(aggregated: dict[str, dict[str, Any]]) -> str:
lines = [
"## Token Usage Summary",
"",
"| Step | input | output | cached | count | time |",
"|------|-------|--------|--------|-------|------|",
"| Step | uncached | output | cache_read | cache_write | count | time |",
"|------|----------|--------|------------|-------------|-------|------|",
]

total_input = 0
total_output = 0
total_cached = 0
total_cache_rd = 0
total_cache_wr = 0
total_time = 0.0

for entry in aggregated.values():
name = entry["step_name"]
inp = entry["input_tokens"]
out = entry["output_tokens"]
cached = entry["cache_read_input_tokens"] + entry["cache_creation_input_tokens"]
cache_rd = entry["cache_read_input_tokens"]
cache_wr = entry["cache_creation_input_tokens"]
count = entry["invocation_count"]
elapsed = entry["elapsed_seconds"]

lines.append(
f"| {name} | {_humanize(inp)} | {_humanize(out)} | {_humanize(cached)}"
f" | {count} | {_fmt_duration(elapsed)} |"
f"| {name} | {_humanize(inp)} | {_humanize(out)} | {_humanize(cache_rd)}"
f" | {_humanize(cache_wr)} | {count} | {_fmt_duration(elapsed)} |"
)

total_input += inp
total_output += out
total_cached += cached
total_cache_rd += cache_rd
total_cache_wr += cache_wr
total_time += elapsed

lines.append(
f"| **Total** | {_humanize(total_input)} | {_humanize(total_output)}"
f" | {_humanize(total_cached)} | | {_fmt_duration(total_time)} |"
f" | {_humanize(total_cache_rd)} | {_humanize(total_cache_wr)}"
f" | | {_fmt_duration(total_time)} |"
)

return "\n".join(lines)
Expand Down
57 changes: 27 additions & 30 deletions src/autoskillit/pipeline/telemetry_fmt.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@

_TOKEN_COLUMNS = (
TerminalColumn("STEP", max_width=40, align="<"),
TerminalColumn("INPUT", max_width=10, align=">"),
TerminalColumn("UNCACHED", max_width=10, align=">"),
TerminalColumn("OUTPUT", max_width=10, align=">"),
TerminalColumn("CACHED", max_width=10, align=">"),
TerminalColumn("CACHE_RD", max_width=10, align=">"),
TerminalColumn("CACHE_WR", max_width=10, align=">"),
TerminalColumn("COUNT", max_width=7, align=">"),
TerminalColumn("TIME", max_width=8, align=">"),
)
Expand Down Expand Up @@ -64,28 +65,29 @@ def format_token_table(steps: list[dict], total: dict) -> str:
lines = [
"## Token Usage Summary",
"",
"| Step | input | output | cached | count | time |",
"|------|-------|--------|--------|-------|------|",
"| Step | uncached | output | cache_read | cache_write | count | time |",
"|------|----------|--------|------------|-------------|-------|------|",
]
for step in steps:
name = step.get("step_name", "?")
inp = h(step.get("input_tokens", 0))
out = h(step.get("output_tokens", 0))
cached = h(
step.get("cache_read_input_tokens", 0) + step.get("cache_creation_input_tokens", 0)
)
cache_rd = h(step.get("cache_read_input_tokens", 0))
cache_wr = h(step.get("cache_creation_input_tokens", 0))
count = step.get("invocation_count", 1)
wc = step.get("wall_clock_seconds", step.get("elapsed_seconds", 0.0))
lines.append(f"| {name} | {inp} | {out} | {cached} | {count} | {fmt_dur(wc)} |")
lines.append(
f"| {name} | {inp} | {out} | {cache_rd} | {cache_wr} | {count} | {fmt_dur(wc)} |"
)

total_in = h(total.get("input_tokens", 0))
total_out = h(total.get("output_tokens", 0))
total_cached = h(
total.get("cache_read_input_tokens", 0) + total.get("cache_creation_input_tokens", 0)
)
total_cached_rd = h(total.get("cache_read_input_tokens", 0))
total_cached_wr = h(total.get("cache_creation_input_tokens", 0))
total_time = total.get("total_elapsed_seconds", 0.0)
lines.append(
f"| **Total** | {total_in} | {total_out} | {total_cached} | | {fmt_dur(total_time)} |"
f"| **Total** | {total_in} | {total_out} | {total_cached_rd}"
f" | {total_cached_wr} | | {fmt_dur(total_time)} |"
)
return "\n".join(lines)

Expand Down Expand Up @@ -116,17 +118,15 @@ def format_token_table_terminal(steps: list[dict], total: dict) -> str:
h = TelemetryFormatter._humanize
fmt_dur = TelemetryFormatter._fmt_duration

rows: list[tuple[str, str, str, str, str, str]] = []
rows: list[tuple[str, str, str, str, str, str, str]] = []
for step in steps:
rows.append(
(
step.get("step_name", "?"),
h(step.get("input_tokens", 0)),
h(step.get("output_tokens", 0)),
h(
step.get("cache_read_input_tokens", 0)
+ step.get("cache_creation_input_tokens", 0)
),
h(step.get("cache_read_input_tokens", 0)),
h(step.get("cache_creation_input_tokens", 0)),
str(step.get("invocation_count", 1)),
fmt_dur(step.get("wall_clock_seconds", step.get("elapsed_seconds", 0.0))),
)
Expand All @@ -136,10 +136,8 @@ def format_token_table_terminal(steps: list[dict], total: dict) -> str:
"Total",
h(total.get("input_tokens", 0)),
h(total.get("output_tokens", 0)),
h(
total.get("cache_read_input_tokens", 0)
+ total.get("cache_creation_input_tokens", 0)
),
h(total.get("cache_read_input_tokens", 0)),
h(total.get("cache_creation_input_tokens", 0)),
"",
fmt_dur(total.get("total_elapsed_seconds", 0.0)),
)
Expand Down Expand Up @@ -177,19 +175,18 @@ def format_compact_kv(
count = step.get("invocation_count", 1)
inp = h(step.get("input_tokens", 0))
out = h(step.get("output_tokens", 0))
cached = h(
step.get("cache_read_input_tokens", 0) + step.get("cache_creation_input_tokens", 0)
)
cache_rd = h(step.get("cache_read_input_tokens", 0))
cache_wr = h(step.get("cache_creation_input_tokens", 0))
wc = step.get("wall_clock_seconds", step.get("elapsed_seconds", 0.0))
lines.append(f"{name} x{count} [in:{inp} out:{out} cached:{cached} t:{wc:.1f}s]")
lines.append(
f"{name} x{count} [uc:{inp} out:{out} cr:{cache_rd} cw:{cache_wr} t:{wc:.1f}s]"
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[warning] cohesion: Asymmetric abbreviation: per-step compact KV uses short labels uc:, cr:, cw: but total summary lines use full total_uncached, total_cache_read, total_cache_write. The same asymmetry appears in pretty_output.py _fmt_get_token_summary. Either abbreviate the totals consistently or expand the per-step labels to match.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Investigated — this is intentional. The per-step compact format [uc:X out:X cr:X cw:X t:Xs] is space-constrained inline; totals are standalone KV pairs where full names (total_uncached, total_cache_read, total_cache_write) improve readability. This same pattern appears consistently in both telemetry_fmt.py::format_compact_kv and pretty_output.py::_fmt_get_token_summary, and is explicitly encoded as expected behavior in tests/pipeline/test_telemetry_formatter.py:239-246 (asserts both 'uc:' and 'total_cache_read:').

)
if total:
lines.append("")
lines.append(f"total_in: {h(total.get('input_tokens', 0))}")
lines.append(f"total_uncached: {h(total.get('input_tokens', 0))}")
lines.append(f"total_out: {h(total.get('output_tokens', 0))}")
cache_tokens = total.get("cache_read_input_tokens", 0) + total.get(
"cache_creation_input_tokens", 0
)
lines.append(f"total_cached: {h(cache_tokens)}")
lines.append(f"total_cache_read: {h(total.get('cache_read_input_tokens', 0))}")
lines.append(f"total_cache_write: {h(total.get('cache_creation_input_tokens', 0))}")
if mcp_responses:
mcp_total = mcp_responses.get("total", {})
if mcp_total:
Expand Down
40 changes: 34 additions & 6 deletions tests/infra/test_pretty_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,13 +376,14 @@ def test_format_get_token_summary_compact():
out, _ = _run_hook(event=event)
text = json.loads(out)["hookSpecificOutput"]["updatedMCPToolOutput"]
assert "token_summary" in text
# Assert specific compact line format: name xN [in:Xk out:Xk cached:XM t:Xs]
assert "investigate x1 [in:45.2k out:12.8k cached:1.2M t:0.0s]" in text
assert "make_plan x2 [in:30.0k out:8.0k cached:500.0k t:0.0s]" in text
assert "implement x1 [in:60.0k out:15.0k cached:2.0M t:0.0s]" in text
assert "total_in:" in text
# Assert specific compact line format: name xN [uc:Xk out:Xk cr:XM cw:XM t:Xs]
assert "investigate x1 [uc:45.2k out:12.8k cr:1.2M cw:0 t:0.0s]" in text
assert "make_plan x2 [uc:30.0k out:8.0k cr:0 cw:500.0k t:0.0s]" in text
assert "implement x1 [uc:60.0k out:15.0k cr:2.0M cw:0 t:0.0s]" in text
assert "total_uncached:" in text
assert "total_out:" in text
assert "total_cached:" in text
assert "total_cache_read:" in text
assert "total_cache_write:" in text


# PHK-16
Expand Down Expand Up @@ -1270,6 +1271,33 @@ def test_hook_token_summary_output_equivalent_to_canonical():
)


def test_fmt_run_skill_interactive_shows_four_token_fields():
"""_fmt_run_skill interactive mode shows all 4 token fields."""
data = {
"success": True,
"subtype": "COMPLETED",
"exit_code": 0,
"needs_retry": False,
"result": "done",
"token_usage": {
"input_tokens": 5000,
"output_tokens": 3000,
"cache_read_input_tokens": 200000,
"cache_creation_input_tokens": 8000,
},
}
rendered = _format_response(
"mcp__plugin_autoskillit_autoskillit__run_skill",
json.dumps({"result": json.dumps(data)}),
pipeline=False,
)
assert rendered is not None
assert "tokens_uncached:" in rendered
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[info] tests: Assertions use substring matching (`in rendered`) rather than checking exact field values. Asserting the exact rendered output (e.g. `tokens_cache_read: 200.0k`) would also catch value-formatting regressions.

assert "tokens_out:" in rendered
assert "tokens_cache_read:" in rendered
assert "tokens_cache_write:" in rendered


# ---------------------------------------------------------------------------
# Timing summary dedicated formatter
# ---------------------------------------------------------------------------
Expand Down
4 changes: 4 additions & 0 deletions tests/infra/test_token_summary_appender.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,10 @@ def subprocess_side_effect(args: list[str], **kwargs: object) -> MagicMock:
assert "open-pr" in body_content
# Total row
assert "**Total**" in body_content
# 4-column token headers
assert "uncached" in body_content
assert "cache_read" in body_content
assert "cache_write" in body_content


# ---------------------------------------------------------------------------
Expand Down
41 changes: 34 additions & 7 deletions tests/pipeline/test_telemetry_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ def test_produces_markdown_table(self) -> None:
result = TelemetryFormatter.format_token_table(_STEPS, _TOTAL)
assert "| Step |" in result
assert "|---" in result
assert "| uncached |" in result
assert "| cache_read |" in result
assert "| cache_write |" in result
assert "- input_tokens:" not in result
assert "# Token Summary" not in result

Expand Down Expand Up @@ -121,10 +124,10 @@ def test_snapshot(self) -> None:
[
"## Token Usage Summary",
"",
"| Step | input | output | cached | count | time |",
"|------|-------|--------|--------|-------|------|",
"| plan | 1.0k | 500 | 300 | 1 | 46s |",
"| **Total** | 1.0k | 500 | 300 | | 46s |",
"| Step | uncached | output | cache_read | cache_write | count | time |",
"|------|----------|--------|------------|-------------|-------|------|",
"| plan | 1.0k | 500 | 200 | 100 | 1 | 46s |",
"| **Total** | 1.0k | 500 | 200 | 100 | | 46s |",
]
)
assert result == expected
Expand Down Expand Up @@ -224,6 +227,29 @@ def test_format_timing_table_terminal_output_has_leading_indent() -> None:
# ---------------------------------------------------------------------------


def test_terminal_table_has_four_token_columns() -> None:
"""Terminal table must show UNCACHED, CACHE_RD, CACHE_WR column headers."""
result = TelemetryFormatter.format_token_table_terminal(_STEPS, _TOTAL)
assert "UNCACHED" in result
assert "CACHE_RD" in result
assert "CACHE_WR" in result


def test_compact_kv_four_token_prefixes() -> None:
"""Compact KV format must use uc:, cr:, cw: prefixes and split totals."""
result = TelemetryFormatter.format_compact_kv(_STEPS, _TOTAL)
assert "uc:" in result
assert "cr:" in result
assert "cw:" in result
assert "total_uncached:" in result
assert "total_cache_read:" in result
assert "total_cache_write:" in result
assert "in:" not in result
assert " cached:" not in result
assert "total_in:" not in result
assert "total_cached:" not in result


class TestFormatCompactKv:
def test_produces_compact_lines(self) -> None:
result = TelemetryFormatter.format_compact_kv(_STEPS, _TOTAL)
Expand All @@ -233,14 +259,15 @@ def test_produces_compact_lines(self) -> None:

def test_includes_humanized_tokens(self) -> None:
result = TelemetryFormatter.format_compact_kv(_STEPS, _TOTAL)
assert "in:7.0k" in result
assert "uc:7.0k" in result
assert "out:5.9k" in result

def test_includes_total_lines(self) -> None:
result = TelemetryFormatter.format_compact_kv(_STEPS, _TOTAL)
assert "total_in:" in result
assert "total_uncached:" in result
assert "total_out:" in result
assert "total_cached:" in result
assert "total_cache_read:" in result
assert "total_cache_write:" in result

def test_prefers_wall_clock_seconds(self) -> None:
result = TelemetryFormatter.format_compact_kv(_STEPS, _TOTAL)
Expand Down
Loading