Skip to content

Commit 607634f

Browse files
SStas and claude
committed
ClassEval latent: hybrid mode — prior method code as text + KV-cache thinking
Latent pipeline now includes prior generated method code in the prompt alongside KV-cache reasoning context. The model gets both: text for referencing exact signatures/attributes, KV-cache for thinking context. This addresses the design limitation where latent mode couldn't see what earlier methods actually produced. The text cost is minimal (~50-200 tokens of method bodies per step, prefill only).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 1e9b325 commit 607634f

File tree

2 files changed

+26
-9
lines changed

2 files changed

+26
-9
lines changed

benchmarks/classeval/agents.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,15 +82,23 @@ def build_latent_prompt(
8282
class_description: str,
8383
method_info: Dict,
8484
import_statement: str,
85+
prior_methods_text: str = "",
8586
) -> List[Dict[str, str]]:
8687
"""Build prompt for latent-chain incremental generation.
8788
88-
Prior context is carried via KV-cache, so the prompt only contains
89-
the skeleton and current method description.
89+
Reasoning context is carried via KV-cache. Prior method code is included
90+
as text so the model can reference signatures and attribute names.
9091
"""
9192
method_name = method_info["method_name"]
9293
method_desc = method_info.get("method_description", "")
9394

95+
prior_section = ""
96+
if prior_methods_text.strip():
97+
prior_section = (
98+
f"\n\n## Already implemented methods:\n"
99+
f"```python\n{prior_methods_text}\n```\n"
100+
)
101+
94102
user_content = (
95103
f"You are implementing the class below one method at a time. "
96104
f"Now implement the method `{method_name}`. "
@@ -100,6 +108,7 @@ def build_latent_prompt(
100108
f"## Required imports:\n{import_statement}\n\n"
101109
f"## Class skeleton:\n{skeleton}\n\n"
102110
f"## Method to implement:\n{method_name}: {method_desc}"
111+
f"{prior_section}"
103112
)
104113
return [
105114
{"role": "system", "content": SYSTEM_MESSAGE},

benchmarks/classeval/pipeline_latent.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
and current method description.
1313
"""
1414

15+
import copy
1516
import time
1617
import uuid
1718
from typing import Any, Dict, List
@@ -72,15 +73,19 @@ def run_latent_pipeline(
7273
total_output_tokens = 0
7374

7475
generated_methods: Dict[str, str] = {}
76+
prior_methods_text = "" # Accumulated code of prior methods
7577
past_kv = None # Accumulated KV-cache across methods
7678

7779
for step_idx, method_info in enumerate(methods_order):
7880
method_name = method_info["method_name"]
7981
agent_t0 = time.perf_counter()
8082

81-
# Build prompt for this method (no prior text context -- KV-cache has it)
83+
# Build prompt with prior method code as text reference + KV-cache
84+
# for reasoning context. The text lets the model reference exact
85+
# signatures and attribute names; the KV-cache carries thinking.
8286
messages = build_latent_prompt(
8387
skeleton, class_description, method_info, import_statement,
88+
prior_methods_text=prior_methods_text,
8489
)
8590
prompt_text = render_prompt(tokenizer, messages)
8691
input_ids, attention_mask = tokenize_prompt(
@@ -109,6 +114,10 @@ def run_latent_pipeline(
109114

110115
kv_seq_len = get_past_length(step_past_kv)
111116

117+
# Deep-copy KV-cache BEFORE generation — model.generate() mutates
118+
# DynamicCache in-place, appending generated token entries.
119+
past_kv = copy.deepcopy(step_past_kv)
120+
112121
# Generate the method text (we need the actual code for every method)
113122
method_text, gen_past_kv = generate_text(
114123
model, tokenizer, input_ids, attention_mask, device,
@@ -122,16 +131,15 @@ def run_latent_pipeline(
122131
output_tokens = len(output_encoded["input_ids"])
123132
total_output_tokens += output_tokens
124133

125-
# Carry generation KV-cache forward — later methods can attend to
126-
# what earlier methods actually produced (code, signatures, etc.),
127-
# not just the latent "thinking" context.
128-
past_kv = gen_past_kv
129-
130134
agent_time_ms = (time.perf_counter() - agent_t0) * 1000
131135

132-
# Extract method code
136+
# Extract method code and accumulate for next step's prompt
133137
method_code = extract_method_code(method_text, method_name)
134138
generated_methods[method_name] = method_code
139+
if prior_methods_text:
140+
prior_methods_text += "\n\n" + method_code
141+
else:
142+
prior_methods_text = method_code
135143

136144
agent_traces.append({
137145
"step": step_idx,

0 commit comments

Comments (0)