Skip to content

Commit 607634f

Browse files
SStas and claude
committed
ClassEval latent: hybrid mode — prior method code as text + KV-cache thinking
Latent pipeline now includes prior generated method code in the prompt alongside KV-cache reasoning context. The model gets both: text for referencing exact signatures/attributes, KV-cache for thinking context. This addresses the design limitation where latent mode couldn't see what earlier methods actually produced. The text cost is minimal (~50-200 tokens of method bodies per step, prefill only).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 1e9b325 commit 607634f

File tree

2 files changed

+26
-9
lines changed

2 files changed

+26
-9
lines changed

benchmarks/classeval/agents.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,15 +82,23 @@ def build_latent_prompt(
8282
class_description: str,
8383
method_info: Dict,
8484
import_statement: str,
85+
prior_methods_text: str = "",
8586
) -> List[Dict[str, str]]:
8687
"""Build prompt for latent-chain incremental generation.
8788
88-
Prior context is carried via KV-cache, so the prompt only contains
89-
the skeleton and current method description.
89+
Reasoning context is carried via KV-cache. Prior method code is included
90+
as text so the model can reference signatures and attribute names.
9091
"""
9192
method_name = method_info["method_name"]
9293
method_desc = method_info.get("method_description", "")
9394

95+
prior_section = ""
96+
if prior_methods_text.strip():
97+
prior_section = (
98+
f"\n\n## Already implemented methods:\n"
99+
f"```python\n{prior_methods_text}\n```\n"
100+
)
101+
94102
user_content = (
95103
f"You are implementing the class below one method at a time. "
96104
f"Now implement the method `{method_name}`. "
@@ -100,6 +108,7 @@ def build_latent_prompt(
100108
f"## Required imports:\n{import_statement}\n\n"
101109
f"## Class skeleton:\n{skeleton}\n\n"
102110
f"## Method to implement:\n{method_name}: {method_desc}"
111+
f"{prior_section}"
103112
)
104113
return [
105114
{"role": "system", "content": SYSTEM_MESSAGE},

benchmarks/classeval/pipeline_latent.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
and current method description.
1313
"""
1414

15+
import copy
1516
import time
1617
import uuid
1718
from typing import Any, Dict, List
@@ -72,15 +73,19 @@ def run_latent_pipeline(
7273
total_output_tokens = 0
7374

7475
generated_methods: Dict[str, str] = {}
76+
prior_methods_text = "" # Accumulated code of prior methods
7577
past_kv = None # Accumulated KV-cache across methods
7678

7779
for step_idx, method_info in enumerate(methods_order):
7880
method_name = method_info["method_name"]
7981
agent_t0 = time.perf_counter()
8082

81-
# Build prompt for this method (no prior text context -- KV-cache has it)
83+
# Build prompt with prior method code as text reference + KV-cache
84+
# for reasoning context. The text lets the model reference exact
85+
# signatures and attribute names; the KV-cache carries thinking.
8286
messages = build_latent_prompt(
8387
skeleton, class_description, method_info, import_statement,
88+
prior_methods_text=prior_methods_text,
8489
)
8590
prompt_text = render_prompt(tokenizer, messages)
8691
input_ids, attention_mask = tokenize_prompt(
@@ -109,6 +114,10 @@ def run_latent_pipeline(
109114

110115
kv_seq_len = get_past_length(step_past_kv)
111116

117+
# Deep-copy KV-cache BEFORE generation — model.generate() mutates
118+
# DynamicCache in-place, appending generated token entries.
119+
past_kv = copy.deepcopy(step_past_kv)
120+
112121
# Generate the method text (we need the actual code for every method)
113122
method_text, gen_past_kv = generate_text(
114123
model, tokenizer, input_ids, attention_mask, device,
@@ -122,16 +131,15 @@ def run_latent_pipeline(
122131
output_tokens = len(output_encoded["input_ids"])
123132
total_output_tokens += output_tokens
124133

125-
# Carry generation KV-cache forward — later methods can attend to
126-
# what earlier methods actually produced (code, signatures, etc.),
127-
# not just the latent "thinking" context.
128-
past_kv = gen_past_kv
129-
130134
agent_time_ms = (time.perf_counter() - agent_t0) * 1000
131135

132-
# Extract method code
136+
# Extract method code and accumulate for next step's prompt
133137
method_code = extract_method_code(method_text, method_name)
134138
generated_methods[method_name] = method_code
139+
if prior_methods_text:
140+
prior_methods_text += "\n\n" + method_code
141+
else:
142+
prior_methods_text = method_code
135143

136144
agent_traces.append({
137145
"step": step_idx,

0 commit comments

Comments (0)