novelist/state_manager.py at main · AxolDad/novelist · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
"""
state_manager.py — State Management (DB-Backed)

Handles all story state by wrapping db_manager.py.
Replaces legacy JSON file operations with SQLite transactions.
"""

import json
import re
import json
import re
from typing import Any, Dict, List, Optional, Tuple

import db_manager as db
from logger import logger
from config import (
    MANIFEST_FILE,
    STATE_EXCERPT_CHARS,
    CRITIC_MODEL,
    DEFAULT_TARGET_WORD_COUNT,
    CHAPTER_HISTORY_LIMIT,
    MAX_DRIFT_MARKERS,
    MAX_DRIFT_VOICE_NOTES
)
from file_utils import (
    safe_read_json,
    tail_excerpt,
    list_completed_scene_files
)
from ollama_client import call_ollama, extract_clean_json


# ------------------------------------------------------------------
#  WORD COUNT
# ------------------------------------------------------------------
def get_target_word_count(manifest: Dict[str, Any]) -> int:
    """Resolve the manuscript's target word count.

    Lookup order:
      1. ``manifest["target_word_count"]`` when it is an int.
      2. ``manifest["style"]["target_word_count"]`` when it is an int.
      3. ``DEFAULT_TARGET_WORD_COUNT`` otherwise, including when the
         manifest cannot be queried at all.
    """
    try:
        top_level = manifest.get("target_word_count")
        if isinstance(top_level, int):
            return int(top_level)

        # A missing or null "style" entry is treated as an empty section.
        style_section = manifest.get("style", {}) or {}
        nested = style_section.get("target_word_count")
        if isinstance(nested, int):
            return int(nested)
    except Exception:
        # Malformed manifest: fall through to the configured default.
        pass
    return DEFAULT_TARGET_WORD_COUNT


def compute_current_word_count(manifest: Optional[Dict[str, Any]] = None, manuscript_file_default: str = "") -> int:
    """Return the story's total word count, preferring the database.

    Args:
        manifest: Not read by this function; kept so existing callers work.
        manuscript_file_default: Not read by this function; kept so existing
            callers work.

    Returns:
        Whatever ``db.get_total_word_count()`` returns when that call
        succeeds (a result of ``0`` is returned as-is). Only when the DB call
        raises does the function fall back to scanning the files reported by
        ``list_completed_scene_files()`` and summing their word tokens;
        files that cannot be read are skipped.
    """
    try:
        return db.get_total_word_count()
    except Exception:
        # Best-effort: the table might not exist yet if the project is brand
        # new or not initialized, so fall through to the legacy file scan.
        pass

    word_pattern = re.compile(r"\b\w+\b")
    total = 0
    for fn in list_completed_scene_files():
        try:
            # Context manager guarantees the handle is closed even if the
            # read or decode fails part-way.
            with open(fn, "r", encoding="utf-8") as handle:
                total += len(word_pattern.findall(handle.read()))
        except Exception:
            # Deliberately best-effort: one unreadable scene file must not
            # abort the whole count.
            continue
    return total


# ------------------------------------------------------------------
#  ARC LEDGER
# ------------------------------------------------------------------
def seed_arc_ledger(manifest: Dict[str, Any]) -> Dict[str, Any]:
    """Assemble the arc ledger dict from values stored in the DB.

    ``manifest`` is accepted for interface compatibility but is not read.
    """
    ledger: Dict[str, Any] = {}
    ledger["theme"] = db.get_kv("arc_theme", "Unspecified")
    ledger["stakes"] = db.get_active_arc_items("stake")
    ledger["promises_to_reader"] = db.get_active_arc_items("promise")
    ledger["unresolved_questions"] = db.get_active_arc_items("question")
    # Only active items are queried, so delivered payoffs are not tracked
    # here and the list always starts empty.
    ledger["payoffs_delivered"] = []
    ledger["scene_history"] = db.get_recent_scene_history(CHAPTER_HISTORY_LIMIT)
    return ledger


def ensure_arc_ledger_schema(arc_ledger: Any, manifest: Dict[str, Any]) -> Dict[str, Any]:
    """Return a freshly loaded arc ledger.

    With DB-backed state there is no schema to repair: the ``arc_ledger``
    argument is ignored and the ledger is rebuilt via ``seed_arc_ledger``.
    """
    reloaded = seed_arc_ledger(manifest)
    return reloaded


def _clean_arc_additions(data: Dict[str, Any], key: str) -> List[str]:
    """Return the truthy entries of ``data[key]`` as strings.

    The value originates from model output, so it may be missing, ``None``,
    a bare string, or another non-list type. A bare string is treated as a
    single entry instead of being iterated character by character; any other
    non-list/tuple value yields no entries.
    """
    raw = data.get(key)
    if isinstance(raw, str):
        raw = [raw]
    elif not isinstance(raw, (list, tuple)):
        return []
    return [str(item) for item in raw if item]


def update_arc_ledger(
    arc_ledger: Dict[str, Any],
    title: str,
    micro_outline: Dict[str, Any],
    scene_text: str,
    filename: str = "" # New optional arg
) -> Dict[str, Any]:
    """
    Update the arc ledger based on a new scene and write the changes to DB.

    The critic model is asked for additive ledger updates. New stakes,
    promises and unresolved questions are inserted via ``db.add_arc_item``,
    and, when the model returns a ``scene_history_add`` object, the scene is
    recorded via ``db.log_scene``.

    Args:
        arc_ledger: Current ledger; only used to build the prompt and as the
            return value when the model output is unusable.
        title: Scene title, included in the prompt and in the scene log.
        micro_outline: Outline used for the scene; JSON-dumped into the prompt.
        scene_text: Full scene text; a tail excerpt goes into the prompt and
            the full text is logged.
        filename: Optional filename for the scene log. When empty, a name of
            the form ``scene_<word_count>.txt`` is used.

    Returns:
        ``arc_ledger`` unchanged when the model output is empty or not a JSON
        object; otherwise a ledger reloaded from the DB.
    """
    # Truncate scene_history for prompt efficiency (shallow copy so the
    # caller's ledger is not modified).
    arc_excerpt = dict(arc_ledger or {})
    history = arc_excerpt.get("scene_history")
    if history and len(history) > CHAPTER_HISTORY_LIMIT:
        arc_excerpt["scene_history"] = history[-CHAPTER_HISTORY_LIMIT:]

    prompt = f"""
Return JSON ONLY.

Update ARC LEDGER based on the new scene. Keep updates minimal and specific.
Do NOT invent giant plot turns unless clearly in the scene.

CURRENT ARC LEDGER (recent history only):
{json.dumps(arc_excerpt, indent=2)}

SCENE TITLE: {title}

MICRO-OUTLINE USED:
{json.dumps(micro_outline, indent=2)}

SCENE (tail excerpt):
{tail_excerpt(scene_text, STATE_EXCERPT_CHARS)}

OUTPUT JSON:
{{
  "stakes_add": [ ... ],
  "promises_add": [ ... ],
  "unresolved_add": [ ... ],
  "unresolved_resolved": [ ... ],
  "payoffs_add": [ ... ],
  "scene_history_add": {{
     "title": "...",
     "want": "...",
     "turn": "...",
     "consequence": "...",
     "new_pressure": "one line"
  }}
}}
"""
    out = call_ollama([{"role": "user", "content": prompt}], model=CRITIC_MODEL, json_mode=True)
    data = extract_clean_json(out)
    # The model may return nothing usable, or valid JSON that is not an object.
    if not data or not isinstance(data, dict):
        return arc_ledger

    # WRITE TO DB
    for response_key, item_kind in (
        ("stakes_add", "stake"),
        ("promises_add", "promise"),
        ("unresolved_add", "question"),
    ):
        for entry in _clean_arc_additions(data, response_key):
            db.add_arc_item(item_kind, entry)

    # "unresolved_resolved" and "payoffs_add" are requested in the prompt but
    # intentionally not persisted: resolving an item would require matching
    # it by string, and this function has no item IDs to match against.
    # Future improvement: fetch an ID map and mark items resolved.

    # LOG SCENE
    sh = data.get("scene_history_add")
    if isinstance(sh, dict):
        summary = f"{sh.get('want','')} -> {sh.get('turn','')}"
        consequence = sh.get('consequence', '')

        # Word count
        wc = len(re.findall(r"\b\w+\b", scene_text))

        db.log_scene(
            title=title,
            filename=filename or f"scene_{int(wc)}.txt", # Fallback if no filename
            content=scene_text,
            meta={
                "summary": summary,
                "consequence": consequence,
                "characters_present": [], # Not extracted here
                "word_count": wc,
                "tribunal_scores": {} # Not available in this function
            }
        )

    # Return refreshed object
    return seed_arc_ledger({})


# ------------------------------------------------------------------
#  CHARACTER BIBLE
# ------------------------------------------------------------------
def seed_character_bible(world_state: Dict[str, Any]) -> Dict[str, Any]:
    """Build the character bible from the DB.

    ``world_state`` is accepted for interface compatibility but is not read;
    the result wraps ``db.get_all_characters()`` under the ``"characters"`` key.
    """
    all_characters = db.get_all_characters()
    return {"characters": all_characters}


def _ensure_list(val: Any) -> List[Any]:
    """Coerce a stored or model-supplied value into a list.

    Handles ``None`` (empty list), real lists (returned as-is) and strings.
    A string is first tried as JSON: a JSON list is returned directly, and a
    JSON object is treated as a serialized profile from which the first
    present key among ``voice_notes``, ``behavioral_markers`` and
    ``hard_limits`` is extracted (empty list if none is present). Any other
    non-blank string becomes a single-item list. All other types yield an
    empty list.
    """
    if val is None:
        return []
    if isinstance(val, list):
        return val
    if not isinstance(val, str):
        return []

    try:
        parsed = json.loads(val)
    except (json.JSONDecodeError, TypeError):
        parsed = None

    if isinstance(parsed, list):
        return parsed
    if isinstance(parsed, dict):
        for key in ("voice_notes", "behavioral_markers", "hard_limits"):
            if key in parsed:
                return _ensure_list(parsed.get(key))
        return []

    # Not a JSON list/object: treat as a single item if non-blank.
    stripped = val.strip()
    return [stripped] if stripped else []


def _dedupe_strings(items: List[Any]) -> List[str]:
    """Stringify and strip ``items``, drop blanks, and dedupe preserving order."""
    cleaned = (str(item).strip() for item in items)
    return list(dict.fromkeys(text for text in cleaned if text))


def update_character_bible(
    char_bible: Dict[str, Any],
    scene_text: str,
    world_state: Dict[str, Any]
) -> Dict[str, Any]:
    """Update character bible with observed behavioral markers. Writes to DB.

    The critic model is shown a truncated view of the current bible, the
    world-state characters, and the first 1800 characters of the scene, and
    is asked for per-character additions. Each addition is merged with the
    character record loaded from the DB, deduplicated, capped, and saved via
    ``db.upsert_character``.

    Args:
        char_bible: Current bible; used for the prompt and returned unchanged
            when the model output is unusable.
        scene_text: Scene text; only its first 1800 characters are sent.
        world_state: World state; its ``"characters"`` mapping is JSON-dumped
            into the prompt.

    Returns:
        ``char_bible`` unchanged when the model output is empty, is not a JSON
        object, or has a non-object ``"updates"`` value; otherwise
        ``{"characters": db.get_all_characters()}`` re-read after the writes.
    """
    # Truncated view for the prompt. Values are coerced with _ensure_list so
    # that a JSON-encoded string is sliced by item rather than by character.
    bible_excerpt = {"characters": {}}
    for name, data in ((char_bible or {}).get("characters") or {}).items():
        if not isinstance(data, dict):
            continue
        bible_excerpt["characters"][name] = {
            "behavioral_markers": _ensure_list(data.get("behavioral_markers"))[-6:],
            "voice_notes": _ensure_list(data.get("voice_notes"))[-4:],
            "hard_limits": _ensure_list(data.get("hard_limits"))[-4:]
        }

    world_chars = dict((world_state or {}).get("characters") or {})

    prompt = f"""
Return JSON ONLY.

Update the character bible with *observed* behavioral markers from this scene.
Markers should be forensic-style cues (choices, tells, avoidance, tactics, sensory focus).
Do NOT use labels like "ADHD", "INFP", "narcissistic", etc.

CURRENT BIBLE (recent markers only):
{json.dumps(bible_excerpt, indent=2)}

WORLD STATE CHARACTERS:
{json.dumps(world_chars, indent=2)}

SCENE (excerpt):
{scene_text[:1800]}

OUTPUT JSON:
{{
  "updates": {{
     "<CharacterName>": {{
        "behavioral_markers_add": ["...", "..."],
        "voice_notes_add": ["..."],
        "hard_limits_add": ["..."]
     }}
  }}
}}
"""
    out = call_ollama([{"role": "user", "content": prompt}], model=CRITIC_MODEL, json_mode=True)
    data = extract_clean_json(out)
    if not data or not isinstance(data, dict):
        return char_bible

    updates = data.get("updates", {})
    if not isinstance(updates, dict):
        return char_bible

    # WRITE TO DB
    # Reload the full bible so additions are appended to what is stored,
    # not to the (possibly stale) copy the caller passed in.
    current_bible = db.get_all_characters()

    for name, upd in updates.items():
        # Model output is untrusted: skip entries that are not objects.
        if not isinstance(upd, dict):
            continue

        # Create a new empty character if unknown.
        c = current_bible.setdefault(name, {
            "role": "Unknown", "description": "",
            "behavioral_markers": [], "voice_notes": [], "hard_limits": [],
            "relationships": {}, "current_status": {}
        })

        # Merge lists with type coercion.
        bm = _ensure_list(c.get("behavioral_markers")) + _ensure_list(upd.get("behavioral_markers_add"))
        vn = _ensure_list(c.get("voice_notes")) + _ensure_list(upd.get("voice_notes_add"))
        hl = _ensure_list(c.get("hard_limits")) + _ensure_list(upd.get("hard_limits_add"))

        # NOTE(review): the caps below keep the *first* N entries, so once a
        # list is full, newly observed items (appended at the end) are dropped,
        # while the prompt excerpt above shows the *last* entries. Confirm
        # whether head or tail truncation is intended.
        c["behavioral_markers"] = _dedupe_strings(bm)[:MAX_DRIFT_MARKERS]
        c["voice_notes"] = _dedupe_strings(vn)[:MAX_DRIFT_VOICE_NOTES]
        c["hard_limits"] = _dedupe_strings(hl)[:MAX_DRIFT_VOICE_NOTES]

        # Save to DB.
        # Per the original author's notes, the schema has a `voice_notes`
        # TEXT column but no columns for behavioral markers or hard limits,
        # so all three lists are serialized together as one JSON "profile"
        # stored in `voice_notes`.
        # NOTE(review): whether db.get_all_characters() unpacks this profile
        # back into separate keys is not visible here - verify, otherwise
        # stored markers and hard limits are not merged on the next update.
        profile_json = {
            "behavioral_markers": c["behavioral_markers"],
            "voice_notes": c["voice_notes"],
            "hard_limits": c["hard_limits"]
        }

        db.upsert_character(name, {
            "role": c.get("role"),
            "description": c.get("description"),
            "voice_notes": json.dumps(profile_json), # Storing JSON in text column
            "relationships": c.get("relationships", {}),
            "current_status": c.get("current_status", {})
        })

    return {"characters": db.get_all_characters()}


# ------------------------------------------------------------------
#  AUTO-UPDATE STATE
# ------------------------------------------------------------------
def parse_state_update_block(model_response: str) -> Optional[Dict[str, Any]]:
    """Parse the UPDATE_STATE mapping from a fenced YAML block.

    Looks for the first ```yaml fenced block containing ``UPDATE_STATE:`` and
    loads it with ``yaml.safe_load``.

    Args:
        model_response: Raw model output that may contain the fenced block.

    Returns:
        The ``UPDATE_STATE`` mapping when the block parses to a mapping whose
        ``UPDATE_STATE`` value is itself a mapping. ``None`` in every other
        case: no block found, non-string input, YAML that fails to load
        (or PyYAML not importable), or a payload that is not a mapping, so
        callers can rely on getting a dict or ``None``.
    """
    if not isinstance(model_response, str):
        return None

    match = re.search(r"```yaml\n(.*?UPDATE_STATE:.*?)```", model_response, re.DOTALL)
    if match is None:
        return None

    try:
        # Imported lazily so a missing PyYAML only disables this feature.
        import yaml
        extracted = yaml.safe_load(match.group(1))
    except Exception:
        # Import failure or malformed YAML: treat as "no update".
        return None

    if not isinstance(extracted, dict):
        return None

    updates = extracted.get("UPDATE_STATE", {})
    # A scalar or list payload (e.g. "UPDATE_STATE: foo") is not a usable
    # set of key/value updates.
    return updates if isinstance(updates, dict) else None


def _as_inventory_items(value: Any) -> List[Any]:
    """Normalize an inventory update value into a list of items.

    ``None`` yields no items, a list/tuple yields its non-``None`` elements,
    and any other value is treated as a single item.
    """
    if value is None:
        return []
    if isinstance(value, (list, tuple)):
        return [item for item in value if item is not None]
    return [value]


def _load_inventory() -> List[Any]:
    """Read the stored inventory, coercing a non-list value into a list."""
    stored = db.get_kv("inventory", [])
    if isinstance(stored, list):
        return stored
    # A non-list stored value would break append/remove; keep it as one item.
    return [] if stored is None else [stored]


def update_story_state(state_file: str, model_response: str, verbose: bool = True) -> Tuple[bool, str]:
    """Scan a model response for an UPDATE_STATE block and apply it to the DB.

    Recognized keys: ``current_time`` and ``current_location`` are stored
    directly; ``add_inventory`` / ``remove_inventory`` add or remove one item
    (or each item of a list) from the stored ``inventory`` list. Every other
    key is stored as-is with ``db.set_kv``.

    Args:
        state_file: Not read by this function; kept so existing callers work.
        model_response: Raw model output to scan.
        verbose: When true, the summary message is logged at info level.

    Returns:
        ``(False, "No update.")`` when no usable update mapping is found,
        otherwise ``(True, message)`` where ``message`` summarizes the time,
        location and inventory changes that were applied.
    """
    updates = parse_state_update_block(model_response)
    if not updates or not isinstance(updates, dict):
        return False, "No update."

    msg_parts = []

    if "current_time" in updates:
        db.set_kv("current_time", updates["current_time"])
        msg_parts.append(f"Time: {updates['current_time']}")

    if "current_location" in updates:
        db.set_kv("current_location", updates["current_location"])
        msg_parts.append(f"Loc: {updates['current_location']}")

    if "add_inventory" in updates:
        inv = _load_inventory()
        added = []
        for item in _as_inventory_items(updates["add_inventory"]):
            if item not in inv:
                inv.append(item)
                added.append(item)
        # Only write and report when something actually changed.
        if added:
            db.set_kv("inventory", inv)
            msg_parts.append(f"+Inv: {', '.join(str(item) for item in added)}")

    if "remove_inventory" in updates:
        inv = _load_inventory()
        removed = []
        for item in _as_inventory_items(updates["remove_inventory"]):
            if item in inv:
                inv.remove(item)
                removed.append(item)
        if removed:
            db.set_kv("inventory", inv)
            msg_parts.append(f"-Inv: {', '.join(str(item) for item in removed)}")

    # Generic keys
    handled_keys = ("current_time", "current_location", "add_inventory", "remove_inventory")
    for k, v in updates.items():
        if k not in handled_keys:
            db.set_kv(k, v)

    msg = f"State Advanced: {', '.join(msg_parts)}"
    if verbose:
        logger.info(msg)
    return True, msg


def strip_state_update_block(text: str) -> str:
    """Remove every fenced YAML UPDATE_STATE block from ``text``.

    Whitespace directly following a removed block is dropped with it, and
    the result is stripped of leading and trailing whitespace.
    """
    fenced_block = re.compile(r"```yaml\n.*?UPDATE_STATE:.*?```\s*", re.DOTALL)
    without_blocks = fenced_block.sub("", text)
    return without_blocks.strip()


def strip_tribunal_scores(text: str) -> str:
    """Remove bracketed ``[Tribunal Score(s): ...]`` annotations from ``text``.

    Matching is case-insensitive and confined to a single line; the result
    is stripped of leading and trailing whitespace.
    """
    score_tag = re.compile(r"\[Tribunal Scores?:.*?\]", re.IGNORECASE)
    cleaned = score_tag.sub("", text)
    return cleaned.strip()