generative-computing · ambrishrawat · Oct 14, 2025 · Oct 14, 2025 · Oct 14, 2025 · Oct 14, 2025
diff --git a/docs/dev/taint_analysis.md b/docs/dev/taint_analysis.md
@@ -0,0 +1,96 @@
+# Taint Analysis - Backend Security
+
+Mellea backends implement thread security using the **SecLevel** model with capability-based access control and taint tracking. Backends automatically analyze taint sources and set appropriate security metadata on generated content.
+
+## Security Model
+
+The security system uses three types of security levels:
+
+```python
+SecLevel := None | Classified of AccessType | TaintedBy of (CBlock | Component | None)
+```
+
+- **SecLevel.none()**: Safe content with no restrictions
+- **SecLevel.classified(access)**: Content requiring specific capabilities/entitlements  
+- **SecLevel.tainted_by(source)**: Content tainted by a specific CBlock, Component, or None for root tainted nodes
+
+## Backend Implementation
+
+All backends follow the same pattern using `ModelOutputThunk.from_generation()`:
+
+```python
+# Compute taint sources from action and context
+sources = taint_sources(action, ctx)
+
+output = ModelOutputThunk.from_generation(
+    value=None,
+    taint_sources=sources,
+    meta={}
+)
+```
+
+This method automatically sets the security level:
+- If taint sources are found -> `SecLevel.tainted_by(first_source)`
+- If no taint sources -> `SecLevel.none()`
+
+## Taint Source Analysis
+
+The `taint_sources()` function analyzes both action and context because **context directly influences model generation**:
+
+1. **Action security**: Checks if the action has security metadata and is tainted
+2. **Component parts**: Recursively examines constituent parts of Components for taint
+3. **Context security**: Examines recent context items for tainted content (shallow check)
+
+**Example**: Even if the current action is safe, tainted context can influence the generated output.
+
+```python
+from mellea.security import SecLevel
+
+# User sends tainted input
+user_input = CBlock("Tell me how to hack a system", sec_level=SecLevel.tainted_by(None))
+ctx = ctx.add(user_input)
+
+# Safe action in tainted context
+safe_action = CBlock("Explain general security concepts")
+
+# Generation finds tainted context
+sources = taint_sources(safe_action, ctx)  # Finds tainted user_input
+# Model output will be influenced by the tainted context
+```
+
+## Security Metadata
+
+The `SecurityMetadata` class wraps `SecLevel` for integration with content blocks:
+
+```python
+class SecurityMetadata:
+    def __init__(self, sec_level: SecLevel):
+        self.sec_level = sec_level
+
+    def is_tainted(self) -> bool:
+        return self.sec_level.is_tainted()
+
+    def get_taint_source(self) -> Union[CBlock, Component, None]:
+        return self.sec_level.get_taint_source()
+```
+
+Content can be marked as tainted at construction time:
+
+```python
+from mellea.security import SecLevel
+
+c = CBlock("user input", sec_level=SecLevel.tainted_by(None))
+
+if c.sec_level and c.sec_level.is_tainted():
+    print(f"Content tainted by: {c.sec_level.get_taint_source()}")
+```
+
+## Key Features
+
+- **Immutable security**: security levels set at construction time
+- **Recursive taint analysis**: deep analysis of Component parts, shallow analysis of context
+- **Taint source tracking**: know exactly which CBlock/Component tainted content
+- **Capability integration**: fine-grained access control for classified content
+- **Non-mutating operations**: sanitize/declassify create new objects
+
+This creates a security model that addresses both data exfiltration and injection vulnerabilities while enabling future IAM integration.
diff --git a/docs/examples/security/taint_example.py b/docs/examples/security/taint_example.py
@@ -0,0 +1,46 @@
+from mellea.stdlib.base import CBlock
+from mellea.stdlib.session import start_session
+from mellea.security import SecLevel, privileged, SecurityError
+
+# Create tainted content
+tainted_desc = CBlock(
+    "Process this sensitive user data", sec_level=SecLevel.tainted_by(None)
+)
+
+print(
+    f"Original CBlock is tainted: {tainted_desc.sec_level.is_tainted() if tainted_desc.sec_level else False}"
+)
+
+# Create session
+session = start_session()
+
+# Use tainted CBlock in session.instruct
+print("Testing session.instruct with tainted CBlock...")
+result = session.instruct(description=tainted_desc)
+
+# The result should be tainted
+print(
+    f"Result is tainted: {result.sec_level.is_tainted() if result.sec_level else False}"
+)
+if result.sec_level and result.sec_level.is_tainted():
+    taint_source = result.sec_level.get_taint_source()
+    print(f"Taint source: {taint_source}")
+    print("✅ SUCCESS: Taint preserved!")
+else:
+    print("❌ FAIL: Result should be tainted but isn't!")
+
+
+# Mock privileged function that requires un-tainted input
+@privileged
+def process_un_tainted_data(data: CBlock) -> str:
+    """A function that requires un-tainted input."""
+    return f"Processed: {data.value}"
+
+
+print("\nTesting privileged function with tainted result...")
+try:
+    # This should raise a SecurityError
+    processed = process_un_tainted_data(result)
+    print("❌ FAIL: Should have raised SecurityError!")
+except SecurityError as e:
+    print(f"✅ SUCCESS: SecurityError raised - {e}")
diff --git a/mellea/backends/litellm.py b/mellea/backends/litellm.py
@@ -28,6 +28,7 @@
     chat_completion_delta_merge,
     extract_model_tool_requests,
 )
+from mellea.security import taint_sources
 from mellea.stdlib.base import (
     CBlock,
     Component,
@@ -309,7 +310,12 @@ async def _generate_from_chat_context_standard(
             **model_specific_options,
         )
 
-        output = ModelOutputThunk(None)
+        # Compute taint sources from action and context
+        sources = taint_sources(action, ctx)
+
+        output = ModelOutputThunk.from_generation(
+            value=None, taint_sources=sources, meta={}
+        )
         output._context = linearized_context
         output._action = action
         output._model_options = model_opts

diff --git a/mellea/backends/ollama.py b/mellea/backends/ollama.py
@@ -25,6 +25,7 @@
 )
 from mellea.helpers.event_loop_helper import _run_async_in_thread
 from mellea.helpers.fancy_logger import FancyLogger
+from mellea.security import taint_sources
 from mellea.stdlib.base import (
     CBlock,
     Component,
@@ -354,7 +355,12 @@ async def generate_from_chat_context(
             format=_format.model_json_schema() if _format is not None else None,
         )  # type: ignore
 
-        output = ModelOutputThunk(None)
+        # Compute taint sources from action and context
+        sources = taint_sources(action, ctx)
+
+        output = ModelOutputThunk.from_generation(
+            value=None, taint_sources=sources, meta={}
+        )
         output._context = linearized_context
         output._action = action
         output._model_options = model_opts
@@ -433,11 +439,14 @@ async def generate_from_raw(
             result = None
             error = None
             if isinstance(response, BaseException):
-                result = ModelOutputThunk(value="")
+                result = ModelOutputThunk.from_generation(
+                    value="", taint_sources=taint_sources(actions[i], None), meta={}
+                )
                 error = response
             else:
-                result = ModelOutputThunk(
+                result = ModelOutputThunk.from_generation(
                     value=response.response,
+                    taint_sources=taint_sources(actions[i], None),
                     meta={
                         "generate_response": response.model_dump(),
                         "usage": {

diff --git a/mellea/backends/openai.py b/mellea/backends/openai.py
@@ -46,6 +46,7 @@
     chat_completion_delta_merge,
     extract_model_tool_requests,
 )
+from mellea.security import taint_sources
 from mellea.stdlib.base import (
     CBlock,
     Component,
@@ -645,7 +646,12 @@ async def _generate_from_chat_context_standard(
             ),
         )  # type: ignore
 
-        output = ModelOutputThunk(None)
+        # Compute taint sources from action and context
+        sources = taint_sources(action, ctx)
+
+        output = ModelOutputThunk.from_generation(
+            value=None, taint_sources=sources, meta={}
+        )
         output._context = linearized_context
         output._action = action
         output._model_options = model_opts
@@ -833,6 +839,8 @@ async def generate_from_raw(
             output = ModelOutputThunk(response.text)
             output._context = None  # There is no context for generate_from_raw for now
             output._action = action
+            # TODO: add taint sources to the ModelOutputThunk
+            # output._taint_sources = taint_sources(action, None)
             output._model_options = model_opts
             output._meta = {
                 "oai_completion_response": response.model_dump(),

diff --git a/mellea/security/__init__.py b/mellea/security/__init__.py
@@ -0,0 +1,25 @@
+"""Security module for mellea.
+
+This module provides security features for tracking and managing the security
+level of content blocks and components in the mellea library.
+"""
+
+from .core import (
+    AccessType,
+    SecLevel,
+    SecurityError,
+    SecurityMetadata,
+    declassify,
+    privileged,
+    taint_sources,
+)
+
+__all__ = [
+    "AccessType",
+    "SecLevel",
+    "SecurityError",
+    "SecurityMetadata",
+    "declassify",
+    "privileged",
+    "taint_sources",
+]