diff --git a/apps/railwaycopilot/.gitignore b/apps/railwaycopilot/.gitignore
new file mode 100644
index 0000000..e26ecaa
--- /dev/null
+++ b/apps/railwaycopilot/.gitignore
@@ -0,0 +1,5 @@
+**/__pycache__
+**/__init___.py
+.env
+.DS_Store
+**/.DS_Store
\ No newline at end of file
diff --git a/apps/railwaycopilot/backend/Dockerfile b/apps/railwaycopilot/backend/Dockerfile
new file mode 100644
index 0000000..511169d
--- /dev/null
+++ b/apps/railwaycopilot/backend/Dockerfile
@@ -0,0 +1,10 @@
+FROM python:3.11-slim
+
+WORKDIR /app
+
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+
+CMD ["streamlit", "run", "app.py", "--server.address=0.0.0.0", "--server.port=8501"]
diff --git a/apps/railwaycopilot/backend/app.py b/apps/railwaycopilot/backend/app.py
new file mode 100644
index 0000000..6f48ece
--- /dev/null
+++ b/apps/railwaycopilot/backend/app.py
@@ -0,0 +1,83 @@
+import os
+import streamlit as st
+
+from langchain_mistralai import ChatMistralAI, MistralAIEmbeddings
+from langchain.docstore.document import Document
+
+from rail_rag.config import (
+    MONGODB_URI,
+    EMBED_MODEL,
+    CHAT_MODEL,
+)
+from rail_rag.index_utils import get_mongo_collection
+from rail_rag.retriever import MongoAtlasRetriever
+from rail_rag.ui import render_prompt_lab
+from rail_rag.generation import run_generation
+from rail_rag.classifier import classify_text
+
+# --- Streamlit UI chrome ---
+st.set_page_config(page_title="Rail Ops & Safety Assistant", page_icon="🚆", layout="wide")
+st.title("🚆 Rail Operations & Safety Assistant (MongoDB + LangChain + Mistral)")
+
+# Sidebar: Prompt Lab (returns all user choices + composed system prompt)
+lab = render_prompt_lab()
+
+if not os.getenv("MISTRAL_API_KEY"):
+    st.error("Missing `MISTRAL_API_KEY` in environment.")
+    st.stop()
+
+if not MONGODB_URI:
+    st.error("Missing `MONGODB_URI` in environment.")
+    st.stop()
+
+colA, colB, colC = st.columns([2, 1, 1])
+with colA:
+    q = st.text_input(
+        "Ask a question (e.g., 'What must a signaller do when going off duty?')",
+        "",
+    )
+with colB:
+    top_k = st.slider("Top-K chunks", 1, 10, 4, 1)
+with colC:
+    show_debug = st.toggle("Show debug", value=False)
+
+# Connect resources (MongoDB collection + embeddings + retriever)
+try:
+    collection = get_mongo_collection()
+except Exception as e:
+    st.exception(e)
+    st.stop()
+
+embedder = MistralAIEmbeddings(model=EMBED_MODEL)
+retriever = MongoAtlasRetriever(collection=collection, embedder=embedder, k=top_k)
+
+# No chat model is created here: run_generation() builds its own ChatMistralAI
+# from CHAT_MODEL plus the Prompt Lab temperature / max-token settings.
+if q:
+    try:
+        retrieved = retriever.invoke(q)
+
+        if show_debug:
+            with st.expander("🔎 Retrieved docs (debug)"):
+                for i, d in enumerate(retrieved, 1):
+                    st.write(f"{i}. meta = {d.metadata}")
+                    st.write((d.page_content or "")[:300] + "…")
+
+        if not retrieved:
+            st.warning(
+                "No documents retrieved. "
+                "Check MongoDB URI / DB / collection / vector index / field names."
+            )
+            st.stop()
+
+        # Full prompt-building + A/B + rendering (answers + sources)
+        run_generation(
+            question=q,
+            retrieved=retrieved,
+            chat_model_name=CHAT_MODEL,
+            lab=lab,
+        )
+
+    except Exception as e:
+        st.exception(e)
+        st.stop()
diff --git a/apps/railwaycopilot/backend/corpus/04. GERT8000_HB10.pdf b/apps/railwaycopilot/backend/corpus/04. GERT8000_HB10.pdf
new file mode 100644
index 0000000..f4d2500
Binary files /dev/null and b/apps/railwaycopilot/backend/corpus/04. GERT8000_HB10.pdf differ
diff --git a/apps/railwaycopilot/backend/corpus/Rules on walking on or near the line.pdf b/apps/railwaycopilot/backend/corpus/Rules on walking on or near the line.pdf
new file mode 100644
index 0000000..5091510
Binary files /dev/null and b/apps/railwaycopilot/backend/corpus/Rules on walking on or near the line.pdf differ
diff --git a/apps/railwaycopilot/backend/corpus/September 2024 Standards Update.pdf b/apps/railwaycopilot/backend/corpus/September 2024 Standards Update.pdf
new file mode 100644
index 0000000..2dccb92
Binary files /dev/null and b/apps/railwaycopilot/backend/corpus/September 2024 Standards Update.pdf differ
diff --git a/apps/railwaycopilot/backend/corpus/TS1_-_General_signalling_regulations_v18.pdf b/apps/railwaycopilot/backend/corpus/TS1_-_General_signalling_regulations_v18.pdf
new file mode 100644
index 0000000..e2283e5
Binary files /dev/null and b/apps/railwaycopilot/backend/corpus/TS1_-_General_signalling_regulations_v18.pdf differ
diff --git a/apps/railwaycopilot/backend/ingest_rulebook.py b/apps/railwaycopilot/backend/ingest_rulebook.py
new file mode 100644
index 0000000..feab459
--- /dev/null
+++ b/apps/railwaycopilot/backend/ingest_rulebook.py
@@ -0,0 +1,214 @@
+import os
+from glob import glob
+from typing import List, Dict
+import numpy as np
+import requests
+import certifi
+
+from pymongo import MongoClient
+from pymongo.errors import OperationFailure
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from langchain_community.document_loaders import PyPDFLoader
+from pymongo.errors import OperationFailure
+
+MONGODB_URI   = os.getenv("MONGODB_URI")  # required; main() exits if unset
+DB_NAME       = os.getenv("DB_NAME", "rail_ops")
+COLL_NAME     = os.getenv("COLLECTION_NAME", "rulebook_chunks")
+
+EMBED_MODEL   = os.getenv("MISTRAL_EMBED_MODEL", "mistral-embed")
+EMBED_DIM     = 1024  # expected vector length; build_records() rejects any other size
+MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
+
+CORPUS_DIR    = os.getenv("CORPUS_DIR", "corpus")
+
+# Field names of each document that build_records() produces for MongoDB
+TEXT_KEY  = "content"
+VEC_KEY   = "content_vector"
+SRC_KEY   = "source"
+PAGE_KEY  = "page"
+
+
+class SimpleMistralEmbedder:
+    """Minimal client for Mistral's REST embeddings endpoint."""
+    def __init__(self, model: str, api_key: str):
+        if not api_key:
+            raise RuntimeError("Missing MISTRAL_API_KEY in environment.")
+        self.model, self.api_key = model, api_key
+        self.url = "https://api.mistral.ai/v1/embeddings"
+        self.session = requests.Session()  # reused so every batch shares one HTTP session
+        self.headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
+
+    def embed_documents(self, texts: List[str], batch_size: int = 64) -> List[List[float]]:
+        """Embed texts in order, batch_size (> 0) per request; RuntimeError on a bad response."""
+        vectors: List[List[float]] = []
+        for start in range(0, len(texts), batch_size):  # one huge request risks API input limits
+            payload = {"model": self.model, "input": texts[start:start + batch_size]}
+            r = self.session.post(self.url, headers=self.headers, json=payload, timeout=60)
+            try:
+                j = r.json()
+            except Exception as err:  # body was not JSON; surface status + raw text instead
+                raise RuntimeError(f"Embeddings HTTP {r.status_code}: {r.text[:500]}") from err
+            if "data" in j and isinstance(j["data"], list):  # accept multiple shapes to be version-tolerant
+                vectors.extend(item["embedding"] for item in j["data"])
+            elif "embeddings" in j and isinstance(j["embeddings"], list):
+                vectors.extend(j["embeddings"])
+            else:  # explicit API error if present, otherwise report the unknown shape
+                raise RuntimeError(f"Mistral embeddings error: {j['error']}" if "error" in j else f"Unexpected embeddings response shape: {str(j)[:500]}")
+        return vectors
+
+    def embed_query(self, text: str) -> List[float]:
+        """Embed a single string and return its vector."""
+        return self.embed_documents([text])[0]
+
+# ---------------------------------------------------------------------
+# Data loading & chunking
+# ---------------------------------------------------------------------
+def load_docs(corpus_dir: str):
+    """Load every page of each *.pdf directly inside corpus_dir, tagging source file name and page."""
+    pages = []
+    for pdf_path in glob(os.path.join(corpus_dir, "*.pdf")):
+        for page_doc in PyPDFLoader(pdf_path).load():
+            page_doc.metadata.update({SRC_KEY: os.path.basename(pdf_path), PAGE_KEY: page_doc.metadata.get("page")})
+            pages.append(page_doc)
+    return pages
+
+def chunk_docs(docs):
+    """Split loaded documents into overlapping chunks for embedding."""
+    # add_start_index=True asks the splitter to record each chunk's offset
+    # within its source document in the chunk metadata.
+    settings = {"chunk_size": 800, "chunk_overlap": 120, "add_start_index": True}
+    splitter = RecursiveCharacterTextSplitter(**settings)
+    return splitter.split_documents(docs)
+
+# ---------------------------------------------------------------------
+# Build Mongo-ready documents
+# ---------------------------------------------------------------------
+def build_records(chunks, embedder: SimpleMistralEmbedder) -> List[Dict]:
+    """Embed chunks and return one Mongo-ready dict per chunk.
+
+    Raises ValueError if the embedder returns the wrong number of vectors or a
+    vector whose length is not EMBED_DIM.
+    """
+    texts = [c.page_content or "" for c in chunks]
+    vectors = embedder.embed_documents(texts)
+    if len(vectors) != len(texts):
+        # A short response would otherwise surface as a bare IndexError mid-loop.
+        raise ValueError(f"Got {len(vectors)} embeddings for {len(texts)} chunks")
+    recs = []
+    for c, text, vec in zip(chunks, texts, vectors):
+        if len(vec) != EMBED_DIM:
+            raise ValueError(f"Unexpected embedding dim {len(vec)} (expected {EMBED_DIM})")
+        # Mongo expects an array of numbers
+        rec = {TEXT_KEY: text, VEC_KEY: [float(x) for x in vec], SRC_KEY: c.metadata.get(SRC_KEY)}
+        page_val = c.metadata.get(PAGE_KEY)
+        if page_val is not None:
+            try:
+                rec[PAGE_KEY] = int(page_val)
+            except (TypeError, ValueError, OverflowError):
+                rec[PAGE_KEY] = -1  # unparseable page number; keep the record anyway
+        recs.append(rec)
+    return recs
+
+# ---------------------------------------------------------------------
+# Ensure Atlas Vector Search index exists (vectorSearch)
+# ---------------------------------------------------------------------
+def ensure_vector_index(coll, index_name="vector_index"):
+    """Create the Atlas Vector Search index if it does not already exist.
+
+    The index covers VEC_KEY (EMBED_DIM dimensions, cosine similarity) with
+    SRC_KEY and PAGE_KEY as filter fields. Listing or creation failures are
+    printed rather than raised, so the caller can carry on regardless.
+
+    Args:
+        coll: pymongo collection to index.
+        index_name: name of the search index to look for / create.
+    """
+    print(f"[info] Checking existing search indexes on {coll.full_name}…")
+    existing = []
+    try:
+        existing = list(coll.aggregate([{"$listSearchIndexes": {}}]))
+    except OperationFailure as e:
+        print(f"[warn] $listSearchIndexes not supported or failed: {e}")
+    except Exception as e:
+        print(f"[warn] Unexpected error listing search indexes: {e}")
+
+    if any(idx.get("name") == index_name for idx in existing):
+        print(f"[info] Search index '{index_name}' already exists.")
+        return
+
+    print(f"[info] Creating VECTOR SEARCH index '{index_name}'…")
+
+    # Field paths and dimension come from the module constants so the index
+    # always matches the documents build_records() writes.
+    definition = {
+        "name": index_name,
+        "type": "vectorSearch",
+        "definition": {
+            "fields": [
+                {
+                    "type": "vector",
+                    "path": VEC_KEY,
+                    "numDimensions": EMBED_DIM,
+                    "similarity": "cosine",
+                },
+                {"type": "filter", "path": SRC_KEY},
+                {"type": "filter", "path": PAGE_KEY},
+            ]
+        },
+    }
+
+    try:
+        result = coll.database.command({
+            "createSearchIndexes": coll.name,
+            "indexes": [definition],
+        })
+        print(f"[info] createSearchIndexes result: {result}")
+    except Exception as e:
+        print(f"[error] Failed to create search index '{index_name}': {e}")
+
+# ---------------------------------------------------------------------
+def main():
+    """Load, chunk and embed the corpus PDFs, then (re)load them into MongoDB."""
+    if not MISTRAL_API_KEY:
+        raise SystemExit("Missing MISTRAL_API_KEY in environment!")
+    if not MONGODB_URI:
+        raise SystemExit("Missing MONGODB_URI in environment!")
+
+    # 1) Load & chunk PDFs
+    docs = load_docs(CORPUS_DIR)
+    if not docs:
+        raise SystemExit(f"No PDFs found in '{CORPUS_DIR}'")
+    chunks = chunk_docs(docs)
+
+    # 2) Embed
+    embedder = SimpleMistralEmbedder(model=EMBED_MODEL, api_key=MISTRAL_API_KEY)
+    records = build_records(chunks, embedder)
+
+    # 3) Connect to MongoDB Atlas (certifi supplies the CA bundle for TLS)
+    client = MongoClient(MONGODB_URI, tlsCAFile=certifi.where())
+    try:
+        coll = client[DB_NAME][COLL_NAME]
+        # 4) Create / ensure vector index
+        try:
+            ensure_vector_index(coll, index_name="vector_index")
+        except Exception as e:
+            # If running locally (no Atlas) or on an older server this may fail; ingestion can still proceed.
+            print(f"[warn] Could not ensure vector index now: {e}")
+
+        # 5) Fresh load: optional cleanup for a clean re-ingest
+        if os.getenv("FRESH_LOAD", "true").lower() in ("1", "true", "yes"):
+            coll.delete_many({})
+
+        # 6) Insert records in batches
+        if records:
+            BATCH = 500
+            for i in range(0, len(records), BATCH):
+                coll.insert_many(records[i:i+BATCH])
+            print(f"[✅] Ingested {len(records)} chunks into '{DB_NAME}.{COLL_NAME}'")
+        else:
+            print("[ℹ️] No records to insert.")
+    finally:
+        client.close()  # release pooled connections even if a step above fails
+
+if __name__ == "__main__":
+    main()
diff --git a/apps/railwaycopilot/backend/rail_rag/__init__.py b/apps/railwaycopilot/backend/rail_rag/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/apps/railwaycopilot/backend/rail_rag/classifier.py b/apps/railwaycopilot/backend/rail_rag/classifier.py
new file mode 100644
index 0000000..bc8d98c
--- /dev/null
+++ b/apps/railwaycopilot/backend/rail_rag/classifier.py
@@ -0,0 +1,41 @@
+from langchain_mistralai import ChatMistralAI
+import json
+
+CLASSIFIER_SYSTEM_PROMPT = """You are a classification assistant for rail operations and safety.
+Classify the input into one of these intents:
+- informational
+- procedural
+- compliance
+- safety_critical
+- other
+Respond ONLY in JSON like:
+{"intent": "..."}.
+"""
+
+llm_classifier = ChatMistralAI(model="mistral-small-latest", temperature=0.0)  # created once at import; used by classify_text()
+
+def classify_text(text: str) -> dict:
+    """Ask the classifier LLM for the intent of text and return it as a dict.
+
+    The model is told to reply with JSON like {"intent": "..."}. A Markdown code
+    fence around the JSON is stripped first; if nothing parses, the cleaned reply
+    text itself is returned as the intent.
+    """
+    messages = [("system", CLASSIFIER_SYSTEM_PROMPT), ("human", text)]
+    raw = llm_classifier.invoke(messages).content.strip()
+    if raw.startswith("```"):
+        # Chat models often wrap JSON in a fenced json block despite the prompt.
+        raw = raw.strip("`").strip()
+        if raw.lower().startswith("json"):
+            raw = raw[4:].strip()
+    candidates = [raw]
+    if raw.startswith("{") and raw.endswith("}"):
+        candidates.append(raw.replace("'", '"'))  # retry single-quoted pseudo-JSON
+    # First candidate that parses wins; non-dict JSON is coerced to an intent string.
+    for candidate in candidates:
+        try:
+            parsed = json.loads(candidate)
+        except ValueError:
+            continue
+        return parsed if isinstance(parsed, dict) else {"intent": str(parsed)}
+    return {"intent": raw}
diff --git a/apps/railwaycopilot/backend/rail_rag/config.py b/apps/railwaycopilot/backend/rail_rag/config.py
new file mode 100644
index 0000000..d459cb9
--- /dev/null
+++ b/apps/railwaycopilot/backend/rail_rag/config.py
@@ -0,0 +1,22 @@
+import os
+
+MONGODB_URI          = os.getenv("MONGODB_URI")  # required; app.py stops with an error if unset
+MONGO_DB_NAME        = os.getenv("DB_NAME", "rail_ops")
+MONGO_COLLECTION_NAME = os.getenv("COLLECTION_NAME", "rulebook_chunks")
+VECTOR_INDEX_NAME    = os.getenv("VECTOR_INDEX_NAME", "vector_index")
+
+# --- Mistral models ---
+EMBED_MODEL = os.getenv("MISTRAL_EMBED_MODEL", "mistral-embed")
+CHAT_MODEL  = os.getenv("MISTRAL_CHAT_MODEL", "mistral-small-latest")
+
+# --- Field names ---
+TEXT_KEY = "content"
+VEC_KEY  = "content_vector"
+SRC_KEY  = "source"
+PAGE_KEY = "page"
+# --- Base system prompt; prompt_presets.py extends it for each preset ---
+SYSTEM_PROMPT = """You are a Rail Operations & Safety assistant.
+Answer ONLY using the provided context.
+If the answer is not in the context, say “I don’t have that in the documents.”
+Cite sources as (filename p.page). Be concise and correct. Do not reveal internal reasoning steps.
+"""
diff --git a/apps/railwaycopilot/backend/rail_rag/generation.py b/apps/railwaycopilot/backend/rail_rag/generation.py
new file mode 100644
index 0000000..e0fb92d
--- /dev/null
+++ b/apps/railwaycopilot/backend/rail_rag/generation.py
@@ -0,0 +1,102 @@
+import streamlit as st
+from typing import List, Tuple
+from langchain_mistralai import ChatMistralAI
+from langchain.docstore.document import Document
+from rail_rag.prompt_utils import build_context, build_messages, build_system_prompt
+from rail_rag.classifier import classify_text
+
+
+def _human_preview(question_text: str, context_text: str, limit: int = 1200) -> str:
+    shown = context_text[:limit] + "…" if len(context_text) > limit else context_text  # clip long context for display
+    return f"Question: {question_text}\n\nContext:\n{shown}"
+
+def run_generation(
+    question: str,
+    retrieved: List[Document],
+    chat_model_name: str,
+    lab: dict,
+):
+    """Classify the question, generate the answer(s) and render them with Streamlit.
+
+    Args:
+        question: the user's question.
+        retrieved: documents used as context and listed under "Sources".
+        chat_model_name: model name passed to ChatMistralAI.
+        lab: Prompt Lab settings; every key read below must be present.
+    """
+    active_system_prompt = lab["active_system_prompt"]
+    temperature = lab["temperature"]
+    max_tokens = lab["max_tokens"]
+    fewshot_pairs: List[Tuple[str, str]] = lab["fewshot_pairs"]
+    ab_test = lab["ab_test"]
+    base_prompt = lab["base_prompt"]
+    refuse_if_ooc = lab["refuse_if_ooc"]
+    extra_instructions = lab["extra_instructions"]
+
+    ctx = build_context(retrieved)
+    messages_a = build_messages(
+        question=question,
+        context=ctx,
+        system_prompt=active_system_prompt,
+        few_shots=fewshot_pairs,
+    )
+    llm = ChatMistralAI(model=chat_model_name, temperature=temperature, max_tokens=max_tokens)
+
+    # classification: the reply's "intent" can be missing, null or a non-string,
+    # so coerce it to text before calling .capitalize().
+    classification = classify_text(question)
+    intent = str(classification.get("intent") or "Unknown").capitalize()
+
+    st.subheader("🧭 Query Classification")
+    st.markdown(f"**Intent:** {intent}")
+    st.divider()
+
+    with st.expander("🧾 Prompts used (A/B Preview)", expanded=False):
+        st.markdown("### Prompt A — System")
+        st.code(active_system_prompt, language="markdown")
+        st.markdown("**Prompt A — Human**")
+        st.code(_human_preview(question, ctx), language="markdown")
+
+        messages_b = None
+        alt_system_prompt = None
+        if ab_test:
+            # Prompt B: same preset, but citations, bullets and structure are forced on.
+            alt_options = {"force_citations": True, "refuse_if_ooc": refuse_if_ooc, "bulleted_style": True, "structured_style": True}
+            alt_system_prompt = build_system_prompt(base_prompt, alt_options, extra_instructions)
+            messages_b = build_messages(
+                question=question,
+                context=ctx,
+                system_prompt=alt_system_prompt,
+                few_shots=fewshot_pairs,
+            )
+
+            st.markdown("---")
+            st.markdown("### Prompt B — System")
+            st.code(alt_system_prompt, language="markdown")
+            st.markdown("**Prompt B — Human**")
+            st.code(_human_preview(question, ctx), language="markdown")
+
+    if not ab_test:
+        with st.spinner("Thinking…"):
+            ans_a = llm.invoke(messages_a)
+        st.subheader("Answer")
+        st.write(ans_a.content)
+    else:
+        col1, col2 = st.columns(2)
+        with col1:
+            st.markdown("### Prompt A")
+            with st.spinner("Running A…"):
+                ans_a = llm.invoke(messages_a)
+            st.write(ans_a.content)
+        with col2:
+            st.markdown("### Prompt B")
+            with st.spinner("Running B…"):
+                ans_b = llm.invoke(messages_b)
+            st.write(ans_b.content)
+
+    st.subheader("Sources")
+    for i, d in enumerate(retrieved, 1):
+        src = d.metadata.get("source", "document")
+        page = d.metadata.get("page", "n/a")
+        st.markdown(f"**{i}. {src} — p.{page}**")
+        st.write((d.page_content or "")[:400] + "…")
diff --git a/apps/railwaycopilot/backend/rail_rag/index_utils.py b/apps/railwaycopilot/backend/rail_rag/index_utils.py
new file mode 100644
index 0000000..82e8fe8
--- /dev/null
+++ b/apps/railwaycopilot/backend/rail_rag/index_utils.py
@@ -0,0 +1,30 @@
+from typing import Optional
+
+from pymongo import MongoClient
+from pymongo.collection import Collection
+
+from rail_rag.config import (
+    MONGODB_URI,
+    MONGO_DB_NAME,
+    MONGO_COLLECTION_NAME,
+)
+
+
+_CLIENTS = {}  # uri -> MongoClient, so repeated calls share one connection pool
+
+def get_mongo_collection(
+    uri: Optional[str] = None,
+    db_name: Optional[str] = None,
+    coll_name: Optional[str] = None,
+) -> Collection:
+    """Return collection coll_name of db_name on uri (config defaults apply); RuntimeError if no URI is set."""
+    uri = uri or MONGODB_URI
+    if not uri:
+        raise RuntimeError("Missing MONGODB_URI in environment.")
+    # Reuse one client per URI: a fresh MongoClient on every call opens a new pool each time.
+    client = _CLIENTS.get(uri)
+    if client is None:
+        client = _CLIENTS[uri] = MongoClient(uri)
+    db = client[db_name or MONGO_DB_NAME]
+    return db[coll_name or MONGO_COLLECTION_NAME]
+
diff --git a/apps/railwaycopilot/backend/rail_rag/prompt_presets.py b/apps/railwaycopilot/backend/rail_rag/prompt_presets.py
new file mode 100644
index 0000000..5c66781
--- /dev/null
+++ b/apps/railwaycopilot/backend/rail_rag/prompt_presets.py
@@ -0,0 +1,24 @@
+from rail_rag.config import SYSTEM_PROMPT as BASE_SYSTEM_PROMPT
+
+PRESETS = {  # preset display name -> system prompt text (base prompt plus preset-specific lines)
+    "Baseline (Original)": BASE_SYSTEM_PROMPT,
+    "Strict Compliance": (
+        BASE_SYSTEM_PROMPT
+        + "\nAlways refuse to answer if information is missing from context."
+        + "\nUse short, numbered steps when appropriate."
+    ),
+    "Operator Brief (Bulleted)": (
+        BASE_SYSTEM_PROMPT
+        + "\nRespond in crisp bullet points suitable for radio/ops briefings."
+        + "\nIf a rule references a page, include it inline after the bullet."
+    ),
+    "Incident Response (Structured)": (
+        BASE_SYSTEM_PROMPT
+        + "\nOrganize output as: Situation, Applicable Rules, Required Actions, Sources."
+    ),
+    "Trainer Mode (Explain & Cite)": (
+        BASE_SYSTEM_PROMPT
+        + "\nExplain the rule briefly (1–2 sentences) and then summarize the action."
+        + "\nAlways include a Sources section with (filename p.page)."
+    ),
+}
diff --git a/apps/railwaycopilot/backend/rail_rag/prompt_utils.py b/apps/railwaycopilot/backend/rail_rag/prompt_utils.py
new file mode 100644
index 0000000..de72b76
--- /dev/null
+++ b/apps/railwaycopilot/backend/rail_rag/prompt_utils.py
@@ -0,0 +1,43 @@
+from typing import List, Tuple, Dict
+from langchain.docstore.document import Document
+
+def build_system_prompt(base: str, options: Dict[str, bool], extra_instructions: str) -> str:
+    """Compose the system prompt: stripped base, one line per enabled option, then extras.
+
+    Option lines are appended in a fixed order regardless of dict ordering.
+    """
+    option_lines = (
+        ("force_citations", "Always cite sources as (filename p.page)."),
+        ("refuse_if_ooc", "If the answer is not fully supported by the context, reply: ‘I don’t have that in the documents.’"),
+        ("bulleted_style", "Use concise bullet points."),
+        ("structured_style", "Structure output with headings as appropriate."),
+    )
+    parts = [base.strip()]
+    parts.extend(line for flag, line in option_lines if options.get(flag))
+    if extra_instructions:
+        # Note: a whitespace-only value is truthy and adds an empty line.
+        parts.append(extra_instructions.strip())
+    return "\n".join(parts)
+
+
+def build_context(docs: List[Document]) -> str:
+    """Join docs into numbered "[i] (source p.page)" blocks separated by blank lines."""
+    def _block(idx, doc):
+        meta = doc.metadata
+        header = f"[{idx}] ({meta.get('source', 'document')} p.{meta.get('page', 'n/a')})"
+        return f"{header}\n{doc.page_content}"
+    return "\n\n".join(_block(i, d) for i, d in enumerate(docs, 1))
+
+
+def build_messages(question: str, context: str, system_prompt: str, few_shots: List[Tuple[str, str]]):
+    """Assemble the chat as (role, content) tuples: system, few-shots, then the question.
+
+    few_shots: (role, content) pairs in conversation order,
+        e.g. [("human", "..."), ("assistant", "...")].
+    """
+    final_turn = ("human", f"Question: {question}\n\nContext:\n{context}\n\nAnswer:")
+    return [
+        ("system", system_prompt),
+        *((role, content) for role, content in few_shots),
+        final_turn,
+    ]
diff --git a/apps/railwaycopilot/backend/rail_rag/retriever.py b/apps/railwaycopilot/backend/rail_rag/retriever.py
new file mode 100644
index 0000000..a0c4337
--- /dev/null
+++ b/apps/railwaycopilot/backend/rail_rag/retriever.py
@@ -0,0 +1,80 @@
+from typing import List, Optional
+
+from pydantic import PrivateAttr
+from pymongo.collection import Collection
+from langchain_mistralai import MistralAIEmbeddings
+from langchain_core.retrievers import BaseRetriever
+from langchain_core.callbacks import CallbackManagerForRetrieverRun
+from langchain.docstore.document import Document
+from langchain_community.vectorstores import MongoDBAtlasVectorSearch
+
+from rail_rag.config import (
+    TEXT_KEY,
+    VEC_KEY,
+    SRC_KEY,
+    PAGE_KEY,
+    VECTOR_INDEX_NAME,
+)
+
+
+class MongoAtlasRetriever(BaseRetriever):
+    """Retriever that runs similarity search through LangChain's MongoDBAtlasVectorSearch."""
+    # Public, validated field: number of documents requested per query
+    k: int = 4
+
+    # Private attrs: assigned in __init__ via object.__setattr__
+    _collection: Collection = PrivateAttr()
+    _embedder: MistralAIEmbeddings = PrivateAttr()
+    _vectorstore: MongoDBAtlasVectorSearch = PrivateAttr()
+
+    class Config:
+        arbitrary_types_allowed = True
+        underscore_attrs_are_private = True
+
+    def __init__(
+        self,
+        collection: Collection,
+        embedder: MistralAIEmbeddings,
+        k: int = 4,
+        index_name: str = VECTOR_INDEX_NAME,
+        **data,
+    ):
+        super().__init__(k=k, **data)
+        object.__setattr__(self, "_collection", collection)  # NOTE(review): presumably bypasses the model's own __setattr__ — confirm
+        object.__setattr__(self, "_embedder", embedder)
+
+        # LangChain vector store that wraps Atlas Vector Search; reads text from TEXT_KEY and vectors from VEC_KEY
+        vs = MongoDBAtlasVectorSearch(
+            collection=collection,
+            embedding=embedder,
+            index_name=index_name,
+            text_key=TEXT_KEY,
+            embedding_key=VEC_KEY,
+        )
+        object.__setattr__(self, "_vectorstore", vs)
+
+    def __repr__(self) -> str:
+        return f"<MongoAtlasRetriever k={self.k}>"
+
+    def __getstate__(self):
+        return {"k": self.k}  # only k is exposed as state; collection/embedder/vector store are left out
+
+    def _get_relevant_documents(
+        self,
+        query: str,
+        *,
+        run_manager: Optional[CallbackManagerForRetrieverRun] = None,
+    ) -> List[Document]:
+        """
+        Return up to k documents for query via MongoDBAtlasVectorSearch.similarity_search.
+        """
+        docs = self._vectorstore.similarity_search(query, k=self.k)
+
+        # Ensure "source"/"page" keys exist. config sets SRC_KEY/PAGE_KEY to those same names, so a missing key becomes None.
+        for d in docs:
+            md = d.metadata or {}
+            md.setdefault("source", md.get(SRC_KEY))
+            md.setdefault("page", md.get(PAGE_KEY))
+            d.metadata = md
+
+        return docs
diff --git a/apps/railwaycopilot/backend/rail_rag/ui.py b/apps/railwaycopilot/backend/rail_rag/ui.py
new file mode 100644
index 0000000..1efcf90
--- /dev/null
+++ b/apps/railwaycopilot/backend/rail_rag/ui.py
@@ -0,0 +1,82 @@
+import streamlit as st
+from rail_rag.prompt_presets import PRESETS
+from rail_rag.prompt_utils import build_system_prompt
+
+def render_prompt_lab():
+    """Render the sidebar Prompt Lab; return all widget values plus the composed "active_system_prompt"."""
+    with st.sidebar:
+        st.header("🧪 Prompt Lab")
+        preset_name = st.selectbox("Preset", list(PRESETS.keys()), index=0)
+        base_prompt = PRESETS[preset_name]
+
+        st.caption("Adjust global behavior")
+        force_citations = st.checkbox("Always include citations", value=True)
+        refuse_if_ooc   = st.checkbox("Refuse if not in context", value=True)
+        bulleted_style  = st.checkbox("Bulleted style", value=("Bulleted" in preset_name))
+        structured_style = st.checkbox("Structured sections", value=("Structured" in preset_name))
+
+        extra_instructions = st.text_area(
+            "Extra instructions (optional)",
+            value="",
+            help="Add domain-specific constraints, formatting, or terminology."
+        )
+
+        st.divider()
+        st.caption("Generation controls")
+        temperature = st.slider("Temperature", 0.0, 1.5, 0.2, 0.05)
+        max_tokens  = st.slider("Max tokens", 128, 2048, 512, 32)
+
+        st.divider()
+        st.caption("Few-shot examples (optional)")
+        use_fewshot = st.checkbox("Enable few-shot examples", value=False)
+        fewshot_pairs = []
+        if use_fewshot:
+            with st.expander("Add examples"):
+                ex_user = st.text_area("User example", value="What must a signaller do when going off duty?")
+                ex_assistant = st.text_area("Assistant example", value=(
+                    "• Notify relief signaller and transfer any ongoing movements.\n"
+                    "• Record handover in logbook.\nSources: (Rulebook.pdf p.12)"
+                ))
+                if st.button("Add example"):
+                    st.session_state.setdefault("fewshots", [])
+                    st.session_state["fewshots"].append(("human", ex_user))
+                    st.session_state["fewshots"].append(("assistant", ex_assistant))
+            fewshot_pairs = st.session_state.get("fewshots", [])
+
+        st.divider()
+        ab_test = st.checkbox(
+            "Run A/B prompt experiment",
+            value=False,
+            help=(
+                "Compare two prompts side-by-side.\n\n"
+                "🅰️ Prompt A — uses your current sidebar settings (preset + toggles + extra instructions + few-shots).\n"
+                "🅱️ Prompt B — same base preset but forces: citations, bullet + structured format, and same 'Refuse if not in context'."
+            ),
+        )
+
+    active_system_prompt = build_system_prompt(
+        base_prompt,
+        {
+            "force_citations": force_citations,
+            "refuse_if_ooc": refuse_if_ooc,
+            "bulleted_style": bulleted_style,
+            "structured_style": structured_style,
+        },
+        extra_instructions,
+    )
+
+    return {
+        "preset_name": preset_name,
+        "base_prompt": base_prompt,
+        "force_citations": force_citations,
+        "refuse_if_ooc": refuse_if_ooc,
+        "bulleted_style": bulleted_style,
+        "structured_style": structured_style,
+        "extra_instructions": extra_instructions,
+        "temperature": temperature,
+        "max_tokens": max_tokens,
+        "use_fewshot": use_fewshot,
+        "fewshot_pairs": list(fewshot_pairs),  # copy, so callers never alias the session-state list
+        "ab_test": ab_test,
+        "active_system_prompt": active_system_prompt,
+    }
diff --git a/apps/railwaycopilot/backend/requirements.txt b/apps/railwaycopilot/backend/requirements.txt
new file mode 100644
index 0000000..c2ddd1e
--- /dev/null
+++ b/apps/railwaycopilot/backend/requirements.txt
@@ -0,0 +1,16 @@
+streamlit
+pypdf
+
+# MongoDB Atlas client + TLS CA bundle
+pymongo>=4.6.1
+certifi
+
+# LangChain stack (current split)
+langchain==0.2.16
+langchain-core==0.2.38
+langchain-community==0.2.11
+langchain-mistralai==0.1.10
+langchain-text-splitters==0.2.2
+
+# Embeddings — keep if you still use sentence-transformers elsewhere
+sentence-transformers
diff --git a/apps/railwaycopilot/docker-compose.yml b/apps/railwaycopilot/docker-compose.yml
new file mode 100644
index 0000000..53c9281
--- /dev/null
+++ b/apps/railwaycopilot/docker-compose.yml
@@ -0,0 +1,19 @@
+# docker-compose.yml
+
+services:
+  rag_app:
+    build: ./backend
+    container_name: rag_app_mistral
+    ports:
+      - "8501:8501"
+    environment:
+      - INDEX_NAME=rail_rulebook
+      - MISTRAL_API_KEY=${MISTRAL_API_KEY}
+      - MISTRAL_CHAT_MODEL=${MISTRAL_CHAT_MODEL:-mistral-small-latest}
+      - MISTRAL_EMBED_MODEL=${MISTRAL_EMBED_MODEL:-mistral-embed}
+      - MONGODB_URI=${MONGODB_URI}
+      - NORMALIZE_EMBEDDINGS=true
+    volumes:
+      - ./backend:/app
+      - ./backend/corpus:/app/corpus
+    restart: unless-stopped
diff --git a/apps/railwaycopilot/readme.md b/apps/railwaycopilot/readme.md
new file mode 100644
index 0000000..4bb0293
--- /dev/null
+++ b/apps/railwaycopilot/readme.md
@@ -0,0 +1,70 @@
+# Railway Operations & Safety Procedures Assistant
+
+This is a RAG application that ingests the following sources:
+
+- TS1 – General signalling regulations, Issue 18 (in force 07 Dec 2024) – PDF. from tectraining.co.uk
+- HB10 – Duties of the COSS and person in charge when using a hand trolley (Issue 5, Sept 2023) – PDF from consultations.rssb.co.uk
+- “Rules on walking on or near the line” (overview page, new rules from 07 Dec 2024) – pdf article from rssb.co.uk
+- RSSB standards Updates from September 2024 from rssb.co.uk
+
+Once up and running, you can ask the chatbot questions like : 
+- What should a signaller do when going off-duty?
+- What should a driver do if a signal is defective?
+
+# Stack
+
+- Mistral’s Embeddings API (mistral-embed) and Chat Completions (mistral-small-latest).
+- Langchain
+- MongoDB Atlas as Vector Database
+
+# Build 
+Create an .env file at the root of the folder, with the following configuration: 
+
+MISTRAL_API_KEY=""
+MISTRAL_CHAT_MODEL=mistral-small-latest     
+MISTRAL_EMBED_MODEL=mistral-embed
+MONGODB_URI=""
+
+```sh
+docker compose up --build -d
+
+#Chunk the source data, embed the chunks, store and index them in MongoDB : 
+docker compose exec rag_app python ingest_rulebook.py
+
+#Open the app
+open http://localhost:8501
+
+#Rebuild after a change
+docker compose build rag_app
+docker compose up -d
+```
+
+# Features
+
+- Change the prompt structure :
+  - Start with the preset text (base_prompt).
+  - For each ticked checkbox, append an extra line of instructions.
+  - Append any free-text “Extra instructions”.
+  - Note: some presets already mention bullets / structure / refusal. The checkboxes can add additional lines that reinforce or duplicate that behavior. This is fine, the model just sees stronger guidance.
+- Visualize the used prompt
+- A/B Test different prompts :
+  - Prompt A is your defined prompt 
+  - Prompt B uses the same base preset but forces: citations and bullet + structured format; it keeps the same 'Refuse if not in context' setting as Prompt A.
+- Change Temperature (how deterministic vs creative the model can be)
+- Change Max Tokens (how long the model's response can be)
+- Define Top-K Chunks (how many document chunks your retriever returns for the LLM to use as context)
+- Add extra instructions
+- Use Few Shots inference (add your own example Q&A for the model to understand what it needs to do)
+- Visualize a classification of the intent behind the question asked : informational, procedural, compliance, safety_critical, other
+- Show Debug (see retrieved documents)
+
+
+# Going further - potential improvements
+
+- Make the app agentic, leveraging function calling (calling 911, triggering OCR if the input is a picture of the problem)
+- Tune the chunking and data preparation
+- Incident classification and analytics
+- hybrid search
+
+
+