diff --git a/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py b/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py
index 7dfa870b4..d0ffef311 100644
--- a/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py
+++ b/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py
@@ -509,6 +509,49 @@ def build_index_payload(
             storage_type=StorageType.PERMANENT,
             env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE,
         )
+
+        # Compute x2text_config_hash early so the marker check below (and the
+        # post-success callback) can both consume the same value.
+        x2text_metadata = default_profile.x2text.metadata or {}
+        x2text_config_hash = ToolUtils.hash_str(
+            json.dumps(x2text_metadata, sort_keys=True)
+        )
+
+        # Manage Documents → Index: mirror the pre-async dynamic_extractor
+        # behaviour.  If the extraction marker says this x2text_config_hash +
+        # enable_highlight combination is already extracted, read the existing
+        # extract file from disk and reuse it so the executor can skip the
+        # extract step.  Any failure here falls back to full extraction.
+        reused_extracted_text: str | None = None
+        try:
+            already_extracted = PromptStudioIndexHelper.check_extraction_status(
+                document_id=document_id,
+                profile_manager=default_profile,
+                x2text_config_hash=x2text_config_hash,
+                enable_highlight=tool.enable_highlight,
+            )
+            if already_extracted:
+                try:
+                    reused_extracted_text = fs_instance.read(
+                        path=extract_file_path, mode="r"
+                    )
+                    logger.info(
+                        "Manage Documents index: marker valid, reusing existing "
+                        "extract file for document=%s",
+                        document_id,
+                    )
+                except FileNotFoundError:
+                    logger.warning(
+                        "Marker says extracted but extract file missing: %s. "
+                        "Will re-extract.",
+                        extract_file_path,
+                    )
+        except Exception:
+            logger.warning(
+                "check_extraction_status raised; falling back to full extraction",
+                exc_info=True,
+            )
+
         util = PromptIdeBaseTool(log_level=LogLevel.INFO, org_id=org_id)
         doc_id_key = IndexingUtils.generate_index_key(
             vector_db=str(default_profile.vector_store.id),
@@ -550,6 +593,11 @@ def build_index_payload(
             "platform_api_key": platform_api_key,
         }
 
+        # On marker-hit, pre-populate the extracted text so the executor's
+        # _handle_ide_index skips the extract step entirely.
+        if reused_extracted_text:
+            index_params[IKeys.EXTRACTED_TEXT] = reused_extracted_text
+
         log_events_id = StateStore.get(Common.LOG_EVENTS_ID) or ""
         request_id = StateStore.get(Common.REQUEST_ID) or ""
 
@@ -568,12 +616,9 @@ def build_index_payload(
             log_events_id=log_events_id,
         )
 
-        # x2text config hash for extraction status tracking in callback
-        x2text_metadata = default_profile.x2text.metadata or {}
-        x2text_config_hash = ToolUtils.hash_str(
-            json.dumps(x2text_metadata, sort_keys=True)
-        )
-
+        # x2text_config_hash (computed above) is forwarded to the callback so
+        # ide_index_complete can refresh the extraction marker via
+        # mark_extraction_status.
         cb_kwargs = {
             "log_events_id": log_events_id,
             "request_id": request_id,
diff --git a/backend/prompt_studio/prompt_studio_core_v2/tests/__init__.py b/backend/prompt_studio/prompt_studio_core_v2/tests/__init__.py
new file mode 100644
index 000000000..41e4777a2
--- /dev/null
+++ b/backend/prompt_studio/prompt_studio_core_v2/tests/__init__.py
@@ -0,0 +1 @@
+# Tests for prompt_studio_core_v2.
diff --git a/backend/prompt_studio/prompt_studio_core_v2/tests/test_build_index_payload.py b/backend/prompt_studio/prompt_studio_core_v2/tests/test_build_index_payload.py
new file mode 100644
index 000000000..adb571324
--- /dev/null
+++ b/backend/prompt_studio/prompt_studio_core_v2/tests/test_build_index_payload.py
@@ -0,0 +1,492 @@
+"""Regression tests for ``PromptStudioHelper.build_index_payload``.
+
+These tests pin the Manage Documents → Index marker-reuse behaviour
+introduced to fix the "extract runs every time" QA bug.  The helper
+must:
+
+  1. On a valid extraction marker + readable extract file, pre-populate
+     ``index_params[IKeys.EXTRACTED_TEXT]`` so the executor's
+     ``_handle_ide_index`` skips the extract step entirely.
+  2. On a marker hit where the extract file is missing, fall back to
+     full extraction (do NOT pre-populate the field).
+  3. On a marker miss, fall back to full extraction.
+  4. On an error inside ``check_extraction_status``, swallow the error
+     and fall back to full extraction — the dispatch must not fail.
+
+The backend test environment has no ``pytest-django``, no SQLite
+fallback, and the helper has a heavy Django-coupled import surface.
+Rather than spin up Django, we stub every collaborator as a
+``MagicMock`` on ``sys.modules`` *before* importing the helper, and
+then patch ``PromptStudioHelper`` class methods per-test.  This mirrors
+the ``usage_v2/tests/test_helper.py`` approach.
+
+If the helper module cannot be imported in a given environment (for
+example because the stub surface has drifted), all tests in the module
+are skipped with a clear reason.
+"""
+
+from __future__ import annotations
+
+import sys
+import types
+from typing import Any
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+# ---------------------------------------------------------------------------
+# Stub every collaborator module on sys.modules before importing the helper.
+# These stubs are intentionally broad MagicMocks — the tests patch the
+# specific attributes they care about via ``unittest.mock.patch``.
+# ---------------------------------------------------------------------------
+
+
+def _install(name: str, attrs: dict[str, Any] | None = None) -> types.ModuleType:
+    """Register a brand-new fake module under ``name`` in ``sys.modules``.
+
+    A fresh ``ModuleType`` is built on every call and overwrites any
+    existing entry: the real module may already have been imported
+    (pytest collection, conftest, etc.) before these stubs run, and the
+    fake has to take effect.
+    """
+    fake = types.ModuleType(name)
+    # ``None`` and an empty dict both mean "no attributes to set".
+    for attr_name, attr_value in (attrs or {}).items():
+        setattr(fake, attr_name, attr_value)
+    sys.modules[name] = fake
+    return fake
+
+
+def _install_package(name: str) -> types.ModuleType:
+    """Return the package registered as ``name``, faking it if absent.
+
+    An entry already in ``sys.modules`` is returned untouched, so
+    packages such as ``unstract.core`` keep their real ``__path__`` for
+    submodule resolution.  Child modules we care about are always
+    replaced explicitly via ``_install``.
+    """
+    try:
+        return sys.modules[name]
+    except KeyError:
+        pkg = types.ModuleType(name)  # not registered yet — fake it
+    pkg.__path__ = []  # type: ignore[attr-defined]
+    sys.modules[name] = pkg
+    return pkg
+
+
+try:  # NOTE(review): the stubs installed below are never removed from sys.modules
+    # Account / adapter stubs
+    _install_package("account_v2")
+    _install(
+        "account_v2.constants",
+        {"Common": type("Common", (), {"LOG_EVENTS_ID": "log_events_id",
+                                        "REQUEST_ID": "request_id"})},
+    )
+    _install("account_v2.models", {"User": MagicMock(name="User")})
+    _install_package("adapter_processor_v2")
+    _install(
+        "adapter_processor_v2.constants",
+        {"AdapterKeys": type("AdapterKeys", (), {})},
+    )
+    _install(
+        "adapter_processor_v2.models",
+        {"AdapterInstance": MagicMock(name="AdapterInstance")},
+    )
+
+    # Plugins stub
+    _install("plugins", {"get_plugin": MagicMock(return_value=None)})
+
+    # utils stubs
+    _install_package("utils")
+    _install_package("utils.file_storage")
+    _install(
+        "utils.file_storage.constants",
+        {
+            "FileStorageKeys": type(
+                "FileStorageKeys",
+                (),
+                {"PERMANENT_REMOTE_STORAGE": "permanent"},
+            )
+        },
+    )
+    _install_package("utils.file_storage.helpers")
+    _install(
+        "utils.file_storage.helpers.prompt_studio_file_helper",
+        {"PromptStudioFileHelper": MagicMock(name="PromptStudioFileHelper")},
+    )
+    _install(
+        "utils.local_context",
+        {"StateStore": MagicMock(name="StateStore")},
+    )
+
+    # backend.celery_service stub
+    _install_package("backend")
+    _install(
+        "backend.celery_service",
+        {"app": MagicMock(name="celery_app")},
+    )
+
+    # prompt_studio stubs
+    _install_package("prompt_studio")
+    _install_package("prompt_studio.prompt_profile_manager_v2")
+    _install(
+        "prompt_studio.prompt_profile_manager_v2.models",
+        {"ProfileManager": MagicMock(name="ProfileManager")},
+    )
+    _install(
+        "prompt_studio.prompt_profile_manager_v2.profile_manager_helper",
+        {"ProfileManagerHelper": MagicMock(name="ProfileManagerHelper")},
+    )
+
+    _install_package("prompt_studio.prompt_studio_document_manager_v2")
+    _install(
+        "prompt_studio.prompt_studio_document_manager_v2.models",
+        {"DocumentManager": MagicMock(name="DocumentManager")},
+    )
+
+    _install_package("prompt_studio.prompt_studio_index_manager_v2")
+    _install(
+        "prompt_studio.prompt_studio_index_manager_v2.prompt_studio_index_helper",
+        {"PromptStudioIndexHelper": MagicMock(name="PromptStudioIndexHelper")},
+    )
+
+    _install_package("prompt_studio.prompt_studio_output_manager_v2")
+    _install(
+        "prompt_studio.prompt_studio_output_manager_v2.output_manager_helper",
+        {"OutputManagerHelper": MagicMock(name="OutputManagerHelper")},
+    )
+
+    _install_package("prompt_studio.prompt_studio_v2")
+    _install(
+        "prompt_studio.prompt_studio_v2.models",
+        {"ToolStudioPrompt": MagicMock(name="ToolStudioPrompt")},
+    )
+
+    # Stub the prompt_studio_core_v2 sibling modules too — several of them
+    # transitively import modules (like ``utils.cache_service``) that we
+    # don't want to pull in for these unit tests.
+    _install_package("prompt_studio.prompt_studio_core_v2")
+    _install(
+        "prompt_studio.prompt_studio_core_v2.document_indexing_service",
+        {"DocumentIndexingService": MagicMock(name="DocumentIndexingService")},
+    )
+
+    # Real exception classes — build_index_payload uses ``raise``.
+    class _FakeExc(Exception):
+        pass
+
+    _install(
+        "prompt_studio.prompt_studio_core_v2.exceptions",
+        {
+            "AnswerFetchError": type("AnswerFetchError", (_FakeExc,), {}),
+            "DefaultProfileError": type("DefaultProfileError", (_FakeExc,), {}),
+            "EmptyPromptError": type("EmptyPromptError", (_FakeExc,), {}),
+            "ExtractionAPIError": type("ExtractionAPIError", (_FakeExc,), {}),
+            "IndexingAPIError": type("IndexingAPIError", (_FakeExc,), {}),
+            "NoPromptsFound": type("NoPromptsFound", (_FakeExc,), {}),
+            "OperationNotSupported": type("OperationNotSupported", (_FakeExc,), {}),
+            "PermissionError": type("PermissionError", (_FakeExc,), {}),
+        },
+    )
+    _install(
+        "prompt_studio.prompt_studio_core_v2.migration_utils",
+        {"SummarizeMigrationUtils": MagicMock(name="SummarizeMigrationUtils")},
+    )
+    _install(
+        "prompt_studio.prompt_studio_core_v2.models",
+        {"CustomTool": MagicMock(name="CustomTool")},
+    )
+    _install(
+        "prompt_studio.prompt_studio_core_v2.prompt_ide_base_tool",
+        {"PromptIdeBaseTool": MagicMock(name="PromptIdeBaseTool")},
+    )
+    _install(
+        "prompt_studio.prompt_studio_core_v2.prompt_variable_service",
+        {"PromptStudioVariableService": MagicMock(name="PromptStudioVariableService")},
+    )
+
+    # unstract.core.pubsub_helper stub (LogPublisher isn't used by
+    # build_index_payload but the module-level import must succeed).
+    _install_package("unstract.core")
+    _install(
+        "unstract.core.pubsub_helper",
+        {"LogPublisher": MagicMock(name="LogPublisher")},
+    )
+
+    # unstract.sdk1 stubs — these heavy modules transitively pull in
+    # ``unstract.core.cache.redis_client`` which isn't on the python
+    # path for the backend tests.  We only need the leaf classes.
+    _install_package("unstract.sdk1")
+    _install(
+        "unstract.sdk1.constants",
+        {
+            "LogLevel": type(
+                "LogLevel", (), {"INFO": "INFO", "WARN": "WARN", "ERROR": "ERROR"}
+            )
+        },
+    )
+    _install(
+        "unstract.sdk1.exceptions",
+        {
+            "IndexingError": type("IndexingError", (Exception,), {}),
+            "SdkError": type("SdkError", (Exception,), {}),
+        },
+    )
+    _install_package("unstract.sdk1.execution")
+
+    class _FakeExecutionContext:
+        """Minimal ExecutionContext that keeps ``executor_params`` as
+        the real dict we pass in (the tests inspect it)."""
+
+        def __init__(self, **kwargs: Any) -> None:
+            self.executor_name = kwargs.get("executor_name")
+            self.operation = kwargs.get("operation")
+            self.run_id = kwargs.get("run_id")
+            self.execution_source = kwargs.get("execution_source")
+            self.organization_id = kwargs.get("organization_id")
+            self.executor_params = kwargs.get("executor_params") or {}
+            self.request_id = kwargs.get("request_id")
+            self.log_events_id = kwargs.get("log_events_id")
+
+    _install(
+        "unstract.sdk1.execution.context",
+        {"ExecutionContext": _FakeExecutionContext},
+    )
+    _install(
+        "unstract.sdk1.execution.dispatcher",
+        {"ExecutionDispatcher": MagicMock(name="ExecutionDispatcher")},
+    )
+    _install_package("unstract.sdk1.file_storage")
+    _install(
+        "unstract.sdk1.file_storage.constants",
+        {"StorageType": type("StorageType", (), {"PERMANENT": "permanent"})},
+    )
+    _install(
+        "unstract.sdk1.file_storage.env_helper",
+        {"EnvHelper": MagicMock(name="EnvHelper")},
+    )
+    _install_package("unstract.sdk1.utils")
+    _install(
+        "unstract.sdk1.utils.indexing",
+        {"IndexingUtils": MagicMock(name="IndexingUtils")},
+    )
+    _install(
+        "unstract.sdk1.utils.tool",
+        {"ToolUtils": MagicMock(name="ToolUtils")},
+    )
+
+    # Now import the helper module.  If this fails, all tests below will
+    # be skipped via the ``_IMPORT_ERROR`` sentinel.
+    from prompt_studio.prompt_studio_core_v2 import prompt_studio_helper as _psh_mod  # noqa: E402
+
+    PromptStudioHelper = _psh_mod.PromptStudioHelper
+    IKeys = _psh_mod.IKeys
+    _IMPORT_ERROR: str | None = None
+except Exception as exc:  # pragma: no cover — environment guard
+    _IMPORT_ERROR = (
+        f"prompt_studio_helper could not be imported in this environment: "
+        f"{type(exc).__name__}: {exc}"
+    )
+    PromptStudioHelper = None  # type: ignore[assignment]
+    IKeys = None  # type: ignore[assignment]
+
+
+pytestmark = pytest.mark.skipif(  # skips every test here if the import failed
+    _IMPORT_ERROR is not None, reason=_IMPORT_ERROR or ""
+)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_tool(enable_highlight: bool = False, summarize_context: bool = False):
+    """Return a fake ``CustomTool`` mock carrying the two given flags."""
+    tool_flags = dict(enable_highlight=enable_highlight)
+    tool_flags.update(summarize_context=summarize_context)
+    return MagicMock(name="CustomTool", **tool_flags)
+
+
+def _make_profile():
+    """Return a fake ``ProfileManager`` with fixed adapter ids and chunk sizes."""
+    attrs = {"chunk_size": 512, "chunk_overlap": 64, "profile_id": "profile-1"}
+    attrs["x2text.id"] = "x2t-1"
+    attrs["x2text.metadata"] = {"model": "default"}
+    attrs["embedding_model.id"] = "emb-1"
+    attrs["vector_store.id"] = "vdb-1"
+    profile = MagicMock(name="ProfileManager")
+    profile.configure_mock(**attrs)  # dotted keys set attributes on child mocks
+    return profile
+
+
+def _dispatch_build(
+    *,
+    check_return: bool | Exception,
+    read_return: str | Exception,
+    tool: Any = None,
+    profile: Any = None,
+) -> tuple[Any, Any, MagicMock, MagicMock]:
+    """Run ``build_index_payload`` with all collaborators patched.
+
+    ``check_return`` / ``read_return`` configure the two branches we
+    care about:
+        * ``check_return`` — ``check_extraction_status`` return value
+          or an exception to raise.
+        * ``read_return`` — ``fs_instance.read`` return value or an
+          exception to raise.
+
+    Returns ``(context, cb_kwargs, fs_instance, check_mock)``.
+    """
+    from contextlib import ExitStack
+
+    tool = tool or _make_tool()
+    profile = profile or _make_profile()
+
+    fs_instance = MagicMock(name="fs_instance")
+    if isinstance(read_return, Exception):
+        fs_instance.read.side_effect = read_return
+    else:
+        fs_instance.read.return_value = read_return
+
+    check_mock = MagicMock(name="check_extraction_status")
+    if isinstance(check_return, Exception):
+        check_mock.side_effect = check_return
+    else:
+        check_mock.return_value = check_return
+
+    # Patch everything via context managers so each test starts clean.
+    patches = [
+        patch.object(
+            _psh_mod.CustomTool,
+            "objects",
+            MagicMock(get=MagicMock(return_value=tool)),
+        ),
+        patch.object(
+            _psh_mod.PromptStudioFileHelper,
+            "get_or_create_prompt_studio_subdirectory",
+            return_value="/prompt-studio/org/user/tool",
+        ),
+        patch.object(
+            _psh_mod.ProfileManager,
+            "get_default_llm_profile",
+            return_value=profile,
+        ),
+        patch.object(
+            PromptStudioHelper,
+            "validate_adapter_status",
+            return_value=None,
+        ),
+        patch.object(
+            PromptStudioHelper,
+            "validate_profile_manager_owner_access",
+            return_value=None,
+        ),
+        patch.object(
+            PromptStudioHelper,
+            "_get_platform_api_key",
+            return_value="pk-test",
+        ),
+        patch.object(
+            PromptStudioHelper,
+            "_build_summarize_params",
+            return_value=(None, "", MagicMock()),
+        ),
+        patch.object(
+            _psh_mod.EnvHelper,
+            "get_storage",
+            return_value=fs_instance,
+        ),
+        patch.object(
+            _psh_mod.PromptStudioIndexHelper,
+            "check_extraction_status",
+            check_mock,
+        ),
+        patch.object(
+            _psh_mod.IndexingUtils,
+            "generate_index_key",
+            return_value="doc-key-1",
+        ),
+        patch.object(
+            _psh_mod,
+            "PromptIdeBaseTool",
+            MagicMock(return_value=MagicMock()),
+        ),
+        patch.object(
+            _psh_mod.StateStore,
+            "get",
+            return_value="",
+        ),
+    ]
+    # ExitStack unwinds every entered patch, even if a later one fails to start.
+    with ExitStack() as stack:
+        for p in patches:
+            stack.enter_context(p)
+        context, cb_kwargs = PromptStudioHelper.build_index_payload(
+            tool_id="tool-1",
+            file_name="doc.pdf",
+            org_id="org-1",
+            user_id="user-1",
+            document_id="doc-1",
+            run_id="run-1",
+        )
+        return context, cb_kwargs, fs_instance, check_mock
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+class TestBuildIndexPayloadMarker:
+    """Marker-reuse contract of ``build_index_payload`` (hit, stale, miss, error)."""
+
+    def test_marker_hit_prepopulates_extracted_text(self) -> None:
+        """Valid marker and readable file: the stored text is forwarded."""
+        ctx, _cb_kwargs, storage, status_check = _dispatch_build(
+            check_return=True,
+            read_return="existing extracted content",
+        )
+        forwarded = ctx.executor_params["index_params"][IKeys.EXTRACTED_TEXT]
+        assert forwarded == "existing extracted content"
+        storage.read.assert_called_once()
+        status_check.assert_called_once()
+
+    def test_marker_hit_missing_file_does_not_prepopulate(self) -> None:
+        """Valid marker but the read raises FileNotFoundError: nothing forwarded."""
+        ctx, _cb_kwargs, storage, _status = _dispatch_build(
+            check_return=True,
+            read_return=FileNotFoundError("missing"),
+        )
+        params = ctx.executor_params["index_params"]
+        assert IKeys.EXTRACTED_TEXT not in params
+        storage.read.assert_called_once()
+
+    def test_marker_miss_does_not_prepopulate(self) -> None:
+        """Marker reports not-extracted: nothing forwarded and no read happens."""
+        ctx, _cb_kwargs, storage, _status = _dispatch_build(
+            check_return=False,
+            read_return="should-not-be-read",
+        )
+        params = ctx.executor_params["index_params"]
+        assert IKeys.EXTRACTED_TEXT not in params
+        storage.read.assert_not_called()
+
+    def test_check_extraction_status_raises_is_swallowed(self, caplog) -> None:
+        """Status check raises: warning logged, nothing forwarded, no re-raise."""
+        import logging as _logging
+
+        caplog.set_level(_logging.WARNING, logger=_psh_mod.logger.name)
+        ctx, _cb_kwargs, storage, _status = _dispatch_build(
+            check_return=RuntimeError("db down"),
+            read_return="should-not-be-read",
+        )
+        params = ctx.executor_params["index_params"]
+        assert IKeys.EXTRACTED_TEXT not in params
+        storage.read.assert_not_called()
+        # The helper must have logged a message announcing the fallback;
+        # any captured record containing the phrase satisfies this.
+        logged = [record.getMessage() for record in caplog.records]
+        needle = "falling back to full extraction"
+        assert any(needle in line for line in logged)
diff --git a/workers/executor/executors/legacy_executor.py b/workers/executor/executors/legacy_executor.py
index 9a3bf710e..18e1f4774 100644
--- a/workers/executor/executors/legacy_executor.py
+++ b/workers/executor/executors/legacy_executor.py
@@ -408,20 +408,30 @@ def _handle_ide_index(self, context: ExecutionContext) -> ExecutionResult:
                 error=f"ide_index missing required params: {', '.join(missing)}"
             )
 
-        # Step 1: Extract
-        extract_ctx = ExecutionContext(
-            executor_name=context.executor_name,
-            operation=Operation.EXTRACT.value,
-            run_id=context.run_id,
-            execution_source=context.execution_source,
-            organization_id=context.organization_id,
-            executor_params=extract_params,
-            request_id=context.request_id,
-            log_events_id=context.log_events_id,
-        )
-        extract_result = self._handle_extract(extract_ctx)
-        if not extract_result.success:
-            return extract_result
+        # Step 1: Extract (or reuse pre-extracted text on marker hit)
+        pre_extracted_text = index_params.get(IKeys.EXTRACTED_TEXT, "") or ""
+        if pre_extracted_text:
+            logger.info(
+                "ide_index: marker hit, skipping extract step " "(len=%d, run_id=%s)",
+                len(pre_extracted_text),
+                context.run_id,
+            )
+            extracted_text = pre_extracted_text
+        else:
+            extract_ctx = ExecutionContext(
+                executor_name=context.executor_name,
+                operation=Operation.EXTRACT.value,
+                run_id=context.run_id,
+                execution_source=context.execution_source,
+                organization_id=context.organization_id,
+                executor_params=extract_params,
+                request_id=context.request_id,
+                log_events_id=context.log_events_id,
+            )
+            extract_result = self._handle_extract(extract_ctx)
+            if not extract_result.success:
+                return extract_result
+            extracted_text = extract_result.data.get(IKeys.EXTRACTED_TEXT, "")
 
         # Step 2: Optional summarize
         summarize_params = params.get("summarize_params")
@@ -433,7 +443,6 @@ def _handle_ide_index(self, context: ExecutionContext) -> ExecutionResult:
                 return result
 
         # Step 3: Index — inject extracted text
-        extracted_text = extract_result.data.get(IKeys.EXTRACTED_TEXT, "")
         index_params[IKeys.EXTRACTED_TEXT] = extracted_text
 
         index_ctx = ExecutionContext(
diff --git a/workers/tests/test_sanity_phase5.py b/workers/tests/test_sanity_phase5.py
index 31675b8f9..4d00a571f 100644
--- a/workers/tests/test_sanity_phase5.py
+++ b/workers/tests/test_sanity_phase5.py
@@ -377,6 +377,167 @@ def test_ide_index_extract_failure(
         assert not result.success
         assert "X2Text" in result.error
 
+    @patch(_PATCH_INDEX_DEPS)
+    @patch(_PATCH_FS)
+    @patch(_PATCH_X2TEXT)
+    @patch(_PATCH_SHIM)
+    def test_ide_index_reuses_pre_extracted_text(
+        self,
+        mock_shim,
+        mock_x2text,
+        mock_fs,
+        mock_index_deps,
+        eager_app,
+    ):
+        """ide_index with ``extracted_text`` in index_params never extracts."""
+        # The extractor is rigged to raise if used: reaching it would mean
+        # the executor ignored the text supplied in index_params.
+        extractor = MagicMock()
+        extractor.process.side_effect = AssertionError(
+            "extract must not run when index_params contains pre-extracted text"
+        )
+        mock_x2text.return_value = extractor
+
+        storage = MagicMock()
+        storage.exists.return_value = False
+        mock_fs.return_value = storage
+
+        # Fake index object; its perform_indexing call is inspected below.
+        indexer = MagicMock()
+        indexer.generate_index_key.return_value = "idx-doc-reuse"
+        indexer.is_document_indexed.return_value = False
+        mock_index_deps.return_value = (
+            MagicMock(return_value=indexer),
+            MagicMock(),
+            MagicMock(),
+        )
+
+        extract_params = {
+            "x2text_instance_id": "x2t-1",
+            "file_path": "/data/doc.pdf",
+            "enable_highlight": False,
+            "output_file_path": "/data/extract/doc.txt",
+            "platform_api_key": "pk-test",
+            "usage_kwargs": {},
+        }
+        index_params = {
+            "tool_id": "tool-1",
+            "embedding_instance_id": "emb-1",
+            "vector_db_instance_id": "vdb-1",
+            "x2text_instance_id": "x2t-1",
+            "file_path": "/data/extract/doc.txt",
+            "file_hash": None,
+            "chunk_overlap": 64,
+            "chunk_size": 512,
+            "reindex": True,
+            "enable_highlight": False,
+            "usage_kwargs": {},
+            "run_id": "run-ide-reuse",
+            "execution_source": "ide",
+            "platform_api_key": "pk-test",
+            "extracted_text": "reused extracted payload",
+        }
+        context = ExecutionContext(
+            executor_name="legacy",
+            operation="ide_index",
+            run_id="run-ide-reuse",
+            execution_source="ide",
+            organization_id="org-test",
+            executor_params={
+                "extract_params": extract_params,
+                "index_params": index_params,
+            },
+        )
+
+        raw_result = _run_task(eager_app, context.to_dict())
+        outcome = ExecutionResult.from_dict(raw_result)
+
+        assert outcome.success
+        assert outcome.data["doc_id"] == "idx-doc-reuse"
+        # The extraction adapter was left untouched.
+        extractor.process.assert_not_called()
+        # The supplied text is what perform_indexing was called with.
+        indexing_kwargs = indexer.perform_indexing.call_args.kwargs
+        assert indexing_kwargs.get("extracted_text") == "reused extracted payload"
+
+    @patch(_PATCH_INDEX_DEPS)
+    @patch(_PATCH_FS)
+    @patch(_PATCH_X2TEXT)
+    @patch(_PATCH_SHIM)
+    def test_ide_index_without_pre_extracted_text_runs_extract(
+        self,
+        mock_shim,
+        mock_x2text,
+        mock_fs,
+        mock_index_deps,
+        eager_app,
+    ):
+        """ide_index without ``extracted_text`` in index_params runs extract once."""
+        extractor = MagicMock()
+        extractor.process.return_value = _mock_process_response("freshly extracted")
+        mock_x2text.return_value = extractor
+
+        storage = MagicMock()
+        storage.exists.return_value = False
+        mock_fs.return_value = storage
+
+        indexer = MagicMock()
+        indexer.generate_index_key.return_value = "idx-doc-fresh"
+        indexer.is_document_indexed.return_value = False
+        mock_index_deps.return_value = (
+            MagicMock(return_value=indexer),
+            MagicMock(),
+            MagicMock(),
+        )
+
+        extract_params = {
+            "x2text_instance_id": "x2t-1",
+            "file_path": "/data/doc.pdf",
+            "enable_highlight": False,
+            "output_file_path": "/data/extract/doc.txt",
+            "platform_api_key": "pk-test",
+            "usage_kwargs": {},
+        }
+        index_params = {
+            "tool_id": "tool-1",
+            "embedding_instance_id": "emb-1",
+            "vector_db_instance_id": "vdb-1",
+            "x2text_instance_id": "x2t-1",
+            "file_path": "/data/extract/doc.txt",
+            "file_hash": None,
+            "chunk_overlap": 64,
+            "chunk_size": 512,
+            "reindex": True,
+            "enable_highlight": False,
+            "usage_kwargs": {},
+            "run_id": "run-ide-fresh",
+            "execution_source": "ide",
+            "platform_api_key": "pk-test",
+            # Deliberately no "extracted_text" key: the extract path must run.
+        }
+        context = ExecutionContext(
+            executor_name="legacy",
+            operation="ide_index",
+            run_id="run-ide-fresh",
+            execution_source="ide",
+            organization_id="org-test",
+            executor_params={
+                "extract_params": extract_params,
+                "index_params": index_params,
+            },
+        )
+
+        raw_result = _run_task(eager_app, context.to_dict())
+        outcome = ExecutionResult.from_dict(raw_result)
+
+        assert outcome.success
+        assert outcome.data["doc_id"] == "idx-doc-fresh"
+        # Exactly one call reached the extraction adapter.
+        extractor.process.assert_called_once()
+        # The text produced by that call is what perform_indexing was given.
+        indexing_kwargs = indexer.perform_indexing.call_args.kwargs
+        assert indexing_kwargs.get("extracted_text") == "freshly extracted"
+
 
 # ---------------------------------------------------------------------------
 # 5D: structure_pipeline compound operation through eager chain