Skip to content
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
1e5181b
Add lockfile tracking for exported pages
naoki-tateyama Feb 2, 2026
90f211c
feat: Incremental page download using lockfile
naoki-tateyama Feb 3, 2026
8691524
add prune command
naoki-tateyama Feb 3, 2026
0b5dbd3
add incremental export option to pages command
naoki-tateyama Feb 8, 2026
6bac022
fix performance issue by adding Descendant type
naoki-tateyama Feb 8, 2026
6edb622
integrate version property into Descendant class
naoki-tateyama Feb 8, 2026
70617ea
fix performance issue by avoiding ancestor fetching in the _template_…
naoki-tateyama Feb 8, 2026
d34eff7
add documentation for the new incremental option
naoki-tateyama Feb 8, 2026
bd13853
remove expand parameters from from_id and from_url methods.
naoki-tateyama Feb 8, 2026
2516f41
move LockfileManager to outside of the Page class
naoki-tateyama Feb 9, 2026
5029d3d
parse ancestor in simpler way
naoki-tateyama Feb 9, 2026
4201ccf
add tests for lockfile utils
naoki-tateyama Feb 11, 2026
8603a3b
refactor Ancestor and Descendant classes
naoki-tateyama Feb 11, 2026
dd86763
atomic file writes
naoki-tateyama Feb 15, 2026
5263c39
Update confluence_markdown_exporter/utils/lockfile.py
naoki-tateyama Mar 4, 2026
6f15581
fix Ancestor.id type from str to int
naoki-tateyama Mar 4, 2026
02562ff
prefilter pages before tqdm progress bar
naoki-tateyama Mar 4, 2026
509d3e6
sort lockfile entries by key for stable git diffs
naoki-tateyama Mar 4, 2026
4253132
make skip_unchanged a config option enabled by default
naoki-tateyama Mar 4, 2026
ed2f5a1
add automatic cleanup for deleted and moved pages
naoki-tateyama Mar 4, 2026
f899267
Introduce new config options and refactor clean
Spenhouet Mar 5, 2026
a115f39
Fix connection config
Spenhouet Mar 6, 2026
1439186
Redownload if file is missing
Spenhouet Mar 8, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .vscode/extensions.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,8 @@
"ms-python.python",
"ms-python.vscode-pylance",
"njpwerner.autodocstring",
"visualstudioexptteam.vscodeintellicode",
"charliermarsh.ruff",
],
// List of extensions recommended by VS Code that should not be recommended for users of this workspace.
"unwantedRecommendations": []
}
}
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
- Converts Confluence macros to equivalent Markdown syntax where possible.
- Handles images and attachments by linking them appropriately in the Markdown output.
- Supports extended Markdown features like tasks, alerts, and front matter.
- Skips unchanged pages by default — only re-exports pages that have changed since the last run.
- Supports Confluence add-ons: [draw.io](https://marketplace.atlassian.com/apps/1210933/draw-io-diagrams-uml-bpmn-aws-erd-flowcharts), [PlantUML](https://marketplace.atlassian.com/apps/1222993/flowchart-plantuml-diagrams-for-confluence)

## Supported Markdown Elements
Expand Down Expand Up @@ -94,7 +95,7 @@ Export all Confluence pages of a single Space:
confluence-markdown-exporter spaces <space-key e.g. MYSPACE> <output path e.g. ./output_path/>
```

#### 2.3. Export all Spaces
#### 2.4. Export all Spaces

Export all Confluence pages across all spaces:

Expand Down Expand Up @@ -149,12 +150,17 @@ This will open a menu where you can:
| export.filename_encoding | Character mapping for filename encoding. | Default mappings for forbidden characters. |
| export.filename_length | Maximum length of filenames. | 255 |
| export.include_document_title | Whether to include the document title in the exported markdown file. | True |
| export.skip_unchanged | Skip exporting pages that have not changed since last export. Uses a lockfile to track page versions. | True |
| export.cleanup_stale | After export, delete local files for pages removed from Confluence or whose export path has changed. | True |
| export.lockfile_name | Name of the lock file used to track exported pages. | confluence-lock.json |
| export.existence_check_batch_size | Number of page IDs per batch when checking page existence during cleanup. Capped at 25 for self-hosted (CQL). | 250 |
| connection_config.backoff_and_retry | Enable automatic retry with exponential backoff | True |
| connection_config.backoff_factor | Multiplier for exponential backoff | 2 |
| connection_config.max_backoff_seconds | Maximum seconds to wait between retries | 60 |
| connection_config.max_backoff_retries | Maximum number of retry attempts | 5 |
| connection_config.retry_status_codes | HTTP status codes that trigger a retry | \[413, 429, 502, 503, 504\] |
| connection_config.verify_ssl | Whether to verify SSL certificates for HTTPS requests. | True |
| connection_config.use_v2_api | Enable Confluence REST API v2 endpoints. Supported on Atlassian Cloud and Data Center 8+. Disable for self-hosted Server instances. | False |
| auth.confluence.url | Confluence instance URL | "" |
| auth.confluence.username | Confluence username/email | "" |
| auth.confluence.api_token | Confluence API token | "" |
Expand Down
8 changes: 4 additions & 4 deletions confluence_markdown_exporter/api_clients.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,15 +73,15 @@ def get_confluence_instance() -> ConfluenceApiSdk:
"""Get authenticated Confluence API client using current settings."""
settings = get_settings()
auth = settings.auth
connection_config = settings.connection_config.model_dump()
connection_config = settings.connection_config.model_dump(exclude={"use_v2_api"})

while True:
try:
confluence = ApiClientFactory(connection_config).create_confluence(auth.confluence)
break
except ConnectionError:
except ConnectionError as e:
questionary.print(
"Confluence connection failed: Redirecting to Confluence authentication config...",
f"{e}\nRedirecting to Confluence authentication config...",
style="fg:red bold",
)
main_config_menu_loop("auth.confluence")
Expand All @@ -99,7 +99,7 @@ def get_jira_instance() -> JiraApiSdk:
"""Get authenticated Jira API client using current settings with required authentication."""
settings = get_settings()
auth = settings.auth
connection_config = settings.connection_config.model_dump()
connection_config = settings.connection_config.model_dump(exclude={"use_v2_api"})

while True:
try:
Expand Down
188 changes: 155 additions & 33 deletions confluence_markdown_exporter/confluence.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
from confluence_markdown_exporter.utils.export import sanitize_filename
from confluence_markdown_exporter.utils.export import sanitize_key
from confluence_markdown_exporter.utils.export import save_file
from confluence_markdown_exporter.utils.lockfile import LockfileManager
from confluence_markdown_exporter.utils.table_converter import TableConverter
from confluence_markdown_exporter.utils.type_converter import str_to_bool

Expand Down Expand Up @@ -133,7 +134,7 @@ class Organization(BaseModel):
spaces: list["Space"]

@property
def pages(self) -> list[int]:
def pages(self) -> list["Page | Descendant"]:
return [page for space in self.spaces for page in space.pages]

def export(self) -> None:
Expand Down Expand Up @@ -165,15 +166,15 @@ class Space(BaseModel):
homepage: int | None

@property
def pages(self) -> list[int]:
def pages(self) -> list["Page | Descendant"]:
if self.homepage is None:
logger.warning(
f"Space '{self.name}' (key: {self.key}) has no homepage. No pages will be exported."
)
return []

homepage = Page.from_id(self.homepage)
return [self.homepage, *homepage.descendants]
return [homepage, *homepage.descendants]

def export(self) -> None:
export_pages(self.pages)
Expand Down Expand Up @@ -212,7 +213,8 @@ def from_json(cls, data: JsonResponse) -> "Label":
class Document(BaseModel):
title: str
space: Space
ancestors: list[int]
ancestors: list["Ancestor"]
version: Version

@property
def _template_vars(self) -> dict[str, str]:
Expand All @@ -221,10 +223,8 @@ def _template_vars(self) -> dict[str, str]:
"space_name": sanitize_filename(self.space.name),
"homepage_id": str(self.space.homepage),
"homepage_title": sanitize_filename(Page.from_id(self.space.homepage).title),
"ancestor_ids": "/".join(str(a) for a in self.ancestors),
"ancestor_titles": "/".join(
sanitize_filename(Page.from_id(a).title) for a in self.ancestors
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the _template_vars property, although the results are cached, Page.from_id(a).title for a in self.ancestors unnecessarily calls from_id. This is inconvenient for speeding up with the incremental option.
So I added an Ancestor type to store ancestor information.

),
"ancestor_ids": "/".join(str(a.id) for a in self.ancestors),
"ancestor_titles": "/".join(sanitize_filename(a.title) for a in self.ancestors),
}


Expand All @@ -237,7 +237,6 @@ class Attachment(Document):
collection_name: str
download_link: str
comment: str
version: Version

@property
def extension(self) -> str:
Expand Down Expand Up @@ -284,8 +283,8 @@ def from_json(cls, data: JsonResponse) -> "Attachment":
download_link=data.get("_links", {}).get("download", ""),
comment=extensions.get("comment", ""),
ancestors=[
*[ancestor.get("id") for ancestor in container.get("ancestors", [])],
container.get("id"),
*[Ancestor.from_json(ancestor) for ancestor in container.get("ancestors", [])],
Ancestor.from_json(container),
][1:],
version=Version.from_json(data.get("version", {})),
)
Expand Down Expand Up @@ -333,6 +332,47 @@ def export(self) -> None:
)


class Ancestor(Document):
    """Lightweight reference to a parent page: carries id/title without a full fetch."""

    id: int

    @classmethod
    def from_json(cls, data: JsonResponse) -> "Ancestor":
        """Build an :class:`Ancestor` from a Confluence API JSON object.

        Only id, title, and space are populated; ancestor chains and version
        information of an ancestor are intentionally left empty.
        """
        expandable_space = data.get("_expandable", {}).get("space", "")
        space_key = expandable_space.split("/")[-1]
        return cls(
            id=data.get("id", 0),
            title=data.get("title", ""),
            space=Space.from_key(space_key),
            # Ancestors of an ancestor are not needed for now.
            ancestors=[],
            # Version of an ancestor is not needed for now.
            version=Version.from_json({}),
        )


class Descendant(Document):
    """A page discovered via descendant search: enough metadata to compute its
    export path and version without fetching the full page body."""

    id: int

    @property
    def _template_vars(self) -> dict[str, str]:
        """Template variables for path construction (extends Document's set)."""
        template_vars = dict(super()._template_vars)
        template_vars["page_id"] = str(self.id)
        template_vars["page_title"] = sanitize_filename(self.title)
        return template_vars

    @property
    def export_path(self) -> Path:
        """Resolve the configured page path template against this page's vars."""
        # The config uses `{placeholder}` syntax; string.Template wants `${...}`.
        template = Template(settings.export.page_path.replace("{", "${"))
        return Path(template.safe_substitute(self._template_vars))

    @classmethod
    def from_json(cls, data: JsonResponse) -> "Descendant":
        """Build a :class:`Descendant` from a Confluence API JSON object."""
        expandable_space = data.get("_expandable", {}).get("space", "")
        # The first ancestor is the space homepage itself; drop it.
        parents = [Ancestor.from_json(item) for item in data.get("ancestors", [])][1:]
        return cls(
            id=data.get("id", 0),
            title=data.get("title", ""),
            space=Space.from_key(expandable_space.split("/")[-1]),
            ancestors=parents,
            version=Version.from_json(data.get("version", {})),
        )


class Page(Document):
id: int
body: str
Expand All @@ -342,11 +382,12 @@ class Page(Document):
attachments: list["Attachment"]

@property
def descendants(self) -> list[int]:
def descendants(self) -> list["Descendant"]:
url = "rest/api/content/search"
params = {
"cql": f"type=page AND ancestor={self.id}",
"limit": 100,
"expand": "metadata.properties,ancestors,version",
"limit": 250,
}
results = []

Expand All @@ -372,8 +413,7 @@ def descendants(self) -> list[int]:
f"Unexpected error when fetching descendants for content ID {self.id}."
)
return []

return [result["id"] for result in results]
return [Descendant.from_json(result) for result in results]

@property
def _template_vars(self) -> dict[str, str]:
Expand Down Expand Up @@ -410,7 +450,7 @@ def export(self) -> None:
self.export_markdown()

def export_with_descendants(self) -> None:
export_pages([self.id, *self.descendants])
export_pages([self, *self.descendants])

def export_body(self) -> None:
soup = BeautifulSoup(self.html, "html.parser")
Expand Down Expand Up @@ -498,7 +538,8 @@ def from_json(cls, data: JsonResponse) -> "Page":
for label in data.get("metadata", {}).get("labels", {}).get("results", [])
],
attachments=Attachment.from_page_id(data.get("id", 0)),
ancestors=[ancestor.get("id") for ancestor in data.get("ancestors", [])][1:],
ancestors=[Ancestor.from_json(ancestor) for ancestor in data.get("ancestors", [])][1:],
version=Version.from_json(data.get("version", {})),
)

@classmethod
Expand All @@ -511,7 +552,7 @@ def from_id(cls, page_id: int) -> "Page":
confluence.get_page_by_id(
page_id,
expand="body.view,body.export_view,body.editor2,metadata.labels,"
"metadata.properties,ancestors",
"metadata.properties,ancestors,version",
),
)
)
Expand All @@ -528,6 +569,7 @@ def from_id(cls, page_id: int) -> "Page":
labels=[],
attachments=[],
ancestors=[],
version=Version.from_json({}),
)

@classmethod
Expand Down Expand Up @@ -596,7 +638,9 @@ def front_matter(self) -> str:
@property
def breadcrumbs(self) -> str:
return (
" > ".join([self.convert_page_link(ancestor) for ancestor in self.page.ancestors])
" > ".join(
[self.convert_page_link(ancestor.id) for ancestor in self.page.ancestors]
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

convert_page_link takes an integer while ancestor.id is a string. Can you check what it is and either adjust ancestor.id to int or ensure that convert_page_link can handle the string?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in 6f15581.

)
+ "\n"
)

Expand Down Expand Up @@ -1001,7 +1045,7 @@ def convert_drawio(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -

return ""

def convert_plantuml(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str: # noqa: PLR0911
def convert_plantuml(self, el: BeautifulSoup, text: str, parent_tags: list[str]) -> str: # noqa: PLR0911
"""Convert PlantUML diagrams from editor2 XML to Markdown code blocks.

PlantUML diagrams are stored in the editor2 XML as structured macros with
Expand Down Expand Up @@ -1092,24 +1136,102 @@ def _get_path_for_href(self, path: Path, style: Literal["absolute", "relative"])
return result


def export_page(page_id: int) -> None:
"""Export a Confluence page to Markdown.
_CQL_MAX_BATCH_SIZE: int = 25

Args:
page_id: The page id.
output_path: The output path.

def _fetch_page_ids_v2_batch(batch: list[str]) -> set[str]:
    """Single v2 API request for a batch of page IDs.

    Uses GET /api/v2/pages?id=X&id=Y&... (Atlassian Cloud).
    The v2 API accepts multiple ``id`` params, so they are encoded directly
    into the URL path since the SDK only accepts a dict for ``params``.
    """
    # Build repeated id=... pairs plus a limit matching the batch size.
    pairs: list[tuple[str, object]] = [("id", page_id) for page_id in batch]
    pairs.append(("limit", len(batch)))
    response = confluence.get(f"api/v2/pages?{urllib.parse.urlencode(pairs)}")
    if not response:
        return set()
    found: set[str] = set()
    for item in response.get("results", []):
        found.add(str(item["id"]))
    return found


def _fetch_page_ids_cql_batch(batch: list[str]) -> set[str]:
    """Single CQL v1 request for a batch of page IDs.

    Uses GET /rest/api/content/search with id in (...) (self-hosted / fallback).
    """
    joined_ids = ",".join(batch)
    response = confluence.get(
        "rest/api/content/search",
        params={"cql": "id in ({})".format(joined_ids), "limit": len(batch), "fields": "id"},
    )
    if not response:
        return set()
    return {str(result["id"]) for result in response.get("results", [])}


def fetch_deleted_page_ids(page_ids: list[str]) -> set[str]:
    """Return the subset of *page_ids* that no longer exist in Confluence.

    Uses the v2 REST API when ``connection_config.use_v2_api`` is enabled
    (multiple ``id`` query params, up to ``export.existence_check_batch_size``
    IDs per request), or the v1 CQL content search otherwise (capped at
    :data:`_CQL_MAX_BATCH_SIZE` IDs per request).

    Per-batch API failures are handled safely: affected IDs are assumed to
    still exist so they are never accidentally deleted.

    Args:
        page_ids: Page IDs (as strings) to check for existence.

    Returns:
        The IDs from *page_ids* that the API did not report as existing.
    """
    if not page_ids:
        return set()

    use_v2 = settings.connection_config.use_v2_api
    batch_size = settings.export.existence_check_batch_size
    # CQL "id in (...)" queries degrade with long ID lists; cap the batch size.
    effective_batch_size = batch_size if use_v2 else min(batch_size, _CQL_MAX_BATCH_SIZE)
    existing: set[str] = set()

    for i in range(0, len(page_ids), effective_batch_size):
        batch = page_ids[i : i + effective_batch_size]
        try:
            if use_v2:
                existing.update(_fetch_page_ids_v2_batch(batch))
            else:
                existing.update(_fetch_page_ids_cql_batch(batch))
        except Exception:  # noqa: BLE001
            # Fail safe: treat the whole batch as still existing so a flaky
            # API call can never trigger local file deletion.
            logger.warning(
                "Failed to check page existence for batch (%d IDs). "
                "Skipping deletion for these pages.",
                len(batch),
            )
            existing.update(batch)

    return set(page_ids) - existing


def sync_removed_pages() -> None:
    """Orchestrate stale-file cleanup: check API for deleted pages, then clean up."""
    if not settings.export.cleanup_stale:
        return

    unseen = LockfileManager.unseen_ids()
    if unseen:
        deleted = fetch_deleted_page_ids(sorted(unseen))
    else:
        deleted = set()
    LockfileManager.remove_pages(deleted)


def export_pages(page_ids: list[int]) -> None:
def export_pages(pages: list["Page | Descendant"]) -> None:
"""Export a list of Confluence pages to Markdown.

Args:
page_ids: List of pages to export.
output_path: The output path.
pages: List of pages to export.
"""
for page_id in (pbar := tqdm(page_ids, smoothing=0.05)):
pbar.set_postfix_str(f"Exporting page {page_id}")
export_page(page_id)
# Mark all pages as seen so cleanup skips API checks for unchanged pages
LockfileManager.mark_seen([p.id for p in pages])
pages_to_export = [page for page in pages if LockfileManager.should_export(page)]

if not pages_to_export:
logger.info("No pages to export based on lockfile state.")
return

for page in (pbar := tqdm(pages_to_export, smoothing=0.05)):
pbar.set_postfix_str(f"Exporting page {page.id}")
_page = Page.from_id(page.id)
_page.export()
# Record to lockfile if enabled
LockfileManager.record_page(_page)
Loading