99from datetime import timezone
1010from pathlib import Path
1111from typing import TYPE_CHECKING
12+ from typing import ClassVar
1213
1314from pydantic import BaseModel
1415from pydantic import Field
@@ -30,6 +31,8 @@ class PageEntry(BaseModel):
3031 title : str
3132 version : int
3233 export_path : str
34+ command : str
35+ args : list [str ] = Field (default_factory = list )
3336
3437
3538class ConfluenceLock (BaseModel ):
@@ -50,7 +53,7 @@ def load(cls, lockfile_path: Path) -> ConfluenceLock:
5053 logger .warning (f"Failed to parse lock file: { lockfile_path } . Starting fresh." )
5154 return cls ()
5255
53- def save (self , lockfile_path : Path ) -> None :
56+ def save (self , lockfile_path : Path , * , delete_ids : set [ str ] | None = None ) -> None :
5457 """Save lock file to disk.
5558
5659 To handle concurrent writes, this method reads the existing lock file
@@ -61,9 +64,12 @@ def save(self, lockfile_path: Path) -> None:
6164 # Read existing lock file and merge to handle concurrent writes
6265 existing = ConfluenceLock .load (lockfile_path )
6366 existing .pages = dict (sorted ({** existing .pages , ** self .pages }.items ()))
67+ if delete_ids :
68+ for page_id in delete_ids :
69+ existing .pages .pop (page_id , None )
6470 existing .last_export = datetime .now (timezone .utc ).isoformat ()
6571
66- json_str = json .dumps (existing .model_dump (), indent = 2 )
72+ json_str = json .dumps (existing .model_dump (), indent = 2 , ensure_ascii = False )
6773 tmp_path = None
6874 try :
6975 with tempfile .NamedTemporaryFile (
@@ -85,7 +91,7 @@ def save(self, lockfile_path: Path) -> None:
8591 self .pages = existing .pages
8692 self .last_export = existing .last_export
8793
88- def add_page (self , page : Page ) -> None :
94+ def add_page (self , page : Page , command : str = "" , args : list [ str ] | None = None ) -> None :
8995 """Add or update a page entry in the lock file."""
9096 if page .version is None :
9197 logger .warning (f"Page { page .id } has no version info. Skipping lock entry." )
@@ -95,35 +101,57 @@ def add_page(self, page: Page) -> None:
95101 title = page .title ,
96102 version = page .version .number ,
97103 export_path = str (page .export_path ),
104+ command = command ,
105+ args = args or [],
98106 )
99107
100108
101109class LockfileManager :
102110 """Manager for lock file operations during export."""
103111
104- _lockfile_path : Path | None = None
105- _lock : ConfluenceLock | None = None
112+ _lockfile_path : ClassVar [Path | None ] = None
113+ _lock : ClassVar [ConfluenceLock | None ] = None
114+ _output_path : ClassVar [Path | None ] = None
115+ _command : ClassVar [str ] = ""
116+ _args : ClassVar [list [str ]] = []
117+ _scope_entries : ClassVar [dict [str , PageEntry ]] = {}
118+ _seen_page_ids : ClassVar [set [str ]] = set ()
106119
@classmethod
def init(cls, command: str = "", args: list[str] | None = None) -> None:
    """Set up lock file tracking for one export run.

    Nothing happens when the ``skip_unchanged`` export setting is off.
    Otherwise the lock file under the configured output path is loaded, the
    invoking command and its arguments are stored, and the entries that were
    previously recorded with the same command and arguments are snapshotted
    as the scope of this run. The set of seen page IDs is reset.

    Args:
        command: Name of the command that started the export.
        args: Arguments passed to that command; ``None`` is treated as an
            empty list.
    """
    from confluence_markdown_exporter.utils.app_data_store import get_settings

    export_settings = get_settings().export
    if not export_settings.skip_unchanged:
        return

    cls._output_path = export_settings.output_path
    cls._lockfile_path = cls._output_path / LOCKFILE_FILENAME
    cls._lock = ConfluenceLock.load(cls._lockfile_path)
    cls._command = command
    cls._args = args or []

    # Entries written by an earlier run of the very same command + arguments
    # form the baseline that the current run is later compared against.
    scope: dict[str, PageEntry] = {}
    for page_id, entry in cls._lock.pages.items():
        if entry.command == cls._command and entry.args == cls._args:
            scope[page_id] = entry
    cls._scope_entries = scope
    cls._seen_page_ids = set()
118140
@classmethod
def record_page(cls, page: Page) -> None:
    """Store an exported page in the lock file and mark it as seen.

    Does nothing when no lock or lock file path has been set on the class.
    The entry is tagged with the command and arguments held on the class,
    and the lock file is saved right after the entry is added.

    Args:
        page: The page that has just been exported.
    """
    lock = cls._lock
    lockfile_path = cls._lockfile_path
    if lock is None or lockfile_path is None:
        return

    lock.add_page(page, command=cls._command, args=cls._args)
    lock.save(lockfile_path)
    # Seen IDs are kept as strings.
    cls._seen_page_ids.add(str(page.id))
150+
@classmethod
def mark_seen(cls, page_ids: list[int]) -> None:
    """Register page IDs as belonging to the current export scope.

    Each ID is converted to its string form before it is added to the set
    of seen page IDs.

    Args:
        page_ids: IDs of the pages encountered by the current export.
    """
    for raw_id in page_ids:
        cls._seen_page_ids.add(str(raw_id))
127155
128156 @classmethod
129157 def should_export (cls , page : Page | Descendant ) -> bool :
@@ -146,33 +174,23 @@ def should_export(cls, page: Page | Descendant) -> bool:
146174 return entry .version != page .version .number or entry .export_path != str (page .export_path )
147175
@classmethod
def cleanup(cls) -> None:
    """Remove lock file entries and exported files for pages that left the scope.

    Does nothing unless a lock, a lock file path and an output path are set
    on the class. Two situations are handled for every entry in the scope
    snapshot:

    * the page was not seen during this run: its exported file is deleted
      and its entry is dropped from the lock file;
    * the page was seen but is now recorded under a different export path:
      the file at the old path is deleted.

    A file is never deleted when its path is the export path of another
    entry that stays in the lock file. Otherwise a page that took over the
    path of a removed or renamed page would lose its fresh export.
    """
    if cls._lock is None or cls._lockfile_path is None or cls._output_path is None:
        return

    # Scope entries that were never marked as seen are treated as removed.
    delete_ids = {
        page_id for page_id in cls._scope_entries if page_id not in cls._seen_page_ids
    }

    # Export paths owned by entries that survive this cleanup.
    live_paths = {
        entry.export_path
        for page_id, entry in cls._lock.pages.items()
        if page_id not in delete_ids
    }

    for page_id, old_entry in cls._scope_entries.items():
        old_path = old_entry.export_path
        if page_id in delete_ids:
            message = f"Deleted removed page: {old_path}"
        else:
            current = cls._lock.pages.get(page_id)
            if current is None or current.export_path == old_path:
                # Page is gone from the lock or was not moved: nothing to clean.
                continue
            message = f"Deleted old path for moved page: {old_path}"

        if old_path in live_paths:
            # Another page now exports to this location; keep its file.
            logger.info(f"Kept file now used by another page: {old_path}")
            continue

        (cls._output_path / old_path).unlink(missing_ok=True)
        logger.info(message)

    if delete_ids:
        cls._lock.save(cls._lockfile_path, delete_ids=delete_ids)
0 commit comments