44import hashlib
55import hmac
66import json
7+ import logging
78import os
89import re
910import subprocess
1213import urllib .error
1314import urllib .parse
1415import urllib .request
15- from collections import OrderedDict
16+ from collections import OrderedDict , defaultdict
1617from datetime import datetime
1718
18- # see git-diff-tree 'RAW OUTPUT FORMAT'
19- # https://git-scm.com/docs/git-diff-tree#_raw_output_format
20- DIFF_TREE_RE = re .compile (
21- r" \
22- ^: \
23- (?P<src_mode>[0-9]{6}) \
24- \s+ \
25- (?P<dst_mode>[0-9]{6}) \
26- \s+ \
27- (?P<src_hash>[0-9a-f]{7,40}) \
28- \s+ \
29- (?P<dst_hash>[0-9a-f]{7,40}) \
30- \s+ \
31- (?P<status>[ADTUX]|[CR][0-9]{1,3}|M[0-9]{0,3}) \
32- \s+ \
33- (?P<file1>\S+) \
34- (?:\s+ \
35- (?P<file2>\S+) \
36- )? \
37- $" ,
38- re .MULTILINE | re .VERBOSE ,
39- )
40-
4119EMPTY_TREE_HASH = "4b825dc642cb6eb9a060e54bf8d69288fbee4904"
4220ZEROS = "0000000000000000000000000000000000000000"
4321
@@ -203,39 +181,16 @@ def get_revisions(
203181 "modified" : [],
204182 "url" : commit_url % sha if commit_url else None ,
205183 }
206-
207- # call git diff-tree and get the file changes
208- output = git (["diff-tree" , "-r" , "-C" , "%s" % props ["sha" ]])
209-
210- # sort the changes into the added/modified/removed lists
211- for i in DIFF_TREE_RE .finditer (output ):
212- item = i .groupdict ()
213- if item ["status" ] == "A" :
214- # addition of a file
215- props ["added" ].append (item ["file1" ])
216- elif item ["status" ][0 ] == "C" :
217- # copy of a file into a new one
218- props ["added" ].append (item ["file2" ])
219- elif item ["status" ] == "D" :
220- # deletion of a file
221- props ["removed" ].append (item ["file1" ])
222- elif item ["status" ] == "M" :
223- # modification of the contents or mode of a file
224- props ["modified" ].append (item ["file1" ])
225- elif item ["status" ][0 ] == "R" :
226- # renaming of a file
227- props ["removed" ].append (item ["file1" ])
228- props ["added" ].append (item ["file2" ])
229- elif item ["status" ] == "T" :
230- # change in the type of the file
231- props ["modified" ].append (item ["file1" ])
232- else :
233- # Covers U (file is unmerged)
234- # and X ("unknown" change type, usually an error)
235- # When we get X, we do not know what actually happened so
236- # it's safest just to ignore it. We shouldn't be seeing U
237- # anyway, so we can ignore that too.
238- pass
184+ props .update (
185+ get_tree_changes_from_commit (
186+ props ["sha" ],
187+ # diff-tree doesn't report properly for the first commit in history;
188+ # force the parent if it's the first.
189+ forced_parent = (
190+ EMPTY_TREE_HASH if s == 0 and old == EMPTY_TREE_HASH else None
191+ ),
192+ )
193+ )
239194
240195 # read the header
241196 for l in lines [1 :]:
@@ -259,6 +214,69 @@ def get_revisions(
259214 s += 2
260215
261216
217+ def get_tree_changes_from_commit (
218+ sha : str , forced_parent : str | None = None
219+ ) -> typing .Mapping [str , list [str ]]:
220+ raw_tree = git (
221+ [
222+ "diff-tree" ,
223+ "--raw" ,
224+ "-z" ,
225+ "-r" ,
226+ # detect copies and renames
227+ "-C" ,
228+ "-M" ,
229+ "--no-commit-id" ,
230+ # force the simple format used below.
231+ "--name-status" ,
232+ sha if not forced_parent else f"{ forced_parent } ..{ sha } " ,
233+ # ensure git knows that was a revish, flushing out any code bugs.
234+ "--" ,
235+ ]
236+ )
237+ # see git-diff-tree 'RAW OUTPUT FORMAT' for the actions involved
238+ # https://git-scm.com/docs/git-diff-tree#_raw_output_forma
239+
240+ # the last record still has a null which would trigger another record
241+ # parsing loop
242+ chunks = iter (raw_tree .split ("\0 " )[:- 1 ])
243+
244+ changes = defaultdict (list )
245+ for action in chunks :
246+ # actions can carry a confidence integer percent, thus strip it.
247+ action = action [0 ]
248+ match action :
249+ case "A" :
250+ changes ["added" ].append (next (chunks ))
251+ case "C" :
252+ # copy. Just record the addition
253+ next (chunks ) # discard source file
254+ changes ["added" ].append (next (chunks ))
255+ case "D" :
256+ changes ["removed" ].append (next (chunks ))
257+ case "M" :
258+ changes ["modified" ].append (next (chunks ))
259+ case "R" :
260+ changes ["removed" ].append (next (chunks ))
261+ changes ["added" ].append (next (chunks ))
262+ case "T" :
263+ # change of type of file. Symlink replacing a file, file replacing a symlink, etc.
264+ changes ["added" ].append (next (chunks ))
265+ case "U" :
266+ logging .warning (
267+ "encountured U status in diff-tree; this impossible, there is a bug in this script"
268+ )
269+ case "X" :
270+ logging .warning (
271+ "encontured status X in diff-tree; please report this, it probably a bug in git itself"
272+ )
273+ case _:
274+ logging .warning (
275+ f"unsupported action encountered during diff-tree: { action !r} "
276+ )
277+ return changes
278+
279+
262280def get_base_ref (commit , ref ):
263281 branches = git (["branch" , "--contains" , commit ]).split ("\n " )
264282 CURR_BRANCH_RE = re .compile (r"^\* \w+$" )
0 commit comments