Skip to content

Commit d35cbc3

Browse files
committed
fix diff-tree integration for commit data.
Note: this actually fails on the first commit in history; diff-tree is against the parent. It's an edge case, but it exists. This is a separate PR from metajack#29 for review reasons. When this and that land, issue metajack#28 can be closed. Signed-off-by: Brian Harring <ferringb@gmail.com>
1 parent 5658a54 commit d35cbc3

1 file changed

Lines changed: 75 additions & 57 deletions

File tree

notify-webhook.py

Lines changed: 75 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import hashlib
55
import hmac
66
import json
7+
import logging
78
import os
89
import re
910
import subprocess
@@ -12,32 +13,9 @@
1213
import urllib.error
1314
import urllib.parse
1415
import urllib.request
15-
from collections import OrderedDict
16+
from collections import OrderedDict, defaultdict
1617
from datetime import datetime
1718

18-
# see git-diff-tree 'RAW OUTPUT FORMAT'
19-
# https://git-scm.com/docs/git-diff-tree#_raw_output_format
20-
DIFF_TREE_RE = re.compile(
21-
r" \
22-
^: \
23-
(?P<src_mode>[0-9]{6}) \
24-
\s+ \
25-
(?P<dst_mode>[0-9]{6}) \
26-
\s+ \
27-
(?P<src_hash>[0-9a-f]{7,40}) \
28-
\s+ \
29-
(?P<dst_hash>[0-9a-f]{7,40}) \
30-
\s+ \
31-
(?P<status>[ADTUX]|[CR][0-9]{1,3}|M[0-9]{0,3}) \
32-
\s+ \
33-
(?P<file1>\S+) \
34-
(?:\s+ \
35-
(?P<file2>\S+) \
36-
)? \
37-
$",
38-
re.MULTILINE | re.VERBOSE,
39-
)
40-
4119
EMPTY_TREE_HASH = "4b825dc642cb6eb9a060e54bf8d69288fbee4904"
4220
ZEROS = "0000000000000000000000000000000000000000"
4321

@@ -203,39 +181,16 @@ def get_revisions(
203181
"modified": [],
204182
"url": commit_url % sha if commit_url else None,
205183
}
206-
207-
# call git diff-tree and get the file changes
208-
output = git(["diff-tree", "-r", "-C", "%s" % props["sha"]])
209-
210-
# sort the changes into the added/modified/removed lists
211-
for i in DIFF_TREE_RE.finditer(output):
212-
item = i.groupdict()
213-
if item["status"] == "A":
214-
# addition of a file
215-
props["added"].append(item["file1"])
216-
elif item["status"][0] == "C":
217-
# copy of a file into a new one
218-
props["added"].append(item["file2"])
219-
elif item["status"] == "D":
220-
# deletion of a file
221-
props["removed"].append(item["file1"])
222-
elif item["status"] == "M":
223-
# modification of the contents or mode of a file
224-
props["modified"].append(item["file1"])
225-
elif item["status"][0] == "R":
226-
# renaming of a file
227-
props["removed"].append(item["file1"])
228-
props["added"].append(item["file2"])
229-
elif item["status"] == "T":
230-
# change in the type of the file
231-
props["modified"].append(item["file1"])
232-
else:
233-
# Covers U (file is unmerged)
234-
# and X ("unknown" change type, usually an error)
235-
# When we get X, we do not know what actually happened so
236-
# it's safest just to ignore it. We shouldn't be seeing U
237-
# anyway, so we can ignore that too.
238-
pass
184+
props.update(
185+
get_tree_changes_from_commit(
186+
props["sha"],
187+
# diff-tree doesn't report properly for the first commit in history;
188+
# force the parent if it's the first.
189+
forced_parent=(
190+
EMPTY_TREE_HASH if s == 0 and old == EMPTY_TREE_HASH else None
191+
),
192+
)
193+
)
239194

240195
# read the header
241196
for l in lines[1:]:
@@ -259,6 +214,69 @@ def get_revisions(
259214
s += 2
260215

261216

217+
def get_tree_changes_from_commit(
218+
sha: str, forced_parent: str | None = None
219+
) -> typing.Mapping[str, list[str]]:
220+
raw_tree = git(
221+
[
222+
"diff-tree",
223+
"--raw",
224+
"-z",
225+
"-r",
226+
# detect copies and renames
227+
"-C",
228+
"-M",
229+
"--no-commit-id",
230+
# force the simple format used below.
231+
"--name-status",
232+
sha if not forced_parent else f"{forced_parent}..{sha}",
233+
# ensure git knows that was a revish, flushing out any code bugs.
234+
"--",
235+
]
236+
)
237+
# see git-diff-tree 'RAW OUTPUT FORMAT' for the actions involved
238+
# https://git-scm.com/docs/git-diff-tree#_raw_output_forma
239+
240+
# the last record still has a null which would trigger another record
241+
# parsing loop
242+
chunks = iter(raw_tree.split("\0")[:-1])
243+
244+
changes = defaultdict(list)
245+
for action in chunks:
246+
# actions can carry a confidence integer percent, thus strip it.
247+
action = action[0]
248+
match action:
249+
case "A":
250+
changes["added"].append(next(chunks))
251+
case "C":
252+
# copy. Just record the addition
253+
next(chunks) # discard source file
254+
changes["added"].append(next(chunks))
255+
case "D":
256+
changes["removed"].append(next(chunks))
257+
case "M":
258+
changes["modified"].append(next(chunks))
259+
case "R":
260+
changes["removed"].append(next(chunks))
261+
changes["added"].append(next(chunks))
262+
case "T":
263+
# change of type of file. Symlink replacing a file, file replacing a symlink, etc.
264+
changes["added"].append(next(chunks))
265+
case "U":
266+
logging.warning(
267+
"encountured U status in diff-tree; this impossible, there is a bug in this script"
268+
)
269+
case "X":
270+
logging.warning(
271+
"encontured status X in diff-tree; please report this, it probably a bug in git itself"
272+
)
273+
case _:
274+
logging.warning(
275+
f"unsupported action encountered during diff-tree: {action!r}"
276+
)
277+
return changes
278+
279+
262280
def get_base_ref(commit, ref):
263281
branches = git(["branch", "--contains", commit]).split("\n")
264282
CURR_BRANCH_RE = re.compile(r"^\* \w+$")

0 commit comments

Comments
 (0)