Skip to content

Commit c014a0f

Browse files
Merge pull request #187 from obsidianforensics/up-verisons-and-clean-hostnames
Up the max Chrome version to 129. Add `get_clean_hostnames` function …
2 parents 805c0a9 + 139c88c commit c014a0f

File tree

2 files changed

+44
-40
lines changed

pyhindsight/browsers/chrome.py

Lines changed: 7 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ def determine_version(self):
116116
Based on research I did to create "Chrome Evolution" tool - dfir.blog/chrome-evolution
117117
"""
118118

119-
possible_versions = list(range(1, 125))
119+
possible_versions = list(range(1, 130))
120120
# TODO: remove 82?
121121
previous_possible_versions = possible_versions[:]
122122

@@ -207,6 +207,7 @@ def trim_lesser_versions(version):
207207
trim_lesser_versions_if('is_persistent', self.structure['Cookies']['cookies'], 66)
208208
trim_lesser_versions_if('encrypted_value', self.structure['Cookies']['cookies'], 33)
209209
trim_lesser_versions_if('priority', self.structure['Cookies']['cookies'], 28)
210+
trim_lesser_versions_if('source_type', self.structure['Cookies']['cookies'], 125)
210211
log.debug(f' - Finishing possible versions: {possible_versions}')
211212

212213
possible_versions, previous_possible_versions = \
@@ -273,6 +274,10 @@ def trim_lesser_versions(version):
273274
'key', self.structure['Network Action Predictor']['resource_prefetch_predictor_url'], 55)
274275
trim_lesser_versions_if(
275276
'proto', self.structure['Network Action Predictor']['resource_prefetch_predictor_url'], 54)
277+
if 'lcp_critical_path_predictor' in list(self.structure['Network Action Predictor'].keys()):
278+
trim_lesser_versions(117)
279+
if 'lcp_critical_path_predictor_initiator_origin' in list(self.structure['Network Action Predictor'].keys()):
280+
trim_lesser_versions(129)
276281
log.debug(f' - Finishing possible versions: {possible_versions}')
277282

278283
possible_versions, previous_possible_versions = \
@@ -2223,39 +2228,7 @@ def get_site_characteristics(self, path, dir_name):
22232228
self.parsed_artifacts.extend(result_list)
22242229

22252230
def build_hsts_domain_hashes(self):
2226-
domains = set()
2227-
for artifact in self.parsed_artifacts:
2228-
if not isinstance(artifact, self.HistoryItem):
2229-
continue
2230-
2231-
if not artifact.url:
2232-
continue
2233-
2234-
artifact_url = artifact.url
2235-
2236-
# Some artifact "URLs" will be in invalid forms, which urllib (rightly)
2237-
# won't parse. Modify these URLs so they will parse properly.
2238-
# Examples:
2239-
# Cookie: ".example.com",
2240-
# Preferences (cookie_controls_metadata): "https://[*.]example.com"
2241-
prefixes = ('.', 'https://[*.]', 'http://[*.]')
2242-
2243-
for prefix in prefixes:
2244-
if artifact_url.startswith(prefix):
2245-
artifact_url = 'http://' + artifact_url[len(prefix):]
2246-
2247-
if artifact_url.endswith(',*'):
2248-
artifact_url = artifact_url[:-2]
2249-
2250-
try:
2251-
domain = urllib.parse.urlparse(artifact_url).hostname
2252-
except ValueError as e:
2253-
log.warning(f'Error when parsing domain from {artifact_url}; {e}')
2254-
continue
2255-
2256-
# Some URLs don't have a domain, like local PDF files
2257-
if domain:
2258-
domains.add(domain)
2231+
domains = self.get_clean_hostnames()
22592232

22602233
for domain in domains:
22612234

pyhindsight/browsers/webbrowser.py

Lines changed: 37 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -111,17 +111,48 @@ def dict_factory(cursor, row):
111111
d[col[0]] = row[idx]
112112
return d
113113

114-
def build_md5_hash_list_of_origins(self):
114+
def get_clean_hostnames(self):
115+
hostnames = set()
115116
for artifact in self.parsed_artifacts:
116-
if isinstance(artifact, self.HistoryItem):
117+
if not isinstance(artifact, self.HistoryItem) or not artifact.url:
118+
continue
119+
120+
# Some artifact "URLs", often parsed from Preferences, are two
121+
# origins combined, so split them into two.
122+
# Example from Preferences (3pcd_heuristics_grants):
123+
# "https://[*.]lnkd.in,https://[*.]linkedin.com"
124+
artifact_urls = artifact.url.split(',')
125+
126+
for artifact_url in artifact_urls:
127+
# Some artifact "URLs" will be in invalid forms, which urllib (rightly)
128+
# won't parse. Modify these URLs so they will parse properly.
129+
# Examples:
130+
# Cookie: ".example.com",
131+
# Preferences (cookie_controls_metadata): "https://[*.]example.com"
132+
prefixes = ('.', 'https://[*.]', 'http://[*.]')
133+
134+
for prefix in prefixes:
135+
if artifact_url.startswith(prefix):
136+
artifact_url = 'https://' + artifact_url[len(prefix):]
137+
138+
if artifact_url.endswith(',*'):
139+
artifact_url = artifact_url[:-2]
140+
117141
try:
118-
domain = urllib.parse.urlparse(artifact.url).hostname
142+
hostname = urllib.parse.urlparse(artifact_url).hostname
119143
except ValueError as e:
120-
log.warning(f'Error when parsing domain from {artifact.url}; {e}')
144+
log.warning(f'Error when parsing domain from {artifact_url}; {e}')
121145
continue
146+
122147
# Some URLs don't have a domain, like local PDF files
123-
if domain:
124-
self.origin_hashes[hashlib.md5(domain.encode()).hexdigest()] = domain
148+
if hostname:
149+
hostnames.add(hostname)
150+
return hostnames
151+
152+
def build_md5_hash_list_of_origins(self):
153+
domains = self.get_clean_hostnames()
154+
for domain in domains:
155+
self.origin_hashes[hashlib.md5(domain.encode()).hexdigest()] = domain
125156

126157
class HistoryItem(object):
127158
def __init__(self, item_type, timestamp, profile, url=None, name=None, value=None, interpretation=None):

0 commit comments

Comments
 (0)