Skip to content
12 changes: 12 additions & 0 deletions scanpipe/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1909,6 +1909,18 @@ def path_pattern(self, pattern):
"""Resources with a path that match the provided ``pattern``."""
return self.filter(path__regex=posix_regex_to_django_regex_lookup(pattern))

def path_patterns(self, patterns, ignore=False):
    """
    Resources with a path that match any of the provided ``patterns``.

    When ``ignore`` is True, return instead the resources with a path that
    match none of the ``patterns``.
    """
    any_pattern_match = Q()
    for path_pattern in patterns:
        path_regex = posix_regex_to_django_regex_lookup(path_pattern)
        any_pattern_match |= Q(path__regex=path_regex)

    condition = ~any_pattern_match if ignore else any_pattern_match
    return self.filter(condition)

def has_directory_content_fingerprint(self):
"""
Resources that have the key `directory_content` set in the `extra_data`
Expand Down
208 changes: 140 additions & 68 deletions scanpipe/pipes/d2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
from collections import defaultdict
from contextlib import suppress
from pathlib import Path
from re import match as regex_match
from typing import NamedTuple

from django.contrib.postgres.aggregates.general import ArrayAgg
from django.core.exceptions import MultipleObjectsReturned
Expand All @@ -43,6 +45,7 @@
from scanpipe import pipes
from scanpipe.models import CodebaseRelation
from scanpipe.models import CodebaseResource
from scanpipe.models import posix_regex_to_django_regex_lookup
from scanpipe.pipes import LoopProgress
from scanpipe.pipes import flag
from scanpipe.pipes import get_resource_diff_ratio
Expand Down Expand Up @@ -774,94 +777,163 @@ def _map_javascript_resource(
resource.update(status=flag.MAPPED)


def _map_about_file_resource(project, about_file_resource, to_resources):
about_file_location = str(about_file_resource.location_path)
package_data = resolve.resolve_about_package(about_file_location)
class AboutFileIndexes(NamedTuple):
    """
    Lookup tables built from the ``from/`` .ABOUT files.

    Every table is keyed by the path of the ABOUT file resource.
    """

    # ABOUT path -> regex built from the package data "filename" value.
    regex_by_about_path: dict
    # ABOUT path -> list of regexes built from the "ignored_resources" values.
    ignore_regex_by_about_path: dict
    # ABOUT path -> the ABOUT file resource itself.
    about_resources_by_path: dict
    # ABOUT path -> package data resolved from the ABOUT file.
    about_pkgdata_by_path: dict
    # ABOUT path -> list of ``to/`` resources mapped to that ABOUT file.
    mapped_resources_by_aboutpath: dict

    @classmethod
    def create_indexes(cls, project, from_about_files):
        """
        Return an ``AboutFileIndexes`` built from the ``from_about_files``.

        An ABOUT file that cannot be resolved to package data, or that has no
        ``filename`` (about_resource) value, is reported on the ``project``
        with ``add_error`` and gets no entry in the regex, resource, and
        mapped resources indexes.
        """
        about_pkgdata_by_path = {}
        regex_by_about_path = {}
        ignore_regex_by_about_path = {}
        about_resources_by_path = {}
        mapped_resources_by_aboutpath = {}

        for about_file_resource in from_about_files:
            about_path = about_file_resource.path
            package_data = resolve.resolve_about_package(
                input_location=str(about_file_resource.location_path)
            )
            error_message_details = {
                "path": about_path,
                "package_data": package_data,
            }
            if not package_data:
                project.add_error(
                    description="Cannot create package from ABOUT file",
                    model="map_about_files",
                    details=error_message_details,
                )
                continue

            about_pkgdata_by_path[about_path] = package_data
            files_pattern = package_data.get("filename")
            if not files_pattern:
                # Cannot map anything without the about_resource value.
                project.add_error(
                    description="ABOUT file does not have about_resource",
                    model="map_about_files",
                    details=error_message_details,
                )
                continue

            regex_by_about_path[about_path] = posix_regex_to_django_regex_lookup(
                files_pattern
            )

            # ``or`` fallbacks: tolerate keys that are present with a None value.
            extra_data = package_data.get("extra_data") or {}
            ignore_regex = [
                posix_regex_to_django_regex_lookup(pattern)
                for pattern in extra_data.get("ignored_resources") or []
            ]
            if ignore_regex:
                ignore_regex_by_about_path[about_path] = ignore_regex

            about_resources_by_path[about_path] = about_file_resource
            mapped_resources_by_aboutpath[about_path] = []

        return cls(
            about_pkgdata_by_path=about_pkgdata_by_path,
            regex_by_about_path=regex_by_about_path,
            ignore_regex_by_about_path=ignore_regex_by_about_path,
            about_resources_by_path=about_resources_by_path,
            mapped_resources_by_aboutpath=mapped_resources_by_aboutpath,
        )

    def match_to_resources(self, to_resources):
        """
        Assign each of the ``to_resources`` to the ABOUT file covering it.

        A resource is assigned to the first ABOUT file whose regex matches its
        path, unless the path also matches one of that ABOUT file ignore
        regexes. Assigned resources are appended to
        ``mapped_resources_by_aboutpath`` and flagged as ``ABOUT_MAPPED``.
        """
        for to_resource in to_resources:
            resource_path = to_resource.path

            # Only the first matching ABOUT file is considered for a resource.
            about_path = next(
                (
                    path
                    for path, regex_pattern in self.regex_by_about_path.items()
                    if regex_match(pattern=regex_pattern, string=resource_path)
                ),
                None,
            )
            if about_path is None:
                continue

            ignore_regex_patterns = self.ignore_regex_by_about_path.get(about_path, [])
            if any(
                regex_match(pattern=ignore_regex_pattern, string=resource_path)
                for ignore_regex_pattern in ignore_regex_patterns
            ):
                continue

            self.mapped_resources_by_aboutpath.setdefault(about_path, []).append(
                to_resource
            )
            to_resource.update(status=flag.ABOUT_MAPPED)

    def create_about_packages_relations(self, project):
        """
        Create the packages and relations for the mapped ``to/`` resources.

        For each ABOUT file with mapped resources, create or update its package
        and relate the ABOUT file to every mapped resource. An ABOUT file
        without any mapped resource is reported with ``project.add_warning``.
        The ``.LICENSE`` and ``.NOTICE`` companions of every indexed ABOUT file
        are flagged as ``ABOUT_MAPPED``.
        """
        for about_path, mapped_resources in self.mapped_resources_by_aboutpath.items():
            about_file_resource = self.about_resources_by_path[about_path]
            package_data = self.about_pkgdata_by_path[about_path]

            if not mapped_resources:
                # If there's nothing to map on the ``to/`` do not create the package.
                project.add_warning(
                    description=(
                        "Resource paths listed at about_resource is not found"
                        " in the to/ codebase"
                    ),
                    model="map_about_files",
                    details={
                        "path": about_file_resource.path,
                        "package_data": package_data,
                    },
                )
                continue

            # Create the Package using .ABOUT data and assign related codebase_resources
            pipes.update_or_create_package(project, package_data, mapped_resources)

            # Map the .ABOUT file resource to all related resources in the ``to/`` side.
            for mapped_resource in mapped_resources:
                pipes.make_relation(
                    from_resource=about_file_resource,
                    to_resource=mapped_resource,
                    map_type="about_file",
                )

            about_file_resource.update(status=flag.ABOUT_MAPPED)

        for about_file_resource in self.about_resources_by_path.values():
            about_file_companions = (
                about_file_resource.siblings()
                .filter(name__startswith=about_file_resource.name_without_extension)
                .filter(extension__in=[".LICENSE", ".NOTICE"])
            )
            about_file_companions.update(status=flag.ABOUT_MAPPED)


def map_about_files(project, logger=None):
    """
    Map ``from/`` .ABOUT files to their related ``to/`` resources.

    Do nothing when the ``from/`` codebase has no .ABOUT file. Otherwise, index
    the ABOUT files, match the ``to/`` resources that have no status yet
    against that index, then create the packages and relations.
    The optional ``logger`` callable receives a progress message.
    """
    project_resources = project.codebaseresources
    from_about_files = (
        project_resources.files().from_codebase().filter(extension=".ABOUT")
    )
    if not from_about_files.exists():
        return

    indexes = AboutFileIndexes.create_indexes(
        project=project, from_about_files=from_about_files
    )

    to_resources = project_resources.to_codebase().no_status()

    if logger:
        logger(
            f"Mapping {from_about_files.count():,d} .ABOUT files found in the from/ "
            f"codebase."
        )

    indexes.match_to_resources(to_resources)
    indexes.create_about_packages_relations(project)


def map_javascript_post_purldb_match(project, logger=None):
Expand Down
20 changes: 10 additions & 10 deletions scanpipe/tests/data/d2d/about_files/expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@
"source_packages": [],
"extra_data": {
"ignored_resources": [
"flume-ng-node-1.9.0.jar-extract/org/apache/flume/node/ConfigurationProvider.class"
"*flume-ng-node-*.jar-extract/org/apache/flume/node/ConfigurationProvider.class"
]
},
"package_uid": "",
Expand Down Expand Up @@ -167,9 +167,9 @@
"status": "not-deployed",
"tag": "from",
"extension": ".ABOUT",
"md5": "04c25308d59068db649ebfd5d8103338",
"sha1": "9625fc925a01cfa22b6e1ce083a1b1802c2ce78c",
"sha256": "46142b274cda38e9c183001b58a72c8fc442b2162ae85e9a453a2c7a1a86d427",
"md5": "4f8ac19bc3661bbaac91fb8652b6c4cb",
"sha1": "ac27839cc3010c96b34796be83bc2e6b6bc1882b",
"sha256": "055133491484d680991ed98ba1f791e88f4552955e15d9c5b3b14c5d46b4ee16",
"sha512": "",
"programming_language": "",
"is_binary": false,
Expand Down Expand Up @@ -198,9 +198,9 @@
"status": "about-mapped",
"tag": "from",
"extension": ".ABOUT",
"md5": "b1d5c62c364d4470557bfba7d0338758",
"sha1": "828f79d9fc0619a5b869c46a54b10ee3573a00bb",
"sha256": "de514210e135dddffb6ace69aa5fe27e1873146e05eeb5b05c6de1f8c00b0010",
"md5": "c7fab493a90ebf247954e1e30582ba8f",
"sha1": "3090f7d036bf68c5421364ac03a094715348f9de",
"sha256": "71f10662c0806de172a04a07fffbb3d22bd72ddacef4188849a480afd4e46849",
"sha512": "",
"programming_language": "",
"is_binary": false,
Expand Down Expand Up @@ -229,9 +229,9 @@
"status": "about-mapped",
"tag": "from",
"extension": ".LICENSE",
"md5": "2b42edef8fa55315f34f2370b4715ca9",
"sha1": "58853eb8199b5afe72a73a25fd8cf8c94285174b",
"sha256": "43070e2d4e532684de521b885f385d0841030efa2b1a20bafb76133a5e1379c1",
"md5": "94c82ae800466538d15278d6be4feedc",
"sha1": "3837fdbc9d942bcd1c5f2d419148e944f7ce996a",
"sha256": "a4da19948e6906fa8af95a258b9a354f641adc6215956f0ec63f429a10f0f603",
"sha512": "",
"programming_language": "",
"is_binary": false,
Expand Down
Binary file modified scanpipe/tests/data/d2d/about_files/from-with-about-file.zip
Binary file not shown.
2 changes: 2 additions & 0 deletions scanpipe/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1553,6 +1553,7 @@ def test_scanpipe_codebase_resource_queryset_path_pattern(self):
make_resource_file(self.project1, path="dir/.example")
make_resource_file(self.project1, path="dir/subdir/readme.html")
make_resource_file(self.project1, path="foo$.class")
make_resource_file(self.project1, path="example-1.0.jar")

patterns = [
"example",
Expand All @@ -1569,6 +1570,7 @@ def test_scanpipe_codebase_resource_queryset_path_pattern(self):
"dir/*/readme.*",
r"*$.class",
"*readme.htm?",
"example-*.jar",
]

for pattern in patterns:
Expand Down