Skip to content

Commit 87c3578

Browse files
committed
[issue_508] add methods to remove duplicated items from list properties
Signed-off-by: Meret Behrens <[email protected]>
1 parent d9a2e01 commit 87c3578

File tree

7 files changed

+155
-9
lines changed

7 files changed

+155
-9
lines changed

src/spdx/document_utils.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
# SPDX-FileCopyrightText: 2022 spdx contributors
22
#
33
# SPDX-License-Identifier: Apache-2.0
4-
from typing import Dict, List, Union
4+
from copy import deepcopy
5+
from typing import Any, Dict, List, Union
56

67
from spdx.model.document import Document
78
from spdx.model.file import File
@@ -29,3 +30,30 @@ def get_contained_spdx_elements(document: Document) -> Dict[str, Union[Package,
2930
contained_spdx_elements.update({snippet.spdx_id: snippet for snippet in document.snippets})
3031

3132
return contained_spdx_elements
33+
34+
35+
def create_document_without_duplicates(document: Document) -> Document:
    """
    Return a deep copy of the given document whose elements have de-duplicated list attributes.

    The creation info and every file, package, snippet and extracted licensing info of the copy are inspected;
    each attribute that holds a list is replaced by a list that keeps only the first occurrence of every value.
    The document passed in is left untouched because all changes are applied to the copy. Only attributes of the
    elements are processed; the document's own element lists are not de-duplicated.
    """
    document_copy = deepcopy(document)
    # The creation info is a single object, so it is wrapped in a list to be handled like the element lists.
    element_groups = [
        [document_copy.creation_info],
        document_copy.files,
        document_copy.packages,
        document_copy.snippets,
        document_copy.extracted_licensing_info,
    ]
    for group in element_groups:
        for element in group:
            for attribute_name, attribute_value in element.__dict__.items():
                if not isinstance(attribute_value, list):
                    continue
                # Re-assigning an existing key does not change the size of __dict__, so this is safe mid-iteration.
                setattr(element, attribute_name, create_list_without_duplicates(attribute_value))

    return document_copy
51+
52+
53+
def create_list_without_duplicates(list_with_potential_duplicates: List[Any]) -> List[Any]:
    """
    Build a new list that contains each distinct value of the input once, in order of first occurrence.

    Values are compared with ``==`` through list membership, so unhashable elements are supported.
    Every kept element is a deep copy of the original, and the input list itself is not modified.
    """
    unique_items: List[Any] = []
    for candidate in list_with_potential_duplicates:
        if candidate in unique_items:
            # An equal value was already kept; skip this repetition.
            continue
        unique_items.append(deepcopy(candidate))

    return unique_items

src/spdx/writer/json/json_writer.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,19 @@
44
import json
55
from typing import List
66

7+
from spdx.document_utils import create_document_without_duplicates
78
from spdx.jsonschema.document_converter import DocumentConverter
89
from spdx.model.document import Document
910
from spdx.validation.document_validator import validate_full_spdx_document
1011
from spdx.validation.validation_message import ValidationMessage
1112

1213

1314
def write_document_to_file(
14-
document: Document, file_name: str, validate: bool = True, converter: DocumentConverter = None
15+
document: Document,
16+
file_name: str,
17+
validate: bool = True,
18+
converter: DocumentConverter = None,
19+
drop_duplicates: bool = True,
1520
):
1621
"""
1722
Serializes the provided document to json and writes it to a file with the provided name. Unless validate is set
@@ -22,6 +27,8 @@ def write_document_to_file(
2227
validation_messages: List[ValidationMessage] = validate_full_spdx_document(document)
2328
if validation_messages:
2429
raise ValueError(f"Document is not valid. The following errors were detected: {validation_messages}")
30+
if drop_duplicates:
31+
document = create_document_without_duplicates(document)
2532
if converter is None:
2633
converter = DocumentConverter()
2734
document_dict = converter.convert(document)

src/spdx/writer/rdf/rdf_writer.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from rdflib import DOAP, Graph
77
from rdflib.compare import to_isomorphic
88

9+
from spdx.document_utils import create_document_without_duplicates
910
from spdx.model.document import Document
1011
from spdx.rdfschema.namespace import POINTER_NAMESPACE, SPDX_NAMESPACE
1112
from spdx.validation.document_validator import validate_full_spdx_document
@@ -19,12 +20,13 @@
1920
from spdx.writer.rdf.snippet_writer import add_snippet_to_graph
2021

2122

22-
def write_document_to_file(document: Document, file_name: str, validate: bool):
23+
def write_document_to_file(document: Document, file_name: str, validate: bool, drop_duplicates: bool = True):
2324
if validate:
2425
validation_messages: List[ValidationMessage] = validate_full_spdx_document(document)
2526
if validation_messages:
2627
raise ValueError(f"Document is not valid. The following errors were detected: {validation_messages}")
27-
28+
if drop_duplicates:
29+
document = create_document_without_duplicates(document)
2830
graph = Graph()
2931
doc_namespace = document.creation_info.document_namespace
3032
external_doc_ref_to_namespace: Dict[str, str] = {

src/spdx/writer/tagvalue/tagvalue_writer.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
# limitations under the License.
1111
from typing import List, TextIO
1212

13+
from spdx.document_utils import create_document_without_duplicates
1314
from spdx.model.document import Document
1415
from spdx.validation.document_validator import validate_full_spdx_document
1516
from spdx.validation.validation_message import ValidationMessage
@@ -29,11 +30,13 @@
2930
)
3031

3132

32-
def write_document_to_file(document: Document, file_name: str, validate: bool = True):
33+
def write_document_to_file(document: Document, file_name: str, validate: bool = True, drop_duplicates: bool = True):
3334
if validate:
3435
validation_messages: List[ValidationMessage] = validate_full_spdx_document(document)
3536
if validation_messages:
3637
raise ValueError(f"Document is not valid. The following errors were detected: {validation_messages}")
38+
if drop_duplicates:
39+
document = create_document_without_duplicates(document)
3740

3841
with open(file_name, "w") as out:
3942
write_document(document, out)

src/spdx/writer/xml/xml_writer.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,19 @@
55

66
import xmltodict
77

8+
from spdx.document_utils import create_document_without_duplicates
89
from spdx.jsonschema.document_converter import DocumentConverter
910
from spdx.model.document import Document
1011
from spdx.validation.document_validator import validate_full_spdx_document
1112
from spdx.validation.validation_message import ValidationMessage
1213

1314

1415
def write_document_to_file(
15-
document: Document, file_name: str, validate: bool = True, converter: DocumentConverter = None
16+
document: Document,
17+
file_name: str,
18+
validate: bool = True,
19+
converter: DocumentConverter = None,
20+
drop_duplicates: bool = True,
1621
):
1722
"""
1823
Serializes the provided document to XML and writes it to a file with the provided name. Unless validate is set
@@ -23,6 +28,9 @@ def write_document_to_file(
2328
validation_messages: List[ValidationMessage] = validate_full_spdx_document(document)
2429
if validation_messages:
2530
raise ValueError(f"Document is not valid. The following errors were detected: {validation_messages}")
31+
if drop_duplicates:
32+
document = create_document_without_duplicates(document)
33+
2634
if converter is None:
2735
converter = DocumentConverter()
2836
document_dict = {"Document": converter.convert(document)}

src/spdx/writer/yaml/yaml_writer.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,19 @@
55

66
import yaml
77

8+
from spdx.document_utils import create_document_without_duplicates
89
from spdx.jsonschema.document_converter import DocumentConverter
910
from spdx.model.document import Document
1011
from spdx.validation.document_validator import validate_full_spdx_document
1112
from spdx.validation.validation_message import ValidationMessage
1213

1314

1415
def write_document_to_file(
15-
document: Document, file_name: str, validate: bool = True, converter: DocumentConverter = None
16+
document: Document,
17+
file_name: str,
18+
validate: bool = True,
19+
converter: DocumentConverter = None,
20+
drop_duplicates: bool = True,
1621
):
1722
"""
1823
Serializes the provided document to yaml and writes it to a file with the provided name. Unless validate is set
@@ -23,6 +28,8 @@ def write_document_to_file(
2328
validation_messages: List[ValidationMessage] = validate_full_spdx_document(document)
2429
if validation_messages:
2530
raise ValueError(f"Document is not valid. The following errors were detected: {validation_messages}")
31+
if drop_duplicates:
32+
document = create_document_without_duplicates(document)
2633
if converter is None:
2734
converter = DocumentConverter()
2835
document_dict = converter.convert(document)

tests/spdx/test_document_utils.py

Lines changed: 93 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,28 @@
55

66
import pytest
77

8-
from spdx.document_utils import get_contained_spdx_element_ids, get_contained_spdx_elements, get_element_from_spdx_id
9-
from tests.spdx.fixtures import document_fixture, file_fixture, package_fixture, snippet_fixture
8+
from spdx.document_utils import (
9+
create_document_without_duplicates,
10+
create_list_without_duplicates,
11+
get_contained_spdx_element_ids,
12+
get_contained_spdx_elements,
13+
get_element_from_spdx_id,
14+
)
15+
from spdx.model.file import FileType
16+
from spdx.model.spdx_no_assertion import SpdxNoAssertion
17+
from spdx.model.spdx_none import SpdxNone
18+
from tests.spdx.fixtures import (
19+
actor_fixture,
20+
checksum_fixture,
21+
creation_info_fixture,
22+
document_fixture,
23+
external_document_ref_fixture,
24+
external_package_ref_fixture,
25+
extracted_licensing_info_fixture,
26+
file_fixture,
27+
package_fixture,
28+
snippet_fixture,
29+
)
1030

1131

1232
@pytest.fixture
@@ -34,3 +54,74 @@ def test_get_contained_spdx_elements(variables):
3454
assert contained_elements[package.spdx_id] == package
3555
assert contained_elements[file.spdx_id] == file
3656
assert contained_elements[snippet.spdx_id] == snippet
57+
58+
59+
def test_create_list_without_duplicates():
    """Repeated integers are dropped while the order of first occurrences is kept."""
    deduplicated = create_list_without_duplicates([1, 2, 3, 5, 1, 67, 9, 67])

    expected = [1, 2, 3, 5, 67, 9]
    assert deduplicated == expected
65+
66+
67+
def test_create_document_without_duplicates():
    """A document whose element list properties each hold a value twice is reduced to single entries."""
    # Input document: every list property set below contains the same value two times.
    creation_info_with_duplicates = creation_info_fixture(
        creators=[actor_fixture(name="creatorName"), actor_fixture(name="creatorName")],
        external_document_refs=[external_document_ref_fixture(), external_document_ref_fixture()],
    )
    package_with_duplicates = package_fixture(
        checksums=[checksum_fixture(), checksum_fixture()],
        license_info_from_files=[SpdxNoAssertion(), SpdxNoAssertion()],
        external_references=[external_package_ref_fixture(), external_package_ref_fixture()],
        attribution_texts=["duplicated text", "duplicated text"],
    )
    file_with_duplicates = file_fixture(
        checksums=[checksum_fixture(), checksum_fixture()],
        file_types=[FileType.TEXT, FileType.TEXT],
        license_info_in_file=[SpdxNoAssertion(), SpdxNoAssertion()],
        contributors=["duplicated contributor", "duplicated contributor"],
        attribution_texts=["duplicated text", "duplicated text"],
    )
    snippet_with_duplicates = snippet_fixture(
        license_info_in_snippet=[SpdxNone(), SpdxNone()],
        attribution_texts=["duplicated text", "duplicated text"],
    )
    licensing_info_with_duplicates = extracted_licensing_info_fixture(
        cross_references=["duplicated reference", "duplicated reference"]
    )
    document = document_fixture(
        creation_info=creation_info_with_duplicates,
        packages=[package_with_duplicates],
        files=[file_with_duplicates],
        snippets=[snippet_with_duplicates],
        extracted_licensing_info=[licensing_info_with_duplicates],
    )

    # Expected document: the same elements with each list property holding its value once.
    expected_creation_info = creation_info_fixture(
        creators=[actor_fixture(name="creatorName")], external_document_refs=[external_document_ref_fixture()]
    )
    expected_package = package_fixture(
        checksums=[checksum_fixture()],
        license_info_from_files=[SpdxNoAssertion()],
        external_references=[external_package_ref_fixture()],
        attribution_texts=["duplicated text"],
    )
    expected_file = file_fixture(
        checksums=[checksum_fixture()],
        file_types=[FileType.TEXT],
        license_info_in_file=[SpdxNoAssertion()],
        contributors=["duplicated contributor"],
        attribution_texts=["duplicated text"],
    )
    expected_snippet = snippet_fixture(license_info_in_snippet=[SpdxNone()], attribution_texts=["duplicated text"])
    expected_licensing_info = extracted_licensing_info_fixture(cross_references=["duplicated reference"])
    expected_document = document_fixture(
        creation_info=expected_creation_info,
        packages=[expected_package],
        files=[expected_file],
        snippets=[expected_snippet],
        extracted_licensing_info=[expected_licensing_info],
    )

    assert create_document_without_duplicates(document) == expected_document

0 commit comments

Comments
 (0)