diff --git a/src/spdx_tools/spdx/parser/rdf/file_parser.py b/src/spdx_tools/spdx/parser/rdf/file_parser.py index 18512fe6c..83e61b4a5 100644 --- a/src/spdx_tools/spdx/parser/rdf/file_parser.py +++ b/src/spdx_tools/spdx/parser/rdf/file_parser.py @@ -1,7 +1,9 @@ # SPDX-FileCopyrightText: 2023 spdx contributors # # SPDX-License-Identifier: Apache-2.0 -from rdflib import RDFS, Graph, URIRef +from typing import Union + +from rdflib import RDFS, BNode, Graph, URIRef from spdx_tools.spdx.model import File, FileType from spdx_tools.spdx.parser.logger import Logger @@ -23,7 +25,7 @@ from spdx_tools.spdx.rdfschema.namespace import SPDX_NAMESPACE -def parse_file(file_node: URIRef, graph: Graph, doc_namespace: str) -> File: +def parse_file(file_node: Union[URIRef, BNode], graph: Graph, doc_namespace: str) -> File: logger = Logger() spdx_id = parse_spdx_id(file_node, doc_namespace, graph) name = parse_literal(logger, graph, file_node, SPDX_NAMESPACE.fileName) diff --git a/src/spdx_tools/spdx/parser/rdf/graph_parsing_functions.py b/src/spdx_tools/spdx/parser/rdf/graph_parsing_functions.py index ef1a505db..257367595 100644 --- a/src/spdx_tools/spdx/parser/rdf/graph_parsing_functions.py +++ b/src/spdx_tools/spdx/parser/rdf/graph_parsing_functions.py @@ -86,8 +86,8 @@ def parse_enum_value(enum_str: str, enum_class: Type[Enum], prefix: str) -> Enum raise SPDXParsingError([f"Invalid value for {enum_class}: {enum_str}"]) -def parse_spdx_id(resource: URIRef, doc_namespace: str, graph: Graph) -> Optional[str]: - if not resource: +def parse_spdx_id(resource: Union[URIRef, BNode], doc_namespace: str, graph: Graph) -> Optional[str]: + if not resource or isinstance(resource, BNode): return None if resource.startswith(f"{doc_namespace}#"): return resource.fragment diff --git a/src/spdx_tools/spdx/parser/rdf/package_parser.py b/src/spdx_tools/spdx/parser/rdf/package_parser.py index 1cb6a72e4..a0de0e904 100644 --- a/src/spdx_tools/spdx/parser/rdf/package_parser.py +++ 
b/src/spdx_tools/spdx/parser/rdf/package_parser.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023 spdx contributors # # SPDX-License-Identifier: Apache-2.0 -from typing import Optional +from typing import Optional, Union from rdflib import DOAP, RDFS, Graph, URIRef from rdflib.term import BNode @@ -34,7 +34,7 @@ from spdx_tools.spdx.rdfschema.namespace import REFERENCE_NAMESPACE, SPDX_NAMESPACE -def parse_package(package_node: URIRef, graph: Graph, doc_namespace: str) -> Package: +def parse_package(package_node: Union[URIRef, BNode], graph: Graph, doc_namespace: str) -> Package: logger = Logger() spdx_id = parse_spdx_id(package_node, doc_namespace, graph) name = parse_literal(logger, graph, package_node, SPDX_NAMESPACE.name) @@ -120,7 +120,6 @@ def parse_package(package_node: URIRef, graph: Graph, doc_namespace: str) -> Pac valid_until_date = parse_literal( logger, graph, package_node, SPDX_NAMESPACE.validUntilDate, parsing_method=datetime_from_str ) - raise_parsing_error_if_logger_has_messages(logger, "Package") package = construct_or_raise_parsing_error( Package, diff --git a/src/spdx_tools/spdx/parser/rdf/snippet_parser.py b/src/spdx_tools/spdx/parser/rdf/snippet_parser.py index b26d6cdfc..e59076654 100644 --- a/src/spdx_tools/spdx/parser/rdf/snippet_parser.py +++ b/src/spdx_tools/spdx/parser/rdf/snippet_parser.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023 spdx contributors # # SPDX-License-Identifier: Apache-2.0 -from typing import Dict, Optional, Tuple +from typing import Dict, Optional, Tuple, Union from rdflib import RDF, RDFS, Graph from rdflib.exceptions import UniquenessError @@ -27,7 +27,7 @@ from spdx_tools.spdx.rdfschema.namespace import POINTER_NAMESPACE, SPDX_NAMESPACE -def parse_snippet(snippet_node: URIRef, graph: Graph, doc_namespace: str) -> Snippet: +def parse_snippet(snippet_node: Union[URIRef, BNode], graph: Graph, doc_namespace: str) -> Snippet: logger = Logger() spdx_id = parse_spdx_id(snippet_node, doc_namespace, graph) file_spdx_id_uri 
= get_value_from_graph( diff --git a/tests/spdx/parser/rdf/data/invalid_documents/file_without_spdx_ids.xml b/tests/spdx/parser/rdf/data/invalid_documents/file_without_spdx_ids.xml new file mode 100644 index 000000000..23fe6c7de --- /dev/null +++ b/tests/spdx/parser/rdf/data/invalid_documents/file_without_spdx_ids.xml @@ -0,0 +1,53 @@ + + + + documentComment + documentName + + + + 3.19 + 2022-12-01T00:00:00Z + + + + + packageName + http://differentdownload.com + + + + + + 71c4025dd9897b364f3ebbb42c484ff43d00791c + + + ./fileName.py + + + + + + + 1 + + + + + + + 2 + + + + + + + + + diff --git a/tests/spdx/parser/rdf/test_file_parser.py b/tests/spdx/parser/rdf/test_file_parser.py index fb24ed0da..3fe36c267 100644 --- a/tests/spdx/parser/rdf/test_file_parser.py +++ b/tests/spdx/parser/rdf/test_file_parser.py @@ -4,10 +4,12 @@ import os from unittest import TestCase +import pytest from license_expression import get_spdx_licensing -from rdflib import RDF, Graph, URIRef +from rdflib import RDF, BNode, Graph, URIRef from spdx_tools.spdx.model import Checksum, ChecksumAlgorithm, FileType, SpdxNoAssertion +from spdx_tools.spdx.parser.error import SPDXParsingError from spdx_tools.spdx.parser.rdf.file_parser import parse_file from spdx_tools.spdx.rdfschema.namespace import SPDX_NAMESPACE @@ -35,3 +37,18 @@ def test_parse_file(): assert file.license_comment == "licenseComment" assert file.notice == "fileNotice" assert file.attribution_texts == ["fileAttributionText"] + + +def test_parse_invalid_file(): + graph = Graph().parse(os.path.join(os.path.dirname(__file__), "data/invalid_documents/file_without_spdx_ids.xml")) + file_node = graph.value(predicate=RDF.type, object=SPDX_NAMESPACE.File) + doc_namespace = "https://some.namespace" + + assert isinstance(file_node, BNode) + with pytest.raises(SPDXParsingError) as err: + parse_file(file_node, graph, doc_namespace) + + assert err.value.get_messages() == [ + "Error while constructing File: ['SetterError File: type of argument " + 
'"spdx_id" must be str; got NoneType instead: None\']' + ] diff --git a/tests/spdx/parser/rdf/test_package_parser.py b/tests/spdx/parser/rdf/test_package_parser.py index f6c968073..9d2bcfc1f 100644 --- a/tests/spdx/parser/rdf/test_package_parser.py +++ b/tests/spdx/parser/rdf/test_package_parser.py @@ -18,6 +18,7 @@ PackageVerificationCode, SpdxNoAssertion, ) +from spdx_tools.spdx.parser.error import SPDXParsingError from spdx_tools.spdx.parser.rdf.package_parser import parse_external_package_ref, parse_package from spdx_tools.spdx.rdfschema.namespace import SPDX_NAMESPACE @@ -95,3 +96,18 @@ def test_external_package_ref_parser(download_location, category, locator, type, assert external_package_ref.locator == locator assert external_package_ref.reference_type == type assert external_package_ref.comment == comment + + +def test_parse_invalid_package(): + graph = Graph().parse(os.path.join(os.path.dirname(__file__), "data/invalid_documents/file_without_spdx_ids.xml")) + package_node = graph.value(predicate=RDF.type, object=SPDX_NAMESPACE.Package) + doc_namespace = "https://some.namespace" + + assert isinstance(package_node, BNode) + with pytest.raises(SPDXParsingError) as err: + parse_package(package_node, graph, doc_namespace) + + assert err.value.get_messages() == [ + "Error while constructing Package: ['SetterError Package: type of argument " + '"spdx_id" must be str; got NoneType instead: None\']' + ] diff --git a/tests/spdx/parser/rdf/test_snippet_parser.py b/tests/spdx/parser/rdf/test_snippet_parser.py index 1b4022827..73970035b 100644 --- a/tests/spdx/parser/rdf/test_snippet_parser.py +++ b/tests/spdx/parser/rdf/test_snippet_parser.py @@ -159,3 +159,19 @@ def add_range_to_graph_helper(graph, predicate_value_class_member): graph.add((pointer_node, RDF.type, pointer_class)) graph.add((start_end_pointer, predicate, pointer_node)) graph.add((pointer_node, pointer_member, Literal(value))) + + +def test_parse_invalid_snippet(): + graph = 
Graph().parse(os.path.join(os.path.dirname(__file__), "data/invalid_documents/file_without_spdx_ids.xml")) + snippet_node = graph.value(predicate=RDF.type, object=SPDX_NAMESPACE.Snippet) + doc_namespace = "https://some.namespace" + + assert isinstance(snippet_node, BNode) + with pytest.raises(SPDXParsingError) as err: + parse_snippet(snippet_node, graph, doc_namespace) + + assert err.value.get_messages() == [ + "Error while constructing Snippet: ['SetterError Snippet: type of argument " + "\"spdx_id\" must be str; got NoneType instead: None', 'SetterError Snippet: " + 'type of argument "file_spdx_id" must be str; got NoneType instead: None\']' + ]