From c2ab865f701b5858aa30feba5a9cda0e6062e514 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Armin=20T=C3=A4nzer?= Date: Wed, 1 Feb 2023 09:25:30 +0100 Subject: [PATCH 1/3] [issue-374] implement license expression validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Armin Tänzer --- .../license_expression_validator.py | 51 ++++++++-- src/spdx/validation/validation_message.py | 1 + .../test_license_expression_validator.py | 92 +++++++++++++++++-- 3 files changed, 129 insertions(+), 15 deletions(-) diff --git a/src/spdx/validation/license_expression_validator.py b/src/spdx/validation/license_expression_validator.py index 0d6d0e687..0beafb86c 100644 --- a/src/spdx/validation/license_expression_validator.py +++ b/src/spdx/validation/license_expression_validator.py @@ -11,25 +11,58 @@ from typing import List, Optional, Union -from license_expression import LicenseExpression +from license_expression import LicenseExpression, get_spdx_licensing, ExpressionError, ExpressionParseError +from spdx.model.document import Document + from spdx.model.spdx_no_assertion import SpdxNoAssertion from spdx.model.spdx_none import SpdxNone -from spdx.validation.validation_message import ValidationMessage +from spdx.validation.validation_message import ValidationMessage, ValidationContext, SpdxElementType def validate_license_expressions(license_expressions: Optional[ - Union[List[LicenseExpression], SpdxNoAssertion, SpdxNone]]) -> List[ValidationMessage]: + Union[List[LicenseExpression], SpdxNoAssertion, SpdxNone]], document: Document, parent_id: str) -> List[ValidationMessage]: if license_expressions in [SpdxNoAssertion(), SpdxNone(), None]: return [] - error_messages = [] + context = ValidationContext(parent_id=parent_id, element_type=SpdxElementType.LICENSE_EXPRESSION, full_element=license_expressions) + validation_messages = [] for license_expression in license_expressions: - error_messages.extend(validate_license_expression(license_expression)) + validation_messages.extend(validate_license_expression(license_expression, document, parent_id, context)) + + return validation_messages + + +def validate_license_expression(license_expression: Optional[ + Union[LicenseExpression, SpdxNoAssertion, SpdxNone]], document: Document, parent_id: str, context: ValidationContext = None) -> List[ValidationMessage]: + if license_expression in [SpdxNoAssertion(), SpdxNone(), None]: + return [] + + if context is None: + context = ValidationContext(parent_id=parent_id, element_type=SpdxElementType.LICENSE_EXPRESSION, full_element=license_expression) + + validation_messages = [] + license_ref_ids: List[str] = [license_ref.license_id for license_ref in document.extracted_licensing_info] - return error_messages + for non_spdx_token in get_spdx_licensing().validate(license_expression).invalid_symbols: + if non_spdx_token not in license_ref_ids: + validation_messages.append( + ValidationMessage( + f"Unrecognized license reference: {non_spdx_token}. license_expression must only use IDs from the license list or extracted licensing info, but is: {license_expression}", + context) + ) + try: + get_spdx_licensing().parse(str(license_expression), validate=True, strict=True) + except ExpressionParseError as err: + validation_messages.append( + ValidationMessage( + f"{err}. for license_expression: {license_expression}", + context) + ) + except ExpressionError: + # This happens for invalid symbols but the error provides only a string of these. On the other hand, + # get_spdx_licensing().validate() gives an actual list of invalid symbols, so this is handled above. + pass -def validate_license_expression(license_expression: LicenseExpression) -> List[ValidationMessage]: - # TODO: implement this once we have a better license expression model: https://github.com/spdx/tools-python/issues/374 - return [] + return validation_messages diff --git a/src/spdx/validation/validation_message.py b/src/spdx/validation/validation_message.py index bf1490126..a86fc2aa7 100644 --- a/src/spdx/validation/validation_message.py +++ b/src/spdx/validation/validation_message.py @@ -15,6 +15,7 @@ class SpdxElementType(Enum): + LICENSE_EXPRESSION = auto() PACKAGE_VERIFICATION_CODE = auto() EXTERNAL_DOCUMENT_REF = auto() CHECKSUM = auto() diff --git a/tests/spdx/validation/test_license_expression_validator.py b/tests/spdx/validation/test_license_expression_validator.py index 458a98acc..b6b89ba25 100644 --- a/tests/spdx/validation/test_license_expression_validator.py +++ b/tests/spdx/validation/test_license_expression_validator.py @@ -10,14 +10,94 @@ # limitations under the License. from typing import List +from unittest import TestCase -from license_expression import Licensing -from spdx.validation.license_expression_validator import validate_license_expression -from spdx.validation.validation_message import ValidationMessage +import pytest +from license_expression import get_spdx_licensing, LicenseExpression +from spdx.model.document import Document +from spdx.model.spdx_no_assertion import SpdxNoAssertion +from spdx.model.spdx_none import SpdxNone +from spdx.validation.license_expression_validator import validate_license_expression, validate_license_expressions +from spdx.validation.validation_message import ValidationMessage, ValidationContext, SpdxElementType +from tests.spdx.fixtures import document_fixture, extracted_licensing_info_fixture -def test_valid_license_expression(): - license_expression = Licensing().parse("something") - validation_messages: List[ValidationMessage] = validate_license_expression(license_expression) +FIXTURE_LICENSE_ID = extracted_licensing_info_fixture().license_id + + +@pytest.mark.parametrize("expression_string", + ["MIT", FIXTURE_LICENSE_ID, + f"GPL-2.0-only with GPL-CC-1.0 and {FIXTURE_LICENSE_ID} with 389-exception or Beerware"]) +def test_valid_license_expression(expression_string): + document: Document = document_fixture() + license_expression: LicenseExpression = get_spdx_licensing().parse(expression_string) + validation_messages: List[ValidationMessage] = validate_license_expression(license_expression, document, + parent_id="SPDXRef-File") + + assert validation_messages == [] + + +@pytest.mark.parametrize("expression", [SpdxNone(), SpdxNoAssertion()]) +def test_none_and_no_assertion(expression): + document: Document = document_fixture() + validation_messages: List[ValidationMessage] = validate_license_expression(expression, document, + parent_id="SPDXRef-File") + assert validation_messages == [] + + +@pytest.mark.parametrize("expression_list", + [SpdxNone(), SpdxNoAssertion(), + [get_spdx_licensing().parse("MIT and GPL-3.0-only"), + get_spdx_licensing().parse(FIXTURE_LICENSE_ID)] + ]) +def test_valid_license_expressions(expression_list): + document: Document = document_fixture() + validation_messages: List[ValidationMessage] = validate_license_expressions(expression_list, document, + parent_id="SPDXRef-File") assert validation_messages == [] + + +@pytest.mark.parametrize("expression_string, unknown_symbols", + [(f"{FIXTURE_LICENSE_ID} or LicenseRef-22", ["LicenseRef-22"]), + ("nope with 389-exception and _.- or LicenseRef-10", ["nope", "_.-", "LicenseRef-10"]) + ]) +def test_invalid_license_expression_with_unknown_symbols(expression_string, unknown_symbols): + document: Document = document_fixture() + license_expression: LicenseExpression = get_spdx_licensing().parse(expression_string) + parent_id = "SPDXRef-File" + context = ValidationContext(parent_id=parent_id, element_type=SpdxElementType.LICENSE_EXPRESSION, + full_element=license_expression) + + validation_messages: List[ValidationMessage] = validate_license_expression(license_expression, document, parent_id) + expected_messages = [ValidationMessage( + f"Unrecognized license reference: {symbol}. license_expression must only use IDs from the license list or extracted licensing info, but is: {license_expression}", + context + ) for symbol in unknown_symbols] + + TestCase().assertCountEqual(validation_messages, expected_messages) + + +@pytest.mark.parametrize("expression_string, expected_message", + [("MIT with MIT", + 'A plain license symbol cannot be used as an exception in a "WITH symbol" statement. for token: "MIT" at position: 9. for license_expression: MIT WITH MIT'), + (f"GPL-2.0-or-later and {FIXTURE_LICENSE_ID} with {FIXTURE_LICENSE_ID}", + f'A plain license symbol cannot be used as an exception in a "WITH symbol" statement. for token: "{FIXTURE_LICENSE_ID}" at position: 39. for license_expression: GPL-2.0-or-later AND {FIXTURE_LICENSE_ID} WITH {FIXTURE_LICENSE_ID}'), + (f"GPL-2.0-or-later with MIT and {FIXTURE_LICENSE_ID} with GPL-2.0-or-later", + f'A plain license symbol cannot be used as an exception in a "WITH symbol" statement. for token: "MIT" at position: 22. for license_expression: GPL-2.0-or-later WITH MIT AND {FIXTURE_LICENSE_ID} WITH GPL-2.0-or-later'), + ("389-exception with 389-exception", + 'A license exception symbol can only be used as an exception in a "WITH exception" statement. for token: "389-exception". for license_expression: 389-exception WITH 389-exception'), + ("389-exception with MIT", + 'A license exception symbol can only be used as an exception in a "WITH exception" statement. for token: "389-exception". for license_expression: 389-exception WITH MIT'), + ]) +def test_invalid_license_expression_with_invalid_exceptions(expression_string, expected_message): + document: Document = document_fixture() + license_expression: LicenseExpression = get_spdx_licensing().parse(expression_string) + parent_id = "SPDXRef-File" + context = ValidationContext(parent_id=parent_id, element_type=SpdxElementType.LICENSE_EXPRESSION, + full_element=license_expression) + + validation_messages: List[ValidationMessage] = validate_license_expression(license_expression, document, parent_id) + expected_messages = [ValidationMessage(expected_message, context)] + + assert validation_messages == expected_messages From af7a9d4745b518e5256d7732215a4e5833a9292d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Armin=20T=C3=A4nzer?= Date: Thu, 23 Feb 2023 13:05:13 +0100 Subject: [PATCH 2/3] [issue-374] adapt validate_license_expression calls to new usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Armin Tänzer --- src/spdx/validation/file_validator.py | 8 +++---- src/spdx/validation/package_validator.py | 30 ++++++++++++------------ src/spdx/validation/snippet_validator.py | 8 +++---- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/spdx/validation/file_validator.py b/src/spdx/validation/file_validator.py index a21f28fd7..745131bdc 100644 --- a/src/spdx/validation/file_validator.py +++ b/src/spdx/validation/file_validator.py @@ -41,6 +41,10 @@ def validate_file_within_document(file: File, spdx_version: str, document: Docum for message in validate_spdx_id(file.spdx_id, document): validation_messages.append(ValidationMessage(message, context)) + validation_messages.extend(validate_license_expression(file.license_concluded, document, file.spdx_id)) + + validation_messages.extend(validate_license_expressions(file.license_info_in_file, document, file.spdx_id)) + validation_messages.extend(validate_file(file, spdx_version, context)) return validation_messages @@ -67,10 +71,6 @@ def validate_file(file: File, spdx_version: str, context: Optional[ValidationCon validation_messages.extend(validate_checksums(file.checksums, file.spdx_id, spdx_version)) - validation_messages.extend(validate_license_expression(file.license_concluded)) - - validation_messages.extend(validate_license_expressions(file.license_info_in_file)) - if spdx_version == "SPDX-2.2": if file.license_concluded is None: validation_messages.append( diff --git a/src/spdx/validation/package_validator.py b/src/spdx/validation/package_validator.py index 888911002..4cd850fc8 100644 --- a/src/spdx/validation/package_validator.py +++ b/src/spdx/validation/package_validator.py @@ -61,6 +61,21 @@ def validate_package_within_document(package: Package, spdx_version: str, docume context) ) + validation_messages.extend(validate_license_expression(package.license_concluded, document, package.spdx_id)) + + license_info_from_files = package.license_info_from_files + if license_info_from_files: + if not package.files_analyzed: + validation_messages.append( + ValidationMessage( + f"license_info_from_files must be None if files_analyzed is False, but is: {license_info_from_files}", + context) + ) + else: + validation_messages.extend(validate_license_expressions(license_info_from_files, document, package.spdx_id)) + + validation_messages.extend(validate_license_expression(package.license_declared, document, package.spdx_id)) + validation_messages.extend(validate_package(package, spdx_version, context)) return validation_messages @@ -94,21 +109,6 @@ def validate_package(package: Package, spdx_version: str, context: Optional[Vali validation_messages.extend(validate_checksums(package.checksums, package.spdx_id, spdx_version)) - validation_messages.extend(validate_license_expression(package.license_concluded)) - - license_info_from_files = package.license_info_from_files - if license_info_from_files: - if not package.files_analyzed: - validation_messages.append( - ValidationMessage( - f"license_info_from_files must be None if files_analyzed is False, but is: {license_info_from_files}", - context) - ) - else: - validation_messages.extend(validate_license_expressions(license_info_from_files)) - - validation_messages.extend(validate_license_expression(package.license_declared)) - validation_messages.extend( validate_external_package_refs(package.external_references, package.spdx_id, spdx_version)) diff --git a/src/spdx/validation/snippet_validator.py b/src/spdx/validation/snippet_validator.py index 21d2e0d44..90a110fa3 100644 --- a/src/spdx/validation/snippet_validator.py +++ b/src/spdx/validation/snippet_validator.py @@ -46,6 +46,10 @@ def validate_snippet_within_document(snippet: Snippet, spdx_version: str, docume for message in messages: validation_messages.append(ValidationMessage(message, context)) + validation_messages.extend(validate_license_expression(snippet.license_concluded, document, snippet.spdx_id)) + + validation_messages.extend(validate_license_expressions(snippet.license_info_in_snippet, document, snippet.spdx_id)) + validation_messages.extend(validate_snippet(snippet, spdx_version, context)) return validation_messages @@ -86,10 +90,6 @@ def validate_snippet(snippet: Snippet, spdx_version: str, context: Optional[Vali context) ) - validation_messages.extend(validate_license_expression(snippet.license_concluded)) - - validation_messages.extend(validate_license_expressions(snippet.license_info_in_snippet)) - if spdx_version == "SPDX-2.2": if snippet.license_concluded is None: validation_messages.append( From d02d82e9b9bade6d2f43057c0453996228d1ad6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Armin=20T=C3=A4nzer?= Date: Mon, 27 Feb 2023 11:56:47 +0100 Subject: [PATCH 3/3] [issue-374, review] more descriptive comments, remove unused LICENSE from Enum MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Armin Tänzer --- src/spdx/validation/license_expression_validator.py | 8 +++++--- src/spdx/validation/validation_message.py | 1 - 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/spdx/validation/license_expression_validator.py b/src/spdx/validation/license_expression_validator.py index 0beafb86c..f2c8ddfbf 100644 --- a/src/spdx/validation/license_expression_validator.py +++ b/src/spdx/validation/license_expression_validator.py @@ -38,7 +38,7 @@ def validate_license_expression(license_expression: Optional[ if license_expression in [SpdxNoAssertion(), SpdxNone(), None]: return [] - if context is None: + if not context: context = ValidationContext(parent_id=parent_id, element_type=SpdxElementType.LICENSE_EXPRESSION, full_element=license_expression) validation_messages = [] @@ -55,14 +55,16 @@ def validate_license_expression(license_expression: Optional[ try: get_spdx_licensing().parse(str(license_expression), validate=True, strict=True) except ExpressionParseError as err: + # This error is raised when an exception symbol is used as a license symbol and vice versa. + # So far, it only catches the first such error in the provided string. validation_messages.append( ValidationMessage( f"{err}. for license_expression: {license_expression}", context) ) except ExpressionError: - # This happens for invalid symbols but the error provides only a string of these. On the other hand, - # get_spdx_licensing().validate() gives an actual list of invalid symbols, so this is handled above. + # This error is raised for invalid symbols within the license_expression, but it provides only a string of these. + # On the other hand, get_spdx_licensing().validate() gives an actual list of invalid symbols, so this is handled above. pass return validation_messages diff --git a/src/spdx/validation/validation_message.py b/src/spdx/validation/validation_message.py index a86fc2aa7..70001b78a 100644 --- a/src/spdx/validation/validation_message.py +++ b/src/spdx/validation/validation_message.py @@ -26,7 +26,6 @@ class SpdxElementType(Enum): PACKAGE = auto() FILE = auto() SNIPPET = auto() - LICENSE = auto() ANNOTATION = auto() RELATIONSHIP = auto() EXTRACTED_LICENSING_INFO = auto()