From f17034142fe0f91edc8de3ebb0613f8165032a82 Mon Sep 17 00:00:00 2001 From: Patrick Titzler Date: Mon, 21 Nov 2022 11:21:00 -0800 Subject: [PATCH 1/2] Reject binary property input files Signed-off-by: Patrick Titzler Signed-off-by: Patrick Titzler --- elyra/pipeline/validation.py | 22 +++++ elyra/tests/pipeline/test_validation.py | 109 ++++++++++++++++++++++-- 2 files changed, 123 insertions(+), 8 deletions(-) diff --git a/elyra/pipeline/validation.py b/elyra/pipeline/validation.py index acce8bebe..b3a1c223f 100644 --- a/elyra/pipeline/validation.py +++ b/elyra/pipeline/validation.py @@ -447,6 +447,7 @@ async def _validate_generic_node_properties(self, node: Node, response: Validati property_name="dependencies", filename=dependency, response=response, + binary_file_ok=True, ) async def _validate_custom_component_node_properties( @@ -532,6 +533,7 @@ async def _validate_custom_component_node_properties( property_name=default_parameter, filename=filename, response=response, + binary_file_ok=False, # reject files that are not UTF encoded ) elif self._is_required_property(component_property_dict, default_parameter): response.add_message( @@ -667,6 +669,7 @@ def _validate_filepath( filename: str, response: ValidationResponse, file_dir: Optional[str] = "", + binary_file_ok: bool = True, ) -> None: """ Checks the file structure, paths and existence of pipeline dependencies. @@ -677,6 +680,7 @@ def _validate_filepath( :param filename: the name of the file or directory to verify :param response: ValidationResponse containing the issue list to be updated :param file_dir: the dir path of the where the pipeline file resides in the elyra workspace + :param binary_file_ok: whether to reject binary files """ file_dir = file_dir or self.root_dir @@ -724,6 +728,24 @@ def _validate_filepath( "value": normalized_path, }, ) + elif not binary_file_ok: + # Validate that the file is utf-8 encoded by trying to read it + # as text file + try: + with open(normalized_path, "r") as fh: + fh.read() + except UnicodeDecodeError: + response.add_message( + severity=ValidationSeverity.Error, + message_type="invalidFileType", + message="Property was assigned a file that is not unicode encoded.", + data={ + "nodeID": node_id, + "nodeName": node_label, + "propertyName": property_name, + "value": normalized_path, + }, + ) def _validate_label(self, node_id: str, node_label: str, response: ValidationResponse) -> None: """ diff --git a/elyra/tests/pipeline/test_validation.py b/elyra/tests/pipeline/test_validation.py index 9ef313b7d..dcdb8b5ac 100644 --- a/elyra/tests/pipeline/test_validation.py +++ b/elyra/tests/pipeline/test_validation.py @@ -15,6 +15,8 @@ # import os +from pathlib import Path +import pickle from conftest import AIRFLOW_TEST_OPERATOR_CATALOG from conftest import KFP_COMPONENT_CACHE_INSTANCE @@ -38,6 +40,7 @@ from elyra.pipeline.pipeline_definition import PipelineDefinition from elyra.pipeline.validation import PipelineValidationManager from elyra.pipeline.validation import ValidationResponse +from elyra.pipeline.validation import ValidationSeverity from elyra.tests.pipeline.util import _read_pipeline_resource @@ -59,6 +62,41 @@ def validation_manager(setup_factory_data, component_cache): PipelineValidationManager.clear_instance() +@pytest.fixture() +def pvm(request, component_cache, tmp_path): + yield PipelineValidationManager.instance(root_dir=str(tmp_path)) + PipelineValidationManager.clear_instance() + + +@pytest.fixture() +def dummy_text_file(tmp_path) -> Path: + """ + Create a text file in tmp_path, which contains dummy data. + """ + dummy_file = tmp_path / "text_file.txt" + assert not dummy_file.exists() + with open(dummy_file, "w") as fh: + fh.write("1,2,3,4,5\n") + fh.write("6,7,8,9,10\n") + yield dummy_file + # cleanup + dummy_file.unlink() + + +@pytest.fixture() +def dummy_binary_file(tmp_path) -> Path: + """ + Create a binary file in tmp_path, which contains dummy data. + """ + dummy_file = tmp_path / "binary_file.bin" + assert not dummy_file.exists() + with open(dummy_file, "wb") as fh: + pickle.dump({"key": "value"}, fh) + yield dummy_file + # cleanup + dummy_file.unlink() + + async def test_invalid_lower_pipeline_version(validation_manager, load_pipeline): pipeline, response = load_pipeline("generic_basic_pipeline_only_notebook.pipeline") pipeline_version = PIPELINE_CURRENT_VERSION - 1 @@ -356,26 +394,81 @@ def test_invalid_node_property_dependency_filepath_non_existent(validation_manag assert issues[0]["data"]["nodeID"] == node["id"] -def test_valid_node_property_dependency_filepath(validation_manager): +def test_validate_filepath(pvm, dummy_text_file: Path, dummy_binary_file: Path): + """ + Test function: PipelineValidationManager._validate_filepath + Scope: validate binary_file_ok function parameter + """ response = ValidationResponse() - valid_filename = os.path.join( - os.path.dirname(__file__), "resources/validation_pipelines/generic_single_cycle.pipeline" - ) + node = {"id": "test-id", "app_data": {"label": "test"}} property_name = "test-property" - validation_manager._validate_filepath( + # Test scenario 1: text files and binary files are valid dependencies + # for generic components ('binary_file_ok' is explicitly set to True) + for file_dependency in [dummy_text_file, dummy_binary_file]: + pvm._validate_filepath( + node_id=node["id"], + file_dir=str(file_dependency.parent), + property_name=property_name, + node_label=node["app_data"]["label"], + filename=str(file_dependency), + response=response, + binary_file_ok=True, + ) + + assert not response.has_fatal, response.to_json() + assert not response.to_json().get("issues") + + # Test scenario 2: text files and binary files are valid dependencies + # for generic components (use default for 'binary_file_ok' ) + for file_dependency in [dummy_text_file, dummy_binary_file]: + pvm._validate_filepath( + node_id=node["id"], + file_dir=str(file_dependency.parent), + property_name=property_name, + node_label=node["app_data"]["label"], + filename=str(file_dependency), + response=response, + ) + + assert not response.has_fatal, response.to_json() + assert not response.to_json().get("issues") + + # Test scenario 3: text files are valid input for 'file' widgets + # for custom components ('binary_file_ok' is explicitly set to False) + pvm._validate_filepath( node_id=node["id"], - file_dir=os.getcwd(), + file_dir=str(dummy_text_file.parent), property_name=property_name, node_label=node["app_data"]["label"], - filename=valid_filename, + filename=str(dummy_text_file), response=response, + binary_file_ok=False, ) - assert not response.has_fatal + assert not response.has_fatal, response.to_json() assert not response.to_json().get("issues") + # Test scenario 3: binary files are invalid input for 'file' widgets + # for custom components + pvm._validate_filepath( + node_id=node["id"], + file_dir=str(dummy_binary_file.parent), + property_name=property_name, + node_label=node["app_data"]["label"], + filename=str(dummy_binary_file), + response=response, + binary_file_ok=False, + ) + + response_json = response.to_json() + assert response.has_fatal, response_json + assert response_json["issues"][0]["severity"] == ValidationSeverity.Error + assert response_json["issues"][0]["type"] == "invalidFileType" + assert "Property was assigned a file that is not unicode encoded." in response_json["issues"][0]["message"] + assert str(dummy_binary_file) in response_json["issues"][0]["data"]["value"] + async def test_valid_node_property_pipeline_filepath(monkeypatch, validation_manager, load_pipeline): pipeline, response = load_pipeline("generic_basic_filepath_check.pipeline") From 50b254600041c6f3da34e5a09361ff34a5bf7714 Mon Sep 17 00:00:00 2001 From: Patrick Titzler Date: Wed, 23 Nov 2022 10:22:50 -0800 Subject: [PATCH 2/2] Update elyra/tests/pipeline/test_validation.py Co-authored-by: Kiersten Stokes Signed-off-by: Patrick Titzler --- elyra/tests/pipeline/test_validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elyra/tests/pipeline/test_validation.py b/elyra/tests/pipeline/test_validation.py index dcdb8b5ac..3ba25bba3 100644 --- a/elyra/tests/pipeline/test_validation.py +++ b/elyra/tests/pipeline/test_validation.py @@ -450,7 +450,7 @@ def test_validate_filepath(pvm, dummy_text_file: Path, dummy_binary_file: Path): assert not response.has_fatal, response.to_json() assert not response.to_json().get("issues") - # Test scenario 3: binary files are invalid input for 'file' widgets + # Test scenario 4: binary files are invalid input for 'file' widgets # for custom components pvm._validate_filepath( node_id=node["id"],