From d7138974c3b1185b894cdaf00dc5a273a8ef129c Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Mon, 26 Jun 2023 16:43:55 -0600 Subject: [PATCH 01/11] deps: ceil vcrpy --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 4c72681c8..aac75430c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,6 +69,9 @@ test = [ "types-orjson~=3.6", "types-python-dateutil~=2.8", "types-urllib3~=1.26", + # pytest-recording breakage with v5.0.0, need release of + # https://github.com/kiwicom/pytest-recording/pull/110 to remove this ceil + "vcrpy<5", ] urllib3 = ["urllib3>=1.26"] validation = ["jsonschema>=4.0.1"] From e2b0a721ac4b95ccf5a455b844cfd23191613467 Mon Sep 17 00:00:00 2001 From: jpolchlo Date: Wed, 21 Jun 2023 15:53:41 -0400 Subject: [PATCH 02/11] First attempt improving the link serialization for non-standard link types --- pystac/link.py | 13 +++++++++---- pystac/stac_object.py | 23 +++++++++++++++++++++++ 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/pystac/link.py b/pystac/link.py index 89ebc7c56..d4adb573a 100644 --- a/pystac/link.py +++ b/pystac/link.py @@ -183,10 +183,15 @@ def get_href(self, transform_href: bool = True) -> Optional[str]: *pystac.EXTENSION_HOOKS.get_extended_object_links(self.owner), ] # if a hierarchical link with an owner and root, and relative catalog - if root and root.is_relative() and self.rel in rel_links: - owner_href = self.owner.get_self_href() - if owner_href is not None: - href = make_relative_href(href, owner_href) + if root and root.is_relative(): + if self.rel in rel_links: + owner_href = self.owner.get_self_href() + if owner_href is not None: + href = make_relative_href(href, owner_href) + elif self.target in root.get_target_hierarchy(): + owner_href = self.owner.get_self_href() + if owner_href is not None: + href = make_relative_href(href, owner_href) return href diff --git a/pystac/stac_object.py b/pystac/stac_object.py index b79b32716..311b8e864 100644 --- a/pystac/stac_object.py +++ b/pystac/stac_object.py @@ -10,6 +10,7 @@ Iterable, List, Optional, + Set, Type, TypeVar, Union, @@ -136,6 +137,28 @@ def remove_hierarchical_links(self, add_canonical: bool = False) -> List[Link]: self.links = keep return remove + def get_target_hierarchy(self) -> Set[Union[str, STACObject]]: + """Recursively collects all the targets referred to by the hierarchical + links of the current STACObject. + + Returns: + Set[Union[str, STACObject]]: All encountered targets + """ + + def traverse( + obj: Union[str, STACObject], visited: Set[Union[str, STACObject]] + ) -> Set[Union[str, STACObject]]: + if isinstance(obj, str): + return visited + + hierarchical_links = [link for link in obj.links if link.is_hierarchical()] + new_targets = set([link.target for link in hierarchical_links]) - visited + for target in new_targets: + visited = traverse(target, visited.union(set([target]))) + return visited + + return traverse(self, set([self])) + def get_single_link( self, rel: Optional[Union[str, pystac.RelType]] = None, From 96fac9869f9ebd51b7cb87ae1f4e30c31a7f6953 Mon Sep 17 00:00:00 2001 From: jpolchlo Date: Wed, 21 Jun 2023 16:54:23 -0400 Subject: [PATCH 03/11] Update test --- tests/test_item.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_item.py b/tests/test_item.py index 82fe681fe..f6618daff 100644 --- a/tests/test_item.py +++ b/tests/test_item.py @@ -514,8 +514,8 @@ def test_add_derived_from(test_case_1_catalog: Catalog) -> None: link for link in item_0.links if link.rel == pystac.RelType.DERIVED_FROM ] assert len(filtered) == 2 - assert filtered[0].to_dict()["href"] == item_1.self_href - assert filtered[1].to_dict()["href"] == item_2.self_href + assert filtered[0].to_dict(transform_href=False)["href"] == item_1.self_href + assert filtered[1].to_dict(transform_href=False)["href"] == item_2.self_href def test_get_unresolvable_derived_from(test_case_1_catalog: Catalog) -> None: From 13e6b70eecfa998cd68870cade24b4ace87b549b Mon Sep 17 00:00:00 2001 From: jpolchlo Date: Fri, 23 Jun 2023 11:07:24 -0400 Subject: [PATCH 04/11] Small refactor --- pystac/link.py | 6 +----- pystac/stac_object.py | 4 ++-- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/pystac/link.py b/pystac/link.py index d4adb573a..7cbed30e8 100644 --- a/pystac/link.py +++ b/pystac/link.py @@ -184,11 +184,7 @@ def get_href(self, transform_href: bool = True) -> Optional[str]: ] # if a hierarchical link with an owner and root, and relative catalog if root and root.is_relative(): - if self.rel in rel_links: - owner_href = self.owner.get_self_href() - if owner_href is not None: - href = make_relative_href(href, owner_href) - elif self.target in root.get_target_hierarchy(): + if self.rel in rel_links or root.target_in_hierarchy(self.target): owner_href = self.owner.get_self_href() if owner_href is not None: href = make_relative_href(href, owner_href) diff --git a/pystac/stac_object.py b/pystac/stac_object.py index 311b8e864..db0e6f32f 100644 --- a/pystac/stac_object.py +++ b/pystac/stac_object.py @@ -137,7 +137,7 @@ def remove_hierarchical_links(self, add_canonical: bool = False) -> List[Link]: self.links = keep return remove - def get_target_hierarchy(self) -> Set[Union[str, STACObject]]: + def target_in_hierarchy(self, target: Union[str, STACObject]) -> bool: """Recursively collects all the targets referred to by the hierarchical links of the current STACObject. @@ -157,7 +157,7 @@ def traverse( visited = traverse(target, visited.union(set([target]))) return visited - return traverse(self, set([self])) + return target in traverse(self, set([self])) def get_single_link( self, From de09e90d2c865218e67a47c4fea50cdabf0bd607 Mon Sep 17 00:00:00 2001 From: jpolchlo Date: Fri, 23 Jun 2023 14:04:39 -0400 Subject: [PATCH 05/11] Fixup formatting error --- pystac/validation/local_validator.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pystac/validation/local_validator.py b/pystac/validation/local_validator.py index 2f6c1bbf6..fc6212fe7 100644 --- a/pystac/validation/local_validator.py +++ b/pystac/validation/local_validator.py @@ -16,7 +16,10 @@ ITEM_SCHEMA_URI = ( f"https://schemas.stacspec.org/v{VERSION}/item-spec/json-schema/item.json" ) -COLLECTION_SCHEMA_URI = f"https://schemas.stacspec.org/v{VERSION}/collection-spec/json-schema/collection.json" +COLLECTION_SCHEMA_URI = ( + f"https://schemas.stacspec.org/v{VERSION}/" + "collection-spec/json-schema/collection.json" +) CATALOG_SCHEMA_URI = ( f"https://schemas.stacspec.org/v{VERSION}/catalog-spec/json-schema/catalog.json" ) From 386e86272617418d55a5d7dda4826a98abd0fed8 Mon Sep 17 00:00:00 2001 From: jpolchlo Date: Fri, 23 Jun 2023 14:05:15 -0400 Subject: [PATCH 06/11] Add test to demonstrate proper function of non-hierarchical relative links --- tests/test_item.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/test_item.py b/tests/test_item.py index f6618daff..b5bd843ca 100644 --- a/tests/test_item.py +++ b/tests/test_item.py @@ -607,3 +607,19 @@ def test_resolve_collection_with_root( root = read_collection.get_root() assert root assert root.id == "root" + + +def test_non_hierarchical_relative_link() -> None: + root = pystac.Catalog("root", "root") + a = pystac.Catalog("a", "a") + b = pystac.Catalog("b", "b") + + root.add_child(a) + root.add_child(b) + a.add_link(pystac.Link("related", b)) + + root.catalog_type = pystac.catalog.CatalogType.SELF_CONTAINED + root.normalize_hrefs("test_output") + related_href = [link for link in a.links if link.rel == "related"][0].get_href() + + assert related_href is not None and not is_absolute_href(related_href) From 821a53d763718eabd57d859ad490cd851ad22203 Mon Sep 17 00:00:00 2001 From: jpolchlo Date: Mon, 26 Jun 2023 16:37:27 -0400 Subject: [PATCH 07/11] Make hierarchy search more efficient --- pystac/stac_object.py | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/pystac/stac_object.py b/pystac/stac_object.py index db0e6f32f..9a8a00a18 100644 --- a/pystac/stac_object.py +++ b/pystac/stac_object.py @@ -138,26 +138,41 @@ def remove_hierarchical_links(self, add_canonical: bool = False) -> List[Link]: return remove def target_in_hierarchy(self, target: Union[str, STACObject]) -> bool: - """Recursively collects all the targets referred to by the hierarchical - links of the current STACObject. + """Determine if target lin is somewhere in the hierarchical link tree of + a STACObject. + + Args: + target: A string or STACObject describing the target to search for Returns: - Set[Union[str, STACObject]]: All encountered targets + bool: Returns True if the target was found in the hierarchical link tree + for the current STACObject """ def traverse( obj: Union[str, STACObject], visited: Set[Union[str, STACObject]] - ) -> Set[Union[str, STACObject]]: + ) -> bool: + if obj == target: + return True if isinstance(obj, str): - return visited + return False + + new_targets = [ + link.target + for link in obj.links + if link.is_hierarchical() and link.target not in visited + ] + if target in new_targets: + return True + + for subtree in new_targets: + visited.add(subtree) + if traverse(subtree, visited): + return True - hierarchical_links = [link for link in obj.links if link.is_hierarchical()] - new_targets = set([link.target for link in hierarchical_links]) - visited - for target in new_targets: - visited = traverse(target, visited.union(set([target]))) - return visited + return False - return target in traverse(self, set([self])) + return traverse(self, set([self])) def get_single_link( self, From 36cb09cec8929d288971f52a2feb1ad3de164476 Mon Sep 17 00:00:00 2001 From: jpolchlo Date: Mon, 26 Jun 2023 17:28:54 -0400 Subject: [PATCH 08/11] Improve test coverage --- tests/test_item.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test_item.py b/tests/test_item.py index b5bd843ca..dc3a26b84 100644 --- a/tests/test_item.py +++ b/tests/test_item.py @@ -617,9 +617,15 @@ def test_non_hierarchical_relative_link() -> None: root.add_child(a) root.add_child(b) a.add_link(pystac.Link("related", b)) + b.add_link( + pystac.Link("item", TestCases.get_path("data-files/item/sample-item.json")) + ) root.catalog_type = pystac.catalog.CatalogType.SELF_CONTAINED root.normalize_hrefs("test_output") related_href = [link for link in a.links if link.rel == "related"][0].get_href() assert related_href is not None and not is_absolute_href(related_href) + assert a.target_in_hierarchy(b) + assert root.target_in_hierarchy(next(b.get_items())) + assert root.target_in_hierarchy(root) From ed1f4462326269e254ee4100a1e3ac39e82e07da Mon Sep 17 00:00:00 2001 From: jpolchlo Date: Tue, 27 Jun 2023 10:43:58 -0400 Subject: [PATCH 09/11] Use cassette for test --- .../test_non_hierarchical_relative_link.yaml | 131 ++++++++++++++++++ tests/test_item.py | 1 + 2 files changed, 132 insertions(+) create mode 100644 tests/cassettes/test_item/test_non_hierarchical_relative_link.yaml diff --git a/tests/cassettes/test_item/test_non_hierarchical_relative_link.yaml b/tests/cassettes/test_item/test_non_hierarchical_relative_link.yaml new file mode 100644 index 000000000..fa20e4204 --- /dev/null +++ b/tests/cassettes/test_item/test_non_hierarchical_relative_link.yaml @@ -0,0 +1,131 @@ +interactions: +- request: + body: null + headers: + Connection: + - close + Host: + - raw.githubusercontent.com + User-Agent: + - Python-urllib/3.8 + method: GET + uri: https://raw.githubusercontent.com/radiantearth/stac-spec/v0.8.1/collection-spec/examples/sentinel2.json + response: + body: + string: "{\n \"stac_version\": \"0.8.1\",\n \"stac_extensions\": [],\n \"id\": + \"COPERNICUS/S2\",\n \"title\": \"Sentinel-2 MSI: MultiSpectral Instrument, + Level-1C\",\n \"description\": \"Sentinel-2 is a wide-swath, high-resolution, + multi-spectral\\nimaging mission supporting Copernicus Land Monitoring studies,\\nincluding + the monitoring of vegetation, soil and water cover,\\nas well as observation + of inland waterways and coastal areas.\\n\\nThe Sentinel-2 data contain 13 + UINT16 spectral bands representing\\nTOA reflectance scaled by 10000. See + the [Sentinel-2 User Handbook](https://sentinel.esa.int/documents/247904/685211/Sentinel-2_User_Handbook)\\nfor + details. In addition, three QA bands are present where one\\n(QA60) is a bitmask + band with cloud mask information. For more\\ndetails, [see the full explanation + of how cloud masks are computed.](https://sentinel.esa.int/web/sentinel/technical-guides/sentinel-2-msi/level-1c/cloud-masks)\\n\\nEach + Sentinel-2 product (zip archive) may contain multiple\\ngranules. Each granule + becomes a separate Earth Engine asset.\\nEE asset ids for Sentinel-2 assets + have the following format:\\nCOPERNICUS/S2/20151128T002653_20151128T102149_T56MNN. + Here the\\nfirst numeric part represents the sensing date and time, the\\nsecond + numeric part represents the product generation date and\\ntime, and the final + 6-character string is a unique granule identifier\\nindicating its UTM grid + reference (see [MGRS](https://en.wikipedia.org/wiki/Military_Grid_Reference_System)).\\n\\nFor + more details on Sentinel-2 radiometric resoltuon, [see this page](https://earth.esa.int/web/sentinel/user-guides/sentinel-2-msi/resolutions/radiometric).\\n\",\n + \ \"license\": \"proprietary\",\n \"keywords\": [\n \"copernicus\",\n + \ \"esa\",\n \"eu\",\n \"msi\",\n \"radiance\",\n \"sentinel\"\n + \ ],\n \"providers\": [\n {\n \"name\": \"European Union/ESA/Copernicus\",\n + \ \"roles\": [\n \"producer\",\n \"licensor\"\n ],\n + \ \"url\": \"https://sentinel.esa.int/web/sentinel/user-guides/sentinel-2-msi\"\n + \ }\n ],\n \"extent\": {\n \"spatial\": {\n \"bbox\": [\n [\n + \ -180,\n -56,\n 180,\n 83\n ]\n + \ ]\n },\n \"temporal\": {\n \"interval\": [\n [\n \"2015-06-23T00:00:00Z\",\n + \ null\n ]\n ]\n }\n },\n\n \"summaries\": {\n \"datetime\": + \ {\n \"min\": \"2015-06-23T00:00:00Z\",\n \"max\": \"2019-07-10T13:44:56Z\"\n + \ },\n \"sci:citation\": [\"Copernicus Sentinel data [Year]\"],\n \"eo:gsd\": + [10,30,60],\n \"eo:platform\": [\"sentinel-2a\",\"sentinel-2b\"],\n \"eo:constellation\": + [\"sentinel-2\"],\n \"eo:instrument\": [\"msi\"],\n \"eo:off_nadir\": + {\n \"min\": 0.0,\n \"max\": 100\n },\n \"eo:sun_elevation\": + {\n \"min\": 6.78,\n \"max\": 89.9\n },\n \"eo:epsg\": [32601,32602,32603,32604,32605,32606,32607,32608,32609,32610,32611,32612,32613,32614,32615,32616,32617,32618,32619,32620,32621,32622,32623,32624,32625,32626,32627,32628,32629,32630,32631,32632,32633,32634,32635,32636,32637,32638,32639,32640,32641,32642,32643,32644,32645,32646,32647,32648,32649,32650,32651,32652,32653,32654,32655,32656,32657,32658,32659,32660],\n + \ \"eo:bands\": [\n [\n {\n \"name\": \"B1\",\n \"common_name\": + \"coastal\",\n \"center_wavelength\": 4.439,\n \"gsd\": + 60\n },\n {\n \"name\": \"B2\",\n \"common_name\": + \"blue\",\n \"center_wavelength\": 4.966,\n \"gsd\": 10\n + \ },\n {\n \"name\": \"B3\",\n \"common_name\": + \"green\",\n \"center_wavelength\": 5.6,\n \"gsd\": 10\n + \ },\n {\n \"name\": \"B4\",\n \"common_name\": + \"red\",\n \"center_wavelength\": 6.645,\n \"gsd\": 10\n + \ },\n {\n \"name\": \"B5\",\n \"center_wavelength\": + 7.039,\n \"gsd\": 20\n },\n {\n \"name\": + \"B6\",\n \"center_wavelength\": 7.402,\n \"gsd\": 20\n + \ },\n {\n \"name\": \"B7\",\n \"center_wavelength\": + 7.825,\n \"gsd\": 20\n },\n {\n \"name\": + \"B8\",\n \"common_name\": \"nir\",\n \"center_wavelength\": + 8.351,\n \"gsd\": 10\n },\n {\n \"name\": + \"B8A\",\n \"center_wavelength\": 8.648,\n \"gsd\": 20\n + \ },\n {\n \"name\": \"B9\",\n \"center_wavelength\": + 9.45,\n \"gsd\": 60\n },\n {\n \"name\": \"B10\",\n + \ \"center_wavelength\": 1.3735,\n \"gsd\": 60\n },\n + \ {\n \"name\": \"B11\",\n \"common_name\": \"swir16\",\n + \ \"center_wavelength\": 1.6137,\n \"gsd\": 20\n },\n + \ {\n \"name\": \"B12\",\n \"common_name\": \"swir22\",\n + \ \"center_wavelength\": 2.2024,\n \"gsd\": 20\n }\n + \ ]\n ]\n },\n \"links\": [\n {\n \"rel\": \"self\",\n \"href\": + \"https://storage.cloud.google.com/earthengine-test/catalog/COPERNICUS_S2.json\"\n + \ },\n {\n \"rel\": \"parent\",\n \"href\": \"https://storage.cloud.google.com/earthengine-test/catalog/catalog.json\"\n + \ },\n {\n \"rel\": \"root\",\n \"href\": \"https://storage.cloud.google.com/earthengine-test/catalog/catalog.json\"\n + \ },\n {\n \"rel\": \"license\",\n \"href\": \"https://scihub.copernicus.eu/twiki/pub/SciHubWebPortal/TermsConditions/Sentinel_Data_Terms_and_Conditions.pdf\",\n + \ \"title\": \"Legal notice on the use of Copernicus Sentinel Data and + Service Information\"\n }\n ]\n}" + headers: + Accept-Ranges: + - bytes + Access-Control-Allow-Origin: + - '*' + Cache-Control: + - max-age=300 + Connection: + - close + Content-Length: + - '5364' + Content-Security-Policy: + - default-src 'none'; style-src 'unsafe-inline'; sandbox + Content-Type: + - text/plain; charset=utf-8 + Cross-Origin-Resource-Policy: + - cross-origin + Date: + - Tue, 27 Jun 2023 14:42:50 GMT + ETag: + - '"7b5b9590049813a43b1a9c064eb61dd6b9c25e8e649fff820d3ac83580b7e559"' + Expires: + - Tue, 27 Jun 2023 14:47:50 GMT + Source-Age: + - '0' + Strict-Transport-Security: + - max-age=31536000 + Vary: + - Authorization,Accept-Encoding,Origin + Via: + - 1.1 varnish + X-Cache: + - MISS + X-Cache-Hits: + - '0' + X-Content-Type-Options: + - nosniff + X-Fastly-Request-ID: + - e6f3b9fe41946ac3e378d2af0fb3a39aa86ec656 + X-Frame-Options: + - deny + X-GitHub-Request-Id: + - A0E8:5E35:4235C:4D583:649AF569 + X-Served-By: + - cache-ewr18137-EWR + X-Timer: + - S1687876971.626913,VS0,VE91 + X-XSS-Protection: + - 1; mode=block + status: + code: 200 + message: OK +version: 1 diff --git a/tests/test_item.py b/tests/test_item.py index dc3a26b84..c3274d003 100644 --- a/tests/test_item.py +++ b/tests/test_item.py @@ -609,6 +609,7 @@ def test_resolve_collection_with_root( assert root.id == "root" +@pytest.mark.vcr() def test_non_hierarchical_relative_link() -> None: root = pystac.Catalog("root", "root") a = pystac.Catalog("a", "a") From 6d129d6adc0870e82679964115724461867d816f Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Tue, 27 Jun 2023 16:22:42 -0600 Subject: [PATCH 10/11] Update pystac/stac_object.py --- pystac/stac_object.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pystac/stac_object.py b/pystac/stac_object.py index 9a8a00a18..37981c4cb 100644 --- a/pystac/stac_object.py +++ b/pystac/stac_object.py @@ -138,7 +138,7 @@ def remove_hierarchical_links(self, add_canonical: bool = False) -> List[Link]: return remove def target_in_hierarchy(self, target: Union[str, STACObject]) -> bool: - """Determine if target lin is somewhere in the hierarchical link tree of + """Determine if target is somewhere in the hierarchical link tree of a STACObject. Args: From 27b733c797de5ed2fac62a95bb0be60271dea643 Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Tue, 27 Jun 2023 16:22:47 -0600 Subject: [PATCH 11/11] Update pystac/stac_object.py --- pystac/stac_object.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pystac/stac_object.py b/pystac/stac_object.py index 37981c4cb..c6e155b95 100644 --- a/pystac/stac_object.py +++ b/pystac/stac_object.py @@ -142,7 +142,7 @@ def target_in_hierarchy(self, target: Union[str, STACObject]) -> bool: a STACObject. Args: - target: A string or STACObject describing the target to search for + target: A string or STACObject to search for Returns: bool: Returns True if the target was found in the hierarchical link tree