Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .style.yapf
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[style]
column_limit = 100
43 changes: 4 additions & 39 deletions pystac/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,53 +15,18 @@ class STACError(Exception):

from pystac.version import (__version__, STAC_VERSION)
from pystac.stac_io import STAC_IO
from pystac.extension import Extension
from pystac.stac_object import STACObject
from pystac.media_type import MediaType
from pystac.link import (Link, LinkType)
from pystac.catalog import (Catalog, CatalogType)
from pystac.collection import (Collection, Extent, SpatialExtent,
TemporalExtent, Provider)
from pystac.collection import (Collection, Extent, SpatialExtent, TemporalExtent, Provider)
from pystac.item import (Item, Asset)
from pystac.item_collection import ItemCollection
from pystac.single_file_stac import SingleFileSTAC
from pystac.eo import *
from pystac.label import *

from pystac.serialization import (identify_stac_object, STACObjectType)
from pystac.serialization import stac_object_from_dict


def _stac_object_from_dict(d, href=None, root=None):
    """Deserialize a dictionary into the matching STAC object.

    The dict is classified with ``identify_stac_object`` and then handed to
    the ``from_dict`` constructor of the class that corresponds to its object
    type and common extensions.

    Args:
        d (dict): The dict to parse.
        href (str): Optional href that is the file location of the object being
            parsed.
        root (Catalog or Collection): Optional root of the catalog for this object.
            If provided, the root's resolved object cache can be used to search for
            previously resolved instances of the STAC object.

    Returns:
        The object built by the selected class's ``from_dict``, or None when
        the identified object type matches none of the handled types.

    Note: This is used internally in STAC_IO to deserialize STAC Objects.
    It is in the top level __init__ in order to avoid circular dependencies.
    """
    info = identify_stac_object(d)
    object_type = info.object_type

    # TODO: Transform older versions to newest version (pystac.serialization.migrate)

    if object_type == STACObjectType.CATALOG:
        target_class = Catalog
    elif object_type == STACObjectType.COLLECTION:
        target_class = Collection
    elif object_type == STACObjectType.ITEMCOLLECTION:
        # A single-file STAC is an item collection carrying that extension.
        if 'single-file-stac' in info.common_extensions:
            target_class = SingleFileSTAC
        else:
            target_class = ItemCollection
    elif object_type == STACObjectType.ITEM:
        # 'eo' is checked before 'label', so an item with both becomes an EOItem.
        if 'eo' in info.common_extensions:
            target_class = EOItem
        elif 'label' in info.common_extensions:
            target_class = LabelItem
        else:
            target_class = Item
    else:
        # Unrecognized object type: nothing to construct.
        return None

    return target_class.from_dict(d, href=href, root=root)


STAC_IO.stac_object_from_dict = _stac_object_from_dict
STAC_IO.stac_object_from_dict = stac_object_from_dict
239 changes: 239 additions & 0 deletions pystac/cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
from collections import ChainMap
from copy import copy


class ResolvedObjectCache:
    """This class tracks resolved objects tied to root catalogs.
    A STAC object is 'resolved' when it is a Python Object; a link
    to a STAC object such as a Catalog or Item is considered "unresolved"
    if its target is pointed at an HREF of the object.

    Tracking resolved objects allows us to tie together the same instances
    when there are loops in the Graph of the STAC catalog (e.g. a LabelItem
    can link to a rel:source, and if that STAC Item exists in the same
    root catalog they should refer to the same Python object).

    Resolution tracking is important when copying STACs in-memory: In order
    for object links to refer to the copy of STAC Objects rather than their
    originals, we have to keep track of the resolved STAC Objects and replace
    them with their copies.

    Args:
        ids_to_objects (Dict[str, STACObject]): Existing cache of STACObject IDs mapped
            to the cached STACObject.
        ids_to_hrefs (Dict[str, str]): Existing mapping of STACObject IDs to the self
            HREF that was recorded when the object was cached.
        hrefs_to_ids (Dict[str, str]): Existing mapping of recorded self HREFs back to
            STACObject IDs.
    """
    def __init__(self, ids_to_objects=None, ids_to_hrefs=None, hrefs_to_ids=None):
        # Test against None instead of truthiness so that an empty mapping
        # supplied by the caller is used as-is rather than silently replaced.
        self.ids_to_objects = {} if ids_to_objects is None else ids_to_objects
        self.ids_to_hrefs = {} if ids_to_hrefs is None else ids_to_hrefs
        self.hrefs_to_ids = {} if hrefs_to_ids is None else hrefs_to_ids

        # Created lazily by as_collection_cache().
        self._collection_cache = None

    def _forget_href(self, obj_id):
        """Drop the HREF bookkeeping recorded for the given ID, if any."""
        href = self.ids_to_hrefs.pop(obj_id, None)
        # Only remove the reverse entry when it still belongs to this ID; another
        # object may have been cached under the same HREF in the meantime.
        if href is not None and self.hrefs_to_ids.get(href) == obj_id:
            del self.hrefs_to_ids[href]

    def _cache_href(self, obj):
        """Index the object's current self HREF, replacing any stale entry."""
        # Clear the previous HREF first so that a changed or removed self HREF
        # does not leave the old HREF resolving to this object.
        self._forget_href(obj.id)

        href = obj.get_self_href()
        if href is not None:
            self.ids_to_hrefs[obj.id] = href
            self.hrefs_to_ids[href] = obj.id

    def get_or_cache(self, obj):
        """Gets the STACObject that is the cached version of the given STACObject; or, if
        none exists, sets the cached object to the given object.

        Args:
            obj (STACObject): The given object whose ID will be checked against the cache.

        Returns:
            STACObject: Either the cached object that has the same ID as the given
            object, or the given object.
        """
        if obj.id in self.ids_to_objects:
            return self.ids_to_objects[obj.id]

        self.ids_to_objects[obj.id] = obj
        self._cache_href(obj)
        return obj

    def get(self, obj):
        """Get the cached object that has the same ID as the given object.

        Args:
            obj (STACObject): The given object whose ID will be checked against the cache.

        Returns:
            STACObject or None: Either the cached object that has the same ID as the given
            object, or None
        """
        return self.get_by_id(obj.id)

    def get_by_id(self, obj_id):
        """Get the cached object that has the given ID.

        Args:
            obj_id (str): The ID to be checked against the cache.

        Returns:
            STACObject or None: Either the cached object that has the given ID, or None
        """
        return self.ids_to_objects.get(obj_id)

    def get_by_href(self, href):
        """Get the cached object that was recorded under the given self HREF.

        Args:
            href (str): The HREF to look up.

        Returns:
            STACObject or None: The cached object whose ID is indexed under the
            given HREF, or None if the HREF is not indexed.
        """
        obj_id = self.hrefs_to_ids.get(href)
        if obj_id is None:
            return None
        return self.get_by_id(obj_id)

    def cache(self, obj):
        """Set the given object into the cache.

        Any object previously cached under the same ID is replaced, and the
        HREF index is updated to the object's current self HREF.

        Args:
            obj (STACObject): The object to cache
        """
        self.ids_to_objects[obj.id] = obj
        self._cache_href(obj)

    def remove(self, obj):
        """Removes any cached object that matches the given object's id.

        Args:
            obj (STACObject): The object to remove
        """
        self.remove_by_id(obj.id)

    def remove_by_id(self, obj_id):
        """Removes any cached object that matches the given ID.

        Args:
            obj_id (str): The object ID to remove
        """
        self.ids_to_objects.pop(obj_id, None)
        self._forget_href(obj_id)

    def clone(self):
        """Clone this ResolvedObjectCache

        Returns:
            ResolvedObjectCache: A clone of this cache, which contains a shallow
            copy of the ID to STACObject cache and of both HREF indexes.
        """
        return ResolvedObjectCache(copy(self.ids_to_objects), copy(self.ids_to_hrefs),
                                   copy(self.hrefs_to_ids))

    def __contains__(self, obj):
        return self.contains_id(obj.id)

    def contains_id(self, obj_id):
        """Return True if an object with the given ID is cached."""
        return obj_id in self.ids_to_objects

    def as_collection_cache(self):
        """Return the collection cache view backed by this cache.

        The view is created on first use and the same instance is returned
        on subsequent calls.
        """
        if self._collection_cache is None:
            self._collection_cache = ResolvedObjectCollectionCache(self)
        return self._collection_cache

    @staticmethod
    def merge(first, second):
        """Merges two ResolvedObjectCache.

        The merged cache will give preference to the first argument; that is, if there
        are cached IDs that exist in both the first and second cache, the object cached
        in the first will be cached in the resulting merged ResolvedObjectCache.

        Args:
            first (ResolvedObjectCache): The first cache to merge. This cache will be
                the preferred cache for objects in the case of ID conflicts.
            second (ResolvedObjectCache): The second cache to merge.

        Returns:
            ResolvedObjectCache: The resulting merged cache.
        """
        # dict(ChainMap(a, b)) builds a brand new dict in which entries of ``a``
        # win over entries of ``b``; the inputs themselves are left untouched.
        merged = ResolvedObjectCache(
            ids_to_objects=dict(ChainMap(first.ids_to_objects, second.ids_to_objects)),
            ids_to_hrefs=dict(ChainMap(first.ids_to_hrefs, second.ids_to_hrefs)),
            hrefs_to_ids=dict(ChainMap(first.hrefs_to_ids, second.hrefs_to_ids)))

        merged._collection_cache = ResolvedObjectCollectionCache.merge(
            merged, first._collection_cache, second._collection_cache)

        return merged


class CollectionCache:
    """Cache of collections that can be used to avoid re-reading Collection
    JSON in :func:`pystac.serialization.merge_common_properties
    <pystac.serialization.common_properties.merge_common_properties>`.
    The CollectionCache will contain collections as either dicts or PySTAC Collections,
    and will set Collection JSON that it reads in order to merge in common properties.

    Args:
        cached_ids (dict): Optional existing mapping of collection IDs to cached
            collections.
        cached_hrefs (dict): Optional existing mapping of HREFs to cached collections.
    """
    def __init__(self, cached_ids=None, cached_hrefs=None):
        # Test against None instead of truthiness so that an empty mapping
        # supplied by the caller is used as-is rather than silently replaced.
        self.cached_ids = {} if cached_ids is None else cached_ids
        self.cached_hrefs = {} if cached_hrefs is None else cached_hrefs

    def get_by_id(self, collection_id):
        """Return the collection cached under the given ID, or None."""
        return self.cached_ids.get(collection_id)

    def get_by_href(self, href):
        """Return the collection cached under the given HREF, or None."""
        return self.cached_hrefs.get(href)

    def contains_id(self, collection_id):
        """Return True if a collection is cached under the given ID."""
        return collection_id in self.cached_ids

    def cache(self, collection, href=None):
        """Caches a collection, keyed by its ID and optionally by HREF.

        Args:
            collection: The collection to cache; either a dict with an ``'id'``
                key or an object exposing an ``id`` attribute.
            href (str): Optional HREF to additionally index the collection under.

        Raises:
            KeyError: If ``collection`` is a mapping without an ``'id'`` key.
        """
        try:
            collection_id = collection['id']
        except TypeError:
            # Not subscriptable, so it is not collection JSON; fall back to the
            # ``id`` attribute so that collection objects can be cached as well.
            collection_id = collection.id

        self.cached_ids[collection_id] = collection

        if href is not None:
            self.cached_hrefs[href] = collection


class ResolvedObjectCollectionCache(CollectionCache):
    """CollectionCache that consults a ResolvedObjectCache before its own entries.

    Lookups by ID or HREF first ask the wrapped resolved object cache; only
    when that yields nothing are the entries stored in this cache used.
    """
    def __init__(self, resolved_object_cache, cached_ids=None, cached_hrefs=None):
        super().__init__(cached_ids, cached_hrefs)
        self.resolved_object_cache = resolved_object_cache

    def get_by_id(self, collection_id):
        """Return the resolved object with this ID, else the locally cached one."""
        resolved = self.resolved_object_cache.get_by_id(collection_id)
        if resolved is not None:
            return resolved
        return super().get_by_id(collection_id)

    def get_by_href(self, href):
        """Return the resolved object with this HREF, else the locally cached one."""
        resolved = self.resolved_object_cache.get_by_href(href)
        if resolved is not None:
            return resolved
        return super().get_by_href(href)

    def contains_id(self, collection_id):
        """Check the resolved object cache first, then the local entries."""
        in_resolved = self.resolved_object_cache.contains_id(collection_id)
        return in_resolved or super().contains_id(collection_id)

    def cache(self, collection, href=None):
        """Store the collection in the local entries of this cache."""
        super().cache(collection, href)

    @staticmethod
    def merge(resolved_object_cache, first, second):
        """Build a new cache from the entries of two caches, preferring ``first``.

        Args:
            resolved_object_cache (ResolvedObjectCache): The cache the result wraps.
            first (ResolvedObjectCollectionCache or None): Preferred source of entries.
            second (ResolvedObjectCollectionCache or None): Secondary source of entries.

        Returns:
            ResolvedObjectCollectionCache: A new cache holding the combined entries.
        """
        # Skip missing caches; the remaining order keeps ``first`` ahead of ``second``.
        sources = [source for source in (first, second) if source is not None]

        id_layers = [copy(source.cached_ids) for source in sources]
        href_layers = [copy(source.cached_hrefs) for source in sources]

        # Earlier ChainMap layers take precedence when keys collide.
        return ResolvedObjectCollectionCache(
            resolved_object_cache,
            cached_ids=dict(ChainMap(*id_layers)),
            cached_hrefs=dict(ChainMap(*href_layers)))
34 changes: 10 additions & 24 deletions pystac/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from pystac import (STAC_VERSION, STACError)
from pystac.stac_object import STACObject
from pystac.link import (Link, LinkType)
from pystac.resolved_object_cache import ResolvedObjectCache
from pystac.cache import ResolvedObjectCache
from pystac.utils import (is_absolute_href, make_absolute_href)


Expand Down Expand Up @@ -66,12 +66,7 @@ class Catalog(STACObject):

DEFAULT_FILE_NAME = "catalog.json"
"""Default file name that will be given to this STAC object in a cononical format."""
def __init__(self,
id,
description,
title=None,
stac_extensions=None,
href=None):
def __init__(self, id, description, title=None, stac_extensions=None, href=None):
self.id = id
self.description = description
self.title = title
Expand All @@ -91,8 +86,8 @@ def __repr__(self):
def set_root(self, root, link_type=LinkType.ABSOLUTE):
STACObject.set_root(self, root, link_type)
if root is not None:
root._resolved_objects = ResolvedObjectCache.merge(
root._resolved_objects, self._resolved_objects)
root._resolved_objects = ResolvedObjectCache.merge(root._resolved_objects,
self._resolved_objects)

def add_child(self, child, title=None):
"""Adds a link to a child :class:`~pystac.Catalog` or :class:`~pystac.Collection`.
Expand Down Expand Up @@ -124,8 +119,7 @@ def add_item(self, item, title=None):

# Prevent typo confusion
if isinstance(item, pystac.Catalog):
raise STACError(
'Cannot add catalog as item. Use add_child instead.')
raise STACError('Cannot add catalog as item. Use add_child instead.')

item.set_root(self.get_root())
item.set_parent(self)
Expand Down Expand Up @@ -312,9 +306,7 @@ def to_dict(self, include_self_link=True):
return deepcopy(d)

def clone(self):
clone = Catalog(id=self.id,
description=self.description,
title=self.title)
clone = Catalog(id=self.id, description=self.description, title=self.title)
clone._resolved_objects.cache(clone)

for l in self.links:
Expand Down Expand Up @@ -387,9 +379,7 @@ def normalize_and_save(self, root_href, catalog_type):
def normalize_hrefs(self, root_href):
# Normalizing requires an absolute path
if not is_absolute_href(root_href):
root_href = make_absolute_href(root_href,
os.getcwd(),
start_is_dir=True)
root_href = make_absolute_href(root_href, os.getcwd(), start_is_dir=True)

# Fully resolve the STAC to avoid linking issues.
# This particularly can happen with unresolved links that have
Expand Down Expand Up @@ -441,18 +431,15 @@ def save(self, catalog_type):
else:
child_catalog_type = catalog_type

items_include_self_link = catalog_type in [
CatalogType.ABSOLUTE_PUBLISHED
]
items_include_self_link = catalog_type in [CatalogType.ABSOLUTE_PUBLISHED]

for child_link in self.get_child_links():
if child_link.is_resolved():
child_link.target.save(catalog_type=child_catalog_type)

for item_link in self.get_item_links():
if item_link.is_resolved():
item_link.target.save_object(
include_self_link=items_include_self_link)
item_link.target.save_object(include_self_link=items_include_self_link)

self.save_object(include_self_link=include_self_link)

Expand Down Expand Up @@ -552,8 +539,7 @@ def apply_asset_mapper(tup):

def item_mapper(item):
new_assets = [
x for result in map(apply_asset_mapper, item.assets.items())
for x in result
x for result in map(apply_asset_mapper, item.assets.items()) for x in result
]
item.assets = dict(new_assets)
return item
Expand Down
Loading