diff --git a/upath/core.py b/upath/core.py index e2cbc61f..618621d7 100644 --- a/upath/core.py +++ b/upath/core.py @@ -3,6 +3,7 @@ import sys from typing import Union import urllib +from urllib.parse import ParseResult from fsspec.registry import ( get_filesystem_class, @@ -15,76 +16,49 @@ class _FSSpecAccessor: - def __init__(self, parsed_url, *args, **kwargs): - self._url = parsed_url - cls = get_filesystem_class(self._url.scheme) + __slots__ = ("_fs",) + + def __init__(self, parsed_url: ParseResult, **kwargs): + cls = get_filesystem_class(parsed_url.scheme) url_kwargs = cls._get_kwargs_from_urls( - urllib.parse.urlunparse(self._url) + urllib.parse.urlunparse(parsed_url) ) url_kwargs.update(kwargs) self._fs = cls(**url_kwargs) - def transform_args_wrapper(self, func): - """Modifies the arguments that get passed to the filesystem so that - the UPath instance gets stripped as the first argument. If a - path keyword argument is not given, then `UPath.path` is - formatted for the filesystem and inserted as the first argument. - If it is, then the path keyword argument is formatted properly for - the filesystem. 
- """ + def _format_path(self, path: "UPath") -> str: + return path.path - def wrapper(*args, **kwargs): - args, kwargs = self._transform_arg_paths(args, kwargs) - return func(*args, **kwargs) - - return wrapper - - def _transform_arg_paths(self, args, kwargs): - """Formats the path properly for the filesystem backend.""" - args = list(args) - first_arg = args.pop(0) - if not kwargs.get("path"): - if isinstance(first_arg, UPath): - first_arg = self._format_path(first_arg.path) - args.insert(0, first_arg) - args = tuple(args) - else: - kwargs["path"] = self._format_path(kwargs["path"]) - return args, kwargs - - def _format_path(self, s): - """Placeholder method for subclassed filesystems""" - return s - - def __getattribute__(self, item): - class_attrs = ["_url", "_fs", "__class__"] - if item in class_attrs: - return super().__getattribute__(item) - - class_methods = [ - "__init__", - "__getattribute__", - "transform_args_wrapper", - "_transform_arg_paths", - "_format_path", - ] - if item in class_methods: - return lambda *args, **kwargs: getattr(self.__class__, item)( - self, *args, **kwargs - ) + def open(self, path, *args, **kwargs): + return self._fs.open(self._format_path(path), *args, **kwargs) - d = object.__getattribute__(self, "__dict__") - fs = d.get("_fs", None) - if fs is not None: - method = getattr(fs, item, None) - if method: - return lambda *args, **kwargs: ( - self.transform_args_wrapper(method)(*args, **kwargs) - ) # noqa: E501 - else: - raise NotImplementedError( - f"{fs.protocol} filesystem has no attribute {item}" - ) + def stat(self, path, **kwargs): + return self._fs.stat(self._format_path(path), **kwargs) + + def listdir(self, path, **kwargs): + return self._fs.listdir(self._format_path(path), **kwargs) + + def glob(self, _path, path_pattern, **kwargs): + return self._fs.glob(self._format_path(path_pattern), **kwargs) + + def exists(self, path, **kwargs): + return self._fs.exists(self._format_path(path), **kwargs) + + def info(self, path, 
**kwargs): + return self._fs.info(self._format_path(path), **kwargs) + + def rm(self, path, recursive, **kwargs): + return self._fs.rm( + self._format_path(path), recursive=recursive, **kwargs + ) + + def mkdir(self, path, create_parents=True, **kwargs): + return self._fs.mkdir( + self._format_path(path), create_parents=create_parents, **kwargs + ) + + def touch(self, path, **kwargs): + return self._fs.touch(self._format_path(path), **kwargs) class UPath(pathlib.Path): @@ -246,8 +220,8 @@ def relative_to(self, *other): return output def glob(self, pattern): - path = self.joinpath(pattern) - for name in self._accessor.glob(self, path=path.path): + path_pattern = self.joinpath(pattern) + for name in self._accessor.glob(self, path_pattern): name = self._sub_path(name) name = name.split(self._flavour.sep) yield self._make_child(name) diff --git a/upath/implementations/gcs.py b/upath/implementations/cloud.py similarity index 70% rename from upath/implementations/gcs.py rename to upath/implementations/cloud.py index 2613c57e..7adf9ed6 100644 --- a/upath/implementations/gcs.py +++ b/upath/implementations/cloud.py @@ -2,21 +2,17 @@ import re -class _GCSAccessor(upath.core._FSSpecAccessor): - def __init__(self, parsed_url, *args, **kwargs): - super().__init__(parsed_url, *args, **kwargs) - - def _format_path(self, s): +class _CloudAccessor(upath.core._FSSpecAccessor): + def _format_path(self, path): """ - netloc has already been set to project via `GCSPath._from_parts` + netloc has already been set to project via `CloudPath._from_parts` """ - s = f"{self._url.netloc}/{s.lstrip('/')}" - return s + return f"{path._url.netloc}/{path.path.lstrip('/')}" # project is not part of the path, but is part of the credentials -class GCSPath(upath.core.UPath): - _default_accessor = _GCSAccessor +class CloudPath(upath.core.UPath): + _default_accessor = _CloudAccessor @classmethod def _from_parts(cls, args, url=None, **kwargs): @@ -36,9 +32,9 @@ def _from_parsed_parts(cls, drv, root, 
parts, url=None, **kwargs): def _sub_path(self, name): """ - `gcsfs` returns the full path as `/` with `listdir` and - `glob`. However, in `iterdir` and `glob` we only want the relative path - to `self`. + `gcsfs` and `s3fs` return the full path as `/` with + `listdir` and `glob`. However, in `iterdir` and `glob` we only want the + relative path to `self`. """ sp = self.path subed = re.sub(f"^({self._url.netloc})?/?({sp}|{sp[1:]})/?", "", name) @@ -57,3 +53,11 @@ def joinpath(self, *args): bucket = args_list.pop(0) self._kwargs["bucket"] = bucket return super().joinpath(*tuple(args_list)) + + +class GCSPath(CloudPath): + pass + + +class S3Path(CloudPath): + pass diff --git a/upath/implementations/hdfs.py b/upath/implementations/hdfs.py index 46803e10..f1eb680e 100644 --- a/upath/implementations/hdfs.py +++ b/upath/implementations/hdfs.py @@ -6,21 +6,10 @@ def __init__(self, parsed_url, *args, **kwargs): super().__init__(parsed_url, *args, **kwargs) self._fs.root_marker = "/" - def transform_args_wrapper(self, func): - """If arguments are passed to the wrapped function, and if the first - argument is a UPath instance, that argument is replaced with - the UPath's path attribute - """ - - def wrapper(*args, **kwargs): - args, kwargs = self._transform_arg_paths(args, kwargs) - if "trunicate" in kwargs: - kwargs.pop("trunicate") - if func.__name__ == "mkdir": - args = args[:1] - return func(*args, **kwargs) - - return wrapper + def touch(self, path, **kwargs): + """Discard ``trunicate`` and touch ``path`` via the base accessor.""" + kwargs.pop("trunicate", None) + return super().touch(path, **kwargs) class HDFSPath(upath.core.UPath): diff --git a/upath/implementations/http.py b/upath/implementations/http.py index 408d46f0..c693e44d 100644 --- a/upath/implementations/http.py +++ b/upath/implementations/http.py @@ -1,5 +1,3 @@ -import urllib - import upath.core @@ -7,28 +5,8 @@ class _HTTPAccessor(upath.core._FSSpecAccessor): def __init__(self, parsed_url, *args, **kwargs): super().__init__(parsed_url, *args, **kwargs) - def transform_args_wrapper(self,
func): - """if arguments are passed to the wrapped function, and if the first - argument is a UPath instance, that argument is replaced with - the UPath's path attribute - """ - - def wrapper(*args, **kwargs): - if args: - args = list(args) - first_arg = args.pop(0) - if not kwargs.get("path"): - if isinstance(first_arg, upath.core.UPath): - first_arg = str(first_arg) - args.insert(0, first_arg) - args = tuple(args) - else: - new_url = self._url._replace(path=kwargs["path"]) - unparsed = urllib.parse.urlunparse(new_url) - kwargs["path"] = unparsed - return func(*args, **kwargs) - - return wrapper + def _format_path(self, path): + return str(path) class HTTPPath(upath.core.UPath): diff --git a/upath/implementations/s3.py b/upath/implementations/s3.py deleted file mode 100644 index b3f229f5..00000000 --- a/upath/implementations/s3.py +++ /dev/null @@ -1,60 +0,0 @@ -import re - -import upath.core - - -class _S3Accessor(upath.core._FSSpecAccessor): - def __init__(self, parsed_url, *args, **kwargs): - super().__init__(parsed_url, *args, **kwargs) - - def _format_path(self, s): - """If the filesystem backend doesn't have a root_marker, strip the - leading slash of a path and add the bucket - """ - s = f"{self._url.netloc}/{s.lstrip('/')}" - return s - - -class S3Path(upath.core.UPath): - _default_accessor = _S3Accessor - - @classmethod - def _from_parts(cls, args, url=None, **kwargs): - if kwargs.get("bucket") and url is not None: - bucket = kwargs.pop("bucket") - url = url._replace(netloc=bucket) - obj = super()._from_parts(args, url, **kwargs) - return obj - - @classmethod - def _from_parsed_parts(cls, drv, root, parts, url=None, **kwargs): - if kwargs.get("bucket") and url is not None: - bucket = kwargs.pop("bucket") - url = url._replace(netloc=bucket) - - obj = super()._from_parsed_parts(drv, root, parts, url, **kwargs) - return obj - - def _sub_path(self, name): - """ - `s3fs` returns the full path as `/` with `listdir` and - `glob`. 
However, in `iterdir` and `glob` we only want the relative path - to `self`. - """ - sp = self.path - subed = re.sub(f"^({self._url.netloc})?/?({sp}|{sp[1:]})/?", "", name) - return subed - - def joinpath(self, *args): - if self._url.netloc: - return super().joinpath(*args) - # handles a bucket in the path - else: - path = args[0] - if isinstance(path, list): - args_list = list(*args) - else: - args_list = path.split(self._flavour.sep) - bucket = args_list.pop(0) - self._kwargs["bucket"] = bucket - return super().joinpath(*tuple(args_list)) diff --git a/upath/registry.py b/upath/registry.py index 98f80bfc..d3dce879 100644 --- a/upath/registry.py +++ b/upath/registry.py @@ -6,17 +6,17 @@ class _Registry: - from upath.implementations import hdfs, http, memory, s3, gcs + from upath.implementations import hdfs, http, memory, cloud known_implementations: Dict[str, Type[UPath]] = { "https": http.HTTPPath, "http": http.HTTPPath, "hdfs": hdfs.HDFSPath, - "s3a": s3.S3Path, - "s3": s3.S3Path, + "s3a": cloud.S3Path, + "s3": cloud.S3Path, "memory": memory.MemoryPath, - "gs": gcs.GCSPath, - "gcs": gcs.GCSPath, + "gs": cloud.GCSPath, + "gcs": cloud.GCSPath, } def __getitem__(self, item): diff --git a/upath/tests/implementations/test_gcs.py b/upath/tests/implementations/test_gcs.py index e488e945..e14ecb5f 100644 --- a/upath/tests/implementations/test_gcs.py +++ b/upath/tests/implementations/test_gcs.py @@ -1,7 +1,7 @@ import pytest from upath import UPath -from upath.implementations.gcs import GCSPath +from upath.implementations.cloud import GCSPath from upath.errors import NotDirectoryError from ..cases import BaseTests from ..utils import skip_on_windows diff --git a/upath/tests/implementations/test_s3.py b/upath/tests/implementations/test_s3.py index 017d80d5..e24360eb 100644 --- a/upath/tests/implementations/test_s3.py +++ b/upath/tests/implementations/test_s3.py @@ -4,7 +4,7 @@ from upath import UPath from upath.errors import NotDirectoryError -from upath.implementations.s3 
import S3Path +from upath.implementations.cloud import S3Path from ..cases import BaseTests diff --git a/upath/tests/test_core.py b/upath/tests/test_core.py index 27a495c0..b21c5e35 100644 --- a/upath/tests/test_core.py +++ b/upath/tests/test_core.py @@ -5,7 +5,7 @@ import pytest from upath import UPath -from upath.implementations.s3 import S3Path +from upath.implementations.cloud import S3Path from .cases import BaseTests from .utils import only_on_windows, skip_on_windows