diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index 0080cc59c..c62033329 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -47,7 +47,7 @@ Enhancements Fixes -- passing withdirs in aync _glob() (#1953) +- passing withdirs in async _glob() (#1953) - fix _rm_file/_rm redirection in async (#1951) - allow arrowFile to be seekable (#1950) - add size attribute to arrowFile (#1944) @@ -182,7 +182,7 @@ Enhancements Fixes -- json should be a method fo requests shim (#1814) +- json should be a method of requests shim (#1814) - don't raise if known_implementation has no given error string (#1804) Other @@ -234,7 +234,7 @@ Enhancements - "tree" text display of filesystem contents (#1750) - async wrapper for sync FSs (#1745) - new known implementation: tosfs (#1739) -- consilidate block fetch requests (#1733) +- consolidate block fetch requests (#1733) Fixes @@ -313,7 +313,7 @@ Fixes Enhancements - allow dicts (not just bytes) for referenceFS (#1616 -- make filesystems JSON serializeable (#1612) +- make filesystems JSON serializable (#1612) - implement multifile cat() for github (#1620) Fixes @@ -557,7 +557,7 @@ Other Enhancements -- #1259, add maxdepth fo cp/get/put +- #1259, add maxdepth to cp/get/put - #1263, allow dir modification during walk() - #1264, add boxfs to registry - #1266, optimise referenceFS lazy lookups, especially for writing parquet @@ -737,7 +737,7 @@ Other 2022.8.1 -------- -- revert #1024 (#1029), with strciter requirements on OpenFile usage +- revert #1024 (#1029), with stricter requirements on OpenFile usage 2022.8.0 -------- diff --git a/fsspec/archive.py b/fsspec/archive.py index 13a4da8df..fc990b5c6 100644 --- a/fsspec/archive.py +++ b/fsspec/archive.py @@ -69,7 +69,6 @@ def ls(self, path, detail=True, **kwargs): out = {"name": ppath, "size": 0, "type": "directory"} paths[ppath] = out if detail: - out = sorted(paths.values(), key=operator.itemgetter("name")) - return out + return sorted(paths.values(), key=operator.itemgetter("name")) else: return sorted(paths) diff --git a/fsspec/asyn.py b/fsspec/asyn.py index 360758ac6..0bad3b447 100644 --- a/fsspec/asyn.py +++ b/fsspec/asyn.py @@ -86,10 +86,8 @@ def sync(loop, func, *args, timeout=None, **kwargs): result = [None] event = threading.Event() asyncio.run_coroutine_threadsafe(_runner(event, coro, result, timeout), loop) - while True: - # this loops allows thread to get interrupted - if event.wait(1): - break + while not event.wait(1): + # this loop allows thread to get interrupted if timeout is not None: timeout -= 1 if timeout < 0: @@ -362,10 +360,11 @@ async def _copy( batch_size=None, **kwargs, ): - if on_error is None and recursive: - on_error = "ignore" - elif on_error is None: - on_error = "raise" + if on_error is None: + if recursive: + on_error = "ignore" + else: + on_error = "raise" if isinstance(path1, list) and isinstance(path2, list): # No need to expand paths when both source and destination @@ -720,7 +719,7 @@ async def _walk(self, path, maxdepth=None, on_error="omit", **kwargs): detail = kwargs.pop("detail", False) try: listing = await self._ls(path, detail=True, **kwargs) - except (FileNotFoundError, OSError) as e: + except OSError as e: if on_error == "raise": raise elif callable(on_error): @@ -772,7 +771,7 @@ async def _glob(self, path, maxdepth=None, **kwargs): ends_with_sep = path.endswith(seps) # _strip_protocol strips trailing slash path = self._strip_protocol(path) append_slash_to_dirname = ends_with_sep or path.endswith( - tuple(sep + "**" for sep in seps) + tuple(f"{sep}**" for sep in seps) ) idx_star = path.find("*") if path.find("*") >= 0 else len(path) idx_qmark = path.find("?") if path.find("?") >= 0 else len(path) diff --git a/fsspec/caching.py b/fsspec/caching.py index 3499b4d26..46a952848 100644 --- a/fsspec/caching.py +++ b/fsspec/caching.py @@ -410,8 +410,7 @@ def _fetch_block(self, block_number: int) -> bytes: self.total_requested_bytes += end - start self.miss_count += 1 logger.info("BlockCache fetching block %d", block_number) - block_contents = super()._fetch(start, end) - return block_contents + return super()._fetch(start, end) def _read_cache( self, start: int, end: int, start_block_number: int, end_block_number: int @@ -856,7 +855,7 @@ def _fetch(self, start: int | None, end: int | None) -> bytes: self._fetch_future = None else: # Must join if we need the block for the current fetch - must_join = bool( + must_join = ( start_block_number <= self._fetch_future_block_number <= end_block_number @@ -919,8 +918,7 @@ def _fetch_block(self, block_number: int, log_info: str = "sync") -> bytes: logger.info("BlockCache fetching block (%s) %d", log_info, block_number) self.total_requested_bytes += end - start self.miss_count += 1 - block_contents = super()._fetch(start, end) - return block_contents + return super()._fetch(start, end) def _read_cache( self, start: int, end: int, start_block_number: int, end_block_number: int diff --git a/fsspec/fuse.py b/fsspec/fuse.py index 566d520fc..3c2e1cbda 100644 --- a/fsspec/fuse.py +++ b/fsspec/fuse.py @@ -76,8 +76,7 @@ def read(self, path, size, offset, fh): f = self.cache[fh] f.seek(offset) - out = f.read(size) - return out + return f.read(size) def write(self, path, data, offset, fh): logger.debug("write %s", (path, offset)) @@ -119,7 +118,7 @@ def unlink(self, path): fn = "".join([self.root, path.lstrip("/")]) try: self.fs.rm(fn, False) - except (OSError, FileNotFoundError) as exc: + except OSError as exc: raise FuseOSError(EIO) from exc def release(self, path, fh): diff --git a/fsspec/implementations/http.py b/fsspec/implementations/http.py index dfb1bc360..a73542af2 100644 --- a/fsspec/implementations/http.py +++ b/fsspec/implementations/http.py @@ -463,7 +463,7 @@ async def _glob(self, path, maxdepth=None, **kwargs): """ Find files by glob-matching. - This implementation is idntical to the one in AbstractFileSystem, + This implementation is identical to the one in AbstractFileSystem, but "?" is not considered as a character for globbing, because it is so common in URLs, often identifying the "query" part. """ diff --git a/fsspec/implementations/http_sync.py b/fsspec/implementations/http_sync.py index a67ea3ea5..4f09ba0a5 100644 --- a/fsspec/implementations/http_sync.py +++ b/fsspec/implementations/http_sync.py @@ -11,7 +11,7 @@ try: import yarl -except (ImportError, ModuleNotFoundError, OSError): +except (ImportError, OSError): yarl = False from fsspec.callbacks import _DEFAULT_CALLBACK @@ -278,10 +278,9 @@ def encode_url(self, url): @classmethod def _strip_protocol(cls, path: str) -> str: """For HTTP, we always want to keep the full URL""" - path = path.replace("sync-http://", "http://").replace( + return path.replace("sync-http://", "http://").replace( "sync-https://", "https://" ) - return path @classmethod def _parent(cls, path): diff --git a/fsspec/implementations/local.py b/fsspec/implementations/local.py index 417447e5b..611b6531a 100644 --- a/fsspec/implementations/local.py +++ b/fsspec/implementations/local.py @@ -243,10 +243,9 @@ def _parent(cls, path): else: # NT path_ = path.rsplit("/", 1)[0] - if len(path_) <= 3: - if path_[1:2] == ":": - # nt root (something like c:/) - return path_[0] + ":/" + if len(path_) <= 3 and path_[1:2] == ":": + # nt root (something like c:/) + return path_[0] + ":/" # More cases may be required here return path_ diff --git a/fsspec/implementations/tests/test_github.py b/fsspec/implementations/tests/test_github.py index 800f1a3f0..939840bd6 100644 --- a/fsspec/implementations/tests/test_github.py +++ b/fsspec/implementations/tests/test_github.py @@ -47,7 +47,7 @@ def test_github_cat(): def test_github_ls(): - # test using ls to list the files in a resository + # test using ls to list the files in a repository fs = fsspec.filesystem("github", org="mwaskom", repo="seaborn-data") ls_result = set(fs.ls("")) expected = {"brain_networks.csv", "mpg.csv", "penguins.csv", "README.md", "raw"} diff --git a/fsspec/implementations/tests/test_local.py b/fsspec/implementations/tests/test_local.py index c3caf1ddb..d3841916c 100644 --- a/fsspec/implementations/tests/test_local.py +++ b/fsspec/implementations/tests/test_local.py @@ -997,7 +997,7 @@ def test_strip_protocol_windows_remote_shares(uri, stripped, cwd): assert LocalFileSystem._strip_protocol(uri) == stripped -def test_mkdir_twice_faile(tmpdir): +def test_mkdir_twice_fails(tmpdir): fn = os.path.join(tmpdir, "test") fs = fsspec.filesystem("file") fs.mkdir(fn) diff --git a/fsspec/implementations/tests/test_memory.py b/fsspec/implementations/tests/test_memory.py index 600022a03..094a9f588 100644 --- a/fsspec/implementations/tests/test_memory.py +++ b/fsspec/implementations/tests/test_memory.py @@ -172,7 +172,7 @@ def test_moves(m): assert m.find("") == ["/target.txt", "/target2.txt"] -def test_rm_reursive_empty_subdir(m): +def test_rm_recursive_empty_subdir(m): # https://github.com/fsspec/filesystem_spec/issues/500 m.mkdir("recdir") m.mkdir("recdir/subdir2") diff --git a/fsspec/parquet.py b/fsspec/parquet.py index e6e03fb14..b72f7ff4b 100644 --- a/fsspec/parquet.py +++ b/fsspec/parquet.py @@ -342,11 +342,7 @@ def _transfer_ranges(fs, blocks, paths, starts, ends): def _add_header_magic(data): # Add b"PAR1" to file headers for path in list(data): - add_magic = True - for k in data[path]: - if k[0] == 0 and k[1] >= 4: - add_magic = False - break + add_magic = not any(k[0] == 0 and k[1] >= 4 for k in data[path]) if add_magic: data[path][(0, 4)] = b"PAR1" @@ -461,8 +457,8 @@ def _parquet_byte_ranges( file_offset0 = column.meta_data.dictionary_page_offset if file_offset0 is None: file_offset0 = column.meta_data.data_page_offset - num_bytes = column.meta_data.total_compressed_size if footer_start is None or file_offset0 < footer_start: + num_bytes = column.meta_data.total_compressed_size data_paths.append(fn) data_starts.append(file_offset0) data_ends.append( diff --git a/fsspec/spec.py b/fsspec/spec.py index b67d5c16f..53204cd33 100644 --- a/fsspec/spec.py +++ b/fsspec/spec.py @@ -277,7 +277,7 @@ def invalidate_cache(self, path=None): """ # Not necessary to implement invalidation mechanism, may have no cache. # But if have, you should call this method of parent class from your - # subclass to ensure expiring caches after transacations correctly. + # subclass to ensure expiring caches after transactions correctly. # See the implementation of FTPFileSystem in ftp.py if self._intrans: self._invalidated_caches_in_transaction.append(path) @@ -430,7 +430,7 @@ def walk(self, path, maxdepth=None, topdown=True, on_error="omit", **kwargs): detail = kwargs.pop("detail", False) try: listing = self.ls(path, detail=True, **kwargs) - except (FileNotFoundError, OSError) as e: + except OSError as e: if on_error == "raise": raise if callable(on_error): @@ -602,7 +602,7 @@ def glob(self, path, maxdepth=None, **kwargs): ends_with_sep = path.endswith(seps) # _strip_protocol strips trailing slash path = self._strip_protocol(path) append_slash_to_dirname = ends_with_sep or path.endswith( - tuple(sep + "**" for sep in seps) + tuple(f"{sep}**" for sep in seps) ) idx_star = path.find("*") if path.find("*") >= 0 else len(path) idx_qmark = path.find("?") if path.find("?") >= 0 else len(path) @@ -1126,10 +1126,11 @@ def copy( not-found exceptions will cause the path to be skipped; defaults to raise unless recursive is true, where the default is ignore """ - if on_error is None and recursive: - on_error = "ignore" - elif on_error is None: - on_error = "raise" + if on_error is None: + if recursive: + on_error = "ignore" + else: + on_error = "raise" if isinstance(path1, list) and isinstance(path2, list): # No need to expand paths when both source and destination @@ -2000,7 +2001,7 @@ def seek(self, loc, whence=0): from start of file, current location or end of file, resp. """ loc = int(loc) - if not self.mode == "rb": + if self.mode != "rb": raise OSError(ESPIPE, "Seek only available in read mode") if whence == 0: nloc = loc diff --git a/fsspec/tests/test_caches.py b/fsspec/tests/test_caches.py index b9c5293c0..2512f746c 100644 --- a/fsspec/tests/test_caches.py +++ b/fsspec/tests/test_caches.py @@ -133,7 +133,7 @@ def test_readahead_cache(): total_requested_bytes += (10 - 3) + block_size assert cache.total_requested_bytes == total_requested_bytes - # caache hit again + # cache hit again assert cache._fetch(3, 10) == letters_fetcher(3, 10) assert cache.miss_count == 2 assert cache.hit_count == 1 diff --git a/fsspec/tests/test_fuse.py b/fsspec/tests/test_fuse.py index ef3005367..89d1c8723 100644 --- a/fsspec/tests/test_fuse.py +++ b/fsspec/tests/test_fuse.py @@ -24,7 +24,7 @@ def host_fuse(mountdir): def test_basic(tmpdir, capfd): mountdir = str(tmpdir.mkdir("mount")) - fuse_process = Process(target=host_fuse, args=(str(mountdir),)) + fuse_process = Process(target=host_fuse, args=(mountdir,)) fuse_process.start() try: diff --git a/fsspec/utils.py b/fsspec/utils.py index 7b06dd581..aec3b55db 100644 --- a/fsspec/utils.py +++ b/fsspec/utils.py @@ -292,8 +292,7 @@ def read_block( # TODO: allow length to be None and read to the end of the file? assert length is not None - b = f.read(length) - return b + return f.read(length) def tokenize(*args: Any, **kwargs: Any) -> str: