diff --git a/pyproject.toml b/pyproject.toml index 795b3b4d..7bcb24e4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,12 +30,15 @@ dependencies = [ ] [project.optional-dependencies] -dev = ["pyshp[test]", "pre-commit", "ruff"] +dev = ["pyshp[test]", "pre-commit", "ruff", "mypy"] test = ["pytest"] [project.urls] Repository = "https://github.com/GeospatialPython/pyshp" +[project.scripts] +shapefile="shapefile.__main__:main" + [tool.hatch.build.targets.sdist] only-include = ["src", "shapefiles", "test_shapefile.py"] @@ -44,7 +47,7 @@ only-include = ["src"] sources = {"src" = ""} # move from "src" directory for wheel [tool.hatch.version] -path = "src/shapefile.py" +path = "src/shapefile/__version__.py" [tool.pytest.ini_options] markers = [ diff --git a/src/shapefile/__init__.py b/src/shapefile/__init__.py index b690d30f..b37ac6b6 100644 --- a/src/shapefile/__init__.py +++ b/src/shapefile/__init__.py @@ -8,67 +8,184 @@ from __future__ import annotations -__all__ = [ - "__version__" - -] +import logging +from .__main__ import main from .__version__ import __version__ -from ._doctest_runner import _test +from ._doctest_runner import _replace_remote_url +from .classes import Field, ShapeRecord, ShapeRecords, Shapes +from .constants import ( + FIRST_RING, + INNER_RING, + MISSING, + MULTIPATCH, + MULTIPOINT, + MULTIPOINTM, + MULTIPOINTZ, + NODATA, + NULL, + OUTER_RING, + PARTTYPE_LOOKUP, + POINT, + POINTM, + POINTZ, + POLYGON, + POLYGONM, + POLYGONZ, + POLYLINE, + POLYLINEM, + POLYLINEZ, + REPLACE_REMOTE_URLS_WITH_LOCALHOST, + RING, + SHAPETYPE_LOOKUP, + SHAPETYPENUM_LOOKUP, + TRIANGLE_FAN, + TRIANGLE_STRIP, +) +from .exceptions import GeoJSON_Error, RingSamplingError, ShapefileException +from .geometric_calculations import bbox_overlap +from .helpers import _Array, fsdecode_if_pathlike +from .reader import Reader +from .shapes import ( + SHAPE_CLASS_FROM_SHAPETYPE, + MultiPatch, + MultiPoint, + MultiPointM, + MultiPointZ, + NullShape, + Point, + PointM, + PointM_shapeTypes, + PointZ, + PointZ_shapeTypes, + Polygon, + PolygonM, + PolygonZ, + Polyline, + PolylineM, + PolylineZ, + Shape, + _CanHaveBBox_shapeTypes, + _HasM, + _HasM_shapeTypes, + _HasZ, + _HasZ_shapeTypes, +) +from .types import ( + FIELD_TYPE_ALIASES, + BBox, + BinaryFileStreamT, + BinaryFileT, + Coord, + Coords, + FieldType, + FieldTypeT, + MBox, + Point2D, + Point3D, + PointMT, + PointsT, + PointT, + PointZT, + ReadableBinStream, + ReadSeekableBinStream, + ReadWriteSeekableBinStream, + RecordValue, + RecordValueNotDate, + WriteableBinStream, + WriteSeekableBinStream, + ZBox, +) +from .writer import Writer -import logging -import sys -# import io -# import os -# import tempfile -# import time -# import zipfile -# from collections.abc import Container, Iterable, Iterator, Reversible, Sequence -# from datetime import date -# from os import PathLike -# from struct import Struct, calcsize, error, pack, unpack -# from types import TracebackType -# from typing import ( -# IO, -# Any, -# Final, -# Generic, -# Literal, -# NamedTuple, -# NoReturn, -# Optional, -# Protocol, -# SupportsIndex, -# TypedDict, -# TypeVar, -# Union, -# cast, -# overload, -# ) +__all__ = [ + "__version__", + "NULL", + "POINT", + "POLYLINE", + "POLYGON", + "MULTIPOINT", + "POINTZ", + "POLYLINEZ", + "POLYGONZ", + "MULTIPOINTZ", + "POINTM", + "POLYLINEM", + "POLYGONM", + "MULTIPOINTM", + "MULTIPATCH", + "SHAPETYPE_LOOKUP", + "REPLACE_REMOTE_URLS_WITH_LOCALHOST", + "SHAPETYPENUM_LOOKUP", + "TRIANGLE_STRIP", + "TRIANGLE_FAN", + "OUTER_RING", + "INNER_RING", + "FIRST_RING", + "RING", + "PARTTYPE_LOOKUP", + "MISSING", + "NODATA", + "Reader", + "Writer", + "fsdecode_if_pathlike", + "_Array", + "Shape", + "NullShape", + "Point", + "Polyline", + "Polygon", + "MultiPoint", + "MultiPointM", + "MultiPointZ", + "PolygonM", + "PolygonZ", + "PolylineM", + "PolylineZ", + "MultiPatch", + "PointM", + "PointZ", + "SHAPE_CLASS_FROM_SHAPETYPE", + "PointM_shapeTypes", + "PointZ_shapeTypes", + "_CanHaveBBox_shapeTypes", + "_HasM", + "_HasM_shapeTypes", + "_HasZ", + "_HasZ_shapeTypes", + "Point2D", + "Point3D", + "PointMT", + "PointZT", + "Coord", + "Coords", + "PointT", + "PointsT", + "BBox", + "MBox", + "ZBox", + "WriteableBinStream", + "ReadableBinStream", + "WriteSeekableBinStream", + "ReadSeekableBinStream", + "ReadWriteSeekableBinStream", + "BinaryFileT", + "BinaryFileStreamT", + "FieldTypeT", + "FieldType", + "FIELD_TYPE_ALIASES", + "RecordValueNotDate", + "RecordValue", + "ShapefileException", + "RingSamplingError", + "GeoJSON_Error", + "Field", + "Shapes", + "ShapeRecord", + "ShapeRecords", + "bbox_overlap", + "main", + "_replace_remote_url", +] -# Create named logger logger = logging.getLogger(__name__) - - - - - - - - - - - - - -def main() -> None: - """ - Doctests are contained in the file 'README.md', and are tested using the built-in - testing libraries. - """ - failure_count = _test() - sys.exit(failure_count) - - -if __name__ == "__main__": - main() diff --git a/src/shapefile/__main__.py b/src/shapefile/__main__.py new file mode 100644 index 00000000..ac2d2f36 --- /dev/null +++ b/src/shapefile/__main__.py @@ -0,0 +1,16 @@ +import sys + +from ._doctest_runner import _test + + +def main() -> None: + """ + Doctests are contained in the file 'README.md', and are tested using the built-in + testing libraries. + """ + failure_count = _test() + sys.exit(failure_count) + + +if __name__ == "__main__": + main() diff --git a/src/shapefile/__version__.py b/src/shapefile/__version__.py index 7234b3c1..20cc3a9f 100644 --- a/src/shapefile/__version__.py +++ b/src/shapefile/__version__.py @@ -1,2 +1 @@ - -__version__ = "3.0.2" \ No newline at end of file +__version__ = "3.0.3rc.dev2" diff --git a/src/shapefile/_doctest_runner.py b/src/shapefile/_doctest_runner.py index 59d5d66b..2b0c0ec7 100644 --- a/src/shapefile/_doctest_runner.py +++ b/src/shapefile/_doctest_runner.py @@ -1,9 +1,17 @@ +from __future__ import annotations + import doctest +import sys +from collections.abc import Iterable, Iterator +from pathlib import Path +from urllib.parse import urlparse, urlunparse + +from .constants import REPLACE_REMOTE_URLS_WITH_LOCALHOST + -# Begin Testing def _get_doctests() -> doctest.DocTest: # run tests - with open("README.md", "rb") as fobj: + with Path("README.md").open("rb") as fobj: tests = doctest.DocTestParser().get_doctest( string=fobj.read().decode("utf8").replace("\r\n", "\n"), globs={}, @@ -139,4 +147,4 @@ def _test(args: list[str] = sys.argv[1:], verbosity: bool = False) -> int: elif failure_count > 0: runner.summarize(verbosity) - return failure_count \ No newline at end of file + return failure_count diff --git a/src/shapefile/classes.py b/src/shapefile/classes.py index 3a9901ad..36d9c877 100644 --- a/src/shapefile/classes.py +++ b/src/shapefile/classes.py @@ -1,6 +1,24 @@ -from typing import NamedTuple +from __future__ import annotations + +from collections.abc import Iterable +from datetime import date +from typing import Any, NamedTuple, Optional, SupportsIndex, overload + +from .constants import NULL +from .exceptions import ShapefileException +from .geojson_types import ( + GeoJSONFeature, + GeoJSONFeatureCollection, + GeoJSONGeometryCollection, +) +from .shapes import Shape +from .types import ( + FIELD_TYPE_ALIASES, + FieldType, + FieldTypeT, + RecordValue, +) -from shapefile.types import FieldTypeT # Use functional syntax to have an attribute named type, a Python keyword class Field(NamedTuple): @@ -270,4 +288,4 @@ def __geo_interface__(self) -> GeoJSONFeatureCollection: return GeoJSONFeatureCollection( type="FeatureCollection", features=[shaperec.__geo_interface__ for shaperec in self], - ) \ No newline at end of file + ) diff --git a/src/shapefile/constants.py b/src/shapefile/constants.py index 37f4b484..c46ace7d 100644 --- a/src/shapefile/constants.py +++ b/src/shapefile/constants.py @@ -1,3 +1,6 @@ +from __future__ import annotations + +import os # Module settings VERBOSE = True @@ -60,4 +63,4 @@ MISSING = (None, "") # Don't make a set, as user input may not be Hashable -NODATA = -10e38 # as per the ESRI shapefile spec, only used for m-values. \ No newline at end of file +NODATA = -10e38 # as per the ESRI shapefile spec, only used for m-values. diff --git a/src/shapefile/exceptions.py b/src/shapefile/exceptions.py index f0f34f42..0f496458 100644 --- a/src/shapefile/exceptions.py +++ b/src/shapefile/exceptions.py @@ -1,5 +1,10 @@ +class RingSamplingError(Exception): + pass + + +class GeoJSON_Error(Exception): + pass class ShapefileException(Exception): """An exception to handle shapefile specific problems.""" - diff --git a/src/shapefile/geojson.py b/src/shapefile/geojson.py deleted file mode 100644 index 0037cb55..00000000 --- a/src/shapefile/geojson.py +++ /dev/null @@ -1,280 +0,0 @@ - -class GeoJSON_Error(Exception): - pass - -class HasGeoInterface(Protocol): - @property - def __geo_interface__(self) -> GeoJSONHomogeneousGeometryObject: ... - - -class GeoJSONPoint(TypedDict): - type: Literal["Point"] - # We fix to a tuple (to statically check the length is 2, 3 or 4) but - # RFC7946 only requires: "A position is an array of numbers. There MUST be two or more - # elements. " - # RFC7946 also requires long/lat easting/northing which we do not enforce, - # and despite the SHOULD NOT, we may use a 4th element for Shapefile M Measures. - coordinates: PointT | tuple[()] - - -class GeoJSONMultiPoint(TypedDict): - type: Literal["MultiPoint"] - coordinates: PointsT - - -class GeoJSONLineString(TypedDict): - type: Literal["LineString"] - # "Two or more positions" not enforced by type checker - # https://datatracker.ietf.org/doc/html/rfc7946#section-3.1.4 - coordinates: PointsT - - -class GeoJSONMultiLineString(TypedDict): - type: Literal["MultiLineString"] - coordinates: list[PointsT] - - -class GeoJSONPolygon(TypedDict): - type: Literal["Polygon"] - # Other requirements for Polygon not enforced by type checker - # https://datatracker.ietf.org/doc/html/rfc7946#section-3.1.6 - coordinates: list[PointsT] - - -class GeoJSONMultiPolygon(TypedDict): - type: Literal["MultiPolygon"] - coordinates: list[list[PointsT]] - - -GeoJSONHomogeneousGeometryObject = Union[ - GeoJSONPoint, - GeoJSONMultiPoint, - GeoJSONLineString, - GeoJSONMultiLineString, - GeoJSONPolygon, - GeoJSONMultiPolygon, -] - -GEOJSON_TO_SHAPETYPE: dict[str, int] = { - "Null": NULL, - "Point": POINT, - "LineString": POLYLINE, - "Polygon": POLYGON, - "MultiPoint": MULTIPOINT, - "MultiLineString": POLYLINE, - "MultiPolygon": POLYGON, -} - - -class GeoJSONGeometryCollection(TypedDict): - type: Literal["GeometryCollection"] - geometries: list[GeoJSONHomogeneousGeometryObject] - - -# RFC7946 3.1 -GeoJSONObject = Union[GeoJSONHomogeneousGeometryObject, GeoJSONGeometryCollection] - - -class GeoJSONFeature(TypedDict): - type: Literal["Feature"] - properties: ( - dict[str, Any] | None - ) # RFC7946 3.2 "(any JSON object or a JSON null value)" - geometry: GeoJSONObject | None - - -class GeoJSONFeatureCollection(TypedDict): - type: Literal["FeatureCollection"] - features: list[GeoJSONFeature] - - -class GeoJSONFeatureCollectionWithBBox(GeoJSONFeatureCollection): - # bbox is technically optional under the spec but this seems - # a very minor improvement that would require NotRequired - # from the typing-extensions backport for Python 3.9 - # (PyShp's resisted having any other dependencies so far!) - bbox: list[float] - -class GeoJSONSerisalizableShape: - @property - def __geo_interface__(self) -> GeoJSONHomogeneousGeometryObject: - if self.shapeType in {POINT, POINTM, POINTZ}: - # point - if len(self.points) == 0: - # the shape has no coordinate information, i.e. is 'empty' - # the geojson spec does not define a proper null-geometry type - # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries - return {"type": "Point", "coordinates": ()} - - return {"type": "Point", "coordinates": self.points[0]} - - if self.shapeType in {MULTIPOINT, MULTIPOINTM, MULTIPOINTZ}: - if len(self.points) == 0: - # the shape has no coordinate information, i.e. is 'empty' - # the geojson spec does not define a proper null-geometry type - # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries - return {"type": "MultiPoint", "coordinates": []} - - # multipoint - return { - "type": "MultiPoint", - "coordinates": self.points, - } - - if self.shapeType in {POLYLINE, POLYLINEM, POLYLINEZ}: - if len(self.parts) == 0: - # the shape has no coordinate information, i.e. is 'empty' - # the geojson spec does not define a proper null-geometry type - # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries - return {"type": "LineString", "coordinates": []} - - if len(self.parts) == 1: - # linestring - return { - "type": "LineString", - "coordinates": self.points, - } - - # multilinestring - ps = None - coordinates = [] - for part in self.parts: - if ps is None: - ps = part - continue - - coordinates.append(list(self.points[ps:part])) - ps = part - - # assert len(self.parts) > 1 - # from previous if len(self.parts) checks so part is defined - coordinates.append(list(self.points[part:])) - return {"type": "MultiLineString", "coordinates": coordinates} - - if self.shapeType in {POLYGON, POLYGONM, POLYGONZ}: - if len(self.parts) == 0: - # the shape has no coordinate information, i.e. is 'empty' - # the geojson spec does not define a proper null-geometry type - # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries - return {"type": "Polygon", "coordinates": []} - - # get all polygon rings - rings = [] - for i, start in enumerate(self.parts): - # get indexes of start and end points of the ring - try: - end = self.parts[i + 1] - except IndexError: - end = len(self.points) - - # extract the points that make up the ring - ring = list(self.points[start:end]) - rings.append(ring) - - # organize rings into list of polygons, where each polygon is defined as list of rings. - # the first ring is the exterior and any remaining rings are holes (same as GeoJSON). - polys = organize_polygon_rings(rings, self._errors) - - # if VERBOSE is True, issue detailed warning about any shape errors - # encountered during the Shapefile to GeoJSON conversion - if VERBOSE and self._errors: - header = f"Possible issue encountered when converting Shape #{self.oid} to GeoJSON: " - orphans = self._errors.get("polygon_orphaned_holes", None) - if orphans: - msg = ( - header - + "Shapefile format requires that all polygon interior holes be contained by an exterior ring, \ -but the Shape contained interior holes (defined by counter-clockwise orientation in the shapefile format) that were \ -orphaned, i.e. not contained by any exterior rings. The rings were still included but were \ -encoded as GeoJSON exterior rings instead of holes." - ) - logger.warning(msg) - only_holes = self._errors.get("polygon_only_holes", None) - if only_holes: - msg = ( - header - + "Shapefile format requires that polygons contain at least one exterior ring, \ -but the Shape was entirely made up of interior holes (defined by counter-clockwise orientation in the shapefile format). The rings were \ -still included but were encoded as GeoJSON exterior rings instead of holes." - ) - logger.warning(msg) - - # return as geojson - if len(polys) == 1: - return {"type": "Polygon", "coordinates": polys[0]} - - return {"type": "MultiPolygon", "coordinates": polys} - - raise GeoJSON_Error( - f'Shape type "{SHAPETYPE_LOOKUP[self.shapeType]}" cannot be represented as GeoJSON.' - ) - - @staticmethod - def _from_geojson(geoj: GeoJSONHomogeneousGeometryObject) -> Shape: - # create empty shape - # set shapeType - geojType = geoj["type"] if geoj else "Null" - if geojType in GEOJSON_TO_SHAPETYPE: - shapeType = GEOJSON_TO_SHAPETYPE[geojType] - else: - raise GeoJSON_Error(f"Cannot create Shape from GeoJSON type '{geojType}'") - - coordinates = geoj["coordinates"] - - if coordinates == (): - raise GeoJSON_Error(f"Cannot create non-Null Shape from: {coordinates=}") - - points: PointsT - parts: list[int] - - # set points and parts - if geojType == "Point": - points = [cast(PointT, coordinates)] - parts = [0] - elif geojType in ("MultiPoint", "LineString"): - points = cast(PointsT, coordinates) - parts = [0] - elif geojType == "Polygon": - points = [] - parts = [] - index = 0 - for i, ext_or_hole in enumerate(cast(list[PointsT], coordinates)): - # although the latest GeoJSON spec states that exterior rings should have - # counter-clockwise orientation, we explicitly check orientation since older - # GeoJSONs might not enforce this. - if i == 0 and not is_cw(ext_or_hole): - # flip exterior direction - ext_or_hole = rewind(ext_or_hole) - elif i > 0 and is_cw(ext_or_hole): - # flip hole direction - ext_or_hole = rewind(ext_or_hole) - points.extend(ext_or_hole) - parts.append(index) - index += len(ext_or_hole) - elif geojType == "MultiLineString": - points = [] - parts = [] - index = 0 - for linestring in cast(list[PointsT], coordinates): - points.extend(linestring) - parts.append(index) - index += len(linestring) - elif geojType == "MultiPolygon": - points = [] - parts = [] - index = 0 - for polygon in cast(list[list[PointsT]], coordinates): - for i, ext_or_hole in enumerate(polygon): - # although the latest GeoJSON spec states that exterior rings should have - # counter-clockwise orientation, we explicitly check orientation since older - # GeoJSONs might not enforce this. - if i == 0 and not is_cw(ext_or_hole): - # flip exterior direction - ext_or_hole = rewind(ext_or_hole) - elif i > 0 and is_cw(ext_or_hole): - # flip hole direction - ext_or_hole = rewind(ext_or_hole) - points.extend(ext_or_hole) - parts.append(index) - index += len(ext_or_hole) - return Shape(shapeType=shapeType, points=points, parts=parts) \ No newline at end of file diff --git a/src/shapefile/geojson_types.py b/src/shapefile/geojson_types.py new file mode 100644 index 00000000..3fc9259e --- /dev/null +++ b/src/shapefile/geojson_types.py @@ -0,0 +1,106 @@ +from __future__ import annotations + +from typing import Any, Literal, Protocol, TypedDict, Union + +from .constants import ( + MULTIPOINT, + NULL, + POINT, + POLYGON, + POLYLINE, +) +from .types import PointsT, PointT + + +class HasGeoInterface(Protocol): + @property + def __geo_interface__(self) -> GeoJSONHomogeneousGeometryObject: ... + + +class GeoJSONPoint(TypedDict): + type: Literal["Point"] + # We fix to a tuple (to statically check the length is 2, 3 or 4) but + # RFC7946 only requires: "A position is an array of numbers. There MUST be two or more + # elements. " + # RFC7946 also requires long/lat easting/northing which we do not enforce, + # and despite the SHOULD NOT, we may use a 4th element for Shapefile M Measures. + coordinates: PointT | tuple[()] + + +class GeoJSONMultiPoint(TypedDict): + type: Literal["MultiPoint"] + coordinates: PointsT + + +class GeoJSONLineString(TypedDict): + type: Literal["LineString"] + # "Two or more positions" not enforced by type checker + # https://datatracker.ietf.org/doc/html/rfc7946#section-3.1.4 + coordinates: PointsT + + +class GeoJSONMultiLineString(TypedDict): + type: Literal["MultiLineString"] + coordinates: list[PointsT] + + +class GeoJSONPolygon(TypedDict): + type: Literal["Polygon"] + # Other requirements for Polygon not enforced by type checker + # https://datatracker.ietf.org/doc/html/rfc7946#section-3.1.6 + coordinates: list[PointsT] + + +class GeoJSONMultiPolygon(TypedDict): + type: Literal["MultiPolygon"] + coordinates: list[list[PointsT]] + + +GeoJSONHomogeneousGeometryObject = Union[ + GeoJSONPoint, + GeoJSONMultiPoint, + GeoJSONLineString, + GeoJSONMultiLineString, + GeoJSONPolygon, + GeoJSONMultiPolygon, +] + +GEOJSON_TO_SHAPETYPE: dict[str, int] = { + "Null": NULL, + "Point": POINT, + "LineString": POLYLINE, + "Polygon": POLYGON, + "MultiPoint": MULTIPOINT, + "MultiLineString": POLYLINE, + "MultiPolygon": POLYGON, +} + + +class GeoJSONGeometryCollection(TypedDict): + type: Literal["GeometryCollection"] + geometries: list[GeoJSONHomogeneousGeometryObject] + + +# RFC7946 3.1 +GeoJSONObject = Union[GeoJSONHomogeneousGeometryObject, GeoJSONGeometryCollection] + + +class GeoJSONFeature(TypedDict): + type: Literal["Feature"] + properties: ( + dict[str, Any] | None + ) # RFC7946 3.2 "(any JSON object or a JSON null value)" + geometry: GeoJSONObject | None + + +class GeoJSONFeatureCollection(TypedDict): + type: Literal["FeatureCollection"] + features: list[GeoJSONFeature] + + +class GeoJSONFeatureCollectionWithBBox(GeoJSONFeatureCollection): + # bbox is technically optional under the spec but this seems + # a very minor improvement that would require NotRequired + # from the typing-extensions backport for Python 3.9 + # (PyShp's resisted having any other dependencies so far!) + bbox: list[float] diff --git a/src/shapefile/geometric_calculations.py b/src/shapefile/geometric_calculations.py index 8dc1ad69..f2dbabb1 100644 --- a/src/shapefile/geometric_calculations.py +++ b/src/shapefile/geometric_calculations.py @@ -1,3 +1,9 @@ +from __future__ import annotations + +from collections.abc import Iterable, Iterator, Reversible + +from .exceptions import RingSamplingError +from .types import BBox, Point2D, PointsT, PointT def signed_area( @@ -101,10 +107,6 @@ def ring_contains_point(coords: PointsT, p: Point2D) -> bool: return inside_flag -class RingSamplingError(Exception): - pass - - def ring_sample(coords: PointsT, ccw: bool = False) -> Point2D: """Return a sample point guaranteed to be within a ring, by efficiently finding the first centroid of a coordinate triplet whose orientation @@ -286,4 +288,4 @@ def organize_polygon_rings( exteriors = holes # add as single exterior without any holes polys = [[ext] for ext in exteriors] - return polys \ No newline at end of file + return polys diff --git a/src/shapefile/helpers.py b/src/shapefile/helpers.py index aa283942..c66d15f2 100644 --- a/src/shapefile/helpers.py +++ b/src/shapefile/helpers.py @@ -1,9 +1,12 @@ +from __future__ import annotations import array import os from os import PathLike from struct import Struct -from typing import overload, TypeVar, Generic, Any +from typing import Any, Generic, TypeVar, overload + +from .types import T # Helpers @@ -34,4 +37,4 @@ class _Array(array.array, Generic[ARR_TYPE]): # type: ignore[type-arg] Used to unpack different shapefile header parts.""" def __repr__(self) -> str: - return str(self.tolist()) \ No newline at end of file + return str(self.tolist()) diff --git a/src/shapefile/py.typed b/src/shapefile/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/src/shapefile/reader.py b/src/shapefile/reader.py index f0afa4be..02be0c18 100644 --- a/src/shapefile/reader.py +++ b/src/shapefile/reader.py @@ -1,8 +1,43 @@ - +from __future__ import annotations + +import io +import os +import sys +import tempfile +import zipfile +from collections.abc import Container, Iterable, Iterator +from datetime import date +from os import PathLike +from struct import Struct, calcsize, unpack +from types import TracebackType +from typing import IO, Any, Union, cast from urllib.error import HTTPError from urllib.parse import urlparse, urlunparse from urllib.request import Request, urlopen +from .classes import ( + Field, + ShapeRecord, + ShapeRecords, + Shapes, + _Record, +) +from .constants import NODATA, SHAPETYPE_LOOKUP +from .exceptions import ShapefileException +from .geojson_types import GeoJSONFeatureCollectionWithBBox +from .helpers import _Array, fsdecode_if_pathlike, unpack_2_int32_be +from .shapes import SHAPE_CLASS_FROM_SHAPETYPE, Shape +from .types import ( + FIELD_TYPE_ALIASES, + BBox, + BinaryFileStreamT, + BinaryFileT, + FieldType, + ReadSeekableBinStream, + T, + ZBox, +) + class _NoShpSentinel: """For use as a default value for shp to preserve the diff --git a/src/shapefile/shapes.py b/src/shapefile/shapes.py index 5611ea54..61ba4b0a 100644 --- a/src/shapefile/shapes.py +++ b/src/shapefile/shapes.py @@ -1,3 +1,53 @@ +from __future__ import annotations + +import logging +from collections.abc import Iterable, Iterator, Sequence +from struct import error, pack, unpack +from typing import Final, TypedDict, Union, cast + +from .constants import ( + MULTIPATCH, + MULTIPOINT, + MULTIPOINTM, + MULTIPOINTZ, + NODATA, + NULL, + POINT, + POINTM, + POINTZ, + POLYGON, + POLYGONM, + POLYGONZ, + POLYLINE, + POLYLINEM, + POLYLINEZ, + SHAPETYPE_LOOKUP, + SHAPETYPENUM_LOOKUP, + VERBOSE, +) +from .exceptions import GeoJSON_Error, ShapefileException +from .geojson_types import ( + GEOJSON_TO_SHAPETYPE, + GeoJSONHomogeneousGeometryObject, +) +from .geometric_calculations import bbox_overlap, is_cw, organize_polygon_rings, rewind +from .helpers import _Array +from .types import ( + BBox, + MBox, + Point2D, + PointMT, + PointsT, + PointT, + PointZT, + ReadableBinStream, + ReadSeekableBinStream, + WriteableBinStream, + ZBox, +) + +logger = logging.getLogger(__name__) + class _NoShapeTypeSentinel: """For use as a default value for Shape.__init__ to @@ -41,7 +91,9 @@ class CanHaveBboxNoLinesKwargs(TypedDict, total=False): z: Sequence[float] | None mbox: MBox | None zbox: ZBox | None -class Shape(GeoJSONSerisalizableShape): + + +class Shape: def __init__( self, shapeType: int | _NoShapeTypeSentinel = _NO_SHAPE_TYPE_SENTINEL, @@ -213,8 +265,6 @@ def _mbox_from_ms(self) -> MBox: def _zbox_from_zs(self) -> ZBox: return min(self.z), max(self.z) - - @property def oid(self) -> int: """The index position of the shape in the original shapefile""" @@ -230,6 +280,189 @@ def __repr__(self) -> str: return f"Shape #{self.__oid}: {self.shapeTypeName}" return f"{class_name} #{self.__oid}" + @property + def __geo_interface__(self) -> GeoJSONHomogeneousGeometryObject: + if self.shapeType in {POINT, POINTM, POINTZ}: + # point + if len(self.points) == 0: + # the shape has no coordinate information, i.e. is 'empty' + # the geojson spec does not define a proper null-geometry type + # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries + return {"type": "Point", "coordinates": ()} + + return {"type": "Point", "coordinates": self.points[0]} + + if self.shapeType in {MULTIPOINT, MULTIPOINTM, MULTIPOINTZ}: + if len(self.points) == 0: + # the shape has no coordinate information, i.e. is 'empty' + # the geojson spec does not define a proper null-geometry type + # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries + return {"type": "MultiPoint", "coordinates": []} + + # multipoint + return { + "type": "MultiPoint", + "coordinates": self.points, + } + + if self.shapeType in {POLYLINE, POLYLINEM, POLYLINEZ}: + if len(self.parts) == 0: + # the shape has no coordinate information, i.e. is 'empty' + # the geojson spec does not define a proper null-geometry type + # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries + return {"type": "LineString", "coordinates": []} + + if len(self.parts) == 1: + # linestring + return { + "type": "LineString", + "coordinates": self.points, + } + + # multilinestring + ps = None + coordinates = [] + for part in self.parts: + if ps is None: + ps = part + continue + + coordinates.append(list(self.points[ps:part])) + ps = part + + # assert len(self.parts) > 1 + # from previous if len(self.parts) checks so part is defined + coordinates.append(list(self.points[part:])) + return {"type": "MultiLineString", "coordinates": coordinates} + + if self.shapeType in {POLYGON, POLYGONM, POLYGONZ}: + if len(self.parts) == 0: + # the shape has no coordinate information, i.e. is 'empty' + # the geojson spec does not define a proper null-geometry type + # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries + return {"type": "Polygon", "coordinates": []} + + # get all polygon rings + rings = [] + for i, start in enumerate(self.parts): + # get indexes of start and end points of the ring + try: + end = self.parts[i + 1] + except IndexError: + end = len(self.points) + + # extract the points that make up the ring + ring = list(self.points[start:end]) + rings.append(ring) + + # organize rings into list of polygons, where each polygon is defined as list of rings. + # the first ring is the exterior and any remaining rings are holes (same as GeoJSON). + polys = organize_polygon_rings(rings, self._errors) + + # if VERBOSE is True, issue detailed warning about any shape errors + # encountered during the Shapefile to GeoJSON conversion + if VERBOSE and self._errors: + header = f"Possible issue encountered when converting Shape #{self.oid} to GeoJSON: " + orphans = self._errors.get("polygon_orphaned_holes", None) + if orphans: + msg = ( + header + + "Shapefile format requires that all polygon interior holes be contained by an exterior ring, \ +but the Shape contained interior holes (defined by counter-clockwise orientation in the shapefile format) that were \ +orphaned, i.e. not contained by any exterior rings. The rings were still included but were \ +encoded as GeoJSON exterior rings instead of holes." + ) + logger.warning(msg) + only_holes = self._errors.get("polygon_only_holes", None) + if only_holes: + msg = ( + header + + "Shapefile format requires that polygons contain at least one exterior ring, \ +but the Shape was entirely made up of interior holes (defined by counter-clockwise orientation in the shapefile format). The rings were \ +still included but were encoded as GeoJSON exterior rings instead of holes." + ) + logger.warning(msg) + + # return as geojson + if len(polys) == 1: + return {"type": "Polygon", "coordinates": polys[0]} + + return {"type": "MultiPolygon", "coordinates": polys} + + raise GeoJSON_Error( + f'Shape type "{SHAPETYPE_LOOKUP[self.shapeType]}" cannot be represented as GeoJSON.' + ) + + @classmethod + def _from_geojson(cls, geoj: GeoJSONHomogeneousGeometryObject) -> Shape: + # create empty shape + # set shapeType + geojType = geoj["type"] if geoj else "Null" + if geojType in GEOJSON_TO_SHAPETYPE: + shapeType = GEOJSON_TO_SHAPETYPE[geojType] + else: + raise GeoJSON_Error(f"Cannot create Shape from GeoJSON type '{geojType}'") + + coordinates = geoj["coordinates"] + + if coordinates == (): + raise GeoJSON_Error(f"Cannot create non-Null Shape from: {coordinates=}") + + points: PointsT + parts: list[int] + + # set points and parts + if geojType == "Point": + points = [cast(PointT, coordinates)] + parts = [0] + elif geojType in ("MultiPoint", "LineString"): + points = cast(PointsT, coordinates) + parts = [0] + elif geojType == "Polygon": + points = [] + parts = [] + index = 0 + for i, ext_or_hole in enumerate(cast(list[PointsT], coordinates)): + # although the latest GeoJSON spec states that exterior rings should have + # counter-clockwise orientation, we explicitly check orientation since older + # GeoJSONs might not enforce this. + if i == 0 and not is_cw(ext_or_hole): + # flip exterior direction + ext_or_hole = rewind(ext_or_hole) + elif i > 0 and is_cw(ext_or_hole): + # flip hole direction + ext_or_hole = rewind(ext_or_hole) + points.extend(ext_or_hole) + parts.append(index) + index += len(ext_or_hole) + elif geojType == "MultiLineString": + points = [] + parts = [] + index = 0 + for linestring in cast(list[PointsT], coordinates): + points.extend(linestring) + parts.append(index) + index += len(linestring) + elif geojType == "MultiPolygon": + points = [] + parts = [] + index = 0 + for polygon in cast(list[list[PointsT]], coordinates): + for i, ext_or_hole in enumerate(polygon): + # although the latest GeoJSON spec states that exterior rings should have + # counter-clockwise orientation, we explicitly check orientation since older + # GeoJSONs might not enforce this. + if i == 0 and not is_cw(ext_or_hole): + # flip exterior direction + ext_or_hole = rewind(ext_or_hole) + elif i > 0 and is_cw(ext_or_hole): + # flip hole direction + ext_or_hole = rewind(ext_or_hole) + points.extend(ext_or_hole) + parts.append(index) + index += len(ext_or_hole) + return cls(shapeType=shapeType, points=points, parts=parts) + # Need unused arguments to keep the same call signature for # different implementations of from_byte_stream and write_to_byte_stream diff --git a/src/shapefile/types.py b/src/shapefile/types.py index c23f5216..a84e2547 100644 --- a/src/shapefile/types.py +++ b/src/shapefile/types.py @@ -1,3 +1,18 @@ +from __future__ import annotations + +import io +from datetime import date +from os import PathLike +from typing import ( + IO, + Any, + Final, + Literal, + Optional, + Protocol, + TypeVar, + Union, +) ## Custom type variables @@ -80,7 +95,6 @@ class FieldType: FIELD_TYPE_ALIASES[c.encode("ascii").upper()] = c - RecordValueNotDate = Union[bool, int, float, str] # A Possible value in a Shapefile dbf record, i.e. L, N, M, F, C, or D types diff --git a/src/shapefile/writer.py b/src/shapefile/writer.py index 94f0dda4..a5180584 100644 --- a/src/shapefile/writer.py +++ b/src/shapefile/writer.py @@ -1,3 +1,64 @@ +from __future__ import annotations + +import io +import os +import time +from datetime import date +from os import PathLike +from struct import error, pack +from types import TracebackType +from typing import ( + Any, + Literal, + NoReturn, + TypeVar, + Union, + cast, + overload, +) + +from .classes import Field +from .constants import MISSING, NULL, SHAPETYPE_LOOKUP +from .exceptions import ShapefileException +from .geojson_types import GeoJSONHomogeneousGeometryObject, HasGeoInterface +from .helpers import fsdecode_if_pathlike +from .shapes import ( + SHAPE_CLASS_FROM_SHAPETYPE, + MultiPatch, + MultiPoint, + MultiPointM, + MultiPointZ, + NullShape, + Point, + PointM, + PointM_shapeTypes, + PointZ, + PointZ_shapeTypes, + Polygon, + PolygonM, + PolygonZ, + Polyline, + PolylineM, + PolylineZ, + Shape, + _CanHaveBBox_shapeTypes, + _HasM, + _HasM_shapeTypes, + _HasZ, + _HasZ_shapeTypes, +) +from .types import ( + BBox, + BinaryFileStreamT, + FieldTypeT, + MBox, + PointsT, + ReadWriteSeekableBinStream, + RecordValue, + WriteSeekableBinStream, + ZBox, +) + class Writer: """Provides write support for ESRI Shapefiles.""" @@ -721,4 +782,4 @@ def field( "Shapefile Writer reached maximum number of fields: 2046." ) field_ = Field.from_unchecked(name, field_type, size, decimal) - self.fields.append(field_) \ No newline at end of file + self.fields.append(field_)