diff --git a/datashader/core.py b/datashader/core.py index dd7a0b2f8..763736902 100644 --- a/datashader/core.py +++ b/datashader/core.py @@ -206,15 +206,19 @@ def points(self, source, x=None, y=None, agg=None, geometry=None): x_range = self.x_range if self.x_range is not None else (None, None) y_range = self.y_range if self.y_range is not None else (None, None) source = source.cx_partitions[slice(*x_range), slice(*y_range)] + glyph = MultiPointGeometry(geometry) elif spatialpandas and isinstance(source, spatialpandas.GeoDataFrame): - pass + glyph = MultiPointGeometry(geometry) + elif (geopandas_source := self._source_from_geopandas(source)) is not None: + source = geopandas_source + from datashader.glyphs.points import MultiPointGeoPandas + glyph = MultiPointGeoPandas(geometry) else: raise ValueError( - "source must be an instance of spatialpandas.GeoDataFrame or \n" - "spatialpandas.dask.DaskGeoDataFrame.\n" - " Received value of type {typ}".format(typ=type(source))) - - glyph = MultiPointGeometry(geometry) + "source must be an instance of spatialpandas.GeoDataFrame, " + "spatialpandas.dask.DaskGeoDataFrame, geopandas.GeoDataFrame, or " + "dask_geopandas.GeoDataFrame. Received objects of type {typ}".format( + typ=type(source))) return bypixel(source, self, glyph, agg) @@ -365,15 +369,20 @@ def line(self, source, x=None, y=None, agg=None, axis=0, geometry=None, x_range = self.x_range if self.x_range is not None else (None, None) y_range = self.y_range if self.y_range is not None else (None, None) source = source.cx_partitions[slice(*x_range), slice(*y_range)] + glyph = LineAxis1Geometry(geometry) elif spatialpandas and isinstance(source, spatialpandas.GeoDataFrame): - pass + glyph = LineAxis1Geometry(geometry) + elif (geopandas_source := self._source_from_geopandas(source)) is not None: + source = geopandas_source + from datashader.glyphs.line import LineAxis1GeoPandas + glyph = LineAxis1GeoPandas(geometry) else: raise ValueError( - "source must be an instance of spatialpandas.GeoDataFrame or \n" - "spatialpandas.dask.DaskGeoDataFrame.\n" - " Received value of type {typ}".format(typ=type(source))) + "source must be an instance of spatialpandas.GeoDataFrame, " + "spatialpandas.dask.DaskGeoDataFrame, geopandas.GeoDataFrame, or " + "dask_geopandas.GeoDataFrame. Received objects of type {typ}".format( + typ=type(source))) - glyph = LineAxis1Geometry(geometry) else: # Broadcast column specifications to handle cases where # x is a list and y is a string or vice versa diff --git a/datashader/glyphs/__init__.py b/datashader/glyphs/__init__.py index c1be92102..2ec5df5a4 100644 --- a/datashader/glyphs/__init__.py +++ b/datashader/glyphs/__init__.py @@ -8,6 +8,7 @@ LinesAxis1YConstant, LinesAxis1Ragged, LineAxis1Geometry, + LineAxis1GeoPandas, ) from .area import ( # noqa (API import) AreaToZeroAxis0, diff --git a/datashader/glyphs/line.py b/datashader/glyphs/line.py index 0ececb764..594a56cb7 100644 --- a/datashader/glyphs/line.py +++ b/datashader/glyphs/line.py @@ -568,6 +568,42 @@ def extend(aggs, df, vt, bounds, plot_start=True): return extend +class LineAxis1GeoPandas(_GeometryLike, _AntiAliasedLine): + # geopandas must be available for a GeoPandasLine to be created. 
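+    # Line glyph for geometry columns backed by geopandas/shapely rather than
+    # spatialpandas. Coordinates are unpacked with shapely.to_ragged_array in
+    # _build_extend_line_axis1_geopandas below, so LINESTRING, MULTILINESTRING,
+    # POLYGON and MULTIPOLYGON columns can all be rendered by Canvas.line.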
+    @property
+    def geom_dtypes(self):
+        from geopandas.array import GeometryDtype
+        return (GeometryDtype,)
+
+    @memoize
+    def _internal_build_extend(
+        self, x_mapper, y_mapper, info, append, line_width, antialias_stage_2,
+        antialias_stage_2_funcs,
+    ):
+        expand_aggs_and_cols = self.expand_aggs_and_cols(append)
+        draw_segment, antialias_stage_2_funcs = _line_internal_build_extend(
+            x_mapper, y_mapper, append, line_width, antialias_stage_2, antialias_stage_2_funcs,
+            expand_aggs_and_cols,
+        )
+        perform_extend_cpu = _build_extend_line_axis1_geopandas(
+            draw_segment, expand_aggs_and_cols, antialias_stage_2_funcs,
+        )
+        geometry_name = self.geometry
+
+        def extend(aggs, df, vt, bounds, plot_start=True):
+            sx, tx, sy, ty = vt
+            xmin, xmax, ymin, ymax = bounds
+            aggs_and_cols = aggs + info(df, aggs[0].shape[:2])
+            geom_array = df[geometry_name].array
+
+            perform_extend_cpu(
+                sx, tx, sy, ty, xmin, xmax, ymin, ymax,
+                geom_array, antialias_stage_2, *aggs_and_cols
+            )
+
+        return extend
+
+
 def _build_map_onto_pixel_for_line(x_mapper, y_mapper, want_antialias=False):
     @ngjit
     def map_onto_pixel_snap(sx, tx, sy, ty, xmin, xmax, ymin, ymax, x, y):
@@ -1764,3 +1800,171 @@ def extend_cpu_numba_antialias_2agg(
         return extend_cpu_antialias_2agg
     else:
         return extend_cpu
+
+
+def _build_extend_line_axis1_geopandas(draw_segment, expand_aggs_and_cols, antialias_stage_2_funcs):
+    if antialias_stage_2_funcs is not None:
+        aa_stage_2_accumulate, aa_stage_2_clear, aa_stage_2_copy_back = antialias_stage_2_funcs
+    use_2_stage_agg = antialias_stage_2_funcs is not None
+
+    # Lazy import shapely. Cannot get here if geopandas and shapely are not available.
+    import shapely
+
+    def _process_geometry(geometry):
+        ragged = shapely.to_ragged_array(geometry)
+        geometry_type = ragged[0]
+
+        if geometry_type not in (
+            shapely.GeometryType.LINESTRING, shapely.GeometryType.MULTILINESTRING,
+            shapely.GeometryType.MULTIPOLYGON, shapely.GeometryType.POLYGON,
+        ):
+            raise ValueError(
+                "Canvas.line supports GeoPandas geometry types of LINESTRING, MULTILINESTRING, "
+                f"MULTIPOLYGON and POLYGON, not {repr(geometry_type)}")
+
+        coords = ragged[1].ravel()
+
+        # Use type to decide whether geometry represents closed line loops or open line strips.
+        # Skip the last point for closed geometries so as not to double count the first/last point.
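+        # Illustrative example (values not from the datashader source): for two
+        # MultiLineStrings, the first holding linestrings of 3 and 2 points and the
+        # second a single linestring of 4 points, shapely.to_ragged_array gives
+        #     ragged[1].shape == (9, 2)            # all vertices, ravelled below
+        #     ragged[2] -> ([0, 3, 5, 9],          # vertex offsets, one slice per linestring
+        #                   [0, 2, 3])             # linestring offsets, one slice per row
+        # For MULTIPOLYGON there is a third level of offsets, flattened below via
+        # temp_offsets[outer_offsets] so the kernels only ever see two levels.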
+        if geometry_type == shapely.GeometryType.LINESTRING:
+            offsets = ragged[2][0]
+            outer_offsets = np.arange(len(offsets))
+            closed_rings = False
+        elif geometry_type == shapely.GeometryType.MULTILINESTRING:
+            offsets, outer_offsets = ragged[2]
+            closed_rings = False
+        elif geometry_type == shapely.GeometryType.MULTIPOLYGON:
+            offsets, temp_offsets, outer_offsets = ragged[2]
+            outer_offsets = temp_offsets[outer_offsets]
+            closed_rings = True
+        else:  # geometry_type == shapely.GeometryType.POLYGON:
+            offsets, outer_offsets = ragged[2]
+            closed_rings = True
+
+        return coords, offsets, outer_offsets, closed_rings
+
+    def extend_cpu(
+        sx, tx, sy, ty, xmin, xmax, ymin, ymax, geometry, antialias_stage_2, *aggs_and_cols
+    ):
+        coords, offsets, outer_offsets, closed_rings = _process_geometry(geometry)
+        extend_cpu_numba(
+            sx, tx, sy, ty, xmin, xmax, ymin, ymax, coords, offsets, outer_offsets, closed_rings,
+            antialias_stage_2, *aggs_and_cols)
+
+    @ngjit
+    @expand_aggs_and_cols
+    def extend_cpu_numba(
+        sx, tx, sy, ty, xmin, xmax, ymin, ymax, values, offsets, outer_offsets,
+        closed_rings, antialias_stage_2, *aggs_and_cols
+    ):
+        antialias = antialias_stage_2 is not None
+        buffer = np.empty(8) if antialias else None
+        n_multilines = len(outer_offsets) - 1
+        for i in range(n_multilines):
+            start0 = outer_offsets[i]
+            stop0 = outer_offsets[i + 1]
+
+            for j in range(start0, stop0):
+                # Offsets count (x, y) vertices; double them to index the flat coords array.
+                start1 = 2*offsets[j]
+                stop1 = 2*offsets[j + 1]
+
+                for k in range(start1, stop1 - 2, 2):
+                    x0 = values[k]
+                    y0 = values[k + 1]
+                    x1 = values[k + 2]
+                    y1 = values[k + 3]
+                    if not (np.isfinite(x0) and np.isfinite(y0) and
+                            np.isfinite(x1) and np.isfinite(y1)):
+                        continue
+
+                    segment_start = (
+                        (k == start1 and not closed_rings) or
+                        (k > start1 and
+                         (not np.isfinite(values[k - 2]) or not np.isfinite(values[k - 1])))
+                    )
+
+                    segment_end = (
+                        (not closed_rings and k == stop1-4) or
+                        (k < stop1-4 and
+                         (not np.isfinite(values[k + 4]) or not np.isfinite(values[k + 5])))
+                    )
+
+                    if segment_start or use_2_stage_agg:
+                        xm = 0.0
+                        ym = 0.0
+                    elif k == start1 and closed_rings:
+                        xm = values[stop1-4]
+                        ym = values[stop1-3]
+                    else:
+                        xm = values[k-2]
+                        ym = values[k-1]
+
+                    draw_segment(i, sx, tx, sy, ty, xmin, xmax, ymin, ymax,
+                                 segment_start, segment_end, x0, x1, y0, y1,
+                                 xm, ym, buffer, *aggs_and_cols)
+
+    def extend_cpu_antialias_2agg(
+        sx, tx, sy, ty, xmin, xmax, ymin, ymax, geometry, antialias_stage_2, *aggs_and_cols
+    ):
+        coords, offsets, outer_offsets, closed_rings = _process_geometry(geometry)
+        n_aggs = len(antialias_stage_2[0])
+        aggs_and_accums = tuple((agg, agg.copy()) for agg in aggs_and_cols[:n_aggs])
+
+        extend_cpu_numba_antialias_2agg(
+            sx, tx, sy, ty, xmin, xmax, ymin, ymax, coords, offsets, outer_offsets, closed_rings,
+            antialias_stage_2, aggs_and_accums, *aggs_and_cols)
+
+    @ngjit
+    @expand_aggs_and_cols
+    def extend_cpu_numba_antialias_2agg(
+        sx, tx, sy, ty, xmin, xmax, ymin, ymax, values, offsets, outer_offsets,
+        closed_rings, antialias_stage_2, aggs_and_accums, *aggs_and_cols
+    ):
+        antialias = antialias_stage_2 is not None
+        buffer = np.empty(8) if antialias else None
+        n_multilines = len(outer_offsets) - 1
+        first_pass = True
+        for i in range(n_multilines):
+            start0 = outer_offsets[i]
+            stop0 = outer_offsets[i + 1]
+
+            for j in range(start0, stop0):
+                # Offsets count (x, y) vertices; double them to index the flat coords array.
+                start1 = 2*offsets[j]
+                stop1 = 2*offsets[j + 1]
+
+                for k in range(start1, stop1 - 2, 2):
+                    x0 = values[k]
+                    y0 = values[k + 1]
+                    x1 = values[k + 2]
+                    y1 = values[k + 3]
+                    if not (np.isfinite(x0) and np.isfinite(y0) and
+                            np.isfinite(x1) and
np.isfinite(y1)): + continue + + segment_start = ( + (k == start1 and not closed_rings) or + (k > start1 and + (not np.isfinite(values[k - 2]) or not np.isfinite(values[k - 1]))) + ) + + segment_end = ( + (not closed_rings and k == stop1-4) or + (k < stop1-4 and + (not np.isfinite(values[k + 4]) or not np.isfinite(values[k + 5]))) + ) + + draw_segment(i, sx, tx, sy, ty, xmin, xmax, ymin, ymax, + segment_start, segment_end, x0, x1, y0, y1, + #xm, ym, buffer, *aggs_and_cols) + 0.0, 0.0, buffer, *aggs_and_cols) + + aa_stage_2_accumulate(aggs_and_accums, first_pass) + first_pass = False + aa_stage_2_clear(aggs_and_accums) + + aa_stage_2_copy_back(aggs_and_accums) + + if use_2_stage_agg: + return extend_cpu_antialias_2agg + else: + return extend_cpu diff --git a/datashader/glyphs/points.py b/datashader/glyphs/points.py index 172394d0c..f918c673f 100644 --- a/datashader/glyphs/points.py +++ b/datashader/glyphs/points.py @@ -241,6 +241,86 @@ def extend(aggs, df, vt, bounds): return extend +class MultiPointGeoPandas(_GeometryLike): + # geopandas must be available if a GeoPandasPointGeometry object is created. + @property + def geom_dtypes(self): + from geopandas.array import GeometryDtype + return (GeometryDtype,) + + @memoize + def _build_extend( + self, x_mapper, y_mapper, info, append, _antialias_stage_2, _antialias_stage_2_funcs, + ): + # Lazy import shapely. Cannot get here if geopandas and shapely are not available. + import shapely + + geometry_name = self.geometry + + @ngjit + @self.expand_aggs_and_cols(append) + def _perform_extend_points( + i, j, sx, tx, sy, ty, xmin, xmax, ymin, ymax, values, *aggs_and_cols + ): + x = values[j] + y = values[j + 1] + # points outside bounds are dropped; remainder + # are mapped onto pixels + if (xmin <= x <= xmax) and (ymin <= y <= ymax): + xx = int(x_mapper(x) * sx + tx) + yy = int(y_mapper(y) * sy + ty) + xi, yi = (xx - 1 if x == xmax else xx, + yy - 1 if y == ymax else yy) + + append(i, xi, yi, *aggs_and_cols) + + def extend(aggs, df, vt, bounds): + aggs_and_cols = aggs + info(df, aggs[0].shape[:2]) + sx, tx, sy, ty = vt + xmin, xmax, ymin, ymax = bounds + geometry = df[geometry_name].array + + ragged = shapely.to_ragged_array(geometry) + geometry_type = ragged[0] + + if geometry_type not in (shapely.GeometryType.MULTIPOINT, shapely.GeometryType.POINT): + raise ValueError( + "Canvas.points supports GeoPandas geometry types of POINT and MULTIPOINT, " + f"not {repr(geometry_type)}") + + coords = ragged[1].ravel() # No offsets required if POINT not MULTIPOINT + if geometry_type == shapely.GeometryType.POINT: + extend_point_cpu(sx, tx, sy, ty, xmin, xmax, ymin, ymax, coords, *aggs_and_cols) + else: + offsets = ragged[2][0] + extend_multipoint_cpu( + sx, tx, sy, ty, xmin, xmax, ymin, ymax, coords, offsets, *aggs_and_cols) + + @ngjit + @self.expand_aggs_and_cols(append) + def extend_multipoint_cpu( + sx, tx, sy, ty, xmin, xmax, ymin, ymax, values, offsets, *aggs_and_cols, + ): + for i in range(len(offsets) - 1): + start = offsets[i] + stop = offsets[i+1] + for j in range(start, stop): + _perform_extend_points( + i, 2*j, sx, tx, sy, ty, xmin, xmax, ymin, ymax, values, *aggs_and_cols, + ) + + @ngjit + @self.expand_aggs_and_cols(append) + def extend_point_cpu(sx, tx, sy, ty, xmin, xmax, ymin, ymax, values, *aggs_and_cols): + n = len(values) // 2 + for i in range(n): + _perform_extend_points( + i, 2*i, sx, tx, sy, ty, xmin, xmax, ymin, ymax, values, *aggs_and_cols, + ) + + return extend + + class MultiPointGeometry(_GeometryLike): # spatialpandas must be 
available if a MultiPointGeometry object is created. diff --git a/datashader/tests/test_geopandas.py b/datashader/tests/test_geopandas.py new file mode 100644 index 000000000..6e3401fe5 --- /dev/null +++ b/datashader/tests/test_geopandas.py @@ -0,0 +1,320 @@ +# Testing GeoPandas and SpatialPandas + +import dask.dataframe as dd +import datashader as ds +from datashader.tests.test_pandas import assert_eq_ndarray +import numpy as np +from numpy import nan +import pytest + + +try: + import dask_geopandas +except ImportError: + dask_geopandas = None + +try: + import geodatasets +except ImportError: + geodatasets = None + +try: + import geopandas +except ImportError: + geopandas = None + +try: + # Import to register extension arrays + import spatialpandas # noqa (register EAs) +except ImportError: + spatialpandas = None + + +nybb_lines_sol = np.array([ + [ 0., 0., 0., nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan], + [ 0., nan, 0., 0., 0., nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan], + [ 0., 0., nan, nan, 0., 0., nan, nan, nan, nan, nan, 1., 1., 1., nan, nan, nan, nan, nan, nan], + [nan, 0., nan, nan, nan, 0., 0., 0., 2., 2., 2., 2., 2., 2., 1., 1., nan, nan, nan, nan], + [nan, 0., 0., nan, nan, nan, 0., 0., 2., 2., 2., 2., 2., 2., 2., 1., 1., 1., 1., nan], + [nan, 0., 0., nan, nan, nan, 0., 2., 2., nan, nan, 2., 2., 2., 2., 2., 1., 1., 1., nan], + [nan, 0., 0., 0., 0., 0., 0., 2., nan, nan, nan, nan, 2., 2., 2., 2., 1., 1., 1., nan], + [nan, nan, 0., 0., nan, 0., 0., 2., 2., 2., nan, nan, nan, 2., 2., 1., 1., nan, 1., 1.], + [nan, nan, nan, nan, nan, nan, nan, nan, 3., 2., nan, nan, 2., 2., 2., nan, nan, nan, 1., 1.], + [nan, nan, nan, nan, nan, nan, nan, 3., 3., 3., 2., nan, 2., 2., 2., nan, nan, nan, nan, 1.], + [nan, nan, nan, nan, nan, nan, nan, nan, 3., 3., 3., 2., 2., nan, nan, nan, nan, nan, 1., 1.], + [nan, nan, nan, nan, nan, nan, nan, nan, 3., nan, 3., 2., nan, nan, nan, nan, nan, nan, nan, 1.], + [nan, nan, nan, nan, nan, nan, nan, nan, 3., 3., 3., 3., nan, nan, 1., nan, nan, 1., 1., 1.], + [nan, nan, nan, nan, nan, nan, nan, nan, nan, 3., nan, 3., 3., 4., 1., 1., 1., 1., 1., nan], + [nan, nan, nan, nan, nan, nan, nan, nan, nan, 3., 3., 4., 4., 4., 4., 4., 4., 1., nan, nan], + [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, 3., 4., nan, 4., 4., 4., 4., nan, nan, nan], + [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, 3., 4., nan, nan, 4., 4., 4., 4., nan, nan], + [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, 4., 4., nan, nan, 4., 4., 4., nan, nan], + [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, 4., 4., nan, nan, 4., 4., 4., nan, nan], + [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, 4., 4., 4., nan, nan, nan, nan, nan], +]) + +nybb_points_sol = np.array([ + [2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [2, 3, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 3, 7, 6, 2, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0], + [0, 2, 2, 4, 3, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 2, 4, 2, 8, 3, 0, 1, 2, 2, 0, 3, 1, 0, 0, 0, 2, 1], + [0, 0, 1, 0, 5, 2, 3, 0, 0, 2, 4, 2, 3, 1, 0, 0, 0, 0, 0, 0], + [0, 0, 2, 2, 3, 8, 3, 0, 1, 5, 2, 7, 5, 0, 3, 0, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 1, 0, 1, 5, 5, 3, 4, 3, 3, 1, 2, 0, 2, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 5, 2, 4, 3, 3, 2, 3, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0, 1, 2, 5, 4, 1, 3, 1, 0, 4], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 5, 2, 2, 5, 2, 3, 1, 1, 2, 4], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 
1, 6, 1, 2, 2, 3, 2, 1, 2, 1, 2], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 8, 7, 1, 2, 2, 1, 2, 1, 1, 2], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 9, 6, 3, 3, 1, 4, 1, 3, 5, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 1, 0, 0, 2, 0, 1, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 4, 7, 4, 1, 1, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 6, 8, 9, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 4, 4, 5, 5, 5, 1, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 2, 2, 5, 7, 1, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 5, 0, 0, 0], +], dtype=np.uint32) + + +nybb_polygons_sol = np.array([ + [ 0., 0., nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan], + [ 0., 0., 0., nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan], + [nan, 0., 0., 0., 0., nan, nan, nan, nan, nan, nan, 1., nan, nan, nan, nan, nan, nan, nan, nan], + [nan, nan, 0., 0., 0., 0., nan, 0., nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan], + [nan, nan, 0., 0., 0., 0., 0., 0., nan, 2., 2., 2., 2., 2., 2., 1., 1., nan, nan, nan], + [nan, nan, 0., 0., 0., 0., 0., nan, 2., 2., 2., 2., 2., 2., 2., 2., 1., 1., nan, nan], + [nan, nan, 0., 0., 0., 0., 0., nan, 2., 2., 2., 2., 2., 2., 2., 2., 1., 1., nan, nan], + [nan, nan, nan, 0., nan, nan, nan, nan, 2., 2., 2., 2., 2., 2., 1., 1., 1., 1., 1., nan], + [nan, nan, nan, nan, nan, nan, nan, nan, nan, 2., 2., 2., 2., 2., 1., 1., 1., 1., 1., nan], + [nan, nan, nan, nan, nan, nan, nan, 3., 3., 3., 2., 2., 2., 1., 1., 1., 1., 1., 1., nan], + [nan, nan, nan, nan, nan, nan, nan, nan, nan, 3., 3., 2., 1., 1., 1., 1., 1., 1., 1., nan], + [nan, nan, nan, nan, nan, nan, nan, nan, nan, 3., 3., 1., 1., 1., 1., 1., 1., 1., 1., 1.], + [nan, nan, nan, nan, nan, nan, nan, nan, nan, 3., 3., 1., 1., 1., 1., 1., 1., 1., 1., 1.], + [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, 3., 3., 1., 1., nan, 1., 1., 1., nan, nan], + [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, 3., 3., 4., 4., nan, nan, nan, nan, nan, nan], + [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, 3., 4., 4., 4., 4., nan, nan, nan, nan], + [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, 3., 4., 4., 4., 4., 4., nan, nan, nan], + [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, 4., 4., 4., 4., 4., 4., nan, nan], + [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, 3., 4., 4., 4., 4., 4., 4., nan, nan], + [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, 4., 4., 4., nan, nan, nan, nan, nan], +]) + + +@pytest.mark.skipif(not geodatasets, reason="geodatasets not installed") +@pytest.mark.skipif(not geopandas, reason="geopandas not installed") +@pytest.mark.parametrize("geom_type, explode, use_boundary", + [ + ("multipolygon", False, False), + ("polygon", True, False), + ("multilinestring", False, True), + ("linestring", True, True), + ], +) +def test_lines_geopandas(geom_type, explode, use_boundary): + df = geopandas.read_file(geodatasets.get_path("nybb")) + df["col"] = np.arange(len(df)) # Extra column for aggregation. + geometry = "boundary" if use_boundary else "geometry" + + if explode: + df = df.explode(index_parts=False) # Multipolygon -> polygon. 
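+    # .boundary turns each (multi)polygon borough into a (multi)linestring, so the
+    # same nybb dataset exercises all four supported line geometry types.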
+ if use_boundary: + df["boundary"] = df.boundary + unique_geom_type = df[geometry].geom_type.unique() + assert len(unique_geom_type) == 1 and unique_geom_type[0].lower() == geom_type + + canvas = ds.Canvas(plot_height=20, plot_width=20) + agg = canvas.line(source=df, geometry=geometry, agg=ds.max("col")) + assert_eq_ndarray(agg.data, nybb_lines_sol) + + +@pytest.mark.skipif(not geodatasets, reason="geodatasets not installed") +@pytest.mark.skipif(not dask_geopandas, reason="dask_geopandas not installed") +@pytest.mark.skipif(not geopandas, reason="geopandas not installed") +@pytest.mark.parametrize('npartitions', [1, 2, 5]) +@pytest.mark.parametrize("geom_type, explode, use_boundary", + [ + ("multipolygon", False, False), + ("polygon", True, False), + ("multilinestring", False, True), + ("linestring", True, True), + ], +) +def test_lines_dask_geopandas(geom_type, explode, use_boundary, npartitions): + df = geopandas.read_file(geodatasets.get_path("nybb")) + df["col"] = np.arange(len(df)) # Extra column for aggregation. + geometry = "boundary" if use_boundary else "geometry" + + if explode: + df = df.explode(index_parts=False) # Multipolygon -> polygon. + if use_boundary: + df["boundary"] = df.boundary + unique_geom_type = df[geometry].geom_type.unique() + assert len(unique_geom_type) == 1 and unique_geom_type[0].lower() == geom_type + + df = dd.from_pandas(df, npartitions=npartitions) + assert df.npartitions == npartitions + df.calculate_spatial_partitions() + + canvas = ds.Canvas(plot_height=20, plot_width=20) + agg = canvas.line(source=df, geometry=geometry, agg=ds.max("col")) + assert_eq_ndarray(agg.data, nybb_lines_sol) + + +@pytest.mark.skipif(not geodatasets, reason="geodatasets not installed") +@pytest.mark.skipif(not spatialpandas, reason="spatialpandas not installed") +@pytest.mark.parametrize('npartitions', [1, 2, 5]) +@pytest.mark.parametrize("geom_type, explode, use_boundary", + [ + ("multipolygon", False, False), + ("polygon", True, False), + ("multilinestring", False, True), + ("linestring", True, True), + ], +) +def test_lines_spatialpandas(geom_type, explode, use_boundary, npartitions): + df = geopandas.read_file(geodatasets.get_path("nybb")) + df["col"] = np.arange(len(df)) # Extra column for aggregation. + geometry = "boundary" if use_boundary else "geometry" + + if explode: + df = df.explode(index_parts=False) # Multipolygon -> polygon. + if use_boundary: + df["boundary"] = df.boundary + unique_geom_type = df[geometry].geom_type.unique() + assert len(unique_geom_type) == 1 and unique_geom_type[0].lower() == geom_type + + df = spatialpandas.GeoDataFrame(df) + if npartitions > 0: + df = dd.from_pandas(df, npartitions=npartitions) + assert df.npartitions == npartitions + + canvas = ds.Canvas(plot_height=20, plot_width=20) + agg = canvas.line(source=df, geometry=geometry, agg=ds.max("col")) + assert_eq_ndarray(agg.data, nybb_lines_sol) + + +@pytest.mark.skipif(not geodatasets, reason="geodatasets not installed") +@pytest.mark.skipif(not geopandas, reason="geopandas not installed") +@pytest.mark.parametrize("geom_type", ["multipoint", "point"]) +def test_points_geopandas(geom_type): + df = geopandas.read_file(geodatasets.get_path("nybb")) + + df["geometry"] = df["geometry"].sample_points(100, seed=93814) # multipoint + if geom_type == "point": + df = df.explode(index_parts=False) # Multipoint -> point. 
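+    # sample_points draws one 100-point MultiPoint per borough; explode flattens
+    # those into individual Point rows for the "point" case.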
+ unique_geom_type = df["geometry"].geom_type.unique() + assert len(unique_geom_type) == 1 and unique_geom_type[0].lower() == geom_type + + canvas = ds.Canvas(plot_height=20, plot_width=20) + agg = canvas.points(source=df, geometry="geometry", agg=ds.count()) + assert_eq_ndarray(agg.data, nybb_points_sol) + + +@pytest.mark.skipif(not geodatasets, reason="geodatasets not installed") +@pytest.mark.skipif(not geopandas, reason="geopandas not installed") +@pytest.mark.parametrize('npartitions', [1, 2, 5]) +@pytest.mark.parametrize("geom_type", ["multipoint", "point"]) +def test_points_dask_geopandas(geom_type, npartitions): + df = geopandas.read_file(geodatasets.get_path("nybb")) + + df["geometry"] = df["geometry"].sample_points(100, seed=93814) # multipoint + if geom_type == "point": + df = df.explode(index_parts=False) # Multipoint -> point. + unique_geom_type = df["geometry"].geom_type.unique() + assert len(unique_geom_type) == 1 and unique_geom_type[0].lower() == geom_type + + df = dd.from_pandas(df, npartitions=npartitions) + assert df.npartitions == npartitions + df.calculate_spatial_partitions() + + canvas = ds.Canvas(plot_height=20, plot_width=20) + agg = canvas.points(source=df, geometry="geometry", agg=ds.count()) + assert_eq_ndarray(agg.data, nybb_points_sol) + + +@pytest.mark.skipif(not geodatasets, reason="geodatasets not installed") +@pytest.mark.skipif(not spatialpandas, reason="spatialpandas not installed") +@pytest.mark.parametrize('npartitions', [0, 1, 2, 5]) +@pytest.mark.parametrize("geom_type", ["multipoint", "point"]) +def test_points_spatialpandas(geom_type, npartitions): + df = geopandas.read_file(geodatasets.get_path("nybb")) + + df["geometry"] = df["geometry"].sample_points(100, seed=93814) # multipoint + if geom_type == "point": + df = df.explode(index_parts=False) # Multipoint -> point. + unique_geom_type = df["geometry"].geom_type.unique() + assert len(unique_geom_type) == 1 and unique_geom_type[0].lower() == geom_type + + df = spatialpandas.GeoDataFrame(df) + if npartitions > 0: + df = dd.from_pandas(df, npartitions=npartitions) + assert df.npartitions == npartitions + + canvas = ds.Canvas(plot_height=20, plot_width=20) + agg = canvas.points(source=df, geometry="geometry", agg=ds.count()) + assert_eq_ndarray(agg.data, nybb_points_sol) + + +@pytest.mark.skipif(not geodatasets, reason="geodatasets not installed") +@pytest.mark.skipif(not geopandas, reason="geopandas not installed") +@pytest.mark.parametrize("geom_type", ["multipolygon", "polygon"]) +def test_polygons_geopandas(geom_type): + df = geopandas.read_file(geodatasets.get_path("nybb")) + df["col"] = np.arange(len(df)) # Extra column for aggregation. + + if geom_type == "polygon": + df = df.explode(index_parts=False) # Multipolygon -> polygon. 
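+    # Both parametrizations rasterize to the same nybb_polygons_sol: exploding the
+    # multipolygons into their component polygons does not change the aggregate.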
+ unique_geom_type = df["geometry"].geom_type.unique() + assert len(unique_geom_type) == 1 and unique_geom_type[0].lower() == geom_type + + canvas = ds.Canvas(plot_height=20, plot_width=20) + agg = canvas.polygons(source=df, geometry="geometry", agg=ds.max("col")) + assert_eq_ndarray(agg.data, nybb_polygons_sol) + + +@pytest.mark.skipif(not geodatasets, reason="geodatasets not installed") +@pytest.mark.skipif(not dask_geopandas, reason="dask_geopandas not installed") +@pytest.mark.skipif(not geopandas, reason="geopandas not installed") +@pytest.mark.parametrize('npartitions', [1, 2, 5]) +@pytest.mark.parametrize("geom_type", ["multipolygon", "polygon"]) +def test_polygons_dask_geopandas(geom_type, npartitions): + df = geopandas.read_file(geodatasets.get_path("nybb")) + df["col"] = np.arange(len(df)) + + if geom_type == "polygon": + df = df.explode(index_parts=False) # Multipolygon -> polygon. + unique_geom_type = df["geometry"].geom_type.unique() + assert len(unique_geom_type) == 1 and unique_geom_type[0].lower() == geom_type + + df = dd.from_pandas(df, npartitions=npartitions) + assert df.npartitions == npartitions + df.calculate_spatial_partitions() + + canvas = ds.Canvas(plot_height=20, plot_width=20) + agg = canvas.polygons(source=df, geometry="geometry", agg=ds.max("col")) + assert_eq_ndarray(agg.data, nybb_polygons_sol) + + +@pytest.mark.skipif(not geodatasets, reason="geodatasets not installed") +@pytest.mark.skipif(not geopandas, reason="geopandas not installed") +@pytest.mark.skipif(not spatialpandas, reason="spatialpandas not installed") +@pytest.mark.parametrize('npartitions', [0, 1, 2, 5]) +@pytest.mark.parametrize("geom_type", ["multipolygon", "polygon"]) +def test_polygons_spatialpandas(geom_type, npartitions): + df = geopandas.read_file(geodatasets.get_path("nybb")) + df["col"] = np.arange(len(df)) + + if geom_type == "polygon": + df = df.explode(index_parts=False) # Multipolygon -> polygon. 
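+    # spatialpandas.GeoDataFrame accepts the geopandas frame directly (below), so the
+    # same data also covers the pre-existing spatialpandas code path.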
+ unique_geom_type = df["geometry"].geom_type.unique() + assert len(unique_geom_type) == 1 and unique_geom_type[0].lower() == geom_type + + df = spatialpandas.GeoDataFrame(df) + if npartitions > 0: + df = dd.from_pandas(df, npartitions=npartitions) + assert df.npartitions == npartitions + + canvas = ds.Canvas(plot_height=20, plot_width=20) + agg = canvas.polygons(source=df, geometry="geometry", agg=ds.max("col")) + assert_eq_ndarray(agg.data, nybb_polygons_sol) diff --git a/datashader/tests/test_polygons.py b/datashader/tests/test_polygons.py index 38ee96798..634938d6a 100644 --- a/datashader/tests/test_polygons.py +++ b/datashader/tests/test_polygons.py @@ -1,7 +1,6 @@ import pytest import pandas as pd import numpy as np -from numpy import nan import xarray as xr import datashader as ds from datashader.tests.test_pandas import assert_eq_ndarray, assert_eq_xr @@ -17,18 +16,6 @@ GeoDataFrame = None MultiPolygonArray = None -try: - from geodatasets import get_path - import geopandas -except ImportError: - get_path = None - geopandas = None - -try: - import dask_geopandas -except ImportError: - dask_geopandas = None - def dask_GeoDataFrame(*args, **kwargs): return dd.from_pandas(GeoDataFrame(*args, **kwargs), npartitions=3) @@ -332,61 +319,3 @@ def test_spatial_index_not_dropped(): assert df2.columns == ['some_geom'] assert df2.some_geom.array._sindex == df.some_geom.array._sindex - - -natural_earth_sol = np.array([ - [nan, 7, 7, 7, 7, 7, 0, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, nan], - [nan, nan, nan, nan, 5, nan, 6, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan], - [nan, nan, nan, nan, nan, nan, 9, nan, nan, nan, nan, nan, nan, 10, nan, nan, nan, nan, 11, 12], - [nan, nan, nan, nan, nan, nan, 95, nan, nan, nan, nan, 112, nan, nan, nan, nan, 21, 21, 21, 13], - [ 17, nan, nan, nan, nan, nan, 95, 95, nan, nan, nan, 112, 20, nan, nan, nan, 31, 32, 34, 22], - [nan, nan, nan, nan, nan, nan, 95, nan, nan, 112, 112, 112, 112, nan, 44, 41, 50, 43, 37, nan], - [nan, 60, nan, nan, 95, 65, 54, nan, nan, 112, 112, 112, 112, nan, 112, 112, 63, nan, nan, nan], - [nan, nan, nan, 95, 95, 95, 74, nan, nan, nan, 72, 68, 112, 112, 112, 112, 112, 71, 73, nan], - [ 87, 82, 78, 95, 95, 88, 95, nan, nan, 80, 83, 112, 112, 112, 112, 112, 112, 112, nan, nan], - [ 94, nan, nan, 116, 118, 125, 125, 126, 126, nan, nan, 121, 122, 109, nan, 123, nan, 101, 106, 93], -]) - - -@pytest.mark.skipif(not geopandas, reason="geopandas not installed") -def test_natural_earth_geopandas(): - df = geopandas.read_file(get_path("naturalearth.land")) - df["col"] = np.arange(len(df)) - - canvas = ds.Canvas(plot_height=10, plot_width=20) - agg = canvas.polygons(source=df, geometry="geometry", agg=ds.max("col")) - - assert_eq_ndarray(agg.data, natural_earth_sol) - - -@pytest.mark.skipif(not geopandas, reason="geopandas not installed") -@pytest.mark.skipif(not dask_geopandas, reason="dask_geopandas not installed") -@pytest.mark.parametrize('npartitions', [1, 2, 5]) -def test_natural_earth_dask_geopandas(npartitions): - df = geopandas.read_file(get_path("naturalearth.land")) - df["col"] = np.arange(len(df)) - df = dd.from_pandas(df, npartitions=npartitions) - assert df.npartitions == npartitions - df.calculate_spatial_partitions() - - canvas = ds.Canvas(plot_height=10, plot_width=20) - agg = canvas.polygons(source=df, geometry="geometry", agg=ds.max("col")) - - assert_eq_ndarray(agg.data, natural_earth_sol) - - -@pytest.mark.skipif(not geopandas, reason="geopandas not installed") -@pytest.mark.skipif(not spatialpandas, 
reason="spatialpandas not installed") -@pytest.mark.parametrize('npartitions', [0, 1, 2, 5]) -def test_natural_earth_spatialpandas(npartitions): - df = geopandas.read_file(get_path("naturalearth.land")) - df["col"] = np.arange(len(df)) - df = spatialpandas.GeoDataFrame(df) - if npartitions > 0: - df = dd.from_pandas(df, npartitions=npartitions) - assert df.npartitions == npartitions - - canvas = ds.Canvas(plot_height=10, plot_width=20) - agg = canvas.polygons(source=df, geometry="geometry", agg=ds.max("col")) - - assert_eq_ndarray(agg.data, natural_earth_sol)
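Usage sketch (not part of the diff): assuming geopandas, geodatasets and a datashader build containing these changes are installed, the new code paths can be driven directly from a geopandas.GeoDataFrame, mirroring the tests above.

    import datashader as ds
    import geodatasets
    import geopandas
    import numpy as np

    # Same NYC boroughs dataset the tests use.
    df = geopandas.read_file(geodatasets.get_path("nybb"))
    df["col"] = np.arange(len(df))  # Extra column to aggregate.

    canvas = ds.Canvas(plot_width=300, plot_height=300)

    # Polygon outlines via Canvas.line (POLYGON/MULTIPOLYGON are treated as closed rings).
    outlines = canvas.line(source=df, geometry="geometry", agg=ds.max("col"))

    # Filled polygons via Canvas.polygons.
    filled = canvas.polygons(source=df, geometry="geometry", agg=ds.max("col"))

    # Sampled points via Canvas.points, as in test_points_geopandas.
    df_pts = df.copy()
    df_pts["geometry"] = df_pts["geometry"].sample_points(100, seed=93814)
    counts = canvas.points(source=df_pts, geometry="geometry", agg=ds.count())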