diff --git a/pygmt/src/binstats.py b/pygmt/src/binstats.py index 18711451cb0..d671082952c 100644 --- a/pygmt/src/binstats.py +++ b/pygmt/src/binstats.py @@ -2,15 +2,17 @@ binstats - Bin spatial data and determine statistics per bin. """ +from typing import Literal + import xarray as xr from pygmt._typing import PathLike, TableLike +from pygmt.alias import Alias, AliasSystem from pygmt.clib import Session from pygmt.helpers import build_arg_list, fmt_docstring, kwargs_to_strings, use_alias @fmt_docstring @use_alias( - C="statistic", E="empty", I="spacing", N="normalize", @@ -26,7 +28,28 @@ ) @kwargs_to_strings(I="sequence", R="sequence", i="sequence_comma") def binstats( - data: PathLike | TableLike, outgrid: PathLike | None = None, **kwargs + data: PathLike | TableLike, + outgrid: PathLike | None = None, + statistic: Literal[ + "mean", + "mad", + "full", + "interquartile", + "min", + "minpos", + "median", + "number", + "lms", + "mode", + "quantile", + "rms", + "stddev", + "max", + "maxneg", + "sum", + ] = "number", + quantile_value: float = 50, + **kwargs, ) -> xr.DataArray | None: r""" Bin spatial data and determine statistics per bin. @@ -42,35 +65,36 @@ def binstats( Full GMT docs at :gmt-docs:`gmtbinstats.html`. {aliases} + - C=statistic Parameters ---------- data A file name of an ASCII data table or a 2-D {table-classes}. {outgrid} - statistic : str - **a**\|\ **d**\|\ **g**\|\ **i**\|\ **l**\|\ **L**\|\ **m**\|\ **n**\ - \|\ **o**\|\ **p**\|\ **q**\ [*quant*]\|\ **r**\|\ **s**\|\ **u**\ - \|\ **U**\|\ **z**. - Choose the statistic that will be computed per node based on the - points that are within *radius* distance of the node. Select one of: + statistic + Choose the statistic that will be computed per node based on the points that are + within *radius* distance of the node. Select one of: - - **a**: mean (average) - - **d**: median absolute deviation (MAD) - - **g**: full (max-min) range - - **i**: 25-75% interquartile range - - **l**: minimum (low) - - **L**: minimum of positive values only - - **m**: median - - **n**: number of values - - **o**: LMS scale - - **p**: mode (maximum likelihood) - - **q**: selected quantile (append desired quantile in 0-100% range [50]) - - **r**: root mean square (RMS) - - **s**: standard deviation - - **u**: maximum (upper) - - **U**: maximum of negative values only - - **z**: sum + - ``"mean"``: Mean (i.e., average). + - ``"mad"``: Median absolute deviation (MAD). + - ``"full"``: The full (max-min) range. + - ``"interquartile"``: The 25-75% interquartile range. + - ``"min"``: Minimum (lowest value). + - ``"minpos"``: Minimum of positive values only. + - ``"median"``: Median value. + - ``"number"``: The number of values per bin. + - ``"lms"``: Least median square (LMS) scale. + - ``"mode"``: Mode (maximum likelihood estimate). + - ``"quantile"``: Selected quantile. The quantile value is specified by the + ``quantile_value`` parameter and is in the 0-100% range. + - ``"rms"``: Root mean square (RMS). + - ``"stddev"``: Standard deviation. + - ``"max"``: Maximum (highest value). + - ``"maxneg"``: Maximum of negative values only. + - ``"sum"``: The sum of the values. + quantile_value + The quantile value if ``statistic="quantile"``. empty : float Set the value assigned to empty nodes [Default is NaN]. normalize : bool @@ -104,13 +128,40 @@ def binstats( - ``None`` if ``outgrid`` is set (grid output will be stored in the file set by ``outgrid``) """ + aliasdict = AliasSystem( + C=Alias( + statistic, + name="statistic", + mapping={ + "mean": "a", + "mad": "d", + "full": "g", + "interquartile": "i", + "min": "l", + "minpos": "L", + "median": "m", + "number": "n", + "lms": "o", + "mode": "p", + "quantile": "q", + "rms": "r", + "stddev": "s", + "max": "u", + "maxneg": "U", + "sum": "z", + }, + ), + ).merge(kwargs) + if statistic == "quantile": + aliasdict["C"] += f"{quantile_value}" + with Session() as lib: with ( lib.virtualfile_in(check_kind="vector", data=data) as vintbl, lib.virtualfile_out(kind="grid", fname=outgrid) as voutgrd, ): - kwargs["G"] = voutgrd + aliasdict["G"] = voutgrd lib.call_module( - module="binstats", args=build_arg_list(kwargs, infile=vintbl) + module="binstats", args=build_arg_list(aliasdict, infile=vintbl) ) return lib.virtualfile_to_raster(vfname=voutgrd, outgrid=outgrid) diff --git a/pygmt/tests/test_binstats.py b/pygmt/tests/test_binstats.py index 6b77c82f861..9d633aebf3e 100644 --- a/pygmt/tests/test_binstats.py +++ b/pygmt/tests/test_binstats.py @@ -20,7 +20,7 @@ def test_binstats_outgrid(): data="@capitals.gmt", outgrid=tmpfile.name, spacing=5, - statistic="z", + statistic="sum", search_radius="1000k", aspatial="2=population", region="g", @@ -37,7 +37,7 @@ def test_binstats_no_outgrid(): temp_grid = binstats( data="@capitals.gmt", spacing=5, - statistic="z", + statistic="sum", search_radius="1000k", aspatial="2=population", region="g", @@ -49,3 +49,25 @@ def test_binstats_no_outgrid(): npt.assert_allclose(temp_grid.min(), 53) npt.assert_allclose(temp_grid.median(), 1232714.5) npt.assert_allclose(temp_grid.mean(), 4227489) + + +def test_binstats_quantile(): + """ + Test binstats quantile statistic functionality. + """ + temp_grid = binstats( + data="@capitals.gmt", + spacing=5, + statistic="quantile", + quantile_value=75, + search_radius="1000k", + aspatial="2=population", + region="g", + ) + assert temp_grid.dims == ("y", "x") + assert temp_grid.gmt.gtype is GridType.CARTESIAN + assert temp_grid.gmt.registration is GridRegistration.GRIDLINE + npt.assert_allclose(temp_grid.max(), 15047685) + npt.assert_allclose(temp_grid.min(), 53) + npt.assert_allclose(temp_grid.median(), 543664.5) + npt.assert_allclose(temp_grid.mean(), 1661363.6)