From 85aaa63f4f4aee571cf82b55ef9141c09f584ac6 Mon Sep 17 00:00:00 2001 From: Leonardo Uieda Date: Thu, 6 Feb 2020 18:08:04 +0000 Subject: [PATCH 1/3] There seems to be a bug in grid_to_table Was using coords but thats an unordered dict and not necessarily the order of dimensions for the dataset. --- verde/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/verde/utils.py b/verde/utils.py index 42ee7359e..a14644421 100644 --- a/verde/utils.py +++ b/verde/utils.py @@ -267,7 +267,7 @@ def grid_to_table(grid): [20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39] """ - coordinate_names = [*grid.coords.keys()] + coordinate_names = list(grid.dims.keys()) coord_north = grid.coords[coordinate_names[0]].values coord_east = grid.coords[coordinate_names[1]].values coordinates = [i.ravel() for i in np.meshgrid(coord_east, coord_north)] From 8ce1f97b9241c2ff6f0376c6dbbfbcfa06ed22e4 Mon Sep 17 00:00:00 2001 From: Leonardo Uieda Date: Thu, 20 Feb 2020 12:22:16 +0000 Subject: [PATCH 2/3] Seem to have fixed the bug to need some more tests The dims in the Dataset aren't ordered. Have to get them from the DataArrays themselves. Simplified some of the code for the rest of the function as well. Tests pass but need to check with different data arrays. --- verde/utils.py | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/verde/utils.py b/verde/utils.py index a14644421..bb24c7ac9 100644 --- a/verde/utils.py +++ b/verde/utils.py @@ -237,6 +237,11 @@ def grid_to_table(grid): ... coords=(np.arange(4), np.arange(5, 10)), ... dims=['northing', 'easting'] ... ) + >>> print(temperature.values) + [[ 0 1 2 3 4] + [ 5 6 7 8 9] + [10 11 12 13 14] + [15 16 17 18 19]] >>> grid = xr.Dataset({"temperature": temperature}) >>> table = grid_to_table(grid) >>> list(sorted(table.columns)) @@ -267,23 +272,17 @@ def grid_to_table(grid): [20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39] """ - coordinate_names = list(grid.dims.keys()) - coord_north = grid.coords[coordinate_names[0]].values - coord_east = grid.coords[coordinate_names[1]].values - coordinates = [i.ravel() for i in np.meshgrid(coord_east, coord_north)] - coord_dict = { - coordinate_names[0]: coordinates[1], - coordinate_names[1]: coordinates[0], - } - variable_name = [*grid.data_vars.keys()] - variable_data = grid.to_array().values - variable_arrays = variable_data.reshape( - len(variable_name), int(len(variable_data.ravel()) / len(variable_name)) - ) - var_dict = dict(zip(variable_name, variable_arrays)) - coord_dict.update(var_dict) - data = pd.DataFrame(coord_dict) - return data + data_names = list(grid.data_vars.keys()) + data_arrays = [grid[name].values.ravel() for name in data_names] + coordinate_names = list(grid[data_names[0]].dims) + north = grid.coords[coordinate_names[0]].values + east = grid.coords[coordinate_names[1]].values + # Need to flip the coordinates because the names are in northing and + # easting order + coordinates = [i.ravel() for i in np.meshgrid(east, north)][::-1] + data_dict = dict(zip(coordinate_names, coordinates)) + data_dict.update(dict(zip(data_names, data_arrays))) + return pd.DataFrame(data_dict) def kdtree(coordinates, use_pykdtree=True, **kwargs): From 1c6c559016a047fde7c2201bca9b13b61e123ae0 Mon Sep 17 00:00:00 2001 From: Leonardo Uieda Date: Wed, 26 Feb 2020 12:24:54 +0000 Subject: [PATCH 3/3] Add test that would have caught the bug --- verde/tests/test_utils.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/verde/tests/test_utils.py b/verde/tests/test_utils.py index 0f8f8722e..0d6119b99 100644 --- a/verde/tests/test_utils.py +++ b/verde/tests/test_utils.py @@ -5,11 +5,12 @@ import numpy as np import numpy.testing as npt +import xarray as xr from scipy.spatial import cKDTree # pylint: disable=no-name-in-module import pytest from ..coordinates import grid_coordinates -from ..utils import parse_engine, dummy_jit, kdtree +from ..utils import parse_engine, dummy_jit, kdtree, grid_to_table from .. import utils @@ -52,3 +53,25 @@ def test_kdtree(): npt.assert_allclose(dist, 0.1) if not use_pykdtree: assert isinstance(tree, cKDTree) + + +def test_grid_to_table_order(): + "Check that coordinates are in the right order when converting to tables" + lon, lat = grid_coordinates(region=(1, 10, -10, -1), shape=(3, 4)) + data = lon ** 2 + # If the DataArray is created with coords in an order that doesn't match + # the dims (which is valid), we were getting it wrong because we were + # relying on the order of the coords instead of dims. This test would have + # caught that bug. + grid = xr.DataArray( + data=data, + coords={"longitude": lon[0, :], "latitude": lat[:, 0]}, + dims=("latitude", "longitude"), + ).to_dataset(name="field") + table = grid_to_table(grid) + true_lat = [-10, -10, -10, -10, -5.5, -5.5, -5.5, -5.5, -1, -1, -1, -1] + true_lon = [1, 4, 7, 10, 1, 4, 7, 10, 1, 4, 7, 10] + true_field = [1, 16, 49, 100, 1, 16, 49, 100, 1, 16, 49, 100] + npt.assert_allclose(true_lat, table.latitude) + npt.assert_allclose(true_lon, table.longitude) + npt.assert_allclose(true_field, table.field)