Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
471 changes: 471 additions & 0 deletions Run_the_test.ipynb

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ def test_get_pixel_overlaps_gdf_wpreexisting_index(pix_agg=pix_agg):
# the pix_agg variable that this whole section has used. Doesn't really
# matter, since this is testing an index error that would've
# happened during aggregate() above.
assert np.allclose([v for v in agg.agg.test.values],2.1666,rtol=1e-4)
assert np.allclose([v for v in agg.agg.test_weighted_mean.values],2.1666,rtol=1e-4)


##### aggregate() tests #####
Expand All @@ -347,7 +347,7 @@ def test_aggregate_basic(ds=ds):
# it's actually 1.499981, whereas multiplying out
# np.sum(agg.agg.rel_area[0]*np.array([0,1,2,3])) gives 1.499963...
# Possibly worth examining more closely later
assert np.allclose([v for v in agg.agg.test.values],1.4999,rtol=1e-4)
assert np.allclose([v for v in agg.agg.test_weighted_mean.values],1.4999,rtol=1e-4)

def test_aggregate_basic_wdotproduct(ds=ds):
# Create multiple polygons, to double check, since dot product
Expand Down Expand Up @@ -400,7 +400,7 @@ def test_aggregate_with_weights(ds=ds):

# Since the "test" for the input ds has [0,2] for the two
# equatorial pixels, the average should just be 1.0
assert np.allclose([v for v in agg.agg.test.values],1.0)
assert np.allclose([v for v in agg.agg.test_weighted_mean.values],1.0)



Expand Down Expand Up @@ -432,7 +432,7 @@ def test_aggregate_with_mismatched_grid():
agg = aggregate(ds,wm)

# On change in rtol, see note in test_aggregate_basic
assert np.allclose([v for v in agg.agg.test.values],1.4999,rtol=1e-4)
assert np.allclose([v for v in agg.agg.test_weighted_mean.values],1.4999,rtol=1e-4)


# Should probably test multiple polygons just to be sure...
Expand Down
4 changes: 2 additions & 2 deletions tests/test_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def test_to_dataset(agg=agg):

# Build reference output dataset
ds_ref = xr.Dataset({'name':(['poly_idx'],np.array(['test']).astype(object)),
'test':(['poly_idx','run'],np.array([[1.0,2.0]]))},
'test_weighted_mean':(['poly_idx','run'],np.array([[1.0,2.0]]))},
coords={'poly_idx':(['poly_idx'],np.array([0])),
'run':(['run'],np.array([0,1]))})

Expand All @@ -52,7 +52,7 @@ def test_to_dataframe(agg=agg):
df_out = agg.to_dataframe()

# Build reference output dataframe
df_ref = pd.DataFrame({'poly_idx':[0,0],'run':[0,1],'name':['test','test'],'test':[0.9999,1.9999]})
df_ref = pd.DataFrame({'poly_idx':[0,0],'run':[0,1],'name':['test','test'],'test_weighted_mean':[0.9999,1.9999]})
df_ref = df_ref.set_index(['poly_idx','run'])

# Assert equal within tolerance, again likely due to very slight
Expand Down
1 change: 1 addition & 0 deletions xagg/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# everything else happening behind the scenes (and the exporting
# happening as methods to the classes that are exported from those
# two functions)
import xagg.esmf_setup
from .wrappers import pixel_overlaps
from .aux import (normalize,fix_ds,get_bnds,subset_find)
from .core import (aggregate,read_wm)
3 changes: 2 additions & 1 deletion xagg/aux.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,8 @@ def subset_find(ds0,ds1):
latlons0 = list(zip(ds1['lat'].values,ds1['lon'].values))

# Find indices of the used grid for aggregation in the input grid
loc_idxs = [latlons.index(i) for i in latlons0]
latlons_dict = {latlon: index for index, latlon in enumerate(latlons)}
loc_idxs = [latlons_dict[i] for i in latlons0]

if np.allclose(len(loc_idxs),len(latlons0)):
print('grid adjustment successful')
Expand Down
169 changes: 148 additions & 21 deletions xagg/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
from . aux import (find_rel_area,normalize,fix_ds,get_bnds,subset_find,list_or_first)
from . classes import (weightmap,aggregated)


SUPPORTED_STATISTICS_METHODS = ["weighted_mean", "weighted_sum","weighted_median","sum", "mean", "max", "min", "median", "count", "std"]

def read_wm(path):
""" Load temporary weightmap files from wm.to_file()

Expand Down Expand Up @@ -423,9 +426,10 @@ def get_pixel_overlaps(gdf_in,pix_agg,impl='for_loop'):
# corresponding to those areas, and the lat/lon coordinates of
# those pixels
overlap_info = ov_groups.agg(rel_area=pd.NamedAgg(column='geometry',aggfunc=lambda ds: [normalize(ds.area)]),
pix_idxs=pd.NamedAgg(column='pix_idx',aggfunc=lambda ds: [idx for idx in ds]),
lat=pd.NamedAgg(column='lat',aggfunc=lambda ds: [x for x in ds]),
lon=pd.NamedAgg(column='lon',aggfunc=lambda ds: [x for x in ds]))
poly_area=pd.NamedAgg(column='geometry',aggfunc=lambda ds: [ds.area]),
pix_idxs=pd.NamedAgg(column='pix_idx',aggfunc=lambda ds: [idx for idx in ds]),
lat=pd.NamedAgg(column='lat',aggfunc=lambda ds: [x for x in ds]),
lon=pd.NamedAgg(column='lon',aggfunc=lambda ds: [x for x in ds]))

# Zip lat, lon columns into a list of (lat,lon) coordinates
# (separate from above because as of 12/20, named aggs with
Expand Down Expand Up @@ -464,8 +468,34 @@ def get_pixel_overlaps(gdf_in,pix_agg,impl='for_loop'):

return wm_out

def check_supported_statistic(stats_list):
    """Validate that every requested aggregation statistic is supported.

    Parameters
    ----------
    stats_list : list
        list of statistic names, e.g. ``['weighted_mean', 'max']``

    Raises
    ------
    ValueError
        if any entry is not in ``SUPPORTED_STATISTICS_METHODS``.
        (A ``ValueError`` is raised instead of using ``assert``, since
        asserts are silently stripped when Python runs with ``-O``.)
    """
    for s in stats_list:
        if s not in SUPPORTED_STATISTICS_METHODS:
            raise ValueError(f"You selected {s}. This statistic is not available. "
                             f"Please choose one of the following: {SUPPORTED_STATISTICS_METHODS}.")

def aggregate(ds,wm,impl='for_loop',silent=False):

def comp_weighted_medians(pix_in_poly, weights):
    """Compute the weighted median of pixel values over a polygon.

    The weighted median is found by sorting the pixel values, taking the
    cumulative sum of their weights, and interpolating the value at which
    the cumulative weight reaches half the total weight.

    Parameters
    ----------
    pix_in_poly : array-like
        pixel values; either 1-D (single timestep, shape ``(npix,)``) or
        2-D with time assumed to be the *first* axis (shape ``(nt, npix)``)
    weights : array-like
        per-pixel weights, shape ``(npix,)``

    Returns
    -------
    :class:`numpy.ndarray`
        0-d array (single timestep) or 1-D array of length ``nt``
        (multiple timesteps) of weighted medians
    """
    def _weighted_median_1d(values, wts):
        # Sort values (carrying weights along), then interpolate the value
        # at which the cumulative weight crosses half the total weight
        df = pd.DataFrame({'pix_values': values, 'weights': wts})
        df.sort_values(by='pix_values', inplace=True)
        cumsum = df.weights.cumsum()
        cutoff = df.weights.sum() / 2.0
        return np.interp(cutoff, cumsum, df.pix_values.values)

    pix_in_poly = np.asarray(pix_in_poly)

    # for single timestep
    if pix_in_poly.ndim == 1:
        return np.array(_weighted_median_1d(pix_in_poly, weights))
    # for multiple timesteps (no xr.DataArray buffer needed; the original
    # allocated one even in the 1-D branch where it was unused)
    else:
        return np.array([_weighted_median_1d(pix_in_poly[step], weights)
                         for step in range(pix_in_poly.shape[0])])


def aggregate(ds,wm,impl='for_loop',stat=['weighted_mean'],skipna=True,interpolate_NaN=False,silent=False):
""" Aggregate raster variable(s) to polygon(s)

Aggregates (N-D) raster variables in `ds` to the polygons
Expand Down Expand Up @@ -517,6 +547,24 @@ def aggregate(ds,wm,impl='for_loop',silent=False):
aggregation is calculated using a dot product,
requires much more memory (due to broadcasting of
variables) but may be faster in certain circumstances

stat : :class:`list` (def: ``['weighted_mean']``)
which aggregation statistic(s) to calculate (only
supported with ``impl='for_loop'``); any of:
- ``'weighted_mean'``
- ``'weighted_sum'``
- ``'weighted_median'``
- ``'mean'``
- ``'max'``
- ``'min'``
- ``'sum'``
- ``'median'``
- ``'count'``
- ``'std'``

skipna : bool, default = `True`
if True, skip missing values (as marked by NaN). Only
skips missing values for float dtypes.

interpolate_NaN : bool, default = `False`
if True, NaNs in the input data are filled by
interpolation (via ``ds.interpolate_na()``) before aggregating


silent : bool, default = `False`
if True, then no status updates are printed to std out
Expand All @@ -527,23 +575,32 @@ def aggregate(ds,wm,impl='for_loop',silent=False):
an :class:`xagg.classes.aggregated` object with the aggregated variables

"""

# Check the supported stats
check_supported_statistic(stat)


# Make sure pixel_overlaps was correctly run if using dot product
if (impl=='dot_product') and (wm.overlap_da is None):
raise ValueError("no 'overlap_da' was found in the `wm` input - since you're using the dot product implementation, "+
"make sure to run `pixel_overlaps()` with `impl='dot_product'` to avoid this error.")

# Turn into dataset if dataarray
if type(ds)==xr.core.dataarray.DataArray:
if ds.name is None:
warnings.warn('An unnamed xr.DataArray was inputted instead of a xr.Dataset; the output variable will be "var"')
ds = ds.to_dataset(name='var')
else:
ds = ds.to_dataset()

if ds.name is None:
warnings.warn('An unnamed xr.DataArray was inputted instead of a xr.Dataset; the output variable will be "var"')
ds = ds.to_dataset(name='var')
else:
ds = ds.to_dataset()


# Run ds through fix_ds (to fix lat/lon names, lon coords)
ds = fix_ds(ds)

# interpolate NaNs
if interpolate_NaN == True:
ds = ds.interpolate_na()

# Stack
ds = ds.stack(loc=('lat','lon'))

Expand Down Expand Up @@ -621,10 +678,13 @@ def aggregate(ds,wm,impl='for_loop',silent=False):
# bound variable
if ('bnds' not in ds[var].dims) & ('loc' in ds[var].dims):
if not silent:
print('aggregating '+var+'...')
# Create the column for the relevant variable
wm.agg[var] = None

all_stat = ', '.join(stat)
print('aggregating '+var+' by '+all_stat+ '...')

for i in stat:
var_stat = var+'_'+ i
wm.agg[var_stat] = None

# Get weighted average of variable based on pixel overlap + other weights
for poly_idx in wm.agg.poly_idx:
# Get average value of variable over the polygon; weighted by
Expand Down Expand Up @@ -659,17 +719,85 @@ def aggregate(ds,wm,impl='for_loop',silent=False):
# Calculate the normalized area+weight of each pixel (taking into account
# nans)
normed_areaweights = normalize(tmp_areas*weights[wm.agg.iloc[poly_idx,:].pix_idxs],drop_na=True)

# Take the weighted average of all the pixel values to calculate the
# aggregated value for the shapefile
wm.agg.loc[poly_idx,var] = [[((ds[var].isel(loc=wm.agg.iloc[poly_idx,:].pix_idxs)*
normed_areaweights).
sum('loc')).values]]
# weights for Sum
areaweights = tmp_areas*weights[wm.agg.iloc[poly_idx,:].pix_idxs]


# Take the values of all pixel values in the current polygon
pix_in_poly = ds[var].isel(loc=wm.agg.iloc[poly_idx,:].pix_idxs)
pixel_area = np.atleast_1d(np.squeeze(wm.agg.iloc[poly_idx,:].poly_area))

# Apply aggregation statistic method to the weighted pixel values

for i in stat:
if i == "weighted_mean": # weighted average of all the pixel values
stat_i = "weighted_mean"
var_stat = var+'_'+stat_i
wm.agg.loc[poly_idx,var_stat] = [[(pix_in_poly*normed_areaweights).sum('loc', skipna=skipna).values]]
continue
elif i == "mean":
stat_i = "mean"
var_stat = var+'_'+stat_i
wm.agg.loc[poly_idx,var_stat] = [[pix_in_poly.mean('loc', skipna=skipna).values]]
continue
elif i == "max":
stat_i = "max"
var_stat = var+'_'+stat_i
wm.agg.loc[poly_idx,var_stat] = [[pix_in_poly.max('loc', skipna=skipna).values]]
continue
elif i == "min":
stat_i = "min"
var_stat = var+'_'+stat_i
wm.agg.loc[poly_idx,var_stat] = [[pix_in_poly.min('loc', skipna=skipna).values]]
continue
elif i == "weighted_sum":
stat_i = "weighted_sum"
var_stat = var+'_'+stat_i
wm.agg.loc[poly_idx,var_stat] = [[(pix_in_poly*weights[wm.agg.iloc[poly_idx,:].pix_idxs]*(pixel_area/pixel_area.max())).sum('loc', skipna=skipna).values]]

continue

elif i == "sum":
stat_i = "sum"
var_stat = var+'_'+stat_i
wm.agg.loc[poly_idx,var_stat] = [[pix_in_poly.sum('loc', skipna=skipna).values]]
continue


elif i == "median":
stat_i = "median"
var_stat = var+'_'+stat_i
df = pd.DataFrame({'pix_values': pix_in_poly, 'weights': areaweights})
df.sort_values(by='pix_values',inplace=True)
cumsum = df.weights.cumsum()
cutoff = df.weights.sum() / 2.0
wm.agg.loc[poly_idx,var_stat] = np.interp(cutoff, cumsum, df.pix_values.values)
continue


elif i == "weighted_median":
stat_i = "weighted_median"
var_stat = var+'_'+stat_i
wm.agg.loc[poly_idx,var_stat] = [[comp_weighted_medians(pix_in_poly, normed_areaweights)]]
continue
elif i == "count": # polygon area divided by pixel size gives exact count
stat_i = "count"
var_stat = var+'_'+stat_i

wm.agg.loc[poly_idx,var_stat] = [[pixel_area.sum()/pixel_area.max()]]
continue
elif "std" in stat:
stat_i = "std"
var_stat = var+'_'+stat_i
wm.agg[var_stat] = None
wm.agg.loc[poly_idx,var_stat] = [[(pix_in_poly*weights[wm.agg.iloc[poly_idx,:].pix_idxs]).std('loc', skipna=skipna)]]

else:
wm.agg[var] = None
wm.agg.loc[poly_idx,var] = [[(ds[var].isel(loc=0)*np.nan).values]]

else:
wm.agg[var] = None
wm.agg.loc[poly_idx,var] = [[(ds[var].isel(loc=0)*np.nan).values]]

# Put in class format
Expand All @@ -680,4 +808,3 @@ def aggregate(ds,wm,impl='for_loop',silent=False):
if not silent:
print('all variables aggregated to polygons!')
return agg_out

28 changes: 28 additions & 0 deletions xagg/esmf_setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import os
import sys

def get_env_info():
    """Return the name and path of the directory containing the running
    Python interpreter (typically the active environment's ``bin`` dir).

    Returns
    -------
    tuple of (str, str)
        ``(env_name, env_path)`` where ``env_path`` is the directory of
        ``sys.executable`` and ``env_name`` is its basename
    """
    interpreter_dir = os.path.dirname(sys.executable)
    return os.path.basename(interpreter_dir), interpreter_dir



def set_esmf_mk_path():
    """Locate ``esmf.mk`` in the current Python environment and point the
    ``ESMFMKFILE`` environment variable at it if found.

    Looks for ``<env_root>/lib/esmf.mk``, where the environment root is
    derived from the interpreter's directory. Prints status messages and
    never raises if the file cannot be found.
    """
    env_name, env_path = get_env_info()
    if env_path:
        # env_path is usually <env_root>/bin; step up to the environment
        # root. Compare the path component instead of the original
        # `endswith('/bin')`, which fails on Windows-style separators.
        if os.path.basename(env_path) == 'bin':
            env_path = os.path.dirname(env_path)
        esmf_mk_path = os.path.join(env_path, 'lib', 'esmf.mk')

        if os.path.isfile(esmf_mk_path):
            os.environ['ESMFMKFILE'] = esmf_mk_path
            print(f"Found esmf.mk at: {esmf_mk_path}")
        else:
            print('esmf.mk not found in the specified conda environment.')
    else:
        print('Unable to determine the Anaconda environment.')

set_esmf_mk_path()



Loading