Skip to content

Commit 858d4a1

Browse files
Merge pull request #217 from HARPgroup/develop-pandas3-prep
Develop pandas3 prep
2 parents 2352c50 + 2493af2 commit 858d4a1

7 files changed

Lines changed: 149 additions & 52 deletions

File tree

environment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ dependencies:
1010
# Running HSP2
1111
- scipy # Scipy also installs numpy
1212
# Pandas installs most scientific Python modules, such as Numpy, etc.
13-
- pandas
13+
- pandas <3.0.0
1414
- numba
1515
- numpy
1616
- hdf5

examples/pretest/cmd_regression.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
"""Compare HSP2/HSPF regression output of a test case against a base case.

Note: This code must be run from the tests dir due to importing
the `convert` directory where the regression_base file lives.
TODO: make this convert path passable by argument.
"""
import numpy as np
from convert.regression_base import RegressTest

case = "test10specl"
base_case = "test10"
#case = "testcbp"
tdir = "/opt/model/HSPsquared/tests"  # NOTE(review): not referenced below
#import_uci(str(hsp2_specl_uci), str(temp_specl_h5file))
#run(temp_specl_h5file, saveall=True, compress=False)

# Quantile levels reported for every RO series (min, quartiles, max).
QUANTILES = [0, 0.25, 0.5, 0.75, 1.0]

############# RegressTest data loader
## TEST CASE
test = RegressTest(case, threads=1)
# test object hydr
rchres_hydr_hsp2_test_table = test.hsp2_data._read_table('RCHRES', '001', 'HYDR')
perlnd_pwat_hsp2_test_table = test.hsp2_data._read_table('PERLND', '001', 'PWATER')
rchres_hydr_hsp2_test = test.hsp2_data.get_time_series('RCHRES', '001', 'RO', 'HYDR')
# Note: get_hspf_time_series takes one (operation, activity, id, constituent,
# tcode) tuple, so its argument order differs from get_time_series above.
rchres_hydr_hspf_test = test.get_hspf_time_series(('RCHRES', 'HYDR', '001', 'RO', 2))
rchres_hydr_hsp2_test_mo = rchres_hydr_hsp2_test.resample('MS').mean()
rchres_hydr_hspf_test_mo = rchres_hydr_hspf_test.resample('MS').mean()
## BASE CASE
base = RegressTest(base_case, threads=1)
# base object hydr
rchres_hydr_hsp2_base_table = base.hsp2_data._read_table('RCHRES', '001', 'HYDR')
perlnd_pwat_hsp2_base_table = base.hsp2_data._read_table('PERLND', '001', 'PWATER')
rchres_hydr_hsp2_base = base.hsp2_data.get_time_series('RCHRES', '001', 'RO', 'HYDR')
rchres_hydr_hspf_base = base.get_hspf_time_series(('RCHRES', 'HYDR', '001', 'RO', 2))
rchres_hydr_hsp2_base_mo = rchres_hydr_hsp2_base.resample('MS').mean()
rchres_hydr_hspf_base_mo = rchres_hydr_hspf_base.resample('MS').mean()

# Show quantiles
print("hsp2", case, np.quantile(rchres_hydr_hsp2_test, QUANTILES))
print("hspf", case, np.quantile(rchres_hydr_hspf_test, QUANTILES))
print("hsp2", base_case, np.quantile(rchres_hydr_hsp2_base, QUANTILES))
print("hspf", base_case, np.quantile(rchres_hydr_hspf_base, QUANTILES))

# Monthly mean value comparisons.
# Printed explicitly: a bare expression only displays in a REPL and is a
# silent no-op when this file is run as a script.
for label, monthly in (
    (f"hsp2 {case}", rchres_hydr_hsp2_test_mo),
    (f"hspf {case}", rchres_hydr_hspf_test_mo),
    (f"hsp2 {base_case}", rchres_hydr_hsp2_base_mo),
    (f"hspf {base_case}", rchres_hydr_hspf_base_mo),
):
    print(label, "monthly mean RO:")
    print(monthly)

# Compare ANY arbitrary timeseries, not just the ones coded into the RegressTest object
# 3rd argument is tolerance to use
tol = 10.0
comparison = test.compare_time_series(
    rchres_hydr_hsp2_base_table['RO'], rchres_hydr_hsp2_test_table['RO'], tol
)
print(comparison)
# Example: (True, 8.7855425)

# Compare inflows and outflows
# NOTE(review): 6000 * 0.0833 presumably scales PERO to a volume comparable
# with IVOL/ROVOL (area times inch-to-foot factor) -- confirm against the UCI.
for label, pwat_table, hydr_table in (
    (case, perlnd_pwat_hsp2_test_table, rchres_hydr_hsp2_test_table),
    (base_case, perlnd_pwat_hsp2_base_table, rchres_hydr_hsp2_base_table),
):
    print(label, "mean PERO * 6000 * 0.0833:", np.mean(pwat_table['PERO']) * 6000 * 0.0833)
    print(label, "mean IVOL:", np.mean(hydr_table['IVOL']))
    print(label, "mean ROVOL:", np.mean(hydr_table['ROVOL']))

# now do a comparison
# HYDR diff should be almost nonexistent
print(test.check_con(params=('RCHRES', 'HYDR', '001', 'ROVOL', 2)))
# this is very large for PWTGAS
# git: test10specl ('PERLND', 'PWTGAS', '001', 'POHT', '2') 1163640%
print(test.check_con(params=('PERLND', 'PWTGAS', '001', 'POHT', '2')))
# Other mismatches in PERLND
print(test.check_con(params=('PERLND', 'PWATER', '001', 'AGWS', '2')))
print(test.check_con(params=('PERLND', 'PWATER', '001', 'PERO', '2')))

# Now run the full test
test.quiet = True  # this lets us test without overwhelming the console
results = test.run_test()
mismatches = []
# Use a distinct loop name so the `results` dict is not rebound mid-iteration.
for key, result in results.items():
    no_data_hsp2, no_data_hspf, match, diff = result
    if no_data_hsp2 or no_data_hspf:
        # Nothing to compare when either model produced no data.
        continue
    if not match:
        mismatches.append((case, key, result))

print(mismatches)

if mismatches:
    for mismatch_case, key, result in mismatches:
        # `result` is the full (no_data_hsp2, no_data_hspf, match, diff)
        # tuple; only the trailing diff can be formatted as a percentage.
        diff = result[3]
        print(mismatch_case, key, f"{diff:0.00%}")
else:
    print("No mismatches found. Success!")

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ dependencies = [
1414
"cltoolbox",
1515
"numba",
1616
"numpy<2.0",
17-
"pandas",
17+
"pandas<3.0.0",
1818
"tables",
1919
"pyparsing"
2020
]

src/hsp2/hsp2/main.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -704,23 +704,23 @@ def get_flows(
704704
t = data_frame[smemn].astype(float64).to_numpy()[0:steps]
705705

706706
if MFname in ts and AFname in ts:
707-
t *= ts[MFname][:steps] * ts[AFname][0:steps]
707+
t = t * ts[MFname][:steps] * ts[AFname][0:steps]
708708
msg(4, f"MFACTOR modified by timeseries {MFname}")
709709
msg(4, f"AFACTR modified by timeseries {AFname}")
710710
elif MFname in ts:
711-
t *= afactr * ts[MFname][0:steps]
711+
t = t * afactr * ts[MFname][0:steps]
712712
msg(4, f"MFACTOR modified by timeseries {MFname}")
713713
elif AFname in ts:
714-
t *= mfactor * ts[AFname][0:steps]
714+
t = t * mfactor * ts[AFname][0:steps]
715715
msg(4, f"AFACTR modified by timeseries {AFname}")
716716
else:
717-
t *= factor
717+
t = t * factor
718718

719719
# if poht to iheat, imprecision in hspf conversion factor requires a slight adjustment
720720
if (smemn == "POHT" or smemn == "SOHT") and tmemn == "IHEAT":
721-
t *= 0.998553
721+
t = t * 0.998553
722722
if (smemn == "PODOXM" or smemn == "SODOXM") and tmemn == "OXIF1":
723-
t *= 1.000565
723+
t = t * 1.000565
724724

725725
# ??? ISSUE: can fetched data be at different frequency - don't know how to transform.
726726
if tmemn in ts:

src/hsp2/hsp2/utilities.py

Lines changed: 47 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -214,46 +214,47 @@ def transform(ts, name, how, siminfo):
214214
"""
215215

216216
tsfreq = ts.index.freq
217-
freq = Minute(siminfo["delt"])
217+
fmins = Minute(siminfo["delt"])
218+
freq = fmins.nanos
218219
stop = siminfo["stop"]
219220

220221
# append duplicate of last point to force processing last full interval
221222
if ts.index[-1] < stop:
222223
ts[stop] = ts.iloc[-1]
223224

224-
if freq == tsfreq:
225+
if freq == tsfreq.nanos:
225226
pass
226227
elif tsfreq is None: # Sparse time base, frequency not defined
227228
ts = ts.reindex(siminfo["tbase"]).ffill().bfill()
228229
elif how == "SAME":
229-
ts = ts.resample(freq).ffill() # tsfreq >= freq assumed, or bad user choice
230+
ts = ts.resample(fmins).ffill() # tsfreq.nanos >= freq assumed, or bad user choice
230231
elif not how:
231232
if name in flowtype:
232-
if "Y" in str(tsfreq) or "M" in str(tsfreq) or tsfreq > freq:
233+
if "Y" in str(tsfreq) or "M" in str(tsfreq) or tsfreq.nanos > freq:
233234
if "M" in str(tsfreq):
234235
ratio = 1.0 / 730.5
235236
elif "Y" in str(tsfreq):
236237
ratio = 1.0 / 8766.0
237238
else:
238-
ratio = freq / tsfreq
239-
ts = (ratio * ts).resample(freq).ffill() # HSP2 how = div
239+
ratio = freq / tsfreq.nanos
240+
ts = (ratio * ts).resample(fmins).ffill() # HSP2 how = div
240241
else:
241-
ts = ts.resample(freq).sum()
242+
ts = ts.resample(fmins).sum()
242243
else:
243-
if "Y" in str(tsfreq) or "M" in str(tsfreq) or tsfreq > freq:
244-
ts = ts.resample(freq).ffill()
244+
if "Y" in str(tsfreq) or "M" in str(tsfreq) or tsfreq.nanos > freq:
245+
ts = ts.resample(fmins).ffill()
245246
else:
246-
ts = ts.resample(freq).mean()
247+
ts = ts.resample(fmins).mean()
247248
elif how == "MEAN":
248-
ts = ts.resample(freq).mean()
249+
ts = ts.resample(fmins).mean()
249250
elif how == "SUM":
250-
ts = ts.resample(freq).sum()
251+
ts = ts.resample(fmins).sum()
251252
elif how == "MAX":
252-
ts = ts.resample(freq).max()
253+
ts = ts.resample(fmins).max()
253254
elif how == "MIN":
254-
ts = ts.resample(freq).min()
255+
ts = ts.resample(fmins).min()
255256
elif how == "LAST":
256-
ts = ts.resample(freq).ffill()
257+
ts = ts.resample(fmins).ffill()
257258
elif how == "DIV":
258259
if "Y" in str(tsfreq) or "M" in str(tsfreq):
259260
mult = 1
@@ -267,14 +268,14 @@ def transform(ts, name, how, siminfo):
267268
elif "Y" in str(tsfreq):
268269
ratio = 1.0 / (8766.0 * mult)
269270
else:
270-
ratio = freq / tsfreq
271-
ts = (ratio * ts).resample(freq).ffill() # HSP2 how = div
271+
ratio = freq / tsfreq.nanos
272+
ts = (ratio * ts).resample(fmins).ffill() # HSP2 how = div
272273
else:
273-
ts = (ts * (freq / ts.index.freq)).resample(freq).ffill()
274+
ts = (ts * (freq / tsfreq.nanos)).resample(fmins).ffill()
274275
elif how == "ZEROFILL":
275-
ts = ts.resample(freq).fillna(0.0)
276+
ts = ts.resample(fmins).fillna(0.0)
276277
elif how == "INTERPOLATE":
277-
ts = ts.resample(freq).interpolate()
278+
ts = ts.resample(fmins).interpolate()
278279
else:
279280
print(f"UNKNOWN method in TRANS, {how}")
280281
return zeros(1)
@@ -287,7 +288,8 @@ def hoursval(siminfo, hours24, dofirst=False, lapselike=False):
287288
"""create hours flags, flag on the hour or lapse table over full simulation"""
288289
start = siminfo["start"]
289290
stop = siminfo["stop"]
290-
freq = Minute(siminfo["delt"])
291+
fmins = Minute(siminfo["delt"])
292+
freq = fmins.nanos
291293

292294
dr = date_range(
293295
start=f"{start.year}-01-01", end=f"{stop.year}-12-31", freq=Minute(60)
@@ -297,16 +299,17 @@ def hoursval(siminfo, hours24, dofirst=False, lapselike=False):
297299
hours[0] = 1
298300

299301
ts = Series(hours[0 : len(dr)], dr)
302+
tsfreq = ts.index.freq
300303
if lapselike:
301-
if ts.index.freq > freq: # upsample
302-
ts = ts.resample(freq).asfreq().ffill()
303-
elif ts.index.freq < freq: # downsample
304-
ts = ts.resample(freq).mean()
304+
if tsfreq.nanos > freq: # upsample
305+
ts = ts.resample(fmins).asfreq().ffill()
306+
elif tsfreq.nanos < freq: # downsample
307+
ts = ts.resample(fmins).mean()
305308
else:
306-
if ts.index.freq > freq: # upsample
307-
ts = ts.resample(freq).asfreq().fillna(0.0)
308-
elif ts.index.freq < freq: # downsample
309-
ts = ts.resample(freq).max()
309+
if tsfreq.nanos > freq: # upsample
310+
ts = ts.resample(fmins).asfreq().fillna(0.0)
311+
elif tsfreq.nanos < freq: # downsample
312+
ts = ts.resample(fmins).max()
310313
return ts.truncate(start, stop).to_numpy()
311314

312315

@@ -321,16 +324,18 @@ def monthval(siminfo, monthly):
321324
"""returns value at start of month for all times within the month"""
322325
start = siminfo["start"]
323326
stop = siminfo["stop"]
324-
freq = Minute(siminfo["delt"])
327+
fmins = Minute(siminfo["delt"])
328+
freq = fmins.nanos
325329

326330
months = tile(monthly, stop.year - start.year + 1).astype(float)
327331
dr = date_range(start=f"{start.year}-01-01", end=f"{stop.year}-12-31", freq="MS")
328332
ts = Series(months, index=dr).resample("D").ffill()
333+
tsfreq = ts.index.freq
329334

330-
if ts.index.freq > freq: # upsample
331-
ts = ts.resample(freq).asfreq().ffill()
332-
elif ts.index.freq < freq: # downsample
333-
ts = ts.resample(freq).mean()
335+
if tsfreq.nanos > freq: # upsample
336+
ts = ts.resample(fmins).asfreq().ffill()
337+
elif tsfreq.nanos < freq: # downsample
338+
ts = ts.resample(fmins).mean()
334339
return ts.truncate(start, stop).to_numpy()
335340

336341

@@ -339,16 +344,19 @@ def dayval(siminfo, monthly):
339344
interpolation to day, but constant within day"""
340345
start = siminfo["start"]
341346
stop = siminfo["stop"]
342-
freq = Minute(siminfo["delt"])
347+
fmins = Minute(siminfo["delt"])
348+
freq = fmins.nanos
343349

344350
months = tile(monthly, stop.year - start.year + 1).astype(float)
345351
dr = date_range(start=f"{start.year}-01-01", end=f"{stop.year}-12-31", freq="MS")
346352
ts = Series(months, index=dr).resample("D").interpolate("time")
353+
tsfreq = ts.index.freq
354+
347355

348-
if ts.index.freq > freq: # upsample
349-
ts = ts.resample(freq).ffill()
350-
elif ts.index.freq < freq: # downsample
351-
ts = ts.resample(freq).mean()
356+
if tsfreq.nanos > freq: # upsample
357+
ts = ts.resample(fmins).ffill()
358+
elif tsfreq.nanos < freq: # downsample
359+
ts = ts.resample(fmins).mean()
352360
return ts.truncate(start, stop).to_numpy()
353361

354362

src/hsp2/hsp2tools/commands.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@ def run(h5file, saveall=True, compress=True):
2121
[optional] Default is True.
2222
use compression on the save h5 file.
2323
"""
24-
hdf5_instance = HDF5(h5file)
25-
io_manager = IOManager(hdf5_instance)
26-
main(io_manager, saveall=saveall, jupyterlab=compress)
24+
with HDF5(h5file) as hdf5_instance:
25+
io_manager = IOManager(hdf5_instance)
26+
main(io_manager, saveall=saveall, jupyterlab=compress)
2727

2828

2929
def import_uci(ucifile, h5file):

tests/convert/regression_base.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def __init__(
3030
self.tcodes = tcodes
3131
self.ids = ids
3232
self.threads = threads
33-
33+
self.quiet = False # allows users to set this later
3434
self._init_files()
3535

3636
def _init_files(self):
@@ -185,7 +185,8 @@ def run_test(self) -> Dict[OperationsTuple, ResultsTuple]:
185185
def check_con(self, params: OperationsTuple) -> ResultsTuple:
186186
"""Performs comparison of single constituent"""
187187
operation, activity, id, constituent, tcode = params
188-
print(f" {operation}_{id} {activity} {constituent}\n")
188+
if not self.quiet:
189+
print(f" {operation}_{id} {activity} {constituent}\n")
189190

190191
ts_hsp2 = self.hsp2_data.get_time_series(operation, id, constituent, activity)
191192
ts_hspf = self.get_hspf_time_series(params)

0 commit comments

Comments
 (0)