1818
1919import fsspec
2020import zarr
21+ import xarray
2122import numpy as np
2223
2324from kerchunk .utils import class_factory , _encode_for_JSON
@@ -376,6 +377,7 @@ def grib_tree(
376377 Grib message variable names that decode as "unknown" are dropped
377378 Grib typeOfLevel attributes that decode as unknown are treated as a single group
378379 Grib steps that are missing due to WrongStepUnitError are patched with NaT
380+ The input message_groups should not be modified by this method
379381
380382 :param message_groups: a collection of zarr store like dictionaries as produced by scan_grib
381383 :param remote_options: remote options to pass to ZarrToMultiZarr
@@ -418,10 +420,10 @@ def grib_tree(
418420 # To resolve unknown variables add custom grib tables.
419421 # https://confluence.ecmwf.int/display/UDOC/Creating+your+own+local+definitions+-+ecCodes+GRIB+FAQ
420422 # If you process the groups from a single file in order, you can use the msg# to compare with the
421- # IDX file.
423+ # IDX file. The idx files message index is 1 based where the grib_tree message count is zero based
422424 logger .warning (
423- "Dropping unknown variable in msg# %d. Compare with the grib idx file to identify and build "
424- " a ecCodes local grib definitions to fix it." ,
425+ "Dropping unknown variable in msg# %d. Compare with the grib idx file to help identify it "
426+ " and build an ecCodes local grib definitions file to fix it." ,
425427 msg_ind ,
426428 )
427429 unknown_counter += 1
@@ -496,21 +498,13 @@ def grib_tree(
496498 catdims ,
497499 )
498500
499- fix_group_step = add_missing_step_var (aggregations [path ], path )
500501 mzz = MultiZarrToZarr (
501- fix_group_step ,
502+ aggregations [ path ] ,
502503 remote_options = remote_options ,
503504 concat_dims = catdims ,
504505 identical_dims = idims ,
505506 )
506- try :
507- group = mzz .translate ()
508- except Exception :
509- import pprint
510-
511- gstr = pprint .pformat (fix_group_step )
512- logger .exception (f"Failed to multizarr { path } \n { gstr } " )
513- raise
507+ group = mzz .translate ()
514508
515509 for key , value in group ["refs" ].items ():
516510 if key not in [".zattrs" , ".zgroup" ]:
@@ -519,53 +513,43 @@ def grib_tree(
519513 return result
520514
521515
522- def add_missing_step_var ( groups : List [ dict ], path : str ) -> List [ dict ] :
516+ def correct_hrrr_subhf_group_step ( group : dict ) -> dict :
523517 """
524- Attempt to fill in missing step var. Should this be done where the step unit error is handled
525- in scan grib?
526- :param groups:
527- :param path:
528- :return:
518+ Overrides the definition of the step variable. Sets the value equal to the `valid_time - time`
519+ in hours as a floating point value. This fixes issues with the HRRR SubHF grib2 step as read by
520+ cfgrib.
521+ The result is a deep copy, the original data is unmodified.
522+ :param groups: the list of groups to fix
523+ :param path: the path of the group
524+ :return: a new deep copy of the corrected listed
529525 """
530- result = []
531- for group in groups :
532- group = copy .deepcopy (group )
533- if "step/.zarray" not in group ["refs" ]:
534- logger .warning ("Adding missing step variable to group path %s" , path )
535- group ["refs" ]["step/.zarray" ] = (
536- '{"chunks":[],"compressor":null,"dtype":"<f8","fill_value":"NaN","filters":null,"order":"C",'
537- '"shape":[],"zarr_format":2}'
538- )
539- group ["refs" ]["step/.zattrs" ] = (
540- '{"_ARRAY_DIMENSIONS":[],"long_name":"time since forecast_reference_time",'
541- '"standard_name":"forecast_period","units":"hours"}'
542- )
526+ group = copy .deepcopy (group )
527+ group ["refs" ]["step/.zarray" ] = (
528+ '{"chunks":[],"compressor":null,"dtype":"<f8","fill_value":"NaN","filters":null,"order":"C",'
529+ '"shape":[],"zarr_format":2}'
530+ )
531+ group ["refs" ]["step/.zattrs" ] = (
532+ '{"_ARRAY_DIMENSIONS":[],"long_name":"time since forecast_reference_time",'
533+ '"standard_name":"forecast_period","units":"hours"}'
534+ )
543535
544- # Try to set the value - this doesn't work
545- import xarray
536+ fo = fsspec . filesystem ( "reference" , fo = group , mode = "r" )
537+ xd = xarray . open_dataset ( fo . get_mapper (), engine = "zarr" , consolidated = False )
546538
547- gcopy = copy .deepcopy (group )
548- fo = fsspec .filesystem ("reference" , fo = gcopy , mode = "r" )
549- xd = xarray .open_dataset (fo .get_mapper (), engine = "zarr" , consolidated = False )
539+ correct_step = xd .valid_time .values - xd .time .values
550540
551- logger .info ("%s has step val %s" , path , xd .step .values )
552- correct_step = xd .valid_time .values - xd .time .values
553- assert correct_step .shape == ()
554- step_float = correct_step .astype ("timedelta64[s]" ).astype ("float" ) / 3600.0
555- step_bytes = step_float .tobytes ()
556- try :
557- # easiest way to test if data is ascii
558- bytes = step_bytes .decode ("ascii" )
559- except UnicodeDecodeError :
560- bytes = (b"base64:" + base64 .b64encode (step_bytes )).decode ("ascii" )
541+ assert correct_step .shape == ()
542+ step_float = correct_step .astype ("timedelta64[s]" ).astype ("float" ) / 3600.0
543+ logger .info (
544+ "Orig val %s, new val %s" , xd .step .values .astype ("timedelta64[s]" ), step_float
545+ )
561546
562- group ["refs" ]["step/0" ] = bytes
563- logger .info (
564- "Computed step float: %s, current step/0 %s, orig step %s" ,
565- step_float ,
566- group ["refs" ].get ("step/0" ),
567- gcopy ["refs" ].get ("step/0" ),
568- )
569- result .append (group )
547+ step_bytes = step_float .tobytes ()
548+ try :
549+ enocded_val = step_bytes .decode ("ascii" )
550+ except UnicodeDecodeError :
551+ enocded_val = (b"base64:" + base64 .b64encode (step_bytes )).decode ("ascii" )
570552
571- return result
553+ group ["refs" ]["step/0" ] = enocded_val
554+
555+ return group
0 commit comments