Skip to content

Commit 9f6e78e

Browse files
pp-moznicholls
authored and committed
Provide info on pp-field indices in the file for structured um loads. (SciTools#2977)
* Field locations for structured um loads. * Remove unused import. * Fix test for new implementation. * Added whatsnew. * Review changes.
1 parent 832baeb commit 9f6e78e

8 files changed

Lines changed: 204 additions & 46 deletions

File tree

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
* The :class:`iris.fileformats.um.FieldCollation` objects, which are passed
2+
into load callbacks when using
3+
:func:`iris.fileformats.um.structured_um_loading`, now
4+
have the additional properties :
5+
:data:`iris.fileformats.um.FieldCollation.data_filepath` and
6+
:data:`iris.fileformats.um.FieldCollation.data_field_indices`.
7+
These provide the file locations of the original data fields, which are
8+
otherwise lost in the structured loading process.

lib/iris/fileformats/pp.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -831,7 +831,7 @@ def _pp_attribute_names(header_defn):
831831
special_headers = list('_' + name for name in _SPECIAL_HEADERS)
832832
extra_data = list(EXTRA_DATA.values())
833833
special_attributes = ['_raw_header', 'raw_lbtim', 'raw_lbpack',
834-
'boundary_packing']
834+
'boundary_packing', '_index_in_structured_load_file']
835835
return normal_headers + special_headers + extra_data + special_attributes
836836

837837

@@ -864,6 +864,7 @@ def __init__(self, header=None):
864864
self.raw_lbtim = None
865865
self.raw_lbpack = None
866866
self.boundary_packing = None
867+
self._index_in_structured_load_file = None
867868
if header is not None:
868869
self.raw_lbtim = header[self.HEADER_DICT['lbtim'][0]]
869870
self.raw_lbpack = header[self.HEADER_DICT['lbpack'][0]]

lib/iris/fileformats/um/__init__.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# (C) British Crown Copyright 2014 - 2017, Met Office
1+
# (C) British Crown Copyright 2014 - 2018, Met Office
22
#
33
# This file is part of Iris.
44
#
@@ -27,7 +27,6 @@
2727

2828
# Publish the FF-replacement features here, and include documentation.
2929
from ._ff_replacement import um_to_pp, load_cubes, load_cubes_32bit_ieee
30-
from ._fast_load import structured_um_loading
31-
from ._fast_load_structured_fields import FieldCollation
30+
from ._fast_load import structured_um_loading, FieldCollation
3231
__all__ = ['um_to_pp', 'load_cubes', 'load_cubes_32bit_ieee',
3332
'structured_um_loading', 'FieldCollation']

lib/iris/fileformats/um/_fast_load.py

Lines changed: 67 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# (C) British Crown Copyright 2016 - 2017, Met Office
1+
# (C) British Crown Copyright 2016 - 2018, Met Office
22
#
33
# This file is part of Iris.
44
#
@@ -40,19 +40,67 @@
4040
import threading
4141
import os.path
4242

43+
import numpy as np
44+
4345
# Be minimal about what we import from iris, to avoid circular imports.
4446
# Below, other parts of iris.fileformats are accessed via deferred imports.
4547
import iris
4648
from iris.coords import DimCoord
4749
from iris.cube import CubeList
4850
from iris.exceptions import TranslationError
49-
51+
from iris.fileformats.um._fast_load_structured_fields import \
52+
BasicFieldCollation, group_structured_fields
5053

5154
# Strings to identify the PP and FF file format handler specs.
5255
_FF_SPEC_NAME = 'UM Fieldsfile'
5356
_PP_SPEC_NAME = 'UM Post Processing file'
5457

5558

59+
class FieldCollation(BasicFieldCollation):
60+
# This class specialises the BasicFieldCollation by adding the file-index
61+
# and file-path concepts.
62+
# This preserves the more abstract scope of the original 'FieldCollation'
63+
# class, now renamed 'BasicFieldCollation'.
64+
65+
def __init__(self, fields, filepath):
66+
"""
67+
Args:
68+
69+
* fields (iterable of :class:`iris.fileformats.pp.PPField`):
70+
The fields in the collation.
71+
72+
* filepath (string):
73+
The path of the file the collation is loaded from.
74+
75+
"""
76+
super(FieldCollation, self).__init__(fields)
77+
self._load_filepath = filepath
78+
79+
@property
80+
def data_filepath(self):
81+
return self._load_filepath
82+
83+
@property
84+
def data_field_indices(self):
85+
"""
86+
Field indices of the contained PPFields in the input file.
87+
88+
This records the original file location of the individual data fields
89+
contained, within the input datafile.
90+
91+
Returns:
92+
An integer array of shape `self.vector_dims_shape`.
93+
94+
"""
95+
# Get shape : N.B. this calculates (and caches) the structure.
96+
vector_dims_shape = self.vector_dims_shape
97+
# Get index-in-file of each contained field.
98+
indices = np.array([field._index_in_structured_load_file
99+
for field in self._fields],
100+
dtype=np.int64)
101+
return indices.reshape(vector_dims_shape)
102+
103+
56104
def _basic_load_function(filename, pp_filter=None, **kwargs):
57105
# The low-level 'fields from filename' loader.
58106
#
@@ -71,8 +119,6 @@ def _basic_load_function(filename, pp_filter=None, **kwargs):
71119
# Therefore, the actual loader will pass this as the 'pp_filter' keyword,
72120
# when it is present.
73121
# Additional load keywords are 'passed on' to the lower-level function.
74-
from iris.fileformats.um._fast_load_structured_fields import \
75-
group_structured_fields
76122

77123
# Helper function to select the correct fields loader call.
78124
def _select_raw_fields_loader(fname):
@@ -98,10 +144,20 @@ def _select_raw_fields_loader(fname):
98144
return loader
99145

100146
loader = _select_raw_fields_loader(filename)
101-
fields = iter(field
102-
for field in loader(filename, **kwargs)
103-
if pp_filter is None or pp_filter(field))
104-
return group_structured_fields(fields)
147+
148+
def iter_fields_decorated_with_load_indices(fields_iter):
149+
for i_field, field in enumerate(fields_iter):
150+
field._index_in_structured_load_file = i_field
151+
yield field
152+
153+
fields = iter_fields_decorated_with_load_indices(
154+
field
155+
for field in loader(filename, **kwargs)
156+
if pp_filter is None or pp_filter(field))
157+
158+
return group_structured_fields(fields,
159+
collation_class=FieldCollation,
160+
filepath=filename)
105161

106162

107163
# Define the preferred order of candidate dimension coordinates, as used by
@@ -342,7 +398,9 @@ def structured_um_loading():
342398
which is normally the whole of one phenomenon from a single input file.
343399
In particular, the callback's "field" argument is a
344400
:class:`~iris.fileformats.um.FieldCollation`, from which "field.fields"
345-
gives a *list* of PPFields from which that cube was built.
401+
gives a *list* of PPFields from which that cube was built, and the
402+
properties "field.data_filepath" and "field.data_field_indices"
403+
reference the original file locations of the cube data.
346404
The code required is therefore different from a 'normal' callback.
347405
For an example of this, see `this example in the Iris test code
348406
<https://github.com/SciTools/iris/

lib/iris/fileformats/um/_fast_load_structured_fields.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
Code for fast loading of structured UM data.
1919
2020
This module defines which pp-field elements take part in structured loading,
21-
and provides creation of :class:`FieldCollation` objects from lists of
21+
and provides creation of :class:`BasicFieldCollation` objects from lists of
2222
:class:`iris.fileformats.pp.PPField`.
2323
2424
"""
@@ -36,18 +36,18 @@
3636
optimal_array_structure
3737

3838

39-
class FieldCollation(object):
39+
class BasicFieldCollation(object):
4040
"""
4141
An object representing a group of UM fields with array structure that can
4242
be vectorized into a single cube.
4343
4444
For example:
4545
4646
Suppose we have a set of 28 fields repeating over 7 vertical levels for
47-
each of 4 different data times. If a FieldCollation is created to contain
48-
these, it can identify that this is a 4*7 regular array structure.
47+
each of 4 different data times. If a BasicFieldCollation is created to
48+
contain these, it can identify that this is a 4*7 regular array structure.
4949
50-
This FieldCollation will then have the following properties:
50+
This BasicFieldCollation will then have the following properties:
5151
5252
* within 'element_arrays_and_dims' :
5353
Element 'blev' has the array shape (7,) and dims of (1,).
@@ -259,7 +259,9 @@ def _um_collation_key_function(field):
259259
# vector pseudo-level coordinate directly in the structured load analysis.
260260

261261

262-
def group_structured_fields(field_iterator):
262+
def group_structured_fields(field_iterator,
263+
collation_class=BasicFieldCollation,
264+
**collation_kwargs):
263265
"""
264266
Collect structured fields into identified groups whose fields can be
265267
combined to form a single cube.
@@ -269,6 +271,13 @@ def group_structured_fields(field_iterator):
269271
* field_iterator (iterator of :class:`iris.fileformats.pp.PPField`):
270272
A source of PP or FF fields. N.B. order is significant.
271273
274+
Kwargs:
275+
276+
* collation_class (class):
277+
Type of collation wrapper to create from each group of fields.
278+
* collation_kwargs (dict):
279+
Additional constructor keywords for collation creation.
280+
272281
The function sorts and collates on phenomenon-relevant metadata only,
273282
defined as the field components: 'lbuser[3]' (stash), 'lbproc' (statistic),
274283
'lbuser[6]' (model).
@@ -285,8 +294,8 @@ def group_structured_fields(field_iterator):
285294
:func:`iris.fileformats.pp_load_rules._convert_time_coords`).
286295
287296
Returns:
288-
A generator of FieldCollation objects, each of which contains a single
289-
collated group from the input fields.
297+
A generator of 'collation_class' objects, each of which contains a
298+
single collated group from the input fields.
290299
291300
.. note::
292301
@@ -297,4 +306,4 @@ def group_structured_fields(field_iterator):
297306
"""
298307
_fields = sorted(field_iterator, key=_um_collation_key_function)
299308
for _, fields in itertools.groupby(_fields, _um_collation_key_function):
300-
yield FieldCollation(tuple(fields))
309+
yield collation_class(tuple(fields), **collation_kwargs)
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
# (C) British Crown Copyright 2018, Met Office
2+
#
3+
# This file is part of Iris.
4+
#
5+
# Iris is free software: you can redistribute it and/or modify it under
6+
# the terms of the GNU Lesser General Public License as published by the
7+
# Free Software Foundation, either version 3 of the License, or
8+
# (at your option) any later version.
9+
#
10+
# Iris is distributed in the hope that it will be useful,
11+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
# GNU Lesser General Public License for more details.
14+
#
15+
# You should have received a copy of the GNU Lesser General Public License
16+
# along with Iris. If not, see <http://www.gnu.org/licenses/>.
17+
"""
18+
Unit tests for the class
19+
:class:`iris.fileformats.um._fast_load.FieldCollation`.
20+
21+
This only tests the additional functionality for recording file locations of
22+
PPFields that make loaded cubes.
23+
The original class is the baseclass of this, now renamed 'BasicFieldCollation'.
24+
25+
"""
26+
27+
from __future__ import (absolute_import, division, print_function)
28+
from six.moves import (filter, input, map, range, zip) # noqa
29+
30+
# import iris tests first so that some things can be initialised
31+
# before importing anything else.
32+
import iris.tests as tests
33+
34+
import numpy as np
35+
36+
import iris
37+
38+
from iris.tests.integration.fast_load.test_fast_load import Mixin_FieldTest
39+
40+
41+
class TestFastCallbackLocationInfo(Mixin_FieldTest, tests.IrisTest):
42+
do_fast_loads = True
43+
44+
def setUp(self):
45+
# Call parent setup.
46+
super(TestFastCallbackLocationInfo, self).setUp()
47+
48+
# Create a basic load test case.
49+
self.callback_collations = []
50+
self.callback_filepaths = []
51+
52+
def fast_load_callback(cube, collation, filename):
53+
self.callback_collations.append(collation)
54+
self.callback_filepaths.append(filename)
55+
56+
flds = self.fields(c_t='11112222', c_h='11221122', phn='01010101')
57+
self.test_filepath = self.save_fieldcubes(flds)
58+
iris.load(self.test_filepath, callback=fast_load_callback)
59+
60+
def test_callback_collations_filepaths(self):
61+
self.assertEqual(len(self.callback_collations), 2)
62+
self.assertEqual(self.callback_collations[0].data_filepath,
63+
self.test_filepath)
64+
self.assertEqual(self.callback_collations[1].data_filepath,
65+
self.test_filepath)
66+
67+
def test_callback_collations_field_indices(self):
68+
self.assertEqual(
69+
self.callback_collations[0].data_field_indices.dtype, np.int64)
70+
self.assertArrayEqual(
71+
self.callback_collations[0].data_field_indices,
72+
[[1, 3], [5, 7]])
73+
74+
self.assertEqual(
75+
self.callback_collations[1].data_field_indices.dtype, np.int64)
76+
self.assertArrayEqual(
77+
self.callback_collations[1].data_field_indices,
78+
[[0, 2], [4, 6]])
79+
80+
81+
if __name__ == '__main__':
82+
tests.main()

0 commit comments

Comments
 (0)