183 changes: 183 additions & 0 deletions benchmarks/benchmarks/aggregate_collapse.py
@@ -0,0 +1,183 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the BSD license.
# See LICENSE in the root of the repository for full licensing details.
"""Benchmarks relating to :meth:`iris.cube.CubeList.merge` and ``concatenate``."""

import numpy as np

from iris import analysis, coords

from .generate_data.stock import realistic_4d_w_everything


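# Shared setup for the aggregation / collapse benchmark classes below; both the
# plain and the weighted benchmarks operate on the same stock cube and the same
# synthetic "aggregatable" coordinate.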
class AggregationMixin:
    params = [[False, True]]
    param_names = ["Lazy operations"]

    def setup(self, lazy_run: bool):
        # Use the realistic 4-D stock cube directly (rather than building one
        # by merging); ``lazy_run`` controls whether the data stays lazy.
        cube = realistic_4d_w_everything(lazy=lazy_run)
        self.lazy_run = lazy_run

        # Eight distinct values, each repeated 10 times: 80 points, giving
        # ``aggregated_by`` eight groups to aggregate over.
        agg_mln_data = np.arange(0, 80, 10)
        agg_mln_repeat = np.repeat(agg_mln_data, 10)

        agg_mln_coord = coords.AuxCoord(
            points=agg_mln_repeat, long_name="aggregatable"
        )

        if lazy_run:
            # Swap the coordinate points for their lazy equivalent.
            agg_mln_coord.points = agg_mln_coord.lazy_points()
        cube.add_aux_coord(agg_mln_coord, 1)
        self.cube = cube


class Aggregation(AggregationMixin):
    def time_aggregated_by_MEAN(self):
        self.cube.aggregated_by("aggregatable", analysis.MEAN)

    def time_aggregated_by_COUNT(self):
        # The predicate is applied to the whole data array, so it must be
        # vectorised; a chained comparison would fail on a numpy array.
        self.cube.aggregated_by(
            "aggregatable",
            analysis.COUNT,
            function=lambda values: (values > 280) & (values < 340),
        )

    def time_aggregated_by_GMEAN(self):
        self.cube.aggregated_by("aggregatable", analysis.GMEAN)

    def time_aggregated_by_HMEAN(self):
        self.cube.aggregated_by("aggregatable", analysis.HMEAN)

    def time_aggregated_by_MAX_RUN(self):
        self.cube.aggregated_by(
            "aggregatable",
            analysis.MAX_RUN,
            function=lambda values: (values > 280) & (values < 340),
        )

    def time_aggregated_by_MAX(self):
        self.cube.aggregated_by("aggregatable", analysis.MAX)

    def time_aggregated_by_MEDIAN(self):
        self.cube.aggregated_by("aggregatable", analysis.MEDIAN)

    def time_aggregated_by_MIN(self):
        self.cube.aggregated_by("aggregatable", analysis.MIN)

    def time_aggregated_by_PEAK(self):
        self.cube.aggregated_by("aggregatable", analysis.PEAK)

    def time_aggregated_by_PERCENTILE(self):
        self.cube.aggregated_by(
            "aggregatable", analysis.PERCENTILE, percent=[10, 50, 90]
        )

    def time_aggregated_by_FAST_PERCENTILE(self):
        self.cube.aggregated_by(
            "aggregatable",
            analysis.PERCENTILE,
            percent=[10, 50, 90],
            fast_percentile_method=True,
        )

    def time_aggregated_by_PROPORTION(self):
        self.cube.aggregated_by(
            "aggregatable",
            analysis.PROPORTION,
            function=lambda values: (values > 280) & (values < 340),
        )

    def time_aggregated_by_STD_DEV(self):
        self.cube.aggregated_by("aggregatable", analysis.STD_DEV)

    def time_aggregated_by_VARIANCE(self):
        self.cube.aggregated_by("aggregatable", analysis.VARIANCE)

    def time_aggregated_by_RMS(self):
        self.cube.aggregated_by("aggregatable", analysis.RMS)

    def time_collapsed_by_MEAN(self):
        self.cube.collapsed("latitude", analysis.MEAN)

    def time_collapsed_by_COUNT(self):
        self.cube.collapsed(
            "latitude",
            analysis.COUNT,
            function=lambda values: (values > 280) & (values < 340),
        )

    def time_collapsed_by_GMEAN(self):
        self.cube.collapsed("latitude", analysis.GMEAN)

    def time_collapsed_by_HMEAN(self):
        self.cube.collapsed("latitude", analysis.HMEAN)

    def time_collapsed_by_MAX_RUN(self):
        self.cube.collapsed(
            "latitude",
            analysis.MAX_RUN,
            function=lambda values: (values > 280) & (values < 340),
        )

    def time_collapsed_by_MAX(self):
        self.cube.collapsed("latitude", analysis.MAX)

    def time_collapsed_by_MEDIAN(self):
        self.cube.collapsed("latitude", analysis.MEDIAN)

    def time_collapsed_by_MIN(self):
        self.cube.collapsed("latitude", analysis.MIN)

    def time_collapsed_by_PEAK(self):
        self.cube.collapsed("latitude", analysis.PEAK)

    def time_collapsed_by_PERCENTILE(self):
        self.cube.collapsed("latitude", analysis.PERCENTILE, percent=[10, 50, 90])

    def time_collapsed_by_FAST_PERCENTILE(self):
        self.cube.collapsed(
            "latitude",
            analysis.PERCENTILE,
            percent=[10, 50, 90],
            fast_percentile_method=True,
        )

    def time_collapsed_by_PROPORTION(self):
        self.cube.collapsed(
            "latitude",
            analysis.PROPORTION,
            function=lambda values: (values > 280) & (values < 340),
        )

    def time_collapsed_by_STD_DEV(self):
        self.cube.collapsed("latitude", analysis.STD_DEV)

    def time_collapsed_by_VARIANCE(self):
        self.cube.collapsed("latitude", analysis.VARIANCE)

    def time_collapsed_by_RMS(self):
        self.cube.collapsed("latitude", analysis.RMS)


class WeightedAggregation(AggregationMixin):
    def setup(self, lazy_run):
        super().setup(lazy_run)
        # Simple 1-D weights; assumed to match the length of the dimension
        # being aggregated / collapsed.
        self.weights = np.linspace(0, 1, 100)

    def time_w_aggregated_by_WPERCENTILE(self):
        self.cube.aggregated_by(
            "aggregatable",
            analysis.WPERCENTILE,
            weights=self.weights,
            percent=[10, 50, 90],
        )

    def time_w_aggregated_by_SUM(self):
        self.cube.aggregated_by("aggregatable", analysis.SUM, weights=self.weights)

    def time_w_aggregated_by_RMS(self):
        self.cube.aggregated_by("aggregatable", analysis.RMS, weights=self.weights)

    def time_w_aggregated_by_MEAN(self):
        self.cube.aggregated_by("aggregatable", analysis.MEAN, weights=self.weights)

    def time_w_collapsed_by_WPERCENTILE(self):
        self.cube.collapsed(
            "latitude",
            analysis.WPERCENTILE,
            weights=self.weights,
            percent=[10, 50, 90],
        )

    def time_w_collapsed_by_SUM(self):
        self.cube.collapsed("latitude", analysis.SUM, weights=self.weights)

    def time_w_collapsed_by_RMS(self):
        self.cube.collapsed("latitude", analysis.RMS, weights=self.weights)

    def time_w_collapsed_by_MEAN(self):
        self.cube.collapsed("latitude", analysis.MEAN, weights=self.weights)
58 changes: 46 additions & 12 deletions lib/iris/cube.py
@@ -28,6 +28,7 @@
import numpy as np
import numpy.ma as ma

from iris import cube
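# (The loop variables below are renamed from ``cube`` to ``c`` so that they no
# longer shadow this module-level import.)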
import iris._constraints
from iris._data_manager import DataManager
import iris._lazy_data as _lazy
@@ -93,8 +94,8 @@ def from_cubes(cubes, constraints=None):
constraints = iris._constraints.list_of_constraints(constraints)
pairs = [_CubeFilter(constraint) for constraint in constraints]
collection = _CubeFilterCollection(pairs)
for cube in cubes:
collection.add_cube(cube)
for c in cubes:
collection.add_cube(c)
return collection

def __init__(self, pairs):
@@ -133,8 +134,8 @@ def __init__(self, *args, **kwargs):
# Do whatever a list does, to initialise ourself "as a list"
super().__init__(*args, **kwargs)
# Check that all items in the list are cubes.
for cube in self:
self._assert_is_cube(cube)
for c in self:
self._assert_is_cube(c)

def __str__(self):
"""Run short :meth:`Cube.summary` on every cube."""
@@ -309,9 +310,9 @@ def _extract_and_merge(cubes, constraints, strict=False, return_single_cube=Fals
constraint_groups = dict(
[(constraint, CubeList()) for constraint in constraints]
)
for cube in cubes:
for c in cubes:
for constraint, cube_list in constraint_groups.items():
sub_cube = constraint.extract(cube)
sub_cube = constraint.extract(c)
if sub_cube is not None:
cube_list.append(sub_cube)

@@ -395,8 +396,8 @@ def merge_cube(self):

# Register each of our cubes with a single ProtoCube.
proto_cube = iris._merge.ProtoCube(self[0])
for cube in self[1:]:
proto_cube.register(cube, error_on_mismatch=True)
for c in self[1:]:
proto_cube.register(c, error_on_mismatch=True)

# Extract the merged cube from the ProtoCube.
(merged_cube,) = proto_cube.merge()
@@ -472,18 +473,18 @@ def merge(self, unique=True):
"""
# Register each of our cubes with its appropriate ProtoCube.
proto_cubes_by_name = {}
for cube in self:
name = cube.standard_name
for c in self:
name = c.standard_name
proto_cubes = proto_cubes_by_name.setdefault(name, [])
proto_cube = None

for target_proto_cube in proto_cubes:
if target_proto_cube.register(cube):
if target_proto_cube.register(c):
proto_cube = target_proto_cube
break

if proto_cube is None:
proto_cube = iris._merge.ProtoCube(cube)
proto_cube = iris._merge.ProtoCube(c)
proto_cubes.append(proto_cube)

# Emulate Python 2 behaviour.
@@ -3175,8 +3176,41 @@ def create_coords(src_coords, add_coord):
add_coord(result_coord, dims)
coord_mapping[id(src_coord)] = result_coord

def create_metadata(src_metadatas, add_metadata, metadata_type):
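# Copies cell measures / ancillary variables onto the result cube,
# concatenating their underlying data along ``dim`` wherever they span the
# dimension being operated on.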
if metadata_type == "cell_measure":
metadata_search_object = cube.Cube.cell_measure
elif metadata_type == "ancillary_var":
metadata_search_object = cube.Cube.ancillary_variable
else:
raise ValueError(f"unexpected metadata_type: {metadata_type!r}")
for src_metadata in src_metadatas:
dims = src_metadata.cube_dims(self)
if dim in dims:
dim_within_coord = dims.index(dim)
data = np.concatenate(
[
metadata_search_object(
chunk, src_metadata.name()
).core_data()
for chunk in chunks
],
dim_within_coord,
)
result_coord = src_metadata.copy(values=data)
else:
result_coord = src_metadata.copy()
add_metadata(result_coord, dims)

create_coords(self.dim_coords, result.add_dim_coord)
create_coords(self.aux_coords, result.add_aux_coord)
create_metadata(
self.cell_measures(), result.add_cell_measure, "cell_measure"
)
create_metadata(
self.ancillary_variables(),
result.add_ancillary_variable,
"ancillary_var",
)
for factory in self.aux_factories:
result.add_aux_factory(factory.updated(coord_mapping))
return result