SciTools · ESadek-MO · May 13, 2024 · May 7, 2024 · May 7, 2024 · May 7, 2024
diff --git a/benchmarks/README.md b/benchmarks/README.md
@@ -62,6 +62,23 @@ interest. Is set during the benchmark runner `cperf` and `sperf` sub-commands.
 
 [See the ASV docs](https://asv.readthedocs.io/) for full detail.
 
+### What benchmarks to write
+
+It is not possible to maintain a full suite of 'unit style' benchmarks:
+
+* Benchmarks take longer to run than tests.
+* Small benchmarks are more vulnerable to noise - they report a lot of
+  false-positive regressions.
+
+We therefore recommend writing benchmarks representing scripts or single
+operations that are likely to be run at the user level.
+
+The drawback of this approach is that a reported regression is less likely to
+reveal its root cause (e.g. a regression in coordinate creation time might be
+observed only as a regression in file loading time). Be prepared for manual
+investigations, and consider committing any useful benchmarks as
+[on-demand benchmarks](#on-demand-benchmarks) for future developers to use.
+
 ### Data generation
 **Important:** be sure not to use the benchmarking environment to generate any
 test objects/files, as this environment changes with each commit being
@@ -86,6 +103,10 @@ estimate run-time, and these will still be subject to the original problem.
 
 ### Scaling / non-Scaling Performance Differences
 
+**(We no longer advocate the approach below for benchmarks run during CI, given
+the limited available runtime and the risk of false positives. It remains
+useful for manual investigations.)**
+
 When comparing performance between commits/file-type/whatever it can be helpful
 to know if the differences exist in scaling or non-scaling parts of the Iris
 functionality in question. This can be done using a size parameter, setting

diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py
@@ -7,8 +7,6 @@
 from os import environ
 import resource
 
-ARTIFICIAL_DIM_SIZE = int(10e3)  # For all artificial cubes, coords etc.
-
 
 def disable_repeat_between_setup(benchmark_object):
     """Benchmark where object persistence would be inappropriate (decorator).

diff --git a/benchmarks/benchmarks/cube.py b/benchmarks/benchmarks/cube.py
@@ -4,249 +4,111 @@
 # See LICENSE in the root of the repository for full licensing details.
 """Cube benchmark tests."""
 
-import numpy as np
-
-from iris import analysis, aux_factory, coords, cube
-
-from . import ARTIFICIAL_DIM_SIZE, disable_repeat_between_setup
-from .generate_data.stock import sample_meshcoord
-
-
-def setup(*params):
-    """General variables needed by multiple benchmark classes."""
-    global data_1d
-    global data_2d
-    global general_cube
-
-    data_2d = np.zeros((ARTIFICIAL_DIM_SIZE,) * 2)
-    data_1d = data_2d[0]
-    general_cube = cube.Cube(data_2d)
-
-
-class ComponentCommon:
-    # TODO: once https://github.com/airspeed-velocity/asv/pull/828 is released:
-    #       * make class an ABC
-    #       * remove NotImplementedError
-    #       * combine setup_common into setup
-    """Run a generalised suite of benchmarks for cubes.
-
-    A base class running a generalised suite of benchmarks for cubes that
-    include a specified component (e.g. Coord, CellMeasure etc.). Component to
-    be specified in a subclass.
-
-    ASV will run the benchmarks within this class for any subclasses.
-
-    Should only be instantiated within subclasses, but cannot enforce this
-    since ASV cannot handle classes that include abstract methods.
-    """
-
-    def setup(self):
-        """Prevent ASV instantiating (must therefore override setup() in any subclasses.)."""
-        raise NotImplementedError
-
-    def create(self):
-        """Create a cube (generic).
-
-        cube_kwargs allow dynamic inclusion of different components;
-        specified in subclasses.
-        """
-        return cube.Cube(data=data_2d, **self.cube_kwargs)
-
-    def setup_common(self):
-        """Shared setup code that can be called by subclasses."""
-        self.cube = self.create()
-
-    def time_create(self):
-        """Create a cube that includes an instance of the benchmarked component."""
-        self.create()
-
-    def time_add(self):
-        """Add an instance of the benchmarked component to an existing cube."""
-        # Unable to create the copy during setup since this needs to be re-done
-        # for every repeat of the test (some components disallow duplicates).
-        general_cube_copy = general_cube.copy(data=data_2d)
-        self.add_method(general_cube_copy, *self.add_args)
-
-
-class Cube:
-    def time_basic(self):
-        cube.Cube(data_2d)
-
-    def time_rename(self):
-        general_cube.name = "air_temperature"
-
-
-class AuxCoord(ComponentCommon):
-    def setup(self):
-        self.coord_name = "test"
-        coord_bounds = np.array([data_1d - 1, data_1d + 1]).transpose()
-        aux_coord = coords.AuxCoord(
-            long_name=self.coord_name,
-            points=data_1d,
-            bounds=coord_bounds,
-            units="days since 1970-01-01",
-            climatological=True,
+from iris import coords
+from iris.cube import Cube
+
+from .generate_data.stock import realistic_4d_w_everything
+
+
+class CubeCreation:
+    params = [[False, True], ["instantiate", "construct"]]
+    param_names = ["Cube has mesh", "Cube creation strategy"]
+
+    cube_kwargs: dict
+
+    def setup(self, w_mesh: bool, _) -> None:
+        # Loaded as two cubes due to the hybrid height.
+        source_cube = realistic_4d_w_everything(w_mesh=w_mesh)
+
+        def get_coords_and_dims(
+            coords_tuple: tuple[coords._DimensionalMetadata, ...],
+        ) -> list[tuple[coords._DimensionalMetadata, tuple[int, ...]]]:
+            return [(c, c.cube_dims(source_cube)) for c in coords_tuple]
+
+        self.cube_kwargs = dict(
+            data=source_cube.data,
+            standard_name=source_cube.standard_name,
+            long_name=source_cube.long_name,
+            var_name=source_cube.var_name,
+            units=source_cube.units,
+            attributes=source_cube.attributes,
+            cell_methods=source_cube.cell_methods,
+            dim_coords_and_dims=get_coords_and_dims(source_cube.dim_coords),
+            aux_coords_and_dims=get_coords_and_dims(source_cube.aux_coords),
+            aux_factories=source_cube.aux_factories,
+            cell_measures_and_dims=get_coords_and_dims(source_cube.cell_measures()),
+            ancillary_variables_and_dims=get_coords_and_dims(
+                source_cube.ancillary_variables()
+            ),
         )
 
-        # Variables needed by the ComponentCommon base class.
-        self.cube_kwargs = {"aux_coords_and_dims": [(aux_coord, 0)]}
-        self.add_method = cube.Cube.add_aux_coord
-        self.add_args = (aux_coord, (0))
-
-        self.setup_common()
-
-    def time_return_coords(self):
-        self.cube.coords()
-
-    def time_return_coord_dims(self):
-        self.cube.coord_dims(self.coord_name)
-
-
-class AuxFactory(ComponentCommon):
-    def setup(self):
-        coord = coords.AuxCoord(points=data_1d, units="m")
-        self.hybrid_factory = aux_factory.HybridHeightFactory(delta=coord)
-
-        # Variables needed by the ComponentCommon base class.
-        self.cube_kwargs = {
-            "aux_coords_and_dims": [(coord, 0)],
-            "aux_factories": [self.hybrid_factory],
-        }
-
-        self.setup_common()
-
-        # Variables needed by the overridden time_add benchmark in this subclass.
-        cube_w_coord = self.cube.copy()
-        [cube_w_coord.remove_aux_factory(i) for i in cube_w_coord.aux_factories]
-        self.cube_w_coord = cube_w_coord
-
-    def time_add(self):
-        # Requires override from super().time_add because the cube needs an
-        # additional coord.
-        self.cube_w_coord.add_aux_factory(self.hybrid_factory)
-
-
-class CellMeasure(ComponentCommon):
-    def setup(self):
-        cell_measure = coords.CellMeasure(data_1d)
-
-        # Variables needed by the ComponentCommon base class.
-        self.cube_kwargs = {"cell_measures_and_dims": [(cell_measure, 0)]}
-        self.add_method = cube.Cube.add_cell_measure
-        self.add_args = (cell_measure, 0)
-
-        self.setup_common()
-
-
-class CellMethod(ComponentCommon):
-    def setup(self):
-        cell_method = coords.CellMethod("test")
-
-        # Variables needed by the ComponentCommon base class.
-        self.cube_kwargs = {"cell_methods": [cell_method]}
-        self.add_method = cube.Cube.add_cell_method
-        self.add_args = [cell_method]
-
-        self.setup_common()
-
-
-class AncillaryVariable(ComponentCommon):
-    def setup(self):
-        ancillary_variable = coords.AncillaryVariable(data_1d)
-
-        # Variables needed by the ComponentCommon base class.
-        self.cube_kwargs = {"ancillary_variables_and_dims": [(ancillary_variable, 0)]}
-        self.add_method = cube.Cube.add_ancillary_variable
-        self.add_args = (ancillary_variable, 0)
-
-        self.setup_common()
-
-
-class MeshCoord:
+    def time_create(self, _, cube_creation_strategy: str) -> None:
+        if cube_creation_strategy == "instantiate":
+            _ = Cube(**self.cube_kwargs)
+
+        elif cube_creation_strategy == "construct":
+            new_cube = Cube(data=self.cube_kwargs["data"])
+            new_cube.standard_name = self.cube_kwargs["standard_name"]
+            new_cube.long_name = self.cube_kwargs["long_name"]
+            new_cube.var_name = self.cube_kwargs["var_name"]
+            new_cube.units = self.cube_kwargs["units"]
+            new_cube.attributes = self.cube_kwargs["attributes"]
+            new_cube.cell_methods = self.cube_kwargs["cell_methods"]
+            for coord, dims in self.cube_kwargs["dim_coords_and_dims"]:
+                coord: coords.DimCoord  # Type hint to help linters.
+                new_cube.add_dim_coord(coord, dims)
+            for coord, dims in self.cube_kwargs["aux_coords_and_dims"]:
+                new_cube.add_aux_coord(coord, dims)
+            for aux_factory in self.cube_kwargs["aux_factories"]:
+                new_cube.add_aux_factory(aux_factory)
+            for cell_measure, dims in self.cube_kwargs["cell_measures_and_dims"]:
+                new_cube.add_cell_measure(cell_measure, dims)
+            for ancillary_variable, dims in self.cube_kwargs[
+                "ancillary_variables_and_dims"
+            ]:
+                new_cube.add_ancillary_variable(ancillary_variable, dims)
+
+        else:
+            message = f"Unknown cube creation strategy: {cube_creation_strategy}"
+            raise NotImplementedError(message)
+
+
+class CubeEquality:
     params = [
-        6,  # minimal cube-sphere
-        int(1e6),  # realistic cube-sphere size
-        ARTIFICIAL_DIM_SIZE,  # To match size in :class:`AuxCoord`
+        [False, True],
+        [False, True],
+        ["metadata_inequality", "coord_inequality", "data_inequality", "all_equal"],
     ]
-    param_names = ["number of faces"]
-
-    def setup(self, n_faces):
-        mesh_kwargs = dict(n_nodes=n_faces + 2, n_edges=n_faces * 2, n_faces=n_faces)
-
-        self.mesh_coord = sample_meshcoord(sample_mesh_kwargs=mesh_kwargs)
-        self.data = np.zeros(n_faces)
-        self.cube_blank = cube.Cube(data=self.data)
-        self.cube = self.create()
-
-    def create(self):
-        return cube.Cube(data=self.data, aux_coords_and_dims=[(self.mesh_coord, 0)])
-
-    def time_create(self, n_faces):
-        _ = self.create()
-
-    @disable_repeat_between_setup
-    def time_add(self, n_faces):
-        self.cube_blank.add_aux_coord(self.mesh_coord, 0)
-
-    @disable_repeat_between_setup
-    def time_remove(self, n_faces):
-        self.cube.remove_coord(self.mesh_coord)
-
-
-class Merge:
-    def setup(self):
-        self.cube_list = cube.CubeList()
-        for i in np.arange(2):
-            i_cube = general_cube.copy()
-            i_coord = coords.AuxCoord([i])
-            i_cube.add_aux_coord(i_coord)
-            self.cube_list.append(i_cube)
-
-    def time_merge(self):
-        self.cube_list.merge()
-
-
-class Concatenate:
-    def setup(self):
-        dim_size = ARTIFICIAL_DIM_SIZE
-        self.cube_list = cube.CubeList()
-        for i in np.arange(dim_size * 2, step=dim_size):
-            i_cube = general_cube.copy()
-            i_coord = coords.DimCoord(np.arange(dim_size) + (i * dim_size))
-            i_cube.add_dim_coord(i_coord, 0)
-            self.cube_list.append(i_cube)
-
-    def time_concatenate(self):
-        self.cube_list.concatenate()
-
-
-class Equality:
-    def setup(self):
-        self.cube_a = general_cube.copy()
-        self.cube_b = general_cube.copy()
-
-        aux_coord = coords.AuxCoord(data_1d)
-        self.cube_a.add_aux_coord(aux_coord, 0)
-        self.cube_b.add_aux_coord(aux_coord, 1)
-
-    def time_equality(self):
-        self.cube_a == self.cube_b
-
-
-class Aggregation:
-    def setup(self):
-        repeat_number = 10
-        repeat_range = range(int(ARTIFICIAL_DIM_SIZE / repeat_number))
-        array_repeat = np.repeat(repeat_range, repeat_number)
-        array_unique = np.arange(len(array_repeat))
-
-        coord_repeat = coords.AuxCoord(points=array_repeat, long_name="repeat")
-        coord_unique = coords.DimCoord(points=array_unique, long_name="unique")
-
-        local_cube = general_cube.copy()
-        local_cube.add_aux_coord(coord_repeat, 0)
-        local_cube.add_dim_coord(coord_unique, 0)
-        self.cube = local_cube
-
-    def time_aggregated_by(self):
-        self.cube.aggregated_by("repeat", analysis.MEAN)
+    param_names = ["Cubes are lazy", "Cubes have meshes", "Scenario"]
+
+    cube_1: Cube
+    cube_2: Cube
+    coord_name = "surface_altitude"
+
+    def setup(self, lazy: bool, w_mesh: bool, scenario: str) -> None:
+        self.cube_1 = realistic_4d_w_everything(w_mesh=w_mesh, lazy=lazy)
+        # Using Cube.copy() produces different results due to sharing of the
+        #  Mesh instance.
+        self.cube_2 = realistic_4d_w_everything(w_mesh=w_mesh, lazy=lazy)
+
+        match scenario:
+            case "metadata_inequality":
+                self.cube_2.long_name = "different"
+            case "coord_inequality":
+                coord = self.cube_2.coord(self.coord_name)
+                coord.points = coord.core_points() * 2
+            case "data_inequality":
+                self.cube_2.data = self.cube_2.core_data() * 2
+            case "all_equal":
+                pass
+            case _:
+                message = f"Unknown scenario: {scenario}"
+                raise NotImplementedError(message)
+
+    def time_equality(self, lazy: bool, __, ___) -> None:
+        _ = self.cube_1 == self.cube_2
+        if lazy:
+            for cube in (self.cube_1, self.cube_2):
+                # Confirm that this benchmark is safe for repetition.
+                assert cube.coord(self.coord_name).has_lazy_points()
+                assert cube.has_lazy_data()