Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions benchmarks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,23 @@ interest. Is set during the benchmark runner `cperf` and `sperf` sub-commands.

[See the ASV docs](https://asv.readthedocs.io/) for full detail.

### What benchmarks to write

It is not possible to maintain a full suite of 'unit style' benchmarks:

* Benchmarks take longer to run than tests.
* Small benchmarks are more vulnerable to noise - they report many
  false-positive regressions.

We therefore recommend writing benchmarks representing scripts or single
operations that are likely to be run at the user level.

The drawback of this approach: a reported regression is less likely to reveal
the root cause (e.g. a regression in coordinate creation time observed only
as a regression in file loading time). Be prepared for manual investigations,
and consider committing any useful benchmarks as
[on-demand benchmarks](#on-demand-benchmarks) for future developers to use.

### Data generation
**Important:** be sure not to use the benchmarking environment to generate any
test objects/files, as this environment changes with each commit being
Expand All @@ -86,6 +103,10 @@ estimate run-time, and these will still be subject to the original problem.

### Scaling / non-Scaling Performance Differences

**(We no longer advocate the approach below for benchmarks run during CI,
given the limited available runtime and the risk of false positives. It
remains useful for manual investigations.)**

When comparing performance between commits/file-type/whatever it can be helpful
to know if the differences exist in scaling or non-scaling parts of the Iris
functionality in question. This can be done using a size parameter, setting
Expand Down
2 changes: 0 additions & 2 deletions benchmarks/benchmarks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
from os import environ
import resource

# 10e3 is the float 10000.0, so this evaluates to the integer 10000.
ARTIFICIAL_DIM_SIZE = int(10e3)  # For all artificial cubes, coords etc.


def disable_repeat_between_setup(benchmark_object):
"""Benchmark where object persistence would be inappropriate (decorator).
Expand Down
346 changes: 104 additions & 242 deletions benchmarks/benchmarks/cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,249 +4,111 @@
# See LICENSE in the root of the repository for full licensing details.
"""Cube benchmark tests."""

import numpy as np

from iris import analysis, aux_factory, coords, cube

from . import ARTIFICIAL_DIM_SIZE, disable_repeat_between_setup
from .generate_data.stock import sample_meshcoord


def setup(*params):
    """Create the module-level data shared by several benchmark classes.

    Rebinds three globals:

    * ``data_2d`` - a square array of zeros with ``ARTIFICIAL_DIM_SIZE``
      points along each side.
    * ``data_1d`` - the first row of ``data_2d``.
    * ``general_cube`` - a cube wrapping ``data_2d``.

    ``params`` is accepted but not used.
    """
    global data_1d, data_2d, general_cube

    square_shape = (ARTIFICIAL_DIM_SIZE, ARTIFICIAL_DIM_SIZE)
    data_2d = np.zeros(square_shape)
    data_1d = data_2d[0]
    general_cube = cube.Cube(data_2d)


class ComponentCommon:
    # TODO: once https://github.com/airspeed-velocity/asv/pull/828 is released:
    #  * make class an ABC
    #  * remove NotImplementedError
    #  * combine setup_common into setup
    """Generalised cube benchmarks, shared by one subclass per component.

    Each subclass picks the component under test (e.g. Coord, CellMeasure
    etc.) by assigning ``cube_kwargs``, ``add_method`` and ``add_args`` in
    its own ``setup()``, and then calls :meth:`setup_common`.

    ASV will run the benchmarks within this class for any subclasses.

    Only subclasses are meant to be instantiated. This cannot be enforced
    with abstract methods, since ASV cannot handle classes that include
    them - hence the ``setup()`` that always raises.
    """

    def setup(self):
        """Block direct use of the base class; subclasses must override this."""
        raise NotImplementedError

    def setup_common(self):
        """Store a freshly created cube on ``self.cube`` (call from subclass setup)."""
        self.cube = self.create()

    def create(self):
        """Return a new cube over the shared 2D data.

        The component(s) to include come from ``self.cube_kwargs``, which the
        subclass is expected to have set.
        """
        component_kwargs = self.cube_kwargs
        return cube.Cube(data=data_2d, **component_kwargs)

    def time_create(self):
        """Time creating a cube that includes the benchmarked component."""
        self.create()

    def time_add(self):
        """Time adding the benchmarked component to an existing cube."""
        # The copy is made here rather than in setup because it must be
        # redone for every repeat (some components disallow duplicates).
        target_cube = general_cube.copy(data=data_2d)
        add_component = self.add_method
        add_component(target_cube, *self.add_args)


class Cube:
    """Benchmarks for the most basic cube operations."""

    def time_basic(self):
        """Time creating a cube from the shared 2D data alone."""
        cube.Cube(data_2d)

    def time_rename(self):
        """Time renaming the shared module-level cube.

        Uses ``rename()``: on an Iris cube ``name`` is a method, so assigning
        a string to ``general_cube.name`` would replace that method on the
        instance instead of renaming the cube.
        """
        general_cube.rename("air_temperature")


class AuxCoord(ComponentCommon):
def setup(self):
self.coord_name = "test"
coord_bounds = np.array([data_1d - 1, data_1d + 1]).transpose()
aux_coord = coords.AuxCoord(
long_name=self.coord_name,
points=data_1d,
bounds=coord_bounds,
units="days since 1970-01-01",
climatological=True,
from iris import coords
from iris.cube import Cube

from .generate_data.stock import realistic_4d_w_everything


class CubeCreation:
params = [[False, True], ["instantiate", "construct"]]
param_names = ["Cube has mesh", "Cube creation strategy"]

cube_kwargs: dict

def setup(self, w_mesh: bool, _) -> None:
    """Collect the keyword arguments needed to rebuild a realistic cube.

    A source cube is generated (with or without a mesh, per ``w_mesh``) and
    its data, metadata and components are gathered into ``self.cube_kwargs``.
    The second parameter is not used here.
    """
    # Loaded as two cubes due to the hybrid height.
    source_cube = realistic_4d_w_everything(w_mesh=w_mesh)

    def get_coords_and_dims(
        coords_tuple: tuple[coords._DimensionalMetadata, ...],
    ) -> list[tuple[coords._DimensionalMetadata, tuple[int, ...]]]:
        """Pair each component with the source-cube dimensions it spans."""
        paired = []
        for component in coords_tuple:
            paired.append((component, component.cube_dims(source_cube)))
        return paired

    self.cube_kwargs = {
        "data": source_cube.data,
        "standard_name": source_cube.standard_name,
        "long_name": source_cube.long_name,
        "var_name": source_cube.var_name,
        "units": source_cube.units,
        "attributes": source_cube.attributes,
        "cell_methods": source_cube.cell_methods,
        "dim_coords_and_dims": get_coords_and_dims(source_cube.dim_coords),
        "aux_coords_and_dims": get_coords_and_dims(source_cube.aux_coords),
        "aux_factories": source_cube.aux_factories,
        "cell_measures_and_dims": get_coords_and_dims(
            source_cube.cell_measures()
        ),
        "ancillary_variables_and_dims": get_coords_and_dims(
            source_cube.ancillary_variables()
        ),
    }

# Variables needed by the ComponentCommon base class.
self.cube_kwargs = {"aux_coords_and_dims": [(aux_coord, 0)]}
self.add_method = cube.Cube.add_aux_coord
self.add_args = (aux_coord, (0))

self.setup_common()

def time_return_coords(self):
    """Time calling ``coords()`` with no arguments on the prepared cube."""
    self.cube.coords()

def time_return_coord_dims(self):
    """Time looking up, by name, which cube dimensions the test coord spans."""
    self.cube.coord_dims(self.coord_name)


class AuxFactory(ComponentCommon):
    """Run the common component benchmarks with a hybrid-height aux factory."""

    def setup(self):
        """Build the factory, the base-class cube, and a factory-free copy."""
        coord = coords.AuxCoord(points=data_1d, units="m")
        self.hybrid_factory = aux_factory.HybridHeightFactory(delta=coord)

        # Variables needed by the ComponentCommon base class.
        self.cube_kwargs = {
            "aux_coords_and_dims": [(coord, 0)],
            "aux_factories": [self.hybrid_factory],
        }

        self.setup_common()

        # Variables needed by the overridden time_add benchmark in this subclass.
        cube_w_coord = self.cube.copy()
        # A plain loop: the removals are wanted only for their side effect,
        # so there is no reason to build a list of their return values.
        for factory in cube_w_coord.aux_factories:
            cube_w_coord.remove_aux_factory(factory)
        self.cube_w_coord = cube_w_coord

    def time_add(self):
        """Time adding the hybrid-height factory to the prepared cube."""
        # Requires override from super().time_add because the cube needs an
        # additional coord.
        self.cube_w_coord.add_aux_factory(self.hybrid_factory)


class CellMeasure(ComponentCommon):
    """Run the common component benchmarks with a cell measure."""

    def setup(self):
        """Create the cell measure and the attributes the base class reads."""
        measure = coords.CellMeasure(data_1d)

        # Attributes consumed by the ComponentCommon base class.
        self.cube_kwargs = dict(cell_measures_and_dims=[(measure, 0)])
        self.add_method = cube.Cube.add_cell_measure
        self.add_args = (measure, 0)

        self.setup_common()


class CellMethod(ComponentCommon):
    """Run the common component benchmarks with a cell method."""

    def setup(self):
        """Create the cell method and the attributes the base class reads."""
        method = coords.CellMethod("test")

        # Attributes consumed by the ComponentCommon base class.
        self.cube_kwargs = dict(cell_methods=[method])
        self.add_method = cube.Cube.add_cell_method
        self.add_args = [method]

        self.setup_common()


class AncillaryVariable(ComponentCommon):
    """Run the common component benchmarks with an ancillary variable."""

    def setup(self):
        """Create the ancillary variable and the attributes the base class reads."""
        variable = coords.AncillaryVariable(data_1d)

        # Attributes consumed by the ComponentCommon base class.
        self.cube_kwargs = dict(ancillary_variables_and_dims=[(variable, 0)])
        self.add_method = cube.Cube.add_ancillary_variable
        self.add_args = (variable, 0)

        self.setup_common()


class MeshCoord:
def time_create(self, _, cube_creation_strategy: str) -> None:
if cube_creation_strategy == "instantiate":
_ = Cube(**self.cube_kwargs)

elif cube_creation_strategy == "construct":
new_cube = Cube(data=self.cube_kwargs["data"])
new_cube.standard_name = self.cube_kwargs["standard_name"]
new_cube.long_name = self.cube_kwargs["long_name"]
new_cube.var_name = self.cube_kwargs["var_name"]
new_cube.units = self.cube_kwargs["units"]
new_cube.attributes = self.cube_kwargs["attributes"]
new_cube.cell_methods = self.cube_kwargs["cell_methods"]
for coord, dims in self.cube_kwargs["dim_coords_and_dims"]:
coord: coords.DimCoord # Type hint to help linters.
new_cube.add_dim_coord(coord, dims)
for coord, dims in self.cube_kwargs["aux_coords_and_dims"]:
new_cube.add_aux_coord(coord, dims)
for aux_factory in self.cube_kwargs["aux_factories"]:
new_cube.add_aux_factory(aux_factory)
for cell_measure, dims in self.cube_kwargs["cell_measures_and_dims"]:
new_cube.add_cell_measure(cell_measure, dims)
for ancillary_variable, dims in self.cube_kwargs[
"ancillary_variables_and_dims"
]:
new_cube.add_ancillary_variable(ancillary_variable, dims)

else:
message = f"Unknown cube creation strategy: {cube_creation_strategy}"
raise NotImplementedError(message)


class CubeEquality:
params = [
6, # minimal cube-sphere
int(1e6), # realistic cube-sphere size
ARTIFICIAL_DIM_SIZE, # To match size in :class:`AuxCoord`
[False, True],
[False, True],
["metadata_inequality", "coord_inequality", "data_inequality", "all_equal"],
]
param_names = ["number of faces"]

def setup(self, n_faces):
    """Prepare a mesh coord plus a blank cube and a cube that includes it.

    The sample mesh is requested with ``n_faces`` faces, ``n_faces + 2``
    nodes and ``n_faces * 2`` edges; the cube data is a 1D array of zeros of
    length ``n_faces``.
    """
    mesh_kwargs = {
        "n_nodes": n_faces + 2,
        "n_edges": n_faces * 2,
        "n_faces": n_faces,
    }
    self.mesh_coord = sample_meshcoord(sample_mesh_kwargs=mesh_kwargs)

    zeros = np.zeros(n_faces)
    self.data = zeros
    self.cube_blank = cube.Cube(data=zeros)
    self.cube = self.create()

def create(self):
    """Return a new cube over ``self.data`` with the mesh coord on dimension 0."""
    return cube.Cube(data=self.data, aux_coords_and_dims=[(self.mesh_coord, 0)])

def time_create(self, n_faces):
    """Time creating a cube that includes the mesh coord (``n_faces`` unused here)."""
    _ = self.create()

@disable_repeat_between_setup
def time_add(self, n_faces):
    """Time adding the mesh coord to the blank cube on dimension 0."""
    # NOTE(review): presumably decorated because the coord cannot be added
    # twice to the same cube without a fresh setup - confirm.
    self.cube_blank.add_aux_coord(self.mesh_coord, 0)

@disable_repeat_between_setup
def time_remove(self, n_faces):
    """Time removing the mesh coord from the cube that was created with it."""
    # NOTE(review): presumably decorated because the coord can only be
    # removed once per setup - confirm.
    self.cube.remove_coord(self.mesh_coord)


class Merge:
    """Benchmark merging two cubes that differ only in a scalar aux coord."""

    def setup(self):
        """Build a two-cube list; each cube gets a one-point aux coord (0, then 1)."""
        cubes = cube.CubeList()
        for index in np.arange(2):
            member = general_cube.copy()
            member.add_aux_coord(coords.AuxCoord([index]))
            cubes.append(member)
        self.cube_list = cubes

    def time_merge(self):
        """Time merging the prepared cube list."""
        self.cube_list.merge()


class Concatenate:
    """Benchmark concatenating two cubes along their first dimension."""

    def setup(self):
        """Build two copies of the shared cube with distinct dim coords on dim 0.

        The loop variable takes the values ``0`` and ``ARTIFICIAL_DIM_SIZE``,
        and each coord's points are offset by that value multiplied by
        ``ARTIFICIAL_DIM_SIZE`` again.
        """
        length = ARTIFICIAL_DIM_SIZE
        pieces = cube.CubeList()
        for start in np.arange(length * 2, step=length):
            piece = general_cube.copy()
            # NOTE(review): the offset is start * length, so the second coord
            # begins at length ** 2 rather than at length - confirm intended.
            points = np.arange(length) + (start * length)
            piece.add_dim_coord(coords.DimCoord(points), 0)
            pieces.append(piece)
        self.cube_list = pieces

    def time_concatenate(self):
        """Time concatenating the prepared cube list."""
        self.cube_list.concatenate()


class Equality:
    """Benchmark comparing two cubes that carry the same coord on different dims."""

    def setup(self):
        """Copy the shared cube twice; attach one aux coord to dim 0 and dim 1."""
        first = general_cube.copy()
        second = general_cube.copy()

        shared_coord = coords.AuxCoord(data_1d)
        first.add_aux_coord(shared_coord, 0)
        second.add_aux_coord(shared_coord, 1)

        self.cube_a = first
        self.cube_b = second

    def time_equality(self):
        """Time the ``==`` comparison; the result is deliberately discarded."""
        self.cube_a == self.cube_b


class Aggregation:
    """Benchmark ``aggregated_by`` over a coord with repeated values."""

    def setup(self):
        """Attach a repeating aux coord and a unique dim coord to dim 0 of a cube copy."""
        repeat_number = 10
        group_count = int(ARTIFICIAL_DIM_SIZE / repeat_number)

        # Each group index appears repeat_number times in a row.
        repeated_points = np.repeat(range(group_count), repeat_number)
        unique_points = np.arange(len(repeated_points))

        prepared = general_cube.copy()
        prepared.add_aux_coord(
            coords.AuxCoord(points=repeated_points, long_name="repeat"), 0
        )
        prepared.add_dim_coord(
            coords.DimCoord(points=unique_points, long_name="unique"), 0
        )
        self.cube = prepared

    def time_aggregated_by(self):
        """Time a MEAN aggregation grouped by the "repeat" coord."""
        self.cube.aggregated_by("repeat", analysis.MEAN)
param_names = ["Cubes are lazy", "Cubes have meshes", "Scenario"]

cube_1: Cube
cube_2: Cube
coord_name = "surface_altitude"

def setup(self, lazy: bool, w_mesh: bool, scenario: str) -> None:
    """Generate two cubes and, depending on ``scenario``, make them differ.

    ``"metadata_inequality"`` changes the second cube's ``long_name``,
    ``"coord_inequality"`` doubles the points of its ``coord_name`` coord,
    ``"data_inequality"`` doubles its data, and ``"all_equal"`` changes
    nothing. Any other scenario raises ``NotImplementedError``.
    """
    self.cube_1 = realistic_4d_w_everything(w_mesh=w_mesh, lazy=lazy)
    # Using Cube.copy() produces different results due to sharing of the
    # Mesh instance.
    self.cube_2 = realistic_4d_w_everything(w_mesh=w_mesh, lazy=lazy)

    if scenario == "metadata_inequality":
        self.cube_2.long_name = "different"
    elif scenario == "coord_inequality":
        altered_coord = self.cube_2.coord(self.coord_name)
        altered_coord.points = altered_coord.core_points() * 2
    elif scenario == "data_inequality":
        self.cube_2.data = self.cube_2.core_data() * 2
    elif scenario == "all_equal":
        pass
    else:
        message = f"Unknown scenario: {scenario}"
        raise NotImplementedError(message)

def time_equality(self, lazy: bool, __, ___) -> None:
    """Time comparing the two prepared cubes with ``==``.

    When ``lazy`` is true, both cubes are then checked to still have lazy
    coord points and lazy data. The last two parameters are not used.
    """
    _ = self.cube_1 == self.cube_2
    if not lazy:
        return

    # Confirm that this benchmark is safe for repetition.
    for compared in (self.cube_1, self.cube_2):
        assert compared.coord(self.coord_name).has_lazy_points()
        assert compared.has_lazy_data()
Loading