183 changes: 183 additions & 0 deletions benchmarks/benchmarks/aggregate_collapse.py
@@ -0,0 +1,183 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the BSD license.
# See LICENSE in the root of the repository for full licensing details.
"""Benchmarks relating to :meth:`iris.cube.CubeList.merge` and ``concatenate``."""

import numpy as np

from iris import analysis, coords

from .generate_data.stock import realistic_4d_w_everything


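# Shared setup for the aggregation / collapse benchmark classes below; both the
# plain and the weighted benchmarks operate on the same stock cube and the same
# synthetic "aggregatable" coordinate.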
class AggregationMixin:
    params = [[False, True]]
    param_names = ["Lazy operations"]

    def setup(self, lazy_run: bool):
        # Use the realistic 4-D stock cube directly (rather than building one
        # by merging); ``lazy_run`` controls whether the data stays lazy.
        cube = realistic_4d_w_everything(lazy=lazy_run)
        self.lazy_run = lazy_run

        # Eight distinct values, each repeated 10 times: 80 points, giving
        # ``aggregated_by`` eight groups to aggregate over.
        agg_mln_data = np.arange(0, 80, 10)
        agg_mln_repeat = np.repeat(agg_mln_data, 10)

        agg_mln_coord = coords.AuxCoord(
            points=agg_mln_repeat, long_name="aggregatable"
        )

        if lazy_run:
            # Swap the coordinate points for their lazy equivalent.
            agg_mln_coord.points = agg_mln_coord.lazy_points()
        cube.add_aux_coord(agg_mln_coord, 1)
        self.cube = cube


class Aggregation(AggregationMixin):
    def time_aggregated_by_MEAN(self):
        self.cube.aggregated_by("aggregatable", analysis.MEAN)

    def time_aggregated_by_COUNT(self):
        # The predicate is applied to the whole data array, so it must be
        # vectorised; a chained comparison would fail on a numpy array.
        self.cube.aggregated_by(
            "aggregatable",
            analysis.COUNT,
            function=lambda values: (values > 280) & (values < 340),
        )

    def time_aggregated_by_GMEAN(self):
        self.cube.aggregated_by("aggregatable", analysis.GMEAN)

    def time_aggregated_by_HMEAN(self):
        self.cube.aggregated_by("aggregatable", analysis.HMEAN)

    def time_aggregated_by_MAX_RUN(self):
        self.cube.aggregated_by(
            "aggregatable",
            analysis.MAX_RUN,
            function=lambda values: (values > 280) & (values < 340),
        )

    def time_aggregated_by_MAX(self):
        self.cube.aggregated_by("aggregatable", analysis.MAX)

    def time_aggregated_by_MEDIAN(self):
        self.cube.aggregated_by("aggregatable", analysis.MEDIAN)

    def time_aggregated_by_MIN(self):
        self.cube.aggregated_by("aggregatable", analysis.MIN)

    def time_aggregated_by_PEAK(self):
        self.cube.aggregated_by("aggregatable", analysis.PEAK)

    def time_aggregated_by_PERCENTILE(self):
        self.cube.aggregated_by(
            "aggregatable", analysis.PERCENTILE, percent=[10, 50, 90]
        )

    def time_aggregated_by_FAST_PERCENTILE(self):
        self.cube.aggregated_by(
            "aggregatable",
            analysis.PERCENTILE,
            percent=[10, 50, 90],
            fast_percentile_method=True,
        )

    def time_aggregated_by_PROPORTION(self):
        self.cube.aggregated_by(
            "aggregatable",
            analysis.PROPORTION,
            function=lambda values: (values > 280) & (values < 340),
        )

    def time_aggregated_by_STD_DEV(self):
        self.cube.aggregated_by("aggregatable", analysis.STD_DEV)

    def time_aggregated_by_VARIANCE(self):
        self.cube.aggregated_by("aggregatable", analysis.VARIANCE)

    def time_aggregated_by_RMS(self):
        self.cube.aggregated_by("aggregatable", analysis.RMS)

    def time_collapsed_by_MEAN(self):
        self.cube.collapsed("latitude", analysis.MEAN)

    def time_collapsed_by_COUNT(self):
        self.cube.collapsed(
            "latitude",
            analysis.COUNT,
            function=lambda values: (values > 280) & (values < 340),
        )

    def time_collapsed_by_GMEAN(self):
        self.cube.collapsed("latitude", analysis.GMEAN)

    def time_collapsed_by_HMEAN(self):
        self.cube.collapsed("latitude", analysis.HMEAN)

    def time_collapsed_by_MAX_RUN(self):
        self.cube.collapsed(
            "latitude",
            analysis.MAX_RUN,
            function=lambda values: (values > 280) & (values < 340),
        )

    def time_collapsed_by_MAX(self):
        self.cube.collapsed("latitude", analysis.MAX)

    def time_collapsed_by_MEDIAN(self):
        self.cube.collapsed("latitude", analysis.MEDIAN)

    def time_collapsed_by_MIN(self):
        self.cube.collapsed("latitude", analysis.MIN)

    def time_collapsed_by_PEAK(self):
        self.cube.collapsed("latitude", analysis.PEAK)

    def time_collapsed_by_PERCENTILE(self):
        self.cube.collapsed("latitude", analysis.PERCENTILE, percent=[10, 50, 90])

    def time_collapsed_by_FAST_PERCENTILE(self):
        self.cube.collapsed(
            "latitude",
            analysis.PERCENTILE,
            percent=[10, 50, 90],
            fast_percentile_method=True,
        )

    def time_collapsed_by_PROPORTION(self):
        self.cube.collapsed(
            "latitude",
            analysis.PROPORTION,
            function=lambda values: (values > 280) & (values < 340),
        )

    def time_collapsed_by_STD_DEV(self):
        self.cube.collapsed("latitude", analysis.STD_DEV)

    def time_collapsed_by_VARIANCE(self):
        self.cube.collapsed("latitude", analysis.VARIANCE)

    def time_collapsed_by_RMS(self):
        self.cube.collapsed("latitude", analysis.RMS)


class WeightedAggregation(AggregationMixin):
    def setup(self, lazy_run):
        super().setup(lazy_run)
        # Simple 1-D weights; assumed to match the length of the dimension
        # being aggregated / collapsed.
        self.weights = np.linspace(0, 1, 100)

    def time_w_aggregated_by_WPERCENTILE(self):
        self.cube.aggregated_by(
            "aggregatable",
            analysis.WPERCENTILE,
            weights=self.weights,
            percent=[10, 50, 90],
        )

    def time_w_aggregated_by_SUM(self):
        self.cube.aggregated_by("aggregatable", analysis.SUM, weights=self.weights)

    def time_w_aggregated_by_RMS(self):
        self.cube.aggregated_by("aggregatable", analysis.RMS, weights=self.weights)

    def time_w_aggregated_by_MEAN(self):
        self.cube.aggregated_by("aggregatable", analysis.MEAN, weights=self.weights)

    def time_w_collapsed_by_WPERCENTILE(self):
        self.cube.collapsed(
            "latitude",
            analysis.WPERCENTILE,
            weights=self.weights,
            percent=[10, 50, 90],
        )

    def time_w_collapsed_by_SUM(self):
        self.cube.collapsed("latitude", analysis.SUM, weights=self.weights)

    def time_w_collapsed_by_RMS(self):
        self.cube.collapsed("latitude", analysis.RMS, weights=self.weights)

    def time_w_collapsed_by_MEAN(self):
        self.cube.collapsed("latitude", analysis.MEAN, weights=self.weights)
58 changes: 46 additions & 12 deletions lib/iris/cube.py
@@ -28,6 +28,7 @@
import numpy as np
import numpy.ma as ma

from iris import cube
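# (The loop variables below are renamed from ``cube`` to ``c`` so that they no
# longer shadow this module-level import.)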
import iris._constraints
from iris._data_manager import DataManager
import iris._lazy_data as _lazy
@@ -93,8 +94,8 @@ def from_cubes(cubes, constraints=None):
constraints = iris._constraints.list_of_constraints(constraints)
pairs = [_CubeFilter(constraint) for constraint in constraints]
collection = _CubeFilterCollection(pairs)
for cube in cubes:
collection.add_cube(cube)
for c in cubes:
collection.add_cube(c)
return collection

def __init__(self, pairs):
@@ -133,8 +134,8 @@ def __init__(self, *args, **kwargs):
# Do whatever a list does, to initialise ourself "as a list"
super().__init__(*args, **kwargs)
# Check that all items in the list are cubes.
for cube in self:
self._assert_is_cube(cube)
for c in self:
self._assert_is_cube(c)

def __str__(self):
"""Run short :meth:`Cube.summary` on every cube."""
@@ -309,9 +310,9 @@ def _extract_and_merge(cubes, constraints, strict=False, return_single_cube=Fals
constraint_groups = dict(
[(constraint, CubeList()) for constraint in constraints]
)
for cube in cubes:
for c in cubes:
for constraint, cube_list in constraint_groups.items():
sub_cube = constraint.extract(cube)
sub_cube = constraint.extract(c)
if sub_cube is not None:
cube_list.append(sub_cube)

@@ -395,8 +396,8 @@ def merge_cube(self):

# Register each of our cubes with a single ProtoCube.
proto_cube = iris._merge.ProtoCube(self[0])
for cube in self[1:]:
proto_cube.register(cube, error_on_mismatch=True)
for c in self[1:]:
proto_cube.register(c, error_on_mismatch=True)

# Extract the merged cube from the ProtoCube.
(merged_cube,) = proto_cube.merge()
@@ -472,18 +473,18 @@ def merge(self, unique=True):
"""
# Register each of our cubes with its appropriate ProtoCube.
proto_cubes_by_name = {}
for cube in self:
name = cube.standard_name
for c in self:
name = c.standard_name
proto_cubes = proto_cubes_by_name.setdefault(name, [])
proto_cube = None

for target_proto_cube in proto_cubes:
if target_proto_cube.register(cube):
if target_proto_cube.register(c):
proto_cube = target_proto_cube
break

if proto_cube is None:
proto_cube = iris._merge.ProtoCube(cube)
proto_cube = iris._merge.ProtoCube(c)
proto_cubes.append(proto_cube)

# Emulate Python 2 behaviour.
@@ -3175,8 +3176,41 @@ def create_coords(src_coords, add_coord):
add_coord(result_coord, dims)
coord_mapping[id(src_coord)] = result_coord

def create_metadata(src_metadatas, add_metadata, metadata_type):
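# Copies cell measures / ancillary variables onto the result cube,
# concatenating their underlying data along ``dim`` wherever they span the
# dimension being operated on.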
if metadata_type == "cell_measure":
metadata_search_object = cube.Cube.cell_measure
elif metadata_type == "ancillary_var":
metadata_search_object = cube.Cube.ancillary_variable
else:
raise ValueError(f"unexpected metadata_type: {metadata_type!r}")
for src_metadata in src_metadatas:
dims = src_metadata.cube_dims(self)
if dim in dims:
dim_within_coord = dims.index(dim)
data = np.concatenate(
[
metadata_search_object(
chunk, src_metadata.name()
).core_data()
for chunk in chunks
],
dim_within_coord,
)
result_coord = src_metadata.copy(values=data)
else:
result_coord = src_metadata.copy()
add_metadata(result_coord, dims)

create_coords(self.dim_coords, result.add_dim_coord)
create_coords(self.aux_coords, result.add_aux_coord)
create_metadata(
self.cell_measures(), result.add_cell_measure, "cell_measure"
)
create_metadata(
self.ancillary_variables(),
result.add_ancillary_variable,
"ancillary_var",
)
for factory in self.aux_factories:
result.add_aux_factory(factory.updated(coord_mapping))
return result