From 617628ff10840a438f7616605636161cb0ebbee1 Mon Sep 17 00:00:00 2001
From: Illviljan <14371165+Illviljan@users.noreply.github.com>
Date: Tue, 24 Jan 2023 19:48:09 +0100
Subject: [PATCH] Add benchmarks for to_dataframe and to_dask_dataframe

---
 asv_bench/benchmarks/pandas.py | 37 +++++++++++++++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

diff --git a/asv_bench/benchmarks/pandas.py b/asv_bench/benchmarks/pandas.py
index 8aaa515d417..2a296ecc4d0 100644
--- a/asv_bench/benchmarks/pandas.py
+++ b/asv_bench/benchmarks/pandas.py
@@ -3,7 +3,7 @@
 
 import xarray as xr
 
-from . import parameterized
+from . import parameterized, requires_dask
 
 
 class MultiIndexSeries:
@@ -24,3 +24,38 @@ def setup(self, dtype, subset):
     @parameterized(["dtype", "subset"], ([int, float], [True, False]))
     def time_from_series(self, dtype, subset):
         xr.DataArray.from_series(self.series)
+
+
+class ToDataFrame:
+    def setup(self, *args, **kwargs):
+        xp = kwargs.get("xp", np)
+        random_kws = kwargs.get("random_kws", {})
+        method = kwargs.get("method", "to_dataframe")
+
+        dim1 = 10_000
+        dim2 = 10_000
+        ds = xr.Dataset(
+            {
+                "x": xr.DataArray(
+                    data=xp.random.random((dim1, dim2), **random_kws),
+                    dims=["dim1", "dim2"],
+                    coords={"dim1": np.arange(0, dim1), "dim2": np.arange(0, dim2)},
+                )
+            }
+        )
+        self.to_frame = getattr(ds, method)
+
+    def time_to_dataframe(self):
+        self.to_frame()
+
+    def peakmem_to_dataframe(self):
+        self.to_frame()
+
+
+class ToDataFrameDask(ToDataFrame):
+    def setup(self, *args, **kwargs):
+        requires_dask()
+
+        import dask.array as da
+
+        super().setup(xp=da, random_kws=dict(chunks=5000), method="to_dask_dataframe")