Skip to content

Commit 49746ef

Browse files
authored
Breaking: add combine method for groupby output, fixing similar for AbstractDimStack (#903)
* add combine method * test groupby and similar * docs entry
1 parent 685efd7 commit 49746ef

File tree

9 files changed

+135
-24
lines changed

9 files changed

+135
-24
lines changed

docs/src/api/reference.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ For transforming DimensionalData objects:
6969

7070
```@docs
7171
groupby
72+
combine
7273
DimensionalData.DimGroupByArray
7374
Bins
7475
ranges

src/DimensionalData.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ export dimnum, hasdim, hasselection, otherdims
8383
export set, rebuild, reorder, modify, broadcast_dims, broadcast_dims!,
8484
mergedims, unmergedims, maplayers
8585

86-
export groupby, seasons, months, hours, intervals, ranges
86+
export groupby, combine, seasons, months, hours, intervals, ranges
8787

8888

8989
export @d

src/array/methods.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -421,7 +421,7 @@ function _check_cat_lookups(D, ::Regular, lookups...)
421421
@warn _cat_warn_string(D, "step sizes $(step(span(l))) and $s do not match")
422422
return false
423423
end
424-
if !(lastval + s ≈ first(l))
424+
if !(s isa Dates.AbstractTime) && !(lastval + s ≈ first(l))
425425
@warn _cat_warn_string(D, "`Regular` lookups do not join with the correct step size: $(lastval) + $s ≈ $(first(l)) should hold")
426426
return false
427427
end

src/groupby.jl

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,6 @@ Group some data along the time dimension:
249249
250250
```jldoctest groupby; setup = :(using Random; Random.seed!(123))
251251
julia> using DimensionalData, Dates
252-
253252
julia> A = rand(X(1:0.1:20), Y(1:20), Ti(DateTime(2000):Day(3):DateTime(2003)));
254253
255254
julia> groups = groupby(A, Ti => month) # Group by month
@@ -356,6 +355,7 @@ end
356355
function _group_indices(dim::Dimension, f::Base.Callable; labels=nothing)
357356
orig_lookup = lookup(dim)
358357
k1 = f(first(orig_lookup))
358+
# TODO: using a Dict here is a bit slow
359359
indices_dict = Dict{typeof(k1),Vector{Int}}()
360360
for (i, x) in enumerate(orig_lookup)
361361
k = f(x)
@@ -447,11 +447,56 @@ end
447447
448448
Generate a `Vector` of `IntervalSets.Interval{:closed,:open}`, each of width `step(A)`
449449
"""
450-
intervals(rng::AbstractRange) = IntervalSets.Interval{:closed,:open}.(rng, rng .+ step(rng))
450+
intervals(rng::AbstractRange) =
451+
IntervalSets.Interval{:closed,:open}.(rng, rng .+ step(rng))
451452

452453
"""
453454
ranges(A::AbstractRange{<:Integer})
454455
455456
Generate a `Vector` of `UnitRange` with length `step(A)`
456457
"""
457458
# One `UnitRange` per element of `rng`, each starting there and spanning `step(rng)` integers.
ranges(rng::AbstractRange{<:Integer}) = [lo:(lo + step(rng) - 1) for lo in rng]
459+
460+
"""
461+
combine(f::Function, gb::DimGroupByArray; dims=:)
462+
463+
Combine the `DimGroupByArray` using function `f` over the group dimensions.
464+
Unlike broadcasting a reducing function over a `DimGroupByArray`, this function
465+
always returns a new flattened `AbstractDimArray` even where not all dimensions
466+
are reduced. It will also work over grouped `AbstractDimStack`.
467+
468+
If `dims` is given, it will combine only the dimensions in `dims`, the
469+
others will be present in the final array. Note that all grouped dimensions
470+
must be reduced and included in `dims`.
471+
472+
The reducing function `f` must also accept a `dims` keyword.
473+
474+
# Example
475+
476+
```jldoctest groupby
477+
```
478+
"""
479+
function combine(f::Function, gb::DimGroupByArray{G}; dims=:) where G
480+
targetdims = DD.commondims(first(gb), dims)
481+
all(hasdim(first(gb), targetdims)) || throw(ArgumentError("dims must be a subset of the groupby dimensions"))
482+
all(hasdim(targetdims, DD.dims(gb))) || throw(ArgumentError("grouped dimensions $(DD.basedims(gb)) must be included in dims"))
483+
# This works for both arrays and stacks
484+
# Combine the remaining dimensions after reduction and the group dimensions
485+
destdims = (otherdims(DD.dims(first(gb)), dims)..., DD.dims(gb)...)
486+
# Get the output eltype
487+
T = Base.promote_op(f, G)
488+
# Create an output array with the combined dimensions
489+
dest = similar(first(gb), T, destdims)
490+
for D in DimIndices(gb)
491+
if all(hasdim(targetdims, DD.dims(first(gb))))
492+
# Assign the reduced scalar to dest
493+
dest[D...] = f(gb[D])
494+
else
495+
# Reduce with `f` and drop length 1 dimensions
496+
xs = dropdims(f(gb[D]; dims); dims)
497+
# Broadcast the reduced array to dest
498+
broadcast_dims!(identity, view(dest, D...), xs)
499+
end
500+
end
501+
return dest
502+
end

src/stack/indexing.jl

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,9 @@ for f in (:getindex, :view, :dotview)
150150
end
151151
end
152152

153+
@generated function _any_dimarray(v::Union{NamedTuple,Tuple})
154+
any(T -> T <: AbstractDimArray, v.types)
155+
end
153156

154157
#### setindex ####
155158
@propagate_inbounds Base.setindex!(s::AbstractDimStack, xs, I...; kw...) =
@@ -160,22 +163,17 @@ end
160163
hassamedims(s) ? _map_setindex!(s, xs, i; kw...) : _setindex_mixed!(s, xs, i; kw...)
161164
@propagate_inbounds Base.setindex!(s::AbstractDimStack, xs::NamedTuple, i::AbstractArray; kw...) =
162165
hassamedims(s) ? _map_setindex!(s, xs, i; kw...) : _setindex_mixed!(s, xs, i; kw...)
166+
@propagate_inbounds Base.setindex!(s::AbstractDimStack, xs::NamedTuple, i::DimensionIndsArrays; kw...) =
167+
_map_setindex!(s, xs, i; kw...)
168+
@propagate_inbounds Base.setindex!(s::AbstractDimStack, xs::NamedTuple, I...; kw...) =
169+
_map_setindex!(s, xs, I...; kw...)
163170

164-
@propagate_inbounds function Base.setindex!(
165-
s::AbstractDimStack, xs::NamedTuple, I...; kw...
166-
)
167-
map((A, x) -> setindex!(A, x, I...; kw...), layers(s), xs)
168-
end
169-
170-
_map_setindex!(s, xs, i; kw...) = map((A, x) -> setindex!(A, x, i...; kw...), layers(s), xs)
171+
_map_setindex!(s, xs, i...; kw...) = map((A, x) -> setindex!(A, x, i...; kw...), layers(s), xs)
171172

172-
_setindex_mixed!(s::AbstractDimStack, x, i::AbstractArray) =
173-
map(A -> setindex!(A, x, DimIndices(dims(s))[i]), layers(s))
174-
_setindex_mixed!(s::AbstractDimStack, i::Integer) =
175-
map(A -> setindex!(A, x, DimIndices(dims(s))[i]), layers(s))
176-
function _setindex_mixed!(s::AbstractDimStack, x, i::Colon)
177-
map(DimIndices(dims(s))) do D
178-
map(A -> setindex!(A, D), x, layers(s))
173+
function _setindex_mixed!(s::AbstractDimStack, xs::NamedTuple, i)
174+
D = DimIndices(dims(s))[i]
175+
map(layers(s), xs) do A, x
176+
A[D] = x
179177
end
180178
end
181179

src/stack/stack.jl

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,6 @@ Base.length(s::AbstractDimStack) = prod(size(s))
153153
Base.axes(s::AbstractDimStack) = map(first ∘ axes, dims(s))
154154
Base.axes(s::AbstractDimStack, dims::DimOrDimType) = axes(s, dimnum(s, dims))
155155
Base.axes(s::AbstractDimStack, dims::Integer) = axes(s)[dims]
156-
Base.similar(s::AbstractDimStack, args...) = maplayers(A -> similar(A, args...), s)
157156
Base.eltype(::AbstractDimStack{<:Any,T}) where T = T
158157
Base.ndims(::AbstractDimStack{<:Any,<:Any,N}) where N = N
159158
Base.CartesianIndices(s::AbstractDimStack) = CartesianIndices(dims(s))
@@ -197,6 +196,36 @@ Base.get(f::Base.Callable, st::AbstractDimStack, k::Symbol) =
197196
@propagate_inbounds Base.iterate(st::AbstractDimStack, i) =
198197
i > length(st) ? nothing : (st[DimIndices(st)[i]], i + 1)
199198

199+
Base.similar(s::AbstractDimStack) = similar(s, eltype(s))
200+
Base.similar(s::AbstractDimStack, dims::Dimension...) = similar(s, dims)
201+
Base.similar(s::AbstractDimStack, ::Type{T},dims::Dimension...) where T =
202+
similar(s, T, dims)
203+
Base.similar(s::AbstractDimStack, dims::Tuple{Vararg{Dimension}}) =
204+
similar(s, eltype(s), dims)
205+
Base.similar(s::AbstractDimStack, ::Type{T}) where T =
206+
similar(s, T, dims(s))
207+
function Base.similar(s::AbstractDimStack, ::Type{T}, dims::Tuple) where T
208+
# Any dims not in the stack are added to all layers
209+
ods = otherdims(dims, DD.dims(s))
210+
maplayers(s) do A
211+
# Original layer dims are maintained, other dims are added
212+
D = DD.commondims(dims, (DD.dims(A)..., ods...))
213+
similar(A, T, D)
214+
end
215+
end
216+
function Base.similar(s::AbstractDimStack, ::Type{T}, dims::Tuple) where T<:NamedTuple
217+
ods = otherdims(dims, DD.dims(s))
218+
maplayers(s, _nt_types(T)) do A, Tx
219+
D = DD.commondims(dims, (DD.dims(A)..., ods...))
220+
similar(A, Tx, D)
221+
end
222+
end
223+
224+
@generated function _nt_types(::Type{NamedTuple{K,T}}) where {K,T}
225+
expr = Expr(:tuple, T.parameters...)
226+
return :(NamedTuple{K}($expr))
227+
end
228+
200229
# `merge` for AbstractDimStack and NamedTuple.
201230
# One of the first three arguments must be an AbstractDimStack for dispatch to work.
202231
Base.merge(s::AbstractDimStack) = s

src/utils.jl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,11 @@ function broadcast_dims!(f, dest::AbstractDimArray{<:Any,N}, As::AbstractBasicDi
160160
od = map(A -> otherdims(dest, dims(A)), As)
161161
return _broadcast_dims_inner!(f, dest, As, od)
162162
end
163+
function broadcast_dims!(f, dest::AbstractDimStack, stacks::AbstractDimStack...)
164+
maplayers(dest, stacks...) do d, layers...
165+
broadcast_dims!(f, d, layers...)
166+
end
167+
end
163168

164169
# Function barrier
165170
function _broadcast_dims_inner!(f, dest, As, od)

test/groupby.jl

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ days = DateTime(2000):Day(1):DateTime(2000, 12, 31)
88
A = DimArray((1:6) * (1:366)', (X(1:0.2:2), Ti(days)))
99
st = DimStack((a=A, b=A, c=A[X=1]))
1010

11-
1211
@testset "group eltype matches indexed values" begin
1312
da = rand(X(1:10), Y(1:10))
1413
grps = groupby(da, X => isodd)
@@ -22,10 +21,16 @@ end
2221
mean(A[Ti=dayofyear(m):dayofyear(m)+daysinmonth(m)-1])
2322
end
2423
@test mean.(groupby(A, Ti=>month)) == manualmeans
24+
combinedmeans = combine(mean, groupby(A, Ti=>month))
25+
@test combinedmeans isa DimArray
26+
@test combinedmeans == manualmeans
2527
manualmeans_st = map(months) do m
2628
mean(st[Ti=dayofyear(m):dayofyear(m)+daysinmonth(m)-1])
2729
end
2830
@test mean.(groupby(st, Ti=>month)) == manualmeans_st
31+
combinedmeans_st = combine(mean, groupby(st, Ti=>month))
32+
@test combinedmeans_st isa DimStack{(:a, :b, :c), @NamedTuple{a::Float64, b::Float64, c::Float64}}
33+
@test collect(combinedmeans_st) == manualmeans_st
2934

3035
manualsums = mapreduce(hcat, months) do m
3136
vcat(sum(A[Ti=dayofyear(m):dayofyear(m)+daysinmonth(m)-1, X=1 .. 1.5]),
@@ -36,6 +41,8 @@ end
3641
@test dims(gb_sum, Ti) == Ti(Sampled([1:12...], ForwardOrdered(), Irregular((nothing, nothing)), Points(), NoMetadata()))
3742
@test typeof(dims(gb_sum, X)) == typeof(X(Sampled(BitVector([false, true]), ForwardOrdered(), Irregular((nothing, nothing)), Points(), NoMetadata())))
3843
@test gb_sum == manualsums
44+
combined_sum = combine(sum, groupby(A, Ti=>month, X => >(1.5)))
45+
@test collect(combined_sum) == manualsums
3946

4047
manualsums_st = mapreduce(hcat, months) do m
4148
vcat(sum(st[Ti=dayofyear(m):dayofyear(m)+daysinmonth(m)-1, X=1 .. 1.5]),
@@ -46,10 +53,22 @@ end
4653
@test dims(gb_sum_st, Ti) == Ti(Sampled([1:12...], ForwardOrdered(), Irregular((nothing, nothing)), Points(), NoMetadata()))
4754
@test typeof(dims(gb_sum_st, X)) == typeof(X(Sampled(BitVector([false, true]), ForwardOrdered(), Irregular((nothing, nothing)), Points(), NoMetadata())))
4855
@test gb_sum_st == manualsums_st
56+
combined_sum_st = combine(sum, groupby(st, Ti=>month, X => >(1.5)))
57+
@test collect(combined_sum_st) == manualsums_st
4958

5059
@test_throws ArgumentError groupby(st, Ti=>month, Y=>isodd)
5160
end
5261

62+
@testset "partial reductions in combine" begin
63+
months = DateTime(2000):Month(1):DateTime(2000, 12, 31)
64+
using BenchmarkTools
65+
manualmeans = cat(map(months) do m
66+
mean(A[Ti=dayofyear(m):dayofyear(m)+daysinmonth(m)-1]; dims=Ti)
67+
end...; dims=Ti(collect(1:12)))
68+
combinedmeans = combine(mean, groupby(A, Ti()=>month); dims=Ti())
69+
@test combinedmeans == manualmeans
70+
end
71+
5372
@testset "bins" begin
5473
seasons = DateTime(2000):Month(3):DateTime(2000, 12, 31)
5574
manualmeans = map(seasons) do s
@@ -59,6 +78,7 @@ end
5978
@test mean.(groupby(A, Ti=>Bins(month, ranges(1:3:12)))) == manualmeans
6079
@test mean.(groupby(A, Ti=>Bins(month, intervals(1:3:12)))) == manualmeans
6180
@test mean.(groupby(A, Ti=>Bins(month, 4))) == manualmeans
81+
@test combine(mean, groupby(A, Ti=>Bins(month, ranges(1:3:12)))) == manualmeans
6282
end
6383

6484
@testset "dimension matching groupby" begin
@@ -75,9 +95,10 @@ end
7595
end
7696
@test all(collect(mean.(gb)) .=== manualmeans)
7797
@test all(mean.(gb) .=== manualmeans)
98+
@test all(combine(mean, gb) .=== manualmeans)
7899
end
79100

80-
@testset "broadcastdims runs after groupby" begin
101+
@testset "broadcast_dims runs after groupby" begin
81102
dimlist = (
82103
Ti(Date("2021-12-01"):Day(1):Date("2022-12-31")),
83104
X(range(1, 10, length=10)),
@@ -87,7 +108,7 @@ end
87108
data = rand(396, 10, 15, 2)
88109
A = DimArray(data, dimlist)
89110
month_length = DimArray(daysinmonth, dims(A, Ti))
90-
g_tempo = DimensionalData.groupby(month_length, Ti=>seasons(; start=December))
111+
g_tempo = DimensionalData.groupby(month_length, Ti => seasons(; start=December))
91112
sum_days = sum.(g_tempo, dims=Ti)
92113
@test sum_days isa DimArray
93114
weights = map(./, g_tempo, sum_days)

test/stack.jl

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ using DimensionalData, Test, LinearAlgebra, Statistics, ConstructionBase, Random
33
using DimensionalData: data
44
using DimensionalData: Sampled, Categorical, AutoLookup, NoLookup, Transformed,
55
Regular, Irregular, Points, Intervals, Start, Center, End,
6-
Metadata, NoMetadata, ForwardOrdered, ReverseOrdered, Unordered, layers, basedims
6+
Metadata, NoMetadata, ForwardOrdered, ReverseOrdered, Unordered, layers, basedims, layerdims
77

88
A = [1.0 2.0 3.0;
99
4.0 5.0 6.0]
@@ -94,11 +94,23 @@ end
9494
@test all(maplayers(similar(mixed), mixed) do s, m
9595
dims(s) == dims(m) && dims(s) === dims(m) && eltype(s) === eltype(m)
9696
end)
97-
@test eltype(similar(s, Int)) === @NamedTuple{one::Int, two::Int, three::Int}
97+
@test eltype(similar(s, Int)) ===
98+
@NamedTuple{one::Int, two::Int, three::Int}
99+
@test eltype(similar(s, @NamedTuple{one::Int, two::Float32, three::Bool})) ===
100+
@NamedTuple{one::Int, two::Float32, three::Bool}
98101
st2 = similar(mixed, Bool, x, y)
99102
@test dims(st2) === (x, y)
100103
@test dims(st2[:one]) === (x, y)
101104
@test eltype(st2) === @NamedTuple{one::Bool, two::Bool, extradim::Bool}
105+
@test eltype(similar(mixed)) == eltype(mixed)
106+
@test size(similar(mixed)) == size(mixed)
107+
@test keys(similar(mixed)) == keys(mixed)
108+
@test layerdims(similar(mixed)) == layerdims(mixed)
109+
xy = (X(), Y())
110+
@test layerdims(similar(mixed, dims(mixed, (X, Y)))) == (one=xy, two=xy, extradim=xy)
111+
st3 = similar(mixed, @NamedTuple{one::Int, two::Float32, extradim::Bool}, (Z([:a, :b, :c]), Ti(1:12), X(1:3)))
112+
@test layerdims(st3) == (one=(Ti(), X()), two=(Ti(), X()), extradim=(Z(), Ti(), X()))
113+
@test eltype(st3) == @NamedTuple{one::Int, two::Float32, extradim::Bool}
102114
end
103115

104116
@testset "merge" begin

0 commit comments

Comments
 (0)