@@ -591,7 +591,7 @@ module IteratorsMD
591591 CartesianIndices (intersect .(a. indices, b. indices))
592592
593593 # Views of reshaped CartesianIndices are used for partitions — ensure these are fast
594- const CartesianPartition{T<: CartesianIndex , P<: CartesianIndices , R<: ReshapedArray{T,1,P} } = SubArray{T,1 ,R,Tuple{UnitRange {Int}},false }
594+ const CartesianPartition{T<: CartesianIndex , P<: CartesianIndices , R<: ReshapedArray{T,1,P} } = SubArray{T,1 ,R,<: Tuple{AbstractUnitRange {Int}} ,false }
595595 eltype (:: Type{PartitionIterator{T}} ) where {T<: ReshapedArrayLF } = SubArray{eltype (T), 1 , T, Tuple{UnitRange{Int}}, true }
596596 eltype (:: Type{PartitionIterator{T}} ) where {T<: ReshapedArray } = SubArray{eltype (T), 1 , T, Tuple{UnitRange{Int}}, false }
597597 Iterators. IteratorEltype (:: Type{<:PartitionIterator{T}} ) where {T<: ReshapedArray } = Iterators. IteratorEltype (T)
@@ -600,7 +600,6 @@ module IteratorsMD
600600 eltype (:: Type{PartitionIterator{T}} ) where {T<: Union{UnitRange, StepRange, StepRangeLen, LinRange} } = T
601601 Iterators. IteratorEltype (:: Type{<:PartitionIterator{T}} ) where {T<: Union{OneTo, UnitRange, StepRange, StepRangeLen, LinRange} } = Iterators. IteratorEltype (T)
602602
603-
604603 @inline function iterate (iter:: CartesianPartition )
605604 isempty (iter) && return nothing
606605 f = first (iter)
@@ -616,33 +615,45 @@ module IteratorsMD
616615 # In general, the Cartesian Partition might start and stop in the middle of the outer
617616 # dimensions — thus the outer range of a CartesianPartition is itself a
618617 # CartesianPartition.
619- t = tail (iter. parent. parent. indices)
620- ci = CartesianIndices (t)
621- li = LinearIndices (t)
622- return @inbounds view (ci, li[tail (iter[1 ]. I)... ]: li[tail (iter[end ]. I)... ])
618+ mi = iter. parent. mi
619+ ci = iter. parent. parent
620+ ax, ax1 = axes (ci), Base. axes1 (ci)
621+ subs = Base. ind2sub_rs (ax, mi, first (iter. indices[1 ]))
622+ vl, fl = Base. _sub2ind (tail (ax), tail (subs)... ), subs[1 ]
623+ vr, fr = divrem (last (iter. indices[1 ]) - 1 , mi[end ]) .+ (1 , first (ax1))
624+ oci = CartesianIndices (tail (ci. indices))
625+ # A fake CartesianPartition to reuse the outer iterate fallback
626+ outer = @inbounds view (ReshapedArray (oci, (length (oci),), mi), vl: vr)
627+ init = @inbounds dec (oci[tail (subs)... ]. I, oci. indices) # real init state
628+ # Use Generator to make inner loop branchless
629+ @inline function skip_len_I (i:: Int , I:: CartesianIndex )
630+ l = i == 1 ? fl : first (ax1)
631+ r = i == length (outer) ? fr : last (ax1)
632+ l - first (ax1), r - l + 1 , I
633+ end
634+ (skip_len_I (i, I) for (i, I) in Iterators. enumerate (Iterators. rest (outer, (init, 0 ))))
623635 end
624- function simd_outer_range (iter:: CartesianPartition{CartesianIndex{2}} )
636+ @inline function simd_outer_range (iter:: CartesianPartition{CartesianIndex{2}} )
625637 # But for two-dimensional Partitions the above is just a simple one-dimensional range
626638 # over the second dimension; we don't need to worry about non-rectangular staggers in
627639 # higher dimensions.
628- return @inbounds CartesianIndices ((iter[1 ][2 ]: iter[end ][2 ],))
629- end
630- @inline function simd_inner_length (iter:: CartesianPartition , I:: CartesianIndex )
631- inner = iter. parent. parent. indices[1 ]
632- @inbounds fi = iter[1 ]. I
633- @inbounds li = iter[end ]. I
634- inner_start = I. I == tail (fi) ? fi[1 ] : first (inner)
635- inner_end = I. I == tail (li) ? li[1 ] : last (inner)
636- return inner_end - inner_start + 1
637- end
638- @inline function simd_index (iter:: CartesianPartition , Ilast:: CartesianIndex , I1:: Int )
639- # I1 is the 0-based distance from the first dimension's offest
640- offset = first (iter. parent. parent. indices[1 ]) # (this is 1 for 1-based arrays)
641- # In the first column we need to also add in the iter's starting point (branchlessly)
642- f = @inbounds iter[1 ]
643- startoffset = (Ilast. I == tail (f. I))* (f[1 ] - 1 )
644- CartesianIndex ((I1 + offset + startoffset, Ilast. I... ))
640+ mi = iter. parent. mi
641+ ci = iter. parent. parent
642+ ax, ax1 = axes (ci), Base. axes1 (ci)
643+ fl, vl = Base. ind2sub_rs (ax, mi, first (iter. indices[1 ]))
644+ fr, vr = Base. ind2sub_rs (ax, mi, last (iter. indices[1 ]))
645+ outer = @inbounds CartesianIndices ((ci. indices[2 ][vl: vr],))
646+ # Use Generator to make inner loop branchless
647+ @inline function skip_len_I (I:: CartesianIndex{1} )
648+ l = I == first (outer) ? fl : first (ax1)
649+ r = I == last (outer) ? fr : last (ax1)
650+ l - first (ax1), r - l + 1 , I
651+ end
652+ (skip_len_I (I) for I in outer)
645653 end
654+ @inline simd_inner_length (iter:: CartesianPartition , (_, len, _):: Tuple{Int,Int,CartesianIndex} ) = len
655+ @propagate_inbounds simd_index (iter:: CartesianPartition , (skip, _, I):: Tuple{Int,Int,CartesianIndex} , n:: Int ) =
656+ simd_index (iter. parent. parent, I, n + skip)
646657end # IteratorsMD
647658
648659
0 commit comments