@@ -345,61 +345,71 @@ end
345345# the corresponding cartesian index into the parent, and then uses
346346# dims to convert back to a linear index into the parent array.
347347#
348- # However, a common case is linindex::UnitRange .
349- # Since div is slow and in(j::Int, linindex::UnitRange ) is fast,
348+ # However, a common case is linindex::Range .
349+ # Since div is slow and in(j::Int, linindex::Range ) is fast,
350350# it can be much faster to generate all possibilities and
351351# then test whether the corresponding linear index is in linindex.
352352# One exception occurs when only a small subset of the total
353353# is desired, in which case we fall back to the div-based algorithm.
354- stagedfunction merge_indexes (V, indexes:: NTuple , dims:: Dims , linindex:: UnitRange{Int} )
355- N = length (indexes)
354+ # stagedfunction merge_indexes{T<:Integer}(V, parentindexes::NTuple, parentdims::Dims, linindex::Union(Colon,Range{T}), lindim)
355+ stagedfunction merge_indexes_in {TT} (V, parentindexes:: TT , parentdims:: Dims , linindex, lindim)
356+ N = length (parentindexes) # number of parent axes we're merging
356357 N > 0 || throw (ArgumentError (" cannot merge empty indexes" ))
358+ lengthexpr = linindex == Colon ? (:(prod (size (V)[lindim: end ]))) : (:(length (linindex)))
359+ L = symbol (string (" Istride_" , N+ 1 )) # length of V's trailing dimensions
357360 quote
358- n = length (linindex)
359- Base. Cartesian. @nexprs $ N d-> (I_d = indexes[d])
360- L = 1
361- dimoffset = ndims (V. parent) - length (dims)
362- Base. Cartesian. @nexprs $ N d-> (L *= dimsize (V. parent, d+ dimoffset, I_d))
363- if n < 0.1 L # this has not been carefully tuned
364- return merge_indexes_div (V, indexes, dims, linindex)
361+ n = $ lengthexpr
362+ Base. Cartesian. @nexprs $ N d-> (I_d = parentindexes[d])
363+ pdimoffset = ndims (V. parent) - length (parentdims)
364+ Istride_1 = 1 # parentindexes strides
365+ Base. Cartesian. @nexprs $ N d-> (Istride_{d+ 1 } = Istride_d* dimsize (V. parent, d+ pdimoffset, I_d))
366+ Istridet = Base. Cartesian. @ntuple $ (N+ 1 ) d-> Istride_d
367+ if n < 0.1 * $ L # this has not been carefully tuned
368+ return merge_indexes_div (V, parentindexes, parentdims, linindex, lindim)
365369 end
366370 Pstride_1 = 1 # parent strides
367- Base. Cartesian. @nexprs $ (N- 1 ) d-> (Pstride_{d+ 1 } = Pstride_d* dims[d])
368- Istride_1 = 1 # indexes strides
369- Base. Cartesian. @nexprs $ (N- 1 ) d-> (Istride_{d+ 1 } = Istride_d* dimsize (V, d+ dimoffset, I_d))
370- Base. Cartesian. @nexprs $ N d-> (counter_d = 1 ) # counter_0 is a linear index into indexes
371+ Base. Cartesian. @nexprs $ (N- 1 ) d-> (Pstride_{d+ 1 } = Pstride_d* parentdims[d])
372+ Base. Cartesian. @nexprs $ N d-> (counter_d = 1 ) # counter_0 is a linear index into parentindexes
371373 Base. Cartesian. @nexprs $ N d-> (offset_d = 1 ) # offset_0 is a linear index into parent
372374 k = 0
373375 index = Array (Int, n)
374- Base. Cartesian. @nloops $ N i d-> (1 : dimsize (V, d+ dimoffset , I_d)) d-> (offset_{d- 1 } = offset_d + (I_d[i_d]- 1 )* Pstride_d; counter_{d- 1 } = counter_d + (i_d- 1 )* Istride_d) begin
376+ Base. Cartesian. @nloops $ N i d-> (1 : dimsize (V. parent , d+ pdimoffset , I_d)) d-> (offset_{d- 1 } = offset_d + (I_d[i_d]- 1 )* Pstride_d; counter_{d- 1 } = counter_d + (i_d- 1 )* Istride_d) begin
375377 if in (counter_0, linindex)
376378 index[k+= 1 ] = offset_0
377379 end
378380 end
379381 index
380382 end
381383end
382- merge_indexes (V, indexes:: NTuple , dims:: Dims , linindex) = merge_indexes_div (V, indexes, dims, linindex)
384+
385+ # HACK: dispatch seemingly wasn't working properly
386+ function merge_indexes (V, parentindexes:: NTuple , parentdims:: Dims , linindex, lindim)
387+ if isa (linindex, Colon) || isa (linindex, Range)
388+ return merge_indexes_in (V, parentindexes, parentdims, linindex, lindim)
389+ end
390+ merge_indexes_div (V, parentindexes, parentdims, linindex, lindim)
391+ end
383392
384393# This could be written as a regular function, but performance
385394# will be better using Cartesian macros to avoid the heap and
386395# an extra loop.
387- stagedfunction merge_indexes_div (V, indexes :: NTuple , dims :: Dims , linindex)
388- N = length (indexes )
396+ stagedfunction merge_indexes_div {TT} (V, parentindexes :: TT , parentdims :: Dims , linindex, lindim )
397+ N = length (parentindexes )
389398 N > 0 || throw (ArgumentError (" cannot merge empty indexes" ))
390399 Istride_N = symbol (" Istride_$N " )
400+ lengthexpr = :(length (linindex))
391401 quote
392- Base. Cartesian. @nexprs $ N d-> (I_d = indexes [d])
402+ Base. Cartesian. @nexprs $ N d-> (I_d = parentindexes [d])
393403 Pstride_1 = 1 # parent strides
394- Base. Cartesian. @nexprs $ (N- 1 ) d-> (Pstride_{d+ 1 } = Pstride_d* dims [d])
395- Istride_1 = 1 # indexes strides
396- dimoffset = ndims (V. parent) - length (dims )
397- Base. Cartesian. @nexprs $ (N- 1 ) d-> (Istride_{d+ 1 } = Istride_d* dimsize (V. parent, d+ dimoffset , I_d))
398- n = length (linindex)
399- L = $ (Istride_N) * dimsize (V. parent, $ N+ dimoffset, indexes [end ])
404+ Base. Cartesian. @nexprs $ (N- 1 ) d-> (Pstride_{d+ 1 } = Pstride_d* parentdims [d])
405+ Istride_1 = 1 # parentindexes strides
406+ pdimoffset = ndims (V. parent) - length (parentdims )
407+ Base. Cartesian. @nexprs $ (N- 1 ) d-> (Istride_{d+ 1 } = Istride_d* dimsize (V. parent, d+ pdimoffset , I_d))
408+ n = $ lengthexpr
409+ L = $ (Istride_N) * dimsize (V. parent, $ N+ pdimoffset, parentindexes [end ])
400410 index = Array (Int, n)
401411 for i = 1 : n
402- k = linindex[i] # k is the indexes -centered linear index
412+ k = linindex[i] # k is the parentindexes -centered linear index
403413 1 <= k <= L || throw (BoundsError ())
404414 k -= 1
405415 j = 0 # j will be the new parent-centered linear index
0 commit comments