Skip to content

Commit d6277e4

Browse files
authored
Synchronize ByRow and Tables.subset with Tables.jl 1.8 (#3158)
1 parent 952b0e2 commit d6277e4

File tree

8 files changed

+63
-30
lines changed

8 files changed

+63
-30
lines changed

NEWS.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,11 @@
7676
* Make `transform!` on `SubDataFrame` faster
7777
([#3070](https://github.com/JuliaData/DataFrames.jl/pull/3070))
7878

79+
## Integration changes
80+
81+
* Support `Tables.subset` and move `ByRow` definition to Tables.jl
82+
([#3158](https://github.com/JuliaData/DataFrames.jl/pull/3158))
83+
7984
# DataFrames.jl v1.3.4 Patch Release Notes
8085

8186
## Bug fixes

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ Reexport = "0.1, 0.2, 1"
3636
ShiftedArrays = "1"
3737
SortingAlgorithms = "0.1, 0.2, 0.3, 1"
3838
TableTraits = "0.4, 1"
39-
Tables = "1.2"
39+
Tables = "1.8.1"
4040
Unitful = "1"
4141
julia = "1"
4242

docs/src/lib/types.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,6 @@ without caution because:
126126
```@docs
127127
AbstractDataFrame
128128
AsTable
129-
ByRow
130129
DataFrame
131130
DataFrameRow
132131
GroupedDataFrame

src/DataFrames.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import LinearAlgebra: norm
99
using Markdown
1010
using PrettyTables
1111
using Random
12+
using Tables: ByRow
1213

1314
import DataAPI,
1415
DataAPI.allcombinations,

src/abstractdataframe/selection.jl

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -309,33 +309,6 @@ function broadcast_pair(df::AbstractDataFrame, @nospecialize(p::AbstractVecOrMat
309309
end
310310
end
311311

312-
"""
313-
ByRow
314-
315-
A type used for selection operations to signal that the wrapped function should
316-
be applied to each element (row) of the selection.
317-
318-
The wrapped function is called exactly once for each element.
319-
This differs from `map` and `broadcast`, which assume for some types of
320-
source vectors (e.g. `SparseVector`) that the wrapped function is pure,
321-
allowing them to call the function only once for multiple equal values.
322-
When using such types, for maximal performance with pure functions
323-
which are relatively costly, use `x -> map(f, x)` instead of `ByRow(f)`.
324-
325-
Note that `ByRow` always collects values returned by `fun` in a vector.
326-
"""
327-
struct ByRow{T} <: Function
328-
fun::T
329-
end
330-
331-
# invoke the generic AbstractVector function to ensure function is called
332-
# exactly once for each element
333-
(f::ByRow)(cols::AbstractVector...) =
334-
invoke(map,
335-
Tuple{typeof(f.fun), ntuple(i -> AbstractVector, length(cols))...},
336-
f.fun, cols...)
337-
(f::ByRow)(table::NamedTuple) = [f.fun(nt) for nt in Tables.namedtupleiterator(table)]
338-
339312
# add a method to funname defined in other/utils.jl
340313
funname(row::ByRow) = funname(row.fun)
341314

src/other/tables.jl

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,3 +103,12 @@ IteratorInterfaceExtensions.getiterator(df::AbstractDataFrame) =
103103
Tables.datavaluerows(Tables.columntable(df))
104104
IteratorInterfaceExtensions.isiterable(x::AbstractDataFrame) = true
105105
TableTraits.isiterabletable(x::AbstractDataFrame) = true
106+
107+
# Tables.jl row-subsetting entry point for data frames.
# `view=true` requests a view of `df`; any other value indexes with `df[inds, :]`.
# Under `view=false` a `DataFrameRow` result is additionally passed through `copy`;
# under `view=nothing` whatever the indexing produced is returned unchanged.
@inline function Tables.subset(df::AbstractDataFrame, inds;
                               view::Union{Bool, Nothing}=nothing)
    # the `view` keyword shadows the `view` function inside this method,
    # so the function has to be reached through the module name
    view === true && return DataFrames.view(df, inds, :)

    selected = df[inds, :]
    return (view === false && selected isa DataFrameRow) ? copy(selected) : selected
end

test/grouping.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3461,7 +3461,9 @@ end
34613461
df = DataFrame(id=[1, 1, 2, 3, 3, 1], x=1:6)
34623462
gdf = groupby_checked(df, :id)
34633463
@test_throws ArgumentError combine(gdf, :x, :x)
3464-
@test_throws ErrorException combine(gdf, :x => (x -> Dict("a" => [1])) => AsTable)
3464+
@test_throws ErrorException combine(gdf, :x => (x -> Dict("a" => 1)) => AsTable)
3465+
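# behavior changed in Tables.jl 1.8: a Dict with a String key and a vector value
# no longer throws here; it is expanded into column `a`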
3466+
@test combine(gdf, :x => (x -> Dict("a" => [1])) => AsTable) == DataFrame(id=1:3, a=1)
34653467
@test_throws ErrorException combine(gdf, :x => (x -> Dict(:a => 1)) => AsTable)
34663468
@test_throws ArgumentError combine(gdf, sdf -> sdf.id[1] == 1 ? Ref(1) : [1])
34673469
@test_throws ArgumentError combine(gdf, sdf -> sdf.id[1] == 2 ? Ref(1) : [1])

test/tables.jl

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,4 +323,48 @@ end
323323
@test DataFrame === @inferred Tables.materializer(DataFrames.DataFrameColumns)
324324
end
325325

326+
# Checks `Tables.subset` on a `DataFrame` for every supported kind of row
# selector (`:`, integer vector, boolean mask, single integer), each with the
# default `view`, `view=false` and `view=true`.
@testset "Tables.subset" begin
    df = DataFrame(a=1:3, b=4:6)

    # `:` selects all rows
    res = @inferred Tables.subset(df, :)
    @test res isa DataFrame
    @test res == DataFrame(a=1:3, b=4:6)
    res = Tables.subset(df, :, view=false)
    @test res isa DataFrame
    @test res == DataFrame(a=1:3, b=4:6)
    res = Tables.subset(df, :, view=true)
    @test res isa SubDataFrame
    @test res == DataFrame(a=1:3, b=4:6)

    # integer vector, not sorted: the requested row order must be kept
    res = @inferred Tables.subset(df, [3, 1])
    @test res isa DataFrame
    @test res == DataFrame(a=[3, 1], b=[6, 4])
    res = Tables.subset(df, [3, 1], view=false)
    @test res isa DataFrame
    @test res == DataFrame(a=[3, 1], b=[6, 4])
    res = Tables.subset(df, [3, 1], view=true)
    @test res isa SubDataFrame
    @test res == DataFrame(a=[3, 1], b=[6, 4])

    # boolean mask; the `view` keyword variants were previously exercised only
    # with `[1, 3]`, leaving mask + keyword combinations untested
    res = @inferred Tables.subset(df, [true, false, true])
    @test res isa DataFrame
    @test res == DataFrame(a=[1, 3], b=[4, 6])
    res = Tables.subset(df, [true, false, true], view=false)
    @test res isa DataFrame
    @test res == DataFrame(a=[1, 3], b=[4, 6])
    res = Tables.subset(df, [true, false, true], view=true)
    @test res isa SubDataFrame
    @test res == DataFrame(a=[1, 3], b=[4, 6])

    # integer vector in increasing order (assertions kept from the original)
    res = Tables.subset(df, [1, 3], view=false)
    @test res isa DataFrame
    @test res == DataFrame(a=[1, 3], b=[4, 6])
    res = Tables.subset(df, [1, 3], view=true)
    @test res isa SubDataFrame
    @test res == DataFrame(a=[1, 3], b=[4, 6])

    # single integer: a row object; only `view=false` yields a `NamedTuple`
    res = @inferred Tables.subset(df, 2)
    @test res isa DataFrameRow
    @test res == DataFrame(a=2, b=5)[1, :]
    res = Tables.subset(df, 2, view=false)
    @test res isa NamedTuple{(:a, :b), Tuple{Int, Int}}
    @test res == (a=2, b=5)
    res = Tables.subset(df, 2, view=true)
    @test res isa DataFrameRow
    @test res == DataFrame(a=2, b=5)[1, :]
end
369+
326370
end # module

0 commit comments

Comments
 (0)