Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# tidyr (development version)

* `pivot_wider()` gains a new `names_vary` argument for controlling the
ordering when combining `names_from` values with `values_from` column names
(#839).

* `pivot_wider()` now gives better advice about how to identify duplicates when
values are not uniquely identified (#1113).

Expand Down
52 changes: 43 additions & 9 deletions R/pivot-wide.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,17 @@
#' `.value`) to create custom column names.
#' @param names_sort Should the column names be sorted? If `FALSE`, the default,
#' column names are ordered by first appearance.
#' @param names_vary When `names_from` identifies a column (or columns) with
#' multiple unique values, and multiple `values_from` columns are provided,
#' in what order should the resulting column names be combined?
#'
#' - `"fastest"` varies `names_from` values fastest, resulting in a column
#' naming scheme of the form: `value1_name1, value1_name2, value2_name1,
#' value2_name2`. This is the default.
#'
#' - `"slowest"` varies `names_from` values slowest, resulting in a column
#' naming scheme of the form: `value1_name1, value2_name1, value1_name2,
#' value2_name2`.
#' @param values_fill Optionally, a (scalar) value that specifies what each
#' `value` should be filled in with when missing.
#'
Expand Down Expand Up @@ -66,7 +77,19 @@
#' # Generate column names from multiple variables
#' us_rent_income
#' us_rent_income %>%
#' pivot_wider(names_from = variable, values_from = c(estimate, moe))
#' pivot_wider(
#' names_from = variable,
#' values_from = c(estimate, moe)
#' )
#'
#' # You can control whether `names_from` values vary fastest or slowest
#' # relative to the `values_from` column names using `names_vary`.
#' us_rent_income %>%
#' pivot_wider(
#' names_from = variable,
#' values_from = c(estimate, moe),
#' names_vary = "slowest"
#' )
#'
#' # When there are multiple `names_from` or `values_from`, you can use
#' # `names_sep` or `names_glue` to control the output variable names
Expand Down Expand Up @@ -109,6 +132,7 @@ pivot_wider <- function(data,
names_sep = "_",
names_glue = NULL,
names_sort = FALSE,
names_vary = "fastest",
names_repair = "check_unique",
values_from = value,
values_fill = NULL,
Expand All @@ -126,12 +150,12 @@ pivot_wider.data.frame <- function(data,
names_sep = "_",
names_glue = NULL,
names_sort = FALSE,
names_vary = "fastest",
names_repair = "check_unique",
values_from = value,
values_fill = NULL,
values_fn = NULL,
...
) {
...) {
names_from <- enquo(names_from)
values_from <- enquo(values_from)

Expand All @@ -142,7 +166,8 @@ pivot_wider.data.frame <- function(data,
names_prefix = names_prefix,
names_sep = names_sep,
names_glue = names_glue,
names_sort = names_sort
names_sort = names_sort,
names_vary = names_vary
)

id_cols <- build_wider_id_cols_expr(
Expand Down Expand Up @@ -347,7 +372,8 @@ build_wider_spec <- function(data,
names_prefix = "",
names_sep = "_",
names_glue = NULL,
names_sort = FALSE) {
names_sort = FALSE,
names_vary = "fastest") {
names_from <- tidyselect::eval_select(enquo(names_from), data)
values_from <- tidyselect::eval_select(enquo(values_from), data)

Expand All @@ -358,6 +384,8 @@ build_wider_spec <- function(data,
abort("`values_from` must select at least one column.")
}

names_vary <- arg_match0(names_vary, c("fastest", "slowest"), arg_nm = "names_vary")

row_ids <- vec_unique(data[names_from])
if (names_sort) {
row_ids <- vec_sort(row_ids)
Expand All @@ -372,11 +400,17 @@ build_wider_spec <- function(data,
if (length(values_from) == 1) {
out$.value <- names(values_from)
} else {
out <- vec_rep(out, vec_size(values_from))
out$.value <- vec_rep_each(names(values_from), vec_size(row_ids))
out$.name <- vec_paste0(out$.value, names_sep, out$.name)
if (names_vary == "fastest") {
out <- vec_rep(out, vec_size(values_from))
out$.value <- vec_rep_each(names(values_from), vec_size(row_ids))
row_ids <- vec_rep(row_ids, vec_size(values_from))
} else {
out <- vec_rep_each(out, vec_size(values_from))
out$.value <- vec_rep(names(values_from), vec_size(row_ids))
row_ids <- vec_rep_each(row_ids, vec_size(values_from))
}

row_ids <- vec_rep(row_ids, vec_size(values_from))
out$.name <- vec_paste0(out$.value, names_sep, out$.name)
}

out <- vec_cbind(out, as_tibble(row_ids), .name_repair = "minimal")
Expand Down
25 changes: 24 additions & 1 deletion man/pivot_wider.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 12 additions & 1 deletion man/pivot_wider_spec.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions tests/testthat/_snaps/pivot-wide.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,19 @@
Applying `values_fn` to `value` must result in a single summary value per key.
x Applying `values_fn` resulted in a value with length 2.

# `names_vary` is validated

Code
(expect_error(build_wider_spec(df, names_vary = 1)))
Output
<error/rlang_error>
`names_vary` must be a character vector.
Code
(expect_error(build_wider_spec(df, names_vary = "x")))
Output
<error/rlang_error>
`names_vary` must be one of "fastest" or "slowest".

# duplicated keys produce list column with warning

Code
Expand Down
31 changes: 31 additions & 0 deletions tests/testthat/test-pivot-wide.R
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,37 @@ test_that("can sort column names", {
expect_equal(spec$.name, levels(df$fac))
})

# With several `values_from` columns, `names_vary` decides the order in which
# the generated `.name` entries of the spec are laid out.
test_that("can vary `names_from` values slowest (#839)", {
  wide_input <- tibble(
    name = c("name1", "name2"),
    value1 = c(1, 2),
    value2 = c(4, 5)
  )

  # Without `names_vary`, the `names_from` values cycle within each
  # `values_from` column name.
  spec_default <- build_wider_spec(
    wide_input,
    names_from = name,
    values_from = c(value1, value2)
  )
  expected_default <- c(
    "value1_name1", "value1_name2", "value2_name1", "value2_name2"
  )
  expect_identical(spec_default$.name, expected_default)

  # With "slowest", the `values_from` column names cycle within each
  # `names_from` value instead.
  spec_slowest <- build_wider_spec(
    wide_input,
    names_from = name,
    values_from = c(value1, value2),
    names_vary = "slowest"
  )
  expected_slowest <- c(
    "value1_name1", "value2_name1", "value1_name2", "value2_name2"
  )
  expect_identical(spec_slowest$.name, expected_slowest)
})

# Invalid `names_vary` inputs must raise an error; the error output is pinned
# by a snapshot rather than matched inline.
test_that("`names_vary` is validated", {
# Minimal two-row input; both calls below are expected to error.
df <- tibble(name = c("a", "b"), value = c(1, 2))

# Two failure modes are recorded: a non-character value (1) and a string that
# is not one of the accepted choices ("x"). The snapshot stores the code text
# of these calls alongside the output, so keep the expressions unchanged.
expect_snapshot({
(expect_error(build_wider_spec(df, names_vary = 1)))
(expect_error(build_wider_spec(df, names_vary = "x")))
})
})

# keys ---------------------------------------------------------

test_that("can override default keys", {
Expand Down