diff --git a/NEWS.md b/NEWS.md index 87ef6447..a59219cb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,8 @@ * Added download progress bar for `board_url()` (#872, @lbm364dl). +* Added support for writing pins with multiple types, like `type = c("rds", "csv")` (#877, @lbm364dl). + # pins 1.4.1 * Support new `preview_data` parameter for pin previews on Posit Connect (#850). diff --git a/R/board_connect_bundle.R b/R/board_connect_bundle.R index 60e61cce..cadbc95c 100644 --- a/R/board_connect_bundle.R +++ b/R/board_connect_bundle.R @@ -62,7 +62,7 @@ rsc_bundle_preview_index <- function(board, name, x, metadata, preview_data = TR data_preview_style = if ( is.data.frame(x) && preview_data ) "" else "display:none", urls = paste0("", metadata$urls, "", collapse = ", "), url_preview_style = if (!is.null(metadata$urls)) "" else "display:none", - show_python_style = if (metadata$type %in% c("rds", "qs", "qs2")) "display:none" else "", + show_python_style = if (all(metadata$type %in% c("rds", "qs", "qs2"))) "display:none" else "", pin_name = paste0(owner, "/", name$name), pin_metadata = list( as_yaml = yaml::as.yaml(metadata), diff --git a/R/pin-read-write.R b/R/pin-read-write.R index 99ffa8bf..a5e51571 100644 --- a/R/pin-read-write.R +++ b/R/pin-read-write.R @@ -15,6 +15,9 @@ #' @param hash Specify a hash to verify that you get exactly the dataset that #' you expect. You can find the hash of an existing pin by looking for #' `pin_hash` in [pin_meta()]. +#' @param type Retrieve the pin with this specific file type. If not supplied and +#' the pin has been stored with multiple types, one will be chosen arbitrarily. +#' Specify the file type explicitly to avoid this. #' @param ... Additional arguments passed on to methods for a specific board. #' @return `pin_read()` returns an R object read from the pin; #' `pin_write()` returns the fully qualified name of the new pin, invisibly. 
@@ -37,14 +40,27 @@ #' b %>% pin_read("x", version = .Last.value$version[[1]]) #' # (Normally you'd specify the version with a string, but since the #' # version includes the date-time I can't do that in an example) -pin_read <- function(board, name, version = NULL, hash = NULL, ...) { +#' +#' # Pin with multiple types +#' b %>% pin_write(1:10, "y", type = c("rds", "json")) +#' b %>% pin_read("y", type = "json") +#' # Automatically chooses one of the available types +#' b %>% pin_read("y") +pin_read <- function( + board, + name, + version = NULL, + hash = NULL, + type = NULL, + ... +) { check_dots_used() check_board(board, "pin_read", "pin_get") meta <- pin_fetch(board, name, version = version, ...) check_hash(meta, hash) - object_read(meta) + object_read(meta, type) } #' @param x An object (typically a data frame) to pin. @@ -55,13 +71,13 @@ pin_read <- function(board, name, version = NULL, hash = NULL, ...) { #' @param metadata A list containing additional metadata to store with the pin. #' When retrieving the pin, this will be stored in the `user` key, to #' avoid potential clashes with the metadata that pins itself uses. -#' @param type File type used to save `x` to disk. Must be one of -#' "csv", "json", "rds", "parquet", "arrow", "qs", or "qs2". If not supplied, will -#' use JSON for bare lists and RDS for everything else. Be aware that CSV and -#' JSON are plain text formats, while RDS, Parquet, Arrow, +#' @param type File types used to save `x` to disk. Supports a single type or a +#' vector of types (to pin in more than one format). Each type must be one of +#' "csv", "json", "rds", "parquet", "arrow", "qs", or "qs2". If not supplied, +#' will use JSON for bare lists and RDS for everything else. Be aware that CSV +#' and JSON are plain text formats, while RDS, Parquet, Arrow, #' [qs](https://CRAN.R-project.org/package=qs), and -#' [qs2](https://CRAN.R-project.org/package=qs2) -#' are binary formats. 
+#' [qs2](https://CRAN.R-project.org/package=qs2) are binary formats. #' @param versioned Should the pin be versioned? The default, `NULL`, will #' use the default for `board` #' @param tags A character vector of tags for the pin; most important for @@ -73,23 +89,28 @@ pin_read <- function(board, name, version = NULL, hash = NULL, ...) { #' contents are compared, not the pin metadata. Defaults to `FALSE`. #' @rdname pin_read #' @export -pin_write <- function(board, x, - name = NULL, - ..., - type = NULL, - title = NULL, - description = NULL, - metadata = NULL, - versioned = NULL, - tags = NULL, - urls = NULL, - force_identical_write = FALSE) { +pin_write <- function( + board, + x, + name = NULL, + ..., + type = NULL, + title = NULL, + description = NULL, + metadata = NULL, + versioned = NULL, + tags = NULL, + urls = NULL, + force_identical_write = FALSE +) { check_board(board, "pin_write", "pin") dots <- list2(...) if (!missing(...) && (is.null(names(dots)) || names(dots)[[1]] == "")) { - cli::cli_abort('Arguments after the dots `...` must be named, like {.code type = "json"}.') + cli::cli_abort( + 'Arguments after the dots `...` must be named, like {.code type = "json"}.' 
+ ) } - if (!is_null(type) && type == "qs") { + if (!is_null(type) && any("qs" %in% type)) { lifecycle::deprecate_soft( when = "1.4.2", what = I('The file type "qs"'), @@ -119,12 +140,17 @@ pin_write <- function(board, x, pins_inform("Guessing `type = '{type}'`") } - path <- fs::path_temp(fs::path_ext_set(fs::path_file(name), type)) - object_write(x, path, type = type) - withr::defer(fs::file_delete(path)) + paths <- fs::path_temp(fs::path_ext_set( + fs::path_file(rep_along(type, name)), + type + )) + for (i in seq_along(paths)) { + object_write(x, paths[i], type = type[i], call = caller_env()) + } + withr::defer(fs::file_delete(paths)) meta <- standard_meta( - paths = path, + paths = paths, type = type, title = title %||% default_title(name, data = x), description = description, @@ -145,7 +171,7 @@ pin_write <- function(board, x, } check_dots_used() - name <- pin_store(board, name, path, meta, versioned = versioned, x = x, ...) + name <- pin_store(board, name, paths, meta, versioned = versioned, x = x, ...) pins_inform("Writing to pin '{name}'") invisible(name) } @@ -161,10 +187,11 @@ guess_type <- function(x) { } } -object_write <- function(x, path, type = "rds") { - type <- arg_match0(type, setdiff(object_types, "file")) +object_write <- function(x, path, type = "rds", call) { + type <- arg_match0(type, setdiff(object_types, "file"), error_call = call) - switch(type, + switch( + type, rds = write_rds(x, path), json = jsonlite::write_json(x, path, auto_unbox = TRUE), parquet = write_parquet(x, path), @@ -222,20 +249,37 @@ write_arrow <- function(x, path) { invisible(path) } -object_types <- +object_types <- c("rds", "json", "parquet", "arrow", "pickle", "csv", "qs", "qs2", "file") -object_read <- function(meta) { +object_read <- function(meta, type, call = caller_env()) { path <- fs::path(meta$local$dir, meta$file) + missing <- !fs::file_exists(path) if (any(missing)) { abort(c("Cache failure. 
Missing files:", path[!missing])) } + if (is.null(type)) { + type <- meta$type[1] + if (length(meta$type) > 1) { + cli::cli_warn(c( + "!" = "Pin {.val {meta$name}} has multiple types: {.val {meta$type}}", + "*" = "Automatically choosing {.val {type}}", + "*" = "To avoid this warning, specify the {.arg type} explicitly" + )) + } + } + + path <- path |> + purrr::detect(~ fs::path_ext(.x) == type) + if (meta$api_version == 1) { - type <- arg_match0(meta$type, object_types) + type <- arg_match0(type, object_types, error_call = call) + type <- arg_match0(type, meta$type, error_call = call) - switch(type, + switch( + type, rds = readRDS(path), json = jsonlite::read_json(path, simplifyVector = TRUE), parquet = read_parquet(path), @@ -256,7 +300,8 @@ object_read <- function(meta) { type <- arg_match0(meta$type, c("default", "files", "table")) path <- fs::path_dir(path[[1]]) - switch(type, + switch( + type, default = pin_load.default(path), table = pin_load.table(path), files = pin_load.files(path) diff --git a/man/pin_read.Rd b/man/pin_read.Rd index 683dc711..c4981b7f 100644 --- a/man/pin_read.Rd +++ b/man/pin_read.Rd @@ -5,7 +5,7 @@ \alias{pin_write} \title{Read and write objects to and from a board} \usage{ -pin_read(board, name, version = NULL, hash = NULL, ...) +pin_read(board, name, version = NULL, hash = NULL, type = NULL, ...) pin_write( board, @@ -35,18 +35,18 @@ find out which versions are available and when they were created.} you expect. You can find the hash of an existing pin by looking for \code{pin_hash} in \code{\link[=pin_meta]{pin_meta()}}.} +\item{type}{File types used to save \code{x} to disk. Supports a single type or a +vector of types (to pin in more than one format). Each type must be one of +"csv", "json", "rds", "parquet", "arrow", "qs", or "qs2". If not supplied, +will use JSON for bare lists and RDS for everything else. 
Be aware that CSV +and JSON are plain text formats, while RDS, Parquet, Arrow, +\href{https://CRAN.R-project.org/package=qs}{qs}, and +\href{https://CRAN.R-project.org/package=qs2}{qs2} are binary formats.} + \item{...}{Additional arguments passed on to methods for a specific board.} \item{x}{An object (typically a data frame) to pin.} -\item{type}{File type used to save \code{x} to disk. Must be one of -"csv", "json", "rds", "parquet", "arrow", "qs" or "qs2". If not supplied, will -use JSON for bare lists and RDS for everything else. Be aware that CSV and -JSON are plain text formats, while RDS, Parquet, Arrow, -\href{https://CRAN.R-project.org/package=qs}{qs} and -\href{https://CRAN.R-project.org/package=qs2}{qs2} -are binary formats.} - \item{title}{A title for the pin; most important for shared boards so that others can understand what the pin contains. If omitted, a brief description of the contents will be automatically generated.} @@ -101,4 +101,10 @@ b \%>\% pin_versions("x") b \%>\% pin_read("x", version = .Last.value$version[[1]]) # (Normally you'd specify the version with a string, but since the # version includes the date-time I can't do that in an example) + + # Pin with multiple types + b \%>\% pin_write(1:10, "y", type = c("rds", "json")) + b \%>\% pin_read("y", type = "json") + # Automatically chooses one of the available types + b \%>\% pin_read("y") } diff --git a/tests/testthat/_snaps/pin-read-write.md b/tests/testthat/_snaps/pin-read-write.md index 5fa0a9bf..c1b0849f 100644 --- a/tests/testthat/_snaps/pin-read-write.md +++ b/tests/testthat/_snaps/pin-read-write.md @@ -28,7 +28,7 @@ Code pin_write(board, mtcars, name = "mtcars", type = "froopy-loops") Condition - Error in `object_write()`: + Error: ! `type` must be one of "rds", "json", "parquet", "arrow", "pickle", "csv", "qs", or "qs2", not "froopy-loops". Code pin_write(board, mtcars, name = "mtcars", metadata = 1) @@ -81,3 +81,32 @@ Error in `pin_read()`: ! 
Specified hash "ABCD" doesn't match pin hash "dfa6c1c109362781". +# can write and read multiple types + + Code + pin_read(board, "df-1", type = "froopy-loops") + Condition + Error in `pin_read()`: + ! `type` must be one of "rds", "json", "parquet", "arrow", "pickle", "csv", "qs", "qs2", or "file", not "froopy-loops". + Code + pin_read(board, "df-1") + Condition + Warning: + ! Pin "df-1" has multiple types: "rds" and "csv" + * Automatically choosing "rds" + * To avoid this warning, specify the `type` explicitly + Output + # A tibble: 10 x 1 + x + + 1 1 + 2 2 + 3 3 + 4 4 + 5 5 + 6 6 + 7 7 + 8 8 + 9 9 + 10 10 + diff --git a/tests/testthat/test-pin-read-write.R b/tests/testthat/test-pin-read-write.R index 0c24235f..2b1e46c1 100644 --- a/tests/testthat/test-pin-read-write.R +++ b/tests/testthat/test-pin-read-write.R @@ -107,3 +107,34 @@ test_that("can request specific hash", { pin_read(b, "mtcars", hash = "ABCD") }) }) + +test_that("can write and read multiple types", { + board <- board_temp() + + # Data frames + df <- tibble::tibble(x = 1:10) + pin_write(board, df, "df-1", type = c("rds", "csv")) + + expect_warning( + board |> + pin_read("df-1") |> + expect_equal(df) + ) + + board |> + pin_read("df-1", type = "rds") |> + expect_equal(df) + + board |> + pin_read("df-1", type = "csv") |> + tibble::as_tibble() |> + expect_equal(df) + + expect_snapshot(error = TRUE, { + board |> + pin_read("df-1", type = "froopy-loops") + # No error, only snapshot for warning + board |> + pin_read("df-1") + }) +})