diff --git a/r/DESCRIPTION b/r/DESCRIPTION index 2df07e68744..f1bc25faaa4 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -78,4 +78,5 @@ Collate: 'read_table.R' 'reexports-bit64.R' 'reexports-tidyselect.R' + 'to-arrow.R' 'write_arrow.R' diff --git a/r/NAMESPACE b/r/NAMESPACE index 3a413c0e802..48d4da5807b 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -83,6 +83,9 @@ S3method(read_table,"arrow::ipc::RecordBatchStreamReader") S3method(read_table,character) S3method(read_table,fs_path) S3method(read_table,raw) +S3method(to_arrow,"arrow::Object") +S3method(to_arrow,data.frame) +S3method(to_arrow,default) S3method(type,"arrow::Array") S3method(type,"arrow::ChunkedArray") S3method(type,"arrow::Column") @@ -120,7 +123,6 @@ export(RecordBatchStreamWriter) export(StatusCode) export(TimeUnit) export(Type) -export(array) export(arrow_available) export(boolean) export(buffer) @@ -176,10 +178,11 @@ export(record_batch) export(schema) export(starts_with) export(struct) -export(table) +export(table_from_dots) export(time32) export(time64) export(timestamp) +export(to_arrow) export(type) export(uint16) export(uint32) diff --git a/r/R/Table.R b/r/R/Table.R index 15ea48fe7c1..1c9889c05f7 100644 --- a/r/R/Table.R +++ b/r/R/Table.R @@ -65,24 +65,6 @@ ) ) -#' Create an arrow::Table from a data frame -#' -#' @param ... arrays, chunked arrays, or R vectors -#' @param schema a schema. The default (`NULL`) infers the schema from the `...` -#' -#' @return an arrow::Table -#' -#' @export -table <- function(..., schema = NULL){ - dots <- list2(...) 
- # making sure there are always names - if (is.null(names(dots))) { - names(dots) <- rep_len("", length(dots)) - } - stopifnot(length(dots) > 0) - shared_ptr(`arrow::Table`, Table__from_dots(dots, schema)) -} - #' @export `as.data.frame.arrow::Table` <- function(x, row.names = NULL, optional = FALSE, use_threads = TRUE, ...){ Table__to_dataframe(x, use_threads = option_use_threads()) diff --git a/r/R/array.R b/r/R/array.R index deb3bc53893..4ba65794d89 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -147,13 +147,3 @@ #' @export `==.arrow::Array` <- function(x, y) x$Equals(y) - -#' create an [arrow::Array][arrow__Array] from an R vector -#' -#' @param x R object -#' @param type Explicit [type][arrow__DataType], or NULL (the default) to infer from the data -#' -#' @export -array <- function(x, type = NULL){ - `arrow::Array`$dispatch(Array__from_vector(x, type)) -} diff --git a/r/R/to-arrow.R b/r/R/to-arrow.R new file mode 100644 index 00000000000..852244d8650 --- /dev/null +++ b/r/R/to-arrow.R @@ -0,0 +1,102 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +#' Send R data structures to Arrow +#' +#' @param x an R vector or `data.frame` +#' @param ... 
additional arguments passed to some methods: +#' * `table` logical: when providing a `data.frame` input, should it be made +#' into an Arrow Table or a struct-type Array? Default is `TRUE` unless you +#' specify a `type`. +#' * `type` an explicit [type][arrow__DataType], or NULL (the default) to +#' infer from `x`. Only valid when making an `Array`. +#' * `schema` a schema. The default (`NULL`) infers the schema from the `x`. +#' Only valid when making a `Table` from a `data.frame` +#' @return An `arrow::Table` if `x` is a `data.frame` unless otherwise directed, +#' or an `arrow::Array`. +#' @examples +#' \donttest{ +#' tbl <- data.frame( +#' int = 1:10, +#' dbl = as.numeric(1:10), +#' lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE), +#' chr = letters[1:10], +#' stringsAsFactors = FALSE +#' ) +#' tab <- to_arrow(tbl) +#' tab$schema +#' +#' a <- to_arrow(tbl$int) +#' +#' # Making a struct column from a data.frame +#' df <- tibble::tibble(x = 1:10, y = 1:10) +#' a <- to_arrow(df, table = FALSE) +#' # Or specify a type +#' a <- to_arrow(df, type = struct(x = float64(), y = int16())) +#' } +#' @export +to_arrow <- function(x, ...) { + UseMethod("to_arrow") +} + +#' @export +`to_arrow.arrow::Object` <- function(x, ...) x + +#' @export +to_arrow.data.frame <- function(x, table = is.null(type), type = NULL, schema = NULL, ...) { + if (!is.null(if (table) type else schema)) stop("`type` is only valid with table = FALSE, `schema` only with table = TRUE", call. = FALSE) + if (table) { + # Default: make an arrow Table + shared_ptr(`arrow::Table`, Table__from_dots(x, schema_sxp = schema)) + } else { + # Make this a struct array + to_arrow.default(x, type = type) + } +} + +#' @export +to_arrow.default <- function(x, type = NULL, ...) { + `arrow::Array`$dispatch(Array__from_vector(x, s_type = type)) +} + +#' Create an arrow::Table from diverse inputs +#' +#' Unlike [to_arrow()], this function splices together inputs to form a Table. +#' When providing columns, they can be a mix of Arrow arrays and R vectors. +#' +#' @param ...
arrays, chunked arrays, or R vectors that should define the +#' columns of the Arrow Table; alternatively, if record batches are given, +#' they will be stacked. +#' @param schema a schema. The default (`NULL`) infers the schema from the `...` +#' +#' @return An `arrow::Table` +#' @examples +#' \donttest{ +#' tab1 <- table_from_dots(a = 1:10, b = letters[1:10]) +#' tab1 +#' as.data.frame(tab1) +#' } +#' @export +table_from_dots <- function(..., schema = NULL){ + dots <- list2(...) + # making sure there are always names + if (is.null(names(dots))) { + names(dots) <- rep_len("", length(dots)) + } + stopifnot(length(dots) > 0) + shared_ptr(`arrow::Table`, Table__from_dots(dots, schema)) +} diff --git a/r/R/write_arrow.R b/r/R/write_arrow.R index 435fa82a40f..5d58e83a60e 100644 --- a/r/R/write_arrow.R +++ b/r/R/write_arrow.R @@ -15,17 +15,6 @@ # specific language governing permissions and limitations # under the License. -to_arrow <- function(x) { - UseMethod("to_arrow") -} - -`to_arrow.arrow::RecordBatch` <- function(x) x -`to_arrow.arrow::Table` <- function(x) x - -# splice the data frame as arguments of table() -# see ?rlang::list2() -`to_arrow.data.frame` <- function(x) table(!!!x) - #' Write Arrow formatted data #' #' @param x an [arrow::Table][arrow__Table], an [arrow::RecordBatch][arrow__RecordBatch] or a data.frame diff --git a/r/README.Rmd b/r/README.Rmd index 0fad65925d8..9e2e92bb8ce 100644 --- a/r/README.Rmd +++ b/r/README.Rmd @@ -54,7 +54,8 @@ When installing from source, if the R and C++ library versions do not match, ins library(arrow) set.seed(24) -tab <- arrow::table(x = 1:10, y = rnorm(10)) +df <- data.frame(x = 1:10, y = rnorm(10)) +tab <- to_arrow(df) tab$schema tab as.data.frame(tab) diff --git a/r/README.md b/r/README.md index 4a6af748faf..804591d975c 100644 --- a/r/README.md +++ b/r/README.md @@ -69,7 +69,8 @@ Arrow C++ library first. 
library(arrow) set.seed(24) -tab <- arrow::table(x = 1:10, y = rnorm(10)) +df <- data.frame(x = 1:10, y = rnorm(10)) +tab <- to_arrow(df) tab$schema #> arrow::Schema #> x: int32 diff --git a/r/_pkgdown.yml b/r/_pkgdown.yml index 552eff97252..5ca866287fc 100644 --- a/r/_pkgdown.yml +++ b/r/_pkgdown.yml @@ -62,12 +62,11 @@ reference: - parquet_file_reader - title: Arrow data containers contents: + - to_arrow - buffer - - array - chunked_array - record_batch - schema - - table - type - dictionary - field diff --git a/r/man/array.Rd b/r/man/array.Rd deleted file mode 100644 index 2b784caf9a1..00000000000 --- a/r/man/array.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/array.R -\name{array} -\alias{array} -\title{create an \link[=arrow__Array]{arrow::Array} from an R vector} -\usage{ -array(x, type = NULL) -} -\arguments{ -\item{x}{R object} - -\item{type}{Explicit \link[=arrow__DataType]{type}, or NULL (the default) to infer from the data} -} -\description{ -create an \link[=arrow__Array]{arrow::Array} from an R vector -} diff --git a/r/man/table.Rd b/r/man/table.Rd deleted file mode 100644 index fbf9632a03a..00000000000 --- a/r/man/table.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Table.R -\name{table} -\alias{table} -\title{Create an arrow::Table from a data frame} -\usage{ -table(..., schema = NULL) -} -\arguments{ -\item{...}{arrays, chunked arrays, or R vectors} - -\item{schema}{a schema. 
The default (\code{NULL}) infers the schema from the \code{...}} -} -\value{ -an arrow::Table -} -\description{ -Create an arrow::Table from a data frame -} diff --git a/r/man/table_from_dots.Rd b/r/man/table_from_dots.Rd new file mode 100644 index 00000000000..62c8f39bcb0 --- /dev/null +++ b/r/man/table_from_dots.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/to-arrow.R +\name{table_from_dots} +\alias{table_from_dots} +\title{Create an arrow::Table from diverse inputs} +\usage{ +table_from_dots(..., schema = NULL) +} +\arguments{ +\item{...}{arrays, chunked arrays, or R vectors that should define the +columns of the Arrow Table; alternatively, if record batches are given, +they will be stacked.} + +\item{schema}{a schema. The default (\code{NULL}) infers the schema from the \code{...}} +} +\value{ +An \code{arrow::Table} +} +\description{ +Unlike \code{\link[=to_arrow]{to_arrow()}}, this function splices together inputs to form a Table. +When providing columns, they can be a mix of Arrow arrays and R vectors. +} +\examples{ +\donttest{ +tab1 <- table_from_dots(a = 1:10, b = letters[1:10]) +tab1 +as.data.frame(tab1) +} +} diff --git a/r/man/to_arrow.Rd b/r/man/to_arrow.Rd new file mode 100644 index 00000000000..2e1fe5f7672 --- /dev/null +++ b/r/man/to_arrow.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/to-arrow.R +\name{to_arrow} +\alias{to_arrow} +\title{Send R data structures to Arrow} +\usage{ +to_arrow(x, ...) +} +\arguments{ +\item{x}{an R vector or \code{data.frame}} + +\item{...}{additional arguments passed to some methods: +\itemize{ +\item \code{table} logical: when providing a \code{data.frame} input, should it be made +into an Arrow Table or a struct-type Array? Default is \code{TRUE} unless you +specify a \code{type}. +\item \code{type} an explicit \link[=arrow__DataType]{type}, or NULL (the default) to +infer from \code{x}. 
Only valid when making an \code{Array}. +\item \code{schema} a schema. The default (\code{NULL}) infers the schema from the \code{x}. +Only valid when making a \code{Table} from a \code{data.frame} +}} +} +\value{ +An \code{arrow::Table} if \code{x} is a \code{data.frame} unless otherwise directed, +or an \code{arrow::Array}. +} +\description{ +Send R data structures to Arrow +} +\examples{ +\donttest{ +tbl <- data.frame( + int = 1:10, + dbl = as.numeric(1:10), + lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE), + chr = letters[1:10], + stringsAsFactors = FALSE +) +tab <- to_arrow(tbl) +tab$schema + +a <- to_arrow(tbl$int) + +# Making a struct column from a data.frame +df <- tibble::tibble(x = 1:10, y = 1:10) +a <- to_arrow(df, table = FALSE) +# Or specify a type +a <- to_arrow(df, type = struct(x = float64(), y = int16())) +} +} diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index 36518cbf645..baa0a50fb4e 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -18,7 +18,7 @@ context("arrow::Array") test_that("Array", { - x <- array(c(1:10, 1:10, 1:5)) + x <- to_arrow(c(1:10, 1:10, 1:5)) expect_equal(x$type, int32()) expect_equal(x$length(), 25L) expect_equal(x$as_vector(), c(1:10, 1:10, 1:5)) @@ -35,7 +35,7 @@ test_that("Array", { expect_equal(z$as_vector(), c(1:5)) expect_true(x$RangeEquals(z, 10, 15, 0)) - x_dbl <- array(c(1,2,3,4,5,6)) + x_dbl <- to_arrow(c(1,2,3,4,5,6)) expect_equal(x_dbl$type, float64()) expect_equal(x_dbl$length(), 6L) expect_equal(x_dbl$as_vector(), as.numeric(1:6)) @@ -53,8 +53,8 @@ test_that("Array", { }) test_that("Array supports NA", { - x_int <- array(as.integer(c(1:10, NA))) - x_dbl <- array(as.numeric(c(1:10, NA))) + x_int <- to_arrow(as.integer(c(1:10, NA))) + x_dbl <- to_arrow(as.numeric(c(1:10, NA))) expect_true(x_int$IsValid(0L)) expect_true(x_dbl$IsValid(0L)) expect_true(x_int$IsNull(10L)) @@ -68,19 +68,19 @@ test_that("Array supports NA", { test_that("Array supports 
logical vectors (ARROW-3341)", { # with NA x <- sample(c(TRUE, FALSE, NA), 1000, replace = TRUE) - arr_lgl <- array(x) + arr_lgl <- to_arrow(x) expect_identical(x, arr_lgl$as_vector()) # without NA x <- sample(c(TRUE, FALSE), 1000, replace = TRUE) - arr_lgl <- array(x) + arr_lgl <- to_arrow(x) expect_identical(x, arr_lgl$as_vector()) }) test_that("Array supports character vectors (ARROW-3339)", { # with NA x <- c("itsy", NA, "spider") - arr_chr <- array(x) + arr_chr <- to_arrow(x) expect_equal(arr_chr$length(), 3L) expect_identical(arr_chr$as_vector(), x) expect_true(arr_chr$IsValid(0)) @@ -92,51 +92,51 @@ test_that("Array supports character vectors (ARROW-3339)", { # without NA x <- c("itsy", "bitsy", "spider") - arr_chr <- array(x) + arr_chr <- to_arrow(x) expect_equal(arr_chr$length(), 3L) expect_identical(arr_chr$as_vector(), x) }) test_that("empty arrays are supported", { x <- character() - expect_equal(array(x)$as_vector(), x) + expect_equal(to_arrow(x)$as_vector(), x) x <- integer() - expect_equal(array(x)$as_vector(), x) + expect_equal(to_arrow(x)$as_vector(), x) x <- numeric() - expect_equal(array(x)$as_vector(), x) + expect_equal(to_arrow(x)$as_vector(), x) x <- factor(character()) - expect_equal(array(x)$as_vector(), x) + expect_equal(to_arrow(x)$as_vector(), x) x <- logical() - expect_equal(array(x)$as_vector(), x) + expect_equal(to_arrow(x)$as_vector(), x) }) test_that("array with all nulls are supported", { nas <- c(NA, NA) x <- as.logical(nas) - expect_equal(array(x)$as_vector(), x) + expect_equal(to_arrow(x)$as_vector(), x) x <- as.integer(nas) - expect_equal(array(x)$as_vector(), x) + expect_equal(to_arrow(x)$as_vector(), x) x <- as.numeric(nas) - expect_equal(array(x)$as_vector(), x) + expect_equal(to_arrow(x)$as_vector(), x) x <- as.character(nas) - expect_equal(array(x)$as_vector(), x) + expect_equal(to_arrow(x)$as_vector(), x) x <- as.factor(nas) - expect_equal(array(x)$as_vector(), x) + expect_equal(to_arrow(x)$as_vector(), x) }) 
test_that("Array supports unordered factors (ARROW-3355)", { # without NA f <- factor(c("itsy", "bitsy", "spider", "spider")) - arr_fac <- array(f) + arr_fac <- to_arrow(f) expect_equal(arr_fac$length(), 4L) expect_equal(arr_fac$type$index_type, int8()) expect_identical(arr_fac$as_vector(), f) @@ -152,7 +152,7 @@ test_that("Array supports unordered factors (ARROW-3355)", { # with NA f <- factor(c("itsy", "bitsy", NA, "spider", "spider")) - arr_fac <- array(f) + arr_fac <- to_arrow(f) expect_equal(arr_fac$length(), 5L) expect_equal(arr_fac$type$index_type, int8()) expect_identical(arr_fac$as_vector(), f) @@ -171,7 +171,7 @@ test_that("Array supports unordered factors (ARROW-3355)", { test_that("Array supports ordered factors (ARROW-3355)", { # without NA f <- ordered(c("itsy", "bitsy", "spider", "spider")) - arr_fac <- array(f) + arr_fac <- to_arrow(f) expect_equal(arr_fac$length(), 4L) expect_equal(arr_fac$type$index_type, int8()) expect_identical(arr_fac$as_vector(), f) @@ -187,7 +187,7 @@ test_that("Array supports ordered factors (ARROW-3355)", { # with NA f <- ordered(c("itsy", "bitsy", NA, "spider", "spider")) - arr_fac <- array(f) + arr_fac <- to_arrow(f) expect_equal(arr_fac$length(), 5L) expect_equal(arr_fac$type$index_type, int8()) expect_identical(arr_fac$as_vector(), f) @@ -205,20 +205,20 @@ test_that("Array supports ordered factors (ARROW-3355)", { test_that("array supports Date (ARROW-3340)", { d <- Sys.Date() + 1:10 - a <- array(d) + a <- to_arrow(d) expect_equal(a$type, date32()) expect_equal(a$length(), 10L) expect_equal(a$as_vector(), d) d[5] <- NA - a <- array(d) + a <- to_arrow(d) expect_equal(a$type, date32()) expect_equal(a$length(), 10L) expect_equal(a$as_vector(), d) expect_true(a$IsNull(4)) d2 <- d + .5 - a <- array(d2) + a <- to_arrow(d2) expect_equal(a$type, date32()) expect_equal(a$length(), 10L) expect_equal(a$as_vector(), d) @@ -227,14 +227,14 @@ test_that("array supports Date (ARROW-3340)", { test_that("array supports POSIXct 
(ARROW-3340)", { times <- lubridate::ymd_hms("2018-10-07 19:04:05") + 1:10 - a <- array(times) + a <- to_arrow(times) expect_equal(a$type$name, "timestamp") expect_equal(a$type$unit(), unclass(TimeUnit$MICRO)) expect_equal(a$length(), 10L) expect_equal(as.numeric(a$as_vector()), as.numeric(times)) times[5] <- NA - a <- array(times) + a <- to_arrow(times) expect_equal(a$type$name, "timestamp") expect_equal(a$type$unit(), unclass(TimeUnit$MICRO)) expect_equal(a$length(), 10L) @@ -244,13 +244,13 @@ test_that("array supports POSIXct (ARROW-3340)", { test_that("array supports integer64", { x <- bit64::as.integer64(1:10) - a <- array(x) + a <- to_arrow(x) expect_equal(a$type, int64()) expect_equal(a$length(), 10L) expect_equal(a$as_vector(), x) x[4] <- NA - a <- array(x) + a <- to_arrow(x) expect_equal(a$type, int64()) expect_equal(a$length(), 10L) expect_equal(a$as_vector(), x) @@ -259,18 +259,18 @@ test_that("array supports integer64", { test_that("array$as_vector() correctly handles all NA inte64 (ARROW-3795)", { x <- bit64::as.integer64(NA) - a <- array(x) + a <- to_arrow(x) expect_true(is.na(a$as_vector())) }) test_that("array supports difftime", { time <- hms::hms(56, 34, 12) - a <- array(c(time, time)) + a <- to_arrow(c(time, time)) expect_equal(a$type, time32(unit = TimeUnit$SECOND)) expect_equal(a$length(), 2L) expect_equal(a$as_vector(), c(time, time)) - a <- array(vctrs::vec_c(time, NA)) + a <- to_arrow(vctrs::vec_c(time, NA)) expect_equal(a$type, time32(unit = TimeUnit$SECOND)) expect_equal(a$length(), 2L) expect_true(a$IsNull(1)) @@ -280,13 +280,13 @@ test_that("array supports difftime", { test_that("support for NaN (ARROW-3615)", { x <- c(1, NA, NaN, -1) - y <- array(x) + y <- to_arrow(x) expect_true(y$IsValid(2)) expect_equal(y$null_count, 1L) }) test_that("integer types casts (ARROW-3741)", { - a <- array(c(1:10, NA)) + a <- to_arrow(c(1:10, NA)) a_int8 <- a$cast(int8()) a_int16 <- a$cast(int16()) a_int32 <- a$cast(int32()) @@ -317,7 +317,7 @@ 
test_that("integer types casts (ARROW-3741)", { }) test_that("integer types cast safety (ARROW-3741, ARROW-5541)", { - a <- array(-(1:10)) + a <- to_arrow(-(1:10)) expect_error(a$cast(uint8()), regexp = "Integer value out of bounds") expect_error(a$cast(uint16()), regexp = "Integer value out of bounds") expect_error(a$cast(uint32()), regexp = "Integer value out of bounds") @@ -331,7 +331,7 @@ test_that("integer types cast safety (ARROW-3741, ARROW-5541)", { test_that("float types casts (ARROW-3741)", { x <- c(1, 2, 3, NA) - a <- array(x) + a <- to_arrow(x) a_f32 <- a$cast(float32()) a_f64 <- a$cast(float64()) @@ -347,12 +347,12 @@ test_that("float types casts (ARROW-3741)", { test_that("cast to half float works", { skip("until https://issues.apache.org/jira/browse/ARROW-3802") - a <- array(1:4) + a <- to_arrow(1:4) a_f16 <- a$cast(float16()) - expect_equal(a_16$type, float16()) + expect_equal(a_f16$type, float16()) }) -test_that("array() supports the type= argument. conversion from INTSXP and int64 to all int types", { +test_that("to_arrow() supports the type= argument. conversion from INTSXP and int64 to all int types", { num_int32 <- 12L num_int64 <- bit64::as.integer64(10) @@ -362,80 +362,79 @@ test_that("array() supports the type= argument.
conversion from INTSXP and int64 float32(), float64() ) for(type in types) { - expect_equal(array(num_int32, type = type)$type, type) - expect_equal(array(num_int64, type = type)$type, type) + expect_equal(to_arrow(num_int32, type = type)$type, type) + expect_equal(to_arrow(num_int64, type = type)$type, type) } }) -test_that("array() aborts on overflow", { - expect_error(array(128L, type = int8())$type, "Invalid.*downsize") - expect_error(array(-129L, type = int8())$type, "Invalid.*downsize") +test_that("to_arrow() aborts on overflow", { + expect_error(to_arrow(128L, type = int8())$type, "Invalid.*downsize") + expect_error(to_arrow(-129L, type = int8())$type, "Invalid.*downsize") - expect_error(array(256L, type = uint8())$type, "Invalid.*downsize") - expect_error(array(-1L, type = uint8())$type, "Invalid.*downsize") + expect_error(to_arrow(256L, type = uint8())$type, "Invalid.*downsize") + expect_error(to_arrow(-1L, type = uint8())$type, "Invalid.*downsize") - expect_error(array(32768L, type = int16())$type, "Invalid.*downsize") - expect_error(array(-32769L, type = int16())$type, "Invalid.*downsize") + expect_error(to_arrow(32768L, type = int16())$type, "Invalid.*downsize") + expect_error(to_arrow(-32769L, type = int16())$type, "Invalid.*downsize") - expect_error(array(65536L, type = uint16())$type, "Invalid.*downsize") - expect_error(array(-1L, type = uint16())$type, "Invalid.*downsize") + expect_error(to_arrow(65536L, type = uint16())$type, "Invalid.*downsize") + expect_error(to_arrow(-1L, type = uint16())$type, "Invalid.*downsize") - expect_error(array(65536L, type = uint16())$type, "Invalid.*downsize") - expect_error(array(-1L, type = uint16())$type, "Invalid.*downsize") + expect_error(to_arrow(65536L, type = uint16())$type, "Invalid.*downsize") + expect_error(to_arrow(-1L, type = uint16())$type, "Invalid.*downsize") - expect_error(array(bit64::as.integer64(2^31), type = int32()), "Invalid.*downsize") - expect_error(array(bit64::as.integer64(2^32), type = 
uint32()), "Invalid.*downsize") + expect_error(to_arrow(bit64::as.integer64(2^31), type = int32()), "Invalid.*downsize") + expect_error(to_arrow(bit64::as.integer64(2^32), type = uint32()), "Invalid.*downsize") }) -test_that("array() does not convert doubles to integer", { +test_that("to_arrow() does not convert doubles to integer", { types <- list( int8(), int16(), int32(), int64(), uint8(), uint16(), uint32(), uint64() ) for(type in types) { - expect_error(array(10, type = type)$type, "Cannot convert.*REALSXP") + expect_error(to_arrow(10, type = type)$type, "Cannot convert.*REALSXP") } }) -test_that("array() converts raw vectors to uint8 arrays (ARROW-3794)", { - expect_equal(array(as.raw(1:10))$type, uint8()) +test_that("to_arrow() converts raw vectors to uint8 arrays (ARROW-3794)", { + expect_equal(to_arrow(as.raw(1:10))$type, uint8()) }) test_that("Array$as_vector() converts to integer (ARROW-3794)", { - a <- array((-128):127)$cast(int8()) + a <- to_arrow((-128):127)$cast(int8()) expect_equal(a$type, int8()) expect_equal(a$as_vector(), (-128):127) - a <- array(0:255)$cast(uint8()) + a <- to_arrow(0:255)$cast(uint8()) expect_equal(a$type, uint8()) expect_equal(a$as_vector(), 0:255) }) -test_that("array() recognise arrow::Array (ARROW-3815)", { - a <- array(1:10) - expect_equal(a, array(a)) +test_that("to_arrow() recognise arrow::Array (ARROW-3815)", { + a <- to_arrow(1:10) + expect_equal(a, to_arrow(a)) }) -test_that("array() handles data frame -> struct arrays (ARROW-3811)", { +test_that("to_arrow() handles data frame -> struct arrays (ARROW-3811)", { df <- tibble::tibble(x = 1:10, y = x / 2, z = letters[1:10]) - a <- array(df) + a <- to_arrow(df, table = FALSE) expect_equal(a$type, struct(x = int32(), y = float64(), z = utf8())) expect_equivalent(a$as_vector(), df) }) -test_that("array() can handle data frame with custom struct type (not infered)", { +test_that("to_arrow() can handle data frame with custom struct type (not infered)", { df <- tibble::tibble(x 
= 1:10, y = 1:10) type <- struct(x = float64(), y = int16()) - a <- array(df, type = type) + a <- to_arrow(df, type = type) expect_equal(a$type, type) type <- struct(x = float64(), y = int16(), z = int32()) - expect_error(array(df, type = type), regexp = "Number of fields in struct.* incompatible with number of columns in the data frame") + expect_error(to_arrow(df, type = type), regexp = "Number of fields in struct.* incompatible with number of columns in the data frame") type <- struct(y = int16(), x = float64()) - expect_error(array(df, type = type), regexp = "Field name in position.*does not match the name of the column of the data frame") + expect_error(to_arrow(df, type = type), regexp = "Field name in position.*does not match the name of the column of the data frame") type <- struct(x = float64(), y = utf8()) - expect_error(array(df, type = type), regexp = "Cannot convert R object to string array") + expect_error(to_arrow(df, type = type), regexp = "Cannot convert R object to string array") }) - diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R index d5a141c87ff..7fe223d9e11 100644 --- a/r/tests/testthat/test-RecordBatch.R +++ b/r/tests/testthat/test-RecordBatch.R @@ -32,7 +32,7 @@ test_that("RecordBatch", { schema( int = int32(), dbl = float64(), lgl = boolean(), chr = utf8(), - fct = dictionary(int32(), array(letters[1:10])) + fct = dictionary(int32(), to_arrow(letters[1:10])) ) ) expect_equal(batch$num_columns, 5L) @@ -67,12 +67,12 @@ test_that("RecordBatch", { col_fct <- batch$column(4) expect_true(inherits(col_fct, 'arrow::Array')) expect_equal(col_fct$as_vector(), tbl$fct) - expect_equal(col_fct$type, dictionary(int32(), array(letters[1:10]))) + expect_equal(col_fct$type, dictionary(int32(), to_arrow(letters[1:10]))) batch2 <- batch$RemoveColumn(0) expect_equal( batch2$schema, - schema(dbl = float64(), lgl = boolean(), chr = utf8(), fct = dictionary(int32(), array(letters[1:10]))) + schema(dbl = float64(), lgl = 
boolean(), chr = utf8(), fct = dictionary(int32(), to_arrow(letters[1:10]))) ) expect_equal(batch2$column(0), batch$column(1)) expect_identical(as.data.frame(batch2), tbl[,-1]) @@ -103,7 +103,7 @@ test_that("RecordBatch with 0 rows are supported", { dbl = float64(), lgl = boolean(), chr = utf8(), - fct = dictionary(int32(), array(c("a", "b"))) + fct = dictionary(int32(), to_arrow(c("a", "b"))) ) ) }) @@ -147,7 +147,7 @@ test_that("RecordBatch dim() and nrow() (ARROW-3816)", { }) test_that("record_batch() handles arrow::Array", { - batch <- record_batch(x = 1:10, y = arrow::array(1:10)) + batch <- record_batch(x = 1:10, y = to_arrow(1:10)) expect_equal(batch$schema, schema(x = int32(), y = int32())) }) @@ -220,4 +220,3 @@ test_that("record_batch() only auto splice data frames", { regexp = "only data frames are allowed as unnamed arguments to be auto spliced" ) }) - diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R index ae948f27f59..158cf8526e7 100644 --- a/r/tests/testthat/test-Table.R +++ b/r/tests/testthat/test-Table.R @@ -23,7 +23,7 @@ test_that("read_table handles various input streams (ARROW-3450, ARROW-3505)", { lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE), chr = letters[1:10] ) - tab <- arrow::table(!!!tbl) + tab <- to_arrow(tbl) tf <- tempfile() write_arrow(tab, tf) @@ -64,7 +64,7 @@ test_that("read_table handles various input streams (ARROW-3450, ARROW-3505)", { }) test_that("Table cast (ARROW-3741)", { - tab <- table(x = 1:10, y = 1:10) + tab <- to_arrow(data.frame(x = 1:10, y = 1:10)) expect_error(tab$cast(schema(x = int32()))) expect_error(tab$cast(schema(x = int32(), z = int32()))) @@ -77,14 +77,15 @@ test_that("Table cast (ARROW-3741)", { }) test_that("Table dim() and nrow() (ARROW-3816)", { - tab <- table(x = 1:10, y = 1:10) + tab <- to_arrow(data.frame(x = 1:10, y = 1:10)) expect_equal(dim(tab), c(10L, 2L)) expect_equal(nrow(tab), 10L) }) -test_that("table() handles record batches with splicing", { 
+test_that("table_from_dots() handles record batches with splicing", { + # TODO: reimplement as something like rbind for record batches? batch <- record_batch(x = 1:2, y = letters[1:2]) - tab <- table(batch, batch, batch) + tab <- table_from_dots(batch, batch, batch) expect_equal(tab$schema, batch$schema) expect_equal(tab$num_rows, 6L) expect_equal( @@ -93,7 +94,7 @@ test_that("table() handles record batches with splicing", { ) batches <- list(batch, batch, batch) - tab <- table(!!!batches) + tab <- table_from_dots(!!!batches) expect_equal(tab$schema, batch$schema) expect_equal(tab$num_rows, 6L) expect_equal( @@ -102,13 +103,13 @@ test_that("table() handles record batches with splicing", { ) }) -test_that("table() handles ... of arrays, chunked arrays, vectors", { - a <- array(1:10) +test_that("table_from_dots() handles ... of arrays, chunked arrays, vectors", { + a <- to_arrow(1:10) ca <- chunked_array(1:5, 6:10) v <- rnorm(10) tbl <- tibble::tibble(x = 1:10, y = letters[1:10]) - tab <- table(a = a, b = ca, c = v, !!!tbl) + tab <- table_from_dots(a = a, b = ca, c = v, !!!tbl) expect_equal( tab$schema, schema(a = int32(), b = int32(), c = float64(), x = int32(), y = utf8()) @@ -120,20 +121,19 @@ test_that("table() handles ... 
of arrays, chunked arrays, vectors", { ) }) -test_that("table() auto splices (ARROW-5718)", { +test_that("table_from_dots() auto splices (ARROW-5718)", { df <- tibble::tibble(x = 1:10, y = letters[1:10]) - tab1 <- table(df) - tab2 <- table(!!!df) + tab1 <- to_arrow(df) + tab2 <- table_from_dots(!!!df) expect_equal(tab1, tab2) expect_equal(tab1$schema, schema(x = int32(), y = utf8())) expect_equivalent(as.data.frame(tab1), df) s <- schema(x = float64(), y = utf8()) - tab3 <- table(df, schema = s) - tab4 <- table(!!!df, schema = s) + tab3 <- to_arrow(df, schema = s) + tab4 <- table_from_dots(!!!df, schema = s) expect_equal(tab3, tab4) expect_equal(tab3$schema, s) expect_equivalent(as.data.frame(tab3), df) }) - diff --git a/r/tests/testthat/test-arraydata.R b/r/tests/testthat/test-arraydata.R index 02ca9b85625..3bfdf1656d4 100644 --- a/r/tests/testthat/test-arraydata.R +++ b/r/tests/testthat/test-arraydata.R @@ -18,7 +18,7 @@ context("arrow::ArrayData") test_that("string vectors with only empty strings and nulls don't allocate a data buffer (ARROW-3693)", { - a <- array("") + a <- to_arrow("") expect_equal(a$length(), 1L) buffers <- a$data()$buffers diff --git a/r/tests/testthat/test-arrow-csv.R b/r/tests/testthat/test-arrow-csv.R index 6fc0b3e07c5..da89a514adf 100644 --- a/r/tests/testthat/test-arrow-csv.R +++ b/r/tests/testthat/test-arrow-csv.R @@ -28,7 +28,7 @@ test_that("Can read csv file", { tab3 <- read_csv_arrow(ReadableFile(tf), as_tibble = FALSE) iris$Species <- as.character(iris$Species) - tab0 <- table(!!!iris) + tab0 <- to_arrow(iris) expect_equal(tab0, tab1) expect_equal(tab0, tab2) expect_equal(tab0, tab3) @@ -142,7 +142,7 @@ test_that("read_csv_arrow() respects col_select", { write.csv(iris, tf, row.names = FALSE, quote = FALSE) tab <- read_csv_arrow(tf, col_select = starts_with("Sepal"), as_tibble = FALSE) - expect_equal(tab, table(Sepal.Length = iris$Sepal.Length, Sepal.Width = iris$Sepal.Width)) + expect_equal(tab, to_arrow(iris[, c("Sepal.Length", 
"Sepal.Width")])) tib <- read_csv_arrow(tf, col_select = starts_with("Sepal"), as_tibble = TRUE) expect_equal(tib, tibble::tibble(Sepal.Length = iris$Sepal.Length, Sepal.Width = iris$Sepal.Width)) diff --git a/r/tests/testthat/test-buffer.R b/r/tests/testthat/test-buffer.R index 4dfbecaf1f5..4ebf0c528d7 100644 --- a/r/tests/testthat/test-buffer.R +++ b/r/tests/testthat/test-buffer.R @@ -56,7 +56,7 @@ test_that("can read remaining bytes of a RandomAccessFile", { lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE), chr = letters[1:10] ) - tab <- arrow::table(!!!tbl) + tab <- to_arrow(tbl) tf <- tempfile() all_bytes <- write_arrow(tab, tf) diff --git a/r/tests/testthat/test-chunkedarray.R b/r/tests/testthat/test-chunkedarray.R index b8ffe3d2f10..0c49827d56b 100644 --- a/r/tests/testthat/test-chunkedarray.R +++ b/r/tests/testthat/test-chunkedarray.R @@ -207,7 +207,7 @@ test_that("chunked_array() supports the type= argument. conversion from INTSXP a } }) -test_that("array() aborts on overflow", { +test_that("chunked_array() aborts on overflow", { expect_error(chunked_array(128L, type = int8())$type, "Invalid.*downsize") expect_error(chunked_array(-129L, type = int8())$type, "Invalid.*downsize") @@ -268,7 +268,7 @@ test_that("chunked_array() handles 0 chunks if given a type", { test_that("chunked_array() can ingest arrays (ARROW-3815)", { expect_equal( - chunked_array(1:5, array(6:10))$as_vector(), + chunked_array(1:5, to_arrow(6:10))$as_vector(), 1:10 ) }) diff --git a/r/tests/testthat/test-json.R b/r/tests/testthat/test-json.R index b3e7d5638f5..6caa367869d 100644 --- a/r/tests/testthat/test-json.R +++ b/r/tests/testthat/test-json.R @@ -114,8 +114,8 @@ test_that("Can read json file with nested columns (ARROW-5503)", { ) struct_array <- tab1$column(1)$chunk(0) - ps <- array(c(NA, NA, 78, 90, NA, 19)) - hello <- array(c(NA, NA, "hi", "bonjour", "ciao", NA)) + ps <- to_arrow(c(NA, NA, 78, 90, NA, 19)) + hello <- to_arrow(c(NA, NA, "hi", "bonjour", "ciao", NA)) 
expect_equal(struct_array$field(0L), ps) expect_equal(struct_array$GetFieldByName("ps"), ps) expect_equal(struct_array$Flatten(), list(ps, hello)) diff --git a/r/tests/testthat/test-read-write.R b/r/tests/testthat/test-read-write.R index 17d994deab2..a6b6c6a0448 100644 --- a/r/tests/testthat/test-read-write.R +++ b/r/tests/testthat/test-read-write.R @@ -24,7 +24,7 @@ test_that("arrow::table round trip", { raw = as.raw(1:10) ) - tab <- arrow::table(!!!tbl) + tab <- to_arrow(tbl) expect_equal(tab$num_columns, 3L) expect_equal(tab$num_rows, 10L) @@ -83,7 +83,7 @@ test_that("arrow::table round trip handles NA in integer and numeric", { raw = as.raw(1:10) ) - tab <- arrow::table(!!!tbl) + tab <- to_arrow(tbl) expect_equal(tab$num_columns, 3L) expect_equal(tab$num_rows, 10L) diff --git a/r/tests/testthat/test-read_record_batch.R b/r/tests/testthat/test-read_record_batch.R index adbb192fa59..92365903322 100644 --- a/r/tests/testthat/test-read_record_batch.R +++ b/r/tests/testthat/test-read_record_batch.R @@ -18,12 +18,14 @@ context("read_record_batch()") test_that("RecordBatchFileWriter / RecordBatchFileReader roundtrips", { - tab <- table( + df <- data.frame( int = 1:10, dbl = as.numeric(1:10), lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE), - chr = letters[1:10] + chr = letters[1:10], + stringsAsFactors = FALSE ) + tab <- to_arrow(df) tf <- tempfile() writer <- RecordBatchFileWriter(tf, tab$schema) diff --git a/r/tests/testthat/test-type.R b/r/tests/testthat/test-type.R index 70f8df63159..0c5590cf33b 100644 --- a/r/tests/testthat/test-type.R +++ b/r/tests/testthat/test-type.R @@ -18,7 +18,7 @@ context("test-type") test_that("type() gets the right type for arrow::Array", { - a <- array(1:10) + a <- to_arrow(1:10) expect_equal(type(a), a$type) }) @@ -35,7 +35,7 @@ test_that("type() infers from R type", { expect_equal(type(""), utf8()) expect_equal( type(iris$Species), - dictionary(int8(), array(levels(iris$Species)), FALSE) + dictionary(int8(), 
to_arrow(levels(iris$Species)), FALSE) ) expect_equal( type(lubridate::ymd_hms("2019-02-14 13:55:05")),