Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions r/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@ S3method("==","arrow::RecordBatch")
S3method("==","arrow::ipc::Message")
S3method(BufferReader,"arrow::Buffer")
S3method(BufferReader,default)
S3method(FeatherTableReader,"arrow::io::RandomAccessFile")
S3method(FeatherTableReader,"arrow::ipc::feather::TableReader")
S3method(FeatherTableReader,character)
S3method(FeatherTableReader,default)
S3method(FeatherTableReader,fs_path)
S3method(FeatherTableWriter,"arrow::io::OutputStream")
S3method(FixedSizeBufferWriter,"arrow::Buffer")
S3method(FixedSizeBufferWriter,default)
S3method(MessageReader,"arrow::io::InputStream")
Expand All @@ -33,12 +39,6 @@ S3method(buffer,default)
S3method(buffer,integer)
S3method(buffer,numeric)
S3method(buffer,raw)
S3method(feather_table_reader,"arrow::io::RandomAccessFile")
S3method(feather_table_reader,"arrow::ipc::feather::TableReader")
S3method(feather_table_reader,character)
S3method(feather_table_reader,default)
S3method(feather_table_reader,fs_path)
S3method(feather_table_writer,"arrow::io::OutputStream")
S3method(length,"arrow::Array")
S3method(names,"arrow::RecordBatch")
S3method(print,"arrow-enum")
Expand Down Expand Up @@ -70,6 +70,8 @@ S3method(write_feather_RecordBatch,fs_path)
export(BufferOutputStream)
export(BufferReader)
export(DateUnit)
export(FeatherTableReader)
export(FeatherTableWriter)
export(FileMode)
export(FileOutputStream)
export(FixedSizeBufferWriter)
Expand All @@ -95,8 +97,6 @@ export(date64)
export(decimal)
export(default_memory_pool)
export(dictionary)
export(feather_table_reader)
export(feather_table_writer)
export(field)
export(float16)
export(float32)
Expand Down
4 changes: 2 additions & 2 deletions r/R/RcppExports.R

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

44 changes: 26 additions & 18 deletions r/R/feather.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@
num_columns = function() ipc___feather___TableReader__num_columns(self),
GetColumnName = function(i) ipc___feather___TableReader__GetColumnName(self, i),
GetColumn = function(i) shared_ptr(`arrow::Column`, ipc___feather___TableReader__GetColumn(self, i)),
Read = function() shared_ptr(`arrow::Table`, ipc___feather___TableReader__Read(self))
# Read the feather file into an arrow::Table.
# `columns` is forwarded unchanged to ipc___feather___TableReader__Read();
# the returned pointer is wrapped as an `arrow::Table` via shared_ptr().
Read = function(columns) {
shared_ptr(`arrow::Table`, ipc___feather___TableReader__Read(self, columns))
}
)
)

Expand All @@ -44,12 +46,12 @@
#' @param stream an OutputStream
#'
#' @export
feather_table_writer <- function(stream) {
UseMethod("feather_table_writer")
FeatherTableWriter <- function(stream) {
# S3 generic: dispatches on the class of `stream`.
UseMethod("FeatherTableWriter")
}

#' @export
`feather_table_writer.arrow::io::OutputStream` <- function(stream){
`FeatherTableWriter.arrow::io::OutputStream` <- function(stream){
# Open a feather TableWriter on `stream` and wrap the returned pointer
# as an `arrow::ipc::feather::TableWriter` via unique_ptr().
unique_ptr(`arrow::ipc::feather::TableWriter`, ipc___feather___TableWriter__Open(stream))
}

Expand Down Expand Up @@ -107,7 +109,7 @@ write_feather_RecordBatch <- function(data, stream) {
#' @export
#' @method write_feather_RecordBatch arrow::io::OutputStream
`write_feather_RecordBatch.arrow::io::OutputStream` <- function(data, stream) {
ipc___TableWriter__RecordBatch__WriteFeather(feather_table_writer(stream), data)
ipc___TableWriter__RecordBatch__WriteFeather(FeatherTableWriter(stream), data)
}

#' An arrow::ipc::feather::TableReader to read from a file
Expand All @@ -117,44 +119,50 @@ write_feather_RecordBatch <- function(data, stream) {
#' @param ... extra parameters
#'
#' @export
feather_table_reader <- function(file, mmap = TRUE, ...){
UseMethod("feather_table_reader")
FeatherTableReader <- function(file, mmap = TRUE, ...){
# S3 generic: dispatches on the class of `file`.
UseMethod("FeatherTableReader")
}

#' @export
feather_table_reader.default <- function(file, mmap = TRUE, ...) {
FeatherTableReader.default <- function(file, mmap = TRUE, ...) {
  # Fallback method: reached when `file` has no class with a dedicated
  # FeatherTableReader method. Always raises an error; the message names the
  # class of `file` so the caller can see what was actually passed in.
  # `mmap` and `...` are accepted only to match the generic's signature.
  stop(
    "unsupported `file` for FeatherTableReader(): object of class '",
    paste(class(file), collapse = "/"), "'",
    call. = FALSE
  )
}

#' @export
feather_table_reader.character <- function(file, mmap = TRUE, ...) {
feather_table_reader(fs::path_abs(file), mmap = mmap, ...)
FeatherTableReader.character <- function(file, mmap = TRUE, ...) {
  # Turn the character path into an absolute fs_path, then dispatch again on
  # FeatherTableReader with that path, forwarding `mmap` and `...`.
  absolute_path <- fs::path_abs(file)
  FeatherTableReader(absolute_path, mmap = mmap, ...)
}

#' @export
feather_table_reader.fs_path <- function(file, mmap = TRUE, ...) {
FeatherTableReader.fs_path <- function(file, mmap = TRUE, ...) {
stream <- if(isTRUE(mmap)) mmap_open(file, ...) else ReadableFile(file, ...)
feather_table_reader(stream)
FeatherTableReader(stream)
}

#' @export
`feather_table_reader.arrow::io::RandomAccessFile` <- function(file, mmap = TRUE, ...){
`FeatherTableReader.arrow::io::RandomAccessFile` <- function(file, mmap = TRUE, ...){
# Open a feather TableReader on `file` and wrap the returned pointer as an
# `arrow::ipc::feather::TableReader` via unique_ptr().
# `mmap` and `...` are part of the signature but are not used by this method.
unique_ptr(`arrow::ipc::feather::TableReader`, ipc___feather___TableReader__Open(file))
}

#' @export
`feather_table_reader.arrow::ipc::feather::TableReader` <- function(file, mmap = TRUE, ...){
`FeatherTableReader.arrow::ipc::feather::TableReader` <- function(file, mmap = TRUE, ...){
# `file` is already a TableReader: return it unchanged.
# `mmap` and `...` are part of the signature but are not used by this method.
file
}

#' Read a feather file
#'
#' @param file a arrow::ipc::feather::TableReader or whatever the [feather_table_reader()] function can handle
#' @param file an arrow::ipc::feather::TableReader or whatever the [FeatherTableReader()] function can handle
#' @param columns names or 1-based integer positions of the columns to read. The default `NULL` means all columns
#' @param as_tibble should the [arrow::Table][arrow__Table] be converted to a tibble.
#' @param ... additional parameters
#'
#' @return an arrow::Table
#' @return a data frame if `as_tibble` is `TRUE` (the default), or an [arrow::Table][arrow__Table] otherwise
#'
#' @export
read_feather <- function(file, ...){
feather_table_reader(file, ...)$Read()
read_feather <- function(file, columns = NULL, as_tibble = TRUE, ...){
  # Build a reader from `file` (forwarding `...` to FeatherTableReader()) and
  # read the requested columns through its Read() method.
  reader <- FeatherTableReader(file, ...)
  tab <- reader$Read(columns)

  # Return the Read() result as-is unless `as_tibble` is exactly TRUE.
  if (!isTRUE(as_tibble)) {
    return(tab)
  }
  as_tibble(tab)
}
6 changes: 3 additions & 3 deletions r/man/feather_table_reader.Rd → r/man/FeatherTableReader.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions r/man/feather_table_writer.Rd → r/man/FeatherTableWriter.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 7 additions & 3 deletions r/man/read_feather.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 5 additions & 4 deletions r/src/RcppExports.cpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

32 changes: 30 additions & 2 deletions r/src/feather.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,37 @@ std::shared_ptr<arrow::Column> ipc___feather___TableReader__GetColumn(

// [[Rcpp::export]]
std::shared_ptr<arrow::Table> ipc___feather___TableReader__Read(
const std::unique_ptr<arrow::ipc::feather::TableReader>& reader) {
const std::unique_ptr<arrow::ipc::feather::TableReader>& reader, SEXP columns) {
std::shared_ptr<arrow::Table> table;
STOP_IF_NOT_OK(reader->Read(&table));

switch (TYPEOF(columns)) {
case INTSXP: {
R_xlen_t n = XLENGTH(columns);
std::vector<int> indices(n);
int* p_columns = INTEGER(columns);
for (int i = 0; i < n; i++) {
indices[i] = p_columns[i] - 1;
}
STOP_IF_NOT_OK(reader->Read(indices, &table));
break;
}
case STRSXP: {
R_xlen_t n = XLENGTH(columns);
std::vector<std::string> names(n);
for (R_xlen_t i = 0; i < n; i++) {
names[i] = CHAR(STRING_ELT(columns, i));
}
STOP_IF_NOT_OK(reader->Read(names, &table));
break;
}
case NILSXP:
STOP_IF_NOT_OK(reader->Read(&table));
break;
default:
Rcpp::stop("incompatible column specification");
break;
};

return table;
}

Expand Down
59 changes: 50 additions & 9 deletions r/tests/testthat/test-feather.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,25 +34,66 @@ test_that("feather read/write round trip", {
expect_true(fs::file_exists(tf3))

tab1 <- read_feather(tf1)
expect_is(tab1, "arrow::Table")
expect_is(tab1, "data.frame")

tab2 <- read_feather(tf2)
expect_is(tab2, "arrow::Table")
expect_is(tab2, "data.frame")

tab3 <- read_feather(tf3)
expect_is(tab3, "arrow::Table")
expect_is(tab3, "data.frame")

# reading directly from arrow::io::MemoryMappedFile
tab4 <- read_feather(mmap_open(tf3))
expect_is(tab4, "arrow::Table")
expect_is(tab4, "data.frame")

# reading directly from arrow::io::ReadableFile
tab5 <- read_feather(ReadableFile(tf3))
expect_is(tab5, "arrow::Table")
expect_is(tab5, "data.frame")

expect_equal(tib, tab1)
expect_equal(tib, tab2)
expect_equal(tib, tab3)
expect_equal(tib, tab4)
expect_equal(tib, tab5)
})

test_that("feather handles columns = <names>", {
  # Write a three-column tibble, then read back two of its columns by name.
  original <- tibble::tibble(x = 1:10, y = rnorm(10), z = letters[1:10])

  path <- local_tempfile()
  write_feather(original, path)
  expect_true(fs::file_exists(path))

  wanted <- c("x", "y")
  selected <- read_feather(path, columns = wanted)
  expect_is(selected, "data.frame")

  expect_equal(original[, wanted], as_tibble(selected))
})

test_that("feather handles columns = <integer>", {
  # Write a three-column tibble, then read back its first two columns by
  # integer position.
  original <- tibble::tibble(x = 1:10, y = rnorm(10), z = letters[1:10])

  path <- local_tempfile()
  write_feather(original, path)
  expect_true(fs::file_exists(path))

  positions <- 1:2
  selected <- read_feather(path, columns = positions)
  expect_is(selected, "data.frame")

  expected <- original[, c("x", "y")]
  expect_equal(expected, as_tibble(selected))
})

test_that("feather handles as_tibble = FALSE", {
  # With as_tibble = FALSE, read_feather() is expected to return the
  # arrow::Table itself; converting it afterwards must give back the data
  # that was written.
  tib <- tibble::tibble(x = 1:10, y = rnorm(10), z = letters[1:10])

  tf1 <- local_tempfile()
  write_feather(tib, tf1)
  expect_true(fs::file_exists(tf1))

  tab1 <- read_feather(tf1, as_tibble = FALSE)
  expect_is(tab1, "arrow::Table")

  # tab1 is the only table created in this test, so it is the only one compared.
  expect_equal(tib, as_tibble(tab1))
})