diff --git a/r/.gitignore b/r/.gitignore index 85c986810bd..0f405f57136 100644 --- a/r/.gitignore +++ b/r/.gitignore @@ -1,3 +1,6 @@ +Meta +doc +inst/doc *.o *.o-* *.d diff --git a/r/DESCRIPTION b/r/DESCRIPTION index 0250023e8fb..5f93c83f236 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -61,6 +61,9 @@ Collate: 'memory_pool.R' 'message.R' 'on_exit.R' + 'read_record_batch.R' + 'read_table.R' 'reexports-bit64.R' 'reexports-tibble.R' + 'write_arrow.R' 'zzz.R' diff --git a/r/NAMESPACE b/r/NAMESPACE index 490d2118c58..10677b43f85 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -6,57 +6,57 @@ S3method("==","arrow::DataType") S3method("==","arrow::Field") S3method("==","arrow::RecordBatch") S3method("==","arrow::ipc::Message") +S3method(BufferReader,"arrow::Buffer") +S3method(BufferReader,default) +S3method(FixedSizeBufferWriter,"arrow::Buffer") +S3method(FixedSizeBufferWriter,default) +S3method(MessageReader,"arrow::io::InputStream") +S3method(MessageReader,default) +S3method(RecordBatchFileReader,"arrow::io::RandomAccessFile") +S3method(RecordBatchFileReader,character) +S3method(RecordBatchFileReader,fs_path) +S3method(RecordBatchFileWriter,"arrow::io::OutputStream") +S3method(RecordBatchFileWriter,character) +S3method(RecordBatchFileWriter,fs_path) +S3method(RecordBatchStreamReader,"arrow::io::InputStream") +S3method(RecordBatchStreamReader,raw) +S3method(RecordBatchStreamWriter,"arrow::io::OutputStream") +S3method(RecordBatchStreamWriter,character) +S3method(RecordBatchStreamWriter,fs_path) S3method(as_tibble,"arrow::RecordBatch") S3method(as_tibble,"arrow::Table") +S3method(buffer,complex) S3method(buffer,default) S3method(buffer,integer) S3method(buffer,numeric) S3method(buffer,raw) -S3method(buffer_reader,"arrow::Buffer") -S3method(buffer_reader,default) S3method(feather_table_reader,"arrow::io::RandomAccessFile") S3method(feather_table_reader,"arrow::ipc::feather::TableReader") S3method(feather_table_reader,character) S3method(feather_table_reader,default) 
S3method(feather_table_reader,fs_path) S3method(feather_table_writer,"arrow::io::OutputStream") -S3method(fixed_size_buffer_writer,"arrow::Buffer") -S3method(fixed_size_buffer_writer,default) S3method(length,"arrow::Array") -S3method(message_reader,"arrow::io::InputStream") -S3method(message_reader,default) -S3method(message_reader,raw) S3method(names,"arrow::RecordBatch") S3method(print,"arrow-enum") S3method(read_message,"arrow::io::InputStream") -S3method(read_message,default) -S3method(read_record_batch,"arrow::io::BufferReader") -S3method(read_record_batch,"arrow::io::RandomAccessFile") +S3method(read_message,"arrow::ipc::MessageReader") +S3method(read_record_batch,"arrow::Buffer") +S3method(read_record_batch,"arrow::io::InputStream") S3method(read_record_batch,"arrow::ipc::Message") -S3method(read_record_batch,"arrow::ipc::RecordBatchFileReader") -S3method(read_record_batch,"arrow::ipc::RecordBatchStreamReader") -S3method(read_record_batch,character) -S3method(read_record_batch,fs_path) S3method(read_record_batch,raw) S3method(read_schema,"arrow::Buffer") S3method(read_schema,"arrow::io::InputStream") -S3method(read_schema,default) S3method(read_schema,raw) -S3method(read_table,"arrow::io::BufferReader") -S3method(read_table,"arrow::io::RandomAccessFile") S3method(read_table,"arrow::ipc::RecordBatchFileReader") S3method(read_table,"arrow::ipc::RecordBatchStreamReader") S3method(read_table,character) S3method(read_table,fs_path) S3method(read_table,raw) -S3method(record_batch_file_reader,"arrow::io::RandomAccessFile") -S3method(record_batch_file_reader,character) -S3method(record_batch_file_reader,fs_path) -S3method(record_batch_stream_reader,"arrow::io::InputStream") -S3method(record_batch_stream_reader,raw) -S3method(write_arrow,"arrow::RecordBatch") -S3method(write_arrow,"arrow::Table") -S3method(write_arrow,data.frame) +S3method(write_arrow,"arrow::ipc::RecordBatchWriter") +S3method(write_arrow,character) +S3method(write_arrow,fs_path) 
+S3method(write_arrow,raw) S3method(write_feather,"arrow::RecordBatch") S3method(write_feather,data.frame) S3method(write_feather,default) @@ -64,19 +64,20 @@ S3method(write_feather_RecordBatch,"arrow::io::OutputStream") S3method(write_feather_RecordBatch,character) S3method(write_feather_RecordBatch,default) S3method(write_feather_RecordBatch,fs_path) -S3method(write_record_batch,"arrow::io::OutputStream") -S3method(write_record_batch,"arrow::ipc::RecordBatchWriter") -S3method(write_record_batch,character) -S3method(write_record_batch,fs_path) -S3method(write_record_batch,raw) -S3method(write_table,"arrow::io::OutputStream") -S3method(write_table,"arrow::ipc::RecordBatchWriter") -S3method(write_table,character) -S3method(write_table,fs_path) -S3method(write_table,raw) +export(BufferOutputStream) +export(BufferReader) export(DateUnit) export(FileMode) +export(FileOutputStream) +export(FixedSizeBufferWriter) +export(MessageReader) export(MessageType) +export(MockOutputStream) +export(ReadableFile) +export(RecordBatchFileReader) +export(RecordBatchFileWriter) +export(RecordBatchStreamReader) +export(RecordBatchStreamWriter) export(StatusCode) export(TimeUnit) export(Type) @@ -84,20 +85,16 @@ export(array) export(as_tibble) export(boolean) export(buffer) -export(buffer_output_stream) -export(buffer_reader) export(cast_options) export(chunked_array) export(date32) export(date64) export(decimal) +export(default_memory_pool) export(dictionary) export(feather_table_reader) export(feather_table_writer) export(field) -export(file_open) -export(file_output_stream) -export(fixed_size_buffer_writer) export(float16) export(float32) export(float64) @@ -106,10 +103,8 @@ export(int32) export(int64) export(int8) export(list_of) -export(message_reader) export(mmap_create) export(mmap_open) -export(mock_output_stream) export(null) export(print.integer64) export(read_arrow) @@ -119,10 +114,6 @@ export(read_record_batch) export(read_schema) export(read_table) export(record_batch) 
-export(record_batch_file_reader) -export(record_batch_file_writer) -export(record_batch_stream_reader) -export(record_batch_stream_writer) export(schema) export(str.integer64) export(struct) @@ -138,8 +129,6 @@ export(utf8) export(write_arrow) export(write_feather) export(write_feather_RecordBatch) -export(write_record_batch) -export(write_table) importFrom(R6,R6Class) importFrom(Rcpp,sourceCpp) importFrom(assertthat,assert_that) diff --git a/r/R/ArrayData.R b/r/R/ArrayData.R index 47b858d589f..765971b405b 100644 --- a/r/R/ArrayData.R +++ b/r/R/ArrayData.R @@ -17,6 +17,30 @@ #' @include R6.R +#' @title class arrow::ArrayData +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Usage: +#' +#' ``` +#' data <- array(...)$data() +#' +#' data$type() +#' data$length() +#' data$null_count() +#' data$offset() +#' data$buffers() +#' ``` +#' +#' @section Methods: +#' +#' ... +#' +#' @rdname arrow__ArrayData +#' @name arrow__ArrayData `arrow::ArrayData` <- R6Class("arrow::ArrayData", inherit = `arrow::Object`, active = list( diff --git a/r/R/ChunkedArray.R b/r/R/ChunkedArray.R index 338438f578d..46e40766290 100644 --- a/r/R/ChunkedArray.R +++ b/r/R/ChunkedArray.R @@ -17,14 +17,22 @@ #' @include R6.R +#' @title class arrow::ChunkedArray +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__ChunkedArray +#' @name arrow__ChunkedArray `arrow::ChunkedArray` <- R6Class("arrow::ChunkedArray", inherit = `arrow::Object`, public = list( length = function() ChunkedArray__length(self), - null_count = function() ChunkedArray__null_count(self), - num_chunks = function() ChunkedArray__num_chunks(self), chunk = function(i) shared_ptr(`arrow::Array`, ChunkedArray__chunk(self, i)), - chunks = function() purrr::map(ChunkedArray__chunks(self), shared_ptr, class = `arrow::Array`), - type = function() `arrow::DataType`$dispatch(ChunkedArray__type(self)), as_vector = function() ChunkedArray__as_vector(self), Slice 
= function(offset, length = NULL){ if (is.null(length)) { @@ -38,10 +46,16 @@ assert_that(inherits(options, "arrow::compute::CastOptions")) shared_ptr(`arrow::ChunkedArray`, ChunkedArray__cast(self, target_type, options)) } + ), + active = list( + null_count = function() ChunkedArray__null_count(self), + num_chunks = function() ChunkedArray__num_chunks(self), + chunks = function() map(ChunkedArray__chunks(self), shared_ptr, class = `arrow::Array`), + type = function() `arrow::DataType`$dispatch(ChunkedArray__type(self)) ) ) -#' create an arrow::Array from an R vector +#' create an [arrow::ChunkedArray][arrow__ChunkedArray] from various R vectors #' #' @param \dots Vectors to coerce #' @param type currently ignored diff --git a/r/R/Column.R b/r/R/Column.R index bf3fe0a0e10..fb8af1ea315 100644 --- a/r/R/Column.R +++ b/r/R/Column.R @@ -17,11 +17,26 @@ #' @include R6.R +#' @title class arrow::Column +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__Column +#' @name arrow__Column `arrow::Column` <- R6Class("arrow::Column", inherit = `arrow::Object`, public = list( length = function() Column__length(self), - null_count = function() Column__null_count(self), - type = function() `arrow::DataType`$dispatch(Column__type(self)), data = function() shared_ptr(`arrow::ChunkedArray`, Column__data(self)) + ), + + active = list( + null_count = function() Column__null_count(self), + type = function() `arrow::DataType`$dispatch(Column__type(self)) ) ) diff --git a/r/R/Field.R b/r/R/Field.R index 79c0f33be68..4f5636fbfff 100644 --- a/r/R/Field.R +++ b/r/R/Field.R @@ -17,20 +17,35 @@ #' @include R6.R +#' @title class arrow::Field +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__Field +#' @name arrow__Field `arrow::Field` <- R6Class("arrow::Field", inherit = `arrow::Object`, public = list( ToString = function() { Field__ToString(self) }, + Equals = 
function(other) { + inherits(other, "arrow::Field") && Field__Equals(self, other) + } + ), + + active = list( name = function() { Field__name(self) }, nullable = function() { Field__nullable(self) }, - Equals = function(other) { - inherits(other, "arrow::Field") && Field__Equals(self, other) - }, type = function() { `arrow::DataType`$dispatch(Field__type(self)) } diff --git a/r/R/R6.R b/r/R/R6.R index 1caa885d90c..69d58e0c136 100644 --- a/r/R/R6.R +++ b/r/R/R6.R @@ -54,15 +54,24 @@ unique_ptr <- function(class, xp) { !(lhs == rhs) } +#' @title class arrow::DataType +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__DataType +#' @name arrow__DataType `arrow::DataType` <- R6Class("arrow::DataType", inherit = `arrow::Object`, public = list( ToString = function() { DataType__ToString(self) }, - name = function() { - DataType__name(self) - }, Equals = function(other) { assert_that(inherits(other, "arrow::DataType")) DataType__Equals(self, other) @@ -73,11 +82,9 @@ unique_ptr <- function(class, xp) { children = function() { map(DataType__children_pointer(self), shared_ptr, class= `arrow::Field`) }, - id = function(){ - DataType__id(self) - }, + ..dispatch = function(){ - switch(names(Type)[self$id()+1], + switch(names(Type)[self$id + 1], "NA" = null(), BOOL = boolean(), UINT8 = uint8(), @@ -107,6 +114,15 @@ unique_ptr <- function(class, xp) { MAP = stop("Type MAP not implemented yet") ) } + ), + + active = list( + id = function(){ + DataType__id(self) + }, + name = function() { + DataType__name(self) + } ) ) @@ -116,9 +132,21 @@ unique_ptr <- function(class, xp) { #----- metadata +#' @title class arrow::FixedWidthType +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__FixedWidthType +#' @name arrow__FixedWidthType `arrow::FixedWidthType` <- R6Class("arrow::FixedWidthType", inherit = `arrow::DataType`, - public = list( + active = list( 
bit_width = function() FixedWidthType__bit_width(self) ) ) diff --git a/r/R/RcppExports.R b/r/R/RcppExports.R index 324510cf1b6..ccf854927b7 100644 --- a/r/R/RcppExports.R +++ b/r/R/RcppExports.R @@ -629,6 +629,10 @@ RecordBatch__schema <- function(x) { .Call(`_arrow_RecordBatch__schema`, x) } +RecordBatch__columns <- function(batch) { + .Call(`_arrow_RecordBatch__columns`, batch) +} + RecordBatch__column <- function(batch, i) { .Call(`_arrow_RecordBatch__column`, batch, i) } @@ -665,6 +669,14 @@ RecordBatch__Slice2 <- function(self, offset, length) { .Call(`_arrow_RecordBatch__Slice2`, self, offset, length) } +ipc___SerializeRecordBatch__Raw <- function(batch) { + .Call(`_arrow_ipc___SerializeRecordBatch__Raw`, batch) +} + +ipc___ReadRecordBatch__InputStream__Schema <- function(stream, schema) { + .Call(`_arrow_ipc___ReadRecordBatch__InputStream__Schema`, stream, schema) +} + RecordBatchReader__schema <- function(reader) { .Call(`_arrow_RecordBatchReader__schema`, reader) } @@ -677,6 +689,10 @@ ipc___RecordBatchStreamReader__Open <- function(stream) { .Call(`_arrow_ipc___RecordBatchStreamReader__Open`, stream) } +ipc___RecordBatchStreamReader__batches <- function(reader) { + .Call(`_arrow_ipc___RecordBatchStreamReader__batches`, reader) +} + ipc___RecordBatchFileReader__schema <- function(reader) { .Call(`_arrow_ipc___RecordBatchFileReader__schema`, reader) } @@ -701,16 +717,12 @@ Table__from_RecordBatchStreamReader <- function(reader) { .Call(`_arrow_Table__from_RecordBatchStreamReader`, reader) } -ipc___RecordBatchFileWriter__Open <- function(stream, schema) { - .Call(`_arrow_ipc___RecordBatchFileWriter__Open`, stream, schema) -} - -ipc___RecordBatchStreamWriter__Open <- function(stream, schema) { - .Call(`_arrow_ipc___RecordBatchStreamWriter__Open`, stream, schema) +ipc___RecordBatchFileReader__batches <- function(reader) { + .Call(`_arrow_ipc___RecordBatchFileReader__batches`, reader) } -ipc___RecordBatchWriter__WriteRecordBatch <- function(batch_writer, 
batch, allow_64bit) { - invisible(.Call(`_arrow_ipc___RecordBatchWriter__WriteRecordBatch`, batch_writer, batch, allow_64bit)) +ipc___RecordBatchWriter__WriteRecordBatch <- function(batch_writer, batch) { + invisible(.Call(`_arrow_ipc___RecordBatchWriter__WriteRecordBatch`, batch_writer, batch)) } ipc___RecordBatchWriter__WriteTable <- function(batch_writer, table) { @@ -721,6 +733,14 @@ ipc___RecordBatchWriter__Close <- function(batch_writer) { invisible(.Call(`_arrow_ipc___RecordBatchWriter__Close`, batch_writer)) } +ipc___RecordBatchFileWriter__Open <- function(stream, schema) { + .Call(`_arrow_ipc___RecordBatchFileWriter__Open`, stream, schema) +} + +ipc___RecordBatchStreamWriter__Open <- function(stream, schema) { + .Call(`_arrow_ipc___RecordBatchStreamWriter__Open`, stream, schema) +} + Table__from_dataframe <- function(tbl) { .Call(`_arrow_Table__from_dataframe`, tbl) } @@ -745,3 +765,7 @@ Table__column <- function(table, i) { .Call(`_arrow_Table__column`, table, i) } +Table__columns <- function(table) { + .Call(`_arrow_Table__columns`, table) +} + diff --git a/r/R/RecordBatch.R b/r/R/RecordBatch.R index c606d12143b..fed10abee76 100644 --- a/r/R/RecordBatch.R +++ b/r/R/RecordBatch.R @@ -17,11 +17,20 @@ #' @include R6.R +#' @title class arrow::RecordBatch +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__RecordBatch +#' @name arrow__RecordBatch `arrow::RecordBatch` <- R6Class("arrow::RecordBatch", inherit = `arrow::Object`, public = list( - num_columns = function() RecordBatch__num_columns(self), - num_rows = function() RecordBatch__num_rows(self), - schema = function() shared_ptr(`arrow::Schema`, RecordBatch__schema(self)), column = function(i) shared_ptr(`arrow::Array`, RecordBatch__column(self, i)), column_name = function(i) RecordBatch__column_name(self, i), names = function() RecordBatch__names(self), @@ -29,9 +38,11 @@ assert_that(inherits(other, "arrow::RecordBatch")) 
RecordBatch__Equals(self, other) }, + RemoveColumn = function(i){ shared_ptr(`arrow::RecordBatch`, RecordBatch__RemoveColumn(self, i)) }, + Slice = function(offset, length = NULL) { if (is.null(length)) { shared_ptr(`arrow::RecordBatch`, RecordBatch__Slice1(self, offset)) @@ -40,14 +51,21 @@ } }, - serialize = function(output_stream, ...) write_record_batch(self, output_stream, ...), + serialize = function() ipc___SerializeRecordBatch__Raw(self), cast = function(target_schema, safe = TRUE, options = cast_options(safe)) { assert_that(inherits(target_schema, "arrow::Schema")) assert_that(inherits(options, "arrow::compute::CastOptions")) - assert_that(identical(self$schema()$names, target_schema$names), msg = "incompatible schemas") + assert_that(identical(self$schema$names, target_schema$names), msg = "incompatible schemas") shared_ptr(`arrow::RecordBatch`, RecordBatch__cast(self, target_schema, options)) } + ), + + active = list( + num_columns = function() RecordBatch__num_columns(self), + num_rows = function() RecordBatch__num_rows(self), + schema = function() shared_ptr(`arrow::Schema`, RecordBatch__schema(self)), + columns = function() map(RecordBatch__columns(self), shared_ptr, `arrow::Array`) ) ) @@ -66,10 +84,11 @@ RecordBatch__to_dataframe(x) } -#' Create an arrow::RecordBatch from a data frame +#' Create an [arrow::RecordBatch][arrow__RecordBatch] from a data frame #' #' @param .data a data frame #' +#' @return a [arrow::RecordBatch][arrow__RecordBatch] #' @export record_batch <- function(.data){ shared_ptr(`arrow::RecordBatch`, RecordBatch__from_dataframe(.data)) diff --git a/r/R/RecordBatchReader.R b/r/R/RecordBatchReader.R index 35037538426..222f05586c1 100644 --- a/r/R/RecordBatchReader.R +++ b/r/R/RecordBatchReader.R @@ -17,6 +17,18 @@ #' @include R6.R +#' @title class arrow::RecordBatchReader +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__RecordBatchReader +#' @name 
arrow__RecordBatchReader `arrow::RecordBatchReader` <- R6Class("arrow::RecordBatchReader", inherit = `arrow::Object`, public = list( schema = function() shared_ptr(`arrow::Schema`, RecordBatchReader__schema(self)), @@ -26,170 +38,87 @@ ) ) -`arrow::ipc::RecordBatchStreamReader` <- R6Class("arrow::ipc::RecordBatchStreamReader", inherit = `arrow::RecordBatchReader`) +#' @title class arrow::ipc::RecordBatchStreamReader +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__ipc__RecordBatchStreamReader +#' @name arrow__ipc__RecordBatchStreamReader +`arrow::ipc::RecordBatchStreamReader` <- R6Class("arrow::ipc::RecordBatchStreamReader", inherit = `arrow::RecordBatchReader`, + public = list( + batches = function() map(ipc___RecordBatchStreamReader__batches(self), shared_ptr, class = `arrow::RecordBatch`) + ) +) +#' @title class arrow::ipc::RecordBatchFileReader +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__ipc__RecordBatchFileReader +#' @name arrow__ipc__RecordBatchFileReader `arrow::ipc::RecordBatchFileReader` <- R6Class("arrow::ipc::RecordBatchFileReader", inherit = `arrow::Object`, public = list( schema = function() shared_ptr(`arrow::Schema`, ipc___RecordBatchFileReader__schema(self)), num_record_batches = function() ipc___RecordBatchFileReader__num_record_batches(self), - ReadRecordBatch = function(i) shared_ptr(`arrow::RecordBatch`, ipc___RecordBatchFileReader__ReadRecordBatch(self, i)) + ReadRecordBatch = function(i) shared_ptr(`arrow::RecordBatch`, ipc___RecordBatchFileReader__ReadRecordBatch(self, i)), + + batches = function() map(ipc___RecordBatchFileReader__batches(self), shared_ptr, class = `arrow::RecordBatch`) ) ) - -#' Create a `arrow::ipc::RecordBatchStreamReader` from an input stream +#' Create a [arrow::ipc::RecordBatchStreamReader][arrow__ipc__RecordBatchStreamReader] from an input stream +#' +#' @param stream input 
stream, an [arrow::io::InputStream][arrow__io__InputStream] or a raw vector #' -#' @param stream input stream #' @export -record_batch_stream_reader <- function(stream){ - UseMethod("record_batch_stream_reader") +RecordBatchStreamReader <- function(stream){ + UseMethod("RecordBatchStreamReader") } #' @export -`record_batch_stream_reader.arrow::io::InputStream` <- function(stream) { +`RecordBatchStreamReader.arrow::io::InputStream` <- function(stream) { shared_ptr(`arrow::ipc::RecordBatchStreamReader`, ipc___RecordBatchStreamReader__Open(stream)) } #' @export -`record_batch_stream_reader.raw` <- function(stream) { - record_batch_stream_reader(buffer_reader(stream)) +`RecordBatchStreamReader.raw` <- function(stream) { + RecordBatchStreamReader(BufferReader(stream)) } -#' Create an `arrow::ipc::RecordBatchFileReader` from a file +#' Create an [arrow::ipc::RecordBatchFileReader][arrow__ipc__RecordBatchFileReader] from a file #' -#' @param file The file to read from +#' @param file The file to read from. A file path, or an [arrow::io::RandomAccessFile][arrow__ipc__RecordBatchFileReader] #' #' @export -record_batch_file_reader <- function(file) { - UseMethod("record_batch_file_reader") +RecordBatchFileReader <- function(file) { + UseMethod("RecordBatchFileReader") } #' @export -`record_batch_file_reader.arrow::io::RandomAccessFile` <- function(file) { +`RecordBatchFileReader.arrow::io::RandomAccessFile` <- function(file) { shared_ptr(`arrow::ipc::RecordBatchFileReader`, ipc___RecordBatchFileReader__Open(file)) } #' @export -`record_batch_file_reader.character` <- function(file) { +`RecordBatchFileReader.character` <- function(file) { assert_that(length(file) == 1L) - record_batch_file_reader(fs::path_abs(file)) -} - -#' @export -`record_batch_file_reader.fs_path` <- function(file) { - record_batch_file_reader(file_open(file)) -} - -#-------- read_record_batch - -#' Read a single record batch from a stream -#' -#' @param stream input stream -#' @param ... 
additional parameters -#' -#' @details `stream` can be a `arrow::io::RandomAccessFile` stream as created by [file_open()] or [mmap_open()] or a path. -#' -#' @export -read_record_batch <- function(stream, ...){ - UseMethod("read_record_batch") -} - -#' @export -read_record_batch.character <- function(stream, ...){ - assert_that(length(stream) == 1L) - read_record_batch(fs::path_abs(stream)) -} - -#' @export -read_record_batch.fs_path <- function(stream, ...){ - stream <- close_on_exit(file_open(stream)) - read_record_batch(stream) -} - -#' @export -`read_record_batch.arrow::io::RandomAccessFile` <- function(stream, ...){ - reader <- record_batch_file_reader(stream) - reader$ReadRecordBatch(0) -} - -#' @export -`read_record_batch.arrow::io::BufferReader` <- function(stream, ...){ - reader <- record_batch_stream_reader(stream) - reader$ReadNext() -} - -#' @export -read_record_batch.raw <- function(stream, ...){ - stream <- close_on_exit(buffer_reader(stream)) - read_record_batch(stream) -} - -#' @export -`read_record_batch.arrow::ipc::RecordBatchStreamReader` <- function(stream, ...) { - stream$ReadNext() -} - -#' @export -`read_record_batch.arrow::ipc::RecordBatchFileReader` <- function(stream, i = 0, ...) { - stream$ReadRecordBatch(i) -} - -#' @export -`read_record_batch.arrow::ipc::Message` <- function(stream, schema, ...) { - assert_that(inherits(schema, "arrow::Schema")) - shared_ptr(`arrow::RecordBatch`, ipc___ReadRecordBatch__Message__Schema(stream, schema)) -} - - -#--------- read_table - -#' Read an arrow::Table from a stream -#' -#' @param stream stream. Either a stream created by [file_open()] or [mmap_open()] or a file path. 
-#' -#' @export -read_table <- function(stream){ - UseMethod("read_table") + RecordBatchFileReader(fs::path_abs(file)) } #' @export -read_table.character <- function(stream){ - assert_that(length(stream) == 1L) - read_table(fs::path_abs(stream)) +`RecordBatchFileReader.fs_path` <- function(file) { + RecordBatchFileReader(ReadableFile(file)) } - -#' @export -read_table.fs_path <- function(stream) { - stream <- close_on_exit(file_open(stream)) - read_table(stream) -} - -#' @export -`read_table.arrow::io::RandomAccessFile` <- function(stream) { - reader <- record_batch_file_reader(stream) - read_table(reader) -} - -#' @export -`read_table.arrow::ipc::RecordBatchFileReader` <- function(stream) { - shared_ptr(`arrow::Table`, Table__from_RecordBatchFileReader(stream)) -} - -#' @export -`read_table.arrow::ipc::RecordBatchStreamReader` <- function(stream) { - shared_ptr(`arrow::Table`, Table__from_RecordBatchStreamReader(stream)) -} - -#' @export -`read_table.arrow::io::BufferReader` <- function(stream) { - reader <- record_batch_stream_reader(stream) - read_table(reader) -} - -#' @export -`read_table.raw` <- function(stream) { - stream <- close_on_exit(buffer_reader(stream)) - read_table(stream) -} - diff --git a/r/R/RecordBatchWriter.R b/r/R/RecordBatchWriter.R index 515b6986b94..77305114d33 100644 --- a/r/R/RecordBatchWriter.R +++ b/r/R/RecordBatchWriter.R @@ -17,175 +17,174 @@ #' @include R6.R +#' @title class arrow::ipc::RecordBatchWriter +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Methods: +#' +#' - `$write_batch(batch)`: Write record batch to stream +#' - `$write_table(table)`: write Table to stream +#' - `$close()`: close stream +#' +#' @section Derived classes: +#' +#' - [arrow::ipc::RecordBatchStreamWriter][arrow__ipc__RecordBatchStreamWriter] implements the streaming binary format +#' - [arrow::ipc::RecordBatchFileWriter][arrow__ipc__RecordBatchFileWriter] implements the binary file format +#' +#' @rdname 
arrow__ipc__RecordBatchWriter +#' @name arrow__ipc__RecordBatchWriter `arrow::ipc::RecordBatchWriter` <- R6Class("arrow::ipc::RecordBatchWriter", inherit = `arrow::Object`, public = list( - WriteRecordBatch = function(batch, allow_64bit) ipc___RecordBatchWriter__WriteRecordBatch(self, batch, allow_64bit), - WriteTable = function(table) ipc___RecordBatchWriter__WriteTable(self, table), - Close = function() ipc___RecordBatchWriter__Close(self) + write_batch = function(batch) ipc___RecordBatchWriter__WriteRecordBatch(self, batch), + write_table = function(table) ipc___RecordBatchWriter__WriteTable(self, table), + + write = function(x) { + if (inherits(x, "arrow::RecordBatch")) { + self$write_batch(x) + } else if(inherits(x, "arrow::Table")) { + self$write_table(x) + } else if (inherits(x, "data.frame")) { + self$write_table(table(x)) + } else { + abort("unexpected type for RecordBatchWriter$write(), must be an arrow::RecordBatch or an arrow::Table") + } + }, + + close = function() ipc___RecordBatchWriter__Close(self) ) ) -`arrow::ipc::RecordBatchStreamWriter` <- R6Class("arrow::ipc::RecordBatchStreamWriter", inherit = `arrow::ipc::RecordBatchWriter`) -`arrow::ipc::RecordBatchFileWriter` <- R6Class("arrow::ipc::RecordBatchFileWriter", inherit = `arrow::ipc::RecordBatchStreamWriter`) - -#' Create a record batch file writer from a stream +#' @title class arrow::ipc::RecordBatchStreamWriter #' -#' @param stream a stream -#' @param schema the schema of the batches +#' Writer for the Arrow streaming binary format #' -#' @return an `arrow::ipc::RecordBatchWriter` object +#' @usage NULL +#' @format NULL +#' @docType class #' -#' @export -record_batch_file_writer <- function(stream, schema) { - assert_that( - inherits(stream, "arrow::io::OutputStream"), - inherits(schema, "arrow::Schema") - ) - shared_ptr(`arrow::ipc::RecordBatchFileWriter`, ipc___RecordBatchFileWriter__Open(stream, schema)) -} - -#' Create a record batch stream writer +#' @section usage: #' -#' @param stream 
a stream -#' @param schema a schema +#' ``` +#' writer <- RecordBatchStreamWriter(sink, schema) #' -#' @export -record_batch_stream_writer <- function(stream, schema) { - assert_that( - inherits(stream, "arrow::io::OutputStream"), - inherits(schema, "arrow::Schema") - ) - shared_ptr(`arrow::ipc::RecordBatchStreamWriter`, ipc___RecordBatchStreamWriter__Open(stream, schema)) -} - -#-------- write_record_batch - -#' write a record batch +#' writer$write_batch(batch) +#' writer$write_table(table) +#' writer$close() +#' ``` #' -#' @param x a `arrow::RecordBatch` -#' @param stream where to stream the record batch -#' @param ... extra parameters +#' @section Factory: #' -#' @export -write_record_batch <- function(x, stream, ...){ - UseMethod("write_record_batch", stream) -} - -#' @export -`write_record_batch.arrow::io::OutputStream` <- function(x, stream, ...) { - stream_writer <- close_on_exit(record_batch_stream_writer(stream, x$schema())) - write_record_batch(x, stream_writer) -} - -#' @export -`write_record_batch.arrow::ipc::RecordBatchWriter` <- function(x, stream, allow_64bit = TRUE, ...){ - stream$WriteRecordBatch(x, allow_64bit) -} - -#' @export -`write_record_batch.character` <- function(x, stream, ...) { - assert_that(length(stream) == 1L) - write_record_batch(x, fs::path_abs(stream), ...) -} - -#' @export -`write_record_batch.fs_path` <- function(x, stream, ...) { - assert_that(length(stream) == 1L) - file_stream <- close_on_exit(file_output_stream(stream)) - file_writer <- close_on_exit(record_batch_file_writer(file_stream, x$schema())) - write_record_batch(x, file_writer, ...) -} - -#' @export -`write_record_batch.raw` <- function(x, stream, ...) 
{ - # how many bytes do we need - mock <- mock_output_stream() - write_record_batch(x, mock) - n <- mock$GetExtentBytesWritten() - - bytes <- raw(n) - buffer <- buffer(bytes) - buffer_writer <- fixed_size_buffer_writer(buffer) - write_record_batch(x, buffer_writer) - - bytes -} - -#-------- stream Table - -#' write an arrow::Table +#' The [RecordBatchStreamWriter()] function creates a record batch stream writer. #' -#' @param x an `arrow::Table` -#' @param stream where to stream the record batch -#' @param ... extra parameters +#' @section Methods: +#' inherited from [arrow::ipc::RecordBatchWriter][arrow__ipc__RecordBatchWriter] #' -#' @export -write_table <- function(x, stream, ...) { - UseMethod("write_table", stream) -} +#' - `$write_batch(batch)`: Write record batch to stream +#' - `$write_table(table)`: write Table to stream +#' - `$close()`: close stream +#' +#' @rdname arrow__ipc__RecordBatchStreamWriter +#' @name arrow__ipc__RecordBatchStreamWriter +`arrow::ipc::RecordBatchStreamWriter` <- R6Class("arrow::ipc::RecordBatchStreamWriter", inherit = `arrow::ipc::RecordBatchWriter`) +#' Writer for the Arrow streaming binary format +#' +#' @param sink Where to write. Can either be: +#' +#' - A string, meant as a file path, passed to [fs::path_abs()] +#' - a [file path][fs::path_abs()] +#' - [arrow::io::OutputStream][arrow__io__OutputStream] +#' +#' @param schema The [arrow::Schema][arrow__Schema] for data to be written. +#' +#' @return a [arrow::ipc::RecordBatchStreamWriter][arrow__ipc__RecordBatchStreamWriter] +#' #' @export -`write_table.arrow::io::OutputStream` <- function(x, stream, ...) 
{ - stream_writer <- close_on_exit(record_batch_stream_writer(stream, x$schema())) - write_table(x, stream_writer) +RecordBatchStreamWriter <- function(sink, schema) { + UseMethod("RecordBatchStreamWriter") } #' @export -`write_table.arrow::ipc::RecordBatchWriter` <- function(x, stream, ...){ - stream$WriteTable(x) +RecordBatchStreamWriter.character <- function(sink, schema){ + RecordBatchStreamWriter(fs::path_abs(sink), schema) } #' @export -`write_table.character` <- function(x, stream, ...) { - assert_that(length(stream) == 1L) - write_table(x, fs::path_abs(stream), ...) +RecordBatchStreamWriter.fs_path <- function(sink, schema){ + RecordBatchStreamWriter(FileOutputStream(sink), schema) } #' @export -`write_table.fs_path` <- function(x, stream, ...) { - assert_that(length(stream) == 1L) - file_stream <- close_on_exit(file_output_stream(stream)) - file_writer <- close_on_exit(record_batch_file_writer(file_stream, x$schema())) - write_table(x, file_writer, ...) +`RecordBatchStreamWriter.arrow::io::OutputStream` <- function(sink, schema){ + assert_that(inherits(schema, "arrow::Schema")) + shared_ptr(`arrow::ipc::RecordBatchStreamWriter`, ipc___RecordBatchStreamWriter__Open(sink, schema)) } -#' @export -`write_table.raw` <- function(x, stream, ...) { - # how many bytes do we need - mock <- mock_output_stream() - write_table(x, mock) - n <- mock$GetExtentBytesWritten() - - bytes <- raw(n) - buffer <- buffer(bytes) - buffer_writer <- fixed_size_buffer_writer(buffer) - write_table(x, buffer_writer) - - bytes -} +#' @title class arrow::ipc::RecordBatchFileWriter +#' +#' Writer for the Arrow binary file format +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section usage: +#' +#' ``` +#' writer <- RecordBatchFileWriter(sink, schema) +#' +#' writer$write_batch(batch) +#' writer$write_table(table) +#' writer$close() +#' ``` +#' +#' @section Factory: +#' +#' The [RecordBatchFileWriter()] function creates a record batch stream writer. 
+#' +#' @section Methods: +#' inherited from [arrow::ipc::RecordBatchWriter][arrow__ipc__RecordBatchWriter] +#' +#' - `$write_batch(batch)`: Write record batch to stream +#' - `$write_table(table)`: write Table to stream +#' - `$close()`: close stream +#' +#' @rdname arrow__ipc__RecordBatchFileWriter +#' @name arrow__ipc__RecordBatchFileWriter +`arrow::ipc::RecordBatchFileWriter` <- R6Class("arrow::ipc::RecordBatchFileWriter", inherit = `arrow::ipc::RecordBatchStreamWriter`) -#' Write an object to a stream +#' Create a record batch file writer from a stream #' -#' @param x An object to stream -#' @param stream A stream -#' @param ... additional parameters +#' @param sink Where to write. Can either be: +#' +#' - character vector of length one +#' - a [file path][fs::path_abs()] +#' - [arrow::io::OutputStream][arrow__io__OutputStream] +#' +#' @param schema The [arrow::Schema][arrow__Schema] for data to be written. +#' +#' @return an `arrow::ipc::RecordBatchWriter` object #' #' @export -write_arrow <- function(x, stream, ...){ - UseMethod("write_arrow") +RecordBatchFileWriter <- function(sink, schema) { + UseMethod("RecordBatchFileWriter") } #' @export -`write_arrow.arrow::RecordBatch` <- function(x, stream, ...) { - write_record_batch(x, stream, ...) +RecordBatchFileWriter.character <- function(sink, schema){ + RecordBatchFileWriter(fs::path_abs(sink), schema) } #' @export -`write_arrow.arrow::Table` <- function(x, stream, ...) { - write_table(x, stream, ...) +RecordBatchFileWriter.fs_path <- function(sink, schema){ + RecordBatchFileWriter(FileOutputStream(sink), schema) } #' @export -`write_arrow.data.frame` <- function(x, stream, ...) { - write_record_batch(record_batch(x), stream, ...) 
+`RecordBatchFileWriter.arrow::io::OutputStream` <- function(sink, schema){ + assert_that(inherits(schema, "arrow::Schema")) + shared_ptr(`arrow::ipc::RecordBatchFileWriter`, ipc___RecordBatchFileWriter__Open(sink, schema)) } diff --git a/r/R/Schema.R b/r/R/Schema.R index b158fee169d..08047a3b11f 100644 --- a/r/R/Schema.R +++ b/r/R/Schema.R @@ -17,6 +17,30 @@ #' @include R6.R +#' @title class arrow::Schema +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Usage: +#' +#' ``` +#' s <- schema(...) +#' +#' s$ToString() +#' s$num_fields() +#' s$field(i) +#' ``` +#' +#' @section Methods: +#' +#' - `$ToString()`: convert to a string +#' - `$num_fields()`: returns the number of fields +#' - `$field(i)`: returns the field at index `i` (0-based) +#' +#' @rdname arrow__Schema +#' @name arrow__Schema `arrow::Schema` <- R6Class("arrow::Schema", inherit = `arrow::Object`, public = list( @@ -29,11 +53,11 @@ ) ) -#' Schema functions +#' Schema factory #' #' @param ... named list of data types #' -#' @return a Schema +#' @return a [schema][arrow__Schema] #' #' @export schema <- function(...){ @@ -50,11 +74,6 @@ read_schema <- function(stream, ...) { UseMethod("read_schema") } -#' @export -read_schema.default <- function(stream, ...) { - stop("unsupported") -} - #' @export `read_schema.arrow::io::InputStream` <- function(stream, ...) { shared_ptr(`arrow::Schema`, ipc___ReadSchema_InputStream(stream)) @@ -62,10 +81,12 @@ read_schema.default <- function(stream, ...) { #' @export `read_schema.arrow::Buffer` <- function(stream, ...) { - read_schema(buffer_reader(stream), ...) + stream <- close_on_exit(BufferReader(stream)) + shared_ptr(`arrow::Schema`, ipc___ReadSchema_InputStream(stream)) } #' @export `read_schema.raw` <- function(stream, ...) { - read_schema(buffer(stream), ...) 
+ stream <- close_on_exit(BufferReader(stream)) + shared_ptr(`arrow::Schema`, ipc___ReadSchema_InputStream(stream)) } diff --git a/r/R/Table.R b/r/R/Table.R index e7d4545c1f6..8972634d59f 100644 --- a/r/R/Table.R +++ b/r/R/Table.R @@ -16,12 +16,21 @@ # under the License. #' @include R6.R - +#' +#' @title class arrow::Table +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__Table +#' @name arrow__Table `arrow::Table` <- R6Class("arrow::Table", inherit = `arrow::Object`, public = list( - num_columns = function() Table__num_columns(self), - num_rows = function() Table__num_rows(self), - schema = function() shared_ptr(`arrow::Schema`, Table__schema(self)), column = function(i) shared_ptr(`arrow::Column`, Table__column(self, i)), serialize = function(output_stream, ...) write_table(self, output_stream, ...), @@ -29,9 +38,16 @@ cast = function(target_schema, safe = TRUE, options = cast_options(safe)) { assert_that(inherits(target_schema, "arrow::Schema")) assert_that(inherits(options, "arrow::compute::CastOptions")) - assert_that(identical(self$schema()$names, target_schema$names), msg = "incompatible schemas") + assert_that(identical(self$schema$names, target_schema$names), msg = "incompatible schemas") shared_ptr(`arrow::Table`, Table__cast(self, target_schema, options)) } + ), + + active = list( + num_columns = function() Table__num_columns(self), + num_rows = function() Table__num_rows(self), + schema = function() shared_ptr(`arrow::Schema`, Table__schema(self)), + columns = function() map(Table__columns(self), shared_ptr, class = `arrow::Column`) ) ) @@ -48,14 +64,3 @@ table <- function(.data){ `as_tibble.arrow::Table` <- function(x, ...){ Table__to_dataframe(x) } - -#' Read an tibble from an arrow::Table on disk -#' -#' @param stream input stream -#' -#' @return a [tibble::tibble] -#' -#' @export -read_arrow <- function(stream){ - as_tibble(read_table(stream)) -} diff --git a/r/R/array.R 
b/r/R/array.R index 2d434f9a221..63fdb4e0f61 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -17,18 +17,65 @@ #' @include R6.R +#' @title class arrow::Array +#' +#' Array base type. Immutable data array with some logical type and some length. +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Usage: +#' +#' ``` +#' a <- array(...) +#' +#' a$IsNull(i) +#' a$IsValid(i) +#' a$length() or length(a) +#' a$offset() +#' a$null_count() +#' a$type() +#' a$type_id() +#' a$Equals(b) +#' a$ApproxEquals(b) +#' a$as_vector() +#' a$ToString() +#' a$Slice(offset, length = NULL) +#' a$RangeEquals(other, start_idx, end_idx, other_start_idx) +#' +#' print(a) +#' a == a +#' ``` +#' +#' @section Methods: +#' +#' - `$IsNull(i)`: Return true if value at index is null. Does not boundscheck +#' - `$IsValid(i)`: Return true if value at index is valid. Does not boundscheck +#' - `$length()`: Size in the number of elements this array contains +#' - `$offset()`: A relative position into another array's data, to enable zero-copy slicing +#' - `$null_count()`: The number of null entries in the array +#' - `$type()`: logical type of data +#' - `$type_id()`: type id +#' - `$Equals(other)` : is this array equal to `other` +#' - `$ApproxEquals(other)` : +#' - `$data()`: return the underlying [arrow::ArrayData][arrow__ArrayData] +#' - `$as_vector()`: convert to an R vector +#' - `$ToString()`: string representation of the array +#' - `$Slice(offset, length = NULL)` : Construct a zero-copy slice of the array with the indicated offset and length. If length is `NULL`, the slice goes until the end of the array. 
+#' - `$RangeEquals(other, start_idx, end_idx, other_start_idx)` : +#' +#' @rdname arrow__Array +#' @name arrow__Array `arrow::Array` <- R6Class("arrow::Array", inherit = `arrow::Object`, public = list( IsNull = function(i) Array__IsNull(self, i), IsValid = function(i) Array__IsValid(self, i), length = function() Array__length(self), - offset = function() Array__offset(self), - null_count = function() Array__null_count(self), - type = function() `arrow::DataType`$dispatch(Array__type(self)), type_id = function() Array__type_id(self), Equals = function(other) Array__Equals(self, other), - ApproxEquals = function(othet) Array__ApproxEquals(self, other), + ApproxEquals = function(other) Array__ApproxEquals(self, other), data = function() shared_ptr(`arrow::ArrayData`, Array__data(self)), as_vector = function() Array__as_vector(self), ToString = function() Array__ToString(self), @@ -48,6 +95,11 @@ assert_that(inherits(options, "arrow::compute::CastOptions")) `arrow::Array`$dispatch(Array__cast(self, target_type, options)) } + ), + active = list( + null_count = function() Array__null_count(self), + offset = function() Array__offset(self), + type = function() `arrow::DataType`$dispatch(Array__type(self)) ) ) @@ -65,7 +117,7 @@ #' @export `==.arrow::Array` <- function(x, y) x$Equals(y) -#' create an arrow::Array from an R vector +#' create an [arrow::Array][arrow__Array] from an R vector #' #' @param \dots Vectors to coerce #' @param type currently ignored diff --git a/r/R/buffer.R b/r/R/buffer.R index 9684a972913..2fecd0e4fc6 100644 --- a/r/R/buffer.R +++ b/r/R/buffer.R @@ -18,21 +18,38 @@ #' @include R6.R #' @include enums.R +#' @title class arrow::Buffer +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Methods: +#' +#' - `$is_mutable()` : +#' - `$ZeroPadding()` : +#' - `$size()` : +#' - `$capacity()`: +#' +#' @rdname arrow__Buffer +#' @name arrow__Buffer `arrow::Buffer` <- R6Class("arrow::Buffer", inherit = `arrow::Object`, public = list( + 
ZeroPadding = function() Buffer__ZeroPadding(self) + ), + + active = list( is_mutable = function() Buffer__is_mutable(self), - ZeroPadding = function() Buffer__ZeroPadding(self), size = function() Buffer__size(self), capacity = function() Buffer__capacity(self) ) ) -`arrow::MutableBuffer` <- R6Class("arrow::Buffer", inherit = `arrow::Buffer`) - -#' Create a buffer from an R object +#' Create a [arrow::Buffer][arrow__Buffer] from an R object #' -#' @param x R object -#' @return an instance of `arrow::Buffer` that borrows memory from `x` +#' @param x R object. Only raw, numeric and integer vectors are currently supported +#' +#' @return an instance of [arrow::Buffer][arrow__Buffer] that borrows memory from `x` #' #' @export buffer <- function(x){ @@ -44,7 +61,6 @@ buffer.default <- function(x) { stop("cannot convert to Buffer") } - #' @export buffer.raw <- function(x) { shared_ptr(`arrow::Buffer`, r___RBuffer__initialize(x)) diff --git a/r/R/dictionary.R b/r/R/dictionary.R index d8a71d92a9f..3c3758df303 100644 --- a/r/R/dictionary.R +++ b/r/R/dictionary.R @@ -17,15 +17,27 @@ #' @include R6.R +#' @title class arrow::DictionaryType +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__DictionaryType +#' @name arrow__DictionaryType `arrow::DictionaryType` <- R6Class("arrow::DictionaryType", inherit = `arrow::FixedWidthType`, - public = list( + + active = list( index_type = function() `arrow::DataType`$dispatch(DictionaryType__index_type(self)), - name = function() DictionaryType__name(self), dictionary = function() shared_ptr(`arrow::Array`, DictionaryType__dictionary(self)), + name = function() DictionaryType__name(self), ordered = function() DictionaryType__ordered(self) ) - ) #' dictionary type factory @@ -34,6 +46,8 @@ #' @param values values array, typically an arrow array of strings #' @param ordered Is this an ordered dictionary #' +#' @return a [arrow::DictionaryType][arrow__DictionaryType] +#' #' 
@export dictionary <- function(type, values, ordered = FALSE) { assert_that( diff --git a/r/R/feather.R b/r/R/feather.R index c36c571bd4b..bae71d31bc1 100644 --- a/r/R/feather.R +++ b/r/R/feather.R @@ -100,7 +100,7 @@ write_feather_RecordBatch <- function(data, stream) { #' @export #' @method write_feather_RecordBatch fs_path `write_feather_RecordBatch.fs_path` <- function(data, stream) { - file_stream <- close_on_exit(file_output_stream(stream)) + file_stream <- close_on_exit(FileOutputStream(stream)) `write_feather_RecordBatch.arrow::io::OutputStream`(data, file_stream) } @@ -133,7 +133,7 @@ feather_table_reader.character <- function(file, mmap = TRUE, ...) { #' @export feather_table_reader.fs_path <- function(file, mmap = TRUE, ...) { - stream <- if(isTRUE(mmap)) mmap_open(file, ...) else file_open(file, ...) + stream <- if(isTRUE(mmap)) mmap_open(file, ...) else ReadableFile(file, ...) feather_table_reader(stream) } diff --git a/r/R/io.R b/r/R/io.R index d4534927412..b772be30acf 100644 --- a/r/R/io.R +++ b/r/R/io.R @@ -19,45 +19,151 @@ #' @include enums.R #' @include buffer.R -`arrow::io::Readable` <- R6Class("arrow::io::Readable", inherit = `arrow::Object`, - public = list( - Read = function(nbytes) shared_ptr(`arrow::Buffer`, io___Readable__Read(self, nbytes)) - ) -) - -`arrow::io::InputStream` <- R6Class("arrow::io::InputStream", inherit = `arrow::io::Readable`, - public = list( - Close = function() io___InputStream__Close(self) - ) -) +# OutputStream ------------------------------------------------------------ `arrow::io::Writable` <- R6Class("arrow::io::Writable", inherit = `arrow::Object`) +#' @title OutputStream +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Methods: +#' +#' - `arrow::Buffer` `Read`(`int` nbytes): Read `nbytes` bytes +#' - `void` `close`(): close the stream +#' +#' @rdname arrow__io__OutputStream +#' @name arrow__io__OutputStream `arrow::io::OutputStream` <- R6Class("arrow::io::OutputStream", inherit = 
`arrow::io::Writable`, public = list( - Close = function() io___OutputStream__Close(self) + close = function() io___OutputStream__Close(self) ) ) +#' @title class arrow::io::FileOutputStream +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__io__FileOutputStream +#' @name arrow__io__FileOutputStream `arrow::io::FileOutputStream` <- R6Class("arrow::io::FileOutputStream", inherit = `arrow::io::OutputStream`) +#' @title class arrow::io::MockOutputStream +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__io__MockOutputStream +#' @name arrow__io__MockOutputStream `arrow::io::MockOutputStream` <- R6Class("arrow::io::MockOutputStream", inherit = `arrow::io::OutputStream`, public = list( GetExtentBytesWritten = function() io___MockOutputStream__GetExtentBytesWritten(self) ) ) +#' @title class arrow::io::BufferOutputStream +#' +#' @usage NULL +#' @docType class +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__io__BufferOutputStream +#' @name arrow__io__BufferOutputStream `arrow::io::BufferOutputStream` <- R6Class("arrow::io::BufferOutputStream", inherit = `arrow::io::OutputStream`, public = list( capacity = function() io___BufferOutputStream__capacity(self), - Finish = function() shared_ptr(`arrow::Buffer`, io___BufferOutputStream__Finish(self)), + getvalue = function() shared_ptr(`arrow::Buffer`, io___BufferOutputStream__Finish(self)), + Write = function(bytes) io___BufferOutputStream__Write(self, bytes), Tell = function() io___BufferOutputStream__Tell(self) ) ) +#' @title class arrow::io::FixedSizeBufferWriter +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__io__FixedSizeBufferWriter +#' @name arrow__io__FixedSizeBufferWriter `arrow::io::FixedSizeBufferWriter` <- R6Class("arrow::io::FixedSizeBufferWriter", inherit = `arrow::io::OutputStream`) + +# 
InputStream ------------------------------------------------------------- + +#' @title class arrow::io::Readable +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__io__Readable +#' @name arrow__io__Readable +`arrow::io::Readable` <- R6Class("arrow::io::Readable", inherit = `arrow::Object`, + public = list( + Read = function(nbytes) shared_ptr(`arrow::Buffer`, io___Readable__Read(self, nbytes)) + ) +) + +#' @title class arrow::io::InputStream +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__io__InputStream +#' @name arrow__io__InputStream +`arrow::io::InputStream` <- R6Class("arrow::io::InputStream", inherit = `arrow::io::Readable`, + public = list( + close = function() io___InputStream__Close(self) + ) +) + +#' @title class arrow::io::RandomAccessFile +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__io__RandomAccessFile +#' @name arrow__io__RandomAccessFile `arrow::io::RandomAccessFile` <- R6Class("arrow::io::RandomAccessFile", inherit = `arrow::io::InputStream`, public = list( GetSize = function() io___RandomAccessFile__GetSize(self), @@ -67,94 +173,159 @@ ) ) +#' @title class arrow::io::MemoryMappedFile +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' +#' @section Methods: +#' +#' TODO +#' +#' @seealso [mmap_open()], [mmap_create()] +#' +#' +#' @rdname arrow__io__MemoryMappedFile +#' @name arrow__io__MemoryMappedFile `arrow::io::MemoryMappedFile` <- R6Class("arrow::io::MemoryMappedFile", inherit = `arrow::io::RandomAccessFile`, public = list( Resize = function(size) io___MemoryMappedFile__Resize(self, size) ) ) +#' @title class arrow::io::ReadableFile +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__io__ReadableFile +#' @name arrow__io__ReadableFile 
`arrow::io::ReadableFile` <- R6Class("arrow::io::ReadableFile", inherit = `arrow::io::RandomAccessFile`) -`arrow::io::BufferReader` <- R6Class("arrow::io::BufferReader", inherit = `arrow::io::RandomAccessFile`) +#' @title class arrow::io::BufferReader +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__io__BufferReader +#' @name arrow__io__BufferReader +`arrow::io::BufferReader` <- R6Class("arrow::io::BufferReader", inherit = `arrow::io::RandomAccessFile`) #' Create a new read/write memory mapped file of a given size #' #' @param path file path #' @param size size in bytes -#' @param mode file mode (read/write/readwrite) -#' @param buffer an `arrow::Buffer`, typically created by [buffer()] -#' @param initial_capacity initial capacity for the buffer output stream #' -#' @rdname io +#' @return a [arrow::io::MemoryMappedFile][arrow__io__MemoryMappedFile] +#' #' @export -mmap_create <- `arrow::io::MemoryMappedFile`$create <- function(path, size) { +mmap_create <- function(path, size) { shared_ptr(`arrow::io::MemoryMappedFile`, io___MemoryMappedFile__Create(fs::path_abs(path), size)) } -#' @rdname io +#' Open a memory mapped file +#' +#' @param path file path +#' @param mode file mode (read/write/readwrite) +#' #' @export -mmap_open <- `arrow::io::MemoryMappedFile`$open <- function(path, mode = c("read", "write", "readwrite")) { +mmap_open <- function(path, mode = c("read", "write", "readwrite")) { mode <- match(match.arg(mode), c("read", "write", "readwrite")) - 1L shared_ptr(`arrow::io::MemoryMappedFile`, io___MemoryMappedFile__Open(fs::path_abs(path), mode)) } -#' @rdname io +#' open a [arrow::io::ReadableFile][arrow__io__ReadableFile] +#' +#' @param path file path +#' +#' @return a [arrow::io::ReadableFile][arrow__io__ReadableFile] +#' #' @export -file_open <- `arrow::io::ReadableFile`$open <- function(path) { +ReadableFile <- function(path) { shared_ptr(`arrow::io::ReadableFile`, 
io___ReadableFile__Open(fs::path_abs(path))) } -#' @rdname io +#' Open a [arrow::io::FileOutputStream][arrow__io__FileOutputStream] +#' +#' @param path file path +#' +#' @return a [arrow::io::FileOutputStream][arrow__io__FileOutputStream] +#' #' @export -file_output_stream <- function(path) { +FileOutputStream <- function(path) { shared_ptr(`arrow::io::FileOutputStream`, io___FileOutputStream__Open(path)) } -#' @rdname io +#' Open a [arrow::io::MockOutputStream][arrow__io__MockOutputStream] +#' +#' @return a [arrow::io::MockOutputStream][arrow__io__MockOutputStream] +#' #' @export -mock_output_stream <- function() { +MockOutputStream <- function() { shared_ptr(`arrow::io::MockOutputStream`, io___MockOutputStream__initialize()) } -#' @rdname io +#' Open a [arrow::io::BufferOutputStream][arrow__io__BufferOutputStream] +#' +#' @param initial_capacity initial capacity +#' +#' @return a [arrow::io::BufferOutputStream][arrow__io__BufferOutputStream] +#' #' @export -buffer_output_stream <- function(initial_capacity = 0L) { +BufferOutputStream <- function(initial_capacity = 0L) { shared_ptr(`arrow::io::BufferOutputStream`, io___BufferOutputStream__Create(initial_capacity)) } -#' @rdname io +#' Open a [arrow::io::FixedSizeBufferWriter][arrow__io__FixedSizeBufferWriter] +#' +#' @param buffer [arrow::Buffer][arrow__Buffer] or something [buffer()] can handle +#' +#' @return a [arrow::io::FixedSizeBufferWriter][arrow__io__FixedSizeBufferWriter] +#' #' @export -fixed_size_buffer_writer <- function(buffer){ - UseMethod("fixed_size_buffer_writer") +FixedSizeBufferWriter <- function(buffer){ + UseMethod("FixedSizeBufferWriter") } #' @export -fixed_size_buffer_writer.default <- function(buffer){ - fixed_size_buffer_writer(buffer(buffer)) +FixedSizeBufferWriter.default <- function(buffer){ + FixedSizeBufferWriter(buffer(buffer)) } #' @export -`fixed_size_buffer_writer.arrow::Buffer` <- function(buffer){ - assert_that(buffer$is_mutable()) +`FixedSizeBufferWriter.arrow::Buffer` <- 
function(buffer){ + assert_that(buffer$is_mutable) shared_ptr(`arrow::io::FixedSizeBufferWriter`, io___FixedSizeBufferWriter__initialize(buffer)) } -#' Create a `arrow::BufferReader` +#' Create a [arrow::io::BufferReader][arrow__io__BufferReader] #' #' @param x R object to treat as a buffer or a buffer created by [buffer()] #' #' @export -buffer_reader <- function(x) { - UseMethod("buffer_reader") +BufferReader <- function(x) { + UseMethod("BufferReader") } #' @export -`buffer_reader.arrow::Buffer` <- function(x) { - shared_ptr(`arrow::io::BufferReader`, io___BufferReader__initialize(x)) +BufferReader.default <- function(x) { + BufferReader(buffer(x)) } #' @export -buffer_reader.default <- function(x) { - buffer_reader(buffer(x)) +`BufferReader.arrow::Buffer` <- function(x) { + shared_ptr(`arrow::io::BufferReader`, io___BufferReader__initialize(x)) } - diff --git a/r/R/memory_pool.R b/r/R/memory_pool.R index 49f65d2a1f3..88c2c7bc198 100644 --- a/r/R/memory_pool.R +++ b/r/R/memory_pool.R @@ -16,7 +16,19 @@ # under the License. 
#' @include R6.R - +#' +#' @title class arrow::MemoryPool +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__MemoryPool +#' @name arrow__MemoryPool `arrow::MemoryPool` <- R6Class("arrow::MemoryPool", inherit = `arrow::Object`, public = list( @@ -28,6 +40,10 @@ ) ) +#' default [arrow::MemoryPool][arrow__MemoryPool] +#' +#' @return the default [arrow::MemoryPool][arrow__MemoryPool] +#' @export default_memory_pool <- function() { shared_ptr(`arrow::MemoryPool`, MemoryPool__default()) } diff --git a/r/R/message.R b/r/R/message.R index f31fb9a53b7..93c90c09763 100644 --- a/r/R/message.R +++ b/r/R/message.R @@ -17,6 +17,18 @@ #' @include R6.R +#' @title class arrow::ipc::Message +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__ipc__Message +#' @name arrow__ipc__Message `arrow::ipc::Message` <- R6Class("arrow::ipc::Message", inherit = `arrow::Object`, public = list( Equals = function(other){ @@ -24,10 +36,10 @@ ipc___Message__Equals(self, other) }, body_length = function() ipc___Message__body_length(self), - Verify = function() ipc___Message__Verify(self), - type = function() ipc___Message__type(self) + Verify = function() ipc___Message__Verify(self) ), active = list( + type = function() ipc___Message__type(self), metadata = function() shared_ptr(`arrow::Buffer`, ipc___Message__metadata(self)), body = function() shared_ptr(`arrow::Buffer`, ipc___Message__body(self)) ) @@ -36,51 +48,58 @@ #' @export `==.arrow::ipc::Message` <- function(x, y) x$Equals(y) +#' @title class arrow::ipc::MessageReader +#' +#' @usage NULL +#' @format NULL +#' @docType class +#' +#' @section Methods: +#' +#' TODO +#' +#' @rdname arrow__ipc__MessageReader +#' @name arrow__ipc__MessageReader `arrow::ipc::MessageReader` <- R6Class("arrow::ipc::MessageReader", inherit = `arrow::Object`, public = list( ReadNextMessage = function() unique_ptr(`arrow::ipc::Message`, 
ipc___MessageReader__ReadNextMessage(self)) ) ) -#' Read a Message from a stream +#' Open a MessageReader that reads from a stream #' #' @param stream an InputStream #' #' @export -read_message <- function(stream) { - UseMethod("read_message") +MessageReader <- function(stream) { + UseMethod("MessageReader") } #' @export -read_message.default <- function(stream) { - stop("unsupported") +MessageReader.default <- function(stream) { + MessageReader(BufferReader(stream)) } #' @export -`read_message.arrow::io::InputStream` <- function(stream) { - unique_ptr(`arrow::ipc::Message`, ipc___ReadMessage(stream) ) +`MessageReader.arrow::io::InputStream` <- function(stream) { + unique_ptr(`arrow::ipc::MessageReader`, ipc___MessageReader__Open(stream)) } -#' Open a MessageReader that reads from a stream +#' Read a Message from a stream #' #' @param stream an InputStream #' #' @export -message_reader <- function(stream) { - UseMethod("message_reader") -} - -#' @export -message_reader.default <- function(stream) { - stop("unsupported") +read_message <- function(stream) { + UseMethod("read_message") } #' @export -message_reader.raw <- function(stream) { - message_reader(buffer_reader(stream)) +`read_message.arrow::io::InputStream` <- function(stream) { + unique_ptr(`arrow::ipc::Message`, ipc___ReadMessage(stream) ) } #' @export -`message_reader.arrow::io::InputStream` <- function(stream) { - unique_ptr(`arrow::ipc::MessageReader`, ipc___MessageReader__Open(stream)) +`read_message.arrow::ipc::MessageReader` <- function(stream) { + stream$ReadNextMessage() } diff --git a/r/R/on_exit.R b/r/R/on_exit.R index 9387169b8be..52b017404de 100644 --- a/r/R/on_exit.R +++ b/r/R/on_exit.R @@ -17,7 +17,7 @@ #' @importFrom withr defer_parent close_on_exit <- function(x, ...){ - defer_parent(x$Close(), ...) + defer_parent(x$close(), ...) 
x } diff --git a/r/R/read_record_batch.R b/r/R/read_record_batch.R new file mode 100644 index 00000000000..967ac5b7650 --- /dev/null +++ b/r/R/read_record_batch.R @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +#' read [arrow::RecordBatch][arrow__RecordBatch] as encapsulated IPC message, given a known [arrow::Schema][arrow__Schema] +#' +#' @param obj a [arrow::ipc::Message][arrow__ipc__Message], a [arrow::io::InputStream][arrow__io__InputStream], a [arrow::Buffer][arrow__Buffer], or a raw vector +#' @param schema a [arrow::Schema][arrow__Schema] +#' +#' @return a [arrow::RecordBatch][arrow__RecordBatch] +#' +#' @export +read_record_batch <- function(obj, schema){ + UseMethod("read_record_batch") +} + +#' @export +`read_record_batch.arrow::ipc::Message` <- function(obj, schema) { + assert_that(inherits(schema, "arrow::Schema")) + shared_ptr(`arrow::RecordBatch`, ipc___ReadRecordBatch__Message__Schema(obj, schema)) +} + +#' @export +`read_record_batch.arrow::io::InputStream` <- function(obj, schema) { + assert_that(inherits(schema, "arrow::Schema")) + shared_ptr(`arrow::RecordBatch`, ipc___ReadRecordBatch__InputStream__Schema(obj, schema)) +} + +#' @export +read_record_batch.raw <- function(obj, schema){ + stream 
<- close_on_exit(BufferReader(obj)) + read_record_batch(stream, schema) +} + +#' @export +`read_record_batch.arrow::Buffer` <- function(obj, schema){ + stream <- close_on_exit(BufferReader(obj)) + read_record_batch(stream, schema) +} diff --git a/r/R/read_table.R b/r/R/read_table.R new file mode 100644 index 00000000000..a540a421735 --- /dev/null +++ b/r/R/read_table.R @@ -0,0 +1,86 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +#' Read an [arrow::Table][arrow__Table] from a stream +#' +#' @param stream stream. +#' +#' - a [arrow::ipc::RecordBatchFileReader][arrow__ipc__RecordBatchFileReader]: +#' read an [arrow::Table][arrow__Table] +#' from all the record batches in the reader +#' +#' - a [arrow::ipc::RecordBatchStreamReader][arrow__ipc__RecordBatchStreamReader]: +#' read an [arrow::Table][arrow__Table] from the remaining record batches +#' in the reader +#' +#' - a string or [file path][fs::path_abs()]: interpret the file as an arrow +#' binary file format, and uses a [arrow::ipc::RecordBatchFileReader][arrow__ipc__RecordBatchFileReader] +#' to process it. 
+#' +#' - a raw vector: read using a [arrow::ipc::RecordBatchStreamReader][arrow__ipc__RecordBatchStreamReader] +#' +#' @return +#' +#' - `read_table` returns an [arrow::Table][arrow__Table] +#' - `read_arrow` returns a [tibble::tibble()] +#' +#' @details +#' +#' The methods using [arrow::ipc::RecordBatchFileReader][arrow__ipc__RecordBatchFileReader] and +#' [arrow::ipc::RecordBatchStreamReader][arrow__ipc__RecordBatchStreamReader] offer the most +#' flexibility. The other methods are for convenience. +#' +#' @export +read_table <- function(stream){ + UseMethod("read_table") +} + +#' @export +`read_table.arrow::ipc::RecordBatchFileReader` <- function(stream) { + shared_ptr(`arrow::Table`, Table__from_RecordBatchFileReader(stream)) +} + +#' @export +`read_table.arrow::ipc::RecordBatchStreamReader` <- function(stream) { + shared_ptr(`arrow::Table`, Table__from_RecordBatchStreamReader(stream)) +} + +#' @export +read_table.character <- function(stream){ + assert_that(length(stream) == 1L) + read_table(fs::path_abs(stream)) +} + +#' @export +read_table.fs_path <- function(stream) { + stream <- close_on_exit(ReadableFile(stream)) + batch_reader <- close_on_exit(RecordBatchFileReader(stream)) + shared_ptr(`arrow::Table`, Table__from_RecordBatchFileReader(batch_reader)) +} + +#' @export +`read_table.raw` <- function(stream) { + stream <- close_on_exit(BufferReader(stream)) + batch_reader <- close_on_exit(RecordBatchStreamReader(stream)) + shared_ptr(`arrow::Table`, Table__from_RecordBatchStreamReader(batch_reader)) +} + +#' @rdname read_table +#' @export +read_arrow <- function(stream){ + as_tibble(read_table(stream)) +} diff --git a/r/R/write_arrow.R b/r/R/write_arrow.R new file mode 100644 index 00000000000..5fc684771e5 --- /dev/null +++ b/r/R/write_arrow.R @@ -0,0 +1,94 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +to_arrow <- function(x) { + UseMethod("to_arrow") +} + +`to_arrow.arrow::RecordBatch` <- function(x) x +`to_arrow.arrow::Table` <- function(x) x +`to_arrow.data.frame` <- function(x) table(x) + +#' serialize an [arrow::Table][arrow__Table], an [arrow::RecordBatch][arrow__RecordBatch], or a +#' data frame to either the streaming format or the binary file format +#' +#' @param x an [arrow::Table][arrow__Table], an [arrow::RecordBatch][arrow__RecordBatch] or a data.frame +#' +#' @param stream where to serialize to +#' +#' - A [arrow::ipc::RecordBatchWriter][arrow__ipc__RecordBatchWriter]: the `$write()` +#' of `x` is used. The stream is left open. This uses the streaming format +#' or the binary file format depending on the type of the writer. +#' +#' - A string or [file path][fs::path_abs()]: `x` is serialized with +#' a [arrow::ipc::RecordBatchFileWriter][arrow__ipc__RecordBatchFileWriter], i.e. +#' using the binary file format. +#' +#' - A raw vector: typically of length zero (its data is ignored, and only used for +#' dispatch). `x` is serialized using the streaming format, i.e. using the +#' [arrow::ipc::RecordBatchStreamWriter][arrow__ipc__RecordBatchStreamWriter] +#' +#' @param ... 
extra parameters, currently ignored +#' +#' `write_arrow` is a convenience function, the classes [arrow::ipc::RecordBatchFileWriter][arrow__ipc__RecordBatchFileWriter] +#' and [arrow::ipc::RecordBatchStreamWriter][arrow__ipc__RecordBatchStreamWriter] can be used for more flexibility. +#' +#' @export +write_arrow <- function(x, stream, ...) { + UseMethod("write_arrow", stream) +} + +#' @export +`write_arrow.arrow::ipc::RecordBatchWriter` <- function(x, stream, ...){ + stream$write(x) +} + +#' @export +`write_arrow.character` <- function(x, stream, ...) { + write_arrow(x, fs::path_abs(stream), ...) +} + +#' @export +`write_arrow.fs_path` <- function(x, stream, ...) { + assert_that(length(stream) == 1L) + x <- to_arrow(x) + file_stream <- close_on_exit(FileOutputStream(stream)) + file_writer <- close_on_exit(RecordBatchFileWriter(file_stream, x$schema)) + write_arrow(x, file_writer, ...) +} + +#' @export +`write_arrow.raw` <- function(x, stream, ...) { + x <- to_arrow(x) + schema <- x$schema + + # how many bytes do we need + mock_stream <- MockOutputStream() + writer <- RecordBatchStreamWriter(mock_stream, schema) + writer$write(x) + writer$close() + n <- mock_stream$GetExtentBytesWritten() + + # now that we know the size, stream in a buffer backed by an R raw vector + bytes <- raw(n) + buffer_writer <- FixedSizeBufferWriter(buffer(bytes)) + writer <- RecordBatchStreamWriter(buffer_writer, schema) + writer$write(x) + writer$close() + + bytes +} diff --git a/r/README.Rmd b/r/README.Rmd index 204a9f9d566..2c51d01c0f0 100644 --- a/r/README.Rmd +++ b/r/README.Rmd @@ -46,9 +46,9 @@ tf <- tempfile() # write arrow::Table to file (tib <- tibble(x = 1:10, y = rnorm(10))) -arrow::write_arrow(tib, tf) +# arrow::write_arrow(tib, tf) -# read it back with pyarrow -pa <- import("pyarrow") -as_tibble(pa$open_file(tf)$read_pandas()) +# # read it back with pyarrow +# pa <- import("pyarrow") +# as_tibble(pa$open_file(tf)$read_pandas()) ``` diff --git a/r/configure b/r/configure index 
69f04632a2f..28f6a73ac7e 100755 --- a/r/configure +++ b/r/configure @@ -91,7 +91,7 @@ if [ $? -ne 0 ]; then fi # Write to Makevars -sed -e "s|@cflags@|$PKG_CFLAGS|" -e "s|@libs@|$PKG_LIBS|" src/Makevars.in > src/Makevars +sed -e "s|@cflags@|$PKG_CFLAGS|" -e "s|@libs@|$PKG_LIBS|" -e "s|@visibility@|$C_VISIBILITY|" src/Makevars.in > src/Makevars # Success exit 0 diff --git a/r/data-raw/test.R b/r/data-raw/test.R deleted file mode 100644 index 516af58616e..00000000000 --- a/r/data-raw/test.R +++ /dev/null @@ -1,85 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -library(tidyverse) -library(arrow) - -# meta data -(t1 <- int32()) -(t2 <- utf8()) -(t5 <- timestamp(unit = TimeUnit$MILLI)) - -# lists -list_of(t1) - -# shema -schema(x = int32(), y = float64()) - -# :scream_cat: -# -# pa.schema( -# [ -# pa.field('x', pa.int32()), -# pa.field('y', pa.float64()) -# ] -# ) -# - -schema(x = int32(), y = list_of(float64())) - -#------- arrays - -# arr = pa.array([1, 2, 3]) -arr <- array(1:3, 5:80) -arr -arr$as_vector() - -#------- read_arrow / stream -tbl <- tibble(x=1:10, y=rnorm(10)) -write_arrow(tbl, "/tmp/test.arrow") -readr::write_rds(tbl, "/tmp/test.rds") -fs::file_info(c("/tmp/test.arrow", "/tmp/test.rds")) - -(data <- read_arrow("/tmp/test.arrow")) - -# tibble <-> arrow::RecordBatch -(batch <- record_batch(tbl)) -batch$num_columns() -batch$num_rows() -write_arrow(batch, "/tmp/test") -readBin("/tmp/test", what = raw(), n = 1000) -batch$schema() -all.equal(tbl, data) - -batch <- read_record_batch("/tmp/test") -batch$schema() -batch$column(0) -batch$column(0)$as_vector() - -as_tibble(batch) - -# tibble <-> arrow::Table -tab <- arrow::table(tbl) -tab -tab$schema() -tab$num_columns() -tab$num_rows() - -# read_arrow, stream -tbl <- tibble(x = rnorm(20), y = seq_len(20)) -write_arrow(tbl, tf) - diff --git a/r/man/BufferOutputStream.Rd b/r/man/BufferOutputStream.Rd new file mode 100644 index 00000000000..1776f995930 --- /dev/null +++ b/r/man/BufferOutputStream.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\name{BufferOutputStream} +\alias{BufferOutputStream} +\title{Open a \link[=arrow__io__BufferOutputStream]{arrow::io::BufferOutputStream}} +\usage{ +BufferOutputStream(initial_capacity = 0L) +} +\arguments{ +\item{initial_capacity}{initial capacity} +} +\value{ +a \link[=arrow__io__BufferOutputStream]{arrow::io::BufferOutputStream} +} +\description{ +Open a \link[=arrow__io__BufferOutputStream]{arrow::io::BufferOutputStream} +} diff --git a/r/man/buffer_reader.Rd 
b/r/man/BufferReader.Rd similarity index 52% rename from r/man/buffer_reader.Rd rename to r/man/BufferReader.Rd index 3b814fb00b1..ea5dd790cdd 100644 --- a/r/man/buffer_reader.Rd +++ b/r/man/BufferReader.Rd @@ -1,14 +1,14 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/io.R -\name{buffer_reader} -\alias{buffer_reader} -\title{Create a \code{arrow::BufferReader}} +\name{BufferReader} +\alias{BufferReader} +\title{Create a \link[=arrow__io__BufferReader]{arrow::io::BufferReader}} \usage{ -buffer_reader(x) +BufferReader(x) } \arguments{ \item{x}{R object to treat as a buffer or a buffer created by \code{\link[=buffer]{buffer()}}} } \description{ -Create a \code{arrow::BufferReader} +Create a \link[=arrow__io__BufferReader]{arrow::io::BufferReader} } diff --git a/r/man/FileOutputStream.Rd b/r/man/FileOutputStream.Rd new file mode 100644 index 00000000000..4155d349d1a --- /dev/null +++ b/r/man/FileOutputStream.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\name{FileOutputStream} +\alias{FileOutputStream} +\title{Open a \link[=arrow__io__FileOutputStream]{arrow::io::FileOutputStream}} +\usage{ +FileOutputStream(path) +} +\arguments{ +\item{path}{file path} +} +\value{ +a \link[=arrow__io__FileOutputStream]{arrow::io::FileOutputStream} +} +\description{ +Open a \link[=arrow__io__FileOutputStream]{arrow::io::FileOutputStream} +} diff --git a/r/man/FixedSizeBufferWriter.Rd b/r/man/FixedSizeBufferWriter.Rd new file mode 100644 index 00000000000..553d61b76e1 --- /dev/null +++ b/r/man/FixedSizeBufferWriter.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\name{FixedSizeBufferWriter} +\alias{FixedSizeBufferWriter} +\title{Open a \link[=arrow__io__FixedSizeBufferWriter]{arrow::io::FixedSizeBufferWriter}} +\usage{ +FixedSizeBufferWriter(buffer) +} +\arguments{ +\item{buffer}{\link[=arrow__Buffer]{arrow::Buffer} or something 
\code{\link[=buffer]{buffer()}} can handle} +} +\value{ +a \link[=arrow__io__FixedSizeBufferWriter]{arrow::io::FixedSizeBufferWriter} +} +\description{ +Open a \link[=arrow__io__FixedSizeBufferWriter]{arrow::io::FixedSizeBufferWriter} +} diff --git a/r/man/message_reader.Rd b/r/man/MessageReader.Rd similarity index 79% rename from r/man/message_reader.Rd rename to r/man/MessageReader.Rd index 0d8b1e7ff63..01589f5d078 100644 --- a/r/man/message_reader.Rd +++ b/r/man/MessageReader.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/message.R -\name{message_reader} -\alias{message_reader} +\name{MessageReader} +\alias{MessageReader} \title{Open a MessageReader that reads from a stream} \usage{ -message_reader(stream) +MessageReader(stream) } \arguments{ \item{stream}{an InputStream} diff --git a/r/man/MockOutputStream.Rd b/r/man/MockOutputStream.Rd new file mode 100644 index 00000000000..2e3c0b6d3e3 --- /dev/null +++ b/r/man/MockOutputStream.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\name{MockOutputStream} +\alias{MockOutputStream} +\title{Open a \link[=arrow__io__MockOutputStream]{arrow::io::MockOutputStream}} +\usage{ +MockOutputStream() +} +\value{ +a \link[=arrow__io__MockOutputStream]{arrow::io::MockOutputStream} +} +\description{ +Open a \link[=arrow__io__MockOutputStream]{arrow::io::MockOutputStream} +} diff --git a/r/man/ReadableFile.Rd b/r/man/ReadableFile.Rd new file mode 100644 index 00000000000..11535321bfb --- /dev/null +++ b/r/man/ReadableFile.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\name{ReadableFile} +\alias{ReadableFile} +\title{open a \link[=arrow__io__ReadableFile]{arrow::io::ReadableFile}} +\usage{ +ReadableFile(path) +} +\arguments{ +\item{path}{file path} +} +\value{ +a \link[=arrow__io__ReadableFile]{arrow::io::ReadableFile} +} +\description{ +open a
\link[=arrow__io__ReadableFile]{arrow::io::ReadableFile} +} diff --git a/r/man/RecordBatchFileReader.Rd b/r/man/RecordBatchFileReader.Rd new file mode 100644 index 00000000000..3ea04817e0e --- /dev/null +++ b/r/man/RecordBatchFileReader.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RecordBatchReader.R +\name{RecordBatchFileReader} +\alias{RecordBatchFileReader} +\title{Create an \link[=arrow__ipc__RecordBatchFileReader]{arrow::ipc::RecordBatchFileReader} from a file} +\usage{ +RecordBatchFileReader(file) +} +\arguments{ +\item{file}{The file to read from. A file path, or an \link[=arrow__ipc__RecordBatchFileReader]{arrow::io::RandomAccessFile}} +} +\description{ +Create an \link[=arrow__ipc__RecordBatchFileReader]{arrow::ipc::RecordBatchFileReader} from a file +} diff --git a/r/man/RecordBatchFileWriter.Rd b/r/man/RecordBatchFileWriter.Rd new file mode 100644 index 00000000000..90858304b0b --- /dev/null +++ b/r/man/RecordBatchFileWriter.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RecordBatchWriter.R +\name{RecordBatchFileWriter} +\alias{RecordBatchFileWriter} +\title{Create a record batch file writer from a stream} +\usage{ +RecordBatchFileWriter(sink, schema) +} +\arguments{ +\item{sink}{Where to write. 
Can either be: +\itemize{ +\item character vector of length one +\item a \link[fs:path_abs]{file path} +\item \link[=arrow__io__OutputStream]{arrow::io::OutputStream} +}} + +\item{schema}{The \link[=arrow__Schema]{arrow::Schema} for data to be written.} +} +\value{ +an \code{arrow::ipc::RecordBatchWriter} object +} +\description{ +Create a record batch file writer from a stream +} diff --git a/r/man/RecordBatchStreamReader.Rd b/r/man/RecordBatchStreamReader.Rd new file mode 100644 index 00000000000..4bd0e8ccdc5 --- /dev/null +++ b/r/man/RecordBatchStreamReader.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RecordBatchReader.R +\name{RecordBatchStreamReader} +\alias{RecordBatchStreamReader} +\title{Create a \link[=arrow__ipc__RecordBatchStreamReader]{arrow::ipc::RecordBatchStreamReader} from an input stream} +\usage{ +RecordBatchStreamReader(stream) +} +\arguments{ +\item{stream}{input stream, an \link[=arrow__io__InputStream]{arrow::io::InputStream} or a raw vector} +} +\description{ +Create a \link[=arrow__ipc__RecordBatchStreamReader]{arrow::ipc::RecordBatchStreamReader} from an input stream +} diff --git a/r/man/RecordBatchStreamWriter.Rd b/r/man/RecordBatchStreamWriter.Rd new file mode 100644 index 00000000000..b9183a80719 --- /dev/null +++ b/r/man/RecordBatchStreamWriter.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RecordBatchWriter.R +\name{RecordBatchStreamWriter} +\alias{RecordBatchStreamWriter} +\title{Writer for the Arrow streaming binary format} +\usage{ +RecordBatchStreamWriter(sink, schema) +} +\arguments{ +\item{sink}{Where to write. 
Can either be: +\itemize{ +\item A string, meant as a file path, passed to \code{\link[fs:path_abs]{fs::path_abs()}} +\item a \link[fs:path_abs]{file path} +\item \link[=arrow__io__OutputStream]{arrow::io::OutputStream} +}} + +\item{schema}{The \link[=arrow__Schema]{arrow::Schema} for data to be written.} +} +\value{ +a \link[=arrow__ipc__RecordBatchStreamWriter]{arrow::ipc::RecordBatchStreamWriter} +} +\description{ +Writer for the Arrow streaming binary format +} diff --git a/r/man/array.Rd b/r/man/array.Rd index 38bd773be92..ccdba181db8 100644 --- a/r/man/array.Rd +++ b/r/man/array.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/array.R \name{array} \alias{array} -\title{create an arrow::Array from an R vector} +\title{create an \link[=arrow__Array]{arrow::Array} from an R vector} \usage{ array(..., type) } @@ -12,5 +12,5 @@ array(..., type) \item{type}{currently ignored} } \description{ -create an arrow::Array from an R vector +create an \link[=arrow__Array]{arrow::Array} from an R vector } diff --git a/r/man/arrow__Array.Rd b/r/man/arrow__Array.Rd new file mode 100644 index 00000000000..b11373d26b3 --- /dev/null +++ b/r/man/arrow__Array.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/array.R +\docType{class} +\name{arrow__Array} +\alias{arrow__Array} +\alias{arrow::Array} +\title{class arrow::Array + +Array base type. Immutable data array with some logical type and some length.} +\description{ +class arrow::Array + +Array base type. Immutable data array with some logical type and some length. +} +\section{Usage}{ +\preformatted{a <- array(...) 
+ +a$IsNull(i) +a$IsValid(i) +a$length() or length(a) +a$offset() +a$null_count() +a$type() +a$type_id() +a$Equals(b) +a$ApproxEquals(b) +a$as_vector() +a$ToString() +a$Slice(offset, length = NULL) +a$RangeEquals(other, start_idx, end_idx, other_start_idx) + +print(a) +a == a +} +} + +\section{Methods}{ + +\itemize{ +\item \code{$IsNull(i)}: Return true if value at index is null. Does not boundscheck +\item \code{$IsValid(i)}: Return true if value at index is valid. Does not boundscheck +\item \code{$length()}: Size in the number of elements this array contains +\item \code{$offset()}: A relative position into another array's data, to enable zero-copy slicing +\item \code{$null_count()}: The number of null entries in the array +\item \code{$type()}: logical type of data +\item \code{$type_id()}: type id +\item \code{$Equals(other)} : is this array equal to \code{other} +\item \code{$ApproxEquals(other)} : +\item \code{$data()}: return the underlying \link[=arrow__ArrayData]{arrow::ArrayData} +\item \code{$as_vector()}: convert to an R vector +\item \code{$ToString()}: string representation of the array +\item \code{$Slice(offset, length = NULL)} : Construct a zero-copy slice of the array with the indicated offset and length. If length is \code{NULL}, the slice goes until the end of the array. 
+\item \code{$RangeEquals(other, start_idx, end_idx, other_start_idx)} : +} +} + +\keyword{datasets} diff --git a/r/man/arrow__ArrayData.Rd b/r/man/arrow__ArrayData.Rd new file mode 100644 index 00000000000..bdf996605c5 --- /dev/null +++ b/r/man/arrow__ArrayData.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ArrayData.R +\docType{class} +\name{arrow__ArrayData} +\alias{arrow__ArrayData} +\alias{arrow::ArrayData} +\title{class arrow::ArrayData} +\description{ +class arrow::ArrayData +} +\section{Usage}{ +\preformatted{data <- array(...)$data() + +data$type() +data$length() +data$null_count() +data$offset() +data$buffers() +} +} + +\section{Methods}{ + + +... +} + +\keyword{datasets} diff --git a/r/man/arrow__Buffer.Rd b/r/man/arrow__Buffer.Rd new file mode 100644 index 00000000000..135da7a20e7 --- /dev/null +++ b/r/man/arrow__Buffer.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/buffer.R +\docType{class} +\name{arrow__Buffer} +\alias{arrow__Buffer} +\alias{arrow::Buffer} +\title{class arrow::Buffer} +\description{ +class arrow::Buffer +} +\section{Methods}{ + +\itemize{ +\item \code{$is_mutable()} : +\item \code{$ZeroPadding()} : +\item \code{$size()} : +\item \code{$capacity()}: +} +} + +\keyword{datasets} diff --git a/r/man/arrow__ChunkedArray.Rd b/r/man/arrow__ChunkedArray.Rd new file mode 100644 index 00000000000..a87bf1c0dcc --- /dev/null +++ b/r/man/arrow__ChunkedArray.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ChunkedArray.R +\docType{class} +\name{arrow__ChunkedArray} +\alias{arrow__ChunkedArray} +\alias{arrow::ChunkedArray} +\title{class arrow::ChunkedArray} +\description{ +class arrow::ChunkedArray +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__Column.Rd b/r/man/arrow__Column.Rd new file mode 100644 index 00000000000..6a0ee6a40a5 --- /dev/null +++ 
b/r/man/arrow__Column.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Column.R +\docType{class} +\name{arrow__Column} +\alias{arrow__Column} +\alias{arrow::Column} +\title{class arrow::Column} +\description{ +class arrow::Column +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__DataType.Rd b/r/man/arrow__DataType.Rd new file mode 100644 index 00000000000..53bd6327d91 --- /dev/null +++ b/r/man/arrow__DataType.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/R6.R +\docType{class} +\name{arrow__DataType} +\alias{arrow__DataType} +\alias{arrow::DataType} +\title{class arrow::DataType} +\description{ +class arrow::DataType +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__DictionaryType.Rd b/r/man/arrow__DictionaryType.Rd new file mode 100644 index 00000000000..ba462ee0114 --- /dev/null +++ b/r/man/arrow__DictionaryType.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dictionary.R +\docType{class} +\name{arrow__DictionaryType} +\alias{arrow__DictionaryType} +\alias{arrow::DictionaryType} +\title{class arrow::DictionaryType} +\description{ +class arrow::DictionaryType +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__Field.Rd b/r/man/arrow__Field.Rd new file mode 100644 index 00000000000..893a65aa08e --- /dev/null +++ b/r/man/arrow__Field.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Field.R +\docType{class} +\name{arrow__Field} +\alias{arrow__Field} +\alias{arrow::Field} +\title{class arrow::Field} +\description{ +class arrow::Field +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__FixedWidthType.Rd b/r/man/arrow__FixedWidthType.Rd new file mode 100644 index 00000000000..610a4003429 --- /dev/null +++ b/r/man/arrow__FixedWidthType.Rd 
@@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/R6.R +\docType{class} +\name{arrow__FixedWidthType} +\alias{arrow__FixedWidthType} +\alias{arrow::FixedWidthType} +\title{class arrow::FixedWidthType} +\description{ +class arrow::FixedWidthType +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__RecordBatch.Rd b/r/man/arrow__RecordBatch.Rd new file mode 100644 index 00000000000..40ba6323ee0 --- /dev/null +++ b/r/man/arrow__RecordBatch.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RecordBatch.R +\docType{class} +\name{arrow__RecordBatch} +\alias{arrow__RecordBatch} +\alias{arrow::RecordBatch} +\title{class arrow::RecordBatch} +\description{ +class arrow::RecordBatch +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__RecordBatchReader.Rd b/r/man/arrow__RecordBatchReader.Rd new file mode 100644 index 00000000000..b3ccd3f1749 --- /dev/null +++ b/r/man/arrow__RecordBatchReader.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RecordBatchReader.R +\docType{class} +\name{arrow__RecordBatchReader} +\alias{arrow__RecordBatchReader} +\alias{arrow::RecordBatchReader} +\title{class arrow::RecordBatchReader} +\description{ +class arrow::RecordBatchReader +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__Schema.Rd b/r/man/arrow__Schema.Rd new file mode 100644 index 00000000000..b657ff2c4a8 --- /dev/null +++ b/r/man/arrow__Schema.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Schema.R +\docType{class} +\name{arrow__Schema} +\alias{arrow__Schema} +\alias{arrow::Schema} +\title{class arrow::Schema} +\description{ +class arrow::Schema +} +\section{Usage}{ +\preformatted{s <- schema(...) 
+ +s$ToString() +s$num_fields() +s$field(i) +} +} + +\section{Methods}{ + +\itemize{ +\item \code{$ToString()}: convert to a string +\item \code{$num_fields()}: returns the number of fields +\item \code{$field(i)}: returns the field at index \code{i} (0-based) +} +} + +\keyword{datasets} diff --git a/r/man/arrow__Table.Rd b/r/man/arrow__Table.Rd new file mode 100644 index 00000000000..139db980acf --- /dev/null +++ b/r/man/arrow__Table.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Table.R +\docType{class} +\name{arrow__Table} +\alias{arrow__Table} +\alias{arrow::Table} +\title{class arrow::Table} +\description{ +class arrow::Table +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow___MemoryPool.Rd b/r/man/arrow___MemoryPool.Rd new file mode 100644 index 00000000000..9189e8be4a3 --- /dev/null +++ b/r/man/arrow___MemoryPool.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/memory_pool.R +\docType{class} +\name{arrow__MemoryPool} +\alias{arrow__MemoryPool} +\alias{arrow::MemoryPool} +\title{class arrow::MemoryPool} +\description{ +class arrow::MemoryPool +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__io__BufferOutputStream.Rd b/r/man/arrow__io__BufferOutputStream.Rd new file mode 100644 index 00000000000..e90d1cc0ed8 --- /dev/null +++ b/r/man/arrow__io__BufferOutputStream.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\docType{class} +\name{arrow__io__BufferOutputStream} +\alias{arrow__io__BufferOutputStream} +\alias{arrow::io::BufferOutputStream} +\title{class arrow::io::BufferOutputStream} +\format{An object of class \code{R6ClassGenerator} of length 24.} +\description{ +class arrow::io::BufferOutputStream +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__io__BufferReader.Rd 
b/r/man/arrow__io__BufferReader.Rd new file mode 100644 index 00000000000..609fec5b6d4 --- /dev/null +++ b/r/man/arrow__io__BufferReader.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\docType{class} +\name{arrow__io__BufferReader} +\alias{arrow__io__BufferReader} +\alias{arrow::io::BufferReader} +\title{class arrow::io::BufferReader} +\description{ +class arrow::io::BufferReader +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__io__FileOutputStream.Rd b/r/man/arrow__io__FileOutputStream.Rd new file mode 100644 index 00000000000..92eaac13c9f --- /dev/null +++ b/r/man/arrow__io__FileOutputStream.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\docType{class} +\name{arrow__io__FileOutputStream} +\alias{arrow__io__FileOutputStream} +\alias{arrow::io::FileOutputStream} +\title{class arrow::io::FileOutputStream} +\description{ +class arrow::io::FileOutputStream +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__io__FixedSizeBufferWriter.Rd b/r/man/arrow__io__FixedSizeBufferWriter.Rd new file mode 100644 index 00000000000..39d8bb69c25 --- /dev/null +++ b/r/man/arrow__io__FixedSizeBufferWriter.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\docType{class} +\name{arrow__io__FixedSizeBufferWriter} +\alias{arrow__io__FixedSizeBufferWriter} +\alias{arrow::io::FixedSizeBufferWriter} +\title{class arrow::io::FixedSizeBufferWriter} +\description{ +class arrow::io::FixedSizeBufferWriter +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__io__InputStream.Rd b/r/man/arrow__io__InputStream.Rd new file mode 100644 index 00000000000..37f83308b64 --- /dev/null +++ b/r/man/arrow__io__InputStream.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\docType{class} 
+\name{arrow__io__InputStream} +\alias{arrow__io__InputStream} +\alias{arrow::io::InputStream} +\title{class arrow::io::InputStream} +\description{ +class arrow::io::InputStream +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__io__MemoryMappedFile.Rd b/r/man/arrow__io__MemoryMappedFile.Rd new file mode 100644 index 00000000000..409bb17302a --- /dev/null +++ b/r/man/arrow__io__MemoryMappedFile.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\docType{class} +\name{arrow__io__MemoryMappedFile} +\alias{arrow__io__MemoryMappedFile} +\alias{arrow::io::MemoryMappedFile} +\title{class arrow::io::MemoryMappedFile} +\description{ +class arrow::io::MemoryMappedFile +} +\section{Methods}{ + + +TODO +} + +\seealso{ +\code{\link[=mmap_open]{mmap_open()}}, \code{\link[=mmap_create]{mmap_create()}} +} +\keyword{datasets} diff --git a/r/man/arrow__io__MockOutputStream.Rd b/r/man/arrow__io__MockOutputStream.Rd new file mode 100644 index 00000000000..f0b2c06d7a5 --- /dev/null +++ b/r/man/arrow__io__MockOutputStream.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\docType{class} +\name{arrow__io__MockOutputStream} +\alias{arrow__io__MockOutputStream} +\alias{arrow::io::MockOutputStream} +\title{class arrow::io::MockOutputStream} +\description{ +class arrow::io::MockOutputStream +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__io__OutputStream.Rd b/r/man/arrow__io__OutputStream.Rd new file mode 100644 index 00000000000..c41b815c021 --- /dev/null +++ b/r/man/arrow__io__OutputStream.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\docType{class} +\name{arrow__io__OutputStream} +\alias{arrow__io__OutputStream} +\alias{arrow::io::OutputStream} +\title{OutputStream} +\description{ +OutputStream +} +\section{Methods}{ + +\itemize{ +\item 
\code{void} \code{write}(\code{arrow::Buffer} buffer): write the contents of \code{buffer} to the stream +\item \code{void} \code{close}(): close the stream +} +} + +\keyword{datasets} diff --git a/r/man/arrow__io__RandomAccessFile.Rd b/r/man/arrow__io__RandomAccessFile.Rd new file mode 100644 index 00000000000..f8cb86abda6 --- /dev/null +++ b/r/man/arrow__io__RandomAccessFile.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\docType{class} +\name{arrow__io__RandomAccessFile} +\alias{arrow__io__RandomAccessFile} +\alias{arrow::io::RandomAccessFile} +\title{class arrow::io::RandomAccessFile} +\description{ +class arrow::io::RandomAccessFile +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__io__Readable.Rd b/r/man/arrow__io__Readable.Rd new file mode 100644 index 00000000000..b0b30a42302 --- /dev/null +++ b/r/man/arrow__io__Readable.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\docType{class} +\name{arrow__io__Readable} +\alias{arrow__io__Readable} +\alias{arrow::io::Readable} +\title{class arrow::io::Readable} +\description{ +class arrow::io::Readable +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__io__ReadableFile.Rd b/r/man/arrow__io__ReadableFile.Rd new file mode 100644 index 00000000000..440149fbbb4 --- /dev/null +++ b/r/man/arrow__io__ReadableFile.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\docType{class} +\name{arrow__io__ReadableFile} +\alias{arrow__io__ReadableFile} +\alias{arrow::io::ReadableFile} +\title{class arrow::io::ReadableFile} +\description{ +class arrow::io::ReadableFile +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__ipc__Message.Rd b/r/man/arrow__ipc__Message.Rd new file mode 100644 index 00000000000..d3811f8f4c1 --- /dev/null +++ b/r/man/arrow__ipc__Message.Rd @@ -0,0 +1,17 @@ +% Generated
by roxygen2: do not edit by hand +% Please edit documentation in R/message.R +\docType{class} +\name{arrow__ipc__Message} +\alias{arrow__ipc__Message} +\alias{arrow::ipc::Message} +\title{class arrow::ipc::Message} +\description{ +class arrow::ipc::Message +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__ipc__MessageReader.Rd b/r/man/arrow__ipc__MessageReader.Rd new file mode 100644 index 00000000000..883e9e0618b --- /dev/null +++ b/r/man/arrow__ipc__MessageReader.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/message.R +\docType{class} +\name{arrow__ipc__MessageReader} +\alias{arrow__ipc__MessageReader} +\alias{arrow::ipc::MessageReader} +\title{class arrow::ipc::MessageReader} +\description{ +class arrow::ipc::MessageReader +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__ipc__RecordBatchFileReader.Rd b/r/man/arrow__ipc__RecordBatchFileReader.Rd new file mode 100644 index 00000000000..675f636b365 --- /dev/null +++ b/r/man/arrow__ipc__RecordBatchFileReader.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RecordBatchReader.R +\docType{class} +\name{arrow__ipc__RecordBatchFileReader} +\alias{arrow__ipc__RecordBatchFileReader} +\alias{arrow::ipc::RecordBatchFileReader} +\title{class arrow::ipc::RecordBatchFileReader} +\description{ +class arrow::ipc::RecordBatchFileReader +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__ipc__RecordBatchFileWriter.Rd b/r/man/arrow__ipc__RecordBatchFileWriter.Rd new file mode 100644 index 00000000000..a80b55941fb --- /dev/null +++ b/r/man/arrow__ipc__RecordBatchFileWriter.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RecordBatchWriter.R +\docType{class} +\name{arrow__ipc__RecordBatchFileWriter} +\alias{arrow__ipc__RecordBatchFileWriter} 
+\alias{arrow::ipc::RecordBatchFileWriter} +\title{class arrow::ipc::RecordBatchFileWriter + +Writer for the Arrow binary file format} +\description{ +class arrow::ipc::RecordBatchFileWriter + +Writer for the Arrow binary file format +} +\section{usage}{ +\preformatted{writer <- RecordBatchFileWriter(sink, schema) + +writer$write_batch(batch) +writer$write_table(table) +writer$close() +} +} + +\section{Factory}{ + + +The \code{\link[=RecordBatchFileWriter]{RecordBatchFileWriter()}} function creates a record batch stream writer. +} + +\section{Methods}{ + +inherited from \link[=arrow__ipc__RecordBatchWriter]{arrow::ipc::RecordBatchWriter} +\itemize{ +\item \code{$write_batch(batch)}: Write record batch to stream +\item \code{$write_table(table)}: write Table to stream +\item \code{$close()}: close stream +} +} + +\keyword{datasets} diff --git a/r/man/arrow__ipc__RecordBatchStreamReader.Rd b/r/man/arrow__ipc__RecordBatchStreamReader.Rd new file mode 100644 index 00000000000..49f57cce057 --- /dev/null +++ b/r/man/arrow__ipc__RecordBatchStreamReader.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RecordBatchReader.R +\docType{class} +\name{arrow__ipc__RecordBatchStreamReader} +\alias{arrow__ipc__RecordBatchStreamReader} +\alias{arrow::ipc::RecordBatchStreamReader} +\title{class arrow::ipc::RecordBatchStreamReader} +\description{ +class arrow::ipc::RecordBatchStreamReader +} +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/arrow__ipc__RecordBatchStreamWriter.Rd b/r/man/arrow__ipc__RecordBatchStreamWriter.Rd new file mode 100644 index 00000000000..3d2030287d1 --- /dev/null +++ b/r/man/arrow__ipc__RecordBatchStreamWriter.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RecordBatchWriter.R +\docType{class} +\name{arrow__ipc__RecordBatchStreamWriter} +\alias{arrow__ipc__RecordBatchStreamWriter} +\alias{arrow::ipc::RecordBatchStreamWriter} 
+\title{class arrow::ipc::RecordBatchStreamWriter + +Writer for the Arrow streaming binary format} +\description{ +class arrow::ipc::RecordBatchStreamWriter + +Writer for the Arrow streaming binary format +} +\section{usage}{ +\preformatted{writer <- RecordBatchStreamWriter(sink, schema) + +writer$write_batch(batch) +writer$write_table(table) +writer$close() +} +} + +\section{Factory}{ + + +The \code{\link[=RecordBatchStreamWriter]{RecordBatchStreamWriter()}} function creates a record batch stream writer. +} + +\section{Methods}{ + +inherited from \link[=arrow__ipc__RecordBatchWriter]{arrow::ipc::RecordBatchWriter} +\itemize{ +\item \code{$write_batch(batch)}: Write record batch to stream +\item \code{$write_table(table)}: write Table to stream +\item \code{$close()}: close stream +} +} + +\keyword{datasets} diff --git a/r/man/arrow__ipc__RecordBatchWriter.Rd b/r/man/arrow__ipc__RecordBatchWriter.Rd new file mode 100644 index 00000000000..08593df8524 --- /dev/null +++ b/r/man/arrow__ipc__RecordBatchWriter.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RecordBatchWriter.R +\docType{class} +\name{arrow__ipc__RecordBatchWriter} +\alias{arrow__ipc__RecordBatchWriter} +\alias{arrow::ipc::RecordBatchWriter} +\title{class arrow::ipc::RecordBatchWriter} +\description{ +class arrow::ipc::RecordBatchWriter +} +\section{Methods}{ + +\itemize{ +\item \code{$write_batch(batch)}: Write record batch to stream +\item \code{$write_table(table)}: write Table to stream +\item \code{$close()}: close stream +} +} + +\section{Derived classes}{ + +\itemize{ +\item \link[=arrow__ipc__RecordBatchStreamWriter]{arrow::ipc::RecordBatchStreamWriter} implements the streaming binary format +\item \link[=arrow__ipc__RecordBatchFileWriter]{arrow::ipc::RecordBatchFileWriter} implements the binary file format +} +} + +\keyword{datasets} diff --git a/r/man/buffer.Rd b/r/man/buffer.Rd index 4d4e97e47d8..60fd25d4bf1 100644 --- a/r/man/buffer.Rd +++ 
b/r/man/buffer.Rd @@ -2,16 +2,16 @@ % Please edit documentation in R/buffer.R \name{buffer} \alias{buffer} -\title{Create a buffer from an R object} +\title{Create a \link[=arrow__Buffer]{arrow::Buffer} from an R object} \usage{ buffer(x) } \arguments{ -\item{x}{R object} +\item{x}{R object. Only raw, numeric and integer vectors are currently supported} } \value{ -an instance of \code{arrow::Buffer} that borrows memory from \code{x} +an instance of \link[=arrow__Buffer]{arrow::Buffer} that borrows memory from \code{x} } \description{ -Create a buffer from an R object +Create a \link[=arrow__Buffer]{arrow::Buffer} from an R object } diff --git a/r/man/chunked_array.Rd b/r/man/chunked_array.Rd index 1f4fb836143..c6973be7210 100644 --- a/r/man/chunked_array.Rd +++ b/r/man/chunked_array.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/ChunkedArray.R \name{chunked_array} \alias{chunked_array} -\title{create an arrow::Array from an R vector} +\title{create an \link[=arrow__ChunkedArray]{arrow::ChunkedArray} from various R vectors} \usage{ chunked_array(..., type) } @@ -12,5 +12,5 @@ chunked_array(..., type) \item{type}{currently ignored} } \description{ -create an arrow::Array from an R vector +create an \link[=arrow__ChunkedArray]{arrow::ChunkedArray} from various R vectors } diff --git a/r/man/default_memory_pool.Rd b/r/man/default_memory_pool.Rd new file mode 100644 index 00000000000..1725ff0e10a --- /dev/null +++ b/r/man/default_memory_pool.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/memory_pool.R +\name{default_memory_pool} +\alias{default_memory_pool} +\title{default \link[=arrow__MemoryPool]{arrow::MemoryPool}} +\usage{ +default_memory_pool() +} +\value{ +the default \link[=arrow__MemoryPool]{arrow::MemoryPool} +} +\description{ +default \link[=arrow__MemoryPool]{arrow::MemoryPool} +} diff --git a/r/man/dictionary.Rd b/r/man/dictionary.Rd index 2a7989648b0..340283ec4da 100644 --- a/r/man/dictionary.Rd +++ 
b/r/man/dictionary.Rd @@ -13,6 +13,9 @@ dictionary(type, values, ordered = FALSE) \item{ordered}{Is this an ordered dictionary} } +\value{ +a \link[=arrow__DictionaryType]{arrow::DictionaryType} +} \description{ dictionary type factory } diff --git a/r/man/field.Rd b/r/man/field.Rd index e7af66db290..5cbd8033875 100644 --- a/r/man/field.Rd +++ b/r/man/field.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/Field.R \name{field} \alias{field} -\title{Factor for a \code{arrow::Field}} +\title{Factory for a \code{arrow::Field}} \usage{ field(name, type, metadata) } @@ -14,7 +14,7 @@ field(name, type, metadata) \item{metadata}{currently ignored} } \description{ -Factor for a \code{arrow::Field} +Factory for a \code{arrow::Field} } \examples{ field("x", int32()) diff --git a/r/man/io.Rd b/r/man/io.Rd deleted file mode 100644 index 74817bf88a3..00000000000 --- a/r/man/io.Rd +++ /dev/null @@ -1,40 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io.R -\name{mmap_create} -\alias{mmap_create} -\alias{mmap_open} -\alias{file_open} -\alias{file_output_stream} -\alias{mock_output_stream} -\alias{buffer_output_stream} -\alias{fixed_size_buffer_writer} -\title{Create a new read/write memory mapped file of a given size} -\usage{ -mmap_create(path, size) - -mmap_open(path, mode = c("read", "write", "readwrite")) - -file_open(path) - -file_output_stream(path) - -mock_output_stream() - -buffer_output_stream(initial_capacity = 0L) - -fixed_size_buffer_writer(buffer) -} -\arguments{ -\item{path}{file path} - -\item{size}{size in bytes} - -\item{mode}{file mode (read/write/readwrite)} - -\item{initial_capacity}{initial capacity for the buffer output stream} - -\item{buffer}{an \code{arrow::Buffer}, typically created by \code{\link[=buffer]{buffer()}}} -} -\description{ -Create a new read/write memory mapped file of a given size -} diff --git a/r/man/mmap_create.Rd b/r/man/mmap_create.Rd new file mode 100644 index 00000000000..050ae18c76f --- 
/dev/null +++ b/r/man/mmap_create.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\name{mmap_create} +\alias{mmap_create} +\title{Create a new read/write memory mapped file of a given size} +\usage{ +mmap_create(path, size) +} +\arguments{ +\item{path}{file path} + +\item{size}{size in bytes} +} +\value{ +a \link[=arrow__io__MemoryMappedFile]{arrow::io::MemoryMappedFile} +} +\description{ +Create a new read/write memory mapped file of a given size +} diff --git a/r/man/mmap_open.Rd b/r/man/mmap_open.Rd new file mode 100644 index 00000000000..d0047a72c38 --- /dev/null +++ b/r/man/mmap_open.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\name{mmap_open} +\alias{mmap_open} +\title{Open a memory mapped file} +\usage{ +mmap_open(path, mode = c("read", "write", "readwrite")) +} +\arguments{ +\item{path}{file path} + +\item{mode}{file mode (read/write/readwrite)} +} +\description{ +Open a memory mapped file +} diff --git a/r/man/read_arrow.Rd b/r/man/read_arrow.Rd deleted file mode 100644 index 362ee7adc1a..00000000000 --- a/r/man/read_arrow.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Table.R -\name{read_arrow} -\alias{read_arrow} -\title{Read an tibble from an arrow::Table on disk} -\usage{ -read_arrow(stream) -} -\arguments{ -\item{stream}{input stream} -} -\value{ -a \link[tibble:tibble]{tibble::tibble} -} -\description{ -Read an tibble from an arrow::Table on disk -} diff --git a/r/man/read_record_batch.Rd b/r/man/read_record_batch.Rd index 4ca048f28ec..fef12cbac4a 100644 --- a/r/man/read_record_batch.Rd +++ b/r/man/read_record_batch.Rd @@ -1,19 +1,19 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchReader.R +% Please edit documentation in R/read_record_batch.R \name{read_record_batch} \alias{read_record_batch} -\title{Read a single record 
batch from a stream} +\title{read \link[=arrow__RecordBatch]{arrow::RecordBatch} as encapsulated IPC message, given a known \link[=arrow__Schema]{arrow::Schema}} \usage{ -read_record_batch(stream, ...) +read_record_batch(obj, schema) } \arguments{ -\item{stream}{input stream} +\item{obj}{a \link[=arrow__ipc__Message]{arrow::ipc::Message}, a \link[=arrow__io__InputStream]{arrow::io::InputStream}, a \link[=arrow__Buffer]{arrow::Buffer}, or a raw vector} -\item{...}{additional parameters} +\item{schema}{a \link[=arrow__Schema]{arrow::Schema}} } -\description{ -Read a single record batch from a stream +\value{ +a \link[=arrow__RecordBatch]{arrow::RecordBatch} } -\details{ -\code{stream} can be a \code{arrow::io::RandomAccessFile} stream as created by \code{\link[=file_open]{file_open()}} or \code{\link[=mmap_open]{mmap_open()}} or a path. +\description{ +read \link[=arrow__RecordBatch]{arrow::RecordBatch} as encapsulated IPC message, given a known \link[=arrow__Schema]{arrow::Schema} } diff --git a/r/man/read_table.Rd b/r/man/read_table.Rd index f851057e8a7..3231b26da26 100644 --- a/r/man/read_table.Rd +++ b/r/man/read_table.Rd @@ -1,14 +1,40 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchReader.R +% Please edit documentation in R/read_table.R \name{read_table} \alias{read_table} -\title{Read an arrow::Table from a stream} +\alias{read_arrow} +\title{Read an \link[=arrow__Table]{arrow::Table} from a stream} \usage{ read_table(stream) + +read_arrow(stream) } \arguments{ -\item{stream}{stream. Either a stream created by \code{\link[=file_open]{file_open()}} or \code{\link[=mmap_open]{mmap_open()}} or a file path.} +\item{stream}{stream. 
+\itemize{ +\item a \link[=arrow__ipc__RecordBatchFileReader]{arrow::ipc::RecordBatchFileReader}: +read an \link[=arrow__Table]{arrow::Table} +from all the record batches in the reader +\item a \link[=arrow__ipc__RecordBatchStreamReader]{arrow::ipc::RecordBatchStreamReader}: +read an \link[=arrow__Table]{arrow::Table} from the remaining record batches +in the reader +\item a string or \link[fs:path_abs]{file path}: interpret the file as an arrow +binary file format, and uses a \link[=arrow__ipc__RecordBatchFileReader]{arrow::ipc::RecordBatchFileReader} +to process it. +\item a raw vector: read using a \link[=arrow__ipc__RecordBatchStreamReader]{arrow::ipc::RecordBatchStreamReader} +}} +} +\value{ +\itemize{ +\item \code{read_table} returns an \link[=arrow__Table]{arrow::Table} +\item \code{read_arrow} returns a \code{\link[tibble:tibble]{tibble::tibble()}} +} } \description{ -Read an arrow::Table from a stream +Read an \link[=arrow__Table]{arrow::Table} from a stream +} +\details{ +The methods using \link[=arrow__ipc__RecordBatchFileReader]{arrow::ipc::RecordBatchFileReader} and +\link[=arrow__ipc__RecordBatchStreamReader]{arrow::ipc::RecordBatchStreamReader} offer the most +flexibility. The other methods are for convenience. 
} diff --git a/r/man/record_batch.Rd b/r/man/record_batch.Rd index e108d64b46a..4567a9ab763 100644 --- a/r/man/record_batch.Rd +++ b/r/man/record_batch.Rd @@ -2,13 +2,16 @@ % Please edit documentation in R/RecordBatch.R \name{record_batch} \alias{record_batch} -\title{Create an arrow::RecordBatch from a data frame} +\title{Create an \link[=arrow__RecordBatch]{arrow::RecordBatch} from a data frame} \usage{ record_batch(.data) } \arguments{ \item{.data}{a data frame} } +\value{ +a \link[=arrow__RecordBatch]{arrow::RecordBatch} +} \description{ -Create an arrow::RecordBatch from a data frame +Create an \link[=arrow__RecordBatch]{arrow::RecordBatch} from a data frame } diff --git a/r/man/record_batch_file_reader.Rd b/r/man/record_batch_file_reader.Rd deleted file mode 100644 index b7e211dfbc2..00000000000 --- a/r/man/record_batch_file_reader.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchReader.R -\name{record_batch_file_reader} -\alias{record_batch_file_reader} -\title{Create an \code{arrow::ipc::RecordBatchFileReader} from a file} -\usage{ -record_batch_file_reader(file) -} -\arguments{ -\item{file}{The file to read from} -} -\description{ -Create an \code{arrow::ipc::RecordBatchFileReader} from a file -} diff --git a/r/man/record_batch_file_writer.Rd b/r/man/record_batch_file_writer.Rd deleted file mode 100644 index b7dcb0c39e4..00000000000 --- a/r/man/record_batch_file_writer.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchWriter.R -\name{record_batch_file_writer} -\alias{record_batch_file_writer} -\title{Create a record batch file writer from a stream} -\usage{ -record_batch_file_writer(stream, schema) -} -\arguments{ -\item{stream}{a stream} - -\item{schema}{the schema of the batches} -} -\value{ -an \code{arrow::ipc::RecordBatchWriter} object -} -\description{ -Create a record batch file writer from a stream 
-} diff --git a/r/man/record_batch_stream_reader.Rd b/r/man/record_batch_stream_reader.Rd deleted file mode 100644 index 018045f6a32..00000000000 --- a/r/man/record_batch_stream_reader.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchReader.R -\name{record_batch_stream_reader} -\alias{record_batch_stream_reader} -\title{Create a \code{arrow::ipc::RecordBatchStreamReader} from an input stream} -\usage{ -record_batch_stream_reader(stream) -} -\arguments{ -\item{stream}{input stream} -} -\description{ -Create a \code{arrow::ipc::RecordBatchStreamReader} from an input stream -} diff --git a/r/man/record_batch_stream_writer.Rd b/r/man/record_batch_stream_writer.Rd deleted file mode 100644 index d720d50d3a7..00000000000 --- a/r/man/record_batch_stream_writer.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchWriter.R -\name{record_batch_stream_writer} -\alias{record_batch_stream_writer} -\title{Create a record batch stream writer} -\usage{ -record_batch_stream_writer(stream, schema) -} -\arguments{ -\item{stream}{a stream} - -\item{schema}{a schema} -} -\description{ -Create a record batch stream writer -} diff --git a/r/man/schema.Rd b/r/man/schema.Rd index 9b77d47b613..ad3bcb1f4e0 100644 --- a/r/man/schema.Rd +++ b/r/man/schema.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/Schema.R \name{schema} \alias{schema} -\title{Schema functions} +\title{Schema factory} \usage{ schema(...) } @@ -10,8 +10,8 @@ schema(...) 
\item{...}{named list of data types} } \value{ -a Schema +a \link[=arrow__Schema]{schema} } \description{ -Schema functions +Schema factory } diff --git a/r/man/write_arrow.Rd b/r/man/write_arrow.Rd index 42b39f1d051..4296bcbd899 100644 --- a/r/man/write_arrow.Rd +++ b/r/man/write_arrow.Rd @@ -1,18 +1,34 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchWriter.R +% Please edit documentation in R/write_arrow.R \name{write_arrow} \alias{write_arrow} -\title{Write an object to a stream} +\title{serialize an \link[=arrow__Table]{arrow::Table}, an \link[=arrow__RecordBatch]{arrow::RecordBatch}, or a +data frame to either the streaming format or the binary file format} \usage{ write_arrow(x, stream, ...) } \arguments{ -\item{x}{An object to stream} +\item{x}{an \link[=arrow__Table]{arrow::Table}, an \link[=arrow__RecordBatch]{arrow::RecordBatch} or a data.frame} -\item{stream}{A stream} +\item{stream}{where to serialize to +\itemize{ +\item A \link[=arrow__ipc__RecordBatchWriter]{arrow::ipc::RecordBatchWriter}: the \code{$write()} +of \code{x} is used. The stream is left open. This uses the streaming format +or the binary file format depending on the type of the writer. +\item A string or \link[fs:path_abs]{file path}: \code{x} is serialized with +a \link[=arrow__ipc__RecordBatchFileWriter]{arrow::ipc::RecordBatchFileWriter}, i.e. +using the binary file format. +\item A raw vector: typically of length zero (its data is ignored, and only used for +dispatch). \code{x} is serialized using the streaming format, i.e. 
using the +\link[=arrow__ipc__RecordBatchStreamWriter]{arrow::ipc::RecordBatchStreamWriter} +}} -\item{...}{additional parameters} +\item{...}{extra parameters, currently ignored + +\code{write_arrow} is a convenience function, the classes \link[=arrow__ipc__RecordBatchFileWriter]{arrow::ipc::RecordBatchFileWriter} +and \link[=arrow__ipc__RecordBatchStreamWriter]{arrow::ipc::RecordBatchStreamWriter} can be used for more flexibility.} } \description{ -Write an object to a stream +serialize an \link[=arrow__Table]{arrow::Table}, an \link[=arrow__RecordBatch]{arrow::RecordBatch}, or a +data frame to either the streaming format or the binary file format } diff --git a/r/man/write_record_batch.Rd b/r/man/write_record_batch.Rd deleted file mode 100644 index afc3363f0df..00000000000 --- a/r/man/write_record_batch.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchWriter.R -\name{write_record_batch} -\alias{write_record_batch} -\title{write a record batch} -\usage{ -write_record_batch(x, stream, ...) -} -\arguments{ -\item{x}{a \code{arrow::RecordBatch}} - -\item{stream}{where to stream the record batch} - -\item{...}{extra parameters} -} -\description{ -write a record batch -} diff --git a/r/man/write_table.Rd b/r/man/write_table.Rd deleted file mode 100644 index a247870ec01..00000000000 --- a/r/man/write_table.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchWriter.R -\name{write_table} -\alias{write_table} -\title{write an arrow::Table} -\usage{ -write_table(x, stream, ...) -} -\arguments{ -\item{x}{an \code{arrow::Table}} - -\item{stream}{where to stream the record batch} - -\item{...}{extra parameters} -} -\description{ -write an arrow::Table -} diff --git a/r/src/Makevars.in b/r/src/Makevars.in index 5e285518f24..a0d5fed10ba 100644 --- a/r/src/Makevars.in +++ b/r/src/Makevars.in @@ -16,7 +16,7 @@ # under the License. 
PKG_CPPFLAGS=@cflags@ -PKG_CXXFLAGS+=$(C_VISIBILITY) +PKG_CXXFLAGS=@visibility@ CXX_STD=CXX11 PKG_LIBS=@libs@ -Wl,-rpath,/usr/local/lib #CXXFLAGS="-D_GLIBCXX_USE_CXX11_ABI=0" diff --git a/r/src/RcppExports.cpp b/r/src/RcppExports.cpp index 2c549ad1b90..bca4eafdee4 100644 --- a/r/src/RcppExports.cpp +++ b/r/src/RcppExports.cpp @@ -1753,6 +1753,17 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// RecordBatch__columns +arrow::ArrayVector RecordBatch__columns(const std::shared_ptr& batch); +RcppExport SEXP _arrow_RecordBatch__columns(SEXP batchSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr& >::type batch(batchSEXP); + rcpp_result_gen = Rcpp::wrap(RecordBatch__columns(batch)); + return rcpp_result_gen; +END_RCPP +} // RecordBatch__column std::shared_ptr RecordBatch__column(const std::shared_ptr& batch, int i); RcppExport SEXP _arrow_RecordBatch__column(SEXP batchSEXP, SEXP iSEXP) { @@ -1859,6 +1870,29 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// ipc___SerializeRecordBatch__Raw +RawVector ipc___SerializeRecordBatch__Raw(const std::shared_ptr& batch); +RcppExport SEXP _arrow_ipc___SerializeRecordBatch__Raw(SEXP batchSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr& >::type batch(batchSEXP); + rcpp_result_gen = Rcpp::wrap(ipc___SerializeRecordBatch__Raw(batch)); + return rcpp_result_gen; +END_RCPP +} +// ipc___ReadRecordBatch__InputStream__Schema +std::shared_ptr ipc___ReadRecordBatch__InputStream__Schema(const std::shared_ptr& stream, const std::shared_ptr& schema); +RcppExport SEXP _arrow_ipc___ReadRecordBatch__InputStream__Schema(SEXP streamSEXP, SEXP schemaSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr& >::type stream(streamSEXP); + Rcpp::traits::input_parameter< const 
std::shared_ptr& >::type schema(schemaSEXP); + rcpp_result_gen = Rcpp::wrap(ipc___ReadRecordBatch__InputStream__Schema(stream, schema)); + return rcpp_result_gen; +END_RCPP +} // RecordBatchReader__schema std::shared_ptr RecordBatchReader__schema(const std::shared_ptr& reader); RcppExport SEXP _arrow_RecordBatchReader__schema(SEXP readerSEXP) { @@ -1892,6 +1926,17 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// ipc___RecordBatchStreamReader__batches +std::vector> ipc___RecordBatchStreamReader__batches(const std::shared_ptr& reader); +RcppExport SEXP _arrow_ipc___RecordBatchStreamReader__batches(SEXP readerSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr& >::type reader(readerSEXP); + rcpp_result_gen = Rcpp::wrap(ipc___RecordBatchStreamReader__batches(reader)); + return rcpp_result_gen; +END_RCPP +} // ipc___RecordBatchFileReader__schema std::shared_ptr ipc___RecordBatchFileReader__schema(const std::shared_ptr& reader); RcppExport SEXP _arrow_ipc___RecordBatchFileReader__schema(SEXP readerSEXP) { @@ -1959,39 +2004,25 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } -// ipc___RecordBatchFileWriter__Open -std::shared_ptr ipc___RecordBatchFileWriter__Open(const std::shared_ptr& stream, const std::shared_ptr& schema); -RcppExport SEXP _arrow_ipc___RecordBatchFileWriter__Open(SEXP streamSEXP, SEXP schemaSEXP) { +// ipc___RecordBatchFileReader__batches +std::vector> ipc___RecordBatchFileReader__batches(const std::shared_ptr& reader); +RcppExport SEXP _arrow_ipc___RecordBatchFileReader__batches(SEXP readerSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const std::shared_ptr& >::type stream(streamSEXP); - Rcpp::traits::input_parameter< const std::shared_ptr& >::type schema(schemaSEXP); - rcpp_result_gen = Rcpp::wrap(ipc___RecordBatchFileWriter__Open(stream, schema)); - return rcpp_result_gen; -END_RCPP -} 
-// ipc___RecordBatchStreamWriter__Open -std::shared_ptr ipc___RecordBatchStreamWriter__Open(const std::shared_ptr& stream, const std::shared_ptr& schema); -RcppExport SEXP _arrow_ipc___RecordBatchStreamWriter__Open(SEXP streamSEXP, SEXP schemaSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const std::shared_ptr& >::type stream(streamSEXP); - Rcpp::traits::input_parameter< const std::shared_ptr& >::type schema(schemaSEXP); - rcpp_result_gen = Rcpp::wrap(ipc___RecordBatchStreamWriter__Open(stream, schema)); + Rcpp::traits::input_parameter< const std::shared_ptr& >::type reader(readerSEXP); + rcpp_result_gen = Rcpp::wrap(ipc___RecordBatchFileReader__batches(reader)); return rcpp_result_gen; END_RCPP } // ipc___RecordBatchWriter__WriteRecordBatch -void ipc___RecordBatchWriter__WriteRecordBatch(const std::shared_ptr& batch_writer, const std::shared_ptr& batch, bool allow_64bit); -RcppExport SEXP _arrow_ipc___RecordBatchWriter__WriteRecordBatch(SEXP batch_writerSEXP, SEXP batchSEXP, SEXP allow_64bitSEXP) { +void ipc___RecordBatchWriter__WriteRecordBatch(const std::shared_ptr& batch_writer, const std::shared_ptr& batch); +RcppExport SEXP _arrow_ipc___RecordBatchWriter__WriteRecordBatch(SEXP batch_writerSEXP, SEXP batchSEXP) { BEGIN_RCPP Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const std::shared_ptr& >::type batch_writer(batch_writerSEXP); Rcpp::traits::input_parameter< const std::shared_ptr& >::type batch(batchSEXP); - Rcpp::traits::input_parameter< bool >::type allow_64bit(allow_64bitSEXP); - ipc___RecordBatchWriter__WriteRecordBatch(batch_writer, batch, allow_64bit); + ipc___RecordBatchWriter__WriteRecordBatch(batch_writer, batch); return R_NilValue; END_RCPP } @@ -2016,6 +2047,30 @@ BEGIN_RCPP return R_NilValue; END_RCPP } +// ipc___RecordBatchFileWriter__Open +std::shared_ptr ipc___RecordBatchFileWriter__Open(const std::shared_ptr& stream, const std::shared_ptr& schema); 
+RcppExport SEXP _arrow_ipc___RecordBatchFileWriter__Open(SEXP streamSEXP, SEXP schemaSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr& >::type stream(streamSEXP); + Rcpp::traits::input_parameter< const std::shared_ptr& >::type schema(schemaSEXP); + rcpp_result_gen = Rcpp::wrap(ipc___RecordBatchFileWriter__Open(stream, schema)); + return rcpp_result_gen; +END_RCPP +} +// ipc___RecordBatchStreamWriter__Open +std::shared_ptr ipc___RecordBatchStreamWriter__Open(const std::shared_ptr& stream, const std::shared_ptr& schema); +RcppExport SEXP _arrow_ipc___RecordBatchStreamWriter__Open(SEXP streamSEXP, SEXP schemaSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr& >::type stream(streamSEXP); + Rcpp::traits::input_parameter< const std::shared_ptr& >::type schema(schemaSEXP); + rcpp_result_gen = Rcpp::wrap(ipc___RecordBatchStreamWriter__Open(stream, schema)); + return rcpp_result_gen; +END_RCPP +} // Table__from_dataframe std::shared_ptr Table__from_dataframe(DataFrame tbl); RcppExport SEXP _arrow_Table__from_dataframe(SEXP tblSEXP) { @@ -2083,6 +2138,17 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// Table__columns +std::vector> Table__columns(const std::shared_ptr& table); +RcppExport SEXP _arrow_Table__columns(SEXP tableSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr& >::type table(tableSEXP); + rcpp_result_gen = Rcpp::wrap(Table__columns(table)); + return rcpp_result_gen; +END_RCPP +} static const R_CallMethodDef CallEntries[] = { {"_arrow_Array__from_vector", (DL_FUNC) &_arrow_Array__from_vector, 1}, @@ -2242,6 +2308,7 @@ static const R_CallMethodDef CallEntries[] = { {"_arrow_RecordBatch__num_columns", (DL_FUNC) &_arrow_RecordBatch__num_columns, 1}, {"_arrow_RecordBatch__num_rows", 
(DL_FUNC) &_arrow_RecordBatch__num_rows, 1}, {"_arrow_RecordBatch__schema", (DL_FUNC) &_arrow_RecordBatch__schema, 1}, + {"_arrow_RecordBatch__columns", (DL_FUNC) &_arrow_RecordBatch__columns, 1}, {"_arrow_RecordBatch__column", (DL_FUNC) &_arrow_RecordBatch__column, 2}, {"_arrow_RecordBatch__to_dataframe", (DL_FUNC) &_arrow_RecordBatch__to_dataframe, 1}, {"_arrow_RecordBatch__from_dataframe", (DL_FUNC) &_arrow_RecordBatch__from_dataframe, 1}, @@ -2251,26 +2318,31 @@ static const R_CallMethodDef CallEntries[] = { {"_arrow_RecordBatch__names", (DL_FUNC) &_arrow_RecordBatch__names, 1}, {"_arrow_RecordBatch__Slice1", (DL_FUNC) &_arrow_RecordBatch__Slice1, 2}, {"_arrow_RecordBatch__Slice2", (DL_FUNC) &_arrow_RecordBatch__Slice2, 3}, + {"_arrow_ipc___SerializeRecordBatch__Raw", (DL_FUNC) &_arrow_ipc___SerializeRecordBatch__Raw, 1}, + {"_arrow_ipc___ReadRecordBatch__InputStream__Schema", (DL_FUNC) &_arrow_ipc___ReadRecordBatch__InputStream__Schema, 2}, {"_arrow_RecordBatchReader__schema", (DL_FUNC) &_arrow_RecordBatchReader__schema, 1}, {"_arrow_RecordBatchReader__ReadNext", (DL_FUNC) &_arrow_RecordBatchReader__ReadNext, 1}, {"_arrow_ipc___RecordBatchStreamReader__Open", (DL_FUNC) &_arrow_ipc___RecordBatchStreamReader__Open, 1}, + {"_arrow_ipc___RecordBatchStreamReader__batches", (DL_FUNC) &_arrow_ipc___RecordBatchStreamReader__batches, 1}, {"_arrow_ipc___RecordBatchFileReader__schema", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__schema, 1}, {"_arrow_ipc___RecordBatchFileReader__num_record_batches", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__num_record_batches, 1}, {"_arrow_ipc___RecordBatchFileReader__ReadRecordBatch", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__ReadRecordBatch, 2}, {"_arrow_ipc___RecordBatchFileReader__Open", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__Open, 1}, {"_arrow_Table__from_RecordBatchFileReader", (DL_FUNC) &_arrow_Table__from_RecordBatchFileReader, 1}, {"_arrow_Table__from_RecordBatchStreamReader", (DL_FUNC) 
&_arrow_Table__from_RecordBatchStreamReader, 1}, - {"_arrow_ipc___RecordBatchFileWriter__Open", (DL_FUNC) &_arrow_ipc___RecordBatchFileWriter__Open, 2}, - {"_arrow_ipc___RecordBatchStreamWriter__Open", (DL_FUNC) &_arrow_ipc___RecordBatchStreamWriter__Open, 2}, - {"_arrow_ipc___RecordBatchWriter__WriteRecordBatch", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__WriteRecordBatch, 3}, + {"_arrow_ipc___RecordBatchFileReader__batches", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__batches, 1}, + {"_arrow_ipc___RecordBatchWriter__WriteRecordBatch", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__WriteRecordBatch, 2}, {"_arrow_ipc___RecordBatchWriter__WriteTable", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__WriteTable, 2}, {"_arrow_ipc___RecordBatchWriter__Close", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__Close, 1}, + {"_arrow_ipc___RecordBatchFileWriter__Open", (DL_FUNC) &_arrow_ipc___RecordBatchFileWriter__Open, 2}, + {"_arrow_ipc___RecordBatchStreamWriter__Open", (DL_FUNC) &_arrow_ipc___RecordBatchStreamWriter__Open, 2}, {"_arrow_Table__from_dataframe", (DL_FUNC) &_arrow_Table__from_dataframe, 1}, {"_arrow_Table__num_columns", (DL_FUNC) &_arrow_Table__num_columns, 1}, {"_arrow_Table__num_rows", (DL_FUNC) &_arrow_Table__num_rows, 1}, {"_arrow_Table__schema", (DL_FUNC) &_arrow_Table__schema, 1}, {"_arrow_Table__to_dataframe", (DL_FUNC) &_arrow_Table__to_dataframe, 1}, {"_arrow_Table__column", (DL_FUNC) &_arrow_Table__column, 2}, + {"_arrow_Table__columns", (DL_FUNC) &_arrow_Table__columns, 1}, {NULL, NULL, 0} }; diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h index 419705f9fcd..9ebc558d0d4 100644 --- a/r/src/arrow_types.h +++ b/r/src/arrow_types.h @@ -152,6 +152,7 @@ using LogicalVector_ = Rcpp::Vector; using StringVector_ = Rcpp::Vector; using CharacterVector_ = StringVector_; using RawVector_ = Rcpp::Vector; +using List_ = Rcpp::Vector; template inline typename Rcpp::Vector::stored_type default_value() { diff --git a/r/src/recordbatch.cpp b/r/src/recordbatch.cpp index 
829ad45eadb..b6bee7ae539 100644 --- a/r/src/recordbatch.cpp +++ b/r/src/recordbatch.cpp @@ -40,6 +40,17 @@ std::shared_ptr RecordBatch__schema( return x->schema(); } +// [[Rcpp::export]] +arrow::ArrayVector RecordBatch__columns( + const std::shared_ptr& batch) { + auto nc = batch->num_columns(); + ArrayVector res(nc); + for (int i = 0; i < nc; i++) { + res[i] = batch->column(i); + } + return res; +} + // [[Rcpp::export]] std::shared_ptr RecordBatch__column( const std::shared_ptr& batch, int i) { @@ -120,3 +131,32 @@ std::shared_ptr RecordBatch__Slice2( const std::shared_ptr& self, int offset, int length) { return self->Slice(offset, length); } + +// [[Rcpp::export]] +RawVector ipc___SerializeRecordBatch__Raw( + const std::shared_ptr& batch) { + // how many bytes do we need ? + int64_t size; + STOP_IF_NOT_OK(arrow::ipc::GetRecordBatchSize(*batch, &size)); + + // allocate the result raw vector + RawVector out(no_init(size)); + + // serialize into the bytes of the raw vector + auto buffer = std::make_shared>(out); + arrow::io::FixedSizeBufferWriter stream(buffer); + STOP_IF_NOT_OK( + arrow::ipc::SerializeRecordBatch(*batch, arrow::default_memory_pool(), &stream)); + STOP_IF_NOT_OK(stream.Close()); + + return out; +} + +// [[Rcpp::export]] +std::shared_ptr ipc___ReadRecordBatch__InputStream__Schema( + const std::shared_ptr& stream, + const std::shared_ptr& schema) { + std::shared_ptr batch; + STOP_IF_NOT_OK(arrow::ipc::ReadRecordBatch(schema, stream.get(), &batch)); + return batch; +} diff --git a/r/src/recordbatchreader.cpp b/r/src/recordbatchreader.cpp index 65a1c9baf3b..f3e90228d3c 100644 --- a/r/src/recordbatchreader.cpp +++ b/r/src/recordbatchreader.cpp @@ -41,6 +41,22 @@ std::shared_ptr ipc___RecordBatchStreamReader__Open( return reader; } +// [[Rcpp::export]] +std::vector> ipc___RecordBatchStreamReader__batches( + const std::shared_ptr& reader) { + std::vector> res; + + while (true) { + std::shared_ptr batch; + STOP_IF_NOT_OK(reader->ReadNext(&batch)); + if 
(!batch) break; + + res.push_back(batch); + } + + return res; +} + // -------- RecordBatchFileReader // [[Rcpp::export]] @@ -104,3 +120,16 @@ std::shared_ptr Table__from_RecordBatchStreamReader( return table; } + +// [[Rcpp::export]] +std::vector> ipc___RecordBatchFileReader__batches( + const std::shared_ptr& reader) { + auto n = reader->num_record_batches(); + std::vector> res(n); + + for (int i = 0; i < n; i++) { + STOP_IF_NOT_OK(reader->ReadRecordBatch(i, &res[i])); + } + + return res; +} diff --git a/r/src/recordbatchwriter.cpp b/r/src/recordbatchwriter.cpp index f86c474fec3..d4dd212a9bd 100644 --- a/r/src/recordbatchwriter.cpp +++ b/r/src/recordbatchwriter.cpp @@ -17,6 +17,26 @@ #include "arrow_types.h" +// [[Rcpp::export]] +void ipc___RecordBatchWriter__WriteRecordBatch( + const std::shared_ptr& batch_writer, + const std::shared_ptr& batch) { + STOP_IF_NOT_OK(batch_writer->WriteRecordBatch(*batch, true)); +} + +// [[Rcpp::export]] +void ipc___RecordBatchWriter__WriteTable( + const std::shared_ptr& batch_writer, + const std::shared_ptr& table) { + STOP_IF_NOT_OK(batch_writer->WriteTable(*table)); +} + +// [[Rcpp::export]] +void ipc___RecordBatchWriter__Close( + const std::shared_ptr& batch_writer) { + STOP_IF_NOT_OK(batch_writer->Close()); +} + // [[Rcpp::export]] std::shared_ptr ipc___RecordBatchFileWriter__Open( const std::shared_ptr& stream, @@ -36,23 +56,3 @@ std::shared_ptr ipc___RecordBatchStreamWriter__Op arrow::ipc::RecordBatchStreamWriter::Open(stream.get(), schema, &stream_writer)); return stream_writer; } - -// [[Rcpp::export]] -void ipc___RecordBatchWriter__WriteRecordBatch( - const std::shared_ptr& batch_writer, - const std::shared_ptr& batch, bool allow_64bit) { - STOP_IF_NOT_OK(batch_writer->WriteRecordBatch(*batch, allow_64bit)); -} - -// [[Rcpp::export]] -void ipc___RecordBatchWriter__WriteTable( - const std::shared_ptr& batch_writer, - const std::shared_ptr& table) { - STOP_IF_NOT_OK(batch_writer->WriteTable(*table)); -} - -// 
[[Rcpp::export]] -void ipc___RecordBatchWriter__Close( - const std::shared_ptr& batch_writer) { - STOP_IF_NOT_OK(batch_writer->Close()); -} diff --git a/r/src/table.cpp b/r/src/table.cpp index 4bdff167db9..f4ebd0466b9 100644 --- a/r/src/table.cpp +++ b/r/src/table.cpp @@ -67,3 +67,14 @@ std::shared_ptr Table__column(const std::shared_ptr int i) { return table->column(i); } + +// [[Rcpp::export]] +std::vector> Table__columns( + const std::shared_ptr& table) { + auto nc = table->num_columns(); + std::vector> res(nc); + for (int i = 0; i < nc; i++) { + res[i] = table->column(i); + } + return res; +} diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index cbf67e711d1..e456fe88654 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -19,35 +19,35 @@ context("arrow::Array") test_that("Array", { x <- array(1:10, 1:10, 1:5) - expect_equal(x$type(), int32()) + expect_equal(x$type, int32()) expect_equal(x$length(), 25L) expect_equal(x$as_vector(), c(1:10, 1:10, 1:5)) y <- x$Slice(10) - expect_equal(y$type(), int32()) + expect_equal(y$type, int32()) expect_equal(y$length(), 15L) expect_equal(y$as_vector(), c(1:10, 1:5)) expect_true(x$RangeEquals(y, 10, 24, 0)) z <- x$Slice(10, 5) - expect_equal(z$type(), int32()) + expect_equal(z$type, int32()) expect_equal(z$length(), 5L) expect_equal(z$as_vector(), c(1:5)) expect_true(x$RangeEquals(z, 10, 15, 0)) x_dbl <- array(c(1,2,3), c(4,5,6)) - expect_equal(x_dbl$type(), float64()) + expect_equal(x_dbl$type, float64()) expect_equal(x_dbl$length(), 6L) expect_equal(x_dbl$as_vector(), as.numeric(1:6)) y_dbl <- x_dbl$Slice(3) - expect_equal(y_dbl$type(), float64()) + expect_equal(y_dbl$type, float64()) expect_equal(y_dbl$length(), 3L) - expect_equal(y_dbl$offset(), 3L) + expect_equal(y_dbl$offset, 3L) expect_equal(y_dbl$as_vector(), as.numeric(4:6)) z_dbl <- x_dbl$Slice(3, 2) - expect_equal(z_dbl$type(), float64()) + expect_equal(z_dbl$type, float64()) expect_equal(z_dbl$length(), 2L) 
expect_equal(z_dbl$as_vector(), as.numeric(4:5)) }) @@ -138,7 +138,7 @@ test_that("Array supports unordered factors (ARROW-3355)", { f <- factor(c("itsy", "bitsy", "spider", "spider")) arr_fac <- array(f) expect_equal(arr_fac$length(), 4L) - expect_equal(arr_fac$type()$index_type(), int8()) + expect_equal(arr_fac$type$index_type, int8()) expect_identical(arr_fac$as_vector(), f) expect_true(arr_fac$IsValid(0)) expect_true(arr_fac$IsValid(1)) @@ -147,7 +147,7 @@ test_that("Array supports unordered factors (ARROW-3355)", { sl <- arr_fac$Slice(1) expect_equal(sl$length(), 3L) - expect_equal(arr_fac$type()$index_type(), int8()) + expect_equal(arr_fac$type$index_type, int8()) expect_equal(sl$as_vector(), f[2:4]) # with NA @@ -155,7 +155,7 @@ test_that("Array supports unordered factors (ARROW-3355)", { # TODO: rm the suppressWarnings when https://github.com/r-lib/vctrs/issues/109 arr_fac <- suppressWarnings(array(f)) expect_equal(arr_fac$length(), 5L) - expect_equal(arr_fac$type()$index_type(), int8()) + expect_equal(arr_fac$type$index_type, int8()) expect_identical(arr_fac$as_vector(), f) expect_true(arr_fac$IsValid(0)) expect_true(arr_fac$IsValid(1)) @@ -165,7 +165,7 @@ test_that("Array supports unordered factors (ARROW-3355)", { sl <- arr_fac$Slice(1) expect_equal(sl$length(), 4L) - expect_equal(arr_fac$type()$index_type(), int8()) + expect_equal(arr_fac$type$index_type, int8()) expect_equal(sl$as_vector(), f[2:5]) }) @@ -174,7 +174,7 @@ test_that("Array supports ordered factors (ARROW-3355)", { f <- ordered(c("itsy", "bitsy", "spider", "spider")) arr_fac <- array(f) expect_equal(arr_fac$length(), 4L) - expect_equal(arr_fac$type()$index_type(), int8()) + expect_equal(arr_fac$type$index_type, int8()) expect_identical(arr_fac$as_vector(), f) expect_true(arr_fac$IsValid(0)) expect_true(arr_fac$IsValid(1)) @@ -183,7 +183,7 @@ test_that("Array supports ordered factors (ARROW-3355)", { sl <- arr_fac$Slice(1) expect_equal(sl$length(), 3L) - 
expect_equal(arr_fac$type()$index_type(), int8()) + expect_equal(arr_fac$type$index_type, int8()) expect_equal(sl$as_vector(), f[2:4]) # with NA @@ -191,7 +191,7 @@ test_that("Array supports ordered factors (ARROW-3355)", { # TODO: rm the suppressWarnings when https://github.com/r-lib/vctrs/issues/109 arr_fac <- suppressWarnings(array(f)) expect_equal(arr_fac$length(), 5L) - expect_equal(arr_fac$type()$index_type(), int8()) + expect_equal(arr_fac$type$index_type, int8()) expect_identical(arr_fac$as_vector(), f) expect_true(arr_fac$IsValid(0)) expect_true(arr_fac$IsValid(1)) @@ -201,27 +201,27 @@ test_that("Array supports ordered factors (ARROW-3355)", { sl <- arr_fac$Slice(1) expect_equal(sl$length(), 4L) - expect_equal(arr_fac$type()$index_type(), int8()) + expect_equal(arr_fac$type$index_type, int8()) expect_equal(sl$as_vector(), f[2:5]) }) test_that("array supports Date (ARROW-3340)", { d <- Sys.Date() + 1:10 a <- array(d) - expect_equal(a$type(), date32()) + expect_equal(a$type, date32()) expect_equal(a$length(), 10L) expect_equal(a$as_vector(), d) d[5] <- NA a <- array(d) - expect_equal(a$type(), date32()) + expect_equal(a$type, date32()) expect_equal(a$length(), 10L) expect_equal(a$as_vector(), d) expect_true(a$IsNull(4)) d2 <- d + .5 a <- array(d2) - expect_equal(a$type(), date32()) + expect_equal(a$type, date32()) expect_equal(a$length(), 10L) expect_equal(a$as_vector(), d) expect_true(a$IsNull(4)) @@ -230,15 +230,15 @@ test_that("array supports Date (ARROW-3340)", { test_that("array supports POSIXct (ARROW-3340)", { times <- lubridate::ymd_hms("2018-10-07 19:04:05") + 1:10 a <- array(times) - expect_equal(a$type()$name(), "timestamp") - expect_equal(a$type()$unit(), unclass(TimeUnit$MICRO)) + expect_equal(a$type$name, "timestamp") + expect_equal(a$type$unit(), unclass(TimeUnit$MICRO)) expect_equal(a$length(), 10L) expect_equal(as.numeric(a$as_vector()), as.numeric(times)) times[5] <- NA a <- array(times) - expect_equal(a$type()$name(), 
"timestamp") - expect_equal(a$type()$unit(), unclass(TimeUnit$MICRO)) + expect_equal(a$type$name, "timestamp") + expect_equal(a$type$unit(), unclass(TimeUnit$MICRO)) expect_equal(a$length(), 10L) expect_equal(as.numeric(a$as_vector()), as.numeric(times)) expect_true(a$IsNull(4)) @@ -247,13 +247,13 @@ test_that("array supports POSIXct (ARROW-3340)", { test_that("array supports integer64", { x <- bit64::as.integer64(1:10) a <- array(x) - expect_equal(a$type(), int64()) + expect_equal(a$type, int64()) expect_equal(a$length(), 10L) expect_equal(a$as_vector(), x) x[4] <- NA a <- array(x) - expect_equal(a$type(), int64()) + expect_equal(a$type, int64()) expect_equal(a$length(), 10L) expect_equal(a$as_vector(), x) expect_true(a$IsNull(3L)) @@ -268,12 +268,12 @@ test_that("array$as_vector() correctly handles all NA inte64 (ARROW-3795)", { test_that("array supports difftime", { time <- hms::hms(56, 34, 12) a <- array(time, time) - expect_equal(a$type(), time32(unit = TimeUnit$SECOND)) + expect_equal(a$type, time32(unit = TimeUnit$SECOND)) expect_equal(a$length(), 2L) expect_equal(a$as_vector(), c(time, time)) a <- array(time, NA) - expect_equal(a$type(), time32(unit = TimeUnit$SECOND)) + expect_equal(a$type, time32(unit = TimeUnit$SECOND)) expect_equal(a$length(), 2L) expect_true(a$IsNull(1)) expect_equal(a$as_vector()[1], time) @@ -284,7 +284,7 @@ test_that("support for NaN (ARROW-3615)", { x <- c(1, NA, NaN, -1) y <- array(x) expect_true(y$IsValid(2)) - expect_equal(y$null_count(), 1L) + expect_equal(y$null_count, 1L) }) test_that("array ignores the type argument (ARROW-3784)", { @@ -300,10 +300,10 @@ test_that("integer types casts (ARROW-3741)", { a_int32 <- a$cast(int32()) a_int64 <- a$cast(int64()) - expect_equal(a_int8$type(), int8()) - expect_equal(a_int16$type(), int16()) - expect_equal(a_int32$type(), int32()) - expect_equal(a_int64$type(), int64()) + expect_equal(a_int8$type, int8()) + expect_equal(a_int16$type, int16()) + expect_equal(a_int32$type, int32()) + 
expect_equal(a_int64$type, int64()) expect_true(a_int8$IsNull(10L)) expect_true(a_int16$IsNull(10L)) expect_true(a_int32$IsNull(10L)) @@ -314,10 +314,10 @@ test_that("integer types casts (ARROW-3741)", { a_uint32 <- a$cast(uint32()) a_uint64 <- a$cast(uint64()) - expect_equal(a_uint8$type(), uint8()) - expect_equal(a_uint16$type(), uint16()) - expect_equal(a_uint32$type(), uint32()) - expect_equal(a_uint64$type(), uint64()) + expect_equal(a_uint8$type, uint8()) + expect_equal(a_uint16$type, uint16()) + expect_equal(a_uint32$type, uint32()) + expect_equal(a_uint64$type, uint64()) expect_true(a_uint8$IsNull(10L)) expect_true(a_uint16$IsNull(10L)) expect_true(a_uint32$IsNull(10L)) @@ -345,8 +345,8 @@ test_that("float types casts (ARROW-3741)", { a_f32 <- a$cast(float32()) a_f64 <- a$cast(float64()) - expect_equal(a_f32$type(), float32()) - expect_equal(a_f64$type(), float64()) + expect_equal(a_f32$type, float32()) + expect_equal(a_f64$type, float64()) expect_true(a_f32$IsNull(3L)) expect_true(a_f64$IsNull(3L)) @@ -359,5 +359,5 @@ test_that("cast to half float works", { skip("until https://issues.apache.org/jira/browse/ARROW-3802") a <- array(1:4) a_f16 <- a$cast(float16()) - expect_equal(a_16$type(), float16()) + expect_equal(a_16$type, float16()) }) diff --git a/r/tests/testthat/test-DataType.R b/r/tests/testthat/test-DataType.R index b479e5a3f67..fc9fc896eae 100644 --- a/r/tests/testthat/test-DataType.R +++ b/r/tests/testthat/test-DataType.R @@ -19,8 +19,8 @@ context("arrow::DataType") test_that("null type works as expected",{ x <- null() - expect_equal(x$id(), 0L) - expect_equal(x$name(), "null") + expect_equal(x$id, 0L) + expect_equal(x$name, "null") expect_equal(x$ToString(), "null") expect_true(x == x) expect_false(x == int8()) @@ -30,134 +30,134 @@ test_that("null type works as expected",{ test_that("boolean type work as expected",{ x <- boolean() - expect_equal(x$id(), 1L) - expect_equal(x$name(), "bool") + expect_equal(x$id, 1L) + expect_equal(x$name, "bool") 
expect_equal(x$ToString(), "bool") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 1L) + expect_equal(x$bit_width, 1L) }) test_that("int types works as expected",{ x <- uint8() - expect_equal(x$id(), 2L) - expect_equal(x$name(), "uint8") + expect_equal(x$id, 2L) + expect_equal(x$name, "uint8") expect_equal(x$ToString(), "uint8") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 8L) + expect_equal(x$bit_width, 8L) x <- int8() - expect_equal(x$id(), 3L) - expect_equal(x$name(), "int8") + expect_equal(x$id, 3L) + expect_equal(x$name, "int8") expect_equal(x$ToString(), "int8") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 8L) + expect_equal(x$bit_width, 8L) x <- uint16() - expect_equal(x$id(), 4L) - expect_equal(x$name(), "uint16") + expect_equal(x$id, 4L) + expect_equal(x$name, "uint16") expect_equal(x$ToString(), "uint16") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 16L) + expect_equal(x$bit_width, 16L) x <- int16() - expect_equal(x$id(), 5L) - expect_equal(x$name(), "int16") + expect_equal(x$id, 5L) + expect_equal(x$name, "int16") expect_equal(x$ToString(), "int16") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 16L) + expect_equal(x$bit_width, 16L) x <- uint32() - expect_equal(x$id(), 6L) - expect_equal(x$name(), "uint32") + expect_equal(x$id, 6L) + expect_equal(x$name, "uint32") expect_equal(x$ToString(), "uint32") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 32L) + 
expect_equal(x$bit_width, 32L) x <- int32() - expect_equal(x$id(), 7L) - expect_equal(x$name(), "int32") + expect_equal(x$id, 7L) + expect_equal(x$name, "int32") expect_equal(x$ToString(), "int32") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 32L) + expect_equal(x$bit_width, 32L) x <- uint64() - expect_equal(x$id(), 8L) - expect_equal(x$name(), "uint64") + expect_equal(x$id, 8L) + expect_equal(x$name, "uint64") expect_equal(x$ToString(), "uint64") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 64L) + expect_equal(x$bit_width, 64L) x <- int64() - expect_equal(x$id(), 9L) - expect_equal(x$name(), "int64") + expect_equal(x$id, 9L) + expect_equal(x$name, "int64") expect_equal(x$ToString(), "int64") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 64L) + expect_equal(x$bit_width, 64L) }) test_that("float types work as expected",{ x <- float16() - expect_equal(x$id(), 10L) - expect_equal(x$name(), "halffloat") + expect_equal(x$id, 10L) + expect_equal(x$name, "halffloat") expect_equal(x$ToString(), "halffloat") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 16L) + expect_equal(x$bit_width, 16L) x <- float32() - expect_equal(x$id(), 11L) - expect_equal(x$name(), "float") + expect_equal(x$id, 11L) + expect_equal(x$name, "float") expect_equal(x$ToString(), "float") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 32L) + expect_equal(x$bit_width, 32L) x <- float64() - expect_equal(x$id(), 12L) - expect_equal(x$name(), "double") + expect_equal(x$id, 12L) + expect_equal(x$name, "double") 
expect_equal(x$ToString(), "double") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 64L) + expect_equal(x$bit_width, 64L) }) test_that("utf8 type works as expected",{ x <- utf8() - expect_equal(x$id(), 13L) - expect_equal(x$name(), "utf8") + expect_equal(x$id, 13L) + expect_equal(x$name, "utf8") expect_equal(x$ToString(), "string") expect_true(x == x) expect_false(x == null()) @@ -167,8 +167,8 @@ test_that("utf8 type works as expected",{ test_that("date types work as expected", { x <- date32() - expect_equal(x$id(), 16L) - expect_equal(x$name(), "date32") + expect_equal(x$id, 16L) + expect_equal(x$name, "date32") expect_equal(x$ToString(), "date32[day]") expect_true(x == x) expect_false(x == null()) @@ -177,8 +177,8 @@ test_that("date types work as expected", { expect_equal(x$unit(), unclass(DateUnit$DAY)) x <- date64() - expect_equal(x$id(), 17L) - expect_equal(x$name(), "date64") + expect_equal(x$id, 17L) + expect_equal(x$name, "date64") expect_equal(x$ToString(), "date64[ms]") expect_true(x == x) expect_false(x == null()) @@ -189,106 +189,106 @@ test_that("date types work as expected", { test_that("timestamp type works as expected", { x <- timestamp(TimeUnit$SECOND) - expect_equal(x$id(), 18L) - expect_equal(x$name(), "timestamp") + expect_equal(x$id, 18L) + expect_equal(x$name, "timestamp") expect_equal(x$ToString(), "timestamp[s]") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 64L) + expect_equal(x$bit_width, 64L) expect_equal(x$timezone(), "") expect_equal(x$unit(), unclass(TimeUnit$SECOND)) x <- timestamp(TimeUnit$MILLI) - expect_equal(x$id(), 18L) - expect_equal(x$name(), "timestamp") + expect_equal(x$id, 18L) + expect_equal(x$name, "timestamp") expect_equal(x$ToString(), "timestamp[ms]") expect_true(x == x) expect_false(x == null()) 
expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 64L) + expect_equal(x$bit_width, 64L) expect_equal(x$timezone(), "") expect_equal(x$unit(), unclass(TimeUnit$MILLI)) x <- timestamp(TimeUnit$MICRO) - expect_equal(x$id(), 18L) - expect_equal(x$name(), "timestamp") + expect_equal(x$id, 18L) + expect_equal(x$name, "timestamp") expect_equal(x$ToString(), "timestamp[us]") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 64L) + expect_equal(x$bit_width, 64L) expect_equal(x$timezone(), "") expect_equal(x$unit(), unclass(TimeUnit$MICRO)) x <- timestamp(TimeUnit$NANO) - expect_equal(x$id(), 18L) - expect_equal(x$name(), "timestamp") + expect_equal(x$id, 18L) + expect_equal(x$name, "timestamp") expect_equal(x$ToString(), "timestamp[ns]") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 64L) + expect_equal(x$bit_width, 64L) expect_equal(x$timezone(), "") expect_equal(x$unit(), unclass(TimeUnit$NANO)) }) test_that("time32 types work as expected", { x <- time32(TimeUnit$SECOND) - expect_equal(x$id(), 19L) - expect_equal(x$name(), "time32") + expect_equal(x$id, 19L) + expect_equal(x$name, "time32") expect_equal(x$ToString(), "time32[s]") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 32L) + expect_equal(x$bit_width, 32L) expect_equal(x$unit(), unclass(TimeUnit$SECOND)) x <- time32(TimeUnit$MILLI) - expect_equal(x$id(), 19L) - expect_equal(x$name(), "time32") + expect_equal(x$id, 19L) + expect_equal(x$name, "time32") expect_equal(x$ToString(), "time32[ms]") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 32L) + expect_equal(x$bit_width, 32L) 
expect_equal(x$unit(), unclass(TimeUnit$MILLI)) }) test_that("time64 types work as expected", { x <- time64(TimeUnit$MICRO) - expect_equal(x$id(), 20L) - expect_equal(x$name(), "time64") + expect_equal(x$id, 20L) + expect_equal(x$name, "time64") expect_equal(x$ToString(), "time64[us]") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 64L) + expect_equal(x$bit_width, 64L) expect_equal(x$unit(), unclass(TimeUnit$MICRO)) x <- time64(TimeUnit$NANO) - expect_equal(x$id(), 20L) - expect_equal(x$name(), "time64") + expect_equal(x$id, 20L) + expect_equal(x$name, "time64") expect_equal(x$ToString(), "time64[ns]") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_children(), 0L) expect_equal(x$children(), list()) - expect_equal(x$bit_width(), 64L) + expect_equal(x$bit_width, 64L) expect_equal(x$unit(), unclass(TimeUnit$NANO)) }) test_that("list type works as expected", { x <- list_of(int32()) - expect_equal(x$id(), 23L) - expect_equal(x$name(), "list") + expect_equal(x$id, 23L) + expect_equal(x$name, "list") expect_equal(x$ToString(), "list") expect_true(x == x) expect_false(x == null()) @@ -301,8 +301,8 @@ test_that("list type works as expected", { test_that("struct type works as expected", { x <- struct(x = int32(), y = boolean()) - expect_equal(x$id(), 24L) - expect_equal(x$name(), "struct") + expect_equal(x$id, 24L) + expect_equal(x$name, "struct") expect_equal(x$ToString(), "struct") expect_true(x == x) expect_false(x == null()) @@ -318,9 +318,9 @@ test_that("DictionaryType works as expected (ARROW-3355)", { expect_equal(d, d) expect_true(d == d) expect_false(d == int32()) - expect_equal(d$id(), Type$DICTIONARY) - expect_equal(d$bit_width(), 32L) + expect_equal(d$id, Type$DICTIONARY) + expect_equal(d$bit_width, 32L) expect_equal(d$ToString(), "dictionary") - expect_equal(d$index_type(), int32()) - expect_equal(d$dictionary(), array(c("foo", "bar", "baz"))) + 
expect_equal(d$index_type, int32()) + expect_equal(d$dictionary, array(c("foo", "bar", "baz"))) }) diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R index 348327783fd..f40bd8387ad 100644 --- a/r/tests/testthat/test-RecordBatch.R +++ b/r/tests/testthat/test-RecordBatch.R @@ -28,15 +28,15 @@ test_that("RecordBatch", { expect_true(batch == batch) expect_equal( - batch$schema(), + batch$schema, schema( int = int32(), dbl = float64(), lgl = boolean(), chr = utf8(), fct = dictionary(int32(), array(letters[1:10])) ) ) - expect_equal(batch$num_columns(), 5L) - expect_equal(batch$num_rows(), 10L) + expect_equal(batch$num_columns, 5L) + expect_equal(batch$num_rows, 10L) expect_equal(batch$column_name(0), "int") expect_equal(batch$column_name(1), "dbl") expect_equal(batch$column_name(2), "lgl") @@ -47,32 +47,32 @@ test_that("RecordBatch", { col_int <- batch$column(0) expect_true(inherits(col_int, 'arrow::Array')) expect_equal(col_int$as_vector(), tbl$int) - expect_equal(col_int$type(), int32()) + expect_equal(col_int$type, int32()) col_dbl <- batch$column(1) expect_true(inherits(col_dbl, 'arrow::Array')) expect_equal(col_dbl$as_vector(), tbl$dbl) - expect_equal(col_dbl$type(), float64()) + expect_equal(col_dbl$type, float64()) col_lgl <- batch$column(2) expect_true(inherits(col_dbl, 'arrow::Array')) expect_equal(col_lgl$as_vector(), tbl$lgl) - expect_equal(col_lgl$type(), boolean()) + expect_equal(col_lgl$type, boolean()) col_chr <- batch$column(3) expect_true(inherits(col_chr, 'arrow::Array')) expect_equal(col_chr$as_vector(), tbl$chr) - expect_equal(col_chr$type(), utf8()) + expect_equal(col_chr$type, utf8()) col_fct <- batch$column(4) expect_true(inherits(col_fct, 'arrow::Array')) expect_equal(col_fct$as_vector(), tbl$fct) - expect_equal(col_fct$type(), dictionary(int32(), array(letters[1:10]))) + expect_equal(col_fct$type, dictionary(int32(), array(letters[1:10]))) batch2 <- batch$RemoveColumn(0) expect_equal( - batch2$schema(), + 
batch2$schema, schema(dbl = float64(), lgl = boolean(), chr = utf8(), fct = dictionary(int32(), array(letters[1:10]))) ) expect_equal(batch2$column(0), batch$column(1)) @@ -95,10 +95,10 @@ test_that("RecordBatch with 0 rows are supported", { ) batch <- record_batch(tbl) - expect_equal(batch$num_columns(), 5L) - expect_equal(batch$num_rows(), 0L) + expect_equal(batch$num_columns, 5L) + expect_equal(batch$num_rows, 0L) expect_equal( - batch$schema(), + batch$schema, schema( int = int32(), dbl = float64(), @@ -107,67 +107,6 @@ test_that("RecordBatch with 0 rows are supported", { fct = dictionary(int32(), array(c("a", "b"))) ) ) - - tf <- local_tempfile() - write_record_batch(batch, tf) - res <- read_record_batch(tf) - expect_equal(res, batch) -}) - -test_that("read_record_batch handles various streams (ARROW-3450, ARROW-3505)", { - tbl <- tibble::tibble( - int = 1:10, dbl = as.numeric(1:10), - lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE), - chr = letters[1:10] - ) - batch <- record_batch(tbl) - tf <- local_tempfile() - write_record_batch(batch, tf) - - bytes <- write_record_batch(batch, raw()) - buf_reader <- buffer_reader(bytes) - - batch1 <- read_record_batch(tf) - batch2 <- read_record_batch(fs::path_abs(tf)) - - readable_file <- close_on_exit(file_open(tf)) - batch3 <- read_record_batch(readable_file) - - mmap_file <- close_on_exit(mmap_open(tf)) - batch4 <- read_record_batch(mmap_file) - batch5 <- read_record_batch(bytes) - batch6 <- read_record_batch(buf_reader) - - stream_reader <- record_batch_stream_reader(bytes) - batch7 <- read_record_batch(stream_reader) - expect_null(read_record_batch(stream_reader)) - - file_reader <- record_batch_file_reader(tf) - batch8 <- read_record_batch(file_reader) - expect_null(read_record_batch(file_reader, i = 2)) - - expect_equal(batch, batch1) - expect_equal(batch, batch2) - expect_equal(batch, batch3) - expect_equal(batch, batch4) - expect_equal(batch, batch5) - expect_equal(batch, batch6) - expect_equal(batch, 
batch7) - expect_equal(batch, batch8) -}) - -test_that("read_record_batch can handle Message, Schema parameters (ARROW-3499)", { - batch <- record_batch(tibble::tibble(x = 1:10)) - stream <- buffer_reader(write_record_batch(batch, raw())) - - # schema - message <- read_message(stream) - - # batch - message <- read_message(stream) - schema <- batch$schema() - batch2 <- read_record_batch(message, schema) - expect_equal(batch, batch2) }) test_that("RecordBatch cast (ARROW-3741)", { @@ -178,7 +117,7 @@ test_that("RecordBatch cast (ARROW-3741)", { s2 <- schema(x = int16(), y = int64()) batch2 <- batch$cast(s2) - expect_equal(batch2$schema(), s2) - expect_equal(batch2$column(0L)$type(), int16()) - expect_equal(batch2$column(1L)$type(), int64()) + expect_equal(batch2$schema, s2) + expect_equal(batch2$column(0L)$type, int16()) + expect_equal(batch2$column(1L)$type, int64()) }) diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R index d5db9de2406..ec1be9b2348 100644 --- a/r/tests/testthat/test-Table.R +++ b/r/tests/testthat/test-Table.R @@ -24,29 +24,28 @@ test_that("read_table handles various input streams (ARROW-3450, ARROW-3505)", { chr = letters[1:10] ) tab <- arrow::table(tbl) + tf <- local_tempfile() - write_table(tab, tf) + write_arrow(tab, tf) - bytes <- write_table(tab, raw()) - buf_reader <- buffer_reader(bytes) + bytes <- write_arrow(tab, raw()) tab1 <- read_table(tf) tab2 <- read_table(fs::path_abs(tf)) - readable_file <- close_on_exit(file_open(tf)) - tab3 <- read_table(readable_file) + readable_file <- close_on_exit(ReadableFile(tf)) + tab3 <- read_table(close_on_exit(RecordBatchFileReader(readable_file))) mmap_file <- close_on_exit(mmap_open(tf)) - tab4 <- read_table(mmap_file) + tab4 <- read_table(close_on_exit(RecordBatchFileReader(mmap_file))) tab5 <- read_table(bytes) - tab6 <- read_table(buf_reader) - stream_reader <- record_batch_stream_reader(bytes) - tab7 <- read_table(stream_reader) + stream_reader <- 
RecordBatchStreamReader(bytes) + tab6 <- read_table(stream_reader) - file_reader <- record_batch_file_reader(tf) - tab8 <- read_table(file_reader) + file_reader <- RecordBatchFileReader(tf) + tab7 <- read_table(file_reader) expect_equal(tab, tab1) expect_equal(tab, tab2) @@ -55,7 +54,6 @@ test_that("read_table handles various input streams (ARROW-3450, ARROW-3505)", { expect_equal(tab, tab5) expect_equal(tab, tab6) expect_equal(tab, tab7) - expect_equal(tab, tab8) }) test_that("Table cast (ARROW-3741)", { @@ -66,7 +64,7 @@ test_that("Table cast (ARROW-3741)", { s2 <- schema(x = int16(), y = int64()) tab2 <- tab$cast(s2) - expect_equal(tab2$schema(), s2) - expect_equal(tab2$column(0L)$type(), int16()) - expect_equal(tab2$column(1L)$type(), int64()) + expect_equal(tab2$schema, s2) + expect_equal(tab2$column(0L)$type, int16()) + expect_equal(tab2$column(1L)$type, int64()) }) diff --git a/r/tests/testthat/test-arraydata.R b/r/tests/testthat/test-arraydata.R index 5d8f8f1dcaa..02ca9b85625 100644 --- a/r/tests/testthat/test-arraydata.R +++ b/r/tests/testthat/test-arraydata.R @@ -24,5 +24,5 @@ test_that("string vectors with only empty strings and nulls don't allocate a dat buffers <- a$data()$buffers expect_null(buffers[[1]]) expect_null(buffers[[3]]) - expect_equal(buffers[[2]]$size(), 8L) + expect_equal(buffers[[2]]$size, 8L) }) diff --git a/r/tests/testthat/test-buffer.R b/r/tests/testthat/test-buffer.R index aa712b02680..26ec8dfde0a 100644 --- a/r/tests/testthat/test-buffer.R +++ b/r/tests/testthat/test-buffer.R @@ -21,26 +21,26 @@ test_that("arrow::Buffer can be created from raw vector", { vec <- raw(123) buf <- buffer(vec) expect_is(buf, "arrow::Buffer") - expect_equal(buf$size(), 123) + expect_equal(buf$size, 123) }) test_that("arrow::Buffer can be created from integer vector", { vec <- integer(17) buf <- buffer(vec) expect_is(buf, "arrow::Buffer") - expect_equal(buf$size(), 17 * 4) + expect_equal(buf$size, 17 * 4) }) test_that("arrow::Buffer can be created from 
numeric vector", { vec <- numeric(17) buf <- buffer(vec) expect_is(buf, "arrow::Buffer") - expect_equal(buf$size(), 17 * 8) + expect_equal(buf$size, 17 * 8) }) test_that("arrow::Buffer can be created from complex vector", { vec <- complex(3) buf <- buffer(vec) expect_is(buf, "arrow::Buffer") - expect_equal(buf$size(), 3 * 16) + expect_equal(buf$size, 3 * 16) }) diff --git a/r/tests/testthat/test-bufferreader.R b/r/tests/testthat/test-bufferreader.R index e7680a493fc..72d257101fa 100644 --- a/r/tests/testthat/test-bufferreader.R +++ b/r/tests/testthat/test-bufferreader.R @@ -18,9 +18,9 @@ context("arrow::BufferReader") test_that("BufferReader can be created from R objects", { - num <- buffer_reader(numeric(13)) - int <- buffer_reader(integer(13)) - raw <- buffer_reader(raw(16)) + num <- BufferReader(numeric(13)) + int <- BufferReader(integer(13)) + raw <- BufferReader(raw(16)) expect_is(num, "arrow::io::BufferReader") expect_is(int, "arrow::io::BufferReader") @@ -33,7 +33,7 @@ test_that("BufferReader can be created from R objects", { test_that("BufferReader can be created from Buffer", { buf <- buffer(raw(76)) - reader <- buffer_reader(buf) + reader <- BufferReader(buf) expect_is(reader, "arrow::io::BufferReader") expect_equal(reader$GetSize(), 76) diff --git a/r/tests/testthat/test-chunkedarray.R b/r/tests/testthat/test-chunkedarray.R index 8bca6201477..11a196d039d 100644 --- a/r/tests/testthat/test-chunkedarray.R +++ b/r/tests/testthat/test-chunkedarray.R @@ -19,38 +19,38 @@ context("arrow::ChunkedArray") test_that("ChunkedArray", { x <- chunked_array(1:10, 1:10, 1:5) - expect_equal(x$type(), int32()) - expect_equal(x$num_chunks(), 3L) + expect_equal(x$type, int32()) + expect_equal(x$num_chunks, 3L) expect_equal(x$length(), 25L) expect_equal(x$as_vector(), c(1:10, 1:10, 1:5)) y <- x$Slice(8) - expect_equal(y$type(), int32()) - expect_equal(y$num_chunks(), 3L) + expect_equal(y$type, int32()) + expect_equal(y$num_chunks, 3L) expect_equal(y$length(), 17L) 
expect_equal(y$as_vector(), c(9:10, 1:10, 1:5)) z <- x$Slice(8, 5) - expect_equal(z$type(), int32()) - expect_equal(z$num_chunks(), 2L) + expect_equal(z$type, int32()) + expect_equal(z$num_chunks, 2L) expect_equal(z$length(), 5L) expect_equal(z$as_vector(), c(9:10, 1:3)) x_dbl <- chunked_array(c(1,2,3), c(4,5,6)) - expect_equal(x_dbl$type(), float64()) - expect_equal(x_dbl$num_chunks(), 2L) + expect_equal(x_dbl$type, float64()) + expect_equal(x_dbl$num_chunks, 2L) expect_equal(x_dbl$length(), 6L) expect_equal(x_dbl$as_vector(), as.numeric(1:6)) y_dbl <- x_dbl$Slice(2) - expect_equal(y_dbl$type(), float64()) - expect_equal(y_dbl$num_chunks(), 2L) + expect_equal(y_dbl$type, float64()) + expect_equal(y_dbl$num_chunks, 2L) expect_equal(y_dbl$length(), 4L) expect_equal(y_dbl$as_vector(), as.numeric(3:6)) z_dbl <- x_dbl$Slice(2, 2) - expect_equal(z_dbl$type(), float64()) - expect_equal(z_dbl$num_chunks(), 2L) + expect_equal(z_dbl$type, float64()) + expect_equal(z_dbl$num_chunks, 2L) expect_equal(z_dbl$length(), 2L) expect_equal(z_dbl$as_vector(), as.numeric(3:4)) }) @@ -58,19 +58,19 @@ test_that("ChunkedArray", { test_that("ChunkedArray handles !!! 
splicing", { data <- list(1, 2, 3) x <- chunked_array(!!!data) - expect_equal(x$type(), float64()) - expect_equal(x$num_chunks(), 3L) + expect_equal(x$type, float64()) + expect_equal(x$num_chunks, 3L) }) test_that("ChunkedArray handles NA", { data <- list(1:10, c(NA, 2:10), c(1:3, NA, 5L)) x <- chunked_array(!!!data) - expect_equal(x$type(), int32()) - expect_equal(x$num_chunks(), 3L) + expect_equal(x$type, int32()) + expect_equal(x$num_chunks, 3L) expect_equal(x$length(), 25L) expect_equal(x$as_vector(), c(1:10, c(NA, 2:10), c(1:3, NA, 5))) - chunks <- x$chunks() + chunks <- x$chunks expect_equal(Array__Mask(chunks[[1]]), !is.na(data[[1]])) expect_equal(Array__Mask(chunks[[2]]), !is.na(data[[2]])) expect_equal(Array__Mask(chunks[[3]]), !is.na(data[[3]])) @@ -81,10 +81,10 @@ test_that("ChunkedArray supports logical vectors (ARROW-3341)", { data <- purrr::rerun(3, sample(c(TRUE, FALSE, NA), 100, replace = TRUE)) arr_lgl <- chunked_array(!!!data) expect_equal(arr_lgl$length(), 300L) - expect_equal(arr_lgl$null_count(), sum(unlist(map(data, is.na)))) + expect_equal(arr_lgl$null_count, sum(unlist(map(data, is.na)))) expect_identical(arr_lgl$as_vector(), purrr::flatten_lgl(data)) - chunks <- arr_lgl$chunks() + chunks <- arr_lgl$chunks expect_identical(data[[1]], chunks[[1]]$as_vector()) expect_identical(data[[2]], chunks[[2]]$as_vector()) expect_identical(data[[3]], chunks[[3]]$as_vector()) @@ -94,10 +94,10 @@ test_that("ChunkedArray supports logical vectors (ARROW-3341)", { data <- purrr::rerun(3, sample(c(TRUE, FALSE), 100, replace = TRUE)) arr_lgl <- chunked_array(!!!data) expect_equal(arr_lgl$length(), 300L) - expect_equal(arr_lgl$null_count(), sum(unlist(map(data, is.na)))) + expect_equal(arr_lgl$null_count, sum(unlist(map(data, is.na)))) expect_identical(arr_lgl$as_vector(), purrr::flatten_lgl(data)) - chunks <- arr_lgl$chunks() + chunks <- arr_lgl$chunks expect_identical(data[[1]], chunks[[1]]$as_vector()) expect_identical(data[[2]], chunks[[2]]$as_vector()) 
expect_identical(data[[3]], chunks[[3]]$as_vector()) @@ -112,10 +112,10 @@ test_that("ChunkedArray supports character vectors (ARROW-3339)", { ) arr_chr <- chunked_array(!!!data) expect_equal(arr_chr$length(), length(unlist(data))) - expect_equal(arr_chr$null_count(), 1L) + expect_equal(arr_chr$null_count, 1L) expect_equal(arr_chr$as_vector(), purrr::flatten_chr(data)) - chunks <- arr_chr$chunks() + chunks <- arr_chr$chunks expect_equal(data, purrr::map(chunks, ~.$as_vector())) }) @@ -123,14 +123,14 @@ test_that("ChunkedArray supports factors (ARROW-3716)", { f <- factor(c("itsy", "bitsy", "spider", "spider")) arr_fac <- chunked_array(f, f, f) expect_equal(arr_fac$length(), 12L) - expect_equal(arr_fac$type()$index_type(), int8()) + expect_equal(arr_fac$type$index_type, int8()) expect_identical(arr_fac$as_vector(), vctrs::vec_c(f, f, f)) }) test_that("ChunkedArray supports dates (ARROW-3716)", { d <- Sys.Date() + 1:10 a <- chunked_array(d, d) - expect_equal(a$type(), date32()) + expect_equal(a$type, date32()) expect_equal(a$length(), 20L) expect_equal(a$as_vector(), c(d, d)) }) @@ -138,8 +138,8 @@ test_that("ChunkedArray supports dates (ARROW-3716)", { test_that("ChunkedArray supports POSIXct (ARROW-3716)", { times <- lubridate::ymd_hms("2018-10-07 19:04:05") + 1:10 a <- chunked_array(times, times) - expect_equal(a$type()$name(), "timestamp") - expect_equal(a$type()$unit(), unclass(TimeUnit$MICRO)) + expect_equal(a$type$name, "timestamp") + expect_equal(a$type$unit(), unclass(TimeUnit$MICRO)) expect_equal(a$length(), 20L) expect_equal(as.numeric(a$as_vector()), as.numeric(c(times, times))) }) @@ -147,7 +147,7 @@ test_that("ChunkedArray supports POSIXct (ARROW-3716)", { test_that("ChunkedArray supports integer64 (ARROW-3716)", { x <- bit64::as.integer64(1:10) a <- chunked_array(x, x) - expect_equal(a$type(), int64()) + expect_equal(a$type, int64()) expect_equal(a$length(), 20L) expect_equal(a$as_vector(), c(x,x)) }) @@ -155,7 +155,7 @@ test_that("ChunkedArray 
supports integer64 (ARROW-3716)", { test_that("ChunkedArray supports difftime", { time <- hms::hms(56, 34, 12) a <- chunked_array(time, time) - expect_equal(a$type(), time32(unit = TimeUnit$SECOND)) + expect_equal(a$type, time32(unit = TimeUnit$SECOND)) expect_equal(a$length(), 2L) expect_equal(a$as_vector(), c(time, time)) }) @@ -177,10 +177,10 @@ test_that("integer types casts for ChunkedArray (ARROW-3741)", { expect_is(a_int16, "arrow::ChunkedArray") expect_is(a_int32, "arrow::ChunkedArray") expect_is(a_int64, "arrow::ChunkedArray") - expect_equal(a_int8$type(), int8()) - expect_equal(a_int16$type(), int16()) - expect_equal(a_int32$type(), int32()) - expect_equal(a_int64$type(), int64()) + expect_equal(a_int8$type, int8()) + expect_equal(a_int16$type, int16()) + expect_equal(a_int32$type, int32()) + expect_equal(a_int64$type, int64()) a_uint8 <- a$cast(uint8()) a_uint16 <- a$cast(uint16()) @@ -192,8 +192,8 @@ test_that("integer types casts for ChunkedArray (ARROW-3741)", { expect_is(a_uint32, "arrow::ChunkedArray") expect_is(a_uint64, "arrow::ChunkedArray") - expect_equal(a_uint8$type(), uint8()) - expect_equal(a_uint16$type(), uint16()) - expect_equal(a_uint32$type(), uint32()) - expect_equal(a_uint64$type(), uint64()) + expect_equal(a_uint8$type, uint8()) + expect_equal(a_uint16$type, uint16()) + expect_equal(a_uint32$type, uint32()) + expect_equal(a_uint64$type, uint64()) }) diff --git a/r/tests/testthat/test-feather.R b/r/tests/testthat/test-feather.R index f6d9bee581d..715017fb586 100644 --- a/r/tests/testthat/test-feather.R +++ b/r/tests/testthat/test-feather.R @@ -29,7 +29,7 @@ test_that("feather read/write round trip", { expect_true(fs::file_exists(tf2)) tf3 <- local_tempfile() - stream <- close_on_exit(file_output_stream(tf3)) + stream <- close_on_exit(FileOutputStream(tf3)) write_feather(tib, stream) expect_true(fs::file_exists(tf3)) @@ -47,7 +47,7 @@ test_that("feather read/write round trip", { expect_is(tab4, "arrow::Table") # reading directly from 
arrow::io::ReadableFile - tab5 <- read_feather(file_open(tf3)) + tab5 <- read_feather(ReadableFile(tf3)) expect_is(tab5, "arrow::Table") expect_equal(tib, as_tibble(tab1)) diff --git a/r/tests/testthat/test-field.R b/r/tests/testthat/test-field.R index 08bf4db36a5..aaa2875510a 100644 --- a/r/tests/testthat/test-field.R +++ b/r/tests/testthat/test-field.R @@ -19,8 +19,8 @@ context("arrow::Field") test_that("field() factory", { x <- field("x", int32()) - expect_equal(x$type(), int32()) - expect_equal(x$name(), "x") + expect_equal(x$type, int32()) + expect_equal(x$name, "x") expect_true(x == x) expect_false(x == field("x", int64())) }) diff --git a/r/tests/testthat/test-message.R b/r/tests/testthat/test-message.R index fd05b860568..3fe5829f869 100644 --- a/r/tests/testthat/test-message.R +++ b/r/tests/testthat/test-message.R @@ -19,16 +19,12 @@ context("arrow::ipc::Message") test_that("read_message can read from input stream", { batch <- record_batch(tibble::tibble(x = 1:10)) - bytes <- write_record_batch(batch, raw()) - stream <- buffer_reader(bytes) + bytes <- batch$serialize() + stream <- BufferReader(bytes) message <- read_message(stream) - expect_equal(message$type(), MessageType$SCHEMA) - expect_is(message$body, "arrow::Buffer") - expect_is(message$metadata, "arrow::Buffer") - - message <- read_message(stream) - expect_equal(message$type(), MessageType$RECORD_BATCH) + expect_is(message, "arrow::ipc::Message") + expect_equal(message$type, MessageType$RECORD_BATCH) expect_is(message$body, "arrow::Buffer") expect_is(message$metadata, "arrow::Buffer") diff --git a/r/tests/testthat/test-messagereader.R b/r/tests/testthat/test-messagereader.R index 4527a2882f0..5ff8277625d 100644 --- a/r/tests/testthat/test-messagereader.R +++ b/r/tests/testthat/test-messagereader.R @@ -19,16 +19,13 @@ context("arrow::ipc::MessageReader") test_that("MessageReader can be created from raw vectors", { batch <- record_batch(tibble::tibble(x = 1:10)) - bytes <- write_record_batch(batch, 
raw()) + bytes <- batch$serialize() - reader <- message_reader(bytes) - message <- reader$ReadNextMessage() - expect_equal(message$type(), MessageType$SCHEMA) - expect_is(message$body, "arrow::Buffer") - expect_is(message$metadata, "arrow::Buffer") + reader <- MessageReader(bytes) message <- reader$ReadNextMessage() - expect_equal(message$type(), MessageType$RECORD_BATCH) + expect_is(message, "arrow::ipc::Message") + expect_equal(message$type, MessageType$RECORD_BATCH) expect_is(message$body, "arrow::Buffer") expect_is(message$metadata, "arrow::Buffer") @@ -38,17 +35,17 @@ test_that("MessageReader can be created from raw vectors", { test_that("MessageReader can be created from input stream", { batch <- record_batch(tibble::tibble(x = 1:10)) - bytes <- write_record_batch(batch, raw()) - stream <- buffer_reader(bytes) + bytes <- batch$serialize() - reader <- message_reader(stream) - message <- reader$ReadNextMessage() - expect_equal(message$type(), MessageType$SCHEMA) - expect_is(message$body, "arrow::Buffer") - expect_is(message$metadata, "arrow::Buffer") + stream <- BufferReader(bytes) + expect_is(stream, "arrow::io::BufferReader") + + reader <- MessageReader(stream) + expect_is(reader, "arrow::ipc::MessageReader") message <- reader$ReadNextMessage() - expect_equal(message$type(), MessageType$RECORD_BATCH) + expect_is(message, "arrow::ipc::Message") + expect_equal(message$type, MessageType$RECORD_BATCH) expect_is(message$body, "arrow::Buffer") expect_is(message$metadata, "arrow::Buffer") diff --git a/r/tests/testthat/test-read-write.R b/r/tests/testthat/test-read-write.R index 2af718ebe56..ffc14eba72b 100644 --- a/r/tests/testthat/test-read-write.R +++ b/r/tests/testthat/test-read-write.R @@ -25,24 +25,24 @@ test_that("arrow::table round trip", { ) tab <- arrow::table(tbl) - expect_equal(tab$num_columns(), 3L) - expect_equal(tab$num_rows(), 10L) + expect_equal(tab$num_columns, 3L) + expect_equal(tab$num_rows, 10L) # arrow::Column col_int <- tab$column(0) 
expect_equal(col_int$length(), 10L) - expect_equal(col_int$null_count(), 0L) - expect_equal(col_int$type(), int32()) + expect_equal(col_int$null_count, 0L) + expect_equal(col_int$type, int32()) # arrow::ChunkedArray chunked_array_int <- col_int$data() expect_equal(chunked_array_int$length(), 10L) - expect_equal(chunked_array_int$null_count(), 0L) + expect_equal(chunked_array_int$null_count, 0L) expect_equal(chunked_array_int$as_vector(), tbl$int) # arrow::Array - chunks_int <- chunked_array_int$chunks() - expect_equal(length(chunks_int), chunked_array_int$num_chunks()) + chunks_int <- chunked_array_int$chunks + expect_equal(length(chunks_int), chunked_array_int$num_chunks) for( i in seq_along(chunks_int)){ expect_equal(chunked_array_int$chunk(i-1L), chunks_int[[i]]) } @@ -50,18 +50,18 @@ test_that("arrow::table round trip", { # arrow::Column col_dbl <- tab$column(1) expect_equal(col_dbl$length(), 10L) - expect_equal(col_dbl$null_count(), 0L) - expect_equal(col_dbl$type(), float64()) + expect_equal(col_dbl$null_count, 0L) + expect_equal(col_dbl$type, float64()) # arrow::ChunkedArray chunked_array_dbl <- col_dbl$data() expect_equal(chunked_array_dbl$length(), 10L) - expect_equal(chunked_array_dbl$null_count(), 0L) + expect_equal(chunked_array_dbl$null_count, 0L) expect_equal(chunked_array_dbl$as_vector(), tbl$dbl) # arrow::Array - chunks_dbl <- chunked_array_dbl$chunks() - expect_equal(length(chunks_dbl), chunked_array_dbl$num_chunks()) + chunks_dbl <- chunked_array_dbl$chunks + expect_equal(length(chunks_dbl), chunked_array_dbl$num_chunks) for( i in seq_along(chunks_dbl)){ expect_equal(chunked_array_dbl$chunk(i-1L), chunks_dbl[[i]]) } @@ -69,18 +69,18 @@ test_that("arrow::table round trip", { # arrow::Colmumn col_raw <- tab$column(2) expect_equal(col_raw$length(), 10L) - expect_equal(col_raw$null_count(), 0L) - expect_equal(col_raw$type(), int8()) + expect_equal(col_raw$null_count, 0L) + expect_equal(col_raw$type, int8()) # arrow::ChunkedArray chunked_array_raw <- 
col_raw$data() expect_equal(chunked_array_raw$length(), 10L) - expect_equal(chunked_array_raw$null_count(), 0L) + expect_equal(chunked_array_raw$null_count, 0L) expect_equal(chunked_array_raw$as_vector(), tbl$raw) # arrow::Array - chunks_raw <- chunked_array_raw$chunks() - expect_equal(length(chunks_raw), chunked_array_raw$num_chunks()) + chunks_raw <- chunked_array_raw$chunks + expect_equal(length(chunks_raw), chunked_array_raw$num_chunks) for( i in seq_along(chunks_raw)){ expect_equal(chunked_array_raw$chunk(i-1L), chunks_raw[[i]]) } @@ -99,20 +99,20 @@ test_that("arrow::table round trip handles NA in integer and numeric", { ) tab <- arrow::table(tbl) - expect_equal(tab$num_columns(), 3L) - expect_equal(tab$num_rows(), 10L) + expect_equal(tab$num_columns, 3L) + expect_equal(tab$num_rows, 10L) expect_equal(tab$column(0)$length(), 10L) expect_equal(tab$column(1)$length(), 10L) expect_equal(tab$column(2)$length(), 10L) - expect_equal(tab$column(0)$null_count(), 1L) - expect_equal(tab$column(1)$null_count(), 2L) - expect_equal(tab$column(2)$null_count(), 0L) + expect_equal(tab$column(0)$null_count, 1L) + expect_equal(tab$column(1)$null_count, 2L) + expect_equal(tab$column(2)$null_count, 0L) - expect_equal(tab$column(0)$type(), int32()) - expect_equal(tab$column(1)$type(), float64()) - expect_equal(tab$column(2)$type(), int8()) + expect_equal(tab$column(0)$type, int32()) + expect_equal(tab$column(1)$type, float64()) + expect_equal(tab$column(2)$type, int8()) tf <- local_tempfile() write_arrow(tbl, tf) diff --git a/r/tests/testthat/test-read_record_batch.R b/r/tests/testthat/test-read_record_batch.R new file mode 100644 index 00000000000..8477b7a4c3d --- /dev/null +++ b/r/tests/testthat/test-read_record_batch.R @@ -0,0 +1,73 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +context("read_record_batch()") + +test_that("RecordBatchFileWriter / RecordBatchFileReader roundtrips", { + tab <- table(tibble::tibble( + int = 1:10, dbl = as.numeric(1:10), + lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE), + chr = letters[1:10] + )) + tf <- local_tempfile() + + writer <- RecordBatchFileWriter(tf, tab$schema) + expect_is(writer, "arrow::ipc::RecordBatchFileWriter") + writer$write_table(tab) + writer$close() + tab2 <- read_table(tf) + expect_equal(tab, tab2) + + stream <- FileOutputStream(tf) + writer <- RecordBatchFileWriter(stream, tab$schema) + expect_is(writer, "arrow::ipc::RecordBatchFileWriter") + writer$write_table(tab) + writer$close() + tab3 <- read_table(tf) + expect_equal(tab, tab3) +}) + +test_that("read_record_batch() handles (raw|Buffer|InputStream, Schema) (ARROW-3450, ARROW-3505)", { + tbl <- tibble::tibble( + int = 1:10, dbl = as.numeric(1:10), + lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE), + chr = letters[1:10] + ) + batch <- record_batch(tbl) + schema <- batch$schema + + raw <- batch$serialize() + batch2 <- read_record_batch(raw, schema) + batch3 <- read_record_batch(buffer(raw), schema) + batch4 <- read_record_batch(close_on_exit(BufferReader(raw)), schema) + + expect_equal(batch, batch2) + expect_equal(batch, batch3) + expect_equal(batch, batch4) +}) + +test_that("read_record_batch() can handle (Message, Schema) parameters (ARROW-3499)", { + 
batch <- record_batch(tibble::tibble(x = 1:10)) + schema <- batch$schema + + raw <- batch$serialize() + stream <- close_on_exit(BufferReader(raw)) + + message <- read_message(stream) + batch2 <- read_record_batch(message, schema) + expect_equal(batch, batch2) +}) diff --git a/r/tests/testthat/test-schema.R b/r/tests/testthat/test-schema.R index d40fbfa36bc..2f2d3ee84e7 100644 --- a/r/tests/testthat/test-schema.R +++ b/r/tests/testthat/test-schema.R @@ -17,18 +17,30 @@ context("arrow::Schema") -test_that("reading schema from raw vector", { +test_that("reading schema from Buffer", { + # TODO: this uses the streaming format, i.e. from RecordBatchStreamWriter + # maybe there is an easier way to serialize a schema batch <- record_batch(tibble::tibble(x = 1:10)) - bytes <- write_record_batch(batch, raw()) - schema <- read_schema(bytes) - expect_equal(schema, batch$schema()) -}) + expect_is(batch, "arrow::RecordBatch") -test_that("reading schema from streams", { - batch <- record_batch(tibble::tibble(x = 1:10)) - bytes <- write_record_batch(batch, raw()) - stream <- buffer_reader(bytes) + stream <- BufferOutputStream() + writer <- RecordBatchStreamWriter(stream, batch$schema) + expect_is(writer, "arrow::ipc::RecordBatchStreamWriter") + writer$close() + + buffer <- stream$getvalue() + expect_is(buffer, "arrow::Buffer") + + reader <- MessageReader(buffer) + expect_is(reader, "arrow::ipc::MessageReader") + + message <- reader$ReadNextMessage() + expect_is(message, "arrow::ipc::Message") + expect_equal(message$type, MessageType$SCHEMA) - schema <- read_schema(stream) - expect_equal(schema, batch$schema()) + stream <- BufferReader(buffer) + expect_is(stream, "arrow::io::BufferReader") + message <- read_message(stream) + expect_is(message, "arrow::ipc::Message") + expect_equal(message$type, MessageType$SCHEMA) })