diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 53f9ac20c3b8..deb3d2fe1dd5 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -112,6 +112,8 @@ jobs: cd arrow # re-run tests on arrow workspace with additional features cargo test --features=prettyprint + # run test on arrow with minimal set of features + cargo test --no-default-features cargo run --example builders cargo run --example dynamic_types cargo run --example read_csv diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index 6d532ced79cb..7781584e00ca 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -41,8 +41,8 @@ serde_derive = "1.0" serde_json = { version = "1.0", features = ["preserve_order"] } indexmap = "1.6" rand = "0.7" -csv = "1.1" num = "0.4" +csv_crate = { version = "1.1", optional = true, package="csv" } regex = "1.3" lazy_static = "1.4" packed_simd = { version = "0.3.4", optional = true, package = "packed_simd_2" } @@ -54,8 +54,9 @@ lexical-core = "^0.7" multiversion = "0.6.1" [features] -default = [] +default = ["csv"] avx512 = [] +csv = ["csv_crate"] simd = ["packed_simd"] prettyprint = ["prettytable-rs"] # this is only intended to be used in single-threaded programs: it verifies that diff --git a/arrow/benches/csv_writer.rs b/arrow/benches/csv_writer.rs index 9b0185309388..50b94d6836d9 100644 --- a/arrow/benches/csv_writer.rs +++ b/arrow/benches/csv_writer.rs @@ -21,6 +21,7 @@ extern crate criterion; use criterion::*; use arrow::array::*; +#[cfg(feature = "csv")] use arrow::csv; use arrow::datatypes::*; use arrow::record_batch::RecordBatch; @@ -28,38 +29,41 @@ use std::fs::File; use std::sync::Arc; fn record_batches_to_csv() { - let schema = Schema::new(vec![ - Field::new("c1", DataType::Utf8, false), - Field::new("c2", DataType::Float64, true), - Field::new("c3", DataType::UInt32, false), - Field::new("c3", DataType::Boolean, true), - ]); + #[cfg(feature = "csv")] + { + let schema = Schema::new(vec![ + Field::new("c1", DataType::Utf8, 
false), + Field::new("c2", DataType::Float64, true), + Field::new("c3", DataType::UInt32, false), + Field::new("c3", DataType::Boolean, true), + ]); - let c1 = StringArray::from(vec![ - "Lorem ipsum dolor sit amet", - "consectetur adipiscing elit", - "sed do eiusmod tempor", - ]); - let c2 = PrimitiveArray::<Float64Type>::from(vec![ - Some(123.564532), - None, - Some(-556132.25), - ]); - let c3 = PrimitiveArray::<UInt32Type>::from(vec![3, 2, 1]); - let c4 = BooleanArray::from(vec![Some(true), Some(false), None]); + let c1 = StringArray::from(vec![ + "Lorem ipsum dolor sit amet", + "consectetur adipiscing elit", + "sed do eiusmod tempor", + ]); + let c2 = PrimitiveArray::<Float64Type>::from(vec![ + Some(123.564532), + None, + Some(-556132.25), + ]); + let c3 = PrimitiveArray::<UInt32Type>::from(vec![3, 2, 1]); + let c4 = BooleanArray::from(vec![Some(true), Some(false), None]); - let b = RecordBatch::try_new( - Arc::new(schema), - vec![Arc::new(c1), Arc::new(c2), Arc::new(c3), Arc::new(c4)], - ) - .unwrap(); - let file = File::create("target/bench_write_csv.csv").unwrap(); - let mut writer = csv::Writer::new(file); - let batches = vec![&b, &b, &b, &b, &b, &b, &b, &b, &b, &b, &b]; - #[allow(clippy::unit_arg)] - criterion::black_box(for batch in batches { - writer.write(batch).unwrap() - }); + let b = RecordBatch::try_new( + Arc::new(schema), + vec![Arc::new(c1), Arc::new(c2), Arc::new(c3), Arc::new(c4)], + ) + .unwrap(); + let file = File::create("target/bench_write_csv.csv").unwrap(); + let mut writer = csv::Writer::new(file); + let batches = vec![&b, &b, &b, &b, &b, &b, &b, &b, &b, &b, &b]; + #[allow(clippy::unit_arg)] + criterion::black_box(for batch in batches { + writer.write(batch).unwrap() + }); + } } fn criterion_benchmark(c: &mut Criterion) { diff --git a/arrow/examples/read_csv.rs b/arrow/examples/read_csv.rs index 9e2b9c34c86a..506b89887657 100644 --- a/arrow/examples/read_csv.rs +++ b/arrow/examples/read_csv.rs @@ -20,24 +20,29 @@ extern crate arrow; use std::fs::File; use std::sync::Arc; +#[cfg(feature = 
"csv")] use arrow::csv; use arrow::datatypes::{DataType, Field, Schema}; #[cfg(feature = "prettyprint")] use arrow::util::pretty::print_batches; fn main() { - let schema = Schema::new(vec![ - Field::new("city", DataType::Utf8, false), - Field::new("lat", DataType::Float64, false), - Field::new("lng", DataType::Float64, false), - ]); + #[cfg(feature = "csv")] + { + let schema = Schema::new(vec![ + Field::new("city", DataType::Utf8, false), + Field::new("lat", DataType::Float64, false), + Field::new("lng", DataType::Float64, false), + ]); - let file = File::open("test/data/uk_cities.csv").unwrap(); + let file = File::open("test/data/uk_cities.csv").unwrap(); - let mut csv = csv::Reader::new(file, Arc::new(schema), false, None, 1024, None, None); - let _batch = csv.next().unwrap().unwrap(); - #[cfg(feature = "prettyprint")] - { - print_batches(&[_batch]).unwrap(); + let mut csv = + csv::Reader::new(file, Arc::new(schema), false, None, 1024, None, None); + let _batch = csv.next().unwrap().unwrap(); + #[cfg(feature = "prettyprint")] + { + print_batches(&[_batch]).unwrap(); + } } } diff --git a/arrow/examples/read_csv_infer_schema.rs b/arrow/examples/read_csv_infer_schema.rs index 93253e72cff2..11f8cfb7f7d2 100644 --- a/arrow/examples/read_csv_infer_schema.rs +++ b/arrow/examples/read_csv_infer_schema.rs @@ -17,20 +17,24 @@ extern crate arrow; +#[cfg(feature = "csv")] use arrow::csv; #[cfg(feature = "prettyprint")] use arrow::util::pretty::print_batches; use std::fs::File; fn main() { - let file = File::open("test/data/uk_cities_with_headers.csv").unwrap(); - let builder = csv::ReaderBuilder::new() - .has_header(true) - .infer_schema(Some(100)); - let mut csv = builder.build(file).unwrap(); - let _batch = csv.next().unwrap().unwrap(); - #[cfg(feature = "prettyprint")] + #[cfg(feature = "csv")] { - print_batches(&[_batch]).unwrap(); + let file = File::open("test/data/uk_cities_with_headers.csv").unwrap(); + let builder = csv::ReaderBuilder::new() + .has_header(true) + 
.infer_schema(Some(100)); + let mut csv = builder.build(file).unwrap(); + let _batch = csv.next().unwrap().unwrap(); + #[cfg(feature = "prettyprint")] + { + print_batches(&[_batch]).unwrap(); + } } } diff --git a/arrow/src/csv/reader.rs b/arrow/src/csv/reader.rs index 00f1d7f4b688..5b9fb5df2929 100644 --- a/arrow/src/csv/reader.rs +++ b/arrow/src/csv/reader.rs @@ -49,8 +49,6 @@ use std::fs::File; use std::io::{Read, Seek, SeekFrom}; use std::sync::Arc; -use csv as csv_crate; - use crate::array::{ ArrayRef, BooleanArray, DictionaryArray, PrimitiveArray, StringArray, }; @@ -58,7 +56,7 @@ use crate::datatypes::*; use crate::error::{ArrowError, Result}; use crate::record_batch::RecordBatch; -use self::csv_crate::{ByteRecord, StringRecord}; +use csv_crate::{ByteRecord, StringRecord}; lazy_static! { static ref DECIMAL_RE: Regex = Regex::new(r"^-?(\d+\.\d+)$").unwrap(); diff --git a/arrow/src/csv/writer.rs b/arrow/src/csv/writer.rs index f2f4ce813c65..aa0ed675221f 100644 --- a/arrow/src/csv/writer.rs +++ b/arrow/src/csv/writer.rs @@ -65,8 +65,6 @@ //! } //! ``` -use csv as csv_crate; - use std::io::Write; use crate::datatypes::*; diff --git a/arrow/src/error.rs b/arrow/src/error.rs index 6bfa077f4abd..86896c016882 100644 --- a/arrow/src/error.rs +++ b/arrow/src/error.rs @@ -19,7 +19,6 @@ use std::fmt::{Debug, Display, Formatter}; use std::io::Write; -use csv as csv_crate; use std::error::Error; /// Many different operations in the `arrow` crate return this error type. 
@@ -59,6 +58,7 @@ impl From<::std::io::Error> for ArrowError { } } +#[cfg(feature = "csv")] impl From<csv_crate::Error> for ArrowError { fn from(error: csv_crate::Error) -> Self { match error.kind() { diff --git a/arrow/src/lib.rs b/arrow/src/lib.rs index 30f968c99798..687ce1e8d7da 100644 --- a/arrow/src/lib.rs +++ b/arrow/src/lib.rs @@ -149,6 +149,7 @@ pub mod bitmap; pub mod buffer; mod bytes; pub mod compute; +#[cfg(feature = "csv")] pub mod csv; pub mod datatypes; pub mod error; diff --git a/arrow/src/util/string_writer.rs b/arrow/src/util/string_writer.rs index 2a8175d15623..4c61f183e206 100644 --- a/arrow/src/util/string_writer.rs +++ b/arrow/src/util/string_writer.rs @@ -23,6 +23,8 @@ //! Example: //! //! ``` +//! #[cfg(feature = "csv")] +//! { //! use arrow::array::*; //! use arrow::csv; //! use arrow::datatypes::*; @@ -58,6 +60,7 @@ //! let sw = StringWriter::new(); //! let mut writer = csv::Writer::new(sw); //! writer.write(&batch).unwrap(); +//! } //! ``` use std::io::{Error, ErrorKind, Result, Write};