Skip to content

Commit 4a1eda5

Browse files
enhancement(clickhouse sink): add support for Arrow complex types (#24409)
* refactor: split arrow encoder into smaller files * enhancement(clickhouse sink): add support for complex types * refactor: use idiomatic Arrow types instead of Arc * refactor: unify primitives, decimal, temporal + complex logic * refactor: use append_option where possible * refactor: use iterator method for building record batch * chore: remove duplicated code in arrow tests * chore: update docs * chore: add changelog fragment * chore: add support for named tuples * chore: improve handling for to_arrow * refactor: use serde_arrow for Arrow encoding * refactor: use nom for clickhouse type parsing * refactor: replace to_arrow() with TryFrom impl * refactor: use iterator chain instead of loops * chore: avoid fallible unwrap when fetching schema * refactor: implement FromStr for ClickHouseType * refactor: use String directly * refactor: use try_collect for schema parsing * refactor: simplify schema parsing with TryFrom for Field * refactor: use form_urlencoded for query string building * refactor: return to single file format/arrow.rs * refactor: defer to serde_arrow error handling * chore: add recursion warning for try_from Co-authored-by: Thomas <[email protected]> * chore: add validation for DataType::Map * refactor: eliminate unreachable!() in ClickHouseType conversion * chore: clippy * chore: update license * Update licenses --------- Co-authored-by: Thomas <[email protected]> Co-authored-by: Thomas <[email protected]>
1 parent 1040f78 commit 4a1eda5

File tree

11 files changed

+1783
-1846
lines changed

11 files changed

+1783
-1846
lines changed

Cargo.lock

Lines changed: 46 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -852,7 +852,7 @@ sinks-azure_blob = ["dep:azure_core", "dep:azure_storage_blob"]
852852
sinks-azure_monitor_logs = []
853853
sinks-blackhole = []
854854
sinks-chronicle = []
855-
sinks-clickhouse = ["dep:rust_decimal", "codecs-arrow"]
855+
sinks-clickhouse = ["dep:nom", "dep:rust_decimal", "codecs-arrow"]
856856
sinks-console = []
857857
sinks-databend = ["dep:databend-client"]
858858
sinks-datadog_events = []

LICENSE-3rdparty.csv

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ bytecheck,https://github.com/djkoloski/bytecheck,MIT,David Koloski <djkoloski@gm
141141
bytecheck_derive,https://github.com/djkoloski/bytecheck,MIT,David Koloski <[email protected]>
142142
bytecount,https://github.com/llogiq/bytecount,Apache-2.0 OR MIT,"Andre Bogus <[email protected]>, Joshua Landau <[email protected]>"
143143
bytemuck,https://github.com/Lokathor/bytemuck,Zlib OR Apache-2.0 OR MIT,Lokathor <[email protected]>
144+
bytemuck_derive,https://github.com/Lokathor/bytemuck,Zlib OR Apache-2.0 OR MIT,Lokathor <[email protected]>
144145
byteorder,https://github.com/BurntSushi/byteorder,Unlicense OR MIT,Andrew Gallant <[email protected]>
145146
bytes,https://github.com/carllerche/bytes,MIT,Carl Lerche <[email protected]>
146147
bytes,https://github.com/tokio-rs/bytes,MIT,"Carl Lerche <[email protected]>, Sean McArthur <[email protected]>"
@@ -458,6 +459,7 @@ macro_magic_core,https://github.com/sam0x17/macro_magic,MIT,The macro_magic_core
458459
macro_magic_core_macros,https://github.com/sam0x17/macro_magic,MIT,The macro_magic_core_macros Authors
459460
macro_magic_macros,https://github.com/sam0x17/macro_magic,MIT,The macro_magic_macros Authors
460461
malloc_buf,https://github.com/SSheldon/malloc_buf,MIT,Steven Sheldon
462+
marrow,https://github.com/chmp/marrow,MIT,Christopher Prohm <[email protected]>
461463
match_cfg,https://github.com/gnzlbg/match_cfg,MIT OR Apache-2.0,gnzlbg <[email protected]>
462464
matchers,https://github.com/hawkw/matchers,MIT,Eliza Weisman <[email protected]>
463465
matchit,https://github.com/ibraheemdev/matchit,MIT AND BSD-3-Clause,Ibraheem Ahmed <[email protected]>
@@ -691,6 +693,7 @@ serde,https://github.com/serde-rs/serde,MIT OR Apache-2.0,"Erick Tryzelaar <eric
691693
serde-aux,https://github.com/iddm/serde-aux,MIT,Victor Polevoy <[email protected]>
692694
serde-toml-merge,https://github.com/jdrouet/serde-toml-merge,MIT,Jeremie Drouet <[email protected]>
693695
serde-value,https://github.com/arcnmx/serde-value,MIT,arcnmx
696+
serde_arrow,https://github.com/chmp/serde_arrow,MIT,Christopher Prohm <[email protected]>
694697
serde_bytes,https://github.com/serde-rs/bytes,MIT OR Apache-2.0,David Tolnay <[email protected]>
695698
serde_core,https://github.com/serde-rs/serde,MIT OR Apache-2.0,"Erick Tryzelaar <[email protected]>, David Tolnay <[email protected]>"
696699
serde_derive,https://github.com/serde-rs/serde,MIT OR Apache-2.0,"Erick Tryzelaar <[email protected]>, David Tolnay <[email protected]>"
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
The `clickhouse` sink now supports complex data types (Array, Map, and Tuple) when using the `arrow_stream` format.
2+
3+
authors: benjamin-awd

lib/codecs/Cargo.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ path = "tests/bin/generate-avro-fixtures.rs"
1414

1515
[dependencies]
1616
apache-avro = { version = "0.20.0", default-features = false }
17-
arrow = { version = "56.2.0", default-features = false, features = ["ipc"] }
17+
arrow = { version = "56.2.0", default-features = false, features = ["ipc"], optional = true }
1818
async-trait.workspace = true
1919
bytes.workspace = true
2020
chrono.workspace = true
@@ -38,6 +38,7 @@ regex.workspace = true
3838
serde.workspace = true
3939
serde_with = { version = "3.14.0", default-features = false, features = ["std", "macros", "chrono_0_4"] }
4040
serde_json.workspace = true
41+
serde_arrow = { version = "0.13", features = ["arrow-56"], optional = true }
4142
serde-aux = { version = "4.5", optional = true }
4243
smallvec = { version = "1", default-features = false, features = ["union"] }
4344
snafu.workspace = true
@@ -67,7 +68,7 @@ uuid.workspace = true
6768
vrl.workspace = true
6869

6970
[features]
70-
arrow = []
71+
arrow = ["dep:arrow", "dep:serde_arrow"]
7172
opentelemetry = ["dep:opentelemetry-proto"]
7273
syslog = ["dep:syslog_loose", "dep:strum", "dep:derive_more", "dep:serde-aux", "dep:toml"]
7374
test = []

0 commit comments

Comments
 (0)