Skip to content

Commit b85f20b

Browse files
authored
GH-48992: [Ruby] Add support for writing large UTF-8 array (#48993)
### Rationale for this change

It's the large variant of the UTF-8 array.

### What changes are included in this PR?

* Add `ArrowFormat::LargeUTF8Type#to_flatbuffers`
* Add large UTF-8 array support to `#values` and `#raw_records`

### Are these changes tested?

Yes.

### Are there any user-facing changes?

Yes.

* GitHub Issue: #48992

Authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
1 parent 685873e commit b85f20b

File tree

7 files changed

+48
-0
lines changed

7 files changed

+48
-0
lines changed

ruby/red-arrow-format/lib/arrow-format/type.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -574,6 +574,10 @@ def build_array(size, validity_buffer, offsets_buffer, values_buffer)
574574
offsets_buffer,
575575
values_buffer)
576576
end
577+
578+
def to_flatbuffers
579+
FB::LargeUtf8::Data.new
580+
end
577581
end
578582

579583
class FixedSizeBinaryType < Type

ruby/red-arrow-format/test/test-writer.rb

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ def convert_type(red_arrow_type)
5252
ArrowFormat::LargeBinaryType.singleton
5353
when Arrow::StringDataType
5454
ArrowFormat::UTF8Type.singleton
55+
when Arrow::LargeStringDataType
56+
ArrowFormat::LargeUTF8Type.singleton
5557
else
5658
raise "Unsupported type: #{red_arrow_type.inspect}"
5759
end
@@ -298,6 +300,17 @@ def test_write
298300
@values)
299301
end
300302
end
303+
304+
sub_test_case("LargeString") do
305+
def build_array
306+
Arrow::LargeStringArray.new(["Hello", nil, "World"])
307+
end
308+
309+
def test_write
310+
assert_equal(["Hello", nil, "World"],
311+
@values)
312+
end
313+
end
301314
end
302315
end
303316
end

ruby/red-arrow/ext/arrow/converters.hpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,14 @@ namespace red_arrow {
175175
length);
176176
}
177177

178+
inline VALUE convert(const arrow::LargeStringArray& array,
179+
const int64_t i) {
180+
int64_t length;
181+
const auto value = array.GetValue(i, &length);
182+
return rb_utf8_str_new(reinterpret_cast<const char*>(value),
183+
length);
184+
}
185+
178186
inline VALUE convert(const arrow::FixedSizeBinaryArray& array,
179187
const int64_t i) {
180188
return rb_enc_str_new(reinterpret_cast<const char*>(array.Value(i)),

ruby/red-arrow/ext/arrow/raw-records.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ namespace red_arrow {
9090
VISIT(Binary)
9191
VISIT(LargeBinary)
9292
VISIT(String)
93+
VISIT(LargeString)
9394
VISIT(FixedSizeBinary)
9495
VISIT(Date32)
9596
VISIT(Date64)
@@ -227,6 +228,7 @@ namespace red_arrow {
227228
VISIT(Binary)
228229
VISIT(LargeBinary)
229230
VISIT(String)
231+
VISIT(LargeString)
230232
VISIT(FixedSizeBinary)
231233
VISIT(Date32)
232234
VISIT(Date64)

ruby/red-arrow/ext/arrow/values.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ namespace red_arrow {
7171
VISIT(Binary)
7272
VISIT(LargeBinary)
7373
VISIT(String)
74+
VISIT(LargeString)
7475
VISIT(FixedSizeBinary)
7576
VISIT(Date32)
7677
VISIT(Date64)

ruby/red-arrow/test/raw-records/test-basic-arrays.rb

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,16 @@ def test_string
177177
assert_equal(records, actual_records(target))
178178
end
179179

180+
def test_large_string
181+
records = [
182+
["Ruby"],
183+
[nil],
184+
["\u3042"], # U+3042 HIRAGANA LETTER A
185+
]
186+
target = build({column: :large_string}, records)
187+
assert_equal(records, actual_records(target))
188+
end
189+
180190
def test_date32
181191
records = [
182192
[Date.new(1960, 1, 1)],

ruby/red-arrow/test/values/test-basic-arrays.rb

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,16 @@ def test_string
167167
assert_equal(values, target.values)
168168
end
169169

170+
def test_large_string
171+
values = [
172+
"Ruby",
173+
nil,
174+
"\u3042", # U+3042 HIRAGANA LETTER A
175+
]
176+
target = build(Arrow::LargeStringArray.new(values))
177+
assert_equal(values, target.values)
178+
end
179+
170180
def test_date32
171181
values = [
172182
Date.new(1960, 1, 1),

0 commit comments

Comments
 (0)