Skip to content
Merged
13 changes: 10 additions & 3 deletions cpp/src/parquet/encoding.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2372,7 +2372,7 @@ class DeltaBitPackDecoder : public DecoderImpl, virtual public TypedDecoder<DTyp
// DeltaByteArrayDecoder
void SetDecoder(int num_values, std::shared_ptr<::arrow::bit_util::BitReader> decoder) {
this->num_values_ = num_values;
decoder_ = decoder;
decoder_ = std::move(decoder);
InitHeader();
}

Expand Down Expand Up @@ -2459,7 +2459,9 @@ class DeltaBitPackDecoder : public DecoderImpl, virtual public TypedDecoder<DTyp
ParquetException::EofException();
}
if (bit_width_data[i] > kMaxDeltaBitWidth) {
throw ParquetException("delta bit width larger than integer bit width");
throw ParquetException("delta bit width " + std::to_string(bit_width_data[i]) +
" larger than integer bit width " +
std::to_string(kMaxDeltaBitWidth));
}
}
mini_block_idx_ = 0;
Expand All @@ -2479,7 +2481,12 @@ class DeltaBitPackDecoder : public DecoderImpl, virtual public TypedDecoder<DTyp
if (ARROW_PREDICT_FALSE(values_current_mini_block_ == 0)) {
if (ARROW_PREDICT_FALSE(!block_initialized_)) {
buffer[i++] = last_value_;
if (ARROW_PREDICT_FALSE(i == max_values)) break;
if (ARROW_PREDICT_FALSE(i == max_values)) {
if (i != static_cast<int>(total_value_count_)) {
InitBlock();
}
break;
}
Comment on lines 2481 to +2497
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I find this difficult to read, could you add a comment about what this logic is doing?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To be honest, the logic is a bit trickey here, I will try to explain it. I'm bad in English, so maybe you can edit it directly if you can explain it better.

InitBlock();
} else {
++mini_block_idx_;
Expand Down
34 changes: 34 additions & 0 deletions cpp/src/parquet/encoding_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1324,6 +1324,29 @@ class TestDeltaBitPackEncoding : public TestEncodingBase<Type> {
CheckRoundtripSpaced(valid_bits, valid_bits_offset);
}

void ExecuteSteps(int nvalues, int repeats, int read_batch) {
this->InitData(nvalues, repeats);
auto encoder =
MakeTypedEncoder<Type>(Encoding::DELTA_BINARY_PACKED, false, descr_.get());
auto decoder = MakeTypedDecoder<Type>(Encoding::DELTA_BINARY_PACKED, descr_.get());

for (size_t i = 0; i < ROUND_TRIP_TIMES; ++i) {
encoder->Put(draws_, num_values_);
encode_buffer_ = encoder->FlushValues();

decoder->SetData(num_values_, encode_buffer_->data(),
static_cast<int>(encode_buffer_->size()));
int values_decoded_sum = 0;
while (values_decoded_sum < num_values_) {
int values_decoded =
decoder->Decode(decode_buf_ + values_decoded_sum, read_batch);
values_decoded_sum += values_decoded;
}
ASSERT_EQ(num_values_, values_decoded_sum);
ASSERT_NO_FATAL_FAILURE(VerifyResults<c_type>(decode_buf_, draws_, num_values_));
}
}

void CheckRoundtrip() override {
auto encoder =
MakeTypedEncoder<Type>(Encoding::DELTA_BINARY_PACKED, false, descr_.get());
Expand Down Expand Up @@ -1413,5 +1436,16 @@ TYPED_TEST(TestDeltaBitPackEncoding, BasicRoundTrip) {
}
}

TYPED_TEST(TestDeltaBitPackEncoding, ZeroRoundTrip) {
ASSERT_NO_FATAL_FAILURE(this->Execute(1, 1));
ASSERT_NO_FATAL_FAILURE(this->Execute(1, 2));
ASSERT_NO_FATAL_FAILURE(this->Execute(2, 2));
ASSERT_NO_FATAL_FAILURE(this->ExecuteSteps(1, 1, 1));
ASSERT_NO_FATAL_FAILURE(this->ExecuteSteps(1, 2, 1));
ASSERT_NO_FATAL_FAILURE(this->ExecuteSteps(2, 2, 1));
ASSERT_NO_FATAL_FAILURE(this->ExecuteSteps(10, 10, 1));
ASSERT_NO_FATAL_FAILURE(this->ExecuteSteps(10, 10, 3));
}

} // namespace test
} // namespace parquet