Commit 748a272

kyligence-git and Chang chen authored
[GLUTEN-1632][CH]Daily Update Clickhouse Version (20250629) (#10080)
* [GLUTEN-1632][CH]Daily Update Clickhouse Version (20250629)
* Fix Build due to ClickHouse/ClickHouse#80931
* Fix Build due to ClickHouse/ClickHouse#81976
* Fix Build due to ClickHouse/ClickHouse#82508
* Try to Fix issue caused by ClickHouse/ClickHouse#81754, see ClickHouse/ClickHouse#82379
* Fix UT due to ClickHouse/ClickHouse#82358

---------

Co-authored-by: kyligence-git <[email protected]>
Co-authored-by: Chang chen <[email protected]>
1 parent 7e17ea3 commit 748a272

26 files changed: +83, -69 lines

cpp-ch/clickhouse.version

Lines changed: 2 additions & 2 deletions
@@ -1,3 +1,3 @@
 CH_ORG=Kyligence
-CH_BRANCH=rebase_ch/20250621
-CH_COMMIT=8960b38d0bf
+CH_BRANCH=rebase_ch/20250629
+CH_COMMIT=addbf00cfd7

cpp-ch/local-engine/Common/DebugUtils.cpp

Lines changed: 1 addition & 1 deletion
@@ -361,7 +361,7 @@ void dumpMessage(const google::protobuf::Message & message, const char * type, b
 
     if (!force && !logger->debug())
         return;
-    pb_util::JsonOptions options;
+    pb_util::JsonPrintOptions options;
     std::string json;
     if (auto s = MessageToJsonString(message, &json, options); !s.ok())
        throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Can not convert {} to Json", type);
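The JsonOptions to JsonPrintOptions rename follows the protobuf JSON API: the options struct is google::protobuf::util::JsonPrintOptions, and the shorter JsonOptions name was only an alias that newer protobuf headers appear to drop, which is what broke the build. A standalone sketch of that API, independent of the gluten code above and using google::protobuf::Empty purely as a placeholder message:

// Standalone sketch of the protobuf JSON printing used by dumpMessage above.
// google::protobuf::Empty is only a placeholder; any generated Message works.
#include <google/protobuf/empty.pb.h>
#include <google/protobuf/util/json_util.h>
#include <iostream>
#include <string>

int main()
{
    google::protobuf::Empty message;
    google::protobuf::util::JsonPrintOptions options;
    options.add_whitespace = true; // pretty-print, convenient for debug logs

    std::string json;
    auto status = google::protobuf::util::MessageToJsonString(message, &json, options);
    if (!status.ok())
        std::cerr << "conversion failed: " << status.ToString() << '\n';
    else
        std::cout << json << '\n';
    return 0;
}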

cpp-ch/local-engine/Disks/registerGlutenDisks.cpp

Lines changed: 2 additions & 2 deletions
@@ -87,7 +87,7 @@ void registerGlutenDisks(bool global_skip_access_check)
             config_prefix,
             object_storage_creator);
 
-        disk->startup(context, skip_access_check);
+        disk->startup(skip_access_check);
         return disk;
     };
 
@@ -134,7 +134,7 @@ void registerGlutenDisks(bool global_skip_access_check)
             config_prefix,
             object_storage_creator);
 
-        disk->startup(context, skip_access_check);
+        disk->startup(skip_access_check);
         return disk;
     };

cpp-ch/local-engine/Join/StorageJoinFromReadBuffer.cpp

Lines changed: 7 additions & 7 deletions
@@ -101,13 +101,13 @@ StorageJoinFromReadBuffer::StorageJoinFromReadBuffer(
     collectAllInputs(data, right_sample_block);
 }
 
-void StorageJoinFromReadBuffer::buildJoin(Blocks & data, const Block header, std::shared_ptr<DB::TableJoin> analyzed_join)
+void StorageJoinFromReadBuffer::buildJoin(const Blocks & data, const Block & header, std::shared_ptr<DB::TableJoin> analyzed_join)
 {
     auto build_join = [&]
     {
-        join = std::make_shared<HashJoin>(analyzed_join, header, overwrite, row_count);
-        for (Block block : data)
-            join->addBlockToJoin(std::move(block), true);
+        join = std::make_shared<HashJoin>(analyzed_join, header, overwrite, row_count, "", false, true);
+        for (const Block& block : data)
+            join->addBlockToJoin(block, true);
     };
     /// Record memory usage in Total Memory Tracker
     ThreadFromGlobalPoolNoTracingContextPropagation thread(build_join);
@@ -132,7 +132,7 @@ void StorageJoinFromReadBuffer::buildJoinLazily(DB::Block header, std::shared_pt
     std::unique_lock lock(join_mutex);
     if (join)
         return;
-    join = std::make_shared<HashJoin>(analyzed_join, header, overwrite, row_count);
+    join = std::make_shared<HashJoin>(analyzed_join, header, overwrite, row_count, "", false, true);
     while (!input_blocks.empty())
     {
         auto & block = *input_blocks.begin();
@@ -154,11 +154,11 @@ void StorageJoinFromReadBuffer::buildJoinLazily(DB::Block header, std::shared_pt
 }
 
 
-/// The column names of 'rgiht_header' could be different from the ones in `input_blocks`, and we must
+/// The column names of 'right_header' could be different from the ones in `input_blocks`, and we must
 /// use 'right_header' to build the HashJoin. Otherwise, it will cause exceptions with name mismatches.
 ///
 /// In most cases, 'getJoinLocked' is called only once, and the input_blocks should not be too large.
-/// This is will be OK.
+/// This will be OK.
 DB::JoinPtr StorageJoinFromReadBuffer::getJoinLocked(std::shared_ptr<DB::TableJoin> analyzed_join, DB::ContextPtr /*context*/)
 {
     if ((analyzed_join->forceNullableRight() && !use_nulls)
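Aside from the extra HashJoin constructor arguments, the loop change above is a plain copy-avoidance fix: for (Block block : data) copies every block before handing it to addBlockToJoin, while for (const Block & block : data) iterates over the existing blocks. A self-contained illustration with a generic counter type (not the project's Block):

// Counts copy-constructions to show the difference between by-value and
// by-const-reference range-for loops; Counted is a stand-in, not DB::Block.
#include <iostream>
#include <vector>

struct Counted
{
    static inline int copies = 0;
    Counted() = default;
    Counted(const Counted &) { ++copies; }
};

int main()
{
    std::vector<Counted> data(3);

    for (Counted c : data) // copies each element
        (void)c;
    std::cout << "by value:     " << Counted::copies << " copies\n"; // prints 3

    Counted::copies = 0;
    for (const Counted & c : data) // no copies, just references
        (void)c;
    std::cout << "by reference: " << Counted::copies << " copies\n"; // prints 0
    return 0;
}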

cpp-ch/local-engine/Join/StorageJoinFromReadBuffer.h

Lines changed: 1 addition & 1 deletion
@@ -72,7 +72,7 @@ class StorageJoinFromReadBuffer
     bool is_null_aware_anti_join;
 
     void readAllBlocksFromInput(DB::ReadBuffer & in);
-    void buildJoin(DB::Blocks & data, const DB::Block header, std::shared_ptr<DB::TableJoin> analyzed_join);
+    void buildJoin(const DB::Blocks & data, const DB::Block & header, std::shared_ptr<DB::TableJoin> analyzed_join);
     void collectAllInputs(DB::Blocks & data, const DB::Block header);
     void buildJoinLazily(DB::Block header, std::shared_ptr<DB::TableJoin> analyzed_join);
 };

cpp-ch/local-engine/Parser/RelParsers/CrossRelParser.cpp

Lines changed: 0 additions & 4 deletions
@@ -171,10 +171,6 @@ DB::QueryPlanPtr CrossRelParser::parseJoin(const substrait::CrossRel & join, DB:
     QueryPlanPtr query_plan;
     if (storage_join)
     {
-        /// FIXME: There is mistake in HashJoin::needUsedFlagsForPerRightTableRow which returns true when
-        /// join clauses is empty. But in fact there should not be any join clause in cross join.
-        table_join->addDisjunct();
-
         auto broadcast_hash_join = storage_join->getJoinLocked(table_join, context);
         // table_join->resetKeys();
         QueryPlanStepPtr join_step = std::make_unique<FilledJoinStep>(left->getCurrentHeader(), broadcast_hash_join, 8192);

cpp-ch/local-engine/Storages/Parquet/VectorizedParquetRecordReader.h

Lines changed: 0 additions & 3 deletions
@@ -214,9 +214,6 @@ class VectorizedParquetBlockInputFormat final : public DB::IInputFormat
 protected:
     void onCancel() noexcept override { is_stopped = 1; }
 
-    // TODO: create ColumnIndexFilter here, currently disable it now.
-    void setKeyCondition(const std::shared_ptr<const DB::KeyCondition> & key_condition_) override { }
-
 public:
     VectorizedParquetBlockInputFormat(
         DB::ReadBuffer & in_,

cpp-ch/local-engine/Storages/SubstraitSource/ExcelTextFormatFile.cpp

Lines changed: 2 additions & 1 deletion
@@ -62,7 +62,8 @@ bool ExcelTextFormatFile::useThis(const DB::ContextPtr & context)
     return settingsEqual(context->getSettingsRef(), USE_EXCEL_PARSER, "true");
 }
 
-FormatFile::InputFormatPtr ExcelTextFormatFile::createInputFormat(const DB::Block & header)
+FormatFile::InputFormatPtr
+ExcelTextFormatFile::createInputFormat(const DB::Block & header, const std::shared_ptr<const DB::ActionsDAG> & /*filter_actions_dag*/)
 {
     auto read_buffer = read_buffer_builder->build(file_info);

cpp-ch/local-engine/Storages/SubstraitSource/ExcelTextFormatFile.h

Lines changed: 2 additions & 1 deletion
@@ -47,7 +47,8 @@ class ExcelTextFormatFile : public FormatFile
 
     ~ExcelTextFormatFile() override = default;
 
-    FormatFile::InputFormatPtr createInputFormat(const DB::Block & header) override;
+    FormatFile::InputFormatPtr
+    createInputFormat(const DB::Block & header, const std::shared_ptr<const DB::ActionsDAG> & filter_actions_dag = nullptr) override;
 
     bool supportSplit() const override { return true; }
     String getFileFormat() const override { return "ExcelText"; }
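A small C++ note on the declaration above: the = nullptr default lives on the override, and default arguments are bound to the static type of the call expression rather than being dispatched virtually, so which default applies depends on whether the call goes through the base or the derived type. A self-contained illustration with hypothetical Base/Derived classes (not the project's types):

// Default arguments on virtual functions are resolved statically, even though
// the function body is chosen dynamically. Base/Derived are hypothetical.
#include <iostream>

struct Base
{
    virtual ~Base() = default;
    virtual void f(int x = 1) { std::cout << "Base::f(" << x << ")\n"; }
};

struct Derived : Base
{
    void f(int x = 2) override { std::cout << "Derived::f(" << x << ")\n"; }
};

int main()
{
    Derived d;
    Base & b = d;
    b.f(); // prints Derived::f(1): Derived's body, Base's default argument
    d.f(); // prints Derived::f(2): called through Derived, so Derived's default
    return 0;
}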

cpp-ch/local-engine/Storages/SubstraitSource/FileReader.cpp

Lines changed: 6 additions & 9 deletions
@@ -274,17 +274,12 @@ std::unique_ptr<NormalFileReader> createNormalFileReader(
     const FormatFilePtr & file,
     const DB::Block & to_read_header_,
     const DB::Block & output_header_,
-    const std::shared_ptr<const DB::KeyCondition> & key_condition = nullptr,
+    const std::shared_ptr<const DB::ActionsDAG> & filter_actions_dag = nullptr,
     const ColumnIndexFilterPtr & column_index_filter = nullptr)
 {
     file->initialize(column_index_filter);
     auto createInputFormat = [&](const DB::Block & new_read_header_) -> FormatFile::InputFormatPtr
-    {
-        auto input_format = file->createInputFormat(new_read_header_);
-        if (key_condition && input_format)
-            input_format->inputFormat().setKeyCondition(key_condition);
-        return input_format;
-    };
+    { return file->createInputFormat(new_read_header_, filter_actions_dag); };
 
     if (file->getFileInfo().has_iceberg())
         return iceberg::IcebergReader::create(file, to_read_header_, output_header_, createInputFormat);
@@ -316,11 +311,13 @@ std::unique_ptr<NormalFileReader> createNormalFileReader(
         return std::make_unique<NormalFileReader>(file, to_read_header_, output_header_, input_format);
     }
 }
+
+/// TODO Remove ColumnIndexFilterPtr
 std::unique_ptr<BaseReader> BaseReader::create(
     const FormatFilePtr & current_file,
     const DB::Block & readHeader,
     const DB::Block & outputHeader,
-    const std::shared_ptr<const DB::KeyCondition> & key_condition,
+    const std::shared_ptr<const DB::ActionsDAG> & filter_actions_dag,
     const ColumnIndexFilterPtr & column_index_filter)
 {
     if (!readHeader)
@@ -335,7 +332,7 @@ std::unique_ptr<BaseReader> BaseReader::create(
         }
     }
 
-    return createNormalFileReader(current_file, readHeader, outputHeader, key_condition, column_index_filter);
+    return createNormalFileReader(current_file, readHeader, outputHeader, filter_actions_dag, column_index_filter);
 }
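The refactor above stops mutating the freshly created input format with setKeyCondition and instead hands the ActionsDAG filter straight to createInputFormat, letting each file format decide what to do with it. A rough sketch of that shape with hypothetical stand-in types (FilterDAG, InputFormat), not the actual gluten/ClickHouse classes:

// Hypothetical sketch: the filter travels with the creation call instead of
// being set on the object afterwards. FilterDAG/InputFormat are stand-ins.
#include <functional>
#include <memory>

struct FilterDAG {};
struct InputFormat { std::shared_ptr<const FilterDAG> filter; };

using InputFormatPtr = std::shared_ptr<InputFormat>;
using Factory = std::function<InputFormatPtr(const std::shared_ptr<const FilterDAG> &)>;

InputFormatPtr makeReader(const Factory & factory, const std::shared_ptr<const FilterDAG> & filter)
{
    return factory(filter); // no post-construction setter needed
}

int main()
{
    Factory factory = [](const std::shared_ptr<const FilterDAG> & f) -> InputFormatPtr
    {
        auto format = std::make_shared<InputFormat>();
        format->filter = f; // each format decides how (or whether) to use it
        return format;
    };

    auto reader = makeReader(factory, std::make_shared<const FilterDAG>());
    return reader->filter ? 0 : 1;
}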