diff --git a/03631_hive_columns_not_in_format_header.reference b/03631_hive_columns_not_in_format_header.reference new file mode 100644 index 000000000000..231eebbbb627 --- /dev/null +++ b/03631_hive_columns_not_in_format_header.reference @@ -0,0 +1,2 @@ +1 +raw_blob String diff --git a/03631_hive_columns_not_in_format_header.sql b/03631_hive_columns_not_in_format_header.sql new file mode 100644 index 000000000000..895f7aa4dfc0 --- /dev/null +++ b/03631_hive_columns_not_in_format_header.sql @@ -0,0 +1,13 @@ +-- Tags: no-parallel, no-fasttest, no-random-settings + +INSERT INTO FUNCTION s3( + s3_conn, + filename='03631', + format=Parquet, + partition_strategy='hive', + partition_columns_in_data_file=1) PARTITION BY (year, country) SELECT 'Brazil' as country, 2025 as year, 1 as id; + +-- distinct because minio isn't cleaned up +SELECT count(distinct year) FROM s3(s3_conn, filename='03631/**.parquet', format=RawBLOB) SETTINGS use_hive_partitioning=1; + +DESCRIBE s3(s3_conn, filename='03631/**.parquet', format=RawBLOB) SETTINGS use_hive_partitioning=1; diff --git a/src/Storages/prepareReadingFromFormat.cpp b/src/Storages/prepareReadingFromFormat.cpp index 3b31a9b0d2ae..e84005511bb4 100644 --- a/src/Storages/prepareReadingFromFormat.cpp +++ b/src/Storages/prepareReadingFromFormat.cpp @@ -234,7 +234,12 @@ ReadFromFormatInfo prepareReadingFromFormat( } /// Create header for InputFormat with columns that will be read from the data. - info.format_header = storage_snapshot->getSampleBlockForColumns(info.columns_description.getNamesOfPhysical()); + for (const auto & column : info.columns_description) + { + /// Never read hive partition columns from the data file. 
+ /// This fixes https://github.com/ClickHouse/ClickHouse/issues/87515 + if (!hive_parameters.hive_partition_columns_to_read_from_file_path_map.contains(column.name)) + info.format_header.insert(ColumnWithTypeAndName{column.type, column.name}); + }