diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 7c391a537a8e..bed87f627282 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -6,6 +6,7 @@ env: AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + REGRESSION_RESULTS_URL: altinity-build-artifacts/${{github.event.number}}/$GITHUB_SHA on: # yamllint disable-line rule:truthy @@ -153,8 +154,9 @@ jobs: - name: Build run: | sudo rm -fr "$TEMP_PATH" - mkdir -p "$TEMP_PATH" - cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + mkdir -p "$TEMP_PATH/build_check/package_release" + cd .. && tar czf $TEMP_PATH/build_source.src.tar.gz ClickHouse/ + cd $TEMP_PATH && tar xvzf $TEMP_PATH/build_source.src.tar.gz cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} @@ -569,6 +571,460 @@ jobs: # shellcheck disable=SC2046 docker rm -f $(docker ps -a -q) ||: sudo rm -fr "$TEMP_PATH" +############################################################################################# +##################################### REGRESSION TESTS ###################################### +############################################################################################# + regression_start: + ## Not depending on the tests above since they can fail at any given moment. + needs: [BuilderDebRelease] + runs-on: ubuntu-latest + steps: + - run: true + + regression_common: + strategy: + fail-fast: false + matrix: + SUITE: [aes_encryption, aggregate_functions, atomic_insert, base_58, clickhouse_keeper, datetime64_extended_range, disk_level_encryption, dns, example, extended_precision_data_types, kafka, kerberos, lightweight_delete, map_type, part_moves_between_shards, rbac, selects, ssl_server, tiered_storage, window_functions] + needs: [regression_start] + runs-on: [self-hosted, stress-tester] + env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_REPORT_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_REPORT_SECRET_ACCESS_KEY }} + AWS_DEFAULT_REGION: ${{ secrets.AWS_REPORT_REGION }} + steps: + - name: Checkout regression repo + uses: actions/checkout@v3 + with: + repository: Altinity/clickhouse-regression + ref: releases + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + REPORTS_PATH=${{runner.temp}}/reports_dir + SUITE=${{ matrix.SUITE }} + artifacts=public + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Setup + run: .github/setup.sh + - name: Get deb url + run: python3 .github/get-deb-url.py --reports-path ${{ env.REPORTS_PATH }} --github-env $GITHUB_ENV + - name: Run ${{ env.SUITE }} suite + run: python3 + -u ${{ env.SUITE }}/regression.py + --clickhouse-binary-path ${{ env.clickhouse_binary_path }} + --test-to-end + --local + --collect-service-logs + --output classic + --parallel 1 + --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_binary_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.id="$GITHUB_RUN_ID" job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="x86_64" + --log raw.log + - name: Create and upload logs + if: always() + run: .github/create_and_upload_logs.sh 1 + env: + artifact_s3_dir: 
build/v${{ env.version }}/$GITHUB_SHA + - uses: actions/upload-artifact@v3 + if: always() + with: + name: ${{ env.SUITE }}-artifacts + path: | + ./report.html + ./*.log.txt + ./*.log + ./*.html + ./*/_instances/*.log + ./*/_instances/*/logs/*.log + ./*/*/_instances/*/logs/*.log + ./*/*/_instances/*.log + + benchmark: + strategy: + fail-fast: false + matrix: + STORAGE: [minio, aws_s3, gcs] + needs: [regression_start] + runs-on: [self-hosted, stress-tester] + env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_REPORT_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_REPORT_SECRET_ACCESS_KEY }} + AWS_DEFAULT_REGION: ${{ secrets.AWS_REPORT_REGION }} + steps: + - name: Checkout regression repo + uses: actions/checkout@v3 + with: + repository: Altinity/clickhouse-regression + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + REPORTS_PATH=${{runner.temp}}/reports_dir + SUITE=ontime_benchmark + STORAGE=/${{ matrix.STORAGE }} + artifacts=public + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Setup + run: .github/setup.sh + - name: Get deb url + run: python3 .github/get-deb-url.py --reports-path ${{ env.REPORTS_PATH }} --github-env $GITHUB_ENV + - name: Run ${{ env.SUITE }} suite + run: python3 + -u ${{ env.SUITE }}/benchmark.py + --clickhouse-binary-path ${{ env.clickhouse_binary_path }} + --storage ${{ matrix.STORAGE }} + --gcs-uri ${{ secrets.REGRESSION_GCS_URI }} + --gcs-key-id ${{ secrets.REGRESSION_GCS_KEY_ID }} + --gcs-key-secret ${{ secrets.REGRESSION_GCS_KEY_SECRET }} + --aws-s3-bucket ${{ secrets.REGRESSION_AWS_S3_BUCKET }} + --aws-s3-region ${{ secrets.REGRESSION_AWS_S3_REGION }} + --aws-s3-key-id ${{ secrets.REGRESSION_AWS_S3_KEY_ID }} + --aws-s3-access-key ${{ secrets.REGRESSION_AWS_S3_SECRET_ACCESS_KEY }} + --test-to-end + --local + --collect-service-logs + --output classic + --parallel 1 + --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_binary_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.id="$GITHUB_RUN_ID" job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="x86_64" + --log raw.log + - name: Create and upload logs + if: always() + run: .github/create_and_upload_logs.sh 1 + env: + artifact_s3_dir: build/v${{ env.version }}/$GITHUB_SHA + - uses: actions/upload-artifact@v3 + if: always() + with: + name: ${{ env.SUITE }}-minio-artifacts + path: | + ./report.html + ./*.log.txt + ./*.log + ./*.html + ./*/_instances/*.log + ./*/_instances/*/logs/*.log + ./*/*/_instances/*/logs/*.log + ./*/*/_instances/*.log + + ldap: + strategy: + fail-fast: false + matrix: + SUITE: [authentication, external_user_directory, role_mapping] + needs: [regression_start] + runs-on: [self-hosted, stress-tester] + env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_REPORT_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_REPORT_SECRET_ACCESS_KEY }} + AWS_DEFAULT_REGION: ${{ secrets.AWS_REPORT_REGION }} + steps: + - name: Checkout regression repo + uses: actions/checkout@v3 + with: + repository: Altinity/clickhouse-regression + ref: releases + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + REPORTS_PATH=${{runner.temp}}/reports_dir + SUITE=ldap/${{ matrix.SUITE }} + artifacts=public + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Setup + run: 
.github/setup.sh + - name: Get deb url + run: python3 .github/get-deb-url.py --reports-path ${{ env.REPORTS_PATH }} --github-env $GITHUB_ENV + - name: Run ${{ env.SUITE }} suite + run: python3 + -u ${{ env.SUITE }}/regression.py + --clickhouse-binary-path ${{ env.clickhouse_binary_path }} + --test-to-end + --local + --collect-service-logs + --output classic + --parallel 1 + --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_binary_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.id="$GITHUB_RUN_ID" job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="x86_64" + --log raw.log + - name: Create and upload logs + if: always() + run: .github/create_and_upload_logs.sh 1 + env: + artifact_s3_dir: build/v${{ env.version }}/$GITHUB_SHA + - uses: actions/upload-artifact@v3 + if: always() + with: + name: ldap-authentication-artifacts + path: | + ./report.html + ./*.log.txt + ./*.log + ./*.html + ./*/_instances/*.log + ./*/_instances/*/logs/*.log + ./*/*/_instances/*/logs/*.log + ./*/*/_instances/*.log + + parquet: + needs: [regression_start] + runs-on: [self-hosted, stress-tester] + env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_REPORT_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_REPORT_SECRET_ACCESS_KEY }} + AWS_DEFAULT_REGION: ${{ secrets.AWS_REPORT_REGION }} + steps: + - name: Checkout regression repo + uses: actions/checkout@v3 + with: + repository: Altinity/clickhouse-regression + ref: releases + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + REPORTS_PATH=${{runner.temp}}/reports_dir + SUITE=parquet + artifacts=public + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Setup + run: .github/setup.sh + - name: Get deb url + run: python3 .github/get-deb-url.py --reports-path ${{ env.REPORTS_PATH }} --github-env $GITHUB_ENV + - name: Run ${{ env.SUITE }} suite + run: python3 + -u ${{ env.SUITE }}/regression.py + --clickhouse-binary-path ${{ env.clickhouse_binary_path }} + --test-to-end + --local + --collect-service-logs + --output classic + --parallel 1 + --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_binary_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.id="$GITHUB_RUN_ID" job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="x86_64" + --log raw.log + --storage minio + --storage aws_s3 + --aws-s3-bucket ${{ secrets.REGRESSION_AWS_S3_BUCKET }} + --aws-s3-region ${{ secrets.REGRESSION_AWS_S3_REGION }} + --aws-s3-key-id ${{ secrets.REGRESSION_AWS_S3_KEY_ID }} + --aws-s3-access-key ${{ secrets.REGRESSION_AWS_S3_SECRET_ACCESS_KEY }} + --storage gcs + --gcs-uri ${{ secrets.REGRESSION_GCS_URI }} + --gcs-key-id ${{ secrets.REGRESSION_GCS_KEY_ID }} + --gcs-key-secret ${{ secrets.REGRESSION_GCS_KEY_SECRET }} + - name: Create and upload logs + if: always() + run: .github/create_and_upload_logs.sh 1 + env: + artifact_s3_dir: build/v${{ env.version }}/$GITHUB_SHA + - uses: actions/upload-artifact@v3 + if: always() + with: + name: ${{ env.SUITE }}-artifacts + path: | + ./report.html + ./*.log.txt + ./*.log + ./*.html + ./*/_instances/*.log + ./*/_instances/*/logs/*.log + ./*/*/_instances/*/logs/*.log + 
./*/*/_instances/*.log + + s3: + strategy: + fail-fast: false + matrix: + STORAGE: [minio, aws_s3, gcs] + needs: [regression_start] + runs-on: [self-hosted, stress-tester] + env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_REPORT_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_REPORT_SECRET_ACCESS_KEY }} + AWS_DEFAULT_REGION: ${{ secrets.AWS_REPORT_REGION }} + steps: + - name: Checkout regression repo + uses: actions/checkout@v3 + with: + repository: Altinity/clickhouse-regression + ref: releases + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + REPORTS_PATH=${{runner.temp}}/reports_dir + SUITE=s3 + STORAGE=/${{ matrix.STORAGE }} + artifacts=public + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Setup + run: .github/setup.sh + - name: Get deb url + run: python3 .github/get-deb-url.py --reports-path ${{ env.REPORTS_PATH }} --github-env $GITHUB_ENV + - name: Run ${{ env.SUITE }} suite + run: python3 + -u ${{ env.SUITE }}/regression.py + --clickhouse-binary-path ${{ env.clickhouse_binary_path }} + --test-to-end + --local + --collect-service-logs + --output classic + --parallel 1 + --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_binary_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.id="$GITHUB_RUN_ID" job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="x86_64" + --log raw.log + --storage ${{ matrix.STORAGE }} + --gcs-uri ${{ secrets.REGRESSION_GCS_URI }} + --gcs-key-id ${{ secrets.REGRESSION_GCS_KEY_ID }} + --gcs-key-secret ${{ secrets.REGRESSION_GCS_KEY_SECRET }} + --aws-s3-bucket ${{ secrets.REGRESSION_AWS_S3_BUCKET }} + --aws-s3-region ${{ secrets.REGRESSION_AWS_S3_REGION }} + --aws-s3-key-id ${{ secrets.REGRESSION_AWS_S3_KEY_ID }} + --aws-s3-access-key ${{ secrets.REGRESSION_AWS_S3_SECRET_ACCESS_KEY }} + - name: Create and upload logs + if: always() + run: .github/create_and_upload_logs.sh 1 + env: + artifact_s3_dir: build/v${{ env.version }}/$GITHUB_SHA + - uses: actions/upload-artifact@v3 + if: always() + with: + name: ${{ env.SUITE }}-${{ matrix.STORAGE }}-artifacts + path: | + ./report.html + ./*.log.txt + ./*.log + ./*.html + ./*/_instances/*.log + ./*/_instances/*/logs/*.log + ./*/*/_instances/*/logs/*.log + ./*/*/_instances/*.log + + tiered_storage_s3: + strategy: + fail-fast: false + matrix: + STORAGE: [minio, s3amazon, s3gcs] + needs: [regression_start] + runs-on: [self-hosted, stress-tester] + env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_REPORT_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_REPORT_SECRET_ACCESS_KEY }} + AWS_DEFAULT_REGION: ${{ secrets.AWS_REPORT_REGION }} + steps: + - name: Checkout regression repo + uses: actions/checkout@v3 + with: + repository: Altinity/clickhouse-regression + ref: releases + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + REPORTS_PATH=${{runner.temp}}/reports_dir + SUITE=tiered_storage + STORAGE=/${{ matrix.STORAGE }} + artifacts=public + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Setup + run: .github/setup.sh + - name: Get deb url + run: python3 .github/get-deb-url.py --reports-path ${{ env.REPORTS_PATH }} --github-env $GITHUB_ENV + - name: Run ${{ env.SUITE }} suite + run: python3 + -u ${{ env.SUITE }}/regression.py + --clickhouse-binary-path ${{ 
env.clickhouse_binary_path }} + --test-to-end + --local + --collect-service-logs + --output classic + --parallel 1 + --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_binary_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.id="$GITHUB_RUN_ID" job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="x86_64" + --log raw.log + --aws-s3-access-key ${{ secrets.REGRESSION_AWS_S3_SECRET_ACCESS_KEY }} + --aws-s3-key-id ${{ secrets.REGRESSION_AWS_S3_KEY_ID }} + --aws-s3-uri https://s3.${{ secrets.REGRESSION_AWS_S3_REGION}}.amazonaws.com/${{ secrets.REGRESSION_AWS_S3_BUCKET }}/data/ + --gcs-key-id ${{ secrets.REGRESSION_GCS_KEY_ID }} + --gcs-key-secret ${{ secrets.REGRESSION_GCS_KEY_SECRET }} + --gcs-uri ${{ secrets.REGRESSION_GCS_URI }} + --with-${{ matrix.STORAGE }} + - name: Create and upload logs + if: always() + run: .github/create_and_upload_logs.sh 1 + env: + artifact_s3_dir: build/v${{ env.version }}/$GITHUB_SHA + - uses: actions/upload-artifact@v3 + if: always() + with: + name: ${{ env.SUITE }}-${{ matrix.STORAGE }}-artifacts + path: | + ./report.html + ./*.log.txt + ./*.log + ./*.html + ./*/_instances/*.log + ./*/_instances/*/logs/*.log + ./*/*/_instances/*/logs/*.log + ./*/*/_instances/*.log + + SignRelease: + needs: [BuilderDebRelease] + runs-on: [ self-hosted ] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/signed + REPORTS_PATH=${{runner.temp}}/reports_dir + EOF + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Sign release + env: + GPG_BINARY_SIGNING_KEY: ${{ secrets.GPG_BINARY_SIGNING_KEY }} + GPG_BINARY_SIGNING_PASSPHRASE: ${{ secrets.GPG_BINARY_SIGNING_PASSPHRASE }} + REPORTS_PATH: ${{ env.REPORTS_PATH }} + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 sign_release.py + - name: Upload signed hashes + uses: actions/upload-artifact@v2 + with: + name: signed-hashes + path: ${{ env.TEMP_PATH }}/*.gpg + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + ########################################################################################### + ################################ FINISH CHECK ############################################# + ########################################################################################### FinishCheck: needs: - DockerHubPush @@ -583,6 +1039,13 @@ jobs: - IntegrationTestsRelease0 - IntegrationTestsRelease1 - CompatibilityCheck + - SignRelease + - regression_common + - benchmark + - ldap + - parquet + - s3 + - tiered_storage_s3 runs-on: [self-hosted, style-checker] steps: - name: Check out repository code diff --git a/contrib/arrow b/contrib/arrow index 450a56387043..d03245f801f7 160000 --- a/contrib/arrow +++ b/contrib/arrow @@ -1 +1 @@ -Subproject commit 450a5638704386356f8e520080468fc9bc8bcaf8 +Subproject commit d03245f801f798c63ee9a7d2b8914a9e5c5cd666 diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 61f95f0b2dd5..de9310732a8b 100644 --- a/docker/packager/binary/Dockerfile 
+++ b/docker/packager/binary/Dockerfile @@ -55,7 +55,7 @@ RUN arch=${TARGETARCH:-amd64} \ && dpkg -i /tmp/nfpm.deb \ && rm /tmp/nfpm.deb -ARG GO_VERSION=1.18.3 +ARG GO_VERSION=1.19.8 # We need go for clickhouse-diagnostics RUN arch=${TARGETARCH:-amd64} \ && curl -Lo /tmp/go.tgz "https://go.dev/dl/go${GO_VERSION}.linux-${arch}.tar.gz" \ diff --git a/docker/test/integration/runner/dockerd-entrypoint.sh b/docker/test/integration/runner/dockerd-entrypoint.sh index 2293cae415f7..da3698cb3e0b 100755 --- a/docker/test/integration/runner/dockerd-entrypoint.sh +++ b/docker/test/integration/runner/dockerd-entrypoint.sh @@ -27,6 +27,7 @@ while true; do reties=$((reties+1)) if [[ $reties -ge 100 ]]; then # 10 sec max echo "Can't start docker daemon, timeout exceeded." >&2 + cat /ClickHouse/tests/integration/dockerd.log >&2 exit 1; fi sleep 0.1 diff --git a/packages/clickhouse-common-static.yaml b/packages/clickhouse-common-static.yaml index 3167e78dbc3e..429d3fbcb678 100644 --- a/packages/clickhouse-common-static.yaml +++ b/packages/clickhouse-common-static.yaml @@ -33,8 +33,9 @@ deb: contents: - src: root/usr/bin/clickhouse dst: /usr/bin/clickhouse -- src: root/usr/bin/clickhouse-diagnostics - dst: /usr/bin/clickhouse-diagnostics +# Excluded due to CVEs in the go runtime that pop up constantly +# - src: root/usr/bin/clickhouse-diagnostics +# dst: /usr/bin/clickhouse-diagnostics - src: root/usr/bin/clickhouse-extract-from-config dst: /usr/bin/clickhouse-extract-from-config - src: root/usr/bin/clickhouse-library-bridge diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3ece5fd410b3..8493b306067b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -602,6 +602,7 @@ if (ENABLE_TESTS) dbms clickhouse_common_config clickhouse_common_zookeeper + ch_contrib::parquet string_utils) if (TARGET ch_contrib::simdjson) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index f645a88cfde9..77727321311f 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -712,6 +712,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(Bool, input_format_csv_use_best_effort_in_schema_inference, true, "Use some tweaks and heuristics to infer schema in CSV format", 0) \ M(Bool, input_format_tsv_use_best_effort_in_schema_inference, true, "Use some tweaks and heuristics to infer schema in TSV format", 0) \ M(Bool, input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format Parquet", 0) \ + M(UInt64, input_format_parquet_max_block_size, 8192, "Max block size for parquet reader.", 0) \ M(Bool, input_format_protobuf_skip_fields_with_unsupported_types_in_schema_inference, false, "Skip fields with unsupported types while schema inference for format Protobuf", 0) \ M(Bool, input_format_capn_proto_skip_fields_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format CapnProto", 0) \ M(Bool, input_format_orc_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format ORC", 0) \ diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index b2552ec331f0..4d522ff52c57 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -71,6 +71,7 @@ class Space : public std::enable_shared_from_this virtual const String & getName() const = 0; /// Reserve the specified number of bytes. + /// Returns a valid reservation or nullptr on failure. 
virtual ReservationPtr reserve(UInt64 bytes) = 0; virtual ~Space() = default; diff --git a/src/Disks/VolumeJBOD.h b/src/Disks/VolumeJBOD.h index 21d61e6dd8d0..81da64c488d1 100644 --- a/src/Disks/VolumeJBOD.h +++ b/src/Disks/VolumeJBOD.h @@ -82,6 +82,9 @@ class VolumeJBOD : public IVolume ReservationPtr reserve(uint64_t bytes) { ReservationPtr reservation = disk->reserve(bytes); + if (!reservation) + return {}; + /// Not just subtract bytes, but update the value, /// since some reservations may be done directly via IDisk, or not by ClickHouse. free_size = reservation->getUnreservedSpace(); diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index d37f578d947b..f689deadc043 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -104,6 +104,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.parquet.allow_missing_columns = settings.input_format_parquet_allow_missing_columns; format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference; format_settings.parquet.output_string_as_string = settings.output_format_parquet_string_as_string; + format_settings.parquet.max_block_size = settings.input_format_parquet_max_block_size; format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8; format_settings.pretty.color = settings.output_format_pretty_color; format_settings.pretty.max_column_pad_width = settings.output_format_pretty_max_column_pad_width; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 27ad954adaf1..dfa72fc3e573 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -160,6 +160,7 @@ struct FormatSettings bool case_insensitive_column_matching = false; std::unordered_set skip_row_groups = {}; bool output_string_as_string = false; + UInt64 max_block_size = 8192; } parquet; struct Pretty diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index d58feb4570b0..ad0d10224592 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -234,10 +234,11 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID } if (which_type.isDateTime64() - && (which_from_type.isNativeInt() || which_from_type.isNativeUInt() || which_from_type.isDate() || which_from_type.isDate32() || which_from_type.isDateTime() || which_from_type.isDateTime64())) + && (src.getType() == Field::Types::UInt64 || src.getType() == Field::Types::Int64 || src.getType() == Field::Types::Decimal64)) { const auto scale = static_cast(type).getScale(); - const auto decimal_value = DecimalUtils::decimalFromComponents(applyVisitor(FieldVisitorConvertToNumber(), src), 0, scale); + const auto decimal_value + = DecimalUtils::decimalFromComponents(applyVisitor(FieldVisitorConvertToNumber(), src), 0, scale); return Field(DecimalField(decimal_value, scale)); } } diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index 05fc3b8ca2a1..552c971a26e5 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -71,7 +71,7 @@ Chunk ArrowBlockInputFormat::generate() ++record_batch_current; - 
arrow_column_to_ch_column->arrowTableToCHChunk(res, *table_result); + arrow_column_to_ch_column->arrowTableToCHChunk(res, *table_result, (*table_result)->num_rows()); /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. /// Otherwise fill the missing columns with zero values of its type. diff --git a/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp b/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp index ebd9783b4fd4..96ed2a7021e0 100644 --- a/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp +++ b/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -95,6 +96,12 @@ arrow::Result> RandomAccessFileFromSeekableReadBu return buffer; } +arrow::Future> RandomAccessFileFromSeekableReadBuffer::ReadAsync(const arrow::io::IOContext &, int64_t position, int64_t nbytes) +{ + /// Just a stub to avoid using the internal arrow thread pool + return arrow::Future>::MakeFinished(ReadAt(position, nbytes)); +} + arrow::Status RandomAccessFileFromSeekableReadBuffer::Seek(int64_t position) { seekable_in.seek(position, SEEK_SET); diff --git a/src/Processors/Formats/Impl/ArrowBufferedStreams.h b/src/Processors/Formats/Impl/ArrowBufferedStreams.h index da0382837313..4169ce8fc80e 100644 --- a/src/Processors/Formats/Impl/ArrowBufferedStreams.h +++ b/src/Processors/Formats/Impl/ArrowBufferedStreams.h @@ -62,6 +62,11 @@ class RandomAccessFileFromSeekableReadBuffer : public arrow::io::RandomAccessFil arrow::Result> Read(int64_t nbytes) override; + /// Override async reading to avoid using the internal arrow thread pool. + /// In our code we don't use async reading, so the implementation is sync: + /// we just call ReadAt and return a future with a ready value. 
+ arrow::Future> ReadAsync(const arrow::io::IOContext&, int64_t position, int64_t nbytes) override; + arrow::Status Seek(int64_t position) override; private: diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 4863ebfb2f59..e82bbae91982 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -69,7 +69,6 @@ namespace ErrorCodes extern const int DUPLICATE_COLUMN; extern const int THERE_IS_NO_COLUMN; extern const int UNKNOWN_EXCEPTION; - extern const int INCORRECT_NUMBER_OF_COLUMNS; extern const int INCORRECT_DATA; } @@ -90,7 +89,7 @@ static ColumnWithTypeAndName readColumnWithNumericData(std::shared_ptr buffer = chunk->data()->buffers[1]; - const auto * raw_data = reinterpret_cast(buffer->data()); + const auto * raw_data = reinterpret_cast(buffer->data()) + chunk->offset(); column_data.insert_assume_reserved(raw_data, raw_data + chunk->length()); } return {std::move(internal_column), std::move(internal_type), column_name}; @@ -347,7 +346,7 @@ static ColumnWithTypeAndName readColumnWithIndexesDataImpl(std::shared_ptr buffer = chunk->data()->buffers[1]; - const auto * data = reinterpret_cast(buffer->data()); + const auto * data = reinterpret_cast(buffer->data()) + chunk->offset(); /// Check that indexes are correct (protection against corrupted files) for (int64_t i = 0; i != chunk->length(); ++i) @@ -767,7 +766,7 @@ ArrowColumnToCHColumn::ArrowColumnToCHColumn( { } -void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptr & table) +void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptr & table, size_t num_rows) { NameToColumnPtr name_to_column_ptr; for (auto column_name : table->ColumnNames()) @@ -781,16 +780,12 @@ void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptrsecond->length(); columns_list.reserve(header.columns()); std::unordered_map>> nested_tables; bool skipped = false; diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h index 092ed65d61a3..5630597517d2 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h @@ -28,9 +28,9 @@ class ArrowColumnToCHColumn bool allow_missing_columns_, bool case_insensitive_matching_ = false); - void arrowTableToCHChunk(Chunk & res, std::shared_ptr & table); + void arrowTableToCHChunk(Chunk & res, std::shared_ptr & table, size_t num_rows); - void arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & name_to_column_ptr); + void arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & name_to_column_ptr, size_t num_rows); /// Get missing columns that exists in header but not in arrow::Schema std::vector getMissingColumns(const arrow::Schema & schema) const; diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index 36126c21bf1e..42b5a1342add 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -54,14 +54,19 @@ Chunk ORCBlockInputFormat::generate() throw ParsingException( ErrorCodes::CANNOT_READ_ALL_DATA, "Error while reading batch of ORC data: {}", table_result.status().ToString()); + /// We should extract the number of rows directly from the stripe, because in case when + /// record batch contains 0 columns (for example if we requested only columns that + /// are not presented 
in data) the number of rows in record batch will be 0. + size_t num_rows = file_reader->GetRawORCReader()->getStripe(stripe_current)->getNumberOfRows(); + auto table = table_result.ValueOrDie(); - if (!table || !table->num_rows()) + if (!table || !num_rows) return {}; ++stripe_current; Chunk res; - arrow_column_to_ch_column->arrowTableToCHChunk(res, table); + arrow_column_to_ch_column->arrowTableToCHChunk(res, table, num_rows); /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. /// Otherwise fill the missing columns with zero values of its type. if (format_settings.defaults_for_omitted_fields) diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 427c159314b3..ffd12a5ff673 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -16,6 +16,7 @@ #include "ArrowColumnToCHColumn.h" #include + namespace DB { @@ -43,34 +44,38 @@ Chunk ParquetBlockInputFormat::generate() block_missing_values.clear(); if (!file_reader) + { prepareReader(); + file_reader->set_batch_size(format_settings.parquet.max_block_size); + std::vector row_group_indices; + for (int i = 0; i < row_group_total; ++i) + { + if (!skip_row_groups.contains(i)) + row_group_indices.emplace_back(i); + } + auto read_status = file_reader->GetRecordBatchReader(row_group_indices, column_indices, ¤t_record_batch_reader); + if (!read_status.ok()) + throw DB::ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "Error while reading Parquet data: {}", read_status.ToString()); + } if (is_stopped) return {}; - for (; row_group_current < row_group_total && skip_row_groups.contains(row_group_current); ++row_group_current) - ; - - if (row_group_current >= row_group_total) - return res; - - std::shared_ptr table; - - std::unique_ptr<::arrow::RecordBatchReader> rbr; - std::vector row_group_indices { row_group_current }; - arrow::Status get_batch_reader_status = file_reader->GetRecordBatchReader(row_group_indices, column_indices, &rbr); - - if (!get_batch_reader_status.ok()) - throw ParsingException{"Error while reading Parquet data: " + get_batch_reader_status.ToString(), ErrorCodes::CANNOT_READ_ALL_DATA}; - - arrow::Status read_status = rbr->ReadAll(&table); - - if (!read_status.ok()) - throw ParsingException{"Error while reading Parquet data: " + read_status.ToString(), ErrorCodes::CANNOT_READ_ALL_DATA}; - - ++row_group_current; - - arrow_column_to_ch_column->arrowTableToCHChunk(res, table); + auto batch = current_record_batch_reader->Next(); + if (!batch.ok()) + { + throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "Error while reading Parquet data: {}", + batch.status().ToString()); + } + if (*batch) + { + auto tmp_table = arrow::Table::FromRecordBatches({*batch}); + arrow_column_to_ch_column->arrowTableToCHChunk(res, *tmp_table, (*tmp_table)->num_rows()); + } + else + { + return {}; + } /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. /// Otherwise fill the missing columns with zero values of its type. 
@@ -85,6 +90,7 @@ void ParquetBlockInputFormat::resetParser() IInputFormat::resetParser(); file_reader.reset(); + current_record_batch_reader.reset(); column_indices.clear(); row_group_current = 0; block_missing_values.clear(); diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h index 76803bb5b89a..080933e05bde 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h @@ -8,7 +8,7 @@ namespace parquet::arrow { class FileReader; } -namespace arrow { class Buffer; } +namespace arrow { class Buffer; class RecordBatchReader;} namespace DB { @@ -46,6 +46,7 @@ class ParquetBlockInputFormat : public IInputFormat BlockMissingValues block_missing_values; const FormatSettings format_settings; const std::unordered_set & skip_row_groups; + std::shared_ptr current_record_batch_reader; std::atomic is_stopped{0}; }; diff --git a/src/Processors/tests/gtest_assert_arrow_log_does_not_abort.cpp b/src/Processors/tests/gtest_assert_arrow_log_does_not_abort.cpp new file mode 100644 index 000000000000..fb13f4664d9d --- /dev/null +++ b/src/Processors/tests/gtest_assert_arrow_log_does_not_abort.cpp @@ -0,0 +1,18 @@ +#include +#include +#include +#include + +using namespace DB; + +TEST(ChunkedArray, ChunkedArrayWithZeroChunksShouldNotAbort) +{ + std::vector> empty_chunks_vector; + + EXPECT_ANY_THROW(::arrow::ChunkedArray{empty_chunks_vector}); +} + +TEST(ArrowLog, FatalLogShouldThrow) +{ + EXPECT_ANY_THROW(::arrow::util::ArrowLog(__FILE__, __LINE__, ::arrow::util::ArrowLogLevel::ARROW_FATAL)); +} diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index d72d54aa2b11..0e3c59af3e65 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -1172,7 +1172,8 @@ bool KeyCondition::transformConstantWithValidFunctions( if (is_valid_chain) { - auto const_type = cur_node->result_type; + out_type = removeLowCardinality(out_type); + auto const_type = removeLowCardinality(cur_node->result_type); auto const_column = out_type->createColumnConst(1, out_value); auto const_value = (*castColumnAccurateOrNull({const_column, out_type, ""}, const_type))[0]; diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index fec68341cd85..59da13a24fb0 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -394,6 +394,18 @@ def main(): print(f"::notice ::Log URL: {log_url}") + src_path = os.path.join(TEMP_PATH, "build_source.src.tar.gz") + + if os.path.exists(src_path): + src_url = s3_helper.upload_build_file_to_s3( + src_path, s3_path_prefix + "/clickhouse-" + version.string + ".src.tar.gz" + ) + logging.info("Source tar %s", src_url) + else: + logging.info("Source tar doesn't exist") + + print(f"::notice ::Source tar URL: {src_url}") + create_json_artifact( TEMP_PATH, build_name, log_url, build_urls, build_config, elapsed, success ) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index a0fafebb79f1..c13d06ef7f43 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -335,6 +335,9 @@ "required_build": "package_aarch64", "test_grep_exclude_filter": "constant_column_search", }, + "Sign release (actions)": { + "required_build": "package_release" + } }, } # type: dict diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index 4bc7ad1e6fc7..f2ace1d5614e 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -197,7 +197,7 @@ def 
mark_flaky_tests(clickhouse_helper, check_name, test_results): AND pull_request_number = 0 """ - tests_data = clickhouse_helper.select_json_each_row("default", query) + tests_data = clickhouse_helper.select_json_each_row("gh-data", query) master_failed_tests = {row["test_name"] for row in tests_data} logging.info("Found flaky tests: %s", ", ".join(master_failed_tests)) diff --git a/tests/ci/sign_release.py b/tests/ci/sign_release.py new file mode 100644 index 000000000000..2c44e0a928f2 --- /dev/null +++ b/tests/ci/sign_release.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python3 +import sys +import os +import logging +from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH +from s3_helper import S3Helper +from pr_info import PRInfo +from build_download_helper import download_builds_filter +import hashlib + +GPG_BINARY_SIGNING_KEY = os.getenv("GPG_BINARY_SIGNING_KEY") +GPG_BINARY_SIGNING_PASSPHRASE = os.getenv("GPG_BINARY_SIGNING_PASSPHRASE") + +CHECK_NAME = "Sign release (actions)" + +def hash_file(file_path): + BLOCK_SIZE = 65536 # The size of each read from the file + + file_hash = hashlib.sha256() # Create the hash object; you can use something other than `.sha256()` if you wish + with open(file_path, 'rb') as f: # Open the file to read its bytes + fb = f.read(BLOCK_SIZE) # Read from the file. Take in the amount declared above + while len(fb) > 0: # While there is still data being read from the file + file_hash.update(fb) # Update the hash + fb = f.read(BLOCK_SIZE) # Read the next block from the file + + hash_file_path = file_path + '.sha256' + with open(hash_file_path, 'x') as f: + digest = file_hash.hexdigest() + f.write(digest) + print(f'Hashed {file_path}: {digest}') + + return hash_file_path + +def sign_file(file_path): + priv_key_file_path = 'priv.key' + with open(priv_key_file_path, 'x') as f: + f.write(GPG_BINARY_SIGNING_KEY) + + out_file_path = f'{file_path}.gpg' + + os.system(f'echo {GPG_BINARY_SIGNING_PASSPHRASE} | gpg --batch --import {priv_key_file_path}') + os.system(f'gpg -o {out_file_path} --pinentry-mode=loopback --batch --yes --passphrase {GPG_BINARY_SIGNING_PASSPHRASE} --sign {file_path}') + print(f"Signed {file_path}") + os.remove(priv_key_file_path) + + return out_file_path + +def main(): + reports_path = REPORTS_PATH + + if not os.path.exists(TEMP_PATH): + os.makedirs(TEMP_PATH) + + pr_info = PRInfo() + + logging.info("Repo copy path %s", REPO_COPY) + + s3_helper = S3Helper("https://s3.amazonaws.com") + + s3_path_prefix = f"{pr_info.number}/{pr_info.sha}/" + CHECK_NAME.lower().replace( + " ", "_" + ).replace("(", "_").replace(")", "_").replace(",", "_") + + # downloads the `package_release` artifacts that were generated + download_builds_filter(CHECK_NAME, reports_path, TEMP_PATH) + + for f in os.listdir(TEMP_PATH): + full_path = os.path.join(TEMP_PATH, f) + hashed_file_path = hash_file(full_path) + signed_file_path = sign_file(hashed_file_path) + s3_path = f'{s3_path_prefix}/{os.path.basename(signed_file_path)}' + s3_helper.upload_build_file_to_s3(signed_file_path, s3_path) + print(f'Uploaded file {signed_file_path} to {s3_path}') + + # Signed hashes are: + # clickhouse-client_22.3.15.2.altinitystable_amd64.deb.sha512.gpg clickhouse-keeper_22.3.15.2.altinitystable_x86_64.apk.sha512.gpg + # clickhouse-client-22.3.15.2.altinitystable-amd64.tgz.sha512.gpg clickhouse-keeper-22.3.15.2.altinitystable.x86_64.rpm.sha512.gpg + # clickhouse-client_22.3.15.2.altinitystable_x86_64.apk.sha512.gpg clickhouse-keeper-dbg_22.3.15.2.altinitystable_amd64.deb.sha512.gpg + # 
clickhouse-client-22.3.15.2.altinitystable.x86_64.rpm.sha512.gpg clickhouse-keeper-dbg-22.3.15.2.altinitystable-amd64.tgz.sha512.gpg + # clickhouse-common-static_22.3.15.2.altinitystable_amd64.deb.sha512.gpg clickhouse-keeper-dbg_22.3.15.2.altinitystable_x86_64.apk.sha512.gpg + # clickhouse-common-static-22.3.15.2.altinitystable-amd64.tgz.sha512.gpg clickhouse-keeper-dbg-22.3.15.2.altinitystable.x86_64.rpm.sha512.gpg + # clickhouse-common-static_22.3.15.2.altinitystable_x86_64.apk.sha512.gpg clickhouse-keeper.sha512.gpg + # clickhouse-common-static-22.3.15.2.altinitystable.x86_64.rpm.sha512.gpg clickhouse-library-bridge.sha512.gpg + # clickhouse-common-static-dbg_22.3.15.2.altinitystable_amd64.deb.sha512.gpg clickhouse-odbc-bridge.sha512.gpg + # clickhouse-common-static-dbg-22.3.15.2.altinitystable-amd64.tgz.sha512.gpg clickhouse-server_22.3.15.2.altinitystable_amd64.deb.sha512.gpg + # clickhouse-common-static-dbg_22.3.15.2.altinitystable_x86_64.apk.sha512.gpg clickhouse-server-22.3.15.2.altinitystable-amd64.tgz.sha512.gpg + # clickhouse-common-static-dbg-22.3.15.2.altinitystable.x86_64.rpm.sha512.gpg clickhouse-server_22.3.15.2.altinitystable_x86_64.apk.sha512.gpg + # clickhouse-keeper_22.3.15.2.altinitystable_amd64.deb.sha512.gpg clickhouse-server-22.3.15.2.altinitystable.x86_64.rpm.sha512.gpg + # clickhouse-keeper-22.3.15.2.altinitystable-amd64.tgz.sha512.gpg clickhouse.sha512.gpg + + sys.exit(0) + +if __name__ == "__main__": + main() diff --git a/tests/queries/0_stateless/02457_key_condition_with_types_that_cannot_be_nullable.reference b/tests/queries/0_stateless/02457_key_condition_with_types_that_cannot_be_nullable.reference new file mode 100644 index 000000000000..13b65c29f05d --- /dev/null +++ b/tests/queries/0_stateless/02457_key_condition_with_types_that_cannot_be_nullable.reference @@ -0,0 +1 @@ +printer1 diff --git a/tests/queries/0_stateless/02457_key_condition_with_types_that_cannot_be_nullable.sql b/tests/queries/0_stateless/02457_key_condition_with_types_that_cannot_be_nullable.sql new file mode 100644 index 000000000000..690ec6c70e0b --- /dev/null +++ b/tests/queries/0_stateless/02457_key_condition_with_types_that_cannot_be_nullable.sql @@ -0,0 +1,9 @@ +drop table if exists test; + +create table test (Printer LowCardinality(String), IntervalStart DateTime) engine MergeTree partition by (hiveHash(Printer), toYear(IntervalStart)) order by (Printer, IntervalStart); + +insert into test values ('printer1', '2006-02-07 06:28:15'); + +select Printer from test where Printer='printer1'; + +drop table test; diff --git a/tests/queries/0_stateless/02481_parquet_int_list_multiple_chunks.sh b/tests/queries/0_stateless/02481_parquet_int_list_multiple_chunks.sh index c2c6f6898510..5c7c9701a675 100755 --- a/tests/queries/0_stateless/02481_parquet_int_list_multiple_chunks.sh +++ b/tests/queries/0_stateless/02481_parquet_int_list_multiple_chunks.sh @@ -37,6 +37,6 @@ DATA_FILE=$CUR_DIR/data_parquet/int-list-zero-based-chunked-array.parquet ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS parquet_load" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE parquet_load (arr Array(Int64)) ENGINE = Memory" cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO parquet_load FORMAT Parquet" -${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_load" | md5sum +${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_load SETTINGS max_threads=1" | md5sum ${CLICKHOUSE_CLIENT} --query="SELECT count() FROM parquet_load" ${CLICKHOUSE_CLIENT} --query="drop table parquet_load" \ No newline at end of file diff --git 
a/tests/queries/0_stateless/02511_parquet_orc_missing_columns.reference b/tests/queries/0_stateless/02511_parquet_orc_missing_columns.reference new file mode 100644 index 000000000000..d5318a96f1ac --- /dev/null +++ b/tests/queries/0_stateless/02511_parquet_orc_missing_columns.reference @@ -0,0 +1,8 @@ +Hello +Hello +Hello +6 6 +Hello +Hello +Hello +6 6 diff --git a/tests/queries/0_stateless/02511_parquet_orc_missing_columns.sh b/tests/queries/0_stateless/02511_parquet_orc_missing_columns.sh new file mode 100755 index 000000000000..455dccafbb9b --- /dev/null +++ b/tests/queries/0_stateless/02511_parquet_orc_missing_columns.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +#Tags: no-fasttest, no-parallel + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select number as x from numbers(3) format Parquet" > 02511_data1.parquet +$CLICKHOUSE_LOCAL -q "select y from file(02511_data1.parquet, auto, 'x UInt64, y String default \'Hello\'') settings input_format_parquet_allow_missing_columns=1" +$CLICKHOUSE_LOCAL -q "select number as x, 'Hello' as y from numbers(3) format Parquet" > 02511_data2.parquet +$CLICKHOUSE_LOCAL -q "select count(*), count(y) from file('02511_data*.parquet', auto, 'x UInt64, y String') settings input_format_parquet_allow_missing_columns=1" + +$CLICKHOUSE_LOCAL -q "select number as x from numbers(3) format ORC" > 02511_data1.orc +$CLICKHOUSE_LOCAL -q "select y from file(02511_data1.orc, auto, 'x UInt64, y String default \'Hello\'') settings input_format_orc_allow_missing_columns=1" +$CLICKHOUSE_LOCAL -q "select number as x, 'Hello' as y from numbers(3) format ORC" > 02511_data2.orc +$CLICKHOUSE_LOCAL -q "select count(*), count(y) from file('02511_data*.orc', auto, 'x UInt64, y String') settings input_format_orc_allow_missing_columns=1" + +rm 02511_data* + diff --git a/tests/queries/0_stateless/02681_final_excessive_reading_bug.sh b/tests/queries/0_stateless/02681_final_excessive_reading_bug.sh index a795b9ec5a0b..1d89246b242a 100755 --- a/tests/queries/0_stateless/02681_final_excessive_reading_bug.sh +++ b/tests/queries/0_stateless/02681_final_excessive_reading_bug.sh @@ -19,6 +19,6 @@ $CLICKHOUSE_CLIENT --query_id="$query_id" -q "select * from sample_final FINAL S $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" $CLICKHOUSE_CLIENT --param_query_id="$query_id" -q " -SELECT ProfileEvents['SelectedRows'] < 1_000_000 +SELECT ProfileEvents['SelectedRows'] < 1000000 FROM system.query_log WHERE event_date >= yesterday() AND type = 'QueryFinish' AND query_id = {query_id:String} AND current_database = currentDatabase()"