diff --git a/.github/actions/create_workflow_report/action.yml b/.github/actions/create_workflow_report/action.yml
new file mode 100644
index 000000000000..fde62d01e29d
--- /dev/null
+++ b/.github/actions/create_workflow_report/action.yml
@@ -0,0 +1,41 @@
+name: Create and Upload Combined Report
+description: Create and upload a combined CI report
+inputs:
+ final:
+ description: "Control whether the report is final or a preview"
+ required: false
+ default: "false"
+runs:
+ using: "composite"
+ steps:
+ - name: Create and upload workflow report
+ env:
+ PR_NUMBER: ${{ github.event.pull_request.number || 0 }}
+ COMMIT_SHA: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+ ACTIONS_RUN_URL: ${{ github.event.repository.html_url }}/actions/runs/${{ github.run_id }}
+ FINAL: ${{ inputs.final }}
+ shell: bash
+ run: |
+ pip install clickhouse-driver==0.2.8 numpy==1.26.4 pandas==2.0.3 jinja2==3.1.5
+
+ CMD="python3 .github/actions/create_workflow_report/create_workflow_report.py"
+ ARGS="--pr-number $PR_NUMBER --commit-sha $COMMIT_SHA --actions-run-url $ACTIONS_RUN_URL --known-fails tests/broken_tests.json --cves"
+
+ set +e
+ if [[ "$FINAL" == "false" ]]; then
+ REPORT_LINK=$($CMD $ARGS --mark-preview)
+ else
+ REPORT_LINK=$($CMD $ARGS)
+ fi
+
+ echo $REPORT_LINK
+
+ if [[ "$FINAL" == "true" ]]; then
+ IS_VALID_URL=$(echo $REPORT_LINK | grep -E '^https?://')
+ if [[ -n $IS_VALID_URL ]]; then
+ echo "Workflow Run Report: [View Report]($REPORT_LINK)" >> $GITHUB_STEP_SUMMARY
+ else
+ echo "Error: $REPORT_LINK" >> $GITHUB_STEP_SUMMARY
+ exit 1
+ fi
+ fi
diff --git a/.github/actions/create_workflow_report/ci_run_report.html.jinja b/.github/actions/create_workflow_report/ci_run_report.html.jinja
new file mode 100644
index 000000000000..a92c1aa34e3a
--- /dev/null
+++ b/.github/actions/create_workflow_report/ci_run_report.html.jinja
@@ -0,0 +1,269 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>{{ title }}</title>
+</head>
+<body>
+
+<h1>{{ title }}</h1>
+
+{% if is_preview %}
+<p>This is a preview. The workflow is not yet finished.</p>
+{% endif %}
+
+<h2>Table of Contents</h2>
+
+{%- if pr_number != 0 -%}
+<h2>New Fails in PR</h2>
+<p>Compared with base sha {{ base_sha }}</p>
+{{ new_fails_html }}
+{%- endif %}
+
+<h2>CI Jobs Status</h2>
+{{ ci_jobs_status_html }}
+
+<h2>Checks Errors</h2>
+{{ checks_errors_html }}
+
+<h2>Checks New Fails</h2>
+{{ checks_fails_html }}
+
+<h2>Regression New Fails</h2>
+{{ regression_fails_html }}
+
+<h2>Docker Images CVEs</h2>
+{{ docker_images_cves_html }}
+
+<h2>Checks Known Fails</h2>
+<p>Fail reason conventions:</p>
+<ul>
+<li>KNOWN - Accepted fail and fix is not planned</li>
+<li>INVESTIGATE - We don't know why it fails</li>
+<li>NEEDSFIX - Investigation done and a fix is needed to make it pass</li>
+</ul>
+{{ checks_known_fails_html }}
+
+</body>
+</html>
\ No newline at end of file
diff --git a/.github/actions/create_workflow_report/create_workflow_report.py b/.github/actions/create_workflow_report/create_workflow_report.py
new file mode 100755
index 000000000000..4f360b9f092d
--- /dev/null
+++ b/.github/actions/create_workflow_report/create_workflow_report.py
@@ -0,0 +1,684 @@
+#!/usr/bin/env python3
+import argparse
+import os
+from pathlib import Path
+from itertools import combinations
+import json
+from datetime import datetime
+from functools import lru_cache
+
+import pandas as pd
+from jinja2 import Environment, FileSystemLoader
+import requests
+from clickhouse_driver import Client
+import boto3
+from botocore.exceptions import NoCredentialsError
+
+DATABASE_HOST_VAR = "CHECKS_DATABASE_HOST"
+DATABASE_USER_VAR = "CHECKS_DATABASE_USER"
+DATABASE_PASSWORD_VAR = "CHECKS_DATABASE_PASSWORD"
+S3_BUCKET = "altinity-build-artifacts"
+GITHUB_REPO = "Altinity/ClickHouse"
+
+# Set up the Jinja2 environment
+template_dir = os.path.dirname(__file__)
+
+# Load the template
+template = Environment(loader=FileSystemLoader(template_dir)).get_template(
+ "ci_run_report.html.jinja"
+)
+
+
+@lru_cache
+def get_run_details(run_url: str) -> dict:
+ """
+ Fetch run details for a given run URL.
+ """
+ run_id = run_url.split("/")[-1]
+
+ headers = {
+ "Authorization": f"token {os.getenv('GITHUB_TOKEN')}",
+ "Accept": "application/vnd.github.v3+json",
+ }
+
+ url = f"https://api.github.com/repos/{GITHUB_REPO}/actions/runs/{run_id}"
+ response = requests.get(url, headers=headers)
+
+ if response.status_code != 200:
+ raise Exception(
+ f"Failed to fetch run details: {response.status_code} {response.text}"
+ )
+
+ return response.json()
+
+
+def get_commit_statuses(sha: str) -> pd.DataFrame:
+ """
+ Fetch commit statuses for a given SHA and return as a pandas DataFrame.
+ Handles pagination to get all statuses.
+
+ Args:
+ sha (str): Commit SHA to fetch statuses for.
+
+ Returns:
+ pd.DataFrame: DataFrame containing all statuses.
+ """
+ headers = {
+ "Authorization": f"token {os.getenv('GITHUB_TOKEN')}",
+ "Accept": "application/vnd.github.v3+json",
+ }
+
+ url = f"https://api.github.com/repos/{GITHUB_REPO}/commits/{sha}/statuses"
+
+ all_data = []
+
+ while url:
+ response = requests.get(url, headers=headers)
+
+ if response.status_code != 200:
+ raise Exception(
+ f"Failed to fetch statuses: {response.status_code} {response.text}"
+ )
+
+ data = response.json()
+ all_data.extend(data)
+
+ # Check for pagination links in the response headers
+ if "Link" in response.headers:
+ links = response.headers["Link"].split(",")
+ next_url = None
+
+ for link in links:
+ parts = link.strip().split(";")
+ if len(parts) == 2 and 'rel="next"' in parts[1]:
+ next_url = parts[0].strip("<>")
+ break
+
+ url = next_url
+ else:
+ url = None
+
+ # Parse relevant fields
+ parsed = [
+ {
+ "job_name": item["context"],
+ "job_status": item["state"],
+ "message": item["description"],
+ "results_link": item["target_url"],
+ }
+ for item in all_data
+ ]
+
+ # Create DataFrame
+ df = pd.DataFrame(parsed)
+
+ # Drop duplicates keeping the first occurrence (newest status for each context)
+ # GitHub returns statuses in reverse chronological order
+ df = df.drop_duplicates(subset=["job_name"], keep="first")
+
+ # Sort by status and job name
+ return df.sort_values(
+ by=["job_status", "job_name"], ascending=[True, True]
+ ).reset_index(drop=True)
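
# A minimal, self-contained sketch (toy rows, not a real API payload) of the
# dedup step above: GitHub returns statuses newest-first, so keeping the first
# occurrence per context keeps only the latest state of each check.
import pandas as pd

statuses = pd.DataFrame(
    [
        {"job_name": "Build (amd64)", "job_status": "success", "message": "done", "results_link": "https://example.invalid/2"},
        {"job_name": "Build (amd64)", "job_status": "pending", "message": "queued", "results_link": "https://example.invalid/1"},
    ]
)
latest = statuses.drop_duplicates(subset=["job_name"], keep="first")
print(latest.to_string(index=False))  # only the newer 'success' row remains
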
+
+
+def get_pr_info_from_number(pr_number: str) -> dict:
+ """
+ Fetch pull request information for a given PR number.
+
+ Args:
+ pr_number (str): Pull request number to fetch information for.
+
+ Returns:
+ dict: Dictionary containing PR information.
+ """
+ headers = {
+ "Authorization": f"token {os.getenv('GITHUB_TOKEN')}",
+ "Accept": "application/vnd.github.v3+json",
+ }
+
+ url = f"https://api.github.com/repos/{GITHUB_REPO}/pulls/{pr_number}"
+ response = requests.get(url, headers=headers)
+
+ if response.status_code != 200:
+ raise Exception(
+ f"Failed to fetch pull request info: {response.status_code} {response.text}"
+ )
+
+ return response.json()
+
+
+def get_checks_fails(client: Client, job_url: str):
+ """
+ Get tests that did not succeed for the given job URL.
+ Exclude checks that have status 'error' as they are counted in get_checks_errors.
+ """
+ query = f"""SELECT job_status, job_name, status as test_status, test_name, results_link
+ FROM (
+ SELECT
+ argMax(check_status, check_start_time) as job_status,
+ check_name as job_name,
+ argMax(test_status, check_start_time) as status,
+ test_name,
+ report_url as results_link,
+ task_url
+ FROM `gh-data`.checks
+ GROUP BY check_name, test_name, report_url, task_url
+ )
+ WHERE task_url LIKE '{job_url}%'
+ AND test_status IN ('FAIL', 'ERROR')
+ AND job_status!='error'
+ ORDER BY job_name, test_name
+ """
+ return client.query_dataframe(query)
+
+
+def get_checks_known_fails(client: Client, job_url: str, known_fails: dict):
+ """
+ Get tests that are known to fail for the given job URL.
+ """
+ if len(known_fails) == 0:
+ return pd.DataFrame()
+
+ query = f"""SELECT job_status, job_name, status as test_status, test_name, results_link
+ FROM (
+ SELECT
+ argMax(check_status, check_start_time) as job_status,
+ check_name as job_name,
+ argMax(test_status, check_start_time) as status,
+ test_name,
+ report_url as results_link,
+ task_url
+ FROM `gh-data`.checks
+ GROUP BY check_name, test_name, report_url, task_url
+ )
+ WHERE task_url LIKE '{job_url}%'
+ AND test_status='BROKEN'
+ AND test_name IN ({','.join(f"'{test}'" for test in known_fails.keys())})
+ ORDER BY job_name, test_name
+ """
+
+ df = client.query_dataframe(query)
+
+ df.insert(
+ len(df.columns) - 1,
+ "reason",
+ df["test_name"]
+ .astype(str)
+ .apply(
+ lambda test_name: known_fails[test_name].get("reason", "No reason given")
+ ),
+ )
+
+ return df
+
+
+def get_checks_errors(client: Client, job_url: str):
+ """
+ Get checks that have status 'error' for the given job URL.
+ """
+ query = f"""SELECT job_status, job_name, status as test_status, test_name, results_link
+ FROM (
+ SELECT
+ argMax(check_status, check_start_time) as job_status,
+ check_name as job_name,
+ argMax(test_status, check_start_time) as status,
+ test_name,
+ report_url as results_link,
+ task_url
+ FROM `gh-data`.checks
+ GROUP BY check_name, test_name, report_url, task_url
+ )
+ WHERE task_url LIKE '{job_url}%'
+ AND job_status=='error'
+ ORDER BY job_name, test_name
+ """
+ return client.query_dataframe(query)
+
+
+def drop_prefix_rows(df, column_to_clean):
+ """
+ Drop rows from the dataframe if:
+ - the row matches another row completely except for the specified column
+ - the specified column of that row is a prefix of the same column in another row
+ """
+ to_drop = set()
+ reference_columns = [col for col in df.columns if col != column_to_clean]
+ for (i, row_1), (j, row_2) in combinations(df.iterrows(), 2):
+ if all(row_1[col] == row_2[col] for col in reference_columns):
+ if row_2[column_to_clean].startswith(row_1[column_to_clean]):
+ to_drop.add(i)
+ elif row_1[column_to_clean].startswith(row_2[column_to_clean]):
+ to_drop.add(j)
+ return df.drop(to_drop)
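
# Hypothetical illustration of drop_prefix_rows (assumes the function above is
# in scope): the two rows agree on every column except test_name, and the first
# test_name is a prefix of the second, so the shorter, less specific row is dropped.
import pandas as pd

df = pd.DataFrame(
    [
        {"arch": "x86_64", "job_name": "aes", "status": "Fail", "test_name": "/aes/decrypt", "results_link": "r1"},
        {"arch": "x86_64", "job_name": "aes", "status": "Fail", "test_name": "/aes/decrypt/mode=CBC", "results_link": "r1"},
    ]
)
print(drop_prefix_rows(df, "test_name"))  # only the longer /aes/decrypt/mode=CBC row remains
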
+
+
+def get_regression_fails(client: Client, job_url: str):
+ """
+ Get regression tests that did not succeed for the given job URL.
+ """
+ # If you rename the alias for report_url, also update the formatters in format_results_as_html_table
+ # Nested SELECT handles test reruns
+ query = f"""SELECT arch, job_name, status, test_name, results_link
+ FROM (
+ SELECT
+ architecture as arch,
+ test_name,
+ argMax(result, start_time) AS status,
+ job_name,
+ report_url as results_link,
+ job_url
+ FROM `gh-data`.clickhouse_regression_results
+ GROUP BY architecture, test_name, job_url, job_name, report_url
+ ORDER BY length(test_name) DESC
+ )
+ WHERE job_url LIKE '{job_url}%'
+ AND status IN ('Fail', 'Error')
+ """
+ df = client.query_dataframe(query)
+ df = drop_prefix_rows(df, "test_name")
+ df["job_name"] = df["job_name"].str.title()
+ return df
+
+
+def get_new_fails_this_pr(
+ client: Client,
+ pr_info: dict,
+ checks_fails: pd.DataFrame,
+ regression_fails: pd.DataFrame,
+):
+ """
+ Get tests that failed in the PR but passed in the base branch.
+ Compares both checks and regression test results.
+ """
+ base_sha = pr_info.get("base", {}).get("sha")
+ if not base_sha:
+ raise Exception("No base SHA found for PR")
+
+ # Modify tables to have the same columns
+ if len(checks_fails) > 0:
+ checks_fails = checks_fails.copy().drop(columns=["job_status"])
+ if len(regression_fails) > 0:
+ regression_fails = regression_fails.copy()
+ regression_fails["job_name"] = regression_fails.apply(
+ lambda row: f"{row['arch']} {row['job_name']}".strip(), axis=1
+ )
+ regression_fails["test_status"] = regression_fails["status"]
+
+ # Combine both types of fails and select only desired columns
+ desired_columns = ["job_name", "test_name", "test_status", "results_link"]
+ all_pr_fails = pd.concat([checks_fails, regression_fails], ignore_index=True)[
+ desired_columns
+ ]
+ if len(all_pr_fails) == 0:
+ return pd.DataFrame()
+
+ # Get all checks from the base branch that didn't fail
+ base_checks_query = f"""SELECT job_name, status as test_status, test_name, results_link
+ FROM (
+ SELECT
+ check_name as job_name,
+ argMax(test_status, check_start_time) as status,
+ test_name,
+ report_url as results_link,
+ task_url
+ FROM `gh-data`.checks
+ WHERE commit_sha='{base_sha}'
+ GROUP BY check_name, test_name, report_url, task_url
+ )
+ WHERE test_status NOT IN ('FAIL', 'ERROR')
+ ORDER BY job_name, test_name
+ """
+ base_checks = client.query_dataframe(base_checks_query)
+
+ # Get regression results from base branch that didn't fail
+ base_regression_query = f"""SELECT arch, job_name, status, test_name, results_link
+ FROM (
+ SELECT
+ architecture as arch,
+ test_name,
+ argMax(result, start_time) AS status,
+ job_url,
+ job_name,
+ report_url as results_link
+ FROM `gh-data`.clickhouse_regression_results
+ WHERE results_link LIKE'%/{base_sha}/%'
+ GROUP BY architecture, test_name, job_url, job_name, report_url
+ ORDER BY length(test_name) DESC
+ )
+ WHERE status NOT IN ('Fail', 'Error')
+ """
+ base_regression = client.query_dataframe(base_regression_query)
+ if len(base_regression) > 0:
+ base_regression["job_name"] = base_regression.apply(
+ lambda row: f"{row['arch']} {row['job_name']}".strip(), axis=1
+ )
+ base_regression["test_status"] = base_regression["status"]
+ base_regression = base_regression.drop(columns=["arch", "status"])
+
+ # Combine base results
+ base_results = pd.concat([base_checks, base_regression], ignore_index=True)
+
+ # Find tests that failed in PR but passed in base
+ pr_failed_tests = set(zip(all_pr_fails["job_name"], all_pr_fails["test_name"]))
+ base_passed_tests = set(zip(base_results["job_name"], base_results["test_name"]))
+
+ new_fails = pr_failed_tests.intersection(base_passed_tests)
+
+ # Filter PR results to only include new fails
+ mask = all_pr_fails.apply(
+ lambda row: (row["job_name"], row["test_name"]) in new_fails, axis=1
+ )
+ new_fails_df = all_pr_fails[mask]
+
+ return new_fails_df
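
# Toy illustration (made-up test names) of the set logic above: a failure is
# "new" only if the same (job_name, test_name) pair failed in the PR *and*
# appears as a non-failing result on the base commit.
pr_failed = {
    ("Stateless tests (release)", "01001_select"),
    ("Stateless tests (release)", "01002_join"),
}
base_not_failed = {("Stateless tests (release)", "01001_select")}
print(pr_failed.intersection(base_not_failed))  # {('Stateless tests (release)', '01001_select')}
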
+
+
+def get_cves(pr_number, commit_sha):
+ """
+ Fetch Grype results from S3.
+
+ If no results are available for download, returns ... (Ellipsis).
+ """
+ s3_client = boto3.client("s3", endpoint_url=os.getenv("S3_URL"))
+ s3_prefix = f"{pr_number}/{commit_sha}/grype/"
+
+ results = []
+
+ response = s3_client.list_objects_v2(
+ Bucket=S3_BUCKET, Prefix=s3_prefix, Delimiter="/"
+ )
+ grype_result_dirs = [
+ content["Prefix"] for content in response.get("CommonPrefixes", [])
+ ]
+
+ if len(grype_result_dirs) == 0:
+ # We were asked to check the CVE data, but none was found,
+ # maybe this is a preview report and grype results are not available yet
+ return ...
+
+ for path in grype_result_dirs:
+ file_key = f"{path}result.json"
+ file_response = s3_client.get_object(Bucket=S3_BUCKET, Key=file_key)
+ content = file_response["Body"].read().decode("utf-8")
+ results.append(json.loads(content))
+
+ rows = []
+ for scan_result in results:
+ for match in scan_result["matches"]:
+ rows.append(
+ {
+ "docker_image": scan_result["source"]["target"]["userInput"],
+ "severity": match["vulnerability"]["severity"],
+ "identifier": match["vulnerability"]["id"],
+ "namespace": match["vulnerability"]["namespace"],
+ }
+ )
+
+ if len(rows) == 0:
+ return pd.DataFrame()
+
+ df = pd.DataFrame(rows).drop_duplicates()
+ df = df.sort_values(
+ by="severity",
+ key=lambda col: col.str.lower().map(
+ {"critical": 1, "high": 2, "medium": 3, "low": 4, "negligible": 5}
+ ),
+ )
+ return df
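
# Small sketch (toy data) of the severity ordering used above: the sort key maps
# severity names to ranks so Critical and High rows float to the top of the table.
import pandas as pd

cves = pd.DataFrame({"severity": ["Low", "Critical", "Medium", "High"]})
rank = {"critical": 1, "high": 2, "medium": 3, "low": 4, "negligible": 5}
print(cves.sort_values(by="severity", key=lambda col: col.str.lower().map(rank)))
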
+
+
+def url_to_html_link(url: str) -> str:
+ if not url:
+ return ""
+ text = url.split("/")[-1].replace("__", "_")
+ if not text:
+ text = "results"
+ return f'<a href="{url}">{text}</a>'
+
+
+def format_test_name_for_linewrap(text: str) -> str:
+ """Tweak the test name to improve line wrapping."""
+ return f'<span style="word-break: break-all">{text}</span>'
+
+
+def format_test_status(text: str) -> str:
+ """Format the test status for better readability."""
+ color = (
+ "red"
+ if text.lower().startswith("fail")
+ else "orange" if text.lower() in ("error", "broken", "pending") else "green"
+ )
+ return f'<span style="color: {color}">{text}</span>'
+
+
+def format_results_as_html_table(results) -> str:
+ if len(results) == 0:
+ return "Nothing to report
"
+ results.columns = [col.replace("_", " ").title() for col in results.columns]
+ html = results.to_html(
+ index=False,
+ formatters={
+ "Results Link": url_to_html_link,
+ "Test Name": format_test_name_for_linewrap,
+ "Test Status": format_test_status,
+ "Job Status": format_test_status,
+ "Status": format_test_status,
+ "Message": lambda m: m.replace("\n", " "),
+ "Identifier": lambda i: url_to_html_link(
+ "https://nvd.nist.gov/vuln/detail/" + i
+ ),
+ },
+ escape=False,
+ border=0,
+ classes=["test-results-table"],
+ )
+ return html
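
# Self-contained sketch of the pandas to_html formatter hook relied on above:
# each formatter receives a cell value and returns HTML, and escape=False keeps
# that HTML intact (the formatter below is a stand-in, not the real one).
import pandas as pd

df = pd.DataFrame({"Test Status": ["FAIL", "OK"]})
html = df.to_html(
    index=False,
    formatters={"Test Status": lambda s: f'<b>{s}</b>'},
    escape=False,
    border=0,
)
print(html)
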
+
+
+def parse_args() -> argparse.Namespace:
+ parser = argparse.ArgumentParser(description="Create a combined CI report.")
+ parser.add_argument( # Need the full URL rather than just the ID to query the databases
+ "--actions-run-url", required=True, help="URL of the actions run"
+ )
+ parser.add_argument(
+ "--pr-number", help="Pull request number for the S3 path", type=int
+ )
+ parser.add_argument("--commit-sha", help="Commit SHA for the S3 path")
+ parser.add_argument(
+ "--no-upload", action="store_true", help="Do not upload the report"
+ )
+ parser.add_argument(
+ "--known-fails", type=str, help="Path to the file with known fails"
+ )
+ parser.add_argument(
+ "--cves", action="store_true", help="Get CVEs from Grype results"
+ )
+ parser.add_argument(
+ "--mark-preview", action="store_true", help="Mark the report as a preview"
+ )
+ return parser.parse_args()
+
+
+def main():
+ args = parse_args()
+
+ if args.pr_number is None or args.commit_sha is None:
+ run_details = get_run_details(args.actions_run_url)
+ if args.pr_number is None:
+ if len(run_details["pull_requests"]) > 0:
+ args.pr_number = run_details["pull_requests"][0]["number"]
+ else:
+ args.pr_number = 0
+ if args.commit_sha is None:
+ args.commit_sha = run_details["head_commit"]["id"]
+
+ db_client = Client(
+ host=os.getenv(DATABASE_HOST_VAR),
+ user=os.getenv(DATABASE_USER_VAR),
+ password=os.getenv(DATABASE_PASSWORD_VAR),
+ port=9440,
+ secure="y",
+ verify=False,
+ settings={"use_numpy": True},
+ )
+
+ fail_results = {
+ "job_statuses": get_commit_statuses(args.commit_sha),
+ "checks_fails": get_checks_fails(db_client, args.actions_run_url),
+ "checks_known_fails": [],
+ "pr_new_fails": [],
+ "checks_errors": get_checks_errors(db_client, args.actions_run_url),
+ "regression_fails": get_regression_fails(db_client, args.actions_run_url),
+ "docker_images_cves": (
+ [] if not args.cves else get_cves(args.pr_number, args.commit_sha)
+ ),
+ }
+
+ # get_cves returns ... in the case where no Grype result files were found.
+ # This might occur when run in preview mode.
+ cves_not_checked = not args.cves or fail_results["docker_images_cves"] is ...
+
+ if args.known_fails:
+ if not os.path.exists(args.known_fails):
+ print(f"Known fails file {args.known_fails} not found.")
+ exit(1)
+
+ with open(args.known_fails) as f:
+ known_fails = json.load(f)
+
+ if known_fails:
+ fail_results["checks_known_fails"] = get_checks_known_fails(
+ db_client, args.actions_run_url, known_fails
+ )
+
+ if args.pr_number == 0:
+ run_details = get_run_details(args.actions_run_url)
+ branch_name = run_details.get("head_branch", "unknown branch")
+ pr_info_html = f"Release ({branch_name})"
+ else:
+ try:
+ pr_info = get_pr_info_from_number(args.pr_number)
+ pr_info_html = f"""
+ #{pr_info.get("number")} ({pr_info.get("base", {}).get('ref')} <- {pr_info.get("head", {}).get('ref')}) {pr_info.get("title")}
+ """
+ fail_results["pr_new_fails"] = get_new_fails_this_pr(
+ db_client,
+ pr_info,
+ fail_results["checks_fails"],
+ fail_results["regression_fails"],
+ )
+ except Exception as e:
+ pr_info_html = e
+
+ high_cve_count = 0
+ if not cves_not_checked and len(fail_results["docker_images_cves"]) > 0:
+ high_cve_count = (
+ fail_results["docker_images_cves"]["severity"]
+ .str.lower()
+ .isin(("high", "critical"))
+ .sum()
+ )
+
+ # Define the context for rendering
+ context = {
+ "title": "ClickHouse® CI Workflow Run Report",
+ "github_repo": GITHUB_REPO,
+ "s3_bucket": S3_BUCKET,
+ "pr_info_html": pr_info_html,
+ "pr_number": args.pr_number,
+ "workflow_id": args.actions_run_url.split("/")[-1],
+ "commit_sha": args.commit_sha,
+ "base_sha": "" if args.pr_number == 0 else pr_info.get("base", {}).get("sha"),
+ "date": f"{datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')} UTC",
+ "is_preview": args.mark_preview,
+ "counts": {
+ "jobs_status": f"{sum(fail_results['job_statuses']['job_status'] != 'success')} fail/error/pending",
+ "checks_errors": len(fail_results["checks_errors"]),
+ "checks_new_fails": len(fail_results["checks_fails"]),
+ "regression_new_fails": len(fail_results["regression_fails"]),
+ "cves": "N/A" if cves_not_checked else f"{high_cve_count} high/critical",
+ "checks_known_fails": (
+ "N/A"
+ if not args.known_fails
+ else len(fail_results["checks_known_fails"])
+ ),
+ "pr_new_fails": len(fail_results["pr_new_fails"]),
+ },
+ "ci_jobs_status_html": format_results_as_html_table(
+ fail_results["job_statuses"]
+ ),
+ "checks_errors_html": format_results_as_html_table(
+ fail_results["checks_errors"]
+ ),
+ "checks_fails_html": format_results_as_html_table(fail_results["checks_fails"]),
+ "regression_fails_html": format_results_as_html_table(
+ fail_results["regression_fails"]
+ ),
+ "docker_images_cves_html": (
+ "Not Checked
"
+ if cves_not_checked
+ else format_results_as_html_table(fail_results["docker_images_cves"])
+ ),
+ "checks_known_fails_html": (
+ "Not Checked
"
+ if not args.known_fails
+ else format_results_as_html_table(fail_results["checks_known_fails"])
+ ),
+ "new_fails_html": format_results_as_html_table(fail_results["pr_new_fails"]),
+ }
+
+ # Render the template with the context
+ rendered_html = template.render(context)
+
+ report_name = "ci_run_report.html"
+ report_path = Path(report_name)
+ report_path.write_text(rendered_html, encoding="utf-8")
+
+ if args.no_upload:
+ print(f"Report saved to {report_path}")
+ exit(0)
+
+ report_destination_key = f"{args.pr_number}/{args.commit_sha}/{report_name}"
+
+ # Upload the report to S3
+ s3_client = boto3.client("s3", endpoint_url=os.getenv("S3_URL"))
+
+ try:
+ s3_client.put_object(
+ Bucket=S3_BUCKET,
+ Key=report_destination_key,
+ Body=rendered_html,
+ ContentType="text/html; charset=utf-8",
+ )
+ except NoCredentialsError:
+ print("Credentials not available for S3 upload.")
+
+ print(f"https://s3.amazonaws.com/{S3_BUCKET}/" + report_destination_key)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/.github/create_combined_ci_report.py b/.github/create_combined_ci_report.py
deleted file mode 100755
index 2607760e4abb..000000000000
--- a/.github/create_combined_ci_report.py
+++ /dev/null
@@ -1,294 +0,0 @@
-#!/usr/bin/env python3
-import argparse
-import os
-from pathlib import Path
-from itertools import combinations
-import json
-
-import requests
-from clickhouse_driver import Client
-import boto3
-from botocore.exceptions import NoCredentialsError
-
-DATABASE_HOST_VAR = "CHECKS_DATABASE_HOST"
-DATABASE_USER_VAR = "CHECKS_DATABASE_USER"
-DATABASE_PASSWORD_VAR = "CHECKS_DATABASE_PASSWORD"
-S3_BUCKET = "altinity-build-artifacts"
-
-
-def get_checks_fails(client: Client, job_url: str):
- """
- Get tests that did not succeed for the given job URL.
- Exclude checks that have status 'error' as they are counted in get_checks_errors.
- """
- columns = (
- "check_status, check_name, test_status, test_name, report_url as results_link"
- )
- query = f"""SELECT {columns} FROM `gh-data`.checks
- WHERE task_url='{job_url}'
- AND test_status IN ('FAIL', 'ERROR')
- AND check_status!='error'
- ORDER BY check_name, test_name
- """
- return client.query_dataframe(query)
-
-
-def get_checks_known_fails(client: Client, job_url: str, known_fails: dict):
- """
- Get tests that are known to fail for the given job URL.
- """
- assert len(known_fails) > 0, "cannot query the database with empty known fails"
- columns = (
- "check_status, check_name, test_status, test_name, report_url as results_link"
- )
- query = f"""SELECT {columns} FROM `gh-data`.checks
- WHERE task_url='{job_url}'
- AND test_status='BROKEN'
- AND test_name IN ({','.join(f"'{test}'" for test in known_fails.keys())})
- ORDER BY test_name, check_name
- """
-
- df = client.query_dataframe(query)
-
- df.insert(
- len(df.columns) - 1,
- "reason",
- df["test_name"]
- .astype(str)
- .apply(
- lambda test_name: known_fails[test_name].get("reason", "No reason given")
- ),
- )
-
- return df
-
-
-def get_checks_errors(client: Client, job_url: str):
- """
- Get checks that have status 'error' for the given job URL.
- """
- columns = (
- "check_status, check_name, test_status, test_name, report_url as results_link"
- )
- query = f"""SELECT {columns} FROM `gh-data`.checks
- WHERE task_url='{job_url}'
- AND check_status=='error'
- ORDER BY check_name, test_name
- """
- return client.query_dataframe(query)
-
-
-def drop_prefix_rows(df, column_to_clean):
- """
- Drop rows from the dataframe if:
- - the row matches another row completely except for the specified column
- - the specified column of that row is a prefix of the same column in another row
- """
- to_drop = set()
- reference_columns = [col for col in df.columns if col != column_to_clean]
- for (i, row_1), (j, row_2) in combinations(df.iterrows(), 2):
- if all(row_1[col] == row_2[col] for col in reference_columns):
- if row_2[column_to_clean].startswith(row_1[column_to_clean]):
- to_drop.add(i)
- elif row_1[column_to_clean].startswith(row_2[column_to_clean]):
- to_drop.add(j)
- return df.drop(to_drop)
-
-
-def get_regression_fails(client: Client, job_url: str):
- """
- Get regression tests that did not succeed for the given job URL.
- """
- # If you rename the alias for report_url, also update the formatters in format_results_as_html_table
- # Nested SELECT handles test reruns
- query = f"""SELECT arch, job_name, status, test_name, results_link
- FROM (
- SELECT
- architecture as arch,
- test_name,
- argMax(result, start_time) AS status,
- job_url,
- job_name,
- report_url as results_link
- FROM `gh-data`.clickhouse_regression_results
- GROUP BY architecture, test_name, job_url, job_name, report_url, start_time
- ORDER BY start_time DESC, length(test_name) DESC
- )
- WHERE job_url='{job_url}'
- AND status IN ('Fail', 'Error')
- """
- df = client.query_dataframe(query)
- df = drop_prefix_rows(df, "test_name")
- df["job_name"] = df["job_name"].str.title()
- return df
-
-
-def url_to_html_link(url: str) -> str:
- if not url:
- return ""
- text = url.split("/")[-1]
- if not text:
- text = "results"
- return f'<a href="{url}">{text}</a>'
-
-
-def format_test_name_for_linewrap(text: str) -> str:
- """Tweak the test name to improve line wrapping."""
- return text.replace(".py::", "/")
-
-
-def format_results_as_html_table(results) -> str:
- if len(results) == 0:
- return "Nothing to report
"
- results.columns = [col.replace("_", " ").title() for col in results.columns]
- html = (
- results.to_html(
- index=False,
- formatters={
- "Results Link": url_to_html_link,
- "Test Name": format_test_name_for_linewrap,
- },
- escape=False,
- ) # tbody/thead tags interfere with the table sorting script
- .replace("\n", "")
- .replace(" \n", "")
- .replace("\n", "")
- .replace(" \n", "")
- .replace(' argparse.Namespace:
- parser = argparse.ArgumentParser(description="Create a combined CI report.")
- parser.add_argument(
- "--actions-run-url", required=True, help="URL of the actions run"
- )
- parser.add_argument(
- "--pr-number", required=True, help="Pull request number for the S3 path"
- )
- parser.add_argument(
- "--commit-sha", required=True, help="Commit SHA for the S3 path"
- )
- parser.add_argument(
- "--no-upload", action="store_true", help="Do not upload the report"
- )
- parser.add_argument(
- "--known-fails", type=str, help="Path to the file with known fails"
- )
- parser.add_argument(
- "--mark-preview", action="store_true", help="Mark the report as a preview"
- )
- return parser.parse_args()
-
-
-def main():
- args = parse_args()
-
- db_client = Client(
- host=os.getenv(DATABASE_HOST_VAR),
- user=os.getenv(DATABASE_USER_VAR),
- password=os.getenv(DATABASE_PASSWORD_VAR),
- port=9440,
- secure="y",
- verify=False,
- settings={"use_numpy": True},
- )
-
- s3_path = (
- f"https://s3.amazonaws.com/{S3_BUCKET}/{args.pr_number}/{args.commit_sha}/"
- )
- report_destination_url = s3_path + "combined_report.html"
- ci_running_report_url = s3_path + "ci_running.html"
-
- response = requests.get(ci_running_report_url)
- if response.status_code == 200:
- ci_running_report: str = response.text
- else:
- print(
- f"Failed to download CI running report. Status code: {response.status_code}, Response: {response.text}"
- )
- exit(1)
-
- fail_results = {
- "checks_fails": get_checks_fails(db_client, args.actions_run_url),
- "checks_known_fails": [],
- "checks_errors": get_checks_errors(db_client, args.actions_run_url),
- "regression_fails": get_regression_fails(db_client, args.actions_run_url),
- }
-
- if args.known_fails:
- if not os.path.exists(args.known_fails):
- print(f"Known fails file {args.known_fails} not found.")
- exit(1)
-
- with open(args.known_fails) as f:
- known_fails = json.load(f)
-
- if known_fails:
- fail_results["checks_known_fails"] = get_checks_known_fails(
- db_client, args.actions_run_url, known_fails
- )
-
- combined_report = (
- ci_running_report.replace("ClickHouse CI running for", "Combined CI Report for")
- .replace(
- "",
- f"""Table of Contents
-{'This is a preview. FinishCheck has not completed.' if args.mark_preview else ""}
-
-
-CI Jobs Status
-""",
- 1,
- )
- .replace(
- "
",
- f"""
-
-Checks Errors
-{format_results_as_html_table(fail_results['checks_errors'])}
-
-Checks New Fails
-{format_results_as_html_table(fail_results['checks_fails'])}
-
-Regression New Fails
-{format_results_as_html_table(fail_results['regression_fails'])}
-
-Checks Known Fails
-{format_results_as_html_table(fail_results['checks_known_fails'])}
-""",
- 1,
- )
- )
- report_path = Path("combined_report.html")
- report_path.write_text(combined_report, encoding="utf-8")
-
- if args.no_upload:
- print(f"Report saved to {report_path}")
- exit(0)
-
- # Upload the report to S3
- s3_client = boto3.client("s3")
-
- try:
- s3_client.put_object(
- Bucket=S3_BUCKET,
- Key=f"{args.pr_number}/{args.commit_sha}/combined_report.html",
- Body=combined_report,
- ContentType="text/html; charset=utf-8",
- )
- except NoCredentialsError:
- print("Credentials not available for S3 upload.")
-
- print(report_destination_url)
-
-
-if __name__ == "__main__":
- main()
diff --git a/.github/grype/parse_vulnerabilities_grype.py b/.github/grype/parse_vulnerabilities_grype.py
new file mode 100644
index 000000000000..fec2ef3bfac7
--- /dev/null
+++ b/.github/grype/parse_vulnerabilities_grype.py
@@ -0,0 +1,32 @@
+#!/usr/bin/env python3
+import json
+
+from testflows.core import *
+
+xfails = {}
+
+
+@Name("docker vulnerabilities")
+@XFails(xfails)
+@TestModule
+def docker_vulnerabilities(self):
+ with Given("I gather grype scan results"):
+ with open("./result.json", "r") as f:
+ results = json.load(f)
+
+ for vulnerability in results["matches"]:
+ with Test(
+ f"{vulnerability['vulnerability']['id']}@{vulnerability['vulnerability']['namespace']},{vulnerability['vulnerability']['severity']}",
+ flags=TE,
+ ):
+ note(vulnerability)
+ critical_levels = set(["HIGH", "CRITICAL"])
+ if vulnerability['vulnerability']["severity"].upper() in critical_levels:
+ with Then(
+ f"Found vulnerability of {vulnerability['vulnerability']['severity']} severity"
+ ):
+ result(Fail)
+
+
+if main():
+ docker_vulnerabilities()
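
# Hypothetical, heavily trimmed shape of the Grype result.json fields that
# parse_vulnerabilities_grype.py and get_cves() read (a real report contains
# many more fields per match).
sample_result = {
    "source": {"target": {"userInput": "altinityinfra/clickhouse-server:24.8"}},
    "distro": {"name": "ubuntu", "version": "22.04"},
    "matches": [
        {
            "vulnerability": {
                "id": "CVE-2024-00000",
                "namespace": "ubuntu:distro:ubuntu:22.04",
                "severity": "High",
            }
        }
    ],
}
for match in sample_result["matches"]:
    print(match["vulnerability"]["id"], match["vulnerability"]["severity"])
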
diff --git a/.github/grype/run_grype_scan.sh b/.github/grype/run_grype_scan.sh
new file mode 100755
index 000000000000..af428e37d669
--- /dev/null
+++ b/.github/grype/run_grype_scan.sh
@@ -0,0 +1,18 @@
+set -x
+set -e
+
+IMAGE=$1
+
+GRYPE_VERSION=${GRYPE_VERSION:-"v0.92.2"}
+
+docker pull $IMAGE
+docker pull anchore/grype:${GRYPE_VERSION}
+
+docker run \
+ --rm --volume /var/run/docker.sock:/var/run/docker.sock \
+ --name Grype anchore/grype:${GRYPE_VERSION} \
+ --scope all-layers \
+ -o json \
+ $IMAGE > result.json
+
+ls -sh
diff --git a/.github/grype/transform_and_upload_results_s3.sh b/.github/grype/transform_and_upload_results_s3.sh
new file mode 100755
index 000000000000..7a10b02887ef
--- /dev/null
+++ b/.github/grype/transform_and_upload_results_s3.sh
@@ -0,0 +1,13 @@
+DOCKER_IMAGE=$(echo "$DOCKER_IMAGE" | sed 's/[\/:]/_/g')
+
+S3_PATH="s3://$S3_BUCKET/$PR_NUMBER/$COMMIT_SHA/grype/$DOCKER_IMAGE"
+HTTPS_S3_PATH="https://s3.amazonaws.com/$S3_BUCKET/$PR_NUMBER/$COMMIT_SHA/grype/$DOCKER_IMAGE"
+echo "https_s3_path=$HTTPS_S3_PATH" >> $GITHUB_OUTPUT
+
+tfs --no-colors transform nice raw.log nice.log.txt
+tfs --no-colors report results -a $HTTPS_S3_PATH raw.log - --copyright "Altinity LTD" | tfs --no-colors document convert > results.html
+
+aws s3 cp --no-progress nice.log.txt $S3_PATH/nice.log.txt --content-type "text/plain; charset=utf-8" || echo "nice log file not found."
+aws s3 cp --no-progress results.html $S3_PATH/results.html || echo "results file not found."
+aws s3 cp --no-progress raw.log $S3_PATH/raw.log || echo "raw.log file not found."
+aws s3 cp --no-progress result.json $S3_PATH/result.json --content-type "text/plain; charset=utf-8" || echo "result.json not found."
\ No newline at end of file
diff --git a/.github/workflows/grype_scan.yml b/.github/workflows/grype_scan.yml
new file mode 100644
index 000000000000..e68c3e63e283
--- /dev/null
+++ b/.github/workflows/grype_scan.yml
@@ -0,0 +1,152 @@
+name: Grype Scan
+run-name: Grype Scan ${{ inputs.docker_image }}
+
+on:
+ workflow_dispatch:
+ # Inputs for manual run
+ inputs:
+ docker_image:
+ description: 'Docker image. If no tag, it will be determined by version_helper.py'
+ required: true
+ workflow_call:
+ # Inputs for workflow call
+ inputs:
+ docker_image:
+ description: 'Docker image. If no tag, it will be determined by version_helper.py'
+ required: true
+ type: string
+ version:
+ description: 'Version tag. If no version, it will be determined by version_helper.py'
+ required: false
+ type: string
+ default: ""
+ tag-suffix:
+ description: 'Tag suffix. Appended to the version from version_helper.py'
+ required: false
+ type: string
+ default: ""
+env:
+ PYTHONUNBUFFERED: 1
+ AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+ AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
+ AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+ GRYPE_VERSION: "v0.92.2-arm64v8"
+
+jobs:
+ grype_scan:
+ name: Grype Scan
+ runs-on: [self-hosted, altinity-on-demand, altinity-func-tester-aarch64]
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+
+ - name: Set up Docker
+ uses: docker/setup-buildx-action@v3
+
+ - name: Set up Python
+ run: |
+ export TESTFLOWS_VERSION="2.4.19"
+ sudo apt-get update
+ sudo apt-get install -y python3-pip python3-venv
+ python3 -m venv venv
+ source venv/bin/activate
+ pip install --upgrade requests chardet urllib3 unidiff boto3 PyGithub
+ pip install testflows==$TESTFLOWS_VERSION awscli==1.33.28
+ echo PATH=$PATH >>$GITHUB_ENV
+
+ - name: Set image tag if not given
+ if: ${{ !contains(inputs.docker_image, ':') }}
+ id: set_version
+ env:
+ TAG_SUFFIX: ${{ inputs.tag-suffix }}
+ SPECIFIED_VERSION: ${{ inputs.version }}
+ run: |
+ python3 ./tests/ci/version_helper.py | tee /tmp/version_info
+ source /tmp/version_info
+ if [ -z "$SPECIFIED_VERSION" ]; then
+ VERSION=$CLICKHOUSE_VERSION_STRING
+ else
+ VERSION=$SPECIFIED_VERSION
+ fi
+ echo "docker_image=${{ inputs.docker_image }}:$PR_NUMBER-$VERSION$TAG_SUFFIX" >> $GITHUB_OUTPUT
+ echo "commit_sha=$CLICKHOUSE_VERSION_GITHASH" >> $GITHUB_OUTPUT
+
+ - name: Run Grype Scan
+ run: |
+ DOCKER_IMAGE=${{ steps.set_version.outputs.docker_image || inputs.docker_image }}
+ ./.github/grype/run_grype_scan.sh $DOCKER_IMAGE
+
+ - name: Parse grype results
+ run: |
+ python3 -u ./.github/grype/parse_vulnerabilities_grype.py -o nice --no-colors --log raw.log --test-to-end
+
+ - name: Transform and Upload Grype Results
+ if: always()
+ id: upload_results
+ env:
+ S3_BUCKET: "altinity-build-artifacts"
+ COMMIT_SHA: ${{ steps.set_version.outputs.commit_sha || github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+ PR_NUMBER: ${{ env.PR_NUMBER || github.event.pull_request.number || 0 }}
+ DOCKER_IMAGE: ${{ steps.set_version.outputs.docker_image || inputs.docker_image }}
+ run: |
+ echo "PR_NUMBER=$PR_NUMBER"
+ ./.github/grype/transform_and_upload_results_s3.sh
+
+ - name: Create step summary
+ if: always()
+ id: create_summary
+ run: |
+ jq -r '"**Image**: \(.source.target.userInput)"' result.json >> $GITHUB_STEP_SUMMARY
+ jq -r '.distro | "**Distro**: \(.name):\(.version)"' result.json >> $GITHUB_STEP_SUMMARY
+ if jq -e '.matches | length == 0' result.json > /dev/null; then
+ echo "No CVEs" >> $GITHUB_STEP_SUMMARY
+ else
+ echo "| Severity | Count |" >> $GITHUB_STEP_SUMMARY
+ echo "|------------|-------|" >> $GITHUB_STEP_SUMMARY
+ jq -r '
+ .matches |
+ map(.vulnerability.severity) |
+ group_by(.) |
+ map({severity: .[0], count: length}) |
+ sort_by(.severity) |
+ map("| \(.severity) | \(.count) |") |
+ .[]
+ ' result.json >> $GITHUB_STEP_SUMMARY
+ fi
+
+ HIGH_COUNT=$(jq -r '.matches | map(.vulnerability.severity) | map(select(. == "High")) | length' result.json)
+ CRITICAL_COUNT=$(jq -r '.matches | map(.vulnerability.severity) | map(select(. == "Critical")) | length' result.json)
+ TOTAL_HIGH_CRITICAL=$((HIGH_COUNT + CRITICAL_COUNT))
+ echo "total_high_critical=$TOTAL_HIGH_CRITICAL" >> $GITHUB_OUTPUT
+
+ if [ $TOTAL_HIGH_CRITICAL -gt 0 ]; then
+ echo '## High and Critical vulnerabilities found' >> $GITHUB_STEP_SUMMARY
+ echo '```' >> $GITHUB_STEP_SUMMARY
+ cat raw.log | tfs --no-colors show tests | grep -Pi 'High|Critical' >> $GITHUB_STEP_SUMMARY
+ echo '```' >> $GITHUB_STEP_SUMMARY
+ fi
+
+ - name: Set commit status
+ if: always()
+ uses: actions/github-script@v7
+ with:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ script: |
+ github.rest.repos.createCommitStatus({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ sha: '${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}',
+ state: '${{ steps.create_summary.outputs.total_high_critical > 0 && 'failure' || 'success' }}',
+ target_url: '${{ steps.upload_results.outputs.https_s3_path }}/results.html',
+ description: 'Grype Scan Completed with ${{ steps.create_summary.outputs.total_high_critical }} high/critical vulnerabilities',
+ context: 'Grype Scan ${{ steps.set_version.outputs.docker_image || inputs.docker_image }}'
+ })
+
+ - name: Upload artifacts
+ if: always()
+ uses: actions/upload-artifact@v4
+ with:
+ name: grype-results-${{ hashFiles('raw.log') }}
+ path: |
+ result.json
+ nice.log.txt
diff --git a/.github/workflows/regression.yml b/.github/workflows/regression.yml
index 576f68300b29..3d5d06e5b296 100644
--- a/.github/workflows/regression.yml
+++ b/.github/workflows/regression.yml
@@ -88,14 +88,14 @@ name: Regression test workflow - Release
env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1
- AWS_ACCESS_KEY_ID: ${{ secrets.AWS_REPORT_KEY_ID }}
- AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_REPORT_SECRET_ACCESS_KEY }}
- AWS_DEFAULT_REGION: ${{ secrets.AWS_REPORT_REGION }}
+ AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+ AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+ AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
CHECKS_DATABASE_HOST: ${{ secrets.CHECKS_DATABASE_HOST }}
- CHECKS_DATABASE_USER: ${{ secrets.CHECKS_DATABASE_USER }}
- CHECKS_DATABASE_PASSWORD: ${{ secrets.CHECKS_DATABASE_PASSWORD }}
+ CHECKS_DATABASE_USER: ${{ secrets.CLICKHOUSE_TEST_STAT_LOGIN }}
+ CHECKS_DATABASE_PASSWORD: ${{ secrets.CLICKHOUSE_TEST_STAT_PASSWORD }}
args: --test-to-end
--no-colors
--local
@@ -179,7 +179,7 @@ jobs:
python3
-u ${{ env.SUITE }}/regression.py
--clickhouse-binary-path ${{ env.clickhouse_path }}
- --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name=$GITHUB_JOB job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)"
+ --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name=$GITHUB_JOB job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)"
${{ env.args }} || EXITCODE=$?;
.github/add_link_to_logs.sh;
exit $EXITCODE
@@ -243,7 +243,7 @@ jobs:
-u alter/regression.py
--clickhouse-binary-path ${{ env.clickhouse_path }}
--only "/alter/${{ matrix.ONLY }} partition/*"
- --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name=$GITHUB_JOB job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)"
+ --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name=$GITHUB_JOB job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)"
${{ env.args }} || EXITCODE=$?;
.github/add_link_to_logs.sh;
exit $EXITCODE
@@ -314,7 +314,7 @@ jobs:
--aws-s3-region ${{ secrets.REGRESSION_AWS_S3_REGION }}
--aws-s3-key-id ${{ secrets.REGRESSION_AWS_S3_KEY_ID }}
--aws-s3-access-key ${{ secrets.REGRESSION_AWS_S3_SECRET_ACCESS_KEY }}
- --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name=$GITHUB_JOB job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)"
+ --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name=$GITHUB_JOB job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)"
${{ env.args }} || EXITCODE=$?;
.github/add_link_to_logs.sh;
exit $EXITCODE
@@ -374,7 +374,7 @@ jobs:
-u ${{ env.SUITE }}/regression.py
--ssl
--clickhouse-binary-path ${{ env.clickhouse_path }}
- --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name=$GITHUB_JOB job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)"
+ --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name=$GITHUB_JOB job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)"
${{ env.args }} || EXITCODE=$?;
.github/add_link_to_logs.sh;
exit $EXITCODE
@@ -436,7 +436,7 @@ jobs:
python3
-u ${{ env.SUITE }}/regression.py
--clickhouse-binary-path ${{ env.clickhouse_path }}
- --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name=$GITHUB_JOB job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)"
+ --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name=$GITHUB_JOB job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)"
${{ env.args }} || EXITCODE=$?;
.github/add_link_to_logs.sh;
exit $EXITCODE
@@ -494,7 +494,7 @@ jobs:
python3
-u ${{ env.SUITE }}/regression.py
--clickhouse-binary-path ${{ env.clickhouse_path }}
- --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name=$GITHUB_JOB job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)"
+ --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name=$GITHUB_JOB job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)"
${{ env.args }} || EXITCODE=$?;
.github/add_link_to_logs.sh;
exit $EXITCODE
@@ -562,7 +562,7 @@ jobs:
--aws-s3-region ${{ secrets.REGRESSION_AWS_S3_REGION }}
--aws-s3-key-id ${{ secrets.REGRESSION_AWS_S3_KEY_ID }}
--aws-s3-access-key ${{ secrets.REGRESSION_AWS_S3_SECRET_ACCESS_KEY }}
- --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name=$GITHUB_JOB job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)"
+ --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name=$GITHUB_JOB job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)"
${{ env.args }} || EXITCODE=$?;
.github/add_link_to_logs.sh;
exit $EXITCODE
@@ -636,7 +636,7 @@ jobs:
--azure-account-name ${{ secrets.AZURE_ACCOUNT_NAME }}
--azure-storage-key ${{ secrets.AZURE_STORAGE_KEY }}
--azure-container ${{ secrets.AZURE_CONTAINER_NAME }}
- --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name=$GITHUB_JOB job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)"
+ --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name=$GITHUB_JOB job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)"
${{ env.args }} || EXITCODE=$?;
.github/add_link_to_logs.sh;
exit $EXITCODE
@@ -706,7 +706,7 @@ jobs:
--gcs-key-secret ${{ secrets.REGRESSION_GCS_KEY_SECRET }}
--gcs-uri ${{ secrets.REGRESSION_GCS_URI }}
--with-${{ matrix.STORAGE }}
- --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name=$GITHUB_JOB job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)"
+ --attr project="$GITHUB_REPOSITORY" project.id="$GITHUB_REPOSITORY_ID" package="${{ env.clickhouse_path }}" version="${{ env.version }}" user.name="$GITHUB_ACTOR" repository="https://github.com/Altinity/clickhouse-regression" commit.hash="$(git rev-parse HEAD)" job.name=$GITHUB_JOB job.retry=$GITHUB_RUN_ATTEMPT job.url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" arch="$(uname -i)"
${{ env.args }} || EXITCODE=$?;
.github/add_link_to_logs.sh;
exit $EXITCODE
diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml
index e70fd154996a..4e7c8629c1bf 100644
--- a/.github/workflows/release_branches.yml
+++ b/.github/workflows/release_branches.yml
@@ -7,6 +7,11 @@ env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+ CLICKHOUSE_TEST_STAT_LOGIN: ${{ secrets.CLICKHOUSE_TEST_STAT_LOGIN }}
+ CLICKHOUSE_TEST_STAT_PASSWORD: ${{ secrets.CLICKHOUSE_TEST_STAT_PASSWORD }}
+ CLICKHOUSE_TEST_STAT_URL: ${{ secrets.CLICKHOUSE_TEST_STAT_URL }}
+ DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
+ ROBOT_TOKEN: ${{ secrets.ROBOT_TOKEN }}
on: # yamllint disable-line rule:truthy
pull_request:
@@ -16,7 +21,7 @@ on: # yamllint disable-line rule:truthy
- opened
branches:
# Anything/24.8 (e.g customizations/24.8.x)
- - '**/24.8*'
+ - '**24.8*'
release:
types:
- published
@@ -64,6 +69,14 @@ jobs:
- name: Re-create GH statuses for skipped jobs if any
run: |
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ runner.temp }}/ci_run_data.json --update-gh-statuses
+ - name: Note report location to summary
+ env:
+ PR_NUMBER: ${{ github.event.pull_request.number || 0 }}
+ COMMIT_SHA: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+ run: |
+ REPORT_LINK=https://s3.amazonaws.com/altinity-build-artifacts/$PR_NUMBER/$COMMIT_SHA/ci_run_report.html
+ echo "Workflow Run Report: [View Report]($REPORT_LINK)" >> $GITHUB_STEP_SUMMARY
+
BuildDockers:
needs: [RunConfig]
if: ${{ !failure() && !cancelled() }}
@@ -533,24 +546,24 @@ jobs:
##################################### REGRESSION TESTS ######################################
#############################################################################################
RegressionTestsRelease:
- needs: [BuilderDebRelease]
- if: ${{ !failure() && !cancelled() }}
+ needs: [RunConfig, BuilderDebRelease]
+ if: ${{ !failure() && !cancelled() && !contains(fromJson(needs.RunConfig.outputs.data).ci_settings.exclude_keywords, 'regression') }}
uses: ./.github/workflows/regression.yml
secrets: inherit
with:
runner_type: altinity-on-demand, altinity-type-cpx51, altinity-image-x86-app-docker-ce, altinity-setup-regression
- commit: 11dcb1ad771e6afebcb06bcc7bf1c1d8b184d838
+ commit: e3c00be97a045aa04e9d1a6ec50cc64f4c387b70
arch: release
build_sha: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout_minutes: 300
RegressionTestsAarch64:
- needs: [BuilderDebAarch64]
- if: ${{ !failure() && !cancelled() }}
+ needs: [RunConfig, BuilderDebAarch64]
+ if: ${{ !failure() && !cancelled() && !contains(fromJson(needs.RunConfig.outputs.data).ci_settings.exclude_keywords, 'regression') && !contains(fromJson(needs.RunConfig.outputs.data).ci_settings.exclude_keywords, 'aarch64')}}
uses: ./.github/workflows/regression.yml
secrets: inherit
with:
runner_type: altinity-on-demand, altinity-type-cax41, altinity-image-arm-app-docker-ce, altinity-setup-regression
- commit: 11dcb1ad771e6afebcb06bcc7bf1c1d8b184d838
+ commit: e3c00be97a045aa04e9d1a6ec50cc64f4c387b70
arch: aarch64
build_sha: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout_minutes: 300
@@ -572,6 +585,25 @@ jobs:
test_name: Sign aarch64
runner_type: altinity-on-demand, altinity-func-tester
data: ${{ needs.RunConfig.outputs.data }}
+ GrypeScan:
+ needs: [RunConfig, DockerServerImage, DockerKeeperImage]
+ if: ${{ !failure() && !cancelled() }}
+ strategy:
+ fail-fast: false
+ matrix:
+ include:
+ - image: server
+ suffix: ''
+ - image: server
+ suffix: '-alpine'
+ - image: keeper
+ suffix: ''
+ uses: ./.github/workflows/grype_scan.yml
+ secrets: inherit
+ with:
+ docker_image: altinityinfra/clickhouse-${{ matrix.image }}
+ version: ${{ fromJson(needs.RunConfig.outputs.data).version }}
+ tag-suffix: ${{ matrix.suffix }}
FinishCheck:
if: ${{ !cancelled() }}
needs:
@@ -606,6 +638,7 @@ jobs:
- CompatibilityCheckAarch64
- RegressionTestsRelease
- RegressionTestsAarch64
+ - GrypeScan
- SignRelease
runs-on: [self-hosted, altinity-on-demand, altinity-style-checker-aarch64]
steps:
@@ -626,25 +659,13 @@ jobs:
${{ toJson(needs) }}
EOF
python3 ./tests/ci/ci_buddy.py --check-wf-status
- - name: Create and upload combined report
+ - name: Create and upload report
if: ${{ !cancelled() }}
+ uses: ./.github/actions/create_workflow_report
env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
CHECKS_DATABASE_HOST: ${{ secrets.CHECKS_DATABASE_HOST }}
- CHECKS_DATABASE_USER: ${{ secrets.CHECKS_DATABASE_USER }}
- CHECKS_DATABASE_PASSWORD: ${{ secrets.CHECKS_DATABASE_PASSWORD }}
- COMMIT_SHA: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
- PR_NUMBER: ${{ github.event.number }}
- ACTIONS_RUN_URL: ${{ github.event.repository.html_url }}/actions/runs/${{ github.run_id }}
- shell: bash
- run: |
- pip install clickhouse-driver==0.2.8 numpy==1.26.4 pandas==2.2.0
-
- REPORT_LINK=$(python3 .github/create_combined_ci_report.py --pr-number $PR_NUMBER --commit-sha $COMMIT_SHA --actions-run-url $ACTIONS_RUN_URL --known-fails tests/broken_tests.json)
-
- IS_VALID_URL=$(echo $REPORT_LINK | grep -E '^https?://')
- if [[ -n $IS_VALID_URL ]]; then
- echo "Combined CI Report: [View Report]($REPORT_LINK)" >> $GITHUB_STEP_SUMMARY
- else
- echo "Error: $REPORT_LINK" >> $GITHUB_STEP_SUMMARY
- exit 1
- fi
+ CHECKS_DATABASE_USER: ${{ secrets.CLICKHOUSE_TEST_STAT_LOGIN }}
+ CHECKS_DATABASE_PASSWORD: ${{ secrets.CLICKHOUSE_TEST_STAT_PASSWORD }}
+ with:
+ final: true
diff --git a/.github/workflows/reusable_build.yml b/.github/workflows/reusable_build.yml
index c64d0aaec500..ab37118fade7 100644
--- a/.github/workflows/reusable_build.yml
+++ b/.github/workflows/reusable_build.yml
@@ -7,6 +7,11 @@ env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+ CLICKHOUSE_TEST_STAT_LOGIN: ${{ secrets.CLICKHOUSE_TEST_STAT_LOGIN }}
+ CLICKHOUSE_TEST_STAT_PASSWORD: ${{ secrets.CLICKHOUSE_TEST_STAT_PASSWORD }}
+ CLICKHOUSE_TEST_STAT_URL: ${{ secrets.CLICKHOUSE_TEST_STAT_URL }}
+ DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
+ ROBOT_TOKEN: ${{ secrets.ROBOT_TOKEN }}
name: Build ClickHouse
'on':
@@ -88,6 +93,11 @@ jobs:
uses: ./.github/actions/common_setup
with:
job_type: build_check
+ - name: Create source tar
+ run: |
+ mkdir -p "$TEMP_PATH/build_check/package_release"
+ cd .. && tar czf $TEMP_PATH/build_source.src.tar.gz ClickHouse/
+ cd $TEMP_PATH && tar xvzf $TEMP_PATH/build_source.src.tar.gz
- name: Pre
run: |
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --pre --job-name '${{inputs.build_name}}'
diff --git a/.github/workflows/reusable_sign.yml b/.github/workflows/reusable_sign.yml
index 2bd8ae430fa5..fee2f9131469 100644
--- a/.github/workflows/reusable_sign.yml
+++ b/.github/workflows/reusable_sign.yml
@@ -1,7 +1,4 @@
-### For the pure soul wishes to move it to another place
-# https://github.com/orgs/community/discussions/9050
-
-name: Testing workflow
+name: Signing workflow
'on':
workflow_call:
inputs:
@@ -63,6 +60,11 @@ env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+ CLICKHOUSE_TEST_STAT_LOGIN: ${{ secrets.CLICKHOUSE_TEST_STAT_LOGIN }}
+ CLICKHOUSE_TEST_STAT_PASSWORD: ${{ secrets.CLICKHOUSE_TEST_STAT_PASSWORD }}
+ CLICKHOUSE_TEST_STAT_URL: ${{ secrets.CLICKHOUSE_TEST_STAT_URL }}
+ DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
+ ROBOT_TOKEN: ${{ secrets.ROBOT_TOKEN }}
jobs:
runner_labels_setup:
diff --git a/.github/workflows/reusable_test.yml b/.github/workflows/reusable_test.yml
index 4867cab03480..64f41c3c28a4 100644
--- a/.github/workflows/reusable_test.yml
+++ b/.github/workflows/reusable_test.yml
@@ -44,10 +44,25 @@ name: Testing workflow
description: if given, it's passed to the environments
required: false
AWS_SECRET_ACCESS_KEY:
- description: the access key to the aws param store.
+ description: the access key to the aws s3 bucket.
required: true
AWS_ACCESS_KEY_ID:
- description: the access key id to the aws param store.
+ description: the access key id to the aws s3 bucket.
+ required: true
+ CLICKHOUSE_TEST_STAT_LOGIN:
+ description: username for ci db.
+ required: true
+ CLICKHOUSE_TEST_STAT_PASSWORD:
+ description: password for ci db.
+ required: true
+ CLICKHOUSE_TEST_STAT_URL:
+ description: url for ci db.
+ required: true
+ DOCKER_PASSWORD:
+ description: token to upload docker images.
+ required: true
+ ROBOT_TOKEN:
+ description: token to update ci status.
required: true
env:
@@ -57,6 +72,11 @@ env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+ CLICKHOUSE_TEST_STAT_LOGIN: ${{ secrets.CLICKHOUSE_TEST_STAT_LOGIN }}
+ CLICKHOUSE_TEST_STAT_PASSWORD: ${{ secrets.CLICKHOUSE_TEST_STAT_PASSWORD }}
+ CLICKHOUSE_TEST_STAT_URL: ${{ secrets.CLICKHOUSE_TEST_STAT_URL }}
+ DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
+ ROBOT_TOKEN: ${{ secrets.ROBOT_TOKEN }}
jobs:
runner_labels_setup:
@@ -143,6 +163,16 @@ jobs:
if: ${{ !cancelled() }}
run: |
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --post --job-name '${{inputs.test_name}}'
+ - name: Update workflow report
+ if: ${{ !cancelled() }}
+ uses: ./.github/actions/create_workflow_report
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ CHECKS_DATABASE_HOST: ${{ secrets.CHECKS_DATABASE_HOST }}
+ CHECKS_DATABASE_USER: ${{ secrets.CLICKHOUSE_TEST_STAT_LOGIN }}
+ CHECKS_DATABASE_PASSWORD: ${{ secrets.CLICKHOUSE_TEST_STAT_PASSWORD }}
+ with:
+ final: false
- name: Mark as done
if: ${{ !cancelled() }}
run: |
diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt
index 28b744feeda5..d71dcfefe2c3 100644
--- a/cmake/autogenerated_versions.txt
+++ b/cmake/autogenerated_versions.txt
@@ -10,10 +10,10 @@ SET(VERSION_GITHASH c8a1e828dcf9832dc2d71adcbd50c698f93bb69b)
#10000 for altinitystable candidates
#20000 for altinityedge candidates
-SET(VERSION_TWEAK 10000)
+SET(VERSION_TWEAK 10500)
SET(VERSION_FLAVOUR altinitytest)
-SET(VERSION_DESCRIBE v24.8.14.10000.altinitytest)
-SET(VERSION_STRING 24.8.14.10000.altinitytest)
+SET(VERSION_DESCRIBE v24.8.14.10500.altinitytest)
+SET(VERSION_STRING 24.8.14.10500.altinitytest)
# end of autochange
diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile
index 646ef74164ba..880dba685b95 100644
--- a/docker/keeper/Dockerfile
+++ b/docker/keeper/Dockerfile
@@ -12,9 +12,7 @@ RUN arch=${TARGETARCH:-amd64} \
&& ln -s "${rarch}-linux-gnu" /lib/linux-gnu
-# As of Nov 3rd 2024, alpine:3.20.3 has 1 "unknown"-type CVE:
-# * https://security.alpinelinux.org/vuln/CVE-2024-9143
-FROM alpine:3.20.3
+FROM alpine:3.21.3
ENV LANG=en_US.UTF-8 \
LANGUAGE=en_US:en \
diff --git a/docker/packager/packager b/docker/packager/packager
index 7975b6a03aae..c21bf41de4e7 100755
--- a/docker/packager/packager
+++ b/docker/packager/packager
@@ -6,12 +6,13 @@ import os
import subprocess
import sys
from pathlib import Path
-from typing import List, Optional
+from typing import Dict, List, Optional
SCRIPT_PATH = Path(__file__).absolute()
IMAGE_TYPE = "binary-builder"
IMAGE_NAME = f"altinityinfra/{IMAGE_TYPE}"
-
+DEFAULT_TMP_PATH = SCRIPT_PATH.parent.absolute() / 'tmp'
+TEMP_PATH = Path(os.getenv("TEMP_PATH", DEFAULT_TMP_PATH))
class BuildException(Exception):
pass
@@ -82,9 +83,22 @@ def run_docker_image_with_env(
ch_root: Path,
cargo_cache_dir: Path,
ccache_dir: Optional[Path],
+ aws_secrets: Optional[Dict[str, str]]
) -> None:
output_dir.mkdir(parents=True, exist_ok=True)
cargo_cache_dir.mkdir(parents=True, exist_ok=True)
+ extra_parts = ""
+
+ if aws_secrets:
+ # Pass AWS credentials via file rather than via env to avoid leaking secrets
+ env_part = {"AWS_CONFIG_FILE": "/home/clickhouse/.aws/credentials"}
+ host_aws_config_file_path = Path(TEMP_PATH) / 'aws_config'
+ with open(host_aws_config_file_path, 'wt') as f:
+ f.write("[default]")
+ for key, value in aws_secrets.items():
+ f.write(f"\n{key}={value}")
+
+ extra_parts = f"--volume={host_aws_config_file_path}:{env_part['AWS_CONFIG_FILE']}"
env_part = " -e ".join(env_variables)
if env_part:
@@ -107,6 +121,7 @@ def run_docker_image_with_env(
cmd = (
f"docker run --network=host --user={user} --rm {ccache_mount} "
f"--volume={output_dir}:/output --volume={ch_root}:/build {env_part} "
+ f" {extra_parts} "
f"--volume={cargo_cache_dir}:/rust/cargo/registry {interactive} {image_name}"
)
@@ -130,11 +145,9 @@ def parse_env_variables(
sanitizer: str,
package_type: str,
cache: str,
- s3_access_key_id: str,
s3_bucket: str,
s3_directory: str,
s3_rw_access: bool,
- s3_secret_access_key: str,
clang_tidy: bool,
version: str,
official: bool,
@@ -323,10 +336,6 @@ def parse_env_variables(
result.append(f"SCCACHE_S3_KEY_PREFIX={sccache_dir}")
if not s3_rw_access:
result.append("SCCACHE_S3_NO_CREDENTIALS=true")
- if s3_access_key_id:
- result.append(f"AWS_ACCESS_KEY_ID={s3_access_key_id}")
- if s3_secret_access_key:
- result.append(f"AWS_SECRET_ACCESS_KEY={s3_secret_access_key}")
if clang_tidy:
# `CTCACHE_DIR` has the same purpose as the `CCACHE_DIR` above.
@@ -544,11 +553,9 @@ def main() -> None:
args.sanitizer,
args.package_type,
args.cache,
- args.s3_access_key_id,
args.s3_bucket,
args.s3_directory,
args.s3_rw_access,
- args.s3_secret_access_key,
args.clang_tidy,
args.version,
args.official,
@@ -567,6 +574,10 @@ def main() -> None:
ch_root,
args.cargo_cache_dir,
args.ccache_dir,
+ {
+ "aws_access_key_id" : args.s3_access_key_id,
+ "aws_secret_access_key" : args.s3_secret_access_key
+ }
)
logging.info("Output placed into %s", args.output_dir)
diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md
index 183b94f46418..7e898d20f22c 100644
--- a/docs/en/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md
@@ -519,7 +519,7 @@ Determines the lifetime of values.
The `TTL` clause can be set for the whole table and for each individual column. Table-level `TTL` can also specify the logic of automatic moving data between disks and volumes, or recompressing parts where all the data has been expired.
-Expressions must evaluate to [Date](/docs/en/sql-reference/data-types/date.md) or [DateTime](/docs/en/sql-reference/data-types/datetime.md) data type.
+Expressions must evaluate to [Date](/docs/en/sql-reference/data-types/date.md), [Date32](/docs/en/sql-reference/data-types/date32.md), [DateTime](/docs/en/sql-reference/data-types/datetime.md) or [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) data type.
**Syntax**
diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md
index 03fdfa048c8d..44e460dadbfb 100644
--- a/docs/en/interfaces/http.md
+++ b/docs/en/interfaces/http.md
@@ -791,7 +791,42 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
* Connection #0 to host localhost left intact
```
-## Valid JSON/XML response on exception during HTTP streaming {valid-output-on-exception-http-streaming}
+## HTTP Response Headers {#http-response-headers}
+
+ClickHouse allows you to configure custom HTTP response headers for any configurable handler type. These headers are set with the `http_response_headers` setting, which accepts key-value pairs representing header names and their values. This feature is particularly useful for implementing custom security headers, CORS policies, or any other HTTP header requirements across your ClickHouse HTTP interface.
+
+For example, you can configure headers for:
+- Regular query endpoints
+- Web UI
+- Health check endpoints
+
+It is also possible to specify `common_http_response_headers`. These will be applied to all HTTP handlers defined in the configuration.
+
+The headers will be included in the HTTP response for every configured handler.
+
+In the example below, every server response will contain two custom headers: `X-My-Common-Header` and `X-My-Custom-Header`.
+
+```xml
+<clickhouse>
+    <http_handlers>
+        <common_http_response_headers>
+            <X-My-Common-Header>Common header</X-My-Common-Header>
+        </common_http_response_headers>
+        <rule>
+            <methods>GET</methods>
+            <url>/ping</url>
+            <handler>
+                <type>ping</type>
+                <http_response_headers>
+                    <X-My-Custom-Header>Custom indeed</X-My-Custom-Header>
+                </http_response_headers>
+            </handler>
+        </rule>
+    </http_handlers>
+</clickhouse>
+```
+
+## Valid JSON/XML response on exception during HTTP streaming {#valid-output-on-exception-http-streaming}
While query execution over HTTP an exception can happen when part of the data has already been sent. Usually an exception is sent to the client in plain text
even if some specific data format was used to output data and the output may become invalid in terms of specified data format.
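A quick way to exercise the header behavior documented above is to request one of the configured endpoints and inspect the response headers. A minimal sketch, assuming a local server on the default HTTP port 8123 configured as in the XML example:

```python
# Hypothetical smoke check for common and per-handler HTTP response headers.
import requests

resp = requests.get("http://localhost:8123/ping", timeout=5)
print(resp.status_code, resp.text.strip())        # expected: 200 Ok.
print(resp.headers.get("X-My-Common-Header"))     # expected: Common header
print(resp.headers.get("X-My-Custom-Header"))     # expected: Custom indeed
```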
diff --git a/src/Functions/keyvaluepair/impl/CHKeyValuePairExtractor.h b/src/Functions/keyvaluepair/impl/CHKeyValuePairExtractor.h
index 3895cf3e77db..645d87dfda63 100644
--- a/src/Functions/keyvaluepair/impl/CHKeyValuePairExtractor.h
+++ b/src/Functions/keyvaluepair/impl/CHKeyValuePairExtractor.h
@@ -102,7 +102,17 @@ class CHKeyValuePairExtractor : public KeyValuePairExtractor
}
case State::FLUSH_PAIR:
{
- return flushPair(file, key, value, row_offset);
+ incrementRowOffset(row_offset);
+ return state_handler.flushPair(file, key, value);
+ }
+ case State::FLUSH_PAIR_AFTER_QUOTED_VALUE:
+ {
+ incrementRowOffset(row_offset);
+ return state_handler.flushPairAfterQuotedValue(file, key, value);
+ }
+ case State::WAITING_PAIR_DELIMITER:
+ {
+ return state_handler.waitPairDelimiter(file);
}
case State::END:
{
@@ -111,8 +121,7 @@ class CHKeyValuePairExtractor : public KeyValuePairExtractor
}
}
- NextState flushPair(const std::string_view & file, auto & key,
- auto & value, uint64_t & row_offset)
+ void incrementRowOffset(uint64_t & row_offset)
{
row_offset++;
@@ -120,11 +129,6 @@ class CHKeyValuePairExtractor : public KeyValuePairExtractor
{
throw Exception(ErrorCodes::LIMIT_EXCEEDED, "Number of pairs produced exceeded the limit of {}", max_number_of_pairs);
}
-
- key.commit();
- value.commit();
-
- return {0, file.empty() ? State::END : State::WAITING_KEY};
}
void reset(auto & key, auto & value)
diff --git a/src/Functions/keyvaluepair/impl/NeedleFactory.h b/src/Functions/keyvaluepair/impl/NeedleFactory.h
index 83862a2281a5..5e58523fb277 100644
--- a/src/Functions/keyvaluepair/impl/NeedleFactory.h
+++ b/src/Functions/keyvaluepair/impl/NeedleFactory.h
@@ -20,7 +20,7 @@ template
class NeedleFactory
{
public:
- SearchSymbols getWaitNeedles(const Configuration & extractor_configuration)
+ SearchSymbols getWaitKeyNeedles(const Configuration & extractor_configuration)
{
const auto & [key_value_delimiter, quoting_character, pair_delimiters]
= extractor_configuration;
@@ -39,6 +39,17 @@ class NeedleFactory
return SearchSymbols {std::string{needles.data(), needles.size()}};
}
+ SearchSymbols getWaitPairDelimiterNeedles(const Configuration & extractor_configuration)
+ {
+ const auto & pair_delimiters = extractor_configuration.pair_delimiters;
+
+ std::vector<char> needles;
+
+ std::copy(pair_delimiters.begin(), pair_delimiters.end(), std::back_inserter(needles));
+
+ return SearchSymbols {std::string{needles.data(), needles.size()}};
+ }
+
SearchSymbols getReadKeyNeedles(const Configuration & extractor_configuration)
{
const auto & [key_value_delimiter, quoting_character, pair_delimiters]
diff --git a/src/Functions/keyvaluepair/impl/StateHandler.h b/src/Functions/keyvaluepair/impl/StateHandler.h
index 178974e9d366..f102debec609 100644
--- a/src/Functions/keyvaluepair/impl/StateHandler.h
+++ b/src/Functions/keyvaluepair/impl/StateHandler.h
@@ -30,6 +30,11 @@ class StateHandler
READING_QUOTED_VALUE,
// In this state, both key and value have already been collected and should be flushed. Might jump to WAITING_KEY or END.
FLUSH_PAIR,
+ // The `READING_QUOTED_VALUE` will assert the closing quoting character is found and then flush the pair. In this case, we should not
+ // move from `FLUSH_PAIR` directly to `WAITING_FOR_KEY` because a pair delimiter has not been found. Might jump to WAITING_FOR_PAIR_DELIMITER or END
+ FLUSH_PAIR_AFTER_QUOTED_VALUE,
+ // Might jump to WAITING_KEY or END.
+ WAITING_PAIR_DELIMITER,
END
};
diff --git a/src/Functions/keyvaluepair/impl/StateHandlerImpl.h b/src/Functions/keyvaluepair/impl/StateHandlerImpl.h
index cf31d30b9dc4..8b18b2050fdd 100644
--- a/src/Functions/keyvaluepair/impl/StateHandlerImpl.h
+++ b/src/Functions/keyvaluepair/impl/StateHandlerImpl.h
@@ -40,10 +40,11 @@ class StateHandlerImpl : public StateHandler
* */
NeedleFactory needle_factory;
- wait_needles = needle_factory.getWaitNeedles(configuration);
+ wait_key_needles = needle_factory.getWaitKeyNeedles(configuration);
read_key_needles = needle_factory.getReadKeyNeedles(configuration);
read_value_needles = needle_factory.getReadValueNeedles(configuration);
read_quoted_needles = needle_factory.getReadQuotedNeedles(configuration);
+ wait_pair_delimiter_needles = needle_factory.getWaitPairDelimiterNeedles(configuration);
}
/*
@@ -51,7 +52,7 @@ class StateHandlerImpl : public StateHandler
* */
[[nodiscard]] NextState waitKey(std::string_view file) const
{
- if (const auto * p = find_first_not_symbols_or_null(file, wait_needles))
+ if (const auto * p = find_first_not_symbols_or_null(file, wait_key_needles))
{
const size_t character_position = p - file.begin();
if (isQuotingCharacter(*p))
@@ -284,7 +285,7 @@ class StateHandlerImpl : public StateHandler
{
value.append(file.begin() + pos, file.begin() + character_position);
- return {next_pos, State::FLUSH_PAIR};
+ return {next_pos, State::FLUSH_PAIR_AFTER_QUOTED_VALUE};
}
pos = next_pos;
@@ -292,14 +293,44 @@ class StateHandlerImpl : public StateHandler
return {file.size(), State::END};
}
+ [[nodiscard]] NextState flushPair(std::string_view file, auto & key, auto & value) const
+ {
+ key.commit();
+ value.commit();
+
+ return {0, file.empty() ? State::END : State::WAITING_KEY};
+ }
+
+ [[nodiscard]] NextState flushPairAfterQuotedValue(std::string_view file, auto & key, auto & value) const
+ {
+ key.commit();
+ value.commit();
+
+ return {0, file.empty() ? State::END : State::WAITING_PAIR_DELIMITER};
+ }
+
+ [[nodiscard]] NextState waitPairDelimiter(std::string_view file) const
+ {
+ if (const auto * p = find_first_symbols_or_null(file, wait_pair_delimiter_needles))
+ {
+ const size_t character_position = p - file.data();
+ size_t next_pos = character_position + 1u;
+
+ return {next_pos, State::WAITING_KEY};
+ }
+
+ return {file.size(), State::END};
+ }
+
const Configuration configuration;
private:
- SearchSymbols wait_needles;
+ SearchSymbols wait_key_needles;
SearchSymbols read_key_needles;
SearchSymbols read_value_needles;
SearchSymbols read_quoted_needles;
+ SearchSymbols wait_pair_delimiter_needles;
/*
* Helper method to copy bytes until `character_pos` and process possible escape sequence. Returns a pair containing a boolean
diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp
index 2a594839c6a9..d5b5695b24c5 100644
--- a/src/Interpreters/ActionsDAG.cpp
+++ b/src/Interpreters/ActionsDAG.cpp
@@ -134,11 +134,11 @@ ActionsDAG::ActionsDAG(const NamesAndTypesList & inputs_)
outputs.push_back(&addInput(input.name, input.type));
}
-ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs_)
+ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs_, bool duplicate_const_columns)
{
for (const auto & input : inputs_)
{
- if (input.column && isColumnConst(*input.column))
+ if (input.column && isColumnConst(*input.column) && duplicate_const_columns)
{
addInput(input);
diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h
index ee2b3fbf4f28..c73968d175b5 100644
--- a/src/Interpreters/ActionsDAG.h
+++ b/src/Interpreters/ActionsDAG.h
@@ -109,7 +109,7 @@ class ActionsDAG
ActionsDAG & operator=(ActionsDAG &&) = default;
ActionsDAG & operator=(const ActionsDAG &) = delete;
explicit ActionsDAG(const NamesAndTypesList & inputs_);
- explicit ActionsDAG(const ColumnsWithTypeAndName & inputs_);
+ explicit ActionsDAG(const ColumnsWithTypeAndName & inputs_, bool duplicate_const_columns = true);
const Nodes & getNodes() const { return nodes; }
static Nodes detachNodes(ActionsDAG && dag) { return std::move(dag.nodes); }
diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp
index ed3f78193ee8..9f51d439795e 100644
--- a/src/Planner/PlannerExpressionAnalysis.cpp
+++ b/src/Planner/PlannerExpressionAnalysis.cpp
@@ -121,7 +121,9 @@ std::optional analyzeAggregation(const QueryTreeNodeP
Names aggregation_keys;
ActionsAndProjectInputsFlagPtr before_aggregation_actions = std::make_shared();
- before_aggregation_actions->dag = ActionsDAG(input_columns);
+ /// Here it is OK to materialize const columns: if a column is used in GROUP BY, it may be expected to become non-const
+ /// See https://github.com/ClickHouse/ClickHouse/issues/70655 for example
+ before_aggregation_actions->dag = ActionsDAG(input_columns, false);
before_aggregation_actions->dag.getOutputs().clear();
std::unordered_set before_aggregation_actions_output_node_names;
diff --git a/src/Processors/TTL/ITTLAlgorithm.cpp b/src/Processors/TTL/ITTLAlgorithm.cpp
index 761f43e2422e..9dd69a656a30 100644
--- a/src/Processors/TTL/ITTLAlgorithm.cpp
+++ b/src/Processors/TTL/ITTLAlgorithm.cpp
@@ -2,6 +2,8 @@
#include
#include
+#include
+
namespace DB
{
@@ -46,18 +48,26 @@ ColumnPtr ITTLAlgorithm::executeExpressionAndGetColumn(
return block_copy.getByName(result_column).column;
}
-UInt32 ITTLAlgorithm::getTimestampByIndex(const IColumn * column, size_t index) const
+Int64 ITTLAlgorithm::getTimestampByIndex(const IColumn * column, size_t index) const
{
if (const ColumnUInt16 * column_date = typeid_cast<const ColumnUInt16 *>(column))
- return static_cast<UInt32>(date_lut.fromDayNum(DayNum(column_date->getData()[index])));
- else if (const ColumnUInt32 * column_date_time = typeid_cast<const ColumnUInt32 *>(column))
+ return date_lut.fromDayNum(DayNum(column_date->getData()[index]));
+ if (const ColumnUInt32 * column_date_time = typeid_cast<const ColumnUInt32 *>(column))
return column_date_time->getData()[index];
- else if (const ColumnConst * column_const = typeid_cast<const ColumnConst *>(column))
+ if (const ColumnInt32 * column_date_32 = typeid_cast<const ColumnInt32 *>(column))
+ return date_lut.fromDayNum(ExtendedDayNum(column_date_32->getData()[index]));
+ if (const ColumnDateTime64 * column_date_time_64 = typeid_cast<const ColumnDateTime64 *>(column))
+ return column_date_time_64->getData()[index] / intExp10OfSize(column_date_time_64->getScale());
+ if (const ColumnConst * column_const = typeid_cast<const ColumnConst *>(column))
{
if (typeid_cast<const ColumnUInt16 *>(&column_const->getDataColumn()))
- return static_cast<UInt32>(date_lut.fromDayNum(DayNum(column_const->getValue())));
- else if (typeid_cast<const ColumnUInt32 *>(&column_const->getDataColumn()))
+ return date_lut.fromDayNum(DayNum(column_const->getValue()));
+ if (typeid_cast<const ColumnUInt32 *>(&column_const->getDataColumn()))
return column_const->getValue();
+ if (typeid_cast<const ColumnInt32 *>(&column_const->getDataColumn()))
+ return date_lut.fromDayNum(ExtendedDayNum(column_const->getValue()));
+ if (const ColumnDateTime64 * column_dt64 = typeid_cast<const ColumnDateTime64 *>(&column_const->getDataColumn()))
+ return column_const->getValue() / intExp10OfSize(column_dt64->getScale());
}
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected type of result TTL column");
diff --git a/src/Processors/TTL/ITTLAlgorithm.h b/src/Processors/TTL/ITTLAlgorithm.h
index d79aa8a8dfcf..542f93d6ffe6 100644
--- a/src/Processors/TTL/ITTLAlgorithm.h
+++ b/src/Processors/TTL/ITTLAlgorithm.h
@@ -43,7 +43,7 @@ class ITTLAlgorithm
protected:
bool isTTLExpired(time_t ttl) const;
- UInt32 getTimestampByIndex(const IColumn * column, size_t index) const;
+ Int64 getTimestampByIndex(const IColumn * column, size_t index) const;
const TTLExpressions ttl_expressions;
const TTLDescription description;
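The return type is widened to Int64 because every supported TTL column is normalized to a Unix timestamp in seconds: Date and Date32 hold day numbers that go through the date LUT, DateTime is already seconds, and DateTime64 holds fixed-point values with 10^scale ticks per second. A rough Python illustration of that normalization (a sketch only; it ignores the time-zone handling done by the server's DateLUT):

```python
# Sketch of how TTL column values are normalized to seconds since the epoch.
SECONDS_PER_DAY = 86_400

def ttl_timestamp(raw: int, column_type: str, scale: int = 0) -> int:
    if column_type in ("Date", "Date32"):   # days since 1970-01-01 (Date32 may be negative)
        return raw * SECONDS_PER_DAY        # the real code goes through DateLUT::fromDayNum
    if column_type == "DateTime":           # already seconds since the epoch
        return raw
    if column_type == "DateTime64":         # fixed-point value with 10^scale ticks per second
        return raw // 10 ** scale
    raise ValueError(f"unexpected TTL column type: {column_type}")

assert ttl_timestamp(20_000, "Date") == 20_000 * 86_400
assert ttl_timestamp(1_700_000_000_123, "DateTime64", scale=3) == 1_700_000_000
```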
diff --git a/src/Processors/TTL/TTLAggregationAlgorithm.cpp b/src/Processors/TTL/TTLAggregationAlgorithm.cpp
index 2d7a37d0abea..1a4c52774033 100644
--- a/src/Processors/TTL/TTLAggregationAlgorithm.cpp
+++ b/src/Processors/TTL/TTLAggregationAlgorithm.cpp
@@ -86,7 +86,7 @@ void TTLAggregationAlgorithm::execute(Block & block)
for (size_t i = 0; i < block.rows(); ++i)
{
- UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i);
+ Int64 cur_ttl = getTimestampByIndex(ttl_column.get(), i);
bool where_filter_passed = !where_column || where_column->getBool(i);
bool ttl_expired = isTTLExpired(cur_ttl) && where_filter_passed;
diff --git a/src/Processors/TTL/TTLColumnAlgorithm.cpp b/src/Processors/TTL/TTLColumnAlgorithm.cpp
index e27050564cee..4b8f6b6f9ba2 100644
--- a/src/Processors/TTL/TTLColumnAlgorithm.cpp
+++ b/src/Processors/TTL/TTLColumnAlgorithm.cpp
@@ -59,7 +59,7 @@ void TTLColumnAlgorithm::execute(Block & block)
for (size_t i = 0; i < block.rows(); ++i)
{
- UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i);
+ Int64 cur_ttl = getTimestampByIndex(ttl_column.get(), i);
if (isTTLExpired(cur_ttl))
{
if (default_column)
diff --git a/src/Processors/TTL/TTLDeleteAlgorithm.cpp b/src/Processors/TTL/TTLDeleteAlgorithm.cpp
index 6f9bc315276f..fde1d7c51d54 100644
--- a/src/Processors/TTL/TTLDeleteAlgorithm.cpp
+++ b/src/Processors/TTL/TTLDeleteAlgorithm.cpp
@@ -34,7 +34,7 @@ void TTLDeleteAlgorithm::execute(Block & block)
for (size_t i = 0; i < block.rows(); ++i)
{
- UInt32 cur_ttl = getTimestampByIndex(ttl_column.get(), i);
+ Int64 cur_ttl = getTimestampByIndex(ttl_column.get(), i);
bool where_filter_passed = !where_column || where_column->getBool(i);
if (!isTTLExpired(cur_ttl) || !where_filter_passed)
diff --git a/src/Processors/TTL/TTLUpdateInfoAlgorithm.cpp b/src/Processors/TTL/TTLUpdateInfoAlgorithm.cpp
index 13d3030bbb87..e27698e7cb44 100644
--- a/src/Processors/TTL/TTLUpdateInfoAlgorithm.cpp
+++ b/src/Processors/TTL/TTLUpdateInfoAlgorithm.cpp
@@ -25,7 +25,7 @@ void TTLUpdateInfoAlgorithm::execute(Block & block)
auto ttl_column = executeExpressionAndGetColumn(ttl_expressions.expression, block, description.result_column);
for (size_t i = 0; i < block.rows(); ++i)
{
- UInt32 cur_ttl = ITTLAlgorithm::getTimestampByIndex(ttl_column.get(), i);
+ Int64 cur_ttl = ITTLAlgorithm::getTimestampByIndex(ttl_column.get(), i);
new_ttl_info.update(cur_ttl);
}
}
diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp
index d2bc22e98cc6..8d8174cb172c 100644
--- a/src/Server/HTTPHandler.cpp
+++ b/src/Server/HTTPHandler.cpp
@@ -895,11 +895,14 @@ std::string PredefinedQueryHandler::getQuery(HTTPServerRequest & request, HTMLFo
HTTPRequestHandlerFactoryPtr createDynamicHandlerFactory(IServer & server,
const Poco::Util::AbstractConfiguration & config,
- const std::string & config_prefix)
+ const std::string & config_prefix,
+ std::unordered_map & common_headers)
{
auto query_param_name = config.getString(config_prefix + ".handler.query_param_name", "query");
HTTPResponseHeaderSetup http_response_headers_override = parseHTTPResponseHeaders(config, config_prefix);
+ if (http_response_headers_override.has_value())
+ http_response_headers_override.value().insert(common_headers.begin(), common_headers.end());
auto creator = [&server, query_param_name, http_response_headers_override]() -> std::unique_ptr
{ return std::make_unique(server, query_param_name, http_response_headers_override); };
@@ -932,7 +935,8 @@ static inline CompiledRegexPtr getCompiledRegex(const std::string & expression)
HTTPRequestHandlerFactoryPtr createPredefinedHandlerFactory(IServer & server,
const Poco::Util::AbstractConfiguration & config,
- const std::string & config_prefix)
+ const std::string & config_prefix,
+ std::unordered_map & common_headers)
{
if (!config.has(config_prefix + ".handler.query"))
throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "There is no path '{}.handler.query' in configuration file.", config_prefix);
@@ -958,6 +962,8 @@ HTTPRequestHandlerFactoryPtr createPredefinedHandlerFactory(IServer & server,
}
HTTPResponseHeaderSetup http_response_headers_override = parseHTTPResponseHeaders(config, config_prefix);
+ if (http_response_headers_override.has_value())
+ http_response_headers_override.value().insert(common_headers.begin(), common_headers.end());
std::shared_ptr> factory;
diff --git a/src/Server/HTTPHandlerFactory.cpp b/src/Server/HTTPHandlerFactory.cpp
index fc31ad2874ef..ff3c5e2d4d60 100644
--- a/src/Server/HTTPHandlerFactory.cpp
+++ b/src/Server/HTTPHandlerFactory.cpp
@@ -13,6 +13,7 @@
#include "InterserverIOHTTPHandler.h"
#include "WebUIRequestHandler.h"
+#include
namespace DB
{
@@ -31,27 +32,35 @@ class RedirectRequestHandler : public HTTPRequestHandler
{
private:
std::string url;
+ std::unordered_map http_response_headers_override;
public:
- explicit RedirectRequestHandler(std::string url_)
- : url(std::move(url_))
+ explicit RedirectRequestHandler(std::string url_, std::unordered_map http_response_headers_override_ = {})
+ : url(std::move(url_)), http_response_headers_override(http_response_headers_override_)
{
}
void handleRequest(HTTPServerRequest &, HTTPServerResponse & response, const ProfileEvents::Event &) override
{
+ applyHTTPResponseHeaders(response, http_response_headers_override);
response.redirect(url);
}
};
HTTPRequestHandlerFactoryPtr createRedirectHandlerFactory(
const Poco::Util::AbstractConfiguration & config,
- const std::string & config_prefix)
+ const std::string & config_prefix,
+ std::unordered_map common_headers)
{
std::string url = config.getString(config_prefix + ".handler.location");
+ auto headers = parseHTTPResponseHeadersWithCommons(config, config_prefix, common_headers);
+
auto factory = std::make_shared>(
- [my_url = std::move(url)]() { return std::make_unique(my_url); });
+ [my_url = std::move(url), headers_override = std::move(headers)]()
+ {
+ return std::make_unique(my_url, headers_override);
+ });
factory->addFiltersFromConfig(config, config_prefix);
return factory;
@@ -78,6 +87,33 @@ static auto createPingHandlerFactory(IServer & server)
return std::make_shared>(std::move(creator));
}
+static auto createPingHandlerFactory(IServer & server, const Poco::Util::AbstractConfiguration & config, const String & config_prefix,
+ std::unordered_map common_headers)
+{
+ auto creator = [&server,&config,config_prefix,common_headers]() -> std::unique_ptr
+ {
+ constexpr auto ping_response_expression = "Ok.\n";
+
+ auto headers = parseHTTPResponseHeadersWithCommons(config, config_prefix, "text/html; charset=UTF-8", common_headers);
+
+ return std::make_unique(
+ server, ping_response_expression, headers);
+ };
+ return std::make_shared>(std::move(creator));
+}
+
+template
+static auto createWebUIHandlerFactory(IServer & server, const Poco::Util::AbstractConfiguration & config, const String & config_prefix,
+ std::unordered_map common_headers)
+{
+ auto creator = [&server,&config,config_prefix,common_headers]() -> std::unique_ptr
+ {
+ auto headers = parseHTTPResponseHeadersWithCommons(config, config_prefix, "text/html; charset=UTF-8", common_headers);
+ return std::make_unique(server, headers);
+ };
+ return std::make_shared>(std::move(creator));
+}
+
static inline auto createHandlersFactoryFromConfig(
IServer & server,
const Poco::Util::AbstractConfiguration & config,
@@ -90,6 +126,19 @@ static inline auto createHandlersFactoryFromConfig(
Poco::Util::AbstractConfiguration::Keys keys;
config.keys(prefix, keys);
+ std::unordered_map common_headers_override;
+
+ if (std::find(keys.begin(), keys.end(), "common_http_response_headers") != keys.end())
+ {
+ auto common_headers_prefix = prefix + ".common_http_response_headers";
+ Poco::Util::AbstractConfiguration::Keys headers_keys;
+ config.keys(common_headers_prefix, headers_keys);
+ for (const auto & header_key : headers_keys)
+ {
+ common_headers_override[header_key] = config.getString(common_headers_prefix + "." + header_key);
+ }
+ }
+
for (const auto & key : keys)
{
if (key == "defaults")
@@ -106,50 +155,67 @@ static inline auto createHandlersFactoryFromConfig(
if (handler_type == "static")
{
- main_handler_factory->addHandler(createStaticHandlerFactory(server, config, prefix + "." + key));
+ main_handler_factory->addHandler(createStaticHandlerFactory(server, config, prefix + "." + key, common_headers_override));
}
else if (handler_type == "redirect")
{
- main_handler_factory->addHandler(createRedirectHandlerFactory(config, prefix + "." + key));
+ main_handler_factory->addHandler(createRedirectHandlerFactory(config, prefix + "." + key, common_headers_override));
}
else if (handler_type == "dynamic_query_handler")
{
- main_handler_factory->addHandler(createDynamicHandlerFactory(server, config, prefix + "." + key));
+ main_handler_factory->addHandler(createDynamicHandlerFactory(server, config, prefix + "." + key, common_headers_override));
}
else if (handler_type == "predefined_query_handler")
{
- main_handler_factory->addHandler(createPredefinedHandlerFactory(server, config, prefix + "." + key));
+ main_handler_factory->addHandler(createPredefinedHandlerFactory(server, config, prefix + "." + key, common_headers_override));
}
else if (handler_type == "prometheus")
{
main_handler_factory->addHandler(
- createPrometheusHandlerFactoryForHTTPRule(server, config, prefix + "." + key, async_metrics));
+ createPrometheusHandlerFactoryForHTTPRule(server, config, prefix + "." + key, async_metrics, common_headers_override));
}
else if (handler_type == "replicas_status")
{
- main_handler_factory->addHandler(createReplicasStatusHandlerFactory(server, config, prefix + "." + key));
+ main_handler_factory->addHandler(createReplicasStatusHandlerFactory(server, config, prefix + "." + key, common_headers_override));
}
else if (handler_type == "ping")
{
- auto handler = createPingHandlerFactory(server);
- handler->addFiltersFromConfig(config, prefix + "." + key);
+ const String config_prefix = prefix + "." + key;
+ auto handler = createPingHandlerFactory(server, config, config_prefix, common_headers_override);
+ handler->addFiltersFromConfig(config, config_prefix);
main_handler_factory->addHandler(std::move(handler));
}
else if (handler_type == "play")
{
- auto handler = std::make_shared>(server);
+ auto handler = createWebUIHandlerFactory(server, config, prefix + "." + key, common_headers_override);
handler->addFiltersFromConfig(config, prefix + "." + key);
main_handler_factory->addHandler(std::move(handler));
}
else if (handler_type == "dashboard")
{
- auto handler = std::make_shared>(server);
+ auto handler = createWebUIHandlerFactory(server, config, prefix + "." + key, common_headers_override);
handler->addFiltersFromConfig(config, prefix + "." + key);
main_handler_factory->addHandler(std::move(handler));
}
else if (handler_type == "binary")
{
- auto handler = std::make_shared>(server);
+ auto handler = createWebUIHandlerFactory(server, config, prefix + "." + key, common_headers_override);
+ handler->addFiltersFromConfig(config, prefix + "." + key);
+ main_handler_factory->addHandler(std::move(handler));
+ }
+ else if (handler_type == "js")
+ {
+ // NOTE: JavaScriptWebUIRequestHandler only makes sense for the paths /js/uplot.js and /js/lz-string.js,
+ // because these paths are hardcoded in dashboard.html
+ const auto & path = config.getString(prefix + "." + key + ".url", "");
+ if (path != "/js/uplot.js" && path != "/js/lz-string.js")
+ {
+ throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER,
+ "Handler type 'js' is only supported for url '/js/'. "
+ "Configured path here: {}", path);
+ }
+
+ auto handler = createWebUIHandlerFactory(server, config, prefix + "." + key, common_headers_override);
handler->addFiltersFromConfig(config, prefix + "." + key);
main_handler_factory->addHandler(std::move(handler));
}
@@ -157,7 +223,7 @@ static inline auto createHandlersFactoryFromConfig(
throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Unknown handler type '{}' in config here: {}.{}.handler.type",
handler_type, prefix, key);
}
- else
+ else if (key != "common_http_response_headers")
throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, "Unknown element in config: "
"{}.{}, must be 'rule' or 'defaults'", prefix, key);
}
diff --git a/src/Server/HTTPHandlerFactory.h b/src/Server/HTTPHandlerFactory.h
index db4bb73cbc44..f2b7760f8b02 100644
--- a/src/Server/HTTPHandlerFactory.h
+++ b/src/Server/HTTPHandlerFactory.h
@@ -110,19 +110,23 @@ class HandlingRuleHTTPHandlerFactory : public HTTPRequestHandlerFactory
HTTPRequestHandlerFactoryPtr createStaticHandlerFactory(IServer & server,
const Poco::Util::AbstractConfiguration & config,
- const std::string & config_prefix);
+ const std::string & config_prefix,
+ std::unordered_map & common_headers);
HTTPRequestHandlerFactoryPtr createDynamicHandlerFactory(IServer & server,
const Poco::Util::AbstractConfiguration & config,
- const std::string & config_prefix);
+ const std::string & config_prefix,
+ std::unordered_map & common_headers);
HTTPRequestHandlerFactoryPtr createPredefinedHandlerFactory(IServer & server,
const Poco::Util::AbstractConfiguration & config,
- const std::string & config_prefix);
+ const std::string & config_prefix,
+ std::unordered_map & common_headers);
HTTPRequestHandlerFactoryPtr createReplicasStatusHandlerFactory(IServer & server,
const Poco::Util::AbstractConfiguration & config,
- const std::string & config_prefix);
+ const std::string & config_prefix,
+ std::unordered_map & common_headers);
/// @param server - used in handlers to check IServer::isCancelled()
/// @param config - not the same as server.config(), since it can be newer
diff --git a/src/Server/HTTPResponseHeaderWriter.cpp b/src/Server/HTTPResponseHeaderWriter.cpp
index fd29af5bdc77..912b696faffb 100644
--- a/src/Server/HTTPResponseHeaderWriter.cpp
+++ b/src/Server/HTTPResponseHeaderWriter.cpp
@@ -66,4 +66,25 @@ void applyHTTPResponseHeaders(Poco::Net::HTTPResponse & response, const std::uno
response.set(header_name, header_value);
}
+std::unordered_map parseHTTPResponseHeadersWithCommons(
+ const Poco::Util::AbstractConfiguration & config,
+ const std::string & config_prefix,
+ const std::string & default_content_type,
+ const std::unordered_map & common_headers)
+{
+ auto headers = parseHTTPResponseHeaders(config, config_prefix, default_content_type);
+ headers.insert(common_headers.begin(), common_headers.end());
+ return headers;
+}
+
+std::unordered_map parseHTTPResponseHeadersWithCommons(
+ const Poco::Util::AbstractConfiguration & config,
+ const std::string & config_prefix,
+ const std::unordered_map & common_headers)
+{
+ auto headers = parseHTTPResponseHeaders(config, config_prefix).value_or(std::unordered_map{});
+ headers.insert(common_headers.begin(), common_headers.end());
+ return headers;
+}
+
}
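One detail of `parseHTTPResponseHeadersWithCommons` worth spelling out: `std::unordered_map::insert` does not overwrite keys that already exist, so a header configured directly on a handler takes precedence over the same header name in `common_http_response_headers`. A small Python sketch of the equivalent merge (the header names are illustrative only):

```python
# Handler-specific headers win over common ones, mirroring unordered_map::insert,
# which keeps keys that are already present.
def merge_headers(handler_headers: dict, common_headers: dict) -> dict:
    merged = dict(handler_headers)
    for name, value in common_headers.items():
        merged.setdefault(name, value)
    return merged

common = {"X-My-Common-Header": "Common header"}
specific = {"X-My-Common-Header": "Handler override", "X-My-Custom-Header": "Custom indeed"}

merged = merge_headers(specific, common)
assert merged["X-My-Common-Header"] == "Handler override"
assert merged["X-My-Custom-Header"] == "Custom indeed"
```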
diff --git a/src/Server/HTTPResponseHeaderWriter.h b/src/Server/HTTPResponseHeaderWriter.h
index 06281abb42dd..7e240b9eddec 100644
--- a/src/Server/HTTPResponseHeaderWriter.h
+++ b/src/Server/HTTPResponseHeaderWriter.h
@@ -22,4 +22,16 @@ std::unordered_map parseHTTPResponseHeaders(const std::string &
void applyHTTPResponseHeaders(Poco::Net::HTTPResponse & response, const HTTPResponseHeaderSetup & setup);
void applyHTTPResponseHeaders(Poco::Net::HTTPResponse & response, const std::unordered_map & setup);
+
+std::unordered_map parseHTTPResponseHeadersWithCommons(
+ const Poco::Util::AbstractConfiguration & config,
+ const std::string & config_prefix,
+ const std::unordered_map & common_headers);
+
+std::unordered_map parseHTTPResponseHeadersWithCommons(
+ const Poco::Util::AbstractConfiguration & config,
+ const std::string & config_prefix,
+ const std::string & default_content_type,
+ const std::unordered_map & common_headers);
+
}
diff --git a/src/Server/PrometheusRequestHandler.cpp b/src/Server/PrometheusRequestHandler.cpp
index ae1fb6d629e0..d99b2c825598 100644
--- a/src/Server/PrometheusRequestHandler.cpp
+++ b/src/Server/PrometheusRequestHandler.cpp
@@ -3,6 +3,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -303,13 +304,15 @@ PrometheusRequestHandler::PrometheusRequestHandler(
IServer & server_,
const PrometheusRequestHandlerConfig & config_,
const AsynchronousMetrics & async_metrics_,
- std::shared_ptr metrics_writer_)
+ std::shared_ptr metrics_writer_,
+ std::unordered_map response_headers_)
: server(server_)
, config(config_)
, async_metrics(async_metrics_)
, metrics_writer(metrics_writer_)
, log(getLogger("PrometheusRequestHandler"))
{
+ response_headers = response_headers_;
createImpl();
}
@@ -341,6 +344,7 @@ void PrometheusRequestHandler::createImpl()
void PrometheusRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event_)
{
setThreadName("PrometheusHndlr");
+ applyHTTPResponseHeaders(response, response_headers);
try
{
diff --git a/src/Server/PrometheusRequestHandler.h b/src/Server/PrometheusRequestHandler.h
index 281ecf5260ed..550404f7cf64 100644
--- a/src/Server/PrometheusRequestHandler.h
+++ b/src/Server/PrometheusRequestHandler.h
@@ -19,7 +19,8 @@ class PrometheusRequestHandler : public HTTPRequestHandler
IServer & server_,
const PrometheusRequestHandlerConfig & config_,
const AsynchronousMetrics & async_metrics_,
- std::shared_ptr metrics_writer_);
+ std::shared_ptr metrics_writer_,
+ std::unordered_map response_headers_ = {});
~PrometheusRequestHandler() override;
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event_) override;
@@ -59,6 +60,7 @@ class PrometheusRequestHandler : public HTTPRequestHandler
std::unique_ptr write_buffer_from_response;
bool response_finalized = false;
ProfileEvents::Event write_event;
+ std::unordered_map response_headers;
};
}
diff --git a/src/Server/PrometheusRequestHandlerFactory.cpp b/src/Server/PrometheusRequestHandlerFactory.cpp
index 52f1d3b64c14..65061a5cc018 100644
--- a/src/Server/PrometheusRequestHandlerFactory.cpp
+++ b/src/Server/PrometheusRequestHandlerFactory.cpp
@@ -1,6 +1,7 @@
#include
#include
+#include
#include
#include
#include
@@ -131,14 +132,15 @@ namespace
IServer & server,
const AsynchronousMetrics & async_metrics,
const PrometheusRequestHandlerConfig & config,
- bool for_keeper)
+ bool for_keeper,
+ std::unordered_map headers = {})
{
if (!canBeHandled(config, for_keeper))
return nullptr;
auto metric_writer = createPrometheusMetricWriter(for_keeper);
- auto creator = [&server, &async_metrics, config, metric_writer]() -> std::unique_ptr
+ auto creator = [&server, &async_metrics, config, metric_writer, headers_moved = std::move(headers)]() -> std::unique_ptr
{
- return std::make_unique(server, config, async_metrics, metric_writer);
+ return std::make_unique(server, config, async_metrics, metric_writer, headers_moved);
};
return std::make_shared>(std::move(creator));
}
@@ -200,10 +202,13 @@ HTTPRequestHandlerFactoryPtr createPrometheusHandlerFactoryForHTTPRule(
IServer & server,
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix,
- const AsynchronousMetrics & asynchronous_metrics)
+ const AsynchronousMetrics & asynchronous_metrics,
+ std::unordered_map & common_headers)
{
+ auto headers = parseHTTPResponseHeadersWithCommons(config, config_prefix, common_headers);
+
auto parsed_config = parseExposeMetricsConfig(config, config_prefix + ".handler");
- auto handler = createPrometheusHandlerFactoryFromConfig(server, asynchronous_metrics, parsed_config, /* for_keeper= */ false);
+ auto handler = createPrometheusHandlerFactoryFromConfig(server, asynchronous_metrics, parsed_config, /* for_keeper= */ false, headers);
chassert(handler); /// `handler` can't be nullptr here because `for_keeper` is false.
handler->addFiltersFromConfig(config, config_prefix);
return handler;
diff --git a/src/Server/PrometheusRequestHandlerFactory.h b/src/Server/PrometheusRequestHandlerFactory.h
index c52395ca93f3..23d00b50095b 100644
--- a/src/Server/PrometheusRequestHandlerFactory.h
+++ b/src/Server/PrometheusRequestHandlerFactory.h
@@ -93,7 +93,8 @@ HTTPRequestHandlerFactoryPtr createPrometheusHandlerFactoryForHTTPRule(
IServer & server,
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix, /// path to "http_handlers.my_handler_1"
- const AsynchronousMetrics & asynchronous_metrics);
+ const AsynchronousMetrics & asynchronous_metrics,
+ std::unordered_map & common_headers);
/// Makes a HTTP Handler factory to handle requests for prometheus metrics as a part of the default HTTP rule in the section.
/// Expects a configuration like this:
diff --git a/src/Server/ReplicasStatusHandler.cpp b/src/Server/ReplicasStatusHandler.cpp
index 419ad635d0d5..042c9657f6d2 100644
--- a/src/Server/ReplicasStatusHandler.cpp
+++ b/src/Server/ReplicasStatusHandler.cpp
@@ -7,6 +7,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -28,6 +29,8 @@ void ReplicasStatusHandler::handleRequest(HTTPServerRequest & request, HTTPServe
{
try
{
+ applyHTTPResponseHeaders(response, http_response_headers_override);
+
HTMLForm params(getContext()->getSettingsRef(), request);
const auto & config = getContext()->getConfigRef();
@@ -129,9 +132,16 @@ void ReplicasStatusHandler::handleRequest(HTTPServerRequest & request, HTTPServe
HTTPRequestHandlerFactoryPtr createReplicasStatusHandlerFactory(IServer & server,
const Poco::Util::AbstractConfiguration & config,
- const std::string & config_prefix)
+ const std::string & config_prefix,
+ std::unordered_map & common_headers)
{
- auto factory = std::make_shared>(server);
+ std::unordered_map http_response_headers_override
+ = parseHTTPResponseHeadersWithCommons(config, config_prefix, "text/plain; charset=UTF-8", common_headers);
+
+ auto creator = [&server, http_response_headers_override]() -> std::unique_ptr
+ { return std::make_unique(server, http_response_headers_override); };
+
+ auto factory = std::make_shared>(std::move(creator));
factory->addFiltersFromConfig(config, config_prefix);
return factory;
}
diff --git a/src/Server/ReplicasStatusHandler.h b/src/Server/ReplicasStatusHandler.h
index 08fd757b0d61..2d3aaad184b5 100644
--- a/src/Server/ReplicasStatusHandler.h
+++ b/src/Server/ReplicasStatusHandler.h
@@ -1,6 +1,7 @@
#pragma once
#include
+#include
namespace DB
{
@@ -13,8 +14,17 @@ class ReplicasStatusHandler : public HTTPRequestHandler, WithContext
{
public:
explicit ReplicasStatusHandler(IServer & server_);
+ explicit ReplicasStatusHandler(IServer & server_, const std::unordered_map & http_response_headers_override_)
+ : ReplicasStatusHandler(server_)
+ {
+ http_response_headers_override = http_response_headers_override_;
+ }
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
+
+private:
+ /// Overrides for response headers.
+ std::unordered_map http_response_headers_override;
};
diff --git a/src/Server/StaticRequestHandler.cpp b/src/Server/StaticRequestHandler.cpp
index d8c0765bca48..34774d6689d8 100644
--- a/src/Server/StaticRequestHandler.cpp
+++ b/src/Server/StaticRequestHandler.cpp
@@ -163,13 +163,14 @@ StaticRequestHandler::StaticRequestHandler(
HTTPRequestHandlerFactoryPtr createStaticHandlerFactory(IServer & server,
const Poco::Util::AbstractConfiguration & config,
- const std::string & config_prefix)
+ const std::string & config_prefix,
+ std::unordered_map & common_headers)
{
int status = config.getInt(config_prefix + ".handler.status", 200);
std::string response_content = config.getRawString(config_prefix + ".handler.response_content", "Ok.\n");
std::unordered_map http_response_headers_override
- = parseHTTPResponseHeaders(config, config_prefix, "text/plain; charset=UTF-8");
+ = parseHTTPResponseHeadersWithCommons(config, config_prefix, "text/plain; charset=UTF-8", common_headers);
auto creator = [&server, http_response_headers_override, response_content, status]() -> std::unique_ptr
{ return std::make_unique(server, response_content, http_response_headers_override, status); };
diff --git a/src/Server/WebUIRequestHandler.cpp b/src/Server/WebUIRequestHandler.cpp
index c04d7a3f2a03..7f1a06b27858 100644
--- a/src/Server/WebUIRequestHandler.cpp
+++ b/src/Server/WebUIRequestHandler.cpp
@@ -1,6 +1,7 @@
#include "WebUIRequestHandler.h"
#include "IServer.h"
#include
+#include
#include
#include
@@ -30,8 +31,10 @@ DashboardWebUIRequestHandler::DashboardWebUIRequestHandler(IServer & server_) :
BinaryWebUIRequestHandler::BinaryWebUIRequestHandler(IServer & server_) : server(server_) {}
JavaScriptWebUIRequestHandler::JavaScriptWebUIRequestHandler(IServer & server_) : server(server_) {}
-static void handle(HTTPServerRequest & request, HTTPServerResponse & response, std::string_view html)
+static void handle(HTTPServerRequest & request, HTTPServerResponse & response, std::string_view html,
+ std::unordered_map http_response_headers_override = {})
{
+ applyHTTPResponseHeaders(response, http_response_headers_override);
response.setContentType("text/html; charset=UTF-8");
if (request.getVersion() == HTTPServerRequest::HTTP_1_1)
response.setChunkedTransferEncoding(true);
@@ -43,7 +46,7 @@ static void handle(HTTPServerRequest & request, HTTPServerResponse & response, s
void PlayWebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event &)
{
- handle(request, response, {reinterpret_cast(gresource_play_htmlData), gresource_play_htmlSize});
+ handle(request, response, {reinterpret_cast(gresource_play_htmlData), gresource_play_htmlSize}, http_response_headers_override);
}
void DashboardWebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event &)
@@ -61,23 +64,23 @@ void DashboardWebUIRequestHandler::handleRequest(HTTPServerRequest & request, HT
static re2::RE2 lz_string_url = R"(https://[^\s"'`]+lz-string[^\s"'`]*\.js)";
RE2::Replace(&html, lz_string_url, "/js/lz-string.js");
- handle(request, response, html);
+ handle(request, response, html, http_response_headers_override);
}
void BinaryWebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event &)
{
- handle(request, response, {reinterpret_cast(gresource_binary_htmlData), gresource_binary_htmlSize});
+ handle(request, response, {reinterpret_cast(gresource_binary_htmlData), gresource_binary_htmlSize}, http_response_headers_override);
}
void JavaScriptWebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event &)
{
if (request.getURI() == "/js/uplot.js")
{
- handle(request, response, {reinterpret_cast(gresource_uplot_jsData), gresource_uplot_jsSize});
+ handle(request, response, {reinterpret_cast(gresource_uplot_jsData), gresource_uplot_jsSize}, http_response_headers_override);
}
else if (request.getURI() == "/js/lz-string.js")
{
- handle(request, response, {reinterpret_cast(gresource_lz_string_jsData), gresource_lz_string_jsSize});
+ handle(request, response, {reinterpret_cast(gresource_lz_string_jsData), gresource_lz_string_jsSize}, http_response_headers_override);
}
else
{
@@ -85,7 +88,7 @@ void JavaScriptWebUIRequestHandler::handleRequest(HTTPServerRequest & request, H
*response.send() << "Not found.\n";
}
- handle(request, response, {reinterpret_cast(gresource_binary_htmlData), gresource_binary_htmlSize});
+ handle(request, response, {reinterpret_cast(gresource_binary_htmlData), gresource_binary_htmlSize}, http_response_headers_override);
}
}
diff --git a/src/Server/WebUIRequestHandler.h b/src/Server/WebUIRequestHandler.h
index b84c8f6534d7..8fa2f10daae0 100644
--- a/src/Server/WebUIRequestHandler.h
+++ b/src/Server/WebUIRequestHandler.h
@@ -16,7 +16,15 @@ class PlayWebUIRequestHandler : public HTTPRequestHandler
IServer & server;
public:
explicit PlayWebUIRequestHandler(IServer & server_);
+ explicit PlayWebUIRequestHandler(IServer & server_, const std::unordered_map & http_response_headers_override_)
+ : PlayWebUIRequestHandler(server_)
+ {
+ http_response_headers_override = http_response_headers_override_;
+ }
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
+private:
+ /// Overrides for response headers.
+ std::unordered_map http_response_headers_override;
};
class DashboardWebUIRequestHandler : public HTTPRequestHandler
@@ -25,7 +33,15 @@ class DashboardWebUIRequestHandler : public HTTPRequestHandler
IServer & server;
public:
explicit DashboardWebUIRequestHandler(IServer & server_);
+ explicit DashboardWebUIRequestHandler(IServer & server_, const std::unordered_map & http_response_headers_override_)
+ : DashboardWebUIRequestHandler(server_)
+ {
+ http_response_headers_override = http_response_headers_override_;
+ }
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
+private:
+ /// Overrides for response headers.
+ std::unordered_map http_response_headers_override;
};
class BinaryWebUIRequestHandler : public HTTPRequestHandler
@@ -34,7 +50,15 @@ class BinaryWebUIRequestHandler : public HTTPRequestHandler
IServer & server;
public:
explicit BinaryWebUIRequestHandler(IServer & server_);
+ explicit BinaryWebUIRequestHandler(IServer & server_, const std::unordered_map & http_response_headers_override_)
+ : BinaryWebUIRequestHandler(server_)
+ {
+ http_response_headers_override = http_response_headers_override_;
+ }
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
+private:
+ /// Overrides for response headers.
+ std::unordered_map http_response_headers_override;
};
class JavaScriptWebUIRequestHandler : public HTTPRequestHandler
@@ -43,7 +67,15 @@ class JavaScriptWebUIRequestHandler : public HTTPRequestHandler
IServer & server;
public:
explicit JavaScriptWebUIRequestHandler(IServer & server_);
+ explicit JavaScriptWebUIRequestHandler(IServer & server_, const std::unordered_map & http_response_headers_override_)
+ : JavaScriptWebUIRequestHandler(server_)
+ {
+ http_response_headers_override = http_response_headers_override_;
+ }
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
+private:
+ /// Overrides for response headers.
+ std::unordered_map http_response_headers_override;
};
}
diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp
index 64d33da1d2c5..fbc33ec800f6 100644
--- a/src/Storages/MergeTree/KeyCondition.cpp
+++ b/src/Storages/MergeTree/KeyCondition.cpp
@@ -101,7 +101,7 @@ static String extractFixedPrefixFromRegularExpression(const String & regexp)
const char * pos = begin;
const char * end = regexp.data() + regexp.size();
- while (pos != end)
+ while (pos < end)
{
switch (*pos)
{
@@ -124,19 +124,22 @@ static String extractFixedPrefixFromRegularExpression(const String & regexp)
case '$':
case '.':
case '[':
+ case ']':
case '?':
case '*':
case '+':
+ case '\\':
case '{':
+ case '}':
+ case '-':
fixed_prefix += *pos;
+ ++pos;
break;
default:
/// all other escape sequences are not supported
- pos = end;
- break;
+ pos = end;
}
- ++pos;
break;
}
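For context on the escape handling added above: `extractFixedPrefixFromRegularExpression` collects literal characters until it reaches an unescaped metacharacter, and now treats escaped metacharacters such as `\[`, `\]`, `\\`, `\{`, `\}` and `\-` as literals. A much simplified Python sketch of the idea (it deliberately omits the quantifier, alternation and parenthesis handling of the real function):

```python
# Simplified sketch of fixed-prefix extraction from a regular expression.
META = set("^$.[]?*+{}()|\\")

def fixed_prefix(regexp: str) -> str:
    prefix = []
    i = 1 if regexp.startswith("^") else 0      # a leading anchor adds nothing
    while i < len(regexp):
        c = regexp[i]
        if c == "\\":                            # escape sequence
            if i + 1 < len(regexp) and regexp[i + 1] in ".[]?*+\\{}-$":
                prefix.append(regexp[i + 1])     # escaped metacharacter is a literal
                i += 2
                continue
            break                                # unsupported escape: stop the prefix here
        if c in META:                            # unescaped metacharacter ends the prefix
            break
        prefix.append(c)
        i += 1
    return "".join(prefix)

assert fixed_prefix(r"abc\[1\]def.*") == "abc[1]def"
assert fixed_prefix(r"^hello\.world") == "hello.world"
```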
diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp
index f29d715e791d..aee7bc404d61 100644
--- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp
@@ -1,4 +1,7 @@
#include
+#include
+#include
+#include
#include
#include
#include
@@ -159,6 +162,17 @@ void updateTTL(
for (const auto & val : column_date_time->getData())
ttl_info.update(val);
}
+ else if (const ColumnInt32 * column_date_32 = typeid_cast<const ColumnInt32 *>(ttl_column.get()))
+ {
+ const auto & date_lut = DateLUT::serverTimezoneInstance();
+ for (const auto & val : column_date_32->getData())
+ ttl_info.update(date_lut.fromDayNum(ExtendedDayNum(val)));
+ }
+ else if (const ColumnDateTime64 * column_date_time_64 = typeid_cast<const ColumnDateTime64 *>(ttl_column.get()))
+ {
+ for (const auto & val : column_date_time_64->getData())
+ ttl_info.update(val / intExp10OfSize(column_date_time_64->getScale()));
+ }
else if (const ColumnConst * column_const = typeid_cast<const ColumnConst *>(ttl_column.get()))
{
if (typeid_cast<const ColumnUInt16 *>(&column_const->getDataColumn()))
@@ -170,6 +184,15 @@ void updateTTL(
{
ttl_info.update(column_const->getValue());
}
+ else if (typeid_cast<const ColumnInt32 *>(&column_const->getDataColumn()))
+ {
+ const auto & date_lut = DateLUT::serverTimezoneInstance();
+ ttl_info.update(date_lut.fromDayNum(ExtendedDayNum(column_const->getValue())));
+ }
+ else if (const ColumnDateTime64 * column_dt64 = typeid_cast<const ColumnDateTime64 *>(&column_const->getDataColumn()))
+ {
+ ttl_info.update(column_const->getValue() / intExp10OfSize(column_dt64->getScale()));
+ }
else
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected type of result TTL column");
}
diff --git a/src/Storages/System/StorageSystemDDLWorkerQueue.cpp b/src/Storages/System/StorageSystemDDLWorkerQueue.cpp
index ae1a233ea81e..23819d0f8cd2 100644
--- a/src/Storages/System/StorageSystemDDLWorkerQueue.cpp
+++ b/src/Storages/System/StorageSystemDDLWorkerQueue.cpp
@@ -21,6 +21,11 @@ namespace fs = std::filesystem;
namespace DB
{
+namespace ErrorCodes
+{
+ extern const int SYNTAX_ERROR;
+}
+
enum class Status : uint8_t
{
INACTIVE,
@@ -54,7 +59,7 @@ ColumnsDescription StorageSystemDDLWorkerQueue::getColumnsDescription()
{"entry_version", std::make_shared(std::make_shared()), "Version of the entry."},
{"initiator_host", std::make_shared(std::make_shared()), "Host that initiated the DDL operation."},
{"initiator_port", std::make_shared(std::make_shared()), "Port used by the initiator."},
- {"cluster", std::make_shared(), "Cluster name."},
+ {"cluster", std::make_shared(), "Cluster name, empty if not determined."},
{"query", std::make_shared(), "Query executed."},
{"settings", std::make_shared(std::make_shared(), std::make_shared()), "Settings used in the DDL operation."},
{"query_create_time", std::make_shared(), "Query created time."},
@@ -77,10 +82,25 @@ static String clusterNameFromDDLQuery(ContextPtr context, const DDLTask & task)
String description = fmt::format("from {}", task.entry_path);
ParserQuery parser_query(end, settings.allow_settings_after_format_in_insert);
- ASTPtr query = parseQuery(parser_query, begin, end, description,
- settings.max_query_size,
- settings.max_parser_depth,
- settings.max_parser_backtracks);
+ ASTPtr query;
+
+ try
+ {
+ query = parseQuery(parser_query, begin, end, description,
+ settings.max_query_size,
+ settings.max_parser_depth,
+ settings.max_parser_backtracks);
+ }
+ catch (const Exception & e)
+ {
+ LOG_INFO(getLogger("StorageSystemDDLWorkerQueue"), "Failed to determine cluster");
+ if (e.code() == ErrorCodes::SYNTAX_ERROR)
+ {
+ /// ignore parse error and present available information
+ return "";
+ }
+ throw;
+ }
String cluster_name;
if (const auto * query_on_cluster = dynamic_cast(query.get()))
diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp
index d674f0546325..e0f16e0a945d 100644
--- a/src/Storages/TTLDescription.cpp
+++ b/src/Storages/TTLDescription.cpp
@@ -15,7 +15,9 @@
#include
#include
+#include
#include
+#include
#include
#include
#include
@@ -81,10 +83,12 @@ void checkTTLExpression(const ExpressionActionsPtr & ttl_expression, const Strin
const auto & result_column = ttl_expression->getSampleBlock().getByName(result_column_name);
if (!typeid_cast(result_column.type.get())
- && !typeid_cast(result_column.type.get()))
+ && !typeid_cast(result_column.type.get())
+ && !typeid_cast(result_column.type.get())
+ && !typeid_cast(result_column.type.get()))
{
throw Exception(ErrorCodes::BAD_TTL_EXPRESSION,
- "TTL expression result column should have DateTime or Date type, but has {}",
+ "TTL expression result column should have Date, Date32, DateTime or DateTime64 type, but has {}",
result_column.type->getName());
}
}
diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py
index 5b59fb92c7aa..4b0eef9c44cc 100644
--- a/tests/ci/build_check.py
+++ b/tests/ci/build_check.py
@@ -14,6 +14,7 @@
from git_helper import Git
from pr_info import PRInfo, EventType
from report import FAILURE, SUCCESS, JobReport, StatusType
+from s3_helper import S3Helper
from stopwatch import Stopwatch
from tee_popen import TeePopen
from version_helper import (
@@ -223,6 +224,28 @@ def main():
f"sudo chown -R ubuntu:ubuntu {build_output_path}", shell=True
)
logging.info("Build finished as %s, log path %s", build_status, log_path)
+
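+ # Upload the source tarball (if the build produced one) to S3 under the release-or-PR/sha/build prefix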
+ s3_helper = S3Helper()
+ s3_path_prefix = "/".join(
+ (
+ get_release_or_pr(pr_info, get_version_from_repo())[0],
+ pr_info.sha,
+ build_name,
+ )
+ )
+ src_path = temp_path / "build_source.src.tar.gz"
+ s3_path = s3_path_prefix + "/clickhouse-" + version.string + ".src.tar.gz"
+ logging.info("s3_path %s", s3_path)
+ if src_path.exists():
+ src_url = s3_helper.upload_build_file_to_s3(src_path, s3_path)
+ logging.info("Source tar %s", src_url)
+ print(f"::notice ::Source tar URL: {src_url}")
+ else:
+ logging.info("Source tar doesn't exist")
+ print("Source tar doesn't exist")
+
if build_status != SUCCESS:
# We check if docker works, because if it's down, it's infrastructure
try:
diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py
index 422e1738701a..e49e2f2d0b7e 100644
--- a/tests/ci/clickhouse_helper.py
+++ b/tests/ci/clickhouse_helper.py
@@ -9,6 +9,7 @@
import requests
+from env_helper import CLICKHOUSE_TEST_STAT_URL, CLICKHOUSE_TEST_STAT_PASSWORD, CLICKHOUSE_TEST_STAT_LOGIN
from get_robot_token import get_parameter_from_ssm
from pr_info import PRInfo
from report import TestResults
@@ -27,12 +28,12 @@ def __init__(
self, url: Optional[str] = None, auth: Optional[Dict[str, str]] = None
):
if url is None:
- url = get_parameter_from_ssm("clickhouse-test-stat-url")
+ url = CLICKHOUSE_TEST_STAT_URL
self.url = url
self.auth = auth or {
- "X-ClickHouse-User": get_parameter_from_ssm("clickhouse-test-stat-login"),
- "X-ClickHouse-Key": get_parameter_from_ssm("clickhouse-test-stat-password"),
+ "X-ClickHouse-User": CLICKHOUSE_TEST_STAT_LOGIN,
+ "X-ClickHouse-Key": CLICKHOUSE_TEST_STAT_PASSWORD,
}
@staticmethod
diff --git a/tests/ci/docker_images_helper.py b/tests/ci/docker_images_helper.py
index 9faefef12c24..219fdc00e20a 100644
--- a/tests/ci/docker_images_helper.py
+++ b/tests/ci/docker_images_helper.py
@@ -6,8 +6,7 @@
from pathlib import Path
from typing import Any, Dict, List, Optional
-from env_helper import ROOT_DIR, DOCKER_TAG
-from get_robot_token import get_parameter_from_ssm
+from env_helper import ROOT_DIR, DOCKER_TAG, DOCKER_PASSWORD
from ci_utils import Shell
IMAGES_FILE_PATH = Path("docker/images.json")
@@ -22,7 +21,7 @@ def docker_login(relogin: bool = True) -> None:
Shell.check( # pylint: disable=unexpected-keyword-arg
"docker login --username 'altinityinfra' --password-stdin",
strict=True,
- stdin_str=get_parameter_from_ssm("dockerhub-password"),
+ stdin_str=DOCKER_PASSWORD,
encoding="utf-8",
)
diff --git a/tests/ci/env_helper.py b/tests/ci/env_helper.py
index cad781a17445..11eae74689d2 100644
--- a/tests/ci/env_helper.py
+++ b/tests/ci/env_helper.py
@@ -46,6 +46,12 @@
)
CI_CONFIG_PATH = f"{TEMP_PATH}/ci_config.json"
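+# CI credentials below are read from environment variables instead of AWS SSM parameters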
+CLICKHOUSE_TEST_STAT_LOGIN = os.getenv("CLICKHOUSE_TEST_STAT_LOGIN")
+CLICKHOUSE_TEST_STAT_PASSWORD = os.getenv("CLICKHOUSE_TEST_STAT_PASSWORD")
+CLICKHOUSE_TEST_STAT_URL = os.getenv("CLICKHOUSE_TEST_STAT_URL")
+DOCKER_PASSWORD = os.getenv("DOCKER_PASSWORD")
+ROBOT_TOKEN = os.getenv("ROBOT_TOKEN")
+
# These parameters are set only on demand, and only once
_GITHUB_JOB_ID = ""
_GITHUB_JOB_URL = ""
diff --git a/tests/ci/get_robot_token.py b/tests/ci/get_robot_token.py
index a218202fd76e..f9b30beb178f 100644
--- a/tests/ci/get_robot_token.py
+++ b/tests/ci/get_robot_token.py
@@ -10,6 +10,7 @@
from github.GithubException import BadCredentialsException
from github.NamedUser import NamedUser
+from env_helper import ROBOT_TOKEN
@dataclass
class Token:
@@ -56,20 +57,11 @@ def get_parameters_from_ssm(
return results
-
-ROBOT_TOKEN = None # type: Optional[Token]
-
# NOTE(Arthur Passos): Original CI code uses the "_original" version of this method. Each robot token is rate limited
# and the original implementation selects the "best one". To make it simpler and iterate faster,
# we are using only one robot and keeping the method signature. In the future we might reconsider
# having multiple robot tokens
-def get_best_robot_token(token_prefix_env_name="github_robot_token"):
- # Re-use already fetched token (same as in get_best_robot_token_original)
- # except here we assume it is always a string (since we use only one token and don't do token rotation)
- global ROBOT_TOKEN
- if ROBOT_TOKEN is not None:
- return ROBOT_TOKEN
- ROBOT_TOKEN = get_parameter_from_ssm(token_prefix_env_name)
+def get_best_robot_token():
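+ # The token is provided via the ROBOT_TOKEN environment variable (see env_helper.py)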
return ROBOT_TOKEN
def get_best_robot_token_original(tokens_path: str = "/github-tokens") -> str:
diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py
index 86656e6e7c0c..5d9d0758e5c3 100644
--- a/tests/ci/s3_helper.py
+++ b/tests/ci/s3_helper.py
@@ -6,6 +6,7 @@
from multiprocessing.dummy import Pool
from pathlib import Path
from typing import Any, List, Union
+import os
import boto3 # type: ignore
import botocore # type: ignore
@@ -19,6 +20,42 @@
S3_URL,
)
+sensitive_var_pattern = re.compile(
+ r"\b[A-Z_]*(? str:
+ logging.debug("Checking %s for sensitive values", file_path)
+ try:
+ file_content = file_path.read_text(encoding="utf-8")
+ except UnicodeDecodeError:
+ logging.warning("Failed to read file %s, unknown encoding", file_path)
+ else:
+ scan_file_for_sensitive_data(file_content, file_path.name)
+
logging.debug(
"Start uploading %s to bucket=%s path=%s", file_path, bucket_name, s3_path
)
diff --git a/tests/ci/version_helper.py b/tests/ci/version_helper.py
index afee29c9c193..f097d4a57b9f 100755
--- a/tests/ci/version_helper.py
+++ b/tests/ci/version_helper.py
@@ -4,6 +4,8 @@
from pathlib import Path
from typing import Any, Dict, Iterable, List, Literal, Optional, Set, Tuple, Union
+from pr_info import PRInfo # grype scan needs to know the PR number
+
from git_helper import TWEAK, Git, get_tags, git_runner, removeprefix, VersionType
FILE_WITH_VERSION_PATH = "cmake/autogenerated_versions.txt"
@@ -531,6 +533,12 @@ def main():
if args.update_part or args.update_cmake:
update_cmake_version(version)
+ # grype scan needs to know the PR number
+ pr_info = PRInfo()
+ print(f"PR_NUMBER={pr_info.number}")
+ if args.export:
+ print(f"export PR_NUMBER")
+
for k, v in version.as_dict().items():
name = f"CLICKHOUSE_VERSION_{k.upper()}"
print(f"{name}='{v}'")
diff --git a/tests/integration/test_http_handlers_config/test.py b/tests/integration/test_http_handlers_config/test.py
index b2efbf4bb657..091690cf6b31 100644
--- a/tests/integration/test_http_handlers_config/test.py
+++ b/tests/integration/test_http_handlers_config/test.py
@@ -601,3 +601,31 @@ def test_replicas_status_handler():
"test_replicas_status", method="GET", headers={"XXX": "xxx"}
).content
)
+
+
+def test_headers_in_response():
+ with contextlib.closing(
+ SimpleCluster(
+ ClickHouseCluster(__file__), "headers_in_response", "test_headers_in_response"
+ )
+ ) as cluster:
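+ # Every endpoint should return both its handler-specific header and the common header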
+ for endpoint in ("static", "ping", "replicas_status", "play", "dashboard", "binary", "metrics",
+ "js/lz-string.js", "js/uplot.js", "?query=SELECT%201"):
+ response = cluster.instance.http_request(endpoint, method="GET")
+
+ assert "X-My-Answer" in response.headers
+ assert "X-My-Common-Header" in response.headers
+
+ assert response.headers["X-My-Common-Header"] == "Common header present"
+
+ if endpoint == "?query=SELECT%201":
+ assert response.headers["X-My-Answer"] == "Iam dynamic"
+ else:
+ assert response.headers["X-My-Answer"] == f"Iam {endpoint}"
+
+
+ # Handle predefined_query_handler separately because we need to pass headers there
+ response_predefined = cluster.instance.http_request(
+ "query_param_with_url", method="GET", headers={"PARAMS_XXX": "test_param"})
+ assert response_predefined.headers["X-My-Answer"] == "Iam predefined"
+ assert response_predefined.headers["X-My-Common-Header"] == "Common header present"
diff --git a/tests/integration/test_http_handlers_config/test_headers_in_response/config.xml b/tests/integration/test_http_handlers_config/test_headers_in_response/config.xml
new file mode 100644
index 000000000000..4ee24ae123f8
--- /dev/null
+++ b/tests/integration/test_http_handlers_config/test_headers_in_response/config.xml
@@ -0,0 +1,135 @@
+
+
+
+ Common header present
+
+
+
+ GET,HEAD
+ /static
+
+ static
+ config://http_server_default_response
+ text/html; charset=UTF-8
+
+ Iam static
+
+
+
+
+
+ GET,HEAD
+ /ping
+
+ ping
+
+ Iam ping
+
+
+
+
+
+ GET,HEAD
+ /replicas_status
+
+ replicas_status
+
+ Iam replicas_status
+
+
+
+
+
+ GET,HEAD
+ /play
+
+ play
+
+ Iam play
+
+
+
+
+
+ GET,HEAD
+ /dashboard
+
+ dashboard
+
+ Iam dashboard
+
+
+
+
+
+ GET,HEAD
+ /binary
+
+ binary
+
+ Iam binary
+
+
+
+
+
+ GET,HEAD
+ /metrics
+
+ prometheus
+
+ Iam metrics
+
+
+
+
+
+ /js/lz-string.js
+
+ js
+
+ Iam js/lz-string.js
+
+
+
+
+
+ GET,HEAD
+ /js/uplot.js
+
+ js
+
+ Iam js/uplot.js
+
+
+
+
+
+ /query_param_with_url
+ GET,HEAD
+
+ [^/]+)]]>
+
+
+ predefined_query_handler
+
+ SELECT {name_1:String}
+
+
+ Iam predefined
+
+
+
+
+
+ GET,POST,HEAD,OPTIONS
+
+ dynamic_query_handler
+ query
+
+ Iam dynamic
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/integration/test_system_ddl_worker_queue/configs/remote_servers.xml b/tests/integration/test_system_ddl_worker_queue/configs/remote_servers.xml
index 791af83a2d6d..f6392caf5e51 100644
--- a/tests/integration/test_system_ddl_worker_queue/configs/remote_servers.xml
+++ b/tests/integration/test_system_ddl_worker_queue/configs/remote_servers.xml
@@ -25,4 +25,5 @@
+ 1
diff --git a/tests/integration/test_system_ddl_worker_queue/test.py b/tests/integration/test_system_ddl_worker_queue/test.py
index 4659e5b92e84..a743dbe3f396 100644
--- a/tests/integration/test_system_ddl_worker_queue/test.py
+++ b/tests/integration/test_system_ddl_worker_queue/test.py
@@ -1,4 +1,8 @@
import pytest
+import time
+from io import StringIO
+import csv
+import logging
from helpers.cluster import ClickHouseCluster
@@ -25,46 +29,134 @@ def started_cluster():
try:
cluster.start()
- for i, node in enumerate([node1, node2]):
- node.query("CREATE DATABASE testdb")
- node.query(
- """CREATE TABLE testdb.test_table(id UInt32, val String) ENGINE = ReplicatedMergeTree('/clickhouse/test/test_table1', '{}') ORDER BY id;""".format(
- i
- )
- )
- for i, node in enumerate([node3, node4]):
- node.query("CREATE DATABASE testdb")
- node.query(
- """CREATE TABLE testdb.test_table(id UInt32, val String) ENGINE = ReplicatedMergeTree('/clickhouse/test/test_table2', '{}') ORDER BY id;""".format(
- i
- )
- )
yield cluster
finally:
cluster.shutdown()
+def maintain_test_table(test_table):
+ tmark = time.time() # to guarantee ZK path uniqueness
+
+ for i, node in enumerate([node1, node2]):
+ node.query(f"DROP TABLE IF EXISTS testdb.{test_table} SYNC")
+ node.query("DROP DATABASE IF EXISTS testdb")
+
+ node.query("CREATE DATABASE testdb")
+ node.query(
+ f"CREATE TABLE testdb.{test_table}(id UInt32, val String) ENGINE = ReplicatedMergeTree('/clickhouse/test/{test_table}1-{tmark}', '{i}') ORDER BY id;"
+ )
+ for i, node in enumerate([node3, node4]):
+ node.query(f"DROP TABLE IF EXISTS testdb.{test_table} SYNC")
+ node.query("DROP DATABASE IF EXISTS testdb")
+
+ node.query("CREATE DATABASE testdb")
+ node.query(
+ f"CREATE TABLE testdb.{test_table}(id UInt32, val String) ENGINE = ReplicatedMergeTree('/clickhouse/test/{test_table}2-{tmark}', '{i}') ORDER BY id;"
+ )
+
+
def test_distributed_ddl_queue(started_cluster):
+ test_table = "test_table"
+ maintain_test_table(test_table)
node1.query(
- "INSERT INTO testdb.test_table SELECT number, toString(number) FROM numbers(100)"
+ f"INSERT INTO testdb.{test_table} SELECT number, toString(number) FROM numbers(100)"
)
node3.query(
- "INSERT INTO testdb.test_table SELECT number, toString(number) FROM numbers(100)"
+ f"INSERT INTO testdb.{test_table} SELECT number, toString(number) FROM numbers(100)"
)
- node2.query("SYSTEM SYNC REPLICA testdb.test_table")
- node4.query("SYSTEM SYNC REPLICA testdb.test_table")
+ node2.query(f"SYSTEM SYNC REPLICA testdb.{test_table}")
+ node4.query(f"SYSTEM SYNC REPLICA testdb.{test_table}")
node1.query(
- "ALTER TABLE testdb.test_table ON CLUSTER test_cluster ADD COLUMN somecolumn UInt8 AFTER val",
+ f"ALTER TABLE testdb.{test_table} ON CLUSTER test_cluster ADD COLUMN somecolumn UInt8 AFTER val",
settings={"replication_alter_partitions_sync": "2"},
)
for node in nodes:
- node.query("SYSTEM SYNC REPLICA testdb.test_table")
- assert node.query("SELECT somecolumn FROM testdb.test_table LIMIT 1") == "0\n"
+ node.query(f"SYSTEM SYNC REPLICA testdb.{test_table}")
+ assert (
+ node.query(f"SELECT somecolumn FROM testdb.{test_table} LIMIT 1") == "0\n"
+ )
assert (
node.query(
"SELECT If((SELECT count(*) FROM system.distributed_ddl_queue WHERE cluster='test_cluster' AND entry='query-0000000000') > 0, 'ok', 'fail')"
)
== "ok\n"
)
+
+ node1.query(
+ f"ALTER TABLE testdb.{test_table} ON CLUSTER test_cluster DROP COLUMN somecolumn",
+ settings={"replication_alter_partitions_sync": "2"},
+ )
+
+
+def test_distributed_ddl_rubbish(started_cluster):
+ test_table = "test_table_rubbish"
+ maintain_test_table(test_table)
+ node1.query(
+ f"ALTER TABLE testdb.{test_table} ON CLUSTER test_cluster ADD COLUMN somenewcolumn UInt8 AFTER val",
+ settings={"replication_alter_partitions_sync": "2"},
+ )
+
+ zk_content_raw = node1.query(
+ "SELECT name, value, path FROM system.zookeeper WHERE path LIKE '/clickhouse/task_queue/ddl%' SETTINGS allow_unrestricted_reads_from_keeper=true FORMAT TabSeparatedWithNames"
+ )
+
+ dict_reader = csv.DictReader(StringIO(zk_content_raw), delimiter='\t')
+ zk_content = [row for row in dict_reader]
+
+ original_query = ""
+ new_query = "query-artificial-" + str(time.monotonic_ns())
+
+ # Copy information about query (one that added 'somenewcolumn') with new query ID
+ # and broken query text (TABLE => TUBLE)
+ for row in zk_content:
+ if row["value"].find("somenewcolumn") >= 0:
+ original_query = row["name"]
+ break
+
+ rows_to_insert = []
+
+ for row in zk_content:
+ if row["name"] == original_query:
+ rows_to_insert.append(
+ {
+ "name": new_query,
+ "path": row["path"],
+ "value": row["value"].replace("TABLE", "TUBLE"),
+ }
+ )
+ continue
+ pos = row["path"].find(original_query)
+ if pos >= 0:
+ rows_to_insert.append(
+ {
+ "name": row["name"],
+ "path": row["path"].replace(original_query, new_query),
+ "value": row["value"],
+ }
+ )
+
+ # Ingest it to ZK
+ for row in rows_to_insert:
+ node1.query(
+ "INSERT INTO system.zookeeper (name, path, value) VALUES ('{}', '{}', '{}')".format(
+ row["name"], row["path"], row["value"]
+ )
+ )
+
+ # Ensure that data is visible via system.distributed_ddl_queue
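+ # Expect one row per host of the 4-node cluster; the cluster name is empty because the mangled query cannot be parsed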
+ assert (
+ int(
+ node1.query(
+ f"SELECT count(1) FROM system.distributed_ddl_queue WHERE entry='{new_query}' AND cluster=''"
+ )
+ )
+ == 4
+ )
+
+ # node1.query(
+ # f"ALTER TABLE testdb.{test_table} ON CLUSTER test_cluster DROP COLUMN somenewcolumn",
+ # settings={"replication_alter_partitions_sync": "2"},
+ # )
diff --git a/tests/queries/0_stateless/00757_enum_defaults_const_analyzer.reference b/tests/queries/0_stateless/00757_enum_defaults_const_analyzer.reference
index 6895acffed15..56ead34ad3b7 100644
--- a/tests/queries/0_stateless/00757_enum_defaults_const_analyzer.reference
+++ b/tests/queries/0_stateless/00757_enum_defaults_const_analyzer.reference
@@ -3,4 +3,4 @@ iphone 1
iphone 1
iphone 1
-iphone 1
+\N 1
diff --git a/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.reference b/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.reference
index 9a0cfdffcb50..bf6f30bfc1b1 100644
--- a/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.reference
+++ b/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.reference
@@ -381,3 +381,15 @@ WITH
SELECT
x;
{'age':'31','name':'neymar','nationality':'brazil','team':'psg'}
+-- after parsing a quoted value, the next key should only start after a pair delimiter
+WITH
+ extractKeyValuePairs('key:"quoted_value"junk,second_key:0') as s_map,
+ CAST(
+ arrayMap(
+ (x) -> (x, s_map[x]), arraySort(mapKeys(s_map))
+ ),
+ 'Map(String,String)'
+ ) AS x
+SELECT
+ x;
+{'key':'quoted_value','second_key':'0'}
diff --git a/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.sql b/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.sql
index 4f3db3f166b4..ce05ee6a3f3b 100644
--- a/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.sql
+++ b/tests/queries/0_stateless/02499_extract_key_value_pairs_multiple_input.sql
@@ -516,3 +516,15 @@ WITH
) AS x
SELECT
x;
+
+-- after parsing a quoted value, the next key should only start after a pair delimiter
+WITH
+ extractKeyValuePairs('key:"quoted_value"junk,second_key:0') as s_map,
+ CAST(
+ arrayMap(
+ (x) -> (x, s_map[x]), arraySort(mapKeys(s_map))
+ ),
+ 'Map(String,String)'
+ ) AS x
+SELECT
+ x;
diff --git a/tests/queries/0_stateless/03447_grouping_sets_analyzer_const_columns.reference b/tests/queries/0_stateless/03447_grouping_sets_analyzer_const_columns.reference
new file mode 100644
index 000000000000..723208a70e1f
--- /dev/null
+++ b/tests/queries/0_stateless/03447_grouping_sets_analyzer_const_columns.reference
@@ -0,0 +1,27 @@
+Const column in grouping set, analyzer on:
+0 0 value 0 1
+0 0 value 1 1
+0 0 value 2 1
+0 0 value 3 1
+1 0 0 1
+1 0 1 1
+1 0 2 1
+1 0 3 1
+Non-const column in grouping set, analyzer on:
+0 0 value 0 1
+0 0 value 1 1
+0 0 value 2 1
+0 0 value 3 1
+1 0 0 1
+1 0 1 1
+1 0 2 1
+1 0 3 1
+Const column in grouping set, analyzer off:
+0 0 value 0 1
+0 0 value 1 1
+0 0 value 2 1
+0 0 value 3 1
+1 0 0 1
+1 0 1 1
+1 0 2 1
+1 0 3 1
diff --git a/tests/queries/0_stateless/03447_grouping_sets_analyzer_const_columns.sql b/tests/queries/0_stateless/03447_grouping_sets_analyzer_const_columns.sql
new file mode 100644
index 000000000000..f12e5636bd3a
--- /dev/null
+++ b/tests/queries/0_stateless/03447_grouping_sets_analyzer_const_columns.sql
@@ -0,0 +1,26 @@
+SET enable_analyzer=1;
+
+SELECT 'Const column in grouping set, analyzer on:';
+
+SELECT grouping(key_a), grouping(key_b), key_a, key_b, count() FROM (
+ SELECT 'value' as key_a, number as key_b FROM numbers(4)
+)
+GROUP BY GROUPING SETS((key_b), (key_a, key_b))
+ORDER BY (grouping(key_a), grouping(key_b), key_a, key_b);
+
+SELECT 'Non-const column in grouping set, analyzer on:';
+
+SELECT grouping(key_a), grouping(key_b), key_a, key_b, count() FROM (
+ SELECT materialize('value') as key_a, number as key_b FROM numbers(4)
+)
+GROUP BY GROUPING SETS((key_b), (key_a, key_b))
+ORDER BY (grouping(key_a), grouping(key_b), key_a, key_b);
+
+SELECT 'Const column in grouping set, analyzer off:';
+
+SELECT grouping(key_a), grouping(key_b), key_a, key_b, count() FROM (
+ SELECT 'value' as key_a, number as key_b FROM numbers(4)
+)
+GROUP BY GROUPING SETS((key_b), (key_a, key_b))
+ORDER BY (grouping(key_a), grouping(key_b), key_a, key_b)
+SETTINGS allow_experimental_analyzer=0;
diff --git a/tests/queries/0_stateless/03456_match_index_prefix_extraction.reference b/tests/queries/0_stateless/03456_match_index_prefix_extraction.reference
new file mode 100644
index 000000000000..72aa8c7e403f
--- /dev/null
+++ b/tests/queries/0_stateless/03456_match_index_prefix_extraction.reference
@@ -0,0 +1,52 @@
+Condition: (path in [\'xxx(zzz\', \'xxx(zz{\'))
+1
+Condition: (path in [\'xxx)zzz\', \'xxx)zz{\'))
+1
+Condition: (path in [\'xxx^zzz\', \'xxx^zz{\'))
+1
+Condition: (path in [\'xxx$zzz\', \'xxx$zz{\'))
+1
+Condition: (path in [\'xxx.zzz\', \'xxx.zz{\'))
+1
+Condition: (path in [\'xxx[zzz\', \'xxx[zz{\'))
+1
+Condition: (path in [\'xxx]zzz\', \'xxx]zz{\'))
+1
+Condition: (path in [\'xxx?zzz\', \'xxx?zz{\'))
+1
+Condition: (path in [\'xxx*zzz\', \'xxx*zz{\'))
+1
+Condition: (path in [\'xxx+zzz\', \'xxx+zz{\'))
+1
+Condition: (path in [\'xxx\\\\zzz\', \'xxx\\\\zz{\'))
+1
+Condition: (path in [\'xxx{zzz\', \'xxx{zz{\'))
+1
+Condition: (path in [\'xxx}zzz\', \'xxx}zz{\'))
+1
+Condition: (path in [\'xxx-zzz\', \'xxx-zz{\'))
+1
+Condition: (path in [\'xxx\', \'xxy\'))
+0
+Condition: (path in [\'xxx\', \'xxy\'))
+0
+Condition: (path in [\'xxx\', \'xxy\'))
+0
+Condition: (path in [\'xxx\', \'xxy\'))
+0
+Condition: (path in [\'xxx\', \'xxy\'))
+0
+Condition: (path in [\'xxx\', \'xxy\'))
+0
+Condition: (path in [\'xxx\', \'xxy\'))
+15
+Condition: (path in [\'xxx\', \'xxy\'))
+15
+Condition: (path in [\'xxx\', \'xxy\'))
+15
+Condition: (path in [\'xxx\', \'xxy\'))
+0
+Condition: (path in [\'xxx\', \'xxy\'))
+0
+Condition: true
+Condition: true
diff --git a/tests/queries/0_stateless/03456_match_index_prefix_extraction.sql b/tests/queries/0_stateless/03456_match_index_prefix_extraction.sql
new file mode 100644
index 000000000000..56eeb8bdb93d
--- /dev/null
+++ b/tests/queries/0_stateless/03456_match_index_prefix_extraction.sql
@@ -0,0 +1,111 @@
+SET parallel_replicas_local_plan=1;
+
+drop table if exists foo;
+
+CREATE TABLE foo (id UInt8, path String) engine = MergeTree ORDER BY (path) SETTINGS index_granularity=1;
+
+INSERT INTO foo VALUES (1, 'xxx|yyy'),
+(2, 'xxx(zzz'),
+(3, 'xxx)zzz'),
+(4, 'xxx^zzz'),
+(5, 'xxx$zzz'),
+(6, 'xxx.zzz'),
+(7, 'xxx[zzz'),
+(8, 'xxx]zzz'),
+(9, 'xxx?zzz'),
+(10, 'xxx*zzz'),
+(11, 'xxx+zzz'),
+(12, 'xxx\\zzz'),
+(13, 'xxx{zzz'),
+(14, 'xxx}zzz'),
+(15, 'xxx-zzz');
+
+
+-- check that escaped sequences are also properly extracted into the index prefix
+SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\\(zzz')) WHERE explain like '%Condition%';
+SELECT count() FROM foo WHERE match(path, '^xxx\\(zzz') SETTINGS force_primary_key = 1;
+
+SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\\)zzz')) WHERE explain like '%Condition%';
+SELECT count() FROM foo WHERE match(path, '^xxx\\)zzz') SETTINGS force_primary_key = 1;
+
+SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\\^zzz')) WHERE explain like '%Condition%';
+SELECT count() FROM foo WHERE match(path, '^xxx\\^zzz') SETTINGS force_primary_key = 1;
+
+SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\\$zzz')) WHERE explain like '%Condition%';
+SELECT count() FROM foo WHERE match(path, '^xxx\\$zzz') SETTINGS force_primary_key = 1;
+
+SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\\.zzz')) WHERE explain like '%Condition%';
+SELECT count() FROM foo WHERE match(path, '^xxx\\.zzz') SETTINGS force_primary_key = 1;
+
+SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\\[zzz')) WHERE explain like '%Condition%';
+SELECT count() FROM foo WHERE match(path, '^xxx\\[zzz') SETTINGS force_primary_key = 1;
+
+SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\\]zzz')) WHERE explain like '%Condition%';
+SELECT count() FROM foo WHERE match(path, '^xxx\\]zzz') SETTINGS force_primary_key = 1;
+
+SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\\?zzz')) WHERE explain like '%Condition%';
+SELECT count() FROM foo WHERE match(path, '^xxx\\?zzz') SETTINGS force_primary_key = 1;
+
+SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\\*zzz')) WHERE explain like '%Condition%';
+SELECT count() FROM foo WHERE match(path, '^xxx\\*zzz') SETTINGS force_primary_key = 1;
+
+SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\\+zzz')) WHERE explain like '%Condition%';
+SELECT count() FROM foo WHERE match(path, '^xxx\\+zzz') SETTINGS force_primary_key = 1;
+
+SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\\\\zzz')) WHERE explain like '%Condition%';
+SELECT count() FROM foo WHERE match(path, '^xxx\\\\zzz') SETTINGS force_primary_key = 1;
+
+SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\\{zzz')) WHERE explain like '%Condition%';
+SELECT count() FROM foo WHERE match(path, '^xxx\\{zzz') SETTINGS force_primary_key = 1;
+
+SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\\}zzz')) WHERE explain like '%Condition%';
+SELECT count() FROM foo WHERE match(path, '^xxx\\}zzz') SETTINGS force_primary_key = 1;
+
+SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\\-zzz')) WHERE explain like '%Condition%';
+SELECT count() FROM foo WHERE match(path, '^xxx\\-zzz') SETTINGS force_primary_key = 1;
+
+
+-- these regex chars prevent index use (only the first 3 chars are used during the index scan)
+SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\0bla')) WHERE explain like '%Condition%';
+SELECT count() FROM foo WHERE match(path, '^xxx\0bla') SETTINGS force_primary_key = 1;
+
+SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx(bla)')) WHERE explain like '%Condition%';
+SELECT count() FROM foo WHERE match(path, '^xxx(bla)') SETTINGS force_primary_key = 1;
+
+SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx[bla]')) WHERE explain like '%Condition%';
+SELECT count() FROM foo WHERE match(path, '^xxx[bla]') SETTINGS force_primary_key = 1;
+
+SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx^bla')) WHERE explain like '%Condition%';
+SELECT count() FROM foo WHERE match(path, '^xxx^bla') SETTINGS force_primary_key = 1;
+
+SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx.bla')) WHERE explain like '%Condition%';
+SELECT count() FROM foo WHERE match(path, '^xxx.bla') SETTINGS force_primary_key = 1;
+
+SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx+bla')) WHERE explain like '%Condition%';
+SELECT count() FROM foo WHERE match(path, '^xxx+bla') SETTINGS force_primary_key = 1;
+
+
+-- here the fourth char is not used during the index scan because it has a quantifier allowing zero occurrences
+SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxxx{0,1}')) WHERE explain like '%Condition%';
+SELECT count() FROM foo WHERE match(path, '^xxxx{0,1}') SETTINGS force_primary_key = 1;
+
+SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxxx?')) WHERE explain like '%Condition%';
+SELECT count() FROM foo WHERE match(path, '^xxxx?') SETTINGS force_primary_key = 1;
+
+SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxxx*')) WHERE explain like '%Condition%';
+SELECT count() FROM foo WHERE match(path, '^xxxx*') SETTINGS force_primary_key = 1;
+
+-- some unsupported regex escapes - only the first 3 chars are used during the index scan
+SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\d+')) WHERE explain like '%Condition%';
+SELECT count() FROM foo WHERE match(path, '^xxx\d+') SETTINGS force_primary_key = 1;
+
+SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\w+')) WHERE explain like '%Condition%';
+SELECT count() FROM foo WHERE match(path, '^xxx\w+') SETTINGS force_primary_key = 1;
+
+
+-- fully disabled for pipes - see https://github.com/ClickHouse/ClickHouse/pull/54696
+SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\\|zzz')) WHERE explain like '%Condition%';
+SELECT count() FROM foo WHERE match(path, '^xxx\\|zzz') SETTINGS force_primary_key = 1; -- { serverError INDEX_NOT_USED }
+
+SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxxx|foo')) WHERE explain like '%Condition%';
+SELECT count() FROM foo WHERE match(path, '^xxxx|foo') SETTINGS force_primary_key = 1; -- { serverError INDEX_NOT_USED }
diff --git a/tests/queries/0_stateless/03519_ttl_extended_data_types.reference b/tests/queries/0_stateless/03519_ttl_extended_data_types.reference
new file mode 100644
index 000000000000..d098f930edc6
--- /dev/null
+++ b/tests/queries/0_stateless/03519_ttl_extended_data_types.reference
@@ -0,0 +1,8 @@
+"2170-01-01",2170
+"2170-01-01 12:12:12.12345",2170
+"1901-01-01",""
+"2010-01-01",""
+"2170-01-01","uio"
+"1901-01-01 12:12:12.12345",""
+"2010-01-01 12:12:12.00000",""
+"2170-01-01 12:12:12.12345","uio"
diff --git a/tests/queries/0_stateless/03519_ttl_extended_data_types.sql b/tests/queries/0_stateless/03519_ttl_extended_data_types.sql
new file mode 100644
index 000000000000..e7c7dcebe48b
--- /dev/null
+++ b/tests/queries/0_stateless/03519_ttl_extended_data_types.sql
@@ -0,0 +1,39 @@
+-- Row TTL with extended data types
+DROP TABLE IF EXISTS ttl_03519_1 SYNC;
+CREATE TABLE ttl_03519_1 (date Date32, date_key Int) ENGINE=MergeTree TTL date + INTERVAL 1 MONTH ORDER BY date;
+INSERT INTO ttl_03519_1 VALUES ('2010-01-01', 2010);
+INSERT INTO ttl_03519_1 VALUES ('1901-01-01', 1901);
+INSERT INTO ttl_03519_1 VALUES ('2170-01-01', 2170);
+OPTIMIZE TABLE ttl_03519_1 FINAL;
+SELECT * FROM ttl_03519_1 ORDER BY date FORMAT CSV;
+DROP TABLE ttl_03519_1 SYNC;
+
+DROP TABLE IF EXISTS ttl_03519_2 SYNC;
+CREATE TABLE ttl_03519_2 (date DateTime64(5, 'UTC'), date_key Int) ENGINE=MergeTree TTL date + INTERVAL 1 MONTH ORDER BY date;
+INSERT INTO ttl_03519_2 VALUES ('2010-01-01 12:12:12.12345', 2010);
+INSERT INTO ttl_03519_2 VALUES ('1901-01-01 12:12:12.12345', 1901);
+INSERT INTO ttl_03519_2 VALUES ('2170-01-01 12:12:12.12345', 2170);
+OPTIMIZE TABLE ttl_03519_2 FINAL;
+SELECT * FROM ttl_03519_2 ORDER BY date FORMAT CSV;
+DROP TABLE ttl_03519_2 SYNC;
+
+-- Column TTL with extended data types
+
+DROP TABLE IF EXISTS ttl_03519_3 SYNC;
+CREATE TABLE ttl_03519_3 (date Date32, str String TTL date + INTERVAL 1 MONTH) ENGINE=MergeTree ORDER BY date;
+INSERT INTO ttl_03519_3 VALUES ('2010-01-01', 'qwe');
+INSERT INTO ttl_03519_3 VALUES ('1901-01-01', 'rty');
+INSERT INTO ttl_03519_3 VALUES ('2170-01-01', 'uio');
+OPTIMIZE TABLE ttl_03519_3 FINAL;
+SELECT * FROM ttl_03519_3 ORDER BY date FORMAT CSV;
+DROP TABLE ttl_03519_3 SYNC;
+
+DROP TABLE IF EXISTS ttl_03519_4 SYNC;
+CREATE TABLE ttl_03519_4 (date DateTime64(5, 'UTC'), str String TTL date + INTERVAL 1 MONTH) ENGINE=MergeTree ORDER BY date;
+INSERT INTO ttl_03519_4 VALUES ('2010-01-01 12:12:12', 'qwe');
+INSERT INTO ttl_03519_4 VALUES ('1901-01-01 12:12:12.12345', 'rty');
+INSERT INTO ttl_03519_4 VALUES ('2170-01-01 12:12:12.12345', 'uio');
+OPTIMIZE TABLE ttl_03519_4 FINAL;
+SELECT * FROM ttl_03519_4 ORDER BY date FORMAT CSV;
+DROP TABLE ttl_03519_4 SYNC;
+