From 6b6c1fbed39239d7dadb101f60a7e16bcafd7e60 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Thu, 20 Nov 2025 19:18:10 -0500 Subject: [PATCH 01/10] Simple change to doc --- docs/src/user-docs/guides-multi-host.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/user-docs/guides-multi-host.md b/docs/src/user-docs/guides-multi-host.md index 6ef651b84d..19a6ab7b96 100755 --- a/docs/src/user-docs/guides-multi-host.md +++ b/docs/src/user-docs/guides-multi-host.md @@ -162,13 +162,13 @@ docker compose \ up db-table-creator \ --no-deps -# Start queue +# Start queue (optional, only if using Celery) docker compose \ --project-name "clp-package-$(cat var/log/instance-id)" \ up queue \ --no-deps --wait -# Start redis +# Start redis (optional, only if using Celery) docker compose \ --project-name "clp-package-$(cat var/log/instance-id)" \ up redis \ From 0a47380deb5a0cb4f26a2dbd7a1f94b4c560f506 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Thu, 20 Nov 2025 19:42:33 -0500 Subject: [PATCH 02/10] Add Spider worker --- docs/src/user-docs/guides-multi-host.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/src/user-docs/guides-multi-host.md b/docs/src/user-docs/guides-multi-host.md index 19a6ab7b96..3ed443ffc4 100755 --- a/docs/src/user-docs/guides-multi-host.md +++ b/docs/src/user-docs/guides-multi-host.md @@ -230,11 +230,17 @@ docker compose \ # Worker services (can be started on multiple hosts) ################################################################################ -# Start compression worker +# Start compression worker (optional, only if using Celery) docker compose \ --project-name "clp-package-$(cat var/log/instance-id)" \ up compression-worker \ --no-deps --wait + +# Start Spider compression worker (optional, only if using Spider) +docker compose \ + --project-name "clp-package-$(cat var/log/instance-id)" \ + up spider-compression-worker \ + --no-deps --wait # Start query worker docker compose \ 
From f539a43ca5247f65772c31eb94ef1f2f9de294b0 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Thu, 20 Nov 2025 19:43:28 -0500 Subject: [PATCH 03/10] Add spider scheduler --- docs/src/user-docs/guides-multi-host.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/src/user-docs/guides-multi-host.md b/docs/src/user-docs/guides-multi-host.md index 3ed443ffc4..f3f0b03eaa 100755 --- a/docs/src/user-docs/guides-multi-host.md +++ b/docs/src/user-docs/guides-multi-host.md @@ -195,6 +195,12 @@ docker compose \ --project-name "clp-package-$(cat var/log/instance-id)" \ up compression-scheduler \ --no-deps --wait + +# Start Spider scheduler (optional, only if using Spider) +docker compose \ + --project-name "clp-package-$(cat var/log/instance-id)" \ + up spider-scheduler \ + --no-deps --wait # Start query scheduler docker compose \ From c7ff3d1c65246941b8059346af8ed2ff5305aa5c Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Fri, 21 Nov 2025 15:18:06 -0500 Subject: [PATCH 04/10] Add spider component into table --- .../design-deployment-orchestration.md | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/docs/src/dev-docs/design-deployment-orchestration.md b/docs/src/dev-docs/design-deployment-orchestration.md index 130e0bd096..f42d964e93 100755 --- a/docs/src/dev-docs/design-deployment-orchestration.md +++ b/docs/src/dev-docs/design-deployment-orchestration.md @@ -117,21 +117,23 @@ graph LR :::{table} :align: left -| Service | Description | -|-----------------------|-----------------------------------------------------------------| -| database | Database for archive metadata, compression jobs, and query jobs | -| queue | Task queue for schedulers | -| redis | Task result storage for workers | -| compression_scheduler | Scheduler for compression jobs | -| query_scheduler | Scheduler for search/aggregation jobs | -| results_cache | Storage for the workers to return search results to the UI | -| compression_worker | Worker 
processes for compression jobs | -| query_worker | Worker processes for search/aggregation jobs | -| reducer | Reducers for performing the final stages of aggregation jobs | -| api_server | API server for submitting queries | -| webui | Web server for the UI | -| mcp_server | MCP server for AI agent to access CLP functionalities | -| garbage_collector | Process to manage data retention | +| Service | Description | +|---------------------------|-----------------------------------------------------------------| +| database | Database for archive metadata, compression jobs, and query jobs | +| queue | Task queue for schedulers | +| redis | Task result storage for workers | +| compression_scheduler | Scheduler for compression jobs | +| query_scheduler | Scheduler for search/aggregation jobs | +| spider_scheduler | Scheduler for Spider distributed task execution framework. | +| results_cache | Storage for the workers to return search results to the UI | +| compression_worker | Worker processes for compression jobs | +| spider_compression_worker | Worker processes for Spider distributed compression jobs | +| query_worker | Worker processes for search/aggregation jobs | +| reducer | Reducers for performing the final stages of aggregation jobs | +| api_server | API server for submitting queries | +| webui | Web server for the UI | +| mcp_server | MCP server for AI agent to access CLP functionalities | +| garbage_collector | Process to manage data retention | ::: From b1287c261479f452a2fd92a4af226c4bce22372a Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Fri, 21 Nov 2025 15:36:31 -0500 Subject: [PATCH 05/10] Use table to present all deployment type --- .../dev-docs/design-deployment-orchestration.md | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/docs/src/dev-docs/design-deployment-orchestration.md b/docs/src/dev-docs/design-deployment-orchestration.md index f42d964e93..c0a00dcf69 100755 --- 
a/docs/src/dev-docs/design-deployment-orchestration.md +++ b/docs/src/dev-docs/design-deployment-orchestration.md @@ -127,7 +127,7 @@ graph LR | spider_scheduler | Scheduler for Spider distributed task execution framework. | | results_cache | Storage for the workers to return search results to the UI | | compression_worker | Worker processes for compression jobs | -| spider_compression_worker | Worker processes for Spider distributed compression jobs | +| spider_compression_worker | Worker processes for compression jobs using Spider | | query_worker | Worker processes for search/aggregation jobs | | reducer | Reducers for performing the final stages of aggregation jobs | | api_server | API server for submitting queries | @@ -212,12 +212,16 @@ instance ID. ### Deployment Types -CLP supports two deployment types determined by the `package.query_engine` configuration setting. +CLP supports four deployment types determined by the `package.compression_scheduler.type` and +`package.query_engine` configuration settings. + +| Deployment Type | Compression Scheduler | Query Engine | Docker Compose File | +|-----------------|-----------------------|------------------------------|------------------------------------| +| Base | Celery | [Presto][presto-integration] | `docker-compose-base.yaml` | +| Full | Celery | Native | `docker-compose.yaml` | +| Spider Base | Spider | [Presto][presto-integration] | `docker-compose-spider-base.yaml` | +| Spider Full | Spider | Native | `docker-compose-spider.yaml` | -1. **BASE**: For deployments using [Presto][presto-integration] as the query engine. This deployment - only uses `docker-compose.base.yaml`. -2. **FULL**: For deployments using one of CLP's native query engines. This uses both - `docker-compose.base.yaml` and `docker-compose.yaml`.
### Implementation details From 6355c00c871c72c88347d2b285e6cdb57d5c87e0 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Fri, 21 Nov 2025 23:12:27 -0500 Subject: [PATCH 06/10] Fix the mermaid graph --- .../design-deployment-orchestration.md | 27 ++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/docs/src/dev-docs/design-deployment-orchestration.md b/docs/src/dev-docs/design-deployment-orchestration.md index c0a00dcf69..1ac8e04b92 100755 --- a/docs/src/dev-docs/design-deployment-orchestration.md +++ b/docs/src/dev-docs/design-deployment-orchestration.md @@ -29,7 +29,8 @@ one-time initialization jobs and their functions. "primaryBorderColor": "transparent", "lineColor": "#007fff", "secondaryColor": "#007fff", - "tertiaryColor": "#fff" + "tertiaryColor": "#fff", + "clusterBkg": "#d1f6ff" } } }%% @@ -41,7 +42,9 @@ graph LR results_cache["results-cache (MongoDB)"] compression_scheduler["compression-scheduler"] query_scheduler["query-scheduler"] + spider_scheduler["spider-scheduler"] compression_worker["compression-worker"] + spider_compression_worker["spider-compression-worker"] query_worker["query-worker"] reducer["reducer"] api_server["api-server"] @@ -63,6 +66,8 @@ graph LR queue -->|healthy| query_scheduler redis -->|healthy| query_scheduler query_scheduler -->|healthy| reducer + db_table_creator -->|healthy| spider_scheduler + db_table_creator -->|healthy| spider_compression_worker results_cache_indices_creator -->|completed_successfully| reducer db_table_creator -->|completed_successfully| api_server results_cache_indices_creator -->|completed_successfully| api_server @@ -75,9 +80,11 @@ graph LR subgraph Databases database - queue - redis results_cache + subgraph Celery[Celery
Native Query Engine] + queue + redis + end end subgraph Initialization jobs @@ -88,10 +95,17 @@ graph LR subgraph Schedulers compression_scheduler query_scheduler + subgraph SpiderSchedulers[Spider] + spider_scheduler + end + spider_scheduler end subgraph Workers compression_worker + subgraph SpiderWorkers[Spider Workers] + spider_compression_worker["spider-compression-worker"] + end query_worker reducer end @@ -106,6 +120,13 @@ graph LR mcp_server end + %% Edges Styles + linkStyle 3,4,6,7 stroke:#ffd700,color:#ffd700 + linkStyle 9,10 stroke:#00ced1,color:#00ced1 + %% Subgraphs Styles + style Celery fill:#ffffe0,stroke:#ffd700 + style SpiderSchedulers fill:#e0ffff,stroke:#00ced1 + style SpiderWorkers fill:#e0ffff,stroke:#00ced1 +++ **Figure 1**: Orchestration architecture of the services in the CLP package. From 314822dea102dd330b2db6f104f8812c3a511e62 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Mon, 24 Nov 2025 12:20:03 -0500 Subject: [PATCH 07/10] Add using spider guide --- docs/src/user-docs/guides-using-spider.md | 63 +++++++++++++++++++++++ docs/src/user-docs/index.md | 1 + 2 files changed, 64 insertions(+) create mode 100644 docs/src/user-docs/guides-using-spider.md diff --git a/docs/src/user-docs/guides-using-spider.md b/docs/src/user-docs/guides-using-spider.md new file mode 100644 index 0000000000..0fe63347c4 --- /dev/null +++ b/docs/src/user-docs/guides-using-spider.md @@ -0,0 +1,63 @@ +# Using Spider with CLP + +[Spider] is a fast and scalable distributed task execution engine that can be used to run tasks. +This guide describes how to set up and use Spider with CLP. + +:::{note} +Spider is under active development, and its integration with CLP may change in the future. +Right now Spider only supports executing CLP compression tasks. Support for search tasks will be added +later. 
+::: + +## Requirements +* [CLP][clp-releases] v0.7.0 or higher +* [Docker] v28 or higher +* [Docker Compose][docker-compose] v2.20.2 or higher +* Python +* python3-venv (for the version of Python installed) + +## Set up +To use Spider for CLP compression tasks, you need to [set up CLP](#setting-up-clp-with-spider) with +Spider in configuration. + +### Setting up CLP with Spider + +1. Follow the [quick-start](quick-start/index.md) guide to download and extract the CLP package, + but don't start the package just yet. +2. Before starting the package, update the package's config file (`etc/clp-config.yaml`) as follows: + + * Set the `compression_scheduler.type` key to `"spider"`. + + ```yaml + compression_scheduler: + type: "spider" + ``` + + * Optionally, set the `spider_db`. + + ```yaml + spider_db: + db_name: "spider-db" + ``` + + * Optionally, set the `spider_scheduler`. + + ```yaml + spider_scheduler: + host: "localhost" + port: 6000 + ``` +3. Optionally, before starting the package, update the package's credential file (`etc/credentials.yaml`) + to add Spider database credentials as follows: + + ```yaml + spider_db: + username: "spider_user" + password: "spider_password" + ``` +4. Continue following the [quick-start](./quick-start/index.md#using-clp) guide to start CLP. 
+ +[clp-releases]: https://github.com/y-scope/clp/releases +[docker-compose]: https://docs.docker.com/compose/install/ +[Docker]: https://docs.docker.com/engine/install/ +[Spider]: https://github.com/y-scope/spider \ No newline at end of file diff --git a/docs/src/user-docs/index.md b/docs/src/user-docs/index.md index a256dac5a6..405ad04df1 100644 --- a/docs/src/user-docs/index.md +++ b/docs/src/user-docs/index.md @@ -66,6 +66,7 @@ guides-external-database guides-multi-host guides-retention guides-using-presto +guides-using-spider ::: :::{toctree} From 2e059394395d9a64e13693cb57010f20883d1ab4 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Mon, 24 Nov 2025 12:32:14 -0500 Subject: [PATCH 08/10] Minor grammar improvement --- docs/src/user-docs/guides-using-spider.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/src/user-docs/guides-using-spider.md b/docs/src/user-docs/guides-using-spider.md index 0fe63347c4..60e0773a90 100644 --- a/docs/src/user-docs/guides-using-spider.md +++ b/docs/src/user-docs/guides-using-spider.md @@ -5,7 +5,7 @@ This guide describes how to set up and use Spider with CLP. :::{note} Spider is under active development, and its integration with CLP may change in the future. -Right now Spider only supports executing CLP compression tasks. Support for search tasks will be added +Right now, Spider only supports executing CLP compression tasks. Support for search tasks will be added later. ::: @@ -33,21 +33,21 @@ Spider in configuration. type: "spider" ``` - * Optionally, set the `spider_db`. + * (Optional) Set the `spider_db`. ```yaml spider_db: db_name: "spider-db" ``` - * Optionally, set the `spider_scheduler`. + * (Optional) Set the `spider_scheduler`. ```yaml spider_scheduler: host: "localhost" port: 6000 ``` -3. Optionally, before starting the package, update the package's credential file (`etc/credentials.yaml`) +3.
(Optional) Before starting the package, update the package's credential file (`etc/credentials.yaml`) to add Spider database credentials as follows: ```yaml From 8caa9732192db683a80141adcf4dbd28d461e8b3 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Mon, 24 Nov 2025 12:40:28 -0500 Subject: [PATCH 09/10] Fix markdown lint --- docs/src/user-docs/guides-using-spider.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/src/user-docs/guides-using-spider.md b/docs/src/user-docs/guides-using-spider.md index 60e0773a90..96b87db5cd 100644 --- a/docs/src/user-docs/guides-using-spider.md +++ b/docs/src/user-docs/guides-using-spider.md @@ -10,6 +10,7 @@ later. ::: ## Requirements + * [CLP][clp-releases] v0.7.0 or higher * [Docker] v28 or higher * [Docker Compose][docker-compose] v2.20.2 or higher @@ -17,6 +18,7 @@ later. * python3-venv (for the version of Python installed) ## Set up + To use Spider for CLP compression tasks, you need to [set up CLP](#setting-up-clp-with-spider) with Spider in configuration. @@ -32,7 +34,7 @@ Spider in configuration. compression_scheduler: type: "spider" ``` - + * (Optional) Set the `spider_db`. ```yaml @@ -47,6 +49,7 @@ Spider in configuration. host: "localhost" port: 6000 ``` + 3. (Optional) Before starting the package, update the package's credential file (`etc/credentials.yaml`) to add Spider database credentials as follows: @@ -55,6 +58,7 @@ Spider in configuration. username: "spider_user" password: "spider_password" ``` + 4. Continue following the [quick-start](./quick-start/index.md#using-clp) guide to start CLP. 
[clp-releases]: https://github.com/y-scope/clp/releases From 1c1636e6f7be03989cb87b5ee8a92e3b725ef832 Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Mon, 24 Nov 2025 12:44:39 -0500 Subject: [PATCH 10/10] Fix md lint --- docs/src/user-docs/guides-using-spider.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/user-docs/guides-using-spider.md b/docs/src/user-docs/guides-using-spider.md index 96b87db5cd..f0d4ee21c7 100644 --- a/docs/src/user-docs/guides-using-spider.md +++ b/docs/src/user-docs/guides-using-spider.md @@ -36,14 +36,14 @@ Spider in configuration. ``` * (Optional) Set the `spider_db`. - + ```yaml spider_db: db_name: "spider-db" ``` * (Optional) Set the `spider_scheduler`. - + ```yaml spider_scheduler: host: "localhost"