diff --git a/.yamllint.yaml b/.yamllint.yaml index 906fb5a5cd..ebade3e380 100644 --- a/.yamllint.yaml +++ b/.yamllint.yaml @@ -37,7 +37,7 @@ rules: line-length: max: 100 - allow-non-breakable-words: false + allow-non-breakable-words: true octal-values: forbid-implicit-octal: true diff --git a/components/clp-py-utils/clp_py_utils/clp_config.py b/components/clp-py-utils/clp_py_utils/clp_config.py index 48559ee2c2..4525c57c6d 100644 --- a/components/clp-py-utils/clp_py_utils/clp_config.py +++ b/components/clp-py-utils/clp_py_utils/clp_config.py @@ -867,10 +867,14 @@ def transform_for_container(self): self.archive_output.storage.transform_for_container() self.stream_output.storage.transform_for_container() - self.database.transform_for_container() - self.queue.transform_for_container() - self.redis.transform_for_container() - self.results_cache.transform_for_container() + if BundledService.DATABASE in self.bundled: + self.database.transform_for_container() + if BundledService.QUEUE in self.bundled: + self.queue.transform_for_container() + if BundledService.REDIS in self.bundled: + self.redis.transform_for_container() + if BundledService.RESULTS_CACHE in self.bundled: + self.results_cache.transform_for_container() self.query_scheduler.transform_for_container() self.reducer.transform_for_container() if self.package.query_engine == QueryEngine.PRESTO and self.presto is not None: diff --git a/components/package-template/src/etc/clp-config.yaml b/components/package-template/src/etc/clp-config.yaml index a541123161..84e15cfa34 100644 --- a/components/package-template/src/etc/clp-config.yaml +++ b/components/package-template/src/etc/clp-config.yaml @@ -1,4 +1,5 @@ # yaml-language-server: $schema=../usr/share/config-schemas/clp-config.schema.json +# #package: # storage_engine: "clp-s" # query_engine: "clp-s" @@ -15,6 +16,8 @@ ## File containing credentials for services #credentials_file_path: "etc/credentials.yaml" # +## Remove any bundled services below if you wish to use 
your own. For more information, see +## https://docs.yscope.com/clp/main/user-docs/guides-external-database.html#configuring-clp-to-use-external-databases #bundled: ["database", "queue", "redis", "results_cache"] # #database: diff --git a/docs/src/user-docs/guides-external-database.md b/docs/src/user-docs/guides-external-database.md index e4d5a8b670..b835066044 100644 --- a/docs/src/user-docs/guides-external-database.md +++ b/docs/src/user-docs/guides-external-database.md @@ -173,28 +173,47 @@ When using AWS DocumentDB or MongoDB Atlas: ## Configuring CLP to use external databases -After setting up your external databases, configure CLP to use them by editing `etc/clp-config.yaml`: - -```yaml -database: - host: "" - port: 3306 - name: "clp-db" - # Credentials will be set in etc/credentials.yaml - -results_cache: - host: "" - port: 27017 - name: "clp-query-results" -``` +After setting up your external databases, configure CLP to use them: -Set the credentials in `etc/credentials.yaml`: +1. Edit `etc/clp-config.yaml` to specify which services are bundled (managed by the `clp-package` + Docker Compose project): -```yaml -database: - username: "clp-user" - password: "" -``` + ```yaml + # Remove "database" and "results_cache" from this list to use external instances + bundled: + # - "database" + - "queue" + - "redis" + # - "results_cache" + ``` + +2. Configure the connection details for your external databases in `etc/clp-config.yaml`: + + ```yaml + database: + host: "<database-host>" + port: <database-port> + + results_cache: + host: "<results-cache-host>" + port: <results-cache-port> + ``` + + :::{important} + If your external service is running on the host machine, **do not use `127.0.0.1`** as the host. + The CLP components run in containers, so `127.0.0.1` would refer to the container itself. + Instead, use: + - `host.docker.internal` to refer to the host machine. + - The host's actual network IP address (e.g., `192.168.1.10`). + ::: + +3. 
Set the credentials in `etc/credentials.yaml`: + + ```yaml + database: + username: "clp-user" + password: "<password>" + ``` :::{note} When using external databases in a multi-host deployment, you do **not** need to start the diff --git a/docs/src/user-docs/guides-multi-host.md b/docs/src/user-docs/guides-multi-host.md index 6ef651b84d..f474234d00 100755 --- a/docs/src/user-docs/guides-multi-host.md +++ b/docs/src/user-docs/guides-multi-host.md @@ -37,9 +37,7 @@ In a multi-host cluster: To configure CLP for multi-host deployment, you'll need to: 1. [configure and run CLP's environment setup scripts](#clp-environment-setup). -2. [update CLP's *generated* configuration to support a multi-host deployment]( - #updating-clps-generated-configuration). -3. [distribute and configure the CLP package on all hosts in your cluster]( +2. [distribute and configure the CLP package on all hosts in your cluster]( #distributing-the-set-up-package). ### CLP environment setup @@ -54,6 +52,18 @@ To configure CLP for multi-host deployment, you'll need to: 3. Edit CLP's configuration file: * Open `etc/clp-config.yaml`. + * Configure which services should be bundled (managed by the `clp-package` Docker Compose + project) vs. external. + + ```yaml + bundled: + # Remove any service that will run on a dedicated host or be provided by an external instance + - database # Remove if running on a dedicated host or using external MySQL-compatible DB + - queue # Remove if running on a dedicated host or using external RabbitMQ + - redis # Remove if running on a dedicated host or using external Redis + - results_cache # Remove if running on a dedicated host or using external MongoDB + ``` + + * For each service, set the `host` and `port` fields to the actual hostname/IP and port where you plan to run the specific service. 
* When using local filesystem storage (i.e., not S3), set `logs_input.storage.directory`, @@ -74,33 +84,6 @@ To configure CLP for multi-host deployment, you'll need to: * Create `var/log/.clp-config.yaml` (the container-specific configuration file) * Create `var/www/webui/server/dist/settings.json` (the `webui` server's configuration file) -### Updating CLP's generated configuration - -The last step in the previous section (`sbin/start-clp.sh --setup-only`) will generate any necessary -configuration files, but they're unsuitable for use across multiple hosts (they're designed for use -on a single host). - -:::{note} -As mentioned at the beginning of this guide, this setup will be made simpler in a future release. -::: - -To update the generated configuration files for use across multiple hosts: - -1. Edit `var/log/.clp-config.yaml`: - - * Update all `host` fields to use the actual hostname or IP address where each service will run - (matching what you configured in `etc/clp-config.yaml`). - * Similarly, update any `port` fields. - * For example, if your database runs on `192.168.1.10:3306`, ensure `database.host` is set to - `192.168.1.10` and `database.port` is `3306`. - -2. Edit `var/www/webui/server/dist/settings.json`: - - * Update `SqlDbHost` to the actual hostname or IP address of your database service. - * Update `SqlDbPort` if you changed the database port. - * Update `MongoDbHost` to the actual hostname or IP address of your results cache service. - * Update `MongoDbPort` if you changed the results cache port. 
- ### Distributing the set-up package With the package set up, we can now distribute it to all hosts in the cluster: diff --git a/docs/src/user-docs/quick-start/clp-json.md b/docs/src/user-docs/quick-start/clp-json.md index 385736e339..429ff11224 100644 --- a/docs/src/user-docs/quick-start/clp-json.md +++ b/docs/src/user-docs/quick-start/clp-json.md @@ -18,8 +18,10 @@ sbin/start-clp.sh ``` :::{tip} -To validate configuration and prepare directories without launching services, add the -`--setup-only` flag (e.g., `sbin/start-clp.sh --setup-only`). +To validate configuration and prepare directories without launching services, add the `--setup-only` +flag (e.g., `sbin/start-clp.sh --setup-only`). To use external databases or other third-party +services instead of bundled services, see the +[external database guide](../guides-external-database.md). ::: :::{note} diff --git a/docs/src/user-docs/quick-start/clp-text.md b/docs/src/user-docs/quick-start/clp-text.md index f1f637c66c..91d08bf9a4 100644 --- a/docs/src/user-docs/quick-start/clp-text.md +++ b/docs/src/user-docs/quick-start/clp-text.md @@ -20,8 +20,10 @@ sbin/start-clp.sh ``` :::{tip} -To validate configuration and prepare directories without launching services, add the -`--setup-only` flag (e.g., `sbin/start-clp.sh --setup-only`). +To validate configuration and prepare directories without launching services, add the `--setup-only` +flag (e.g., `sbin/start-clp.sh --setup-only`). To use external databases or other third-party +services instead of bundled services, see the +[external database guide](../guides-external-database.md). 
::: :::{note} diff --git a/tools/deployment/package/docker-compose-all.yaml b/tools/deployment/package/docker-compose-all.yaml index 72704a085e..d4ae5ec579 100644 --- a/tools/deployment/package/docker-compose-all.yaml +++ b/tools/deployment/package/docker-compose-all.yaml @@ -212,7 +212,8 @@ services: "python3", "-u", "-m", "clp_py_utils.initialize-results-cache", - "--uri", "mongodb://results_cache:27017/${CLP_RESULTS_CACHE_DB_NAME:-clp-query-results}", + "--uri", "mongodb://${CLP_RESULTS_CACHE_HOST:-results-cache}:${CLP_RESULTS_CACHE_PORT:-27017}\ + /${CLP_RESULTS_CACHE_DB_NAME:-clp-query-results}", "--stream-collection", "${CLP_RESULTS_CACHE_STREAM_COLLECTION_NAME:-stream-files}", ] @@ -222,13 +223,15 @@ services: stop_grace_period: "300s" environment: BROKER_URL: "amqp://${CLP_QUEUE_USER:?Please set a value.}\ - :${CLP_QUEUE_PASS:?Please set a value.}@queue:5672" + :${CLP_QUEUE_PASS:?Please set a value.}@${CLP_QUEUE_HOST:-queue}\ + :${CLP_QUEUE_PORT:-5672}" CLP_DB_PASS: "${CLP_DB_PASS:?Please set a value.}" CLP_DB_USER: "${CLP_DB_USER:?Please set a value.}" CLP_LOGGING_LEVEL: "${CLP_COMPRESSION_SCHEDULER_LOGGING_LEVEL:-INFO}" CLP_LOGS_DIR: "/var/log/compression_scheduler" PYTHONPATH: "/opt/clp/lib/python3/site-packages" - RESULT_BACKEND: "redis://default:${CLP_REDIS_PASS:?Please set a value.}@redis:6379\ + RESULT_BACKEND: "redis://default:${CLP_REDIS_PASS:?Please set a value.}\ + @${CLP_REDIS_HOST:-redis}:${CLP_REDIS_PORT:-6379}\ /${CLP_REDIS_BACKEND_DB_COMPRESSION:-1}" volumes: - *volume_clp_config_readonly @@ -254,14 +257,16 @@ services: hostname: "compression_worker" environment: BROKER_URL: "amqp://${CLP_QUEUE_USER:?Please set a value.}\ - :${CLP_QUEUE_PASS:?Please set a value.}@queue:5672" + :${CLP_QUEUE_PASS:?Please set a value.}@${CLP_QUEUE_HOST:-queue}\ + :${CLP_QUEUE_PORT:-5672}" CLP_CONFIG_PATH: "/etc/clp-config.yaml" CLP_HOME: "/opt/clp" CLP_LOGGING_LEVEL: "${CLP_COMPRESSION_WORKER_LOGGING_LEVEL:-INFO}" CLP_LOGS_DIR: "/var/log/compression_worker" 
CLP_WORKER_LOG_PATH: "/var/log/compression_worker/worker.log" PYTHONPATH: "/opt/clp/lib/python3/site-packages" - RESULT_BACKEND: "redis://default:${CLP_REDIS_PASS:?Please set a value.}@redis:6379\ + RESULT_BACKEND: "redis://default:${CLP_REDIS_PASS:?Please set a value.}\ + @${CLP_REDIS_HOST:-redis}:${CLP_REDIS_PORT:-6379}\ /${CLP_REDIS_BACKEND_DB_COMPRESSION:-1}" volumes: - *volume_clp_config_readonly @@ -369,13 +374,15 @@ services: stop_grace_period: "10s" environment: BROKER_URL: "amqp://${CLP_QUEUE_USER:?Please set a value.}\ - :${CLP_QUEUE_PASS:?Please set a value.}@queue:5672" + :${CLP_QUEUE_PASS:?Please set a value.}@${CLP_QUEUE_HOST:-queue}\ + :${CLP_QUEUE_PORT:-5672}" CLP_DB_PASS: "${CLP_DB_PASS:?Please set a value.}" CLP_DB_USER: "${CLP_DB_USER:?Please set a value.}" CLP_LOGGING_LEVEL: "${CLP_QUERY_SCHEDULER_LOGGING_LEVEL:-INFO}" CLP_LOGS_DIR: "/var/log/query_scheduler" PYTHONPATH: "/opt/clp/lib/python3/site-packages" - RESULT_BACKEND: "redis://default:${CLP_REDIS_PASS:?Please set a value.}@redis:6379\ + RESULT_BACKEND: "redis://default:${CLP_REDIS_PASS:?Please set a value.}\ + @${CLP_REDIS_HOST:-redis}:${CLP_REDIS_PORT:-6379}\ /${CLP_REDIS_BACKEND_DB_QUERY:-0}" volumes: - *volume_clp_config_readonly @@ -407,14 +414,16 @@ services: hostname: "query_worker" environment: BROKER_URL: "amqp://${CLP_QUEUE_USER:?Please set a value.}\ - :${CLP_QUEUE_PASS:?Please set a value.}@queue:5672" + :${CLP_QUEUE_PASS:?Please set a value.}@${CLP_QUEUE_HOST:-queue}\ + :${CLP_QUEUE_PORT:-5672}" CLP_CONFIG_PATH: "/etc/clp-config.yaml" CLP_HOME: "/opt/clp" CLP_LOGGING_LEVEL: "${CLP_QUERY_WORKER_LOGGING_LEVEL:-INFO}" CLP_LOGS_DIR: "/var/log/query_worker" CLP_WORKER_LOG_PATH: "/var/log/query_worker/worker.log" PYTHONPATH: "/opt/clp/lib/python3/site-packages" - RESULT_BACKEND: "redis://default:${CLP_REDIS_PASS:?Please set a value.}@redis:6379\ + RESULT_BACKEND: "redis://default:${CLP_REDIS_PASS:?Please set a value.}\ + @${CLP_REDIS_HOST:-redis}:${CLP_REDIS_PORT:-6379}\ 
/${CLP_REDIS_BACKEND_DB_QUERY:-0}" volumes: - *volume_clp_config_readonly