Skip to content
Merged
12 changes: 10 additions & 2 deletions components/clp-package-utils/clp_package_utils/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,12 @@
API_SERVER_COMPONENT_NAME,
AwsAuthType,
BundledService,
CLP_DB_PASS_ENV_VAR_NAME,
CLP_DB_ROOT_PASS_ENV_VAR_NAME,
CLP_DB_ROOT_USER_ENV_VAR_NAME,
CLP_DB_USER_ENV_VAR_NAME,
ClpConfig,
ClpDbUserType,
COMPRESSION_JOBS_TABLE_NAME,
COMPRESSION_SCHEDULER_COMPONENT_NAME,
COMPRESSION_WORKER_COMPONENT_NAME,
Expand Down Expand Up @@ -175,9 +180,12 @@ def _set_up_env_for_database(self) -> EnvVarsDict:
}

# Credentials
credentials = self._clp_config.database.credentials
env_vars |= {
"CLP_DB_PASS": self._clp_config.database.password,
"CLP_DB_USER": self._clp_config.database.username,
CLP_DB_ROOT_PASS_ENV_VAR_NAME: credentials[ClpDbUserType.ROOT].password,
CLP_DB_ROOT_USER_ENV_VAR_NAME: credentials[ClpDbUserType.ROOT].username,
CLP_DB_PASS_ENV_VAR_NAME: credentials[ClpDbUserType.CLP].password,
CLP_DB_USER_ENV_VAR_NAME: credentials[ClpDbUserType.CLP].username,
}

return env_vars
Expand Down
7 changes: 6 additions & 1 deletion components/clp-package-utils/clp_package_utils/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,12 @@ def load_config_file(

def generate_credentials_file(credentials_file_path: pathlib.Path):
credentials = {
DB_COMPONENT_NAME: {"username": "clp-user", "password": secrets.token_urlsafe(8)},
DB_COMPONENT_NAME: {
"username": "clp-user",
"password": secrets.token_urlsafe(8),
"root_username": "root",
"root_password": secrets.token_urlsafe(8),
},
QUEUE_COMPONENT_NAME: {"username": "clp-user", "password": secrets.token_urlsafe(8)},
REDIS_COMPONENT_NAME: {"password": secrets.token_urlsafe(16)},
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
CLP_DB_USER_ENV_VAR_NAME,
CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH,
CLP_DEFAULT_DATASET_NAME,
ClpDbUserType,
StorageEngine,
StorageType,
)
Expand Down Expand Up @@ -226,9 +227,10 @@ def main(argv: list[str]) -> int:
mounts.logs_dir,
mounts.archives_output_dir,
]
credentials = clp_config.database.credentials
extra_env_vars = {
CLP_DB_USER_ENV_VAR_NAME: clp_config.database.username,
CLP_DB_PASS_ENV_VAR_NAME: clp_config.database.password,
CLP_DB_PASS_ENV_VAR_NAME: credentials[ClpDbUserType.CLP].password,
CLP_DB_USER_ENV_VAR_NAME: credentials[ClpDbUserType.CLP].username,
}
container_start_cmd: list[str] = generate_container_start_cmd(
container_name, necessary_mounts, clp_config.container_image_ref, extra_env_vars
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
CLP_DB_USER_ENV_VAR_NAME,
CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH,
CLP_DEFAULT_DATASET_NAME,
ClpDbUserType,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick | 🔵 Trivial

CLP credential usage is correct; consider a shared helper for DB env construction

Using credentials[ClpDbUserType.CLP].username/password for CLP_DB_USER_ENV_VAR_NAME / CLP_DB_PASS_ENV_VAR_NAME is consistent with the new per‑user credential model and with how you validate/load credentials earlier in this script, so behaviour here looks correct.

Given the identical pattern now exists in multiple wrappers (compress.py, compress_from_s3.py, search.py, dataset_manager.py, decompress.py, and the native scripts), you may want to factor this into a small helper (e.g., in clp_package_utils.general or a tiny DB‑env utility) that takes a Database or ClpConfig and returns the CLP DB env dict. That would reduce duplication and keep future changes to credential handling in one place.

Also applies to: 256-260

🤖 Prompt for AI Agents
components/clp-package-utils/clp_package_utils/scripts/compress.py lines ~14 and
~256-260: the code repeats constructing CLP DB env vars using
credentials[ClpDbUserType.CLP].username/password across multiple scripts;
extract this duplicated logic into a small helper (e.g.,
clp_package_utils.general or clp_package_utils.db_utils) that accepts the
Database/ClpConfig (or credentials mapping) and returns the CLP DB env dict with
keys CLP_DB_USER_ENV_VAR_NAME and CLP_DB_PASS_ENV_VAR_NAME (and any other shared
DB env entries), then replace the inline construction in compress.py (and the
other listed scripts: compress_from_s3.py, search.py, dataset_manager.py,
decompress.py and native scripts) with calls to that helper to centralize
credential handling and reduce duplication.

StorageEngine,
StorageType,
)
Expand Down Expand Up @@ -252,9 +253,10 @@ def main(argv):
logger.error("No filesystem paths given for compression.")
return -1

credentials = clp_config.database.credentials
extra_env_vars = {
CLP_DB_USER_ENV_VAR_NAME: clp_config.database.username,
CLP_DB_PASS_ENV_VAR_NAME: clp_config.database.password,
CLP_DB_PASS_ENV_VAR_NAME: credentials[ClpDbUserType.CLP].password,
CLP_DB_USER_ENV_VAR_NAME: credentials[ClpDbUserType.CLP].username,
}
container_start_cmd = generate_container_start_cmd(
container_name, necessary_mounts, clp_config.container_image_ref, extra_env_vars
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
CLP_DB_USER_ENV_VAR_NAME,
CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH,
CLP_DEFAULT_DATASET_NAME,
ClpDbUserType,
StorageEngine,
StorageType,
)
Expand Down Expand Up @@ -306,9 +307,10 @@ def main(argv):
logger.error("No S3 URLs given for compression.")
return -1

credentials = clp_config.database.credentials
extra_env_vars = {
CLP_DB_USER_ENV_VAR_NAME: clp_config.database.username,
CLP_DB_PASS_ENV_VAR_NAME: clp_config.database.password,
CLP_DB_PASS_ENV_VAR_NAME: credentials[ClpDbUserType.CLP].password,
CLP_DB_USER_ENV_VAR_NAME: credentials[ClpDbUserType.CLP].username,
}
container_start_cmd = generate_container_start_cmd(
container_name, necessary_mounts, clp_config.container_image_ref, extra_env_vars
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
CLP_DB_PASS_ENV_VAR_NAME,
CLP_DB_USER_ENV_VAR_NAME,
CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH,
ClpDbUserType,
StorageEngine,
StorageType,
)
Expand Down Expand Up @@ -155,9 +156,10 @@ def main(argv: list[str]) -> int:
if aws_mount:
necessary_mounts.append(mounts.aws_config_dir)

credentials = clp_config.database.credentials
extra_env_vars = {
CLP_DB_USER_ENV_VAR_NAME: clp_config.database.username,
CLP_DB_PASS_ENV_VAR_NAME: clp_config.database.password,
CLP_DB_PASS_ENV_VAR_NAME: credentials[ClpDbUserType.CLP].password,
CLP_DB_USER_ENV_VAR_NAME: credentials[ClpDbUserType.CLP].username,
}
container_start_cmd = generate_container_start_cmd(
container_name, necessary_mounts, clp_config.container_image_ref, extra_env_vars
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH,
CLP_DEFAULT_DATASET_NAME,
ClpConfig,
ClpDbUserType,
StorageEngine,
StorageType,
)
Expand Down Expand Up @@ -132,9 +133,10 @@ def handle_extract_file_cmd(
)
)

credentials = clp_config.database.credentials
extra_env_vars = {
CLP_DB_USER_ENV_VAR_NAME: clp_config.database.username,
CLP_DB_PASS_ENV_VAR_NAME: clp_config.database.password,
CLP_DB_PASS_ENV_VAR_NAME: credentials[ClpDbUserType.CLP].password,
CLP_DB_USER_ENV_VAR_NAME: credentials[ClpDbUserType.CLP].username,
}
container_start_cmd = generate_container_start_cmd(
container_name, necessary_mounts, clp_config.container_image_ref, extra_env_vars
Expand Down Expand Up @@ -214,9 +216,10 @@ def handle_extract_stream_cmd(
container_clp_config, clp_config, get_container_config_filename(container_name)
)
necessary_mounts = [mounts.logs_dir]
credentials = clp_config.database.credentials
extra_env_vars = {
CLP_DB_USER_ENV_VAR_NAME: clp_config.database.username,
CLP_DB_PASS_ENV_VAR_NAME: clp_config.database.password,
CLP_DB_PASS_ENV_VAR_NAME: credentials[ClpDbUserType.CLP].password,
CLP_DB_USER_ENV_VAR_NAME: credentials[ClpDbUserType.CLP].username,
}
container_start_cmd = generate_container_start_cmd(
container_name, necessary_mounts, clp_config.container_image_ref, extra_env_vars
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
CLP_DB_USER_ENV_VAR_NAME,
CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH,
ClpConfig,
ClpDbUserType,
Database,
)
from clp_py_utils.clp_metadata_db_utils import get_files_table_name
Expand Down Expand Up @@ -245,10 +246,11 @@ def handle_extract_file_cmd(
"--db-table-prefix", clp_db_connection_params["table_prefix"],
]
# fmt: on
credentials = clp_db_connection_params["credentials"]
extract_env = {
**os.environ,
CLP_DB_USER_ENV_VAR_NAME: clp_db_connection_params["username"],
CLP_DB_PASS_ENV_VAR_NAME: clp_db_connection_params["password"],
CLP_DB_USER_ENV_VAR_NAME: credentials[ClpDbUserType.CLP].username,
CLP_DB_PASS_ENV_VAR_NAME: credentials[ClpDbUserType.CLP].password,
}

files_to_extract_list_path = None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
CLP_DB_USER_ENV_VAR_NAME,
CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH,
CLP_DEFAULT_DATASET_NAME,
ClpDbUserType,
StorageEngine,
StorageType,
)
Expand Down Expand Up @@ -134,9 +135,10 @@ def main(argv):
container_clp_config, clp_config, get_container_config_filename(container_name)
)
necessary_mounts = [mounts.logs_dir]
credentials = clp_config.database.credentials
extra_env_vars = {
CLP_DB_USER_ENV_VAR_NAME: clp_config.database.username,
CLP_DB_PASS_ENV_VAR_NAME: clp_config.database.password,
CLP_DB_PASS_ENV_VAR_NAME: credentials[ClpDbUserType.CLP].password,
CLP_DB_USER_ENV_VAR_NAME: credentials[ClpDbUserType.CLP].username,
}
container_start_cmd = generate_container_start_cmd(
container_name, necessary_mounts, clp_config.container_image_ref, extra_env_vars
Expand Down
96 changes: 70 additions & 26 deletions components/clp-py-utils/clp_py_utils/clp_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@
CLP_VERSION_FILE_PATH = pathlib.Path("VERSION")

# Environment variable names
CLP_DB_ROOT_USER_ENV_VAR_NAME = "CLP_DB_ROOT_USER"
CLP_DB_ROOT_PASS_ENV_VAR_NAME = "CLP_DB_ROOT_PASS"
CLP_DB_USER_ENV_VAR_NAME = "CLP_DB_USER"
CLP_DB_PASS_ENV_VAR_NAME = "CLP_DB_PASS"
CLP_QUEUE_USER_ENV_VAR_NAME = "CLP_QUEUE_USER"
Expand Down Expand Up @@ -171,6 +173,16 @@ def validate_query_engine_package_compatibility(self):
return self


# Kinds of database users whose credentials are tracked. Members are used as the keys of
# `Database.credentials` and as the `user_type` argument of the credential/connection helpers.
# NOTE(review): member values come from `auto()` on `KebabCaseStrEnum` — presumably the
# kebab-case form of each member name; confirm against that base class.
class ClpDbUserType(KebabCaseStrEnum):
# Regular CLP user; this is the default `user_type` wherever one is accepted.
CLP = auto()
# Database root user.
ROOT = auto()


# Username/password pair for a single database user.
# Both fields default to None, meaning "not loaded yet"; they are populated later from a
# credentials file or from environment variables, and a None in either field makes
# `Database.ensure_credentials_loaded` raise ValueError.
class DbUserCredentials(BaseModel):
username: NonEmptyStr | None = None
password: NonEmptyStr | None = None


class Database(BaseModel):
DEFAULT_PORT: ClassVar[int] = 3306

Expand All @@ -182,15 +194,24 @@ class Database(BaseModel):
auto_commit: bool = False
compress: bool = True

username: NonEmptyStr | None = None
password: NonEmptyStr | None = None
credentials: dict[ClpDbUserType, DbUserCredentials] = {
ClpDbUserType.CLP: DbUserCredentials(),
ClpDbUserType.ROOT: DbUserCredentials(),
}

def ensure_credentials_loaded(self):
if self.username is None or self.password is None:
def ensure_credentials_loaded(self, user_type: ClpDbUserType):
if (
self.credentials[user_type].username is None
or self.credentials[user_type].password is None
):
raise ValueError("Credentials not loaded.")

def get_mysql_connection_params(self, disable_localhost_socket_connection: bool = False):
self.ensure_credentials_loaded()
def get_mysql_connection_params(
self,
disable_localhost_socket_connection: bool = False,
user_type: ClpDbUserType = ClpDbUserType.CLP,
):
self.ensure_credentials_loaded(user_type)

host = self.host
if disable_localhost_socket_connection and "localhost" == self.host:
Expand All @@ -200,8 +221,8 @@ def get_mysql_connection_params(self, disable_localhost_socket_connection: bool
connection_params = {
"host": host,
"port": self.port,
"user": self.username,
"password": self.password,
"user": self.credentials[user_type].username,
"password": self.credentials[user_type].password,
"database": self.name,
"compress": self.compress,
"autocommit": self.auto_commit,
Expand All @@ -210,51 +231,74 @@ def get_mysql_connection_params(self, disable_localhost_socket_connection: bool
connection_params["ssl_cert"] = self.ssl_cert
return connection_params

def get_clp_connection_params_and_type(self, disable_localhost_socket_connection: bool = False):
self.ensure_credentials_loaded()
def get_clp_connection_params_and_type(
self,
disable_localhost_socket_connection: bool = False,
user_type: ClpDbUserType = ClpDbUserType.CLP,
):
self.ensure_credentials_loaded(user_type)

host = self.host
if disable_localhost_socket_connection and "localhost" == self.host:
host = "127.0.0.1"

connection_params_and_type = {
return {
# NOTE: clp-core does not distinguish between mysql and mariadb
"type": DatabaseEngine.MYSQL.value,
"host": host,
"port": self.port,
"username": self.username,
"password": self.password,
"name": self.name,
"table_prefix": CLP_METADATA_TABLE_PREFIX,
"credentials": {user_type: self.credentials[user_type].model_dump()},
"database": self.name,
"compress": self.compress,
"autocommit": self.auto_commit,
"name": self.name,
"type": DatabaseEngine.MYSQL.value,
"table_prefix": CLP_METADATA_TABLE_PREFIX,
}
if self.ssl_cert:
connection_params_and_type["ssl_cert"] = self.ssl_cert
return connection_params_and_type

def dump_to_primitive_dict(self):
d = self.model_dump(exclude={"username", "password"})
d = self.model_dump(exclude={"credentials"})
return d

def load_credentials_from_file(self, credentials_file_path: pathlib.Path):
config = read_yaml_config_file(credentials_file_path)
if config is None:
raise ValueError(f"Credentials file '{credentials_file_path}' is empty.")
try:
self.username = get_config_value(config, f"{DB_COMPONENT_NAME}.username")
self.password = get_config_value(config, f"{DB_COMPONENT_NAME}.password")
self.credentials[ClpDbUserType.CLP].username = get_config_value(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kirkrodrigues

Unrelated to this PR: using the get_config_value helper just to read a value from a dictionary seems to overcomplicate things. Why did we create this helper?

Copy link
Member

@junhaoliao junhaoliao Nov 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Briefly discussed offline: the get_config_value helper might not be that helpful, so we should investigate whether to remove it.

@coderabbitai create an issue to track

config, f"{DB_COMPONENT_NAME}.username"
)
self.credentials[ClpDbUserType.CLP].password = get_config_value(
config, f"{DB_COMPONENT_NAME}.password"
)
self.credentials[ClpDbUserType.ROOT].username = get_config_value(
config, f"{DB_COMPONENT_NAME}.root_username"
)
self.credentials[ClpDbUserType.ROOT].password = get_config_value(
config, f"{DB_COMPONENT_NAME}.root_password"
)
except KeyError as ex:
raise ValueError(
f"Credentials file '{credentials_file_path}' does not contain key '{ex}'."
)

def load_credentials_from_env(self):
def load_credentials_from_env(self, user_type: ClpDbUserType = ClpDbUserType.CLP):
"""
:raise ValueError: if any expected environment variable is not set.
Loads database credentials from environment variables.

:param user_type: User type whose credentials are to be loaded.

:raise ValueError: If any expected environment variable is not set.
"""
self.username = _get_env_var(CLP_DB_USER_ENV_VAR_NAME)
self.password = _get_env_var(CLP_DB_PASS_ENV_VAR_NAME)
match user_type:
case ClpDbUserType.CLP:
user_env_var = CLP_DB_USER_ENV_VAR_NAME
pass_env_var = CLP_DB_PASS_ENV_VAR_NAME
case ClpDbUserType.ROOT:
user_env_var = CLP_DB_ROOT_USER_ENV_VAR_NAME
pass_env_var = CLP_DB_ROOT_PASS_ENV_VAR_NAME

self.credentials[user_type].username = _get_env_var(user_env_var)
self.credentials[user_type].password = _get_env_var(pass_env_var)

def transform_for_container(self):
self.host = DB_COMPONENT_NAME
Expand Down
Loading
Loading