Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 83 additions & 4 deletions src/common/db/Database.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from os import _exit, getenv, sep
from os.path import join as os_join
from pathlib import Path
from re import DOTALL, Match, compile as re_compile, escape, error as RegexError, search
from re import DOTALL, Match, compile as re_compile, escape, error as RegexError, search, sub as re_sub
from sys import argv, path as sys_path
from tarfile import open as tar_open
from threading import Lock
Expand Down Expand Up @@ -207,6 +207,21 @@ def validate_and_update_db_string(db_string: str) -> Tuple[str, Optional[Match]]
sqlalchemy_string, main_match = validate_and_update_db_string(sqlalchemy_string)
sqlalchemy_string_readonly, readonly_match = validate_and_update_db_string(sqlalchemy_string_readonly)

def _mask_db_uri(uri: str) -> str:
"""Best-effort mask password in SQLAlchemy URI for logs."""
if not uri:
return uri
try:
from sqlalchemy.engine.url import make_url

u = make_url(uri)
if u.password:
u = u.set(password="***")
return str(u)
except Exception:
# Fallback: mask :password@ in authority section
return re_sub(r"(//[^:/@]+:)[^@]*@", r"\1***@", uri)
Comment on lines +214 to +223

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

cat -n src/common/db/Database.py | head -230 | tail -30

Repository: bunkerity/bunkerweb

Length of output: 1582


🏁 Script executed:

cat -n src/common/db/Database.py | sed -n '210,300p'

Repository: bunkerity/bunkerweb

Length of output: 4407


🏁 Script executed:

# Check what exceptions make_url can raise
rg "def make_url" --type py -A 5

Repository: bunkerity/bunkerweb

Length of output: 45


🏁 Script executed:

# Check the full function context for _mask_db_uri
ast-grep --pattern 'def _mask_db_uri($$$) {
  $$$
}'

Repository: bunkerity/bunkerweb

Length of output: 45


Catch specific exceptions from SQLAlchemy in helper functions, not Exception.

Lines 221 and 295 catch broad Exception in nested helper functions (_mask_db_uri() and _ssl_expectations()), which hides unexpected parser faults and masks programming errors. These are not process boundaries; they should catch only the exceptions make_url() actually raises.

SQLAlchemy's make_url() raises ArgumentError for invalid URIs. Catch that explicitly, and let other exceptions surface:

except ArgumentError:
    # Fallback: mask :password@ in authority section
    return re_sub(r"(//[^:/@]+:)[^@]*@", r"\1***@", uri)

This aligns with the coding guideline: "Catch specific exceptions; never use bare except:. Catching Exception is acceptable only at explicit process boundaries (scheduler loops, job runners, worker entrypoints, graceful-shutdown boundaries)."

Applies to:

  • Line 221 in _mask_db_uri()
  • Line 295 in _ssl_expectations()
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/common/db/Database.py` around lines 214 - 223, The helper functions
_mask_db_uri and _ssl_expectations currently catch broad Exception around
sqlalchemy.engine.url.make_url(uri); change those handlers to catch SQLAlchemy's
specific ArgumentError instead (import ArgumentError from sqlalchemy.exc) so
only invalid-URI parsing falls back to the existing masking logic and other
unexpected errors propagate; keep the existing fallback masking behavior
unchanged.


self.database_uri = "" if sqlalchemy_string == sqlalchemy_string_readonly else sqlalchemy_string
self.database_uri_readonly = sqlalchemy_string_readonly
error = False
Expand Down Expand Up @@ -261,6 +276,27 @@ def validate_and_update_db_string(db_string: str) -> Tuple[str, Optional[Match]]
current_time = datetime.now().astimezone()
not_connected = True
fallback = False
ssl_ca_hint_logged = False

def _ssl_expectations(uri: str) -> Tuple[bool, bool]:
"""Return (ssl_enabled, has_explicit_root_ca) for a DB URI."""
if not uri:
return False, False
try:
from sqlalchemy.engine.url import make_url

u = make_url(uri)
q = {str(k).lower(): str(v) for k, v in dict(u.query or {}).items()}
ssl_flag = q.get("ssl", "").strip().lower()
sslmode = q.get("sslmode", "").strip().lower()
has_ca = bool((q.get("ssl_ca", "") or q.get("sslrootcert", "")).strip())
ssl_enabled = has_ca or ssl_flag in ("1", "true", "yes", "on") or (sslmode not in ("", "disable", "off", "no", "false", "0"))
return ssl_enabled, has_ca
except Exception:
_lower = uri.lower()
has_ca = ("ssl_ca=" in _lower) or ("sslrootcert=" in _lower)
ssl_enabled = has_ca or ("ssl=true" in _lower) or ("sslmode=" in _lower and "sslmode=disable" not in _lower)
return ssl_enabled, has_ca

while not_connected:
try:
Expand All @@ -275,6 +311,36 @@ def validate_and_update_db_string(db_string: str) -> Tuple[str, Optional[Match]]

not_connected = False
except (OperationalError, DatabaseError) as e:
# Helpful TLS diagnostics: SSL enabled but no CA configured.
# Print once to avoid log spam during retries.
if not ssl_ca_hint_logged:
_uri_for_check = self.database_uri or sqlalchemy_string
ssl_enabled, has_root_ca = _ssl_expectations(_uri_for_check)
err_l = str(e).lower()
looks_like_tls_error = any(
token in err_l
for token in (
"ssl",
"tls",
"certificate",
"cert",
"verify failed",
"unknown ca",
"self signed",
)
)
if ssl_enabled and not has_root_ca and looks_like_tls_error:
self.logger.error(
"Database connection failed with SSL/TLS enabled but no root CA configured. "
"Reason: %s",
e,
)
self.logger.error(
"Set a CA file in DATABASE_URI (ssl_ca=... for MySQL/MariaDB, sslrootcert=... for PostgreSQL), "
"or disable SSL for testing only."
)
Comment on lines +333 to +341

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick | 🔵 Trivial

Use warning/debug for pre-timeout TLS hints.

These lines run inside the retry loop, so a transient startup failure can still emit error-level events even when the next retry succeeds. That is likely to create false-positive startup alerts.

Suggested change
-                        self.logger.error(
+                        self.logger.warning(
                             "Database connection failed with SSL/TLS enabled but no root CA configured. "
                             "Reason: %s",
                             e,
                         )
-                        self.logger.error(
+                        self.logger.warning(
                             "Set a CA file in DATABASE_URI (ssl_ca=... for MySQL/MariaDB, sslrootcert=... for PostgreSQL), "
                             "or disable SSL for testing only."
                         )
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
self.logger.error(
"Database connection failed with SSL/TLS enabled but no root CA configured. "
"Reason: %s",
e,
)
self.logger.error(
"Set a CA file in DATABASE_URI (ssl_ca=... for MySQL/MariaDB, sslrootcert=... for PostgreSQL), "
"or disable SSL for testing only."
)
self.logger.warning(
"Database connection failed with SSL/TLS enabled but no root CA configured. "
"Reason: %s",
e,
)
self.logger.warning(
"Set a CA file in DATABASE_URI (ssl_ca=... for MySQL/MariaDB, sslrootcert=... for PostgreSQL), "
"or disable SSL for testing only."
)
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/common/db/Database.py` around lines 333 - 341, The TLS hint logs inside
the retry loop currently use self.logger.error and should be lowered to avoid
false-positive alerts; change the two self.logger.error calls in the retrying
connection path (the block that logs "Database connection failed with SSL/TLS
enabled..." and the subsequent instruction about setting ssl_ca/sslrootcert) to
self.logger.warning or self.logger.debug (prefer warning for actionable hints,
debug if very verbose) and ensure only the final failure after exhausting
retries still logs as error; locate these calls in the Database class
connection/retry method (the method that performs the retry loop and currently
calls self.logger.error with the exception variable e) and update them
accordingly.

ssl_ca_hint_logged = True

if (datetime.now().astimezone() - current_time).total_seconds() > DATABASE_RETRY_TIMEOUT:
if not fallback and self.database_uri_readonly:
self.logger.error(f"Can't connect to database after {DATABASE_RETRY_TIMEOUT} seconds. Falling back to read-only database connection")
Expand All @@ -283,7 +349,12 @@ def validate_and_update_db_string(db_string: str) -> Tuple[str, Optional[Match]]
self.readonly = True
fallback = True
continue
self.logger.error(f"Can't connect to database after {DATABASE_RETRY_TIMEOUT} seconds: {e}")
self.logger.error(
"Can't connect to database after %s seconds: %s (DATABASE_URI=%s)",
DATABASE_RETRY_TIMEOUT,
e,
_mask_db_uri(self.database_uri or self.database_uri_readonly),
)
Comment on lines +352 to +357

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Stop logging DATABASE_URI in these paths.

Line 356, Line 373 and Line 380 still emit the connection URI, which the repo rules require you to scrub from logs. They also pick self.database_uri after the read-only fallback because self.database_uri or self.database_uri_readonly always prefers the primary target, so the message can identify the wrong backend during a dual-endpoint failure. Log a non-secret target label plus the backend name instead.

Suggested change
+                    current_uri = self.database_uri_readonly if fallback or self.readonly else (self.database_uri or self.database_uri_readonly)
+                    current_target = "readonly" if fallback or self.readonly else "primary"
+                    current_backend = current_uri.split(":", 1)[0] if current_uri else "unknown"
                     self.logger.error(
-                        "Can't connect to database after %s seconds: %s (DATABASE_URI=%s)",
+                        "Can't connect to %s database target (%s) after %s seconds: %s",
+                        current_target,
+                        current_backend,
                         DATABASE_RETRY_TIMEOUT,
                         e,
-                        _mask_db_uri(self.database_uri or self.database_uri_readonly),
                     )
@@
                     self.logger.warning(
-                        "Can't connect to database (%s). Retrying in 5 seconds ... (DATABASE_URI=%s)",
+                        "Can't connect to %s database target (%s): %s. Retrying in 5 seconds ...",
+                        current_target,
+                        current_backend,
                         e,
-                        _mask_db_uri(self.database_uri or self.database_uri_readonly),
                     )
@@
                 self.logger.error(
-                    "Error when trying to connect to the database: %s (DATABASE_URI=%s)",
+                    "Error when trying to connect to the %s database target (%s): %s",
+                    current_target,
+                    current_backend,
                     e,
-                    _mask_db_uri(self.database_uri or self.database_uri_readonly),
                 )

As per coding guidelines, "Scrub secrets, tokens, cookies, database URIs, and Authorization headers from logs. Use the logging framework rather than print()."

Also applies to: 370-381

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/common/db/Database.py` around lines 352 - 357, The error logs in Database
(self.logger.error calls that currently include DATABASE_URI) must stop emitting
the full connection URI and must not prefer self.database_uri over the readonly
URI when reporting which target failed; update each logger.error (the ones after
retry failures using DATABASE_RETRY_TIMEOUT and in the dual-endpoint failure
paths) to omit the URI and instead log a non-secret target label plus the
backend name (use a clear label like "primary" or "readonly" derived from
whether the attempt was against self.database_uri or self.database_uri_readonly,
and include self.database_backend or similar backend identifier). Ensure the
message uses DATABASE_RETRY_TIMEOUT and the caught exception e, but replace any
_mask_db_uri(...) or direct URI interpolation with the non-secret target label
and backend name.

_exit(1)

if any(error in str(e) for error in self.READONLY_ERROR):
Expand All @@ -296,10 +367,18 @@ def validate_and_update_db_string(db_string: str) -> Tuple[str, Optional[Match]]
not_connected = False
continue
elif log:
self.logger.warning("Can't connect to database, retrying in 5 seconds ...")
self.logger.warning(
"Can't connect to database (%s). Retrying in 5 seconds ... (DATABASE_URI=%s)",
e,
_mask_db_uri(self.database_uri or self.database_uri_readonly),
)
sleep(5)
except BaseException as e:
self.logger.error(f"Error when trying to connect to the database: {e}")
self.logger.error(
"Error when trying to connect to the database: %s (DATABASE_URI=%s)",
e,
_mask_db_uri(self.database_uri or self.database_uri_readonly),
)
exit(1)

if log:
Expand Down
Loading