Skip to content

Commit f63874a

Browse files
test(cli): fix environment variable isolation in CLI utils tests
Add clear=True to mock.patch.dict calls to properly isolate environment variables in tests. Without this, tests fail when DATAHUB_* environment variables are set in the user's shell.
1 parent 7bd7401 commit f63874a

File tree

4 files changed

+403
-19
lines changed

4 files changed

+403
-19
lines changed

metadata-ingestion/tests/unit/cli/test_cli_utils.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,15 @@ def test_first_non_null():
1515
assert cli_utils.first_non_null([" ", "1", "2"]) == "1"
1616

1717

18-
@mock.patch.dict(os.environ, {"DATAHUB_GMS_HOST": "http://localhost:9092"})
18+
@mock.patch.dict(os.environ, {"DATAHUB_GMS_HOST": "http://localhost:9092"}, clear=True)
1919
def test_correct_url_when_gms_host_in_old_format():
2020
assert _get_config_from_env() == ("http://localhost:9092", None)
2121

2222

2323
@mock.patch.dict(
24-
os.environ, {"DATAHUB_GMS_HOST": "localhost", "DATAHUB_GMS_PORT": "8080"}
24+
os.environ,
25+
{"DATAHUB_GMS_HOST": "localhost", "DATAHUB_GMS_PORT": "8080"},
26+
clear=True,
2527
)
2628
def test_correct_url_when_gms_host_and_port_set():
2729
assert _get_config_from_env() == ("http://localhost:8080", None)
@@ -34,6 +36,7 @@ def test_correct_url_when_gms_host_and_port_set():
3436
"DATAHUB_GMS_HOST": "localhost",
3537
"DATAHUB_GMS_PORT": "8080",
3638
},
39+
clear=True,
3740
)
3841
def test_correct_url_when_gms_host_port_url_set():
3942
assert _get_config_from_env() == ("http://localhost:8080", None)
@@ -47,6 +50,7 @@ def test_correct_url_when_gms_host_port_url_set():
4750
"DATAHUB_GMS_PORT": "8080",
4851
"DATAHUB_GMS_PROTOCOL": "https",
4952
},
53+
clear=True,
5054
)
5155
def test_correct_url_when_gms_host_port_url_protocol_set():
5256
assert _get_config_from_env() == ("https://localhost:8080", None)
@@ -57,6 +61,7 @@ def test_correct_url_when_gms_host_port_url_protocol_set():
5761
{
5862
"DATAHUB_GMS_URL": "https://example.com",
5963
},
64+
clear=True,
6065
)
6166
def test_correct_url_when_url_set():
6267
assert _get_config_from_env() == ("https://example.com", None)

metadata-ingestion/tests/unit/config/test_nested_secrets.py

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,5 +251,141 @@ class UnityCatalogConnectionConfig(ConfigModel):
251251
)
252252

253253

254+
class TestConfigLoggingMasking:
255+
"""Test that SecretStr fields are masked when configs are logged."""
256+
257+
def setup_method(self):
258+
from datahub.masking.bootstrap import (
259+
initialize_secret_masking,
260+
shutdown_secret_masking,
261+
)
262+
263+
shutdown_secret_masking()
264+
SecretRegistry.reset_instance()
265+
initialize_secret_masking(force=True)
266+
267+
def teardown_method(self):
268+
from datahub.masking.bootstrap import shutdown_secret_masking
269+
270+
shutdown_secret_masking()
271+
SecretRegistry.reset_instance()
272+
273+
def test_config_secrets_masked_in_logs(self):
274+
"""Test that SecretStr fields are masked when config is logged."""
275+
import logging
276+
277+
from datahub.masking.masking_filter import SecretMaskingFilter
278+
279+
class TestSourceConfig(ConfigModel):
280+
password: SecretStr = Field(description="Database password")
281+
api_key: SecretStr = Field(description="API key")
282+
host: str = Field(description="Database host")
283+
284+
# Create config with secrets
285+
config = TestSourceConfig(
286+
password="my-secret-password", api_key="my-api-key", host="my-host"
287+
)
288+
289+
# Verify secrets are registered
290+
registry = SecretRegistry.get_instance()
291+
assert registry.has_secret("password")
292+
assert registry.has_secret("api_key")
293+
294+
# Get the masking filter
295+
masking_filter = SecretMaskingFilter(registry)
296+
297+
# Create log records with secrets and verify they're masked
298+
test_cases = [
299+
(f"Password is: {config.password.get_secret_value()}", "password"),
300+
(f"API key is: {config.api_key.get_secret_value()}", "api_key"),
301+
(f"Host is: {config.host}", None), # Should not be masked
302+
]
303+
304+
for message, secret_name in test_cases:
305+
record = logging.LogRecord(
306+
name="test",
307+
level=logging.INFO,
308+
pathname="",
309+
lineno=0,
310+
msg=message,
311+
args=(),
312+
exc_info=None,
313+
)
314+
315+
# Apply filter
316+
masking_filter.filter(record)
317+
318+
# Check result
319+
masked_message = record.getMessage()
320+
321+
if secret_name:
322+
# Secret should be masked
323+
assert "my-secret-password" not in masked_message, (
324+
f"Password not masked: {masked_message}"
325+
)
326+
assert "my-api-key" not in masked_message, (
327+
f"API key not masked: {masked_message}"
328+
)
329+
assert f"***REDACTED:{secret_name}***" in masked_message, (
330+
f"Redaction marker not found: {masked_message}"
331+
)
332+
else:
333+
# Host should not be masked
334+
assert "my-host" in masked_message
335+
336+
def test_config_secrets_masked_at_all_log_levels(self):
337+
"""Test that SecretStr fields are masked at all log levels (DEBUG, INFO, WARNING, ERROR)."""
338+
import logging
339+
340+
from datahub.masking.masking_filter import SecretMaskingFilter
341+
342+
class TestSourceConfig(ConfigModel):
343+
password: SecretStr = Field(description="Database password")
344+
345+
config = TestSourceConfig(password="debug-secret-password")
346+
347+
# Verify secret is registered
348+
registry = SecretRegistry.get_instance()
349+
assert registry.has_secret("password")
350+
351+
# Get the masking filter
352+
masking_filter = SecretMaskingFilter(registry)
353+
354+
# Test all log levels
355+
log_levels = [
356+
(logging.DEBUG, "DEBUG"),
357+
(logging.INFO, "INFO"),
358+
(logging.WARNING, "WARNING"),
359+
(logging.ERROR, "ERROR"),
360+
(logging.CRITICAL, "CRITICAL"),
361+
]
362+
363+
for level, level_name in log_levels:
364+
record = logging.LogRecord(
365+
name="test",
366+
level=level,
367+
pathname="",
368+
lineno=0,
369+
msg=f"[{level_name}] Password: {config.password.get_secret_value()}",
370+
args=(),
371+
exc_info=None,
372+
)
373+
374+
# Apply filter
375+
masking_filter.filter(record)
376+
377+
# Check result
378+
masked_message = record.getMessage()
379+
380+
# Secret should be masked at ALL log levels
381+
assert "debug-secret-password" not in masked_message, (
382+
f"Password not masked at {level_name} level: {masked_message}"
383+
)
384+
assert "***REDACTED:password***" in masked_message, (
385+
f"Redaction marker not found at {level_name} level: {masked_message}"
386+
)
387+
assert level_name in masked_message # Level name should still be there
388+
389+
254390
if __name__ == "__main__":
255391
pytest.main([__file__, "-v"])

0 commit comments

Comments
 (0)