From 099cf9d1ef7283bff2cc7b4bc647b62477619416 Mon Sep 17 00:00:00 2001
From: ausarkhan
Date: Fri, 31 Oct 2025 22:28:01 +0000
Subject: [PATCH] Treat 403/429/503 as WAF for 'message' detection to avoid false CLAIMED results (fixes #2734)

---
 sherlock_project/sherlock.py | 62 +++++++++++++++++++++++-------------
 tests/test_giphy_blocking.py | 50 +++++++++++++++++++++++++++++
 2 files changed, 90 insertions(+), 22 deletions(-)
 create mode 100644 tests/test_giphy_blocking.py

diff --git a/sherlock_project/sherlock.py b/sherlock_project/sherlock.py
index f78d4b8ca..ea73faa7f 100644
--- a/sherlock_project/sherlock.py
+++ b/sherlock_project/sherlock.py
@@ -401,30 +401,48 @@ def sherlock(
                 query_status = QueryStatus.UNKNOWN
             else:
                 if "message" in error_type:
-                    # error_flag True denotes no error found in the HTML
-                    # error_flag False denotes error found in the HTML
-                    error_flag = True
-                    errors = net_info.get("errorMsg")
-                    # errors will hold the error message
-                    # it can be string or list
-                    # by isinstance method we can detect that
-                    # and handle the case for strings as normal procedure
-                    # and if its list we can iterate the errors
-                    if isinstance(errors, str):
-                        # Checks if the error message is in the HTML
-                        # if error is present we will set flag to False
-                        if errors in r.text:
-                            error_flag = False
+                    # If the server returns a blocking or error status (common when a
+                    # WAF or similar is in front of the site), treat it as WAF so we
+                    # don't incorrectly return CLAIMED just because the response body
+                    # doesn't include the configured error message.
+                    #
+                    # This addresses cases like Giphy where both existing and
+                    # non-existing pages can return 403 and an empty body.
+                    try:
+                        status_code_val = int(http_status)
+                    except (TypeError, ValueError):
+                        status_code_val = None
+
+                    if status_code_val in (403, 429, 503):
+                        # Common codes indicating blocking / rate limiting / service unavailable.
+                        # Mark as WAF so the caller knows the probe couldn't determine existence.
+                        query_status = QueryStatus.WAF
                     else:
-                        # If it's list, it will iterate all the error message
-                        for error in errors:
-                            if error in r.text:
+                        # error_flag True denotes no "error_message" found in the HTML
+                        # error_flag False denotes the configured error message is present
+                        # (meaning the username is AVAILABLE).
+                        error_flag = True
+                        errors = net_info.get("errorMsg")
+                        # errors will hold the error message
+                        # it can be string or list
+                        # by isinstance method we can detect that
+                        # and handle the case for strings as normal procedure
+                        # and if its list we can iterate the errors
+                        if isinstance(errors, str):
+                            # Checks if the error message is in the HTML
+                            # if error is present we will set flag to False
+                            if errors in r.text:
                                 error_flag = False
-                                break
-                    if error_flag:
-                        query_status = QueryStatus.CLAIMED
-                    else:
-                        query_status = QueryStatus.AVAILABLE
+                        else:
+                            # If it's list, it will iterate all the error message
+                            for error in errors:
+                                if error in r.text:
+                                    error_flag = False
+                                    break
+                        if error_flag:
+                            query_status = QueryStatus.CLAIMED
+                        else:
+                            query_status = QueryStatus.AVAILABLE
 
                 if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE:
                     error_codes = net_info.get("errorCode")
diff --git a/tests/test_giphy_blocking.py b/tests/test_giphy_blocking.py
new file mode 100644
index 000000000..4d5868021
--- /dev/null
+++ b/tests/test_giphy_blocking.py
@@ -0,0 +1,50 @@
+import json
+import os
+
+from sherlock_project.sherlock import sherlock, SherlockFuturesSession
+from sherlock_project.notify import QueryNotify
+from sherlock_project.result import QueryStatus
+
+
+class DummyResponse:
+    def __init__(self, status_code: int, text: str = "", encoding: str = "utf-8"):
+        self.status_code = status_code
+        self.text = text
+        self.encoding = encoding
+        self.elapsed = 0.0
+
+
+class DummyFuture:
+    def __init__(self, response):
+        self._response = response
+
+    def result(self):
+        return self._response
+
+
+def load_giphy_manifest():
+    base = os.path.dirname(os.path.dirname(__file__))
+    data_file = os.path.join(base, "sherlock_project", "resources", "data.json")
+    with open(data_file, "r", encoding="utf-8") as f:
+        data = json.load(f)
+    return data["Giphy"].copy()
+
+
+def test_giphy_blocking_marked_waf(monkeypatch):
+    giphy = load_giphy_manifest()
+    assert giphy.get("errorType") == "message"
+
+    site_data = {"Giphy": giphy}
+
+    def fake_get(self, *args, **kwargs):
+        resp = DummyResponse(status_code=403, text="")
+        return DummyFuture(resp)
+
+    monkeypatch.setattr(SherlockFuturesSession, "get", fake_get)
+
+    qn = QueryNotify()
+    results = sherlock(username="doesNotExist", site_data=site_data, query_notify=qn)
+
+    assert "Giphy" in results
+    status = results["Giphy"]["status"].status
+    assert status is QueryStatus.WAF, f"Expected Giphy to be marked WAF on 403+empty body, got {status}"