Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 40 additions & 22 deletions sherlock_project/sherlock.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,30 +401,48 @@ def sherlock(
query_status = QueryStatus.UNKNOWN
else:
if "message" in error_type:
# error_flag True denotes no error found in the HTML
# error_flag False denotes error found in the HTML
error_flag = True
errors = net_info.get("errorMsg")
# errors will hold the error message
# it can be string or list
# by isinstance method we can detect that
# and handle the case for strings as normal procedure
# and if its list we can iterate the errors
if isinstance(errors, str):
# Checks if the error message is in the HTML
# if error is present we will set flag to False
if errors in r.text:
error_flag = False
# If the server returns a blocking or error status (common when a
# WAF or similar is in front of the site), treat it as WAF so we
# don't incorrectly return CLAIMED just because the response body
# doesn't include the configured error message.
#
# This addresses cases like Giphy where both existing and
# non-existing pages can return 403 and an empty body.
try:
status_code_val = int(http_status)
except Exception:
status_code_val = None

if status_code_val in (403, 429, 503):
# Common codes indicating blocking / rate limiting / service unavailable.
# Mark as WAF so the caller knows the probe couldn't determine existence.
query_status = QueryStatus.WAF
else:
# If it's list, it will iterate all the error message
for error in errors:
if error in r.text:
# error_flag True denotes no "error_message" found in the HTML
# error_flag False denotes the configured error message is present
# (meaning the username is AVAILABLE).
error_flag = True
errors = net_info.get("errorMsg")
# errors holds the configured error message(s).
# It can be a single string or a list of strings;
# isinstance() tells the two cases apart.
# A string is checked directly against the response body,
# and a list is iterated so each message is checked in turn.
if isinstance(errors, str):
# Checks if the error message is in the HTML
# if error is present we will set flag to False
if errors in r.text:
error_flag = False
break
if error_flag:
query_status = QueryStatus.CLAIMED
else:
query_status = QueryStatus.AVAILABLE
else:
# If it's a list, iterate over all the error messages
for error in errors:
if error in r.text:
error_flag = False
break
if error_flag:
query_status = QueryStatus.CLAIMED
else:
query_status = QueryStatus.AVAILABLE

if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE:
error_codes = net_info.get("errorCode")
Expand Down
50 changes: 50 additions & 0 deletions tests/test_giphy_blocking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import json
import os

from sherlock_project.sherlock import sherlock, SherlockFuturesSession
from sherlock_project.notify import QueryNotify
from sherlock_project.result import QueryStatus


class DummyResponse:
    """Canned HTTP response object handed back by the patched session.

    It carries only the four attributes assigned in ``__init__``:
    ``status_code``, ``text``, ``encoding`` and ``elapsed``.
    """

    def __init__(self, status_code: int, text: str = "", encoding: str = "utf-8"):
        """Store the canned response values.

        Args:
            status_code: HTTP status code the fake response reports.
            text: Response body; defaults to an empty string.
            encoding: Declared body encoding; defaults to ``"utf-8"``.
        """
        self.status_code = status_code
        self.text = text
        self.encoding = encoding
        # No request is actually sent, so the elapsed time is pinned to zero.
        self.elapsed = 0.0


class DummyFuture:
    """Future-like wrapper whose ``result()`` returns a prepared response."""

    def __init__(self, response):
        """Remember the object that ``result()`` should return.

        Args:
            response: Any object; it is stored and returned unchanged.
        """
        self._response = response

    def result(self):
        """Return the stored response immediately (nothing is awaited)."""
        return self._response


def load_giphy_manifest():
    """Load the ``"Giphy"`` entry from the project's site manifest.

    The manifest is read from ``sherlock_project/resources/data.json``,
    located relative to the directory two levels above this file.

    Returns:
        A shallow copy of the ``"Giphy"`` mapping from the manifest.

    Raises:
        FileNotFoundError: If the manifest file does not exist.
        KeyError: If the manifest has no ``"Giphy"`` entry.
    """
    # Two dirname() calls: this file's directory, then its parent.
    base = os.path.dirname(os.path.dirname(__file__))
    data_file = os.path.join(base, "sherlock_project", "resources", "data.json")
    with open(data_file, "r", encoding="utf-8") as f:
        data = json.load(f)
    return data["Giphy"].copy()


def test_giphy_blocking_marked_waf(monkeypatch):
    """A 403 response with an empty body for Giphy must be reported as WAF.

    The session's ``get`` is replaced so no network request is made; every
    call returns a future that resolves to a 403 response with no body.
    """
    giphy = load_giphy_manifest()
    # Precondition: the scenario only makes sense while Giphy is detected
    # through an error message in the body.
    assert giphy.get("errorType") == "message", "Giphy manifest no longer uses message-based detection"

    site_data = {"Giphy": giphy}

    def fake_get(self, *args, **kwargs):
        # Ignore the request arguments entirely; always answer 403 + empty body.
        resp = DummyResponse(status_code=403, text="")
        return DummyFuture(resp)

    monkeypatch.setattr(SherlockFuturesSession, "get", fake_get)

    qn = QueryNotify()
    results = sherlock(username="doesNotExist", site_data=site_data, query_notify=qn)

    assert "Giphy" in results
    status = results["Giphy"]["status"].status
    assert status is QueryStatus.WAF, f"Expected Giphy to be marked WAF on 403+empty body, got {status}"