134 changes: 125 additions & 9 deletions bbot/core/helpers/web/web.py
@@ -1,7 +1,10 @@
import json
import logging
import re
import warnings
from pathlib import Path
from bs4 import BeautifulSoup
import ipaddress

from bbot.core.engine import EngineClient
from bbot.core.helpers.misc import truncate_filename
@@ -319,12 +322,12 @@ async def curl(self, *args, **kwargs):
method (str, optional): The HTTP method to use for the request (e.g., 'GET', 'POST').
cookies (dict, optional): A dictionary of cookies to include in the request.
path_override (str, optional): Overrides the request-target to use in the HTTP request line.
head_mode (bool, optional): If True, includes '-I' to fetch headers only. Defaults to None.
raw_body (str, optional): Raw string to be sent in the body of the request.
resolve (dict, optional): Host resolution override as dict with 'host', 'port', 'ip' keys for curl --resolve.
**kwargs: Arbitrary keyword arguments that will be forwarded to the HTTP request function.

Returns:
str: The output of the cURL command.
dict: Parsed response containing 'response_data', 'headers', 'raw_headers', and curl write-out metadata.

Raises:
CurlError: If 'url' is not supplied.
@@ -338,7 +341,11 @@ async def curl(self, *args, **kwargs):
if not url:
raise CurlError("No URL supplied to CURL helper")

curl_command = ["curl", url, "-s"]
# Use BBOT-specific curl binary
bbot_curl = self.parent_helper.tools_dir / "curl"
if not bbot_curl.exists():
raise CurlError(f"BBOT curl binary not found at {bbot_curl}. Run dependency installation.")
curl_command = [str(bbot_curl), url, "-s"]

raw_path = kwargs.get("raw_path", False)
if raw_path:
@@ -382,6 +389,12 @@ async def curl(self, *args, **kwargs):
curl_command.append("-m")
curl_command.append(str(timeout))

# mirror the web helper behavior
retries = self.parent_helper.web_config.get("http_retries", 1)
if retries > 0:
curl_command.extend(["--retry", str(retries)])
curl_command.append("--retry-all-errors")

for k, v in headers.items():
if isinstance(v, list):
for x in v:
@@ -418,17 +431,120 @@ async def curl(self, *args, **kwargs):
curl_command.append("--request-target")
curl_command.append(f"{path_override}")

head_mode = kwargs.get("head_mode", None)
if head_mode:
curl_command.append("-I")

raw_body = kwargs.get("raw_body", None)
if raw_body:
curl_command.append("-d")
curl_command.append(raw_body)
log.verbose(f"Running curl command: {curl_command}")

# --resolve <host>:<port>:<ip>
resolve_dict = kwargs.get("resolve", None)

if resolve_dict is not None:
# Validate "resolve" is a dict
if not isinstance(resolve_dict, dict):
raise CurlError("'resolve' must be a dictionary containing 'host', 'port', and 'ip' keys")

# Extract and validate IP (required)
ip = resolve_dict.get("ip")
if not ip:
raise CurlError("'resolve' dictionary requires an 'ip' value")
try:
ipaddress.ip_address(ip)
except ValueError:
raise CurlError(f"Invalid IP address supplied to 'resolve': {ip}")

# Host, port, and ip must ALL be supplied explicitly
host = resolve_dict.get("host")
if not host:
raise CurlError("'resolve' dictionary requires a 'host' value")

if "port" not in resolve_dict:
raise CurlError("'resolve' dictionary requires a 'port' value")
port = resolve_dict["port"]

try:
port = int(port)
except (TypeError, ValueError):
raise CurlError("'port' supplied to resolve must be an integer")
if port < 1 or port > 65535:
raise CurlError("'port' supplied to resolve must be between 1 and 65535")

# Append the --resolve directive
curl_command.append("--resolve")
curl_command.append(f"{host}:{port}:{ip}")

# Always add JSON --write-out format with separator and capture headers
curl_command.extend(["-D", "-", "-w", "\\n---CURL_METADATA---\\n%{json}"])

log.debug(f"Running curl command: {curl_command}")
output = (await self.parent_helper.run(curl_command)).stdout
return output

# Parse the output to separate headers, content, and metadata
parts = output.split("\n---CURL_METADATA---\n")

# Raise CurlError if separator not found - this indicates a problem with our curl implementation
if len(parts) < 2:
raise CurlError(f"Curl output missing expected separator. Got: {output[:200]}...")

# Headers and content are in the first part, JSON metadata is in the last part
header_content = parts[0]
json_data = parts[-1].strip()

# Split headers from content
header_lines = []
content_lines = []
in_headers = True

for line in header_content.split("\n"):
if in_headers:
if line.strip() == "":
in_headers = False
else:
header_lines.append(line)
else:
content_lines.append(line)

# Parse headers into dictionary
headers_dict = {}
raw_headers = "\n".join(header_lines)

for line in header_lines:
if ":" in line:
key, value = line.split(":", 1)
key = key.strip().lower()
value = value.strip()

# Convert hyphens to underscores to match httpx (projectdiscovery) format
# This ensures consistency with how other modules expect headers
normalized_key = key.replace("-", "_")

if normalized_key in headers_dict:
if isinstance(headers_dict[normalized_key], list):
headers_dict[normalized_key].append(value)
else:
headers_dict[normalized_key] = [headers_dict[normalized_key], value]
else:
headers_dict[normalized_key] = value

response_data = "\n".join(content_lines)

# Parse curl's %{json} metadata; try to repair known-malformed output before raising CurlError
try:
metadata = json.loads(json_data)
except json.JSONDecodeError as e:
# Try to fix common malformed JSON issues from curl output
try:
# Fix empty values like "certs":, -> "certs":null,
fixed_json = re.sub(r':"?\s*,', ":null,", json_data)
# Fix trailing commas before closing braces
fixed_json = re.sub(r",\s*}", "}", fixed_json)
metadata = json.loads(fixed_json)
log.debug(f"Fixed malformed JSON from curl: {json_data[:100]}... -> {fixed_json[:100]}...")
except json.JSONDecodeError:
raise CurlError(f"Failed to parse curl JSON metadata: {e}. JSON data: {json_data[:200]}...")

# Combine into final JSON structure
return {"response_data": response_data, "headers": headers_dict, "raw_headers": raw_headers, **metadata}

def beautifulsoup(
self,
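With this change, helpers.curl returns a parsed dict instead of raw stdout. A minimal sketch of consuming the new return shape from inside a module; the URL and resolve values are hypothetical examples, not taken from this PR:

    # Sketch only: assumes a BBOT module context where self.helpers.curl exists
    r = await self.helpers.curl(
        url="https://example.com/",
        # optional --resolve override; the helper requires all three keys
        resolve={"host": "example.com", "port": 443, "ip": "203.0.113.10"},
    )
    body = r["response_data"]            # response body as text
    server = r["headers"].get("server")  # keys lowercased, hyphens -> underscores
    raw = r["raw_headers"]               # original header block as a single string
    status = r.get("response_code")      # one of curl's %{json} write-out fields

Repeated headers are folded into lists, so callers should handle both str and list values.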
25 changes: 25 additions & 0 deletions bbot/core/shared_deps.py
@@ -173,6 +173,31 @@
},
]

DEP_CURL = [
{
"name": "Download static curl binary (v8.11.0)",
"get_url": {
"url": "https://github.com/moparisthebest/static-curl/releases/download/v8.11.0/curl-amd64",
"dest": "#{BBOT_TOOLS}/curl",
"mode": "0755",
"force": True,
},
},
{
"name": "Ensure curl binary is executable",
"file": {
"path": "#{BBOT_TOOLS}/curl",
"mode": "0755",
},
},
{
"name": "Verify curl binary works",
"command": "#{BBOT_TOOLS}/curl --version",
"register": "curl_version_output",
"changed_when": False,
},
]

DEP_MASSCAN = [
{
"name": "install os deps (Debian)",
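Modules opt into the shared dependency by name via deps_common, as the module changes below show. A sketch with a hypothetical module:

    from bbot.modules.base import BaseModule

    class my_module(BaseModule):
        # "curl" resolves to DEP_CURL above; at install time BBOT downloads
        # the static binary into its tools directory (#{BBOT_TOOLS})
        deps_common = ["curl"]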
8 changes: 5 additions & 3 deletions bbot/modules/generic_ssrf.py
@@ -39,6 +39,8 @@ class BaseSubmodule:
severity = "INFO"
paths = []

deps_common = ["curl"]

def __init__(self, generic_ssrf):
self.generic_ssrf = generic_ssrf
self.test_paths = self.create_paths()
@@ -61,7 +63,7 @@ async def test(self, event):
self.generic_ssrf.debug(f"Sending request to URL: {test_url}")
r = await self.generic_ssrf.helpers.curl(url=test_url)
if r:
self.process(event, r, subdomain_tag)
self.process(event, r["response_data"], subdomain_tag)

def process(self, event, r, subdomain_tag):
response_token = self.generic_ssrf.interactsh_domain.split(".")[0][::-1]
@@ -123,7 +125,7 @@ async def test(self, event):

for tag, pd in post_data_list:
r = await self.generic_ssrf.helpers.curl(url=test_url, method="POST", post_data=pd)
self.process(event, r, tag)
self.process(event, r["response_data"], tag)


class Generic_XXE(BaseSubmodule):
@@ -146,7 +148,7 @@ async def test(self, event):
url=test_url, method="POST", raw_body=post_body, headers={"Content-type": "application/xml"}
)
if r:
self.process(event, r, subdomain_tag)
self.process(event, r["response_data"], subdomain_tag)


class generic_ssrf(BaseModule):
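The detection logic itself is unchanged: process() still searches the response body, now taken from response_data, for the reversed interactsh token. A toy illustration with hypothetical values:

    # Hypothetical domain and response illustrating the check in process()
    interactsh_domain = "abc123.oast.example"
    response_token = interactsh_domain.split(".")[0][::-1]  # "321cba"
    response_body = "...321cba..."  # hypothetical response containing the token
    reflected = response_token in response_body  # True -> emit a finding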
11 changes: 5 additions & 6 deletions bbot/modules/host_header.py
@@ -15,7 +15,7 @@ class host_header(BaseModule):
in_scope_only = True
per_hostport_only = True

deps_apt = ["curl"]
deps_common = ["curl"]

async def setup(self):
self.subdomain_tags = {}
@@ -106,7 +106,7 @@ async def handle_event(self, event):
ignore_bbot_global_settings=True,
cookies=added_cookies,
)
if self.domain in output:
if self.domain in output["response_data"]:
domain_reflections.append(technique_description)

# absolute URL / Host header transposition
@@ -120,7 +120,7 @@ async def handle_event(self, event):
cookies=added_cookies,
)

if self.domain in output:
if self.domain in output["response_data"]:
domain_reflections.append(technique_description)

# duplicate host header tolerance
@@ -131,10 +131,9 @@
# The fact that it's accepting two host headers is rare enough to note on its own, and not too noisy. Having the 3rd header be an interactsh would result in false negatives for the slightly less interesting cases.
headers={"Host": ["", str(event.host), str(event.host)]},
cookies=added_cookies,
head_mode=True,
)

split_output = output.split("\n")
split_output = output["raw_headers"].split("\n")
if " 4" in split_output:
description = "Duplicate Host Header Tolerated"
await self.emit_event(
@@ -173,7 +172,7 @@ async def handle_event(self, event):
headers=override_headers,
cookies=added_cookies,
)
if self.domain in output:
if self.domain in output["response_data"]:
domain_reflections.append(technique_description)

# emit all the domain reflections we found
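Because the updated helper always captures headers, head_mode is no longer needed: reflection checks read response_data, and the duplicate-Host-header probe parses raw_headers. A rough sketch of the reflection pattern with a hypothetical canary domain:

    output = await self.helpers.curl(
        url=url,
        headers={"Host": "canary.example.com"},  # hypothetical test value
        cookies=added_cookies,
    )
    # the canary echoed back in the body means the Host header is reflected
    if "canary.example.com" in output["response_data"]:
        domain_reflections.append(technique_description)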
2 changes: 1 addition & 1 deletion bbot/modules/output/web_report.py
@@ -4,7 +4,7 @@


class web_report(BaseOutputModule):
watched_events = ["URL", "TECHNOLOGY", "FINDING", "VULNERABILITY"]
watched_events = ["URL", "TECHNOLOGY", "FINDING", "VULNERABILITY", "VIRTUAL_HOST"]
meta = {
"description": "Create a markdown report with web assets",
"created_date": "2023-02-08",