134 changes: 125 additions & 9 deletions bbot/core/helpers/web/web.py
@@ -1,7 +1,10 @@
import json
import logging
import re
import warnings
from pathlib import Path
from bs4 import BeautifulSoup
import ipaddress

from bbot.core.engine import EngineClient
from bbot.core.helpers.misc import truncate_filename
@@ -319,12 +322,12 @@ async def curl(self, *args, **kwargs):
method (str, optional): The HTTP method to use for the request (e.g., 'GET', 'POST').
cookies (dict, optional): A dictionary of cookies to include in the request.
path_override (str, optional): Overrides the request-target to use in the HTTP request line.
head_mode (bool, optional): If True, includes '-I' to fetch headers only. Defaults to None.
raw_body (str, optional): Raw string to be sent in the body of the request.
resolve (dict, optional): Host resolution override as dict with 'host', 'port', 'ip' keys for curl --resolve.
**kwargs: Arbitrary keyword arguments that will be forwarded to the HTTP request function.

Returns:
str: The output of the cURL command.
dict: Parsed response containing 'response_data', 'headers', 'raw_headers', and curl write-out metadata.

Raises:
CurlError: If 'url' is not supplied.
@@ -338,7 +341,11 @@ async def curl(self, *args, **kwargs):
if not url:
raise CurlError("No URL supplied to CURL helper")

curl_command = ["curl", url, "-s"]
# Use BBOT-specific curl binary
bbot_curl = self.parent_helper.tools_dir / "curl"
if not bbot_curl.exists():
raise CurlError(f"BBOT curl binary not found at {bbot_curl}. Run dependency installation.")
curl_command = [str(bbot_curl), url, "-s"]

raw_path = kwargs.get("raw_path", False)
if raw_path:
@@ -382,6 +389,12 @@ async def curl(self, *args, **kwargs):
curl_command.append("-m")
curl_command.append(str(timeout))

# mirror the web helper behavior
retries = self.parent_helper.web_config.get("http_retries", 1)
if retries > 0:
curl_command.extend(["--retry", str(retries)])
curl_command.append("--retry-all-errors")

for k, v in headers.items():
if isinstance(v, list):
for x in v:
@@ -418,17 +431,120 @@ async def curl(self, *args, **kwargs):
curl_command.append("--request-target")
curl_command.append(f"{path_override}")

head_mode = kwargs.get("head_mode", None)
if head_mode:
curl_command.append("-I")

raw_body = kwargs.get("raw_body", None)
if raw_body:
curl_command.append("-d")
curl_command.append(raw_body)
log.verbose(f"Running curl command: {curl_command}")

# --resolve <host>:<port>:<ip>
resolve_dict = kwargs.get("resolve", None)

if resolve_dict is not None:
# Validate "resolve" is a dict
if not isinstance(resolve_dict, dict):
raise CurlError("'resolve' must be a dictionary containing 'host', 'port', and 'ip' keys")

# Extract and validate IP (required)
ip = resolve_dict.get("ip")
if not ip:
raise CurlError("'resolve' dictionary requires an 'ip' value")
try:
ipaddress.ip_address(ip)
except ValueError:
raise CurlError(f"Invalid IP address supplied to 'resolve': {ip}")

# Host, port, and ip must ALL be supplied explicitly
host = resolve_dict.get("host")
if not host:
raise CurlError("'resolve' dictionary requires a 'host' value")

if "port" not in resolve_dict:
raise CurlError("'resolve' dictionary requires a 'port' value")
port = resolve_dict["port"]

try:
port = int(port)
except (TypeError, ValueError):
raise CurlError("'port' supplied to resolve must be an integer")
if port < 1 or port > 65535:
raise CurlError("'port' supplied to resolve must be between 1 and 65535")

# Append the --resolve directive
curl_command.append("--resolve")
curl_command.append(f"{host}:{port}:{ip}")

# Always add JSON --write-out format with separator and capture headers
curl_command.extend(["-D", "-", "-w", "\\n---CURL_METADATA---\\n%{json}"])

log.debug(f"Running curl command: {curl_command}")
output = (await self.parent_helper.run(curl_command)).stdout
return output

# Parse the output to separate headers, content, and metadata
parts = output.split("\n---CURL_METADATA---\n")

# Raise CurlError if separator not found - this indicates a problem with our curl implementation
if len(parts) < 2:
raise CurlError(f"Curl output missing expected separator. Got: {output[:200]}...")

# Headers and content are in the first part, JSON metadata is in the last part
header_content = parts[0]
json_data = parts[-1].strip()

# Split headers from content
header_lines = []
content_lines = []
in_headers = True

for line in header_content.split("\n"):
if in_headers:
if line.strip() == "":
in_headers = False
else:
header_lines.append(line)
else:
content_lines.append(line)

# Parse headers into dictionary
headers_dict = {}
raw_headers = "\n".join(header_lines)

for line in header_lines:
if ":" in line:
key, value = line.split(":", 1)
key = key.strip().lower()
value = value.strip()

# Convert hyphens to underscores to match httpx (projectdiscovery) format
# This ensures consistency with how other modules expect headers
normalized_key = key.replace("-", "_")

if normalized_key in headers_dict:
if isinstance(headers_dict[normalized_key], list):
headers_dict[normalized_key].append(value)
else:
headers_dict[normalized_key] = [headers_dict[normalized_key], value]
else:
headers_dict[normalized_key] = value

response_data = "\n".join(content_lines)

# Parse curl's %{json} metadata; try to repair known-malformed output before raising CurlError
try:
metadata = json.loads(json_data)
except json.JSONDecodeError as e:
# Try to fix common malformed JSON issues from curl output
try:
# Fix empty values like "certs":, -> "certs":null,
fixed_json = re.sub(r':"?\s*,', ":null,", json_data)
# Fix trailing commas before closing braces
fixed_json = re.sub(r",\s*}", "}", fixed_json)
metadata = json.loads(fixed_json)
log.debug(f"Fixed malformed JSON from curl: {json_data[:100]}... -> {fixed_json[:100]}...")
except json.JSONDecodeError:
raise CurlError(f"Failed to parse curl JSON metadata: {e}. JSON data: {json_data[:200]}...")

# Combine into final JSON structure
return {"response_data": response_data, "headers": headers_dict, "raw_headers": raw_headers, **metadata}

def beautifulsoup(
self,
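With this change, helpers.curl returns a parsed dict instead of raw stdout. A minimal sketch of consuming the new return shape from inside a module; the URL and resolve values are hypothetical examples, not taken from this PR:

    # Sketch only: assumes a BBOT module context where self.helpers.curl exists
    r = await self.helpers.curl(
        url="https://example.com/",
        # optional --resolve override; the helper requires all three keys
        resolve={"host": "example.com", "port": 443, "ip": "203.0.113.10"},
    )
    body = r["response_data"]            # response body as text
    server = r["headers"].get("server")  # keys lowercased, hyphens -> underscores
    raw = r["raw_headers"]               # original header block as a single string
    status = r.get("response_code")      # one of curl's %{json} write-out fields

Repeated headers are folded into lists, so callers should handle both str and list values.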
25 changes: 25 additions & 0 deletions bbot/core/shared_deps.py
@@ -173,6 +173,31 @@
},
]

DEP_CURL = [
{
"name": "Download static curl binary (v8.11.0)",
"get_url": {
"url": "https://github.com/moparisthebest/static-curl/releases/download/v8.11.0/curl-amd64",
"dest": "#{BBOT_TOOLS}/curl",
"mode": "0755",
"force": True,
},
},
{
"name": "Ensure curl binary is executable",
"file": {
"path": "#{BBOT_TOOLS}/curl",
"mode": "0755",
},
},
{
"name": "Verify curl binary works",
"command": "#{BBOT_TOOLS}/curl --version",
"register": "curl_version_output",
"changed_when": False,
},
]

DEP_MASSCAN = [
{
"name": "install os deps (Debian)",
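Modules opt into the shared dependency by name via deps_common, as the module changes below show. A sketch with a hypothetical module:

    from bbot.modules.base import BaseModule

    class my_module(BaseModule):
        # "curl" resolves to DEP_CURL above; at install time BBOT downloads
        # the static binary into its tools directory (#{BBOT_TOOLS})
        deps_common = ["curl"]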
8 changes: 5 additions & 3 deletions bbot/modules/generic_ssrf.py
@@ -39,6 +39,8 @@ class BaseSubmodule:
severity = "INFO"
paths = []

deps_common = ["curl"]

def __init__(self, generic_ssrf):
self.generic_ssrf = generic_ssrf
self.test_paths = self.create_paths()
@@ -61,7 +63,7 @@ async def test(self, event):
self.generic_ssrf.debug(f"Sending request to URL: {test_url}")
r = await self.generic_ssrf.helpers.curl(url=test_url)
if r:
self.process(event, r, subdomain_tag)
self.process(event, r["response_data"], subdomain_tag)

def process(self, event, r, subdomain_tag):
response_token = self.generic_ssrf.interactsh_domain.split(".")[0][::-1]
@@ -123,7 +125,7 @@ async def test(self, event):

for tag, pd in post_data_list:
r = await self.generic_ssrf.helpers.curl(url=test_url, method="POST", post_data=pd)
self.process(event, r, tag)
self.process(event, r["response_data"], tag)


class Generic_XXE(BaseSubmodule):
@@ -146,7 +148,7 @@ async def test(self, event):
url=test_url, method="POST", raw_body=post_body, headers={"Content-type": "application/xml"}
)
if r:
self.process(event, r, subdomain_tag)
self.process(event, r["response_data"], subdomain_tag)


class generic_ssrf(BaseModule):
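The detection logic itself is unchanged: process() still searches the response body, now taken from response_data, for the reversed interactsh token. A toy illustration with hypothetical values:

    # Hypothetical domain and response illustrating the check in process()
    interactsh_domain = "abc123.oast.example"
    response_token = interactsh_domain.split(".")[0][::-1]  # "321cba"
    response_body = "...321cba..."  # hypothetical response containing the token
    reflected = response_token in response_body  # True -> emit a finding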
11 changes: 5 additions & 6 deletions bbot/modules/host_header.py
@@ -15,7 +15,7 @@ class host_header(BaseModule):
in_scope_only = True
per_hostport_only = True

deps_apt = ["curl"]
deps_common = ["curl"]

async def setup(self):
self.subdomain_tags = {}
@@ -106,7 +106,7 @@ async def handle_event(self, event):
ignore_bbot_global_settings=True,
cookies=added_cookies,
)
if self.domain in output:
if self.domain in output["response_data"]:
domain_reflections.append(technique_description)

# absolute URL / Host header transposition
@@ -120,7 +120,7 @@ async def handle_event(self, event):
cookies=added_cookies,
)

if self.domain in output:
if self.domain in output["response_data"]:
domain_reflections.append(technique_description)

# duplicate host header tolerance
@@ -131,10 +131,9 @@
# The fact that it's accepting two host headers is rare enough to note on its own, and not too noisy. Having the 3rd header be an interactsh would result in false negatives for the slightly less interesting cases.
headers={"Host": ["", str(event.host), str(event.host)]},
cookies=added_cookies,
head_mode=True,
)

split_output = output.split("\n")
split_output = output["raw_headers"].split("\n")
if " 4" in split_output:
description = "Duplicate Host Header Tolerated"
await self.emit_event(
@@ -173,7 +172,7 @@ async def handle_event(self, event):
headers=override_headers,
cookies=added_cookies,
)
if self.domain in output:
if self.domain in output["response_data"]:
domain_reflections.append(technique_description)

# emit all the domain reflections we found
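Because the updated helper always captures headers, head_mode is no longer needed: reflection checks read response_data, and the duplicate-Host-header probe parses raw_headers. A rough sketch of the reflection pattern with a hypothetical canary domain:

    output = await self.helpers.curl(
        url=url,
        headers={"Host": "canary.example.com"},  # hypothetical test value
        cookies=added_cookies,
    )
    # the canary echoed back in the body means the Host header is reflected
    if "canary.example.com" in output["response_data"]:
        domain_reflections.append(technique_description)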
2 changes: 1 addition & 1 deletion bbot/modules/output/web_report.py
@@ -4,7 +4,7 @@


class web_report(BaseOutputModule):
watched_events = ["URL", "TECHNOLOGY", "FINDING", "VULNERABILITY"]
watched_events = ["URL", "TECHNOLOGY", "FINDING", "VULNERABILITY", "VIRTUAL_HOST"]
meta = {
"description": "Create a markdown report with web assets",
"created_date": "2023-02-08",