rehosting · zestrada · Dec 13, 2024 · Dec 18, 2024 · Dec 19, 2024 · Dec 20, 2024
diff --git a/Dockerfile b/Dockerfile
@@ -262,7 +262,8 @@ RUN --mount=type=cache,target=/root/.cache/pip \
       setuptools \
       sqlalchemy \
       yamlcore \
-      junit-xml
+      junit-xml \
+      requests
 
 
 FROM python_builder as version_generator

diff --git a/pyplugins/actuation/fetch_web.py b/pyplugins/actuation/fetch_web.py
@@ -5,6 +5,7 @@
 from collections import Counter
 import math
 import time
+import requests
 
 from pandare import PyPlugin
 from penguin import getColoredLogger, plugins
@@ -78,15 +79,17 @@ def fetch(self, guest_ip, host_ip, guest_port, host_port, log_file_name):
             log_file_name += ".alt"
 
         time.sleep(20)  # Give service plenty of time to start
-        cmd = ["wget", "-q", f"https://{host_ip}:{host_port}" if guest_port == 443 else f"http://{host_ip}:{host_port}",
-               "--no-check-certificate", "-O", log_file_name]
+        url = f"https://{host_ip}:{host_port}" if guest_port == 443 else f"http://{host_ip}:{host_port}"
         timestamp = f"{(time.time() - self.start_time):.02f}s"
         try:
-            subprocess.check_output(cmd, timeout=30)
-        except subprocess.CalledProcessError as e:
-            self.logger.warning(f"{timestamp}: Error running wget: {e}")
+           response = requests.get(url, verify=False, timeout=30)
+           with open(log_file_name, 'wb') as log_file:
+               log_file.write(response.content)
+
+        except requests.RequestException as e:
+            self.logger.warning(f"{timestamp}: Error running fetching HTTP: {e}")
             return False
-        except subprocess.TimeoutExpired:
+        except requests.Timeout:
             self.logger.warning(f"{timestamp}: No response to wget for {host_ip}:{host_port} after 30s")
             return False
 

diff --git a/pyplugins/actuation/vpn.py b/pyplugins/actuation/vpn.py
@@ -229,7 +229,7 @@ def find_free_port(port):
         e.g. 80 -> 1080, 443 -> 2443, 8080 -> 18080, 65535 -> 1000
         '''
         for offset in range(1000, 65535, 1000):
-            if VPN.is_port_open(offset + port):
+            if (offset+port) <= 65535 and VPN.is_port_open(offset + port):
                 return offset + port
 
         """

diff --git a/src/penguin/__main__.py b/src/penguin/__main__.py
@@ -212,7 +212,7 @@ def penguin_init(args):
 
         if not os.path.exists(args.output_base):
             print("Creating output_base:", args.output_base)
-            os.makedirs(args.output_base)
+            os.makedirs(args.output_base, exist_ok=True)
 
         args.output = args.output_base + "/" + basename_stem
         output_type = "generated"
@@ -234,8 +234,7 @@ def penguin_init(args):
             )
 
     # Ensure output parent directory exists
-    if not os.path.exists(os.path.dirname(args.output)):
-        os.makedirs(os.path.dirname(args.output))
+    os.makedirs(os.path.dirname(args.output), exist_ok=True)
 
     out_config_path = Path(args.output, "config.yaml")
     config = fakeroot_gen_config(
@@ -455,8 +454,8 @@ def add_explore_arguments(parser):
     parser.add_argument(
         "--nworkers",
         type=int,
-        default=4,
-        help="Number of workers to run in parallel. Default is 4",
+        default=1,
+        help="Number of workers to run in parallel. Default is 1",
     )
     parser.add_argument(
         "--timeout",

diff --git a/src/penguin/patch_minimizer.py b/src/penguin/patch_minimizer.py
@@ -48,7 +48,7 @@ def __init__(self, proj_dir, config_path, output_dir, timeout,
         self.verbose = verbose
         self.minimization_target = minimization_target
         self.patches_to_test = list()
-        self.binary_search = True
+        self.binary_search = False
         self.dynamic_patch_dir = os.path.join(self.proj_dir, "dynamic_patches")
         self.data_baseline = dict()
 
@@ -58,6 +58,8 @@ def __init__(self, proj_dir, config_path, output_dir, timeout,
         self.run_count = 0
         self.scores = dict()  # run_index -> score
         self.runmap = dict()  # run_index -> patchset
+        self.max_attempts = 10
+        self.attempts = dict()
 
         # TODO: use FICD to set timeout if timeout parameter. Warn if FICD not reached
         #      add an FICD option to run until FICD (which might have to do the baseline single-threaded)
@@ -84,6 +86,7 @@ def __init__(self, proj_dir, config_path, output_dir, timeout,
         for patch in base_config["patches"]:
             if any(patch.endswith(f"{x}.yaml") for x in ignore_patches):
                 self.logger.info(f"Ignoring {patch} to support automated minimization")
+                continue
 
             if any(patch.endswith(f"/{x}.yaml") for x in required_patches):
                 # Patches we just leave *always* enabled: base, auto_explore and lib_inject.core
@@ -96,7 +99,7 @@ def __init__(self, proj_dir, config_path, output_dir, timeout,
         if not any([patch.endswith(f"/{this_required}.yaml") for patch in self.base_config["patches"]]):
             self.logger.warning(f"Adding {this_required} patch to supported automated exploration to guide minimization")
             # Ensure static_patches dir is in at least one of the patches
-            assert (any([patch.startswith("static_patches") for patch in self.patches_to_test])), "No static_patches dir in patches - not sure how to add auto_explore"
+            assert (any([patch.startswith("static_patches") for patch in self.patches_to_test])), f"No static_patches dir in patches - not sure how to add {this_required}"
             self.base_config["patches"].append(f"static_patches/{this_required}.yaml")
 
         # Patches can override options in previous patches
@@ -111,7 +114,7 @@ def __init__(self, proj_dir, config_path, output_dir, timeout,
         dump_config(self.base_config, os.path.join(output_dir, "base_config.yaml"))
 
         self.logger.setLevel("DEBUG" if verbose else "INFO")
-        self.logger.info(f"Loaded {len(self.patches_to_test)} patches to test")
+        self.logger.info(f"Loaded {len(self.patches_to_test)} patches to test with {self.nworkers} workers")
         self.logger.debug(f"Candidate patches: {self.patches_to_test}")
 
     @staticmethod
@@ -594,16 +597,25 @@ def establish_baseline(self):
         assert (self.run_count == 0), f"Establish baseline should be run first not {self.run_count}"
 
         patchset = self.patches_to_test
-        _, score = self.run_config(patchset, 0)
-        self.run_count += 1  # Bump run_count so we don't re-run baseline
 
-        # Score for baseline goes in self.scores (coverage) and data_baseline stores network data (entropy, bytes)
-        self.scores[0] = score
-        self.data_baseline = self.calculate_network_data(0)
-        self.logger.debug(f"data_baseline: {self.data_baseline}")
+        for j in range(self.max_attempts):
+            _, score = self.run_config(patchset, 0)
+
+            # Score for baseline goes in self.scores (coverage) and data_baseline stores network data (entropy, bytes)
+            self.scores[0] = score
+            self.data_baseline = self.calculate_network_data(0)
+            self.logger.debug(f"data_baseline: {self.data_baseline}")
+
+            if self.data_baseline:
+                self.logger.info(f"Baseline established on attempt {j+1}/{self.max_attempts}")
+                break
+            else:
+                self.logger.info(f"Baseline still not established... re-running. This was attempt {j+1}/{self.max_attempts}")
 
         assert (self.data_baseline), "Baseline data not established"
 
+        self.run_count += 1  # Bump run_count so we don't re-run baseline
+
         self.config_still_viable(0)
 
         # Output is in run_dir
@@ -741,8 +753,10 @@ def run(self):
         if slow_mode:
             # Assuming orthoganality of patches, we'll generate a config without each patch
             # Greater than 2 since if we have two left binary search would've tested them both
-            if len(self.patches_to_test) > 2 or not self.binary_search:
+            #if len(self.patches_to_test) > 2 or not self.binary_search:
+            for j in range(self.max_attempts):
                 run_tracker = dict()
+                patchsets.clear()
                 for i, patch in enumerate(self.patches_to_test, start=self.run_count):
                     if i >= self.max_iters:
                         self.logger.info("Hit max iterations. Stopping")
@@ -751,6 +765,10 @@ def run(self):
                     patchset.remove(patch)
                     patchsets.append(patchset)
                     run_tracker[i] = patch
+                    if j == 0:
+                        self.attempts[patch] = 1
+                    else:
+                        self.attempts[patch] += 1
 
                 self.run_configs(patchsets)
 
@@ -759,17 +777,21 @@ def run(self):
                         self.logger.info(f"After running {i} removing {patch} from consideration, appears unnecessary")
                         self.patches_to_test.remove(patch)
                     else:
-                        self.logger.info(f"Keeping {patch} since run {i} was not viable without it")
+                        self.logger.info(f"Patch-sweep {j}: keeping {patch} since run {i} was not viable without it")
 
         output_file = os.path.join(self.proj_dir, "minimized.yaml")
         # TODO: force overwrite of this when --force
-        if not os.path.exists(output_file):
-            self.logger.info(f"Writing minimized config to {output_file} (note: this may include auto_explore.yaml)")
-            self.base_config["patches"].extend(self.patches_to_test)
-            with open(output_file, "w") as f:
-                yaml.dump(self.base_config, f)
-        else:
-            self.logger.info(f"Config already exists at {output_file}, not overwriting")
+        #if not os.path.exists(output_file):
+        # If we got here and this was already run, we've specified --force
+        self.logger.info(f"Writing minimized config to {output_file} (note: this may include auto_explore.yaml)")
+        self.base_config["patches"].extend(self.patches_to_test)
+        with open(output_file, "w") as f:
+            yaml.dump(self.base_config, f)
+        output_file = os.path.join(self.proj_dir, "minimize_attempts.yaml")
+        with open(output_file, "w") as f:
+            yaml.dump(self.attempts, f)
+        #else:
+        #    self.logger.info(f"Config already exists at {output_file}, not overwriting")
         return self.patches_to_test