diff --git a/Dockerfile b/Dockerfile
index fd577ea4f..3577d7476 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -262,7 +262,8 @@ RUN --mount=type=cache,target=/root/.cache/pip \
         setuptools \
         sqlalchemy \
         yamlcore \
-        junit-xml
+        junit-xml \
+        requests

 FROM python_builder as version_generator

diff --git a/pyplugins/actuation/fetch_web.py b/pyplugins/actuation/fetch_web.py
index f0b577bf7..6572cbfe0 100644
--- a/pyplugins/actuation/fetch_web.py
+++ b/pyplugins/actuation/fetch_web.py
@@ -5,6 +5,7 @@
 from collections import Counter
 import math
 import time
+import requests

 from pandare import PyPlugin
 from penguin import getColoredLogger, plugins
@@ -78,15 +79,17 @@ def fetch(self, guest_ip, host_ip, guest_port, host_port, log_file_name):
             log_file_name += ".alt"

         time.sleep(20)  # Give service plenty of time to start
-        cmd = ["wget", "-q", f"https://{host_ip}:{host_port}" if guest_port == 443 else f"http://{host_ip}:{host_port}",
-               "--no-check-certificate", "-O", log_file_name]
+        url = f"https://{host_ip}:{host_port}" if guest_port == 443 else f"http://{host_ip}:{host_port}"

         timestamp = f"{(time.time() - self.start_time):.02f}s"
         try:
-            subprocess.check_output(cmd, timeout=30)
-        except subprocess.CalledProcessError as e:
-            self.logger.warning(f"{timestamp}: Error running wget: {e}")
-            return False
-        except subprocess.TimeoutExpired:
-            self.logger.warning(f"{timestamp}: No response to wget for {host_ip}:{host_port} after 30s")
-            return False
+            response = requests.get(url, verify=False, timeout=30)
+            with open(log_file_name, 'wb') as log_file:
+                log_file.write(response.content)
+
+        except requests.Timeout:
+            self.logger.warning(f"{timestamp}: No response from {host_ip}:{host_port} after 30s")
+            return False
+        except requests.RequestException as e:
+            self.logger.warning(f"{timestamp}: Error fetching HTTP: {e}")
+            return False
diff --git a/pyplugins/actuation/vpn.py b/pyplugins/actuation/vpn.py
index 17cbe0db5..d0391ae36 100644
--- a/pyplugins/actuation/vpn.py
+++ b/pyplugins/actuation/vpn.py
@@ -229,7 +229,7 @@ def find_free_port(port):
     e.g. 80 -> 1080, 443 -> 2443, 8080 -> 18080, 65535 -> 1000
     '''
     for offset in range(1000, 65535, 1000):
-        if VPN.is_port_open(offset + port):
+        if (offset + port) <= 65535 and VPN.is_port_open(offset + port):
             return offset + port

 """
diff --git a/src/penguin/__main__.py b/src/penguin/__main__.py
index 90a28cb42..f975cc7a0 100644
--- a/src/penguin/__main__.py
+++ b/src/penguin/__main__.py
@@ -212,7 +212,7 @@ def penguin_init(args):
     if not os.path.exists(args.output_base):
         print("Creating output_base:", args.output_base)
-        os.makedirs(args.output_base)
+        os.makedirs(args.output_base, exist_ok=True)

     args.output = args.output_base + "/" + basename_stem
     output_type = "generated"

@@ -234,8 +234,7 @@ def penguin_init(args):
     )

     # Ensure output parent directory exists
-    if not os.path.exists(os.path.dirname(args.output)):
-        os.makedirs(os.path.dirname(args.output))
+    os.makedirs(os.path.dirname(args.output), exist_ok=True)

     out_config_path = Path(args.output, "config.yaml")
     config = fakeroot_gen_config(
@@ -455,8 +454,8 @@ def add_explore_arguments(parser):
     parser.add_argument(
         "--nworkers",
         type=int,
-        default=4,
-        help="Number of workers to run in parallel. Default is 4",
+        default=1,
+        help="Number of workers to run in parallel. Default is 1",
     )
     parser.add_argument(
         "--timeout",
diff --git a/src/penguin/patch_minimizer.py b/src/penguin/patch_minimizer.py
index ad3603ca7..83d8fba99 100644
--- a/src/penguin/patch_minimizer.py
+++ b/src/penguin/patch_minimizer.py
@@ -48,7 +48,7 @@ def __init__(self, proj_dir, config_path, output_dir, timeout,
         self.verbose = verbose
         self.minimization_target = minimization_target
         self.patches_to_test = list()
-        self.binary_search = True
+        self.binary_search = False

         self.dynamic_patch_dir = os.path.join(self.proj_dir, "dynamic_patches")
         self.data_baseline = dict()
@@ -58,6 +58,8 @@ def __init__(self, proj_dir, config_path, output_dir, timeout,
         self.run_count = 0
         self.scores = dict()  # run_index -> score
         self.runmap = dict()  # run_index -> patchset
+        self.max_attempts = 10
+        self.attempts = dict()

         # TODO: use FICD to set timeout if timeout parameter. Warn if FICD not reached
         # add an FICD option to run until FICD (which might have to do the baseline single-threaded)
@@ -84,6 +86,7 @@ def __init__(self, proj_dir, config_path, output_dir, timeout,
         for patch in base_config["patches"]:
             if any(patch.endswith(f"{x}.yaml") for x in ignore_patches):
                 self.logger.info(f"Ignoring {patch} to support automated minimization")
+                continue

             if any(patch.endswith(f"/{x}.yaml") for x in required_patches):
                 # Patches we just leave *always* enabled: base, auto_explore and lib_inject.core
@@ -96,7 +99,7 @@ def __init__(self, proj_dir, config_path, output_dir, timeout,
             if not any([patch.endswith(f"/{this_required}.yaml") for patch in self.base_config["patches"]]):
                 self.logger.warning(f"Adding {this_required} patch to supported automated exploration to guide minimization")
                 # Ensure static_patches dir is in at least one of the patches
-                assert (any([patch.startswith("static_patches") for patch in self.patches_to_test])), "No static_patches dir in patches - not sure how to add auto_explore"
+                assert (any([patch.startswith("static_patches") for patch in self.patches_to_test])), f"No static_patches dir in patches - not sure how to add {this_required}"
                 self.base_config["patches"].append(f"static_patches/{this_required}.yaml")

         # Patches can override options in previous patches
@@ -111,7 +114,7 @@ def __init__(self, proj_dir, config_path, output_dir, timeout,
         dump_config(self.base_config, os.path.join(output_dir, "base_config.yaml"))

         self.logger.setLevel("DEBUG" if verbose else "INFO")
-        self.logger.info(f"Loaded {len(self.patches_to_test)} patches to test")
+        self.logger.info(f"Loaded {len(self.patches_to_test)} patches to test with {self.nworkers} workers")
         self.logger.debug(f"Candidate patches: {self.patches_to_test}")

     @staticmethod
@@ -594,16 +597,25 @@ def establish_baseline(self):
         assert (self.run_count == 0), f"Establish baseline should be run first not {self.run_count}"

         patchset = self.patches_to_test
-        _, score = self.run_config(patchset, 0)
-        self.run_count += 1  # Bump run_count so we don't re-run baseline
-        # Score for baseline goes in self.scores (coverage) and data_baseline stores network data (entropy, bytes)
-        self.scores[0] = score
-        self.data_baseline = self.calculate_network_data(0)
-        self.logger.debug(f"data_baseline: {self.data_baseline}")
+        for j in range(self.max_attempts):
+            _, score = self.run_config(patchset, 0)
+
+            # Score for baseline goes in self.scores (coverage) and data_baseline stores network data (entropy, bytes)
+            self.scores[0] = score
+            self.data_baseline = self.calculate_network_data(0)
+            self.logger.debug(f"data_baseline: {self.data_baseline}")
+
+            if self.data_baseline:
+                self.logger.info(f"Baseline established on attempt {j+1}/{self.max_attempts}")
+                break
+            else:
+                self.logger.info(f"Baseline still not established... re-running. This was attempt {j+1}/{self.max_attempts}")

         assert (self.data_baseline), "Baseline data not established"

+        self.run_count += 1  # Bump run_count so we don't re-run baseline
+
         self.config_still_viable(0)  # Output is in run_dir
@@ -741,8 +753,10 @@ def run(self):
         if slow_mode:
             # Assuming orthoganality of patches, we'll generate a config without each patch
             # Greater than 2 since if we have two left binary search would've tested them both
-            if len(self.patches_to_test) > 2 or not self.binary_search:
+            #if len(self.patches_to_test) > 2 or not self.binary_search:
+            for j in range(self.max_attempts):
                 run_tracker = dict()
+                patchsets.clear()
                 for i, patch in enumerate(self.patches_to_test, start=self.run_count):
                     if i >= self.max_iters:
                         self.logger.info("Hit max iterations. Stopping")
@@ -751,6 +765,10 @@ def run(self):
                     patchset.remove(patch)
                     patchsets.append(patchset)
                     run_tracker[i] = patch
+                    if j == 0:
+                        self.attempts[patch] = 1
+                    else:
+                        self.attempts[patch] += 1

                 self.run_configs(patchsets)

@@ -759,17 +777,21 @@ def run(self):
                         self.logger.info(f"After running {i} removing {patch} from consideration, appears unnecessary")
                         self.patches_to_test.remove(patch)
                     else:
-                        self.logger.info(f"Keeping {patch} since run {i} was not viable without it")
+                        self.logger.info(f"Patch-sweep {j}: keeping {patch} since run {i} was not viable without it")

         output_file = os.path.join(self.proj_dir, "minimized.yaml")
         # TODO: force overwrite of this when --force
-        if not os.path.exists(output_file):
-            self.logger.info(f"Writing minimized config to {output_file} (note: this may include auto_explore.yaml)")
-            self.base_config["patches"].extend(self.patches_to_test)
-            with open(output_file, "w") as f:
-                yaml.dump(self.base_config, f)
-        else:
-            self.logger.info(f"Config already exists at {output_file}, not overwriting")
+        #if not os.path.exists(output_file):
+        #if we got here and this was already run, we've specified --force
+        self.logger.info(f"Writing minimized config to {output_file} (note: this may include auto_explore.yaml)")
+        self.base_config["patches"].extend(self.patches_to_test)
+        with open(output_file, "w") as f:
+            yaml.dump(self.base_config, f)
+        output_file = os.path.join(self.proj_dir, "minimize_attempts.yaml")
+        with open(output_file, "w") as f:
+            yaml.dump(self.attempts, f)
+        #else:
+        #    self.logger.info(f"Config already exists at {output_file}, not overwriting")

         return self.patches_to_test
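
Illustrative aside, not part of the patch: a minimal standalone sketch of the new requests-based fetch flow in fetch_web.py, assuming the same url, log file, and 30-second timeout values as the diff (the helper name fetch_to_file is hypothetical). requests.Timeout is a subclass of requests.RequestException, so the timeout handler is ordered first to keep that branch reachable.

import requests

def fetch_to_file(url, log_file_name, timeout=30):
    # verify=False stands in for the old wget --no-check-certificate behavior
    try:
        response = requests.get(url, verify=False, timeout=timeout)
        with open(log_file_name, "wb") as log_file:
            log_file.write(response.content)
        return True
    except requests.Timeout:
        # Caught before RequestException (its parent class) so this branch can fire
        print(f"No response from {url} after {timeout}s")
        return False
    except requests.RequestException as e:
        print(f"Error fetching {url}: {e}")
        return False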