From 73a9255d01aef5d3d1af86cd787a8c34105a8d5b Mon Sep 17 00:00:00 2001
From: Caden Kline
Date: Wed, 18 Dec 2024 13:26:40 -0500
Subject: [PATCH 1/4] initial linker support

---
 src/penguin/config_patchers.py | 35 +++++++++++++++++++++++++++++++++-
 src/penguin/gen_config.py      |  1 +
 src/penguin/static_analyses.py | 13 +++++++++----
 3 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/src/penguin/config_patchers.py b/src/penguin/config_patchers.py
index 4227e4eb7..2991dbc5e 100644
--- a/src/penguin/config_patchers.py
+++ b/src/penguin/config_patchers.py
@@ -352,7 +352,7 @@ def __init__(self, arch_info, inits):
         self.patch_name = "base"
         self.enabled = True
 
-        self.set_arch_info(arch_info)
+        self.set_arch_info(arch_info[0])
 
         if len(inits):
             self.igloo_init = inits[0]
@@ -1642,3 +1642,36 @@ def generate(self, patches):
 
         if len(results):
             return {'pseudofiles': results}
+
+
+class LinkerSymbolSearch(PatchGenerator):
+    '''
+    During static analysis the LibrarySymbols class collected
+    key->value mappings from libraries exporting some common nvram
+    defaults symbols ("Nvrams", "router_defaults") - add these to our
+    nvram config if we have any.
+
+    TODO: if we find multiple nvram source files here, we should generate multiple patches.
+    Then we should consider these during search. For now we just take non-conflicting values
+    from largest to smallest source files. More realistic might be to try each file individually.
+    '''
+
+    def __init__(self, library_info, archid):
+        self.library_info = library_info
+        self.patch_name = "ld.01_library"
+        self.linker_paths = archid[1]
+        self.enabled = True
+
+    def generate(self, patches):
+        sources = self.library_info.get("symbols", {})
+        if not len(sources):
+            return
+        linkers_without_preload = []
+        for file in sources.keys():
+            if file in self.linker_paths.keys():
+                self.linker_paths.pop('file')
+                if not ("_dl_preload" in sources[file] or "handle_ld_preload" in sources[file]):
+                    linkers_without_preload.append(file)
+        print(linkers_without_preload)
+
+
diff --git a/src/penguin/gen_config.py b/src/penguin/gen_config.py
index 38b8dfc9a..1ee4c26e1 100644
--- a/src/penguin/gen_config.py
+++ b/src/penguin/gen_config.py
@@ -200,6 +200,7 @@ def create_patches(self, fs_archive, static_results, extract_dir):
             CP.NvramConfigRecoveryWild(extract_dir),
             CP.NvramConfigRecovery(extract_dir),
             CP.NvramLibraryRecovery(static_results['LibrarySymbols']),
+            CP.LinkerSymbolSearch(static_results['LibrarySymbols'], static_results['ArchId']),
         ]
 
         # collect patches in patches[patchfile_name] -> {section -> {key -> value}}
diff --git a/src/penguin/static_analyses.py b/src/penguin/static_analyses.py
index 711cfdaef..eeff068cd 100644
--- a/src/penguin/static_analyses.py
+++ b/src/penguin/static_analyses.py
@@ -37,7 +37,7 @@ def find_regex(target_regex, extract_root, ignore=None, only_files=True):
         if filepath.startswith(os.path.join(extract_root, "igloo")):
             continue
 
-        # skip non-regular files if `only_files` is true
+        # skip non-regular files if `only_files` is true
         if only_files and not os.path.isfile(filepath):
             continue
 
@@ -81,6 +81,7 @@ def run(self, extracted_fs, prior_results):
         '''
 
         arch_counts = {32: Counter(), 64: Counter(), "unknown": 0}
+        loaders = {}
         for root, _, files in os.walk(extracted_fs):
             for file_name in files:
                 path = os.path.join(root, file_name)
@@ -105,6 +106,11 @@ def run(self, extracted_fs, prior_results):
                     arch_counts["unknown"] += 1
                 else:
                     arch_counts[info.bits][info.arch] += 1
+                # Search for PT_INTERP
+                for segment in ef.iter_segments(type=0x3):
+                    name = segment.get_interp_name()
+                    if name is not None:
+                        loaders[name] = loaders.get(name, 0) + 1
 
         # If there is at least one intel and non-intel arch,
         # filter out all the intel ones.
@@ -145,7 +151,7 @@ def run(self, extracted_fs, prior_results):
             raise ValueError("Failed to determine architecture of filesystem")
 
         logger.debug(f"Identified architecture: {best}")
-        return best
+        return (best, loaders)
 
     @staticmethod
     def _binary_filter(fsbase, name):
@@ -653,7 +659,7 @@ class LibrarySymbols(StaticAnalysis):
 
     def run(self, extract_dir, prior_results):
         self.extract_dir = extract_dir
-        self.archend = arch_end(prior_results['ArchId'])
+        self.archend = arch_end(prior_results['ArchId'][0])
 
         if any([x is None for x in self.archend]):
             self.enabled = False
@@ -749,7 +755,7 @@ def _analyze_library(elf_path, archend):
 
     symbols = {}  # Symbol name -> relative(?) address
    nvram_data = {}  # key -> value (may be empty string)
-
     def _is_elf(filename):
         try:
             with open(filename, "rb") as f:

From 6c55511147314748dd6ee46d80f18808eea43b84 Mon Sep 17 00:00:00 2001
From: Caden Kline
Date: Wed, 8 Jan 2025 13:10:38 -0500
Subject: [PATCH 2/4] working linker check

---
 src/penguin/config_patchers.py | 18 ++++++++++--------
 src/penguin/gen_config.py      |  2 ++
 src/penguin/static_analyses.py | 14 +++++++++-----
 3 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/src/penguin/config_patchers.py b/src/penguin/config_patchers.py
index 2991dbc5e..3c9fe8f63 100644
--- a/src/penguin/config_patchers.py
+++ b/src/penguin/config_patchers.py
@@ -1650,10 +1650,6 @@ class LinkerSymbolSearch(PatchGenerator):
     '''
     During static analysis the LibrarySymbols class collected
     key->value mappings from libraries exporting some common nvram
     defaults symbols ("Nvrams", "router_defaults") - add these to our
     nvram config if we have any.
-
-    TODO: if we find multiple nvram source files here, we should generate multiple patches.
-    Then we should consider these during search. For now we just take non-conflicting values
-    from largest to smallest source files. More realistic might be to try each file individually.
     '''
 
     def __init__(self, library_info, archid):
@@ -1667,11 +1663,17 @@ def generate(self, patches):
         if not len(sources):
             return
         linkers_without_preload = []
+        linkers = self.linker_paths.copy()
+
         for file in sources.keys():
             if file in self.linker_paths.keys():
-                self.linker_paths.pop('file')
+                self.linker_paths.pop(file)
                 if not ("_dl_preload" in sources[file] or "handle_ld_preload" in sources[file]):
                     linkers_without_preload.append(file)
-        print(linkers_without_preload)
-
-
+                    for x in sources[file]:
+                        if "GLIBC_" in x:
+                            linkers_without_preload.remove(file)
+                            break
+        if not len(linkers_without_preload):
+            return
+        logger.critical(f"The following linkers are missing PRELOAD capabilities: {linkers_without_preload} out of {linkers.keys()}")
diff --git a/src/penguin/gen_config.py b/src/penguin/gen_config.py
index 1ee4c26e1..3fa756160 100644
--- a/src/penguin/gen_config.py
+++ b/src/penguin/gen_config.py
@@ -94,6 +94,8 @@ def run_static_analyses(self, output_dir, extracted_dir, static_dir_name="static
             # If we have results, store on disk. Always store in results dict, even if empty
             if this_result:
                 with open(results_dir / f"{analysis.__name__}.yaml", "w") as f:
+                    if analysis == STATIC.ArchId:
+                        this_result = this_result[0]
                     yaml.dump(this_result, f)
 
         return results
diff --git a/src/penguin/static_analyses.py b/src/penguin/static_analyses.py
index eeff068cd..0e39ef3ca 100644
--- a/src/penguin/static_analyses.py
+++ b/src/penguin/static_analyses.py
@@ -102,15 +102,17 @@ def run(self, extracted_fs, prior_results):
                     logger.warning(f"Failed to parse ELF file {path}: {e}. Ignoring")
                     continue
                 info = arch_filter(ef)
+                for segment in ef.iter_segments():
+                    try:
+                        name = segment.get_interp_name()
+                        if name is not None:
+                            loaders[name] = loaders.get(name, 0) + 1
+                    except AttributeError:
+                        continue
                 if info.bits is None or info.arch is None:
                     arch_counts["unknown"] += 1
                 else:
                     arch_counts[info.bits][info.arch] += 1
-                # Search for PT_INTERP
-                for segment in ef.iter_segments(type=0x3):
-                    name = segment.get_interp_name()
-                    if name is not None:
-                        loaders[name] = loaders.get(name, 0) + 1
 
         # If there is at least one intel and non-intel arch,
         # filter out all the intel ones.
@@ -660,6 +662,7 @@ class LibrarySymbols(StaticAnalysis):
     def run(self, extract_dir, prior_results):
         self.extract_dir = extract_dir
         self.archend = arch_end(prior_results['ArchId'][0])
+        self.linkers = prior_results['ArchId'][1]
 
         if any([x is None for x in self.archend]):
             self.enabled = False
@@ -755,6 +758,7 @@ def _analyze_library(elf_path, archend):
 
     symbols = {}  # Symbol name -> relative(?) address
     nvram_data = {}  # key -> value (may be empty string)
+
     def _is_elf(filename):
         try:
             with open(filename, "rb") as f:

From 921ed75465cdd54caec19b1a086e8b0ed8e420ff Mon Sep 17 00:00:00 2001
From: Caden Kline
Date: Wed, 15 Jan 2025 13:27:08 -0500
Subject: [PATCH 3/4] speed up and path changes

---
 src/penguin/static_analyses.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/penguin/static_analyses.py b/src/penguin/static_analyses.py
index 0e39ef3ca..ac061b5fa 100644
--- a/src/penguin/static_analyses.py
+++ b/src/penguin/static_analyses.py
@@ -102,11 +102,18 @@ def run(self, extracted_fs, prior_results):
                     logger.warning(f"Failed to parse ELF file {path}: {e}. Ignoring")
Ignoring") continue info = arch_filter(ef) - for segment in ef.iter_segments(): + for segment in ef.iter_segments(type='PT_INTERP'): try: name = segment.get_interp_name() if name is not None: - loaders[name] = loaders.get(name, 0) + 1 + # handle non path loaders might be a problem on certain version where loader path is handled with an env var + if '/' not in name: + for loader in loaders.keys(): + if name in loader: + loaders[loader] = loaders.get(loader, 0) + 1 + break + else: + loaders[name] = loaders.get(name, 0) + 1 except AttributeError: continue if info.bits is None or info.arch is None: From 463c624a5536ee4e7e4dc20b612b10a10ad8d694 Mon Sep 17 00:00:00 2001 From: Caden Kline Date: Mon, 27 Jan 2025 11:36:39 -0500 Subject: [PATCH 4/4] fix comment --- src/penguin/config_patchers.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/penguin/config_patchers.py b/src/penguin/config_patchers.py index 3c9fe8f63..8aa610926 100644 --- a/src/penguin/config_patchers.py +++ b/src/penguin/config_patchers.py @@ -1647,9 +1647,7 @@ def generate(self, patches): class LinkerSymbolSearch(PatchGenerator): ''' During static analysis the LibrarySymbols class collected - key->value mappings from libraries exporting some common nvram - defaults symbols ("Nvrams", "router_defaults") - add these to our - nvram config if we have any. + key->value mappings from libraries exporting symbols. Check these symbols for compatibility with our preload strategy. ''' def __init__(self, library_info, archid):