1+ import logging
2+ from typing import Union , Iterator
3+ from pathlib import Path
4+
5+ from .models import AndroidReport , Call
6+ from capa .features .common import Feature
7+ from capa .features .address import (
8+ NO_ADDRESS ,
9+ Address ,
10+ ThreadAddress ,
11+ ProcessAddress ,
12+ DynamicCallAddress ,
13+ _NoAddress
14+ )
15+ from capa .features .extractors .base_extractor import (
16+ CallHandle ,
17+ SampleHashes ,
18+ ThreadHandle ,
19+ ProcessHandle ,
20+ DynamicFeatureExtractor ,
21+ )
22+
23+ logger = logging .getLogger (__name__ )
24+
25+
class AndroidFeatureExtractor(DynamicFeatureExtractor):
    """Dynamic feature extractor backed by an ``AndroidReport``.

    The report's processes are exposed as process handles, threads are
    derived from the thread ids recorded on each call, and calls are
    addressed by their index in the owning process's call list.
    Feature extraction itself is still a stub at every scope (see the TODOs).
    """

    def __init__(self, report: AndroidReport):
        """Wrap ``report``; sample hashes are placeholders for now."""
        # TODO: Not sure how to get APK hashes yet, will figure out later
        super().__init__(hashes=SampleHashes(md5="", sha1="", sha256=""))
        self.report: AndroidReport = report

        # Nothing populates this list yet; extract_global_features replays it.
        self.global_features: list[tuple[Feature, Address]] = []

    def get_base_address(self) -> Union[_NoAddress, None]:
        """Return ``NO_ADDRESS``; this extractor reports no base address."""
        return NO_ADDRESS

    def extract_global_features(self) -> Iterator[tuple[Feature, Address]]:
        """Yield the (currently empty) list of global features."""
        # TODO: Need to figure out what global features Android should have
        yield from self.global_features

    def extract_file_features(self) -> Iterator[tuple[Feature, Address]]:
        """Yield file-scope features; none are produced yet."""
        # TODO: Will extract file-level features from Frida data later
        yield from []

    def get_processes(self) -> Iterator[ProcessHandle]:
        """Yield one ``ProcessHandle`` per process in the report."""
        for process in self.report.processes:
            # NOTE(review): ppid is hard-coded to 0 — presumably the report
            # does not carry a parent pid; confirm against the model.
            address = ProcessAddress(pid=process.pid, ppid=0)
            yield ProcessHandle(address=address, inner=process)

    def extract_process_features(self, ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]:
        """Yield process-scope features; none are produced yet."""
        # TODO: Need to understand what process-level features make sense for Android
        yield from []

    def get_process_name(self, ph: ProcessHandle) -> str:
        """Return the package name stored on the wrapped process."""
        return ph.inner.package_name

    def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
        """Yield one ``ThreadHandle`` per distinct ``thread_id`` seen in the process's calls.

        Threads are yielded in the order their id first appears in the call
        list, so the output order is reproducible.
        """
        # dict.fromkeys de-duplicates while keeping first-seen order, unlike a
        # set whose iteration order is arbitrary.
        thread_ids = dict.fromkeys(call.thread_id for call in ph.inner.calls)

        for tid in thread_ids:
            address = ThreadAddress(process=ph.address, tid=tid)
            yield ThreadHandle(address=address, inner={"tid": tid})

    def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[tuple[Feature, Address]]:
        """Yield thread-scope features; none are produced yet."""
        # TODO: Need to understand what thread features would be useful for Android
        yield from []

    def get_calls(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[CallHandle]:
        """Yield a ``CallHandle`` for every call of ``ph`` made on thread ``th``.

        The call id is the call's index in the process-wide call list, so ids
        are unique within the process rather than renumbered per thread.
        """
        wanted_tid = th.address.tid
        for index, call in enumerate(ph.inner.calls):
            if call.thread_id != wanted_tid:
                continue
            address = DynamicCallAddress(thread=th.address, id=index)
            yield CallHandle(address=address, inner=call)

    def extract_call_features(
        self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
    ) -> Iterator[tuple[Feature, Address]]:
        """Yield call-scope features; none are produced yet."""
        # TODO: Implement call feature extraction (not sure API names, arguments, return values)
        yield from []

    def get_call_name(self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> str:
        """Render a call as ``api(k=v, ...)`` with `` -> value`` appended when a return value exists.

        A return value is shown whenever it is not ``None``, so meaningful
        falsy results such as ``0`` or ``False`` are kept.
        """
        call: Call = ch.inner

        rendered_args = ""
        if call.arguments:
            rendered_args = ", ".join(f"{k}={v}" for k, v in call.arguments.items())

        name = f"{call.api}({rendered_args})"

        # Compare against None rather than truthiness so 0/False/"" survive.
        # NOTE(review): assumes an absent return value is represented as None.
        if call.return_value is not None:
            name += f" -> {call.return_value}"

        return name

    @classmethod
    def from_frida_log(cls, package_name: str, log_file: Path) -> "AndroidFeatureExtractor":
        """Create extractor from Frida log file - main entry point.

        Reads ``log_file`` as UTF-8 text, hands its lines to
        ``AndroidReport.from_frida_logs`` together with ``package_name``, and
        wraps the resulting report.
        """
        with open(log_file, "r", encoding="utf-8") as f:
            log_lines = f.readlines()

        report = AndroidReport.from_frida_logs(package_name, log_lines)
        return cls(report)
0 commit comments