11from typing import Union , Iterator
22from pathlib import Path
33
4- from . models import FridaReport , Call
4+ from models import FridaReport , Call
55from capa .features .common import Feature , String , OS , Arch , Format
66from capa .features .insn import API , Number
77from capa .features .address import (
@@ -28,6 +28,9 @@ class FridaExtractor(DynamicFeatureExtractor):
2828 Processes JSON output from Frida instrumentation to extract behavioral features.
2929 """
3030 def __init__ (self , report : FridaReport ):
31+ # TODO: From what I've found, Frida cannot access the original APK file to compute hashes at runtime.
32+ # We may need to require users to provide both the Frida-generated log file and the original file to capa,
33+ # like we do with other extractors, e.g. BinExport, VMRay, etc.
3134 super ().__init__ (
3235 hashes = SampleHashes (md5 = "" , sha1 = "" , sha256 = "" )
3336 )
@@ -39,12 +42,27 @@ def get_base_address(self) -> Union[_NoAddress, None]:
3942
4043 def extract_global_features (self ) -> Iterator [tuple [Feature , Address ]]:
4144 """Basic global features"""
42- yield OS ("android" ), NO_ADDRESS
43- yield Arch ("aarch64" ), NO_ADDRESS
44- yield Format ("android" ), NO_ADDRESS
45+ yield OS ("android" ), NO_ADDRESS # OS: Frida doesn't provide OS info
4546
47+ if self .report .processes :
48+ process = self .report .processes [0 ]
49+
50+ if process .arch :
51+ arch_mapping = {
52+ "arm64" : "aarch64" ,
53+ "arm" : "arm" ,
54+ "x64" : "amd64" ,
55+ "x86" : "i386"
56+ }
57+ capa_arch = arch_mapping .get (process .arch , process .arch )
58+ yield Arch (capa_arch ), NO_ADDRESS
59+
60+ if process .platform :
61+ # TODO: capa doesn't have a dedicated FORMAT_ANDROID constant yet.
62+ yield Format ("android" ), NO_ADDRESS
63+
4664 def extract_file_features (self ) -> Iterator [tuple [Feature , Address ]]:
47- """Baisc file features"""
65+ """Basic file features"""
4866 yield String (self .report .package_name ), NO_ADDRESS
4967
5068 def get_processes (self ) -> Iterator [ProcessHandle ]:
@@ -78,20 +96,24 @@ def get_calls(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[CallHandle]
7896 """Get all API calls in a specific thread"""
7997 for i , call in enumerate (ph .inner .calls ):
8098 if call .thread_id == th .address .tid :
81- addr = DynamicCallAddress (thread = th .address , id = i )
99+ addr = DynamicCallAddress (thread = th .address , id = call . call_id )
82100 yield CallHandle (address = addr , inner = call )
83101
84102 def extract_call_features (self , ph : ProcessHandle , th : ThreadHandle , ch : CallHandle
85103 ) -> Iterator [tuple [Feature , Address ]]:
86104 """Extract features from individual API calls"""
87- # TODO: Implement call feature extraction
88-
105+ # TODO: Implement call feature extraction from arguments and return value
106+ call : Call = ch .inner
107+
108+ yield API (call .api_name ), ch .address
109+
89110 def get_call_name (self , ph : ProcessHandle , th : ThreadHandle , ch : CallHandle ) -> str :
90111 """Format API call name and parameters"""
112+ # TODO: Implement after extract_call_features
91113 call : Call = ch .inner
92114
93115 parts = []
94- parts .append (call .api )
116+ parts .append (call .api_name )
95117 parts .append ("(" )
96118
97119 if call .arguments :
0 commit comments