diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f8d685a0d..b927122b69 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## master (unreleased) ### New Features +- emit number(0) for instructions like "xor eax,eax" #2622 @v1bh475u - ci: add support for arm64 binary releases ### Breaking Changes diff --git a/capa/features/extractors/binexport2/arch/arm/helpers.py b/capa/features/extractors/binexport2/arch/arm/helpers.py index 01f1ae79cb..43c9ae0241 100644 --- a/capa/features/extractors/binexport2/arch/arm/helpers.py +++ b/capa/features/extractors/binexport2/arch/arm/helpers.py @@ -20,3 +20,8 @@ def is_stack_register_expression(be2: BinExport2, expression: BinExport2.Express return bool( expression and expression.type == BinExport2.Expression.REGISTER and expression.symbol.lower().endswith("sp") ) + + +def are_operands_equal(be2: BinExport2, instruction: BinExport2.Instruction) -> bool: + operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index] + return operands[1] == operands[2] diff --git a/capa/features/extractors/binexport2/arch/arm/insn.py b/capa/features/extractors/binexport2/arch/arm/insn.py index 8b481040dd..61da4ac39e 100644 --- a/capa/features/extractors/binexport2/arch/arm/insn.py +++ b/capa/features/extractors/binexport2/arch/arm/insn.py @@ -30,7 +30,7 @@ get_operand_immediate_expression, ) from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2 -from capa.features.extractors.binexport2.arch.arm.helpers import is_stack_register_expression +from capa.features.extractors.binexport2.arch.arm.helpers import are_operands_equal, is_stack_register_expression logger = logging.getLogger(__name__) @@ -53,6 +53,14 @@ def extract_insn_number_features( mnemonic: str = get_instruction_mnemonic(be2, instruction) + if mnemonic == "eor": + if are_operands_equal(be2, instruction): + # ARM spells exclusive-or as "eor"; for pattern like: + # + # eor x0, x0, x0 + # + yield Number(0), ih.address + if mnemonic in ("add", "sub"): 
assert len(instruction.operand_index) == 3 @@ -138,9 +146,7 @@ def extract_insn_nzxor_characteristic_features( instruction: BinExport2.Instruction = be2.instruction[ii.instruction_index] # guaranteed to be simple int/reg operands # so we don't have to realize the tree/list. - operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index] - - if operands[1] != operands[2]: + if not are_operands_equal(be2, instruction): yield Characteristic("nzxor"), ih.address diff --git a/capa/features/extractors/binexport2/arch/intel/helpers.py b/capa/features/extractors/binexport2/arch/intel/helpers.py index ce50607545..cc08ec32b9 100644 --- a/capa/features/extractors/binexport2/arch/intel/helpers.py +++ b/capa/features/extractors/binexport2/arch/intel/helpers.py @@ -140,3 +140,8 @@ def get_operand_phrase_info(be2: BinExport2, operand: BinExport2.Operand) -> Opt raise NotImplementedError(len(expressions)) return None + + +def are_operands_equal(be2: BinExport2, instruction: BinExport2.Instruction) -> bool: + operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index] + return operands[0] == operands[1] diff --git a/capa/features/extractors/binexport2/arch/intel/insn.py b/capa/features/extractors/binexport2/arch/intel/insn.py index 02e51a6dc9..e7f0954831 100644 --- a/capa/features/extractors/binexport2/arch/intel/insn.py +++ b/capa/features/extractors/binexport2/arch/intel/insn.py @@ -29,7 +29,7 @@ get_instruction_mnemonic, ) from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2 -from capa.features.extractors.binexport2.arch.intel.helpers import SECURITY_COOKIE_BYTES_DELTA +from capa.features.extractors.binexport2.arch.intel.helpers import SECURITY_COOKIE_BYTES_DELTA, are_operands_equal logger = logging.getLogger(__name__) @@ -81,6 +81,13 @@ def extract_insn_number_features( match = NUMBER_PATTERNS.match_with_be2(be2, ii.instruction_index) if not match: + if 
BinExport2InstructionPatternMatcher.from_str("xor reg, reg").match_with_be2(be2, ii.instruction_index): + # for pattern like: + # + # xor eax, eax + # + if are_operands_equal(be2, be2.instruction[ii.instruction_index]): + yield Number(0), ih.address return value: int = mask_immediate(fhi.arch, match.expression.immediate) @@ -216,9 +223,7 @@ def extract_insn_nzxor_characteristic_features( instruction: BinExport2.Instruction = be2.instruction[ii.instruction_index] # guaranteed to be simple int/reg operands # so we don't have to realize the tree/list. - operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index] - - if operands[0] == operands[1]: + if are_operands_equal(be2, instruction): return instruction_address: int = idx.insn_address_by_index[ii.instruction_index] diff --git a/capa/features/extractors/ghidra/insn.py b/capa/features/extractors/ghidra/insn.py index 4cfc8b7856..161a9e6c19 100644 --- a/capa/features/extractors/ghidra/insn.py +++ b/capa/features/extractors/ghidra/insn.py @@ -156,6 +156,13 @@ def extract_insn_number_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl # .text:00401145 add esp, 0Ch return + if insn.getMnemonicString().startswith("XOR"): + if capa.features.extractors.ghidra.helpers.is_zxor(insn): + # for pattern like: + # + # xor eax, eax + yield Number(0), ih.address + for i in range(insn.getNumOperands()): # Exceptions for LEA insn: # invalid operand encoding, considered numbers instead of offsets diff --git a/capa/features/extractors/ida/helpers.py b/capa/features/extractors/ida/helpers.py index 365a20675c..9a5a41c5ee 100644 --- a/capa/features/extractors/ida/helpers.py +++ b/capa/features/extractors/ida/helpers.py @@ -188,6 +188,10 @@ def get_instructions_in_range(start: int, end: int) -> Iterator[idaapi.insn_t]: yield insn +def is_xor(insn: idaapi.insn_t) -> bool: + return insn.itype in (idaapi.NN_xor, idaapi.NN_xorpd, idaapi.NN_xorps, idaapi.NN_pxor) + + def 
is_operand_equal(op1: idaapi.op_t, op2: idaapi.op_t) -> bool: """compare two IDA op_t""" if op1.flags != op2.flags: @@ -214,6 +218,10 @@ def is_operand_equal(op1: idaapi.op_t, op2: idaapi.op_t) -> bool: return True +def is_zxor(insn: idaapi.insn_t) -> bool: + return is_xor(insn) and is_operand_equal(insn.Op1, insn.Op2) + + def is_basic_block_equal(bb1: idaapi.BasicBlock, bb2: idaapi.BasicBlock) -> bool: """compare two IDA BasicBlock""" if bb1.start_ea != bb2.start_ea: diff --git a/capa/features/extractors/ida/insn.py b/capa/features/extractors/ida/insn.py index 0e92b21f5e..c45273e5a9 100644 --- a/capa/features/extractors/ida/insn.py +++ b/capa/features/extractors/ida/insn.py @@ -160,6 +160,12 @@ def extract_insn_number_features( # .text:00401145 add esp, 0Ch return + if capa.features.extractors.ida.helpers.is_zxor(insn): + # for pattern like: + # + # xor eax, eax + yield Number(0), ih.address + for i, op in enumerate(insn.ops): if op.type == idaapi.o_void: break @@ -383,7 +389,7 @@ def extract_insn_nzxor_characteristic_features( """ insn: idaapi.insn_t = ih.inner - if insn.itype not in (idaapi.NN_xor, idaapi.NN_xorpd, idaapi.NN_xorps, idaapi.NN_pxor): + if not capa.features.extractors.ida.helpers.is_xor(insn): return if capa.features.extractors.ida.helpers.is_operand_equal(insn.Op1, insn.Op2): return diff --git a/capa/features/extractors/viv/helpers.py b/capa/features/extractors/viv/helpers.py index 9442e0ebaf..cb66eb25c6 100644 --- a/capa/features/extractors/viv/helpers.py +++ b/capa/features/extractors/viv/helpers.py @@ -14,6 +14,7 @@ from typing import Optional +import envi from vivisect import VivWorkspace from vivisect.const import XR_TO, REF_CODE @@ -28,3 +29,11 @@ def get_coderef_from(vw: VivWorkspace, va: int) -> Optional[int]: return xrefs[0][XR_TO] else: return None + + +def is_xor(insn: envi.Opcode) -> bool: + return insn.mnem in ("xor", "xorpd", "xorps", "pxor") + + +def is_zxor(insn: envi.Opcode) -> bool: + return is_xor(insn) and insn.opers[0] == insn.opers[1] diff 
--git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index 0b3e79f990..bf81dd479a 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -387,7 +387,7 @@ def extract_insn_nzxor_characteristic_features( bb: viv_utils.BasicBlock = bbhandle.inner f: viv_utils.Function = fh.inner - if insn.mnem not in ("xor", "xorpd", "xorps", "pxor"): + if not capa.features.extractors.viv.helpers.is_xor(insn): return if insn.opers[0] == insn.opers[1]: @@ -594,6 +594,13 @@ def extract_op_number_features( insn: envi.Opcode = ih.inner f: viv_utils.Function = fh.inner + if capa.features.extractors.viv.helpers.is_zxor(insn): + # for pattern like: + # + # xor eax, eax + # + yield Number(0), ih.address + # this is for both x32 and x64 if not isinstance(oper, (envi.archs.i386.disasm.i386ImmOper, envi.archs.i386.disasm.i386ImmMemOper)): return diff --git a/tests/fixtures.py b/tests/fixtures.py index b9199061d5..46f19c8567 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -1021,6 +1021,8 @@ def parametrize(params, values, **kwargs): ("7351f.elf", "function=0x408753,bb=0x408781", capa.features.insn.API("open"), True), ("79abd...", "function=0x10002385,bb=0x10002385", capa.features.common.Characteristic("call $+5"), True), ("946a9...", "function=0x10001510,bb=0x100015c0", capa.features.common.Characteristic("call $+5"), True), + ("9324d...", "function=0x40806C,bb=0x40806C,insn=0x40806C", capa.features.insn.Number(0), True), + ("mimikatz", "function=0x40105d", capa.features.insn.Number(0), True), ], # order tests by (file, item) # so that our LRU cache is most effective.