Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
a793d60
feat(insn): emit number(0) on xor of same registers
v1bh475u Mar 18, 2025
828509c
add test case for emit number(0) for insn like " xor eax, eax"
v1bh475u Mar 21, 2025
d940b7b
format conditional check for xor insn
v1bh475u Mar 21, 2025
d753fdd
update CHANGELOG
v1bh475u Mar 21, 2025
221e885
Merge branch 'master' into emit-number-0
v1bh475u Mar 21, 2025
172b365
feat(insn): emit number(0) on xor of same registers
v1bh475u Mar 21, 2025
b3b51a3
ghidra: emit number(0) on xor of same registers
v1bh475u Mar 21, 2025
7c20e31
Merge branch 'master' into emit-number-0
v1bh475u Mar 21, 2025
ce727a0
Merge branch 'master' into emit-number-0
v1bh475u Mar 21, 2025
998e850
binexport2: emit number(0) on xor of same registers
v1bh475u Mar 21, 2025
976a1ec
add fixture for mimikatz with number(0)
v1bh475u Mar 21, 2025
f278a35
refactor: rename instruction variable
v1bh475u Mar 21, 2025
a0053c9
Merge branch 'master' into emit-number-0
v1bh475u Mar 24, 2025
310eb0c
add helper functions to identify XOR insns & zeroed XORs
v1bh475u Mar 25, 2025
469d76c
Merge branch 'master' into emit-number-0
v1bh475u Mar 26, 2025
e239784
fix is_zxor & add is_operands_equal helper function
v1bh475u Mar 27, 2025
f27c9ba
inline is_operand_equal logic into is_zxor and insn extraction
v1bh475u Mar 28, 2025
cf0831d
add is_xor and is_zxor helper functions
v1bh475u Mar 31, 2025
237c9ef
refactor extract_insn_number_feature to use is_zxor helper
v1bh475u Mar 31, 2025
fa55058
add is_operands_equal helper function for instruction operand comparison
v1bh475u Mar 31, 2025
51c16ab
simplify is_zxor logic
v1bh475u Apr 1, 2025
e91ba10
fix is_zxor to compare both operands directly
v1bh475u Apr 1, 2025
46b3d4e
viv-backend: refactor is_zxor
v1bh475u Apr 1, 2025
6e3944c
rename is_operands_equal to are_operands_equal for consistency
v1bh475u Apr 2, 2025
88f15bd
Merge branch 'master' into emit-number-0
v1bh475u Jun 12, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## master (unreleased)

### New Features
- emit number(0) for instructions like "xor eax,eax" #2622 @v1bh475u
- ci: add support for arm64 binary releases

### Breaking Changes
Expand Down
5 changes: 5 additions & 0 deletions capa/features/extractors/binexport2/arch/arm/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,8 @@ def is_stack_register_expression(be2: BinExport2, expression: BinExport2.Express
return bool(
expression and expression.type == BinExport2.Expression.REGISTER and expression.symbol.lower().endswith("sp")
)


def are_operands_equal(be2: BinExport2, instruction: BinExport2.Instruction) -> bool:
    """
    check whether the second and third operands of the instruction compare equal.

    the first operand (index 0) is not considered, so for a pattern like:

        eor x0, x0, x0

    only the last two operands are compared.

    raises IndexError if the instruction has fewer than three operands.
    """
    # resolve every operand index of the instruction into its operand entry
    resolved: list[BinExport2.Operand] = [be2.operand[index] for index in instruction.operand_index]
    lhs, rhs = resolved[1], resolved[2]
    return lhs == rhs
14 changes: 10 additions & 4 deletions capa/features/extractors/binexport2/arch/arm/insn.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
get_operand_immediate_expression,
)
from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2
from capa.features.extractors.binexport2.arch.arm.helpers import is_stack_register_expression
from capa.features.extractors.binexport2.arch.arm.helpers import are_operands_equal, is_stack_register_expression

logger = logging.getLogger(__name__)

Expand All @@ -53,6 +53,14 @@ def extract_insn_number_features(

mnemonic: str = get_instruction_mnemonic(be2, instruction)

if mnemonic == "xor":
if are_operands_equal(be2, instruction):
# for pattern like:
#
# eor x0, x0, x0
#
yield Number(0), ih.address

if mnemonic in ("add", "sub"):
assert len(instruction.operand_index) == 3

Expand Down Expand Up @@ -138,9 +146,7 @@ def extract_insn_nzxor_characteristic_features(
instruction: BinExport2.Instruction = be2.instruction[ii.instruction_index]
# guaranteed to be simple int/reg operands
# so we don't have to realize the tree/list.
operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index]

if operands[1] != operands[2]:
if not are_operands_equal(be2, instruction):
yield Characteristic("nzxor"), ih.address


Expand Down
5 changes: 5 additions & 0 deletions capa/features/extractors/binexport2/arch/intel/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,3 +140,8 @@ def get_operand_phrase_info(be2: BinExport2, operand: BinExport2.Operand) -> Opt
raise NotImplementedError(len(expressions))

return None


def are_operands_equal(be2: BinExport2, instruction: BinExport2.Instruction) -> bool:
    """
    check whether the first and second operands of the instruction compare equal.

    for example, this holds for a pattern like:

        xor eax, eax

    raises IndexError if the instruction has fewer than two operands.
    """
    # resolve every operand index of the instruction into its operand entry
    resolved: list[BinExport2.Operand] = [be2.operand[index] for index in instruction.operand_index]
    lhs, rhs = resolved[0], resolved[1]
    return lhs == rhs
13 changes: 9 additions & 4 deletions capa/features/extractors/binexport2/arch/intel/insn.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
get_instruction_mnemonic,
)
from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2
from capa.features.extractors.binexport2.arch.intel.helpers import SECURITY_COOKIE_BYTES_DELTA
from capa.features.extractors.binexport2.arch.intel.helpers import SECURITY_COOKIE_BYTES_DELTA, are_operands_equal

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -81,6 +81,13 @@ def extract_insn_number_features(

match = NUMBER_PATTERNS.match_with_be2(be2, ii.instruction_index)
if not match:
if BinExport2InstructionPatternMatcher.from_str("xor reg, reg").match_with_be2(be2, ii.instruction_index):
# for pattern like:
#
# xor eax, eax
#
if are_operands_equal(be2, be2.instruction[ii.instruction_index]):
yield Number(0), ih.address
return

value: int = mask_immediate(fhi.arch, match.expression.immediate)
Expand Down Expand Up @@ -216,9 +223,7 @@ def extract_insn_nzxor_characteristic_features(
instruction: BinExport2.Instruction = be2.instruction[ii.instruction_index]
# guaranteed to be simple int/reg operands
# so we don't have to realize the tree/list.
operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index]

if operands[0] == operands[1]:
if are_operands_equal(be2, instruction):
return

instruction_address: int = idx.insn_address_by_index[ii.instruction_index]
Expand Down
7 changes: 7 additions & 0 deletions capa/features/extractors/ghidra/insn.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,13 @@ def extract_insn_number_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl
# .text:00401145 add esp, 0Ch
return

if insn.getMnemonicString().startswith("XOR"):
if capa.features.extractors.ghidra.helpers.is_zxor(insn):
# for pattern like:
#
# xor eax, eax
yield Number(0), ih.address

for i in range(insn.getNumOperands()):
# Exceptions for LEA insn:
# invalid operand encoding, considered numbers instead of offsets
Expand Down
8 changes: 8 additions & 0 deletions capa/features/extractors/ida/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,10 @@ def get_instructions_in_range(start: int, end: int) -> Iterator[idaapi.insn_t]:
yield insn


def is_xor(insn: idaapi.insn_t) -> bool:
    """check whether the instruction type is one of: xor, xorpd, xorps, pxor"""
    xor_itypes = (idaapi.NN_xor, idaapi.NN_xorpd, idaapi.NN_xorps, idaapi.NN_pxor)
    return insn.itype in xor_itypes


def is_operand_equal(op1: idaapi.op_t, op2: idaapi.op_t) -> bool:
"""compare two IDA op_t"""
if op1.flags != op2.flags:
Expand All @@ -214,6 +218,10 @@ def is_operand_equal(op1: idaapi.op_t, op2: idaapi.op_t) -> bool:
return True


def is_zxor(insn: idaapi.insn_t) -> bool:
    """check whether the instruction is an XOR whose first two operands are equal, like `xor eax, eax`"""
    if not is_xor(insn):
        return False
    return is_operand_equal(insn.Op1, insn.Op2)


def is_basic_block_equal(bb1: idaapi.BasicBlock, bb2: idaapi.BasicBlock) -> bool:
"""compare two IDA BasicBlock"""
if bb1.start_ea != bb2.start_ea:
Expand Down
8 changes: 7 additions & 1 deletion capa/features/extractors/ida/insn.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,12 @@ def extract_insn_number_features(
# .text:00401145 add esp, 0Ch
return

if capa.features.extractors.ida.helpers.is_zxor(insn):
# for pattern like:
#
# xor eax, eax
yield Number(0), ih.address

for i, op in enumerate(insn.ops):
if op.type == idaapi.o_void:
break
Expand Down Expand Up @@ -383,7 +389,7 @@ def extract_insn_nzxor_characteristic_features(
"""
insn: idaapi.insn_t = ih.inner

if insn.itype not in (idaapi.NN_xor, idaapi.NN_xorpd, idaapi.NN_xorps, idaapi.NN_pxor):
if not capa.features.extractors.ida.helpers.is_xor(insn):
return
if capa.features.extractors.ida.helpers.is_operand_equal(insn.Op1, insn.Op2):
return
Expand Down
9 changes: 9 additions & 0 deletions capa/features/extractors/viv/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

from typing import Optional

import envi
from vivisect import VivWorkspace
from vivisect.const import XR_TO, REF_CODE

Expand All @@ -28,3 +29,11 @@ def get_coderef_from(vw: VivWorkspace, va: int) -> Optional[int]:
return xrefs[0][XR_TO]
else:
return None


def is_xor(insn: envi.Opcode):
    """check whether the instruction mnemonic is one of: xor, xorpd, xorps, pxor"""
    xor_mnemonics = ("xor", "xorpd", "xorps", "pxor")
    return insn.mnem in xor_mnemonics


def is_zxor(insn: envi.Opcode):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you think we're fine without this helper here? The other ones are fine I think.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair enough

return is_xor(insn) and insn.opers[0] == insn.opers[1]
9 changes: 8 additions & 1 deletion capa/features/extractors/viv/insn.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,7 @@ def extract_insn_nzxor_characteristic_features(
bb: viv_utils.BasicBlock = bbhandle.inner
f: viv_utils.Function = fh.inner

if insn.mnem not in ("xor", "xorpd", "xorps", "pxor"):
if not capa.features.extractors.viv.helpers.is_xor(insn):
return

if insn.opers[0] == insn.opers[1]:
Expand Down Expand Up @@ -594,6 +594,13 @@ def extract_op_number_features(
insn: envi.Opcode = ih.inner
f: viv_utils.Function = fh.inner

if capa.features.extractors.viv.helpers.is_zxor(insn):
# for pattern like:
#
# xor eax, eax
#
yield Number(0), ih.address

# this is for both x32 and x64
if not isinstance(oper, (envi.archs.i386.disasm.i386ImmOper, envi.archs.i386.disasm.i386ImmMemOper)):
return
Expand Down
2 changes: 2 additions & 0 deletions tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -1021,6 +1021,8 @@ def parametrize(params, values, **kwargs):
("7351f.elf", "function=0x408753,bb=0x408781", capa.features.insn.API("open"), True),
("79abd...", "function=0x10002385,bb=0x10002385", capa.features.common.Characteristic("call $+5"), True),
("946a9...", "function=0x10001510,bb=0x100015c0", capa.features.common.Characteristic("call $+5"), True),
("9324d...", "function=0x40806C,bb=0x40806C,insn=0x40806C", capa.features.insn.Number(0), True),
("mimikatz", "function=0x40105d", capa.features.insn.Number(0), True),
],
# order tests by (file, item)
# so that our LRU cache is most effective.
Expand Down