Skip to content
Open
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
a793d60
feat(insn): emit number(0) on xor of same registers
v1bh475u Mar 18, 2025
828509c
add test case for emit number(0) for insn like " xor eax, eax"
v1bh475u Mar 21, 2025
d940b7b
format conditional check for xor insn
v1bh475u Mar 21, 2025
d753fdd
udpate CHANGELOG
v1bh475u Mar 21, 2025
221e885
Merge branch 'master' into emit-number-0
v1bh475u Mar 21, 2025
172b365
feat(insn): emit number(0) on xor of same registers
v1bh475u Mar 21, 2025
b3b51a3
ghidra: emit number(0) on xor of same registers
v1bh475u Mar 21, 2025
7c20e31
Merge branch 'master' into emit-number-0
v1bh475u Mar 21, 2025
ce727a0
Merge branch 'master' into emit-number-0
v1bh475u Mar 21, 2025
998e850
binexport2: emit number(0) on xor of same registers
v1bh475u Mar 21, 2025
976a1ec
add fixture for mimikatz with number(0)
v1bh475u Mar 21, 2025
f278a35
refactor: rename instruction variable
v1bh475u Mar 21, 2025
a0053c9
Merge branch 'master' into emit-number-0
v1bh475u Mar 24, 2025
310eb0c
add helper functions to identify XOR insns & zeored XORs
v1bh475u Mar 25, 2025
469d76c
Merge branch 'master' into emit-number-0
v1bh475u Mar 26, 2025
e239784
fix is_zxor & add is_operands_equal helper function
v1bh475u Mar 27, 2025
f27c9ba
inline is_operand_equal logic into is_zxor and insn extraction
v1bh475u Mar 28, 2025
cf0831d
add is_xor and is_zxor helper functions
v1bh475u Mar 31, 2025
237c9ef
refactor extract_insn_number_feature to use is_zxor helper
v1bh475u Mar 31, 2025
fa55058
add is_operands_equal helper function for instruction operand comparison
v1bh475u Mar 31, 2025
51c16ab
simplify is_zxor logic
v1bh475u Apr 1, 2025
e91ba10
fix is_zxor to compare both operands directly
v1bh475u Apr 1, 2025
46b3d4e
viv-backend: refactor is_zxor
v1bh475u Apr 1, 2025
6e3944c
rename is_operands_equal to are_operands_equal for consistency
v1bh475u Apr 2, 2025
88f15bd
Merge branch 'master' into emit-number-0
v1bh475u Jun 12, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## master (unreleased)

### New Features
- emit number(0) for instructions like "xor eax,eax" #2622 @v1bh475u

### Breaking Changes

Expand Down
9 changes: 9 additions & 0 deletions capa/features/extractors/binexport2/arch/arm/insn.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,15 @@ def extract_insn_number_features(

mnemonic: str = get_instruction_mnemonic(be2, instruction)

if mnemonic == "xor":
operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index]
if operands[1] == operands[2]:
# for pattern like:
#
# eor x0, x0, x0
#
yield Number(0), ih.address

if mnemonic in ("add", "sub"):
assert len(instruction.operand_index) == 3

Expand Down
15 changes: 13 additions & 2 deletions capa/features/extractors/binexport2/arch/intel/insn.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,17 @@ def extract_insn_number_features(

match = NUMBER_PATTERNS.match_with_be2(be2, ii.instruction_index)
if not match:
if BinExport2InstructionPatternMatcher.from_str("xor reg, reg").match_with_be2(be2, ii.instruction_index):
# for pattern like:
#
# xor eax, eax
#
instruction: BinExport2.Instruction = be2.instruction[ii.instruction_index]
operands: list[BinExport2.Operand] = [
be2.operand[operand_index] for operand_index in instruction.operand_index
]
if operands[0] == operands[1]:
yield Number(0), ih.address
return

value: int = mask_immediate(fhi.arch, match.expression.immediate)
Expand All @@ -91,9 +102,9 @@ def extract_insn_number_features(
yield OperandNumber(match.operand_index, value), ih.address

instruction_index: int = ii.instruction_index
instruction: BinExport2.Instruction = be2.instruction[instruction_index]
current_instruction: BinExport2.Instruction = be2.instruction[instruction_index]

mnemonic: str = get_instruction_mnemonic(be2, instruction)
mnemonic: str = get_instruction_mnemonic(be2, current_instruction)
if mnemonic.startswith("add"):
if 0 < value < MAX_STRUCTURE_SIZE:
yield Offset(value), ih.address
Expand Down
8 changes: 8 additions & 0 deletions capa/features/extractors/ghidra/insn.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,14 @@ def extract_insn_number_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl
# .text:00401145 add esp, 0Ch
return

if insn.getMnemonicString().startswith("XOR"):
# for pattern like:
#
# xor eax, eax
if insn.getNumOperands() == 2:
if insn.getOpObjects(0)[-1] == insn.getOpObjects(1)[-1]:
yield Number(0), ih.address

for i in range(insn.getNumOperands()):
# Exceptions for LEA insn:
# invalid operand encoding, considered numbers instead of offsets
Expand Down
7 changes: 7 additions & 0 deletions capa/features/extractors/ida/insn.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,13 @@ def extract_insn_number_features(
# .text:00401145 add esp, 0Ch
return

if insn.itype == idaapi.NN_xor:
# for pattern like:
#
# xor eax, eax
if insn.ops[0].type == idaapi.o_reg and insn.ops[1].type == idaapi.o_reg and insn.ops[0].reg == insn.ops[1].reg:
yield Number(0), ih.address

for i, op in enumerate(insn.ops):
if op.type == idaapi.o_void:
break
Expand Down
16 changes: 16 additions & 0 deletions capa/features/extractors/viv/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

from typing import Optional

import envi
from vivisect import VivWorkspace
from vivisect.const import XR_TO, REF_CODE

Expand All @@ -28,3 +29,18 @@ def get_coderef_from(vw: VivWorkspace, va: int) -> Optional[int]:
return xrefs[0][XR_TO]
else:
return None


def is_xor(insn: envi.Opcode):
    """
    Report whether the instruction's mnemonic is one of the XOR variants
    recognized here: `xor`, `xorpd`, `xorps`, or `pxor`.

    Only `insn.mnem` is read.
    """
    xor_mnemonics = ("xor", "xorpd", "xorps", "pxor")
    return any(insn.mnem == candidate for candidate in xor_mnemonics)


def is_operands_equal(insn: envi.Opcode) -> bool:
    """
    Check whether the first two operands of an instruction compare equal.

    Only `insn.opers` is read; any operands beyond the second are ignored.

    Returns True when `insn.opers[0] == insn.opers[1]`, and False otherwise,
    including when the instruction has fewer than two operands.
    """
    operands = insn.opers

    # with fewer than two operands there is nothing to compare;
    # answer False instead of letting the indexing below raise IndexError.
    if len(operands) < 2:
        return False

    return operands[0] == operands[1]


def is_zxor(insn: envi.Opcode):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you think we're fine without this helper here? The other ones are fine I think.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair enough

if is_xor(insn):
return is_operands_equal(insn)

return False
11 changes: 9 additions & 2 deletions capa/features/extractors/viv/insn.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,10 +387,10 @@ def extract_insn_nzxor_characteristic_features(
bb: viv_utils.BasicBlock = bbhandle.inner
f: viv_utils.Function = fh.inner

if insn.mnem not in ("xor", "xorpd", "xorps", "pxor"):
if not capa.features.extractors.viv.helpers.is_xor(insn):
return

if insn.opers[0] == insn.opers[1]:
if capa.features.extractors.viv.helpers.is_operands_equal(insn):
return

if is_security_cookie(f, bb, insn):
Expand Down Expand Up @@ -594,6 +594,13 @@ def extract_op_number_features(
insn: envi.Opcode = ih.inner
f: viv_utils.Function = fh.inner

if capa.features.extractors.viv.helpers.is_zxor(insn):
# for pattern like:
#
# xor eax, eax
#
yield Number(0), ih.address

# this is for both x32 and x64
if not isinstance(oper, (envi.archs.i386.disasm.i386ImmOper, envi.archs.i386.disasm.i386ImmMemOper)):
return
Expand Down
2 changes: 2 additions & 0 deletions tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -1021,6 +1021,8 @@ def parametrize(params, values, **kwargs):
("7351f.elf", "function=0x408753,bb=0x408781", capa.features.insn.API("open"), True),
("79abd...", "function=0x10002385,bb=0x10002385", capa.features.common.Characteristic("call $+5"), True),
("946a9...", "function=0x10001510,bb=0x100015c0", capa.features.common.Characteristic("call $+5"), True),
("9324d...", "function=0x40806C,bb=0x40806C,insn=0x40806C", capa.features.insn.Number(0), True),
("mimikatz", "function=0x40105d", capa.features.insn.Number(0), True),
],
# order tests by (file, item)
# so that our LRU cache is most effective.
Expand Down