Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
f574cff
Add auto-sync updater.
Rot127 Jun 19, 2023
6854706
Update Capstone core with auto-sync changes.
Rot127 Jun 19, 2023
7eae10c
Update ARM via auto-sync.
Rot127 Jun 19, 2023
5801a9c
Make changes to arch modules which are introduced by auto-sync.
Rot127 Jun 19, 2023
9a0d41e
Update tests for ARM.
Rot127 Jun 19, 2023
e7ba1aa
Fix build warnings for make
Rot127 Jun 19, 2023
4af8382
Remove meson.build
Rot127 Jun 19, 2023
3eb96f7
Print shift amount in decimal
Rot127 Jun 20, 2023
a11c27f
Patch non LLVM register alias.
Rot127 Jun 21, 2023
690d9a5
Change type of immediate operand to unsiged (due to: #771)
Rot127 Jun 21, 2023
257caf4
Replace all occurances of a register with its alias.
Rot127 Jun 21, 2023
b1c5dc0
Fix printing of signed imms
Rot127 Jun 21, 2023
055f4e9
Print rotate amount in decimal
Rot127 Jun 21, 2023
1ca407f
CHange imm type to int64_t to match LLVM imm type.
Rot127 Jun 21, 2023
ca30bcc
Fix search for register names, by completing string first.
Rot127 Jun 21, 2023
1e0d032
Print ModImm operands always in decimal
Rot127 Jun 21, 2023
e59f614
Use number format of previous capstone version.
Rot127 Jun 26, 2023
c6ecb13
Correct implicit writes and update_flags according to SBit.
Rot127 Jun 29, 2023
13be852
Add missing test for RegImmShift
Rot127 Jun 29, 2023
cefc8f2
Reverse incorrect comparision.
Rot127 Jun 29, 2023
2c6e52e
Set shift information for move instructions.
Rot127 Jun 29, 2023
252bc82
Set mem access for all memory operands
Rot127 Jun 29, 2023
0e58490
Set subtracted flag if offset is negative.
Rot127 Jul 1, 2023
996cc8d
Add flag for post-index memory operands.
Rot127 Jul 1, 2023
be8df60
Add detail op for BX_RET and MOVPCLR
Rot127 Jul 1, 2023
ab5d4d5
Use instruction post_index operand.
Rot127 Jul 1, 2023
b3f0056
Add VPOP and VPUSH as unique CS IDs.
Rot127 Jul 2, 2023
f9651ad
Add shifting info for MOVsr.
Rot127 Jul 2, 2023
46e9eb2
Add TODOs.
Rot127 Jul 2, 2023
5ab878b
Add in LLVM hardcoded operands to detail.
Rot127 Jul 2, 2023
b9bf59e
Move detail editing from InstPrinter to Mapping
Rot127 Jul 2, 2023
11c67ef
Formatting
Rot127 Jul 2, 2023
10dab16
Add removed check.
Rot127 Jul 2, 2023
5792f08
Add writeback register and constraints to RFEI instructions.
Rot127 Jul 3, 2023
407557f
Translate shift immediate
Rot127 Jul 3, 2023
e609c7d
Print negative immediates
Rot127 Jul 3, 2023
8ee1b91
Remove duplicate invalid entry
Rot127 Jul 4, 2023
80ea1cf
Add CS groups to instructions
Rot127 Jul 4, 2023
8870c79
Fix write attriutes of stores.
Rot127 Jul 4, 2023
9eb1fe1
Add missing names of added instructions
Rot127 Jul 4, 2023
d64f749
Fix LLVM bug
Rot127 Jul 5, 2023
e5f22c6
Add more post_index flags
Rot127 Jul 5, 2023
c109849
http -> https
Rot127 Jul 5, 2023
4bae1df
Make generated functions static
Rot127 Jul 5, 2023
5815d9e
Remove tab prefix for alias instructions.
Rot127 Jul 5, 2023
6a0db6b
Set ValidateMCOperand to NULL.
Rot127 Jul 5, 2023
082629b
Fix AddrMode3Operand operands
Rot127 Jul 6, 2023
cf483fa
Allow getting system and banked register name via API
Rot127 Jul 7, 2023
a6e43f4
Add writeback to STC/LDC instructions.
Rot127 Jul 7, 2023
19bf8f1
Fix (hopefully) last case where disp is negative and subtracted = true
Rot127 Jul 7, 2023
683a595
Remove accidentially introduced regressions
Rot127 Jul 7, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "suite/auto-sync/vendor/tree-sitter-cpp"]
path = suite/auto-sync/vendor/tree-sitter-cpp
url = https://github.com/tree-sitter/tree-sitter-cpp.git
18 changes: 12 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ set(SOURCES_ENGINE
Mapping.c
MCInst.c
MCInstrDesc.c
MCInstPrinter.c
MCRegisterInfo.c
SStream.c
utils.c
Expand All @@ -115,6 +116,7 @@ set(HEADERS_ENGINE
MCFixedLenDisassembler.h
MCInst.h
MCInstrDesc.h
MCInstPrinter.h
MCRegisterInfo.h
SStream.h
utils.h
Expand All @@ -141,6 +143,7 @@ set(HEADERS_COMMON
include/capstone/sh.h
include/capstone/tricore.h
include/capstone/platform.h
include/capstone/sh.h
)

set(TEST_SOURCES test_basic.c test_detail.c test_skipdata.c test_iter.c)
Expand All @@ -149,28 +152,30 @@ set(TEST_SOURCES test_basic.c test_detail.c test_skipdata.c test_iter.c)
# ARM architecture module. Everything in this block only takes effect when
# CAPSTONE_ARM_SUPPORT is enabled: it defines CAPSTONE_HAS_ARM, records the ARM
# sources and headers in SOURCES_ARM / HEADERS_ARM and adds the ARM test.
if(CAPSTONE_ARM_SUPPORT)
# NOTE(review): add_definitions() is directory-scoped, so the define is not
# limited to a single target.
add_definitions(-DCAPSTONE_HAS_ARM)
# C translation units of the ARM module.
set(SOURCES_ARM
arch/ARM/ARMBaseInfo.c
arch/ARM/ARMDisassembler.c
arch/ARM/ARMDisassemblerExtension.c
arch/ARM/ARMInstPrinter.c
arch/ARM/ARMMapping.c
arch/ARM/ARMModule.c
)
# Headers and .inc files of the ARM module.
set(HEADERS_ARM
arch/ARM/ARMAddressingModes.h
arch/ARM/ARMBaseInfo.h
arch/ARM/ARMDisassembler.h
arch/ARM/ARMDisassemblerExtension.h
arch/ARM/ARMInstPrinter.h
arch/ARM/ARMLinkage.h
arch/ARM/ARMMapping.h
arch/ARM/ARMGenAsmWriter.inc
arch/ARM/ARMGenDisassemblerTables.inc
arch/ARM/ARMGenInstrInfo.inc
arch/ARM/ARMGenRegisterInfo.inc
arch/ARM/ARMGenSubtargetInfo.inc
arch/ARM/ARMMappingInsn.inc
arch/ARM/ARMMappingInsnOp.inc
arch/ARM/ARMGenRegisterName.inc
arch/ARM/ARMGenRegisterName_digit.inc
arch/ARM/ARMGenCSFeatureName.inc
arch/ARM/ARMGenCSMappingInsn.inc
arch/ARM/ARMGenCSMappingInsnOp.inc
arch/ARM/ARMGenCSMappingInsnName.inc
arch/ARM/ARMGenSystemRegister.inc
arch/ARM/ARMMappingInsnName.inc
)
# Append the ARM test to the list of test sources.
set(TEST_SOURCES ${TEST_SOURCES} test_arm.c)
endif()
Expand Down Expand Up @@ -696,6 +701,7 @@ if(CAPSTONE_INSTALL)
include("GNUInstallDirs")

install(FILES ${HEADERS_COMMON} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/capstone)
install(FILES ${HEADERS_INC} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/capstone/inc)

configure_file(capstone.pc.in ${CMAKE_BINARY_DIR}/capstone.pc @ONLY)
install(FILES ${CMAKE_BINARY_DIR}/capstone.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
Expand Down
25 changes: 25 additions & 0 deletions HACK.TXT
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,35 @@ Coding style
- C code follows Linux kernel coding style, using tabs for indentation.
- Python code uses 4 spaces for indentation.

Updating an Architecture
------------------------

The update tool for Capstone is called `auto-sync` and can be found in `suite/auto-sync`.

Not all architectures are supported yet.
Run `suite/auto-sync/Update-Arch.sh -h` to get a list of currently supported architectures.

The documentation on how to update an architecture with `auto-sync`, or how to refactor
an architecture module, can be found in [docs/AutoSync.md](docs/AutoSync.md).

If a module does not support `auto-sync` yet, it is highly recommended to refactor it
instead of attempting to update it manually.
Refactoring takes less time and updates the module in the process.

The one exception is `x86`. In LLVM we use several emitter backends to generate C code.
One of those LLVM backends (the `DecoderEmitter`) has two versions.
One for `x86` and another for all the other architectures.
Until now it was not worth refactoring this unique `x86` backend, so `x86` is
currently not supported.

Adding an architecture
----------------------

If your architecture is supported in LLVM or one of its forks, you can use `auto-sync` to
add the new module.

<!-- TODO: Move this info to the auto-sync docs -->

Obviously, you first need to write all the logic and put it in a new directory arch/newarch
Then, you have to modify other files.
(You can look for one architecture such as EVM in these files to get what you need to do)
Expand Down
14 changes: 13 additions & 1 deletion MCInst.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ void MCInst_Init(MCInst *inst)
inst->size = 0;
inst->has_imm = false;
inst->op1_size = 0;
inst->writeback = false;
inst->ac_idx = 0;
inst->popcode_adjust = 0;
inst->assembly[0] = '\0';
Expand Down Expand Up @@ -268,3 +267,16 @@ bool MCInst_opIsTying(const MCInst *MI, unsigned OpNum)
assert(OpNum < MAX_MC_OPS && "Maximum number of MC operands exceeded.");
return MI->tied_op_idx[OpNum] != -1;
}

/// Returns the value of the @MCInst operand at index @OpNum.
///
/// Register operands yield the result of MCOperand_getReg(), immediate
/// operands the result of MCOperand_getImm(), both converted to uint64_t.
/// Any other operand kind is a usage error: it trips an assertion in debug
/// builds and yields 0 when assertions are compiled out.
uint64_t MCInst_getOpVal(MCInst *MI, unsigned OpNum)
{
	assert(OpNum < MAX_MC_OPS);
	MCOperand *op = MCInst_getOperand(MI, OpNum);
	if (MCOperand_isReg(op))
		return MCOperand_getReg(op);
	if (MCOperand_isImm(op))
		return MCOperand_getImm(op);

	assert(0 && "Operand type not handled in this getter.");
	// Only reached with NDEBUG: a non-void function must not fall off its end.
	return 0;
}
3 changes: 2 additions & 1 deletion MCInst.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ struct MCOperand {
kDFPImmediate, ///< Double-Floating-point immediate operand.
kExpr, ///< Relocatable immediate operand.
kInst ///< Sub-instruction operand.

} MachineOperandType;
unsigned char Kind;

Expand Down Expand Up @@ -162,4 +161,6 @@ bool MCInst_opIsTied(const MCInst *MI, unsigned OpNum);

bool MCInst_opIsTying(const MCInst *MI, unsigned OpNum);

uint64_t MCInst_getOpVal(MCInst *MI, unsigned OpNum);

#endif
227 changes: 227 additions & 0 deletions MCInstPrinter.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
/* Capstone Disassembly Engine */
/* By Rot127 <[email protected]>, 2023 */

#include "MCInstPrinter.h"
#include "cs_priv.h"
#include <capstone/platform.h>

extern bool ARM_getFeatureBits(unsigned int mode, unsigned int feature);

/// Tests whether feature @Value is enabled for the handle @MI belongs to.
///
/// Dispatches on the architecture of MI->csh; only CS_ARCH_ARM is handled
/// here. Any other architecture asserts in debug builds and reports false
/// when assertions are compiled out.
static bool testFeatureBits(const MCInst *MI, uint32_t Value)
{
	assert(MI && MI->csh);
	switch (MI->csh->arch) {
	case CS_ARCH_ARM:
		return ARM_getFeatureBits(MI->csh->mode, Value);
	default:
		assert(0 && "Not implemented for current arch.");
		// Without this return the default label would fall through
		// into another architecture's query once asserts are disabled.
		return false;
	}
}

/// Evaluates a single alias pattern condition @C against @MI.
///
/// Feature conditions (Feature, NegFeature, OrFeature, OrNegFeature and
/// EndOrFeatures) do not consume an operand. Every other kind reads the
/// operand at *@OpIdx and advances *@OpIdx by one.
///
/// @OrPredicateResult accumulates the outcome of an Or-feature list. It is
/// returned, and reset to false, when the EndOrFeatures marker is reached.
///
/// Returns true if the condition holds. An unknown kind, or a Custom
/// condition without a validator in @M, asserts in debug builds and
/// evaluates to false when assertions are compiled out.
static bool matchAliasCondition(MCInst *MI, const MCRegisterInfo *MRI,
				unsigned *OpIdx, const AliasMatchingData *M,
				const AliasPatternCond *C,
				bool *OrPredicateResult)
{
	// Feature tests are special, they don't consume operands.
	if (C->Kind == AliasPatternCond_K_Feature)
		return testFeatureBits(MI, C->Value);
	if (C->Kind == AliasPatternCond_K_NegFeature)
		return !testFeatureBits(MI, C->Value);
	// For feature tests where just one feature is required in a list, set the
	// predicate result bit to whether the expression will return true, and only
	// return the real result at the end of list marker.
	if (C->Kind == AliasPatternCond_K_OrFeature) {
		*OrPredicateResult |= testFeatureBits(MI, C->Value);
		return true;
	}
	if (C->Kind == AliasPatternCond_K_OrNegFeature) {
		*OrPredicateResult |= !(testFeatureBits(MI, C->Value));
		return true;
	}
	if (C->Kind == AliasPatternCond_K_EndOrFeatures) {
		bool Res = *OrPredicateResult;
		*OrPredicateResult = false;
		return Res;
	}

	// Get and consume an operand.
	MCOperand *Opnd = MCInst_getOperand(MI, *OpIdx);
	++(*OpIdx);

	// Check the specific condition for the operand.
	switch (C->Kind) {
	case AliasPatternCond_K_Imm:
		// Operand must be a specific immediate.
		return MCOperand_isImm(Opnd) &&
		       MCOperand_getImm(Opnd) == (int32_t)C->Value;
	case AliasPatternCond_K_Reg:
		// Operand must be a specific register.
		return MCOperand_isReg(Opnd) && MCOperand_getReg(Opnd) == C->Value;
	case AliasPatternCond_K_TiedReg:
		// Operand must match the register of another operand.
		return MCOperand_isReg(Opnd) &&
		       MCOperand_getReg(Opnd) ==
			       MCOperand_getReg(MCInst_getOperand(MI, C->Value));
	case AliasPatternCond_K_RegClass:
		// Operand must be a register in this class. Value is a register class
		// id.
		return MCOperand_isReg(Opnd) &&
		       MCRegisterClass_contains(
			       MCRegisterInfo_getRegClass(MRI, C->Value),
			       MCOperand_getReg(Opnd));
	case AliasPatternCond_K_Custom:
		// Operand must match some custom criteria.
		assert(M->ValidateMCOperand && "A custom validator should be set but isn't.");
		// With asserts disabled a missing validator must not be called
		// through a NULL pointer; treat the condition as not matched.
		if (!M->ValidateMCOperand)
			return false;
		return M->ValidateMCOperand(Opnd, C->Value);
	case AliasPatternCond_K_Ignore:
		// Operand can be anything.
		return true;
	case AliasPatternCond_K_Feature:
	case AliasPatternCond_K_NegFeature:
	case AliasPatternCond_K_OrFeature:
	case AliasPatternCond_K_OrNegFeature:
	case AliasPatternCond_K_EndOrFeatures:
		assert(0 && "handled earlier");
		return false;
	}
	assert(0 && "invalid kind");
	// Only reached with NDEBUG: never fall off the end of a non-void function.
	return false;
}

/// Returns true if @P is a usable table entry, i.e. at least one of its
/// Opcode, PatternStart and NumPatterns fields is non-zero.
/// An entry whose three fields are all zero is reported as invalid.
static inline bool validOpToPatter(const PatternsForOpcode *P)
{
	return P->Opcode != 0 || P->PatternStart != 0 || P->NumPatterns != 0;
}

/// Looks up the alias assembly string for @MI in the alias tables @M.
///
/// Returns a pointer into M->AsmStrings for the first pattern of the
/// instruction's opcode whose conditions all hold. Returns NULL if the opcode
/// has no entry in M->OpToPatterns, if a visited pattern expects a different
/// number of operands than @MI has, or if no pattern matches.
const char *matchAliasPatterns(MCInst *MI, const AliasMatchingData *M)
{
	// Linear scan over the opcode table. It stops at the first entry whose
	// opcode is not smaller than the wanted one, or at the first invalid
	// (all-zero) entry.
	const unsigned WantedOpcode = MI->Opcode;
	const PatternsForOpcode *Entry = M->OpToPatterns;
	uint32_t EntryOpcode = Entry->Opcode;
	while (EntryOpcode < WantedOpcode && validOpToPatter(Entry)) {
		++Entry;
		EntryOpcode = Entry->Opcode;
	}
	// No aliases for this opcode.
	if (EntryOpcode != WantedOpcode || !validOpToPatter(Entry))
		return NULL;

	// Try all patterns registered for this opcode, in table order.
	uint32_t AliasStrOffset = ~0U;
	const AliasPattern *Cur = M->Patterns + Entry->PatternStart;
	const AliasPattern *const End = Cur + Entry->NumPatterns;
	for (; Cur != End; ++Cur) {
		// Check operand count first.
		if (MCInst_getNumOperands(MI) != Cur->NumOperands)
			return NULL;

		// Walk the pattern's conditions until one fails or all are done.
		const AliasPatternCond *Cond = M->PatternConds + Cur->AliasCondStart;
		const AliasPatternCond *const CondEnd = Cond + Cur->NumConds;
		unsigned NextOp = 0;
		bool OrResult = false;
		while (Cond != CondEnd &&
		       matchAliasCondition(MI, MI->MRI, &NextOp, M, Cond, &OrResult))
			++Cond;

		// Reaching the end means every condition matched.
		if (Cond == CondEnd) {
			AliasStrOffset = Cur->AsmStrOffset;
			break;
		}
	}

	// If no alias matched, don't print an alias.
	if (AliasStrOffset == ~0U)
		return NULL;

	// The offset should point to the beginning of a null terminated alias
	// string, so it should either be zero or be preceded by a null byte.
	return M->AsmStrings + AliasStrOffset;
}

// TODO Add functionality to toggle the flag.
/// Reports whether markup output is enabled. Currently always false.
bool getUseMarkup(void)
{
	const bool UseMarkup = false;
	return UseMarkup;
}

/// Utility function to make adding markup simpler.
/// Yields @s unchanged when getUseMarkup() reports true, and an empty string
/// otherwise.
const char *markup(const char *s)
{
	static const char *empty = "";
	return getUseMarkup() ? s : empty;
}

// Binary search for @encoding in the IndexType array @index of @size entries.
// The search is only valid if the array is sorted by ascending encoding.
// Returns the index of the matching entry, or -1 if there is none. Because the
// return type is unsigned, callers see -1 as UINT_MAX.
// An empty or NULL array is reported as "not found".
unsigned int binsearch_IndexTypeEncoding(const struct IndexType *index, size_t size, uint16_t encoding)
{
	// Without this guard `size - 1` would wrap around for an empty array.
	if (!index || size == 0)
		return -1;

	size_t left = 0;
	size_t right = size - 1;

	// Fast reject: outside the range covered by the table.
	if (encoding < index[left].encoding || encoding > index[right].encoding)
		return -1;

	while (left <= right) {
		// Same midpoint as (left + right) / 2, but cannot overflow.
		size_t m = left + (right - left) / 2;
		if (encoding == index[m].encoding)
			return m;

		if (encoding < index[m].encoding) {
			// `m - 1` must never wrap around below index 0.
			if (m == 0)
				break;
			right = m - 1;
		} else {
			left = m + 1;
		}
	}

	// not found
	return -1;
}

// Binary search for @name in the IndexTypeStr array @index of @size entries.
// The search is only valid if the array is sorted by name in strcmp() order.
// Returns the index of the matching entry, or -1 if there is none. Because the
// return type is unsigned, callers see -1 as UINT_MAX.
// An empty array, a NULL array or a NULL name is reported as "not found".
unsigned int binsearch_IndexTypeStrEncoding(const struct IndexTypeStr *index, size_t size, const char *name)
{
	// Without this guard `size - 1` would wrap around for an empty array.
	if (!index || !name || size == 0)
		return -1;

	size_t left = 0;
	size_t right = size - 1;

	// strcmp() returns a signed int. Storing it in an unsigned type would make
	// "< 0" always false and "> 0" true for every negative result.
	int cmp_first = strcmp(name, index[left].name);
	int cmp_last = strcmp(name, index[right].name);
	if (cmp_first < 0 || cmp_last > 0)
		// not found: sorts before the first or after the last entry
		return -1;

	while (left <= right) {
		// Same midpoint as (left + right) / 2, but cannot overflow.
		size_t m = left + (right - left) / 2;
		int cmp = strcmp(name, index[m].name);
		if (cmp == 0)
			return m;

		if (cmp < 0) {
			// `m - 1` must never wrap around below index 0.
			if (m == 0)
				break;
			right = m - 1;
		} else {
			left = m + 1;
		}
	}

	// not found
	return -1;
}
Loading