Skip to content

Commit be87959

Browse files
yavtuk
authored and committed
[bolt][aarch64] Change indirect call instrumentation snippet
The indirect call instrumentation snippet uses the x16 register in the exit handler to branch to the destination target:

__bolt_instr_ind_call_handler:
    msr nzcv, x1
    ldp x0, x1, [sp], #16
    ldr x16, [sp], #16
    ldp x0, x1, [sp], #16
    br x16    <-----

This patch reworks the instrumentation snippet: it calls the instrumentation runtime library through an indirect call instruction and adds a wrapper that stores/loads the target value and the register used by the original indirect call instruction.

Example:
    mov x16, foo
indirectCall:
    adrp x8, Label
    add x8, x8, #:lo12:Label
    blr x8

Before:

Instrumented indirect call:
    stp x0, x1, [sp, #-16]!
    mov x0, x8
    movk x1, #0x0, lsl #48
    movk x1, #0x0, lsl #32
    movk x1, #0x0, lsl #16
    movk x1, #0x0
    stp x0, x1, [sp, #-16]!
    adrp x0, __bolt_instr_ind_call_handler_func
    add x0, x0, #:lo12:__bolt_instr_ind_call_handler_func
    blr x0

__bolt_instr_ind_call_handler: (exit snippet)
    msr nzcv, x1
    ldp x0, x1, [sp], #16
    ldr x16, [sp], #16
    ldp x0, x1, [sp], #16
    br x16    <- overwrites the original value in X16

__bolt_instr_ind_call_handler_func: (entry snippet)
    stp x0, x1, [sp, #-16]!
    mrs x1, nzcv
    adrp x0, __bolt_instr_ind_call_handler
    add x0, x0, #:lo12:__bolt_instr_ind_call_handler
    ldr x0, [x0]
    cmp x0, #0x0
    b.eq __bolt_instr_ind_call_handler
    str x30, [sp, #-16]!
    blr x0    <--- runtime lib store/load all regs
    ldr x30, [sp], #16
    b __bolt_instr_ind_call_handler
_________________________________________________________________________
After:

    mov x16, foo
indirectCall:
    adrp x8, Label
    add x8, x8, #:lo12:Label
    blr x8

Instrumented indirect call:
    stp x0, x30, [sp, #-16]!
    movz/k x0, 1
    stp x8, x0, [sp, #-16]!    ; push address and id for lib
    adrp x8, __bolt_instr_ind_call_handler_func
    add x8, x8, #:lo12:__bolt_instr_ind_call_handler_func
    blr x8    <--- call trampoline instr lib
    ldr x8, [sp], #16    ; restore target address
    ldp x0, x30, [sp], #16
    blr x8    <--- original indirect call instruction

__bolt_instr_ind_call_handler: (exit snippet)
    ret    <---- return to original function with indirect call

__bolt_instr_ind_call_handler_func: (entry snippet)
    adrp x0, __bolt_instr_ind_call_handler
    add x0, x0, #:lo12:__bolt_instr_ind_call_handler
    ldr x0, [x0]
    cmp x0, #0x0
    b.eq __bolt_instr_ind_call_handler
    str x30, [sp, #-16]!
    blr x0    <--- runtime lib store/load all regs
    ldr x30, [sp], #16
    b __bolt_instr_ind_call_handler
1 parent c2b69b1 commit be87959

7 files changed

Lines changed: 202 additions & 82 deletions

File tree

bolt/include/bolt/Core/MCPlusBuilder.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -545,6 +545,11 @@ class MCPlusBuilder {
545545
llvm_unreachable("not implemented");
546546
}
547547

548+
virtual void createDirectBranch(MCInst &Inst, const MCSymbol *Target,
549+
MCContext *Ctx) {
550+
llvm_unreachable("not implemented");
551+
}
552+
548553
virtual MCPhysReg getX86R11() const { llvm_unreachable("not implemented"); }
549554

550555
virtual unsigned getShortBranchOpcode(unsigned Opcode) const {
@@ -2399,7 +2404,7 @@ class MCPlusBuilder {
23992404

24002405
virtual InstructionListType
24012406
createInstrumentedIndirectCall(MCInst &&CallInst, MCSymbol *HandlerFuncAddr,
2402-
int CallSiteID, MCContext *Ctx) {
2407+
size_t CallSiteID, MCContext *Ctx) {
24032408
llvm_unreachable("not implemented");
24042409
return InstructionListType();
24052410
}

bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp

Lines changed: 105 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -48,14 +48,14 @@ static cl::opt<bool> NoLSEAtomics(
4848

4949
namespace {
5050

51-
static void getSystemFlag(MCInst &Inst, MCPhysReg RegName) {
51+
[[maybe_unused]] static void getSystemFlag(MCInst &Inst, MCPhysReg RegName) {
5252
Inst.setOpcode(AArch64::MRS);
5353
Inst.clear();
5454
Inst.addOperand(MCOperand::createReg(RegName));
5555
Inst.addOperand(MCOperand::createImm(AArch64SysReg::NZCV));
5656
}
5757

58-
static void setSystemFlag(MCInst &Inst, MCPhysReg RegName) {
58+
[[maybe_unused]] static void setSystemFlag(MCInst &Inst, MCPhysReg RegName) {
5959
Inst.setOpcode(AArch64::MSR);
6060
Inst.clear();
6161
Inst.addOperand(MCOperand::createImm(AArch64SysReg::NZCV));
@@ -2413,6 +2413,14 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
24132413
return isCompAndBranch(Inst);
24142414
}
24152415

2416+
void createDirectBranch(MCInst &Inst, const MCSymbol *Target,
2417+
MCContext *Ctx) override {
2418+
Inst.setOpcode(AArch64::B);
2419+
Inst.clear();
2420+
Inst.addOperand(MCOperand::createExpr(getTargetExprFor(
2421+
Inst, MCSymbolRefExpr::create(Target, *Ctx), *Ctx, 0)));
2422+
}
2423+
24162424
bool analyzeBranch(InstructionIterator Begin, InstructionIterator End,
24172425
const MCSymbol *&TBB, const MCSymbol *&FBB,
24182426
MCInst *&CondBranch,
@@ -2770,21 +2778,14 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
27702778
}
27712779

27722780
InstructionListType createInstrumentedIndCallHandlerExitBB() const override {
2773-
InstructionListType Insts(5);
27742781
// Code sequence for instrumented indirect call handler:
2775-
// msr nzcv, x1
2776-
// ldp x0, x1, [sp], #16
2777-
// ldr x16, [sp], #16
2778-
// ldp x0, x1, [sp], #16
2779-
// br x16
2780-
setSystemFlag(Insts[0], AArch64::X1);
2781-
createPopRegisters(Insts[1], AArch64::X0, AArch64::X1);
2782-
// Here we load address of the next function which should be called in the
2783-
// original binary to X16 register. Writing to X16 is permitted without
2784-
// needing to restore.
2785-
loadReg(Insts[2], AArch64::X16, AArch64::SP);
2786-
createPopRegisters(Insts[3], AArch64::X0, AArch64::X1);
2787-
createIndirectBranch(Insts[4], AArch64::X16, 0);
2782+
// ret
2783+
2784+
InstructionListType Insts;
2785+
2786+
Insts.emplace_back();
2787+
createReturn(Insts.back());
2788+
27882789
return Insts;
27892790
}
27902791

@@ -2837,14 +2838,28 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
28372838

28382839
InstructionListType createLoadImmediate(const MCPhysReg Dest,
28392840
uint64_t Imm) const override {
2840-
InstructionListType Insts(4);
2841-
int Shift = 48;
2842-
for (int I = 0; I < 4; I++, Shift -= 16) {
2843-
Insts[I].setOpcode(AArch64::MOVKXi);
2844-
Insts[I].addOperand(MCOperand::createReg(Dest));
2845-
Insts[I].addOperand(MCOperand::createReg(Dest));
2846-
Insts[I].addOperand(MCOperand::createImm((Imm >> Shift) & 0xFFFF));
2847-
Insts[I].addOperand(MCOperand::createImm(Shift));
2841+
InstructionListType Insts;
2842+
2843+
Insts.emplace_back();
2844+
MCInst &Inst = Insts.back();
2845+
Inst.clear();
2846+
Inst.setOpcode(AArch64::MOVZXi);
2847+
Inst.addOperand(MCOperand::createReg(Dest));
2848+
Inst.addOperand(MCOperand::createImm(Imm & 0xFFFF));
2849+
Inst.addOperand(MCOperand::createImm(0));
2850+
2851+
int Shift = 16;
2852+
for (int I = 0; I < 3; I++, Shift += 16) {
2853+
const uint64_t ImmVal = (Imm >> Shift) & 0xFFFF;
2854+
if (!ImmVal)
2855+
continue;
2856+
Insts.emplace_back();
2857+
MCInst &Inst = Insts.back();
2858+
Inst.setOpcode(AArch64::MOVKXi);
2859+
Inst.addOperand(MCOperand::createReg(Dest));
2860+
Inst.addOperand(MCOperand::createReg(Dest));
2861+
Inst.addOperand(MCOperand::createImm(ImmVal));
2862+
Inst.addOperand(MCOperand::createImm(Shift));
28482863
}
28492864
return Insts;
28502865
}
@@ -2858,41 +2873,48 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
28582873

28592874
InstructionListType createInstrumentedIndirectCall(MCInst &&CallInst,
28602875
MCSymbol *HandlerFuncAddr,
2861-
int CallSiteID,
2876+
size_t CallSiteID,
28622877
MCContext *Ctx) override {
2863-
InstructionListType Insts;
28642878
// Code sequence used to enter indirect call instrumentation helper:
2865-
// stp x0, x1, [sp, #-16]! createPushRegisters
2866-
// mov target x0 convertIndirectCallToLoad -> orr x0 target xzr
2867-
// mov x1 CallSiteID createLoadImmediate ->
2868-
// movk x1, #0x0, lsl #48
2869-
// movk x1, #0x0, lsl #32
2870-
// movk x1, #0x0, lsl #16
2871-
// movk x1, #0x0
2872-
// stp x0, x1, [sp, #-16]!
2873-
// bl *HandlerFuncAddr createIndirectCall ->
2874-
// adr x0 *HandlerFuncAddr -> adrp + add
2875-
// blr x0
2879+
// snippet requires 2 registers: target address and call site id
2880+
// stp CallIDReg, x30, [sp, #-16]!
2881+
// movz/k CallIDReg, CallSiteID
2882+
// stp TAReg, CallIDReg, [sp, #-16]! ; push address and id for lib
2883+
// adr + add TAReg, *HandlerFuncAddr ; __bolt_instr_ind_call_handler_func
2884+
// blr TAReg
2885+
// ldr TAReg, [sp], #16 ; restore target address
2886+
// ldp CallIDReg, x30, [sp], #16
2887+
// blr TAReg
2888+
2889+
const MCRegister TAReg = CallInst.getOperand(0).getReg();
2890+
const MCRegister CallIDReg =
2891+
TAReg != AArch64::X0 ? AArch64::X0 : AArch64::X1;
2892+
2893+
InstructionListType Insts;
28762894
Insts.emplace_back();
2877-
createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1);
2878-
Insts.emplace_back(CallInst);
2879-
convertIndirectCallToLoad(Insts.back(), AArch64::X0);
2880-
InstructionListType LoadImm =
2881-
createLoadImmediate(getIntArgRegister(1), CallSiteID);
2895+
createPushRegisters(Insts.back(), CallIDReg, AArch64::LR);
2896+
2897+
InstructionListType LoadImm = createLoadImmediate(CallIDReg, CallSiteID);
28822898
Insts.insert(Insts.end(), LoadImm.begin(), LoadImm.end());
2899+
28832900
Insts.emplace_back();
2884-
createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1);
2901+
createPushRegisters(Insts.back(), TAReg, CallIDReg);
2902+
28852903
Insts.resize(Insts.size() + 2);
2886-
InstructionListType Addr =
2887-
materializeAddress(HandlerFuncAddr, Ctx, AArch64::X0);
2904+
InstructionListType Addr = materializeAddress(HandlerFuncAddr, Ctx, TAReg);
28882905
assert(Addr.size() == 2 && "Invalid Addr size");
28892906
std::copy(Addr.begin(), Addr.end(), Insts.end() - Addr.size());
2907+
2908+
Insts.emplace_back();
2909+
createIndirectCallInst(Insts.back(), false, TAReg);
2910+
2911+
Insts.emplace_back();
2912+
loadReg(Insts.back(), TAReg, getStackPointer());
2913+
28902914
Insts.emplace_back();
2891-
createIndirectCallInst(Insts.back(), isTailCall(CallInst), AArch64::X0);
2915+
createPopRegisters(Insts.back(), CallIDReg, AArch64::LR);
28922916

2893-
// Carry over metadata including tail call marker if present.
2894-
stripAnnotations(Insts.back());
2895-
moveAnnotations(std::move(CallInst), Insts.back());
2917+
Insts.emplace_back(CallInst);
28962918

28972919
return Insts;
28982920
}
@@ -2901,43 +2923,53 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
29012923
createInstrumentedIndCallHandlerEntryBB(const MCSymbol *InstrTrampoline,
29022924
const MCSymbol *IndCallHandler,
29032925
MCContext *Ctx) override {
2904-
// Code sequence used to check whether InstrTampoline was initialized
2926+
// Code sequence used to check whether InstrTrampoline was initialized
29052927
// and call it if so, returns via IndCallHandler
2906-
// stp x0, x1, [sp, #-16]!
2907-
// mrs x1, nzcv
2908-
// adr x0, InstrTrampoline -> adrp + add
2909-
// ldr x0, [x0]
2928+
// adrp x0, InstrTrampoline
2929+
// ldr x0, [x0, #lo12:InstrTrampoline]
29102930
// subs x0, x0, #0x0
29112931
// b.eq IndCallHandler
29122932
// str x30, [sp, #-16]!
29132933
// blr x0
29142934
// ldr x30, [sp], #16
29152935
// b IndCallHandler
29162936
InstructionListType Insts;
2937+
2938+
// load handler address
2939+
MCInst InstAdrp;
2940+
InstAdrp.setOpcode(AArch64::ADRP);
2941+
InstAdrp.addOperand(MCOperand::createReg(getIntArgRegister(0)));
2942+
InstAdrp.addOperand(MCOperand::createImm(0));
2943+
setOperandToSymbolRef(InstAdrp, /* OpNum */ 1, InstrTrampoline,
2944+
/* Addend */ 0, Ctx, ELF::R_AARCH64_ADR_GOT_PAGE);
2945+
Insts.emplace_back(InstAdrp);
2946+
2947+
MCInst InstLoad;
2948+
InstLoad.setOpcode(AArch64::LDRXui);
2949+
InstLoad.addOperand(MCOperand::createReg(getIntArgRegister(0)));
2950+
InstLoad.addOperand(MCOperand::createReg(getIntArgRegister(0)));
2951+
InstLoad.addOperand(MCOperand::createImm(0));
2952+
setOperandToSymbolRef(InstLoad, /* OpNum */ 2, InstrTrampoline,
2953+
/* Addend */ 0, Ctx, ELF::R_AARCH64_LD64_GOT_LO12_NC);
2954+
Insts.emplace_back(InstLoad);
2955+
2956+
InstructionListType CmpJmp =
2957+
createCmpJE(getIntArgRegister(0), 0, IndCallHandler, Ctx);
2958+
Insts.insert(Insts.end(), CmpJmp.begin(), CmpJmp.end());
2959+
29172960
Insts.emplace_back();
2918-
createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1);
2919-
Insts.emplace_back();
2920-
getSystemFlag(Insts.back(), getIntArgRegister(1));
2921-
Insts.emplace_back();
2922-
Insts.emplace_back();
2923-
InstructionListType Addr =
2924-
materializeAddress(InstrTrampoline, Ctx, AArch64::X0);
2925-
std::copy(Addr.begin(), Addr.end(), Insts.end() - Addr.size());
2926-
assert(Addr.size() == 2 && "Invalid Addr size");
2927-
Insts.emplace_back();
2928-
loadReg(Insts.back(), AArch64::X0, AArch64::X0);
2929-
InstructionListType cmpJmp =
2930-
createCmpJE(AArch64::X0, 0, IndCallHandler, Ctx);
2931-
Insts.insert(Insts.end(), cmpJmp.begin(), cmpJmp.end());
2932-
Insts.emplace_back();
2933-
storeReg(Insts.back(), AArch64::LR, AArch64::SP);
2961+
storeReg(Insts.back(), AArch64::LR, getStackPointer());
2962+
29342963
Insts.emplace_back();
29352964
Insts.back().setOpcode(AArch64::BLR);
2936-
Insts.back().addOperand(MCOperand::createReg(AArch64::X0));
2965+
Insts.back().addOperand(MCOperand::createReg(getIntArgRegister(0)));
2966+
29372967
Insts.emplace_back();
2938-
loadReg(Insts.back(), AArch64::LR, AArch64::SP);
2968+
loadReg(Insts.back(), AArch64::LR, getStackPointer());
2969+
29392970
Insts.emplace_back();
2940-
createDirectCall(Insts.back(), IndCallHandler, Ctx, /*IsTailCall*/ true);
2971+
createDirectBranch(Insts.back(), IndCallHandler, Ctx);
2972+
29412973
return Insts;
29422974
}
29432975

bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -821,7 +821,7 @@ class RISCVMCPlusBuilder : public MCPlusBuilder {
821821

822822
InstructionListType createInstrumentedIndirectCall(MCInst &&CallInst,
823823
MCSymbol *HandlerFuncAddr,
824-
int CallSiteID,
824+
size_t CallSiteID,
825825
MCContext *Ctx) override {
826826
// Code sequence used to enter indirect call instrumentation helper:
827827
// addi sp, sp, -0x10

bolt/lib/Target/X86/X86MCPlusBuilder.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3123,7 +3123,7 @@ class X86MCPlusBuilder : public MCPlusBuilder {
31233123

31243124
InstructionListType createInstrumentedIndirectCall(MCInst &&CallInst,
31253125
MCSymbol *HandlerFuncAddr,
3126-
int CallSiteID,
3126+
size_t CallSiteID,
31273127
MCContext *Ctx) override {
31283128
// Check if the target address expression used in the original indirect call
31293129
// uses the stack pointer, which we are going to clobber.

bolt/runtime/instr.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1696,7 +1696,7 @@ extern "C" __attribute((naked)) void __bolt_instr_indirect_call()
16961696
#if defined(__aarch64__)
16971697
// clang-format off
16981698
__asm__ __volatile__(SAVE_ALL
1699-
"ldp x0, x1, [sp, #288]\n"
1699+
"ldp x0, x1, [sp, #272]\n"
17001700
"bl instrumentIndirectCall\n"
17011701
RESTORE_ALL
17021702
"ret\n"
@@ -1733,7 +1733,7 @@ extern "C" __attribute((naked)) void __bolt_instr_indirect_tailcall()
17331733
#if defined(__aarch64__)
17341734
// clang-format off
17351735
__asm__ __volatile__(SAVE_ALL
1736-
"ldp x0, x1, [sp, #288]\n"
1736+
"ldp x0, x1, [sp, #272]\n"
17371737
"bl instrumentIndirectCall\n"
17381738
RESTORE_ALL
17391739
"ret\n"

bolt/runtime/sys_aarch64.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,12 @@
1818
"stp x24, x25, [sp, #-16]!\n" \
1919
"stp x26, x27, [sp, #-16]!\n" \
2020
"stp x28, x29, [sp, #-16]!\n" \
21-
"str x30, [sp,#-16]!\n"
21+
"mrs x29, nzcv\n" \
22+
"stp x29, x30, [sp, #-16]!\n"
2223
// Mirrors SAVE_ALL
2324
#define RESTORE_ALL \
24-
"ldr x30, [sp], #16\n" \
25+
"ldp x29, x30, [sp], #16\n" \
26+
"msr nzcv, x29\n" \
2527
"ldp x28, x29, [sp], #16\n" \
2628
"ldp x26, x27, [sp], #16\n" \
2729
"ldp x24, x25, [sp], #16\n" \

0 commit comments

Comments
 (0)