-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[LoongArch] Insert nops and emit align reloc when handle alignment directive #72962
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,8 +16,11 @@ | |
| #include "llvm/MC/MCAssembler.h" | ||
| #include "llvm/MC/MCContext.h" | ||
| #include "llvm/MC/MCELFObjectWriter.h" | ||
| #include "llvm/MC/MCExpr.h" | ||
| #include "llvm/MC/MCSection.h" | ||
| #include "llvm/MC/MCValue.h" | ||
| #include "llvm/Support/EndianStream.h" | ||
| #include "llvm/Support/MathExtras.h" | ||
|
|
||
| #define DEBUG_TYPE "loongarch-asmbackend" | ||
|
|
||
|
|
@@ -174,6 +177,72 @@ void LoongArchAsmBackend::applyFixup(const MCAssembler &Asm, | |
| } | ||
| } | ||
|
|
||
| // Linker relaxation may change code size, so when it is enabled we have to | ||
| // insert Nops for the .align directive. The linker can then satisfy the | ||
| // alignment by removing Nops. | ||
| // Stores the total Nop size to insert in Size; returns whether to insert. | ||
| bool LoongArchAsmBackend::shouldInsertExtraNopBytesForCodeAlign( | ||
| const MCAlignFragment &AF, unsigned &Size) { | ||
| // Calculate the Nops size only when linker relaxation is enabled. | ||
| const MCSubtargetInfo *STI = AF.getSubtargetInfo(); | ||
| if (!STI->hasFeature(LoongArch::FeatureRelax)) | ||
| return false; | ||
|
|
||
| // Ignore alignment if MaxBytesToEmit is less than the minimum Nop size. | ||
|
||
| const unsigned MinNopLen = 4; | ||
| if (AF.getMaxBytesToEmit() < MinNopLen) | ||
| return false; | ||
| Size = AF.getAlignment().value() - MinNopLen; | ||
| return AF.getAlignment() > MinNopLen; | ||
| } | ||
|
|
||
| // We need to insert an R_LARCH_ALIGN relocation to indicate the position of | ||
| // the Nops and the total number of Nop bytes that have been inserted when | ||
| // linker relaxation is enabled. | ||
| // The function inserts a fixup_loongarch_align fixup, which will eventually | ||
| // be converted to an R_LARCH_ALIGN relocation. | ||
| // The improved R_LARCH_ALIGN requires a symbol index. The lowest 8 bits of | ||
| // the addend represent the alignment (as a power-of-two exponent) and the | ||
| // other bits of the addend represent the maximum number of bytes to emit. | ||
| // A maximum number of bytes of zero means the emit limit is ignored. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Seems this is not the same as GAS.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The psABI docs don't mention it, but GAS and LD really do it. |
||
| bool LoongArchAsmBackend::shouldInsertFixupForCodeAlign( | ||
| MCAssembler &Asm, const MCAsmLayout &Layout, MCAlignFragment &AF) { | ||
| // Insert the fixup only when linker relaxation is enabled. | ||
| const MCSubtargetInfo *STI = AF.getSubtargetInfo(); | ||
| if (!STI->hasFeature(LoongArch::FeatureRelax)) | ||
| return false; | ||
|
|
||
| // Compute the number of Nop bytes to insert. If no Nops need to be | ||
| // inserted, there is nothing to relocate, so simply return. | ||
| unsigned Count; | ||
| if (!shouldInsertExtraNopBytesForCodeAlign(AF, Count)) | ||
| return false; | ||
|
|
||
| MCSection *Sec = AF.getParent(); | ||
| MCContext &Ctx = Asm.getContext(); | ||
| const MCExpr *Dummy = MCConstantExpr::create(0, Ctx); | ||
| // Create the fixup_loongarch_align fixup. | ||
| MCFixup Fixup = | ||
| MCFixup::create(0, Dummy, MCFixupKind(LoongArch::fixup_loongarch_align)); | ||
| const MCSymbolRefExpr *MCSym = getSecToAlignSym()[Sec]; | ||
| if (MCSym == nullptr) { | ||
| // Create a temporary symbol whose value is meant to be zero. | ||
| MCSymbol *Sym = Ctx.createTempSymbol(".Lla-relax-align", false); | ||
|
||
| Sym->setFragment(&*Sec->getBeginSymbol()->getFragment()); | ||
| Asm.registerSymbol(*Sym); | ||
| MCSym = MCSymbolRefExpr::create(Sym, Ctx); | ||
| getSecToAlignSym()[Sec] = MCSym; | ||
| } | ||
|
|
||
| uint64_t FixedValue = 0; | ||
| unsigned Lo = Log2_64(Count) + 1; | ||
| unsigned Hi = AF.getMaxBytesToEmit() >= Count ? 0 : AF.getMaxBytesToEmit(); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Seems this is not the same as GAS.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Make things easier. They behave same when static-link. |
||
| MCValue Value = MCValue::get(MCSym, nullptr, Hi << 8 | Lo); | ||
| Asm.getWriter().recordRelocation(Asm, Layout, &AF, Fixup, Value, FixedValue); | ||
|
|
||
| return true; | ||
| } | ||
|
|
||
| bool LoongArchAsmBackend::shouldForceRelocation(const MCAssembler &Asm, | ||
| const MCFixup &Fixup, | ||
| const MCValue &Target, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,7 +17,9 @@ | |
| #include "MCTargetDesc/LoongArchFixupKinds.h" | ||
| #include "MCTargetDesc/LoongArchMCTargetDesc.h" | ||
| #include "llvm/MC/MCAsmBackend.h" | ||
| #include "llvm/MC/MCExpr.h" | ||
| #include "llvm/MC/MCFixupKindInfo.h" | ||
| #include "llvm/MC/MCSection.h" | ||
| #include "llvm/MC/MCSubtargetInfo.h" | ||
|
|
||
| namespace llvm { | ||
|
|
@@ -27,6 +29,7 @@ class LoongArchAsmBackend : public MCAsmBackend { | |
| uint8_t OSABI; | ||
| bool Is64Bit; | ||
| const MCTargetOptions &TargetOptions; | ||
| DenseMap<MCSection *, const MCSymbolRefExpr *> SecToAlignSym; | ||
|
|
||
| public: | ||
| LoongArchAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit, | ||
|
|
@@ -45,6 +48,15 @@ class LoongArchAsmBackend : public MCAsmBackend { | |
| uint64_t Value, bool IsResolved, | ||
| const MCSubtargetInfo *STI) const override; | ||
|
|
||
| // Return Size with extra Nop Bytes for alignment directive in code section. | ||
| bool shouldInsertExtraNopBytesForCodeAlign(const MCAlignFragment &AF, | ||
| unsigned &Size) override; | ||
|
|
||
| // Insert target specific fixup type for alignment directive in code section. | ||
| bool shouldInsertFixupForCodeAlign(MCAssembler &Asm, | ||
| const MCAsmLayout &Layout, | ||
| MCAlignFragment &AF) override; | ||
|
|
||
| bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, | ||
| const MCValue &Target, | ||
| const MCSubtargetInfo *STI) override; | ||
|
|
@@ -75,6 +87,9 @@ class LoongArchAsmBackend : public MCAsmBackend { | |
| std::unique_ptr<MCObjectTargetWriter> | ||
| createObjectTargetWriter() const override; | ||
| const MCTargetOptions &getTargetOptions() const { return TargetOptions; } | ||
| DenseMap<MCSection *, const MCSymbolRefExpr *> &getSecToAlignSym() { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This does not match binutils. Binutils does not generate
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So I think there may be some potential errors if the section of the symbol ".Lla-relax-align" is inconsistent with the section of the alignment directive, and I submitted a patch for this before (see [1]). One example is when the section of ".Lla-relax-align" is discarded. Another example is in the Linux kernel [2]. [1] https://sourceware.org/pipermail/binutils/2024-January/131615.html |
||
| return SecToAlignSym; | ||
| } | ||
| }; | ||
| } // end namespace llvm | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,22 @@ | ||
| # RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s \ | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It could be better to add a description about the aim of this test. For example: |
||
| # RUN: | llvm-readobj -r - | FileCheck --check-prefixes=CHECK,RELAX %s | ||
| # RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=-relax %s \ | ||
| # RUN: | llvm-readobj -r - | FileCheck %s | ||
|
|
||
| .section ".dummy", "a" | ||
| .L1: | ||
| la.pcrel $t0, sym | ||
| .p2align 3 | ||
| .L2: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If the aim of this test is to check R_LARCH_ALIGN is not generated for non executable section, seems
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not that. Here we need to check that the ADD/SUB relocations are emitted (see llvm/test/MC/RISCV/align-non-executable.s), because in AttemptToFoldSymbolOffsetDifference() the alignment fragments in a section which has instructions cannot be folded. Maybe it can be improved in the future. |
||
| .dword .L2 - .L1 | ||
|
|
||
| # CHECK: Relocations [ | ||
| # CHECK-NEXT: Section ({{.*}}) .rela.dummy { | ||
| # CHECK-NEXT: 0x0 R_LARCH_PCALA_HI20 sym 0x0 | ||
| # RELAX-NEXT: 0x0 R_LARCH_RELAX - 0x0 | ||
| # CHECK-NEXT: 0x4 R_LARCH_PCALA_LO12 sym 0x0 | ||
| # RELAX-NEXT: 0x4 R_LARCH_RELAX - 0x0 | ||
| # RELAX-NEXT: 0x8 R_LARCH_ADD64 .L2 0x0 | ||
| # RELAX-NEXT: 0x8 R_LARCH_SUB64 .L1 0x0 | ||
| # CHECK-NEXT: } | ||
| # CHECK-NEXT: ] | ||
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,53 @@ | ||||||||
| # RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=-relax %s \ | ||||||||
|
||||||||
| # RUN: | llvm-readelf -rs - | FileCheck %s --check-prefix=NORELAX | ||||||||
| # RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s \ | ||||||||
| # RUN: | llvm-readelf -rs - | FileCheck %s --check-prefix=RELAX | ||||||||
| # RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s \ | ||||||||
| # RUN: | llvm-objdump -d - | FileCheck -check-prefix=RELAX-INST %s | ||||||||
|
|
||||||||
| # NORELAX: There are no relocations in this file. | ||||||||
| # NORELAX: Symbol table '.symtab' contains 1 entries: | ||||||||
|
|
||||||||
| # RELAX: 0000000000000000 0000000100000066 R_LARCH_ALIGN 0000000000000000 {{.*}} + 4 | ||||||||
| # RELAX-NEXT: 0000000000000010 0000000100000066 R_LARCH_ALIGN 0000000000000000 {{.*}} + 5 | ||||||||
| # RELAX-NEXT: 000000000000002c 0000000100000066 R_LARCH_ALIGN 0000000000000000 {{.*}} + 4 | ||||||||
| # RELAX-NEXT: 000000000000003c 0000000100000066 R_LARCH_ALIGN 0000000000000000 {{.*}} + b04 | ||||||||
| # RELAX-NEXT: 0000000000000048 0000000100000066 R_LARCH_ALIGN 0000000000000000 {{.*}} + 4 | ||||||||
| # RELAX-EMPTY: | ||||||||
| # RELAX: 0000000000000000 0000000200000066 R_LARCH_ALIGN 0000000000000000 <null> + 4 | ||||||||
| # RELAX-EMPTY: | ||||||||
| # RELAX: Symbol table '.symtab' contains 3 entries: | ||||||||
|
||||||||
| # RELAX: 0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND | ||||||||
| # RELAX-NEXT: 1: 0000000000000000 0 NOTYPE LOCAL DEFAULT 2 | ||||||||
| # RELAX-NEXT: 2: 0000000000000000 0 NOTYPE LOCAL DEFAULT 4 | ||||||||
|
|
||||||||
| .text | ||||||||
| .p2align 4 # A = 0x0 | ||||||||
| nop | ||||||||
|
||||||||
| .p2align 5 # B = A + 3 * NOP + NOP = 0x10 | ||||||||
| .p2align 4 # C = B + 7 * NOP = 0x2C | ||||||||
| nop | ||||||||
| .p2align 4, , 11 # D = C + 3 * NOP + NOP = 0x3C | ||||||||
| ## Not emit the third parameter. | ||||||||
|
||||||||
| .p2align 4, , 12 # E = D + 3 * NOP = 0x48 | ||||||||
| # END = E + 3 * NOP = 0x54 = 21 * NOP | ||||||||
|
|
||||||||
| ## Not emit R_LARCH_ALIGN if code alignment great than alignment directive. | ||||||||
|
||||||||
| ## Not emit R_LARCH_ALIGN if code alignment great than alignment directive. | |
| ## Not emit R_LARCH_ALIGN if alignment directive is less than or equal to minimum code alignment(a.k.a 4). |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| ## Not emit R_LARCH_ALIGN if alignment directive with specific padding value. | |
| ## Not emit R_LARCH_ALIGN if alignment directive with specific padding value. | |
| ## The behavior is the same as GNU assembler. |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I got different output:
0000000000000000 <.text2>:
0: 00 00 40 03 nop
4: 00 00 40 03 nop
8: 00 00 40 03 nop
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It doesn't check <.text2>, because <.text2> is used to check symbol index.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why does the last directive also take effect here, while it does not when relax is off? But this matches RISCV and GAS.
But we can add an extra instruction at the end of line.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In this case, it is both the first and the last instruction in this section. There are some cases in which the alignment directive does not need to generate R_LARCH_ALIGN. It may be improved in the future. But for now, the relocation for the alignment directive is generated almost unconditionally when relax is enabled. I'll add an extra instruction at the end of line.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can inline one-time-used variable
STI.