Skip to content

Commit

Permalink
[AArch64] Rewrite stack frame handling for win64 vararg functions
Browse files Browse the repository at this point in the history
The previous attempt, which made do with a single offset in
computeCalleeSaveRegisterPairs, wasn't quite enough. The previous
attempt only worked as long as CombineSPBump == true (since the
offset would be adjusted later in fixupCalleeSaveRestoreStackOffset).

Instead include the size for the fixed stack area used for win64
varargs in calculations in emitPrologue/emitEpilogue. The stack
consists of mainly three parts;
- AFI->getLocalStackSize()
- AFI->getCalleeSavedStackSize()
- FixedObject

Most of the places in the code which previously used the CSStackSize
now use PrologueSaveSize instead, which is the sum of the latter
two, while some cases which need exactly the middle one use
AFI->getCalleeSavedStackSize() explicitly instead of a local variable.

In addition to moving the offsetting into emitPrologue/emitEpilogue
(which fixes functions with CombineSPBump == false), also set the
frame pointer to point to the right location, where the frame pointer
and link register actually are stored. In addition to the prologue/epilogue,
this also requires changes to resolveFrameIndexReference.

Add tests for a function that keeps a frame pointer and another one
that uses a VLA.

Differential Revision: https://reviews.llvm.org/D35919

llvm-svn: 309744
  • Loading branch information
mstorsjo committed Aug 1, 2017
1 parent 206f826 commit eacf4e4
Show file tree
Hide file tree
Showing 3 changed files with 141 additions and 26 deletions.
52 changes: 30 additions & 22 deletions llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -506,19 +506,23 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
return;
}

auto CSStackSize = AFI->getCalleeSavedStackSize();
bool IsWin64 =
Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv());
unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;

auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
// All of the remaining stack allocations are for locals.
AFI->setLocalStackSize(NumBytes - CSStackSize);
AFI->setLocalStackSize(NumBytes - PrologueSaveSize);

bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
if (CombineSPBump) {
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
MachineInstr::FrameSetup);
NumBytes = 0;
} else if (CSStackSize != 0) {
} else if (PrologueSaveSize != 0) {
MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(MBB, MBBI, DL, TII,
-CSStackSize);
NumBytes -= CSStackSize;
-PrologueSaveSize);
NumBytes -= PrologueSaveSize;
}
assert(NumBytes >= 0 && "Negative stack allocation size!?");

Expand All @@ -532,8 +536,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
++MBBI;
}
if (HasFP) {
// Only set up FP if we actually need to. Frame pointer is fp = sp - 16.
int FPOffset = CSStackSize - 16;
// Only set up FP if we actually need to. Frame pointer is fp =
// sp - fixedobject - 16.
int FPOffset = AFI->getCalleeSavedStackSize() - 16;
if (CombineSPBump)
FPOffset += AFI->getLocalStackSize();

Expand Down Expand Up @@ -672,8 +677,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
if (HasFP) {
// Define the current CFA rule to use the provided FP.
unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
nullptr, Reg, 2 * StackGrowth - FixedObject));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
Expand Down Expand Up @@ -759,12 +764,16 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
// it as the 2nd argument of AArch64ISD::TC_RETURN.

auto CSStackSize = AFI->getCalleeSavedStackSize();
bool IsWin64 =
Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv());
unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;

auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);

if (!CombineSPBump && CSStackSize != 0)
if (!CombineSPBump && PrologueSaveSize != 0)
convertCalleeSaveRestoreToSPPrePostIncDec(
MBB, std::prev(MBB.getFirstTerminator()), DL, TII, CSStackSize);
MBB, std::prev(MBB.getFirstTerminator()), DL, TII, PrologueSaveSize);

// Move past the restores of the callee-saved registers.
MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
Expand All @@ -786,7 +795,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
return;
}

NumBytes -= CSStackSize;
NumBytes -= PrologueSaveSize;
assert(NumBytes >= 0 && "Negative stack allocation size!?");

if (!hasFP(MF)) {
Expand All @@ -796,7 +805,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
if (RedZone && ArgumentPopSize == 0)
return;

bool NoCalleeSaveRestore = CSStackSize == 0;
bool NoCalleeSaveRestore = PrologueSaveSize == 0;
int StackRestoreBytes = RedZone ? 0 : NumBytes;
if (NoCalleeSaveRestore)
StackRestoreBytes += ArgumentPopSize;
Expand All @@ -815,7 +824,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// be able to save any instructions.
if (MFI.hasVarSizedObjects() || AFI->isStackRealigned())
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
-CSStackSize + 16, TII, MachineInstr::FrameDestroy);
-AFI->getCalleeSavedStackSize() + 16, TII,
MachineInstr::FrameDestroy);
else if (NumBytes)
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII,
MachineInstr::FrameDestroy);
Expand Down Expand Up @@ -845,7 +855,11 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
int FPOffset = MFI.getObjectOffset(FI) + 16;
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
bool IsWin64 =
Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv());
unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
int FPOffset = MFI.getObjectOffset(FI) + FixedObject + 16;
int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
bool isFixed = MFI.isFixedObjectIndex(FI);

Expand Down Expand Up @@ -956,12 +970,6 @@ static void computeCalleeSaveRegisterPairs(
"Odd number of callee-saved regs to spill!");
int Offset = AFI->getCalleeSavedStackSize();

unsigned GPRSaveSize = AFI->getVarArgsGPRSize();
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv());
if (IsWin64)
Offset -= alignTo(GPRSaveSize, 16);

for (unsigned i = 0; i < Count; ++i) {
RegPairInfo RPI;
RPI.Reg1 = CSI[i].getReg();
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@

define win64cc void @pass_va(i32 %count, ...) nounwind {
entry:
; CHECK: sub sp, sp, #80
; CHECK: str x30, [sp, #-80]!
; CHECK: add x8, sp, #24
; CHECK: add x0, sp, #24
; CHECK: stp x6, x7, [sp, #64]
; CHECK: stp x4, x5, [sp, #48]
; CHECK: stp x2, x3, [sp, #32]
; CHECK: str x1, [sp, #24]
; CHECK: stp x30, x8, [sp]
; CHECK: str x8, [sp, #8]
; CHECK: bl other_func
; CHECK: ldr x30, [sp], #80
; CHECK: ret
Expand Down
111 changes: 109 additions & 2 deletions llvm/test/CodeGen/AArch64/win64_vararg.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@

define void @pass_va(i32 %count, ...) nounwind {
entry:
; CHECK: sub sp, sp, #80
; CHECK: str x30, [sp, #-80]!
; CHECK: add x8, sp, #24
; CHECK: add x0, sp, #24
; CHECK: stp x6, x7, [sp, #64]
; CHECK: stp x4, x5, [sp, #48]
; CHECK: stp x2, x3, [sp, #32]
; CHECK: str x1, [sp, #24]
; CHECK: stp x30, x8, [sp]
; CHECK: str x8, [sp, #8]
; CHECK: bl other_func
; CHECK: ldr x30, [sp], #80
; CHECK: ret
Expand Down Expand Up @@ -102,6 +102,113 @@ declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
declare i32 @__stdio_common_vsprintf(i64, i8*, i64, i8*, i8*, i8*) local_unnamed_addr #3
declare i64* @__local_stdio_printf_options() local_unnamed_addr #4

; CHECK-LABEL: fp
; CHECK: str x21, [sp, #-96]!
; CHECK: stp x20, x19, [sp, #16]
; CHECK: stp x29, x30, [sp, #32]
; CHECK: add x29, sp, #32
; CHECK: add x8, x29, #24
; CHECK: mov x19, x2
; CHECK: mov x20, x1
; CHECK: mov x21, x0
; CHECK: stp x6, x7, [x29, #48]
; CHECK: stp x4, x5, [x29, #32]
; CHECK: str x3, [x29, #24]
; CHECK: str x8, [sp, #8]
; CHECK: bl __local_stdio_printf_options
; CHECK: ldr x8, [x0]
; CHECK: add x5, x29, #24
; CHECK: mov x1, x21
; CHECK: mov x2, x20
; CHECK: orr x0, x8, #0x2
; CHECK: mov x3, x19
; CHECK: mov x4, xzr
; CHECK: bl __stdio_common_vsprintf
; CHECK: ldp x29, x30, [sp, #32]
; CHECK: ldp x20, x19, [sp, #16]
; CHECK: cmp w0, #0
; CHECK: csinv w0, w0, wzr, ge
; CHECK: ldr x21, [sp], #96
; CHECK: ret
define i32 @fp(i8*, i64, i8*, ...) local_unnamed_addr #6 {
%4 = alloca i8*, align 8
%5 = bitcast i8** %4 to i8*
call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %5) #2
call void @llvm.va_start(i8* nonnull %5)
%6 = load i8*, i8** %4, align 8
%7 = call i64* @__local_stdio_printf_options() #2
%8 = load i64, i64* %7, align 8
%9 = or i64 %8, 2
%10 = call i32 @__stdio_common_vsprintf(i64 %9, i8* %0, i64 %1, i8* %2, i8* null, i8* %6) #2
%11 = icmp sgt i32 %10, -1
%12 = select i1 %11, i32 %10, i32 -1
call void @llvm.va_end(i8* nonnull %5)
call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %5) #2
ret i32 %12
}

attributes #6 = { "no-frame-pointer-elim"="true" }

; CHECK-LABEL: vla
; CHECK: str x23, [sp, #-112]!
; CHECK: stp x22, x21, [sp, #16]
; CHECK: stp x20, x19, [sp, #32]
; CHECK: stp x29, x30, [sp, #48]
; CHECK: add x29, sp, #48
; CHECK: add x8, x29, #16
; CHECK: stur x8, [x29, #-40]
; CHECK: mov w8, w0
; CHECK: add x8, x8, #15
; CHECK: mov x9, sp
; CHECK: and x8, x8, #0x1fffffff0
; CHECK: sub x20, x9, x8
; CHECK: mov x19, x1
; CHECK: mov x23, sp
; CHECK: stp x6, x7, [x29, #48]
; CHECK: stp x4, x5, [x29, #32]
; CHECK: stp x2, x3, [x29, #16]
; CHECK: mov sp, x20
; CHECK: ldur x21, [x29, #-40]
; CHECK: sxtw x22, w0
; CHECK: bl __local_stdio_printf_options
; CHECK: ldr x8, [x0]
; CHECK: mov x1, x20
; CHECK: mov x2, x22
; CHECK: mov x3, x19
; CHECK: orr x0, x8, #0x2
; CHECK: mov x4, xzr
; CHECK: mov x5, x21
; CHECK: bl __stdio_common_vsprintf
; CHECK: mov sp, x23
; CHECK: sub sp, x29, #48
; CHECK: ldp x29, x30, [sp, #48]
; CHECK: ldp x20, x19, [sp, #32]
; CHECK: ldp x22, x21, [sp, #16]
; CHECK: ldr x23, [sp], #112
; CHECK: ret
define void @vla(i32, i8*, ...) local_unnamed_addr {
%3 = alloca i8*, align 8
%4 = bitcast i8** %3 to i8*
call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %4) #5
call void @llvm.va_start(i8* nonnull %4)
%5 = zext i32 %0 to i64
%6 = call i8* @llvm.stacksave()
%7 = alloca i8, i64 %5, align 1
%8 = load i8*, i8** %3, align 8
%9 = sext i32 %0 to i64
%10 = call i64* @__local_stdio_printf_options()
%11 = load i64, i64* %10, align 8
%12 = or i64 %11, 2
%13 = call i32 @__stdio_common_vsprintf(i64 %12, i8* nonnull %7, i64 %9, i8* %1, i8* null, i8* %8)
call void @llvm.va_end(i8* nonnull %4)
call void @llvm.stackrestore(i8* %6)
call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %4) #5
ret void
}

declare i8* @llvm.stacksave()
declare void @llvm.stackrestore(i8*)

; CHECK-LABEL: snprintf
; CHECK: sub sp, sp, #96
; CHECK: stp x21, x20, [sp, #16]
Expand Down

0 comments on commit eacf4e4

Please sign in to comment.