-
Notifications
You must be signed in to change notification settings - Fork 12.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[ARM] Modify codegen for memcpy intrinsic to prefer LDM/STM.
We were previously codegen'ing memcpy as regular load/store operations and hoping that the register allocator would allocate registers in ascending order so that we could apply an LDM/STM combine after register allocation. According to the commit that first introduced this code (r37179), we planned to teach the register allocator to allocate the registers in ascending order. This never got implemented, and up to now we've been stuck with very poor codegen. A much simpler approach for achieving better codegen is to create MEMCPY pseudo instructions, attach scratch virtual registers to them and then, post register allocation, expand the MEMCPYs into LDM/STM pairs using the scratch registers. The register allocator will have picked arbitrary registers which we sort when expanding the MEMCPY. This approach also avoids the need to repeatedly calculate offsets which ultimately ought to be eliminated pre-RA in order to decrease register pressure. Fixes PR9199 and PR23768. [This is based on Peter Collingbourne's r238473 which was reverted.] Differential Revision: http://reviews.llvm.org/D13239 Change-Id: I727543c2e94136e0f80b8e22d5642d7b9ee5b458 Author: Peter Collingbourne <[email protected]> llvm-svn: 249322
- Loading branch information
Scott Douglass
committed
Oct 5, 2015
1 parent
3fd6c11
commit 953f908
Showing
14 changed files
with
520 additions
and
73 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
; RUN: llc -mtriple armv7a-none-eabi -mattr=-neon < %s -verify-machineinstrs -o - | FileCheck %s | ||
|
||
; Thumb1 (thumbv6m) is tested in tests/Thumb | ||
|
||
; External pointer globals used by every function below: the pointer loaded from @a is the memcpy destination base, the pointer loaded from @b is the source base. | ||
@a = external global i32* | ||
@b = external global i32* | ||
|
||
; Function Attrs: nounwind | ||
; foo24 calls memcpy with length 24 and alignment argument 4, copying from (pointer loaded from @b) + one i32 to (pointer loaded from @a) + one i32. | ||
; Expected output: one ldm/stm pair moving six registers with no base writeback (6 x 4 bytes = 24 bytes). | ||
define void @foo24() #0 { | ||
entry: | ||
; CHECK-LABEL: foo24: | ||
; We use '[rl0-9]+' to allow 'r0'..'r12', 'lr' | ||
; CHECK: movt [[LB:[rl0-9]+]], :upper16:b | ||
; CHECK: movt [[SB:[rl0-9]+]], :upper16:a | ||
; CHECK: add [[NLB:[rl0-9]+]], [[LB]], #4 | ||
; CHECK: add [[NSB:[rl0-9]+]], [[SB]], #4 | ||
; CHECK-NEXT: ldm [[NLB]], {[[R1:[rl0-9]+]], [[R2:[rl0-9]+]], [[R3:[rl0-9]+]], [[R4:[rl0-9]+]], [[R5:[rl0-9]+]], [[R6:[rl0-9]+]]} | ||
; CHECK-NEXT: stm [[NSB]], {[[R1]], [[R2]], [[R3]], [[R4]], [[R5]], [[R6]]} | ||
%0 = load i32*, i32** @a, align 4 | ||
%arrayidx = getelementptr inbounds i32, i32* %0, i32 1 | ||
%1 = bitcast i32* %arrayidx to i8* | ||
%2 = load i32*, i32** @b, align 4 | ||
%arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1 | ||
%3 = bitcast i32* %arrayidx1 to i8* | ||
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 24, i32 4, i1 false) | ||
ret void | ||
} | ||
|
||
; foo28 calls memcpy with length 28 and alignment argument 4, copying from (pointer loaded from @b) + one i32 to (pointer loaded from @a) + one i32. | ||
; Expected output: a 3-register ldm/stm pair with base writeback ('!') followed by a 4-register pair without it (3 + 4 words = 28 bytes). | ||
; The R1..R4 pattern variables are re-captured by the second ldm, so the two pairs need not use the same registers. | ||
define void @foo28() #0 { | ||
entry: | ||
; CHECK-LABEL: foo28: | ||
; CHECK: movt [[LB:[rl0-9]+]], :upper16:b | ||
; CHECK: movt [[SB:[rl0-9]+]], :upper16:a | ||
; CHECK: add [[NLB:[rl0-9]+]], [[LB]], #4 | ||
; CHECK: add [[NSB:[rl0-9]+]], [[SB]], #4 | ||
; CHECK-NEXT: ldm [[NLB]]!, {[[R1:[rl0-9]+]], [[R2:[rl0-9]+]], [[R3:[rl0-9]+]]} | ||
; CHECK-NEXT: stm [[NSB]]!, {[[R1]], [[R2]], [[R3]]} | ||
; CHECK-NEXT: ldm [[NLB]], {[[R1:[rl0-9]+]], [[R2:[rl0-9]+]], [[R3:[rl0-9]+]], [[R4:[rl0-9]+]]} | ||
; CHECK-NEXT: stm [[NSB]], {[[R1]], [[R2]], [[R3]], [[R4]]} | ||
%0 = load i32*, i32** @a, align 4 | ||
%arrayidx = getelementptr inbounds i32, i32* %0, i32 1 | ||
%1 = bitcast i32* %arrayidx to i8* | ||
%2 = load i32*, i32** @b, align 4 | ||
%arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1 | ||
%3 = bitcast i32* %arrayidx1 to i8* | ||
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 28, i32 4, i1 false) | ||
ret void | ||
} | ||
|
||
; foo32 calls memcpy with length 32 and alignment argument 4, copying from (pointer loaded from @b) + one i32 to (pointer loaded from @a) + one i32. | ||
; Expected output: a 4-register ldm/stm pair with base writeback ('!') followed by a 4-register pair without it (4 + 4 words = 32 bytes). | ||
define void @foo32() #0 { | ||
entry: | ||
; CHECK-LABEL: foo32: | ||
; CHECK: movt [[LB:[rl0-9]+]], :upper16:b | ||
; CHECK: movt [[SB:[rl0-9]+]], :upper16:a | ||
; CHECK: add [[NLB:[rl0-9]+]], [[LB]], #4 | ||
; CHECK: add [[NSB:[rl0-9]+]], [[SB]], #4 | ||
; CHECK-NEXT: ldm [[NLB]]!, {[[R1:[rl0-9]+]], [[R2:[rl0-9]+]], [[R3:[rl0-9]+]], [[R4:[rl0-9]+]]} | ||
; CHECK-NEXT: stm [[NSB]]!, {[[R1]], [[R2]], [[R3]], [[R4]]} | ||
; CHECK-NEXT: ldm [[NLB]], {[[R1:[rl0-9]+]], [[R2:[rl0-9]+]], [[R3:[rl0-9]+]], [[R4:[rl0-9]+]]} | ||
; CHECK-NEXT: stm [[NSB]], {[[R1]], [[R2]], [[R3]], [[R4]]} | ||
%0 = load i32*, i32** @a, align 4 | ||
%arrayidx = getelementptr inbounds i32, i32* %0, i32 1 | ||
%1 = bitcast i32* %arrayidx to i8* | ||
%2 = load i32*, i32** @b, align 4 | ||
%arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1 | ||
%3 = bitcast i32* %arrayidx1 to i8* | ||
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 32, i32 4, i1 false) | ||
ret void | ||
} | ||
|
||
; foo36 calls memcpy with length 36 and alignment argument 4, copying from (pointer loaded from @b) + one i32 to (pointer loaded from @a) + one i32. | ||
; Expected output: a 4-register ldm/stm pair with base writeback ('!') followed by a 5-register pair without it (4 + 5 words = 36 bytes). | ||
define void @foo36() #0 { | ||
entry: | ||
; CHECK-LABEL: foo36: | ||
; CHECK: movt [[LB:[rl0-9]+]], :upper16:b | ||
; CHECK: movt [[SB:[rl0-9]+]], :upper16:a | ||
; CHECK: add [[NLB:[rl0-9]+]], [[LB]], #4 | ||
; CHECK: add [[NSB:[rl0-9]+]], [[SB]], #4 | ||
; CHECK-NEXT: ldm [[NLB]]!, {[[R1:[rl0-9]+]], [[R2:[rl0-9]+]], [[R3:[rl0-9]+]], [[R4:[rl0-9]+]]} | ||
; CHECK-NEXT: stm [[NSB]]!, {[[R1]], [[R2]], [[R3]], [[R4]]} | ||
; CHECK-NEXT: ldm [[NLB]], {[[R1:[rl0-9]+]], [[R2:[rl0-9]+]], [[R3:[rl0-9]+]], [[R4:[rl0-9]+]], [[R5:[rl0-9]+]]} | ||
; CHECK-NEXT: stm [[NSB]], {[[R1]], [[R2]], [[R3]], [[R4]], [[R5]]} | ||
%0 = load i32*, i32** @a, align 4 | ||
%arrayidx = getelementptr inbounds i32, i32* %0, i32 1 | ||
%1 = bitcast i32* %arrayidx to i8* | ||
%2 = load i32*, i32** @b, align 4 | ||
%arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1 | ||
%3 = bitcast i32* %arrayidx1 to i8* | ||
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 36, i32 4, i1 false) | ||
ret void | ||
} | ||
|
||
; Function Attrs: nounwind | ||
; Five-operand memcpy intrinsic; the calls in this file pass (dest i8*, src i8*, i32 length, i32 4, i1 false). | ||
; NOTE(review): the last two operands are presumably alignment and is-volatile per the LangRef of this LLVM era; attribute group #1 is not visible in this excerpt. | ||
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1 |
Oops, something went wrong.