Skip to content

Commit

Permalink
[X86] WaitPKG instructions
Browse files Browse the repository at this point in the history
Three new instructions:

umonitor - Sets up a linear address range to be
monitored by hardware and activates the monitor.
The address range should use the write-back
memory caching type.

umwait - A hint that allows the processor to
stop instruction execution and enter an
implementation-dependent optimized state
until occurrence of a class of events.

tpause - Directs the processor to enter an
implementation-dependent optimized state
until the TSC reaches the value in EDX:EAX.

This change also modifies the description of the mfence
instruction: the rep prefix (0xF3) was previously allowed
on it, which would conflict with umonitor during
disassembly.

Before:
$ echo 0xf3,0x0f,0xae,0xf0 | llvm-mc -disassemble
.text
mfence

After:
$ echo 0xf3,0x0f,0xae,0xf0 | llvm-mc -disassemble
.text
umonitor        %rax

Reviewers: craig.topper, zvi

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D45253

llvm-svn: 330462
  • Loading branch information
GBuella committed Apr 20, 2018
1 parent 041eb6f commit 31fa802
Show file tree
Hide file tree
Showing 17 changed files with 247 additions and 13 deletions.
11 changes: 11 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -6282,3 +6282,14 @@ let TargetPrefix = "x86" in {
def int_x86_cldemote : GCCBuiltin<"__builtin_ia32_cldemote">,
Intrinsic<[], [llvm_ptr_ty], []>;
}

//===----------------------------------------------------------------------===//
// Wait and pause enhancements
// WAITPKG intrinsics (umonitor / umwait / tpause). They live under the "x86"
// target prefix and are bound to the matching __builtin_ia32_* GCC builtins.
// None of them declares any intrinsic properties (the trailing list is empty).
let TargetPrefix = "x86" in {
// umonitor: takes the address to monitor as a pointer and produces no value.
def int_x86_umonitor : GCCBuiltin<"__builtin_ia32_umonitor">,
Intrinsic<[], [llvm_ptr_ty], []>;
// umwait: three i32 operands and an i8 result. The waitpkg-intrinsics.ll test
// passes the operands as (control, counter high half, counter low half).
def int_x86_umwait : GCCBuiltin<"__builtin_ia32_umwait">,
Intrinsic<[llvm_i8_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
// tpause: same signature as umwait (three i32 operands, i8 result).
def int_x86_tpause : GCCBuiltin<"__builtin_ia32_tpause">,
Intrinsic<[llvm_i8_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
}
1 change: 1 addition & 0 deletions llvm/lib/Support/Host.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1250,6 +1250,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["prefetchwt1"] = HasLeaf7 && ((ECX >> 0) & 1);
Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save;
Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1);
Features["waitpkg"] = HasLeaf7 && ((ECX >> 5) & 1);
Features["avx512vbmi2"] = HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save;
Features["shstk"] = HasLeaf7 && ((ECX >> 7) & 1);
Features["gfni"] = HasLeaf7 && ((ECX >> 8) & 1);
Expand Down
15 changes: 9 additions & 6 deletions llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -965,8 +965,6 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
break;
}

if (insn->hasAdSize)
attrMask |= ATTR_ADSIZE;
}

if (insn->rexPrefix & 0x08) {
Expand Down Expand Up @@ -1059,22 +1057,27 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
}

/*
* Absolute moves need special handling.
* Absolute moves and umonitor need special handling.
* -For 16-bit mode because the meanings of the AdSize and OpSize prefixes are
* inverted w.r.t. 32-bit mode.
* -For 32-bit mode we need to ensure the ADSIZE prefix is observed in
* any position.
*/
if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) {
if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) ||
(insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE))) {
/* Make sure we observed the prefixes in any position. */
if (insn->hasAdSize)
attrMask |= ATTR_ADSIZE;
if (insn->hasOpSize)
attrMask |= ATTR_OPSIZE;

/* In 16-bit, invert the attributes. */
if (insn->mode == MODE_16BIT)
attrMask ^= ATTR_ADSIZE | ATTR_OPSIZE;
if (insn->mode == MODE_16BIT) {
attrMask ^= ATTR_ADSIZE;
/* The OpSize attribute is only valid with the absolute moves. */
if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0))
attrMask ^= ATTR_OPSIZE;
}

if (getIDWithAttrMask(&instructionID, insn, attrMask))
return -1;
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Target/X86/X86.td
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,8 @@ def FeatureWBNOINVD : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
"Write Back No Invalidate">;
def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true",
"Support RDPID instructions">;
// "waitpkg" subtarget feature: sets the HasWAITPKG subtarget field to true.
// The HasWAITPKG predicate reads that field to gate the UMONITOR, UMWAIT and
// TPAUSE instruction definitions.
def FeatureWAITPKG : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
"Wait and pause enhancements">;
// On some processors, instructions that implicitly take two memory operands are
// slow. In practice, this means that CALL, PUSH, and POP with memory operands
// should be avoided in favor of a MOV + register CALL/PUSH/POP.
Expand Down Expand Up @@ -628,7 +630,8 @@ class TremontProc<string Name> : ProcModel<Name, SLMModel,
FeatureCLDEMOTE,
FeatureGFNI,
FeatureRDPID,
FeatureSGX
FeatureSGX,
FeatureWAITPKG
]>;
def : TremontProc<"tremont">;

Expand Down
30 changes: 25 additions & 5 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21393,17 +21393,35 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
return SDValue();
}
case Intrinsic::x86_lwpins32:
case Intrinsic::x86_lwpins64: {
case Intrinsic::x86_lwpins64:
case Intrinsic::x86_umwait:
case Intrinsic::x86_tpause: {
SDLoc dl(Op);
SDValue Chain = Op->getOperand(0);
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
SDValue LwpIns =
DAG.getNode(X86ISD::LWPINS, dl, VTs, Chain, Op->getOperand(2),
unsigned Opcode;

switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic");
case Intrinsic::x86_umwait:
Opcode = X86ISD::UMWAIT;
break;
case Intrinsic::x86_tpause:
Opcode = X86ISD::TPAUSE;
break;
case Intrinsic::x86_lwpins32:
case Intrinsic::x86_lwpins64:
Opcode = X86ISD::LWPINS;
break;
}

SDValue Operation =
DAG.getNode(Opcode, dl, VTs, Chain, Op->getOperand(2),
Op->getOperand(3), Op->getOperand(4));
SDValue SetCC = getSETCC(X86::COND_B, LwpIns.getValue(0), dl, DAG);
SDValue SetCC = getSETCC(X86::COND_B, Operation.getValue(0), dl, DAG);
SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, SetCC);
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result,
LwpIns.getValue(1));
Operation.getValue(1));
}
}
return SDValue();
Expand Down Expand Up @@ -25846,6 +25864,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::GF2P8AFFINEINVQB: return "X86ISD::GF2P8AFFINEINVQB";
case X86ISD::NT_CALL: return "X86ISD::NT_CALL";
case X86ISD::NT_BRIND: return "X86ISD::NT_BRIND";
case X86ISD::UMWAIT: return "X86ISD::UMWAIT";
case X86ISD::TPAUSE: return "X86ISD::TPAUSE";
}
return nullptr;
}
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -594,6 +594,9 @@ namespace llvm {
// LWP insert record.
LWPINS,

// User level wait
UMWAIT, TPAUSE,

// Compare and swap.
LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
LCMPXCHG8_DAG,
Expand Down
36 changes: 36 additions & 0 deletions llvm/lib/Target/X86/X86InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,16 @@ def X86lwpins : SDNode<"X86ISD::LWPINS",
SDTCisVT<2, i32>, SDTCisVT<3, i32>]>,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPSideEffect]>;

// Selection DAG node for UMWAIT. Type profile: one i32 result and three
// operands, where operand 1 may be any integer type and operands 2 and 3 are
// i32. The node carries a chain and is marked as having side effects.
def X86umwait : SDNode<"X86ISD::UMWAIT",
SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisInt<1>,
SDTCisVT<2, i32>, SDTCisVT<3, i32>]>,
[SDNPHasChain, SDNPSideEffect]>;

// Selection DAG node for TPAUSE. Same type profile and node properties as
// X86umwait.
def X86tpause : SDNode<"X86ISD::TPAUSE",
SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisInt<1>,
SDTCisVT<2, i32>, SDTCisVT<3, i32>]>,
[SDNPHasChain, SDNPSideEffect]>;

//===----------------------------------------------------------------------===//
// X86 Operand Definitions.
//
Expand Down Expand Up @@ -891,6 +901,7 @@ def HasCLFLUSHOPT : Predicate<"Subtarget->hasCLFLUSHOPT()">;
def HasCLWB : Predicate<"Subtarget->hasCLWB()">;
def HasWBNOINVD : Predicate<"Subtarget->hasWBNOINVD()">;
def HasRDPID : Predicate<"Subtarget->hasRDPID()">;
def HasWAITPKG : Predicate<"Subtarget->hasWAITPKG()">;
def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
def Not64BitMode : Predicate<"!Subtarget->is64Bit()">,
AssemblerPredicate<"!Mode64Bit", "Not 64-bit mode">;
Expand Down Expand Up @@ -2639,6 +2650,31 @@ def : InstAlias<"monitorx\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITORXrrr)>,
def : InstAlias<"monitorx\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORXrrr)>,
Requires<[ In64BitMode ]>;

//===----------------------------------------------------------------------===//
// WAITPKG Instructions
//
// umonitor, umwait and tpause all use opcode 0xAE with the MRM6r form. The
// prefix class (XS, XD or PD) is what distinguishes them. All of them require
// the WAITPKG subtarget feature and are scheduled as WriteSystem.
let SchedRW = [WriteSystem] in {
// UMONITOR takes the address to monitor in a register. There is one variant
// per address size (AdSize16/32/64): the 16-bit form is excluded from 64-bit
// mode and the 64-bit form is only available in 64-bit mode.
def UMONITOR16 : I<0xAE, MRM6r, (outs), (ins GR16:$src),
"umonitor\t$src", [(int_x86_umonitor GR16:$src)]>,
XS, AdSize16, Requires<[HasWAITPKG, Not64BitMode]>;
def UMONITOR32 : I<0xAE, MRM6r, (outs), (ins GR32:$src),
"umonitor\t$src", [(int_x86_umonitor GR32:$src)]>,
XS, AdSize32, Requires<[HasWAITPKG]>;
def UMONITOR64 : I<0xAE, MRM6r, (outs), (ins GR64:$src),
"umonitor\t$src", [(int_x86_umonitor GR64:$src)]>,
XS, AdSize64, Requires<[HasWAITPKG, In64BitMode]>;
// UMWAIT and TPAUSE take one explicit register operand, read EDX and EAX as
// implicit inputs, and deliver their result in EFLAGS.
let Uses = [EAX, EDX], Defs = [EFLAGS] in {
def UMWAIT : I<0xAE, MRM6r,
(outs), (ins GR32orGR64:$src), "umwait\t$src",
[(set EFLAGS, (X86umwait GR32orGR64:$src, EDX, EAX))]>,
XD, Requires<[HasWAITPKG]>;
def TPAUSE : I<0xAE, MRM6r,
(outs), (ins GR32orGR64:$src), "tpause\t$src",
[(set EFLAGS, (X86tpause GR32orGR64:$src, EDX, EAX))]>,
PD, Requires<[HasWAITPKG]>;
}
} // SchedRW

//===----------------------------------------------------------------------===//
// CLZERO Instruction
//
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/X86/X86InstrSSE.td
Original file line number Diff line number Diff line change
Expand Up @@ -3106,7 +3106,7 @@ def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
def LFENCE : I<0xAE, MRM_E8, (outs), (ins), "lfence", [(int_x86_sse2_lfence)]>,
TB, Requires<[HasSSE2]>;
def MFENCE : I<0xAE, MRM_F0, (outs), (ins), "mfence", [(int_x86_sse2_mfence)]>,
TB, Requires<[HasMFence]>;
PS, Requires<[HasMFence]>;
} // SchedRW

def : Pat<(X86MFence), (MFENCE)>;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/X86/X86Subtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,7 @@ void X86Subtarget::initializeEnvironment() {
HasCLWB = false;
HasWBNOINVD = false;
HasRDPID = false;
HasWAITPKG = false;
UseRetpoline = false;
UseRetpolineExternalThunk = false;
IsPMULLDSlow = false;
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/X86/X86Subtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,9 @@ class X86Subtarget final : public X86GenSubtargetInfo {
/// Processor support RDPID instruction
bool HasRDPID;

/// Processor supports WaitPKG instructions
bool HasWAITPKG;

/// Use a retpoline thunk rather than indirect calls to block speculative
/// execution.
bool UseRetpoline;
Expand Down Expand Up @@ -628,6 +631,7 @@ class X86Subtarget final : public X86GenSubtargetInfo {
bool hasCLWB() const { return HasCLWB; }
bool hasWBNOINVD() const { return HasWBNOINVD; }
bool hasRDPID() const { return HasRDPID; }
bool hasWAITPKG() const { return HasWAITPKG; }
bool useRetpoline() const { return UseRetpoline; }
bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }

Expand Down
67 changes: 67 additions & 0 deletions llvm/test/CodeGen/X86/waitpkg-intrinsics.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+waitpkg | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i386-pc-linux -mattr=+waitpkg | FileCheck %s --check-prefix=X32

; Checks that llvm.x86.umonitor on a pointer argument selects a single
; umonitor instruction whose register operand has the target's pointer width:
; a 64-bit register on x86_64 and a 32-bit register on i386.
define void @test_umonitor(i8* %address) {
; X64-LABEL: test_umonitor:
; X64: # %bb.0: # %entry
; X64-NEXT: umonitor %rdi
; X64-NEXT: retq
;
; X32-LABEL: test_umonitor:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: umonitor %eax
; X32-NEXT: retl
entry:
call void @llvm.x86.umonitor(i8* %address)
ret void
}

; Checks the lowering of llvm.x86.umwait: two of the arguments are moved into
; EDX and EAX, the remaining one becomes the explicit register operand of
; umwait, and the i8 result is produced from the carry flag with setb.
define i8 @test_umwait(i32 %control, i32 %counter_high, i32 %counter_low) {
; X64-LABEL: test_umwait:
; X64: # %bb.0: # %entry
; X64-NEXT: movl %edx, %eax
; X64-NEXT: movl %esi, %edx
; X64-NEXT: umwait %edi
; X64-NEXT: setb %al
; X64-NEXT: retq
;
; X32-LABEL: test_umwait:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: umwait %ecx
; X32-NEXT: setb %al
; X32-NEXT: retl
entry:
; The call result is left unnamed, so it is implicitly numbered %0.
call i8 @llvm.x86.umwait(i32 %control, i32 %counter_high, i32 %counter_low)
ret i8 %0
}

; Checks the lowering of llvm.x86.tpause: two of the arguments are moved into
; EDX and EAX, the remaining one becomes the explicit register operand of
; tpause, and the i8 result is produced from the carry flag with setb.
define i8 @test_tpause(i32 %control, i32 %counter_high, i32 %counter_low) {
; X64-LABEL: test_tpause:
; X64: # %bb.0: # %entry
; X64-NEXT: movl %edx, %eax
; X64-NEXT: movl %esi, %edx
; X64-NEXT: tpause %edi
; X64-NEXT: setb %al
; X64-NEXT: retq
;
; X32-LABEL: test_tpause:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: tpause %ecx
; X32-NEXT: setb %al
; X32-NEXT: retl
entry:
; The call result is left unnamed, so it is implicitly numbered %0.
call i8 @llvm.x86.tpause(i32 %control, i32 %counter_high, i32 %counter_low)
ret i8 %0
}

declare void @llvm.x86.umonitor(i8*)
declare i8 @llvm.x86.umwait(i32, i32, i32)
declare i8 @llvm.x86.tpause(i32, i32, i32)
6 changes: 6 additions & 0 deletions llvm/test/MC/Disassembler/X86/x86-16.txt
Original file line number Diff line number Diff line change
Expand Up @@ -794,3 +794,9 @@

# CHECK: wbnoinvd
0xf3 0x0f 0x09

# CHECK: umonitor %ax
0xf3 0x0f 0xae 0xf0

# CHECK: umonitor %eax
0x67 0xf3 0x0f 0xae 0xf0
12 changes: 12 additions & 0 deletions llvm/test/MC/Disassembler/X86/x86-32.txt
Original file line number Diff line number Diff line change
Expand Up @@ -847,3 +847,15 @@
0x0f 0xb7 0x00
# CHECK: movzww (%eax), %ax
0x66 0x0f 0xb7 0x00

# CHECK: umonitor %eax
0xf3 0x0f 0xae 0xf0

# CHECK: umonitor %ax
0x67 0xf3 0x0f 0xae 0xf0

# CHECK: umwait %eax
0xf2 0x0f 0xae 0xf0

# CHECK: tpause %eax
0x66 0x0f 0xae 0xf0
15 changes: 15 additions & 0 deletions llvm/test/MC/Disassembler/X86/x86-64.txt
Original file line number Diff line number Diff line change
Expand Up @@ -525,3 +525,18 @@

# CHECK: cldemote -559038737(%rbx,%rcx,8)
0x0f,0x1c,0x84,0xcb,0xef,0xbe,0xad,0xde

# CHECK: umonitor %rax
0xf3 0x0f 0xae 0xf0

# CHECK: umonitor %eax
0x67 0xf3 0x0f 0xae 0xf0

# CHECK: umonitor %r13
0xf3 0x41 0x0f 0xae 0xf5

# CHECK: umwait %r15
0xf2 0x41 0x0f 0xae 0xf7

# CHECK: tpause %r15
0x66 0x41 0x0f 0xae 0xf7
8 changes: 8 additions & 0 deletions llvm/test/MC/X86/x86-16.s
Original file line number Diff line number Diff line change
Expand Up @@ -973,3 +973,11 @@ data32 lgdt 4(%eax)
// CHECK: wbnoinvd
// CHECK: encoding: [0xf3,0x0f,0x09]
wbnoinvd

// CHECK: umonitor %ax
// CHECK: encoding: [0xf3,0x0f,0xae,0xf0]
umonitor %ax

// CHECK: umonitor %eax
// CHECK: encoding: [0x67,0xf3,0x0f,0xae,0xf0]
umonitor %eax
16 changes: 16 additions & 0 deletions llvm/test/MC/X86/x86-32-coverage.s
Original file line number Diff line number Diff line change
Expand Up @@ -10752,3 +10752,19 @@ btcl $4, (%eax)
// CHECK: cldemote 3735928559(%ebx,%ecx,8)
// CHECK: encoding: [0x0f,0x1c,0x84,0xcb,0xef,0xbe,0xad,0xde]
cldemote 0xdeadbeef(%ebx,%ecx,8)

// CHECK: umonitor %eax
// CHECK: encoding: [0xf3,0x0f,0xae,0xf0]
umonitor %eax

// CHECK: umonitor %ax
// CHECK: encoding: [0x67,0xf3,0x0f,0xae,0xf0]
umonitor %ax

// CHECK: umwait %eax
// CHECK: encoding: [0xf2,0x0f,0xae,0xf0]
umwait %eax

// CHECK: tpause %eax
// CHECK: encoding: [0x66,0x0f,0xae,0xf0]
tpause %eax
Loading

0 comments on commit 31fa802

Please sign in to comment.