Skip to content

Commit d803472

Browse files
committed
codegen: fix optimization misses from IR changes
- dereferenceable attribute requires align attribute (if eltype is not sized) or it is simply ignored - tbaa_const isn't necessarily sufficient to allow LICM hoisting. An unknown call instruction (such as julia.gc_alloc_obj) present in the same loop can prevent TBAA from declaring the memory to be immutable. However, the invariant.load attribute is much stronger: additionally mark all tbaa_const loads with MD_invariant_load, permitting LLVM to reorder and lift them much more freely (requiring only dereferenceable).
1 parent 808f266 commit d803472

6 files changed

Lines changed: 54 additions & 136 deletions

File tree

src/cgutils.cpp

Lines changed: 27 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44

55
static Instruction *tbaa_decorate(MDNode *md, Instruction *load_or_store)
66
{
7-
load_or_store->setMetadata( llvm::LLVMContext::MD_tbaa, md );
7+
load_or_store->setMetadata(llvm::LLVMContext::MD_tbaa, md);
8+
if (isa<LoadInst>(load_or_store) && md == tbaa_const)
9+
load_or_store->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(md->getContext(), None));
810
return load_or_store;
911
}
1012

@@ -358,20 +360,29 @@ static size_t dereferenceable_size(jl_value_t *jt)
358360
}
359361
}
360362

363+
// Return the Julia alignment for jltype. Callers use this when no explicit alignment
364+
// was given, since LLVM's assumed alignment for a load/store via ptr might be stricter
365+
// than the Julia alignment for jltype.
366+
static unsigned julia_alignment(jl_value_t *jltype)
367+
{
368+
unsigned alignment = jl_datatype_align(jltype);
369+
assert(alignment <= JL_HEAP_ALIGNMENT);
370+
assert(JL_HEAP_ALIGNMENT % alignment == 0);
371+
return alignment;
372+
}
373+
361374
static inline void maybe_mark_argument_dereferenceable(Argument *A, jl_value_t *jt)
362375
{
363-
auto F = A->getParent();
376+
AttrBuilder B;
377+
B.addAttribute(Attribute::NonNull);
364378
// The `dereferenceable` below does not imply `nonnull` for non addrspace(0) pointers.
365-
#if JL_LLVM_VERSION >= 50000
366-
F->addParamAttr(A->getArgNo(), Attribute::NonNull);
367-
#else
368-
F->setAttributes(F->getAttributes().addAttribute(jl_LLVMContext, A->getArgNo() + 1,
369-
Attribute::NonNull));
370-
#endif
371379
size_t size = dereferenceable_size(jt);
372-
if (!size)
373-
return;
374-
F->addDereferenceableAttr(A->getArgNo() + 1, size);
380+
if (size) {
381+
B.addDereferenceableAttr(size);
382+
if (!A->getType()->getPointerElementType()->isSized()) // mimic LLVM Loads.cpp isAligned
383+
B.addAlignmentAttr(julia_alignment(jt));
384+
}
385+
A->addAttrs(B);
375386
}
376387

377388
static inline Instruction *maybe_mark_load_dereferenceable(Instruction *LI, bool can_be_null,
@@ -1229,19 +1240,6 @@ static Value *emit_bounds_check(jl_codectx_t &ctx, const jl_cgval_t &ainfo, jl_v
12291240
return im1;
12301241
}
12311242

1232-
// If given alignment is 0 and LLVM's assumed alignment for a load/store via ptr
1233-
// might be stricter than the Julia alignment for jltype, return the alignment of jltype.
1234-
// Otherwise return the given alignment.
1235-
static unsigned julia_alignment(jl_value_t *jltype, unsigned alignment)
1236-
{
1237-
if (!alignment) {
1238-
alignment = jl_datatype_align(jltype);
1239-
assert(alignment <= JL_HEAP_ALIGNMENT);
1240-
assert(JL_HEAP_ALIGNMENT % alignment == 0);
1241-
}
1242-
return alignment;
1243-
}
1244-
12451243
static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_value_t *jt, Value* dest = NULL, MDNode *tbaa_dest = nullptr, bool isVolatile = false);
12461244

12471245
static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, jl_value_t *jltype,
@@ -1267,8 +1265,8 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j
12671265
// elt = data;
12681266
//}
12691267
//else {
1270-
Instruction *load = ctx.builder.CreateAlignedLoad(data, isboxed ?
1271-
alignment : julia_alignment(jltype, alignment), false);
1268+
Instruction *load = ctx.builder.CreateAlignedLoad(data, isboxed || alignment ?
1269+
alignment : julia_alignment(jltype), false);
12721270
if (isboxed)
12731271
load = maybe_mark_load_dereferenceable(load, true, jltype);
12741272
if (tbaa) {
@@ -1316,7 +1314,7 @@ static void typed_store(jl_codectx_t &ctx,
13161314
}
13171315
if (idx_0based)
13181316
data = ctx.builder.CreateInBoundsGEP(r->getType(), data, idx_0based);
1319-
Instruction *store = ctx.builder.CreateAlignedStore(r, data, isboxed ? alignment : julia_alignment(jltype, alignment));
1317+
Instruction *store = ctx.builder.CreateAlignedStore(r, data, isboxed || alignment ? alignment : julia_alignment(jltype));
13201318
if (tbaa)
13211319
tbaa_decorate(tbaa, store);
13221320
}
@@ -2229,7 +2227,7 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con
22292227
else {
22302228
Value *src_ptr = data_pointer(ctx, src);
22312229
unsigned nb = jl_datatype_size(typ);
2232-
unsigned alignment = julia_alignment(typ, 0);
2230+
unsigned alignment = julia_alignment(typ);
22332231
Value *nbytes = ConstantInt::get(T_size, nb);
22342232
if (skip) {
22352233
// TODO: this Select is very bad for performance, but is necessary to work around LLVM bugs with the undef option that we want to use:
@@ -2256,7 +2254,7 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con
22562254
bool allunboxed = for_each_uniontype_small(
22572255
[&](unsigned idx, jl_datatype_t *jt) {
22582256
unsigned nb = jl_datatype_size(jt);
2259-
unsigned alignment = julia_alignment((jl_value_t*)jt, 0);
2257+
unsigned alignment = julia_alignment((jl_value_t*)jt);
22602258
BasicBlock *tempBB = BasicBlock::Create(jl_LLVMContext, "union_move", ctx.f);
22612259
ctx.builder.SetInsertPoint(tempBB);
22622260
switchInst->addCase(ConstantInt::get(T_int8, idx), tempBB);

src/codegen.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4186,12 +4186,12 @@ static void emit_cfunc_invalidate(
41864186
}
41874187
else {
41884188
gf_ret = emit_bitcast(ctx, gf_ret, gfrt->getPointerTo());
4189-
ctx.builder.CreateRet(ctx.builder.CreateAlignedLoad(gf_ret, julia_alignment(astrt, 0)));
4189+
ctx.builder.CreateRet(ctx.builder.CreateAlignedLoad(gf_ret, julia_alignment(astrt)));
41904190
}
41914191
break;
41924192
}
41934193
case jl_returninfo_t::SRet: {
4194-
emit_memcpy(ctx, &*gf_thunk->arg_begin(), nullptr, gf_ret, nullptr, jl_datatype_size(astrt), julia_alignment(astrt, 0));
4194+
emit_memcpy(ctx, &*gf_thunk->arg_begin(), nullptr, gf_ret, nullptr, jl_datatype_size(astrt), julia_alignment(astrt));
41954195
ctx.builder.CreateRetVoid();
41964196
break;
41974197
}
@@ -5038,7 +5038,7 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, const jl_returnin
50385038
if (lty != NULL && !isboxed) {
50395039
theArg = decay_derived(emit_bitcast(ctx, theArg, PointerType::get(lty, 0)));
50405040
if (!lty->isAggregateType()) // keep "aggregate" type values in place as pointers
5041-
theArg = ctx.builder.CreateAlignedLoad(theArg, julia_alignment(ty, 0));
5041+
theArg = ctx.builder.CreateAlignedLoad(theArg, julia_alignment(ty));
50425042
}
50435043
assert(dyn_cast<UndefValue>(theArg) == NULL);
50445044
args[idx] = theArg;
@@ -6134,7 +6134,7 @@ static std::unique_ptr<Module> emit_function(
61346134
if (returninfo.cc == jl_returninfo_t::SRet) {
61356135
assert(jl_is_concrete_type(jlrettype));
61366136
emit_memcpy(ctx, sret, nullptr, retvalinfo, jl_datatype_size(jlrettype),
6137-
julia_alignment(jlrettype, 0));
6137+
julia_alignment(jlrettype));
61386138
}
61396139
else { // must be jl_returninfo_t::Union
61406140
emit_unionmove(ctx, sret, nullptr, retvalinfo, /*skip*/isboxed_union);

src/intrinsics.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,7 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va
367367
return NULL;
368368
}
369369

370-
unsigned alignment = julia_alignment(jt, 0);
370+
unsigned alignment = julia_alignment(jt);
371371
Type *ptype = to->getPointerTo();
372372
if (dest) {
373373
emit_memcpy(ctx, dest, tbaa_dest, p, x.tbaa, jl_datatype_size(jt), alignment, false);

src/llvm-late-gc-lowering.cpp

Lines changed: 16 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -31,49 +31,6 @@
3131

3232
using namespace llvm;
3333

34-
namespace {
35-
#if JL_LLVM_VERSION < 50000
36-
static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> Values) {
37-
GlobalVariable *GV = M.getGlobalVariable(Name);
38-
SmallPtrSet<Constant *, 16> InitAsSet;
39-
SmallVector<Constant *, 16> Init;
40-
if (GV) {
41-
ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
42-
for (auto &Op : CA->operands()) {
43-
Constant *C = cast_or_null<Constant>(Op);
44-
if (InitAsSet.insert(C).second)
45-
Init.push_back(C);
46-
}
47-
GV->eraseFromParent();
48-
}
49-
50-
Type *Int8PtrTy = llvm::Type::getInt8PtrTy(M.getContext());
51-
for (auto *V : Values) {
52-
Constant *C = ConstantExpr::getBitCast(V, Int8PtrTy);
53-
if (InitAsSet.insert(C).second)
54-
Init.push_back(C);
55-
}
56-
57-
if (Init.empty())
58-
return;
59-
60-
ArrayType *ATy = ArrayType::get(Int8PtrTy, Init.size());
61-
GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
62-
ConstantArray::get(ATy, Init), Name);
63-
GV->setSection("llvm.metadata");
64-
}
65-
66-
static void append_to_compiler_used(Module &M, ArrayRef<GlobalValue *> Values) {
67-
appendToUsedList(M, "llvm.compiler.used", Values);
68-
}
69-
#else
70-
static void append_to_compiler_used(Module &M, ArrayRef<GlobalValue *> Values)
71-
{
72-
appendToCompilerUsed(M, Values);
73-
}
74-
#endif
75-
}
76-
7734
/* Julia GC Root Placement pass. For a general overview of the design of GC
7835
root lowering, see the devdocs. This file is the actual implementation.
7936
@@ -340,16 +297,6 @@ namespace llvm {
340297
void initializeLateLowerGCFramePass(PassRegistry &Registry);
341298
}
342299

343-
template<typename T>
344-
static void addReturnAttr(T *f, Attribute::AttrKind Kind)
345-
{
346-
#if JL_LLVM_VERSION >= 50000
347-
f->addAttribute(AttributeList::ReturnIndex, Kind);
348-
#else
349-
f->addAttribute(AttributeSet::ReturnIndex, Kind);
350-
#endif
351-
}
352-
353300
extern std::pair<MDNode*,MDNode*> tbaa_make_child(const char *name, MDNode *parent=nullptr, bool isConstant=false);
354301
struct LateLowerGCFrame: public FunctionPass {
355302
static char ID;
@@ -822,6 +769,8 @@ JL_USED_FUNC static void dumpLivenessState(Function &F, State &S) {
822769
// jtbaa_immut.
823770
static bool isLoadFromImmut(LoadInst *LI)
824771
{
772+
if (LI->getMetadata(LLVMContext::MD_invariant_load))
773+
return true;
825774
MDNode *TBAA = LI->getMetadata(LLVMContext::MD_tbaa);
826775
if (!TBAA)
827776
return false;
@@ -1573,7 +1522,6 @@ Value *LateLowerGCFrame::EmitLoadTag(IRBuilder<> &builder, Value *V)
15731522
// that's initialized by `addrspacecast`. Such a global variable is not supported by the backend.
15741523
// This is not a problem on 4.0+ since that transformation (in loop-idiom) is disabled
15751524
// for NI pointers.
1576-
#if JL_LLVM_VERSION >= 40000
15771525
static SmallVector<int, 1> *FindRefinements(Value *V, State *S)
15781526
{
15791527
if (!S)
@@ -1593,12 +1541,6 @@ static bool IsPermRooted(Value *V, State *S)
15931541
return RefinePtr->size() == 1 && (*RefinePtr)[0] == -2;
15941542
return false;
15951543
}
1596-
#else
1597-
static bool IsPermRooted(Value *V, State *S)
1598-
{
1599-
return false;
1600-
}
1601-
#endif
16021544

16031545
static inline void UpdatePtrNumbering(Value *From, Value *To, State *S)
16041546
{
@@ -1625,11 +1567,8 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S) {
16251567
AllocaInst *Frame = nullptr;
16261568
if (T_prjlvalue) {
16271569
T_pprjlvalue = T_prjlvalue->getPointerTo();
1628-
Frame = new AllocaInst(T_prjlvalue,
1629-
#if JL_LLVM_VERSION >= 50000
1630-
0,
1631-
#endif
1632-
ConstantInt::get(T_int32, maxframeargs), "", StartOff);
1570+
Frame = new AllocaInst(T_prjlvalue, 0,
1571+
ConstantInt::get(T_int32, maxframeargs), "", StartOff);
16331572
}
16341573
SmallVector<CallInst*, 16> write_barriers;
16351574
for (BasicBlock &BB : F) {
@@ -1670,8 +1609,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S) {
16701609
auto pool_osize = ConstantInt::get(T_int32, osize);
16711610
newI = builder.CreateCall(pool_alloc_func, {ptls, pool_offs, pool_osize});
16721611
}
1673-
addReturnAttr(newI, Attribute::NoAlias);
1674-
addReturnAttr(newI, Attribute::NonNull);
1612+
newI->setAttributes(newI->getCalledFunction()->getAttributes());
16751613
newI->takeName(CI);
16761614
auto store = builder.CreateStore(CI->getArgOperand(2),
16771615
EmitTagPtr(builder, T_prjlvalue, newI));
@@ -1726,26 +1664,10 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S) {
17261664
CallInst *NewCall = CallInst::Create(newFptr, ReplacementArgs, "", CI);
17271665
NewCall->setTailCallKind(CI->getTailCallKind());
17281666
auto old_attrs = CI->getAttributes();
1729-
#if JL_LLVM_VERSION >= 50000
17301667
NewCall->setAttributes(AttributeList::get(CI->getContext(),
17311668
old_attrs.getFnAttributes(),
17321669
old_attrs.getRetAttributes(), {}));
1733-
#else
1734-
AttributeSet attr;
1735-
attr = attr.addAttributes(CI->getContext(), AttributeSet::ReturnIndex,
1736-
old_attrs.getRetAttributes())
1737-
.addAttributes(CI->getContext(), AttributeSet::FunctionIndex,
1738-
old_attrs.getFnAttributes());
1739-
NewCall->setAttributes(attr);
1740-
#endif
1741-
#if JL_LLVM_VERSION >= 40000
17421670
NewCall->copyMetadata(*CI);
1743-
#else
1744-
SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
1745-
CI->getAllMetadata(MDs);
1746-
for (auto MD : MDs)
1747-
NewCall->setMetadata(MD.first, MD.second);
1748-
#endif
17491671
CI->replaceAllUsesWith(NewCall);
17501672
UpdatePtrNumbering(CI, NewCall, S);
17511673
} else if (CI->getNumArgOperands() == CI->getNumOperands()) {
@@ -1755,14 +1677,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S) {
17551677
} else {
17561678
CallInst *NewCall = CallInst::Create(CI, None, CI);
17571679
NewCall->takeName(CI);
1758-
#if JL_LLVM_VERSION >= 40000
17591680
NewCall->copyMetadata(*CI);
1760-
#else
1761-
SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
1762-
CI->getAllMetadata(MDs);
1763-
for (auto MD : MDs)
1764-
NewCall->setMetadata(MD.first, MD.second);
1765-
#endif
17661681
CI->replaceAllUsesWith(NewCall);
17671682
UpdatePtrNumbering(CI, NewCall, S);
17681683
}
@@ -1910,11 +1825,8 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector<int> &Colors, State
19101825
if (MaxColor != -1 || S.Allocas.size() != 0) {
19111826
unsigned NRoots = MaxColor + 1 + S.Allocas.size();
19121827
// Create GC Frame
1913-
AllocaInst *gcframe = new AllocaInst(T_prjlvalue,
1914-
#if JL_LLVM_VERSION >= 50000
1915-
0,
1916-
#endif
1917-
ConstantInt::get(T_int32, NRoots+2), "gcframe");
1828+
AllocaInst *gcframe = new AllocaInst(T_prjlvalue, 0,
1829+
ConstantInt::get(T_int32, NRoots + 2), "gcframe");
19181830
gcframe->insertBefore(&*F->getEntryBlock().begin());
19191831
// Zero out gcframe
19201832
BitCastInst *tempSlot_i8 = new BitCastInst(gcframe, Type::getInt8PtrTy(F->getContext()), "");
@@ -2009,17 +1921,21 @@ bool LateLowerGCFrame::doInitialization(Module &M) {
20091921
args.push_back(T_int32);
20101922
pool_alloc_func = Function::Create(FunctionType::get(T_prjlvalue, args, false),
20111923
Function::ExternalLinkage, "jl_gc_pool_alloc", &M);
2012-
addReturnAttr(pool_alloc_func, Attribute::NoAlias);
2013-
addReturnAttr(pool_alloc_func, Attribute::NonNull);
1924+
pool_alloc_func->setAttributes(AttributeList::get(M.getContext(),
1925+
alloc_obj_func->getAttributes().getFnAttributes(),
1926+
alloc_obj_func->getAttributes().getRetAttributes(),
1927+
None));
20141928
}
20151929
if (!(big_alloc_func = M.getFunction("jl_gc_big_alloc"))) {
20161930
std::vector<Type*> args(0);
20171931
args.push_back(T_pint8);
20181932
args.push_back(T_size);
20191933
big_alloc_func = Function::Create(FunctionType::get(T_prjlvalue, args, false),
20201934
Function::ExternalLinkage, "jl_gc_big_alloc", &M);
2021-
addReturnAttr(big_alloc_func, Attribute::NoAlias);
2022-
addReturnAttr(big_alloc_func, Attribute::NonNull);
1935+
big_alloc_func->setAttributes(AttributeList::get(M.getContext(),
1936+
alloc_obj_func->getAttributes().getFnAttributes(),
1937+
alloc_obj_func->getAttributes().getRetAttributes(),
1938+
None));
20231939
}
20241940
auto T_jlvalue = cast<PointerType>(T_prjlvalue)->getElementType();
20251941
T_pjlvalue = PointerType::get(T_jlvalue, 0);
@@ -2053,7 +1969,7 @@ bool LateLowerGCFrame::doInitialization(Module &M) {
20531969
j++;
20541970
}
20551971
if (j != 0)
2056-
append_to_compiler_used(M, ArrayRef<GlobalValue*>(function_list, j));
1972+
appendToCompilerUsed(M, ArrayRef<GlobalValue*>(function_list, j));
20571973
return true;
20581974
}
20591975

src/llvm-multiversioning.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -814,6 +814,7 @@ void CloneCtx::fix_inst_uses()
814814
std::tie(id, slot) = get_reloc_slot(orig_f);
815815
Instruction *ptr = new LoadInst(T_pvoidfunc, slot, "", false, insert_before);
816816
ptr->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const);
817+
ptr->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(ctx, None));
817818
ptr = new BitCastInst(ptr, F->getType(), "", insert_before);
818819
use_i->setOperand(info.use->getOperandNo(),
819820
rewrite_inst_use(uses.get_stack(), ptr,

0 commit comments

Comments
 (0)