Skip to content

Commit 956f45b

Browse files
committed
A few cleanup/optimizations related to tag handling in codegen
* Create `julia.typeof` and `julia.write_barrier` intrinsics so that tag access is easier for the LLVM passes to handle. This also completely hides the tag access from LLVM before these intrinsics are lowered, which makes it possible to mark the write barrier as `inaccessiblememonly` and enables store-to-load forwarding on mutable types. * Mark `jl_gc_queue_root` as `inaccessiblemem_or_argmemonly`. This should also have a positive impact on LLVM memory access optimizations. * In non-imaging mode, use the pointer literals directly for union types. * Remove old code that checks whether the child is NULL before running the write barrier. This is not needed anymore. * Mark functions returning Julia objects as `nonnull` return. Together with the improved store-to-load forwarding, this can avoid `NULL` checks. * Remove `jlnewbits_func`, which is not used in codegen anymore. * Fix `LateLowerGCFrame` initialization: `runOnFunction` shouldn't modify the `Module`. * Add more `dereferenceable` attributes and explicitly specify `nonnull`, since it is not implied by `dereferenceable` for non-default address spaces. Also add range metadata on the tag load. The change should also make it easier to do more allocation optimization in LLVM for objects with object references as fields.
1 parent 03045ca commit 956f45b

6 files changed

Lines changed: 427 additions & 196 deletions

File tree

src/cgutils.cpp

Lines changed: 110 additions & 133 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,9 @@ static Function *function_proto(Function *F, Module *M = nullptr)
2727
F->setPersonalityFn(nullptr);
2828
}
2929

30-
// FunctionType does not include any attributes. Copy them over manually
31-
// as codegen may make decisions based on the presence of certain attributes
32-
NewF->copyAttributesFrom(F);
30+
// FunctionType does not include any attributes. Copy them over manually
31+
// as codegen may make decisions based on the presence of certain attributes
32+
NewF->copyAttributesFrom(F);
3333

3434
if (OldPersonalityFn)
3535
F->setPersonalityFn(OldPersonalityFn);
@@ -330,14 +330,66 @@ static Value *literal_pointer_val_slot(jl_codectx_t &ctx, jl_value_t *p)
330330
return julia_pgv(ctx, "jl_global#", p);
331331
}
332332

333+
static size_t dereferenceable_size(jl_value_t *jt)
334+
{
335+
if (jl_is_array_type(jt)) {
336+
// Array has at least this much data
337+
return sizeof(jl_array_t);
338+
} else if (((jl_datatype_t*)jt)->layout) {
339+
return jl_datatype_size(jt);
340+
} else {
341+
return 0;
342+
}
343+
}
344+
345+
static inline void maybe_mark_argument_dereferenceable(Argument *A, jl_value_t *jt)
346+
{
347+
auto F = A->getParent();
348+
// The `dereferenceable` below does not imply `nonnull` for non-addrspace(0) pointers.
349+
#if JL_LLVM_VERSION >= 50000
350+
F->addParamAttr(A->getArgNo(), Attribute::NonNull);
351+
#else
352+
F->setAttributes(F->getAttributes().addAttribute(jl_LLVMContext, A->getArgNo() + 1,
353+
Attribute::NonNull));
354+
#endif
355+
size_t size = dereferenceable_size(jt);
356+
if (!size)
357+
return;
358+
F->addDereferenceableAttr(A->getArgNo() + 1, size);
359+
}
360+
361+
static inline Instruction *maybe_mark_load_dereferenceable(Instruction *LI, bool can_be_null,
362+
size_t size=0)
363+
{
364+
// The `dereferenceable` below does not imply `nonnull` for non-addrspace(0) pointers.
365+
if (!can_be_null)
366+
LI->setMetadata(LLVMContext::MD_nonnull, MDNode::get(jl_LLVMContext, None));
367+
if (!size) {
368+
return LI;
369+
}
370+
llvm::SmallVector<Metadata *, 1> OPs;
371+
OPs.push_back(ConstantAsMetadata::get(ConstantInt::get(T_int64, size)));
372+
LI->setMetadata(can_be_null ? "dereferenceable_or_null" :
373+
"dereferenceable",
374+
MDNode::get(jl_LLVMContext, OPs));
375+
return LI;
376+
}
377+
378+
static inline Instruction *maybe_mark_load_dereferenceable(Instruction *LI, bool can_be_null,
379+
jl_value_t *jt)
380+
{
381+
return maybe_mark_load_dereferenceable(LI, can_be_null, dereferenceable_size(jt));
382+
}
383+
333384
static Value *literal_pointer_val(jl_codectx_t &ctx, jl_value_t *p)
334385
{
335386
if (p == NULL)
336387
return V_null;
337388
if (!imaging_mode)
338389
return literal_static_pointer_val(ctx, p);
339390
Value *pgv = literal_pointer_val_slot(ctx, p);
340-
return tbaa_decorate(tbaa_const, ctx.builder.CreateLoad(T_pjlvalue, pgv));
391+
return tbaa_decorate(tbaa_const, maybe_mark_load_dereferenceable(
392+
ctx.builder.CreateLoad(T_pjlvalue, pgv), false, jl_typeof(p)));
341393
}
342394

343395
static Value *literal_pointer_val(jl_codectx_t &ctx, jl_binding_t *p)
@@ -349,7 +401,9 @@ static Value *literal_pointer_val(jl_codectx_t &ctx, jl_binding_t *p)
349401
return literal_static_pointer_val(ctx, p);
350402
// bindings are prefixed with jl_bnd#
351403
Value *pgv = julia_pgv(ctx, "jl_bnd#", p->name, p->owner, p);
352-
return tbaa_decorate(tbaa_const, ctx.builder.CreateLoad(T_pjlvalue, pgv));
404+
return tbaa_decorate(tbaa_const, maybe_mark_load_dereferenceable(
405+
ctx.builder.CreateLoad(T_pjlvalue, pgv), false,
406+
sizeof(jl_binding_t)));
353407
}
354408

355409
// bitcast a value, but preserve its address space when dealing with pointer types
@@ -720,34 +774,12 @@ static Value *emit_nthptr_recast(jl_codectx_t &ctx, Value *v, ssize_t n, MDNode
720774
return tbaa_decorate(tbaa, ctx.builder.CreateLoad(emit_bitcast(ctx, vptr, ptype)));
721775
}
722776

723-
static Value *emit_typeptr_addr(jl_codectx_t &ctx, Value *p)
724-
{
725-
ssize_t offset = (sizeof(jl_taggedvalue_t) -
726-
offsetof(jl_taggedvalue_t, type)) / sizeof(jl_value_t*);
727-
return emit_nthptr_addr(ctx, p, -offset);
728-
}
729-
730777
static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &v);
731778

732-
static Value* mask_gc_bits(jl_codectx_t &ctx, Value *tag)
733-
{
734-
return ctx.builder.CreateIntToPtr(ctx.builder.CreateAnd(
735-
ctx.builder.CreatePtrToInt(tag, T_size),
736-
ConstantInt::get(T_size, ~(uintptr_t)15)),
737-
tag->getType());
738-
}
739-
740779
static Value *emit_typeof(jl_codectx_t &ctx, Value *tt)
741780
{
742781
assert(tt != NULL && !isa<AllocaInst>(tt) && "expected a conditionally boxed value");
743-
// given p, a jl_value_t*, compute its type tag
744-
// The raw address is not GC-safe to load from as it may have mask bits set.
745-
// Note that this gives the optimizer license to not root this value. That
746-
// is fine however, since leaf types are not GCed at the moment. Should
747-
// that ever change, this may have to go through a special intrinsic.
748-
Value *addr = emit_bitcast(ctx, emit_typeptr_addr(ctx, tt), T_ppjlvalue);
749-
tt = tbaa_decorate(tbaa_tag, ctx.builder.CreateLoad(T_pjlvalue, addr));
750-
return maybe_decay_untracked(mask_gc_bits(ctx, tt));
782+
return ctx.builder.CreateCall(prepare_call(jl_typeof_func), {tt});
751783
}
752784

753785
static jl_cgval_t emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p)
@@ -760,36 +792,57 @@ static jl_cgval_t emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p)
760792
}
761793
if (p.TIndex) {
762794
Value *tindex = ctx.builder.CreateAnd(p.TIndex, ConstantInt::get(T_int8, 0x7f));
763-
Value *pdatatype;
764-
unsigned counter;
765-
counter = 0;
795+
unsigned counter = 0;
766796
bool allunboxed = for_each_uniontype_small(
767797
[&](unsigned idx, jl_datatype_t *jt) { },
768798
p.typ,
769799
counter);
770-
if (allunboxed)
771-
pdatatype = decay_derived(Constant::getNullValue(T_ppjlvalue));
772-
else {
773-
// See note above in emit_typeof(Value*), we can't tell the system
774-
// about this until we've cleared the GC bits.
775-
pdatatype = emit_bitcast(ctx, emit_typeptr_addr(ctx, p.Vboxed), T_ppjlvalue);
776-
}
800+
Value *datatype_or_p = (imaging_mode ? Constant::getNullValue(T_ppjlvalue) :
801+
Constant::getNullValue(T_prjlvalue));
777802
counter = 0;
778803
for_each_uniontype_small(
779-
[&](unsigned idx, jl_datatype_t *jt) {
780-
Value *cmp = ctx.builder.CreateICmpEQ(tindex, ConstantInt::get(T_int8, idx));
781-
pdatatype = ctx.builder.CreateSelect(cmp,
782-
decay_derived(literal_pointer_val_slot(ctx, (jl_value_t*)jt)),
783-
pdatatype);
784-
},
785-
p.typ,
786-
counter);
787-
Value *datatype = tbaa_decorate(allunboxed ? tbaa_const : tbaa_tag,
788-
ctx.builder.CreateLoad(T_pjlvalue, pdatatype));
789-
if (!allunboxed)
790-
datatype = mask_gc_bits(ctx, datatype);
791-
datatype = maybe_decay_untracked(datatype);
792-
return mark_julia_type(ctx, datatype, true, jl_datatype_type);
804+
[&](unsigned idx, jl_datatype_t *jt) {
805+
Value *cmp = ctx.builder.CreateICmpEQ(tindex, ConstantInt::get(T_int8, idx));
806+
Value *ptr;
807+
if (imaging_mode) {
808+
ptr = literal_pointer_val_slot(ctx, (jl_value_t*)jt);
809+
}
810+
else {
811+
ptr = maybe_decay_untracked(literal_pointer_val(ctx, (jl_value_t*)jt));
812+
}
813+
datatype_or_p = ctx.builder.CreateSelect(cmp, ptr, datatype_or_p);
814+
},
815+
p.typ,
816+
counter);
817+
auto emit_unboxty = [&] () -> Value* {
818+
if (imaging_mode)
819+
return maybe_decay_untracked(
820+
tbaa_decorate(tbaa_const, ctx.builder.CreateLoad(T_pjlvalue, datatype_or_p)));
821+
return datatype_or_p;
822+
};
823+
Value *res;
824+
if (!allunboxed) {
825+
Value *isnull = ctx.builder.CreateIsNull(datatype_or_p);
826+
BasicBlock *boxBB = BasicBlock::Create(jl_LLVMContext, "boxed", ctx.f);
827+
BasicBlock *unboxBB = BasicBlock::Create(jl_LLVMContext, "unboxed", ctx.f);
828+
BasicBlock *mergeBB = BasicBlock::Create(jl_LLVMContext, "merge", ctx.f);
829+
ctx.builder.CreateCondBr(isnull, boxBB, unboxBB);
830+
ctx.builder.SetInsertPoint(boxBB);
831+
auto boxTy = emit_typeof(ctx, p.Vboxed);
832+
ctx.builder.CreateBr(mergeBB);
833+
ctx.builder.SetInsertPoint(unboxBB);
834+
auto unboxTy = emit_unboxty();
835+
ctx.builder.CreateBr(mergeBB);
836+
ctx.builder.SetInsertPoint(mergeBB);
837+
auto phi = ctx.builder.CreatePHI(T_prjlvalue, 2);
838+
phi->addIncoming(boxTy, boxBB);
839+
phi->addIncoming(unboxTy, unboxBB);
840+
res = phi;
841+
}
842+
else {
843+
res = emit_unboxty();
844+
}
845+
return mark_julia_type(ctx, res, true, jl_datatype_type);
793846
}
794847
jl_value_t *aty = p.typ;
795848
if (jl_is_type_type(aty)) {
@@ -973,48 +1026,6 @@ static void raise_exception_unless(jl_codectx_t &ctx, Value *cond, Value *exc)
9731026
raise_exception(ctx, exc, passBB);
9741027
}
9751028

976-
static size_t dereferenceable_size(jl_value_t *jt) {
977-
size_t size = 0;
978-
if (jl_is_array_type(jt)) {
979-
// Array has at least this much data
980-
size = sizeof(jl_array_t);
981-
} else {
982-
size = jl_datatype_size(jt);
983-
}
984-
return size;
985-
}
986-
987-
static inline void maybe_mark_argument_dereferenceable(Argument *A, jl_value_t *jt) {
988-
if (!jl_is_leaf_type(jt)) {
989-
return;
990-
}
991-
size_t size = dereferenceable_size(jt);
992-
if (!size) {
993-
return;
994-
}
995-
A->getParent()->addDereferenceableAttr(A->getArgNo() + 1, size);
996-
}
997-
998-
static inline Instruction *maybe_mark_load_dereferenceable(Instruction *LI, bool can_be_null, size_t size) {
999-
if (!size) {
1000-
return LI;
1001-
}
1002-
llvm::SmallVector<Metadata *, 1> OPs;
1003-
OPs.push_back(ConstantAsMetadata::get(ConstantInt::get(T_int64, size)));
1004-
LI->setMetadata(can_be_null ? "dereferenceable_or_null" :
1005-
"dereferenceable",
1006-
MDNode::get(jl_LLVMContext, OPs));
1007-
return LI;
1008-
}
1009-
1010-
static inline Instruction *maybe_mark_load_dereferenceable(Instruction *LI, bool can_be_null, jl_value_t *jt) {
1011-
if (!jl_is_leaf_type(jt)) {
1012-
return LI;
1013-
}
1014-
size_t size = dereferenceable_size(jt);
1015-
return maybe_mark_load_dereferenceable(LI, can_be_null, size);
1016-
}
1017-
10181029
static void null_pointer_check(jl_codectx_t &ctx, Value *v)
10191030
{
10201031
raise_exception_unless(ctx,
@@ -1398,7 +1409,7 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
13981409
ctx.builder.CreateGEP(decay_derived(
13991410
emit_bitcast(ctx, data_pointer(ctx, strct), T_pprjlvalue)), idx),
14001411
PointerType::get(T_prjlvalue, AddressSpace::Derived))),
1401-
maybe_null, minimum_field_size));
1412+
maybe_null, minimum_field_size));
14021413
if (maybe_null)
14031414
null_pointer_check(ctx, fld);
14041415
*ret = mark_julia_type(ctx, fld, true, jl_any_type);
@@ -2281,43 +2292,9 @@ static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, Value *jt)
22812292
// if ptr is NULL this emits a write barrier _back_
22822293
static void emit_write_barrier(jl_codectx_t &ctx, Value *parent, Value *ptr)
22832294
{
2284-
Value *parenttag = emit_bitcast(ctx, emit_typeptr_addr(ctx, parent), T_psize);
2285-
Value *parent_type = tbaa_decorate(tbaa_tag, ctx.builder.CreateLoad(parenttag));
2286-
Value *parent_bits = ctx.builder.CreateAnd(parent_type, 3);
2287-
2288-
// the branch hint does not seem to make it to the generated code
2289-
Value *parent_old_marked = ctx.builder.CreateICmpEQ(parent_bits,
2290-
ConstantInt::get(T_size, 3));
2291-
2292-
BasicBlock *cont = BasicBlock::Create(jl_LLVMContext, "cont");
2293-
BasicBlock *barrier_may_trigger = BasicBlock::Create(jl_LLVMContext, "wb_may_trigger", ctx.f);
2294-
BasicBlock *barrier_trigger = BasicBlock::Create(jl_LLVMContext, "wb_trigger", ctx.f);
2295-
ctx.builder.CreateCondBr(parent_old_marked, barrier_may_trigger, cont);
2296-
2297-
ctx.builder.SetInsertPoint(barrier_may_trigger);
2298-
Value *ptr_mark_bit = ctx.builder.CreateAnd(tbaa_decorate(tbaa_tag,
2299-
ctx.builder.CreateLoad(emit_bitcast(ctx, emit_typeptr_addr(ctx, ptr), T_psize))), 1);
2300-
Value *ptr_not_marked = ctx.builder.CreateICmpEQ(ptr_mark_bit, ConstantInt::get(T_size, 0));
2301-
ctx.builder.CreateCondBr(ptr_not_marked, barrier_trigger, cont);
2302-
ctx.builder.SetInsertPoint(barrier_trigger);
2303-
ctx.builder.CreateCall(prepare_call(queuerootfun), maybe_decay_untracked(emit_bitcast(ctx, parent, T_prjlvalue)));
2304-
ctx.builder.CreateBr(cont);
2305-
ctx.f->getBasicBlockList().push_back(cont);
2306-
ctx.builder.SetInsertPoint(cont);
2307-
}
2308-
2309-
static void emit_checked_write_barrier(jl_codectx_t &ctx, Value *parent, Value *ptr)
2310-
{
2311-
BasicBlock *cont;
2312-
Value *not_null = ctx.builder.CreateICmpNE(mark_callee_rooted(ptr), mark_callee_rooted(V_null));
2313-
BasicBlock *if_not_null = BasicBlock::Create(jl_LLVMContext, "wb_not_null", ctx.f);
2314-
cont = BasicBlock::Create(jl_LLVMContext, "cont");
2315-
ctx.builder.CreateCondBr(not_null, if_not_null, cont);
2316-
ctx.builder.SetInsertPoint(if_not_null);
2317-
emit_write_barrier(ctx, parent, ptr);
2318-
ctx.builder.CreateBr(cont);
2319-
ctx.f->getBasicBlockList().push_back(cont);
2320-
ctx.builder.SetInsertPoint(cont);
2295+
parent = maybe_decay_untracked(emit_bitcast(ctx, parent, T_prjlvalue));
2296+
ptr = maybe_decay_untracked(emit_bitcast(ctx, ptr, T_prjlvalue));
2297+
ctx.builder.CreateCall(prepare_call(jl_write_barrier_func), {parent, ptr});
23212298
}
23222299

23232300
static void emit_setfield(jl_codectx_t &ctx,
@@ -2339,7 +2316,7 @@ static void emit_setfield(jl_codectx_t &ctx,
23392316
tbaa_decorate(strct.tbaa, ctx.builder.CreateStore(r,
23402317
emit_bitcast(ctx, addr, T_pprjlvalue)));
23412318
if (wb && strct.isboxed)
2342-
emit_checked_write_barrier(ctx, boxed(ctx, strct), r);
2319+
emit_write_barrier(ctx, boxed(ctx, strct), r);
23432320
}
23442321
else if (jl_is_uniontype(jfty)) {
23452322
int fsz = jl_field_size(sty, idx0);

0 commit comments

Comments
 (0)