[OPENMP] CodeGen - 'omp for' with dynamic schedule kinds.
Differential Revision: http://reviews.llvm.org/D7138

llvm-svn: 232036
amusman committed Mar 12, 2015
1 parent c579d66 commit 92bdaab
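For context, here is a minimal source example (an assumption for illustration; the commit's own regression test is not reproduced on this page) of the kind of worksharing loop this change now lowers through the dynamic dispatch entry points instead of rejecting as unsupported:

// Hypothetical example, not taken from the patch.
void scale(int n, float a, float *x) {
#pragma omp parallel
  {
#pragma omp for schedule(dynamic, 4)
    for (int i = 0; i < n; ++i)
      x[i] *= a;
  }
}

The dynamic, guided, auto, and runtime schedule kinds all take this path; static and static-chunked schedules continue to go through __kmpc_for_static_init_*.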
Showing 4 changed files with 385 additions and 47 deletions.
146 changes: 118 additions & 28 deletions clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -661,6 +661,51 @@ CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
return RTLFn;
}

llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
bool IVSigned) {
assert((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime");
auto Name =
IVSize == 32
? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
: (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
CGM.Int32Ty, // tid
CGM.Int32Ty, // schedtype
ITy, // lower
ITy, // upper
ITy, // stride
ITy // chunk
};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
bool IVSigned) {
assert((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime");
auto Name =
IVSize == 32
? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
: (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
auto PtrTy = llvm::PointerType::getUnqual(ITy);
llvm::Type *TypeParams[] = {
getIdentTyPointerTy(), // loc
CGM.Int32Ty, // tid
llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
PtrTy, // p_lower
PtrTy, // p_upper
PtrTy // p_stride
};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
// Lookup the entry, lazily creating it if necessary.
@@ -1076,34 +1121,56 @@ void CGOpenMPRuntime::emitForInit(CodeGenFunction &CGF, SourceLocation Loc,
llvm::Value *UB, llvm::Value *ST,
llvm::Value *Chunk) {
OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunk != nullptr);
// Call __kmpc_for_static_init(
// ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
// kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
// kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
// kmp_int[32|64] incr, kmp_int[32|64] chunk);
// TODO: Implement dynamic schedule.

// If the Chunk was not specified in the clause - use default value 1.
if (Chunk == nullptr)
Chunk = CGF.Builder.getIntN(IVSize, /*C*/ 1);

llvm::Value *Args[] = {
emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
CGF.Builder.getInt32(Schedule), // Schedule type
IL, // &isLastIter
LB, // &LB
UB, // &UB
ST, // &Stride
CGF.Builder.getIntN(IVSize, 1), // Incr
Chunk // Chunk
};
assert((IVSize == 32 || IVSize == 64) &&
"Index size is not compatible with the omp runtime");
auto F = IVSize == 32 ? (IVSigned ? OMPRTL__kmpc_for_static_init_4
: OMPRTL__kmpc_for_static_init_4u)
: (IVSigned ? OMPRTL__kmpc_for_static_init_8
: OMPRTL__kmpc_for_static_init_8u);
CGF.EmitRuntimeCall(createRuntimeFunction(F), Args);
if (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked) {
// Call __kmpc_dispatch_init(
// ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
// kmp_int[32|64] lower, kmp_int[32|64] upper,
// kmp_int[32|64] stride, kmp_int[32|64] chunk);

// If the Chunk was not specified in the clause - use default value 1.
if (Chunk == nullptr)
Chunk = CGF.Builder.getIntN(IVSize, 1);
llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
getThreadID(CGF, Loc),
CGF.Builder.getInt32(Schedule), // Schedule type
CGF.Builder.getIntN(IVSize, 0), // Lower
UB, // Upper
CGF.Builder.getIntN(IVSize, 1), // Stride
Chunk // Chunk
};
CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
} else {
// Call __kmpc_for_static_init(
// ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
// kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
// kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
// kmp_int[32|64] incr, kmp_int[32|64] chunk);
if (Chunk == nullptr) {
assert(Schedule == OMP_sch_static &&
"expected static non-chunked schedule");
// If the Chunk was not specified in the clause - use default value 1.
Chunk = CGF.Builder.getIntN(IVSize, 1);
} else
assert(Schedule == OMP_sch_static_chunked &&
"expected static chunked schedule");
llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
getThreadID(CGF, Loc),
CGF.Builder.getInt32(Schedule), // Schedule type
IL, // &isLastIter
LB, // &LB
UB, // &UB
ST, // &Stride
CGF.Builder.getIntN(IVSize, 1), // Incr
Chunk // Chunk
};
assert((IVSize == 32 || IVSize == 64) &&
"Index size is not compatible with the omp runtime");
auto F = IVSize == 32 ? (IVSigned ? OMPRTL__kmpc_for_static_init_4
: OMPRTL__kmpc_for_static_init_4u)
: (IVSigned ? OMPRTL__kmpc_for_static_init_8
: OMPRTL__kmpc_for_static_init_8u);
CGF.EmitRuntimeCall(createRuntimeFunction(F), Args);
}
}

void CGOpenMPRuntime::emitForFinish(CodeGenFunction &CGF, SourceLocation Loc,
@@ -1118,6 +1185,29 @@ void CGOpenMPRuntime::emitForFinish(CodeGenFunction &CGF, SourceLocation Loc,
Args);
}

llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
SourceLocation Loc, unsigned IVSize,
bool IVSigned, llvm::Value *IL,
llvm::Value *LB, llvm::Value *UB,
llvm::Value *ST) {
// Call __kmpc_dispatch_next(
// ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
// kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
// kmp_int[32|64] *p_stride);
llvm::Value *Args[] = {
emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
IL, // &isLastIter
LB, // &Lower
UB, // &Upper
ST // &Stride
};
llvm::Value *Call =
CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
return CGF.EmitScalarConversion(
Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
CGF.getContext().BoolTy);
}

void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
llvm::Value *NumThreads,
SourceLocation Loc) {
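For reference, the argument types assembled above correspond to runtime entry points whose C declarations look roughly as follows. This is a sketch for the 32-bit signed variants, inferred from the TypeParams arrays rather than copied from the OpenMP runtime headers, so treat the exact parameter names as assumptions:

// Sketch of the targeted libomp entry points (32-bit signed IV case);
// ident_t and kmp_int32 are the runtime's location and integer types.
void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedule,
                            kmp_int32 lower, kmp_int32 upper,
                            kmp_int32 stride, kmp_int32 chunk);
kmp_int32 __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid,
                                 kmp_int32 *p_lastiter, kmp_int32 *p_lower,
                                 kmp_int32 *p_upper, kmp_int32 *p_stride);

The _4u, _8, and _8u variants differ only in the width and signedness of the induction-variable arguments, which is exactly what the name selection in the two helpers encodes.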
27 changes: 27 additions & 0 deletions clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -222,6 +222,14 @@ class CGOpenMPRuntime {
/// \return Specified function.
llvm::Constant *createRuntimeFunction(OpenMPRTLFunction Function);

/// \brief Returns __kmpc_dispatch_init_* runtime function for the specified
/// size \a IVSize and sign \a IVSigned.
llvm::Constant *createDispatchInitFunction(unsigned IVSize, bool IVSigned);

/// \brief Returns __kmpc_dispatch_next_* runtime function for the specified
/// size \a IVSize and sign \a IVSigned.
llvm::Constant *createDispatchNextFunction(unsigned IVSize, bool IVSigned);

/// \brief If the specified mangled name is not in the module, create and
/// return threadprivate cache object. This object is a pointer's worth of
/// storage that's reserved for use by the OpenMP runtime.
@@ -400,6 +408,25 @@ class CGOpenMPRuntime {
virtual void emitForFinish(CodeGenFunction &CGF, SourceLocation Loc,
OpenMPScheduleClauseKind ScheduleKind);

/// Call __kmpc_dispatch_next(
/// ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
/// kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
/// kmp_int[32|64] *p_stride);
/// \param IVSize Size of the iteration variable in bits.
/// \param IVSigned Sign of the iteration variable.
/// \param IL Address of the output variable in which the flag of the
/// last iteration is returned.
/// \param LB Address of the output variable in which the lower iteration
/// number is returned.
/// \param UB Address of the output variable in which the upper iteration
/// number is returned.
/// \param ST Address of the output variable in which the stride value is
/// returned.
virtual llvm::Value *emitForNext(CodeGenFunction &CGF, SourceLocation Loc,
unsigned IVSize, bool IVSigned,
llvm::Value *IL, llvm::Value *LB,
llvm::Value *UB, llvm::Value *ST);

/// \brief Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
/// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
/// clause.
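The schedule value passed to these runtime calls is an OpenMPSchedType constant that mirrors the runtime's schedule enumeration; getRuntimeSchedule picks it from the schedule clause kind and the presence of a chunk expression. For orientation, the relevant constants are roughly the following (recalled from the runtime sources, not shown in this diff, so treat the numeric values as assumptions):

// Assumed values, mirroring the OpenMP runtime's schedule enumeration;
// not part of this patch.
enum OpenMPSchedTypeSketch {
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38
};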
84 changes: 68 additions & 16 deletions clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -490,16 +490,50 @@ void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
llvm::Value *ST, llvm::Value *IL,
llvm::Value *Chunk) {
auto &RT = CGM.getOpenMPRuntime();

// Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
const bool Dynamic = RT.isDynamic(ScheduleKind);

assert(!RT.isStaticNonchunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
"static non-chunked schedule does not need outer loop");
if (RT.isDynamic(ScheduleKind)) {
ErrorUnsupported(&S, "OpenMP loop with dynamic schedule");
return;
}

// Emit outer loop.
//
// OpenMP [2.7.1, Loop Construct, Description, table 2-1]
// When schedule(dynamic,chunk_size) is specified, the iterations are
// distributed to threads in the team in chunks as the threads request them.
// Each thread executes a chunk of iterations, then requests another chunk,
// until no chunks remain to be distributed. Each chunk contains chunk_size
// iterations, except for the last chunk to be distributed, which may have
// fewer iterations. When no chunk_size is specified, it defaults to 1.
//
// When schedule(guided,chunk_size) is specified, the iterations are assigned
// to threads in the team in chunks as the executing threads request them.
// Each thread executes a chunk of iterations, then requests another chunk,
// until no chunks remain to be assigned. For a chunk_size of 1, the size of
// each chunk is proportional to the number of unassigned iterations divided
// by the number of threads in the team, decreasing to 1. For a chunk_size
// with value k (greater than 1), the size of each chunk is determined in the
// same way, with the restriction that the chunks do not contain fewer than k
// iterations (except for the last chunk to be assigned, which may have fewer
// than k iterations).
//
// When schedule(auto) is specified, the decision regarding scheduling is
// delegated to the compiler and/or runtime system. The programmer gives the
// implementation the freedom to choose any possible mapping of iterations to
// threads in the team.
//
// When schedule(runtime) is specified, the decision regarding scheduling is
// deferred until run time, and the schedule and chunk size are taken from the
// run-sched-var ICV. If the ICV is set to auto, the schedule is
// implementation defined
//
// while(__kmpc_dispatch_next(&LB, &UB)) {
// idx = LB;
// while (idx <= UB) { BODY; ++idx; } // inner loop
// }
//
// OpenMP [2.7.1, Loop Construct, Description, table 2-1]
// When schedule(static, chunk_size) is specified, iterations are divided into
// chunks of size chunk_size, and the chunks are assigned to the threads in
// the team in a round-robin fashion in the order of the thread number.
@@ -510,12 +544,16 @@ void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
// UB = UB + ST;
// }
//

const Expr *IVExpr = S.getIterationVariable();
const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

RT.emitForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, IL, LB,
UB, ST, Chunk);
RT.emitForInit(
*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, IL, LB,
(Dynamic ? EmitAnyExpr(S.getLastIteration()).getScalarVal() : UB), ST,
Chunk);

auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

// Start the loop with a block that tests the condition.
@@ -524,12 +562,17 @@ void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
LoopStack.push(CondBlock);

llvm::Value *BoolCondVal = nullptr;
// UB = min(UB, GlobalUB)
EmitIgnoredExpr(S.getEnsureUpperBound());
// IV = LB
EmitIgnoredExpr(S.getInit());
// IV < UB
BoolCondVal = EvaluateExprAsBool(S.getCond(false));
if (!Dynamic) {
// UB = min(UB, GlobalUB)
EmitIgnoredExpr(S.getEnsureUpperBound());
// IV = LB
EmitIgnoredExpr(S.getInit());
// IV < UB
BoolCondVal = EvaluateExprAsBool(S.getCond(false));
} else {
BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned,
IL, LB, UB, ST);
}

// If there are any cleanups between here and the loop-exit scope,
// create a block to stage a loop exit along.
@@ -545,6 +588,11 @@ void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
}
EmitBlock(LoopBody);

// Emit "IV = LB" (in case of static schedule, we have already calculated new
// LB for loop condition and emitted it above).
if (Dynamic)
EmitIgnoredExpr(S.getInit());

// Create a block for the increment.
auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
@@ -557,17 +605,21 @@ void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,

EmitBlock(Continue.getBlock());
BreakContinueStack.pop_back();
// Emit "LB = LB + Stride", "UB = UB + Stride".
EmitIgnoredExpr(S.getNextLowerBound());
EmitIgnoredExpr(S.getNextUpperBound());
if (!Dynamic) {
// Emit "LB = LB + Stride", "UB = UB + Stride".
EmitIgnoredExpr(S.getNextLowerBound());
EmitIgnoredExpr(S.getNextUpperBound());
}

EmitBranch(CondBlock);
LoopStack.pop();
// Emit the fall-through block.
EmitBlock(LoopExit.getBlock());

// Tell the runtime we are done.
RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
// FIXME: Also call fini for ordered loops with dynamic scheduling.
if (!Dynamic)
RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
}

/// \brief Emit a helper variable and return corresponding lvalue.
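Putting the pieces together, for a dynamic schedule the outer loop emitted by EmitOMPForOuterLoop has roughly the control-flow shape below. This is an illustration in plain C with hypothetical locals, matching the pseudocode comment above, not generated code copied from a test:

// Illustrative shape of the code emitted for schedule(dynamic, chunk).
static void dispatch_loop_shape(ident_t *loc, kmp_int32 tid,
                                kmp_int32 last_iteration, kmp_int32 chunk) {
  kmp_int32 last = 0, lb = 0, ub = 0, stride = 0;
  // emitForInit: lower = 0, upper = GlobalUB, stride = 1, plus the chunk.
  __kmpc_dispatch_init_4(loc, tid, /*dynamic, chunked*/ 35,
                         0, last_iteration, 1, chunk);
  // emitForNext: returns false once no chunks remain.
  while (__kmpc_dispatch_next_4(loc, tid, &last, &lb, &ub, &stride)) {
    for (kmp_int32 iv = lb; iv <= ub; ++iv) {
      /* loop body */
    }
  }
  // No finish call is emitted on this path yet; see the FIXME above about
  // ordered loops with dynamic scheduling.
}

The static-chunked path instead keeps the strided while-loop sketched in the earlier comment, bumping LB and UB by the stride after each chunk and calling emitForFinish once the loop is done.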
(Diff of the fourth changed file is not shown here.)
