68#define DEBUG_TYPE "openmp-ir-builder"
75 cl::desc(
"Use optimistic attributes describing "
76 "'as-if' properties of runtime calls."),
80 "openmp-ir-builder-unroll-threshold-factor",
cl::Hidden,
81 cl::desc(
"Factor for the unroll threshold to account for code "
82 "simplifications still taking place"),
93 if (!IP1.isSet() || !IP2.isSet())
95 return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
100 switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
101 case OMPScheduleType::UnorderedStaticChunked:
102 case OMPScheduleType::UnorderedStatic:
103 case OMPScheduleType::UnorderedDynamicChunked:
104 case OMPScheduleType::UnorderedGuidedChunked:
105 case OMPScheduleType::UnorderedRuntime:
106 case OMPScheduleType::UnorderedAuto:
107 case OMPScheduleType::UnorderedTrapezoidal:
108 case OMPScheduleType::UnorderedGreedy:
109 case OMPScheduleType::UnorderedBalanced:
110 case OMPScheduleType::UnorderedGuidedIterativeChunked:
111 case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
112 case OMPScheduleType::UnorderedSteal:
113 case OMPScheduleType::UnorderedStaticBalancedChunked:
114 case OMPScheduleType::UnorderedGuidedSimd:
115 case OMPScheduleType::UnorderedRuntimeSimd:
116 case OMPScheduleType::OrderedStaticChunked:
117 case OMPScheduleType::OrderedStatic:
118 case OMPScheduleType::OrderedDynamicChunked:
119 case OMPScheduleType::OrderedGuidedChunked:
120 case OMPScheduleType::OrderedRuntime:
121 case OMPScheduleType::OrderedAuto:
122 case OMPScheduleType::OrderdTrapezoidal:
123 case OMPScheduleType::NomergeUnorderedStaticChunked:
124 case OMPScheduleType::NomergeUnorderedStatic:
125 case OMPScheduleType::NomergeUnorderedDynamicChunked:
126 case OMPScheduleType::NomergeUnorderedGuidedChunked:
127 case OMPScheduleType::NomergeUnorderedRuntime:
128 case OMPScheduleType::NomergeUnorderedAuto:
129 case OMPScheduleType::NomergeUnorderedTrapezoidal:
130 case OMPScheduleType::NomergeUnorderedGreedy:
131 case OMPScheduleType::NomergeUnorderedBalanced:
132 case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
133 case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
134 case OMPScheduleType::NomergeUnorderedSteal:
135 case OMPScheduleType::NomergeOrderedStaticChunked:
136 case OMPScheduleType::NomergeOrderedStatic:
137 case OMPScheduleType::NomergeOrderedDynamicChunked:
138 case OMPScheduleType::NomergeOrderedGuidedChunked:
139 case OMPScheduleType::NomergeOrderedRuntime:
140 case OMPScheduleType::NomergeOrderedAuto:
141 case OMPScheduleType::NomergeOrderedTrapezoidal:
142 case OMPScheduleType::OrderedDistributeChunked:
143 case OMPScheduleType::OrderedDistribute:
151 SchedType & OMPScheduleType::MonotonicityMask;
152 if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
164 Builder.restoreIP(IP);
174 Kernel->getFnAttribute(
"target-features").getValueAsString();
175 if (Features.
count(
"+wavefrontsize64"))
190 bool HasSimdModifier,
bool HasDistScheduleChunks) {
192 switch (ClauseKind) {
193 case OMP_SCHEDULE_Default:
194 case OMP_SCHEDULE_Static:
195 return HasChunks ? OMPScheduleType::BaseStaticChunked
196 : OMPScheduleType::BaseStatic;
197 case OMP_SCHEDULE_Dynamic:
198 return OMPScheduleType::BaseDynamicChunked;
199 case OMP_SCHEDULE_Guided:
200 return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
201 : OMPScheduleType::BaseGuidedChunked;
202 case OMP_SCHEDULE_Auto:
204 case OMP_SCHEDULE_Runtime:
205 return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
206 : OMPScheduleType::BaseRuntime;
207 case OMP_SCHEDULE_Distribute:
208 return HasDistScheduleChunks ? OMPScheduleType::BaseDistributeChunked
209 : OMPScheduleType::BaseDistribute;
217 bool HasOrderedClause) {
218 assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
219 OMPScheduleType::None &&
220 "Must not have ordering nor monotonicity flags already set");
223 ? OMPScheduleType::ModifierOrdered
224 : OMPScheduleType::ModifierUnordered;
225 OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;
228 if (OrderingScheduleType ==
229 (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
230 return OMPScheduleType::OrderedGuidedChunked;
231 else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
232 OMPScheduleType::ModifierOrdered))
233 return OMPScheduleType::OrderedRuntime;
235 return OrderingScheduleType;
241 bool HasSimdModifier,
bool HasMonotonic,
242 bool HasNonmonotonic,
bool HasOrderedClause) {
243 assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
244 OMPScheduleType::None &&
245 "Must not have monotonicity flags already set");
246 assert((!HasMonotonic || !HasNonmonotonic) &&
247 "Monotonic and Nonmonotonic are contradicting each other");
250 return ScheduleType | OMPScheduleType::ModifierMonotonic;
251 }
else if (HasNonmonotonic) {
252 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
262 if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
263 (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
269 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
277 bool HasSimdModifier,
bool HasMonotonicModifier,
278 bool HasNonmonotonicModifier,
bool HasOrderedClause,
279 bool HasDistScheduleChunks) {
281 ClauseKind, HasChunks, HasSimdModifier, HasDistScheduleChunks);
285 OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
286 HasNonmonotonicModifier, HasOrderedClause);
301 assert(!Br->isConditional() &&
302 "BB's terminator must be an unconditional branch (or degenerate)");
305 Br->setSuccessor(0,
Target);
310 NewBr->setDebugLoc(
DL);
316 "Target BB must not have PHI nodes");
336 NewBr->setDebugLoc(
DL);
344 spliceBB(Builder.saveIP(), New, CreateBranch,
DebugLoc);
348 Builder.SetInsertPoint(Old);
352 Builder.SetCurrentDebugLocation(
DebugLoc);
361 spliceBB(IP, New, CreateBranch,
DL);
362 New->replaceSuccessorsPhiUsesWith(Old, New);
371 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
373 Builder.SetInsertPoint(Builder.GetInsertBlock());
376 Builder.SetCurrentDebugLocation(
DebugLoc);
385 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
387 Builder.SetInsertPoint(Builder.GetInsertBlock());
390 Builder.SetCurrentDebugLocation(
DebugLoc);
397 return splitBB(Builder, CreateBranch, Old->
getName() + Suffix);
404 OpenMPIRBuilder::InsertPointTy OuterAllocaIP,
406 OpenMPIRBuilder::InsertPointTy InnerAllocaIP,
407 const Twine &Name =
"",
bool AsPtr =
true,
408 bool Is64Bit =
false) {
409 Builder.restoreIP(OuterAllocaIP);
413 Builder.CreateAlloca(IntTy,
nullptr, Name +
".addr");
417 FakeVal = FakeValAddr;
419 FakeVal = Builder.CreateLoad(IntTy, FakeValAddr, Name +
".val");
424 Builder.restoreIP(InnerAllocaIP);
427 UseFakeVal = Builder.CreateLoad(IntTy, FakeVal, Name +
".use");
430 FakeVal, Is64Bit ? Builder.getInt64(10) : Builder.getInt32(10)));
443enum OpenMPOffloadingRequiresDirFlags {
445 OMP_REQ_UNDEFINED = 0x000,
447 OMP_REQ_NONE = 0x001,
449 OMP_REQ_REVERSE_OFFLOAD = 0x002,
451 OMP_REQ_UNIFIED_ADDRESS = 0x004,
453 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
455 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
/// Default-construct a configuration with no OpenMP 'requires' directive
/// information recorded yet: RequiresFlags starts as OMP_REQ_UNDEFINED and is
/// refined later via the setHasRequires*() setters.
OpenMPIRBuilderConfig::OpenMPIRBuilderConfig()
    : RequiresFlags(OMP_REQ_UNDEFINED) {}
464OpenMPIRBuilderConfig::OpenMPIRBuilderConfig(
465 bool IsTargetDevice,
bool IsGPU,
bool OpenMPOffloadMandatory,
466 bool HasRequiresReverseOffload,
bool HasRequiresUnifiedAddress,
467 bool HasRequiresUnifiedSharedMemory,
bool HasRequiresDynamicAllocators)
468 : IsTargetDevice(IsTargetDevice), IsGPU(IsGPU),
469 OpenMPOffloadMandatory(OpenMPOffloadMandatory),
470 RequiresFlags(OMP_REQ_UNDEFINED) {
471 if (HasRequiresReverseOffload)
472 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
473 if (HasRequiresUnifiedAddress)
474 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
475 if (HasRequiresUnifiedSharedMemory)
476 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
477 if (HasRequiresDynamicAllocators)
478 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
481bool OpenMPIRBuilderConfig::hasRequiresReverseOffload()
const {
482 return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD;
485bool OpenMPIRBuilderConfig::hasRequiresUnifiedAddress()
const {
486 return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS;
489bool OpenMPIRBuilderConfig::hasRequiresUnifiedSharedMemory()
const {
490 return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;
493bool OpenMPIRBuilderConfig::hasRequiresDynamicAllocators()
const {
494 return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS;
497int64_t OpenMPIRBuilderConfig::getRequiresFlags()
const {
498 return hasRequiresFlags() ? RequiresFlags
499 :
static_cast<int64_t
>(OMP_REQ_NONE);
502void OpenMPIRBuilderConfig::setHasRequiresReverseOffload(
bool Value) {
504 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
506 RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD;
509void OpenMPIRBuilderConfig::setHasRequiresUnifiedAddress(
bool Value) {
511 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
513 RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS;
516void OpenMPIRBuilderConfig::setHasRequiresUnifiedSharedMemory(
bool Value) {
518 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
520 RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY;
523void OpenMPIRBuilderConfig::setHasRequiresDynamicAllocators(
bool Value) {
525 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
527 RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS;
534void OpenMPIRBuilder::getKernelArgsVector(TargetKernelArgs &KernelArgs,
538 Value *PointerNum = Builder.getInt32(KernelArgs.NumTargetItems);
540 constexpr size_t MaxDim = 3;
543 Value *HasNoWaitFlag = Builder.getInt64(KernelArgs.HasNoWait);
545 Value *DynCGroupMemFallbackFlag =
546 Builder.getInt64(
static_cast<uint64_t>(KernelArgs.DynCGroupMemFallback));
547 DynCGroupMemFallbackFlag = Builder.CreateShl(DynCGroupMemFallbackFlag, 2);
548 Value *Flags = Builder.CreateOr(HasNoWaitFlag, DynCGroupMemFallbackFlag);
550 assert(!KernelArgs.NumTeams.empty() && !KernelArgs.NumThreads.empty());
553 Builder.CreateInsertValue(ZeroArray, KernelArgs.NumTeams[0], {0});
554 Value *NumThreads3D =
555 Builder.CreateInsertValue(ZeroArray, KernelArgs.NumThreads[0], {0});
557 seq<unsigned>(1, std::min(KernelArgs.NumTeams.size(), MaxDim)))
559 Builder.CreateInsertValue(NumTeams3D, KernelArgs.NumTeams[
I], {I});
561 seq<unsigned>(1, std::min(KernelArgs.NumThreads.size(), MaxDim)))
563 Builder.CreateInsertValue(NumThreads3D, KernelArgs.NumThreads[
I], {I});
565 ArgsVector = {Version,
567 KernelArgs.RTArgs.BasePointersArray,
568 KernelArgs.RTArgs.PointersArray,
569 KernelArgs.RTArgs.SizesArray,
570 KernelArgs.RTArgs.MapTypesArray,
571 KernelArgs.RTArgs.MapNamesArray,
572 KernelArgs.RTArgs.MappersArray,
573 KernelArgs.NumIterations,
577 KernelArgs.DynCGroupMem};
585 auto FnAttrs =
Attrs.getFnAttrs();
586 auto RetAttrs =
Attrs.getRetAttrs();
588 for (
size_t ArgNo = 0; ArgNo < Fn.
arg_size(); ++ArgNo)
593 bool Param =
true) ->
void {
594 bool HasSignExt = AS.hasAttribute(Attribute::SExt);
595 bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
596 if (HasSignExt || HasZeroExt) {
597 assert(AS.getNumAttributes() == 1 &&
598 "Currently not handling extension attr combined with others.");
600 if (
auto AK = TargetLibraryInfo::getExtAttrForI32Param(
T, HasSignExt))
603 TargetLibraryInfo::getExtAttrForI32Return(
T, HasSignExt))
610#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
611#include "llvm/Frontend/OpenMP/OMPKinds.def"
615#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
617 FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
618 addAttrSet(RetAttrs, RetAttrSet, false); \
619 for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
620 addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]); \
621 Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
623#include "llvm/Frontend/OpenMP/OMPKinds.def"
637#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
639 FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
641 Fn = M.getFunction(Str); \
643#include "llvm/Frontend/OpenMP/OMPKinds.def"
649#define OMP_RTL(Enum, Str, ...) \
651 Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
653#include "llvm/Frontend/OpenMP/OMPKinds.def"
657 if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
667 LLVMContext::MD_callback,
669 2, {-1, -1},
true)}));
675 addAttributes(FnID, *Fn);
682 assert(Fn &&
"Failed to create OpenMP runtime function");
688OpenMPIRBuilder::FinalizationInfo::getFiniBB(
IRBuilderBase &Builder) {
693 Builder.SetInsertPoint(FiniBB);
695 if (
Error Err = FiniCB(Builder.saveIP()))
705 FiniBB = OtherFiniBB;
707 Builder.SetInsertPoint(FiniBB->getFirstNonPHIIt());
708 if (
Error Err = FiniCB(Builder.saveIP()))
715 auto EndIt = FiniBB->end();
716 if (FiniBB->size() >= 1)
717 if (
auto Prev = std::prev(EndIt); Prev->isTerminator())
722 FiniBB->replaceAllUsesWith(OtherFiniBB);
723 FiniBB->eraseFromParent();
724 FiniBB = OtherFiniBB;
731 assert(Fn &&
"Failed to create OpenMP runtime function pointer");
738 CallInst *
Call = Builder.CreateCall(Callee, Args, Name);
739 Call->setCallingConv(Config.getRuntimeCC());
743void OpenMPIRBuilder::initialize() { initializeTypes(M); }
754 for (
auto Inst =
Block->getReverseIterator()->begin();
755 Inst !=
Block->getReverseIterator()->end();) {
784 Block.getParent()->getEntryBlock().getTerminator()->getIterator();
790void OpenMPIRBuilder::finalize(
Function *Fn) {
794 for (OutlineInfo &OI : OutlineInfos) {
797 if (Fn && OI.getFunction() != Fn) {
802 ParallelRegionBlockSet.
clear();
804 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
814 bool ArgsInZeroAddressSpace = Config.isTargetDevice();
823 ".omp_par", ArgsInZeroAddressSpace);
827 <<
" Exit: " << OI.ExitBB->getName() <<
"\n");
828 assert(Extractor.isEligible() &&
829 "Expected OpenMP outlining to be possible!");
831 for (
auto *V : OI.ExcludeArgsFromAggregate)
832 Extractor.excludeArgFromAggregate(V);
835 Extractor.extractCodeRegion(CEAC, OI.Inputs, OI.Outputs);
839 if (TargetCpuAttr.isStringAttribute())
842 auto TargetFeaturesAttr = OuterFn->
getFnAttribute(
"target-features");
843 if (TargetFeaturesAttr.isStringAttribute())
844 OutlinedFn->
addFnAttr(TargetFeaturesAttr);
847 LLVM_DEBUG(
dbgs() <<
" Outlined function: " << *OutlinedFn <<
"\n");
849 "OpenMP outlined functions should not return a value!");
854 M.getFunctionList().insertAfter(OuterFn->
getIterator(), OutlinedFn);
861 assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
868 "Expected instructions to add in the outlined region entry");
870 End = ArtificialEntry.
rend();
875 if (
I.isTerminator()) {
877 if (OI.EntryBB->getTerminator())
878 OI.EntryBB->getTerminator()->adoptDbgRecords(
879 &ArtificialEntry,
I.getIterator(),
false);
883 I.moveBeforePreserving(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
886 OI.EntryBB->moveBefore(&ArtificialEntry);
893 if (OI.PostOutlineCB)
894 OI.PostOutlineCB(*OutlinedFn);
896 if (OI.FixUpNonEntryAllocas) {
899 if (PostDomTree.properlyDominates(&BB, &OutlinedFn->getEntryBlock()))
905 OutlineInfos = std::move(DeferredOutlines);
926 for (
Function *
F : ConstantAllocaRaiseCandidates)
929 EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
930 [](EmitMetadataErrorKind Kind,
931 const TargetRegionEntryInfo &EntryInfo) ->
void {
932 errs() <<
"Error of kind: " << Kind
933 <<
" when emitting offload entries and metadata during "
934 "OMPIRBuilder finalization \n";
937 if (!OffloadInfoManager.empty())
938 createOffloadEntriesAndInfoMetadata(ErrorReportFn);
940 if (Config.EmitLLVMUsedMetaInfo.value_or(
false)) {
941 std::vector<WeakTrackingVH> LLVMCompilerUsed = {
942 M.getGlobalVariable(
"__openmp_nvptx_data_transfer_temporary_storage")};
943 emitUsed(
"llvm.compiler.used", LLVMCompilerUsed);
949bool OpenMPIRBuilder::isFinalized() {
return IsFinalized; }
951OpenMPIRBuilder::~OpenMPIRBuilder() {
952 assert(OutlineInfos.empty() &&
"There must be no outstanding outlinings");
960 ConstantInt::get(I32Ty,
Value), Name);
972 UsedArray.
resize(List.size());
973 for (
unsigned I = 0,
E = List.size();
I !=
E; ++
I)
977 if (UsedArray.
empty())
984 GV->setSection(
"llvm.metadata");
988OpenMPIRBuilder::emitKernelExecutionMode(
StringRef KernelName,
990 auto *Int8Ty = Builder.getInt8Ty();
993 ConstantInt::get(Int8Ty,
Mode),
Twine(KernelName,
"_exec_mode"));
1001 unsigned Reserve2Flags) {
1003 LocFlags |= OMP_IDENT_FLAG_KMPC;
1006 IdentMap[{SrcLocStr,
uint64_t(LocFlags) << 31 | Reserve2Flags}];
1010 ConstantInt::get(Int32,
uint32_t(LocFlags)),
1011 ConstantInt::get(Int32, Reserve2Flags),
1012 ConstantInt::get(Int32, SrcLocStrSize), SrcLocStr};
1014 size_t SrcLocStrArgIdx = 4;
1015 if (OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx)
1019 SrcLocStr, OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx));
1026 if (
GV.getValueType() == OpenMPIRBuilder::Ident &&
GV.hasInitializer())
1027 if (
GV.getInitializer() == Initializer)
1032 M, OpenMPIRBuilder::Ident,
1035 M.getDataLayout().getDefaultGlobalsAddressSpace());
1047 SrcLocStrSize = LocStr.
size();
1048 Constant *&SrcLocStr = SrcLocStrMap[LocStr];
1056 if (
GV.isConstant() &&
GV.hasInitializer() &&
1057 GV.getInitializer() == Initializer)
1060 SrcLocStr = Builder.CreateGlobalString(
1061 LocStr,
"", M.getDataLayout().getDefaultGlobalsAddressSpace(),
1069 unsigned Line,
unsigned Column,
1075 Buffer.
append(FunctionName);
1077 Buffer.
append(std::to_string(Line));
1079 Buffer.
append(std::to_string(Column));
1082 return getOrCreateSrcLocStr(Buffer.
str(), SrcLocStrSize);
1086OpenMPIRBuilder::getOrCreateDefaultSrcLocStr(
uint32_t &SrcLocStrSize) {
1087 StringRef UnknownLoc =
";unknown;unknown;0;0;;";
1088 return getOrCreateSrcLocStr(UnknownLoc, SrcLocStrSize);
1096 return getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1098 if (
DIFile *DIF = DIL->getFile())
1099 if (std::optional<StringRef> Source = DIF->getSource())
1104 return getOrCreateSrcLocStr(
Function, FileName, DIL->getLine(),
1105 DIL->getColumn(), SrcLocStrSize);
1108Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(
const LocationDescription &
Loc,
1110 return getOrCreateSrcLocStr(
Loc.DL, SrcLocStrSize,
1111 Loc.IP.getBlock()->getParent());
1114Value *OpenMPIRBuilder::getOrCreateThreadID(
Value *Ident) {
1115 return createRuntimeFunctionCall(
1116 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident,
1117 "omp_global_thread_num");
1120OpenMPIRBuilder::InsertPointOrErrorTy
1121OpenMPIRBuilder::createBarrier(
const LocationDescription &
Loc,
Directive Kind,
1122 bool ForceSimpleCall,
bool CheckCancelFlag) {
1123 if (!updateToLocation(
Loc))
1132 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
1135 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
1138 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
1141 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
1144 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
1149 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1151 getOrCreateIdent(SrcLocStr, SrcLocStrSize, BarrierLocFlags),
1152 getOrCreateThreadID(getOrCreateIdent(SrcLocStr, SrcLocStrSize))};
1157 bool UseCancelBarrier =
1158 !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel);
1161 getOrCreateRuntimeFunctionPtr(UseCancelBarrier
1162 ? OMPRTL___kmpc_cancel_barrier
1163 : OMPRTL___kmpc_barrier),
1166 if (UseCancelBarrier && CheckCancelFlag)
1167 if (
Error Err = emitCancelationCheckImpl(Result, OMPD_parallel))
1170 return Builder.saveIP();
1173OpenMPIRBuilder::InsertPointOrErrorTy
1174OpenMPIRBuilder::createCancel(
const LocationDescription &
Loc,
1176 omp::Directive CanceledDirective) {
1177 if (!updateToLocation(
Loc))
1181 auto *UI = Builder.CreateUnreachable();
1189 Builder.SetInsertPoint(ElseTI);
1190 auto ElseIP = Builder.saveIP();
1192 InsertPointOrErrorTy IPOrErr = createCancellationPoint(
1193 LocationDescription{ElseIP,
Loc.DL}, CanceledDirective);
1198 Builder.SetInsertPoint(ThenTI);
1200 Value *CancelKind =
nullptr;
1201 switch (CanceledDirective) {
1202#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1203 case DirectiveEnum: \
1204 CancelKind = Builder.getInt32(Value); \
1206#include "llvm/Frontend/OpenMP/OMPKinds.def"
1212 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1213 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1214 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
1216 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);
1219 if (
Error Err = emitCancelationCheckImpl(Result, CanceledDirective))
1223 Builder.SetInsertPoint(UI->getParent());
1224 UI->eraseFromParent();
1226 return Builder.saveIP();
1229OpenMPIRBuilder::InsertPointOrErrorTy
1230OpenMPIRBuilder::createCancellationPoint(
const LocationDescription &
Loc,
1231 omp::Directive CanceledDirective) {
1232 if (!updateToLocation(
Loc))
1236 auto *UI = Builder.CreateUnreachable();
1237 Builder.SetInsertPoint(UI);
1239 Value *CancelKind =
nullptr;
1240 switch (CanceledDirective) {
1241#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1242 case DirectiveEnum: \
1243 CancelKind = Builder.getInt32(Value); \
1245#include "llvm/Frontend/OpenMP/OMPKinds.def"
1251 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1252 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1253 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
1255 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancellationpoint), Args);
1258 if (
Error Err = emitCancelationCheckImpl(Result, CanceledDirective))
1262 Builder.SetInsertPoint(UI->getParent());
1263 UI->eraseFromParent();
1265 return Builder.saveIP();
1268OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetKernel(
1269 const LocationDescription &
Loc, InsertPointTy AllocaIP,
Value *&Return,
1272 if (!updateToLocation(
Loc))
1275 Builder.restoreIP(AllocaIP);
1276 auto *KernelArgsPtr =
1277 Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs,
nullptr,
"kernel_args");
1278 updateToLocation(
Loc);
1282 Builder.CreateStructGEP(OpenMPIRBuilder::KernelArgs, KernelArgsPtr,
I);
1283 Builder.CreateAlignedStore(
1285 M.getDataLayout().getPrefTypeAlign(KernelArgs[
I]->getType()));
1289 NumThreads, HostPtr, KernelArgsPtr};
1291 Return = createRuntimeFunctionCall(
1292 getOrCreateRuntimeFunction(M, OMPRTL___tgt_target_kernel),
1295 return Builder.saveIP();
1298OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitKernelLaunch(
1299 const LocationDescription &
Loc,
Value *OutlinedFnID,
1300 EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args,
1301 Value *DeviceID,
Value *RTLoc, InsertPointTy AllocaIP) {
1303 if (!updateToLocation(
Loc))
1316 assert(OutlinedFnID &&
"Invalid outlined function ID!");
1320 Value *Return =
nullptr;
1324 getKernelArgsVector(Args, Builder, ArgsVector);
1339 Builder.restoreIP(emitTargetKernel(
1340 Builder, AllocaIP, Return, RTLoc, DeviceID,
Args.NumTeams.front(),
1341 Args.NumThreads.front(), OutlinedFnID, ArgsVector));
1348 Builder.CreateCondBr(
Failed, OffloadFailedBlock, OffloadContBlock);
1350 auto CurFn = Builder.GetInsertBlock()->getParent();
1351 emitBlock(OffloadFailedBlock, CurFn);
1352 InsertPointOrErrorTy AfterIP = EmitTargetCallFallbackCB(Builder.saveIP());
1354 return AfterIP.takeError();
1355 Builder.restoreIP(*AfterIP);
1356 emitBranch(OffloadContBlock);
1357 emitBlock(OffloadContBlock, CurFn,
true);
1358 return Builder.saveIP();
1361Error OpenMPIRBuilder::emitCancelationCheckImpl(
1362 Value *CancelFlag, omp::Directive CanceledDirective) {
1363 assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
1364 "Unexpected cancellation!");
1369 if (Builder.GetInsertPoint() == BB->
end()) {
1375 NonCancellationBlock =
SplitBlock(BB, &*Builder.GetInsertPoint());
1377 Builder.SetInsertPoint(BB);
1383 Value *
Cmp = Builder.CreateIsNull(CancelFlag);
1384 Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
1389 auto &FI = FinalizationStack.back();
1393 Builder.SetInsertPoint(CancellationBlock);
1394 Builder.CreateBr(*FiniBBOrErr);
1397 Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->
begin());
1416 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1419 "Expected at least tid and bounded tid as arguments");
1420 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1423 assert(CI &&
"Expected call instruction to outlined function");
1424 CI->
getParent()->setName(
"omp_parallel");
1426 Builder.SetInsertPoint(CI);
1427 Type *PtrTy = OMPIRBuilder->VoidPtr;
1431 OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.saveIP();
1435 Value *Args = ArgsAlloca;
1439 Args = Builder.CreatePointerCast(ArgsAlloca, PtrTy);
1440 Builder.restoreIP(CurrentIP);
1443 for (
unsigned Idx = 0; Idx < NumCapturedVars; Idx++) {
1445 Value *StoreAddress = Builder.CreateConstInBoundsGEP2_64(
1447 Builder.CreateStore(V, StoreAddress);
1451 IfCondition ? Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32)
1452 : Builder.getInt32(1);
1455 Value *Parallel60CallArgs[] = {
1459 NumThreads ? NumThreads : Builder.getInt32(-1),
1460 Builder.getInt32(-1),
1464 Builder.getInt64(NumCapturedVars),
1465 Builder.getInt32(0)};
1468 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_parallel_60);
1470 OMPIRBuilder->createRuntimeFunctionCall(RTLFn, Parallel60CallArgs);
1473 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1476 Builder.SetInsertPoint(PrivTID);
1478 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1485 I->eraseFromParent();
1502 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call_if);
1505 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
1508 if (!
F->hasMetadata(LLVMContext::MD_callback)) {
1516 F->addMetadata(LLVMContext::MD_callback,
1525 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1528 "Expected at least tid and bounded tid as arguments");
1529 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1532 CI->
getParent()->setName(
"omp_parallel");
1533 Builder.SetInsertPoint(CI);
1536 Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
1540 RealArgs.
append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
1542 Value *
Cond = Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32);
1549 auto PtrTy = OMPIRBuilder->VoidPtr;
1550 if (IfCondition && NumCapturedVars == 0) {
1555 OMPIRBuilder->createRuntimeFunctionCall(RTLFn, RealArgs);
1558 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1561 Builder.SetInsertPoint(PrivTID);
1563 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1570 I->eraseFromParent();
1574OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel(
1575 const LocationDescription &
Loc, InsertPointTy OuterAllocaIP,
1576 BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
1577 FinalizeCallbackTy FiniCB,
Value *IfCondition,
Value *NumThreads,
1578 omp::ProcBindKind ProcBind,
bool IsCancellable) {
1581 if (!updateToLocation(
Loc))
1585 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1586 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1587 Value *ThreadID = getOrCreateThreadID(Ident);
1593 bool ArgsInZeroAddressSpace = Config.isTargetDevice();
1597 if (NumThreads && !Config.isTargetDevice()) {
1600 Builder.CreateIntCast(NumThreads, Int32,
false)};
1601 createRuntimeFunctionCall(
1602 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args);
1605 if (ProcBind != OMP_PROC_BIND_default) {
1609 ConstantInt::get(Int32,
unsigned(ProcBind),
true)};
1610 createRuntimeFunctionCall(
1611 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args);
1614 BasicBlock *InsertBB = Builder.GetInsertBlock();
1619 BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();
1627 InsertPointTy NewOuter(OuterAllocaBlock, OuterAllocaBlock->
begin());
1628 Builder.restoreIP(NewOuter);
1629 AllocaInst *TIDAddrAlloca = Builder.CreateAlloca(Int32,
nullptr,
"tid.addr");
1631 Builder.CreateAlloca(Int32,
nullptr,
"zero.addr");
1634 if (ArgsInZeroAddressSpace && M.getDataLayout().getAllocaAddrSpace() != 0) {
1637 TIDAddrAlloca, PointerType ::get(M.getContext(), 0),
"tid.addr.ascast");
1641 PointerType ::get(M.getContext(), 0),
1642 "zero.addr.ascast");
1663 auto FiniCBWrapper = [&](InsertPointTy IP) {
1668 Builder.restoreIP(IP);
1670 IP = InsertPointTy(
I->getParent(),
I->getIterator());
1674 "Unexpected insertion point for finalization call!");
1678 FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});
1683 InsertPointTy InnerAllocaIP = Builder.saveIP();
1686 Builder.CreateAlloca(Int32,
nullptr,
"tid.addr.local");
1687 Instruction *PrivTID = Builder.CreateLoad(Int32, PrivTIDAddr,
"tid");
1690 ToBeDeleted.
push_back(Builder.CreateLoad(Int32, TIDAddr,
"tid.addr.use"));
1692 Builder.CreateLoad(Int32, ZeroAddr,
"zero.addr.use");
1710 LLVM_DEBUG(
dbgs() <<
"Before body codegen: " << *OuterFn <<
"\n");
1713 assert(BodyGenCB &&
"Expected body generation callback!");
1714 InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->
begin());
1715 if (
Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP))
1718 LLVM_DEBUG(
dbgs() <<
"After body codegen: " << *OuterFn <<
"\n");
1721 if (Config.isTargetDevice()) {
1723 OI.PostOutlineCB = [=, ToBeDeletedVec =
1724 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1726 IfCondition, NumThreads, PrivTID, PrivTIDAddr,
1727 ThreadID, ToBeDeletedVec);
1729 OI.FixUpNonEntryAllocas =
true;
1732 OI.PostOutlineCB = [=, ToBeDeletedVec =
1733 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1735 PrivTID, PrivTIDAddr, ToBeDeletedVec);
1737 OI.FixUpNonEntryAllocas =
true;
1740 OI.OuterAllocaBB = OuterAllocaBlock;
1741 OI.EntryBB = PRegEntryBB;
1742 OI.ExitBB = PRegExitBB;
1746 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
1757 ".omp_par", ArgsInZeroAddressSpace);
1762 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1764 Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands,
1769 return GV->getValueType() == OpenMPIRBuilder::Ident;
1774 LLVM_DEBUG(
dbgs() <<
"Before privatization: " << *OuterFn <<
"\n");
1777 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num);
1780 if (&V == TIDAddr || &V == ZeroAddr) {
1781 OI.ExcludeArgsFromAggregate.push_back(&V);
1786 for (
Use &U : V.uses())
1788 if (ParallelRegionBlockSet.
count(UserI->getParent()))
1798 if (!V.getType()->isPointerTy()) {
1802 Builder.restoreIP(OuterAllocaIP);
1804 Builder.CreateAlloca(V.getType(),
nullptr, V.getName() +
".reloaded");
1808 Builder.SetInsertPoint(InsertBB,
1810 Builder.CreateStore(&V, Ptr);
1813 Builder.restoreIP(InnerAllocaIP);
1814 Inner = Builder.CreateLoad(V.getType(), Ptr);
1817 Value *ReplacementValue =
nullptr;
1820 ReplacementValue = PrivTID;
1822 InsertPointOrErrorTy AfterIP =
1823 PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue);
1825 return AfterIP.takeError();
1826 Builder.restoreIP(*AfterIP);
1828 InnerAllocaIP.getBlock(),
1829 InnerAllocaIP.getBlock()->getTerminator()->getIterator()};
1831 assert(ReplacementValue &&
1832 "Expected copy/create callback to set replacement value!");
1833 if (ReplacementValue == &V)
1838 UPtr->set(ReplacementValue);
1863 for (
Value *Output : Outputs)
1866 assert(Outputs.empty() &&
1867 "OpenMP outlining should not produce live-out values!");
1869 LLVM_DEBUG(
dbgs() <<
"After privatization: " << *OuterFn <<
"\n");
1871 for (
auto *BB : Blocks)
1878 auto FiniInfo = FinalizationStack.pop_back_val();
1880 assert(FiniInfo.DK == OMPD_parallel &&
1881 "Unexpected finalization stack state!");
1885 InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->
getIterator());
1891 Builder.restoreIP(PreFiniIP);
1892 Builder.CreateBr(*FiniBBOrErr);
1895 if (
Instruction *Term = Builder.GetInsertBlock()->getTerminator())
1896 Term->eraseFromParent();
1900 addOutlineInfo(std::move(OI));
1902 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
1903 UI->eraseFromParent();
1908void OpenMPIRBuilder::emitFlush(
const LocationDescription &
Loc) {
1911 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1912 Value *
Args[] = {getOrCreateIdent(SrcLocStr, SrcLocStrSize)};
1914 createRuntimeFunctionCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush),
// Public entry for "#pragma omp flush": bail out if the location is invalid,
// otherwise (in elided lines) delegate to emitFlush.
1918void OpenMPIRBuilder::createFlush(
const LocationDescription &
Loc) {
1919 if (!updateToLocation(
Loc))
// Emit a call to __kmpc_omp_taskwait(ident, global_tid) for Loc.
1924void OpenMPIRBuilder::emitTaskwaitImpl(
const LocationDescription &
Loc) {
1928 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1929 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
// Runtime signature: (ident_t *, kmp_int32 gtid).
1930 Value *
Args[] = {Ident, getOrCreateThreadID(Ident)};
1933 createRuntimeFunctionCall(
1934 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait), Args);
// Public entry for "#pragma omp taskwait": validate the insertion location,
// then emit the runtime call via emitTaskwaitImpl.
1937void OpenMPIRBuilder::createTaskwait(
const LocationDescription &
Loc) {
1938 if (!updateToLocation(
Loc))
1940 emitTaskwaitImpl(
Loc);
// Emit a call to __kmpc_omp_taskyield(ident, gtid, end_part) for Loc.
1943void OpenMPIRBuilder::emitTaskyieldImpl(
const LocationDescription &
Loc) {
1946 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1947 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
// I32Null (defined in elided lines) is passed as the trailing i32 argument.
1949 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), I32Null};
1951 createRuntimeFunctionCall(
1952 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield), Args);
// Public entry for "#pragma omp taskyield": validate the insertion location,
// then emit the runtime call via emitTaskyieldImpl.
1955void OpenMPIRBuilder::createTaskyield(
const LocationDescription &
Loc) {
1956 if (!updateToLocation(
Loc))
1958 emitTaskyieldImpl(
Loc);
// NOTE(review): the opening signature line of this helper is elided by
// extraction; it takes an OpenMPIRBuilder and a list of task Dependencies and
// materializes a kmp_depend_info array (".dep.arr.addr") describing them.
1967 OpenMPIRBuilder &OMPBuilder,
// Nothing to do when the task carries no depend clauses.
1970 if (Dependencies.
empty())
1990 Type *DependInfo = OMPBuilder.DependInfo;
1991 Module &M = OMPBuilder.M;
1993 Value *DepArray =
nullptr;
// Allocate the dependency array in the function entry block, then restore
// the caller's insertion point.
1994 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
1995 Builder.SetInsertPoint(
1996 OldIP.getBlock()->getParent()->getEntryBlock().getTerminator());
1999 DepArray = Builder.CreateAlloca(DepArrayTy,
nullptr,
".dep.arr.addr");
2001 Builder.restoreIP(OldIP);
// Fill one kmp_depend_info entry (base address, length, flags) per clause.
2003 for (
const auto &[DepIdx, Dep] :
enumerate(Dependencies)) {
2005 Builder.CreateConstInBoundsGEP2_64(DepArrayTy, DepArray, 0, DepIdx);
// deps[i].base_addr = ptrtoint of the dependent value.
2007 Value *Addr = Builder.CreateStructGEP(
2009 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
2010 Value *DepValPtr = Builder.CreatePtrToInt(Dep.DepVal, Builder.getInt64Ty());
2011 Builder.CreateStore(DepValPtr, Addr);
// deps[i].len = store size of the dependent type.
2014 DependInfo,
Base,
static_cast<unsigned int>(RTLDependInfoFields::Len));
2015 Builder.CreateStore(
2016 Builder.getInt64(M.getDataLayout().getTypeStoreSize(Dep.DepValueType)),
// deps[i].flags = the dependence kind (in/out/inout...) as an i8.
2019 Value *Flags = Builder.CreateStructGEP(
2021 static_cast<unsigned int>(RTLDependInfoFields::Flags));
2022 Builder.CreateStore(
2023 ConstantInt::get(Builder.getInt8Ty(),
2024 static_cast<unsigned int>(Dep.DepKind)),
// NOTE(review): signature start elided by extraction. This builds the
// "omp_taskloop_dup" function the taskloop runtime calls to copy task-private
// context from a source task to a destination task, invoking DupCB for the
// user-specified duplication logic.
2032 Type *PrivatesTy, int32_t PrivatesIndex, TaskDupCallbackTy DupCB) {
2033 unsigned ProgramAddressSpace = M.getDataLayout().getProgramAddressSpace();
// void omp_taskloop_dup(void *dest_task, void *src_task, i32 lastprivate_flag)
2045 auto *VoidPtrTy =
PointerType::get(Builder.getContext(), ProgramAddressSpace);
2048 Builder.getVoidTy(), {VoidPtrTy, VoidPtrTy, Builder.getInt32Ty()},
2052 "omp_taskloop_dup", M);
2055 Value *LastprivateFlagArg = DupFunction->
getArg(2);
2056 DestTaskArg->
setName(
"dest_task");
2057 SrcTaskArg->
setName(
"src_task");
2058 LastprivateFlagArg->
setName(
"lastprivate_flag");
2061 Builder.SetInsertPoint(
// Given a task pointer argument, GEP to its privates area (field 1) and then
// to the context slot at PrivatesIndex inside PrivatesTy.
2064 auto GetTaskContextPtrFromArg = [&](
Value *Arg) ->
Value * {
2065 Type *TaskWithPrivatesTy =
2067 Value *TaskPrivates = Builder.CreateGEP(
2068 TaskWithPrivatesTy, Arg, {Builder.getInt32(0), Builder.getInt32(1)});
2069 Value *ContextPtr = Builder.CreateGEP(
2070 PrivatesTy, TaskPrivates,
2071 {Builder.getInt32(0), Builder.getInt32(PrivatesIndex)});
2075 Value *DestTaskContextPtr = GetTaskContextPtrFromArg(DestTaskArg);
2076 Value *SrcTaskContextPtr = GetTaskContextPtrFromArg(SrcTaskArg);
2078 DestTaskContextPtr->
setName(
"destPtr");
2079 SrcTaskContextPtr->
setName(
"srcPtr");
// Let the caller-provided callback emit the actual copy code, then terminate
// the function with ret void.
2083 InsertPointTy CodeGenIP = Builder.saveIP();
2085 DupCB(AllocaIP, CodeGenIP, DestTaskContextPtr, SrcTaskContextPtr);
2086 if (!AfterIPOrError)
2088 Builder.restoreIP(*AfterIPOrError);
2090 Builder.CreateRetVoid();
// Generate IR for "#pragma omp taskloop": outline the loop body into a task
// entry function, allocate the task with __kmpc_omp_task_alloc, and dispatch
// it through __kmpc_taskloop inside an implicit taskgroup.
// NOTE(review): many interior lines are elided by extraction; comments below
// describe only what the visible code shows.
2095OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
2096 const LocationDescription &
Loc, InsertPointTy AllocaIP,
2097 BodyGenCallbackTy BodyGenCB,
2100 TaskDupCallbackTy DupCB,
Value *TaskContextStructPtrVal) {
2102 if (!updateToLocation(
Loc))
2103 return InsertPointTy();
2106 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
2107 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
// Carve the current block into exit / body / alloca blocks for outlining.
2110 splitBB(Builder,
true,
"taskloop.exit");
2112 splitBB(Builder,
true,
"taskloop.body");
2114 splitBB(Builder,
true,
"taskloop.alloca");
2116 InsertPointTy TaskloopAllocaIP =
2117 InsertPointTy(TaskloopAllocaBB, TaskloopAllocaBB->
begin());
2118 InsertPointTy TaskloopBodyIP =
2119 InsertPointTy(TaskloopBodyBB, TaskloopBodyBB->
begin());
// Let the front end emit the canonical loop into the body block.
2121 if (
Error Err = BodyGenCB(TaskloopAllocaIP, TaskloopBodyIP))
2129 llvm::CanonicalLoopInfo *CLI = result.
get();
2131 OI.EntryBB = TaskloopAllocaBB;
2132 OI.OuterAllocaBB = AllocaIP.getBlock();
2133 OI.ExitBB = TaskloopExitBB;
2139 Builder, AllocaIP, ToBeDeleted, TaskloopAllocaIP,
"global.tid",
false));
// Fake lb/ub/step values stand in for the real bounds so they become inputs
// of the outlined function; they are replaced after outlining (see below).
2141 TaskloopAllocaIP,
"lb",
false,
true);
2143 TaskloopAllocaIP,
"ub",
false,
true);
2145 TaskloopAllocaIP,
"step",
false,
true);
2148 OI.Inputs.insert(FakeLB);
2149 OI.Inputs.insert(FakeUB);
2150 OI.Inputs.insert(FakeStep);
2151 if (TaskContextStructPtrVal)
2152 OI.Inputs.insert(TaskContextStructPtrVal);
// Context pointer and duplication callback must be provided together.
2154 (TaskContextStructPtrVal && DupCB) ||
2155 (!TaskContextStructPtrVal && !DupCB) &&
2156 "Task context struct ptr and duplication callback must be both set "
2162 unsigned ProgramAddressSpace = M.getDataLayout().getProgramAddressSpace();
2165 Builder.getContext(),
2166 {FakeLB->getType(), FakeUB->getType(), FakeStep->getType(), PointerTy});
2170 if (!TaskDupFnOrErr) {
2173 Value *TaskDupFn = *TaskDupFnOrErr;
// After outlining, replace the placeholder call with the full runtime
// task-alloc / taskloop / end-taskgroup sequence.
2175 OI.PostOutlineCB = [
this, Ident, LBVal, UBVal, StepVal, Tied,
2176 TaskloopAllocaBB, CLI,
Loc, TaskDupFn, ToBeDeleted,
2177 FakeLB, FakeUB, FakeStep](
Function &OutlinedFn)
mutable {
2180 "there must be a single user for the outlined function");
// The runtime expects 64-bit bounds; widen lb/ub/step with sign extension.
2188 Value *CastedLBVal =
2189 Builder.CreateIntCast(LBVal, Builder.getInt64Ty(),
true,
"lb64");
2190 Value *CastedUBVal =
2191 Builder.CreateIntCast(UBVal, Builder.getInt64Ty(),
true,
"ub64");
2192 Value *CastedStepVal =
2193 Builder.CreateIntCast(StepVal, Builder.getInt64Ty(),
true,
"step64");
2194 Builder.restoreIP(CurrentIp);
2196 Builder.SetInsertPoint(StaleCI);
2201 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc);
2203 Value *ThreadID = getOrCreateThreadID(Ident);
// Wrap the taskloop in an implicit taskgroup (begin here, end below).
2207 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup);
2208 Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});
2211 Value *Flags = Builder.getInt32(Tied);
// sizeof(task) rounded up to bytes.
2213 Value *TaskSize = Builder.getInt64(
2214 divideCeil(M.getDataLayout().getTypeSizeInBits(Task), 8));
2218 assert(ArgStructAlloca &&
2219 "Unable to find the alloca instruction corresponding to arguments "
2220 "for extracted function");
2223 assert(ArgStructType &&
"Unable to find struct type corresponding to "
2224 "arguments for extracted function");
2225 Value *SharedsSize =
2226 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
2231 CallInst *TaskData = Builder.CreateCall(
2232 TaskAllocFn, {Ident, ThreadID, Flags,
2233 TaskSize, SharedsSize,
// Copy the captured shareds into the task's shared-data area.
2238 Value *TaskShareds = Builder.CreateLoad(VoidPtr, TaskData);
2239 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
// Fields 0/1/2 of the shareds struct hold lb, ub and step respectively.
2244 ArgStructType, TaskShareds, {Builder.getInt32(0), Builder.getInt32(0)});
2247 ArgStructType, TaskShareds, {Builder.getInt32(0), Builder.getInt32(1)});
2250 ArgStructType, TaskShareds, {Builder.getInt32(0), Builder.getInt32(2)});
2251 llvm::Value *Loadstep = Builder.CreateLoad(Builder.getInt64Ty(), Step);
// Fixed dispatch parameters: if(1), nogroup(1), grainsize 0, plus the dup fn.
2255 Value *IfVal = Builder.getInt32(1);
2256 Value *NoGroup = Builder.getInt32(1);
2258 Value *GrainSize = Builder.getInt64(0);
2259 Value *TaskDup = TaskDupFn;
2261 Value *
Args[] = {Ident, ThreadID, TaskData, IfVal, Lb, Ub,
2262 Loadstep, NoGroup,
Sched, GrainSize, TaskDup};
2266 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskloop);
2267 Builder.CreateCall(TaskloopFn, Args);
2271 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup);
2272 Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID});
// Inside the outlined function: reload the shareds pointer from argument 1.
2276 Builder.SetInsertPoint(TaskloopAllocaBB, TaskloopAllocaBB->begin());
2279 Builder.CreateLoad(VoidPtr, OutlinedFn.
getArg(1));
2282 [SharedsOutlined](
Use &U) {
return U.getUser() != SharedsOutlined; });
2285 Type *IVTy =
IV->getType();
2286 Constant *One = ConstantInt::get(Builder.getInt64Ty(), 1);
// Locate the GEPs/loads of the runtime-adjusted task lb/ub inside the
// outlined body so the loop can be rewritten against them.
2291 Value *TaskLB =
nullptr;
2292 Value *TaskUB =
nullptr;
2293 Value *LoadTaskLB =
nullptr;
2294 Value *LoadTaskUB =
nullptr;
2296 if (
I.getOpcode() == Instruction::GetElementPtr) {
2299 switch (CI->getZExtValue()) {
2308 }
else if (
I.getOpcode() == Instruction::Load) {
2310 if (
Load.getPointerOperand() == TaskLB) {
2311 assert(TaskLB !=
nullptr &&
"Expected value for TaskLB");
2313 }
else if (
Load.getPointerOperand() == TaskUB) {
2314 assert(TaskUB !=
nullptr &&
"Expected value for TaskUB");
// Recompute the trip count from the runtime-provided bounds:
// ((ub - lb) / step) + 1, then retarget the canonical loop.
2320 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
2322 assert(LoadTaskLB !=
nullptr &&
"Expected value for LoadTaskLB");
2323 assert(LoadTaskUB !=
nullptr &&
"Expected value for LoadTaskUB");
2324 Value *TripCountMinusOne =
2325 Builder.CreateSDiv(Builder.CreateSub(LoadTaskUB, LoadTaskLB), FakeStep);
2326 Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One,
"trip_cnt");
2327 Value *CastedTripCount = Builder.CreateIntCast(TripCount, IVTy,
true);
2328 Value *CastedTaskLB = Builder.CreateIntCast(LoadTaskLB, IVTy,
true);
2330 CLI->setTripCount(CastedTripCount);
2332 Builder.SetInsertPoint(CLI->getBody(),
2333 CLI->getBody()->getFirstInsertionPt());
// Rebase the induction variable: find the mul/add users and substitute the
// task's lower bound as the additive offset.
2349 assert(CLI->getIndVar()->getNumUses() == 3 &&
2350 "Canonical loop should have exactly three uses of the ind var");
2351 for (
User *IVUser : CLI->getIndVar()->
users()) {
2353 if (
Mul->getOpcode() == Instruction::Mul) {
2354 for (
User *MulUser :
Mul->users()) {
2356 if (
Add->getOpcode() == Instruction::Add) {
2357 Add->setOperand(1, CastedTaskLB);
// Swap the placeholder bounds for the real (widened) caller values.
2365 FakeLB->replaceAllUsesWith(CastedLBVal);
2366 FakeUB->replaceAllUsesWith(CastedUBVal);
2369 I->eraseFromParent();
2373 addOutlineInfo(std::move(OI));
2374 Builder.SetInsertPoint(TaskloopExitBB, TaskloopExitBB->
begin());
2375 return Builder.saveIP();
// Generate IR for "#pragma omp task": outline the body, allocate the task via
// __kmpc_omp_task_alloc, honor final/mergeable/priority/detach/depend/if
// clauses, and enqueue with __kmpc_omp_task (or the if(0) / with_deps paths).
// NOTE(review): interior lines are elided by extraction; comments describe
// only the visible code.
2378OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
2379 const LocationDescription &
Loc, InsertPointTy AllocaIP,
2380 BodyGenCallbackTy BodyGenCB,
bool Tied,
Value *Final,
Value *IfCondition,
2384 if (!updateToLocation(
Loc))
2385 return InsertPointTy();
2388 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
2389 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
// Split off exit / body / alloca blocks to bound the region to outline.
2406 BasicBlock *TaskExitBB = splitBB(Builder,
true,
"task.exit");
2407 BasicBlock *TaskBodyBB = splitBB(Builder,
true,
"task.body");
2409 splitBB(Builder,
true,
"task.alloca");
2411 InsertPointTy TaskAllocaIP =
2412 InsertPointTy(TaskAllocaBB, TaskAllocaBB->
begin());
2413 InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->
begin());
2414 if (
Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP))
2418 OI.EntryBB = TaskAllocaBB;
2419 OI.OuterAllocaBB = AllocaIP.getBlock();
2420 OI.ExitBB = TaskExitBB;
2425 Builder, AllocaIP, ToBeDeleted, TaskAllocaIP,
"global.tid",
false));
// After outlining, replace the placeholder call with the runtime sequence.
2427 OI.PostOutlineCB = [
this, Ident, Tied, Final, IfCondition, Dependencies,
2428 Mergeable, Priority, EventHandle, TaskAllocaBB,
2429 ToBeDeleted](
Function &OutlinedFn)
mutable {
2432 "there must be a single user for the outlined function");
// More than one argument means captured shareds were passed in.
2437 bool HasShareds = StaleCI->
arg_size() > 1;
2438 Builder.SetInsertPoint(StaleCI);
2443 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc);
2447 Value *ThreadID = getOrCreateThreadID(Ident);
// Flag bits: bit0 = tied; final selects bit1; 4 = mergeable-related;
// 32 = detachable (see the visible ORs below).
2459 Value *Flags = Builder.getInt32(Tied);
2462 Builder.CreateSelect(Final, Builder.getInt32(2), Builder.getInt32(0));
2463 Flags = Builder.CreateOr(FinalFlag, Flags);
2467 Flags = Builder.CreateOr(Builder.getInt32(4), Flags);
2469 Flags = Builder.CreateOr(Builder.getInt32(32), Flags);
2475 Value *TaskSize = Builder.getInt64(
2476 divideCeil(M.getDataLayout().getTypeSizeInBits(Task), 8));
// Without shareds the size stays 0; otherwise it is the arg-struct size.
2481 Value *SharedsSize = Builder.getInt64(0);
2485 assert(ArgStructAlloca &&
2486 "Unable to find the alloca instruction corresponding to arguments "
2487 "for extracted function");
2490 assert(ArgStructType &&
"Unable to find struct type corresponding to "
2491 "arguments for extracted function");
2493 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
2498 CallInst *TaskData = createRuntimeFunctionCall(
2499 TaskAllocFn, {Ident, ThreadID, Flags,
2500 TaskSize, SharedsSize,
// detach clause: create a completion event and store it to the handle var.
2507 Function *TaskDetachFn = getOrCreateRuntimeFunctionPtr(
2508 OMPRTL___kmpc_task_allow_completion_event);
2510 createRuntimeFunctionCall(TaskDetachFn, {Ident, ThreadID, TaskData});
2512 Builder.CreatePointerBitCastOrAddrSpaceCast(EventHandle,
2513 Builder.getPtrTy(0));
2514 EventVal = Builder.CreatePtrToInt(EventVal, Builder.getInt64Ty());
2515 Builder.CreateStore(EventVal, EventHandleAddr);
// Copy captured shareds into the freshly allocated task.
2521 Value *TaskShareds = Builder.CreateLoad(VoidPtr, TaskData);
2522 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
// priority clause: store the priority into the task's cmplrdata (field 4).
2540 Builder.CreateInBoundsGEP(TaskPtr, TaskData, {
Zero,
Zero});
2543 VoidPtr, VoidPtr, Builder.getInt32Ty(), VoidPtr, VoidPtr);
2544 Value *PriorityData = Builder.CreateInBoundsGEP(
2545 TaskStructType, TaskGEP, {
Zero, ConstantInt::get(
Int32Ty, 4)});
2548 Value *CmplrData = Builder.CreateInBoundsGEP(CmplrStructType,
2550 Builder.CreateStore(Priority, CmplrData);
// if clause: split into then/else; the else path runs the task inline
// (wait_deps + task_begin_if0 ... task_complete_if0).
2575 splitBB(Builder,
true,
"if.end");
2577 Builder.GetInsertPoint()->
getParent()->getTerminator();
2578 Instruction *ThenTI = IfTerminator, *ElseTI =
nullptr;
2579 Builder.SetInsertPoint(IfTerminator);
2582 Builder.SetInsertPoint(ElseTI);
2584 if (Dependencies.size()) {
2586 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_wait_deps);
2587 createRuntimeFunctionCall(
2589 {Ident, ThreadID, Builder.getInt32(Dependencies.size()), DepArray,
2590 ConstantInt::get(Builder.getInt32Ty(), 0),
2594 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0);
2596 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0);
2597 createRuntimeFunctionCall(TaskBeginFn, {Ident, ThreadID, TaskData});
2600 CI = createRuntimeFunctionCall(&OutlinedFn, {ThreadID, TaskData});
2602 CI = createRuntimeFunctionCall(&OutlinedFn, {ThreadID});
2604 createRuntimeFunctionCall(TaskCompleteFn, {Ident, ThreadID, TaskData});
2605 Builder.SetInsertPoint(ThenTI);
// Normal path: enqueue with __kmpc_omp_task_with_deps or __kmpc_omp_task.
2608 if (Dependencies.size()) {
2610 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps);
2611 createRuntimeFunctionCall(
2613 {Ident, ThreadID, TaskData, Builder.getInt32(Dependencies.size()),
2614 DepArray, ConstantInt::get(Builder.getInt32Ty(), 0),
2619 Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task);
2620 createRuntimeFunctionCall(TaskFn, {Ident, ThreadID, TaskData});
// Inside the outlined function: reload the shareds pointer from argument 1
// and retarget all other users of the old value to it.
2625 Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->begin());
2627 LoadInst *Shareds = Builder.CreateLoad(VoidPtr, OutlinedFn.
getArg(1));
2629 Shareds, [Shareds](
Use &U) {
return U.getUser() != Shareds; });
2633 I->eraseFromParent();
2636 addOutlineInfo(std::move(OI));
2637 Builder.SetInsertPoint(TaskExitBB, TaskExitBB->
begin());
2639 return Builder.saveIP();
// Generate IR for "#pragma omp taskgroup": bracket the user body with
// __kmpc_taskgroup / __kmpc_end_taskgroup calls.
2642OpenMPIRBuilder::InsertPointOrErrorTy
2643OpenMPIRBuilder::createTaskgroup(
const LocationDescription &
Loc,
2644 InsertPointTy AllocaIP,
2645 BodyGenCallbackTy BodyGenCB) {
2646 if (!updateToLocation(
Loc))
2647 return InsertPointTy();
2650 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
2651 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2652 Value *ThreadID = getOrCreateThreadID(Ident);
// Enter the taskgroup region before emitting the body.
2656 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup);
2657 createRuntimeFunctionCall(TaskgroupFn, {Ident, ThreadID});
2659 BasicBlock *TaskgroupExitBB = splitBB(Builder,
true,
"taskgroup.exit");
2660 if (
Error Err = BodyGenCB(AllocaIP, Builder.saveIP()))
// Close the region in the exit block.
2663 Builder.SetInsertPoint(TaskgroupExitBB);
2666 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup);
2667 createRuntimeFunctionCall(EndTaskgroupFn, {Ident, ThreadID});
2669 return Builder.saveIP();
// Generate IR for "#pragma omp sections": lower the section list to a
// canonical loop over case indices (a switch dispatching to one block per
// section callback) and apply a static workshare-loop schedule to it.
// NOTE(review): interior lines are elided by extraction.
2672OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSections(
2673 const LocationDescription &
Loc, InsertPointTy AllocaIP,
2675 FinalizeCallbackTy FiniCB,
bool IsCancellable,
bool IsNowait) {
2678 if (!updateToLocation(
Loc))
// Push finalization info so cancellation can unwind this region.
2681 FinalizationStack.push_back({FiniCB, OMPD_sections, IsCancellable});
// The loop body switches on the induction variable and runs the matching
// section callback in its own case block.
2699 auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP,
Value *IndVar) ->
Error {
2700 Builder.restoreIP(CodeGenIP);
2702 splitBBWithSuffix(Builder,
false,
".sections.after");
2706 unsigned CaseNumber = 0;
2707 for (
auto SectionCB : SectionCBs) {
2709 M.getContext(),
"omp_section_loop.body.case", CurFn,
Continue);
2710 SwitchStmt->
addCase(Builder.getInt32(CaseNumber), CaseBB);
2711 Builder.SetInsertPoint(CaseBB);
2713 if (
Error Err = SectionCB(InsertPointTy(), {CaseEndBr->
getParent(),
// Loop from 0 to the number of sections with step 1.
2725 Value *LB = ConstantInt::get(I32Ty, 0);
2726 Value *UB = ConstantInt::get(I32Ty, SectionCBs.size());
2727 Value *
ST = ConstantInt::get(I32Ty, 1);
2729 Loc, LoopBodyGenCB, LB, UB, ST,
true,
false, AllocaIP,
"section_loop");
// Distribute the iterations statically; the barrier is skipped for nowait.
2733 InsertPointOrErrorTy WsloopIP =
2734 applyStaticWorkshareLoop(
Loc.DL, *
LoopInfo, AllocaIP,
2735 WorksharingLoopType::ForStaticLoop, !IsNowait);
2737 return WsloopIP.takeError();
2738 InsertPointTy AfterIP = *WsloopIP;
2741 assert(LoopFini &&
"Bad structure of static workshare loop finalization");
// Pop and merge the finalization block emitted for this region.
2744 auto FiniInfo = FinalizationStack.pop_back_val();
2745 assert(FiniInfo.DK == OMPD_sections &&
2746 "Unexpected finalization stack state!");
2747 if (
Error Err = FiniInfo.mergeFiniBB(Builder, LoopFini))
// Generate IR for a single "#pragma omp section": emitted as an inlined OMP
// region; the finalization wrapper redirects the fini insertion point so it
// lands before the enclosing case block's terminator.
2753OpenMPIRBuilder::InsertPointOrErrorTy
2754OpenMPIRBuilder::createSection(
const LocationDescription &
Loc,
2755 BodyGenCallbackTy BodyGenCB,
2756 FinalizeCallbackTy FiniCB) {
2757 if (!updateToLocation(
Loc))
2760 auto FiniCBWrapper = [&](InsertPointTy IP) {
2771 Builder.restoreIP(IP);
2772 auto *CaseBB =
Loc.IP.getBlock();
2776 IP = InsertPointTy(
I->getParent(),
I->getIterator());
2780 Directive OMPD = Directive::OMPD_sections;
// Sections are not outlined individually; emit the body inline.
2783 return EmitOMPInlinedRegion(OMPD,
nullptr,
nullptr, BodyGenCB, FiniCBWrapper,
// NOTE(review): orphaned return of a helper whose signature is elided by
// extraction — it yields an insertion point at instruction I's parent block.
2791 return OpenMPIRBuilder::InsertPointTy(
I->getParent(),
IT);
// Return the hardware thread id within the block by calling the
// __kmpc_get_hardware_thread_id_in_block device runtime function.
2794Value *OpenMPIRBuilder::getGPUThreadID() {
2795 return createRuntimeFunctionCall(
2796 getOrCreateRuntimeFunction(M,
2797 OMPRTL___kmpc_get_hardware_thread_id_in_block),
// Return the warp size by calling the __kmpc_get_warp_size runtime function.
2801Value *OpenMPIRBuilder::getGPUWarpSize() {
2802 return createRuntimeFunctionCall(
2803 getOrCreateRuntimeFunction(M, OMPRTL___kmpc_get_warp_size), {});
// Compute the warp id as thread_id >> log2(warp_size).
2806Value *OpenMPIRBuilder::getNVPTXWarpID() {
2807 unsigned LaneIDBits =
Log2_32(Config.getGridValue().GV_Warp_Size);
2808 return Builder.CreateAShr(getGPUThreadID(), LaneIDBits,
"nvptx_warp_id");
// Compute the lane id as thread_id & (warp_size - 1), where the mask is
// built from log2(warp_size) low bits.
2811Value *OpenMPIRBuilder::getNVPTXLaneID() {
2812 unsigned LaneIDBits =
Log2_32(Config.getGridValue().GV_Warp_Size);
2813 assert(LaneIDBits < 32 &&
"Invalid LaneIDBits size in NVPTX device.");
// Mask of the LaneIDBits low bits (note: "~0" and "u" were split by
// extraction; the original token is ~0u).
2814 unsigned LaneIDMask = ~0
u >> (32u - LaneIDBits);
2815 return Builder.CreateAnd(getGPUThreadID(), Builder.getInt32(LaneIDMask),
// Cast From to ToType: identity when types match, bitcast when store sizes
// match, sign-extending int cast where applicable (elided condition), and
// otherwise a store/load round trip through a temporary alloca.
2819Value *OpenMPIRBuilder::castValueToType(InsertPointTy AllocaIP,
Value *From,
2822 uint64_t FromSize = M.getDataLayout().getTypeStoreSize(FromType);
2823 uint64_t ToSize = M.getDataLayout().getTypeStoreSize(ToType);
2824 assert(FromSize > 0 &&
"From size must be greater than zero");
2825 assert(ToSize > 0 &&
"To size must be greater than zero");
2826 if (FromType == ToType)
2828 if (FromSize == ToSize)
2829 return Builder.CreateBitCast(From, ToType);
2831 return Builder.CreateIntCast(From, ToType,
true);
// Fallback: spill to an alloca of ToType and reload with the new type.
2832 InsertPointTy SaveIP = Builder.saveIP();
2833 Builder.restoreIP(AllocaIP);
2834 Value *CastItem = Builder.CreateAlloca(ToType);
2835 Builder.restoreIP(SaveIP);
2837 Value *ValCastItem = Builder.CreatePointerBitCastOrAddrSpaceCast(
2838 CastItem, Builder.getPtrTy(0));
2839 Builder.CreateStore(From, ValCastItem);
2840 return Builder.CreateLoad(ToType, CastItem);
// Shuffle Element across lanes via __kmpc_shuffle_int32/int64 (picked by the
// element's store size), casting the value to the shuffle integer type and
// back around the call.
2843Value *OpenMPIRBuilder::createRuntimeShuffleFunction(InsertPointTy AllocaIP,
2847 uint64_t Size = M.getDataLayout().getTypeStoreSize(ElementType);
2848 assert(
Size <= 8 &&
"Unsupported bitwidth in shuffle instruction");
2852 Value *ElemCast = castValueToType(AllocaIP, Element, CastTy);
2854 Builder.CreateIntCast(getGPUWarpSize(), Builder.getInt16Ty(),
true);
// <= 4 bytes uses the 32-bit shuffle; larger uses the 64-bit one.
2855 Function *ShuffleFunc = getOrCreateRuntimeFunctionPtr(
2856 Size <= 4 ? RuntimeFunction::OMPRTL___kmpc_shuffle_int32
2857 : RuntimeFunction::OMPRTL___kmpc_shuffle_int64);
2858 Value *WarpSizeCast =
2859 Builder.CreateIntCast(WarpSize, Builder.getInt16Ty(),
true);
2860 Value *ShuffleCall =
2861 createRuntimeFunctionCall(ShuffleFunc, {ElemCast,
Offset, WarpSizeCast});
2862 return castValueToType(AllocaIP, ShuffleCall, CastTy);
// Shuffle an element of arbitrary size from SrcAddr to DstAddr by decomposing
// it into integer chunks of decreasing width (8, 4, 2, 1 bytes); multi-chunk
// cases loop with phi nodes until the remaining byte range is consumed.
// NOTE(review): interior lines are elided by extraction.
2865void OpenMPIRBuilder::shuffleAndStore(InsertPointTy AllocaIP,
Value *SrcAddr,
2869 uint64_t Size = M.getDataLayout().getTypeStoreSize(ElemType);
2880 Type *IndexTy = Builder.getIndexTy(
2881 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2882 Value *ElemPtr = DstAddr;
2883 Value *Ptr = SrcAddr;
// Try the widest integer chunk first, halving until everything is copied.
2884 for (
unsigned IntSize = 8; IntSize >= 1; IntSize /= 2) {
2888 Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2889 Ptr, Builder.getPtrTy(0), Ptr->
getName() +
".ascast");
2891 Builder.CreateGEP(ElemType, SrcAddr, {ConstantInt::get(IndexTy, 1)});
2892 ElemPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2893 ElemPtr, Builder.getPtrTy(0), ElemPtr->
getName() +
".ascast");
// Multiple chunks of this width: emit a precond/body/exit loop.
2896 if ((
Size / IntSize) > 1) {
2897 Value *PtrEnd = Builder.CreatePointerBitCastOrAddrSpaceCast(
2898 SrcAddrGEP, Builder.getPtrTy());
2903 BasicBlock *CurrentBB = Builder.GetInsertBlock();
2904 emitBlock(PreCondBB, CurFunc);
// Phi nodes carry the advancing src/dst pointers across iterations.
2906 Builder.CreatePHI(Ptr->
getType(), 2);
2909 Builder.CreatePHI(ElemPtr->
getType(), 2);
// Continue while at least IntSize bytes remain before PtrEnd.
2913 Value *PtrDiff = Builder.CreatePtrDiff(
2914 Builder.getInt8Ty(), PtrEnd,
2915 Builder.CreatePointerBitCastOrAddrSpaceCast(Ptr, Builder.getPtrTy()));
2916 Builder.CreateCondBr(
2917 Builder.CreateICmpSGT(PtrDiff, Builder.getInt64(IntSize - 1)), ThenBB,
2919 emitBlock(ThenBB, CurFunc);
2920 Value *Res = createRuntimeShuffleFunction(
2922 Builder.CreateAlignedLoad(
2923 IntType, Ptr, M.getDataLayout().getPrefTypeAlign(ElemType)),
2925 Builder.CreateAlignedStore(Res, ElemPtr,
2926 M.getDataLayout().getPrefTypeAlign(ElemType));
// Advance both pointers by one chunk and loop back.
2928 Builder.CreateGEP(IntType, Ptr, {ConstantInt::get(IndexTy, 1)});
2929 Value *LocalElemPtr =
2930 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
2933 emitBranch(PreCondBB);
2934 emitBlock(ExitBB, CurFunc);
// Single chunk of this width: shuffle, truncate if needed, store, advance.
2936 Value *Res = createRuntimeShuffleFunction(
2937 AllocaIP, Builder.CreateLoad(IntType, Ptr), IntType,
Offset);
2940 Res = Builder.CreateTrunc(Res, ElemType);
2941 Builder.CreateStore(Res, ElemPtr);
2942 Ptr = Builder.CreateGEP(IntType, Ptr, {ConstantInt::get(IndexTy, 1)});
2944 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
// Copy a reduction list element-by-element, either pulling each element from a
// remote lane via shuffle (RemoteLaneToThread) or doing a plain thread-local
// copy (ThreadCopy); by-ref elements go through their DataPtrPtrGen callback
// and a local staging alloca.
// NOTE(review): interior lines are elided by extraction.
2950Error OpenMPIRBuilder::emitReductionListCopy(
2951 InsertPointTy AllocaIP, CopyAction Action,
Type *ReductionArrayTy,
2954 Type *IndexTy = Builder.getIndexTy(
2955 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2956 Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
// Process each reduction variable in the list.
2960 for (
auto En :
enumerate(ReductionInfos)) {
2961 const ReductionInfo &RI = En.value();
2962 Value *SrcElementAddr =
nullptr;
2964 Value *DestElementAddr =
nullptr;
2965 Value *DestElementPtrAddr =
nullptr;
// Whether this element is shuffled in from a remote lane.
2967 bool ShuffleInElement =
false;
// Whether the destination list slot must be updated to a fresh alloca.
2970 bool UpdateDestListPtr =
false;
// Load the source element address from slot En.index() of the source list.
2973 Value *SrcElementPtrAddr = Builder.CreateInBoundsGEP(
2974 ReductionArrayTy, SrcBase,
2975 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2976 SrcElementAddr = Builder.CreateLoad(Builder.getPtrTy(), SrcElementPtrAddr);
2980 DestElementPtrAddr = Builder.CreateInBoundsGEP(
2981 ReductionArrayTy, DestBase,
2982 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2983 bool IsByRefElem = (!IsByRef.
empty() && IsByRef[En.index()]);
2985 case CopyAction::RemoteLaneToThread: {
// Stage the incoming remote element in a fresh alloca.
2986 InsertPointTy CurIP = Builder.saveIP();
2987 Builder.restoreIP(AllocaIP);
2989 Type *DestAllocaType =
2990 IsByRefElem ? RI.ByRefAllocatedType : RI.ElementType;
2991 DestAlloca = Builder.CreateAlloca(DestAllocaType,
nullptr,
2992 ".omp.reduction.element");
2994 M.getDataLayout().getPrefTypeAlign(DestAllocaType));
2995 DestElementAddr = DestAlloca;
2997 Builder.CreateAddrSpaceCast(DestElementAddr, Builder.getPtrTy(),
2998 DestElementAddr->
getName() +
".ascast");
2999 Builder.restoreIP(CurIP);
3000 ShuffleInElement =
true;
3001 UpdateDestListPtr =
true;
3004 case CopyAction::ThreadCopy: {
// Destination already exists; just load its address from the dest list.
3006 Builder.CreateLoad(Builder.getPtrTy(), DestElementPtrAddr);
3013 if (ShuffleInElement) {
3014 Type *ShuffleType = RI.ElementType;
3015 Value *ShuffleSrcAddr = SrcElementAddr;
3016 Value *ShuffleDestAddr = DestElementAddr;
// By-ref: shuffle the pointed-to data, not the descriptor.
3020 assert(RI.ByRefElementType &&
"Expected by-ref element type to be set");
3021 assert(RI.ByRefAllocatedType &&
3022 "Expected by-ref allocated type to be set");
3027 ShuffleType = RI.ByRefElementType;
3029 InsertPointOrErrorTy GenResult =
3030 RI.DataPtrPtrGen(Builder.saveIP(), ShuffleSrcAddr, ShuffleSrcAddr);
3033 return GenResult.takeError();
3035 ShuffleSrcAddr = Builder.CreateLoad(Builder.getPtrTy(), ShuffleSrcAddr);
// Local storage for the shuffled-in by-ref payload.
3038 InsertPointTy OldIP = Builder.saveIP();
3039 Builder.restoreIP(AllocaIP);
3041 LocalStorage = Builder.CreateAlloca(ShuffleType);
3042 Builder.restoreIP(OldIP);
3043 ShuffleDestAddr = LocalStorage;
3047 shuffleAndStore(AllocaIP, ShuffleSrcAddr, ShuffleDestAddr, ShuffleType,
3048 RemoteLaneOffset, ReductionArrayTy, IsByRefElem);
// Hook the local payload back into the by-ref descriptor.
3052 InsertPointOrErrorTy GenResult =
3053 RI.DataPtrPtrGen(Builder.saveIP(),
3054 Builder.CreatePointerBitCastOrAddrSpaceCast(
3055 DestAlloca, Builder.getPtrTy(),
".ascast"),
3059 return GenResult.takeError();
3061 Builder.CreateStore(Builder.CreatePointerBitCastOrAddrSpaceCast(
3062 LocalStorage, Builder.getPtrTy(),
".ascast"),
// Non-shuffle copy path, dispatched on how the element is evaluated.
3066 switch (RI.EvaluationKind) {
3067 case EvalKind::Scalar: {
3068 Value *Elem = Builder.CreateLoad(RI.ElementType, SrcElementAddr);
3070 Builder.CreateStore(Elem, DestElementAddr);
3073 case EvalKind::Complex: {
// Copy real and imaginary parts separately.
3074 Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
3075 RI.ElementType, SrcElementAddr, 0, 0,
".realp");
3076 Value *SrcReal = Builder.CreateLoad(
3077 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3078 Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
3079 RI.ElementType, SrcElementAddr, 0, 1,
".imagp");
3080 Value *SrcImg = Builder.CreateLoad(
3081 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3083 Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
3084 RI.ElementType, DestElementAddr, 0, 0,
".realp");
3085 Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
3086 RI.ElementType, DestElementAddr, 0, 1,
".imagp");
3087 Builder.CreateStore(SrcReal, DestRealPtr);
3088 Builder.CreateStore(SrcImg, DestImgPtr);
3091 case EvalKind::Aggregate: {
// Aggregates are copied bytewise with memcpy.
3092 Value *SizeVal = Builder.getInt64(
3093 M.getDataLayout().getTypeStoreSize(RI.ElementType));
3094 Builder.CreateMemCpy(
3095 DestElementAddr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3096 SrcElementAddr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
// Publish the freshly staged element into the destination list slot.
3108 if (UpdateDestListPtr) {
3109 Value *CastDestAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3110 DestElementAddr, Builder.getPtrTy(),
3111 DestElementAddr->
getName() +
".ascast");
3112 Builder.CreateStore(CastDestAddr, DestElementPtrAddr);
3122 InsertPointTy SavedIP = Builder.saveIP();
3125 Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()},
3129 "_omp_reduction_inter_warp_copy_func", &M);
3134 Builder.SetInsertPoint(EntryBB);
3152 "__openmp_nvptx_data_transfer_temporary_storage";
3153 GlobalVariable *TransferMedium = M.getGlobalVariable(TransferMediumName);
3154 unsigned WarpSize = Config.getGridValue().GV_Warp_Size;
3156 if (!TransferMedium) {
3165 Value *GPUThreadID = getGPUThreadID();
3167 Value *LaneID = getNVPTXLaneID();
3169 Value *WarpID = getNVPTXWarpID();
3171 InsertPointTy AllocaIP =
3172 InsertPointTy(Builder.GetInsertBlock(),
3173 Builder.GetInsertBlock()->getFirstInsertionPt());
3176 Builder.restoreIP(AllocaIP);
3177 AllocaInst *ReduceListAlloca = Builder.CreateAlloca(
3178 Arg0Type,
nullptr, ReduceListArg->
getName() +
".addr");
3180 Builder.CreateAlloca(Arg1Type,
nullptr, NumWarpsArg->
getName() +
".addr");
3181 Value *ReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3182 ReduceListAlloca, Arg0Type, ReduceListAlloca->
getName() +
".ascast");
3183 Value *NumWarpsAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3184 NumWarpsAlloca, Builder.getPtrTy(0),
3185 NumWarpsAlloca->
getName() +
".ascast");
3186 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
3187 Builder.CreateStore(NumWarpsArg, NumWarpsAddrCast);
3189 InsertPointTy CodeGenIP =
3191 Builder.restoreIP(CodeGenIP);
3194 Builder.CreateLoad(Builder.getPtrTy(), ReduceListAddrCast);
3196 for (
auto En :
enumerate(ReductionInfos)) {
3201 const ReductionInfo &RI = En.value();
3202 bool IsByRefElem = !IsByRef.
empty() && IsByRef[En.index()];
3203 unsigned RealTySize = M.getDataLayout().getTypeAllocSize(
3204 IsByRefElem ? RI.ByRefElementType : RI.ElementType);
3205 for (
unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {
3208 unsigned NumIters = RealTySize / TySize;
3211 Value *Cnt =
nullptr;
3212 Value *CntAddr =
nullptr;
3216 CodeGenIP = Builder.saveIP();
3217 Builder.restoreIP(AllocaIP);
3219 Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
".cnt.addr");
3221 CntAddr = Builder.CreateAddrSpaceCast(CntAddr, Builder.getPtrTy(),
3222 CntAddr->
getName() +
".ascast");
3223 Builder.restoreIP(CodeGenIP);
3230 emitBlock(PrecondBB, Builder.GetInsertBlock()->getParent());
3231 Cnt = Builder.CreateLoad(Builder.getInt32Ty(), CntAddr,
3233 Value *
Cmp = Builder.CreateICmpULT(
3234 Cnt, ConstantInt::get(Builder.getInt32Ty(), NumIters));
3235 Builder.CreateCondBr(Cmp, BodyBB, ExitBB);
3236 emitBlock(BodyBB, Builder.GetInsertBlock()->getParent());
3240 InsertPointOrErrorTy BarrierIP1 =
3241 createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
3242 omp::Directive::OMPD_unknown,
3246 return BarrierIP1.takeError();
3252 Value *IsWarpMaster = Builder.CreateIsNull(LaneID,
"warp_master");
3253 Builder.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
3254 emitBlock(ThenBB, Builder.GetInsertBlock()->getParent());
3257 auto *RedListArrayTy =
3259 Type *IndexTy = Builder.getIndexTy(
3260 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3262 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
3263 {ConstantInt::get(IndexTy, 0),
3264 ConstantInt::get(IndexTy, En.index())});
3266 Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
3269 InsertPointOrErrorTy GenRes =
3270 RI.DataPtrPtrGen(Builder.saveIP(), ElemPtr, ElemPtr);
3273 return GenRes.takeError();
3275 ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtr);
3279 ElemPtr = Builder.CreateGEP(Builder.getInt32Ty(), ElemPtr, Cnt);
3283 Value *MediumPtr = Builder.CreateInBoundsGEP(
3284 ArrayTy, TransferMedium, {Builder.getInt64(0), WarpID});
3287 Value *Elem = Builder.CreateLoad(CType, ElemPtr);
3289 Builder.CreateStore(Elem, MediumPtr,
3291 Builder.CreateBr(MergeBB);
3294 emitBlock(ElseBB, Builder.GetInsertBlock()->getParent());
3295 Builder.CreateBr(MergeBB);
3298 emitBlock(MergeBB, Builder.GetInsertBlock()->getParent());
3299 InsertPointOrErrorTy BarrierIP2 =
3300 createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
3301 omp::Directive::OMPD_unknown,
3305 return BarrierIP2.takeError();
3312 Value *NumWarpsVal =
3313 Builder.CreateLoad(Builder.getInt32Ty(), NumWarpsAddrCast);
3315 Value *IsActiveThread =
3316 Builder.CreateICmpULT(GPUThreadID, NumWarpsVal,
"is_active_thread");
3317 Builder.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
3319 emitBlock(W0ThenBB, Builder.GetInsertBlock()->getParent());
3323 Value *SrcMediumPtrVal = Builder.CreateInBoundsGEP(
3324 ArrayTy, TransferMedium, {Builder.getInt64(0), GPUThreadID});
3326 Value *TargetElemPtrPtr =
3327 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
3328 {ConstantInt::get(IndexTy, 0),
3329 ConstantInt::get(IndexTy, En.index())});
3330 Value *TargetElemPtrVal =
3331 Builder.CreateLoad(Builder.getPtrTy(), TargetElemPtrPtr);
3332 Value *TargetElemPtr = TargetElemPtrVal;
3335 InsertPointOrErrorTy GenRes =
3336 RI.DataPtrPtrGen(Builder.saveIP(), TargetElemPtr, TargetElemPtr);
3339 return GenRes.takeError();
3341 TargetElemPtr = Builder.CreateLoad(Builder.getPtrTy(), TargetElemPtr);
3346 Builder.CreateGEP(Builder.getInt32Ty(), TargetElemPtr, Cnt);
3349 Value *SrcMediumValue =
3350 Builder.CreateLoad(CType, SrcMediumPtrVal,
true);
3351 Builder.CreateStore(SrcMediumValue, TargetElemPtr);
3352 Builder.CreateBr(W0MergeBB);
3354 emitBlock(W0ElseBB, Builder.GetInsertBlock()->getParent());
3355 Builder.CreateBr(W0MergeBB);
3357 emitBlock(W0MergeBB, Builder.GetInsertBlock()->getParent());
3360 Cnt = Builder.CreateNSWAdd(
3361 Cnt, ConstantInt::get(Builder.getInt32Ty(), 1));
3362 Builder.CreateStore(Cnt, CntAddr,
false);
3364 auto *CurFn = Builder.GetInsertBlock()->
getParent();
3365 emitBranch(PrecondBB);
3366 emitBlock(ExitBB, CurFn);
3368 RealTySize %= TySize;
3372 Builder.CreateRetVoid();
3373 Builder.restoreIP(SavedIP);
3384 {Builder.getPtrTy(), Builder.getInt16Ty(),
3385 Builder.getInt16Ty(), Builder.getInt16Ty()},
3389 "_omp_reduction_shuffle_and_reduce_func", &M);
3399 Builder.SetInsertPoint(EntryBB);
3410 Type *ReduceListArgType = ReduceListArg->
getType();
3412 Type *LaneIDArgPtrType = Builder.getPtrTy(0);
3413 Value *ReduceListAlloca = Builder.CreateAlloca(
3414 ReduceListArgType,
nullptr, ReduceListArg->
getName() +
".addr");
3415 Value *LaneIdAlloca = Builder.CreateAlloca(LaneIDArgType,
nullptr,
3416 LaneIDArg->
getName() +
".addr");
3417 Value *RemoteLaneOffsetAlloca = Builder.CreateAlloca(
3418 LaneIDArgType,
nullptr, RemoteLaneOffsetArg->
getName() +
".addr");
3419 Value *AlgoVerAlloca = Builder.CreateAlloca(LaneIDArgType,
nullptr,
3420 AlgoVerArg->
getName() +
".addr");
3426 Instruction *RemoteReductionListAlloca = Builder.CreateAlloca(
3427 RedListArrayTy,
nullptr,
".omp.reduction.remote_reduce_list");
3429 Value *ReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3430 ReduceListAlloca, ReduceListArgType,
3431 ReduceListAlloca->
getName() +
".ascast");
3432 Value *LaneIdAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3433 LaneIdAlloca, LaneIDArgPtrType, LaneIdAlloca->
getName() +
".ascast");
3434 Value *RemoteLaneOffsetAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3435 RemoteLaneOffsetAlloca, LaneIDArgPtrType,
3436 RemoteLaneOffsetAlloca->
getName() +
".ascast");
3437 Value *AlgoVerAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3438 AlgoVerAlloca, LaneIDArgPtrType, AlgoVerAlloca->
getName() +
".ascast");
3439 Value *RemoteListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3440 RemoteReductionListAlloca, Builder.getPtrTy(),
3441 RemoteReductionListAlloca->
getName() +
".ascast");
3443 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
3444 Builder.CreateStore(LaneIDArg, LaneIdAddrCast);
3445 Builder.CreateStore(RemoteLaneOffsetArg, RemoteLaneOffsetAddrCast);
3446 Builder.CreateStore(AlgoVerArg, AlgoVerAddrCast);
3448 Value *ReduceList = Builder.CreateLoad(ReduceListArgType, ReduceListAddrCast);
3449 Value *LaneId = Builder.CreateLoad(LaneIDArgType, LaneIdAddrCast);
3450 Value *RemoteLaneOffset =
3451 Builder.CreateLoad(LaneIDArgType, RemoteLaneOffsetAddrCast);
3452 Value *AlgoVer = Builder.CreateLoad(LaneIDArgType, AlgoVerAddrCast);
3459 Error EmitRedLsCpRes = emitReductionListCopy(
3460 AllocaIP, CopyAction::RemoteLaneToThread, RedListArrayTy, ReductionInfos,
3461 ReduceList, RemoteListAddrCast, IsByRef,
3462 {RemoteLaneOffset,
nullptr,
nullptr});
3465 return EmitRedLsCpRes;
3488 Value *CondAlgo0 = Builder.CreateIsNull(AlgoVer);
3489 Value *Algo1 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(1));
3490 Value *LaneComp = Builder.CreateICmpULT(LaneId, RemoteLaneOffset);
3491 Value *CondAlgo1 = Builder.CreateAnd(Algo1, LaneComp);
3492 Value *Algo2 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(2));
3493 Value *LaneIdAnd1 = Builder.CreateAnd(LaneId, Builder.getInt16(1));
3494 Value *LaneIdComp = Builder.CreateIsNull(LaneIdAnd1);
3495 Value *Algo2AndLaneIdComp = Builder.CreateAnd(Algo2, LaneIdComp);
3496 Value *RemoteOffsetComp =
3497 Builder.CreateICmpSGT(RemoteLaneOffset, Builder.getInt16(0));
3498 Value *CondAlgo2 = Builder.CreateAnd(Algo2AndLaneIdComp, RemoteOffsetComp);
3499 Value *CA0OrCA1 = Builder.CreateOr(CondAlgo0, CondAlgo1);
3500 Value *CondReduce = Builder.CreateOr(CA0OrCA1, CondAlgo2);
3506 Builder.CreateCondBr(CondReduce, ThenBB, ElseBB);
3507 emitBlock(ThenBB, Builder.GetInsertBlock()->getParent());
3508 Value *LocalReduceListPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3509 ReduceList, Builder.getPtrTy());
3510 Value *RemoteReduceListPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3511 RemoteListAddrCast, Builder.getPtrTy());
3512 createRuntimeFunctionCall(ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr})
3513 ->addFnAttr(Attribute::NoUnwind);
3514 Builder.CreateBr(MergeBB);
3516 emitBlock(ElseBB, Builder.GetInsertBlock()->getParent());
3517 Builder.CreateBr(MergeBB);
3519 emitBlock(MergeBB, Builder.GetInsertBlock()->getParent());
3523 Algo1 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(1));
3524 Value *LaneIdGtOffset = Builder.CreateICmpUGE(LaneId, RemoteLaneOffset);
3525 Value *CondCopy = Builder.CreateAnd(Algo1, LaneIdGtOffset);
3530 Builder.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);
3532 emitBlock(CpyThenBB, Builder.GetInsertBlock()->getParent());
3534 EmitRedLsCpRes = emitReductionListCopy(
3535 AllocaIP, CopyAction::ThreadCopy, RedListArrayTy, ReductionInfos,
3536 RemoteListAddrCast, ReduceList, IsByRef);
3539 return EmitRedLsCpRes;
3541 Builder.CreateBr(CpyMergeBB);
3543 emitBlock(CpyElseBB, Builder.GetInsertBlock()->getParent());
3544 Builder.CreateBr(CpyMergeBB);
3546 emitBlock(CpyMergeBB, Builder.GetInsertBlock()->getParent());
3548 Builder.CreateRetVoid();
3556 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3559 Builder.getVoidTy(),
3560 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3564 "_omp_reduction_list_to_global_copy_func", &M);
3571 Builder.SetInsertPoint(EntryBlock);
3580 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3581 BufferArg->
getName() +
".addr");
3582 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3584 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3585 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3586 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3587 BufferArgAlloca, Builder.getPtrTy(),
3588 BufferArgAlloca->
getName() +
".ascast");
3589 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3590 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3591 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3592 ReduceListArgAlloca, Builder.getPtrTy(),
3593 ReduceListArgAlloca->
getName() +
".ascast");
3595 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3596 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3597 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3599 Value *LocalReduceList =
3600 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3601 Value *BufferArgVal =
3602 Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3603 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3604 Type *IndexTy = Builder.getIndexTy(
3605 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3606 for (
auto En :
enumerate(ReductionInfos)) {
3607 const ReductionInfo &RI = En.value();
3608 auto *RedListArrayTy =
3611 Value *ElemPtrPtr = Builder.CreateInBoundsGEP(
3612 RedListArrayTy, LocalReduceList,
3613 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3615 Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
3619 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferArgVal, Idxs);
3620 Value *GlobVal = Builder.CreateConstInBoundsGEP2_32(
3621 ReductionsBufferTy, BufferVD, 0, En.index());
3623 switch (RI.EvaluationKind) {
3624 case EvalKind::Scalar: {
3625 Value *TargetElement;
3627 if (IsByRef.
empty() || !IsByRef[En.index()]) {
3628 TargetElement = Builder.CreateLoad(RI.ElementType, ElemPtr);
3630 InsertPointOrErrorTy GenResult =
3631 RI.DataPtrPtrGen(Builder.saveIP(), ElemPtr, ElemPtr);
3634 return GenResult.takeError();
3636 ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtr);
3637 TargetElement = Builder.CreateLoad(RI.ByRefElementType, ElemPtr);
3640 Builder.CreateStore(TargetElement, GlobVal);
3643 case EvalKind::Complex: {
3644 Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
3645 RI.ElementType, ElemPtr, 0, 0,
".realp");
3646 Value *SrcReal = Builder.CreateLoad(
3647 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3648 Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
3649 RI.ElementType, ElemPtr, 0, 1,
".imagp");
3650 Value *SrcImg = Builder.CreateLoad(
3651 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3653 Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
3654 RI.ElementType, GlobVal, 0, 0,
".realp");
3655 Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
3656 RI.ElementType, GlobVal, 0, 1,
".imagp");
3657 Builder.CreateStore(SrcReal, DestRealPtr);
3658 Builder.CreateStore(SrcImg, DestImgPtr);
3661 case EvalKind::Aggregate: {
3663 Builder.getInt64(M.getDataLayout().getTypeStoreSize(RI.ElementType));
3664 Builder.CreateMemCpy(
3665 GlobVal, M.getDataLayout().getPrefTypeAlign(RI.ElementType), ElemPtr,
3666 M.getDataLayout().getPrefTypeAlign(RI.ElementType), SizeVal,
false);
3672 Builder.CreateRetVoid();
3673 Builder.restoreIP(OldIP);
3680 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3683 Builder.getVoidTy(),
3684 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3688 "_omp_reduction_list_to_global_reduce_func", &M);
3695 Builder.SetInsertPoint(EntryBlock);
3704 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3705 BufferArg->
getName() +
".addr");
3706 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3708 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3709 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3710 auto *RedListArrayTy =
3715 Value *LocalReduceList =
3716 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
3718 InsertPointTy AllocaIP{EntryBlock, EntryBlock->
begin()};
3720 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3721 BufferArgAlloca, Builder.getPtrTy(),
3722 BufferArgAlloca->
getName() +
".ascast");
3723 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3724 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3725 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3726 ReduceListArgAlloca, Builder.getPtrTy(),
3727 ReduceListArgAlloca->
getName() +
".ascast");
3728 Value *LocalReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3729 LocalReduceList, Builder.getPtrTy(),
3730 LocalReduceList->
getName() +
".ascast");
3732 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3733 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3734 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3736 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3737 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3738 Type *IndexTy = Builder.getIndexTy(
3739 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3740 for (
auto En :
enumerate(ReductionInfos)) {
3741 const ReductionInfo &RI = En.value();
3744 if (!IsByRef.
empty() && IsByRef[En.index()]) {
3745 InsertPointTy OldIP = Builder.saveIP();
3746 Builder.restoreIP(AllocaIP);
3748 ByRefAlloc = Builder.CreateAlloca(RI.ByRefAllocatedType);
3749 ByRefAlloc = Builder.CreatePointerBitCastOrAddrSpaceCast(
3750 ByRefAlloc, Builder.getPtrTy(), ByRefAlloc->
getName() +
".ascast");
3752 Builder.restoreIP(OldIP);
3755 Value *TargetElementPtrPtr = Builder.CreateInBoundsGEP(
3756 RedListArrayTy, LocalReduceListAddrCast,
3757 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3759 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3761 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
3762 ReductionsBufferTy, BufferVD, 0, En.index());
3764 if (!IsByRef.
empty() && IsByRef[En.index()]) {
3765 Value *ByRefDataPtr;
3767 InsertPointOrErrorTy GenResult =
3768 RI.DataPtrPtrGen(Builder.saveIP(), ByRefAlloc, ByRefDataPtr);
3771 return GenResult.takeError();
3773 Builder.CreateStore(GlobValPtr, ByRefDataPtr);
3774 Builder.CreateStore(ByRefAlloc, TargetElementPtrPtr);
3776 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
3782 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3783 createRuntimeFunctionCall(ReduceFn, {LocalReduceListAddrCast, ReduceList})
3784 ->addFnAttr(Attribute::NoUnwind);
3785 Builder.CreateRetVoid();
3786 Builder.restoreIP(OldIP);
3793 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3796 Builder.getVoidTy(),
3797 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3801 "_omp_reduction_global_to_list_copy_func", &M);
3808 Builder.SetInsertPoint(EntryBlock);
3817 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3818 BufferArg->
getName() +
".addr");
3819 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3821 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3822 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3823 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3824 BufferArgAlloca, Builder.getPtrTy(),
3825 BufferArgAlloca->
getName() +
".ascast");
3826 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3827 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3828 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3829 ReduceListArgAlloca, Builder.getPtrTy(),
3830 ReduceListArgAlloca->
getName() +
".ascast");
3831 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3832 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3833 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3835 Value *LocalReduceList =
3836 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3837 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3838 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3839 Type *IndexTy = Builder.getIndexTy(
3840 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3841 for (
auto En :
enumerate(ReductionInfos)) {
3842 const OpenMPIRBuilder::ReductionInfo &RI = En.value();
3843 auto *RedListArrayTy =
3846 Value *ElemPtrPtr = Builder.CreateInBoundsGEP(
3847 RedListArrayTy, LocalReduceList,
3848 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3850 Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
3853 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3854 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
3855 ReductionsBufferTy, BufferVD, 0, En.index());
3857 switch (RI.EvaluationKind) {
3858 case EvalKind::Scalar: {
3859 Type *ElemType = RI.ElementType;
3861 if (!IsByRef.
empty() && IsByRef[En.index()]) {
3862 ElemType = RI.ByRefElementType;
3863 InsertPointOrErrorTy GenResult =
3864 RI.DataPtrPtrGen(Builder.saveIP(), ElemPtr, ElemPtr);
3867 return GenResult.takeError();
3869 ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtr);
3872 Value *TargetElement = Builder.CreateLoad(ElemType, GlobValPtr);
3873 Builder.CreateStore(TargetElement, ElemPtr);
3876 case EvalKind::Complex: {
3877 Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
3878 RI.ElementType, GlobValPtr, 0, 0,
".realp");
3879 Value *SrcReal = Builder.CreateLoad(
3880 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3881 Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
3882 RI.ElementType, GlobValPtr, 0, 1,
".imagp");
3883 Value *SrcImg = Builder.CreateLoad(
3884 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3886 Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
3887 RI.ElementType, ElemPtr, 0, 0,
".realp");
3888 Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
3889 RI.ElementType, ElemPtr, 0, 1,
".imagp");
3890 Builder.CreateStore(SrcReal, DestRealPtr);
3891 Builder.CreateStore(SrcImg, DestImgPtr);
3894 case EvalKind::Aggregate: {
3896 Builder.getInt64(M.getDataLayout().getTypeStoreSize(RI.ElementType));
3897 Builder.CreateMemCpy(
3898 ElemPtr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3899 GlobValPtr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3906 Builder.CreateRetVoid();
3907 Builder.restoreIP(OldIP);
3914 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3917 Builder.getVoidTy(),
3918 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3922 "_omp_reduction_global_to_list_reduce_func", &M);
3929 Builder.SetInsertPoint(EntryBlock);
3938 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3939 BufferArg->
getName() +
".addr");
3940 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3942 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3943 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3949 Value *LocalReduceList =
3950 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
3952 InsertPointTy AllocaIP{EntryBlock, EntryBlock->
begin()};
3954 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3955 BufferArgAlloca, Builder.getPtrTy(),
3956 BufferArgAlloca->
getName() +
".ascast");
3957 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3958 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3959 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3960 ReduceListArgAlloca, Builder.getPtrTy(),
3961 ReduceListArgAlloca->
getName() +
".ascast");
3962 Value *ReductionList = Builder.CreatePointerBitCastOrAddrSpaceCast(
3963 LocalReduceList, Builder.getPtrTy(),
3964 LocalReduceList->
getName() +
".ascast");
3966 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3967 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3968 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3970 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3971 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3972 Type *IndexTy = Builder.getIndexTy(
3973 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3974 for (
auto En :
enumerate(ReductionInfos)) {
3975 const ReductionInfo &RI = En.value();
3978 if (!IsByRef.
empty() && IsByRef[En.index()]) {
3979 InsertPointTy OldIP = Builder.saveIP();
3980 Builder.restoreIP(AllocaIP);
3982 ByRefAlloc = Builder.CreateAlloca(RI.ByRefAllocatedType);
3983 ByRefAlloc = Builder.CreatePointerBitCastOrAddrSpaceCast(
3984 ByRefAlloc, Builder.getPtrTy(), ByRefAlloc->
getName() +
".ascast");
3986 Builder.restoreIP(OldIP);
3989 Value *TargetElementPtrPtr = Builder.CreateInBoundsGEP(
3990 RedListArrayTy, ReductionList,
3991 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3994 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3995 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
3996 ReductionsBufferTy, BufferVD, 0, En.index());
3998 if (!IsByRef.
empty() && IsByRef[En.index()]) {
3999 Value *ByRefDataPtr;
4000 InsertPointOrErrorTy GenResult =
4001 RI.DataPtrPtrGen(Builder.saveIP(), ByRefAlloc, ByRefDataPtr);
4003 return GenResult.takeError();
4005 Builder.CreateStore(GlobValPtr, ByRefDataPtr);
4006 Builder.CreateStore(ByRefAlloc, TargetElementPtrPtr);
4008 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
4014 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
4015 createRuntimeFunctionCall(ReduceFn, {ReduceList, ReductionList})
4016 ->addFnAttr(Attribute::NoUnwind);
4017 Builder.CreateRetVoid();
4018 Builder.restoreIP(OldIP);
4022std::string OpenMPIRBuilder::getReductionFuncName(
StringRef Name)
const {
4023 std::string Suffix =
4024 createPlatformSpecificName({
"omp",
"reduction",
"reduction_func"});
4025 return (Name + Suffix).
str();
4031 AttributeList FuncAttrs) {
4033 {Builder.getPtrTy(), Builder.getPtrTy()},
4035 std::string
Name = getReductionFuncName(ReducerName);
4043 Builder.SetInsertPoint(EntryBB);
4047 Value *LHSArrayPtr =
nullptr;
4048 Value *RHSArrayPtr =
nullptr;
4055 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
4057 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
4058 Value *LHSAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
4059 LHSAlloca, Arg0Type, LHSAlloca->
getName() +
".ascast");
4060 Value *RHSAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
4061 RHSAlloca, Arg1Type, RHSAlloca->
getName() +
".ascast");
4062 Builder.CreateStore(Arg0, LHSAddrCast);
4063 Builder.CreateStore(Arg1, RHSAddrCast);
4064 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
4065 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
4068 Type *IndexTy = Builder.getIndexTy(
4069 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
4071 for (
auto En :
enumerate(ReductionInfos)) {
4072 const ReductionInfo &RI = En.value();
4073 Value *RHSI8PtrPtr = Builder.CreateInBoundsGEP(
4074 RedArrayTy, RHSArrayPtr,
4075 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4076 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
4077 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
4078 RHSI8Ptr, RI.PrivateVariable->getType(),
4079 RHSI8Ptr->
getName() +
".ascast");
4081 Value *LHSI8PtrPtr = Builder.CreateInBoundsGEP(
4082 RedArrayTy, LHSArrayPtr,
4083 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4084 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
4085 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
4086 LHSI8Ptr, RI.Variable->getType(), LHSI8Ptr->
getName() +
".ascast");
4088 if (ReductionGenCBKind == ReductionGenCBKind::Clang) {
4095 if (!IsByRef.
empty() && !IsByRef[En.index()]) {
4096 LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
4097 RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
4101 InsertPointOrErrorTy AfterIP =
4102 RI.ReductionGen(Builder.saveIP(),
LHS,
RHS, Reduced);
4104 return AfterIP.takeError();
4105 if (!Builder.GetInsertBlock())
4106 return ReductionFunc;
4108 Builder.restoreIP(*AfterIP);
4110 if (!IsByRef.
empty() && !IsByRef[En.index()])
4111 Builder.CreateStore(Reduced, LHSPtr);
4115 if (ReductionGenCBKind == ReductionGenCBKind::Clang)
4116 for (
auto En :
enumerate(ReductionInfos)) {
4117 unsigned Index = En.index();
4118 const ReductionInfo &RI = En.value();
4119 Value *LHSFixupPtr, *RHSFixupPtr;
4120 Builder.restoreIP(RI.ReductionGenClang(
4121 Builder.saveIP(), Index, &LHSFixupPtr, &RHSFixupPtr, ReductionFunc));
4126 LHSPtrs[Index], [ReductionFunc](
const Use &U) {
4131 RHSPtrs[Index], [ReductionFunc](
const Use &U) {
4137 Builder.CreateRetVoid();
4138 return ReductionFunc;
4144 for (
const OpenMPIRBuilder::ReductionInfo &RI : ReductionInfos) {
4146 assert(RI.Variable &&
"expected non-null variable");
4147 assert(RI.PrivateVariable &&
"expected non-null private variable");
4148 assert((RI.ReductionGen || RI.ReductionGenClang) &&
4149 "expected non-null reduction generator callback");
4152 RI.Variable->getType() == RI.PrivateVariable->getType() &&
4153 "expected variables and their private equivalents to have the same "
4156 assert(RI.Variable->getType()->isPointerTy() &&
4157 "expected variables to be pointers");
4161OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductionsGPU(
4162 const LocationDescription &
Loc, InsertPointTy AllocaIP,
4165 ReductionGenCBKind ReductionGenCBKind, std::optional<omp::GV> GridValue,
4166 unsigned ReductionBufNum,
Value *SrcLocInfo) {
4167 if (!updateToLocation(
Loc))
4168 return InsertPointTy();
4169 Builder.restoreIP(CodeGenIP);
4176 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
4177 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4180 if (ReductionInfos.
size() == 0)
4181 return Builder.saveIP();
4184 if (ReductionGenCBKind != ReductionGenCBKind::Clang) {
4190 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
4194 AttributeList FuncAttrs;
4195 AttrBuilder AttrBldr(Ctx);
4197 AttrBldr.addAttribute(Attr);
4198 AttrBldr.removeAttribute(Attribute::OptimizeNone);
4199 FuncAttrs = FuncAttrs.addFnAttributes(Ctx, AttrBldr);
4201 CodeGenIP = Builder.saveIP();
4203 Builder.GetInsertBlock()->getParent()->getName(), ReductionInfos, IsByRef,
4204 ReductionGenCBKind, FuncAttrs);
4205 if (!ReductionResult)
4207 Function *ReductionFunc = *ReductionResult;
4208 Builder.restoreIP(CodeGenIP);
4211 if (GridValue.has_value())
4212 Config.setGridValue(GridValue.value());
4227 Builder.getPtrTy(M.getDataLayout().getProgramAddressSpace());
4229 CodeGenIP = Builder.saveIP();
4230 Builder.restoreIP(AllocaIP);
4231 Value *ReductionListAlloca =
4232 Builder.CreateAlloca(RedArrayTy,
nullptr,
".omp.reduction.red_list");
4233 Value *ReductionList = Builder.CreatePointerBitCastOrAddrSpaceCast(
4234 ReductionListAlloca, PtrTy, ReductionListAlloca->
getName() +
".ascast");
4235 Builder.restoreIP(CodeGenIP);
4236 Type *IndexTy = Builder.getIndexTy(
4237 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
4238 for (
auto En :
enumerate(ReductionInfos)) {
4239 const ReductionInfo &RI = En.value();
4240 Value *ElemPtr = Builder.CreateInBoundsGEP(
4241 RedArrayTy, ReductionList,
4242 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4244 Value *PrivateVar = RI.PrivateVariable;
4245 bool IsByRefElem = !IsByRef.
empty() && IsByRef[En.index()];
4247 PrivateVar = Builder.CreateLoad(RI.ElementType, PrivateVar);
4250 Builder.CreatePointerBitCastOrAddrSpaceCast(PrivateVar, PtrTy);
4251 Builder.CreateStore(CastElem, ElemPtr);
4253 CodeGenIP = Builder.saveIP();
4255 ReductionInfos, ReductionFunc, FuncAttrs, IsByRef);
4261 emitInterWarpCopyFunction(
Loc, ReductionInfos, FuncAttrs, IsByRef);
4265 Builder.restoreIP(CodeGenIP);
4267 Value *RL = Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, PtrTy);
4269 unsigned MaxDataSize = 0;
4271 for (
auto En :
enumerate(ReductionInfos)) {
4272 auto Size = M.getDataLayout().getTypeStoreSize(En.value().ElementType);
4273 if (
Size > MaxDataSize)
4275 Type *RedTypeArg = (!IsByRef.
empty() && IsByRef[En.index()])
4276 ? En.value().ByRefElementType
4277 : En.value().ElementType;
4280 Value *ReductionDataSize =
4281 Builder.getInt64(MaxDataSize * ReductionInfos.
size());
4282 if (!IsTeamsReduction) {
4283 Value *SarFuncCast =
4284 Builder.CreatePointerBitCastOrAddrSpaceCast(*SarFunc, FuncPtrTy);
4286 Builder.CreatePointerBitCastOrAddrSpaceCast(WcFunc, FuncPtrTy);
4287 Value *
Args[] = {SrcLocInfo, ReductionDataSize, RL, SarFuncCast,
4289 Function *Pv2Ptr = getOrCreateRuntimeFunctionPtr(
4290 RuntimeFunction::OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2);
4291 Res = createRuntimeFunctionCall(Pv2Ptr, Args);
4293 CodeGenIP = Builder.saveIP();
4295 Ctx, ReductionTypeArgs,
"struct._globalized_locals_ty");
4296 Function *RedFixedBufferFn = getOrCreateRuntimeFunctionPtr(
4297 RuntimeFunction::OMPRTL___kmpc_reduction_get_fixed_buffer);
4300 ReductionInfos, ReductionsBufferTy, FuncAttrs, IsByRef);
4305 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs, IsByRef);
4310 ReductionInfos, ReductionsBufferTy, FuncAttrs, IsByRef);
4315 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs, IsByRef);
4319 Builder.restoreIP(CodeGenIP);
4321 Value *KernelTeamsReductionPtr = createRuntimeFunctionCall(
4322 RedFixedBufferFn, {},
"_openmp_teams_reductions_buffer_$_$ptr");
4324 Value *Args3[] = {SrcLocInfo,
4325 KernelTeamsReductionPtr,
4326 Builder.getInt32(ReductionBufNum),
4336 Function *TeamsReduceFn = getOrCreateRuntimeFunctionPtr(
4337 RuntimeFunction::OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2);
4338 Res = createRuntimeFunctionCall(TeamsReduceFn, Args3);
4344 Value *
Cond = Builder.CreateICmpEQ(Res, Builder.getInt32(1));
4345 Builder.CreateCondBr(
Cond, ThenBB, ExitBB);
4351 emitBlock(ThenBB, CurFunc);
4354 for (
auto En :
enumerate(ReductionInfos)) {
4355 const ReductionInfo &RI = En.value();
4357 Value *RedValue = RI.Variable;
4359 Builder.CreatePointerBitCastOrAddrSpaceCast(RI.PrivateVariable, PtrTy);
4361 if (ReductionGenCBKind == ReductionGenCBKind::Clang) {
4362 Value *LHSPtr, *RHSPtr;
4363 Builder.restoreIP(RI.ReductionGenClang(Builder.saveIP(), En.index(),
4364 &LHSPtr, &RHSPtr, CurFunc));
4377 if (IsByRef.
empty() || !IsByRef[En.index()]) {
4378 RedValue = Builder.CreateLoad(
ValueType, RI.Variable,
4379 "red.value." +
Twine(En.index()));
4381 Value *PrivateRedValue = Builder.CreateLoad(
4384 InsertPointOrErrorTy AfterIP =
4385 RI.ReductionGen(Builder.saveIP(), RedValue, PrivateRedValue, Reduced);
4387 return AfterIP.takeError();
4388 Builder.restoreIP(*AfterIP);
4390 if (!IsByRef.
empty() && !IsByRef[En.index()])
4391 Builder.CreateStore(Reduced, RI.Variable);
4394 emitBlock(ExitBB, CurFunc);
4395 if (ContinuationBlock) {
4396 Builder.CreateBr(ContinuationBlock);
4397 Builder.SetInsertPoint(ContinuationBlock);
4399 Config.setEmitLLVMUsed();
4401 return Builder.saveIP();
4410 ".omp.reduction.func", &M);
4420 Builder.SetInsertPoint(ReductionFuncBlock);
4421 Value *LHSArrayPtr =
nullptr;
4422 Value *RHSArrayPtr =
nullptr;
4433 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
4435 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
4436 Value *LHSAddrCast =
4437 Builder.CreatePointerBitCastOrAddrSpaceCast(LHSAlloca, Arg0Type);
4438 Value *RHSAddrCast =
4439 Builder.CreatePointerBitCastOrAddrSpaceCast(RHSAlloca, Arg1Type);
4440 Builder.CreateStore(Arg0, LHSAddrCast);
4441 Builder.CreateStore(Arg1, RHSAddrCast);
4442 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
4443 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
4445 LHSArrayPtr = ReductionFunc->
getArg(0);
4446 RHSArrayPtr = ReductionFunc->
getArg(1);
4449 unsigned NumReductions = ReductionInfos.
size();
4452 for (
auto En :
enumerate(ReductionInfos)) {
4453 const OpenMPIRBuilder::ReductionInfo &RI = En.value();
4454 Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
4455 RedArrayTy, LHSArrayPtr, 0, En.index());
4456 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
4457 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
4458 LHSI8Ptr, RI.Variable->
getType());
4459 Value *
LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
4460 Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
4461 RedArrayTy, RHSArrayPtr, 0, En.index());
4462 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
4463 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
4464 RHSI8Ptr, RI.PrivateVariable->
getType());
4465 Value *
RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
4467 OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4468 RI.ReductionGen(Builder.saveIP(),
LHS,
RHS, Reduced);
4470 return AfterIP.takeError();
4472 Builder.restoreIP(*AfterIP);
4474 if (!Builder.GetInsertBlock())
4478 if (!IsByRef[En.index()])
4479 Builder.CreateStore(Reduced, LHSPtr);
4481 Builder.CreateRetVoid();
// Emits the host-side OpenMP reductions sequence: pack private reduction
// copies into a "red.array", call __kmpc_reduce[_nowait], then branch on the
// result (case 1: non-atomic reduce + __kmpc_end_reduce[_nowait]; case 2:
// atomic reduce) before falling through to the continuation block.
// NOTE(review): this listing is a lossy extraction — the embedded original
// line numbers jump (e.g. 4492 -> 4496), so several declarations used below
// (RedArrayTy, InsertBlock, ReductionFunc, ReduceCall, Switch, the reduction
// basic blocks, Reduced, ValueType) live on elided lines. Confirm against the
// full source before editing.
4485OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductions(
4486 const LocationDescription &
Loc, InsertPointTy AllocaIP,
4488 bool IsNoWait,
bool IsTeamsReduction) {
// Device/GPU configurations delegate to the GPU-specific lowering.
4491 return createReductionsGPU(
Loc, AllocaIP, Builder.saveIP(), ReductionInfos,
4492 IsByRef, IsNoWait, IsTeamsReduction);
4496 if (!updateToLocation(
Loc))
4497 return InsertPointTy();
// Nothing to reduce: return the current insertion point unchanged.
4499 if (ReductionInfos.
size() == 0)
4500 return Builder.saveIP();
4509 unsigned NumReductions = ReductionInfos.
size();
// Allocate the pointer array in the dedicated alloca block, but fill it at
// the current code-generation position.
4511 Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator());
4512 Value *RedArray = Builder.CreateAlloca(RedArrayTy,
nullptr,
"red.array");
4514 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
// Store each private reduction variable's address into its array slot.
4516 for (
auto En :
enumerate(ReductionInfos)) {
4517 unsigned Index = En.index();
4518 const ReductionInfo &RI = En.value();
4519 Value *RedArrayElemPtr = Builder.CreateConstInBoundsGEP2_64(
4520 RedArrayTy, RedArray, 0, Index,
"red.array.elem." +
Twine(Index));
4521 Builder.CreateStore(RI.PrivateVariable, RedArrayElemPtr);
4526 Type *IndexTy = Builder.getIndexTy(
4527 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
4528 Function *
Func = Builder.GetInsertBlock()->getParent();
4531 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
// The runtime may only take the atomic path (case 2 below) if every
// reduction provides an atomic generator; advertise that via the ident flag.
4532 bool CanGenerateAtomic =
all_of(ReductionInfos, [](
const ReductionInfo &RI) {
4533 return RI.AtomicReductionGen;
4535 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize,
4537 ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
4539 Value *ThreadId = getOrCreateThreadID(Ident);
4540 Constant *NumVariables = Builder.getInt32(NumReductions);
4542 unsigned RedArrayByteSize =
DL.getTypeStoreSize(RedArrayTy);
4543 Constant *RedArraySize = ConstantInt::get(IndexTy, RedArrayByteSize);
4545 Value *Lock = getOMPCriticalRegionLock(
".reduction");
4546 Function *ReduceFunc = getOrCreateRuntimeFunctionPtr(
4547 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
4548 : RuntimeFunction::OMPRTL___kmpc_reduce);
4550 createRuntimeFunctionCall(ReduceFunc,
4551 {Ident, ThreadId, NumVariables, RedArraySize,
4552 RedArray, ReductionFunc, Lock},
// __kmpc_reduce returns 1 for the non-atomic path, 2 for the atomic path,
// and anything else means "do nothing" (continuation).
4563 Builder.CreateSwitch(ReduceCall, ContinuationBlock, 2);
4564 Switch->addCase(Builder.getInt32(1), NonAtomicRedBlock);
4565 Switch->addCase(Builder.getInt32(2), AtomicRedBlock);
// Non-atomic path: load original + private values (unless by-ref), combine
// via the user-supplied ReductionGen, store back, then end the reduce region.
4570 Builder.SetInsertPoint(NonAtomicRedBlock);
4571 for (
auto En :
enumerate(ReductionInfos)) {
4572 const ReductionInfo &RI = En.value();
4576 Value *RedValue = RI.Variable;
4577 if (!IsByRef[En.index()]) {
4578 RedValue = Builder.CreateLoad(
ValueType, RI.Variable,
4579 "red.value." +
Twine(En.index()));
4581 Value *PrivateRedValue =
4582 Builder.CreateLoad(
ValueType, RI.PrivateVariable,
4583 "red.private.value." +
Twine(En.index()));
4585 InsertPointOrErrorTy AfterIP =
4586 RI.ReductionGen(Builder.saveIP(), RedValue, PrivateRedValue, Reduced);
4588 return AfterIP.takeError();
4589 Builder.restoreIP(*AfterIP);
// The callback may have cleared the insertion point (e.g. unreachable).
4591 if (!Builder.GetInsertBlock())
4592 return InsertPointTy();
4594 if (!IsByRef[En.index()])
4595 Builder.CreateStore(Reduced, RI.Variable);
4597 Function *EndReduceFunc = getOrCreateRuntimeFunctionPtr(
4598 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
4599 : RuntimeFunction::OMPRTL___kmpc_end_reduce);
4600 createRuntimeFunctionCall(EndReduceFunc, {Ident, ThreadId, Lock});
4601 Builder.CreateBr(ContinuationBlock);
// Atomic path: only reachable when every info has AtomicReductionGen and no
// reduction is by-ref; otherwise the block is terminated with unreachable.
4606 Builder.SetInsertPoint(AtomicRedBlock);
4607 if (CanGenerateAtomic &&
llvm::none_of(IsByRef, [](
bool P) {
return P; })) {
4608 for (
const ReductionInfo &RI : ReductionInfos) {
4609 InsertPointOrErrorTy AfterIP = RI.AtomicReductionGen(
4610 Builder.saveIP(), RI.ElementType, RI.Variable, RI.PrivateVariable);
4612 return AfterIP.takeError();
4613 Builder.restoreIP(*AfterIP);
4614 if (!Builder.GetInsertBlock())
4615 return InsertPointTy();
4617 Builder.CreateBr(ContinuationBlock);
4619 Builder.CreateUnreachable();
4630 if (!Builder.GetInsertBlock())
4631 return InsertPointTy();
// Resume code generation after the reduction region.
4633 Builder.SetInsertPoint(ContinuationBlock);
4634 return Builder.saveIP();
// Emits an OpenMP 'master' region: the body (BodyGenCB) runs only on the
// thread for which __kmpc_master returns nonzero, and __kmpc_end_master
// closes the region. The actual region plumbing is done by
// EmitOMPInlinedRegion (tail of the call is elided in this listing).
4637OpenMPIRBuilder::InsertPointOrErrorTy
4638OpenMPIRBuilder::createMaster(
const LocationDescription &
Loc,
4639 BodyGenCallbackTy BodyGenCB,
4640 FinalizeCallbackTy FiniCB) {
4641 if (!updateToLocation(
Loc))
4644 Directive OMPD = Directive::OMPD_master;
// Build the ident/thread-id pair shared by the entry and exit calls.
4646 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
4647 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4648 Value *ThreadId = getOrCreateThreadID(Ident);
4651 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master);
4652 Instruction *EntryCall = createRuntimeFunctionCall(EntryRTLFn, Args);
4654 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master);
4655 Instruction *ExitCall = createRuntimeFunctionCall(ExitRTLFn, Args);
4657 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
// Emits an OpenMP 'masked' region, analogous to createMaster but using
// __kmpc_masked / __kmpc_end_masked. The entry call takes an extra filter
// argument (the Args array declaration is on an elided line); the exit call
// takes only {Ident, ThreadId} (ArgsEnd).
4661OpenMPIRBuilder::InsertPointOrErrorTy
4662OpenMPIRBuilder::createMasked(
const LocationDescription &
Loc,
4663 BodyGenCallbackTy BodyGenCB,
4665 if (!updateToLocation(
Loc))
4668 Directive OMPD = Directive::OMPD_masked;
4670 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
4671 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4672 Value *ThreadId = getOrCreateThreadID(Ident);
4674 Value *ArgsEnd[] = {Ident, ThreadId};
4676 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_masked);
4677 Instruction *EntryCall = createRuntimeFunctionCall(EntryRTLFn, Args);
4679 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_masked);
4680 Instruction *ExitCall = createRuntimeFunctionCall(ExitRTLFn, ArgsEnd);
4682 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
4692 Call->setDoesNotThrow();
// Emits the body of an OpenMP 'scan' directive. The scan is lowered as two
// passes over the loop: on the first pass (OMPFirstScanLoop) each iteration's
// private scan values are copied OUT to per-variable temporary buffers
// (indexed by IV); on the second pass values are copied back IN from those
// buffers. Branch direction at the end depends on inclusive vs. exclusive
// scan. NOTE(review): extraction gaps hide some declarations (e.g. IV's
// origin, SrcPtr, CmpI) — verify against the full source.
4704OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
4705 const LocationDescription &
Loc, InsertPointTy AllocaIP,
4707 bool IsInclusive, ScanInfo *ScanRedInfo) {
// First pass also allocates/initializes the scan buffers.
4708 if (ScanRedInfo->OMPFirstScanLoop) {
4709 llvm::Error Err = emitScanBasedDirectiveDeclsIR(AllocaIP, ScanVars,
4710 ScanVarsType, ScanRedInfo);
4714 if (!updateToLocation(
Loc))
4719 if (ScanRedInfo->OMPFirstScanLoop) {
// Pass 1: buffer[IV] = *ScanVars[i] for every scan variable.
4721 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4722 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]];
4723 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4724 Type *DestTy = ScanVarsType[i];
4725 Value *Val = Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4726 Value *Src = Builder.CreateLoad(DestTy, ScanVars[i]);
4728 Builder.CreateStore(Src, Val);
4731 Builder.CreateBr(ScanRedInfo->OMPScanLoopExit);
4732 emitBlock(ScanRedInfo->OMPScanDispatch,
4733 Builder.GetInsertBlock()->getParent());
4735 if (!ScanRedInfo->OMPFirstScanLoop) {
4736 IV = ScanRedInfo->IV;
// Pass 2: *ScanVars[i] = buffer[IV] (read back the scanned values).
4739 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4740 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]];
4741 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4742 Type *DestTy = ScanVarsType[i];
4744 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4745 Value *Src = Builder.CreateLoad(DestTy, SrcPtr);
4746 Builder.CreateStore(Src, ScanVars[i]);
// Inclusive scan runs "before" code on the first loop; exclusive scan
// inverts the successor order.
4752 if (ScanRedInfo->OMPFirstScanLoop == IsInclusive) {
4753 Builder.CreateCondBr(CmpI, ScanRedInfo->OMPBeforeScanBlock,
4754 ScanRedInfo->OMPAfterScanBlock);
4756 Builder.CreateCondBr(CmpI, ScanRedInfo->OMPAfterScanBlock,
4757 ScanRedInfo->OMPBeforeScanBlock);
4759 emitBlock(ScanRedInfo->OMPAfterScanBlock,
4760 Builder.GetInsertBlock()->getParent());
4761 Builder.SetInsertPoint(ScanRedInfo->OMPAfterScanBlock);
4762 return Builder.saveIP();
// Allocates the per-scan-variable buffer pointers ("vla" allocas) and, inside
// a masked region followed by a barrier, heap-allocates (CreateMalloc) one
// buffer of Span+1 elements per scan variable, storing the buffer address
// through the corresponding pointer slot. The masked region ensures a single
// thread performs the allocation; the barrier publishes it to the team.
4765Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
4769 Builder.restoreIP(AllocaIP);
// One pointer slot per scan variable, recorded in ScanBuffPtrs.
4771 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4773 Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
"vla");
4774 (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]] = BuffPtr;
4778 auto BodyGenCB = [&](InsertPointTy AllocaIP,
4779 InsertPointTy CodeGenIP) ->
Error {
4780 Builder.restoreIP(CodeGenIP);
// Span + 1 elements so the scan has room for the final/initial slot.
4782 Builder.CreateAdd(ScanRedInfo->Span, Builder.getInt32(1));
4783 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4787 Value *Buff = Builder.CreateMalloc(IntPtrTy, ScanVarsType[i], Allocsize,
4788 AllocSpan,
nullptr,
"arr");
4789 Builder.CreateStore(Buff, (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]]);
// Allocation happens at the scan-init block, before the loops run.
4797 Builder.SetInsertPoint(ScanRedInfo->OMPScanInit->getTerminator());
4799 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4800 createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
4803 return AfterIP.takeError();
4804 Builder.restoreIP(*AfterIP);
4805 BasicBlock *InputBB = Builder.GetInsertBlock();
4807 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
// Barrier so all threads see the buffers allocated by the masked thread.
4808 AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
4810 return AfterIP.takeError();
4811 Builder.restoreIP(*AfterIP);
// Finalization for scan-based directives: inside a masked region, copies the
// last buffered element (index Span) of each reduction's scan buffer back to
// the original variable and frees the buffer; a barrier then synchronizes the
// team. Mirrors the allocation done in emitScanBasedDirectiveDeclsIR.
4816Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
4818 auto BodyGenCB = [&](InsertPointTy AllocaIP,
4819 InsertPointTy CodeGenIP) ->
Error {
4820 Builder.restoreIP(CodeGenIP);
4821 for (ReductionInfo RedInfo : ReductionInfos) {
4822 Value *PrivateVar = RedInfo.PrivateVariable;
4823 Value *OrigVar = RedInfo.Variable;
4824 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[PrivateVar];
4825 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4827 Type *SrcTy = RedInfo.ElementType;
// buffer[Span] holds the final scan value for this reduction.
4828 Value *Val = Builder.CreateInBoundsGEP(SrcTy, Buff, ScanRedInfo->Span,
4830 Value *Src = Builder.CreateLoad(SrcTy, Val);
4832 Builder.CreateStore(Src, OrigVar);
// Release the heap buffer allocated in the decls phase.
4833 Builder.CreateFree(Buff);
// Insert before the terminator if the finish block already ends in one.
4841 if (ScanRedInfo->OMPScanFinish->getTerminator())
4842 Builder.SetInsertPoint(ScanRedInfo->OMPScanFinish->getTerminator());
4844 Builder.SetInsertPoint(ScanRedInfo->OMPScanFinish);
4847 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4848 createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
4851 return AfterIP.takeError();
4852 Builder.restoreIP(*AfterIP);
4853 BasicBlock *InputBB = Builder.GetInsertBlock();
4855 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
4856 AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
4858 return AfterIP.takeError();
4859 Builder.restoreIP(*AfterIP);
// Emits the cross-iteration combine step of a scan reduction as a
// doubly-nested loop inside a masked region: the outer loop iterates over
// k = 0..ceil(log2(Span)) with a power-of-two stride Pow2K (both PHIs), and
// the inner loop combines buffer[IV] with buffer[IV - Pow2K] via the
// user-supplied ReductionGen, storing the result back into buffer[IV]
// (a log-depth prefix-combine over the scan buffers). A barrier follows the
// masked region, then buffer finals are written back.
// NOTE(review): several values (LogVal's source, NMin1, InputBB/LoopBB/
// ExitBB/InnerLoopBB/InnerExitBB/CurFn, NextIVal, Cmp, Result) are declared
// on lines elided by this extraction.
4863OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
4864 const LocationDescription &
Loc,
4866 ScanInfo *ScanRedInfo) {
4868 if (!updateToLocation(
Loc))
4870 auto BodyGenCB = [&](InsertPointTy AllocaIP,
4871 InsertPointTy CodeGenIP) ->
Error {
4872 Builder.restoreIP(CodeGenIP);
4878 splitBB(Builder,
false,
"omp.outer.log.scan.exit");
4880 Builder.GetInsertBlock()->getModule(),
// Trip count of the outer loop is derived from log2(Span) computed in FP.
4884 Builder.CreateUIToFP(ScanRedInfo->Span, Builder.getDoubleTy());
4887 Builder.GetInsertBlock()->getModule(),
4890 LogVal = Builder.CreateFPToUI(LogVal, Builder.getInt32Ty());
4893 llvm::ConstantInt::get(ScanRedInfo->Span->getType(), 1));
4894 Builder.SetInsertPoint(InputBB);
4895 Builder.CreateBr(LoopBB);
4896 emitBlock(LoopBB, CurFn);
4897 Builder.SetInsertPoint(LoopBB);
// Outer-loop PHIs: Counter starts at 0, Pow2K (the combine stride) at 1.
4899 PHINode *Counter = Builder.CreatePHI(Builder.getInt32Ty(), 2);
4901 PHINode *Pow2K = Builder.CreatePHI(Builder.getInt32Ty(), 2);
4902 Counter->
addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 0),
4904 Pow2K->
addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 1),
// Inner loop only runs while there is an element Pow2K slots behind.
4912 llvm::Value *CmpI = Builder.CreateICmpUGE(NMin1, Pow2K);
4913 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
4914 emitBlock(InnerLoopBB, CurFn);
4915 Builder.SetInsertPoint(InnerLoopBB);
4916 PHINode *IVal = Builder.CreatePHI(Builder.getInt32Ty(), 2);
4918 for (ReductionInfo RedInfo : ReductionInfos) {
4919 Value *ReductionVal = RedInfo.PrivateVariable;
4920 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ReductionVal];
4921 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4922 Type *DestTy = RedInfo.ElementType;
4923 Value *
IV = Builder.CreateAdd(IVal, Builder.getInt32(1));
4925 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
// Combine partner is Pow2K elements behind the current slot.
4926 Value *OffsetIval = Builder.CreateNUWSub(
IV, Pow2K);
4928 Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval,
"arrayOffset");
4929 Value *
LHS = Builder.CreateLoad(DestTy, LHSPtr);
4930 Value *
RHS = Builder.CreateLoad(DestTy, RHSPtr);
4932 InsertPointOrErrorTy AfterIP =
4933 RedInfo.ReductionGen(Builder.saveIP(),
LHS,
RHS, Result);
4935 return AfterIP.takeError();
4936 Builder.CreateStore(Result, LHSPtr);
// Inner loop walks the index downward by 1 until it crosses Pow2K.
4939 IVal, llvm::ConstantInt::get(Builder.getInt32Ty(), 1));
4940 IVal->
addIncoming(NextIVal, Builder.GetInsertBlock());
4941 CmpI = Builder.CreateICmpUGE(NextIVal, Pow2K);
4942 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
4943 emitBlock(InnerExitBB, CurFn);
4945 Counter, llvm::ConstantInt::get(Counter->
getType(), 1));
// Stride doubles every outer iteration (shl nuw).
4948 llvm::Value *NextPow2K = Builder.CreateShl(Pow2K, 1,
"",
true);
4949 Pow2K->
addIncoming(NextPow2K, Builder.GetInsertBlock());
4951 Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
// Run the combine inside masked + barrier, then emit the finalization.
4961 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4962 createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
4965 return AfterIP.takeError();
4966 Builder.restoreIP(*AfterIP);
4967 AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
4970 return AfterIP.takeError();
4971 Builder.restoreIP(*AfterIP);
4972 Error Err = emitScanBasedDirectiveFinalsIR(ReductionInfos, ScanRedInfo);
// Drives the two-pass scan lowering: runs the caller-provided input-loop
// generator with OMPFirstScanLoop = true, then the scan-loop generator with
// OMPFirstScanLoop = false (see createScan for how each pass behaves).
4979Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
4982 ScanInfo *ScanRedInfo) {
4990 ScanRedInfo->OMPFirstScanLoop =
true;
4991 Error Err = InputLoopGen();
5001 ScanRedInfo->OMPFirstScanLoop =
false;
5002 Error Err = ScanLoopGen(Builder.saveIP());
// Creates the four helper basic blocks used by the scan lowering (dispatch,
// after-scan, before-scan, loop-exit) in the current function and records
// them in ScanRedInfo. The BasicBlock::Create calls themselves are on lines
// elided by this extraction.
5009void OpenMPIRBuilder::createScanBBs(ScanInfo *ScanRedInfo) {
5010 Function *
Fun = Builder.GetInsertBlock()->getParent();
5011 ScanRedInfo->OMPScanDispatch =
5013 ScanRedInfo->OMPAfterScanBlock =
5015 ScanRedInfo->OMPBeforeScanBlock =
5017 ScanRedInfo->OMPScanLoopExit =
// Materializes the CFG skeleton of a canonical loop:
//   Preheader -> Header(iv phi) -> Cond(iv < TripCount ?) -> Body -> Latch
//   (iv+1, back to Header) ... Cond-false -> Exit -> After.
// The induction variable counts 0..TripCount-1 with an unsigned compare.
// Registers the blocks in a new CanonicalLoopInfo (LoopInfos list) and
// returns it. (Parameter list and block creation lines are elided here.)
5020CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
5044 Builder.SetCurrentDebugLocation(
DL);
5046 Builder.SetInsertPoint(Preheader);
5047 Builder.CreateBr(Header);
5049 Builder.SetInsertPoint(Header);
// IV starts at 0 on entry from the preheader; the latch adds 1 per trip.
5050 PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2,
"omp_" + Name +
".iv");
5051 IndVarPHI->
addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
5052 Builder.CreateBr(
Cond);
5054 Builder.SetInsertPoint(
Cond);
// Unsigned compare: canonical loops always count up from zero.
5056 Builder.CreateICmpULT(IndVarPHI, TripCount,
"omp_" + Name +
".cmp");
5057 Builder.CreateCondBr(Cmp, Body, Exit);
5059 Builder.SetInsertPoint(Body);
5060 Builder.CreateBr(Latch);
5062 Builder.SetInsertPoint(Latch);
5063 Value *
Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1),
5064 "omp_" + Name +
".next",
true);
5065 Builder.CreateBr(Header);
5068 Builder.SetInsertPoint(Exit);
5069 Builder.CreateBr(After);
// Ownership of the loop metadata lives in the builder's LoopInfos list.
5072 LoopInfos.emplace_front();
5073 CanonicalLoopInfo *CL = &LoopInfos.front();
5075 CL->Header = Header;
// Creates a canonical loop with the given trip count: builds the skeleton
// via createLoopSkeleton, splices it into the current position when a
// location is set, then invokes BodyGenCB at the loop body with the
// induction variable. (Return-type line and tail are elided here.)
5087OpenMPIRBuilder::createCanonicalLoop(
const LocationDescription &
Loc,
5088 LoopBodyGenCallbackTy BodyGenCB,
5093 CanonicalLoopInfo *CL = createLoopSkeleton(
Loc.DL, TripCount, BB->
getParent(),
5094 NextBB, NextBB, Name);
// Only wire the skeleton into the current block if Loc is usable.
5098 if (updateToLocation(
Loc)) {
5102 spliceBB(Builder, After,
false);
5103 Builder.CreateBr(CL->getPreheader());
// Let the caller populate the body; errors propagate out.
5108 if (
Error Err = BodyGenCB(CL->getBodyIP(), CL->getIndVar()))
5118 ScanInfos.emplace_front();
5119 ScanInfo *
Result = &ScanInfos.front();
// Builds the loop pair used by scan directives: computes the trip count,
// records it as the scan Span, splits off a "scan.init" block, and then
// generates the same loop twice (input pass + scan pass) through
// emitScanBasedDirectiveIR. The shared BodyGen wires each loop body through
// the scan dispatch/before/after/loop-exit blocks created by createScanBBs.
5124OpenMPIRBuilder::createCanonicalScanLoops(
5125 const LocationDescription &
Loc, LoopBodyGenCallbackTy BodyGenCB,
5126 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
5127 InsertPointTy ComputeIP,
const Twine &Name, ScanInfo *ScanRedInfo) {
// Trip-count computation may be requested at a dedicated insert point.
5128 LocationDescription ComputeLoc =
5129 ComputeIP.isSet() ? LocationDescription(ComputeIP,
Loc.DL) :
Loc;
5130 updateToLocation(ComputeLoc);
5134 Value *TripCount = calculateCanonicalLoopTripCount(
5135 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
5136 ScanRedInfo->Span = TripCount;
5137 ScanRedInfo->OMPScanInit = splitBB(Builder,
true,
"scan.init");
5138 Builder.SetInsertPoint(ScanRedInfo->OMPScanInit);
5140 auto BodyGen = [=](InsertPointTy CodeGenIP,
Value *
IV) {
5141 Builder.restoreIP(CodeGenIP);
5142 ScanRedInfo->IV =
IV;
5143 createScanBBs(ScanRedInfo);
5144 BasicBlock *InputBlock = Builder.GetInsertBlock();
// Redirect the body entry through the scan dispatch block.
5148 Terminator->setSuccessor(0, ScanRedInfo->OMPScanDispatch);
5149 emitBlock(ScanRedInfo->OMPBeforeScanBlock,
5150 Builder.GetInsertBlock()->getParent());
5151 Builder.CreateBr(ScanRedInfo->OMPScanLoopExit);
5152 emitBlock(ScanRedInfo->OMPScanLoopExit,
5153 Builder.GetInsertBlock()->getParent());
5154 Builder.CreateBr(ContinueBlock);
// User body is generated in the before-scan block.
5155 Builder.SetInsertPoint(
5156 ScanRedInfo->OMPBeforeScanBlock->getFirstInsertionPt());
5157 return BodyGenCB(Builder.saveIP(),
IV);
// First pass: the input loop (OMPFirstScanLoop = true in the driver).
5160 const auto &&InputLoopGen = [&]() ->
Error {
5162 Builder.saveIP(), BodyGen, Start, Stop, Step, IsSigned, InclusiveStop,
5163 ComputeIP, Name,
true, ScanRedInfo);
5167 Builder.restoreIP((*LoopInfo)->getAfterIP());
// Second pass: the scan loop proper.
5170 const auto &&ScanLoopGen = [&](LocationDescription
Loc) ->
Error {
5172 createCanonicalLoop(
Loc, BodyGen, Start, Stop, Step, IsSigned,
5173 InclusiveStop, ComputeIP, Name,
true, ScanRedInfo);
5177 Builder.restoreIP((*LoopInfo)->getAfterIP());
5178 ScanRedInfo->OMPScanFinish = Builder.GetInsertBlock();
5181 Error Err = emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen, ScanRedInfo);
// Computes the trip count of a Start/Stop/Step loop as an unsigned value.
// Signed case: normalize to a non-negative span (|Step|, min/max of the
// bounds chosen by the sign of Step) before the unsigned division; unsigned
// case: span is Stop - Start directly. InclusiveStop adds one extra
// iteration; the final select yields 0 when the loop doesn't run at all.
5187Value *OpenMPIRBuilder::calculateCanonicalLoopTripCount(
5189 bool IsSigned,
bool InclusiveStop,
const Twine &Name) {
// All three loop values must share the induction-variable type.
5199 assert(IndVarTy == Stop->
getType() &&
"Stop type mismatch");
5200 assert(IndVarTy == Step->
getType() &&
"Step type mismatch");
5202 updateToLocation(
Loc);
// Signed path: fold the direction into (LB, UB, |Step|) so the rest of the
// computation is purely unsigned.
5219 Value *IsNeg = Builder.CreateICmpSLT(Step, Zero);
5220 Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step);
5221 Value *LB = Builder.CreateSelect(IsNeg, Stop, Start);
5222 Value *UB = Builder.CreateSelect(IsNeg, Start, Stop);
5223 Span = Builder.CreateSub(UB, LB,
"",
false,
true);
5224 ZeroCmp = Builder.CreateICmp(
// Unsigned path: simple difference (nuw).
5227 Span = Builder.CreateSub(Stop, Start,
"",
true);
5228 ZeroCmp = Builder.CreateICmp(
5232 Value *CountIfLooping;
5233 if (InclusiveStop) {
5234 CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One);
// Exclusive stop: ceil-divide via (Span - 1) / Incr + 1, with a guard for
// the one-iteration case (OneCmp is computed on an elided line).
5237 Value *CountIfTwo = Builder.CreateAdd(
5238 Builder.CreateUDiv(Builder.CreateSub(Span, One), Incr), One);
5240 CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo);
5243 return Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
5244 "omp_" + Name +
".tripcount");
5248 const LocationDescription &
Loc, LoopBodyGenCallbackTy BodyGenCB,
5249 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
5250 InsertPointTy ComputeIP,
const Twine &Name,
bool InScan,
5251 ScanInfo *ScanRedInfo) {
5252 LocationDescription ComputeLoc =
5253 ComputeIP.isSet() ? LocationDescription(ComputeIP,
Loc.DL) :
Loc;
5255 Value *TripCount = calculateCanonicalLoopTripCount(
5256 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
5258 auto BodyGen = [=](InsertPointTy CodeGenIP,
Value *
IV) {
5259 Builder.restoreIP(CodeGenIP);
5260 Value *Span = Builder.CreateMul(
IV, Step);
5261 Value *IndVar = Builder.CreateAdd(Span, Start);
5263 ScanRedInfo->IV = IndVar;
5264 return BodyGenCB(Builder.saveIP(), IndVar);
5266 LocationDescription LoopLoc =
5269 : LocationDescription(Builder.saveIP(),
5270 Builder.getCurrentDebugLocation());
5271 return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
5280 OpenMPIRBuilder &OMPBuilder) {
5281 unsigned Bitwidth = Ty->getIntegerBitWidth();
5283 return OMPBuilder.getOrCreateRuntimeFunction(
5284 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u);
5286 return OMPBuilder.getOrCreateRuntimeFunction(
5287 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u);
5296 OpenMPIRBuilder &OMPBuilder) {
5297 unsigned Bitwidth = Ty->getIntegerBitWidth();
5299 return OMPBuilder.getOrCreateRuntimeFunction(
5300 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
5302 return OMPBuilder.getOrCreateRuntimeFunction(
5303 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
// Lowers a canonical loop to a statically-scheduled workshare loop:
// allocates the p.lastiter / p.lowerbound / p.upperbound / p.stride slots,
// seeds bounds [0, TripCount-1], calls the *_static_init runtime entry (via
// BuildInitCall, once more with the dist_schedule type when applicable),
// rewrites the loop to run over the thread's chunk (IV' = IV + LowerBound),
// and finishes with __kmpc_for_static_fini plus an implicit barrier.
// NOTE(review): this extraction elides several declarations (IV, Zero,
// I32Type, StaticInit, SchedType's false-arm, Args, OldIV rewiring).
5307OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop(
5308 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
5311 assert(CLI->isValid() &&
"Requires a valid canonical loop");
5313 "Require dedicated allocate IP");
5316 Builder.restoreIP(CLI->getPreheaderIP());
5317 Builder.SetCurrentDebugLocation(
DL);
5320 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
5321 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
5325 Type *IVTy =
IV->getType();
5327 LoopType == WorksharingLoopType::DistributeForStaticLoop
5331 getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
// Runtime out-parameters live in the function's alloca block.
5334 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
5337 Value *PLastIter = Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
5338 Value *PLowerBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
5339 Value *PUpperBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
5340 Value *PStride = Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
5341 CLI->setLastIter(PLastIter);
// Seed the bounds before the init call: [0, TripCount-1], stride 1.
5347 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
5349 Constant *One = ConstantInt::get(IVTy, 1);
5350 Builder.CreateStore(Zero, PLowerBound);
5351 Value *UpperBound = Builder.CreateSub(CLI->getTripCount(), One);
5352 Builder.CreateStore(UpperBound, PUpperBound);
5353 Builder.CreateStore(One, PStride);
5355 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
5358 (LoopType == WorksharingLoopType::DistributeStaticLoop)
5359 ? OMPScheduleType::OrderedDistribute
5362 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
// Shared helper: builds one *_static_init call; distribute-for loops pass
// an extra dist-upper-bound slot.
5366 auto BuildInitCall = [LoopType, SrcLoc, ThreadNum, PLastIter, PLowerBound,
5367 PUpperBound, IVTy, PStride, One,
Zero, StaticInit,
5368 this](
Value *SchedulingType,
auto &Builder) {
5370 PLowerBound, PUpperBound});
5371 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
5372 Value *PDistUpperBound =
5373 Builder.CreateAlloca(IVTy,
nullptr,
"p.distupperbound");
5374 Args.push_back(PDistUpperBound);
5377 createRuntimeFunctionCall(StaticInit, Args);
5379 BuildInitCall(SchedulingType, Builder);
// A dist_schedule clause triggers a second init call with its own type.
5380 if (HasDistSchedule &&
5381 LoopType != WorksharingLoopType::DistributeStaticLoop) {
5382 Constant *DistScheduleSchedType = ConstantInt::get(
5387 BuildInitCall(DistScheduleSchedType, Builder);
// The runtime wrote this thread's chunk into the bound slots; shrink the
// loop's trip count accordingly.
5389 Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound);
5390 Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound);
5391 Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
5392 Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
5393 CLI->setTripCount(TripCount);
// Shift the body's IV so iteration 0 maps to this thread's lower bound.
5400 Builder.SetInsertPoint(CLI->getBody(),
5401 CLI->getBody()->getFirstInsertionPt());
5402 Builder.SetCurrentDebugLocation(
DL);
5403 return Builder.CreateAdd(OldIV, LowerBound);
5407 Builder.SetInsertPoint(CLI->getExit(),
5408 CLI->getExit()->getTerminator()->getIterator());
5409 createRuntimeFunctionCall(StaticFini, {SrcLoc, ThreadNum});
// Implicit barrier at the end of a worksharing 'for'.
5413 InsertPointOrErrorTy BarrierIP =
5414 createBarrier(LocationDescription(Builder.saveIP(),
DL),
5415 omp::Directive::OMPD_for,
false,
5418 return BarrierIP.takeError();
5421 InsertPointTy AfterIP = CLI->getAfterIP();
5443 if (
Block == CLI->getCond() ||
Block == CLI->getHeader())
5445 Reachable.insert(
Block);
5455 Ctx, {
MDString::get(Ctx,
"llvm.loop.parallel_accesses"), AccessGroup}));
// Lowers a canonical loop to static CHUNKED scheduling: bounds are widened
// to an internal 32/64-bit IV type, *_static_init is called with the chunk
// size (and again for dist_schedule where applicable), and an outer
// "dispatch" canonical loop is synthesized that walks chunk start positions
// (FirstChunkStart, stride NextChunkStride); the original loop becomes the
// inner per-chunk loop with trip count min(ChunkRange, remaining) and its IV
// offset by the dispatch counter. Ends with __kmpc_for_static_fini and a
// barrier.
// NOTE(review): the extraction elides declarations (IV, I32Type, Zero,
// SchedType/DistScheduleSchedType initializers, StaticInit, ChunkRange,
// UpperBound) and the dispatch-loop plumbing between lines 5593 and 5601.
5458OpenMPIRBuilder::InsertPointOrErrorTy
5459OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
5460 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
5463 assert(CLI->isValid() &&
"Requires a valid canonical loop");
5464 assert((ChunkSize || DistScheduleChunkSize) &&
"Chunk size is required");
5466 LLVMContext &Ctx = CLI->getFunction()->getContext();
5468 Value *OrigTripCount = CLI->getTripCount();
5469 Type *IVTy =
IV->getType();
5471 "Max supported tripcount bitwidth is 64 bits");
// Internal IV type is i32 or i64 depending on the original width.
5473 :
Type::getInt64Ty(Ctx);
5476 Constant *One = ConstantInt::get(InternalIVTy, 1);
5486 if (ChunkSize || DistScheduleChunkSize)
5494 getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
// Runtime out-parameter slots, in the internal (widened) IV type.
5497 Builder.restoreIP(AllocaIP);
5498 Builder.SetCurrentDebugLocation(
DL);
5499 Value *PLastIter = Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
5500 Value *PLowerBound =
5501 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.lowerbound");
5502 Value *PUpperBound =
5503 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.upperbound");
5504 Value *PStride = Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.stride");
5505 CLI->setLastIter(PLastIter);
5508 Builder.restoreIP(CLI->getPreheaderIP());
5509 Builder.SetCurrentDebugLocation(
DL);
// Widen chunk sizes and trip count to the internal IV type (absent chunk
// sizes become 0).
5512 Value *CastedChunkSize = Builder.CreateZExtOrTrunc(
5513 ChunkSize ? ChunkSize : Zero, InternalIVTy,
"chunksize");
5514 Value *CastedDistScheduleChunkSize = Builder.CreateZExtOrTrunc(
5515 DistScheduleChunkSize ? DistScheduleChunkSize : Zero, InternalIVTy,
5516 "distschedulechunksize");
5517 Value *CastedTripCount =
5518 Builder.CreateZExt(OrigTripCount, InternalIVTy,
"tripcount");
5521 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
5523 ConstantInt::get(I32Type,
static_cast<int>(DistScheduleSchedType));
// Seed bounds: [0, TripCount-1], clamped to 0 for an empty loop.
5524 Builder.CreateStore(Zero, PLowerBound);
5525 Value *OrigUpperBound = Builder.CreateSub(CastedTripCount, One);
5526 Value *IsTripCountZero = Builder.CreateICmpEQ(CastedTripCount, Zero);
5528 Builder.CreateSelect(IsTripCountZero, Zero, OrigUpperBound);
5529 Builder.CreateStore(UpperBound, PUpperBound);
5530 Builder.CreateStore(One, PStride);
5535 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
5536 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
5537 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
// Helper used for both the regular and the dist_schedule init calls.
5538 auto BuildInitCall = [StaticInit, SrcLoc, ThreadNum, PLastIter, PLowerBound,
5539 PUpperBound, PStride, One,
5540 this](
Value *SchedulingType,
Value *ChunkSize,
5542 createRuntimeFunctionCall(
5543 StaticInit, {SrcLoc, ThreadNum,
5544 SchedulingType, PLastIter,
5545 PLowerBound, PUpperBound,
5549 BuildInitCall(SchedulingType, CastedChunkSize, Builder);
5550 if (DistScheduleSchedType != OMPScheduleType::None &&
5551 SchedType != OMPScheduleType::OrderedDistributeChunked &&
5552 SchedType != OMPScheduleType::OrderedDistribute) {
5556 BuildInitCall(DistSchedulingType, CastedDistScheduleChunkSize, Builder);
// Read back this thread's first chunk and the inter-chunk stride.
5560 Value *FirstChunkStart =
5561 Builder.CreateLoad(InternalIVTy, PLowerBound,
"omp_firstchunk.lb");
5562 Value *FirstChunkStop =
5563 Builder.CreateLoad(InternalIVTy, PUpperBound,
"omp_firstchunk.ub");
5564 Value *FirstChunkEnd = Builder.CreateAdd(FirstChunkStop, One);
5566 Builder.CreateSub(FirstChunkEnd, FirstChunkStart,
"omp_chunk.range");
5567 Value *NextChunkStride =
5568 Builder.CreateLoad(InternalIVTy, PStride,
"omp_dispatch.stride");
// Synthesize the outer dispatch loop over chunk start positions.
5571 BasicBlock *DispatchEnter = splitBB(Builder,
true);
5572 Value *DispatchCounter;
5577 CanonicalLoopInfo *DispatchCLI =
cantFail(createCanonicalLoop(
5578 {Builder.saveIP(),
DL},
5579 [&](InsertPointTy BodyIP,
Value *Counter) {
5580 DispatchCounter = Counter;
5583 FirstChunkStart, CastedTripCount, NextChunkStride,
// Keep the raw blocks, then drop the CLI: the dispatch loop is rewired
// manually around the original loop below.
5589 BasicBlock *DispatchBody = DispatchCLI->getBody();
5590 BasicBlock *DispatchLatch = DispatchCLI->getLatch();
5591 BasicBlock *DispatchExit = DispatchCLI->getExit();
5592 BasicBlock *DispatchAfter = DispatchCLI->getAfter();
5593 DispatchCLI->invalidate();
5601 Builder.restoreIP(CLI->getPreheaderIP());
5602 Builder.SetCurrentDebugLocation(
DL);
// Per-chunk trip count: full ChunkRange, or the remainder for the last
// chunk; truncated back to the original IV width.
5605 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
5606 Value *ChunkEnd = Builder.CreateAdd(DispatchCounter, ChunkRange);
5607 Value *IsLastChunk =
5608 Builder.CreateICmpUGE(ChunkEnd, CastedTripCount,
"omp_chunk.is_last");
5609 Value *CountUntilOrigTripCount =
5610 Builder.CreateSub(CastedTripCount, DispatchCounter);
5611 Value *ChunkTripCount = Builder.CreateSelect(
5612 IsLastChunk, CountUntilOrigTripCount, ChunkRange,
"omp_chunk.tripcount");
5613 Value *BackcastedChunkTC =
5614 Builder.CreateTrunc(ChunkTripCount, IVTy,
"omp_chunk.tripcount.trunc");
5615 CLI->setTripCount(BackcastedChunkTC);
// Offset the inner loop's IV by the chunk start.
5620 Value *BackcastedDispatchCounter =
5621 Builder.CreateTrunc(DispatchCounter, IVTy,
"omp_dispatch.iv.trunc");
5623 Builder.restoreIP(CLI->getBodyIP());
5624 return Builder.CreateAdd(
IV, BackcastedDispatchCounter);
5629 createRuntimeFunctionCall(StaticFini, {SrcLoc, ThreadNum});
// Implicit barrier closing the worksharing region.
5633 InsertPointOrErrorTy AfterIP =
5634 createBarrier(LocationDescription(Builder.saveIP(),
DL), OMPD_for,
5637 return AfterIP.takeError();
5655 unsigned Bitwidth = Ty->getIntegerBitWidth();
5656 Module &M = OMPBuilder->M;
5658 case WorksharingLoopType::ForStaticLoop:
5660 return OMPBuilder->getOrCreateRuntimeFunction(
5661 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_4u);
5663 return OMPBuilder->getOrCreateRuntimeFunction(
5664 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_8u);
5666 case WorksharingLoopType::DistributeStaticLoop:
5668 return OMPBuilder->getOrCreateRuntimeFunction(
5669 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_4u);
5671 return OMPBuilder->getOrCreateRuntimeFunction(
5672 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_8u);
5674 case WorksharingLoopType::DistributeForStaticLoop:
5676 return OMPBuilder->getOrCreateRuntimeFunction(
5677 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_4u);
5679 return OMPBuilder->getOrCreateRuntimeFunction(
5680 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_8u);
5683 if (Bitwidth != 32 && Bitwidth != 64) {
5695 Function &LoopBodyFn,
bool NoLoop) {
5697 Module &M = OMPBuilder->M;
5706 if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
5707 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5708 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
5709 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
5710 OMPBuilder->createRuntimeFunctionCall(RTLFn, RealArgs);
5713 FunctionCallee RTLNumThreads = OMPBuilder->getOrCreateRuntimeFunction(
5714 M, omp::RuntimeFunction::OMPRTL_omp_get_num_threads);
5715 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
5716 Value *NumThreads = OMPBuilder->createRuntimeFunctionCall(RTLNumThreads, {});
5719 Builder.CreateZExtOrTrunc(NumThreads, TripCountTy,
"num.threads.cast"));
5720 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5721 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
5722 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5723 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), NoLoop));
5725 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
5728 OMPBuilder->createRuntimeFunctionCall(RTLFn, RealArgs);
5732 OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI,
Value *Ident,
5737 Value *TripCount = CLI->getTripCount();
5743 Preheader->
splice(std::prev(Preheader->
end()), CLI->getBody(),
5744 CLI->getBody()->begin(), std::prev(CLI->getBody()->end()));
5749 Builder.restoreIP({Preheader, Preheader->
end()});
5752 Builder.CreateBr(CLI->getExit());
5755 OpenMPIRBuilder::OutlineInfo CleanUpInfo;
5758 CleanUpInfo.EntryBB = CLI->getHeader();
5759 CleanUpInfo.ExitBB = CLI->getExit();
5760 CleanUpInfo.collectBlocks(RegionBlockSet, BlocksToBeRemoved);
5768 "Expected unique undroppable user of outlined function");
5770 assert(OutlinedFnCallInstruction &&
"Expected outlined function call");
5772 "Expected outlined function call to be located in loop preheader");
5774 if (OutlinedFnCallInstruction->
arg_size() > 1)
5781 LoopBodyArg, TripCount, OutlinedFn, NoLoop);
5783 for (
auto &ToBeDeletedItem : ToBeDeleted)
5784 ToBeDeletedItem->eraseFromParent();
5788OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyWorkshareLoopTarget(
5789 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
5792 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
5793 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
5796 OI.OuterAllocaBB = CLI->getPreheader();
5802 OI.OuterAllocaBB = AllocaIP.getBlock();
5805 OI.EntryBB = CLI->getBody();
5806 OI.ExitBB = CLI->getLatch()->splitBasicBlock(CLI->getLatch()->begin(),
5807 "omp.prelatch",
true);
5810 Builder.restoreIP({CLI->getPreheader(), CLI->getPreheader()->begin()});
5814 AllocaInst *NewLoopCnt = Builder.CreateAlloca(CLI->getIndVarType(), 0,
"");
5816 Builder.CreateLoad(CLI->getIndVarType(), NewLoopCnt);
5827 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
5838 CLI->getPreheader(),
5847 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
5853 CLI->getIndVar()->user_end());
5856 if (ParallelRegionBlockSet.
count(Inst->getParent())) {
5857 Inst->replaceUsesOfWith(CLI->getIndVar(), NewLoopCntLoad);
5863 OI.ExcludeArgsFromAggregate.push_back(NewLoopCntLoad);
5870 OI.PostOutlineCB = [=, ToBeDeletedVec =
5871 std::move(ToBeDeleted)](
Function &OutlinedFn) {
5875 addOutlineInfo(std::move(OI));
5876 return CLI->getAfterIP();
5879OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyWorkshareLoop(
5880 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
5881 bool NeedsBarrier, omp::ScheduleKind SchedKind,
Value *ChunkSize,
5882 bool HasSimdModifier,
bool HasMonotonicModifier,
5883 bool HasNonmonotonicModifier,
bool HasOrderedClause,
5885 Value *DistScheduleChunkSize) {
5886 if (Config.isTargetDevice())
5887 return applyWorkshareLoopTarget(
DL, CLI, AllocaIP, LoopType, NoLoop);
5889 SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
5890 HasNonmonotonicModifier, HasOrderedClause, DistScheduleChunkSize);
5892 bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
5893 OMPScheduleType::ModifierOrdered;
5895 if (HasDistSchedule) {
5896 DistScheduleSchedType = DistScheduleChunkSize
5897 ? OMPScheduleType::OrderedDistributeChunked
5898 : OMPScheduleType::OrderedDistribute;
5900 switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
5901 case OMPScheduleType::BaseStatic:
5902 case OMPScheduleType::BaseDistribute:
5903 assert((!ChunkSize || !DistScheduleChunkSize) &&
5904 "No chunk size with static-chunked schedule");
5905 if (IsOrdered && !HasDistSchedule)
5906 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
5907 NeedsBarrier, ChunkSize);
5909 if (DistScheduleChunkSize)
5910 return applyStaticChunkedWorkshareLoop(
5911 DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
5912 DistScheduleChunkSize, DistScheduleSchedType);
5913 return applyStaticWorkshareLoop(
DL, CLI, AllocaIP, LoopType, NeedsBarrier,
5916 case OMPScheduleType::BaseStaticChunked:
5917 case OMPScheduleType::BaseDistributeChunked:
5918 if (IsOrdered && !HasDistSchedule)
5919 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
5920 NeedsBarrier, ChunkSize);
5922 return applyStaticChunkedWorkshareLoop(
5923 DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
5924 DistScheduleChunkSize, DistScheduleSchedType);
5926 case OMPScheduleType::BaseRuntime:
5927 case OMPScheduleType::BaseAuto:
5928 case OMPScheduleType::BaseGreedy:
5929 case OMPScheduleType::BaseBalanced:
5930 case OMPScheduleType::BaseSteal:
5931 case OMPScheduleType::BaseGuidedSimd:
5932 case OMPScheduleType::BaseRuntimeSimd:
5934 "schedule type does not support user-defined chunk sizes");
5936 case OMPScheduleType::BaseDynamicChunked:
5937 case OMPScheduleType::BaseGuidedChunked:
5938 case OMPScheduleType::BaseGuidedIterativeChunked:
5939 case OMPScheduleType::BaseGuidedAnalyticalChunked:
5940 case OMPScheduleType::BaseStaticBalancedChunked:
5941 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
5942 NeedsBarrier, ChunkSize);
5955 unsigned Bitwidth = Ty->getIntegerBitWidth();
5957 return OMPBuilder.getOrCreateRuntimeFunction(
5958 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
5960 return OMPBuilder.getOrCreateRuntimeFunction(
5961 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
5971 unsigned Bitwidth = Ty->getIntegerBitWidth();
5973 return OMPBuilder.getOrCreateRuntimeFunction(
5974 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
5976 return OMPBuilder.getOrCreateRuntimeFunction(
5977 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
5986 unsigned Bitwidth = Ty->getIntegerBitWidth();
5988 return OMPBuilder.getOrCreateRuntimeFunction(
5989 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
5991 return OMPBuilder.getOrCreateRuntimeFunction(
5992 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
5996OpenMPIRBuilder::InsertPointOrErrorTy
5997OpenMPIRBuilder::applyDynamicWorkshareLoop(
DebugLoc DL, CanonicalLoopInfo *CLI,
5998 InsertPointTy AllocaIP,
6000 bool NeedsBarrier,
Value *Chunk) {
6001 assert(CLI->isValid() &&
"Requires a valid canonical loop");
6003 "Require dedicated allocate IP");
6005 "Require valid schedule type");
6007 bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
6008 OMPScheduleType::ModifierOrdered;
6011 Builder.SetCurrentDebugLocation(
DL);
6014 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
6015 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6019 Type *IVTy =
IV->getType();
6024 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
6026 Value *PLastIter = Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
6027 Value *PLowerBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
6028 Value *PUpperBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
6029 Value *PStride = Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
6030 CLI->setLastIter(PLastIter);
6038 Constant *One = ConstantInt::get(IVTy, 1);
6039 Builder.CreateStore(One, PLowerBound);
6040 Value *UpperBound = CLI->getTripCount();
6041 Builder.CreateStore(UpperBound, PUpperBound);
6042 Builder.CreateStore(One, PStride);
6048 InsertPointTy AfterIP = CLI->getAfterIP();
6056 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
6059 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
6062 createRuntimeFunctionCall(DynamicInit, {SrcLoc, ThreadNum, SchedulingType,
6071 Builder.SetInsertPoint(OuterCond, OuterCond->getFirstInsertionPt());
6072 Value *Res = createRuntimeFunctionCall(
6074 {SrcLoc, ThreadNum, PLastIter, PLowerBound, PUpperBound, PStride});
6075 Constant *Zero32 = ConstantInt::get(I32Type, 0);
6078 Builder.CreateSub(Builder.CreateLoad(IVTy, PLowerBound), One,
"lb");
6079 Builder.CreateCondBr(MoreWork, Header, Exit);
6085 PI->setIncomingBlock(0, OuterCond);
6086 PI->setIncomingValue(0, LowerBound);
6091 Br->setSuccessor(0, OuterCond);
6096 Builder.SetInsertPoint(
Cond,
Cond->getFirstInsertionPt());
6097 UpperBound = Builder.CreateLoad(IVTy, PUpperBound,
"ub");
6100 CI->setOperand(1, UpperBound);
6104 assert(BI->getSuccessor(1) == Exit);
6105 BI->setSuccessor(1, OuterCond);
6109 Builder.SetInsertPoint(&Latch->
back());
6111 createRuntimeFunctionCall(DynamicFini, {SrcLoc, ThreadNum});
6116 Builder.SetInsertPoint(&
Exit->back());
6117 InsertPointOrErrorTy BarrierIP =
6118 createBarrier(LocationDescription(Builder.saveIP(),
DL),
6119 omp::Directive::OMPD_for,
false,
6122 return BarrierIP.takeError();
6141 auto HasRemainingUses = [&BBsToErase](
BasicBlock *BB) {
6146 if (BBsToErase.
count(UseInst->getParent()))
6153 while (BBsToErase.
remove_if(HasRemainingUses)) {
6163 InsertPointTy ComputeIP) {
6164 assert(
Loops.size() >= 1 &&
"At least one loop required");
6165 size_t NumLoops =
Loops.size();
6169 return Loops.front();
6171 CanonicalLoopInfo *Outermost =
Loops.front();
6172 CanonicalLoopInfo *Innermost =
Loops.back();
6173 BasicBlock *OrigPreheader = Outermost->getPreheader();
6174 BasicBlock *OrigAfter = Outermost->getAfter();
6181 Loop->collectControlBlocks(OldControlBBs);
6184 Builder.SetCurrentDebugLocation(
DL);
6185 if (ComputeIP.isSet())
6186 Builder.restoreIP(ComputeIP);
6188 Builder.restoreIP(Outermost->getPreheaderIP());
6192 Value *CollapsedTripCount =
nullptr;
6193 for (CanonicalLoopInfo *L :
Loops) {
6195 "All loops to collapse must be valid canonical loops");
6196 Value *OrigTripCount =
L->getTripCount();
6197 if (!CollapsedTripCount) {
6198 CollapsedTripCount = OrigTripCount;
6203 CollapsedTripCount =
6204 Builder.CreateNUWMul(CollapsedTripCount, OrigTripCount);
6208 CanonicalLoopInfo *
Result =
6209 createLoopSkeleton(
DL, CollapsedTripCount,
F,
6210 OrigPreheader->
getNextNode(), OrigAfter,
"collapsed");
6216 Builder.restoreIP(
Result->getBodyIP());
6220 NewIndVars.
resize(NumLoops);
6221 for (
int i = NumLoops - 1; i >= 1; --i) {
6222 Value *OrigTripCount =
Loops[i]->getTripCount();
6224 Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount);
6225 NewIndVars[i] = NewIndVar;
6227 Leftover = Builder.CreateUDiv(Leftover, OrigTripCount);
6230 NewIndVars[0] = Leftover;
6241 auto ContinueWith = [&ContinueBlock, &ContinuePred,
DL](
BasicBlock *Dest,
6248 ContinueBlock =
nullptr;
6249 ContinuePred = NextSrc;
6256 for (
size_t i = 0; i < NumLoops - 1; ++i)
6257 ContinueWith(
Loops[i]->getBody(),
Loops[i + 1]->getHeader());
6260 ContinueWith(Innermost->getBody(), Innermost->getLatch());
6263 for (
size_t i = NumLoops - 1; i > 0; --i)
6264 ContinueWith(
Loops[i]->getAfter(),
Loops[i - 1]->getLatch());
6267 ContinueWith(
Result->getLatch(),
nullptr);
6274 for (
size_t i = 0; i < NumLoops; ++i)
6275 Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
6280 for (CanonicalLoopInfo *L :
Loops)
6289std::vector<CanonicalLoopInfo *>
6293 "Must pass as many tile sizes as there are loops");
6294 int NumLoops =
Loops.size();
6295 assert(NumLoops >= 1 &&
"At least one loop to tile required");
6297 CanonicalLoopInfo *OutermostLoop =
Loops.front();
6298 CanonicalLoopInfo *InnermostLoop =
Loops.back();
6299 Function *
F = OutermostLoop->getBody()->getParent();
6300 BasicBlock *InnerEnter = InnermostLoop->getBody();
6301 BasicBlock *InnerLatch = InnermostLoop->getLatch();
6307 Loop->collectControlBlocks(OldControlBBs);
6314 for (CanonicalLoopInfo *L :
Loops) {
6315 assert(
L->isValid() &&
"All input loops must be valid canonical loops");
6327 for (
int i = 0; i < NumLoops - 1; ++i) {
6328 CanonicalLoopInfo *Surrounding =
Loops[i];
6331 BasicBlock *EnterBB = Surrounding->getBody();
6337 Builder.SetCurrentDebugLocation(
DL);
6338 Builder.restoreIP(OutermostLoop->getPreheaderIP());
6340 for (
int i = 0; i < NumLoops; ++i) {
6342 Value *OrigTripCount = OrigTripCounts[i];
6345 Value *FloorCompleteTripCount = Builder.CreateUDiv(OrigTripCount,
TileSize);
6346 Value *FloorTripRem = Builder.CreateURem(OrigTripCount,
TileSize);
6355 Value *FloorTripOverflow =
6356 Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
6358 FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
6359 Value *FloorTripCount =
6360 Builder.CreateAdd(FloorCompleteTripCount, FloorTripOverflow,
6361 "omp_floor" +
Twine(i) +
".tripcount",
true);
6364 FloorCompleteCount.
push_back(FloorCompleteTripCount);
6370 std::vector<CanonicalLoopInfo *>
Result;
6371 Result.reserve(NumLoops * 2);
6375 BasicBlock *Enter = OutermostLoop->getPreheader();
6382 BasicBlock *OutroInsertBefore = InnermostLoop->getExit();
6384 auto EmbeddNewLoop =
6385 [
this,
DL,
F, InnerEnter, &Enter, &
Continue, &OutroInsertBefore](
6387 CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton(
6388 DL, TripCount,
F, InnerEnter, OutroInsertBefore, Name);
6393 Enter = EmbeddedLoop->getBody();
6394 Continue = EmbeddedLoop->getLatch();
6395 OutroInsertBefore = EmbeddedLoop->getLatch();
6396 return EmbeddedLoop;
6400 const Twine &NameBase) {
6402 CanonicalLoopInfo *EmbeddedLoop =
6403 EmbeddNewLoop(
P.value(), NameBase +
Twine(
P.index()));
6404 Result.push_back(EmbeddedLoop);
6408 EmbeddNewLoops(FloorCount,
"floor");
6412 Builder.SetInsertPoint(Enter->getTerminator());
6414 for (
int i = 0; i < NumLoops; ++i) {
6415 CanonicalLoopInfo *FloorLoop =
Result[i];
6418 Value *FloorIsEpilogue =
6419 Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCompleteCount[i]);
6420 Value *TileTripCount =
6421 Builder.CreateSelect(FloorIsEpilogue, FloorRems[i],
TileSize);
6427 EmbeddNewLoops(TileCounts,
"tile");
6432 for (std::pair<BasicBlock *, BasicBlock *>
P : InbetweenCode) {
6441 BodyEnter =
nullptr;
6442 BodyEntered = ExitBB;
6454 Builder.restoreIP(
Result.back()->getBodyIP());
6455 for (
int i = 0; i < NumLoops; ++i) {
6456 CanonicalLoopInfo *FloorLoop =
Result[i];
6457 CanonicalLoopInfo *TileLoop =
Result[NumLoops + i];
6458 Value *OrigIndVar = OrigIndVars[i];
6462 Builder.CreateMul(
Size, FloorLoop->getIndVar(), {},
true);
6464 Builder.CreateAdd(Scale, TileLoop->getIndVar(), {},
true);
6471 for (CanonicalLoopInfo *L :
Loops)
6475 for (CanonicalLoopInfo *GenL : Result)
6486 if (Properties.
empty())
6509 assert(
Loop->isValid() &&
"Expecting a valid CanonicalLoopInfo");
6513 assert(Latch &&
"A valid CanonicalLoopInfo must have a unique latch");
6521 if (
I.mayReadOrWriteMemory()) {
6525 I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
6530void OpenMPIRBuilder::unrollLoopFull(
DebugLoc, CanonicalLoopInfo *
Loop) {
6537void OpenMPIRBuilder::unrollLoopHeuristic(
DebugLoc, CanonicalLoopInfo *
Loop) {
6545void OpenMPIRBuilder::createIfVersion(CanonicalLoopInfo *CanonicalLoop,
6548 const Twine &NamePrefix) {
6549 Function *
F = CanonicalLoop->getFunction();
6571 auto SplitBeforeIt = CanonicalLoop->getBody()->getFirstNonPHIIt();
6577 C, NamePrefix +
".if.then",
Cond->getParent(),
Cond->getNextNode());
6579 C, NamePrefix +
".if.else",
Cond->getParent(), CanonicalLoop->getExit());
6582 Builder.SetInsertPoint(SplitBeforeIt);
6584 Builder.CreateCondBr(IfCond, ThenBlock, ElseBlock);
6587 spliceBB(IP, ThenBlock,
false, Builder.getCurrentDebugLocation());
6590 Builder.SetInsertPoint(ElseBlock);
6596 ExistingBlocks.
reserve(
L->getNumBlocks() + 1);
6598 ExistingBlocks.
append(
L->block_begin(),
L->block_end());
6604 assert(LoopCond && LoopHeader &&
"Invalid loop structure");
6606 if (
Block ==
L->getLoopPreheader() ||
Block ==
L->getLoopLatch() ||
6613 if (
Block == ThenBlock)
6614 NewBB->
setName(NamePrefix +
".if.else");
6617 VMap[
Block] = NewBB;
6621 Builder.CreateBr(NewBlocks.
front());
6625 L->getLoopLatch()->splitBasicBlock(
6626 L->getLoopLatch()->begin(), NamePrefix +
".pre_latch",
true);
6630 L->addBasicBlockToLoop(ThenBlock, LI);
6634OpenMPIRBuilder::getOpenMPDefaultSimdAlign(
const Triple &TargetTriple,
6636 if (TargetTriple.
isX86()) {
6637 if (Features.
lookup(
"avx512f"))
6639 else if (Features.
lookup(
"avx"))
6643 if (TargetTriple.
isPPC())
6645 if (TargetTriple.
isWasm())
6650void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop,
6652 Value *IfCond, OrderKind Order,
6656 Function *
F = CanonicalLoop->getFunction();
6671 if (AlignedVars.
size()) {
6672 InsertPointTy IP = Builder.saveIP();
6673 for (
auto &AlignedItem : AlignedVars) {
6674 Value *AlignedPtr = AlignedItem.first;
6675 Value *Alignment = AlignedItem.second;
6678 Builder.CreateAlignmentAssumption(
F->getDataLayout(), AlignedPtr,
6681 Builder.restoreIP(IP);
6686 createIfVersion(CanonicalLoop, IfCond, VMap, LIA, LI, L,
"simd");
6696 if (
Block == CanonicalLoop->getCond() ||
6697 Block == CanonicalLoop->getHeader())
6699 Reachable.insert(
Block);
6709 if ((Safelen ==
nullptr) || (Order == OrderKind::OMP_ORDER_concurrent))
6725 Ctx, {
MDString::get(Ctx,
"llvm.loop.vectorize.enable"), BoolConst}));
6727 if (Simdlen || Safelen) {
6731 ConstantInt *VectorizeWidth = Simdlen ==
nullptr ? Safelen : Simdlen;
6757static std::unique_ptr<TargetMachine>
6761 StringRef CPU =
F->getFnAttribute(
"target-cpu").getValueAsString();
6762 StringRef Features =
F->getFnAttribute(
"target-features").getValueAsString();
6773 std::nullopt, OptLevel));
6797 [&](
const Function &
F) {
return TM->getTargetTransformInfo(
F); });
6798 FAM.registerPass([&]() {
return TIRA; });
6812 assert(L &&
"Expecting CanonicalLoopInfo to be recognized as a loop");
6817 nullptr, ORE,
static_cast<int>(OptLevel),
6838 <<
" Threshold=" << UP.
Threshold <<
"\n"
6841 <<
" PartialOptSizeThreshold="
6861 Ptr = Load->getPointerOperand();
6863 Ptr = Store->getPointerOperand();
6870 if (Alloca->getParent() == &
F->getEntryBlock())
6890 int MaxTripCount = 0;
6891 bool MaxOrZero =
false;
6892 unsigned TripMultiple = 0;
6894 bool UseUpperBound =
false;
6896 MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP,
6898 unsigned Factor = UP.
Count;
6899 LLVM_DEBUG(
dbgs() <<
"Suggesting unroll factor of " << Factor <<
"\n");
6907void OpenMPIRBuilder::unrollLoopPartial(
DebugLoc DL, CanonicalLoopInfo *
Loop,
6909 CanonicalLoopInfo **UnrolledCLI) {
6910 assert(Factor >= 0 &&
"Unroll factor must not be negative");
6926 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst}));
6939 *UnrolledCLI =
Loop;
6944 "unrolling only makes sense with a factor of 2 or larger");
6946 Type *IndVarTy =
Loop->getIndVarType();
6953 std::vector<CanonicalLoopInfo *>
LoopNest =
6954 tileLoops(
DL, {
Loop}, {FactorVal});
6957 CanonicalLoopInfo *InnerLoop =
LoopNest[1];
6968 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst})});
6971 (*UnrolledCLI)->assertOK();
6975OpenMPIRBuilder::InsertPointTy
6976OpenMPIRBuilder::createCopyPrivate(
const LocationDescription &
Loc,
6979 if (!updateToLocation(
Loc))
6983 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6984 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6985 Value *ThreadId = getOrCreateThreadID(Ident);
6987 llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt);
6989 Value *
Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
6991 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_copyprivate);
6992 createRuntimeFunctionCall(Fn, Args);
6994 return Builder.saveIP();
6997OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSingle(
6998 const LocationDescription &
Loc, BodyGenCallbackTy BodyGenCB,
7002 if (!updateToLocation(
Loc))
7008 if (!CPVars.
empty()) {
7010 Builder.CreateStore(Builder.getInt32(0), DidIt);
7013 Directive OMPD = Directive::OMPD_single;
7015 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7016 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7017 Value *ThreadId = getOrCreateThreadID(Ident);
7020 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_single);
7021 Instruction *EntryCall = createRuntimeFunctionCall(EntryRTLFn, Args);
7023 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single);
7024 Instruction *ExitCall = createRuntimeFunctionCall(ExitRTLFn, Args);
7026 auto FiniCBWrapper = [&](InsertPointTy IP) ->
Error {
7027 if (
Error Err = FiniCB(IP))
7034 Builder.CreateStore(Builder.getInt32(1), DidIt);
7047 InsertPointOrErrorTy AfterIP =
7048 EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
7052 return AfterIP.takeError();
7055 for (
size_t I = 0,
E = CPVars.
size();
I <
E; ++
I)
7057 createCopyPrivate(LocationDescription(Builder.saveIP(),
Loc.DL),
7058 ConstantInt::get(Int64, 0), CPVars[
I],
7061 }
else if (!IsNowait) {
7062 InsertPointOrErrorTy AfterIP =
7063 createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
7064 omp::Directive::OMPD_unknown,
false,
7067 return AfterIP.takeError();
7069 return Builder.saveIP();
7072OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createCritical(
7073 const LocationDescription &
Loc, BodyGenCallbackTy BodyGenCB,
7074 FinalizeCallbackTy FiniCB,
StringRef CriticalName,
Value *HintInst) {
7076 if (!updateToLocation(
Loc))
7079 Directive OMPD = Directive::OMPD_critical;
7081 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7082 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7083 Value *ThreadId = getOrCreateThreadID(Ident);
7084 Value *LockVar = getOMPCriticalRegionLock(CriticalName);
7085 Value *
Args[] = {Ident, ThreadId, LockVar};
7091 EnterArgs.push_back(HintInst);
7092 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint);
7094 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical);
7096 Instruction *EntryCall = createRuntimeFunctionCall(RTFn, EnterArgs);
7099 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical);
7100 Instruction *ExitCall = createRuntimeFunctionCall(ExitRTLFn, Args);
7102 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
7106OpenMPIRBuilder::InsertPointTy
7107OpenMPIRBuilder::createOrderedDepend(
const LocationDescription &
Loc,
7108 InsertPointTy AllocaIP,
unsigned NumLoops,
7110 const Twine &Name,
bool IsDependSource) {
7114 "OpenMP runtime requires depend vec with i64 type");
7116 if (!updateToLocation(
Loc))
7121 Builder.restoreIP(AllocaIP);
7122 AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI64Ty,
nullptr, Name);
7124 updateToLocation(
Loc);
7127 for (
unsigned I = 0;
I < NumLoops; ++
I) {
7128 Value *DependAddrGEPIter = Builder.CreateInBoundsGEP(
7129 ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(
I)});
7130 StoreInst *STInst = Builder.CreateStore(StoreValues[
I], DependAddrGEPIter);
7134 Value *DependBaseAddrGEP = Builder.CreateInBoundsGEP(
7135 ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(0)});
7138 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7139 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7140 Value *ThreadId = getOrCreateThreadID(Ident);
7141 Value *
Args[] = {Ident, ThreadId, DependBaseAddrGEP};
7145 RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_post);
7147 RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_wait);
7148 createRuntimeFunctionCall(RTLFn, Args);
7150 return Builder.saveIP();
7153OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createOrderedThreadsSimd(
7154 const LocationDescription &
Loc, BodyGenCallbackTy BodyGenCB,
7155 FinalizeCallbackTy FiniCB,
bool IsThreads) {
7156 if (!updateToLocation(
Loc))
7159 Directive OMPD = Directive::OMPD_ordered;
7165 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7166 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7167 Value *ThreadId = getOrCreateThreadID(Ident);
7170 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_ordered);
7171 EntryCall = createRuntimeFunctionCall(EntryRTLFn, Args);
7174 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_ordered);
7175 ExitCall = createRuntimeFunctionCall(ExitRTLFn, Args);
7178 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
7182OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::EmitOMPInlinedRegion(
7184 BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB,
bool Conditional,
7185 bool HasFinalize,
bool IsCancellable) {
7188 FinalizationStack.push_back({FiniCB, OMPD, IsCancellable});
7192 BasicBlock *EntryBB = Builder.GetInsertBlock();
7201 emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
7204 if (
Error Err = BodyGenCB( InsertPointTy(),
7212 "Unexpected control flow graph state!!");
7213 InsertPointOrErrorTy AfterIP =
7214 emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
7216 return AfterIP.takeError();
7221 "Unexpected Insertion point location!");
7224 auto InsertBB = merged ? ExitPredBB : ExitBB;
7227 Builder.SetInsertPoint(InsertBB);
7229 return Builder.saveIP();
7232OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry(
7235 if (!Conditional || !EntryCall)
7236 return Builder.saveIP();
7238 BasicBlock *EntryBB = Builder.GetInsertBlock();
7239 Value *CallBool = Builder.CreateIsNotNull(EntryCall);
7251 Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
7253 Builder.SetInsertPoint(UI);
7254 Builder.Insert(EntryBBTI);
7255 UI->eraseFromParent();
7262OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitCommonDirectiveExit(
7263 omp::Directive OMPD, InsertPointTy FinIP,
Instruction *ExitCall,
7266 Builder.restoreIP(FinIP);
7270 assert(!FinalizationStack.empty() &&
7271 "Unexpected finalization stack state!");
7273 FinalizationInfo Fi = FinalizationStack.pop_back_val();
7274 assert(Fi.DK == OMPD &&
"Unexpected Directive for Finalization call!");
7276 if (
Error Err = Fi.mergeFiniBB(Builder, FinIP.getBlock()))
7277 return std::move(Err);
7281 Builder.SetInsertPoint(FinIP.getBlock()->getTerminator());
7285 return Builder.saveIP();
7289 Builder.Insert(ExitCall);
7295OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCopyinClauseBlocks(
7296 InsertPointTy IP,
Value *MasterAddr,
Value *PrivateAddr,
7325 "copyin.not.master.end");
7332 Builder.SetInsertPoint(OMP_Entry);
7333 Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
7334 Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
7335 Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr);
7336 Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
7338 Builder.SetInsertPoint(CopyBegin);
7340 Builder.SetInsertPoint(Builder.CreateBr(CopyEnd));
7342 return Builder.saveIP();
7345CallInst *OpenMPIRBuilder::createOMPAlloc(
const LocationDescription &
Loc,
7349 updateToLocation(
Loc);
7352 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7353 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7354 Value *ThreadId = getOrCreateThreadID(Ident);
7357 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc);
7359 return createRuntimeFunctionCall(Fn, Args, Name);
7362CallInst *OpenMPIRBuilder::createOMPFree(
const LocationDescription &
Loc,
7366 updateToLocation(
Loc);
7369 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7370 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7371 Value *ThreadId = getOrCreateThreadID(Ident);
7373 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free);
7374 return createRuntimeFunctionCall(Fn, Args, Name);
7377CallInst *OpenMPIRBuilder::createOMPInteropInit(
7378 const LocationDescription &
Loc,
Value *InteropVar,
7380 Value *DependenceAddress,
bool HaveNowaitClause) {
7382 updateToLocation(
Loc);
7385 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7386 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7387 Value *ThreadId = getOrCreateThreadID(Ident);
7388 if (Device ==
nullptr)
7390 Constant *InteropTypeVal = ConstantInt::get(Int32, (
int)InteropType);
7391 if (NumDependences ==
nullptr) {
7392 NumDependences = ConstantInt::get(Int32, 0);
7396 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
7398 Ident, ThreadId, InteropVar, InteropTypeVal,
7399 Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
7401 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_init);
7403 return createRuntimeFunctionCall(Fn, Args);
7406CallInst *OpenMPIRBuilder::createOMPInteropDestroy(
7407 const LocationDescription &
Loc,
Value *InteropVar,
Value *Device,
7408 Value *NumDependences,
Value *DependenceAddress,
bool HaveNowaitClause) {
7410 updateToLocation(
Loc);
7413 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7414 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7415 Value *ThreadId = getOrCreateThreadID(Ident);
7416 if (Device ==
nullptr)
7418 if (NumDependences ==
nullptr) {
7419 NumDependences = ConstantInt::get(Int32, 0);
7423 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
7425 Ident, ThreadId, InteropVar,
Device,
7426 NumDependences, DependenceAddress, HaveNowaitClauseVal};
7428 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_destroy);
7430 return createRuntimeFunctionCall(Fn, Args);
7433CallInst *OpenMPIRBuilder::createOMPInteropUse(
const LocationDescription &
Loc,
7435 Value *NumDependences,
7436 Value *DependenceAddress,
7437 bool HaveNowaitClause) {
7439 updateToLocation(
Loc);
7441 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7442 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7443 Value *ThreadId = getOrCreateThreadID(Ident);
7444 if (Device ==
nullptr)
7446 if (NumDependences ==
nullptr) {
7447 NumDependences = ConstantInt::get(Int32, 0);
7451 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
7453 Ident, ThreadId, InteropVar,
Device,
7454 NumDependences, DependenceAddress, HaveNowaitClauseVal};
7456 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_use);
7458 return createRuntimeFunctionCall(Fn, Args);
7461CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
7465 updateToLocation(
Loc);
7468 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7469 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7470 Value *ThreadId = getOrCreateThreadID(Ident);
7472 getOrCreateInternalVariable(Int8PtrPtr,
Name.str());
7476 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached);
7478 return createRuntimeFunctionCall(Fn, Args);
7481OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetInit(
7482 const LocationDescription &
Loc,
7483 const llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
7485 "expected num_threads and num_teams to be specified");
7487 if (!updateToLocation(
Loc))
7491 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7492 Constant *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7504 const std::string DebugPrefix =
"_debug__";
7505 if (KernelName.
ends_with(DebugPrefix)) {
7506 KernelName = KernelName.
drop_back(DebugPrefix.length());
7507 Kernel = M.getFunction(KernelName);
7513 if (
Attrs.MinTeams > 1 ||
Attrs.MaxTeams.front() > 0)
7518 int32_t MaxThreadsVal =
Attrs.MaxThreads.front();
7519 if (MaxThreadsVal < 0)
7520 MaxThreadsVal = std::max(
7523 if (MaxThreadsVal > 0)
7524 writeThreadBoundsForKernel(
T, *
Kernel,
Attrs.MinThreads, MaxThreadsVal);
7535 Function *Fn = getOrCreateRuntimeFunctionPtr(
7536 omp::RuntimeFunction::OMPRTL___kmpc_target_init);
7539 Twine DynamicEnvironmentName = KernelName +
"_dynamic_environment";
7540 Constant *DynamicEnvironmentInitializer =
7544 DynamicEnvironmentInitializer, DynamicEnvironmentName,
7546 DL.getDefaultGlobalsAddressSpace());
7550 DynamicEnvironmentGV->
getType() == DynamicEnvironmentPtr
7551 ? DynamicEnvironmentGV
7553 DynamicEnvironmentPtr);
7556 ConfigurationEnvironment, {
7557 UseGenericStateMachineVal,
7558 MayUseNestedParallelismVal,
7565 ReductionBufferLength,
7568 KernelEnvironment, {
7569 ConfigurationEnvironmentInitializer,
7573 std::string KernelEnvironmentName =
7574 (KernelName +
"_kernel_environment").str();
7577 KernelEnvironmentInitializer, KernelEnvironmentName,
7579 DL.getDefaultGlobalsAddressSpace());
7583 KernelEnvironmentGV->
getType() == KernelEnvironmentPtr
7584 ? KernelEnvironmentGV
7586 KernelEnvironmentPtr);
7587 Value *KernelLaunchEnvironment = DebugKernelWrapper->
getArg(0);
7589 KernelLaunchEnvironment =
7590 KernelLaunchEnvironment->
getType() == KernelLaunchEnvParamTy
7591 ? KernelLaunchEnvironment
7592 : Builder.CreateAddrSpaceCast(KernelLaunchEnvironment,
7593 KernelLaunchEnvParamTy);
7594 CallInst *ThreadKind = createRuntimeFunctionCall(
7595 Fn, {KernelEnvironment, KernelLaunchEnvironment});
7597 Value *ExecUserCode = Builder.CreateICmpEQ(
7607 auto *UI = Builder.CreateUnreachable();
7613 Builder.SetInsertPoint(WorkerExitBB);
7614 Builder.CreateRetVoid();
7617 Builder.SetInsertPoint(CheckBBTI);
7618 Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
7621 UI->eraseFromParent();
7628void OpenMPIRBuilder::createTargetDeinit(
const LocationDescription &
Loc,
7629 int32_t TeamsReductionDataSize,
7630 int32_t TeamsReductionBufferLength) {
7631 if (!updateToLocation(
Loc))
7634 Function *Fn = getOrCreateRuntimeFunctionPtr(
7635 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
7637 createRuntimeFunctionCall(Fn, {});
7639 if (!TeamsReductionBufferLength || !TeamsReductionDataSize)
7645 const std::string DebugPrefix =
"_debug__";
7647 KernelName = KernelName.
drop_back(DebugPrefix.length());
7648 auto *KernelEnvironmentGV =
7649 M.getNamedGlobal((KernelName +
"_kernel_environment").str());
7650 assert(KernelEnvironmentGV &&
"Expected kernel environment global\n");
7651 auto *KernelEnvironmentInitializer = KernelEnvironmentGV->
getInitializer();
7653 KernelEnvironmentInitializer,
7654 ConstantInt::get(Int32, TeamsReductionDataSize), {0, 7});
7656 NewInitializer, ConstantInt::get(Int32, TeamsReductionBufferLength),
7663 if (
Kernel.hasFnAttribute(Name)) {
7664 int32_t OldLimit =
Kernel.getFnAttributeAsParsedInteger(Name);
7670std::pair<int32_t, int32_t>
7672 int32_t ThreadLimit =
7673 Kernel.getFnAttributeAsParsedInteger(
"omp_target_thread_limit");
7676 const auto &Attr =
Kernel.getFnAttribute(
"amdgpu-flat-work-group-size");
7677 if (!Attr.isValid() || !Attr.isStringAttribute())
7678 return {0, ThreadLimit};
7679 auto [LBStr, UBStr] = Attr.getValueAsString().split(
',');
7682 return {0, ThreadLimit};
7683 UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB;
7689 if (
Kernel.hasFnAttribute(
"nvvm.maxntid")) {
7690 int32_t UB =
Kernel.getFnAttributeAsParsedInteger(
"nvvm.maxntid");
7691 return {0, ThreadLimit ? std::min(ThreadLimit, UB) : UB};
7693 return {0, ThreadLimit};
7696void OpenMPIRBuilder::writeThreadBoundsForKernel(
const Triple &
T,
7699 Kernel.addFnAttr(
"omp_target_thread_limit", std::to_string(UB));
7702 Kernel.addFnAttr(
"amdgpu-flat-work-group-size",
7710std::pair<int32_t, int32_t>
7713 return {0,
Kernel.getFnAttributeAsParsedInteger(
"omp_target_num_teams")};
7717 int32_t LB, int32_t UB) {
7724 Kernel.addFnAttr(
"omp_target_num_teams", std::to_string(LB));
7727void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
7729 if (Config.isTargetDevice()) {
7736 else if (
T.isNVPTX())
7738 else if (
T.isSPIRV())
7745 if (Config.isTargetDevice()) {
7746 assert(OutlinedFn &&
"The outlined function must exist if embedded");
7755Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(
Function *OutlinedFn,
7760 assert(!M.getGlobalVariable(EntryFnName,
true) &&
7761 "Named kernel already exists?");
7767Error OpenMPIRBuilder::emitTargetRegionFunction(
7768 TargetRegionEntryInfo &EntryInfo,
7769 FunctionGenCallback &GenerateFunctionCallback,
bool IsOffloadEntry,
7773 OffloadInfoManager.getTargetRegionEntryFnName(EntryFnName, EntryInfo);
7775 if (Config.isTargetDevice() || !Config.openMPOffloadMandatory()) {
7779 OutlinedFn = *CBResult;
7781 OutlinedFn =
nullptr;
7787 if (!IsOffloadEntry)
7790 std::string EntryFnIDName =
7791 Config.isTargetDevice()
7792 ? std::string(EntryFnName)
7793 : createPlatformSpecificName({EntryFnName,
"region_id"});
7795 OutlinedFnID = registerTargetRegionFunction(EntryInfo, OutlinedFn,
7796 EntryFnName, EntryFnIDName);
7800Constant *OpenMPIRBuilder::registerTargetRegionFunction(
7801 TargetRegionEntryInfo &EntryInfo,
Function *OutlinedFn,
7804 setOutlinedTargetRegionFunctionAttributes(OutlinedFn);
7805 auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
7806 auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
7807 OffloadInfoManager.registerTargetRegionEntryInfo(
7808 EntryInfo, EntryAddr, OutlinedFnID,
7809 OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion);
7810 return OutlinedFnID;
7813OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData(
7814 const LocationDescription &
Loc, InsertPointTy AllocaIP,
7815 InsertPointTy CodeGenIP,
Value *DeviceID,
Value *IfCond,
7816 TargetDataInfo &
Info, GenMapInfoCallbackTy GenMapInfoCB,
7818 function_ref<InsertPointOrErrorTy(InsertPointTy CodeGenIP,
7819 BodyGenTy BodyGenType)>
7822 if (!updateToLocation(
Loc))
7823 return InsertPointTy();
7825 Builder.restoreIP(CodeGenIP);
7827 if (Config.IsTargetDevice.value_or(
false)) {
7829 InsertPointOrErrorTy AfterIP =
7830 BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv);
7832 return AfterIP.takeError();
7833 Builder.restoreIP(*AfterIP);
7835 return Builder.saveIP();
7838 bool IsStandAlone = !BodyGenCB;
7839 MapInfosTy *MapInfo;
7843 auto BeginThenGen = [&](InsertPointTy AllocaIP,
7844 InsertPointTy CodeGenIP) ->
Error {
7845 MapInfo = &GenMapInfoCB(Builder.saveIP());
7846 if (
Error Err = emitOffloadingArrays(
7847 AllocaIP, Builder.saveIP(), *MapInfo,
Info, CustomMapperCB,
7848 true, DeviceAddrCB))
7851 TargetDataRTArgs RTArgs;
7852 emitOffloadingArraysArgument(Builder, RTArgs,
Info);
7855 Value *PointerNum = Builder.getInt32(
Info.NumberOfPtrs);
7860 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7861 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7865 SrcLocInfo, DeviceID,
7866 PointerNum, RTArgs.BasePointersArray,
7867 RTArgs.PointersArray, RTArgs.SizesArray,
7868 RTArgs.MapTypesArray, RTArgs.MapNamesArray,
7869 RTArgs.MappersArray};
7872 assert(MapperFunc &&
"MapperFunc missing for standalone target data");
7876 if (
Info.HasNoWait) {
7883 createRuntimeFunctionCall(getOrCreateRuntimeFunctionPtr(*MapperFunc),
7886 if (
Info.HasNoWait) {
7890 emitBlock(OffloadContBlock, CurFn,
true);
7891 Builder.restoreIP(Builder.saveIP());
7896 bool RequiresOuterTargetTask =
Info.HasNoWait;
7897 if (!RequiresOuterTargetTask)
7898 cantFail(TaskBodyCB(
nullptr,
nullptr,
7901 cantFail(emitTargetTask(TaskBodyCB, DeviceID, SrcLocInfo, AllocaIP,
7902 {}, RTArgs,
Info.HasNoWait));
7904 Function *BeginMapperFunc = getOrCreateRuntimeFunctionPtr(
7905 omp::OMPRTL___tgt_target_data_begin_mapper);
7907 createRuntimeFunctionCall(BeginMapperFunc, OffloadingArgs);
7909 for (
auto DeviceMap :
Info.DevicePtrInfoMap) {
7912 Builder.CreateLoad(Builder.getPtrTy(), DeviceMap.second.first);
7913 Builder.CreateStore(LI, DeviceMap.second.second);
7920 InsertPointOrErrorTy AfterIP =
7921 BodyGenCB(Builder.saveIP(), BodyGenTy::Priv);
7923 return AfterIP.takeError();
7924 Builder.restoreIP(*AfterIP);
7932 auto BeginElseGen = [&](InsertPointTy AllocaIP,
7933 InsertPointTy CodeGenIP) ->
Error {
7934 InsertPointOrErrorTy AfterIP =
7935 BodyGenCB(Builder.saveIP(), BodyGenTy::DupNoPriv);
7937 return AfterIP.takeError();
7938 Builder.restoreIP(*AfterIP);
7943 auto EndThenGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7944 TargetDataRTArgs RTArgs;
7945 Info.EmitDebug = !MapInfo->Names.empty();
7946 emitOffloadingArraysArgument(Builder, RTArgs,
Info,
true);
7949 Value *PointerNum = Builder.getInt32(
Info.NumberOfPtrs);
7954 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7955 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7958 Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
7959 PointerNum, RTArgs.BasePointersArray,
7960 RTArgs.PointersArray, RTArgs.SizesArray,
7961 RTArgs.MapTypesArray, RTArgs.MapNamesArray,
7962 RTArgs.MappersArray};
7964 getOrCreateRuntimeFunctionPtr(omp::OMPRTL___tgt_target_data_end_mapper);
7966 createRuntimeFunctionCall(EndMapperFunc, OffloadingArgs);
7972 auto EndElseGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7980 return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
7981 return BeginThenGen(AllocaIP, Builder.saveIP());
7989 InsertPointOrErrorTy AfterIP =
7990 BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv);
7992 return AfterIP.takeError();
7996 return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
7997 return EndThenGen(AllocaIP, Builder.saveIP());
8000 return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
8001 return BeginThenGen(AllocaIP, Builder.saveIP());
8007 return Builder.saveIP();
8011OpenMPIRBuilder::createForStaticInitFunction(
unsigned IVSize,
bool IVSigned,
8012 bool IsGPUDistribute) {
8013 assert((IVSize == 32 || IVSize == 64) &&
8014 "IV size is not compatible with the omp runtime");
8016 if (IsGPUDistribute)
8018 ? (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_4
8019 : omp::OMPRTL___kmpc_distribute_static_init_4u)
8020 : (IVSigned ?
omp::OMPRTL___kmpc_distribute_static_init_8
8021 :
omp::OMPRTL___kmpc_distribute_static_init_8u);
8023 Name = IVSize == 32 ? (IVSigned ? omp::OMPRTL___kmpc_for_static_init_4
8024 : omp::OMPRTL___kmpc_for_static_init_4u)
8025 : (IVSigned ? omp::OMPRTL___kmpc_for_static_init_8
8026 : omp::OMPRTL___kmpc_for_static_init_8u);
8028 return getOrCreateRuntimeFunction(M, Name);
8031FunctionCallee OpenMPIRBuilder::createDispatchInitFunction(
unsigned IVSize,
8033 assert((IVSize == 32 || IVSize == 64) &&
8034 "IV size is not compatible with the omp runtime");
8036 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_4
8037 : omp::OMPRTL___kmpc_dispatch_init_4u)
8038 : (IVSigned ?
omp::OMPRTL___kmpc_dispatch_init_8
8039 :
omp::OMPRTL___kmpc_dispatch_init_8u);
8041 return getOrCreateRuntimeFunction(M, Name);
8044FunctionCallee OpenMPIRBuilder::createDispatchNextFunction(
unsigned IVSize,
8046 assert((IVSize == 32 || IVSize == 64) &&
8047 "IV size is not compatible with the omp runtime");
8049 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_4
8050 : omp::OMPRTL___kmpc_dispatch_next_4u)
8051 : (IVSigned ?
omp::OMPRTL___kmpc_dispatch_next_8
8052 :
omp::OMPRTL___kmpc_dispatch_next_8u);
8054 return getOrCreateRuntimeFunction(M, Name);
8057FunctionCallee OpenMPIRBuilder::createDispatchFiniFunction(
unsigned IVSize,
8059 assert((IVSize == 32 || IVSize == 64) &&
8060 "IV size is not compatible with the omp runtime");
8062 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_4
8063 : omp::OMPRTL___kmpc_dispatch_fini_4u)
8064 : (IVSigned ?
omp::OMPRTL___kmpc_dispatch_fini_8
8065 :
omp::OMPRTL___kmpc_dispatch_fini_8u);
8067 return getOrCreateRuntimeFunction(M, Name);
8071 return getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_dispatch_deinit);
8076 DenseMap<
Value *, std::tuple<Value *, unsigned>> &ValueReplacementMap) {
8084 auto GetUpdatedDIVariable = [&](
DILocalVariable *OldVar,
unsigned arg) {
8088 if (NewVar && (arg == NewVar->
getArg()))
8098 auto UpdateDebugRecord = [&](
auto *DR) {
8101 for (
auto Loc : DR->location_ops()) {
8102 auto Iter = ValueReplacementMap.find(
Loc);
8103 if (Iter != ValueReplacementMap.end()) {
8104 DR->replaceVariableLocationOp(
Loc, std::get<0>(Iter->second));
8105 ArgNo = std::get<1>(Iter->second) + 1;
8109 DR->setVariable(GetUpdatedDIVariable(OldVar, ArgNo));
8116 "Unexpected debug intrinsic");
8118 UpdateDebugRecord(&DVR);
8121 if (OMPBuilder.Config.isTargetDevice()) {
8123 Module *M = Func->getParent();
8126 DB.createQualifiedType(dwarf::DW_TAG_pointer_type,
nullptr);
8128 NewSP,
"dyn_ptr", 1, NewSP->
getFile(), 0,
8129 VoidPtrTy,
false, DINode::DIFlags::FlagArtificial);
8131 DB.insertDeclare(&(*Func->arg_begin()), Var, DB.createExpression(),
Loc,
8144 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
8146 OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
8147 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) {
8149 if (OMPBuilder.Config.isTargetDevice()) {
8157 for (
auto &Arg : Inputs)
8162 for (
auto &Arg : Inputs)
8166 auto BB = Builder.GetInsertBlock();
8178 if (TargetCpuAttr.isStringAttribute())
8179 Func->addFnAttr(TargetCpuAttr);
8181 auto TargetFeaturesAttr = ParentFn->
getFnAttribute(
"target-features");
8182 if (TargetFeaturesAttr.isStringAttribute())
8183 Func->addFnAttr(TargetFeaturesAttr);
8185 if (OMPBuilder.Config.isTargetDevice()) {
8187 OMPBuilder.emitKernelExecutionMode(FuncName, DefaultAttrs.ExecFlags);
8188 OMPBuilder.emitUsed(
"llvm.compiler.used", {ExecMode});
8199 Builder.SetInsertPoint(EntryBB);
8202 if (OMPBuilder.Config.isTargetDevice())
8203 Builder.restoreIP(OMPBuilder.createTargetInit(Builder, DefaultAttrs));
8205 BasicBlock *UserCodeEntryBB = Builder.GetInsertBlock();
8210 if (OMPBuilder.Config.isTargetDevice())
8211 OMPBuilder.ConstantAllocaRaiseCandidates.emplace_back(Func);
8215 splitBB(Builder,
true,
"outlined.body");
8216 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = CBFunc(
8218 OpenMPIRBuilder::InsertPointTy(OutlinedBodyBB, OutlinedBodyBB->
begin()));
8220 return AfterIP.takeError();
8221 Builder.restoreIP(*AfterIP);
8222 if (OMPBuilder.Config.isTargetDevice())
8223 OMPBuilder.createTargetDeinit(Builder);
8226 Builder.CreateRetVoid();
8230 auto AllocaIP = Builder.saveIP();
8235 const auto &ArgRange =
8236 OMPBuilder.Config.isTargetDevice()
8237 ?
make_range(Func->arg_begin() + 1, Func->arg_end())
8270 if (Instr->getFunction() == Func)
8271 Instr->replaceUsesOfWith(
Input, InputCopy);
8277 for (
auto InArg :
zip(Inputs, ArgRange)) {
8279 Argument &Arg = std::get<1>(InArg);
8280 Value *InputCopy =
nullptr;
8282 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
8283 ArgAccessorFuncCB(Arg,
Input, InputCopy, AllocaIP, Builder.saveIP());
8285 return AfterIP.takeError();
8286 Builder.restoreIP(*AfterIP);
8287 ValueReplacementMap[
Input] = std::make_tuple(InputCopy, Arg.
getArgNo());
8307 DeferredReplacement.push_back(std::make_pair(
Input, InputCopy));
8314 ReplaceValue(
Input, InputCopy, Func);
8318 for (
auto Deferred : DeferredReplacement)
8319 ReplaceValue(std::get<0>(Deferred), std::get<1>(Deferred), Func);
8322 ValueReplacementMap);
8330 Value *TaskWithPrivates,
8331 Type *TaskWithPrivatesTy) {
8333 Type *TaskTy = OMPIRBuilder.Task;
8336 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 0);
8337 Value *Shareds = TaskT;
8347 if (TaskWithPrivatesTy != TaskTy)
8348 Shareds = Builder.CreateStructGEP(TaskTy, TaskT, 0);
8365 const size_t NumOffloadingArrays,
const int SharedArgsOperandNo) {
8370 assert((!NumOffloadingArrays || PrivatesTy) &&
8371 "PrivatesTy cannot be nullptr when there are offloadingArrays"
8374 Module &M = OMPBuilder.M;
8398 OpenMPIRBuilder::InsertPointTy IP(StaleCI->
getParent(),
8404 Type *TaskPtrTy = OMPBuilder.TaskPtr;
8405 [[maybe_unused]]
Type *TaskTy = OMPBuilder.Task;
8411 ".omp_target_task_proxy_func",
8412 Builder.GetInsertBlock()->getModule());
8413 Value *ThreadId = ProxyFn->getArg(0);
8414 Value *TaskWithPrivates = ProxyFn->getArg(1);
8415 ThreadId->
setName(
"thread.id");
8416 TaskWithPrivates->
setName(
"task");
8418 bool HasShareds = SharedArgsOperandNo > 0;
8419 bool HasOffloadingArrays = NumOffloadingArrays > 0;
8422 Builder.SetInsertPoint(EntryBB);
8428 if (HasOffloadingArrays) {
8429 assert(TaskTy != TaskWithPrivatesTy &&
8430 "If there are offloading arrays to pass to the target"
8431 "TaskTy cannot be the same as TaskWithPrivatesTy");
8434 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 1);
8435 for (
unsigned int i = 0; i < NumOffloadingArrays; ++i)
8437 Builder.CreateStructGEP(PrivatesTy, Privates, i));
8441 auto *ArgStructAlloca =
8443 assert(ArgStructAlloca &&
8444 "Unable to find the alloca instruction corresponding to arguments "
8445 "for extracted function");
8449 Builder.CreateAlloca(ArgStructType,
nullptr,
"structArg");
8451 Value *SharedsSize =
8452 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
8455 OMPBuilder, Builder, TaskWithPrivates, TaskWithPrivatesTy);
8457 Builder.CreateMemCpy(
8458 NewArgStructAlloca, NewArgStructAlloca->
getAlign(), LoadShared,
8460 KernelLaunchArgs.
push_back(NewArgStructAlloca);
8462 OMPBuilder.createRuntimeFunctionCall(KernelLaunchFunction, KernelLaunchArgs);
8463 Builder.CreateRetVoid();
8469 return GEP->getSourceElementType();
8471 return Alloca->getAllocatedType();
8494 if (OffloadingArraysToPrivatize.
empty())
8495 return OMPIRBuilder.Task;
8498 for (
Value *V : OffloadingArraysToPrivatize) {
8499 assert(V->getType()->isPointerTy() &&
8500 "Expected pointer to array to privatize. Got a non-pointer value "
8503 assert(ArrayTy &&
"ArrayType cannot be nullptr");
8509 "struct.task_with_privates");
8512 OpenMPIRBuilder &OMPBuilder,
IRBuilderBase &Builder,
bool IsOffloadEntry,
8513 TargetRegionEntryInfo &EntryInfo,
8514 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
8517 OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
8518 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) {
8520 OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
8523 EntryFnName, Inputs, CBFunc,
8527 return OMPBuilder.emitTargetRegionFunction(
8528 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
8532OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
8533 TargetTaskBodyCallbackTy TaskBodyCB,
Value *DeviceID,
Value *RTLoc,
8534 OpenMPIRBuilder::InsertPointTy AllocaIP,
8536 const TargetDataRTArgs &RTArgs,
bool HasNoWait) {
8660 splitBB(Builder,
true,
"target.task.body");
8662 splitBB(Builder,
true,
"target.task.alloca");
8664 InsertPointTy TargetTaskAllocaIP(TargetTaskAllocaBB,
8665 TargetTaskAllocaBB->
begin());
8666 InsertPointTy TargetTaskBodyIP(TargetTaskBodyBB, TargetTaskBodyBB->
begin());
8669 OI.EntryBB = TargetTaskAllocaBB;
8670 OI.OuterAllocaBB = AllocaIP.getBlock();
8675 Builder, AllocaIP, ToBeDeleted, TargetTaskAllocaIP,
"global.tid",
false));
8678 Builder.restoreIP(TargetTaskBodyIP);
8679 if (
Error Err = TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP))
8693 emitBlock(OI.ExitBB, Builder.GetInsertBlock()->getParent(),
8697 bool NeedsTargetTask = HasNoWait && DeviceID;
8698 if (NeedsTargetTask) {
8700 {RTArgs.BasePointersArray, RTArgs.PointersArray, RTArgs.MappersArray,
8701 RTArgs.MapNamesArray, RTArgs.MapTypesArray, RTArgs.MapTypesArrayEnd,
8702 RTArgs.SizesArray}) {
8704 OffloadingArraysToPrivatize.
push_back(V);
8705 OI.ExcludeArgsFromAggregate.push_back(V);
8709 OI.PostOutlineCB = [
this, ToBeDeleted, Dependencies, NeedsTargetTask,
8710 DeviceID, OffloadingArraysToPrivatize](
8713 "there must be a single user for the outlined function");
8727 const unsigned int NumStaleCIArgs = StaleCI->
arg_size();
8728 bool HasShareds = NumStaleCIArgs > OffloadingArraysToPrivatize.
size() + 1;
8730 NumStaleCIArgs == (OffloadingArraysToPrivatize.
size() + 2)) &&
8731 "Wrong number of arguments for StaleCI when shareds are present");
8732 int SharedArgOperandNo =
8733 HasShareds ? OffloadingArraysToPrivatize.
size() + 1 : 0;
8739 if (!OffloadingArraysToPrivatize.
empty())
8744 *
this, Builder, StaleCI, PrivatesTy, TaskWithPrivatesTy,
8745 OffloadingArraysToPrivatize.
size(), SharedArgOperandNo);
8747 LLVM_DEBUG(
dbgs() <<
"Proxy task entry function created: " << *ProxyFn
8750 Builder.SetInsertPoint(StaleCI);
8755 getOrCreateSrcLocStr(LocationDescription(Builder), SrcLocStrSize);
8756 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
8765 ? getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
8766 : getOrCreateRuntimeFunctionPtr(
8767 OMPRTL___kmpc_omp_target_task_alloc);
8771 Value *ThreadID = getOrCreateThreadID(Ident);
8778 Value *TaskSize = Builder.getInt64(
8779 M.getDataLayout().getTypeStoreSize(TaskWithPrivatesTy));
8784 Value *SharedsSize = Builder.getInt64(0);
8786 auto *ArgStructAlloca =
8788 assert(ArgStructAlloca &&
8789 "Unable to find the alloca instruction corresponding to arguments "
8790 "for extracted function");
8791 auto *ArgStructType =
8793 assert(ArgStructType &&
"Unable to find struct type corresponding to "
8794 "arguments for extracted function");
8796 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
8805 Value *Flags = Builder.getInt32(0);
8815 TaskSize, SharedsSize,
8818 if (NeedsTargetTask) {
8819 assert(DeviceID &&
"Expected non-empty device ID.");
8823 TaskData = createRuntimeFunctionCall(TaskAllocFn, TaskAllocArgs);
8829 *
this, Builder, TaskData, TaskWithPrivatesTy);
8830 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
8833 if (!OffloadingArraysToPrivatize.
empty()) {
8835 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskData, 1);
8836 for (
unsigned int i = 0; i < OffloadingArraysToPrivatize.
size(); ++i) {
8837 Value *PtrToPrivatize = OffloadingArraysToPrivatize[i];
8844 "ElementType should match ArrayType");
8847 Value *Dst = Builder.CreateStructGEP(PrivatesTy, Privates, i);
8848 Builder.CreateMemCpy(
8849 Dst, Alignment, PtrToPrivatize, Alignment,
8850 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ElementType)));
8864 if (!NeedsTargetTask) {
8867 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_wait_deps);
8868 createRuntimeFunctionCall(
8871 Builder.getInt32(Dependencies.size()),
8873 ConstantInt::get(Builder.getInt32Ty(), 0),
8879 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0);
8881 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0);
8882 createRuntimeFunctionCall(TaskBeginFn, {Ident, ThreadID, TaskData});
8883 CallInst *CI = createRuntimeFunctionCall(ProxyFn, {ThreadID, TaskData});
8885 createRuntimeFunctionCall(TaskCompleteFn, {Ident, ThreadID, TaskData});
8886 }
else if (DepArray) {
8891 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps);
8892 createRuntimeFunctionCall(
8894 {Ident, ThreadID, TaskData, Builder.getInt32(Dependencies.size()),
8895 DepArray, ConstantInt::get(Builder.getInt32Ty(), 0),
8899 Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task);
8900 createRuntimeFunctionCall(TaskFn, {Ident, ThreadID, TaskData});
8905 I->eraseFromParent();
8907 addOutlineInfo(std::move(OI));
8910 << *(Builder.GetInsertBlock()) <<
"\n");
8912 << *(Builder.GetInsertBlock()->getParent()->getParent())
8914 return Builder.saveIP();
8917Error OpenMPIRBuilder::emitOffloadingArraysAndArgs(
8918 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &
Info,
8919 TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo,
8920 CustomMapperCallbackTy CustomMapperCB,
bool IsNonContiguous,
8923 emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo,
Info,
8924 CustomMapperCB, IsNonContiguous, DeviceAddrCB))
8926 emitOffloadingArraysArgument(Builder, RTArgs,
Info, ForEndCall);
8932 OpenMPIRBuilder::InsertPointTy AllocaIP,
8933 OpenMPIRBuilder::TargetDataInfo &
Info,
8934 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
8935 const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs,
8938 OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB,
8939 OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB,
8941 bool HasNoWait,
Value *DynCGroupMem,
8946 auto &&EmitTargetCallFallbackCB = [&](OpenMPIRBuilder::InsertPointTy IP)
8947 -> OpenMPIRBuilder::InsertPointOrErrorTy {
8948 Builder.restoreIP(IP);
8949 OMPBuilder.createRuntimeFunctionCall(OutlinedFn, Args);
8950 return Builder.saveIP();
8953 bool HasDependencies = Dependencies.
size() > 0;
8954 bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
8956 OpenMPIRBuilder::TargetKernelArgs KArgs;
8963 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8971 if (OutlinedFnID && DeviceID)
8972 return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID,
8973 EmitTargetCallFallbackCB, KArgs,
8974 DeviceID, RTLoc, TargetTaskAllocaIP);
8982 return EmitTargetCallFallbackCB(OMPBuilder.Builder.saveIP());
8985 OMPBuilder.Builder.restoreIP(AfterIP);
8989 auto &&EmitTargetCallElse =
8990 [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
8991 OpenMPIRBuilder::InsertPointTy CodeGenIP) ->
Error {
8994 OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8995 if (RequiresOuterTargetTask) {
8999 OpenMPIRBuilder::TargetDataRTArgs EmptyRTArgs;
9000 return OMPBuilder.emitTargetTask(TaskBodyCB,
nullptr,
9002 Dependencies, EmptyRTArgs, HasNoWait);
9004 return EmitTargetCallFallbackCB(Builder.saveIP());
9007 Builder.restoreIP(AfterIP);
9011 auto &&EmitTargetCallThen =
9012 [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
9013 OpenMPIRBuilder::InsertPointTy CodeGenIP) ->
Error {
9014 Info.HasNoWait = HasNoWait;
9015 OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP());
9016 OpenMPIRBuilder::TargetDataRTArgs RTArgs;
9017 if (
Error Err = OMPBuilder.emitOffloadingArraysAndArgs(
9018 AllocaIP, Builder.saveIP(),
Info, RTArgs, MapInfo, CustomMapperCB,
9025 zip_equal(DefaultAttrs.MaxTeams, RuntimeAttrs.MaxTeams))
9026 NumTeamsC.
push_back(RuntimeVal ? RuntimeVal
9031 auto InitMaxThreadsClause = [&Builder](
Value *
Clause) {
9033 Clause = Builder.CreateIntCast(
Clause, Builder.getInt32Ty(),
9037 auto CombineMaxThreadsClauses = [&Builder](
Value *
Clause,
Value *&Result) {
9040 Result ? Builder.CreateSelect(Builder.CreateICmpULT(Result,
Clause),
9048 Value *MaxThreadsClause =
9049 RuntimeAttrs.TeamsThreadLimit.size() == 1
9050 ? InitMaxThreadsClause(RuntimeAttrs.MaxThreads)
9053 for (
auto [TeamsVal, TargetVal] :
zip_equal(
9054 RuntimeAttrs.TeamsThreadLimit, RuntimeAttrs.TargetThreadLimit)) {
9055 Value *TeamsThreadLimitClause = InitMaxThreadsClause(TeamsVal);
9056 Value *NumThreads = InitMaxThreadsClause(TargetVal);
9058 CombineMaxThreadsClauses(TeamsThreadLimitClause, NumThreads);
9059 CombineMaxThreadsClauses(MaxThreadsClause, NumThreads);
9061 NumThreadsC.
push_back(NumThreads ? NumThreads : Builder.getInt32(0));
9064 unsigned NumTargetItems =
Info.NumberOfPtrs;
9066 Constant *SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
9067 Value *RTLoc = OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize,
9070 Value *TripCount = RuntimeAttrs.LoopTripCount
9071 ? Builder.CreateIntCast(RuntimeAttrs.LoopTripCount,
9072 Builder.getInt64Ty(),
9074 : Builder.getInt64(0);
9078 DynCGroupMem = Builder.getInt32(0);
9080 KArgs = OpenMPIRBuilder::TargetKernelArgs(
9081 NumTargetItems, RTArgs, TripCount, NumTeamsC, NumThreadsC, DynCGroupMem,
9082 HasNoWait, DynCGroupMemFallback);
9086 OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
9089 if (RequiresOuterTargetTask)
9090 return OMPBuilder.emitTargetTask(TaskBodyCB, RuntimeAttrs.DeviceID,
9091 RTLoc, AllocaIP, Dependencies,
9092 KArgs.RTArgs,
Info.HasNoWait);
9094 return OMPBuilder.emitKernelLaunch(
9095 Builder, OutlinedFnID, EmitTargetCallFallbackCB, KArgs,
9096 RuntimeAttrs.DeviceID, RTLoc, AllocaIP);
9099 Builder.restoreIP(AfterIP);
9106 if (!OutlinedFnID) {
9107 cantFail(EmitTargetCallElse(AllocaIP, Builder.saveIP()));
9113 cantFail(EmitTargetCallThen(AllocaIP, Builder.saveIP()));
9117 cantFail(OMPBuilder.emitIfClause(IfCond, EmitTargetCallThen,
9118 EmitTargetCallElse, AllocaIP));
9121OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTarget(
9122 const LocationDescription &
Loc,
bool IsOffloadEntry, InsertPointTy AllocaIP,
9123 InsertPointTy CodeGenIP, TargetDataInfo &
Info,
9124 TargetRegionEntryInfo &EntryInfo,
9125 const TargetKernelDefaultAttrs &DefaultAttrs,
9126 const TargetKernelRuntimeAttrs &RuntimeAttrs,
Value *IfCond,
9128 OpenMPIRBuilder::TargetBodyGenCallbackTy CBFunc,
9129 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB,
9130 CustomMapperCallbackTy CustomMapperCB,
9134 if (!updateToLocation(
Loc))
9135 return InsertPointTy();
9137 Builder.restoreIP(CodeGenIP);
9145 *
this, Builder, IsOffloadEntry, EntryInfo, DefaultAttrs, OutlinedFn,
9146 OutlinedFnID, Inputs, CBFunc, ArgAccessorFuncCB))
9152 if (!Config.isTargetDevice())
9154 IfCond, OutlinedFn, OutlinedFnID, Inputs, GenMapInfoCB,
9155 CustomMapperCB, Dependencies, HasNowait, DynCGroupMem,
9156 DynCGroupMemFallback);
9157 return Builder.saveIP();
9170 return OS.
str().str();
9175 return OpenMPIRBuilder::getNameWithSeparators(Parts, Config.firstSeparator(),
9176 Config.separator());
9181 auto &Elem = *InternalVars.try_emplace(Name,
nullptr).first;
9183 assert(Elem.second->getValueType() == Ty &&
9184 "OMP internal variable has different type than requested");
9197 : M.getTargetTriple().isAMDGPU()
9199 :
DL.getDefaultGlobalsAddressSpace();
9208 const llvm::Align PtrAlign =
DL.getPointerABIAlignment(AddressSpaceVal);
9209 GV->setAlignment(std::max(TypeAlign, PtrAlign));
9216Value *OpenMPIRBuilder::getOMPCriticalRegionLock(
StringRef CriticalName) {
9217 std::string
Prefix =
Twine(
"gomp_critical_user_", CriticalName).
str();
9218 std::string
Name = getNameWithSeparators({
Prefix,
"var"},
".",
".");
9219 return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
9222Value *OpenMPIRBuilder::getSizeInBytes(
Value *BasePtr) {
9227 Builder.CreateGEP(
BasePtr->getType(),
Null, Builder.getInt32(1));
9229 return SizePtrToInt;
9234 std::string VarName) {
9238 M, MaptypesArrayInit->
getType(),
9242 return MaptypesArrayGlobal;
9245void OpenMPIRBuilder::createMapperAllocas(
const LocationDescription &
Loc,
9246 InsertPointTy AllocaIP,
9247 unsigned NumOperands,
9248 struct MapperAllocas &MapperAllocas) {
9249 if (!updateToLocation(
Loc))
9254 Builder.restoreIP(AllocaIP);
9256 ArrI8PtrTy,
nullptr,
".offload_baseptrs");
9260 ArrI64Ty,
nullptr,
".offload_sizes");
9261 updateToLocation(
Loc);
9262 MapperAllocas.ArgsBase = ArgsBase;
9263 MapperAllocas.Args =
Args;
9264 MapperAllocas.ArgSizes = ArgSizes;
9267void OpenMPIRBuilder::emitMapperCall(
const LocationDescription &
Loc,
9270 struct MapperAllocas &MapperAllocas,
9271 int64_t DeviceID,
unsigned NumOperands) {
9272 if (!updateToLocation(
Loc))
9277 Value *ArgsBaseGEP =
9278 Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.ArgsBase,
9279 {Builder.getInt32(0), Builder.getInt32(0)});
9281 Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.Args,
9282 {Builder.getInt32(0), Builder.getInt32(0)});
9283 Value *ArgSizesGEP =
9284 Builder.CreateInBoundsGEP(ArrI64Ty, MapperAllocas.ArgSizes,
9285 {Builder.getInt32(0), Builder.getInt32(0)});
9288 createRuntimeFunctionCall(MapperFunc, {SrcLocInfo, Builder.getInt64(DeviceID),
9289 Builder.getInt32(NumOperands),
9290 ArgsBaseGEP, ArgsGEP, ArgSizesGEP,
9291 MaptypesArg, MapnamesArg, NullPtr});
9294void OpenMPIRBuilder::emitOffloadingArraysArgument(
IRBuilderBase &Builder,
9295 TargetDataRTArgs &RTArgs,
9296 TargetDataInfo &
Info,
9298 assert((!ForEndCall ||
Info.separateBeginEndCalls()) &&
9299 "expected region end call to runtime only when end call is separate");
9301 auto VoidPtrTy = UnqualPtrTy;
9302 auto VoidPtrPtrTy = UnqualPtrTy;
9304 auto Int64PtrTy = UnqualPtrTy;
9306 if (!
Info.NumberOfPtrs) {
9316 RTArgs.BasePointersArray = Builder.CreateConstInBoundsGEP2_32(
9318 Info.RTArgs.BasePointersArray,
9320 RTArgs.PointersArray = Builder.CreateConstInBoundsGEP2_32(
9324 RTArgs.SizesArray = Builder.CreateConstInBoundsGEP2_32(
9327 RTArgs.MapTypesArray = Builder.CreateConstInBoundsGEP2_32(
9329 ForEndCall &&
Info.RTArgs.MapTypesArrayEnd ?
Info.RTArgs.MapTypesArrayEnd
9330 :
Info.RTArgs.MapTypesArray,
9336 if (!
Info.EmitDebug)
9339 RTArgs.MapNamesArray = Builder.CreateConstInBoundsGEP2_32(
9345 if (!
Info.HasMapper)
9348 RTArgs.MappersArray =
9349 Builder.CreatePointerCast(
Info.RTArgs.MappersArray, VoidPtrPtrTy);
// Emits per-dimension {offset, count, stride} descriptor structs
// ("struct.descriptor_dim") for non-contiguous target data maps and stores
// their addresses into the offload pointers array.
// NOTE(review): incomplete extraction — several original lines are missing
// (see gaps in the embedded numbering); comments below describe only what
// the visible code shows.
9352void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP,
9353 InsertPointTy CodeGenIP,
9354 MapInfosTy &CombinedInfo,
9355 TargetDataInfo &
Info) {
9356 MapInfosTy::StructNonContiguousInfo &NonContigInfo =
9357 CombinedInfo.NonContigInfo;
9370 "struct.descriptor_dim");
// Field indices into struct.descriptor_dim.
9372 enum { OffsetFD = 0, CountFD, StrideFD };
9376 for (
unsigned I = 0, L = 0,
E = NonContigInfo.Dims.size();
I <
E; ++
I) {
// Dims[I] == 1 marks an entry with no non-contiguous descriptor to emit.
9379 if (NonContigInfo.Dims[
I] == 1)
9381 Builder.restoreIP(AllocaIP);
9384 Builder.CreateAlloca(ArrayTy,
nullptr,
"dims");
9385 Builder.restoreIP(CodeGenIP);
// Dimensions are stored in reverse order (RevIdx) relative to the input.
9386 for (
unsigned II = 0, EE = NonContigInfo.Dims[
I];
II < EE; ++
II) {
9387 unsigned RevIdx = EE -
II - 1;
9388 Value *DimsLVal = Builder.CreateInBoundsGEP(
9390 {Builder.getInt64(0), Builder.getInt64(II)});
9392 Value *OffsetLVal = Builder.CreateStructGEP(DimTy, DimsLVal, OffsetFD);
9393 Builder.CreateAlignedStore(
9394 NonContigInfo.Offsets[L][RevIdx], OffsetLVal,
9395 M.getDataLayout().getPrefTypeAlign(OffsetLVal->
getType()));
9397 Value *CountLVal = Builder.CreateStructGEP(DimTy, DimsLVal, CountFD);
9398 Builder.CreateAlignedStore(
9399 NonContigInfo.Counts[L][RevIdx], CountLVal,
9400 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
9402 Value *StrideLVal = Builder.CreateStructGEP(DimTy, DimsLVal, StrideFD);
9403 Builder.CreateAlignedStore(
9404 NonContigInfo.Strides[L][RevIdx], StrideLVal,
// NOTE(review): alignment is queried from CountLVal's type rather than
// StrideLVal's — harmless if both are pointers of the same type, but it
// looks like a copy-paste; confirm against upstream before changing.
9405 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
9408 Builder.restoreIP(CodeGenIP);
9409 Value *DAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
9410 DimsAddr, Builder.getPtrTy());
9411 Value *
P = Builder.CreateConstInBoundsGEP2_32(
9413 Info.RTArgs.PointersArray, 0,
I);
9414 Builder.CreateAlignedStore(
9415 DAddr,
P, M.getDataLayout().getPrefTypeAlign(Builder.getPtrTy()));
// Emits the array init / delete prologue-or-epilogue of a user-defined
// mapper: decides (via Cond) whether the whole array needs a runtime
// push_mapper_component call, for either initialization or deletion.
// NOTE(review): incomplete extraction — the parameter list (orig. lines
// 9421-9427) and several statements are missing; hedge all inferences.
9420void OpenMPIRBuilder::emitUDMapperArrayInitOrDel(
9428 M.getContext(), createPlatformSpecificName({
"omp.array",
Prefix}));
// IsArray: more than one element maps as an array section.
9430 Builder.CreateICmpSGT(
Size, Builder.getInt64(1),
"omp.arrayinit.isarray");
9431 Value *DeleteBit = Builder.CreateAnd(
9434 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9435 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
// Base != Begin combined with PTR_AND_OBJ indicates a pointer-with-object
// mapping that must also be handled here.
9440 Value *BaseIsBegin = Builder.CreateICmpNE(
Base, Begin);
9442 Value *PtrAndObjBit = Builder.CreateAnd(
9445 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9446 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
9447 PtrAndObjBit = Builder.CreateIsNotNull(PtrAndObjBit);
9448 BaseIsBegin = Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
9449 Cond = Builder.CreateOr(IsArray, BaseIsBegin);
// For the init path the DELETE bit must be clear; for the delete path set.
9450 DeleteCond = Builder.CreateIsNull(
9452 createPlatformSpecificName({
"omp.array",
Prefix,
".delete"}));
9455 DeleteCond = Builder.CreateIsNotNull(
9457 createPlatformSpecificName({
"omp.array",
Prefix,
".delete"}));
9459 Cond = Builder.CreateAnd(
Cond, DeleteCond);
9460 Builder.CreateCondBr(
Cond, BodyBB, ExitBB);
9462 emitBlock(BodyBB, MapperFn);
// Total byte size = element count * element size (no-overflow mul).
9465 Value *ArraySize = Builder.CreateNUWMul(
Size, Builder.getInt64(ElementSize));
// Strip TO/FROM and mark IMPLICIT for the whole-array component.
9468 Value *MapTypeArg = Builder.CreateAnd(
9471 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9472 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9473 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9474 MapTypeArg = Builder.CreateOr(
9477 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9478 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
9482 Value *OffloadingArgs[] = {MapperHandle,
Base, Begin,
9483 ArraySize, MapTypeArg, MapName};
9484 createRuntimeFunctionCall(
9485 getOrCreateRuntimeFunction(M, OMPRTL___tgt_push_mapper_component),
// NOTE(review): the defining line of this function (presumably
// OpenMPIRBuilder::emitUserDefinedMapper, orig. lines ~9488-9492) was
// dropped by the extraction; what follows is the tail of its signature and
// a gappy body. It builds the outlined ".omp_mapper" function that walks an
// array element-by-element and pushes mapper components to the runtime.
9493 Type *ElemTy,
StringRef FuncName, CustomMapperCallbackTy CustomMapperCB) {
9509 MapperFn->
addFnAttr(Attribute::NoInline);
9510 MapperFn->
addFnAttr(Attribute::NoUnwind);
9520 auto SavedIP = Builder.saveIP();
9521 Builder.SetInsertPoint(EntryBB);
// Convert the byte size to an element count (Size is assumed to be an
// exact multiple of the element store size).
9533 TypeSize ElementSize = M.getDataLayout().getTypeStoreSize(ElemTy);
9534 Size = Builder.CreateExactUDiv(
Size, Builder.getInt64(ElementSize));
9535 Value *PtrBegin = BeginIn;
9536 Value *PtrEnd = Builder.CreateGEP(ElemTy, PtrBegin,
Size);
// Whole-array initialization prologue (allocation/to path).
9541 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
9542 MapType, MapName, ElementSize, HeadBB,
9548 emitBlock(HeadBB, MapperFn);
9553 Builder.CreateICmpEQ(PtrBegin, PtrEnd,
"omp.arraymap.isempty");
9554 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
// Per-element loop body; PtrPHI tracks the current element pointer.
9557 emitBlock(BodyBB, MapperFn);
9560 Builder.CreatePHI(PtrBegin->
getType(), 2,
"omp.arraymap.ptrcurrent");
9564 MapInfosOrErrorTy
Info = GenMapInfoCB(Builder.saveIP(), PtrPHI, BeginIn);
9566 return Info.takeError();
// Query how many components were already pushed so member map types can be
// offset by the member-of field.
9570 Value *OffloadingArgs[] = {MapperHandle};
9571 Value *PreviousSize = createRuntimeFunctionCall(
9572 getOrCreateRuntimeFunction(M, OMPRTL___tgt_mapper_num_components),
9574 Value *ShiftedPreviousSize =
9575 Builder.CreateShl(PreviousSize, Builder.getInt64(getFlagMemberOffset()));
9578 for (
unsigned I = 0;
I <
Info->BasePointers.size(); ++
I) {
9587 Value *OriMapType = Builder.getInt64(
9588 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9590 Value *MemberMapType =
9591 Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
// Dispatch on the TO/FROM bits of the enclosing map type: alloc / to /
// from / tofrom each get a dedicated block computing the member map type.
9605 Value *LeftToFrom = Builder.CreateAnd(
9608 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9609 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9610 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9619 Value *IsAlloc = Builder.CreateIsNull(LeftToFrom);
9620 Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9622 emitBlock(AllocBB, MapperFn);
9623 Value *AllocMapType = Builder.CreateAnd(
9626 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9627 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9628 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9629 Builder.CreateBr(EndBB);
9630 emitBlock(AllocElseBB, MapperFn);
9631 Value *IsTo = Builder.CreateICmpEQ(
9634 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9635 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9636 Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9638 emitBlock(ToBB, MapperFn);
9639 Value *ToMapType = Builder.CreateAnd(
9642 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9643 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9644 Builder.CreateBr(EndBB);
9645 emitBlock(ToElseBB, MapperFn);
9646 Value *IsFrom = Builder.CreateICmpEQ(
9649 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9650 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9651 Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9653 emitBlock(FromBB, MapperFn);
9654 Value *FromMapType = Builder.CreateAnd(
9657 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9658 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
// Merge the four variants through a 4-way PHI.
9660 emitBlock(EndBB, MapperFn);
9663 Builder.CreatePHI(Builder.getInt64Ty(), 4,
"omp.maptype");
9669 Value *OffloadingArgs[] = {MapperHandle, CurBaseArg, CurBeginArg,
9670 CurSizeArg, CurMapType, CurNameArg};
// A nested custom mapper (if any) maps the member; otherwise fall back to
// the default push_mapper_component runtime call.
9672 auto ChildMapperFn = CustomMapperCB(
I);
9674 return ChildMapperFn.takeError();
9675 if (*ChildMapperFn) {
9677 createRuntimeFunctionCall(*ChildMapperFn, OffloadingArgs)
9678 ->setDoesNotThrow();
9682 createRuntimeFunctionCall(
9683 getOrCreateRuntimeFunction(M, OMPRTL___tgt_push_mapper_component),
// Loop latch: advance the element pointer and test for completion.
9690 Value *PtrNext = Builder.CreateConstGEP1_32(ElemTy, PtrPHI, 1,
9691 "omp.arraymap.next");
9693 Value *IsDone = Builder.CreateICmpEQ(PtrNext, PtrEnd,
"omp.arraymap.isdone");
9695 Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9697 emitBlock(ExitBB, MapperFn);
// Whole-array deletion epilogue (delete/from path).
9700 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
9701 MapType, MapName, ElementSize, DoneBB,
9705 emitBlock(DoneBB, MapperFn,
true);
9707 Builder.CreateRetVoid();
9708 Builder.restoreIP(SavedIP);
// Materializes the .offload_baseptrs/.offload_ptrs/.offload_sizes/
// .offload_mappers arrays (allocas + constant globals) that describe a set
// of map clauses, filling Info.RTArgs for later runtime calls.
// NOTE(review): incomplete extraction — many original lines are missing
// (numbering gaps such as 9716-9718, 9726-9730, 9756-9760); all comments
// below are limited to the visible statements.
9712Error OpenMPIRBuilder::emitOffloadingArrays(
9713 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
9714 TargetDataInfo &
Info, CustomMapperCallbackTy CustomMapperCB,
9715 bool IsNonContiguous,
9719 Info.clearArrayInfo();
9720 Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
// Nothing to emit when there are no mapped pointers.
9722 if (
Info.NumberOfPtrs == 0)
// Arrays live in the function's alloca block so they dominate all uses.
9725 Builder.restoreIP(AllocaIP);
9731 Info.RTArgs.BasePointersArray = Builder.CreateAlloca(
9732 PointerArrayType,
nullptr,
".offload_baseptrs");
9734 Info.RTArgs.PointersArray = Builder.CreateAlloca(
9735 PointerArrayType,
nullptr,
".offload_ptrs");
9736 AllocaInst *MappersArray = Builder.CreateAlloca(
9737 PointerArrayType,
nullptr,
".offload_mappers");
9738 Info.RTArgs.MappersArray = MappersArray;
9745 ConstantInt::get(Int64Ty, 0));
// Classify each size: constant (foldable into a global) vs runtime.
9747 for (
unsigned I = 0,
E = CombinedInfo.Sizes.size();
I <
E; ++
I) {
9750 if (IsNonContiguous &&
9751 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9752 CombinedInfo.Types[
I] &
9753 OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG))
// For non-contiguous entries the "size" slot carries the dim count.
9755 ConstantInt::get(Int64Ty, CombinedInfo.NonContigInfo.Dims[
I]);
9761 RuntimeSizes.set(
I);
// All-runtime sizes: plain alloca filled per-entry later.
9764 if (RuntimeSizes.all()) {
9766 Info.RTArgs.SizesArray = Builder.CreateAlloca(
9767 SizeArrayType,
nullptr,
".offload_sizes");
9772 std::string
Name = createPlatformSpecificName({
"offload_sizes"});
9773 auto *SizesArrayGbl =
// All-constant sizes: reference the global directly, no copy needed.
9778 if (!RuntimeSizes.any()) {
9779 Info.RTArgs.SizesArray = SizesArrayGbl;
// Mixed case: memcpy the constant global into a local buffer, then patch
// the runtime-sized entries in the per-pointer loop below.
9781 unsigned IndexSize = M.getDataLayout().getIndexSizeInBits(0);
9782 Align OffloadSizeAlign = M.getDataLayout().getABIIntegerTypeAlignment(64);
9785 SizeArrayType,
nullptr,
".offload_sizes");
9788 Builder.CreateMemCpy(
9789 Buffer, M.getDataLayout().getPrefTypeAlign(Buffer->
getType()),
9790 SizesArrayGbl, OffloadSizeAlign,
9795 Info.RTArgs.SizesArray = Buffer;
// Map types are compile-time constants: emit one constant global.
9803 for (
auto mapFlag : CombinedInfo.Types)
9805 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9807 std::string MaptypesName = createPlatformSpecificName({
"offload_maptypes"});
9808 auto *MapTypesArrayGbl = createOffloadMaptypes(Mapping, MaptypesName);
9809 Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
// Map names are emitted only when present; they drive runtime debug info.
9812 if (!CombinedInfo.Names.empty()) {
9813 auto *MapNamesArrayGbl = createOffloadMapnames(
9814 CombinedInfo.Names, createPlatformSpecificName({
"offload_mapnames"}));
9815 Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
9816 Info.EmitDebug =
true;
9818 Info.RTArgs.MapNamesArray =
9820 Info.EmitDebug =
false;
// With separate begin/end calls, OMP_MAP_PRESENT must be dropped from the
// end-call map types; emit a second maptypes global only if they differ.
9825 if (
Info.separateBeginEndCalls()) {
9826 bool EndMapTypesDiffer =
false;
9828 if (
Type &
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9829 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
9830 Type &= ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9831 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
9832 EndMapTypesDiffer =
true;
9835 if (EndMapTypesDiffer) {
9836 MapTypesArrayGbl = createOffloadMaptypes(Mapping, MaptypesName);
9837 Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
// Per-pointer stores: base pointer, pointer, runtime size, mapper.
9842 for (
unsigned I = 0;
I <
Info.NumberOfPtrs; ++
I) {
9843 Value *BPVal = CombinedInfo.BasePointers[
I];
9844 Value *BP = Builder.CreateConstInBoundsGEP2_32(
9847 Builder.CreateAlignedStore(BPVal, BP,
9848 M.getDataLayout().getPrefTypeAlign(PtrTy));
9850 if (
Info.requiresDevicePointerInfo()) {
// use_device_ptr: stash an extra alloca to receive the device pointer.
9851 if (CombinedInfo.DevicePointers[
I] == DeviceInfoTy::Pointer) {
9852 CodeGenIP = Builder.saveIP();
9853 Builder.restoreIP(AllocaIP);
9854 Info.DevicePtrInfoMap[BPVal] = {BP, Builder.CreateAlloca(PtrTy)};
9855 Builder.restoreIP(CodeGenIP);
9857 DeviceAddrCB(
I,
Info.DevicePtrInfoMap[BPVal].second);
// use_device_addr: the array slot itself is the device address slot.
9858 }
else if (CombinedInfo.DevicePointers[
I] == DeviceInfoTy::Address) {
9859 Info.DevicePtrInfoMap[BPVal] = {BP, BP};
9861 DeviceAddrCB(
I, BP);
9865 Value *PVal = CombinedInfo.Pointers[
I];
9866 Value *
P = Builder.CreateConstInBoundsGEP2_32(
9870 Builder.CreateAlignedStore(PVal,
P,
9871 M.getDataLayout().getPrefTypeAlign(PtrTy));
// Only runtime (non-constant) sizes need a store here.
9873 if (RuntimeSizes.test(
I)) {
9874 Value *S = Builder.CreateConstInBoundsGEP2_32(
9878 Builder.CreateAlignedStore(Builder.CreateIntCast(CombinedInfo.Sizes[
I],
9881 S, M.getDataLayout().getPrefTypeAlign(PtrTy));
9884 unsigned IndexSize = M.getDataLayout().getIndexSizeInBits(0);
9887 auto CustomMFunc = CustomMapperCB(
I);
9889 return CustomMFunc.takeError();
9891 MFunc = Builder.CreatePointerCast(*CustomMFunc, PtrTy);
9893 Value *MAddr = Builder.CreateInBoundsGEP(
9895 {Builder.getIntN(IndexSize, 0), Builder.getIntN(IndexSize, I)});
9896 Builder.CreateAlignedStore(
9897 MFunc, MAddr, M.getDataLayout().getPrefTypeAlign(MAddr->
getType()));
// Non-contiguous descriptors are emitted last, once all arrays exist.
9900 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9901 Info.NumberOfPtrs == 0)
9903 emitNonContiguousDescriptor(AllocaIP, CodeGenIP, CombinedInfo,
Info);
// NOTE(review): fragments of two small helpers (presumably emitBranch and
// emitBlock); most of their bodies were lost in extraction.
// emitBranch: branch from the current block (if any) to Target, then clear
// the insertion point.
9908 BasicBlock *CurBB = Builder.GetInsertBlock();
9915 Builder.CreateBr(
Target);
9918 Builder.ClearInsertionPoint();
// emitBlock: append BB to the function and continue emitting into it.
9923 BasicBlock *CurBB = Builder.GetInsertBlock();
9939 Builder.SetInsertPoint(BB);
// Emits an OpenMP `if` clause: constant conditions fold directly to the
// then/else generator; otherwise emit a conditional branch with then/else/
// continuation blocks. NOTE(review): incomplete extraction — block-creation
// lines (orig. ~9945-9961) are missing.
9942Error OpenMPIRBuilder::emitIfClause(
Value *
Cond, BodyGenCallbackTy ThenGen,
9943 BodyGenCallbackTy ElseGen,
9944 InsertPointTy AllocaIP) {
// Fold a compile-time constant condition without emitting control flow.
9948 auto CondConstant = CI->getSExtValue();
9950 return ThenGen(AllocaIP, Builder.saveIP());
9952 return ElseGen(AllocaIP, Builder.saveIP());
9962 Builder.CreateCondBr(
Cond, ThenBlock, ElseBlock);
9964 emitBlock(ThenBlock, CurFn);
9965 if (
Error Err = ThenGen(AllocaIP, Builder.saveIP()))
9967 emitBranch(ContBlock);
9970 emitBlock(ElseBlock, CurFn);
9971 if (
Error Err = ElseGen(AllocaIP, Builder.saveIP()))
9974 emitBranch(ContBlock);
// IsFinished = true: the continuation block terminates this construct.
9976 emitBlock(ContBlock, CurFn,
true);
// NOTE(review): only the signature and one assertion message survive of
// this helper; its body (orig. lines 9981-10040) was lost in extraction.
9980bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
9984 "Unexpected Atomic Ordering.");
// Lowers `#pragma omp atomic read`: loads X atomically (native load, libcall
// via AtomicInfo for unsupported widths, or an integer-cast load with a
// bitcast back for FP/pointer element types) and stores the result into V.
// NOTE(review): incomplete extraction — several branch conditions are
// missing between the visible statements.
10041OpenMPIRBuilder::InsertPointTy
10042OpenMPIRBuilder::createAtomicRead(
const LocationDescription &
Loc,
10043 AtomicOpValue &
X, AtomicOpValue &V,
10045 if (!updateToLocation(
Loc))
10048 assert(
X.Var->getType()->isPointerTy() &&
10049 "OMP Atomic expects a pointer to target memory");
10050 Type *XElemTy =
X.ElemTy;
10053 "OMP atomic read expected a scalar type");
10055 Value *XRead =
nullptr;
// Direct atomic load path.
10059 Builder.CreateLoad(XElemTy,
X.Var,
X.IsVolatile,
"omp.atomic.read");
// Libcall path: delegate to AtomicInfo for sizes the target cannot load
// atomically inline.
10065 LoadInst *OldVal = Builder.CreateLoad(XElemTy,
X.Var,
"omp.atomic.read");
10068 unsigned LoadSize =
DL.getTypeStoreSize(XElemTy);
10069 OpenMPIRBuilder::AtomicInfo atomicInfo(
10070 &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->
getAlign(),
10071 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
10072 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
10073 XRead = AtomicLoadRes.first;
// Integer-cast path: load as an integer of equal width, then cast back.
10080 Builder.CreateLoad(IntCastTy,
X.Var,
X.IsVolatile,
"omp.atomic.load");
10083 XRead = Builder.CreateBitCast(XLoad, XElemTy,
"atomic.flt.cast");
10085 XRead = Builder.CreateIntToPtr(XLoad, XElemTy,
"atomic.ptr.cast");
10088 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Read);
10089 Builder.CreateStore(XRead, V.Var, V.IsVolatile);
10090 return Builder.saveIP();
// Lowers `#pragma omp atomic write`: stores Expr into X atomically, using a
// native store, an AtomicInfo libcall, or an integer-cast store depending on
// the element type/width. NOTE(review): incomplete extraction — the
// branch conditions selecting between these paths are missing.
10093OpenMPIRBuilder::InsertPointTy
10094OpenMPIRBuilder::createAtomicWrite(
const LocationDescription &
Loc,
10095 AtomicOpValue &
X,
Value *Expr,
10097 if (!updateToLocation(
Loc))
10100 assert(
X.Var->getType()->isPointerTy() &&
10101 "OMP Atomic expects a pointer to target memory");
10102 Type *XElemTy =
X.ElemTy;
10105 "OMP atomic write expected a scalar type");
// Direct atomic store path.
10108 StoreInst *XSt = Builder.CreateStore(Expr,
X.Var,
X.IsVolatile);
// Libcall path for widths the target cannot store atomically inline.
10111 LoadInst *OldVal = Builder.CreateLoad(XElemTy,
X.Var,
"omp.atomic.read");
10113 unsigned LoadSize =
DL.getTypeStoreSize(XElemTy);
10114 OpenMPIRBuilder::AtomicInfo atomicInfo(
10115 &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->
getAlign(),
10116 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
10117 atomicInfo.EmitAtomicStoreLibcall(AO, Expr);
// Integer-cast path: bitcast the value to an integer and store that.
10124 Builder.CreateBitCast(Expr, IntCastTy,
"atomic.src.int.cast");
10125 StoreInst *XSt = Builder.CreateStore(ExprCast,
X.Var,
X.IsVolatile);
10129 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Write);
10130 return Builder.saveIP();
// Lowers `#pragma omp atomic update`: validates operands, delegates the
// read-modify-write to emitAtomicUpdate, and emits the trailing flush.
10133OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicUpdate(
10134 const LocationDescription &
Loc, InsertPointTy AllocaIP, AtomicOpValue &
X,
10136 AtomicUpdateCallbackTy &UpdateOp,
bool IsXBinopExpr,
// AMDGPU-specific memory-semantics hints forwarded to emitAtomicUpdate.
10137 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
10139 if (!updateToLocation(
Loc))
10143 Type *XTy =
X.Var->getType();
10145 "OMP Atomic expects a pointer to target memory");
10146 Type *XElemTy =
X.ElemTy;
10149 "OMP atomic update expected a scalar type");
// LT/GT (min/max) are handled by atomic capture/compare, not update.
10152 "OpenMP atomic does not support LT or GT operations");
10156 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, RMWOp, UpdateOp,
X.IsVolatile,
10157 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
10159 return AtomicResult.takeError();
10160 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Update);
10161 return Builder.saveIP();
// Re-emits an AtomicRMW binop as the equivalent plain instruction so the
// "new" value can be recovered from the "old" one.
// NOTE(review): the switch scaffolding around these returns was dropped by
// the extraction; only the per-op return statements remain.
10165Value *OpenMPIRBuilder::emitRMWOpAsInstruction(
Value *Src1,
Value *Src2,
10169 return Builder.CreateAdd(Src1, Src2);
10171 return Builder.CreateSub(Src1, Src2);
10173 return Builder.CreateAnd(Src1, Src2);
// Nand = ~(Src1 & Src2), expressed here as neg-of-and.
10175 return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
10177 return Builder.CreateOr(Src1, Src2);
10179 return Builder.CreateXor(Src1, Src2);
// NOTE(review): head of this function (presumably
// OpenMPIRBuilder::emitAtomicUpdate, orig. lines ~10180-10203) was dropped
// by the extraction. It performs the atomic read-modify-write via one of
// three strategies visible below: a native atomicrmw, an AtomicInfo
// cmpxchg libcall loop, or an integer-cast cmpxchg loop.
10204 AtomicUpdateCallbackTy &UpdateOp,
bool VolatileX,
bool IsXBinopExpr,
10205 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
// Decide whether the update can be a single atomicrmw instruction.
10208 bool emitRMWOp =
false;
10216 emitRMWOp = XElemTy;
10219 emitRMWOp = (IsXBinopExpr && XElemTy);
10226 std::pair<Value *, Value *> Res;
// AMDGPU memory-semantics metadata attached to the atomicrmw.
10230 if (
T.isAMDGPU()) {
10231 if (IsIgnoreDenormalMode)
10232 RMWInst->
setMetadata(
"amdgpu.ignore.denormal.mode",
10234 if (!IsFineGrainedMemory)
10235 RMWInst->
setMetadata(
"amdgpu.no.fine.grained.memory",
10237 if (!IsRemoteMemory)
10241 Res.first = RMWInst;
10246 Res.second = Res.first;
// Recompute the "new" value from the old one for capture clients.
10248 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
// Libcall path: cmpxchg loop driven by AtomicInfo.
10252 Builder.CreateLoad(XElemTy,
X,
X->getName() +
".atomic.load");
10255 unsigned LoadSize =
10258 OpenMPIRBuilder::AtomicInfo atomicInfo(
10259 &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->
getAlign(),
10260 OldVal->
getAlign(),
true , AllocaIP,
X);
10261 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
10262 BasicBlock *CurBB = Builder.GetInsertBlock();
10264 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
10268 X->getName() +
".atomic.cont");
10270 Builder.restoreIP(AllocaIP);
10271 AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
10272 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
10273 Builder.SetInsertPoint(ContBB);
10275 PHI->addIncoming(AtomicLoadRes.first, CurBB);
// UpdateOp computes the new value from the PHI'd old value.
10280 Value *Upd = *CBResult;
10281 Builder.CreateStore(Upd, NewAtomicAddr);
10284 auto Result = atomicInfo.EmitAtomicCompareExchangeLibcall(
10285 AtomicLoadRes.second, NewAtomicAddr, AO, Failure);
10287 PHI->addIncoming(PHILoad, Builder.GetInsertBlock());
// Retry the loop until the compare-exchange succeeds.
10288 Builder.CreateCondBr(
Result.second, ExitBB, ContBB);
10290 Res.first = OldExprVal;
10296 Builder.SetInsertPoint(ExitBB);
10298 Builder.SetInsertPoint(ExitTI);
// Integer-cast path: cmpxchg loop over an integer image of the value.
10304 Builder.CreateLoad(IntCastTy,
X,
X->getName() +
".atomic.load");
10311 BasicBlock *CurBB = Builder.GetInsertBlock();
10313 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
10317 X->getName() +
".atomic.cont");
10319 Builder.restoreIP(AllocaIP);
10320 AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
10321 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
10322 Builder.SetInsertPoint(ContBB);
10324 PHI->addIncoming(OldVal, CurBB);
// Convert the integer image back to the element type for UpdateOp.
10329 OldExprVal = Builder.CreateBitCast(
PHI, XElemTy,
10330 X->getName() +
".atomic.fltCast");
10332 OldExprVal = Builder.CreateIntToPtr(
PHI, XElemTy,
10333 X->getName() +
".atomic.ptrCast");
10340 Value *Upd = *CBResult;
10341 Builder.CreateStore(Upd, NewAtomicAddr);
10342 LoadInst *DesiredVal = Builder.CreateLoad(IntCastTy, NewAtomicAddr);
10347 Result->setVolatile(VolatileX);
10348 Value *PreviousVal = Builder.CreateExtractValue(Result, 0);
10349 Value *SuccessFailureVal = Builder.CreateExtractValue(Result, 1);
10350 PHI->addIncoming(PreviousVal, Builder.GetInsertBlock());
10351 Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
10353 Res.first = OldExprVal;
10360 Builder.SetInsertPoint(ExitBB);
10362 Builder.SetInsertPoint(ExitTI);
// Lowers `#pragma omp atomic capture`: performs the update via
// emitAtomicUpdate and stores either the old (postfix) or new value into V.
10369OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCapture(
10370 const LocationDescription &
Loc, InsertPointTy AllocaIP, AtomicOpValue &
X,
10373 bool UpdateExpr,
bool IsPostfixUpdate,
bool IsXBinopExpr,
10374 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
10375 if (!updateToLocation(
Loc))
10379 Type *XTy =
X.Var->getType();
10381 "OMP Atomic expects a pointer to target memory");
10382 Type *XElemTy =
X.ElemTy;
10385 "OMP atomic capture expected a scalar type");
10387 "OpenMP atomic does not support LT or GT operations");
10394 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
X.IsVolatile,
10395 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
// first = old value (v = x++), second = new value (v = ++x).
10398 Value *CapturedVal =
10399 (IsPostfixUpdate ? AtomicResult->first : AtomicResult->second);
10400 Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
10402 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Capture);
10403 return Builder.saveIP();
// Convenience overload: forwards to the full createAtomicCompare overload
// (the extra parameters of the full overload are supplied from this
// overload's own arguments).
10406OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
10407 const LocationDescription &
Loc, AtomicOpValue &
X, AtomicOpValue &V,
10413 return createAtomicCompare(
Loc,
X, V, R,
E,
D, AO,
Op, IsXBinopExpr,
10414 IsPostfixUpdate, IsFailOnly, Failure);
// Lowers `#pragma omp atomic compare`: EQ comparisons become cmpxchg (with
// optional capture of the old value or the success flag into V/R); MAX/MIN
// comparisons become an atomicrmw min/max with an optional captured value.
// NOTE(review): incomplete extraction — several lines between the visible
// statements (e.g. the cmpxchg emission at orig. 10443-10446) are missing.
10417OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
10418 const LocationDescription &
Loc, AtomicOpValue &
X, AtomicOpValue &V,
10423 if (!updateToLocation(
Loc))
10426 assert(
X.Var->getType()->isPointerTy() &&
10427 "OMP atomic expects a pointer to target memory");
10430 assert(V.Var->getType()->isPointerTy() &&
"v.var must be of pointer type");
10431 assert(V.ElemTy ==
X.ElemTy &&
"x and v must be of same type");
10434 bool IsInteger =
E->getType()->isIntegerTy();
10436 if (
Op == OMPAtomicCompareOp::EQ) {
// Non-integer operands are bitcast to same-width integers for cmpxchg.
10441 Value *EBCast = Builder.CreateBitCast(
E, IntCastTy);
10442 Value *DBCast = Builder.CreateBitCast(
D, IntCastTy);
10447 Builder.CreateAtomicCmpXchg(
X.Var,
E,
D,
MaybeAlign(), AO, Failure);
10451 Value *OldValue = Builder.CreateExtractValue(Result, 0);
10453 OldValue = Builder.CreateBitCast(OldValue,
X.ElemTy);
10455 "OldValue and V must be of same type");
10456 if (IsPostfixUpdate) {
10457 Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
10459 Value *SuccessOrFail = Builder.CreateExtractValue(Result, 1);
// IsFailOnly path: store the old value only on cmpxchg failure, using a
// small cont/exit diamond.
10470 BasicBlock *CurBB = Builder.GetInsertBlock();
10472 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
10474 CurBBTI,
X.Var->getName() +
".atomic.exit");
10480 Builder.CreateCondBr(SuccessOrFail, ExitBB, ContBB);
10482 Builder.SetInsertPoint(ContBB);
10483 Builder.CreateStore(OldValue, V.Var);
10484 Builder.CreateBr(ExitBB);
10489 Builder.SetInsertPoint(ExitBB);
10491 Builder.SetInsertPoint(ExitTI);
// Non-fail-only capture: v = success ? e : old.
10494 Value *CapturedValue =
10495 Builder.CreateSelect(SuccessOrFail,
E, OldValue);
10496 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
// Optional result variable R receives the success flag, sign/zero
// extended to its element type.
10502 assert(
R.Var->getType()->isPointerTy() &&
10503 "r.var must be of pointer type");
10504 assert(
R.ElemTy->isIntegerTy() &&
"r must be of integral type");
10506 Value *SuccessFailureVal = Builder.CreateExtractValue(Result, 1);
10507 Value *ResultCast =
R.IsSigned
10508 ? Builder.CreateSExt(SuccessFailureVal,
R.ElemTy)
10509 : Builder.CreateZExt(SuccessFailureVal,
R.ElemTy);
10510 Builder.CreateStore(ResultCast,
R.Var,
R.IsVolatile);
10513 assert((
Op == OMPAtomicCompareOp::MAX ||
Op == OMPAtomicCompareOp::MIN) &&
10514 "Op should be either max or min at this point");
10515 assert(!IsFailOnly &&
"IsFailOnly is only valid when the comparison is ==");
// MAX/MIN: choose the rmw op based on whether X is on the LHS of the
// comparison (IsXBinopExpr) — details of NewOp selection are in the
// dropped lines.
10526 if (IsXBinopExpr) {
10553 Builder.CreateAtomicRMW(NewOp,
X.Var,
E,
MaybeAlign(), AO);
10555 Value *CapturedValue =
nullptr;
10556 if (IsPostfixUpdate) {
10557 CapturedValue = OldValue;
// Prefix capture: recompute the stored value with a non-atomic select.
10582 Value *NonAtomicCmp = Builder.CreateCmp(Pred, OldValue,
E);
10583 CapturedValue = Builder.CreateSelect(NonAtomicCmp,
E, OldValue);
10585 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
10589 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Compare);
10591 return Builder.saveIP();
// Lowers `#pragma omp teams`: splits the region into alloca/body/exit
// blocks, optionally emits __kmpc_push_num_teams_51 for the host, runs the
// body generator, and registers an outlining job whose host post-processing
// replaces the outlined call with __kmpc_fork_teams.
// NOTE(review): incomplete extraction — many original lines are missing
// (numbering gaps such as 10597-10598, 10605-10608, 10695-10726).
10594OpenMPIRBuilder::InsertPointOrErrorTy
10595OpenMPIRBuilder::createTeams(
const LocationDescription &
Loc,
10596 BodyGenCallbackTy BodyGenCB,
Value *NumTeamsLower,
10599 if (!updateToLocation(
Loc))
10600 return InsertPointTy();
10603 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
10604 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
// If we are at the top of the outer alloca block, start a fresh entry
// block so allocas stay separated from region code.
10609 if (&OuterAllocaBB == Builder.GetInsertBlock()) {
10610 BasicBlock *BodyBB = splitBB(Builder,
true,
"teams.entry");
10611 Builder.SetInsertPoint(BodyBB, BodyBB->
begin());
// Region skeleton: exit / body / alloca blocks.
10631 BasicBlock *ExitBB = splitBB(Builder,
true,
"teams.exit");
10632 BasicBlock *BodyBB = splitBB(Builder,
true,
"teams.body");
10634 splitBB(Builder,
true,
"teams.alloca");
10636 bool SubClausesPresent =
10637 (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
// Host-side bounds/if/thread_limit handling via push_num_teams_51.
10639 if (!Config.isTargetDevice() && SubClausesPresent) {
10640 assert((NumTeamsLower ==
nullptr || NumTeamsUpper !=
nullptr) &&
10641 "if lowerbound is non-null, then upperbound must also be non-null "
10642 "for bounds on num_teams");
// Missing bounds default to 0 (runtime decides) / to the upper bound.
10644 if (NumTeamsUpper ==
nullptr)
10645 NumTeamsUpper = Builder.getInt32(0);
10647 if (NumTeamsLower ==
nullptr)
10648 NumTeamsLower = NumTeamsUpper;
10652 "argument to if clause must be an integer value");
// if(false) forces a single team: select 1 for both bounds.
10656 IfExpr = Builder.CreateICmpNE(IfExpr,
10657 ConstantInt::get(IfExpr->
getType(), 0));
10658 NumTeamsUpper = Builder.CreateSelect(
10659 IfExpr, NumTeamsUpper, Builder.getInt32(1),
"numTeamsUpper");
10662 NumTeamsLower = Builder.CreateSelect(
10663 IfExpr, NumTeamsLower, Builder.getInt32(1),
"numTeamsLower");
10666 if (ThreadLimit ==
nullptr)
10667 ThreadLimit = Builder.getInt32(0);
10669 Value *ThreadNum = getOrCreateThreadID(Ident);
10670 createRuntimeFunctionCall(
10671 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_teams_51),
10672 {Ident, ThreadNum, NumTeamsLower, NumTeamsUpper, ThreadLimit});
// Run the user body between the alloca and body insertion points.
10675 InsertPointTy AllocaIP(AllocaBB, AllocaBB->
begin());
10676 InsertPointTy CodeGenIP(BodyBB, BodyBB->
begin());
10677 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
10681 OI.EntryBB = AllocaBB;
10682 OI.ExitBB = ExitBB;
10683 OI.OuterAllocaBB = &OuterAllocaBB;
// Fake gtid/tid values so the outliner gives the outlined function the
// expected leading arguments.
10687 InsertPointTy OuterAllocaIP(&OuterAllocaBB, OuterAllocaBB.
begin());
10689 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"gid",
true));
10691 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"tid",
true));
// Host-only post-outlining: swap the placeholder call for fork_teams.
10693 auto HostPostOutlineCB = [
this, Ident,
10694 ToBeDeleted](
Function &OutlinedFn)
mutable {
10699 "there must be a single user for the outlined function");
10704 "Outlined function must have two or three arguments only");
10706 bool HasShared = OutlinedFn.
arg_size() == 3;
10714 assert(StaleCI &&
"Error while outlining - no CallInst user found for the "
10715 "outlined function.");
10716 Builder.SetInsertPoint(StaleCI);
10718 Ident, Builder.getInt32(StaleCI->
arg_size() - 2), &OutlinedFn};
10721 createRuntimeFunctionCall(
10722 getOrCreateRuntimeFunctionPtr(
10723 omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
10727 I->eraseFromParent();
10730 if (!Config.isTargetDevice())
10731 OI.PostOutlineCB = HostPostOutlineCB;
10733 addOutlineInfo(std::move(OI));
10735 Builder.SetInsertPoint(ExitBB, ExitBB->
begin());
10737 return Builder.saveIP();
// Lowers `#pragma omp distribute`: builds the entry/body/exit/alloca block
// skeleton, runs the body generator, and (on device only) registers an
// outlining job for the region.
10740OpenMPIRBuilder::InsertPointOrErrorTy
10741OpenMPIRBuilder::createDistribute(
const LocationDescription &
Loc,
10742 InsertPointTy OuterAllocaIP,
10743 BodyGenCallbackTy BodyGenCB) {
10744 if (!updateToLocation(
Loc))
10745 return InsertPointTy();
10747 BasicBlock *OuterAllocaBB = OuterAllocaIP.getBlock();
// Keep region code out of the outer alloca block.
10749 if (OuterAllocaBB == Builder.GetInsertBlock()) {
10751 splitBB(Builder,
true,
"distribute.entry");
10752 Builder.SetInsertPoint(BodyBB, BodyBB->
begin());
10755 splitBB(Builder,
true,
"distribute.exit");
10757 splitBB(Builder,
true,
"distribute.body");
10759 splitBB(Builder,
true,
"distribute.alloca");
10762 InsertPointTy AllocaIP(AllocaBB, AllocaBB->
begin());
10763 InsertPointTy CodeGenIP(BodyBB, BodyBB->
begin());
10764 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
// Only the device build outlines the distribute region.
10769 if (Config.isTargetDevice()) {
10771 OI.OuterAllocaBB = OuterAllocaIP.getBlock();
10772 OI.EntryBB = AllocaBB;
10773 OI.ExitBB = ExitBB;
10775 addOutlineInfo(std::move(OI));
10777 Builder.SetInsertPoint(ExitBB, ExitBB->
begin());
10779 return Builder.saveIP();
// NOTE(review): fragment of a helper (presumably createOffloadMapnames)
// that wraps a constant initializer in a module-level global and returns
// it; most of the body was lost in extraction.
10784 std::string VarName) {
10790 M, MapNamesArrayInit->
getType(),
10793 return MapNamesArrayGlobal;
// Initializes all cached LLVM types used by the builder by expanding the
// OMP_*_TYPE macro tables from OMPKinds.def. Pointer types use the
// configured default target address space; function pointers use the
// program address space from the data layout.
10798void OpenMPIRBuilder::initializeTypes(
Module &M) {
10801 unsigned DefaultTargetAS = Config.getDefaultTargetAS();
10802 unsigned ProgramAS = M.getDataLayout().getProgramAddressSpace();
10803#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
10804#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
10805 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
10806 VarName##PtrTy = PointerType::get(Ctx, DefaultTargetAS);
10807#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
10808 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
10809 VarName##Ptr = PointerType::get(Ctx, ProgramAS);
// Struct types are looked up by name first so redefinition is avoided.
10810#define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...) \
10811 T = StructType::getTypeByName(Ctx, StructName); \
10813 T = StructType::create(Ctx, {__VA_ARGS__}, StructName, Packed); \
10815 VarName##Ptr = PointerType::get(Ctx, DefaultTargetAS);
10816#include "llvm/Frontend/OpenMP/OMPKinds.def"
// Collects the basic blocks of the outlined region via a worklist walk over
// successors, deduplicated through BlockSet.
// NOTE(review): most of the body was lost in extraction; only the worklist
// loop skeleton survives.
10819void OpenMPIRBuilder::OutlineInfo::collectBlocks(
10827 while (!Worklist.
empty()) {
10831 if (
BlockSet.insert(SuccBB).second)
// NOTE(review): fragment of a helper whose head was lost in extraction; it
// applies GPU kernel attributes ("uniform-work-group-size", MustProgress)
// to a function Fn, apparently skipping the non-GPU case early.
10840 if (!Config.isGPU()) {
10855 Fn->
addFnAttr(
"uniform-work-group-size",
"true");
10856 Fn->
addFnAttr(Attribute::MustProgress);
// Emits the "omp_offload.info" named metadata plus one offload entry per
// registered target region / declare-target global, reporting malformed
// entries through ErrorFn. NOTE(review): incomplete extraction — many
// original lines are missing (e.g. 10875-10880, 10886-10897, 10989-10999).
10860void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata(
10861 EmitMetadataErrorReportFunctionTy &ErrorFn) {
10864 if (OffloadInfoManager.empty())
// Entries are collected in registration order for deterministic output.
10868 SmallVector<std::pair<
const OffloadEntriesInfoManager::OffloadEntryInfo *,
10869 TargetRegionEntryInfo>,
10871 OrderedEntries(OffloadInfoManager.size());
// Small helpers converting ints/strings to metadata nodes.
10874 auto &&GetMDInt = [
this](
unsigned V) {
10881 NamedMDNode *MD = M.getOrInsertNamedMetadata(
"omp_offload.info");
// Per-target-region metadata: kind, device/file IDs, parent, line, count,
// order.
10882 auto &&TargetRegionMetadataEmitter =
10883 [&
C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
10884 const TargetRegionEntryInfo &EntryInfo,
10885 const OffloadEntriesInfoManager::OffloadEntryInfoTargetRegion &
E) {
10898 GetMDInt(
E.getKind()), GetMDInt(EntryInfo.DeviceID),
10899 GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName),
10900 GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count),
10901 GetMDInt(
E.getOrder())};
10904 OrderedEntries[
E.getOrder()] = std::make_pair(&
E, EntryInfo);
10910 OffloadInfoManager.actOnTargetRegionEntriesInfo(TargetRegionMetadataEmitter);
// Per-device-global metadata: kind, mangled name, flags, order.
10913 auto &&DeviceGlobalVarMetadataEmitter =
10914 [&
C, &OrderedEntries, &GetMDInt, &GetMDString, MD](
10916 const OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar &
E) {
10924 Metadata *
Ops[] = {GetMDInt(
E.getKind()), GetMDString(MangledName),
10925 GetMDInt(
E.getFlags()), GetMDInt(
E.getOrder())};
10928 TargetRegionEntryInfo varInfo(MangledName, 0, 0, 0);
10929 OrderedEntries[
E.getOrder()] = std::make_pair(&
E, varInfo);
10935 OffloadInfoManager.actOnDeviceGlobalVarEntriesInfo(
10936 DeviceGlobalVarMetadataEmitter);
// Second pass: emit the actual offload entries, validating each record.
10938 for (
const auto &
E : OrderedEntries) {
10939 assert(
E.first &&
"All ordered entries must exist!");
10940 if (
const auto *CE =
// Target-region entry: must have both an ID and an address.
10943 if (!
CE->getID() || !
CE->getAddress()) {
10945 TargetRegionEntryInfo EntryInfo =
E.second;
10946 StringRef FnName = EntryInfo.ParentName;
10947 if (!M.getNamedValue(FnName))
10949 ErrorFn(EMIT_MD_TARGET_REGION_ERROR, EntryInfo);
10952 createOffloadEntry(
CE->getID(),
CE->getAddress(),
10955 }
else if (
const auto *CE =
dyn_cast<
10956 OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar>(
10958 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags =
10959 static_cast<OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
>(
// Declare-target globals are validated per entry kind.
10962 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter:
10963 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo:
// Under unified shared memory on device, to/enter entries are skipped.
10964 if (Config.isTargetDevice() && Config.hasRequiresUnifiedSharedMemory())
10966 if (!
CE->getAddress()) {
10967 ErrorFn(EMIT_MD_DECLARE_TARGET_ERROR,
E.second);
10971 if (
CE->getVarSize() == 0)
10974 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink:
10975 assert(((Config.isTargetDevice() && !
CE->getAddress()) ||
10976 (!Config.isTargetDevice() &&
CE->getAddress())) &&
10977 "Declaret target link address is set.");
10978 if (Config.isTargetDevice())
10980 if (!
CE->getAddress()) {
10981 ErrorFn(EMIT_MD_GLOBAL_VAR_LINK_ERROR, TargetRegionEntryInfo());
10985 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect:
10986 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable:
10987 if (!
CE->getAddress()) {
10988 ErrorFn(EMIT_MD_GLOBAL_VAR_INDIRECT_ERROR,
E.second);
// Local/hidden globals are skipped except for indirect entries.
11000 if ((
GV->hasLocalLinkage() ||
GV->hasHiddenVisibility()) &&
11002 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect &&
11003 Flags != OffloadEntriesInfoManager::
11004 OMPTargetGlobalVarEntryIndirectVTable))
// Indirect entries carry a name; others only the symbol and linkage.
11009 if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect ||
11011 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable)
11012 createOffloadEntry(
CE->getAddress(),
CE->getAddress(),
CE->getVarSize(),
11013 Flags,
CE->getLinkage(),
CE->getVarName());
11015 createOffloadEntry(
CE->getAddress(),
CE->getAddress(),
CE->getVarSize(),
11016 Flags,
CE->getLinkage());
// Host-side `requires` registration entry, if any flags were recorded.
11027 if (Config.hasRequiresFlags() && !Config.isTargetDevice())
11032 OffloadEntriesInfoManager::OMPTargetGlobalRegisterRequires,
11033 Config.getRequiresFlags());
11036void TargetRegionEntryInfo::getTargetRegionEntryFnName(
11038 unsigned FileID,
unsigned Line,
unsigned Count) {
11040 OS << KernelNamePrefix <<
llvm::format(
"%x", DeviceID)
11041 <<
llvm::format(
"_%x_", FileID) << ParentName <<
"_l" << Line;
11043 OS <<
"_" <<
Count;
11046void OffloadEntriesInfoManager::getTargetRegionEntryFnName(
11048 unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo);
11049 TargetRegionEntryInfo::getTargetRegionEntryFnName(
11050 Name, EntryInfo.ParentName, EntryInfo.DeviceID, EntryInfo.FileID,
11051 EntryInfo.Line, NewCount);
11054TargetRegionEntryInfo
11055OpenMPIRBuilder::getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack,
11059 auto FileIDInfo = CallBack();
11063 FileID =
Status->getUniqueID().getFile();
11067 FileID =
hash_value(std::get<0>(FileIDInfo));
11070 return TargetRegionEntryInfo(ParentName,
ID.getDevice(), FileID,
11071 std::get<1>(FileIDInfo));
11074unsigned OpenMPIRBuilder::getFlagMemberOffset() {
11077 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
11079 !(Remain & 1); Remain = Remain >> 1)
11085OpenMPIRBuilder::getMemberOfFlag(
unsigned Position) {
11088 << getFlagMemberOffset());
11091void OpenMPIRBuilder::setCorrectMemberOfFlag(
11097 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
11099 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
11106 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
11112 Flags &=
~omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
11113 Flags |= MemberOfFlag;
11116Constant *OpenMPIRBuilder::getAddrOfDeclareTargetVar(
11117 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
11118 OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
11119 bool IsDeclaration,
bool IsExternallyVisible,
11120 TargetRegionEntryInfo EntryInfo,
StringRef MangledName,
11121 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
11122 std::vector<Triple> TargetTriple,
Type *LlvmPtrTy,
11123 std::function<
Constant *()> GlobalInitializer,
11130 if (CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink ||
11131 ((CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo ||
11133 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter) &&
11134 Config.hasRequiresUnifiedSharedMemory())) {
11139 if (!IsExternallyVisible)
11140 OS <<
format(
"_%x", EntryInfo.FileID);
11141 OS <<
"_decl_tgt_ref_ptr";
11144 Value *Ptr = M.getNamedValue(PtrName);
11148 Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName);
11153 if (!Config.isTargetDevice()) {
11154 if (GlobalInitializer)
11155 GV->setInitializer(GlobalInitializer());
11160 registerTargetGlobalVariable(
11161 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
11162 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
11163 GlobalInitializer, VariableLinkage, LlvmPtrTy,
cast<Constant>(Ptr));
11172void OpenMPIRBuilder::registerTargetGlobalVariable(
11173 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
11174 OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
11175 bool IsDeclaration,
bool IsExternallyVisible,
11176 TargetRegionEntryInfo EntryInfo,
StringRef MangledName,
11177 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
11178 std::vector<Triple> TargetTriple,
11179 std::function<
Constant *()> GlobalInitializer,
11182 if (DeviceClause != OffloadEntriesInfoManager::OMPTargetDeviceClauseAny ||
11183 (TargetTriple.empty() && !Config.isTargetDevice()))
11186 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags;
11191 if ((CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo ||
11193 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter) &&
11194 !Config.hasRequiresUnifiedSharedMemory()) {
11195 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
11199 if (!IsDeclaration)
11201 M.getDataLayout().getTypeSizeInBits(LlvmVal->
getValueType()), 8);
11208 if (Config.isTargetDevice() &&
11212 if (!OffloadInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
11215 std::string RefName = createPlatformSpecificName({
VarName,
"ref"});
11217 if (!M.getNamedValue(RefName)) {
11219 getOrCreateInternalVariable(Addr->
getType(), RefName);
11221 GvAddrRef->setConstant(
true);
11223 GvAddrRef->setInitializer(Addr);
11224 GeneratedRefs.push_back(GvAddrRef);
11228 if (CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink)
11229 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
11231 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
11233 if (Config.isTargetDevice()) {
11237 Addr = getAddrOfDeclareTargetVar(
11238 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
11239 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
11240 LlvmPtrTy, GlobalInitializer, VariableLinkage);
11243 VarSize = M.getDataLayout().getPointerSize();
11247 OffloadInfoManager.registerDeviceGlobalVarEntryInfo(VarName, Addr, VarSize,
11253void OpenMPIRBuilder::loadOffloadInfoMetadata(
Module &M) {
11257 NamedMDNode *MD = M.getNamedMetadata(ompOffloadInfoName);
11262 auto &&GetMDInt = [MN](
unsigned Idx) {
11267 auto &&GetMDString = [MN](
unsigned Idx) {
11269 return V->getString();
11272 switch (GetMDInt(0)) {
11276 case OffloadEntriesInfoManager::OffloadEntryInfo::
11277 OffloadingEntryInfoTargetRegion: {
11278 TargetRegionEntryInfo EntryInfo(GetMDString(3),
11283 OffloadInfoManager.initializeTargetRegionEntryInfo(EntryInfo,
11287 case OffloadEntriesInfoManager::OffloadEntryInfo::
11288 OffloadingEntryInfoDeviceGlobalVar:
11289 OffloadInfoManager.initializeDeviceGlobalVarEntryInfo(
11291 static_cast<OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
>(
11301 if (HostFilePath.
empty())
11305 if (std::error_code Err = Buf.getError()) {
11307 "OpenMPIRBuilder: " +
11315 if (std::error_code Err = M.getError()) {
11317 (
"error parsing host file inside of OpenMPIRBuilder: " + Err.message())
11321 loadOffloadInfoMetadata(*M.get());
11328bool OffloadEntriesInfoManager::empty()
const {
11329 return OffloadEntriesTargetRegion.empty() &&
11330 OffloadEntriesDeviceGlobalVar.empty();
11333unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount(
11334 const TargetRegionEntryInfo &EntryInfo)
const {
11335 auto It = OffloadEntriesTargetRegionCount.find(
11336 getTargetRegionEntryCountKey(EntryInfo));
11337 if (It == OffloadEntriesTargetRegionCount.end())
11342void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount(
11343 const TargetRegionEntryInfo &EntryInfo) {
11344 OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] =
11345 EntryInfo.Count + 1;
11349void OffloadEntriesInfoManager::initializeTargetRegionEntryInfo(
11350 const TargetRegionEntryInfo &EntryInfo,
unsigned Order) {
11351 OffloadEntriesTargetRegion[EntryInfo] =
11352 OffloadEntryInfoTargetRegion(Order,
nullptr,
nullptr,
11353 OMPTargetRegionEntryTargetRegion);
11354 ++OffloadingEntriesNum;
11357void OffloadEntriesInfoManager::registerTargetRegionEntryInfo(
11359 OMPTargetRegionEntryKind Flags) {
11360 assert(EntryInfo.Count == 0 &&
"expected default EntryInfo");
11363 EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo);
11367 if (OMPBuilder->Config.isTargetDevice()) {
11369 if (!hasTargetRegionEntryInfo(EntryInfo)) {
11372 auto &Entry = OffloadEntriesTargetRegion[EntryInfo];
11373 Entry.setAddress(Addr);
11375 Entry.setFlags(Flags);
11377 if (Flags == OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion &&
11378 hasTargetRegionEntryInfo(EntryInfo,
true))
11380 assert(!hasTargetRegionEntryInfo(EntryInfo) &&
11381 "Target region entry already registered!");
11382 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr,
ID, Flags);
11383 OffloadEntriesTargetRegion[EntryInfo] = Entry;
11384 ++OffloadingEntriesNum;
11386 incrementTargetRegionEntryInfoCount(EntryInfo);
11389bool OffloadEntriesInfoManager::hasTargetRegionEntryInfo(
11390 TargetRegionEntryInfo EntryInfo,
bool IgnoreAddressId)
const {
11393 EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo);
11395 auto It = OffloadEntriesTargetRegion.find(EntryInfo);
11396 if (It == OffloadEntriesTargetRegion.end()) {
11400 if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID()))
11405void OffloadEntriesInfoManager::actOnTargetRegionEntriesInfo(
11406 const OffloadTargetRegionEntryInfoActTy &Action) {
11408 for (
const auto &It : OffloadEntriesTargetRegion) {
11409 Action(It.first, It.second);
11413void OffloadEntriesInfoManager::initializeDeviceGlobalVarEntryInfo(
11414 StringRef Name, OMPTargetGlobalVarEntryKind Flags,
unsigned Order) {
11415 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
11416 ++OffloadingEntriesNum;
11419void OffloadEntriesInfoManager::registerDeviceGlobalVarEntryInfo(
11422 if (OMPBuilder->Config.isTargetDevice()) {
11424 if (!hasDeviceGlobalVarEntryInfo(VarName))
11426 auto &Entry = OffloadEntriesDeviceGlobalVar[
VarName];
11427 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
11428 if (Entry.getVarSize() == 0) {
11429 Entry.setVarSize(VarSize);
11434 Entry.setVarSize(VarSize);
11436 Entry.setAddress(Addr);
11438 if (hasDeviceGlobalVarEntryInfo(VarName)) {
11439 auto &Entry = OffloadEntriesDeviceGlobalVar[
VarName];
11440 assert(Entry.isValid() && Entry.getFlags() == Flags &&
11441 "Entry not initialized!");
11442 if (Entry.getVarSize() == 0) {
11443 Entry.setVarSize(VarSize);
11448 if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect ||
11450 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable)
11451 OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
11452 Addr, VarSize, Flags,
Linkage,
11455 OffloadEntriesDeviceGlobalVar.try_emplace(
11456 VarName, OffloadingEntriesNum, Addr, VarSize, Flags,
Linkage,
"");
11457 ++OffloadingEntriesNum;
11461void OffloadEntriesInfoManager::actOnDeviceGlobalVarEntriesInfo(
11462 const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
11464 for (
const auto &
E : OffloadEntriesDeviceGlobalVar)
11465 Action(
E.getKey(),
E.getValue());
11472void CanonicalLoopInfo::collectControlBlocks(
11479 BBs.
append({getPreheader(), Header,
Cond, Latch,
Exit, getAfter()});
11482BasicBlock *CanonicalLoopInfo::getPreheader()
const {
11491void CanonicalLoopInfo::setTripCount(
Value *TripCount) {
11503void CanonicalLoopInfo::mapIndVar(
11513 for (
Use &U : OldIV->
uses()) {
11517 if (
User->getParent() == getCond())
11519 if (
User->getParent() == getLatch())
11525 Value *NewIV = Updater(OldIV);
11528 for (
Use *U : ReplacableUses)
11536void CanonicalLoopInfo::assertOK()
const {
11549 "Preheader must terminate with unconditional branch");
11551 "Preheader must jump to header");
11555 "Header must terminate with unconditional branch");
11556 assert(Header->getSingleSuccessor() ==
Cond &&
11557 "Header must jump to exiting block");
11560 assert(
Cond->getSinglePredecessor() == Header &&
11561 "Exiting block only reachable from header");
11564 "Exiting block must terminate with conditional branch");
11566 "Exiting block must have two successors");
11568 "Exiting block's first successor jump to the body");
11570 "Exiting block's second successor must exit the loop");
11574 "Body only reachable from exiting block");
11579 "Latch must terminate with unconditional branch");
11588 "Exit block must terminate with unconditional branch");
11589 assert(
Exit->getSingleSuccessor() == After &&
11590 "Exit block must jump to after block");
11594 "After block only reachable from exit block");
11598 assert(IndVar &&
"Canonical induction variable not found?");
11600 "Induction variable must be an integer");
11602 "Induction variable must be a PHI in the loop header");
11608 auto *NextIndVar =
cast<PHINode>(IndVar)->getIncomingValue(1);
11615 Value *TripCount = getTripCount();
11616 assert(TripCount &&
"Loop trip count not found?");
11618 "Trip count and induction variable must have the same type");
11622 "Exit condition must be a signed less-than comparison");
11624 "Exit condition must compare the induction variable");
11626 "Exit condition must compare with the trip count");
11630void CanonicalLoopInfo::invalidate() {
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Expand Atomic instructions
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
uint64_t IntrinsicInst * II
#define OMP_KERNEL_ARG_VERSION
Provides definitions for Target specific Grid Values.
static Value * removeASCastIfPresent(Value *V)
static void createTargetLoopWorkshareCall(OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType, BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg, Value *TripCount, Function &LoopBodyFn, bool NoLoop)
Value * createFakeIntVal(IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy OuterAllocaIP, llvm::SmallVectorImpl< Instruction * > &ToBeDeleted, OpenMPIRBuilder::InsertPointTy InnerAllocaIP, const Twine &Name="", bool AsPtr=true, bool Is64Bit=false)
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
static FunctionCallee getKmpcDistForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void applyParallelAccessesMetadata(CanonicalLoopInfo *CLI, LLVMContext &Ctx, Loop *Loop, LoopInfo &LoopInfo, SmallVector< Metadata * > &LoopMDList)
static FunctionCallee getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for finalizing the dynamic loop using depending on type.
static Expected< Function * > createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, StringRef FuncName, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void FixupDebugInfoForOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, Function *Func, DenseMap< Value *, std::tuple< Value *, unsigned > > &ValueReplacementMap)
static OMPScheduleType getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType, bool HasOrderedClause)
Adds ordering modifier flags to schedule type.
static OMPScheduleType getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType, bool HasSimdModifier, bool HasMonotonic, bool HasNonmonotonic, bool HasOrderedClause)
Adds monotonicity modifier flags to schedule type.
static void workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI, Value *Ident, Function &OutlinedFn, const SmallVector< Instruction *, 4 > &ToBeDeleted, WorksharingLoopType LoopType, bool NoLoop)
static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType)
static llvm::CallInst * emitNoUnwindRuntimeCall(IRBuilder<> &Builder, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args, const llvm::Twine &Name)
static Error populateReductionFunction(Function *ReductionFunc, ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, IRBuilder<> &Builder, ArrayRef< bool > IsByRef, bool IsGPU)
static Function * getFreshReductionFunc(Module &M)
static void raiseUserConstantDataAllocasToEntryBlock(IRBuilderBase &Builder, Function *Function)
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
static bool isConflictIP(IRBuilder<>::InsertPoint IP1, IRBuilder<>::InsertPoint IP2)
Return whether IP1 and IP2 are ambiguous, i.e.
static void checkReductionInfos(ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, bool IsGPU)
static Type * getOffloadingArrayType(Value *V)
static OMPScheduleType getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasDistScheduleChunks)
Determine which scheduling algorithm to use, determined from schedule clause arguments.
static OMPScheduleType computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasMonotonicModifier, bool HasNonmonotonicModifier, bool HasOrderedClause, bool HasDistScheduleChunks)
Determine the schedule type using schedule and ordering clause arguments.
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
static StructType * createTaskWithPrivatesTy(OpenMPIRBuilder &OMPIRBuilder, ArrayRef< Value * > OffloadingArraysToPrivatize)
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performant unroll factor for CLI.
static Value * emitTaskDependencies(OpenMPIRBuilder &OMPBuilder, const SmallVectorImpl< OpenMPIRBuilder::DependData > &Dependencies)
static Error emitTargetOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry, TargetRegionEntryInfo &EntryInfo, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, Function *&OutlinedFn, Constant *&OutlinedFnID, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void updateNVPTXAttr(Function &Kernel, StringRef Name, int32_t Value, bool Min)
static OpenMPIRBuilder::InsertPointTy getInsertPointAfterInstr(Instruction *I)
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
static void hoistNonEntryAllocasToEntryBlock(llvm::BasicBlock &Block)
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOptLevel OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void addAccessGroupMetadata(BasicBlock *Block, MDNode *AccessGroup, LoopInfo &LI)
Attach llvm.access.group metadata to the memref instructions of Block.
static void addBasicBlockMetadata(BasicBlock *BB, ArrayRef< Metadata * > Properties)
Attach metadata Properties to the basic block described by BB.
static void restoreIPandDebugLoc(llvm::IRBuilderBase &Builder, llvm::IRBuilderBase::InsertPoint IP)
This is wrapper over IRBuilderBase::restoreIP that also restores the current debug location to the la...
static LoadInst * loadSharedDataFromTaskDescriptor(OpenMPIRBuilder &OMPIRBuilder, IRBuilderBase &Builder, Value *TaskWithPrivates, Type *TaskWithPrivatesTy)
Given a task descriptor, TaskWithPrivates, return the pointer to the block of pointers containing sha...
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
static FunctionCallee getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType)
static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::TargetDataInfo &Info, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, Function *OutlinedFn, Constant *OutlinedFnID, SmallVectorImpl< Value * > &Args, OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB, OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB, const SmallVector< llvm::OpenMPIRBuilder::DependData > &Dependencies, bool HasNoWait, Value *DynCGroupMem, OMPDynGroupprivateFallbackType DynCGroupMemFallback)
static const omp::GV & getGridValue(const Triple &T, Function *Kernel)
static Function * emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, CallInst *StaleCI, StructType *PrivatesTy, StructType *TaskWithPrivatesTy, const size_t NumOffloadingArrays, const int SharedArgsOperandNo)
Create an entry point for a target task with the following.
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable f...
static void targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition, Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr, Value *ThreadID, const SmallVector< Instruction *, 4 > &ToBeDeleted)
static void hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, Value *Ident, Value *IfCondition, Instruction *PrivTID, AllocaInst *PrivTIDAddr, const SmallVector< Instruction *, 4 > &ToBeDeleted)
FunctionAnalysisManager FAM
This file defines the Pass Instrumentation classes that provide instrumentation points into the pass ...
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
unsigned unsigned DefaultVal
std::unordered_set< BasicBlock * > BlockSet
This file implements the SmallBitVector class.
This file defines the SmallSet class.
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Defines the virtual file system interface vfs::FileSystem.
static cl::opt< unsigned > MaxThreads("xcore-max-threads", cl::Optional, cl::desc("Maximum number of threads (for emulation thread-local storage)"), cl::Hidden, cl::value_desc("number"), cl::init(8))
static const uint32_t IV[8]
Class for arbitrary precision integers.
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
LLVM_ABI bool isArrayAllocation() const
Return true if there is an allocation size parameter to the allocation instruction that is not 1.
void setAlignment(Align Align)
const Value * getArraySize() const
Get the number of elements allocated.
This class represents an incoming formal argument to a Function.
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
LLVM_ABI AssumptionCache run(Function &F, FunctionAnalysisManager &)
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
This class holds the attributes for a particular argument, parameter, function, or return value.
LLVM_ABI AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const
Add attributes to the attribute set.
LLVM_ABI AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add an argument attribute.
LLVM Basic Block Representation.
LLVM_ABI void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
iterator begin()
Instruction iterator methods.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
reverse_iterator rbegin()
const Instruction & back() const
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
InstListType::reverse_iterator reverse_iterator
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
InstListType::iterator iterator
Instruction iterators...
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getTruncOrBitCast(Constant *C, Type *Ty)
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static LLVM_ABI Constant * getSizeOf(Type *Ty)
getSizeOf constant expr - computes the (alloc) size of a type (in address-units, not bits) in a targe...
static LLVM_ABI Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DILocalScope * getScope() const
Get the local scope for this variable.
DINodeArray getAnnotations() const
Subprogram description. Uses SubclassData1.
uint32_t getAlignInBits() const
StringRef getName() const
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Analysis pass which computes a DominatorTree.
LLVM_ABI DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Represents either an error or a value T.
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
Tagged union holding either a T or a Error.
Error takeError()
Take ownership of the stored error.
reference get()
Returns a reference to the stored T value.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Type * getParamType(unsigned i) const
Parameter type accessors.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
const BasicBlock & getEntryBlock() const
FunctionType * getFunctionType() const
Returns the FunctionType for me.
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
AttributeList getAttributes() const
Return the attribute list for this Function.
const Function & getFunction() const
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Function::iterator insert(Function::iterator Position, BasicBlock *BB)
Insert BB in the basic block list at Position.
Type * getReturnType() const
Returns the type of the ret val.
void setCallingConv(CallingConv::ID CC)
Argument * getArg(unsigned i) const
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
bool hasMetadata() const
Return true if this value has any metadata attached to it.
LLVM_ABI void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
LinkageTypes getLinkage() const
void setLinkage(LinkageTypes LT)
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
PointerType * getType() const
Global values are always pointers.
@ HiddenVisibility
The GV is hidden.
@ ProtectedVisibility
The GV is protected.
void setVisibility(VisibilityTypes V)
LinkageTypes
An enumeration for the kinds of linkage for global values.
@ PrivateLinkage
Like Internal, but omit from symbol table.
@ CommonLinkage
Tentative definitions.
@ InternalLinkage
Rename collisions when linking (static functions).
@ WeakODRLinkage
Same, but only replaced by something equivalent.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ AppendingLinkage
Special purpose, only applies to global arrays.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
LLVM_ABI void setInitializer(Constant *InitVal)
setInitializer - Sets the initializer for this global variable, removing any existing initializer if ...
InsertPoint - A saved insertion point.
BasicBlock * getBlock() const
BasicBlock::iterator getPoint() const
Common base class shared among various IRBuilders.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI const DebugLoc & getStableDebugLoc() const
Fetch the debug location for this node, unless this is a debug intrinsic, in which case fetch the deb...
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI void moveBeforePreserving(InstListType::iterator MovePos)
Perform a moveBefore operation, while signalling that the caller intends to preserve the original ord...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Analysis pass that exposes the LoopInfo for a function.
LLVM_ABI LoopInfo run(Function &F, FunctionAnalysisManager &AM)
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
A Module instance is used to store all the information related to an LLVM module.
const Triple & getTargetTriple() const
Get the target triple which is a string describing the target host.
LLVMContext & getContext() const
Get the global data context.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
iterator_range< op_iterator > operands()
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
Analysis pass that exposes the ScalarEvolution for a function.
LLVM_ABI ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
The main scalar evolution driver.
A vector that has set insertion semantics.
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
bool remove_if(UnaryPredicate P)
Remove elements that match the given predicate.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(StringRef RHS)
Append from a StringRef.
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
ValueTy lookup(StringRef Key) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
StringRef - Represent a constant reference to a string, i.e.
std::string str() const
str - Get the contents as an std::string.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
size_t count(char C) const
Return the number of occurrences of C in the string.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Type * getElementType(unsigned N) const
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
LLVM_ABI Result run(const Function &F, FunctionAnalysisManager &)
TargetTransformInfo Result
Analysis pass providing the TargetLibraryInfo.
Target - Wrapper for Target specific information.
TargetMachine * createTargetMachine(const Triple &TT, StringRef CPU, StringRef Features, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM=std::nullopt, CodeGenOptLevel OL=CodeGenOptLevel::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Triple - Helper class for working with autoconf configuration names.
bool isPPC() const
Tests whether the target is PowerPC (32- or 64-bit LE or BE).
bool isX86() const
Tests whether the target is x86 (32- or 64-bit).
ArchType getArch() const
Get the parsed architecture type of this triple.
bool isWasm() const
Tests whether the target is wasm (32- and 64-bit).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
LLVM_ABI unsigned getIntegerBitWidth() const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isVoidTy() const
Return true if this is 'void'.
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
Produce an estimate of the unrolled cost of the specified loop.
LLVM_ABI bool canUnroll() const
Whether it is legal to unroll this loop.
uint64_t getRolledLoopSize() const
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
LLVM_ABI User * getUniqueUndroppableUser()
Return true if there is exactly one unique user of this value that cannot be dropped (that user can h...
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
A raw_ostream that writes to an SmallVector or SmallString.
The virtual file system interface.
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(const Twine &Name, int64_t FileSize=-1, bool RequiresNullTerminator=true, bool IsVolatile=false, bool IsText=true)
This is a convenience method that opens a file, gets its content and then closes the file.
virtual llvm::ErrorOr< Status > status(const Twine &Path)=0
Get the status of the entry at Path, if one exists.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
@ Switch
The "resume-switch" lowering, where there are separate resume and destroy functions that are shared b...
LLVM_ABI GlobalVariable * emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name, uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr=nullptr, StringRef SectionName="llvm_offload_entries")
Create an offloading section struct used to register this global at runtime.
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
@ OMP_MAP_PTR_AND_OBJ
The element being mapped is a pointer-pointee pair; both the pointer and the pointee should be mapped...
@ OMP_MAP_MEMBER_OF
The 16 MSBs of the flags indicate whether the entry is member of some struct/class.
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their defintion in openmp/runtime/src/kmp...
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
constexpr const GV & getAMDGPUGridValues()
static constexpr GV SPIRVGridValues
For generic SPIR-V GPUs.
OMPDynGroupprivateFallbackType
The fallback types for the dyn_groupprivate clause.
@ Null
Return null pointer.
static constexpr GV NVPTXGridValues
For Nvidia GPUs.
Function * Kernel
Summary of a kernel (=entry point for target offloading).
WorksharingLoopType
A type of worksharing loop construct.
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
NodeAddr< PhiNode * > Phi
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
hash_code hash_value(const FixedPointSemantics &Val)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
unsigned getPointerAddressSpace(const Type *T)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
auto successors(const MachineBasicBlock *BB)
testing::Matcher< const detail::ErrorHolder & > Failed()
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
std::string utostr(uint64_t X, bool isNeg=false)
ErrorOr< T > expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected< T > Val)
bool isa_and_nonnull(const Y &Val)
LLVM_ABI bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)
Replace constant expressions users of the given constants with instructions.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
FunctionAddr VTableAddr Count
CodeGenOptLevel
Code generation optimization level.
LLVM_ABI bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ LLVM_MARK_AS_BITMASK_ENUM
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
LLVM_ABI void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
LLVM_ABI TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto predecessors(const MachineBasicBlock *BB)
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
Attempt to constant fold an insertvalue instruction with the specified operands and indices.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
bool to_integer(StringRef S, N &Num, unsigned Base=0)
Convert the string S to an integer of the specified type using the radix Base. If Base is 0,...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
static const Target * lookupTarget(StringRef TripleStr, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...