49#include <unordered_map>
54#define DEBUG_TYPE "memprof-context-disambiguation"
57 "Number of function clones created during whole program analysis");
59 "Number of function clones created during ThinLTO backend");
61 "Number of functions that had clones created during ThinLTO backend");
62STATISTIC(AllocTypeNotCold,
"Number of not cold static allocations (possibly "
63 "cloned) during whole program analysis");
64STATISTIC(AllocTypeCold,
"Number of cold static allocations (possibly cloned) "
65 "during whole program analysis");
67 "Number of not cold static allocations (possibly cloned) during "
69STATISTIC(AllocTypeColdThinBackend,
"Number of cold static allocations "
70 "(possibly cloned) during ThinLTO backend");
72 "Number of original (not cloned) allocations with memprof profiles "
73 "during ThinLTO backend");
75 AllocVersionsThinBackend,
76 "Number of allocation versions (including clones) during ThinLTO backend");
78 "Maximum number of allocation versions created for an original "
79 "allocation during ThinLTO backend");
81 "Number of unclonable ambigous allocations during ThinLTO backend");
83 "Number of edges removed due to mismatched callees (profiled vs IR)");
85 "Number of profiled callees found via tail calls");
87 "Aggregate depth of profiled callees found via tail calls");
89 "Maximum depth of profiled callees found via tail calls");
91 "Number of profiled callees found via multiple tail call chains");
96 cl::desc(
"Specify the path prefix of the MemProf dot files."));
100 cl::desc(
"Export graph to dot files."));
104 cl::desc(
"Dump CallingContextGraph to stdout after each stage."));
108 cl::desc(
"Perform verification checks on CallingContextGraph."));
112 cl::desc(
"Perform frequent verification checks on nodes."));
115 "memprof-import-summary",
116 cl::desc(
"Import summary to use for testing the ThinLTO backend via opt"),
122 cl::desc(
"Max depth to recursively search for missing "
123 "frames through tail calls."));
128 cl::desc(
"Allow cloning of callsites involved in recursive cycles"));
137 cl::desc(
"Allow cloning of contexts through recursive cycles"));
148 cl::desc(
"Linking with hot/cold operator new interfaces"));
153 "Require target function definition when promoting indirect calls"));
174template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
175class CallsiteContextGraph {
177 CallsiteContextGraph() =
default;
178 CallsiteContextGraph(
const CallsiteContextGraph &) =
default;
179 CallsiteContextGraph(CallsiteContextGraph &&) =
default;
186 void identifyClones();
193 bool assignFunctions();
200 const CallsiteContextGraph &CCG) {
206 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
208 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
210 void exportToDot(std::string Label)
const;
213 struct FuncInfo final
214 :
public std::pair<FuncTy *, unsigned > {
215 using Base = std::pair<FuncTy *, unsigned>;
216 FuncInfo(
const Base &
B) :
Base(
B) {}
217 FuncInfo(FuncTy *
F =
nullptr,
unsigned CloneNo = 0) :
Base(
F, CloneNo) {}
218 explicit operator bool()
const {
return this->first !=
nullptr; }
219 FuncTy *
func()
const {
return this->first; }
220 unsigned cloneNo()
const {
return this->second; }
224 struct CallInfo final :
public std::pair<CallTy, unsigned > {
225 using Base = std::pair<CallTy, unsigned>;
227 CallInfo(CallTy Call =
nullptr,
unsigned CloneNo = 0)
229 explicit operator bool()
const {
return (
bool)this->first; }
230 CallTy call()
const {
return this->first; }
231 unsigned cloneNo()
const {
return this->second; }
232 void setCloneNo(
unsigned N) { this->second =
N; }
234 if (!
operator bool()) {
240 OS <<
"\t(clone " << cloneNo() <<
")";
263 bool Recursive =
false;
290 std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;
294 std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
298 const std::vector<std::shared_ptr<ContextEdge>> *
299 getEdgesWithAllocInfo()
const {
302 if (!CalleeEdges.empty())
304 if (!CallerEdges.empty()) {
317 auto *Edges = getEdgesWithAllocInfo();
321 for (
auto &Edge : *Edges)
322 Count += Edge->getContextIds().size();
324 for (
auto &Edge : *Edges)
325 ContextIds.
insert(Edge->getContextIds().begin(),
326 Edge->getContextIds().end());
332 uint8_t computeAllocType()
const {
333 auto *Edges = getEdgesWithAllocInfo();
335 return (
uint8_t)AllocationType::None;
339 for (
auto &Edge : *Edges) {
350 bool emptyContextIds()
const {
351 auto *Edges = getEdgesWithAllocInfo();
354 for (
auto &Edge : *Edges) {
355 if (!Edge->getContextIds().empty())
362 std::vector<ContextNode *> Clones;
365 ContextNode *CloneOf =
nullptr;
367 ContextNode(
bool IsAllocation) : IsAllocation(IsAllocation),
Call() {}
369 ContextNode(
bool IsAllocation,
CallInfo C)
370 : IsAllocation(IsAllocation),
Call(
C) {}
372 void addClone(ContextNode *Clone) {
374 CloneOf->Clones.push_back(Clone);
375 Clone->CloneOf = CloneOf;
377 Clones.push_back(Clone);
379 Clone->CloneOf =
this;
383 ContextNode *getOrigNode() {
390 unsigned int ContextId);
392 ContextEdge *findEdgeFromCallee(
const ContextNode *Callee);
393 ContextEdge *findEdgeFromCaller(
const ContextNode *Caller);
394 void eraseCalleeEdge(
const ContextEdge *Edge);
395 void eraseCallerEdge(
const ContextEdge *Edge);
399 bool hasCall()
const {
return (
bool)
Call.call(); }
405 bool isRemoved()
const {
408 return AllocTypes == (
uint8_t)AllocationType::None;
436 ContextIds(
std::
move(ContextIds)) {}
442 inline void clear() {
444 AllocTypes = (
uint8_t)AllocationType::None;
452 inline bool isRemoved()
const {
453 if (Callee || Caller)
474 void removeNoneTypeCalleeEdges(ContextNode *
Node);
475 void removeNoneTypeCallerEdges(ContextNode *
Node);
477 recursivelyRemoveNoneTypeCalleeEdges(ContextNode *
Node,
483 template <
class NodeT,
class IteratorT>
484 std::vector<uint64_t>
489 ContextNode *addAllocNode(
CallInfo Call,
const FuncTy *
F);
492 template <
class NodeT,
class IteratorT>
493 void addStackNodesForMIB(ContextNode *AllocNode,
502 void updateStackNodes();
506 void handleCallsitesWithMultipleTargets();
512 bool partitionCallsByCallee(
514 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode);
521 std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
524 using EdgeIter =
typename std::vector<std::shared_ptr<ContextEdge>>::iterator;
529 struct CallContextInfo {
533 std::vector<uint64_t> StackIds;
547 void removeEdgeFromGraph(ContextEdge *Edge, EdgeIter *EI =
nullptr,
548 bool CalleeIter =
true);
556 void assignStackNodesPostOrder(
569 void propagateDuplicateContextIds(
575 void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
583 return static_cast<const DerivedCCG *
>(
this)->getStackId(IdOrIndex);
593 calleesMatch(CallTy Call, EdgeIter &EI,
598 const FuncTy *getCalleeFunc(CallTy Call) {
599 return static_cast<DerivedCCG *
>(
this)->getCalleeFunc(Call);
605 bool calleeMatchesFunc(
606 CallTy Call,
const FuncTy *Func,
const FuncTy *CallerFunc,
607 std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) {
608 return static_cast<DerivedCCG *
>(
this)->calleeMatchesFunc(
609 Call, Func, CallerFunc, FoundCalleeChain);
613 bool sameCallee(CallTy Call1, CallTy Call2) {
614 return static_cast<DerivedCCG *
>(
this)->sameCallee(Call1, Call2);
619 std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy Call) {
620 return static_cast<DerivedCCG *
>(
this)->getStackIdsWithContextNodesForCall(
625 uint64_t getLastStackId(CallTy Call) {
626 return static_cast<DerivedCCG *
>(
this)->getLastStackId(Call);
631 AllocType == AllocationType::Cold ? AllocTypeCold++ : AllocTypeNotCold++;
632 static_cast<DerivedCCG *
>(
this)->updateAllocationCall(Call,
AllocType);
637 return static_cast<const DerivedCCG *
>(
this)->getAllocationCallType(Call);
642 void updateCall(
CallInfo &CallerCall, FuncInfo CalleeFunc) {
643 static_cast<DerivedCCG *
>(
this)->updateCall(CallerCall, CalleeFunc);
649 FuncInfo cloneFunctionForCallsite(
650 FuncInfo &Func,
CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
651 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
652 return static_cast<DerivedCCG *
>(
this)->cloneFunctionForCallsite(
653 Func, Call, CallMap, CallsWithMetadataInFunc, CloneNo);
658 std::string getLabel(
const FuncTy *Func,
const CallTy Call,
659 unsigned CloneNo)
const {
660 return static_cast<const DerivedCCG *
>(
this)->getLabel(Func, Call, CloneNo);
664 ContextNode *createNewNode(
bool IsAllocation,
const FuncTy *
F =
nullptr,
666 NodeOwner.push_back(std::make_unique<ContextNode>(IsAllocation,
C));
667 auto *NewNode = NodeOwner.back().get();
669 NodeToCallingFunc[NewNode] =
F;
674 ContextNode *getNodeForInst(
const CallInfo &
C);
675 ContextNode *getNodeForAlloc(
const CallInfo &
C);
676 ContextNode *getNodeForStackId(
uint64_t StackId);
699 moveEdgeToNewCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
700 EdgeIter *CallerEdgeI =
nullptr,
708 void moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
709 ContextNode *NewCallee,
710 EdgeIter *CallerEdgeI =
nullptr,
711 bool NewClone =
false,
720 void moveCalleeEdgeToNewCaller(EdgeIter &CalleeEdgeI, ContextNode *NewCaller);
749 std::vector<std::unique_ptr<ContextNode>> NodeOwner;
755 unsigned int LastContextId = 0;
758template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
760 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
761template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
763 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
764template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
766 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
767template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
769 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
772class ModuleCallsiteContextGraph
773 :
public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
776 ModuleCallsiteContextGraph(
781 friend CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
786 bool calleeMatchesFunc(
788 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain);
790 bool findProfiledCalleeThroughTailCalls(
792 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
793 bool &FoundMultipleCalleeChains);
795 std::vector<uint64_t> getStackIdsWithContextNodesForCall(
Instruction *Call);
798 void updateCall(
CallInfo &CallerCall, FuncInfo CalleeFunc);
799 CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
801 cloneFunctionForCallsite(FuncInfo &Func,
CallInfo &Call,
802 std::map<CallInfo, CallInfo> &CallMap,
803 std::vector<CallInfo> &CallsWithMetadataInFunc,
806 unsigned CloneNo)
const;
815struct IndexCall :
public PointerUnion<CallsiteInfo *, AllocInfo *> {
817 IndexCall(std::nullptr_t) : IndexCall() {}
822 IndexCall *operator->() {
return this; }
826 if (
auto *AI = llvm::dyn_cast_if_present<AllocInfo *>(
Base)) {
829 auto *CI = llvm::dyn_cast_if_present<CallsiteInfo *>(
Base);
850class IndexCallsiteContextGraph
851 :
public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
854 IndexCallsiteContextGraph(
859 ~IndexCallsiteContextGraph() {
864 for (
auto &
I : FunctionCalleesToSynthesizedCallsiteInfos) {
866 for (
auto &Callsite :
I.second)
867 FS->addCallsite(*Callsite.second);
872 friend CallsiteContextGraph<IndexCallsiteContextGraph,
FunctionSummary,
877 bool calleeMatchesFunc(
880 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain);
881 bool sameCallee(IndexCall &Call1, IndexCall &Call2);
882 bool findProfiledCalleeThroughTailCalls(
884 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
885 bool &FoundMultipleCalleeChains);
886 uint64_t getLastStackId(IndexCall &Call);
887 std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &Call);
890 void updateCall(
CallInfo &CallerCall, FuncInfo CalleeFunc);
893 cloneFunctionForCallsite(FuncInfo &Func,
CallInfo &Call,
894 std::map<CallInfo, CallInfo> &CallMap,
895 std::vector<CallInfo> &CallsWithMetadataInFunc,
897 std::string getLabel(
const FunctionSummary *Func,
const IndexCall &Call,
898 unsigned CloneNo)
const;
902 std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
913 std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>>
914 FunctionCalleesToSynthesizedCallsiteInfos;
926 :
public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
929 :
public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
942 ((
uint8_t)AllocationType::NotCold | (
uint8_t)AllocationType::Cold))
943 return AllocationType::NotCold;
951template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
953 const std::vector<uint8_t> &InAllocTypes,
954 const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
958 assert(InAllocTypes.size() == Edges.size());
960 InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), Edges.end(),
962 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
966 if (l == (uint8_t)AllocationType::None ||
967 r->AllocTypes == (uint8_t)AllocationType::None)
969 return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
978template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
979bool allocTypesMatchClone(
980 const std::vector<uint8_t> &InAllocTypes,
981 const ContextNode<DerivedCCG, FuncTy, CallTy> *Clone) {
982 const ContextNode<DerivedCCG, FuncTy, CallTy> *
Node = Clone->CloneOf;
986 assert(InAllocTypes.size() ==
Node->CalleeEdges.size());
990 for (
const auto &E : Clone->CalleeEdges) {
992 EdgeCalleeMap[E->Callee] = E->AllocTypes;
996 for (
unsigned I = 0;
I <
Node->CalleeEdges.size();
I++) {
997 auto Iter = EdgeCalleeMap.
find(
Node->CalleeEdges[
I]->Callee);
999 if (Iter == EdgeCalleeMap.
end())
1004 if (InAllocTypes[
I] == (
uint8_t)AllocationType::None ||
1005 Iter->second == (
uint8_t)AllocationType::None)
1007 if (allocTypeToUse(Iter->second) != allocTypeToUse(InAllocTypes[
I]))
1015template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1016typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1017CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
1019 ContextNode *
Node = getNodeForAlloc(
C);
1023 return NonAllocationCallToContextNodeMap.lookup(
C);
1026template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1027typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1028CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
1030 return AllocationCallToContextNodeMap.lookup(
C);
1033template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1034typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1035CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
1037 auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
1038 if (StackEntryNode != StackEntryIdToContextNodeMap.end())
1039 return StackEntryNode->second;
1043template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1044void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1046 unsigned int ContextId) {
1047 for (
auto &Edge : CallerEdges) {
1048 if (Edge->Caller == Caller) {
1050 Edge->getContextIds().insert(ContextId);
1054 std::shared_ptr<ContextEdge> Edge = std::make_shared<ContextEdge>(
1056 CallerEdges.push_back(Edge);
1057 Caller->CalleeEdges.push_back(Edge);
1060template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1061void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::removeEdgeFromGraph(
1062 ContextEdge *Edge, EdgeIter *EI,
bool CalleeIter) {
1063 assert(!EI || (*EI)->get() == Edge);
1068 auto *
Callee = Edge->Callee;
1069 auto *
Caller = Edge->Caller;
1077 Callee->eraseCallerEdge(Edge);
1078 Caller->eraseCalleeEdge(Edge);
1079 }
else if (CalleeIter) {
1080 Callee->eraseCallerEdge(Edge);
1081 *EI =
Caller->CalleeEdges.erase(*EI);
1083 Caller->eraseCalleeEdge(Edge);
1084 *EI =
Callee->CallerEdges.erase(*EI);
1088template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1089void CallsiteContextGraph<
1090 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *
Node) {
1091 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();) {
1093 if (Edge->AllocTypes == (
uint8_t)AllocationType::None) {
1094 assert(Edge->ContextIds.empty());
1095 removeEdgeFromGraph(Edge.get(), &EI,
true);
1101template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1102void CallsiteContextGraph<
1103 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCallerEdges(ContextNode *
Node) {
1104 for (
auto EI =
Node->CallerEdges.begin(); EI !=
Node->CallerEdges.end();) {
1106 if (Edge->AllocTypes == (
uint8_t)AllocationType::None) {
1107 assert(Edge->ContextIds.empty());
1108 Edge->Caller->eraseCalleeEdge(Edge.get());
1109 EI =
Node->CallerEdges.erase(EI);
1115template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1116typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1117CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1118 findEdgeFromCallee(
const ContextNode *Callee) {
1119 for (
const auto &Edge : CalleeEdges)
1120 if (Edge->Callee == Callee)
1125template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1126typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1127CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1128 findEdgeFromCaller(
const ContextNode *Caller) {
1129 for (
const auto &Edge : CallerEdges)
1130 if (Edge->Caller == Caller)
1135template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1136void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1137 eraseCalleeEdge(
const ContextEdge *Edge) {
1139 CalleeEdges, [Edge](
const std::shared_ptr<ContextEdge> &CalleeEdge) {
1140 return CalleeEdge.get() == Edge;
1142 assert(EI != CalleeEdges.end());
1143 CalleeEdges.erase(EI);
1146template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1147void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1148 eraseCallerEdge(
const ContextEdge *Edge) {
1150 CallerEdges, [Edge](
const std::shared_ptr<ContextEdge> &CallerEdge) {
1151 return CallerEdge.get() == Edge;
1153 assert(EI != CallerEdges.end());
1154 CallerEdges.erase(EI);
1157template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1158uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
1161 (
uint8_t)AllocationType::Cold | (
uint8_t)AllocationType::NotCold;
1163 for (
auto Id : ContextIds) {
1172template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1174CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
1177 (
uint8_t)AllocationType::Cold | (
uint8_t)AllocationType::NotCold;
1179 for (
auto Id : Node1Ids) {
1180 if (!Node2Ids.
count(Id))
1190template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1191uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
1193 if (Node1Ids.
size() < Node2Ids.
size())
1194 return intersectAllocTypesImpl(Node1Ids, Node2Ids);
1196 return intersectAllocTypesImpl(Node2Ids, Node1Ids);
1199template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1200typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1201CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
1203 assert(!getNodeForAlloc(Call));
1204 ContextNode *AllocNode = createNewNode(
true,
F, Call);
1205 AllocationCallToContextNodeMap[
Call] = AllocNode;
1207 AllocNode->OrigStackOrAllocId = LastContextId;
1210 AllocNode->AllocTypes = (
uint8_t)AllocationType::None;
1219 if (AllocTypes & (
uint8_t)AllocationType::NotCold)
1221 if (AllocTypes & (
uint8_t)AllocationType::Cold)
1226template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1227template <
class NodeT,
class IteratorT>
1228void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
1237 ContextIdToAllocationType[++LastContextId] =
AllocType;
1239 if (!ContextSizeInfo.
empty()) {
1240 auto &
Entry = ContextIdToContextSizeInfos[LastContextId];
1250 ContextNode *PrevNode = AllocNode;
1257 ContextIter != StackContext.
end(); ++ContextIter) {
1258 auto StackId = getStackId(*ContextIter);
1259 ContextNode *
StackNode = getNodeForStackId(StackId);
1262 StackEntryIdToContextNodeMap[StackId] =
StackNode;
1263 StackNode->OrigStackOrAllocId = StackId;
1278template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1280CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
1284 for (
auto OldId : StackSequenceContextIds) {
1285 NewContextIds.
insert(++LastContextId);
1286 OldToNewContextIds[OldId].insert(LastContextId);
1287 assert(ContextIdToAllocationType.count(OldId));
1289 ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
1291 return NewContextIds;
1294template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1295void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1296 propagateDuplicateContextIds(
1301 for (
auto Id : ContextIds)
1302 if (
auto NewId = OldToNewContextIds.find(Id);
1303 NewId != OldToNewContextIds.end())
1304 NewIds.
insert(NewId->second.begin(), NewId->second.end());
1309 auto UpdateCallers = [&](ContextNode *
Node,
1311 auto &&UpdateCallers) ->
void {
1312 for (
const auto &Edge :
Node->CallerEdges) {
1313 auto Inserted = Visited.insert(Edge.get());
1316 ContextNode *NextNode = Edge->Caller;
1320 if (!NewIdsToAdd.
empty()) {
1321 Edge->getContextIds().insert(NewIdsToAdd.
begin(), NewIdsToAdd.
end());
1322 UpdateCallers(NextNode, Visited, UpdateCallers);
1328 for (
auto &Entry : AllocationCallToContextNodeMap) {
1330 UpdateCallers(
Node, Visited, UpdateCallers);
1334template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1335void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
1336 ContextNode *NewNode, ContextNode *OrigNode,
bool TowardsCallee,
1341 TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
1343 for (
auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
1349 set_subtract(Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
1350 NotFoundContextIds);
1351 RemainingContextIds.
swap(NotFoundContextIds);
1353 if (NewEdgeContextIds.
empty()) {
1357 if (TowardsCallee) {
1358 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1359 auto NewEdge = std::make_shared<ContextEdge>(
1360 Edge->Callee, NewNode, NewAllocType, std::move(NewEdgeContextIds));
1361 NewNode->CalleeEdges.push_back(NewEdge);
1362 NewEdge->Callee->CallerEdges.push_back(NewEdge);
1364 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1365 auto NewEdge = std::make_shared<ContextEdge>(
1366 NewNode, Edge->Caller, NewAllocType, std::move(NewEdgeContextIds));
1367 NewNode->CallerEdges.push_back(NewEdge);
1368 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
1371 if (Edge->getContextIds().empty()) {
1372 removeEdgeFromGraph(Edge.get(), &EI, TowardsCallee);
1379template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1381 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
1385 assert(!Edge->ContextIds.empty());
1388template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1390 bool CheckEdges =
true) {
1391 if (
Node->isRemoved())
1395 auto NodeContextIds =
Node->getContextIds();
1399 if (
Node->CallerEdges.size()) {
1401 Node->CallerEdges.front()->ContextIds);
1404 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
1405 set_union(CallerEdgeContextIds, Edge->ContextIds);
1412 NodeContextIds == CallerEdgeContextIds ||
1415 if (
Node->CalleeEdges.size()) {
1417 Node->CalleeEdges.front()->ContextIds);
1420 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
1421 set_union(CalleeEdgeContextIds, Edge->getContextIds());
1423 assert(NodeContextIds == CalleeEdgeContextIds);
1432 for (
const auto &E :
Node->CalleeEdges)
1438template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1439void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1440 assignStackNodesPostOrder(
1443 &StackIdToMatchingCalls,
1452 auto CallerEdges =
Node->CallerEdges;
1453 for (
auto &Edge : CallerEdges) {
1455 if (Edge->isRemoved()) {
1459 assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls,
1460 CallToMatchingCall);
1469 if (
Node->IsAllocation ||
1470 !StackIdToMatchingCalls.count(
Node->OrigStackOrAllocId))
1473 auto &Calls = StackIdToMatchingCalls[
Node->OrigStackOrAllocId];
1477 if (Calls.size() == 1) {
1478 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[0];
1479 if (Ids.size() == 1) {
1480 assert(SavedContextIds.empty());
1483 if (
Node->Recursive)
1485 Node->setCall(Call);
1486 NonAllocationCallToContextNodeMap[
Call] =
Node;
1496 ContextNode *LastNode = getNodeForStackId(LastId);
1501 ContextNode *LastNode =
Node;
1508 bool PrevIterCreatedNode =
false;
1509 bool CreatedNode =
false;
1510 for (
unsigned I = 0;
I < Calls.size();
1511 I++, PrevIterCreatedNode = CreatedNode) {
1512 CreatedNode =
false;
1513 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1516 if (SavedContextIds.empty()) {
1521 if (!CallToMatchingCall.
contains(Call))
1523 auto MatchingCall = CallToMatchingCall[
Call];
1524 if (!NonAllocationCallToContextNodeMap.contains(MatchingCall)) {
1528 assert(
I > 0 && !PrevIterCreatedNode);
1531 NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
1536 assert(LastId == Ids.back());
1545 ContextNode *PrevNode = LastNode;
1549 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1551 ContextNode *CurNode = getNodeForStackId(Id);
1555 assert(!CurNode->Recursive);
1557 auto *Edge = CurNode->findEdgeFromCaller(PrevNode);
1569 if (SavedContextIds.empty()) {
1578 ContextNode *NewNode = createNewNode(
false, Func, Call);
1579 NonAllocationCallToContextNodeMap[
Call] = NewNode;
1581 NewNode->AllocTypes = computeAllocType(SavedContextIds);
1583 ContextNode *FirstNode = getNodeForStackId(Ids[0]);
1589 connectNewNode(NewNode, FirstNode,
true, SavedContextIds);
1594 connectNewNode(NewNode, LastNode,
false, SavedContextIds);
1599 for (
auto Id : Ids) {
1600 ContextNode *CurNode = getNodeForStackId(Id);
1607 auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
1609 set_subtract(PrevEdge->getContextIds(), SavedContextIds);
1610 if (PrevEdge->getContextIds().empty())
1611 removeEdgeFromGraph(PrevEdge);
1616 CurNode->AllocTypes = CurNode->CalleeEdges.empty()
1617 ? (
uint8_t)AllocationType::None
1618 : CurNode->computeAllocType();
1622 checkNode<DerivedCCG, FuncTy, CallTy>(NewNode,
true);
1623 for (
auto Id : Ids) {
1624 ContextNode *CurNode = getNodeForStackId(Id);
1627 checkNode<DerivedCCG, FuncTy, CallTy>(CurNode,
true);
1633template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1634void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
1643 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
1644 for (
auto &Call : CallsWithMetadata) {
1646 if (AllocationCallToContextNodeMap.count(Call))
1648 auto StackIdsWithContextNodes =
1649 getStackIdsWithContextNodesForCall(
Call.call());
1652 if (StackIdsWithContextNodes.empty())
1656 StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
1657 {
Call.call(), StackIdsWithContextNodes, Func, {}});
1672 for (
auto &It : StackIdToMatchingCalls) {
1673 auto &Calls = It.getSecond();
1675 if (Calls.size() == 1) {
1676 auto &Ids = Calls[0].StackIds;
1677 if (Ids.size() == 1)
1691 for (
const auto &[
Idx, CallCtxInfo] :
enumerate(Calls))
1692 FuncToIndex.
insert({CallCtxInfo.Func,
Idx});
1694 Calls.begin(), Calls.end(),
1695 [&FuncToIndex](
const CallContextInfo &
A,
const CallContextInfo &
B) {
1696 return A.StackIds.size() > B.StackIds.size() ||
1697 (A.StackIds.size() == B.StackIds.size() &&
1698 (A.StackIds < B.StackIds ||
1699 (A.StackIds == B.StackIds &&
1700 FuncToIndex[A.Func] < FuncToIndex[B.Func])));
1707 ContextNode *LastNode = getNodeForStackId(LastId);
1711 if (LastNode->Recursive)
1727 for (
unsigned I = 0;
I < Calls.size();
I++) {
1728 auto &[
Call, Ids,
Func, SavedContextIds] = Calls[
I];
1729 assert(SavedContextIds.empty());
1730 assert(LastId == Ids.back());
1735 if (
I > 0 && Ids != Calls[
I - 1].StackIds)
1736 MatchingIdsFuncSet.
clear();
1745 ContextNode *PrevNode = LastNode;
1746 ContextNode *CurNode = LastNode;
1751 for (
auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1753 CurNode = getNodeForStackId(Id);
1757 if (CurNode->Recursive) {
1762 auto *Edge = CurNode->findEdgeFromCaller(PrevNode);
1780 set_intersect(StackSequenceContextIds, Edge->getContextIds());
1783 if (StackSequenceContextIds.
empty()) {
1796 if (Ids.back() != getLastStackId(Call)) {
1797 for (
const auto &PE : LastNode->CallerEdges) {
1798 set_subtract(StackSequenceContextIds, PE->getContextIds());
1799 if (StackSequenceContextIds.
empty())
1803 if (StackSequenceContextIds.
empty())
1815 MatchingIdsFuncSet.
insert(Func);
1822 bool DuplicateContextIds =
false;
1823 for (
unsigned J =
I + 1; J < Calls.size(); J++) {
1824 auto &CallCtxInfo = Calls[J];
1825 auto &NextIds = CallCtxInfo.StackIds;
1828 auto *NextFunc = CallCtxInfo.Func;
1829 if (NextFunc != Func) {
1832 DuplicateContextIds =
true;
1835 auto &NextCall = CallCtxInfo.Call;
1836 CallToMatchingCall[NextCall] =
Call;
1847 OldToNewContextIds.
reserve(OldToNewContextIds.
size() +
1848 StackSequenceContextIds.
size());
1851 ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
1852 : StackSequenceContextIds;
1853 assert(!SavedContextIds.empty());
1855 if (!DuplicateContextIds) {
1859 set_subtract(LastNodeContextIds, StackSequenceContextIds);
1860 if (LastNodeContextIds.
empty())
1867 propagateDuplicateContextIds(OldToNewContextIds);
1878 for (
auto &Entry : AllocationCallToContextNodeMap)
1879 assignStackNodesPostOrder(
Entry.second, Visited, StackIdToMatchingCalls,
1880 CallToMatchingCall);
1887 Call->getMetadata(LLVMContext::MD_callsite));
1888 return CallsiteContext.
back();
1891uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &Call) {
1892 assert(isa<CallsiteInfo *>(Call));
1894 CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(Call));
1896 return Index.getStackIdAtIndex(CallsiteContext.
back());
1913std::string ModuleCallsiteContextGraph::getLabel(
const Function *Func,
1915 unsigned CloneNo)
const {
1916 return (
Twine(
Call->getFunction()->getName()) +
" -> " +
1917 cast<CallBase>(Call)->getCalledFunction()->getName())
1921std::string IndexCallsiteContextGraph::getLabel(
const FunctionSummary *Func,
1922 const IndexCall &Call,
1923 unsigned CloneNo)
const {
1924 auto VI = FSToVIMap.find(Func);
1925 assert(VI != FSToVIMap.end());
1926 if (isa<AllocInfo *>(Call))
1927 return (
VI->second.name() +
" -> alloc").str();
1929 auto *Callsite = dyn_cast_if_present<CallsiteInfo *>(Call);
1930 return (
VI->second.name() +
" -> " +
1932 Callsite->Clones[CloneNo]))
1937std::vector<uint64_t>
1938ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
1941 Call->getMetadata(LLVMContext::MD_callsite));
1942 return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(
1946std::vector<uint64_t>
1947IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &Call) {
1948 assert(isa<CallsiteInfo *>(Call));
1950 CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(Call));
1956template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
1957template <
class NodeT,
class IteratorT>
1958std::vector<uint64_t>
1959CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
1961 std::vector<uint64_t> StackIds;
1962 for (
auto IdOrIndex : CallsiteContext) {
1963 auto StackId = getStackId(IdOrIndex);
1964 ContextNode *
Node = getNodeForStackId(StackId);
1967 StackIds.push_back(StackId);
// Constructor: builds the callsite context graph from IR. For every call
// instruction carrying !memprof metadata it creates an allocation node and
// adds stack nodes per MIB; calls carrying only !callsite metadata are
// recorded for later matching. After construction it fixes up call-stack
// chains (handleCallsitesWithMultipleTargets) and strips the consumed
// metadata.
// NOTE(review): extracted listing — embedded line numbers and many missing
// lines (operand indices, loop closers, dump guards); code bytes untouched.
1972ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
1975    :
Mod(
M), OREGetter(OREGetter) {
1977 std::vector<CallInfo> CallsWithMetadata;
1978 for (
auto &BB :
F) {
1979 for (
auto &
I : BB) {
// Only call instructions can carry memprof/callsite metadata.
1980 if (!isa<CallBase>(
I))
1982 if (
auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof)) {
1983 CallsWithMetadata.push_back(&
I);
1984 auto *AllocNode = addAllocNode(&
I, &
F);
1985 auto *CallsiteMD =
I.getMetadata(LLVMContext::MD_callsite);
// One MIB (allocation context) per !memprof operand.
1989 for (
auto &MDOp : MemProfMD->operands()) {
1990 auto *MIBMD = cast<const MDNode>(MDOp);
1991 std::vector<ContextTotalSize> ContextSizeInfo;
// Operands beyond the first two are optional (stack-id, total-size)
// pairs used for size reporting.
1993 if (MIBMD->getNumOperands() > 2) {
1994 for (
unsigned I = 2;
I < MIBMD->getNumOperands();
I++) {
1995 MDNode *ContextSizePair =
1996 dyn_cast<MDNode>(MIBMD->getOperand(
I));
1998 uint64_t FullStackId = mdconst::dyn_extract<ConstantInt>(
2001 uint64_t TotalSize = mdconst::dyn_extract<ConstantInt>(
2004 ContextSizeInfo.push_back({FullStackId, TotalSize});
2010 addStackNodesForMIB<MDNode, MDNode::op_iterator>(
2011 AllocNode, StackContext, CallsiteContext,
// An allocation node must have received at least one alloc type.
2014 assert(AllocNode->AllocTypes != (
uint8_t)AllocationType::None);
// Metadata is consumed into the graph; drop it from the IR.
2017 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
2018 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
2021 else if (
I.getMetadata(LLVMContext::MD_callsite)) {
2022 CallsWithMetadata.push_back(&
I);
2026 if (!CallsWithMetadata.empty())
2027 FuncToCallsWithMetadata[&
F] = CallsWithMetadata;
2031 dbgs() <<
"CCG before updating call stack chains:\n";
2036 exportToDot(
"prestackupdate");
2040 handleCallsitesWithMultipleTargets();
// !callsite metadata on non-allocation calls is only needed during graph
// construction; remove it now.
2043 for (
auto &FuncEntry : FuncToCallsWithMetadata)
2044 for (
auto &Call : FuncEntry.second)
2045 Call.call()->setMetadata(LLVMContext::MD_callsite,
nullptr);
// Constructor: builds the callsite context graph from a ModuleSummaryIndex.
// Iterates every prevailing FunctionSummary, creating allocation nodes from
// its allocs() (one stack-node chain per MIB, with optional context size
// info) and recording callsites() for later matching. Single-MIB allocations
// get their version resolved immediately via allocTypeToUse.
// NOTE(review): extracted listing — embedded line numbers and many missing
// lines (VI binding, dump guards, loop closers); code bytes untouched.
2048IndexCallsiteContextGraph::IndexCallsiteContextGraph(
2053 for (
auto &
I : Index) {
2055 for (
auto &S :
VI.getSummaryList()) {
// Skip non-prevailing copies; only the prevailing summary contributes.
2064 !isPrevailing(
VI.getGUID(), S.get()))
2066 auto *
FS = dyn_cast<FunctionSummary>(S.get());
2069 std::vector<CallInfo> CallsWithMetadata;
2070 if (!
FS->allocs().empty()) {
2071 for (
auto &AN :
FS->mutableAllocs()) {
2076 if (AN.MIBs.empty())
2078 IndexCall AllocCall(&AN);
2079 CallsWithMetadata.push_back(AllocCall);
2080 auto *AllocNode = addAllocNode(AllocCall, FS);
// If context size info was recorded it must parallel the MIB list.
2089 AN.ContextSizeInfos.size() == AN.MIBs.size());
2091 for (
auto &MIB : AN.MIBs) {
2094 std::vector<ContextTotalSize> ContextSizeInfo;
2095 if (!AN.ContextSizeInfos.empty()) {
2096 for (
auto [FullStackId, TotalSize] : AN.ContextSizeInfos[
I])
2097 ContextSizeInfo.push_back({FullStackId, TotalSize});
2099 addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
2100 AllocNode, StackContext, EmptyContext, MIB.AllocType,
2104 assert(AllocNode->AllocTypes != (
uint8_t)AllocationType::None);
// With a single context the allocation type can be resolved now.
2109 AN.Versions[0] = (
uint8_t)allocTypeToUse(AllocNode->AllocTypes);
2113 if (!
FS->callsites().empty())
2114 for (
auto &SN :
FS->mutableCallsites()) {
2115 IndexCall StackNodeCall(&SN);
2116 CallsWithMetadata.push_back(StackNodeCall);
2119 if (!CallsWithMetadata.empty())
2120 FuncToCallsWithMetadata[
FS] = CallsWithMetadata;
2122 if (!
FS->allocs().empty() || !
FS->callsites().empty())
2128 dbgs() <<
"CCG before updating call stack chains:\n";
2133 exportToDot(
"prestackupdate");
2137 handleCallsitesWithMultipleTargets();
// Fixes up nodes whose profiled callee doesn't match the IR/summary callee.
// For each non-allocation node it gathers the node's call plus its
// MatchingCalls, optionally partitions them by actual callee
// (partitionCallsByCallee), then scans for the first call whose callees match
// the profiled edges (calleesMatch), updating the node's call or removing
// mismatched edges (counted in RemovedEdgesWithMismatchedCallees). Finally it
// prunes stale map entries and registers nodes created for partitioned calls
// and tail-call chains.
// NOTE(review): extracted listing — embedded line numbers and missing lines
// (edge dereferences, continue/break statements); code bytes untouched.
2140template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2141void CallsiteContextGraph<DerivedCCG, FuncTy,
2142 CallTy>::handleCallsitesWithMultipleTargets() {
2157 std::vector<std::pair<CallInfo, ContextNode *>> NewCallToNode;
2158 for (
auto &Entry : NonAllocationCallToContextNodeMap) {
// Consider the node's primary call and all calls matched to this node.
2168 std::vector<CallInfo> AllCalls;
2169 AllCalls.reserve(
Node->MatchingCalls.size() + 1);
2170 AllCalls.push_back(
Node->Call);
2171 AllCalls.insert(AllCalls.end(),
Node->MatchingCalls.begin(),
2172 Node->MatchingCalls.end());
// If the calls target different callees, split the node per callee and
// move on to the next entry.
2185 if (partitionCallsByCallee(
Node, AllCalls, NewCallToNode))
2188 auto It = AllCalls.begin();
2190 for (; It != AllCalls.end(); ++It) {
2193 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();
2196 if (!Edge->Callee->hasCall())
2198 assert(NodeToCallingFunc.count(Edge->Callee));
2200 if (!calleesMatch(
ThisCall.call(), EI, TailCallToContextNodeMap)) {
// First matching call becomes the node's primary call.
2209 if (
Node->Call != ThisCall) {
2210 Node->setCall(ThisCall);
2221 Node->MatchingCalls.clear();
// No call matched any profiled callee: count the removal.
2224 if (It == AllCalls.end()) {
2225 RemovedEdgesWithMismatchedCallees++;
// Remaining calls after the match keep matching this node.
2234 for (++It; It != AllCalls.end(); ++It) {
2238 Node->MatchingCalls.push_back(ThisCall);
// Drop map entries whose node lost its call or was re-pointed elsewhere.
2247 NonAllocationCallToContextNodeMap.remove_if([](
const auto &it) {
2248 return !it.second->hasCall() || it.second->Call != it.first;
// Register nodes created during callee partitioning ...
2252 for (
auto &[Call,
Node] : NewCallToNode)
2253 NonAllocationCallToContextNodeMap[
Call] =
Node;
// ... and nodes synthesized for tail-call chains.
2257 for (
auto &[Call,
Node] : TailCallToContextNodeMap)
2258 NonAllocationCallToContextNodeMap[
Call] =
Node;
// Splits a node whose matched calls invoke different callees. Calls are
// grouped per callee function; callee edges whose profiled callee matches a
// group are associated with that group's node (the original node is reused
// for the first group, new nodes are created for the rest, and an
// "unmatched" node collects edges with no group). Edges are then moved to
// their chosen caller node and None-type caller edges are pruned.
// Returns (per caller) whether partitioning happened — the return statements
// are among the lines missing from this listing.
// NOTE(review): extracted listing — embedded line numbers and gaps; code
// bytes untouched.
2261template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2262bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::partitionCallsByCallee(
2264 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode) {
// Group of calls sharing one callee, plus the node assigned to them.
2268 struct CallsWithSameCallee {
2269 std::vector<CallInfo> Calls;
2270 ContextNode *
Node =
nullptr;
// Bucket every call by its statically-resolved callee function.
2276 for (
auto ThisCall : AllCalls) {
2277 auto *
F = getCalleeFunc(
ThisCall.call());
2279 CalleeFuncToCallInfo[
F].Calls.push_back(ThisCall);
// Associate each profiled callee edge with the matching call group.
2288 for (
const auto &Edge :
Node->CalleeEdges) {
2289 if (!Edge->Callee->hasCall())
2291 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
2292 if (CalleeFuncToCallInfo.
contains(ProfiledCalleeFunc))
2293 CalleeNodeToCallInfo[Edge->Callee] =
2294 &CalleeFuncToCallInfo[ProfiledCalleeFunc];
// No edge matched any call group: nothing to partition.
2300 if (CalleeNodeToCallInfo.
empty())
// Lazily-created node for edges whose callee matched no call.
2312 ContextNode *UnmatchedCalleesNode =
nullptr;
// The original node is reused for the first matched group.
2314 bool UsedOrigNode =
false;
2316 for (
auto EI =
Node->CalleeEdges.begin(); EI !=
Node->CalleeEdges.end();) {
2318 if (!Edge->Callee->hasCall()) {
2325 ContextNode *CallerNodeToUse =
nullptr;
2329 if (!CalleeNodeToCallInfo.
contains(Edge->Callee)) {
2330 if (!UnmatchedCalleesNode)
2331 UnmatchedCalleesNode =
2332 createNewNode(
false, NodeToCallingFunc[
Node]);
2333 CallerNodeToUse = UnmatchedCalleesNode;
2337 auto *
Info = CalleeNodeToCallInfo[Edge->Callee];
2340 if (!UsedOrigNode) {
// Reuse the original node; its matching calls are reassigned below.
2343 Node->MatchingCalls.clear();
2344 UsedOrigNode =
true;
2347 createNewNode(
false, NodeToCallingFunc[
Node]);
// First call of the group becomes the node's call; the rest become
// its MatchingCalls.
2351 Info->Node->setCall(
Info->Calls.front());
2352 Info->Node->MatchingCalls.insert(
Info->Node->MatchingCalls.end(),
2353 Info->Calls.begin() + 1,
// Caller must later record this call->node mapping.
2358 NewCallToNode.push_back({
Info->Node->Call,
Info->Node});
2360 CallerNodeToUse =
Info->Node;
// Edge already belongs to the chosen node: skip the move.
2364 if (CallerNodeToUse ==
Node) {
2369 moveCalleeEdgeToNewCaller(EI, CallerNodeToUse);
// Prune caller edges left with no alloc type on all affected nodes.
2376 for (
auto &
I : CalleeNodeToCallInfo)
2377 removeNoneTypeCallerEdges(
I.second->Node);
2378 if (UnmatchedCalleesNode)
2379 removeNoneTypeCallerEdges(UnmatchedCalleesNode);
2380 removeNoneTypeCallerEdges(
Node);
// NOTE(review): orphaned fragment — the enclosing function signature
// (presumably IndexCallsiteContextGraph::getStackId) is lost in an
// extraction gap. Translates a stack-id-table index into the actual stack id
// via the summary index.
2393 return Index.getStackIdAtIndex(IdOrIndex);
// Checks whether the IR/summary callee of Call matches the profiled callee of
// the edge at EI. If they differ but the profiled callee is reachable through
// a unique chain of tail calls (calleeMatchesFunc fills FoundCalleeChain),
// synthesizes intermediate context nodes for the chain, splices them between
// caller and callee with AddEdge, and removes the original direct edge.
// NOTE(review): extracted listing — embedded line numbers and missing lines
// (early returns, edge binding); code bytes untouched.
2396template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2397bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
2398 CallTy Call, EdgeIter &EI,
2401 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
2402 const FuncTy *CallerFunc = NodeToCallingFunc[Edge->Caller];
// Each (callsite, function) pair on the discovered tail-call chain.
2405 std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain;
2406 if (!calleeMatchesFunc(Call, ProfiledCalleeFunc, CallerFunc,
2411 if (FoundCalleeChain.empty())
// Adds (or merges into) an edge Caller->Callee carrying the original
// edge's context ids and alloc types; keeps EI valid when inserting into
// the caller currently being iterated.
2414 auto AddEdge = [Edge, &EI](ContextNode *
Caller, ContextNode *
Callee) {
2415 auto *CurEdge =
Callee->findEdgeFromCaller(Caller);
// Existing edge: merge context ids and alloc types instead of duplicating.
2419 CurEdge->ContextIds.insert(Edge->ContextIds.begin(),
2420 Edge->ContextIds.end());
2421 CurEdge->AllocTypes |= Edge->AllocTypes;
2426 auto NewEdge = std::make_shared<ContextEdge>(
2427 Callee, Caller, Edge->AllocTypes, Edge->ContextIds);
2428 Callee->CallerEdges.push_back(NewEdge);
2429 if (Caller == Edge->Caller) {
// Insert through the live iterator so the caller's loop stays valid.
2433 EI =
Caller->CalleeEdges.insert(EI, NewEdge);
2436 "Iterator position not restored after insert and increment");
2438 Caller->CalleeEdges.push_back(NewEdge);
// Thread the chain bottom-up: each discovered tail call gets a node
// (reused if already synthesized) linked to the current callee.
2443 auto *CurCalleeNode = Edge->Callee;
2444 for (
auto &[NewCall, Func] : FoundCalleeChain) {
2445 ContextNode *NewNode =
nullptr;
2447 if (TailCallToContextNodeMap.
count(NewCall)) {
2448 NewNode = TailCallToContextNodeMap[NewCall];
2449 NewNode->AllocTypes |= Edge->AllocTypes;
2451 FuncToCallsWithMetadata[
Func].push_back({NewCall});
2453 NewNode = createNewNode(
false, Func, NewCall);
2454 TailCallToContextNodeMap[NewCall] = NewNode;
2455 NewNode->AllocTypes = Edge->AllocTypes;
2459 AddEdge(NewNode, CurCalleeNode);
2461 CurCalleeNode = NewNode;
// Finally connect the original caller to the top of the chain and drop
// the now-redundant direct edge.
2465 AddEdge(Edge->Caller, CurCalleeNode);
2469 auto *
Caller = Edge->Caller;
2473 removeEdgeFromGraph(Edge.get(), &EI,
true);
// Recursively searches (IR flavor) for ProfiledCallee starting from CurCallee
// by following tail calls, up to the depth limit (see the
// memprof-tail-call-search-depth option in this file's header). Records the
// discovered chain in FoundCalleeChain and updates the tail-call statistics.
// Sets FoundMultipleCalleeChains when more than one distinct chain reaches
// the profiled callee, in which case the result is unusable.
// NOTE(review): extracted listing — embedded line numbers and missing lines
// (depth check, early returns, loop closers); code bytes untouched.
2485bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2487 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
2488 bool &FoundMultipleCalleeChains) {
// Helper (lambda) that appends a chain element; body partially elided.
2495 FoundCalleeChain.push_back({Callsite,
F});
2498 auto *CalleeFunc = dyn_cast<Function>(CurCallee);
// Look through a global alias to the aliasee function.
2500 auto *Alias = dyn_cast<GlobalAlias>(CurCallee);
2502 CalleeFunc = dyn_cast<Function>(Alias->getAliasee());
2510 bool FoundSingleCalleeChain =
false;
// Scan every tail call in the current callee's body.
2511 for (
auto &BB : *CalleeFunc) {
2512 for (
auto &
I : BB) {
2513 auto *CB = dyn_cast<CallBase>(&
I);
2514 if (!CB || !CB->isTailCall())
2516 auto *CalledValue = CB->getCalledOperand();
2517 auto *CalledFunction = CB->getCalledFunction();
// Resolve a casted callee to the underlying function.
2518 if (CalledValue && !CalledFunction) {
2519 CalledValue = CalledValue->stripPointerCasts();
2521 CalledFunction = dyn_cast<Function>(CalledValue);
2525 if (
auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
2526 assert(!CalledFunction &&
2527 "Expected null called function in callsite for alias");
2528 CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
2530 if (!CalledFunction)
2532 if (CalledFunction == ProfiledCallee) {
// A second direct hit means the chain is ambiguous.
2533 if (FoundSingleCalleeChain) {
2534 FoundMultipleCalleeChains =
true;
2537 FoundSingleCalleeChain =
true;
2538 FoundProfiledCalleeCount++;
2539 FoundProfiledCalleeDepth +=
Depth;
2540 if (
Depth > FoundProfiledCalleeMaxDepth)
2541 FoundProfiledCalleeMaxDepth =
Depth;
2542 SaveCallsiteInfo(&
I, CalleeFunc);
2543 }
else if (findProfiledCalleeThroughTailCalls(
2544 ProfiledCallee, CalledFunction,
Depth + 1,
2545 FoundCalleeChain, FoundMultipleCalleeChains)) {
// The recursive call only returns true for a unique chain.
2548 assert(!FoundMultipleCalleeChains);
2549 if (FoundSingleCalleeChain) {
2550 FoundMultipleCalleeChains =
true;
2553 FoundSingleCalleeChain =
true;
2554 SaveCallsiteInfo(&
I, CalleeFunc);
2555 }
else if (FoundMultipleCalleeChains)
2560 return FoundSingleCalleeChain;
// NOTE(review): fragment — the signature line (presumably
// ModuleCallsiteContextGraph::getCalleeFunc(Instruction *)) is lost in an
// extraction gap. Resolves a call instruction's static callee function,
// stripping pointer casts and looking through a global alias; indirect calls
// are rejected (the early-return body line is missing here).
2564 auto *CB = dyn_cast<CallBase>(Call);
2565 if (!CB->getCalledOperand() || CB->isIndirectCall())
2567 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
2568 auto *Alias = dyn_cast<GlobalAlias>(CalleeVal);
2570 return dyn_cast<Function>(Alias->getAliasee());
2571 return dyn_cast<Function>(CalleeVal);
// Returns whether Call's static callee is Func (directly or via an alias);
// otherwise attempts to reach Func from the actual callee through a unique
// tail-call chain (findProfiledCalleeThroughTailCalls), reporting failures
// via LLVM_DEBUG and counting ambiguous chains in
// FoundProfiledCalleeNonUniquelyCount.
// NOTE(review): extracted listing — embedded line numbers and missing lines
// (return statements, Depth initialization); code bytes untouched.
2574bool ModuleCallsiteContextGraph::calleeMatchesFunc(
2576 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) {
2577 auto *CB = dyn_cast<CallBase>(Call);
// Indirect calls cannot be matched to a profiled callee.
2578 if (!CB->getCalledOperand() || CB->isIndirectCall())
2580 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
2581 auto *CalleeFunc = dyn_cast<Function>(CalleeVal);
2582 if (CalleeFunc == Func)
// An alias to Func also counts as a direct match.
2584 auto *Alias = dyn_cast<GlobalAlias>(CalleeVal);
2585 if (Alias && Alias->getAliasee() == Func)
// Fall back to searching through tail calls for the profiled callee.
2596 bool FoundMultipleCalleeChains =
false;
2597 if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal,
Depth,
2599 FoundMultipleCalleeChains)) {
2600 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: "
2601 <<
Func->getName() <<
" from " << CallerFunc->
getName()
2602 <<
" that actually called " << CalleeVal->getName()
2603 << (FoundMultipleCalleeChains
2604 ?
" (found multiple possible chains)"
2607 if (FoundMultipleCalleeChains)
2608 FoundProfiledCalleeNonUniquelyCount++;
// Returns whether two IR calls statically resolve to the same callee
// function (after stripping pointer casts); indirect calls are rejected
// (the early-return body lines are missing from this listing).
// NOTE(review): extracted listing — embedded line numbers and gaps; code
// bytes untouched.
2615bool ModuleCallsiteContextGraph::sameCallee(
Instruction *Call1,
2617 auto *CB1 = cast<CallBase>(Call1);
2618 if (!CB1->getCalledOperand() || CB1->isIndirectCall())
2620 auto *CalleeVal1 = CB1->getCalledOperand()->stripPointerCasts();
2621 auto *CalleeFunc1 = dyn_cast<Function>(CalleeVal1);
2622 auto *CB2 = cast<CallBase>(Call2);
2623 if (!CB2->getCalledOperand() || CB2->isIndirectCall())
2625 auto *CalleeVal2 = CB2->getCalledOperand()->stripPointerCasts();
2626 auto *CalleeFunc2 = dyn_cast<Function>(CalleeVal2);
2627 return CalleeFunc1 == CalleeFunc2;
// Summary-index analog of the tail-call search: follows hasTailCall() call
// edges from CurCallee's prevailing FunctionSummary toward ProfiledCallee.
// Because the index has no real instructions, synthesized CallsiteInfos are
// cached in FunctionCalleesToSynthesizedCallsiteInfos and used as the chain
// elements; FSToVIMap is kept up to date for label/debug lookups.
// NOTE(review): extracted listing — embedded line numbers and missing lines
// (depth check, summary-list loop header, early returns); code untouched.
2630bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2632 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
2633 bool &FoundMultipleCalleeChains) {
// Create (or reuse) a synthesized CallsiteInfo for this (FS, Callee) pair.
2642 if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) ||
2643 !FunctionCalleesToSynthesizedCallsiteInfos[
FS].count(Callee))
2646 FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee] =
2649 FunctionCalleesToSynthesizedCallsiteInfos[
FS][
Callee].get();
2650 FoundCalleeChain.push_back({NewCallsiteInfo,
FS});
2657 bool FoundSingleCalleeChain =
false;
// Only the prevailing summary for the current callee is searched.
2660 !isPrevailing(CurCallee.
getGUID(), S.get()))
2662 auto *
FS = dyn_cast<FunctionSummary>(S->getBaseObject());
// Resolve an alias summary to its aliasee's ValueInfo for FSToVIMap.
2665 auto FSVI = CurCallee;
2666 auto *AS = dyn_cast<AliasSummary>(S.get());
2668 FSVI = AS->getAliaseeVI();
2669 for (
auto &CallEdge :
FS->calls()) {
2670 if (!CallEdge.second.hasTailCall())
2672 if (CallEdge.first == ProfiledCallee) {
// A second direct hit means the chain is ambiguous.
2673 if (FoundSingleCalleeChain) {
2674 FoundMultipleCalleeChains =
true;
2677 FoundSingleCalleeChain =
true;
2678 FoundProfiledCalleeCount++;
2679 FoundProfiledCalleeDepth +=
Depth;
2680 if (
Depth > FoundProfiledCalleeMaxDepth)
2681 FoundProfiledCalleeMaxDepth =
Depth;
2682 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
2684 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2685 FSToVIMap[
FS] = FSVI;
2686 }
else if (findProfiledCalleeThroughTailCalls(
2687 ProfiledCallee, CallEdge.first,
Depth + 1,
2688 FoundCalleeChain, FoundMultipleCalleeChains)) {
// The recursive call only returns true for a unique chain.
2691 assert(!FoundMultipleCalleeChains);
2692 if (FoundSingleCalleeChain) {
2693 FoundMultipleCalleeChains =
true;
2696 FoundSingleCalleeChain =
true;
2697 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
2699 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2700 FSToVIMap[
FS] = FSVI;
2701 }
else if (FoundMultipleCalleeChains)
2706 return FoundSingleCalleeChain;
// Resolves an IndexCall's callee to a FunctionSummary via the first summary
// in the callee ValueInfo's list (null when the list is empty or the base
// object is not a function). The return-type line and the Callee binding are
// lost in extraction gaps.
// NOTE(review): extracted listing; code bytes untouched.
2710IndexCallsiteContextGraph::getCalleeFunc(IndexCall &Call) {
2712 if (
Callee.getSummaryList().empty())
2714 return dyn_cast<FunctionSummary>(
Callee.getSummaryList()[0]->getBaseObject());
// Summary-index analog of calleeMatchesFunc: compares the callee ValueInfo
// against Func's ValueInfo (from FSToVIMap), accepting an alias whose
// aliasee is Func, then falls back to the tail-call search. Debug output and
// the ambiguous-chain statistic mirror the module flavor.
// NOTE(review): extracted listing — embedded line numbers and missing lines
// (alias comparison, return statements); code bytes untouched.
2717bool IndexCallsiteContextGraph::calleeMatchesFunc(
2720 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) {
// Fetch a possible alias summary for the callee (null if none).
2725 Callee.getSummaryList().empty()
2727 : dyn_cast<AliasSummary>(
Callee.getSummaryList()[0].get());
2728 assert(FSToVIMap.count(Func));
2729 auto FuncVI = FSToVIMap[
Func];
2730 if (Callee == FuncVI ||
// Not a direct match: search through tail-call summary edges.
2745 bool FoundMultipleCalleeChains =
false;
2746 if (!findProfiledCalleeThroughTailCalls(
2747 FuncVI, Callee,
Depth, FoundCalleeChain, FoundMultipleCalleeChains)) {
2748 LLVM_DEBUG(
dbgs() <<
"Not found through unique tail call chain: " << FuncVI
2749 <<
" from " << FSToVIMap[CallerFunc]
2750 <<
" that actually called " << Callee
2751 << (FoundMultipleCalleeChains
2752 ?
" (found multiple possible chains)"
2755 if (FoundMultipleCalleeChains)
2756 FoundProfiledCalleeNonUniquelyCount++;
// Returns whether two summary callsites record the same callee ValueInfo.
// Both IndexCalls are expected to wrap CallsiteInfo (no null check here).
// NOTE(review): extracted listing — closing brace lost in a gap; code bytes
// untouched.
2763bool IndexCallsiteContextGraph::sameCallee(IndexCall &Call1, IndexCall &Call2) {
2764 ValueInfo Callee1 = dyn_cast_if_present<CallsiteInfo *>(Call1)->Callee;
2765 ValueInfo Callee2 = dyn_cast_if_present<CallsiteInfo *>(Call2)->Callee;
2766 return Callee1 == Callee2;
// Debug helper: dumps a ContextNode (presumably delegating to print(dbgs());
// the body is lost in an extraction gap — TODO confirm).
// NOTE(review): extracted listing; code bytes untouched.
2769template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2770void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
// Prints one ContextNode: its address, recursion flag, matching calls,
// sorted context ids, callee/caller edges, and clone relationships.
// NOTE(review): extracted listing — embedded line numbers and missing lines
// (call printing, clone list header); code bytes untouched.
2776template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2777void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
2779 OS <<
"Node " <<
this <<
"\n";
2783 OS <<
" (recursive)";
2785 if (!MatchingCalls.
empty()) {
2786 OS <<
"\tMatchingCalls:\n";
2787 for (
auto &MatchingCall : MatchingCalls) {
2789 MatchingCall.print(
OS);
// Context ids are printed sorted for deterministic output.
2794 OS <<
"\tContextIds:";
2796 auto ContextIds = getContextIds();
2797 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
2798 std::sort(SortedIds.begin(), SortedIds.end());
2799 for (
auto Id : SortedIds)
2802 OS <<
"\tCalleeEdges:\n";
2803 for (
auto &Edge : CalleeEdges)
2804 OS <<
"\t\t" << *Edge <<
"\n";
2805 OS <<
"\tCallerEdges:\n";
2806 for (
auto &Edge : CallerEdges)
2807 OS <<
"\t\t" << *Edge <<
"\n";
// A node either lists its clones or names the node it was cloned from.
2808 if (!Clones.empty()) {
2811 for (
auto *Clone : Clones)
2814 }
else if (CloneOf) {
2815 OS <<
"\tClone of " << CloneOf <<
"\n";
// Debug helper: dumps a ContextEdge (presumably delegating to print(dbgs());
// the body is lost in an extraction gap — TODO confirm).
// NOTE(review): extracted listing; code bytes untouched.
2819template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2820void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
// Prints one ContextEdge: endpoints/alloc types (lines elided in this
// listing) followed by the edge's sorted context ids.
// NOTE(review): extracted listing; code bytes untouched.
2826template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2827void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
2831 OS <<
" ContextIds:";
// Sorted for deterministic output.
2832 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
2833 std::sort(SortedIds.begin(), SortedIds.end());
2834 for (
auto Id : SortedIds)
// Debug helper: dumps the whole graph (presumably print(dbgs()); body lost
// in an extraction gap — TODO confirm).
// NOTE(review): extracted listing; code bytes untouched.
2838template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2839void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump()
const {
// Prints every live node of the graph using the GraphTraits node iterator,
// skipping nodes already removed from the graph.
// NOTE(review): extracted listing — the per-node print call is among the
// missing lines; code bytes untouched.
2843template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2844void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
2846 OS <<
"Callsite Context Graph:\n";
2847 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2848 for (
const auto Node : nodes<GraphType>(
this)) {
2849 if (
Node->isRemoved())
// Emits "MemProf hinting:" size reports for every live allocation node: for
// each context id (sorted), looks up its allocation type and any recorded
// (full stack id, total size) pairs, noting when the hinted type differs
// from the type actually applied to the call due to the cold byte percent
// threshold.
// NOTE(review): extracted listing — embedded line numbers and missing lines
// (ContextIds binding, type printing); code bytes untouched.
2856template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2857void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
2859 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2860 for (
const auto Node : nodes<GraphType>(
this)) {
2861 if (
Node->isRemoved())
// Only allocation nodes carry size info.
2863 if (!
Node->IsAllocation)
2866 auto AllocTypeFromCall = getAllocationCallType(
Node->Call);
2867 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
2868 std::sort(SortedIds.begin(), SortedIds.end());
2869 for (
auto Id : SortedIds) {
2870 auto TypeI = ContextIdToAllocationType.find(Id);
2871 assert(TypeI != ContextIdToAllocationType.end());
2872 auto CSI = ContextIdToContextSizeInfos.find(Id);
2873 if (CSI != ContextIdToContextSizeInfos.end()) {
2874 for (
auto &Info : CSI->second) {
2875 OS <<
"MemProf hinting: "
2877 <<
" full allocation context " <<
Info.FullStackId
2878 <<
" with total size " <<
Info.TotalSize <<
" is "
// Flag contexts whose hint was overridden by the cold byte percent.
2880 if (allocTypeToUse(
Node->AllocTypes) != AllocTypeFromCall)
2882 <<
" due to cold byte percent";
// Verification pass (enabled by the memprof-verify-ccg option declared in
// this file's header): checks every node and each of its caller edges for
// graph invariants.
// NOTE(review): extracted listing; code bytes untouched.
2890template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2891void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check()
const {
2892 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2893 for (
const auto Node : nodes<GraphType>(
this)) {
2894 checkNode<DerivedCCG, FuncTy, CallTy>(
Node,
false);
2895 for (
auto &Edge :
Node->CallerEdges)
2896 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
// GraphTraits specialization so CallsiteContextGraph works with LLVM's
// generic graph utilities (nodes<>, dot export). Heavily gapped in this
// listing: only the typedefs, the entry-node accessor, and a callee-mapping
// helper declaration survive.
// NOTE(review): extracted listing; code bytes untouched.
2900template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2902 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2903 using NodeRef =
const ContextNode<DerivedCCG, FuncTy, CallTy> *;
2905 using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
// Entry node: first node in the graph's owning container.
2921 return G->NodeOwner.begin()->get();
2924 using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
2925 static const ContextNode<DerivedCCG, FuncTy, CallTy> *
2933 decltype(&GetCallee)>;
// DOTGraphTraits specialization: renders nodes with an "OrigId:" label (or
// "Alloc" for allocations), the call label from getLabel, recursion/external
// markers, tooltips listing context ids, fill colors keyed by alloc type,
// and dashed blue styling for clones. Removed nodes are hidden.
// NOTE(review): extracted listing — embedded line numbers and many missing
// lines (function signatures, color table); code bytes untouched.
2944template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
2949 using GraphType =
const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
// Node label: orig stack/alloc id plus the resolved call description.
2955 std::string LabelString =
2956 (
Twine(
"OrigId: ") + (Node->IsAllocation ?
"Alloc" :
"") +
2957 Twine(Node->OrigStackOrAllocId))
2959 LabelString +=
"\n";
2960 if (Node->hasCall()) {
2961 auto Func =
G->NodeToCallingFunc.find(Node);
2962 assert(Func !=
G->NodeToCallingFunc.end());
2964 G->getLabel(Func->second, Node->Call.call(), Node->Call.cloneNo());
2966 LabelString +=
"null call";
2967 if (Node->Recursive)
2968 LabelString +=
" (recursive)";
2970 LabelString +=
" (external)";
// Node attributes: tooltip with context ids, fill color by alloc type.
2976 std::string AttributeString = (
Twine(
"tooltip=\"") + getNodeId(Node) +
" " +
2977 getContextIds(Node->getContextIds()) +
"\"")
2980 (
Twine(
",fillcolor=\"") + getColor(Node->AllocTypes) +
"\"").str();
2981 AttributeString +=
",style=\"filled\"";
// Clones are visually distinguished with a dashed blue border.
2982 if (Node->CloneOf) {
2983 AttributeString +=
",color=\"blue\"";
2984 AttributeString +=
",style=\"filled,bold,dashed\"";
2986 AttributeString +=
",style=\"filled\"";
2987 return AttributeString;
// Edge attributes mirror node styling: tooltip + alloc-type color.
2992 auto &Edge = *(ChildIter.getCurrent());
2993 return (
Twine(
"tooltip=\"") + getContextIds(Edge->ContextIds) +
"\"" +
2994 Twine(
",fillcolor=\"") + getColor(Edge->AllocTypes) +
"\"")
// Removed nodes are filtered out of the rendering.
3001 return Node->isRemoved();
// Context-id tooltip text; large sets are summarized by count only.
3006 std::string IdString =
"ContextIds:";
3007 if (ContextIds.
size() < 100) {
3008 std::vector<uint32_t> SortedIds(ContextIds.
begin(), ContextIds.
end());
3009 std::sort(SortedIds.begin(), SortedIds.end());
3010 for (
auto Id : SortedIds)
3011 IdString += (
" " +
Twine(Id)).str();
3013 IdString += (
" (" +
Twine(ContextIds.
size()) +
" ids)").str();
3018 static std::string getColor(
uint8_t AllocTypes) {
3027 return "mediumorchid1";
// Node id: pointer value rendered in hex.
3031 static std::string getNodeId(NodeRef
Node) {
3032 std::stringstream SStream;
3033 SStream << std::hex <<
"N0x" << (
unsigned long long)
Node;
3034 std::string
Result = SStream.str();
// Writes the graph to a dot file tagged with Label (the body — filename
// construction using the dot-file path prefix option from this file's
// header, and WriteGraph — is lost in an extraction gap).
// NOTE(review): extracted listing; code bytes untouched.
3039template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3040void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
3041 std::string Label)
const {
// Clones the callee of Edge into a brand-new node (registered as a clone of
// the original, inheriting its MatchingCalls) and redirects the edge to it
// via moveEdgeToExistingCalleeClone. Returns the clone (return line elided
// in this listing).
// NOTE(review): extracted listing; code bytes untouched.
3046template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3047typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
3048CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
3049 const std::shared_ptr<ContextEdge> &Edge, EdgeIter *CallerEdgeI,
3051 ContextNode *
Node = Edge->Callee;
// The clone mirrors the original's allocation-ness, function, and call.
3053 ContextNode *Clone =
3054 createNewNode(
Node->IsAllocation, NodeToCallingFunc[
Node],
Node->Call);
3055 Node->addClone(Clone);
3056 Clone->MatchingCalls =
Node->MatchingCalls;
3057 moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI,
true,
// Moves Edge (or only the ContextIdsToMove subset of its ids) from its
// current callee to NewCallee, a clone of the same original node. Handles
// two regimes: moving the whole edge (re-point or merge into an existing
// edge) versus moving a subset (split off a new edge / merge ids into an
// existing one), then propagates the moved context ids down the old
// callee's callee edges into matching edges from NewCallee, recomputing
// alloc types throughout. Ends with optional invariant checks.
// NOTE(review): extracted listing — embedded line numbers and missing lines
// (assertions, EdgeContextIdsToMove computation); code bytes untouched.
3062template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3063void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3064    moveEdgeToExistingCalleeClone(
const std::shared_ptr<ContextEdge> &Edge,
3065 ContextNode *NewCallee, EdgeIter *CallerEdgeI,
// Clones must originate from the same node for the move to make sense.
3070 assert(NewCallee->getOrigNode() == Edge->Callee->getOrigNode());
3072 ContextNode *OldCallee = Edge->Callee;
3076 auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(Edge->Caller);
// Empty set means "move everything on this edge".
3080 if (ContextIdsToMove.
empty())
3081 ContextIdsToMove = Edge->getContextIds();
// Case 1: the entire edge moves.
3085 if (Edge->getContextIds().size() == ContextIdsToMove.
size()) {
3088 NewCallee->AllocTypes |= Edge->AllocTypes;
3090 if (ExistingEdgeToNewCallee) {
// Merge into the pre-existing edge and delete the old one.
3093 ExistingEdgeToNewCallee->getContextIds().
insert(ContextIdsToMove.
begin(),
3094 ContextIdsToMove.
end());
3095 ExistingEdgeToNewCallee->AllocTypes |= Edge->AllocTypes;
3096 assert(Edge->ContextIds == ContextIdsToMove);
3097 removeEdgeFromGraph(Edge.get(), CallerEdgeI,
false);
// Otherwise simply re-point the edge at the new callee.
3100 Edge->Callee = NewCallee;
3101 NewCallee->CallerEdges.push_back(Edge);
3104 *CallerEdgeI = OldCallee->CallerEdges.erase(*CallerEdgeI);
3106 OldCallee->eraseCallerEdge(Edge.get());
// Case 2: only a subset of ids moves; the original edge survives.
3115 auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove);
3116 if (ExistingEdgeToNewCallee) {
3119 ExistingEdgeToNewCallee->getContextIds().insert(ContextIdsToMove.
begin(),
3120 ContextIdsToMove.
end());
3121 ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType;
3124 auto NewEdge = std::make_shared<ContextEdge>(
3125 NewCallee, Edge->Caller, CallerEdgeAllocType, ContextIdsToMove);
3126 Edge->Caller->CalleeEdges.push_back(NewEdge);
3127 NewCallee->CallerEdges.push_back(NewEdge);
3131 NewCallee->AllocTypes |= CallerEdgeAllocType;
// Original edge keeps the remaining ids; recompute its alloc types.
3133 Edge->AllocTypes = computeAllocType(Edge->ContextIds);
// Propagate the moved ids down the old callee's outgoing edges so the
// clone's callee edges carry them instead.
3138 for (
auto &OldCalleeEdge : OldCallee->CalleeEdges) {
3143 set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
3144 OldCalleeEdge->AllocTypes =
3145 computeAllocType(OldCalleeEdge->getContextIds());
3152 if (
auto *NewCalleeEdge =
3153 NewCallee->findEdgeFromCallee(OldCalleeEdge->Callee)) {
3154 NewCalleeEdge->getContextIds().insert(EdgeContextIdsToMove.
begin(),
3155 EdgeContextIdsToMove.
end());
3156 NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
3160 auto NewEdge = std::make_shared<ContextEdge>(
3161 OldCalleeEdge->Callee, NewCallee,
3162 computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove);
3163 NewCallee->CalleeEdges.push_back(NewEdge);
3164 NewEdge->Callee->CallerEdges.push_back(NewEdge);
// Old callee's alloc types shrink to whatever ids remain on it.
3168 OldCallee->AllocTypes = OldCallee->computeAllocType();
3171 OldCallee->emptyContextIds());
// Optional invariant checks (memprof-verify-nodes option).
3173 checkNode<DerivedCCG, FuncTy, CallTy>(OldCallee,
false);
3174 checkNode<DerivedCCG, FuncTy, CallTy>(NewCallee,
false);
3175 for (
const auto &OldCalleeEdge : OldCallee->CalleeEdges)
3176 checkNode<DerivedCCG, FuncTy, CallTy>(OldCalleeEdge->Callee,
3178 for (
const auto &NewCalleeEdge : NewCallee->CalleeEdges)
3179 checkNode<DerivedCCG, FuncTy, CallTy>(NewCalleeEdge->Callee,
// Mirror of moveEdgeToExistingCalleeClone on the caller side: detaches the
// callee edge at CalleeEdgeI from its current caller and attaches it to
// NewCaller (merging into an existing equivalent edge if present), then
// migrates the corresponding context ids up through the old caller's caller
// edges into NewCaller's, recomputing alloc types. Ends with optional
// invariant checks.
// NOTE(review): extracted listing — embedded line numbers and missing lines
// (EdgeContextIdsToMove computation, assertions); code bytes untouched.
3184template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3185void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3186    moveCalleeEdgeToNewCaller(EdgeIter &CalleeEdgeI, ContextNode *NewCaller) {
3187 auto Edge = *CalleeEdgeI;
3189 ContextNode *OldCaller = Edge->Caller;
3193 auto ExistingEdgeToNewCaller = NewCaller->findEdgeFromCallee(Edge->Callee);
// Always detach from the old caller first (keeps the iterator valid).
3195 CalleeEdgeI = OldCaller->CalleeEdges.erase(CalleeEdgeI);
3196 if (ExistingEdgeToNewCaller) {
// Merge into the pre-existing edge and retire this one.
3199 ExistingEdgeToNewCaller->getContextIds().insert(
3200 Edge->getContextIds().begin(), Edge->getContextIds().end());
3201 ExistingEdgeToNewCaller->AllocTypes |= Edge->AllocTypes;
3202 Edge->ContextIds.clear();
3204 Edge->Callee->eraseCallerEdge(Edge.get());
// Otherwise re-point the edge at the new caller.
3207 Edge->Caller = NewCaller;
3208 NewCaller->CalleeEdges.push_back(Edge);
3213 NewCaller->AllocTypes |= Edge->AllocTypes;
// A caller with no incoming edges yet is a freshly created node; its
// caller edges are built below rather than matched.
3220 bool IsNewNode = NewCaller->CallerEdges.empty();
3222 for (
auto &OldCallerEdge : OldCaller->CallerEdges) {
3227 set_subtract(OldCallerEdge->getContextIds(), EdgeContextIdsToMove);
3228 OldCallerEdge->AllocTypes =
3229 computeAllocType(OldCallerEdge->getContextIds());
3234 auto *ExistingCallerEdge =
3235 NewCaller->findEdgeFromCaller(OldCallerEdge->Caller);
3236 assert(IsNewNode || ExistingCallerEdge);
3237 if (ExistingCallerEdge) {
3238 ExistingCallerEdge->getContextIds().insert(EdgeContextIdsToMove.
begin(),
3239 EdgeContextIdsToMove.
end());
3240 ExistingCallerEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
3243 auto NewEdge = std::make_shared<ContextEdge>(
3244 NewCaller, OldCallerEdge->Caller,
3245 computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove);
3246 NewCaller->CallerEdges.push_back(NewEdge);
3247 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
// Old caller keeps only the alloc types of its remaining ids.
3251 OldCaller->AllocTypes = OldCaller->computeAllocType();
3254 OldCaller->emptyContextIds());
// Optional invariant checks (memprof-verify-nodes option).
3256 checkNode<DerivedCCG, FuncTy, CallTy>(OldCaller,
false);
3257 checkNode<DerivedCCG, FuncTy, CallTy>(NewCaller,
false);
3258 for (
const auto &OldCallerEdge : OldCaller->CallerEdges)
3259 checkNode<DerivedCCG, FuncTy, CallTy>(OldCallerEdge->Caller,
3261 for (
const auto &NewCallerEdge : NewCaller->CallerEdges)
3262 checkNode<DerivedCCG, FuncTy, CallTy>(NewCallerEdge->Caller,
// Walks up from Node (through a Visited set whose declaration is elided
// here), removing None-type callee edges on the node and on all of its
// clones, then recursing into callers; copies CallerEdges first because the
// removal can invalidate the live list, and skips edges removed mid-walk.
// NOTE(review): extracted listing; code bytes untouched.
3267template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3268void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3269    recursivelyRemoveNoneTypeCalleeEdges(
3275 removeNoneTypeCalleeEdges(
Node);
3277 for (
auto *Clone :
Node->Clones)
3278 recursivelyRemoveNoneTypeCalleeEdges(Clone, Visited);
// Iterate a copy: removals below may mutate Node->CallerEdges.
3282 auto CallerEdges =
Node->CallerEdges;
3283 for (
auto &Edge : CallerEdges) {
3285 if (Edge->isRemoved()) {
3289 recursivelyRemoveNoneTypeCalleeEdges(Edge->Caller, Visited);
// Entry point of the cloning analysis: runs identifyClones from every
// allocation node (passing the allocation's context ids), then sweeps the
// graph removing None-type callee edges left behind.
// NOTE(review): extracted listing — Visited declarations elided; code bytes
// untouched.
3293template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3294void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
3296 for (
auto &Entry : AllocationCallToContextNodeMap) {
3298 identifyClones(
Entry.second, Visited,
Entry.second->getContextIds());
3301 for (
auto &Entry : AllocationCallToContextNodeMap)
3302 recursivelyRemoveNoneTypeCalleeEdges(
Entry.second, Visited);
// Core cloning decision, applied bottom-up (callers first via recursion).
// After visiting callers, sorts this node's caller edges by a cloning
// priority over alloc types (cold/notcold combinations), detects context ids
// that recur across multiple caller edges (recursive contexts), and then for
// each caller edge decides whether the edge's allocation behavior already
// matches the node (keep), matches an existing clone (move to it), or needs
// a brand-new clone (moveEdgeToNewCalleeClone).
// NOTE(review): extracted listing — embedded line numbers and many missing
// lines (visited-set updates, alloc-type table entries, continue/break
// statements); code bytes untouched.
3315template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3316void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
3320 checkNode<DerivedCCG, FuncTy, CallTy>(
Node,
false);
// Nodes with no call cannot be cloned into any function.
3328 if (!
Node->hasCall())
// Recurse into callers first (copy the list: it may change underneath).
3343 auto CallerEdges =
Node->CallerEdges;
3344 for (
auto &Edge : CallerEdges) {
3346 if (Edge->isRemoved()) {
3351 if (!Visited.
count(Edge->Caller) && !Edge->Caller->CloneOf) {
3352 identifyClones(Edge->Caller, Visited, AllocContextIds);
// Priority table indexed by alloc-type bitmask; lower sorts first.
3375 const unsigned AllocTypeCloningPriority[] = { 3, 4,
// Stable sort keeps deterministic order for equal-priority edges; ties
// on alloc type are broken by the smallest context id.
3378 std::stable_sort(
Node->CallerEdges.begin(),
Node->CallerEdges.end(),
3379 [&](
const std::shared_ptr<ContextEdge> &
A,
3380 const std::shared_ptr<ContextEdge> &
B) {
3383 if (A->ContextIds.empty())
3389 if (B->ContextIds.empty())
3392 if (A->AllocTypes == B->AllocTypes)
3395 return *A->ContextIds.begin() < *B->ContextIds.begin();
3396 return AllocTypeCloningPriority[A->AllocTypes] <
3397 AllocTypeCloningPriority[B->AllocTypes];
// Ids appearing on more than one caller edge belong to recursive
// contexts and are excluded from per-edge cloning decisions.
3407 for (
auto &CE :
Node->CallerEdges) {
3410 AllCallerContextIds.
reserve(
CE->getContextIds().size());
3411 for (
auto Id :
CE->getContextIds())
3412 if (!AllCallerContextIds.
insert(Id).second)
3413 RecursiveContextIds.
insert(Id);
3422 for (
auto EI =
Node->CallerEdges.begin(); EI !=
Node->CallerEdges.end();) {
3423 auto CallerEdge = *EI;
// Callers without a call can't host a clone; skip them.
3432 if (!CallerEdge->Caller->hasCall()) {
// Restrict to this allocation's ids, minus recursive ones.
3439 auto CallerEdgeContextsForAlloc =
3441 if (!RecursiveContextIds.
empty())
3442 CallerEdgeContextsForAlloc =
3444 if (CallerEdgeContextsForAlloc.empty()) {
3448 auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
// Per-callee-edge alloc types restricted to this caller's contexts,
// used to test whether an existing node/clone already matches.
3452 std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
3453 CalleeEdgeAllocTypesForCallerEdge.reserve(
Node->CalleeEdges.size());
3454 for (
auto &CalleeEdge :
Node->CalleeEdges)
3455 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
3456 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
// If this caller already agrees with the node's behavior, no clone is
// needed for this edge.
3471 if (allocTypeToUse(CallerAllocTypeForAlloc) ==
3472 allocTypeToUse(
Node->AllocTypes) &&
3473 allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
3474 CalleeEdgeAllocTypesForCallerEdge,
Node->CalleeEdges)) {
// Try to reuse an existing clone with matching behavior.
3481 ContextNode *Clone =
nullptr;
3482 for (
auto *CurClone :
Node->Clones) {
3483 if (allocTypeToUse(CurClone->AllocTypes) !=
3484 allocTypeToUse(CallerAllocTypeForAlloc))
// Single-alloc-type clones match trivially (asserted consistent).
3491 assert(!BothSingleAlloc ||
3492 CurClone->AllocTypes == CallerAllocTypeForAlloc);
3498 if (BothSingleAlloc || allocTypesMatchClone<DerivedCCG, FuncTy, CallTy>(
3499 CalleeEdgeAllocTypesForCallerEdge, CurClone)) {
3507 moveEdgeToExistingCalleeClone(CallerEdge, Clone, &EI,
false,
3508 CallerEdgeContextsForAlloc);
// No reusable clone found: create a fresh one for this edge.
3511 moveEdgeToNewCalleeClone(CallerEdge, &EI, CallerEdgeContextsForAlloc);
3526 checkNode<DerivedCCG, FuncTy, CallTy>(
Node,
false);
// Applies the resolved allocation type to an IR call by attaching a
// "memprof" function attribute with the type string, and emits an
// optimization remark through the ORE getter.
// NOTE(review): extracted listing — embedded line numbers and missing lines
// (attribute construction, remark type); code bytes untouched.
3529void ModuleCallsiteContextGraph::updateAllocationCall(
3533      "memprof", AllocTypeString);
3534 cast<CallBase>(
Call.call())->addFnAttr(
A);
3535 OREGetter(
Call.call()->getFunction())
3537 <<
ore::NV(
"AllocationCall",
Call.call()) <<
" in clone "
3539 <<
" marked with memprof allocation attribute "
3540 <<
ore::NV(
"Attribute", AllocTypeString));
// Summary-index analog: records the resolved allocation type in the
// AllocInfo's Versions slot for this clone (the assignment line is elided
// in this listing); the clone number must be within the versions vector.
// NOTE(review): extracted listing; code bytes untouched.
3543void IndexCallsiteContextGraph::updateAllocationCall(
CallInfo &Call,
3545 auto *AI = cast<AllocInfo *>(
Call.call());
3547 assert(AI->Versions.size() >
Call.cloneNo());
// Reads back the allocation type from an IR call's "memprof" attribute:
// "cold" maps to the cold type, anything else to not-cold; the no-attribute
// default return is among the lines elided in this listing.
// NOTE(review): extracted listing; code bytes untouched.
3552ModuleCallsiteContextGraph::getAllocationCallType(
const CallInfo &Call)
const {
3553 const auto *CB = cast<CallBase>(
Call.call());
3554 if (!CB->getAttributes().hasFnAttr(
"memprof"))
3556 return CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
// Summary-index analog: reads the allocation type from the AllocInfo's
// Versions entry for this clone (the lookup/return line is elided in this
// listing); the clone number must be within the versions vector.
// NOTE(review): extracted listing; code bytes untouched.
3562IndexCallsiteContextGraph::getAllocationCallType(
const CallInfo &Call)
const {
3563 const auto *AI = cast<AllocInfo *>(
Call.call());
3564 assert(AI->Versions.size() >
Call.cloneNo());
3568void ModuleCallsiteContextGraph::updateCall(
CallInfo &CallerCall,
3569 FuncInfo CalleeFunc) {
3570 if (CalleeFunc.cloneNo() > 0)
3571 cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
3572 OREGetter(CallerCall.call()->getFunction())
3574 <<
ore::NV(
"Call", CallerCall.call()) <<
" in clone "
3575 <<
ore::NV(
"Caller", CallerCall.call()->getFunction())
3576 <<
" assigned to call function clone "
3577 <<
ore::NV(
"Callee", CalleeFunc.func()));
3580void IndexCallsiteContextGraph::updateCall(
CallInfo &CallerCall,
3581 FuncInfo CalleeFunc) {
3582 auto *CI = cast<CallsiteInfo *>(CallerCall.call());
3584 "Caller cannot be an allocation which should not have profiled calls");
3585 assert(CI->Clones.size() > CallerCall.cloneNo());
3586 CI->Clones[CallerCall.cloneNo()] = CalleeFunc.cloneNo();
3589CallsiteContextGraph<ModuleCallsiteContextGraph,
Function,
3591ModuleCallsiteContextGraph::cloneFunctionForCallsite(
3592 FuncInfo &Func,
CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
3593 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
3599 NewFunc->setName(
Name);
3600 for (
auto &Inst : CallsWithMetadataInFunc) {
3602 assert(Inst.cloneNo() == 0);
3603 CallMap[Inst] = {cast<Instruction>(VMap[Inst.call()]), CloneNo};
3605 OREGetter(
Func.func())
3607 <<
"created clone " <<
ore::NV(
"NewFunction", NewFunc));
3608 return {NewFunc, CloneNo};
3612 IndexCall>::FuncInfo
3613IndexCallsiteContextGraph::cloneFunctionForCallsite(
3614 FuncInfo &Func,
CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
3615 std::vector<CallInfo> &CallsWithMetadataInFunc,
unsigned CloneNo) {
3621 (isa<AllocInfo *>(
Call.call())
3630 for (
auto &Inst : CallsWithMetadataInFunc) {
3632 assert(Inst.cloneNo() == 0);
3633 if (
auto *AI = dyn_cast<AllocInfo *>(Inst.call())) {
3634 assert(AI->Versions.size() == CloneNo);
3637 AI->Versions.push_back(0);
3639 auto *CI = cast<CallsiteInfo *>(Inst.call());
3640 assert(CI && CI->Clones.size() == CloneNo);
3643 CI->Clones.push_back(0);
3645 CallMap[Inst] = {Inst.call(), CloneNo};
3647 return {
Func.func(), CloneNo};
3681template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
3682bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
3683 bool Changed =
false;
3691 auto RecordCalleeFuncOfCallsite = [&](ContextNode *
Caller,
3692 const FuncInfo &CalleeFunc) {
3694 CallsiteToCalleeFuncCloneMap[
Caller] = CalleeFunc;
3699 for (
auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
3700 FuncInfo OrigFunc(Func);
3704 std::map<FuncInfo, std::map<CallInfo, CallInfo>> FuncClonesToCallMap;
3705 for (
auto &Call : CallsWithMetadata) {
3706 ContextNode *
Node = getNodeForInst(Call);
3713 "Not having a call should have prevented cloning");
3717 std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;
3721 auto AssignCallsiteCloneToFuncClone = [&](
const FuncInfo &FuncClone,
3723 ContextNode *CallsiteClone,
3726 FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
3728 assert(FuncClonesToCallMap.count(FuncClone));
3729 std::map<CallInfo, CallInfo> &CallMap = FuncClonesToCallMap[FuncClone];
3731 if (CallMap.count(Call))
3732 CallClone = CallMap[
Call];
3733 CallsiteClone->setCall(CallClone);
3735 for (
auto &MatchingCall :
Node->MatchingCalls) {
3737 if (CallMap.count(MatchingCall))
3738 CallClone = CallMap[MatchingCall];
3740 MatchingCall = CallClone;
3747 std::deque<ContextNode *> ClonesWorklist;
3749 if (!
Node->emptyContextIds())
3750 ClonesWorklist.push_back(
Node);
3751 ClonesWorklist.insert(ClonesWorklist.end(),
Node->Clones.begin(),
3752 Node->Clones.end());
3757 unsigned NodeCloneCount = 0;
3758 while (!ClonesWorklist.empty()) {
3759 ContextNode *Clone = ClonesWorklist.front();
3760 ClonesWorklist.pop_front();
3763 checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
3769 if (FuncClonesToCallMap.size() < NodeCloneCount) {
3771 if (NodeCloneCount == 1) {
3776 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &E) {
3777 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
3781 FuncClonesToCallMap[OrigFunc] = {};
3782 AssignCallsiteCloneToFuncClone(
3783 OrigFunc, Call, Clone,
3784 AllocationCallToContextNodeMap.count(Call));
3785 for (
auto &CE : Clone->CallerEdges) {
3787 if (!
CE->Caller->hasCall())
3789 RecordCalleeFuncOfCallsite(
CE->Caller, OrigFunc);
3799 FuncInfo PreviousAssignedFuncClone;
3801 Clone->CallerEdges, [&](
const std::shared_ptr<ContextEdge> &E) {
3802 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
3804 bool CallerAssignedToCloneOfFunc =
false;
3805 if (EI != Clone->CallerEdges.end()) {
3806 const std::shared_ptr<ContextEdge> &Edge = *EI;
3807 PreviousAssignedFuncClone =
3808 CallsiteToCalleeFuncCloneMap[Edge->Caller];
3809 CallerAssignedToCloneOfFunc =
true;
3814 std::map<CallInfo, CallInfo> NewCallMap;
3815 unsigned CloneNo = FuncClonesToCallMap.size();
3816 assert(CloneNo > 0 &&
"Clone 0 is the original function, which "
3817 "should already exist in the map");
3818 FuncInfo NewFuncClone = cloneFunctionForCallsite(
3819 OrigFunc, Call, NewCallMap, CallsWithMetadata, CloneNo);
3820 FuncClonesToCallMap.emplace(NewFuncClone, std::move(NewCallMap));
3821 FunctionClonesAnalysis++;
3827 if (!CallerAssignedToCloneOfFunc) {
3828 AssignCallsiteCloneToFuncClone(
3829 NewFuncClone, Call, Clone,
3830 AllocationCallToContextNodeMap.count(Call));
3831 for (
auto &CE : Clone->CallerEdges) {
3833 if (!
CE->Caller->hasCall())
3835 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
3847 auto CallerEdges = Clone->CallerEdges;
3848 for (
auto CE : CallerEdges) {
3850 if (
CE->isRemoved()) {
3856 if (!
CE->Caller->hasCall())
3859 if (!CallsiteToCalleeFuncCloneMap.
count(
CE->Caller) ||
3863 CallsiteToCalleeFuncCloneMap[
CE->Caller] !=
3864 PreviousAssignedFuncClone)
3867 RecordCalleeFuncOfCallsite(
CE->Caller, NewFuncClone);
3880 auto CalleeEdges =
CE->Caller->CalleeEdges;
3881 for (
auto CalleeEdge : CalleeEdges) {
3884 if (CalleeEdge->isRemoved()) {
3889 ContextNode *
Callee = CalleeEdge->Callee;
3893 if (Callee == Clone || !
Callee->hasCall())
3895 ContextNode *NewClone = moveEdgeToNewCalleeClone(CalleeEdge);
3896 removeNoneTypeCalleeEdges(NewClone);
3899 removeNoneTypeCalleeEdges(Callee);
3904 if (CallsiteToCalleeFuncCloneMap.
count(Callee))
3905 RecordCalleeFuncOfCallsite(
3906 NewClone, CallsiteToCalleeFuncCloneMap[Callee]);
3915 OrigCall.setCloneNo(0);
3916 std::map<CallInfo, CallInfo> &CallMap =
3917 FuncClonesToCallMap[NewFuncClone];
3918 assert(CallMap.count(OrigCall));
3919 CallInfo NewCall(CallMap[OrigCall]);
3921 NewClone->setCall(NewCall);
3923 for (
auto &MatchingCall : NewClone->MatchingCalls) {
3924 CallInfo OrigMatchingCall(MatchingCall);
3925 OrigMatchingCall.setCloneNo(0);
3926 assert(CallMap.count(OrigMatchingCall));
3927 CallInfo NewCall(CallMap[OrigMatchingCall]);
3930 MatchingCall = NewCall;
3953 std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
3954 FuncInfo FuncCloneAssignedToCurCallsiteClone;
3957 for (
auto EI = Clone->CallerEdges.begin();
3958 EI != Clone->CallerEdges.end();) {
3961 if (!Edge->Caller->hasCall()) {
3967 if (CallsiteToCalleeFuncCloneMap.
count(Edge->Caller)) {
3968 FuncInfo FuncCloneCalledByCaller =
3969 CallsiteToCalleeFuncCloneMap[Edge->Caller];
3979 if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
3980 FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
3988 (FuncCloneAssignedToCurCallsiteClone &&
3989 FuncCloneAssignedToCurCallsiteClone !=
3990 FuncCloneCalledByCaller)) {
4005 if (FuncCloneToNewCallsiteCloneMap.count(
4006 FuncCloneCalledByCaller)) {
4007 ContextNode *NewClone =
4008 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
4009 moveEdgeToExistingCalleeClone(Edge, NewClone, &EI);
4011 removeNoneTypeCalleeEdges(NewClone);
4014 ContextNode *NewClone = moveEdgeToNewCalleeClone(Edge, &EI);
4015 removeNoneTypeCalleeEdges(NewClone);
4016 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
4019 ClonesWorklist.push_back(NewClone);
4020 assert(EI == Clone->CallerEdges.end() ||
4026 removeNoneTypeCalleeEdges(Clone);
4035 if (!FuncCloneAssignedToCurCallsiteClone) {
4036 FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
4038 AssignCallsiteCloneToFuncClone(
4039 FuncCloneCalledByCaller, Call, Clone,
4040 AllocationCallToContextNodeMap.count(Call));
4044 assert(FuncCloneAssignedToCurCallsiteClone ==
4045 FuncCloneCalledByCaller);
4054 if (!FuncCloneAssignedToCurCallsiteClone) {
4059 for (
auto &CF : FuncClonesToCallMap) {
4060 if (!FuncCloneToCurNodeCloneMap.count(CF.first)) {
4061 FuncCloneAssignedToCurCallsiteClone = CF.first;
4065 assert(FuncCloneAssignedToCurCallsiteClone);
4067 AssignCallsiteCloneToFuncClone(
4068 FuncCloneAssignedToCurCallsiteClone, Call, Clone,
4069 AllocationCallToContextNodeMap.count(Call));
4071 assert(FuncCloneToCurNodeCloneMap
4072 [FuncCloneAssignedToCurCallsiteClone] == Clone);
4074 RecordCalleeFuncOfCallsite(Edge->Caller,
4075 FuncCloneAssignedToCurCallsiteClone);
4082 checkNode<DerivedCCG, FuncTy, CallTy>(
Node);
4083 for (
const auto &PE :
Node->CalleeEdges)
4084 checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
4085 for (
const auto &CE :
Node->CallerEdges)
4086 checkNode<DerivedCCG, FuncTy, CallTy>(
CE->Caller);
4087 for (
auto *Clone :
Node->Clones) {
4088 checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
4089 for (
const auto &PE : Clone->CalleeEdges)
4090 checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
4091 for (
const auto &CE : Clone->CallerEdges)
4092 checkNode<DerivedCCG, FuncTy, CallTy>(
CE->Caller);
4101 auto UpdateCalls = [&](ContextNode *
Node,
4103 auto &&UpdateCalls) {
4108 for (
auto *Clone :
Node->Clones)
4109 UpdateCalls(Clone, Visited, UpdateCalls);
4111 for (
auto &Edge :
Node->CallerEdges)
4112 UpdateCalls(Edge->Caller, Visited, UpdateCalls);
4116 if (!
Node->hasCall() ||
Node->emptyContextIds())
4119 if (
Node->IsAllocation) {
4120 auto AT = allocTypeToUse(
Node->AllocTypes);
4126 !ContextIdToContextSizeInfos.empty()) {
4129 for (
auto Id :
Node->getContextIds()) {
4130 auto TypeI = ContextIdToAllocationType.find(Id);
4131 assert(TypeI != ContextIdToAllocationType.end());
4132 auto CSI = ContextIdToContextSizeInfos.find(Id);
4133 if (CSI != ContextIdToContextSizeInfos.end()) {
4134 for (
auto &Info : CSI->second) {
4137 TotalCold +=
Info.TotalSize;
4144 updateAllocationCall(
Node->Call, AT);
4149 if (!CallsiteToCalleeFuncCloneMap.
count(
Node))
4152 auto CalleeFunc = CallsiteToCalleeFuncCloneMap[
Node];
4153 updateCall(
Node->Call, CalleeFunc);
4155 for (
auto &Call :
Node->MatchingCalls)
4156 updateCall(Call, CalleeFunc);
4165 for (
auto &Entry : AllocationCallToContextNodeMap)
4166 UpdateCalls(
Entry.second, Visited, UpdateCalls);
4180 FunctionsClonedThinBackend++;
4181 for (
unsigned I = 1;
I < NumClones;
I++) {
4182 VMaps.
emplace_back(std::make_unique<ValueToValueMapTy>());
4184 FunctionClonesThinBackend++;
4187 for (
auto &BB : *NewF) {
4188 for (
auto &Inst : BB) {
4189 Inst.setMetadata(LLVMContext::MD_memprof,
nullptr);
4190 Inst.setMetadata(LLVMContext::MD_callsite,
nullptr);
4194 auto *PrevF = M.getFunction(
Name);
4198 assert(PrevF->isDeclaration());
4199 NewF->takeName(PrevF);
4200 PrevF->replaceAllUsesWith(NewF);
4201 PrevF->eraseFromParent();
4203 NewF->setName(
Name);
4205 <<
"created clone " <<
ore::NV(
"NewFunction", NewF));
4208 if (!FuncToAliasMap.count(&
F))
4210 for (
auto *
A : FuncToAliasMap[&
F]) {
4212 auto *PrevA = M.getNamedAlias(
Name);
4214 A->getType()->getPointerAddressSpace(),
4215 A->getLinkage(),
Name, NewF);
4216 NewA->copyAttributesFrom(
A);
4220 assert(PrevA->isDeclaration());
4221 NewA->takeName(PrevA);
4222 PrevA->replaceAllUsesWith(NewA);
4223 PrevA->eraseFromParent();
4265bool MemProfContextDisambiguation::initializeIndirectCallPromotionInfo(
4267 ICallAnalysis = std::make_unique<ICallPromotionAnalysis>();
4268 Symtab = std::make_unique<InstrProfSymtab>();
4279 if (
Error E = Symtab->create(M,
true,
false)) {
4280 std::string SymtabFailure =
toString(std::move(
E));
4281 M.getContext().emitError(
"Failed to create symtab: " + SymtabFailure);
4287bool MemProfContextDisambiguation::applyImport(
Module &M) {
4289 bool Changed =
false;
4294 std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
4296 for (
auto &
A :
M.aliases()) {
4297 auto *Aliasee =
A.getAliaseeObject();
4298 if (
auto *
F = dyn_cast<Function>(Aliasee))
4299 FuncToAliasMap[
F].insert(&
A);
4302 if (!initializeIndirectCallPromotionInfo(M))
4312 bool ClonesCreated =
false;
4313 unsigned NumClonesCreated = 0;
4314 auto CloneFuncIfNeeded = [&](
unsigned NumClones) {
4324 if (ClonesCreated) {
4325 assert(NumClonesCreated == NumClones);
4332 ClonesCreated =
true;
4333 NumClonesCreated = NumClones;
4339 CloneFuncIfNeeded(
StackNode.Clones.size());
4346 auto CalleeOrigName = CalledFunction->getName();
4347 for (
unsigned J = 0; J <
StackNode.Clones.size(); J++) {
4352 auto NewF =
M.getOrInsertFunction(
4354 CalledFunction->getFunctionType());
4360 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
4363 <<
ore::NV(
"Call", CBClone) <<
" in clone "
4365 <<
" assigned to call function clone "
4366 <<
ore::NV(
"Callee", NewF.getCallee()));
4384 auto SrcModuleMD =
F.getMetadata(
"thinlto_src_module");
4386 "enable-import-metadata is needed to emit thinlto_src_module");
4388 dyn_cast<MDString>(SrcModuleMD->getOperand(0))->getString();
4390 if (GVS->modulePath() == SrcModule) {
4391 GVSummary = GVS.get();
4395 assert(GVSummary && GVSummary->modulePath() == SrcModule);
4400 if (isa<AliasSummary>(GVSummary))
4403 auto *
FS = cast<FunctionSummary>(GVSummary->getBaseObject());
4405 if (
FS->allocs().empty() &&
FS->callsites().empty())
4408 auto SI =
FS->callsites().begin();
4409 auto AI =
FS->allocs().begin();
4417 for (
auto CallsiteIt =
FS->callsites().rbegin();
4418 CallsiteIt !=
FS->callsites().rend(); CallsiteIt++) {
4419 auto &Callsite = *CallsiteIt;
4423 if (!Callsite.StackIdIndices.empty())
4425 MapTailCallCalleeVIToCallsite.
insert({Callsite.Callee, Callsite});
4434 for (
auto &BB :
F) {
4435 for (
auto &
I : BB) {
4436 auto *CB = dyn_cast<CallBase>(&
I);
4441 auto *CalledValue = CB->getCalledOperand();
4442 auto *CalledFunction = CB->getCalledFunction();
4443 if (CalledValue && !CalledFunction) {
4444 CalledValue = CalledValue->stripPointerCasts();
4446 CalledFunction = dyn_cast<Function>(CalledValue);
4450 if (
auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
4451 assert(!CalledFunction &&
4452 "Expected null called function in callsite for alias");
4453 CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
4457 I.getMetadata(LLVMContext::MD_callsite));
4458 auto *MemProfMD =
I.getMetadata(LLVMContext::MD_memprof);
4462 if (CB->getAttributes().hasFnAttr(
"memprof")) {
4464 CB->getAttributes().getFnAttr(
"memprof").getValueAsString() ==
"cold"
4465 ? AllocTypeColdThinBackend++
4466 : AllocTypeNotColdThinBackend++;
4467 OrigAllocsThinBackend++;
4468 AllocVersionsThinBackend++;
4469 if (!MaxAllocVersionsThinBackend)
4470 MaxAllocVersionsThinBackend = 1;
4477 auto &AllocNode = *(AI++);
4481 auto MIBIter = AllocNode.MIBs.begin();
4482 for (
auto &MDOp : MemProfMD->operands()) {
4483 assert(MIBIter != AllocNode.MIBs.end());
4485 MIBIter->StackIdIndices.begin();
4486 auto *MIBMD = cast<const MDNode>(MDOp);
4490 auto ContextIterBegin =
4494 (ContextIterBegin != StackContext.
end() &&
4495 *ContextIterBegin == 0)
4498 for (
auto ContextIter = ContextIterBegin;
4499 ContextIter != StackContext.
end(); ++ContextIter) {
4503 if (LastStackContextId == *ContextIter)
4505 LastStackContextId = *ContextIter;
4506 assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
4515 CloneFuncIfNeeded(AllocNode.Versions.size());
4517 OrigAllocsThinBackend++;
4518 AllocVersionsThinBackend += AllocNode.Versions.size();
4519 if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
4520 MaxAllocVersionsThinBackend = AllocNode.Versions.size();
4530 if (AllocNode.Versions.size() == 1 &&
4536 UnclonableAllocsThinBackend++;
4542 return Type == ((uint8_t)AllocationType::NotCold |
4543 (uint8_t)AllocationType::Cold);
4547 for (
unsigned J = 0; J < AllocNode.Versions.size(); J++) {
4553 : AllocTypeNotColdThinBackend++;
4565 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
4568 <<
ore::NV(
"AllocationCall", CBClone) <<
" in clone "
4570 <<
" marked with memprof allocation attribute "
4571 <<
ore::NV(
"Attribute", AllocTypeString));
4573 }
else if (!CallsiteContext.empty()) {
4574 if (!CalledFunction) {
4577 auto *CI = dyn_cast<CallInst>(CB);
4578 assert(!CI || !CI->isInlineAsm());
4581 assert(CalledValue && !isa<Constant>(CalledValue));
4588 recordICPInfo(CB,
FS->callsites(), SI, ICallAnalysisInfo);
4594 CloneFuncIfNeeded(NumClones);
4599 assert(SI !=
FS->callsites().end());
4605 auto StackIdIndexIter =
StackNode.StackIdIndices.begin();
4606 for (
auto StackId : CallsiteContext) {
4614 CloneCallsite(
StackNode, CB, CalledFunction);
4616 }
else if (CB->isTailCall() && CalledFunction) {
4621 if (CalleeVI && MapTailCallCalleeVIToCallsite.
count(CalleeVI)) {
4622 auto Callsite = MapTailCallCalleeVIToCallsite.
find(CalleeVI);
4623 assert(Callsite != MapTailCallCalleeVIToCallsite.
end());
4624 CloneCallsite(Callsite->second, CB, CalledFunction);
4631 performICP(M,
FS->callsites(), VMaps, ICallAnalysisInfo, ORE);
4641 for (
auto &BB :
F) {
4642 for (
auto &
I : BB) {
4643 if (!isa<CallBase>(
I))
4645 I.setMetadata(LLVMContext::MD_memprof,
nullptr);
4646 I.setMetadata(LLVMContext::MD_callsite,
nullptr);
4654unsigned MemProfContextDisambiguation::recordICPInfo(
4661 auto CandidateProfileData =
4662 ICallAnalysis->getPromotionCandidatesForInstruction(CB, TotalCount,
4664 if (CandidateProfileData.empty())
4670 bool ICPNeeded =
false;
4671 unsigned NumClones = 0;
4672 size_t CallsiteInfoStartIndex = std::distance(AllCallsites.
begin(), SI);
4673 for (
const auto &Candidate : CandidateProfileData) {
4675 auto CalleeValueInfo =
4680 assert(!CalleeValueInfo ||
SI->Callee == CalleeValueInfo);
4687 [](
unsigned CloneNo) { return CloneNo != 0; });
4697 ICallAnalysisInfo.
push_back({CB, CandidateProfileData.vec(), NumCandidates,
4698 TotalCount, CallsiteInfoStartIndex});
4702void MemProfContextDisambiguation::performICP(
4704 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
4713 for (
auto &
Info : ICallAnalysisInfo) {
4715 auto CallsiteIndex =
Info.CallsiteInfoStartIndex;
4716 auto TotalCount =
Info.TotalCount;
4717 unsigned NumPromoted = 0;
4718 unsigned NumClones = 0;
4720 for (
auto &Candidate :
Info.CandidateProfileData) {
4721 auto &
StackNode = AllCallsites[CallsiteIndex++];
4731 Function *TargetFunction = Symtab->getFunction(Candidate.Value);
4732 if (TargetFunction ==
nullptr ||
4741 <<
"Memprof cannot promote indirect call: target with md5sum "
4742 <<
ore::NV(
"target md5sum", Candidate.Value) <<
" not found";
4751 const char *Reason =
nullptr;
4755 <<
"Memprof cannot promote indirect call to "
4756 <<
ore::NV(
"TargetFunction", TargetFunction)
4757 <<
" with count of " <<
ore::NV(
"TotalCount", TotalCount)
4769 for (
unsigned J = 0; J < NumClones; J++) {
4772 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
4778 TotalCount, isSamplePGO, &ORE);
4779 auto *TargetToUse = TargetFunction;
4784 cast<Function>(
M.getOrInsertFunction(
4792 <<
ore::NV(
"Call", CBClone) <<
" in clone "
4794 <<
" promoted and assigned to call function clone "
4795 <<
ore::NV(
"Callee", TargetToUse));
4799 TotalCount -= Candidate.Count;
4805 for (
unsigned J = 0; J < NumClones; J++) {
4808 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
4810 CBClone->
setMetadata(LLVMContext::MD_prof,
nullptr);
4813 if (TotalCount != 0)
4816 TotalCount, IPVK_IndirectCallTarget,
Info.NumCandidates);
4821template <
typename DerivedCCG,
typename FuncTy,
typename CallTy>
4822bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
4824 dbgs() <<
"CCG before cloning:\n";
4828 exportToDot(
"postbuild");
4841 dbgs() <<
"CCG after cloning:\n";
4845 exportToDot(
"cloned");
4847 bool Changed = assignFunctions();
4850 dbgs() <<
"CCG after assigning function clones:\n";
4854 exportToDot(
"clonefuncassign");
4857 printTotalSizes(
errs());
4862bool MemProfContextDisambiguation::processModule(
4869 return applyImport(M);
4882 ModuleCallsiteContextGraph CCG(M, OREGetter);
4883 return CCG.process();
4888 : ImportSummary(Summary), isSamplePGO(isSamplePGO) {
4889 if (ImportSummary) {
4899 auto ReadSummaryFile =
4901 if (!ReadSummaryFile) {
4908 if (!ImportSummaryForTestingOrErr) {
4914 ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
4915 ImportSummary = ImportSummaryForTesting.get();
4924 if (!processModule(M, OREGetter))
4941 IndexCallsiteContextGraph CCG(
Index, isPrevailing);
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
#define LLVM_ATTRIBUTE_UNUSED
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Module.h This file contains the declarations for the Module class.
This file implements a map that provides insertion order iteration.
static SmallVector< std::unique_ptr< ValueToValueMapTy >, 4 > createFunctionClones(Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE, std::map< const Function *, SmallPtrSet< const GlobalAlias *, 1 > > &FuncToAliasMap)
static cl::opt< unsigned > TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5), cl::Hidden, cl::desc("Max depth to recursively search for missing " "frames through tail calls."))
static bool isMemProfClone(const Function &F)
static cl::opt< bool > ExportToDot("memprof-export-to-dot", cl::init(false), cl::Hidden, cl::desc("Export graph to dot files."))
static void checkEdge(const std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > &Edge)
bool checkColdOrNotCold(uint8_t AllocType)
static ValueInfo findValueInfoForFunc(const Function &F, const Module &M, const ModuleSummaryIndex *ImportSummary)
cl::opt< unsigned > MinClonedColdBytePercent
static std::string getAllocTypeString(uint8_t AllocTypes)
cl::opt< bool > MemProfReportHintedSizes
static cl::opt< bool > AllowRecursiveCallsites("memprof-allow-recursive-callsites", cl::init(false), cl::Hidden, cl::desc("Allow cloning of callsites involved in recursive cycles"))
static cl::opt< bool > VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, cl::desc("Perform verification checks on CallingContextGraph."))
static void checkNode(const ContextNode< DerivedCCG, FuncTy, CallTy > *Node, bool CheckEdges=true)
static std::string getMemProfFuncName(Twine Base, unsigned CloneNo)
static cl::opt< std::string > MemProfImportSummary("memprof-import-summary", cl::desc("Import summary to use for testing the ThinLTO backend via opt"), cl::Hidden)
static const std::string MemProfCloneSuffix
static cl::opt< bool > AllowRecursiveContexts("memprof-allow-recursive-contexts", cl::init(true), cl::Hidden, cl::desc("Allow cloning of contexts through recursive cycles"))
static cl::opt< std::string > DotFilePathPrefix("memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Specify the path prefix of the MemProf dot files."))
static cl::opt< bool > VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, cl::desc("Perform frequent verification checks on nodes."))
static cl::opt< bool > DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, cl::desc("Dump CallingContextGraph to stdout after each stage."))
cl::opt< bool > MemProfReportHintedSizes("memprof-report-hinted-sizes", cl::init(false), cl::Hidden, cl::desc("Report total allocation sizes of hinted allocations"))
This is the interface to build a ModuleSummaryIndex for a module.
ModuleSummaryIndex.h This file contains the declarations the classes that hold the module index and s...
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
FunctionAnalysisManager FAM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines generic set operations that may be used on set's of different types,...
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
void print(OutputBuffer &OB) const
DomTreeNode::const_iterator end() const
Alias summary information.
ValueInfo getAliaseeVI() const
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Implements a dense probed hash-table based set.
Lightweight error class with error context and mandatory checking.
Function summary information to aid decisions and implementation of importing.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
static GlobalAlias * create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, Constant *Aliasee, Module *Parent)
If a parent module is specified, the alias is automatically inserted into the end of the specified mo...
Function and variable summary information to aid decisions and implementation of importing.
static bool isLocalLinkage(LinkageTypes Linkage)
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
GUID getGUID() const
Return a 64-bit global unique ID constructed from global value name (i.e.
std::string getGlobalIdentifier() const
Return the modified name for this global value suitable to be used as the key for a global lookup (e....
@ InternalLinkage
Rename collisions when linking (static functions).
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
const Function * getFunction() const
Return the function this instruction belongs to.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
const MDOperand & getOperand(unsigned I) const
unsigned getNumOperands() const
Return number of MDNode operands.
This class implements a map that also provides access to all stored values in a deterministic order.
size_type count(const KeyT &Key) const
MemProfContextDisambiguation(const ModuleSummaryIndex *Summary=nullptr, bool isSamplePGO=false)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Class to hold module path string table and global value map, and encapsulate methods for operating on...
static StringRef getOriginalNameBeforePromote(StringRef Name)
Helper to obtain the unpromoted name for a global value (or the original name if not promoted).
ValueInfo getValueInfo(const GlobalValueSummaryMapTy::value_type &R) const
Return a ValueInfo for the index value_type (convenient when iterating index).
uint64_t getStackIdAtIndex(unsigned Index) const
GlobalValueSummary * findSummaryInModule(ValueInfo VI, StringRef ModuleId) const
Find the summary for ValueInfo VI in module ModuleId, or nullptr if not found.
GlobalValue::GUID getGUIDFromOriginalID(GlobalValue::GUID OriginalID) const
Return the GUID for OriginalId in the OidGuidMap.
A Module instance is used to store all the information related to an LLVM module.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM Value Representation.
StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
void reserve(size_t Size)
Grow the DenseSet so that it can contain at least NumEntries items before resizing again.
void swap(DenseSetImpl &RHS)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
Helper class to iterate through stack ids in both metadata (memprof MIB and callsite) and the corresp...
CallStackIterator end() const
CallStackIterator beginAfterSharedPrefix(CallStack &Other)
This class implements an extremely fast bulk output stream that can only output to a stream.
@ C
The default llvm calling convention, compatible with C.
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
AllocationType getMIBAllocType(const MDNode *MIB)
Returns the allocation type from an MIB metadata node.
bool hasSingleAllocType(uint8_t AllocTypes)
True if the AllocTypes bitmask contains just a single type.
std::string getAllocTypeAttributeString(AllocationType Type)
Returns the string to use in attributes with the given type.
MDNode * getMIBStackNode(const MDNode *MIB)
Returns the stack node from an MIB metadata node.
DiagnosticInfoOptimizationBase::Argument NV
CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner={})
Log all errors (if any) in E to OS.
bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
cl::opt< bool > MemProfRequireDefinitionForPromotion("memprof-require-definition-for-promotion", cl::init(false), cl::Hidden, cl::desc("Require target function definition when promoting indirect calls"))
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void set_subtract(S1Ty &S1, const S2Ty &S2)
set_subtract(A, B) - Compute A := A - B
raw_ostream & WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames=false, const Twine &Title="")
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
bool set_union(S1Ty &S1, const S2Ty &S2)
set_union(A, B) - Compute A := A u B, return whether A changed.
cl::opt< bool > SupportsHotColdNew
Indicate we are linking with an allocator that supports hot/cold operator new interfaces.
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return A ^ B
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
S1Ty set_difference(const S1Ty &S1, const S2Ty &S2)
set_difference(A, B) - Return A - B
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Expected< T > errorOrToExpected(ErrorOr< T > &&EO)
Convert an ErrorOr<T> to an Expected<T>.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
const char * toString(DWARFSectionKind Kind)
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
bool mayHaveMemprofSummary(const CallBase *CB)
Returns true if the instruction could have memprof metadata, used to ensure consistency between summa...
Implement std::hash so that hash_code can be used in STL containers.
DOTGraphTraits(bool IsSimple=false)
typename GTraits::NodeRef NodeRef
static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, GraphType)
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
typename GTraits::ChildIteratorType ChildIteratorType
static bool isNodeHidden(NodeRef Node, GraphType)
static std::string getNodeLabel(NodeRef Node, GraphType G)
static std::string getNodeAttributes(NodeRef Node, GraphType)
static NodeRef getNode(const NodePtrTy &P)
static const ContextNode< DerivedCCG, FuncTy, CallTy > * GetCallee(const EdgePtrTy &P)
static ChildIteratorType child_end(NodeRef N)
std::shared_ptr< ContextEdge< DerivedCCG, FuncTy, CallTy > > EdgePtrTy
const CallsiteContextGraph< DerivedCCG, FuncTy, CallTy > * GraphType
const ContextNode< DerivedCCG, FuncTy, CallTy > * NodeRef
static ChildIteratorType child_begin(NodeRef N)
std::unique_ptr< ContextNode< DerivedCCG, FuncTy, CallTy > > NodePtrTy
static NodeRef getEntryNode(GraphType G)
static nodes_iterator nodes_begin(GraphType G)
static nodes_iterator nodes_end(GraphType G)
Summary of memprof metadata on allocations.
SmallVector< uint8_t > Versions
Summary of memprof callsite metadata.
SmallVector< unsigned > Clones
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
An information struct used to provide DenseMap with the various necessary components for a given valu...
Used in the streaming interface as the general argument type.
typename GraphType::UnknownGraphTypeError NodeRef
Struct that holds a reference to a particular GUID in a global value summary.
ArrayRef< std::unique_ptr< GlobalValueSummary > > getSummaryList() const
GlobalValue::GUID getGUID() const
static SimpleType getSimplifiedValue(IndexCall &Val)
static SimpleType getSimplifiedValue(const IndexCall &Val)
Define a template that can be specialized by smart pointers to reflect the fact that they are automat...