1//==-- MemProfContextDisambiguation.cpp - Disambiguate contexts -------------=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements support for context disambiguation of allocation
10// calls for profile guided heap optimization. Specifically, it uses Memprof
11// profiles which indicate context specific allocation behavior (currently
12// distinguishing cold vs hot memory allocations). Cloning is performed to
13// expose the cold allocation call contexts, and the allocation calls are
14// subsequently annotated with an attribute for later transformation.
15//
16// The transformations can be performed either directly on IR (regular LTO), or
17// on a ThinLTO index (and later applied to the IR during the ThinLTO backend).
18// Both types of LTO operate on the same base graph representation, which
19// uses CRTP to support either IR or Index formats.
20//
21//===----------------------------------------------------------------------===//
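//
// Illustrative example (hypothetical code, not from this pass): suppose a
// helper is reached from both a cold and a not-cold calling context:
//
//   char *buf() { return new char[10]; } // MIB profile: cold via A, not-cold via B
//   void A() { use(buf()); }             // allocation is cold when reached from A
//   void B() { use(buf()); }             // allocation is not-cold when reached from B
//
// Cloning creates a second copy of buf() so that A and B call different
// clones, and each clone's allocation call can then be annotated with a
// single cold or not-cold hint for the later transformation.
//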
22
24#include "llvm/ADT/DenseMap.h"
25#include "llvm/ADT/DenseSet.h"
26#include "llvm/ADT/MapVector.h"
29#include "llvm/ADT/SmallSet.h"
31#include "llvm/ADT/Statistic.h"
37#include "llvm/IR/Module.h"
39#include "llvm/Pass.h"
43#include "llvm/Transforms/IPO.h"
47#include <deque>
48#include <sstream>
49#include <unordered_map>
50#include <vector>
51using namespace llvm;
52using namespace llvm::memprof;
53
54#define DEBUG_TYPE "memprof-context-disambiguation"
55
56STATISTIC(FunctionClonesAnalysis,
57 "Number of function clones created during whole program analysis");
58STATISTIC(FunctionClonesThinBackend,
59 "Number of function clones created during ThinLTO backend");
60STATISTIC(FunctionsClonedThinBackend,
61 "Number of functions that had clones created during ThinLTO backend");
62STATISTIC(AllocTypeNotCold, "Number of not cold static allocations (possibly "
63 "cloned) during whole program analysis");
64STATISTIC(AllocTypeCold, "Number of cold static allocations (possibly cloned) "
65 "during whole program analysis");
66STATISTIC(AllocTypeNotColdThinBackend,
67 "Number of not cold static allocations (possibly cloned) during "
68 "ThinLTO backend");
69STATISTIC(AllocTypeColdThinBackend, "Number of cold static allocations "
70 "(possibly cloned) during ThinLTO backend");
71STATISTIC(OrigAllocsThinBackend,
72 "Number of original (not cloned) allocations with memprof profiles "
73 "during ThinLTO backend");
74STATISTIC(
75 AllocVersionsThinBackend,
76 "Number of allocation versions (including clones) during ThinLTO backend");
77STATISTIC(MaxAllocVersionsThinBackend,
78 "Maximum number of allocation versions created for an original "
79 "allocation during ThinLTO backend");
80STATISTIC(UnclonableAllocsThinBackend,
81 "Number of unclonable ambigous allocations during ThinLTO backend");
82STATISTIC(RemovedEdgesWithMismatchedCallees,
83 "Number of edges removed due to mismatched callees (profiled vs IR)");
84STATISTIC(FoundProfiledCalleeCount,
85 "Number of profiled callees found via tail calls");
86STATISTIC(FoundProfiledCalleeDepth,
87 "Aggregate depth of profiled callees found via tail calls");
88STATISTIC(FoundProfiledCalleeMaxDepth,
89 "Maximum depth of profiled callees found via tail calls");
90STATISTIC(FoundProfiledCalleeNonUniquelyCount,
91 "Number of profiled callees found via multiple tail call chains");
92
94 "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden,
95 cl::value_desc("filename"),
96 cl::desc("Specify the path prefix of the MemProf dot files."));
97
98static cl::opt<bool> ExportToDot("memprof-export-to-dot", cl::init(false),
99 cl::Hidden,
100 cl::desc("Export graph to dot files."));
101
102static cl::opt<bool>
103 DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden,
104 cl::desc("Dump CallingContextGraph to stdout after each stage."));
105
106static cl::opt<bool>
107 VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden,
108 cl::desc("Perform verification checks on CallingContextGraph."));
109
110static cl::opt<bool>
111 VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden,
112 cl::desc("Perform frequent verification checks on nodes."));
113
115 "memprof-import-summary",
116 cl::desc("Import summary to use for testing the ThinLTO backend via opt"),
117 cl::Hidden);
118
119static cl::opt<unsigned>
120 TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5),
121 cl::Hidden,
122 cl::desc("Max depth to recursively search for missing "
123 "frames through tail calls."));
124
125// Optionally enable cloning of callsites involved with recursive cycles
127 "memprof-allow-recursive-callsites", cl::init(false), cl::Hidden,
128 cl::desc("Allow cloning of callsites involved in recursive cycles"));
129
130// When disabled, try to detect and prevent cloning of recursive contexts.
131// This is only necessary until we support cloning through recursive cycles.
132// Leave on by default for now, as disabling requires a little bit of compile
133// time overhead and doesn't affect correctness; it will just inflate the cold
134// hinted bytes reporting a bit when -memprof-report-hinted-sizes is enabled.
136 "memprof-allow-recursive-contexts", cl::init(true), cl::Hidden,
137 cl::desc("Allow cloning of contexts through recursive cycles"));
138
139namespace llvm {
141 "enable-memprof-context-disambiguation", cl::init(false), cl::Hidden,
142 cl::ZeroOrMore, cl::desc("Enable MemProf context disambiguation"));
143
144// Indicate we are linking with an allocator that supports hot/cold operator
145// new interfaces.
147 "supports-hot-cold-new", cl::init(false), cl::Hidden,
148 cl::desc("Linking with hot/cold operator new interfaces"));
149
151 "memprof-require-definition-for-promotion", cl::init(false), cl::Hidden,
152 cl::desc(
153 "Require target function definition when promoting indirect calls"));
154} // namespace llvm
155
156extern cl::opt<bool> MemProfReportHintedSizes;
157extern cl::opt<unsigned> MinClonedColdBytePercent;
158
159namespace {
160/// CRTP base for graphs built from either IR or ThinLTO summary index.
161///
162/// The graph represents the call contexts in all memprof metadata on allocation
163/// calls, with nodes for the allocations themselves, as well as for the calls
164/// in each context. The graph is initially built from the allocation memprof
165/// metadata (or summary) MIBs. It is then updated to match calls with callsite
166/// metadata onto the nodes, updating it to reflect any inlining performed on
167/// those calls.
168///
169/// Each MIB (representing an allocation's call context with allocation
170/// behavior) is assigned a unique context id during the graph build. The edges
171/// and nodes in the graph are decorated with the context ids they carry. This
172/// is used to correctly update the graph when cloning is performed so that we
173/// can uniquify the context for a single (possibly cloned) allocation.
174template <typename DerivedCCG, typename FuncTy, typename CallTy>
175class CallsiteContextGraph {
176public:
177 CallsiteContextGraph() = default;
178 CallsiteContextGraph(const CallsiteContextGraph &) = default;
179 CallsiteContextGraph(CallsiteContextGraph &&) = default;
180
181 /// Main entry point to perform analysis and transformations on graph.
182 bool process();
183
184 /// Perform cloning on the graph necessary to uniquely identify the allocation
185 /// behavior of an allocation based on its context.
186 void identifyClones();
187
188 /// Assign callsite clones to functions, cloning functions as needed to
189 /// accommodate the combinations of their callsite clones reached by callers.
190 /// For regular LTO this clones functions and callsites in the IR, but for
191 /// ThinLTO the cloning decisions are noted in the summaries and later applied
192 /// in applyImport.
193 bool assignFunctions();
194
195 void dump() const;
196 void print(raw_ostream &OS) const;
197 void printTotalSizes(raw_ostream &OS) const;
198
199 friend raw_ostream &operator<<(raw_ostream &OS,
200 const CallsiteContextGraph &CCG) {
201 CCG.print(OS);
202 return OS;
203 }
204
205 friend struct GraphTraits<
206 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
207 friend struct DOTGraphTraits<
208 const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
209
210 void exportToDot(std::string Label) const;
211
212 /// Represents a function clone via FuncTy pointer and clone number pair.
213 struct FuncInfo final
214 : public std::pair<FuncTy *, unsigned /*Clone number*/> {
215 using Base = std::pair<FuncTy *, unsigned>;
216 FuncInfo(const Base &B) : Base(B) {}
217 FuncInfo(FuncTy *F = nullptr, unsigned CloneNo = 0) : Base(F, CloneNo) {}
218 explicit operator bool() const { return this->first != nullptr; }
219 FuncTy *func() const { return this->first; }
220 unsigned cloneNo() const { return this->second; }
221 };
222
223 /// Represents a callsite clone via CallTy and clone number pair.
224 struct CallInfo final : public std::pair<CallTy, unsigned /*Clone number*/> {
225 using Base = std::pair<CallTy, unsigned>;
226 CallInfo(const Base &B) : Base(B) {}
227 CallInfo(CallTy Call = nullptr, unsigned CloneNo = 0)
228 : Base(Call, CloneNo) {}
229 explicit operator bool() const { return (bool)this->first; }
230 CallTy call() const { return this->first; }
231 unsigned cloneNo() const { return this->second; }
232 void setCloneNo(unsigned N) { this->second = N; }
233 void print(raw_ostream &OS) const {
234 if (!operator bool()) {
235 assert(!cloneNo());
236 OS << "null Call";
237 return;
238 }
239 call()->print(OS);
240 OS << "\t(clone " << cloneNo() << ")";
241 }
242 void dump() const {
243 print(dbgs());
244 dbgs() << "\n";
245 }
246 friend raw_ostream &operator<<(raw_ostream &OS, const CallInfo &Call) {
247 Call.print(OS);
248 return OS;
249 }
250 };
251
252 struct ContextEdge;
253
254 /// Node in the Callsite Context Graph
255 struct ContextNode {
256 // Keep this for now since in the IR case, where we have an Instruction*, it
257 // is not as immediately discoverable. Used for printing richer information
258 // when dumping the graph.
259 bool IsAllocation;
260
261 // Keeps track of when the Call was reset to null because there was
262 // recursion.
263 bool Recursive = false;
264
265 // This will be formed by ORing together the AllocationType enum values
266 // for contexts including this node.
267 uint8_t AllocTypes = 0;
268
269 // The corresponding allocation or interior call. This is the primary call
270 // for which we have created this node.
271 CallInfo Call;
272
273 // List of other calls that can be treated the same as the primary call
274 // through cloning. I.e. located in the same function and have the same
275 // (possibly pruned) stack ids. They will be updated the same way as the
276 // primary call when assigning to function clones.
277 SmallVector<CallInfo, 0> MatchingCalls;
278
279 // For alloc nodes this is a unique id assigned when constructed, and for
280 // callsite stack nodes it is the original stack id when the node is
281 // constructed from the memprof MIB metadata on the alloc nodes. Note that
282 // this is only used when matching callsite metadata onto the stack nodes
283 // created when processing the allocation memprof MIBs, and for labeling
284 // nodes in the dot graph. Therefore we don't bother to assign a value for
285 // clones.
286 uint64_t OrigStackOrAllocId = 0;
287
288 // Edges to all callees in the profiled call stacks.
289 // TODO: Should this be a map (from Callee node) for more efficient lookup?
290 std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;
291
292 // Edges to all callers in the profiled call stacks.
293 // TODO: Should this be a map (from Caller node) for more efficient lookup?
294 std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
295
296 // Get the list of edges from which we can compute allocation information
297 // such as the context ids and allocation type of this node.
298 const std::vector<std::shared_ptr<ContextEdge>> *
299 getEdgesWithAllocInfo() const {
300 // If node has any callees, compute from those, otherwise compute from
301 // callers (i.e. if this is the leaf allocation node).
302 if (!CalleeEdges.empty())
303 return &CalleeEdges;
304 if (!CallerEdges.empty()) {
305 // A node with caller edges but no callee edges must be the allocation
306 // node.
307 assert(IsAllocation);
308 return &CallerEdges;
309 }
310 return nullptr;
311 }
312
313 // Compute the context ids for this node from the union of its edge context
314 // ids.
315 DenseSet<uint32_t> getContextIds() const {
316 DenseSet<uint32_t> ContextIds;
317 auto *Edges = getEdgesWithAllocInfo();
318 if (!Edges)
319 return {};
320 unsigned Count = 0;
321 for (auto &Edge : *Edges)
322 Count += Edge->getContextIds().size();
323 ContextIds.reserve(Count);
324 for (auto &Edge : *Edges)
325 ContextIds.insert(Edge->getContextIds().begin(),
326 Edge->getContextIds().end());
327 return ContextIds;
328 }
329
330 // Compute the allocation type for this node from the OR of its edge
331 // allocation types.
332 uint8_t computeAllocType() const {
333 auto *Edges = getEdgesWithAllocInfo();
334 if (!Edges)
335 return (uint8_t)AllocationType::None;
336 uint8_t BothTypes =
337 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
338 uint8_t AllocType = (uint8_t)AllocationType::None;
339 for (auto &Edge : *Edges) {
340 AllocType |= Edge->AllocTypes;
341 // Bail early if alloc type reached both, no further refinement.
342 if (AllocType == BothTypes)
343 return AllocType;
344 }
345 return AllocType;
346 }
347
348 // The context ids set for this node is empty if its edge context ids are
349 // also all empty.
350 bool emptyContextIds() const {
351 auto *Edges = getEdgesWithAllocInfo();
352 if (!Edges)
353 return true;
354 for (auto &Edge : *Edges) {
355 if (!Edge->getContextIds().empty())
356 return false;
357 }
358 return true;
359 }
360
361 // List of clones of this ContextNode, initially empty.
362 std::vector<ContextNode *> Clones;
363
364 // If a clone, points to the original uncloned node.
365 ContextNode *CloneOf = nullptr;
366
367 ContextNode(bool IsAllocation) : IsAllocation(IsAllocation), Call() {}
368
369 ContextNode(bool IsAllocation, CallInfo C)
370 : IsAllocation(IsAllocation), Call(C) {}
371
372 void addClone(ContextNode *Clone) {
373 if (CloneOf) {
374 CloneOf->Clones.push_back(Clone);
375 Clone->CloneOf = CloneOf;
376 } else {
377 Clones.push_back(Clone);
378 assert(!Clone->CloneOf);
379 Clone->CloneOf = this;
380 }
381 }
382
383 ContextNode *getOrigNode() {
384 if (!CloneOf)
385 return this;
386 return CloneOf;
387 }
388
389 void addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType,
390 unsigned int ContextId);
391
392 ContextEdge *findEdgeFromCallee(const ContextNode *Callee);
393 ContextEdge *findEdgeFromCaller(const ContextNode *Caller);
394 void eraseCalleeEdge(const ContextEdge *Edge);
395 void eraseCallerEdge(const ContextEdge *Edge);
396
397 void setCall(CallInfo C) { Call = C; }
398
399 bool hasCall() const { return (bool)Call.call(); }
400
401 void printCall(raw_ostream &OS) const { Call.print(OS); }
402
403 // True if this node was effectively removed from the graph, in which case
404 // it should have an allocation type of None and empty context ids.
405 bool isRemoved() const {
406 assert((AllocTypes == (uint8_t)AllocationType::None) ==
407 emptyContextIds());
408 return AllocTypes == (uint8_t)AllocationType::None;
409 }
410
411 void dump() const;
412 void print(raw_ostream &OS) const;
413
414 friend raw_ostream &operator<<(raw_ostream &OS, const ContextNode &Node) {
415 Node.print(OS);
416 return OS;
417 }
418 };
419
420 /// Edge in the Callsite Context Graph from a ContextNode N to a caller or
421 /// callee.
422 struct ContextEdge {
423 ContextNode *Callee;
424 ContextNode *Caller;
425
426 // This will be formed by ORing together the AllocationType enum values
427 // for contexts including this edge.
428 uint8_t AllocTypes = 0;
429
430 // The set of IDs for contexts including this edge.
431 DenseSet<uint32_t> ContextIds;
432
433 ContextEdge(ContextNode *Callee, ContextNode *Caller, uint8_t AllocType,
434 DenseSet<uint32_t> ContextIds)
435 : Callee(Callee), Caller(Caller), AllocTypes(AllocType),
436 ContextIds(std::move(ContextIds)) {}
437
438 DenseSet<uint32_t> &getContextIds() { return ContextIds; }
439
440 // Helper to clear the fields of this edge when we are removing it from the
441 // graph.
442 inline void clear() {
443 ContextIds.clear();
444 AllocTypes = (uint8_t)AllocationType::None;
445 Caller = nullptr;
446 Callee = nullptr;
447 }
448
449 // Check if edge was removed from the graph. This is useful while iterating
450 // over a copy of edge lists when performing operations that mutate the
451 // graph in ways that might remove one of the edges.
452 inline bool isRemoved() const {
453 if (Callee || Caller)
454 return false;
455 // Any edges that have been removed from the graph but are still in a
456 // shared_ptr somewhere should have all fields null'ed out by clear()
457 // above.
458 assert(AllocTypes == (uint8_t)AllocationType::None);
459 assert(ContextIds.empty());
460 return true;
461 }
462
463 void dump() const;
464 void print(raw_ostream &OS) const;
465
466 friend raw_ostream &operator<<(raw_ostream &OS, const ContextEdge &Edge) {
467 Edge.print(OS);
468 return OS;
469 }
470 };
471
472 /// Helpers to remove edges that have allocation type None (due to not
473 /// carrying any context ids) after transformations.
474 void removeNoneTypeCalleeEdges(ContextNode *Node);
475 void removeNoneTypeCallerEdges(ContextNode *Node);
476 void
477 recursivelyRemoveNoneTypeCalleeEdges(ContextNode *Node,
478 DenseSet<const ContextNode *> &Visited);
479
480protected:
481 /// Get a list of nodes corresponding to the stack ids in the given callsite
482 /// context.
483 template <class NodeT, class IteratorT>
484 std::vector<uint64_t>
485 getStackIdsWithContextNodes(CallStack<NodeT, IteratorT> &CallsiteContext);
486
487 /// Adds nodes for the given allocation and any stack ids on its memprof MIB
488 /// metadata (or summary).
489 ContextNode *addAllocNode(CallInfo Call, const FuncTy *F);
490
491 /// Adds nodes for the given MIB stack ids.
492 template <class NodeT, class IteratorT>
493 void addStackNodesForMIB(ContextNode *AllocNode,
494 CallStack<NodeT, IteratorT> &StackContext,
495 CallStack<NodeT, IteratorT> &CallsiteContext,
496 AllocationType AllocType,
497 ArrayRef<ContextTotalSize> ContextSizeInfo);
498
499 /// Matches all callsite metadata (or summary) to the nodes created for
500 /// allocation memprof MIB metadata, synthesizing new nodes to reflect any
501 /// inlining performed on those callsite instructions.
502 void updateStackNodes();
503
504 /// Update graph to conservatively handle any callsite stack nodes that target
505 /// multiple different callee target functions.
506 void handleCallsitesWithMultipleTargets();
507
508 // Try to partition calls on the given node (already placed into the AllCalls
509 // array) by callee function, creating new copies of Node as needed to hold
510 // calls with different callees, and moving the callee edges appropriately.
511 // Returns true if partitioning was successful.
512 bool partitionCallsByCallee(
513 ContextNode *Node, ArrayRef<CallInfo> AllCalls,
514 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode);
515
516 /// Save lists of calls with MemProf metadata in each function, for faster
517 /// iteration.
518 MapVector<FuncTy *, std::vector<CallInfo>> FuncToCallsWithMetadata;
519
520 /// Map from callsite node to the enclosing caller function.
521 std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
522
523private:
524 using EdgeIter = typename std::vector<std::shared_ptr<ContextEdge>>::iterator;
525
526 // Structure to keep track of information for each call as we are matching
527 // non-allocation callsites onto context nodes created from the allocation
528 // call metadata / summary contexts.
529 struct CallContextInfo {
530 // The callsite we're trying to match.
531 CallTy Call;
532 // The callsites stack ids that have a context node in the graph.
533 std::vector<uint64_t> StackIds;
534 // The function containing this callsite.
535 const FuncTy *Func;
536 // Initially empty, if needed this will be updated to contain the context
537 // ids for use in a new context node created for this callsite.
538 DenseSet<uint32_t> ContextIds;
539 };
540
541 /// Helper to remove edge from graph, updating edge iterator if it is provided
542 /// (in which case CalleeIter indicates which edge list is being iterated).
543 /// This will also perform the necessary clearing of the ContextEdge members
544 /// to enable later checking if the edge has been removed (since we may have
545 /// other copies of the shared_ptr in existence, and in fact rely on this to
546 /// enable removal while iterating over a copy of a node's edge list).
547 void removeEdgeFromGraph(ContextEdge *Edge, EdgeIter *EI = nullptr,
548 bool CalleeIter = true);
549
550 /// Assigns the given Node to calls at or inlined into the location with
551 /// the Node's stack id, after post order traversing and processing its
552 /// caller nodes. Uses the call information recorded in the given
553 /// StackIdToMatchingCalls map, and creates new nodes for inlined sequences
554 /// as needed. Called by updateStackNodes which sets up the given
555 /// StackIdToMatchingCalls map.
556 void assignStackNodesPostOrder(
557 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
558 DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls,
559 DenseMap<CallInfo, CallInfo> &CallToMatchingCall);
560
561 /// Duplicates the given set of context ids, updating the provided
562 /// map from each original id with the newly generated context ids,
563 /// and returning the new duplicated id set.
564 DenseSet<uint32_t> duplicateContextIds(
565 const DenseSet<uint32_t> &StackSequenceContextIds,
566 DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds);
567
568 /// Propagates all duplicated context ids across the graph.
569 void propagateDuplicateContextIds(
570 const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds);
571
572 /// Connect the NewNode to OrigNode's callees if TowardsCallee is true,
573 /// else to its callers. Also updates OrigNode's edges to remove any context
574 /// ids moved to the newly created edge.
575 void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
576 bool TowardsCallee,
577 DenseSet<uint32_t> RemainingContextIds);
578
579 /// Get the stack id corresponding to the given Id or Index (for IR this will
580 /// return itself, for a summary index this will return the id recorded in the
581 /// index for that stack id index value).
582 uint64_t getStackId(uint64_t IdOrIndex) const {
583 return static_cast<const DerivedCCG *>(this)->getStackId(IdOrIndex);
584 }
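// The wrappers here and below use the CRTP static_cast so that, for example,
// this getStackId resolves to ModuleCallsiteContextGraph::getStackId, which
// returns the IR stack id unchanged, or to IndexCallsiteContextGraph::getStackId,
// which maps the index value to the stack id recorded in the summary index
// (see the derived classes defined below).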
585
586 /// Returns true if the given call targets the callee of the given edge, or if
587 /// we were able to identify the call chain through intermediate tail calls.
588 /// In the latter case new context nodes are added to the graph for the
589 /// identified tail calls, and their synthesized nodes are added to
590 /// TailCallToContextNodeMap. The EdgeIter is updated in the latter case for
591 /// the updated edges and to prepare it for an increment in the caller.
592 bool
593 calleesMatch(CallTy Call, EdgeIter &EI,
594 MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap);
595
596 // Return the callee function of the given call, or nullptr if it can't be
597 // determined
598 const FuncTy *getCalleeFunc(CallTy Call) {
599 return static_cast<DerivedCCG *>(this)->getCalleeFunc(Call);
600 }
601
602 /// Returns true if the given call targets the given function, or if we were
603 /// able to identify the call chain through intermediate tail calls (in which
604 /// case FoundCalleeChain will be populated).
605 bool calleeMatchesFunc(
606 CallTy Call, const FuncTy *Func, const FuncTy *CallerFunc,
607 std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) {
608 return static_cast<DerivedCCG *>(this)->calleeMatchesFunc(
609 Call, Func, CallerFunc, FoundCalleeChain);
610 }
611
612 /// Returns true if both call instructions have the same callee.
613 bool sameCallee(CallTy Call1, CallTy Call2) {
614 return static_cast<DerivedCCG *>(this)->sameCallee(Call1, Call2);
615 }
616
617 /// Get a list of nodes corresponding to the stack ids in the given
618 /// callsite's context.
619 std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy Call) {
620 return static_cast<DerivedCCG *>(this)->getStackIdsWithContextNodesForCall(
621 Call);
622 }
623
624 /// Get the last stack id in the context for callsite.
625 uint64_t getLastStackId(CallTy Call) {
626 return static_cast<DerivedCCG *>(this)->getLastStackId(Call);
627 }
628
629 /// Update the allocation call to record type of allocated memory.
630 void updateAllocationCall(CallInfo &Call, AllocationType AllocType) {
631 AllocType == AllocationType::Cold ? AllocTypeCold++ : AllocTypeNotCold++;
632 static_cast<DerivedCCG *>(this)->updateAllocationCall(Call, AllocType);
633 }
634
635 /// Get the AllocationType assigned to the given allocation instruction clone.
636 AllocationType getAllocationCallType(const CallInfo &Call) const {
637 return static_cast<const DerivedCCG *>(this)->getAllocationCallType(Call);
638 }
639
640 /// Update non-allocation call to invoke (possibly cloned) function
641 /// CalleeFunc.
642 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) {
643 static_cast<DerivedCCG *>(this)->updateCall(CallerCall, CalleeFunc);
644 }
645
646 /// Clone the given function for the given callsite, recording mapping of all
647 /// of the function's tracked calls to their new versions in the CallMap.
648 /// Assigns new clones to clone number CloneNo.
649 FuncInfo cloneFunctionForCallsite(
650 FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
651 std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) {
652 return static_cast<DerivedCCG *>(this)->cloneFunctionForCallsite(
653 Func, Call, CallMap, CallsWithMetadataInFunc, CloneNo);
654 }
655
656 /// Gets a label to use in the dot graph for the given call clone in the given
657 /// function.
658 std::string getLabel(const FuncTy *Func, const CallTy Call,
659 unsigned CloneNo) const {
660 return static_cast<const DerivedCCG *>(this)->getLabel(Func, Call, CloneNo);
661 }
662
663 // Create and return a new ContextNode.
664 ContextNode *createNewNode(bool IsAllocation, const FuncTy *F = nullptr,
665 CallInfo C = CallInfo()) {
666 NodeOwner.push_back(std::make_unique<ContextNode>(IsAllocation, C));
667 auto *NewNode = NodeOwner.back().get();
668 if (F)
669 NodeToCallingFunc[NewNode] = F;
670 return NewNode;
671 }
672
673 /// Helpers to find the node corresponding to the given call or stackid.
674 ContextNode *getNodeForInst(const CallInfo &C);
675 ContextNode *getNodeForAlloc(const CallInfo &C);
676 ContextNode *getNodeForStackId(uint64_t StackId);
677
678 /// Computes the alloc type corresponding to the given context ids, by
679 /// unioning their recorded alloc types.
680 uint8_t computeAllocType(DenseSet<uint32_t> &ContextIds);
681
682 /// Returns the allocation type of the intersection of the contexts of two
683 /// nodes (based on their provided context id sets), optimized for the case
684 /// when Node1Ids is smaller than Node2Ids.
685 uint8_t intersectAllocTypesImpl(const DenseSet<uint32_t> &Node1Ids,
686 const DenseSet<uint32_t> &Node2Ids);
687
688 /// Returns the allocation type of the intersection of the contexts of two
689 /// nodes (based on their provided context id sets).
690 uint8_t intersectAllocTypes(const DenseSet<uint32_t> &Node1Ids,
691 const DenseSet<uint32_t> &Node2Ids);
692
693 /// Create a clone of Edge's callee and move Edge to that new callee node,
694 /// performing the necessary context id and allocation type updates.
695 /// If callee's caller edge iterator is supplied, it is updated when removing
696 /// the edge from that list. If ContextIdsToMove is non-empty, only that
697 /// subset of Edge's ids are moved to an edge to the new callee.
698 ContextNode *
699 moveEdgeToNewCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
700 EdgeIter *CallerEdgeI = nullptr,
701 DenseSet<uint32_t> ContextIdsToMove = {});
702
703 /// Change the callee of Edge to existing callee clone NewCallee, performing
704 /// the necessary context id and allocation type updates.
705 /// If callee's caller edge iterator is supplied, it is updated when removing
706 /// the edge from that list. If ContextIdsToMove is non-empty, only that
707 /// subset of Edge's ids are moved to an edge to the new callee.
708 void moveEdgeToExistingCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
709 ContextNode *NewCallee,
710 EdgeIter *CallerEdgeI = nullptr,
711 bool NewClone = false,
712 DenseSet<uint32_t> ContextIdsToMove = {});
713
714 /// Change the caller of the edge at the given callee edge iterator to be
715 /// NewCaller, performing the necessary context id and allocation type
716 /// updates. The iterator is updated as the edge is removed from the list of
717 /// callee edges in the original caller. This is similar to the above
718 /// moveEdgeToExistingCalleeClone, but a simplified version of it as we always
719 /// move the given edge and all of its context ids.
720 void moveCalleeEdgeToNewCaller(EdgeIter &CalleeEdgeI, ContextNode *NewCaller);
721
722 /// Recursively perform cloning on the graph for the given Node and its
723 /// callers, in order to uniquely identify the allocation behavior of an
724 /// allocation given its context. The context ids of the allocation being
725 /// processed are given in AllocContextIds.
726 void identifyClones(ContextNode *Node, DenseSet<const ContextNode *> &Visited,
727 const DenseSet<uint32_t> &AllocContextIds);
728
729 /// Map from each context ID to the AllocationType assigned to that context.
730 DenseMap<uint32_t, AllocationType> ContextIdToAllocationType;
731
732 /// Map from each contextID to the profiled full contexts and their total
733 /// sizes (there may be more than one due to context trimming),
734 /// optionally populated when requested (via MemProfReportHintedSizes or
735 /// MinClonedColdBytePercent).
736 DenseMap<uint32_t, std::vector<ContextTotalSize>> ContextIdToContextSizeInfos;
737
738 /// Identifies the context node created for a stack id when adding the MIB
739 /// contexts to the graph. This is used to locate the context nodes when
740 /// trying to assign the corresponding callsites with those stack ids to these
741 /// nodes.
742 DenseMap<uint64_t, ContextNode *> StackEntryIdToContextNodeMap;
743
744 /// Maps to track the calls to their corresponding nodes in the graph.
745 MapVector<CallInfo, ContextNode *> AllocationCallToContextNodeMap;
746 MapVector<CallInfo, ContextNode *> NonAllocationCallToContextNodeMap;
747
748 /// Owner of all ContextNode unique_ptrs.
749 std::vector<std::unique_ptr<ContextNode>> NodeOwner;
750
751 /// Perform sanity checks on graph when requested.
752 void check() const;
753
754 /// Keeps track of the last unique context id assigned.
755 unsigned int LastContextId = 0;
756};
757
758template <typename DerivedCCG, typename FuncTy, typename CallTy>
759using ContextNode =
760 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
761template <typename DerivedCCG, typename FuncTy, typename CallTy>
762using ContextEdge =
763 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
764template <typename DerivedCCG, typename FuncTy, typename CallTy>
765using FuncInfo =
766 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
767template <typename DerivedCCG, typename FuncTy, typename CallTy>
768using CallInfo =
769 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
770
771/// CRTP derived class for graphs built from IR (regular LTO).
772class ModuleCallsiteContextGraph
773 : public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
774 Instruction *> {
775public:
776 ModuleCallsiteContextGraph(
777 Module &M,
778 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter);
779
780private:
781 friend CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
782 Instruction *>;
783
784 uint64_t getStackId(uint64_t IdOrIndex) const;
785 const Function *getCalleeFunc(Instruction *Call);
786 bool calleeMatchesFunc(
787 Instruction *Call, const Function *Func, const Function *CallerFunc,
788 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain);
789 bool sameCallee(Instruction *Call1, Instruction *Call2);
790 bool findProfiledCalleeThroughTailCalls(
791 const Function *ProfiledCallee, Value *CurCallee, unsigned Depth,
792 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
793 bool &FoundMultipleCalleeChains);
794 uint64_t getLastStackId(Instruction *Call);
795 std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *Call);
796 void updateAllocationCall(CallInfo &Call, AllocationType AllocType);
797 AllocationType getAllocationCallType(const CallInfo &Call) const;
798 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
799 CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
800 Instruction *>::FuncInfo
801 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &Call,
802 std::map<CallInfo, CallInfo> &CallMap,
803 std::vector<CallInfo> &CallsWithMetadataInFunc,
804 unsigned CloneNo);
805 std::string getLabel(const Function *Func, const Instruction *Call,
806 unsigned CloneNo) const;
807
808 const Module &Mod;
809 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter;
810};
811
812/// Represents a call in the summary index graph, which can either be an
813/// allocation or an interior callsite node in an allocation's context.
814/// Holds a pointer to the corresponding data structure in the index.
815struct IndexCall : public PointerUnion<CallsiteInfo *, AllocInfo *> {
816 IndexCall() : PointerUnion() {}
817 IndexCall(std::nullptr_t) : IndexCall() {}
818 IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {}
819 IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {}
820 IndexCall(PointerUnion PT) : PointerUnion(PT) {}
821
822 IndexCall *operator->() { return this; }
823
824 void print(raw_ostream &OS) const {
825 PointerUnion<CallsiteInfo *, AllocInfo *> Base = *this;
826 if (auto *AI = llvm::dyn_cast_if_present<AllocInfo *>(Base)) {
827 OS << *AI;
828 } else {
829 auto *CI = llvm::dyn_cast_if_present<CallsiteInfo *>(Base);
830 assert(CI);
831 OS << *CI;
832 }
833 }
834};
835} // namespace
836
837namespace llvm {
838template <> struct simplify_type<IndexCall> {
839 using SimpleType = PointerUnion<CallsiteInfo *, AllocInfo *>;
840 static SimpleType getSimplifiedValue(IndexCall &Val) { return Val; }
841};
842template <> struct simplify_type<const IndexCall> {
843 using SimpleType = const PointerUnion<CallsiteInfo *, AllocInfo *>;
844 static SimpleType getSimplifiedValue(const IndexCall &Val) { return Val; }
845};
846} // namespace llvm
847
848namespace {
849/// CRTP derived class for graphs built from summary index (ThinLTO).
850class IndexCallsiteContextGraph
851 : public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
852 IndexCall> {
853public:
854 IndexCallsiteContextGraph(
855 ModuleSummaryIndex &Index,
856 llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
857 isPrevailing);
858
859 ~IndexCallsiteContextGraph() {
860 // Now that we are done with the graph it is safe to add the new
861 // CallsiteInfo structs to the function summary vectors. The graph nodes
862 // point into locations within these vectors, so we don't want to add them
863 // any earlier.
864 for (auto &I : FunctionCalleesToSynthesizedCallsiteInfos) {
865 auto *FS = I.first;
866 for (auto &Callsite : I.second)
867 FS->addCallsite(*Callsite.second);
868 }
869 }
870
871private:
872 friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
873 IndexCall>;
874
875 uint64_t getStackId(uint64_t IdOrIndex) const;
876 const FunctionSummary *getCalleeFunc(IndexCall &Call);
877 bool calleeMatchesFunc(
878 IndexCall &Call, const FunctionSummary *Func,
879 const FunctionSummary *CallerFunc,
880 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain);
881 bool sameCallee(IndexCall &Call1, IndexCall &Call2);
882 bool findProfiledCalleeThroughTailCalls(
883 ValueInfo ProfiledCallee, ValueInfo CurCallee, unsigned Depth,
884 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
885 bool &FoundMultipleCalleeChains);
886 uint64_t getLastStackId(IndexCall &Call);
887 std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &Call);
888 void updateAllocationCall(CallInfo &Call, AllocationType AllocType);
889 AllocationType getAllocationCallType(const CallInfo &Call) const;
890 void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
891 CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
892 IndexCall>::FuncInfo
893 cloneFunctionForCallsite(FuncInfo &Func, CallInfo &Call,
894 std::map<CallInfo, CallInfo> &CallMap,
895 std::vector<CallInfo> &CallsWithMetadataInFunc,
896 unsigned CloneNo);
897 std::string getLabel(const FunctionSummary *Func, const IndexCall &Call,
898 unsigned CloneNo) const;
899
900 // Saves mapping from function summaries containing memprof records back to
901 // its VI, for use in checking and debugging.
902 std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
903
904 const ModuleSummaryIndex &Index;
905 llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
906 isPrevailing;
907
908 // Saves/owns the callsite info structures synthesized for missing tail call
909 // frames that we discover while building the graph.
910 // It maps from the summary of the function making the tail call, to a map
911 // of callee ValueInfo to corresponding synthesized callsite info.
912 std::unordered_map<FunctionSummary *,
913 std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>>
914 FunctionCalleesToSynthesizedCallsiteInfos;
915};
916} // namespace
917
918namespace llvm {
919template <>
920struct DenseMapInfo<typename CallsiteContextGraph<
921 ModuleCallsiteContextGraph, Function, Instruction *>::CallInfo>
922 : public DenseMapInfo<std::pair<Instruction *, unsigned>> {};
923template <>
924struct DenseMapInfo<typename CallsiteContextGraph<
925 IndexCallsiteContextGraph, FunctionSummary, IndexCall>::CallInfo>
926 : public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
927template <>
928struct DenseMapInfo<IndexCall>
929 : public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
930} // end namespace llvm
931
932namespace {
933
934// Map the uint8_t alloc types (which may contain NotCold|Cold) to the alloc
935// type we should actually use on the corresponding allocation.
936// If we can't clone a node that has NotCold+Cold alloc type, we will fall
937// back to using NotCold. So don't bother cloning to distinguish NotCold+Cold
938// from NotCold.
939AllocationType allocTypeToUse(uint8_t AllocTypes) {
940 assert(AllocTypes != (uint8_t)AllocationType::None);
941 if (AllocTypes ==
942 ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
943 return AllocationType::NotCold;
944 else
945 return (AllocationType)AllocTypes;
946}
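// For example, allocTypeToUse(NotCold | Cold) yields NotCold, while a pure
// Cold or NotCold value is returned unchanged; None is rejected by the assert.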
947
948// Helper to check if the alloc types for all edges recorded in the
949// InAllocTypes vector match the alloc types for all edges in the Edges
950// vector.
951template <typename DerivedCCG, typename FuncTy, typename CallTy>
952bool allocTypesMatch(
953 const std::vector<uint8_t> &InAllocTypes,
954 const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
955 &Edges) {
956 // This should be called only when the InAllocTypes vector was computed for
957 // this set of Edges. Make sure the sizes are the same.
958 assert(InAllocTypes.size() == Edges.size());
959 return std::equal(
960 InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), Edges.end(),
961 [](const uint8_t &l,
962 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
963 // Can share if one of the edges is None type - don't
964 // care about the type along that edge as it doesn't
965 // exist for those context ids.
966 if (l == (uint8_t)AllocationType::None ||
967 r->AllocTypes == (uint8_t)AllocationType::None)
968 return true;
969 return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
970 });
971}
972
973// Helper to check if the alloc types for all edges recorded in the
974// InAllocTypes vector match the alloc types for callee edges in the given
975// clone. Because the InAllocTypes were computed from the original node's callee
976// edges, and other cloning could have happened after this clone was created, we
977// need to find the matching clone callee edge, which may or may not exist.
978template <typename DerivedCCG, typename FuncTy, typename CallTy>
979bool allocTypesMatchClone(
980 const std::vector<uint8_t> &InAllocTypes,
981 const ContextNode<DerivedCCG, FuncTy, CallTy> *Clone) {
982 const ContextNode<DerivedCCG, FuncTy, CallTy> *Node = Clone->CloneOf;
983 assert(Node);
984 // InAllocTypes should have been computed for the original node's callee
985 // edges.
986 assert(InAllocTypes.size() == Node->CalleeEdges.size());
987 // First create a map of the clone callee edge callees to the edge alloc type.
988 DenseMap<const ContextNode<DerivedCCG, FuncTy, CallTy> *, uint8_t>
989 EdgeCalleeMap;
990 for (const auto &E : Clone->CalleeEdges) {
991 assert(!EdgeCalleeMap.contains(E->Callee));
992 EdgeCalleeMap[E->Callee] = E->AllocTypes;
993 }
994 // Next, walk the original node's callees, and look for the corresponding
995 // clone edge to that callee.
996 for (unsigned I = 0; I < Node->CalleeEdges.size(); I++) {
997 auto Iter = EdgeCalleeMap.find(Node->CalleeEdges[I]->Callee);
998 // Not found is ok, we will simply add an edge if we use this clone.
999 if (Iter == EdgeCalleeMap.end())
1000 continue;
1001 // Can share if one of the edges is None type - don't
1002 // care about the type along that edge as it doesn't
1003 // exist for those context ids.
1004 if (InAllocTypes[I] == (uint8_t)AllocationType::None ||
1005 Iter->second == (uint8_t)AllocationType::None)
1006 continue;
1007 if (allocTypeToUse(Iter->second) != allocTypeToUse(InAllocTypes[I]))
1008 return false;
1009 }
1010 return true;
1011}
1012
1013} // end anonymous namespace
1014
1015template <typename DerivedCCG, typename FuncTy, typename CallTy>
1016typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1017CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
1018 const CallInfo &C) {
1019 ContextNode *Node = getNodeForAlloc(C);
1020 if (Node)
1021 return Node;
1022
1023 return NonAllocationCallToContextNodeMap.lookup(C);
1024}
1025
1026template <typename DerivedCCG, typename FuncTy, typename CallTy>
1027typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1028CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
1029 const CallInfo &C) {
1030 return AllocationCallToContextNodeMap.lookup(C);
1031}
1032
1033template <typename DerivedCCG, typename FuncTy, typename CallTy>
1034typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1035CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
1036 uint64_t StackId) {
1037 auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
1038 if (StackEntryNode != StackEntryIdToContextNodeMap.end())
1039 return StackEntryNode->second;
1040 return nullptr;
1041}
1042
1043template <typename DerivedCCG, typename FuncTy, typename CallTy>
1044void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1045 addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType,
1046 unsigned int ContextId) {
1047 for (auto &Edge : CallerEdges) {
1048 if (Edge->Caller == Caller) {
1049 Edge->AllocTypes |= (uint8_t)AllocType;
1050 Edge->getContextIds().insert(ContextId);
1051 return;
1052 }
1053 }
1054 std::shared_ptr<ContextEdge> Edge = std::make_shared<ContextEdge>(
1055 this, Caller, (uint8_t)AllocType, DenseSet<uint32_t>({ContextId}));
1056 CallerEdges.push_back(Edge);
1057 Caller->CalleeEdges.push_back(Edge);
1058}
1059
1060template <typename DerivedCCG, typename FuncTy, typename CallTy>
1061void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::removeEdgeFromGraph(
1062 ContextEdge *Edge, EdgeIter *EI, bool CalleeIter) {
1063 assert(!EI || (*EI)->get() == Edge);
1064 // Save the Caller and Callee pointers so we can erase Edge from their edge
1065 // lists after clearing Edge below. We do the clearing first in case it is
1066 // destructed after removing from the edge lists (if those were the last
1067 // shared_ptr references to Edge).
1068 auto *Callee = Edge->Callee;
1069 auto *Caller = Edge->Caller;
1070
1071 // Make sure the edge fields are cleared out so we can properly detect
1072 // removed edges if Edge is not destructed because there is still a shared_ptr
1073 // reference.
1074 Edge->clear();
1075
1076 if (!EI) {
1077 Callee->eraseCallerEdge(Edge);
1078 Caller->eraseCalleeEdge(Edge);
1079 } else if (CalleeIter) {
1080 Callee->eraseCallerEdge(Edge);
1081 *EI = Caller->CalleeEdges.erase(*EI);
1082 } else {
1083 Caller->eraseCalleeEdge(Edge);
1084 *EI = Callee->CallerEdges.erase(*EI);
1085 }
1086}
1087
1088template <typename DerivedCCG, typename FuncTy, typename CallTy>
1089void CallsiteContextGraph<
1090 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) {
1091 for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();) {
1092 auto Edge = *EI;
1093 if (Edge->AllocTypes == (uint8_t)AllocationType::None) {
1094 assert(Edge->ContextIds.empty());
1095 removeEdgeFromGraph(Edge.get(), &EI, /*CalleeIter=*/true);
1096 } else
1097 ++EI;
1098 }
1099}
1100
1101template <typename DerivedCCG, typename FuncTy, typename CallTy>
1102void CallsiteContextGraph<
1103 DerivedCCG, FuncTy, CallTy>::removeNoneTypeCallerEdges(ContextNode *Node) {
1104 for (auto EI = Node->CallerEdges.begin(); EI != Node->CallerEdges.end();) {
1105 auto Edge = *EI;
1106 if (Edge->AllocTypes == (uint8_t)AllocationType::None) {
1107 assert(Edge->ContextIds.empty());
1108 Edge->Caller->eraseCalleeEdge(Edge.get());
1109 EI = Node->CallerEdges.erase(EI);
1110 } else
1111 ++EI;
1112 }
1113}
1114
1115template <typename DerivedCCG, typename FuncTy, typename CallTy>
1116typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1117CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1118 findEdgeFromCallee(const ContextNode *Callee) {
1119 for (const auto &Edge : CalleeEdges)
1120 if (Edge->Callee == Callee)
1121 return Edge.get();
1122 return nullptr;
1123}
1124
1125template <typename DerivedCCG, typename FuncTy, typename CallTy>
1126typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
1127CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1128 findEdgeFromCaller(const ContextNode *Caller) {
1129 for (const auto &Edge : CallerEdges)
1130 if (Edge->Caller == Caller)
1131 return Edge.get();
1132 return nullptr;
1133}
1134
1135template <typename DerivedCCG, typename FuncTy, typename CallTy>
1136void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1137 eraseCalleeEdge(const ContextEdge *Edge) {
1138 auto EI = llvm::find_if(
1139 CalleeEdges, [Edge](const std::shared_ptr<ContextEdge> &CalleeEdge) {
1140 return CalleeEdge.get() == Edge;
1141 });
1142 assert(EI != CalleeEdges.end());
1143 CalleeEdges.erase(EI);
1144}
1145
1146template <typename DerivedCCG, typename FuncTy, typename CallTy>
1147void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
1148 eraseCallerEdge(const ContextEdge *Edge) {
1149 auto EI = llvm::find_if(
1150 CallerEdges, [Edge](const std::shared_ptr<ContextEdge> &CallerEdge) {
1151 return CallerEdge.get() == Edge;
1152 });
1153 assert(EI != CallerEdges.end());
1154 CallerEdges.erase(EI);
1155}
1156
1157template <typename DerivedCCG, typename FuncTy, typename CallTy>
1158uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
1159 DenseSet<uint32_t> &ContextIds) {
1160 uint8_t BothTypes =
1161 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1162 uint8_t AllocType = (uint8_t)AllocationType::None;
1163 for (auto Id : ContextIds) {
1164 AllocType |= (uint8_t)ContextIdToAllocationType[Id];
1165 // Bail early if alloc type reached both, no further refinement.
1166 if (AllocType == BothTypes)
1167 return AllocType;
1168 }
1169 return AllocType;
1170}
1171
1172template <typename DerivedCCG, typename FuncTy, typename CallTy>
1173uint8_t
1174CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
1175 const DenseSet<uint32_t> &Node1Ids, const DenseSet<uint32_t> &Node2Ids) {
1176 uint8_t BothTypes =
1177 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
1178 uint8_t AllocType = (uint8_t)AllocationType::None;
1179 for (auto Id : Node1Ids) {
1180 if (!Node2Ids.count(Id))
1181 continue;
1182 AllocType |= (uint8_t)ContextIdToAllocationType[Id];
1183 // Bail early if alloc type reached both, no further refinement.
1184 if (AllocType == BothTypes)
1185 return AllocType;
1186 }
1187 return AllocType;
1188}
1189
1190template <typename DerivedCCG, typename FuncTy, typename CallTy>
1191uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
1192 const DenseSet<uint32_t> &Node1Ids, const DenseSet<uint32_t> &Node2Ids) {
1193 if (Node1Ids.size() < Node2Ids.size())
1194 return intersectAllocTypesImpl(Node1Ids, Node2Ids);
1195 else
1196 return intersectAllocTypesImpl(Node2Ids, Node1Ids);
1197}
1198
1199template <typename DerivedCCG, typename FuncTy, typename CallTy>
1200typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1201CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
1202 CallInfo Call, const FuncTy *F) {
1203 assert(!getNodeForAlloc(Call));
1204 ContextNode *AllocNode = createNewNode(/*IsAllocation=*/true, F, Call);
1205 AllocationCallToContextNodeMap[Call] = AllocNode;
1206 // Use LastContextId as a unique id for MIB allocation nodes.
1207 AllocNode->OrigStackOrAllocId = LastContextId;
1208 // Alloc type should be updated as we add in the MIBs. We should assert
1209 // afterwards that it is not still None.
1210 AllocNode->AllocTypes = (uint8_t)AllocationType::None;
1211
1212 return AllocNode;
1213}
1214
1215static std::string getAllocTypeString(uint8_t AllocTypes) {
1216 if (!AllocTypes)
1217 return "None";
1218 std::string Str;
1219 if (AllocTypes & (uint8_t)AllocationType::NotCold)
1220 Str += "NotCold";
1221 if (AllocTypes & (uint8_t)AllocationType::Cold)
1222 Str += "Cold";
1223 return Str;
1224}
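// E.g. a node or edge carrying both types prints as "NotColdCold", while a
// zero value prints as "None".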
1225
1226template <typename DerivedCCG, typename FuncTy, typename CallTy>
1227template <class NodeT, class IteratorT>
1228void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
1229 ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
1230 CallStack<NodeT, IteratorT> &CallsiteContext, AllocationType AllocType,
1231 ArrayRef<ContextTotalSize> ContextSizeInfo) {
1232 // Treating the hot alloc type as NotCold before the disambiguation for "hot"
1233 // is done.
1234 if (AllocType == AllocationType::Hot)
1235 AllocType = AllocationType::NotCold;
1236
1237 ContextIdToAllocationType[++LastContextId] = AllocType;
1238
1239 if (!ContextSizeInfo.empty()) {
1240 auto &Entry = ContextIdToContextSizeInfos[LastContextId];
1241 Entry.insert(Entry.begin(), ContextSizeInfo.begin(), ContextSizeInfo.end());
1242 }
1243
1244 // Update alloc type and context ids for this MIB.
1245 AllocNode->AllocTypes |= (uint8_t)AllocType;
1246
1247 // Now add or update nodes for each stack id in alloc's context.
1248 // Later when processing the stack ids on non-alloc callsites we will adjust
1249 // for any inlining in the context.
1250 ContextNode *PrevNode = AllocNode;
1251 // Look for recursion (direct recursion should have been collapsed by
1252 // module summary analysis, here we should just be detecting mutual
1253 // recursion). Mark these nodes so we don't try to clone.
1254 SmallSet<uint64_t, 8> StackIdSet;
1255 // Skip any on the allocation call (inlining).
1256 for (auto ContextIter = StackContext.beginAfterSharedPrefix(CallsiteContext);
1257 ContextIter != StackContext.end(); ++ContextIter) {
1258 auto StackId = getStackId(*ContextIter);
1259 ContextNode *StackNode = getNodeForStackId(StackId);
1260 if (!StackNode) {
1261 StackNode = createNewNode(/*IsAllocation=*/false);
1262 StackEntryIdToContextNodeMap[StackId] = StackNode;
1263 StackNode->OrigStackOrAllocId = StackId;
1264 }
1265 // Marking a node recursive will prevent its cloning completely, even for
1266 // non-recursive contexts flowing through it.
1267 if (!AllowRecursiveCallsites) {
1268 auto Ins = StackIdSet.insert(StackId);
1269 if (!Ins.second)
1270 StackNode->Recursive = true;
1271 }
1272 StackNode->AllocTypes |= (uint8_t)AllocType;
1273 PrevNode->addOrUpdateCallerEdge(StackNode, AllocType, LastContextId);
1274 PrevNode = StackNode;
1275 }
1276}
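// Illustrative sketch: for a single MIB whose non-shared stack ids are
// <A, B, C>, ordered from the frame nearest the allocation outward, the loop
// above produces
//   AllocNode <- node(A) <- node(B) <- node(C)
// where each arrow is a caller edge carrying the newly assigned context id and
// this MIB's alloc type; existing stack nodes and edges are reused and updated
// in place when later MIBs share frames.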
1277
1278template <typename DerivedCCG, typename FuncTy, typename CallTy>
1279DenseSet<uint32_t>
1280CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
1281 const DenseSet<uint32_t> &StackSequenceContextIds,
1282 DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
1283 DenseSet<uint32_t> NewContextIds;
1284 for (auto OldId : StackSequenceContextIds) {
1285 NewContextIds.insert(++LastContextId);
1286 OldToNewContextIds[OldId].insert(LastContextId);
1287 assert(ContextIdToAllocationType.count(OldId));
1288 // The new context has the same allocation type as original.
1289 ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
1290 }
1291 return NewContextIds;
1292}
1293
1294template <typename DerivedCCG, typename FuncTy, typename CallTy>
1295void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1296 propagateDuplicateContextIds(
1297 const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
1298 // Build a set of duplicated context ids corresponding to the input id set.
1299 auto GetNewIds = [&OldToNewContextIds](const DenseSet<uint32_t> &ContextIds) {
1300 DenseSet<uint32_t> NewIds;
1301 for (auto Id : ContextIds)
1302 if (auto NewId = OldToNewContextIds.find(Id);
1303 NewId != OldToNewContextIds.end())
1304 NewIds.insert(NewId->second.begin(), NewId->second.end());
1305 return NewIds;
1306 };
1307
1308 // Recursively update context ids sets along caller edges.
1309 auto UpdateCallers = [&](ContextNode *Node,
1310 DenseSet<const ContextEdge *> &Visited,
1311 auto &&UpdateCallers) -> void {
1312 for (const auto &Edge : Node->CallerEdges) {
1313 auto Inserted = Visited.insert(Edge.get());
1314 if (!Inserted.second)
1315 continue;
1316 ContextNode *NextNode = Edge->Caller;
1317 DenseSet<uint32_t> NewIdsToAdd = GetNewIds(Edge->getContextIds());
1318 // Only need to recursively iterate to NextNode via this caller edge if
1319 // it resulted in any added ids to NextNode.
1320 if (!NewIdsToAdd.empty()) {
1321 Edge->getContextIds().insert(NewIdsToAdd.begin(), NewIdsToAdd.end());
1322 UpdateCallers(NextNode, Visited, UpdateCallers);
1323 }
1324 }
1325 };
1326
1327 DenseSet<const ContextEdge *> Visited;
1328 for (auto &Entry : AllocationCallToContextNodeMap) {
1329 auto *Node = Entry.second;
1330 UpdateCallers(Node, Visited, UpdateCallers);
1331 }
1332}
1333
1334template <typename DerivedCCG, typename FuncTy, typename CallTy>
1335void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
1336 ContextNode *NewNode, ContextNode *OrigNode, bool TowardsCallee,
1337 // This must be passed by value to make a copy since it will be adjusted
1338 // as ids are moved.
1339 DenseSet<uint32_t> RemainingContextIds) {
1340 auto &OrigEdges =
1341 TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
1342 // Increment iterator in loop so that we can remove edges as needed.
1343 for (auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
1344 auto Edge = *EI;
1345 // Remove any matching context ids from Edge; the set that were found and
1346 // removed becomes the new edge's context ids. Also update the remaining
1347 // (not found) ids.
1348 DenseSet<uint32_t> NewEdgeContextIds, NotFoundContextIds;
1349 set_subtract(Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
1350 NotFoundContextIds);
1351 RemainingContextIds.swap(NotFoundContextIds);
1352 // If no matching context ids for this edge, skip it.
1353 if (NewEdgeContextIds.empty()) {
1354 ++EI;
1355 continue;
1356 }
1357 if (TowardsCallee) {
1358 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1359 auto NewEdge = std::make_shared<ContextEdge>(
1360 Edge->Callee, NewNode, NewAllocType, std::move(NewEdgeContextIds));
1361 NewNode->CalleeEdges.push_back(NewEdge);
1362 NewEdge->Callee->CallerEdges.push_back(NewEdge);
1363 } else {
1364 uint8_t NewAllocType = computeAllocType(NewEdgeContextIds);
1365 auto NewEdge = std::make_shared<ContextEdge>(
1366 NewNode, Edge->Caller, NewAllocType, std::move(NewEdgeContextIds));
1367 NewNode->CallerEdges.push_back(NewEdge);
1368 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
1369 }
1370 // Remove old edge if context ids empty.
1371 if (Edge->getContextIds().empty()) {
1372 removeEdgeFromGraph(Edge.get(), &EI, TowardsCallee);
1373 continue;
1374 }
1375 ++EI;
1376 }
1377}
1378
1379template <typename DerivedCCG, typename FuncTy, typename CallTy>
1380static void checkEdge(
1381 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
1382 // Confirm that alloc type is not None and that we have at least one context
1383 // id.
1384 assert(Edge->AllocTypes != (uint8_t)AllocationType::None);
1385 assert(!Edge->ContextIds.empty());
1386}
1387
1388template <typename DerivedCCG, typename FuncTy, typename CallTy>
1389static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
1390 bool CheckEdges = true) {
1391 if (Node->isRemoved())
1392 return;
1393#ifndef NDEBUG
1394 // Compute node's context ids once for use in asserts.
1395 auto NodeContextIds = Node->getContextIds();
1396#endif
1397 // Node's context ids should be the union of both its callee and caller edge
1398 // context ids.
1399 if (Node->CallerEdges.size()) {
1400 DenseSet<uint32_t> CallerEdgeContextIds(
1401 Node->CallerEdges.front()->ContextIds);
1402 for (const auto &Edge : llvm::drop_begin(Node->CallerEdges)) {
1403 if (CheckEdges)
1404 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
1405 set_union(CallerEdgeContextIds, Edge->ContextIds);
1406 }
1407 // Node can have more context ids than callers if some contexts terminate at
1408 // node and some are longer. If we are allowing recursive callsites but
1409 // haven't disabled recursive contexts, this will be violated for
1410 // incompletely cloned recursive cycles, so skip the checking in that case.
1411 assert((AllowRecursiveCallsites && AllowRecursiveContexts) ||
1412 NodeContextIds == CallerEdgeContextIds ||
1413 set_is_subset(CallerEdgeContextIds, NodeContextIds));
1414 }
1415 if (Node->CalleeEdges.size()) {
1416 DenseSet<uint32_t> CalleeEdgeContextIds(
1417 Node->CalleeEdges.front()->ContextIds);
1418 for (const auto &Edge : llvm::drop_begin(Node->CalleeEdges)) {
1419 if (CheckEdges)
1420 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
1421 set_union(CalleeEdgeContextIds, Edge->getContextIds());
1422 }
1423 assert(NodeContextIds == CalleeEdgeContextIds);
1424 }
1425 // FIXME: Since this checking is only invoked under an option, we should
1426 // change the error checking from using assert to something that will trigger
1427 // an error on a release build.
1428#ifndef NDEBUG
1429 // Make sure we don't end up with duplicate edges between the same caller and
1430 // callee.
1431 DenseSet<ContextNode<DerivedCCG, FuncTy, CallTy> *> NodeSet;
1432 for (const auto &E : Node->CalleeEdges)
1433 NodeSet.insert(E->Callee);
1434 assert(NodeSet.size() == Node->CalleeEdges.size());
1435#endif
1436}
1437
1438template <typename DerivedCCG, typename FuncTy, typename CallTy>
1439void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1440 assignStackNodesPostOrder(
1441 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
1442 DenseMap<uint64_t, std::vector<CallContextInfo>>
1443 &StackIdToMatchingCalls,
1444 DenseMap<CallInfo, CallInfo> &CallToMatchingCall) {
1445 auto Inserted = Visited.insert(Node);
1446 if (!Inserted.second)
1447 return;
1448 // Post order traversal. Iterate over a copy since we may add nodes and
1449 // therefore new callers during the recursive call, invalidating any
1450 // iterator over the original edge vector. We don't need to process these
1451 // new nodes as they were already processed on creation.
1452 auto CallerEdges = Node->CallerEdges;
1453 for (auto &Edge : CallerEdges) {
1454 // Skip any that have been removed during the recursion.
1455 if (Edge->isRemoved()) {
1456 assert(!is_contained(Node->CallerEdges, Edge));
1457 continue;
1458 }
1459 assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls,
1460 CallToMatchingCall);
1461 }
1462
1463 // If this node's stack id is in the map, update the graph to contain new
1464 // nodes representing any inlining at interior callsites. Note we move the
1465 // associated context ids over to the new nodes.
1466
1467 // Ignore this node if it is for an allocation or we didn't record any
1468 // stack id lists ending at it.
1469 if (Node->IsAllocation ||
1470 !StackIdToMatchingCalls.count(Node->OrigStackOrAllocId))
1471 return;
1472
1473 auto &Calls = StackIdToMatchingCalls[Node->OrigStackOrAllocId];
1474 // Handle the simple case first. A single call with a single stack id.
1475 // In this case there is no need to create any new context nodes, simply
1476 // assign the context node for stack id to this Call.
1477 if (Calls.size() == 1) {
1478 auto &[Call, Ids, Func, SavedContextIds] = Calls[0];
1479 if (Ids.size() == 1) {
1480 assert(SavedContextIds.empty());
1481 // It should be this Node
1482 assert(Node == getNodeForStackId(Ids[0]));
1483 if (Node->Recursive)
1484 return;
1485 Node->setCall(Call);
1486 NonAllocationCallToContextNodeMap[Call] = Node;
1487 NodeToCallingFunc[Node] = Func;
1488 return;
1489 }
1490 }
1491
1492#ifndef NDEBUG
1493 // Find the node for the last stack id, which should be the same
1494 // across all calls recorded for this id, and is this node's id.
1495 uint64_t LastId = Node->OrigStackOrAllocId;
1496 ContextNode *LastNode = getNodeForStackId(LastId);
1497 // We should only have kept stack ids that had nodes.
1498 assert(LastNode);
1499 assert(LastNode == Node);
1500#else
1501 ContextNode *LastNode = Node;
1502#endif
1503
1504 // Compute the last node's context ids once, as they are shared by all calls
1505 // in this entry.
1506 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
1507
1508 bool PrevIterCreatedNode = false;
1509 bool CreatedNode = false;
1510 for (unsigned I = 0; I < Calls.size();
1511 I++, PrevIterCreatedNode = CreatedNode) {
1512 CreatedNode = false;
1513 auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
1514 // Skip any for which we didn't assign any ids; these don't get a node in
1515 // the graph.
1516 if (SavedContextIds.empty()) {
1517 // If this call has a matching call (located in the same function and
1518 // having the same stack ids), simply add it to the context node created
1519 // for its matching call earlier. These can be treated the same through
1520 // cloning and get updated at the same time.
1521 if (!CallToMatchingCall.contains(Call))
1522 continue;
1523 auto MatchingCall = CallToMatchingCall[Call];
1524 if (!NonAllocationCallToContextNodeMap.contains(MatchingCall)) {
1525 // This should only happen if we had a prior iteration, and it didn't
1526 // create a node because of the below recomputation of context ids
1527 // finding none remaining and continuing early.
1528 assert(I > 0 && !PrevIterCreatedNode);
1529 continue;
1530 }
1531 NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
1532 Call);
1533 continue;
1534 }
1535
1536 assert(LastId == Ids.back());
1537
1538 // Recompute the context ids for this stack id sequence (the
1539 // intersection of the context ids of the corresponding nodes).
1540 // Start with the ids we saved in the map for this call, which could be
1541 // duplicated context ids. We have to recompute because we might have had
1542 // overlap between the saved context ids for different last nodes, and
1543 // removed some of them already during the post order traversal.
1544 set_intersect(SavedContextIds, LastNodeContextIds);
1545 ContextNode *PrevNode = LastNode;
1546 bool Skip = false;
1547 // Iterate backwards through the stack Ids, starting after the last Id
1548 // in the list, which was handled once outside for all Calls.
1549 for (auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1550 auto Id = *IdIter;
1551 ContextNode *CurNode = getNodeForStackId(Id);
1552 // We should only have kept stack ids that had nodes and weren't
1553 // recursive.
1554 assert(CurNode);
1555 assert(!CurNode->Recursive);
1556
1557 auto *Edge = CurNode->findEdgeFromCaller(PrevNode);
1558 if (!Edge) {
1559 Skip = true;
1560 break;
1561 }
1562 PrevNode = CurNode;
1563
1564 // Update the context ids, which is the intersection of the ids along
1565 // all edges in the sequence.
1566 set_intersect(SavedContextIds, Edge->getContextIds());
1567
1568 // If we now have no context ids for clone, skip this call.
1569 if (SavedContextIds.empty()) {
1570 Skip = true;
1571 break;
1572 }
1573 }
1574 if (Skip)
1575 continue;
1576
1577 // Create new context node.
1578 ContextNode *NewNode = createNewNode(/*IsAllocation=*/false, Func, Call);
1579 NonAllocationCallToContextNodeMap[Call] = NewNode;
1580 CreatedNode = true;
1581 NewNode->AllocTypes = computeAllocType(SavedContextIds);
1582
1583 ContextNode *FirstNode = getNodeForStackId(Ids[0]);
1584 assert(FirstNode);
1585
1586 // Connect to callees of innermost stack frame in inlined call chain.
1587 // This updates context ids for FirstNode's callees to reflect those
1588 // moved to NewNode.
1589 connectNewNode(NewNode, FirstNode, /*TowardsCallee=*/true, SavedContextIds);
1590
1591 // Connect to callers of outermost stack frame in inlined call chain.
1592 // This updates context ids for LastNode's callers to reflect those
1593 // moved to NewNode.
1594 connectNewNode(NewNode, LastNode, /*TowardsCallee=*/false, SavedContextIds);
1595
1596 // Now we need to remove context ids from edges/nodes between First and
1597 // Last Node.
1598 PrevNode = nullptr;
1599 for (auto Id : Ids) {
1600 ContextNode *CurNode = getNodeForStackId(Id);
1601 // We should only have kept stack ids that had nodes.
1602 assert(CurNode);
1603
1604 // Remove the context ids moved to NewNode from CurNode, and the
1605 // edge from the prior node.
1606 if (PrevNode) {
1607 auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
1608 assert(PrevEdge);
1609 set_subtract(PrevEdge->getContextIds(), SavedContextIds);
1610 if (PrevEdge->getContextIds().empty())
1611 removeEdgeFromGraph(PrevEdge);
1612 }
1613 // Since we update the edges from leaf to tail, only look at the callee
1614 // edges. This isn't an alloc node, so if there are no callee edges, the
1615 // alloc type is None.
1616 CurNode->AllocTypes = CurNode->CalleeEdges.empty()
1617 ? (uint8_t)AllocationType::None
1618 : CurNode->computeAllocType();
1619 PrevNode = CurNode;
1620 }
1621 if (VerifyNodes) {
1622 checkNode<DerivedCCG, FuncTy, CallTy>(NewNode, /*CheckEdges=*/true);
1623 for (auto Id : Ids) {
1624 ContextNode *CurNode = getNodeForStackId(Id);
1625 // We should only have kept stack ids that had nodes.
1626 assert(CurNode);
1627 checkNode<DerivedCCG, FuncTy, CallTy>(CurNode, /*CheckEdges=*/true);
1628 }
1629 }
1630 }
1631}
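// Illustrative sketch (hypothetical stack ids): for a call whose callsite
// metadata lists stack ids [A, B] (B outermost) because of inlining, a single
// new node is created above to represent the inlined sequence. It is connected
// towards the callees of A's node and the callers of B's node, and the context
// ids moved to it are subtracted from the per-frame nodes and edges along
// A..B, whose alloc types are then recomputed.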
1632
1633template <typename DerivedCCG, typename FuncTy, typename CallTy>
1634void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
1635 // Map of stack id to all calls with that as the last (outermost caller)
1636 // callsite id that has a context node (some might not due to pruning
1637 // performed during matching of the allocation profile contexts).
1638 // The CallContextInfo contains the Call and a list of its stack ids with
1639 // ContextNodes, the function containing Call, and the set of context ids
1640 // the analysis will eventually identify for use in any new node created
1641 // for that callsite.
1642 DenseMap<uint64_t, std::vector<CallContextInfo>> StackIdToMatchingCalls;
1643 for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
1644 for (auto &Call : CallsWithMetadata) {
1645 // Ignore allocations, already handled.
1646 if (AllocationCallToContextNodeMap.count(Call))
1647 continue;
1648 auto StackIdsWithContextNodes =
1649 getStackIdsWithContextNodesForCall(Call.call());
1650 // If there were no nodes created for MIBs on allocs (maybe this was in
1651 // the unambiguous part of the MIB stack that was pruned), ignore.
1652 if (StackIdsWithContextNodes.empty())
1653 continue;
1654 // Otherwise, record this Call along with the list of ids for the last
1655 // (outermost caller) stack id with a node.
1656 StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
1657 {Call.call(), StackIdsWithContextNodes, Func, {}});
1658 }
1659 }
1660
1661 // First make a pass through all stack ids that correspond to a call,
1662 // as identified in the above loop. Compute the context ids corresponding to
1663 // each of these calls when they correspond to multiple stack ids due to
1664 // inlining. Perform any duplication of context ids required when
1665 // there is more than one call with the same stack ids. Their (possibly newly
1666 // duplicated) context ids are saved in the StackIdToMatchingCalls map.
1667 DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds;
1668 // Save a map from each call to any that are found to match it. I.e. located
1669 // in the same function and have the same (possibly pruned) stack ids. We use
1670 // this to avoid creating extra graph nodes as they can be treated the same.
1671 DenseMap<CallInfo, CallInfo> CallToMatchingCall;
1672 for (auto &It : StackIdToMatchingCalls) {
1673 auto &Calls = It.getSecond();
1674 // Skip single calls with a single stack id. These don't need a new node.
1675 if (Calls.size() == 1) {
1676 auto &Ids = Calls[0].StackIds;
1677 if (Ids.size() == 1)
1678 continue;
1679 }
1680 // In order to do the best and maximal matching of inlined calls to context
1681 // node sequences we will sort the vectors of stack ids in descending order
1682 // of length, and within each length, lexicographically by stack id. The
1683 // latter is so that we can specially handle calls that have identical stack
1684 // id sequences (either due to cloning or artificially because of the MIB
1685 // context pruning). Those with the same Ids are then sorted by function to
1686 // facilitate efficiently mapping them to the same context node.
1687 // Because the functions are pointers, to ensure a stable sort first assign
1688 // each function pointer to its first index in the Calls array, and then use
1689 // that to sort by.
1690 DenseMap<const FuncTy *, unsigned> FuncToIndex;
1691 for (const auto &[Idx, CallCtxInfo] : enumerate(Calls))
1692 FuncToIndex.insert({CallCtxInfo.Func, Idx});
1693 std::stable_sort(
1694 Calls.begin(), Calls.end(),
1695 [&FuncToIndex](const CallContextInfo &A, const CallContextInfo &B) {
1696 return A.StackIds.size() > B.StackIds.size() ||
1697 (A.StackIds.size() == B.StackIds.size() &&
1698 (A.StackIds < B.StackIds ||
1699 (A.StackIds == B.StackIds &&
1700 FuncToIndex[A.Func] < FuncToIndex[B.Func])));
1701 });
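// For illustration (hypothetical entries): the sort above orders
// {Ids=[A,B,C], Func=F1} before {Ids=[A,B], Func=F2} (longer id sequence
// first), breaks equal-length ties lexicographically by stack id, and groups
// calls with identical ids adjacently, ordering them by the first index at
// which their function appeared in Calls so the overall ordering is stable.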
1702
1703 // Find the node for the last stack id, which should be the same
1704 // across all calls recorded for this id, and is the id for this
1705 // entry in the StackIdToMatchingCalls map.
1706 uint64_t LastId = It.getFirst();
1707 ContextNode *LastNode = getNodeForStackId(LastId);
1708 // We should only have kept stack ids that had nodes.
1709 assert(LastNode);
1710
1711 if (LastNode->Recursive)
1712 continue;
1713
1714 // Initialize the context ids with the last node's. We will subsequently
1715 // refine the context ids by computing the intersection along all edges.
1716 DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
1717 assert(!LastNodeContextIds.empty());
1718
1719#ifndef NDEBUG
1720 // Save the set of functions seen for a particular set of the same stack
1721 // ids. This is used to ensure that they have been correctly sorted to be
1722 // adjacent in the Calls list, since we rely on that to efficiently place
1723 // all such matching calls onto the same context node.
1724 DenseSet<const FuncTy *> MatchingIdsFuncSet;
1725#endif
1726
1727 for (unsigned I = 0; I < Calls.size(); I++) {
1728 auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
1729 assert(SavedContextIds.empty());
1730 assert(LastId == Ids.back());
1731
1732#ifndef NDEBUG
1733 // If this call has a different set of ids than the last one, clear the
1734 // set used to ensure they are sorted properly.
1735 if (I > 0 && Ids != Calls[I - 1].StackIds)
1736 MatchingIdsFuncSet.clear();
1737#endif
1738
1739 // First compute the context ids for this stack id sequence (the
1740 // intersection of the context ids of the corresponding nodes).
1741 // Start with the remaining saved ids for the last node.
1742 assert(!LastNodeContextIds.empty());
1743 DenseSet<uint32_t> StackSequenceContextIds = LastNodeContextIds;
1744
1745 ContextNode *PrevNode = LastNode;
1746 ContextNode *CurNode = LastNode;
1747 bool Skip = false;
1748
1749 // Iterate backwards through the stack Ids, starting after the last Id
1750 // in the list, which was handled once outside for all Calls.
1751 for (auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
1752 auto Id = *IdIter;
1753 CurNode = getNodeForStackId(Id);
1754 // We should only have kept stack ids that had nodes.
1755 assert(CurNode);
1756
1757 if (CurNode->Recursive) {
1758 Skip = true;
1759 break;
1760 }
1761
1762 auto *Edge = CurNode->findEdgeFromCaller(PrevNode);
1763 // If there is no edge then the nodes belong to different MIB contexts,
1764 // and we should skip this inlined context sequence. For example, this
1765 // particular inlined context may include stack ids A->B, and we may
1766 // indeed have nodes for both A and B, but it is possible that they were
1767 // never profiled in sequence in a single MIB for any allocation (i.e.
1768 // we might have profiled an allocation that involves the callsite A,
1769 // but through a different one of its callee callsites, and we might
1770 // have profiled an allocation that involves callsite B, but reached
1771 // from a different caller callsite).
1772 if (!Edge) {
1773 Skip = true;
1774 break;
1775 }
1776 PrevNode = CurNode;
1777
1778 // Update the context ids, which is the intersection of the ids along
1779 // all edges in the sequence.
1780 set_intersect(StackSequenceContextIds, Edge->getContextIds());
1781
1782 // If we now have no context ids for clone, skip this call.
1783 if (StackSequenceContextIds.empty()) {
1784 Skip = true;
1785 break;
1786 }
1787 }
1788 if (Skip)
1789 continue;
1790
1791 // If some of this call's stack ids did not have corresponding nodes (due
1792 // to pruning), don't include any context ids for contexts that extend
1793 // beyond these nodes. Otherwise we would be matching part of unrelated /
1794 // not fully matching stack contexts. To do this, subtract any context ids
1795 // found in caller nodes of the last node found above.
1796 if (Ids.back() != getLastStackId(Call)) {
1797 for (const auto &PE : LastNode->CallerEdges) {
1798 set_subtract(StackSequenceContextIds, PE->getContextIds());
1799 if (StackSequenceContextIds.empty())
1800 break;
1801 }
1802 // If we now have no context ids for clone, skip this call.
1803 if (StackSequenceContextIds.empty())
1804 continue;
1805 }
1806
1807#ifndef NDEBUG
1808 // If the prior call had the same stack ids this set would not be empty.
1809 // Check if we already have a call that "matches" because it is located
1810 // in the same function. If the Calls list was sorted properly we should
1811 // not encounter this situation as all such entries should be adjacent
1812 // and processed in bulk further below.
1813 assert(!MatchingIdsFuncSet.contains(Func));
1814
1815 MatchingIdsFuncSet.insert(Func);
1816#endif
1817
1818 // Check if the next set of stack ids is the same (since the Calls vector
1819 // of tuples is sorted by the stack ids we can just look at the next one).
1820 // If so, save them in the CallToMatchingCall map so that they get
1821 // assigned to the same context node, and skip them.
1822 bool DuplicateContextIds = false;
1823 for (unsigned J = I + 1; J < Calls.size(); J++) {
1824 auto &CallCtxInfo = Calls[J];
1825 auto &NextIds = CallCtxInfo.StackIds;
1826 if (NextIds != Ids)
1827 break;
1828 auto *NextFunc = CallCtxInfo.Func;
1829 if (NextFunc != Func) {
1830 // We have another Call with the same ids but that cannot share this
1831 // node, must duplicate ids for it.
1832 DuplicateContextIds = true;
1833 break;
1834 }
1835 auto &NextCall = CallCtxInfo.Call;
1836 CallToMatchingCall[NextCall] = Call;
1837 // Update I so that it gets incremented correctly to skip this call.
1838 I = J;
1839 }
1840
1841 // If we don't have duplicate context ids, then we can assign all the
1842 // context ids computed for the original node sequence to this call.
1843 // If there are duplicate calls with the same stack ids then we synthesize
1844 // new context ids that are duplicates of the originals. These are
1845 // assigned to SavedContextIds, which is a reference into the map entry
1846 // for this call, allowing us to access these ids later on.
1847 OldToNewContextIds.reserve(OldToNewContextIds.size() +
1848 StackSequenceContextIds.size());
1849 SavedContextIds =
1850 DuplicateContextIds
1851 ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
1852 : StackSequenceContextIds;
1853 assert(!SavedContextIds.empty());
1854
1855 if (!DuplicateContextIds) {
1856 // Update saved last node's context ids to remove those that are
1857 // assigned to other calls, so that it is ready for the next call at
1858 // this stack id.
1859 set_subtract(LastNodeContextIds, StackSequenceContextIds);
1860 if (LastNodeContextIds.empty())
1861 break;
1862 }
1863 }
1864 }
1865
1866 // Propagate the duplicate context ids over the graph.
1867 propagateDuplicateContextIds(OldToNewContextIds);
1868
1869 if (VerifyCCG)
1870 check();
1871
1872 // Now perform a post-order traversal over the graph, starting with the
1873 // allocation nodes, essentially processing nodes from callers to callees.
1874 // For any that contains an id in the map, update the graph to contain new
1875 // nodes representing any inlining at interior callsites. Note we move the
1876 // associated context ids over to the new nodes.
1877 DenseSet<const ContextNode *> Visited;
1878 for (auto &Entry : AllocationCallToContextNodeMap)
1879 assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls,
1880 CallToMatchingCall);
1881 if (VerifyCCG)
1882 check();
1883}
1884
1885uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *Call) {
1886 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
1887 Call->getMetadata(LLVMContext::MD_callsite));
1888 return CallsiteContext.back();
1889}
1890
1891uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &Call) {
1892 assert(isa<CallsiteInfo *>(Call));
1893 CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
1894 CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(Call));
1895 // Need to convert index into stack id.
1896 return Index.getStackIdAtIndex(CallsiteContext.back());
1897}
1898
1899static const std::string MemProfCloneSuffix = ".memprof.";
1900
1901static std::string getMemProfFuncName(Twine Base, unsigned CloneNo) {
1902 // We use CloneNo == 0 to refer to the original version, which doesn't get
1903 // renamed with a suffix.
1904 if (!CloneNo)
1905 return Base.str();
1906 return (Base + MemProfCloneSuffix + Twine(CloneNo)).str();
1907}
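// Example of the clone naming scheme implemented above:
// getMemProfFuncName("foo", 0) yields "foo", while
// getMemProfFuncName("foo", 2) yields "foo.memprof.2".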
1908
1909static bool isMemProfClone(const Function &F) {
1910 return F.getName().contains(MemProfCloneSuffix);
1911}
1912
1913std::string ModuleCallsiteContextGraph::getLabel(const Function *Func,
1914 const Instruction *Call,
1915 unsigned CloneNo) const {
1916 return (Twine(Call->getFunction()->getName()) + " -> " +
1917 cast<CallBase>(Call)->getCalledFunction()->getName())
1918 .str();
1919}
1920
1921std::string IndexCallsiteContextGraph::getLabel(const FunctionSummary *Func,
1922 const IndexCall &Call,
1923 unsigned CloneNo) const {
1924 auto VI = FSToVIMap.find(Func);
1925 assert(VI != FSToVIMap.end());
1926 if (isa<AllocInfo *>(Call))
1927 return (VI->second.name() + " -> alloc").str();
1928 else {
1929 auto *Callsite = dyn_cast_if_present<CallsiteInfo *>(Call);
1930 return (VI->second.name() + " -> " +
1931 getMemProfFuncName(Callsite->Callee.name(),
1932 Callsite->Clones[CloneNo]))
1933 .str();
1934 }
1935}
1936
1937std::vector<uint64_t>
1938ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
1939 Instruction *Call) {
1940 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
1941 Call->getMetadata(LLVMContext::MD_callsite));
1942 return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(
1943 CallsiteContext);
1944}
1945
1946std::vector<uint64_t>
1947IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &Call) {
1948 assert(isa<CallsiteInfo *>(Call));
1949 CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
1950 CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(Call));
1951 return getStackIdsWithContextNodes<CallsiteInfo,
1952 SmallVector<unsigned>::const_iterator>(
1953 CallsiteContext);
1954}
1955
1956template <typename DerivedCCG, typename FuncTy, typename CallTy>
1957template <class NodeT, class IteratorT>
1958std::vector<uint64_t>
1959CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
1960 CallStack<NodeT, IteratorT> &CallsiteContext) {
1961 std::vector<uint64_t> StackIds;
1962 for (auto IdOrIndex : CallsiteContext) {
1963 auto StackId = getStackId(IdOrIndex);
1964 ContextNode *Node = getNodeForStackId(StackId);
1965 if (!Node)
1966 break;
1967 StackIds.push_back(StackId);
1968 }
1969 return StackIds;
1970}
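// Illustrative note: the walk above visits the callsite context from the
// leaf-most frame outward and stops at the first stack id with no context
// node, so the returned vector is the (possibly empty) prefix of stack ids
// that still have nodes after the pruning done while matching the allocation
// profile contexts.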
1971
1972ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
1973 Module &M,
1974 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter)
1975 : Mod(M), OREGetter(OREGetter) {
1976 for (auto &F : M) {
1977 std::vector<CallInfo> CallsWithMetadata;
1978 for (auto &BB : F) {
1979 for (auto &I : BB) {
1980 if (!isa<CallBase>(I))
1981 continue;
1982 if (auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof)) {
1983 CallsWithMetadata.push_back(&I);
1984 auto *AllocNode = addAllocNode(&I, &F);
1985 auto *CallsiteMD = I.getMetadata(LLVMContext::MD_callsite);
1986 assert(CallsiteMD);
1987 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(CallsiteMD);
1988 // Add all of the MIBs and their stack nodes.
1989 for (auto &MDOp : MemProfMD->operands()) {
1990 auto *MIBMD = cast<const MDNode>(MDOp);
1991 std::vector<ContextTotalSize> ContextSizeInfo;
1992 // Collect the context size information if it exists.
1993 if (MIBMD->getNumOperands() > 2) {
1994 for (unsigned I = 2; I < MIBMD->getNumOperands(); I++) {
1995 MDNode *ContextSizePair =
1996 dyn_cast<MDNode>(MIBMD->getOperand(I));
1997 assert(ContextSizePair->getNumOperands() == 2);
1998 uint64_t FullStackId = mdconst::dyn_extract<ConstantInt>(
1999 ContextSizePair->getOperand(0))
2000 ->getZExtValue();
2001 uint64_t TotalSize = mdconst::dyn_extract<ConstantInt>(
2002 ContextSizePair->getOperand(1))
2003 ->getZExtValue();
2004 ContextSizeInfo.push_back({FullStackId, TotalSize});
2005 }
2006 }
2007 MDNode *StackNode = getMIBStackNode(MIBMD);
2008 assert(StackNode);
2009 CallStack<MDNode, MDNode::op_iterator> StackContext(StackNode);
2010 addStackNodesForMIB<MDNode, MDNode::op_iterator>(
2011 AllocNode, StackContext, CallsiteContext,
2012 getMIBAllocType(MIBMD), ContextSizeInfo);
2013 }
2014 assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
2015 // Memprof and callsite metadata on memory allocations no longer
2016 // needed.
2017 I.setMetadata(LLVMContext::MD_memprof, nullptr);
2018 I.setMetadata(LLVMContext::MD_callsite, nullptr);
2019 }
2020 // For callsite metadata, add to list for this function for later use.
2021 else if (I.getMetadata(LLVMContext::MD_callsite)) {
2022 CallsWithMetadata.push_back(&I);
2023 }
2024 }
2025 }
2026 if (!CallsWithMetadata.empty())
2027 FuncToCallsWithMetadata[&F] = CallsWithMetadata;
2028 }
2029
2030 if (DumpCCG) {
2031 dbgs() << "CCG before updating call stack chains:\n";
2032 dbgs() << *this;
2033 }
2034
2035 if (ExportToDot)
2036 exportToDot("prestackupdate");
2037
2038 updateStackNodes();
2039
2040 handleCallsitesWithMultipleTargets();
2041
2042 // Strip off remaining callsite metadata, no longer needed.
2043 for (auto &FuncEntry : FuncToCallsWithMetadata)
2044 for (auto &Call : FuncEntry.second)
2045 Call.call()->setMetadata(LLVMContext::MD_callsite, nullptr);
2046}
2047
2048IndexCallsiteContextGraph::IndexCallsiteContextGraph(
2049 ModuleSummaryIndex &Index,
2050 llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
2051 isPrevailing)
2052 : Index(Index), isPrevailing(isPrevailing) {
2053 for (auto &I : Index) {
2054 auto VI = Index.getValueInfo(I);
2055 for (auto &S : VI.getSummaryList()) {
2056 // We should only add the prevailing nodes. Otherwise we may try to clone
2057 // in a weak copy that won't be linked (and may be different than the
2058 // prevailing version).
2059 // We only keep the memprof summary on the prevailing copy now when
2060 // building the combined index, as a space optimization, however don't
2061 // rely on this optimization. The linker doesn't resolve local linkage
2062 // values so don't check whether those are prevailing.
2063 if (!GlobalValue::isLocalLinkage(S->linkage()) &&
2064 !isPrevailing(VI.getGUID(), S.get()))
2065 continue;
2066 auto *FS = dyn_cast<FunctionSummary>(S.get());
2067 if (!FS)
2068 continue;
2069 std::vector<CallInfo> CallsWithMetadata;
2070 if (!FS->allocs().empty()) {
2071 for (auto &AN : FS->mutableAllocs()) {
2072 // This can happen because of recursion elimination handling that
2073 // currently exists in ModuleSummaryAnalysis. Skip these for now.
2074 // We still added them to the summary because we need to be able to
2075 // correlate properly in applyImport in the backends.
2076 if (AN.MIBs.empty())
2077 continue;
2078 IndexCall AllocCall(&AN);
2079 CallsWithMetadata.push_back(AllocCall);
2080 auto *AllocNode = addAllocNode(AllocCall, FS);
2081 // Pass an empty CallStack to the CallsiteContext (second)
2082 // parameter, since for ThinLTO we already collapsed out the inlined
2083 // stack ids on the allocation call during ModuleSummaryAnalysis.
2084 CallStack<MIBInfo, SmallVector<unsigned>::const_iterator>
2085 EmptyContext;
2086 unsigned I = 0;
2087 assert(
2088 AN.ContextSizeInfos.empty() ||
2089 AN.ContextSizeInfos.size() == AN.MIBs.size());
2090 // Now add all of the MIBs and their stack nodes.
2091 for (auto &MIB : AN.MIBs) {
2092 CallStack<MIBInfo, SmallVector<unsigned>::const_iterator>
2093 StackContext(&MIB);
2094 std::vector<ContextTotalSize> ContextSizeInfo;
2095 if (!AN.ContextSizeInfos.empty()) {
2096 for (auto [FullStackId, TotalSize] : AN.ContextSizeInfos[I])
2097 ContextSizeInfo.push_back({FullStackId, TotalSize});
2098 }
2099 addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
2100 AllocNode, StackContext, EmptyContext, MIB.AllocType,
2101 ContextSizeInfo);
2102 I++;
2103 }
2104 assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
2105 // Initialize version 0 on the summary alloc node to the current alloc
2106 // type, unless it has both types, in which case make it default, so
2107 // that in the case where we aren't able to clone, the original version
2108 // always ends up with the default allocation behavior.
2109 AN.Versions[0] = (uint8_t)allocTypeToUse(AllocNode->AllocTypes);
2110 }
2111 }
2112 // For callsite metadata, add to list for this function for later use.
2113 if (!FS->callsites().empty())
2114 for (auto &SN : FS->mutableCallsites()) {
2115 IndexCall StackNodeCall(&SN);
2116 CallsWithMetadata.push_back(StackNodeCall);
2117 }
2118
2119 if (!CallsWithMetadata.empty())
2120 FuncToCallsWithMetadata[FS] = CallsWithMetadata;
2121
2122 if (!FS->allocs().empty() || !FS->callsites().empty())
2123 FSToVIMap[FS] = VI;
2124 }
2125 }
2126
2127 if (DumpCCG) {
2128 dbgs() << "CCG before updating call stack chains:\n";
2129 dbgs() << *this;
2130 }
2131
2132 if (ExportToDot)
2133 exportToDot("prestackupdate");
2134
2135 updateStackNodes();
2136
2137 handleCallsitesWithMultipleTargets();
2138}
2139
2140template <typename DerivedCCG, typename FuncTy, typename CallTy>
2141void CallsiteContextGraph<DerivedCCG, FuncTy,
2142 CallTy>::handleCallsitesWithMultipleTargets() {
2143 // Look for and workaround callsites that call multiple functions.
2144 // This can happen for indirect calls, which need better handling, and in
2145 // rarer cases (e.g. macro expansion).
2146 // TODO: To fix this for indirect calls we will want to perform speculative
2147 // devirtualization using either the normal PGO info with ICP, or using the
2148 // information in the profiled MemProf contexts. We can do this prior to
2149 // this transformation for regular LTO, and for ThinLTO we can simulate that
2150 // effect in the summary and perform the actual speculative devirtualization
2151 // while cloning in the ThinLTO backend.
2152
2153 // Keep track of the new nodes synthesized for discovered tail calls missing
2154 // from the profiled contexts.
2155 MapVector<CallInfo, ContextNode *> TailCallToContextNodeMap;
2156
2157 std::vector<std::pair<CallInfo, ContextNode *>> NewCallToNode;
2158 for (auto &Entry : NonAllocationCallToContextNodeMap) {
2159 auto *Node = Entry.second;
2160 assert(Node->Clones.empty());
2161 // Check all node callees and see if in the same function.
2162 // We need to check all of the calls recorded in this Node, because in some
2163 // cases we may have had multiple calls with the same debug info calling
2164 // different callees. This can happen, for example, when an object is
2165 // constructed in the parameter list - the destructor call of the object has
2166 // the same debug info (line/col) as the call the object was passed to.
2167 // Here we will prune any that don't match all callee nodes.
2168 std::vector<CallInfo> AllCalls;
2169 AllCalls.reserve(Node->MatchingCalls.size() + 1);
2170 AllCalls.push_back(Node->Call);
2171 AllCalls.insert(AllCalls.end(), Node->MatchingCalls.begin(),
2172 Node->MatchingCalls.end());
2173
2174 // First see if we can partition the calls by callee function, creating new
2175 // nodes to host each set of calls calling the same callees. This is
2176 // necessary to support indirect calls with ThinLTO, for which we
2177 // synthesized CallsiteInfo records for each target. They will all have the
2178 // same callsite stack ids and would be sharing a context node at this
2179 // point. We need to perform separate cloning for each, which will be
2180 // applied along with speculative devirtualization in the ThinLTO backends
2181 // as needed. Note this does not currently support looking through tail
2182 // calls; it is unclear if we need that for indirect call targets.
2183 // First partition calls by callee func. Map indexed by func, value is
2184 // struct with list of matching calls, assigned node.
2185 if (partitionCallsByCallee(Node, AllCalls, NewCallToNode))
2186 continue;
2187
2188 auto It = AllCalls.begin();
2189 // Iterate through the calls until we find the first that matches.
2190 for (; It != AllCalls.end(); ++It) {
2191 auto ThisCall = *It;
2192 bool Match = true;
2193 for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();
2194 ++EI) {
2195 auto Edge = *EI;
2196 if (!Edge->Callee->hasCall())
2197 continue;
2198 assert(NodeToCallingFunc.count(Edge->Callee));
2199 // Check if the called function matches that of the callee node.
2200 if (!calleesMatch(ThisCall.call(), EI, TailCallToContextNodeMap)) {
2201 Match = false;
2202 break;
2203 }
2204 }
2205 // Found a call that matches the callee nodes, we can quit now.
2206 if (Match) {
2207 // If the first match is not the primary call on the Node, update it
2208 // now. We will update the list of matching calls further below.
2209 if (Node->Call != ThisCall) {
2210 Node->setCall(ThisCall);
2211 // We need to update the NonAllocationCallToContextNodeMap, but don't
2212 // want to do this during iteration over that map, so save the calls
2213 // that need updated entries.
2214 NewCallToNode.push_back({ThisCall, Node});
2215 }
2216 break;
2217 }
2218 }
2219 // We will update this list below (or leave it cleared if there was no
2220 // match found above).
2221 Node->MatchingCalls.clear();
2222 // If we hit the end of the AllCalls vector, no call matching the callee
2223 // nodes was found, clear the call information in the node.
2224 if (It == AllCalls.end()) {
2225 RemovedEdgesWithMismatchedCallees++;
2226 // Work around by setting Node to have a null call, so it gets
2227 // skipped during cloning. Otherwise assignFunctions will assert
2228 // because its data structures are not designed to handle this case.
2229 Node->setCall(CallInfo());
2230 continue;
2231 }
2232 // Now add back any matching calls that call the same function as the
2233 // matching primary call on Node.
2234 for (++It; It != AllCalls.end(); ++It) {
2235 auto ThisCall = *It;
2236 if (!sameCallee(Node->Call.call(), ThisCall.call()))
2237 continue;
2238 Node->MatchingCalls.push_back(ThisCall);
2239 }
2240 }
2241
2242 // Remove all mismatched nodes identified in the above loop from the node map
2243 // (checking whether they have a null call which is set above). For a
2244 // MapVector like NonAllocationCallToContextNodeMap it is much more efficient
2245 // to do the removal via remove_if than by individually erasing entries above.
2246 // Also remove any entries if we updated the node's primary call above.
2247 NonAllocationCallToContextNodeMap.remove_if([](const auto &it) {
2248 return !it.second->hasCall() || it.second->Call != it.first;
2249 });
2250
2251 // Add entries for any new primary calls recorded above.
2252 for (auto &[Call, Node] : NewCallToNode)
2253 NonAllocationCallToContextNodeMap[Call] = Node;
2254
2255 // Add the new nodes after the above loop so that the iteration is not
2256 // invalidated.
2257 for (auto &[Call, Node] : TailCallToContextNodeMap)
2258 NonAllocationCallToContextNodeMap[Call] = Node;
2259}
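// Illustrative sketch (hypothetical code): for a call such as foo(Bar(x)),
// the destructor of the Bar temporary and the call to foo can share the same
// debug line/column, so one context node may have recorded calls to different
// callees. The handling above partitions calls by callee where possible, keeps
// only the call(s) whose callee matches the node's profiled callee nodes, and
// clears the node's call (so it is skipped during cloning) when none match.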
2260
2261template <typename DerivedCCG, typename FuncTy, typename CallTy>
2262bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::partitionCallsByCallee(
2263 ContextNode *Node, ArrayRef<CallInfo> AllCalls,
2264 std::vector<std::pair<CallInfo, ContextNode *>> &NewCallToNode) {
2265 // Struct to keep track of all the calls having the same callee function,
2266 // and the node we eventually assign to them. Eventually we will record the
2267 // context node assigned to this group of calls.
2268 struct CallsWithSameCallee {
2269 std::vector<CallInfo> Calls;
2270 ContextNode *Node = nullptr;
2271 };
2272
2273 // First partition calls by callee function. Build map from each function
2274 // to the list of matching calls.
2275 DenseMap<const FuncTy *, CallsWithSameCallee> CalleeFuncToCallInfo;
2276 for (auto ThisCall : AllCalls) {
2277 auto *F = getCalleeFunc(ThisCall.call());
2278 if (F)
2279 CalleeFuncToCallInfo[F].Calls.push_back(ThisCall);
2280 }
2281
2282 // Next, walk through all callee edges. For each callee node, get its
2283 // containing function and see if it was recorded in the above map (meaning we
2284 // have at least one matching call). Build another map from each callee node
2285 // with a matching call to the structure instance created above containing all
2286 // the calls.
2287 DenseMap<const ContextNode *, CallsWithSameCallee *> CalleeNodeToCallInfo;
2288 for (const auto &Edge : Node->CalleeEdges) {
2289 if (!Edge->Callee->hasCall())
2290 continue;
2291 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
2292 if (CalleeFuncToCallInfo.contains(ProfiledCalleeFunc))
2293 CalleeNodeToCallInfo[Edge->Callee] =
2294 &CalleeFuncToCallInfo[ProfiledCalleeFunc];
2295 }
2296
2297 // If there are no entries in the second map, then there were no matching
2298 // calls/callees; nothing to do here. Return so we can go to the handling
2299 // that looks through tail calls.
2300 if (CalleeNodeToCallInfo.empty())
2301 return false;
2302
2303 // Walk through all callee edges again. Any and all callee edges that didn't
2304 // match any calls (callee not in the CalleeNodeToCallInfo map) are moved to a
2305 // new caller node (UnmatchedCalleesNode) which gets a null call so that it is
2306 // ignored during cloning. If it is in the map, then we use the node recorded
2307 // in that entry (creating it if needed), and move the callee edge to it.
2308 // The first callee will use the original node instead of creating a new one.
2309 // Note that any of the original calls on this node (in AllCalls) that didn't
2310 // have a callee function automatically get dropped from the node as part of
2311 // this process.
2312 ContextNode *UnmatchedCalleesNode = nullptr;
2313 // Track whether we already assigned original node to a callee.
2314 bool UsedOrigNode = false;
2315 assert(NodeToCallingFunc[Node]);
2316 for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();) {
2317 auto Edge = *EI;
2318 if (!Edge->Callee->hasCall()) {
2319 ++EI;
2320 continue;
2321 }
2322
2323 // Will be updated below to point to whatever (caller) node this callee edge
2324 // should be moved to.
2325 ContextNode *CallerNodeToUse = nullptr;
2326
2327 // Handle the case where there were no matching calls first. Move this
2328 // callee edge to the UnmatchedCalleesNode, creating it if needed.
2329 if (!CalleeNodeToCallInfo.contains(Edge->Callee)) {
2330 if (!UnmatchedCalleesNode)
2331 UnmatchedCalleesNode =
2332 createNewNode(/*IsAllocation=*/false, NodeToCallingFunc[Node]);
2333 CallerNodeToUse = UnmatchedCalleesNode;
2334 } else {
2335 // Look up the information recorded for this callee node, and use the
2336 // recorded caller node (creating it if needed).
2337 auto *Info = CalleeNodeToCallInfo[Edge->Callee];
2338 if (!Info->Node) {
2339 // If we haven't assigned any callees to the original node use it.
2340 if (!UsedOrigNode) {
2341 Info->Node = Node;
2342 // Clear the set of matching calls which will be updated below.
2343 Node->MatchingCalls.clear();
2344 UsedOrigNode = true;
2345 } else
2346 Info->Node =
2347 createNewNode(/*IsAllocation=*/false, NodeToCallingFunc[Node]);
2348 assert(!Info->Calls.empty());
2349 // The first call becomes the primary call for this caller node, and the
2350 // rest go in the matching calls list.
2351 Info->Node->setCall(Info->Calls.front());
2352 Info->Node->MatchingCalls.insert(Info->Node->MatchingCalls.end(),
2353 Info->Calls.begin() + 1,
2354 Info->Calls.end());
2355 // Save the primary call to node correspondence so that we can update
2356 // the NonAllocationCallToContextNodeMap, which is being iterated in the
2357 // caller of this function.
2358 NewCallToNode.push_back({Info->Node->Call, Info->Node});
2359 }
2360 CallerNodeToUse = Info->Node;
2361 }
2362
2363 // Don't need to move the edge if we are using the original node.
2364 if (CallerNodeToUse == Node) {
2365 ++EI;
2366 continue;
2367 }
2368
2369 moveCalleeEdgeToNewCaller(EI, CallerNodeToUse);
2370 }
2371 // Now that we are done moving edges, clean up any caller edges that ended
2372 // up with no type or context ids. During moveCalleeEdgeToNewCaller all
2373 // caller edges from Node are replicated onto the new callers, and it
2374 // simplifies the handling to leave them until we have moved all
2375 // edges/context ids.
2376 for (auto &I : CalleeNodeToCallInfo)
2377 removeNoneTypeCallerEdges(I.second->Node);
2378 if (UnmatchedCalleesNode)
2379 removeNoneTypeCallerEdges(UnmatchedCalleesNode);
2380 removeNoneTypeCallerEdges(Node);
2381
2382 return true;
2383}
2384
2385uint64_t ModuleCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const {
2386 // In the Module (IR) case this is already the Id.
2387 return IdOrIndex;
2388}
2389
2390uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const {
2391 // In the Index case this is an index into the stack id list in the summary
2392 // index, convert it to an Id.
2393 return Index.getStackIdAtIndex(IdOrIndex);
2394}
2395
2396template <typename DerivedCCG, typename FuncTy, typename CallTy>
2397bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
2398 CallTy Call, EdgeIter &EI,
2399 MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap) {
2400 auto Edge = *EI;
2401 const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
2402 const FuncTy *CallerFunc = NodeToCallingFunc[Edge->Caller];
2403 // Will be populated in order of callee to caller if we find a chain of tail
2404 // calls between the profiled caller and callee.
2405 std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain;
2406 if (!calleeMatchesFunc(Call, ProfiledCalleeFunc, CallerFunc,
2407 FoundCalleeChain))
2408 return false;
2409
2410 // The usual case where the profiled callee matches that of the IR/summary.
2411 if (FoundCalleeChain.empty())
2412 return true;
2413
2414 auto AddEdge = [Edge, &EI](ContextNode *Caller, ContextNode *Callee) {
2415 auto *CurEdge = Callee->findEdgeFromCaller(Caller);
2416 // If there is already an edge between these nodes, simply update it and
2417 // return.
2418 if (CurEdge) {
2419 CurEdge->ContextIds.insert(Edge->ContextIds.begin(),
2420 Edge->ContextIds.end());
2421 CurEdge->AllocTypes |= Edge->AllocTypes;
2422 return;
2423 }
2424 // Otherwise, create a new edge and insert it into the caller and callee
2425 // lists.
2426 auto NewEdge = std::make_shared<ContextEdge>(
2427 Callee, Caller, Edge->AllocTypes, Edge->ContextIds);
2428 Callee->CallerEdges.push_back(NewEdge);
2429 if (Caller == Edge->Caller) {
2430 // If we are inserting the new edge into the current edge's caller, insert
2431 // the new edge before the current iterator position, and then increment
2432 // back to the current edge.
2433 EI = Caller->CalleeEdges.insert(EI, NewEdge);
2434 ++EI;
2435 assert(*EI == Edge &&
2436 "Iterator position not restored after insert and increment");
2437 } else
2438 Caller->CalleeEdges.push_back(NewEdge);
2439 };
2440
2441 // Create new nodes for each found callee and connect in between the profiled
2442 // caller and callee.
2443 auto *CurCalleeNode = Edge->Callee;
2444 for (auto &[NewCall, Func] : FoundCalleeChain) {
2445 ContextNode *NewNode = nullptr;
2446 // First check if we have already synthesized a node for this tail call.
2447 if (TailCallToContextNodeMap.count(NewCall)) {
2448 NewNode = TailCallToContextNodeMap[NewCall];
2449 NewNode->AllocTypes |= Edge->AllocTypes;
2450 } else {
2451 FuncToCallsWithMetadata[Func].push_back({NewCall});
2452 // Create Node and record node info.
2453 NewNode = createNewNode(/*IsAllocation=*/false, Func, NewCall);
2454 TailCallToContextNodeMap[NewCall] = NewNode;
2455 NewNode->AllocTypes = Edge->AllocTypes;
2456 }
2457
2458 // Hook up node to its callee node
2459 AddEdge(NewNode, CurCalleeNode);
2460
2461 CurCalleeNode = NewNode;
2462 }
2463
2464 // Hook up edge's original caller to new callee node.
2465 AddEdge(Edge->Caller, CurCalleeNode);
2466
2467#ifndef NDEBUG
2468 // Save this because Edge's fields get cleared below when removed.
2469 auto *Caller = Edge->Caller;
2470#endif
2471
2472 // Remove old edge
2473 removeEdgeFromGraph(Edge.get(), &EI, /*CalleeIter=*/true);
2474
2475 // To simplify the increment of EI in the caller, subtract one from EI.
2476 // In the final AddEdge call we would have either added a new callee edge,
2477 // to Edge->Caller, or found an existing one. Either way we are guaranteed
2478 // that there is at least one callee edge.
2479 assert(!Caller->CalleeEdges.empty());
2480 --EI;
2481
2482 return true;
2483}
2484
2485bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2486 const Function *ProfiledCallee, Value *CurCallee, unsigned Depth,
2487 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
2488 bool &FoundMultipleCalleeChains) {
2489 // Stop recursive search if we have already explored the maximum specified
2490 // depth.
2491 if (Depth > TailCallSearchDepth)
2492 return false;
2493
2494 auto SaveCallsiteInfo = [&](Instruction *Callsite, Function *F) {
2495 FoundCalleeChain.push_back({Callsite, F});
2496 };
2497
2498 auto *CalleeFunc = dyn_cast<Function>(CurCallee);
2499 if (!CalleeFunc) {
2500 auto *Alias = dyn_cast<GlobalAlias>(CurCallee);
2501 assert(Alias);
2502 CalleeFunc = dyn_cast<Function>(Alias->getAliasee());
2503 assert(CalleeFunc);
2504 }
2505
2506 // Look for tail calls in this function, and check if they either call the
2507 // profiled callee directly, or indirectly (via a recursive search).
2508 // Only succeed if there is a single unique tail call chain found between the
2509 // profiled caller and callee, otherwise we could perform incorrect cloning.
2510 bool FoundSingleCalleeChain = false;
2511 for (auto &BB : *CalleeFunc) {
2512 for (auto &I : BB) {
2513 auto *CB = dyn_cast<CallBase>(&I);
2514 if (!CB || !CB->isTailCall())
2515 continue;
2516 auto *CalledValue = CB->getCalledOperand();
2517 auto *CalledFunction = CB->getCalledFunction();
2518 if (CalledValue && !CalledFunction) {
2519 CalledValue = CalledValue->stripPointerCasts();
2520 // Stripping pointer casts can reveal a called function.
2521 CalledFunction = dyn_cast<Function>(CalledValue);
2522 }
2523 // Check if this is an alias to a function. If so, get the
2524 // called aliasee for the checks below.
2525 if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
2526 assert(!CalledFunction &&
2527 "Expected null called function in callsite for alias");
2528 CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
2529 }
2530 if (!CalledFunction)
2531 continue;
2532 if (CalledFunction == ProfiledCallee) {
2533 if (FoundSingleCalleeChain) {
2534 FoundMultipleCalleeChains = true;
2535 return false;
2536 }
2537 FoundSingleCalleeChain = true;
2538 FoundProfiledCalleeCount++;
2539 FoundProfiledCalleeDepth += Depth;
2540 if (Depth > FoundProfiledCalleeMaxDepth)
2541 FoundProfiledCalleeMaxDepth = Depth;
2542 SaveCallsiteInfo(&I, CalleeFunc);
2543 } else if (findProfiledCalleeThroughTailCalls(
2544 ProfiledCallee, CalledFunction, Depth + 1,
2545 FoundCalleeChain, FoundMultipleCalleeChains)) {
2546 // findProfiledCalleeThroughTailCalls should not have returned
2547 // true if FoundMultipleCalleeChains.
2548 assert(!FoundMultipleCalleeChains);
2549 if (FoundSingleCalleeChain) {
2550 FoundMultipleCalleeChains = true;
2551 return false;
2552 }
2553 FoundSingleCalleeChain = true;
2554 SaveCallsiteInfo(&I, CalleeFunc);
2555 } else if (FoundMultipleCalleeChains)
2556 return false;
2557 }
2558 }
2559
2560 return FoundSingleCalleeChain;
2561}
2562
2563const Function *ModuleCallsiteContextGraph::getCalleeFunc(Instruction *Call) {
2564 auto *CB = dyn_cast<CallBase>(Call);
2565 if (!CB->getCalledOperand() || CB->isIndirectCall())
2566 return nullptr;
2567 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
2568 auto *Alias = dyn_cast<GlobalAlias>(CalleeVal);
2569 if (Alias)
2570 return dyn_cast<Function>(Alias->getAliasee());
2571 return dyn_cast<Function>(CalleeVal);
2572}
2573
2574bool ModuleCallsiteContextGraph::calleeMatchesFunc(
2575 Instruction *Call, const Function *Func, const Function *CallerFunc,
2576 std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) {
2577 auto *CB = dyn_cast<CallBase>(Call);
2578 if (!CB->getCalledOperand() || CB->isIndirectCall())
2579 return false;
2580 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
2581 auto *CalleeFunc = dyn_cast<Function>(CalleeVal);
2582 if (CalleeFunc == Func)
2583 return true;
2584 auto *Alias = dyn_cast<GlobalAlias>(CalleeVal);
2585 if (Alias && Alias->getAliasee() == Func)
2586 return true;
2587
2588 // Recursively search for the profiled callee through tail calls starting with
2589 // the actual Callee. The discovered tail call chain is saved in
2590 // FoundCalleeChain, and we will fixup the graph to include these callsites
2591 // after returning.
2592 // FIXME: We will currently redo the same recursive walk if we find the same
2593 // mismatched callee from another callsite. We can improve this with more
2594 // bookkeeping of the created chain of new nodes for each mismatch.
2595 unsigned Depth = 1;
2596 bool FoundMultipleCalleeChains = false;
2597 if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal, Depth,
2598 FoundCalleeChain,
2599 FoundMultipleCalleeChains)) {
2600 LLVM_DEBUG(dbgs() << "Not found through unique tail call chain: "
2601 << Func->getName() << " from " << CallerFunc->getName()
2602 << " that actually called " << CalleeVal->getName()
2603 << (FoundMultipleCalleeChains
2604 ? " (found multiple possible chains)"
2605 : "")
2606 << "\n");
2607 if (FoundMultipleCalleeChains)
2608 FoundProfiledCalleeNonUniquelyCount++;
2609 return false;
2610 }
2611
2612 return true;
2613}
2614
2615bool ModuleCallsiteContextGraph::sameCallee(Instruction *Call1,
2616 Instruction *Call2) {
2617 auto *CB1 = cast<CallBase>(Call1);
2618 if (!CB1->getCalledOperand() || CB1->isIndirectCall())
2619 return false;
2620 auto *CalleeVal1 = CB1->getCalledOperand()->stripPointerCasts();
2621 auto *CalleeFunc1 = dyn_cast<Function>(CalleeVal1);
2622 auto *CB2 = cast<CallBase>(Call2);
2623 if (!CB2->getCalledOperand() || CB2->isIndirectCall())
2624 return false;
2625 auto *CalleeVal2 = CB2->getCalledOperand()->stripPointerCasts();
2626 auto *CalleeFunc2 = dyn_cast<Function>(CalleeVal2);
2627 return CalleeFunc1 == CalleeFunc2;
2628}
2629
2630bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
2631 ValueInfo ProfiledCallee, ValueInfo CurCallee, unsigned Depth,
2632 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
2633 bool &FoundMultipleCalleeChains) {
2634 // Stop recursive search if we have already explored the maximum specified
2635 // depth.
2636 if (Depth > TailCallSearchDepth)
2637 return false;
2638
2639 auto CreateAndSaveCallsiteInfo = [&](ValueInfo Callee, FunctionSummary *FS) {
2640 // Make a CallsiteInfo for each discovered callee, if one hasn't already
2641 // been synthesized.
2642 if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) ||
2643 !FunctionCalleesToSynthesizedCallsiteInfos[FS].count(Callee))
2644 // StackIds is empty (we don't have debug info available in the index for
2645 // these callsites)
2646 FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee] =
2647 std::make_unique<CallsiteInfo>(Callee, SmallVector<unsigned>());
2648 CallsiteInfo *NewCallsiteInfo =
2649 FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee].get();
2650 FoundCalleeChain.push_back({NewCallsiteInfo, FS});
2651 };
2652
2653 // Look for tail calls in this function, and check if they either call the
2654 // profiled callee directly, or indirectly (via a recursive search).
2655 // Only succeed if there is a single unique tail call chain found between the
2656 // profiled caller and callee, otherwise we could perform incorrect cloning.
2657 bool FoundSingleCalleeChain = false;
2658 for (auto &S : CurCallee.getSummaryList()) {
2659 if (!GlobalValue::isLocalLinkage(S->linkage()) &&
2660 !isPrevailing(CurCallee.getGUID(), S.get()))
2661 continue;
2662 auto *FS = dyn_cast<FunctionSummary>(S->getBaseObject());
2663 if (!FS)
2664 continue;
2665 auto FSVI = CurCallee;
2666 auto *AS = dyn_cast<AliasSummary>(S.get());
2667 if (AS)
2668 FSVI = AS->getAliaseeVI();
2669 for (auto &CallEdge : FS->calls()) {
2670 if (!CallEdge.second.hasTailCall())
2671 continue;
2672 if (CallEdge.first == ProfiledCallee) {
2673 if (FoundSingleCalleeChain) {
2674 FoundMultipleCalleeChains = true;
2675 return false;
2676 }
2677 FoundSingleCalleeChain = true;
2678 FoundProfiledCalleeCount++;
2679 FoundProfiledCalleeDepth += Depth;
2680 if (Depth > FoundProfiledCalleeMaxDepth)
2681 FoundProfiledCalleeMaxDepth = Depth;
2682 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
2683 // Add FS to FSToVIMap in case it isn't already there.
2684 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2685 FSToVIMap[FS] = FSVI;
2686 } else if (findProfiledCalleeThroughTailCalls(
2687 ProfiledCallee, CallEdge.first, Depth + 1,
2688 FoundCalleeChain, FoundMultipleCalleeChains)) {
2689 // findProfiledCalleeThroughTailCalls should not have returned
2690 // true if FoundMultipleCalleeChains.
2691 assert(!FoundMultipleCalleeChains);
2692 if (FoundSingleCalleeChain) {
2693 FoundMultipleCalleeChains = true;
2694 return false;
2695 }
2696 FoundSingleCalleeChain = true;
2697 CreateAndSaveCallsiteInfo(CallEdge.first, FS);
2698 // Add FS to FSToVIMap in case it isn't already there.
2699 assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
2700 FSToVIMap[FS] = FSVI;
2701 } else if (FoundMultipleCalleeChains)
2702 return false;
2703 }
2704 }
2705
2706 return FoundSingleCalleeChain;
2707}
2708
2709const FunctionSummary *
2710IndexCallsiteContextGraph::getCalleeFunc(IndexCall &Call) {
2711 ValueInfo Callee = dyn_cast_if_present<CallsiteInfo *>(Call)->Callee;
2712 if (Callee.getSummaryList().empty())
2713 return nullptr;
2714 return dyn_cast<FunctionSummary>(Callee.getSummaryList()[0]->getBaseObject());
2715}
2716
2717bool IndexCallsiteContextGraph::calleeMatchesFunc(
2718 IndexCall &Call, const FunctionSummary *Func,
2719 const FunctionSummary *CallerFunc,
2720 std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) {
2721 ValueInfo Callee = dyn_cast_if_present<CallsiteInfo *>(Call)->Callee;
2722 // If there is no summary list then this is a call to an externally defined
2723 // symbol.
2724 AliasSummary *Alias =
2725 Callee.getSummaryList().empty()
2726 ? nullptr
2727 : dyn_cast<AliasSummary>(Callee.getSummaryList()[0].get());
2728 assert(FSToVIMap.count(Func));
2729 auto FuncVI = FSToVIMap[Func];
2730 if (Callee == FuncVI ||
2731 // If callee is an alias, check the aliasee, since only function
2732 // summary base objects will contain the stack node summaries and thus
2733 // get a context node.
2734 (Alias && Alias->getAliaseeVI() == FuncVI))
2735 return true;
2736
2737 // Recursively search for the profiled callee through tail calls starting with
2738 // the actual Callee. The discovered tail call chain is saved in
2739 // FoundCalleeChain, and we will fixup the graph to include these callsites
2740 // after returning.
2741 // FIXME: We will currently redo the same recursive walk if we find the same
2742 // mismatched callee from another callsite. We can improve this with more
2743 // bookkeeping of the created chain of new nodes for each mismatch.
2744 unsigned Depth = 1;
2745 bool FoundMultipleCalleeChains = false;
2746 if (!findProfiledCalleeThroughTailCalls(
2747 FuncVI, Callee, Depth, FoundCalleeChain, FoundMultipleCalleeChains)) {
2748 LLVM_DEBUG(dbgs() << "Not found through unique tail call chain: " << FuncVI
2749 << " from " << FSToVIMap[CallerFunc]
2750 << " that actually called " << Callee
2751 << (FoundMultipleCalleeChains
2752 ? " (found multiple possible chains)"
2753 : "")
2754 << "\n");
2755 if (FoundMultipleCalleeChains)
2756 FoundProfiledCalleeNonUniquelyCount++;
2757 return false;
2758 }
2759
2760 return true;
2761}
2762
2763bool IndexCallsiteContextGraph::sameCallee(IndexCall &Call1, IndexCall &Call2) {
2764 ValueInfo Callee1 = dyn_cast_if_present<CallsiteInfo *>(Call1)->Callee;
2765 ValueInfo Callee2 = dyn_cast_if_present<CallsiteInfo *>(Call2)->Callee;
2766 return Callee1 == Callee2;
2767}
2768
2769template <typename DerivedCCG, typename FuncTy, typename CallTy>
2770void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
2771 const {
2772 print(dbgs());
2773 dbgs() << "\n";
2774}
2775
2776template <typename DerivedCCG, typename FuncTy, typename CallTy>
2777void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
2778 raw_ostream &OS) const {
2779 OS << "Node " << this << "\n";
2780 OS << "\t";
2781 printCall(OS);
2782 if (Recursive)
2783 OS << " (recursive)";
2784 OS << "\n";
2785 if (!MatchingCalls.empty()) {
2786 OS << "\tMatchingCalls:\n";
2787 for (auto &MatchingCall : MatchingCalls) {
2788 OS << "\t";
2789 MatchingCall.print(OS);
2790 OS << "\n";
2791 }
2792 }
2793 OS << "\tAllocTypes: " << getAllocTypeString(AllocTypes) << "\n";
2794 OS << "\tContextIds:";
2795 // Make a copy of the computed context ids that we can sort for stability.
2796 auto ContextIds = getContextIds();
2797 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
2798 std::sort(SortedIds.begin(), SortedIds.end());
2799 for (auto Id : SortedIds)
2800 OS << " " << Id;
2801 OS << "\n";
2802 OS << "\tCalleeEdges:\n";
2803 for (auto &Edge : CalleeEdges)
2804 OS << "\t\t" << *Edge << "\n";
2805 OS << "\tCallerEdges:\n";
2806 for (auto &Edge : CallerEdges)
2807 OS << "\t\t" << *Edge << "\n";
2808 if (!Clones.empty()) {
2809 OS << "\tClones: ";
2810 ListSeparator LS;
2811 for (auto *Clone : Clones)
2812 OS << LS << Clone;
2813 OS << "\n";
2814 } else if (CloneOf) {
2815 OS << "\tClone of " << CloneOf << "\n";
2816 }
2817}
2818
2819template <typename DerivedCCG, typename FuncTy, typename CallTy>
2820void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
2821 const {
2822 print(dbgs());
2823 dbgs() << "\n";
2824}
2825
2826template <typename DerivedCCG, typename FuncTy, typename CallTy>
2827void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
2828 raw_ostream &OS) const {
2829 OS << "Edge from Callee " << Callee << " to Caller: " << Caller
2830 << " AllocTypes: " << getAllocTypeString(AllocTypes);
2831 OS << " ContextIds:";
2832 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
2833 std::sort(SortedIds.begin(), SortedIds.end());
2834 for (auto Id : SortedIds)
2835 OS << " " << Id;
2836}
2837
2838template <typename DerivedCCG, typename FuncTy, typename CallTy>
2839void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump() const {
2840 print(dbgs());
2841}
2842
2843template <typename DerivedCCG, typename FuncTy, typename CallTy>
2844void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
2845 raw_ostream &OS) const {
2846 OS << "Callsite Context Graph:\n";
2847 using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2848 for (const auto Node : nodes<GraphType>(this)) {
2849 if (Node->isRemoved())
2850 continue;
2851 Node->print(OS);
2852 OS << "\n";
2853 }
2854}
2855
2856template <typename DerivedCCG, typename FuncTy, typename CallTy>
2857void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
2858 raw_ostream &OS) const {
2859 using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2860 for (const auto Node : nodes<GraphType>(this)) {
2861 if (Node->isRemoved())
2862 continue;
2863 if (!Node->IsAllocation)
2864 continue;
2865 DenseSet<uint32_t> ContextIds = Node->getContextIds();
2866 auto AllocTypeFromCall = getAllocationCallType(Node->Call);
2867 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
2868 std::sort(SortedIds.begin(), SortedIds.end());
2869 for (auto Id : SortedIds) {
2870 auto TypeI = ContextIdToAllocationType.find(Id);
2871 assert(TypeI != ContextIdToAllocationType.end());
2872 auto CSI = ContextIdToContextSizeInfos.find(Id);
2873 if (CSI != ContextIdToContextSizeInfos.end()) {
2874 for (auto &Info : CSI->second) {
2875 OS << "MemProf hinting: "
2876 << getAllocTypeString((uint8_t)TypeI->second)
2877 << " full allocation context " << Info.FullStackId
2878 << " with total size " << Info.TotalSize << " is "
2879 << getAllocTypeString(Node->AllocTypes) << " after cloning";
2880 if (allocTypeToUse(Node->AllocTypes) != AllocTypeFromCall)
2881 OS << " marked " << getAllocTypeString((uint8_t)AllocTypeFromCall)
2882 << " due to cold byte percent";
2883 OS << "\n";
2884 }
2885 }
2886 }
2887 }
2888}
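// A line emitted by printTotalSizes above looks roughly as follows (the id
// and sizes here are illustrative, not taken from a real profile):
//   MemProf hinting: Cold full allocation context 1234567890 with total size
//   4096 is Cold after cloning
// The optional " marked <type> due to cold byte percent" suffix is appended
// only when the hint applied to the call differs from the cloned node's type.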
2889
2890template <typename DerivedCCG, typename FuncTy, typename CallTy>
2891void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check() const {
2892 using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2893 for (const auto Node : nodes<GraphType>(this)) {
2894 checkNode<DerivedCCG, FuncTy, CallTy>(Node, /*CheckEdges=*/false);
2895 for (auto &Edge : Node->CallerEdges)
2896 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
2897 }
2898}
2899
2900template <typename DerivedCCG, typename FuncTy, typename CallTy>
2901struct GraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *> {
2902 using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2903 using NodeRef = const ContextNode<DerivedCCG, FuncTy, CallTy> *;
2904
2905 using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
2906 static NodeRef getNode(const NodePtrTy &P) { return P.get(); }
2907
2907
2908 using nodes_iterator =
2909 mapped_iterator<typename std::vector<NodePtrTy>::const_iterator,
2910 decltype(&getNode)>;
2911
2912 static nodes_iterator nodes_begin(GraphType G) {
2913 return nodes_iterator(G->NodeOwner.begin(), &getNode);
2914 }
2915
2916 static nodes_iterator nodes_end(GraphType G) {
2917 return nodes_iterator(G->NodeOwner.end(), &getNode);
2918 }
2919
2920 static NodeRef getEntryNode(GraphType G) {
2921 return G->NodeOwner.begin()->get();
2922 }
2923
2924 using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
2925 static const ContextNode<DerivedCCG, FuncTy, CallTy> *
2926 GetCallee(const EdgePtrTy &P) {
2927 return P->Callee;
2928 }
2929
2930 using ChildIteratorType =
2931 mapped_iterator<typename std::vector<std::shared_ptr<ContextEdge<
2932 DerivedCCG, FuncTy, CallTy>>>::const_iterator,
2933 decltype(&GetCallee)>;
2934
2935 static ChildIteratorType child_begin(NodeRef N) {
2936 return ChildIteratorType(N->CalleeEdges.begin(), &GetCallee);
2937 }
2938
2939 static ChildIteratorType child_end(NodeRef N) {
2940 return ChildIteratorType(N->CalleeEdges.end(), &GetCallee);
2941 }
2942};
2943
2944template <typename DerivedCCG, typename FuncTy, typename CallTy>
2945struct DOTGraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>
2946 : public DefaultDOTGraphTraits {
2947 DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}
2948
2949 using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
2950 using GTraits = GraphTraits<GraphType>;
2951 using NodeRef = typename GTraits::NodeRef;
2952 using ChildIteratorType = typename GTraits::ChildIteratorType;
2953
2954 static std::string getNodeLabel(NodeRef Node, GraphType G) {
2955 std::string LabelString =
2956 (Twine("OrigId: ") + (Node->IsAllocation ? "Alloc" : "") +
2957 Twine(Node->OrigStackOrAllocId))
2958 .str();
2959 LabelString += "\n";
2960 if (Node->hasCall()) {
2961 auto Func = G->NodeToCallingFunc.find(Node);
2962 assert(Func != G->NodeToCallingFunc.end());
2963 LabelString +=
2964 G->getLabel(Func->second, Node->Call.call(), Node->Call.cloneNo());
2965 } else {
2966 LabelString += "null call";
2967 if (Node->Recursive)
2968 LabelString += " (recursive)";
2969 else
2970 LabelString += " (external)";
2971 }
2972 return LabelString;
2973 }
2974
2975 static std::string getNodeAttributes(NodeRef Node, GraphType) {
2976 std::string AttributeString = (Twine("tooltip=\"") + getNodeId(Node) + " " +
2977 getContextIds(Node->getContextIds()) + "\"")
2978 .str();
2979 AttributeString +=
2980 (Twine(",fillcolor=\"") + getColor(Node->AllocTypes) + "\"").str();
2981 AttributeString += ",style=\"filled\"";
2982 if (Node->CloneOf) {
2983 AttributeString += ",color=\"blue\"";
2984 AttributeString += ",style=\"filled,bold,dashed\"";
2985 } else
2986 AttributeString += ",style=\"filled\"";
2987 return AttributeString;
2988 }
2989
2990 static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter,
2991 GraphType) {
2992 auto &Edge = *(ChildIter.getCurrent());
2993 return (Twine("tooltip=\"") + getContextIds(Edge->ContextIds) + "\"" +
2994 Twine(",fillcolor=\"") + getColor(Edge->AllocTypes) + "\"")
2995 .str();
2996 }
2997
2998 // Since the NodeOwner list includes nodes that are no longer connected to
2999 // the graph, skip them here.
3000 static bool isNodeHidden(NodeRef Node, GraphType) {
3001 return Node->isRemoved();
3002 }
3003
3004private:
3005 static std::string getContextIds(const DenseSet<uint32_t> &ContextIds) {
3006 std::string IdString = "ContextIds:";
3007 if (ContextIds.size() < 100) {
3008 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
3009 std::sort(SortedIds.begin(), SortedIds.end());
3010 for (auto Id : SortedIds)
3011 IdString += (" " + Twine(Id)).str();
3012 } else {
3013 IdString += (" (" + Twine(ContextIds.size()) + " ids)").str();
3014 }
3015 return IdString;
3016 }
3017
3018 static std::string getColor(uint8_t AllocTypes) {
3019 if (AllocTypes == (uint8_t)AllocationType::NotCold)
3020 // Color "brown1" actually looks like a lighter red.
3021 return "brown1";
3022 if (AllocTypes == (uint8_t)AllocationType::Cold)
3023 return "cyan";
3024 if (AllocTypes ==
3025 ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
3026 // Lighter purple.
3027 return "mediumorchid1";
3028 return "gray";
3029 }
3030
3031 static std::string getNodeId(NodeRef Node) {
3032 std::stringstream SStream;
3033 SStream << std::hex << "N0x" << (unsigned long long)Node;
3034 std::string Result = SStream.str();
3035 return Result;
3036 }
3037};
3038
3039template <typename DerivedCCG, typename FuncTy, typename CallTy>
3040void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
3041 std::string Label) const {
3042 WriteGraph(this, "", false, Label,
3043 DotFilePathPrefix + "ccg." + Label + ".dot");
3044}
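// The exported file is named "<DotFilePathPrefix>ccg.<Label>.dot" per the
// WriteGraph call above. Assuming Graphviz is installed, a graph exported
// with a label such as "postbuild" can then be rendered with, for example:
//   dot -Tsvg ccg.postbuild.dot -o ccg.postbuild.svg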
3045
3046template <typename DerivedCCG, typename FuncTy, typename CallTy>
3047typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
3048CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
3049 const std::shared_ptr<ContextEdge> &Edge, EdgeIter *CallerEdgeI,
3050 DenseSet<uint32_t> ContextIdsToMove) {
3051 ContextNode *Node = Edge->Callee;
3052 assert(NodeToCallingFunc.count(Node));
3053 ContextNode *Clone =
3054 createNewNode(Node->IsAllocation, NodeToCallingFunc[Node], Node->Call);
3055 Node->addClone(Clone);
3056 Clone->MatchingCalls = Node->MatchingCalls;
3057 moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI, /*NewClone=*/true,
3058 ContextIdsToMove);
3059 return Clone;
3060}
3061
3062template <typename DerivedCCG, typename FuncTy, typename CallTy>
3063void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3064 moveEdgeToExistingCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
3065 ContextNode *NewCallee, EdgeIter *CallerEdgeI,
3066 bool NewClone,
3067 DenseSet<uint32_t> ContextIdsToMove) {
3068 // NewCallee and Edge's current callee must be clones of the same original
3069 // node (Edge's current callee may be the original node too).
3070 assert(NewCallee->getOrigNode() == Edge->Callee->getOrigNode());
3071
3072 ContextNode *OldCallee = Edge->Callee;
3073
3074 // We might already have an edge to the new callee from earlier cloning for a
3075 // different allocation. If one exists we will reuse it.
3076 auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(Edge->Caller);
3077
3078 // Callers will pass an empty ContextIdsToMove set when they want to move the
3079 // edge. Copy in Edge's ids for simplicity.
3080 if (ContextIdsToMove.empty())
3081 ContextIdsToMove = Edge->getContextIds();
3082
3083 // If we are moving all of Edge's ids, then just move the whole Edge.
3084 // Otherwise only move the specified subset, to a new edge if needed.
3085 if (Edge->getContextIds().size() == ContextIdsToMove.size()) {
3086 // First, update the alloc types on New Callee from Edge.
3087 // Do this before we potentially clear Edge's fields below!
3088 NewCallee->AllocTypes |= Edge->AllocTypes;
3089 // Moving the whole Edge.
3090 if (ExistingEdgeToNewCallee) {
3091 // Since we already have an edge to NewCallee, simply move the ids
3092 // onto it, and remove the existing Edge.
3093 ExistingEdgeToNewCallee->getContextIds().insert(ContextIdsToMove.begin(),
3094 ContextIdsToMove.end());
3095 ExistingEdgeToNewCallee->AllocTypes |= Edge->AllocTypes;
3096 assert(Edge->ContextIds == ContextIdsToMove);
3097 removeEdgeFromGraph(Edge.get(), CallerEdgeI, /*CalleeIter=*/false);
3098 } else {
3099 // Otherwise just reconnect Edge to NewCallee.
3100 Edge->Callee = NewCallee;
3101 NewCallee->CallerEdges.push_back(Edge);
3102 // Remove it from callee where it was previously connected.
3103 if (CallerEdgeI)
3104 *CallerEdgeI = OldCallee->CallerEdges.erase(*CallerEdgeI);
3105 else
3106 OldCallee->eraseCallerEdge(Edge.get());
3107 // Don't need to update Edge's context ids since we are simply
3108 // reconnecting it.
3109 }
3110 } else {
3111 // Only moving a subset of Edge's ids.
3112 if (CallerEdgeI)
3113 ++(*CallerEdgeI);
3114 // Compute the alloc type of the subset of ids being moved.
3115 auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove);
3116 if (ExistingEdgeToNewCallee) {
3117 // Since we already have an edge to NewCallee, simply move the ids
3118 // onto it.
3119 ExistingEdgeToNewCallee->getContextIds().insert(ContextIdsToMove.begin(),
3120 ContextIdsToMove.end());
3121 ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType;
3122 } else {
3123 // Otherwise, create a new edge to NewCallee for the ids being moved.
3124 auto NewEdge = std::make_shared<ContextEdge>(
3125 NewCallee, Edge->Caller, CallerEdgeAllocType, ContextIdsToMove);
3126 Edge->Caller->CalleeEdges.push_back(NewEdge);
3127 NewCallee->CallerEdges.push_back(NewEdge);
3128 }
3129 // In either case, need to update the alloc types on NewCallee, and remove
3130 // those ids and update the alloc type on the original Edge.
3131 NewCallee->AllocTypes |= CallerEdgeAllocType;
3132 set_subtract(Edge->ContextIds, ContextIdsToMove);
3133 Edge->AllocTypes = computeAllocType(Edge->ContextIds);
3134 }
3135 // Now walk the old callee node's callee edges and move Edge's context ids
3136 // over to the corresponding edge into the clone (which is created here if
3137 // this is a newly created clone).
3138 for (auto &OldCalleeEdge : OldCallee->CalleeEdges) {
3139 // The context ids moving to the new callee are the subset of this edge's
3140 // context ids and the context ids on the caller edge being moved.
3141 DenseSet<uint32_t> EdgeContextIdsToMove =
3142 set_intersection(OldCalleeEdge->getContextIds(), ContextIdsToMove);
3143 set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
3144 OldCalleeEdge->AllocTypes =
3145 computeAllocType(OldCalleeEdge->getContextIds());
3146 if (!NewClone) {
3147 // Update context ids / alloc type on corresponding edge to NewCallee.
3148 // There is a chance this may not exist if we are reusing an existing
3149 // clone, specifically during function assignment, where we would have
3150 // removed none type edges after creating the clone. If we can't find
3151 // a corresponding edge there, fall through to the cloning below.
3152 if (auto *NewCalleeEdge =
3153 NewCallee->findEdgeFromCallee(OldCalleeEdge->Callee)) {
3154 NewCalleeEdge->getContextIds().insert(EdgeContextIdsToMove.begin(),
3155 EdgeContextIdsToMove.end());
3156 NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
3157 continue;
3158 }
3159 }
3160 auto NewEdge = std::make_shared<ContextEdge>(
3161 OldCalleeEdge->Callee, NewCallee,
3162 computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove);
3163 NewCallee->CalleeEdges.push_back(NewEdge);
3164 NewEdge->Callee->CallerEdges.push_back(NewEdge);
3165 }
3166 // Recompute the node alloc type now that its callee edges have been
3167 // updated (since we will compute from those edges).
3168 OldCallee->AllocTypes = OldCallee->computeAllocType();
3169 // OldCallee alloc type should be None iff its context id set is now empty.
3170 assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) ==
3171 OldCallee->emptyContextIds());
3172 if (VerifyCCG) {
3173 checkNode<DerivedCCG, FuncTy, CallTy>(OldCallee, /*CheckEdges=*/false);
3174 checkNode<DerivedCCG, FuncTy, CallTy>(NewCallee, /*CheckEdges=*/false);
3175 for (const auto &OldCalleeEdge : OldCallee->CalleeEdges)
3176 checkNode<DerivedCCG, FuncTy, CallTy>(OldCalleeEdge->Callee,
3177 /*CheckEdges=*/false);
3178 for (const auto &NewCalleeEdge : NewCallee->CalleeEdges)
3179 checkNode<DerivedCCG, FuncTy, CallTy>(NewCalleeEdge->Callee,
3180 /*CheckEdges=*/false);
3181 }
3182}
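// A small worked example of the partial move handled above (ids are made up):
// if caller C has an edge to callee N carrying ContextIds {1, 2, 3} and we
// move {1, 2} to clone N', the edge C->N is left with {3} and its alloc type
// recomputed, while C->N' either reuses an existing edge or gets a new one
// carrying {1, 2}. Each callee edge N->X then has the intersection of its ids
// with {1, 2} transferred to a corresponding edge N'->X, so the clone sees
// exactly the contexts that now flow through it.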
3183
3184template <typename DerivedCCG, typename FuncTy, typename CallTy>
3185void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3186 moveCalleeEdgeToNewCaller(EdgeIter &CalleeEdgeI, ContextNode *NewCaller) {
3187 auto Edge = *CalleeEdgeI;
3188
3189 ContextNode *OldCaller = Edge->Caller;
3190
3191 // We might already have an edge to the new caller. If one exists we will
3192 // reuse it.
3193 auto ExistingEdgeToNewCaller = NewCaller->findEdgeFromCallee(Edge->Callee);
3194
3195 CalleeEdgeI = OldCaller->CalleeEdges.erase(CalleeEdgeI);
3196 if (ExistingEdgeToNewCaller) {
3197 // Since we already have an edge to NewCaller, simply move the ids
3198 // onto it, and remove the existing Edge.
3199 ExistingEdgeToNewCaller->getContextIds().insert(
3200 Edge->getContextIds().begin(), Edge->getContextIds().end());
3201 ExistingEdgeToNewCaller->AllocTypes |= Edge->AllocTypes;
3202 Edge->ContextIds.clear();
3203 Edge->AllocTypes = (uint8_t)AllocationType::None;
3204 Edge->Callee->eraseCallerEdge(Edge.get());
3205 } else {
3206 // Otherwise just reconnect Edge to NewCaller.
3207 Edge->Caller = NewCaller;
3208 NewCaller->CalleeEdges.push_back(Edge);
3209 // Don't need to update Edge's context ids since we are simply
3210 // reconnecting it.
3211 }
3212 // In either case, need to update the alloc types on New Caller.
3213 NewCaller->AllocTypes |= Edge->AllocTypes;
3214
3215 // Now walk the old caller node's caller edges and move Edge's context ids
3216 // over to the corresponding edge into the node (which is created here if
3217 // this is a newly created node). We can tell whether this is a newly created
3218 // node by seeing if it has any caller edges yet.
3219#ifndef NDEBUG
3220 bool IsNewNode = NewCaller->CallerEdges.empty();
3221#endif
3222 for (auto &OldCallerEdge : OldCaller->CallerEdges) {
3223 // The context ids moving to the new caller are the subset of this edge's
3224 // context ids and the context ids on the callee edge being moved.
3225 DenseSet<uint32_t> EdgeContextIdsToMove =
3226 set_intersection(OldCallerEdge->getContextIds(), Edge->getContextIds());
3227 set_subtract(OldCallerEdge->getContextIds(), EdgeContextIdsToMove);
3228 OldCallerEdge->AllocTypes =
3229 computeAllocType(OldCallerEdge->getContextIds());
3230 // In this function we expect that any pre-existing node already has edges
3231 // from the same callers as the old node. That should be true in the current
3232 // use case, where we will remove None-type edges after copying over all
3233 // caller edges from the callee.
3234 auto *ExistingCallerEdge =
3235 NewCaller->findEdgeFromCaller(OldCallerEdge->Caller);
3236 assert(IsNewNode || ExistingCallerEdge);
3237 if (ExistingCallerEdge) {
3238 ExistingCallerEdge->getContextIds().insert(EdgeContextIdsToMove.begin(),
3239 EdgeContextIdsToMove.end());
3240 ExistingCallerEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
3241 continue;
3242 }
3243 auto NewEdge = std::make_shared<ContextEdge>(
3244 NewCaller, OldCallerEdge->Caller,
3245 computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove);
3246 NewCaller->CallerEdges.push_back(NewEdge);
3247 NewEdge->Caller->CalleeEdges.push_back(NewEdge);
3248 }
3249 // Recompute the node alloc type now that its caller edges have been
3250 // updated (since we will compute from those edges).
3251 OldCaller->AllocTypes = OldCaller->computeAllocType();
3252 // OldCaller alloc type should be None iff its context id set is now empty.
3253 assert((OldCaller->AllocTypes == (uint8_t)AllocationType::None) ==
3254 OldCaller->emptyContextIds());
3255 if (VerifyCCG) {
3256 checkNode<DerivedCCG, FuncTy, CallTy>(OldCaller, /*CheckEdges=*/false);
3257 checkNode<DerivedCCG, FuncTy, CallTy>(NewCaller, /*CheckEdges=*/false);
3258 for (const auto &OldCallerEdge : OldCaller->CallerEdges)
3259 checkNode<DerivedCCG, FuncTy, CallTy>(OldCallerEdge->Caller,
3260 /*CheckEdges=*/false);
3261 for (const auto &NewCallerEdge : NewCaller->CallerEdges)
3262 checkNode<DerivedCCG, FuncTy, CallTy>(NewCallerEdge->Caller,
3263 /*CheckEdges=*/false);
3264 }
3265}
3266
3267template <typename DerivedCCG, typename FuncTy, typename CallTy>
3268void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
3269 recursivelyRemoveNoneTypeCalleeEdges(
3270 ContextNode *Node, DenseSet<const ContextNode *> &Visited) {
3271 auto Inserted = Visited.insert(Node);
3272 if (!Inserted.second)
3273 return;
3274
3275 removeNoneTypeCalleeEdges(Node);
3276
3277 for (auto *Clone : Node->Clones)
3278 recursivelyRemoveNoneTypeCalleeEdges(Clone, Visited);
3279
3280 // The recursive call may remove some of this Node's caller edges.
3281 // Iterate over a copy and skip any that were removed.
3282 auto CallerEdges = Node->CallerEdges;
3283 for (auto &Edge : CallerEdges) {
3284 // Skip any that have been removed by an earlier recursive call.
3285 if (Edge->isRemoved()) {
3286 assert(!is_contained(Node->CallerEdges, Edge));
3287 continue;
3288 }
3289 recursivelyRemoveNoneTypeCalleeEdges(Edge->Caller, Visited);
3290 }
3291}
3292
3293template <typename DerivedCCG, typename FuncTy, typename CallTy>
3294void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
3295 DenseSet<const ContextNode *> Visited;
3296 for (auto &Entry : AllocationCallToContextNodeMap) {
3297 Visited.clear();
3298 identifyClones(Entry.second, Visited, Entry.second->getContextIds());
3299 }
3300 Visited.clear();
3301 for (auto &Entry : AllocationCallToContextNodeMap)
3302 recursivelyRemoveNoneTypeCalleeEdges(Entry.second, Visited);
3303 if (VerifyCCG)
3304 check();
3305}
3306
3307// Helper function to check whether an AllocType is cold, notcold, or both.
3308bool checkColdOrNotCold(uint8_t AllocType) {
3309 return (AllocType == (uint8_t)AllocationType::Cold) ||
3310 (AllocType == (uint8_t)AllocationType::NotCold) ||
3311 (AllocType ==
3312 ((uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold));
3313}
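// Throughout this file AllocTypes values are bitmasks of AllocationType, so a
// context that is cold along some paths and not cold along others has both
// bits set. A minimal sketch of what the helper above accepts:
//   uint8_t Both = (uint8_t)AllocationType::Cold |
//                  (uint8_t)AllocationType::NotCold;
//   assert(checkColdOrNotCold(Both)); // a mixed type still qualifies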
3314
3315template <typename DerivedCCG, typename FuncTy, typename CallTy>
3316void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
3317 ContextNode *Node, DenseSet<const ContextNode *> &Visited,
3318 const DenseSet<uint32_t> &AllocContextIds) {
3319 if (VerifyNodes)
3320 checkNode<DerivedCCG, FuncTy, CallTy>(Node, /*CheckEdges=*/false);
3321 assert(!Node->CloneOf);
3322
3323 // If Node has a null call, then either it wasn't found in the module (regular
3324 // LTO) or summary index (ThinLTO), or there were other conditions blocking
3325 // cloning (e.g. recursion, calls multiple targets, etc).
3326 // Do this here so that we don't try to recursively clone callers below, which
3327 // isn't useful at least for this node.
3328 if (!Node->hasCall())
3329 return;
3330
3331#ifndef NDEBUG
3332 auto Insert =
3333#endif
3334 Visited.insert(Node);
3335 // We should not have visited this node yet.
3336 assert(Insert.second);
3337 // The recursive call to identifyClones may delete the current edge from the
3338 // CallerEdges vector. Make a copy and iterate on that, simpler than passing
3339 // in an iterator and having recursive call erase from it. Other edges may
3340 // also get removed during the recursion, which will have null Callee and
3341 // Caller pointers (and are deleted later), so we skip those below.
3342 {
3343 auto CallerEdges = Node->CallerEdges;
3344 for (auto &Edge : CallerEdges) {
3345 // Skip any that have been removed by an earlier recursive call.
3346 if (Edge->isRemoved()) {
3347 assert(!is_contained(Node->CallerEdges, Edge));
3348 continue;
3349 }
3350 // Ignore any caller we previously visited via another edge.
3351 if (!Visited.count(Edge->Caller) && !Edge->Caller->CloneOf) {
3352 identifyClones(Edge->Caller, Visited, AllocContextIds);
3353 }
3354 }
3355 }
3356
3357 // Check if we reached an unambiguous call or have only a single caller.
3358 if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1)
3359 return;
3360
3361 // We need to clone.
3362
3363 // Try to keep the original version as alloc type NotCold. This will make
3364 // cases with indirect calls or any other situation with an unknown call to
3365 // the original function get the default behavior. We do this by sorting the
3366 // CallerEdges of the Node we will clone by alloc type.
3367 //
3368 // Give NotCold edge the lowest sort priority so those edges are at the end of
3369 // the caller edges vector, and stay on the original version (since the below
3370 // code clones greedily until it finds all remaining edges have the same type
3371 // and leaves the remaining ones on the original Node).
3372 //
3373 // We shouldn't actually have any None type edges, so the sorting priority for
3374 // that is arbitrary, and we assert in that case below.
3375 const unsigned AllocTypeCloningPriority[] = {/*None*/ 3, /*NotCold*/ 4,
3376 /*Cold*/ 1,
3377 /*NotColdCold*/ 2};
3378 std::stable_sort(Node->CallerEdges.begin(), Node->CallerEdges.end(),
3379 [&](const std::shared_ptr<ContextEdge> &A,
3380 const std::shared_ptr<ContextEdge> &B) {
3381 // Nodes with non-empty context ids should be sorted before
3382 // those with empty context ids.
3383 if (A->ContextIds.empty())
3384 // Either B ContextIds are non-empty (in which case we
3385 // should return false because B < A), or B ContextIds
3386 // are empty, in which case they are equal, and we should
3387 // maintain the original relative ordering.
3388 return false;
3389 if (B->ContextIds.empty())
3390 return true;
3391
3392 if (A->AllocTypes == B->AllocTypes)
3393 // Use the first context id for each edge as a
3394 // tie-breaker.
3395 return *A->ContextIds.begin() < *B->ContextIds.begin();
3396 return AllocTypeCloningPriority[A->AllocTypes] <
3397 AllocTypeCloningPriority[B->AllocTypes];
3398 });
3399
3400 assert(Node->AllocTypes != (uint8_t)AllocationType::None);
3401
3402 DenseSet<uint32_t> RecursiveContextIds;
3403 // If we are allowing recursive callsites, but have also disabled recursive
3404 // contexts, look for context ids that show up in multiple caller edges.
3405 if (AllowRecursiveCallsites && !AllowRecursiveContexts) {
3406 DenseSet<uint32_t> AllCallerContextIds;
3407 for (auto &CE : Node->CallerEdges) {
3408 // Resize to the largest set of caller context ids, since we know the
3409 // final set will be at least that large.
3410 AllCallerContextIds.reserve(CE->getContextIds().size());
3411 for (auto Id : CE->getContextIds())
3412 if (!AllCallerContextIds.insert(Id).second)
3413 RecursiveContextIds.insert(Id);
3414 }
3415 }
3416
3417 // Iterate until we find no more opportunities for disambiguating the alloc
3418 // types via cloning. In most cases this loop will terminate once the Node
3419 // has a single allocation type, in which case no more cloning is needed.
3420 // We need to be able to remove Edge from CallerEdges, so need to adjust
3421 // iterator inside the loop.
3422 for (auto EI = Node->CallerEdges.begin(); EI != Node->CallerEdges.end();) {
3423 auto CallerEdge = *EI;
3424
3425 // See if cloning the prior caller edge left this node with a single alloc
3426 // type or a single caller. In that case no more cloning of Node is needed.
3427 if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1)
3428 break;
3429
3430 // If the caller was not successfully matched to a call in the IR/summary,
3431 // there is no point in trying to clone for it as we can't update that call.
3432 if (!CallerEdge->Caller->hasCall()) {
3433 ++EI;
3434 continue;
3435 }
3436
3437 // Only need to process the ids along this edge pertaining to the given
3438 // allocation.
3439 auto CallerEdgeContextsForAlloc =
3440 set_intersection(CallerEdge->getContextIds(), AllocContextIds);
3441 if (!RecursiveContextIds.empty())
3442 CallerEdgeContextsForAlloc =
3443 set_difference(CallerEdgeContextsForAlloc, RecursiveContextIds);
3444 if (CallerEdgeContextsForAlloc.empty()) {
3445 ++EI;
3446 continue;
3447 }
3448 auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
3449
3450 // Compute the node callee edge alloc types corresponding to the context ids
3451 // for this caller edge.
3452 std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
3453 CalleeEdgeAllocTypesForCallerEdge.reserve(Node->CalleeEdges.size());
3454 for (auto &CalleeEdge : Node->CalleeEdges)
3455 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
3456 CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
3457
3458 // Don't clone if doing so will not disambiguate any alloc types amongst
3459 // caller edges (including the callee edges that would be cloned).
3460 // Otherwise we will simply move all edges to the clone.
3461 //
3462 // First check if by cloning we will disambiguate the caller allocation
3463 // type from node's allocation type. Query allocTypeToUse so that we don't
3464 // bother cloning to distinguish NotCold+Cold from NotCold. Note that
3465 // neither of these should be None type.
3466 //
3467 // Then check if by cloning node at least one of the callee edges will be
3468 // disambiguated by splitting out different context ids.
3469 assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None);
3470 assert(Node->AllocTypes != (uint8_t)AllocationType::None);
3471 if (allocTypeToUse(CallerAllocTypeForAlloc) ==
3472 allocTypeToUse(Node->AllocTypes) &&
3473 allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
3474 CalleeEdgeAllocTypesForCallerEdge, Node->CalleeEdges)) {
3475 ++EI;
3476 continue;
3477 }
3478
3479 // First see if we can use an existing clone. Check each clone and its
3480 // callee edges for matching alloc types.
3481 ContextNode *Clone = nullptr;
3482 for (auto *CurClone : Node->Clones) {
3483 if (allocTypeToUse(CurClone->AllocTypes) !=
3484 allocTypeToUse(CallerAllocTypeForAlloc))
3485 continue;
3486
3487 bool BothSingleAlloc = hasSingleAllocType(CurClone->AllocTypes) &&
3488 hasSingleAllocType(CallerAllocTypeForAlloc);
3489 // The above check should mean that if both have single alloc types that
3490 // they should be equal.
3491 assert(!BothSingleAlloc ||
3492 CurClone->AllocTypes == CallerAllocTypeForAlloc);
3493
3494 // If either both have a single alloc type (which are the same), or if the
3495 // clone's callee edges have the same alloc types as those for the current
3496 // allocation on Node's callee edges (CalleeEdgeAllocTypesForCallerEdge),
3497 // then we can reuse this clone.
3498 if (BothSingleAlloc || allocTypesMatchClone<DerivedCCG, FuncTy, CallTy>(
3499 CalleeEdgeAllocTypesForCallerEdge, CurClone)) {
3500 Clone = CurClone;
3501 break;
3502 }
3503 }
3504
3505 // The edge iterator is adjusted when we move the CallerEdge to the clone.
3506 if (Clone)
3507 moveEdgeToExistingCalleeClone(CallerEdge, Clone, &EI, /*NewClone=*/false,
3508 CallerEdgeContextsForAlloc);
3509 else
3510 Clone =
3511 moveEdgeToNewCalleeClone(CallerEdge, &EI, CallerEdgeContextsForAlloc);
3512
3513 assert(EI == Node->CallerEdges.end() ||
3514 Node->AllocTypes != (uint8_t)AllocationType::None);
3515 // Sanity check that no alloc types on clone or its edges are None.
3516 assert(Clone->AllocTypes != (uint8_t)AllocationType::None);
3517 }
3518
3519 // We should still have some context ids on the original Node.
3520 assert(!Node->emptyContextIds());
3521
3522 // Sanity check that no alloc types on node or edges are None.
3523 assert(Node->AllocTypes != (uint8_t)AllocationType::None);
3524
3525 if (VerifyNodes)
3526 checkNode<DerivedCCG, FuncTy, CallTy>(Node, /*CheckEdges=*/false);
3527}
3528
3529void ModuleCallsiteContextGraph::updateAllocationCall(
3530 CallInfo &Call, AllocationType AllocType) {
3531 std::string AllocTypeString = getAllocTypeAttributeString(AllocType);
3532 auto A = llvm::Attribute::get(Call.call()->getFunction()->getContext(),
3533 "memprof", AllocTypeString);
3534 cast<CallBase>(Call.call())->addFnAttr(A);
3535 OREGetter(Call.call()->getFunction())
3536 .emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", Call.call())
3537 << ore::NV("AllocationCall", Call.call()) << " in clone "
3538 << ore::NV("Caller", Call.call()->getFunction())
3539 << " marked with memprof allocation attribute "
3540 << ore::NV("Attribute", AllocTypeString));
3541}
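// For regular LTO the result of updateAllocationCall is a string function
// attribute on the allocation call. A hand-written illustration of the
// resulting IR (names and types are made up):
//   %p = call ptr @malloc(i64 32) #1
//   ...
//   attributes #1 = { "memprof"="cold" }
// The attribute value comes from getAllocTypeAttributeString, e.g. "cold" or
// "notcold".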
3542
3543void IndexCallsiteContextGraph::updateAllocationCall(CallInfo &Call,
3544 AllocationType AllocType) {
3545 auto *AI = cast<AllocInfo *>(Call.call());
3546 assert(AI);
3547 assert(AI->Versions.size() > Call.cloneNo());
3548 AI->Versions[Call.cloneNo()] = (uint8_t)AllocType;
3549}
3550
3551AllocationType
3552ModuleCallsiteContextGraph::getAllocationCallType(const CallInfo &Call) const {
3553 const auto *CB = cast<CallBase>(Call.call());
3554 if (!CB->getAttributes().hasFnAttr("memprof"))
3555 return AllocationType::None;
3556 return CB->getAttributes().getFnAttr("memprof").getValueAsString() == "cold"
3557 ? AllocationType::Cold
3558 : AllocationType::NotCold;
3559}
3560
3561AllocationType
3562IndexCallsiteContextGraph::getAllocationCallType(const CallInfo &Call) const {
3563 const auto *AI = cast<AllocInfo *>(Call.call());
3564 assert(AI->Versions.size() > Call.cloneNo());
3565 return (AllocationType)AI->Versions[Call.cloneNo()];
3566}
3567
3568void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,
3569 FuncInfo CalleeFunc) {
3570 if (CalleeFunc.cloneNo() > 0)
3571 cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
3572 OREGetter(CallerCall.call()->getFunction())
3573 .emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CallerCall.call())
3574 << ore::NV("Call", CallerCall.call()) << " in clone "
3575 << ore::NV("Caller", CallerCall.call()->getFunction())
3576 << " assigned to call function clone "
3577 << ore::NV("Callee", CalleeFunc.func()));
3578}
3579
3580void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
3581 FuncInfo CalleeFunc) {
3582 auto *CI = cast<CallsiteInfo *>(CallerCall.call());
3583 assert(CI &&
3584 "Caller cannot be an allocation which should not have profiled calls");
3585 assert(CI->Clones.size() > CallerCall.cloneNo());
3586 CI->Clones[CallerCall.cloneNo()] = CalleeFunc.cloneNo();
3587}
3588
3589CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
3590 Instruction *>::FuncInfo
3591ModuleCallsiteContextGraph::cloneFunctionForCallsite(
3592 FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
3593 std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) {
3594 // Use existing LLVM facilities for cloning and obtaining Call in clone
3595 ValueToValueMapTy VMap;
3596 auto *NewFunc = CloneFunction(Func.func(), VMap);
3597 std::string Name = getMemProfFuncName(Func.func()->getName(), CloneNo);
3598 assert(!Func.func()->getParent()->getFunction(Name));
3599 NewFunc->setName(Name);
3600 for (auto &Inst : CallsWithMetadataInFunc) {
3601 // This map always has the initial version in it.
3602 assert(Inst.cloneNo() == 0);
3603 CallMap[Inst] = {cast<Instruction>(VMap[Inst.call()]), CloneNo};
3604 }
3605 OREGetter(Func.func())
3606 .emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", Func.func())
3607 << "created clone " << ore::NV("NewFunction", NewFunc));
3608 return {NewFunc, CloneNo};
3609}
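// For illustration, if the original function is named "foo" and CloneNo is 2,
// getMemProfFuncName is expected to produce a name along the lines of
// "foo.memprof.2"; the assert above guards against a symbol with that name
// already existing in the module.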
3610
3611CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
3612 IndexCall>::FuncInfo
3613IndexCallsiteContextGraph::cloneFunctionForCallsite(
3614 FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
3615 std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) {
3616 // Check how many clones we have of Call (and therefore function).
3617 // The next clone number is the current size of versions array.
3618 // Confirm this matches the CloneNo provided by the caller, which is based on
3619 // the number of function clones we have.
3620 assert(CloneNo ==
3621 (isa<AllocInfo *>(Call.call())
3622 ? Call.call().dyn_cast<AllocInfo *>()->Versions.size()
3623 : Call.call().dyn_cast<CallsiteInfo *>()->Clones.size()));
3624 // Walk all the instructions in this function. Create a new version for
3625 // each (by adding an entry to the Versions/Clones summary array), and copy
3626 // over the version being called for the function clone being cloned here.
3627 // Additionally, add an entry to the CallMap for the new function clone,
3628 // mapping the original call (clone 0, what is in CallsWithMetadataInFunc)
3629 // to the new call clone.
3630 for (auto &Inst : CallsWithMetadataInFunc) {
3631 // This map always has the initial version in it.
3632 assert(Inst.cloneNo() == 0);
3633 if (auto *AI = dyn_cast<AllocInfo *>(Inst.call())) {
3634 assert(AI->Versions.size() == CloneNo);
3635 // We assign the allocation type later (in updateAllocationCall), just add
3636 // an entry for it here.
3637 AI->Versions.push_back(0);
3638 } else {
3639 auto *CI = cast<CallsiteInfo *>(Inst.call());
3640 assert(CI && CI->Clones.size() == CloneNo);
3641 // We assign the clone number later (in updateCall), just add an entry for
3642 // it here.
3643 CI->Clones.push_back(0);
3644 }
3645 CallMap[Inst] = {Inst.call(), CloneNo};
3646 }
3647 return {Func.func(), CloneNo};
3648}
3649
3650// This method assigns cloned callsites to functions, cloning the functions as
3651// needed. The assignment is greedy and proceeds roughly as follows:
3652//
3653// For each function Func:
3654// For each call with graph Node having clones:
3655// Initialize ClonesWorklist to Node and its clones
3656// Initialize NodeCloneCount to 0
3657// While ClonesWorklist is not empty:
3658// Clone = pop front ClonesWorklist
3659// NodeCloneCount++
3660// If Func has been cloned less than NodeCloneCount times:
3661// If NodeCloneCount is 1:
3662// Assign Clone to original Func
3663// Continue
3664// Create a new function clone
3665// If other callers not assigned to call a function clone yet:
3666// Assign them to call new function clone
3667// Continue
3668// Assign any other caller calling the cloned version to new clone
3669//
3670// For each caller of Clone:
3671// If caller is assigned to call a specific function clone:
3672// If we cannot assign Clone to that function clone:
3673// Create new callsite Clone NewClone
3674// Add NewClone to ClonesWorklist
3675// Continue
3676// Assign Clone to existing caller's called function clone
3677// Else:
3678// If Clone not already assigned to a function clone:
3679// Assign to first function clone without assignment
3680// Assign caller to selected function clone
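// As a small worked example of the greedy scheme above (names are made up):
// suppose function Foo contains callsite C whose graph node was cloned once,
// giving clones {C, C'}. Handling C assigns it to the original Foo. Handling
// C' finds only one existing copy of Foo, so a new function clone Foo' is
// created, C' is assigned to it, and callers recorded as reaching C' are
// redirected to call Foo'. Callers already assigned to a different clone of
// Foo may force additional callsite clones, which the worklist below handles.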
3681template <typename DerivedCCG, typename FuncTy, typename CallTy>
3682bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
3683 bool Changed = false;
3684
3685 // Keep track of the assignment of nodes (callsites) to function clones they
3686 // call.
3687 DenseMap<ContextNode *, FuncInfo> CallsiteToCalleeFuncCloneMap;
3688
3689 // Update caller node to call function version CalleeFunc, by recording the
3690 // assignment in CallsiteToCalleeFuncCloneMap.
3691 auto RecordCalleeFuncOfCallsite = [&](ContextNode *Caller,
3692 const FuncInfo &CalleeFunc) {
3693 assert(Caller->hasCall());
3694 CallsiteToCalleeFuncCloneMap[Caller] = CalleeFunc;
3695 };
3696
3697 // Walk all functions for which we saw calls with memprof metadata, and handle
3698 // cloning for each of their calls.
3699 for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
3700 FuncInfo OrigFunc(Func);
3701 // Map from each clone of OrigFunc to a map of remappings of each call of
3702 // interest (from original uncloned call to the corresponding cloned call in
3703 // that function clone).
3704 std::map<FuncInfo, std::map<CallInfo, CallInfo>> FuncClonesToCallMap;
3705 for (auto &Call : CallsWithMetadata) {
3706 ContextNode *Node = getNodeForInst(Call);
3707 // Skip call if we do not have a node for it (all uses of its stack ids
3708 // were either on inlined chains or pruned from the MIBs), or if we did
3709 // not create any clones for it.
3710 if (!Node || Node->Clones.empty())
3711 continue;
3712 assert(Node->hasCall() &&
3713 "Not having a call should have prevented cloning");
3714
3715 // Track the assignment of function clones to clones of the current
3716 // callsite Node being handled.
3717 std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;
3718
3719 // Assign callsite version CallsiteClone to function version FuncClone,
3720 // and also assign (possibly cloned) Call to CallsiteClone.
3721 auto AssignCallsiteCloneToFuncClone = [&](const FuncInfo &FuncClone,
3722 CallInfo &Call,
3723 ContextNode *CallsiteClone,
3724 bool IsAlloc) {
3725 // Record the clone of callsite node assigned to this function clone.
3726 FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
3727
3728 assert(FuncClonesToCallMap.count(FuncClone));
3729 std::map<CallInfo, CallInfo> &CallMap = FuncClonesToCallMap[FuncClone];
3730 CallInfo CallClone(Call);
3731 if (CallMap.count(Call))
3732 CallClone = CallMap[Call];
3733 CallsiteClone->setCall(CallClone);
3734 // Need to do the same for all matching calls.
3735 for (auto &MatchingCall : Node->MatchingCalls) {
3736 CallInfo CallClone(MatchingCall);
3737 if (CallMap.count(MatchingCall))
3738 CallClone = CallMap[MatchingCall];
3739 // Updates the call in the list.
3740 MatchingCall = CallClone;
3741 }
3742 };
3743
3744 // Keep track of the clones of callsite Node that need to be assigned to
3745 // function clones. This list may be expanded in the loop body below if we
3746 // find additional cloning is required.
3747 std::deque<ContextNode *> ClonesWorklist;
3748 // Ignore original Node if we moved all of its contexts to clones.
3749 if (!Node->emptyContextIds())
3750 ClonesWorklist.push_back(Node);
3751 ClonesWorklist.insert(ClonesWorklist.end(), Node->Clones.begin(),
3752 Node->Clones.end());
3753
3754 // Now walk through all of the clones of this callsite Node that we need,
3755 // and determine the assignment to a corresponding clone of the current
3756 // function (creating new function clones as needed).
3757 unsigned NodeCloneCount = 0;
3758 while (!ClonesWorklist.empty()) {
3759 ContextNode *Clone = ClonesWorklist.front();
3760 ClonesWorklist.pop_front();
3761 NodeCloneCount++;
3762 if (VerifyNodes)
3763 checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
3764
3765 // Need to create a new function clone if we have more callsite clones
3766 // than existing function clones, which would have been assigned to an
3767 // earlier clone in the list (we assign callsite clones to function
3768 // clones greedily).
3769 if (FuncClonesToCallMap.size() < NodeCloneCount) {
3770 // If this is the first callsite copy, assign to original function.
3771 if (NodeCloneCount == 1) {
3772 // Since FuncClonesToCallMap is empty in this case, no clones have
3773 // been created for this function yet, and no callers should have
3774 // been assigned a function clone for this callee node yet.
3775 assert(llvm::none_of(
3776 Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) {
3777 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
3778 }));
3779 // Initialize with empty call map, assign Clone to original function
3780 // and its callers, and skip to the next clone.
3781 FuncClonesToCallMap[OrigFunc] = {};
3782 AssignCallsiteCloneToFuncClone(
3783 OrigFunc, Call, Clone,
3784 AllocationCallToContextNodeMap.count(Call));
3785 for (auto &CE : Clone->CallerEdges) {
3786 // Ignore any caller that does not have a recorded callsite Call.
3787 if (!CE->Caller->hasCall())
3788 continue;
3789 RecordCalleeFuncOfCallsite(CE->Caller, OrigFunc);
3790 }
3791 continue;
3792 }
3793
3794 // First locate which copy of OrigFunc to clone again. If a caller
3795 // of this callsite clone was already assigned to call a particular
3796 // function clone, we need to redirect all of those callers to the
3797 // new function clone, and update their other callees within this
3798 // function.
3799 FuncInfo PreviousAssignedFuncClone;
3800 auto EI = llvm::find_if(
3801 Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) {
3802 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
3803 });
3804 bool CallerAssignedToCloneOfFunc = false;
3805 if (EI != Clone->CallerEdges.end()) {
3806 const std::shared_ptr<ContextEdge> &Edge = *EI;
3807 PreviousAssignedFuncClone =
3808 CallsiteToCalleeFuncCloneMap[Edge->Caller];
3809 CallerAssignedToCloneOfFunc = true;
3810 }
3811
3812 // Clone function and save it along with the CallInfo map created
3813 // during cloning in the FuncClonesToCallMap.
3814 std::map<CallInfo, CallInfo> NewCallMap;
3815 unsigned CloneNo = FuncClonesToCallMap.size();
3816 assert(CloneNo > 0 && "Clone 0 is the original function, which "
3817 "should already exist in the map");
3818 FuncInfo NewFuncClone = cloneFunctionForCallsite(
3819 OrigFunc, Call, NewCallMap, CallsWithMetadata, CloneNo);
3820 FuncClonesToCallMap.emplace(NewFuncClone, std::move(NewCallMap));
3821 FunctionClonesAnalysis++;
3822 Changed = true;
3823
3824 // If no caller callsites were already assigned to a clone of this
3825 // function, we can simply assign this clone to the new func clone
3826 // and update all callers to it, then skip to the next clone.
3827 if (!CallerAssignedToCloneOfFunc) {
3828 AssignCallsiteCloneToFuncClone(
3829 NewFuncClone, Call, Clone,
3830 AllocationCallToContextNodeMap.count(Call));
3831 for (auto &CE : Clone->CallerEdges) {
3832 // Ignore any caller that does not have a recorded callsite Call.
3833 if (!CE->Caller->hasCall())
3834 continue;
3835 RecordCalleeFuncOfCallsite(CE->Caller, NewFuncClone);
3836 }
3837 continue;
3838 }
3839
3840 // We may need to do additional node cloning in this case.
3841 // Reset the CallsiteToCalleeFuncCloneMap entry for any callers
3842 // that were previously assigned to call PreviousAssignedFuncClone,
3843 // to record that they now call NewFuncClone.
3844 // The none type edge removal may remove some of this Clone's caller
3845 // edges, if it is reached via another of its caller's callees.
3846 // Iterate over a copy and skip any that were removed.
3847 auto CallerEdges = Clone->CallerEdges;
3848 for (auto CE : CallerEdges) {
3849 // Skip any that have been removed on an earlier iteration.
3850 if (CE->isRemoved()) {
3851 assert(!is_contained(Clone->CallerEdges, CE));
3852 continue;
3853 }
3854 assert(CE);
3855 // Ignore any caller that does not have a recorded callsite Call.
3856 if (!CE->Caller->hasCall())
3857 continue;
3858
3859 if (!CallsiteToCalleeFuncCloneMap.count(CE->Caller) ||
3860 // We subsequently fall through to later handling that
3861 // will perform any additional cloning required for
3862 // callers that were calling other function clones.
3863 CallsiteToCalleeFuncCloneMap[CE->Caller] !=
3864 PreviousAssignedFuncClone)
3865 continue;
3866
3867 RecordCalleeFuncOfCallsite(CE->Caller, NewFuncClone);
3868
3869 // If we are cloning a function that was already assigned to some
3870 // callers, then essentially we are creating new callsite clones
3871 // of the other callsites in that function that are reached by those
3872 // callers. Clone the other callees of the current callsite's caller
3873 // that were already assigned to PreviousAssignedFuncClone
3874 // accordingly. This is important since we subsequently update the
3875 // calls from the nodes in the graph and their assignments to callee
3876 // functions recorded in CallsiteToCalleeFuncCloneMap.
3877 // The none type edge removal may remove some of this caller's
3878 // callee edges, if it is reached via another of its callees.
3879 // Iterate over a copy and skip any that were removed.
3880 auto CalleeEdges = CE->Caller->CalleeEdges;
3881 for (auto CalleeEdge : CalleeEdges) {
3882 // Skip any that have been removed on an earlier iteration when
3883 // cleaning up newly None type callee edges.
3884 if (CalleeEdge->isRemoved()) {
3885 assert(!is_contained(CE->Caller->CalleeEdges, CalleeEdge));
3886 continue;
3887 }
3888 assert(CalleeEdge);
3889 ContextNode *Callee = CalleeEdge->Callee;
3890 // Skip the current callsite, we are looking for other
3891 // callsites Caller calls, as well as any that do not have a
3892 // recorded callsite Call.
3893 if (Callee == Clone || !Callee->hasCall())
3894 continue;
3895 ContextNode *NewClone = moveEdgeToNewCalleeClone(CalleeEdge);
3896 removeNoneTypeCalleeEdges(NewClone);
3897 // Moving the edge may have resulted in some none type
3898 // callee edges on the original Callee.
3899 removeNoneTypeCalleeEdges(Callee);
3900 assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
3901 // If the Callee node was already assigned to call a specific
3902 // function version, make sure its new clone is assigned to call
3903 // that same function clone.
3904 if (CallsiteToCalleeFuncCloneMap.count(Callee))
3905 RecordCalleeFuncOfCallsite(
3906 NewClone, CallsiteToCalleeFuncCloneMap[Callee]);
3907 // Update NewClone with the new Call clone of this callsite's Call
3908 // created for the new function clone created earlier.
3909 // Recall that we have already ensured when building the graph
3910 // that each caller can only call callsites within the same
3911 // function, so we are guaranteed that Callee Call is in the
3912 // current OrigFunc.
3913 // CallMap is set up as indexed by original Call at clone 0.
3914 CallInfo OrigCall(Callee->getOrigNode()->Call);
3915 OrigCall.setCloneNo(0);
3916 std::map<CallInfo, CallInfo> &CallMap =
3917 FuncClonesToCallMap[NewFuncClone];
3918 assert(CallMap.count(OrigCall));
3919 CallInfo NewCall(CallMap[OrigCall]);
3920 assert(NewCall);
3921 NewClone->setCall(NewCall);
3922 // Need to do the same for all matching calls.
3923 for (auto &MatchingCall : NewClone->MatchingCalls) {
3924 CallInfo OrigMatchingCall(MatchingCall);
3925 OrigMatchingCall.setCloneNo(0);
3926 assert(CallMap.count(OrigMatchingCall));
3927 CallInfo NewCall(CallMap[OrigMatchingCall]);
3928 assert(NewCall);
3929 // Updates the call in the list.
3930 MatchingCall = NewCall;
3931 }
3932 }
3933 }
3934 // Fall through to handling below to perform the recording of the
3935 // function for this callsite clone. This enables handling of cases
3936 // where the callers were assigned to different clones of a function.
3937 }
3938
3939 // See if we can use existing function clone. Walk through
3940 // all caller edges to see if any have already been assigned to
3941 // a clone of this callsite's function. If we can use it, do so. If not,
3942 // because that function clone is already assigned to a different clone
3943 // of this callsite, then we need to clone again.
3944 // Basically, this checking is needed to handle the case where different
3945 // caller functions/callsites may need versions of this function
3946 // containing different mixes of callsite clones across the different
3947 // callsites within the function. If that happens, we need to create
3948 // additional function clones to handle the various combinations.
3949 //
3950 // Keep track of any new clones of this callsite created by the
3951 // following loop, as well as any existing clone that we decided to
3952 // assign this clone to.
3953 std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
3954 FuncInfo FuncCloneAssignedToCurCallsiteClone;
3955 // We need to be able to remove Edge from CallerEdges, so need to adjust
3956 // iterator in the loop.
3957 for (auto EI = Clone->CallerEdges.begin();
3958 EI != Clone->CallerEdges.end();) {
3959 auto Edge = *EI;
3960 // Ignore any caller that does not have a recorded callsite Call.
3961 if (!Edge->Caller->hasCall()) {
3962 EI++;
3963 continue;
3964 }
3965 // If this caller already assigned to call a version of OrigFunc, need
3966 // to ensure we can assign this callsite clone to that function clone.
3967 if (CallsiteToCalleeFuncCloneMap.count(Edge->Caller)) {
3968 FuncInfo FuncCloneCalledByCaller =
3969 CallsiteToCalleeFuncCloneMap[Edge->Caller];
3970 // First we need to confirm that this function clone is available
3971 // for use by this callsite node clone.
3972 //
3973 // While FuncCloneToCurNodeCloneMap is built only for this Node and
3974 // its callsite clones, one of those callsite clones X could have
3975 // been assigned to the same function clone called by Edge's caller
3976 // - if Edge's caller calls another callsite within Node's original
3977 // function, and that callsite has another caller reaching clone X.
3978 // We need to clone Node again in this case.
3979 if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
3980 FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
3981 Clone) ||
3982 // Detect when we have multiple callers of this callsite that
3983 // have already been assigned to specific, and different, clones
3984 // of OrigFunc (due to other unrelated callsites in Func they
3985 // reach via call contexts). Is this Clone of callsite Node
3986 // assigned to a different clone of OrigFunc? If so, clone Node
3987 // again.
3988 (FuncCloneAssignedToCurCallsiteClone &&
3989 FuncCloneAssignedToCurCallsiteClone !=
3990 FuncCloneCalledByCaller)) {
3991 // We need to use a different newly created callsite clone, in
3992 // order to assign it to another new function clone on a
3993 // subsequent iteration over the Clones array (adjusted below).
3994 // Note we specifically do not reset the
3995 // CallsiteToCalleeFuncCloneMap entry for this caller, so that
3996 // when this new clone is processed later we know which version of
3997 // the function to copy (so that other callsite clones we have
3998 // assigned to that function clone are properly cloned over). See
3999 // comments in the function cloning handling earlier.
4000
4001 // Check if we already have cloned this callsite again while
4002 // walking through caller edges, for a caller calling the same
4003 // function clone. If so, we can move this edge to that new clone
4004 // rather than creating yet another new clone.
4005 if (FuncCloneToNewCallsiteCloneMap.count(
4006 FuncCloneCalledByCaller)) {
4007 ContextNode *NewClone =
4008 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
4009 moveEdgeToExistingCalleeClone(Edge, NewClone, &EI);
4010 // Cleanup any none type edges cloned over.
4011 removeNoneTypeCalleeEdges(NewClone);
4012 } else {
4013 // Create a new callsite clone.
4014 ContextNode *NewClone = moveEdgeToNewCalleeClone(Edge, &EI);
4015 removeNoneTypeCalleeEdges(NewClone);
4016 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
4017 NewClone;
4018 // Add to list of clones and process later.
4019 ClonesWorklist.push_back(NewClone);
4020 assert(EI == Clone->CallerEdges.end() ||
4021 Clone->AllocTypes != (uint8_t)AllocationType::None);
4022 assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
4023 }
4024 // Moving the caller edge may have resulted in some none type
4025 // callee edges.
4026 removeNoneTypeCalleeEdges(Clone);
4027 // We will handle the newly created callsite clone in a subsequent
4028 // iteration over this Node's Clones. Continue here since we
4029 // already adjusted iterator EI while moving the edge.
4030 continue;
4031 }
4032
4033 // Otherwise, we can use the function clone already assigned to this
4034 // caller.
4035 if (!FuncCloneAssignedToCurCallsiteClone) {
4036 FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
4037 // Assign Clone to FuncCloneCalledByCaller
4038 AssignCallsiteCloneToFuncClone(
4039 FuncCloneCalledByCaller, Call, Clone,
4040 AllocationCallToContextNodeMap.count(Call));
4041 } else
4042 // Don't need to do anything - callsite is already calling this
4043 // function clone.
4044 assert(FuncCloneAssignedToCurCallsiteClone ==
4045 FuncCloneCalledByCaller);
4046
4047 } else {
4048 // We have not already assigned this caller to a version of
4049 // OrigFunc. Do the assignment now.
4050
4051 // First check if we have already assigned this callsite clone to a
4052 // clone of OrigFunc for another caller during this iteration over
4053 // its caller edges.
4054 if (!FuncCloneAssignedToCurCallsiteClone) {
4055 // Find first function in FuncClonesToCallMap without an assigned
4056 // clone of this callsite Node. We should always have one
4057 // available at this point due to the earlier cloning when the
4058 // FuncClonesToCallMap size was smaller than the clone number.
4059 for (auto &CF : FuncClonesToCallMap) {
4060 if (!FuncCloneToCurNodeCloneMap.count(CF.first)) {
4061 FuncCloneAssignedToCurCallsiteClone = CF.first;
4062 break;
4063 }
4064 }
4065 assert(FuncCloneAssignedToCurCallsiteClone);
4066 // Assign Clone to FuncCloneAssignedToCurCallsiteClone
4067 AssignCallsiteCloneToFuncClone(
4068 FuncCloneAssignedToCurCallsiteClone, Call, Clone,
4069 AllocationCallToContextNodeMap.count(Call));
4070 } else
4071 assert(FuncCloneToCurNodeCloneMap
4072 [FuncCloneAssignedToCurCallsiteClone] == Clone);
4073 // Update callers to record function version called.
4074 RecordCalleeFuncOfCallsite(Edge->Caller,
4075 FuncCloneAssignedToCurCallsiteClone);
4076 }
4077
4078 EI++;
4079 }
4080 }
4081 if (VerifyCCG) {
4082 checkNode<DerivedCCG, FuncTy, CallTy>(Node);
4083 for (const auto &PE : Node->CalleeEdges)
4084 checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
4085 for (const auto &CE : Node->CallerEdges)
4086 checkNode<DerivedCCG, FuncTy, CallTy>(CE->Caller);
4087 for (auto *Clone : Node->Clones) {
4088 checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
4089 for (const auto &PE : Clone->CalleeEdges)
4090 checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
4091 for (const auto &CE : Clone->CallerEdges)
4092 checkNode<DerivedCCG, FuncTy, CallTy>(CE->Caller);
4093 }
4094 }
4095 }
4096 }
4097
4098 uint8_t BothTypes =
4099 (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
4100
4101 auto UpdateCalls = [&](ContextNode *Node,
4102 DenseSet<const ContextNode *> &Visited,
4103 auto &&UpdateCalls) {
4104 auto Inserted = Visited.insert(Node);
4105 if (!Inserted.second)
4106 return;
4107
4108 for (auto *Clone : Node->Clones)
4109 UpdateCalls(Clone, Visited, UpdateCalls);
4110
4111 for (auto &Edge : Node->CallerEdges)
4112 UpdateCalls(Edge->Caller, Visited, UpdateCalls);
4113
4114 // Skip if either no call to update, or if we ended up with no context ids
4115 // (we moved all edges onto other clones).
4116 if (!Node->hasCall() || Node->emptyContextIds())
4117 return;
4118
4119 if (Node->IsAllocation) {
4120 auto AT = allocTypeToUse(Node->AllocTypes);
4121 // If the allocation type is ambiguous, and more aggressive hinting
4122 // has been enabled via the MinClonedColdBytePercent flag, see if this
4123 // allocation should be hinted cold anyway because the fraction of bytes
4124 // allocated cold is at least the given threshold.
4125 if (Node->AllocTypes == BothTypes && MinClonedColdBytePercent < 100 &&
4126 !ContextIdToContextSizeInfos.empty()) {
4127 uint64_t TotalCold = 0;
4128 uint64_t Total = 0;
4129 for (auto Id : Node->getContextIds()) {
4130 auto TypeI = ContextIdToAllocationType.find(Id);
4131 assert(TypeI != ContextIdToAllocationType.end());
4132 auto CSI = ContextIdToContextSizeInfos.find(Id);
4133 if (CSI != ContextIdToContextSizeInfos.end()) {
4134 for (auto &Info : CSI->second) {
4135 Total += Info.TotalSize;
4136 if (TypeI->second == AllocationType::Cold)
4137 TotalCold += Info.TotalSize;
4138 }
4139 }
4140 }
4141 if (TotalCold * 100 >= Total * MinClonedColdBytePercent)
4142 AT = AllocationType::Cold;
4143 }
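// Worked example of the check above (illustrative, not from the source):
// with MinClonedColdBytePercent set to 80, an ambiguous allocation whose
// profiled contexts account for 900 cold bytes out of 1000 total satisfies
// 900 * 100 >= 1000 * 80, so it is hinted cold despite the mixed alloc types.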
4144 updateAllocationCall(Node->Call, AT);
4145 assert(Node->MatchingCalls.empty());
4146 return;
4147 }
4148
4149 if (!CallsiteToCalleeFuncCloneMap.count(Node))
4150 return;
4151
4152 auto CalleeFunc = CallsiteToCalleeFuncCloneMap[Node];
4153 updateCall(Node->Call, CalleeFunc);
4154 // Update all the matching calls as well.
4155 for (auto &Call : Node->MatchingCalls)
4156 updateCall(Call, CalleeFunc);
4157 };
4158
4159 // Performs DFS traversal starting from allocation nodes to update calls to
4160 // reflect cloning decisions recorded earlier. For regular LTO this will
4161 // update the actual calls in the IR to call the appropriate function clone
4162 // (and add attributes to allocation calls), whereas for ThinLTO the decisions
4163 // are recorded in the summary entries.
4164 DenseSet<const ContextNode *> Visited;
4165 for (auto &Entry : AllocationCallToContextNodeMap)
4166 UpdateCalls(Entry.second, Visited, UpdateCalls);
4167
4168 return Changed;
4169}
4170
4171static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones(
4172 Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE,
4173 std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
4174 &FuncToAliasMap) {
4175 // The first "clone" is the original copy; we should only call this if we
4176 // needed to create new clones.
4177 assert(NumClones > 1);
4178 SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
4179 VMaps.reserve(NumClones - 1);
4180 FunctionsClonedThinBackend++;
4181 for (unsigned I = 1; I < NumClones; I++) {
4182 VMaps.emplace_back(std::make_unique<ValueToValueMapTy>());
4183 auto *NewF = CloneFunction(&F, *VMaps.back());
4184 FunctionClonesThinBackend++;
4185 // Strip memprof and callsite metadata from clone as they are no longer
4186 // needed.
4187 for (auto &BB : *NewF) {
4188 for (auto &Inst : BB) {
4189 Inst.setMetadata(LLVMContext::MD_memprof, nullptr);
4190 Inst.setMetadata(LLVMContext::MD_callsite, nullptr);
4191 }
4192 }
4193 std::string Name = getMemProfFuncName(F.getName(), I);
4194 auto *PrevF = M.getFunction(Name);
4195 if (PrevF) {
4196 // We might have created this when adjusting a callsite in another
4197 // function. It should be a declaration.
4198 assert(PrevF->isDeclaration());
4199 NewF->takeName(PrevF);
4200 PrevF->replaceAllUsesWith(NewF);
4201 PrevF->eraseFromParent();
4202 } else
4203 NewF->setName(Name);
4204 ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F)
4205 << "created clone " << ore::NV("NewFunction", NewF));
4206
4207 // Now handle aliases to this function, and clone those as well.
4208 if (!FuncToAliasMap.count(&F))
4209 continue;
4210 for (auto *A : FuncToAliasMap[&F]) {
4211 std::string Name = getMemProfFuncName(A->getName(), I);
4212 auto *PrevA = M.getNamedAlias(Name);
4213 auto *NewA = GlobalAlias::create(A->getValueType(),
4214 A->getType()->getPointerAddressSpace(),
4215 A->getLinkage(), Name, NewF);
4216 NewA->copyAttributesFrom(A);
4217 if (PrevA) {
4218 // We might have created this when adjusting a callsite in another
4219 // function. It should be a declaration.
4220 assert(PrevA->isDeclaration());
4221 NewA->takeName(PrevA);
4222 PrevA->replaceAllUsesWith(NewA);
4223 PrevA->eraseFromParent();
4224 }
4225 }
4226 }
4227 return VMaps;
4228}
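// Illustrative sketch (not part of the original source): assuming the default
// MemProfCloneSuffix of ".memprof.", a call such as
//   createFunctionClones(F, /*NumClones=*/3, M, ORE, FuncToAliasMap);
// for a function named "foo" keeps "foo" as version 0 and materializes
// "foo.memprof.1" and "foo.memprof.2", returning the two ValueToValueMapTy
// maps used later to locate the cloned instructions for versions 1 and 2.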
4229
4230// Locate the summary for F. This is complicated by the fact that it might
4231// have been internalized or promoted.
4232static ValueInfo findValueInfoForFunc(const Function &F, const Module &M,
4233 const ModuleSummaryIndex *ImportSummary) {
4234 // FIXME: Ideally we would retain the original GUID in some fashion on the
4235 // function (e.g. as metadata), but for now do our best to locate the
4236 // summary without that information.
4237 ValueInfo TheFnVI = ImportSummary->getValueInfo(F.getGUID());
4238 if (!TheFnVI)
4239 // See if the function was internalized, by checking the index directly with
4240 // the original name (this avoids the name adjustment done by getGUID() for
4241 // internal symbols).
4242 TheFnVI = ImportSummary->getValueInfo(GlobalValue::getGUID(F.getName()));
4243 if (TheFnVI)
4244 return TheFnVI;
4245 // Now query with the original name before any promotion was performed.
4246 StringRef OrigName =
4247 ModuleSummaryIndex::getOriginalNameBeforePromote(F.getName());
4248 std::string OrigId = GlobalValue::getGlobalIdentifier(
4249 OrigName, GlobalValue::InternalLinkage, M.getSourceFileName());
4250 TheFnVI = ImportSummary->getValueInfo(GlobalValue::getGUID(OrigId));
4251 if (TheFnVI)
4252 return TheFnVI;
4253 // Could be a promoted local imported from another module. We need to pass
4254 // down more info here to find the original module id. For now, try with
4255 // the OrigName which might have been stored in the OidGuidMap in the
4256 // index. This would not work if there were same-named locals in multiple
4257 // modules, however.
4258 auto OrigGUID =
4259 ImportSummary->getGUIDFromOriginalID(GlobalValue::getGUID(OrigName));
4260 if (OrigGUID)
4261 TheFnVI = ImportSummary->getValueInfo(OrigGUID);
4262 return TheFnVI;
4263}
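// Editorial summary (not from the source): the lookup above tries, in order,
// the function's current GUID, the GUID of its unadjusted name (covering
// internalized symbols), the GUID of the original pre-promotion local
// identifier, and finally the OidGuidMap entry for the original name; the
// first successful query wins.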
4264
4265bool MemProfContextDisambiguation::initializeIndirectCallPromotionInfo(
4266 Module &M) {
4267 ICallAnalysis = std::make_unique<ICallPromotionAnalysis>();
4268 Symtab = std::make_unique<InstrProfSymtab>();
4269 // Don't add canonical names, to avoid adding multiple functions to the
4270 // symtab under the same root name once "." suffixes are stripped.
4271 // If we pick the wrong one then this could lead to incorrect ICP and calling
4272 // a memprof clone that we don't actually create (resulting in linker unsats).
4273 // What this means is that the GUID of the function (or its PGOFuncName
4274 // metadata) *must* match that in the VP metadata to allow promotion.
4275 // In practice this should not be a limitation, since local functions should
4276 // have PGOFuncName metadata and global function names shouldn't need any
4277 // special handling (they should not get the ".llvm.*" suffix that the
4278 // canonicalization handling is attempting to strip).
4279 if (Error E = Symtab->create(M, /*InLTO=*/true, /*AddCanonical=*/false)) {
4280 std::string SymtabFailure = toString(std::move(E));
4281 M.getContext().emitError("Failed to create symtab: " + SymtabFailure);
4282 return false;
4283 }
4284 return true;
4285}
4286
4287bool MemProfContextDisambiguation::applyImport(Module &M) {
4288 assert(ImportSummary);
4289 bool Changed = false;
4290
4291 // We also need to clone any aliases that reference cloned functions, because
4292 // the modified callsites may invoke via the alias. Keep track of the aliases
4293 // for each function.
4294 std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
4295 FuncToAliasMap;
4296 for (auto &A : M.aliases()) {
4297 auto *Aliasee = A.getAliaseeObject();
4298 if (auto *F = dyn_cast<Function>(Aliasee))
4299 FuncToAliasMap[F].insert(&A);
4300 }
4301
4302 if (!initializeIndirectCallPromotionInfo(M))
4303 return false;
4304
4305 for (auto &F : M) {
4306 if (F.isDeclaration() || isMemProfClone(F))
4307 continue;
4308
4309 OptimizationRemarkEmitter ORE(&F);
4310
4311 SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
4312 bool ClonesCreated = false;
4313 unsigned NumClonesCreated = 0;
4314 auto CloneFuncIfNeeded = [&](unsigned NumClones) {
4315 // We should at least have version 0 which is the original copy.
4316 assert(NumClones > 0);
4317 // If only one copy is needed, use the original.
4318 if (NumClones == 1)
4319 return;
4320 // If we already performed cloning of this function, confirm that the
4321 // requested number of clones matches (the thin link should ensure the
4322 // number of clones for each constituent callsite is consistent within
4323 // each function), before returning.
4324 if (ClonesCreated) {
4325 assert(NumClonesCreated == NumClones);
4326 return;
4327 }
4328 VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap);
4329 // The first "clone" is the original copy, which doesn't have a VMap.
4330 assert(VMaps.size() == NumClones - 1);
4331 Changed = true;
4332 ClonesCreated = true;
4333 NumClonesCreated = NumClones;
4334 };
4335
4336 auto CloneCallsite = [&](const CallsiteInfo &StackNode, CallBase *CB,
4337 Function *CalledFunction) {
4338 // Perform cloning if not yet done.
4339 CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size());
4340
4341 assert(!isMemProfClone(*CalledFunction));
4342
4343 // Update the calls per the summary info.
4344 // Save orig name since it gets updated in the first iteration
4345 // below.
4346 auto CalleeOrigName = CalledFunction->getName();
4347 for (unsigned J = 0; J < StackNode.Clones.size(); J++) {
4348 // Do nothing if this version calls the original version of its
4349 // callee.
4350 if (!StackNode.Clones[J])
4351 continue;
4352 auto NewF = M.getOrInsertFunction(
4353 getMemProfFuncName(CalleeOrigName, StackNode.Clones[J]),
4354 CalledFunction->getFunctionType());
4355 CallBase *CBClone;
4356 // Copy 0 is the original function.
4357 if (!J)
4358 CBClone = CB;
4359 else
4360 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
4361 CBClone->setCalledFunction(NewF);
4362 ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone)
4363 << ore::NV("Call", CBClone) << " in clone "
4364 << ore::NV("Caller", CBClone->getFunction())
4365 << " assigned to call function clone "
4366 << ore::NV("Callee", NewF.getCallee()));
4367 }
4368 };
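// Worked example (illustrative, not from the source): if the summary records
// StackNode.Clones == {0, 2} for this callsite, version 0 of the caller (the
// original function) keeps calling the original callee, while caller clone 1
// is redirected to the callee's clone 2 ("callee.memprof.2", assuming the
// default clone-name suffix).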
4369
4370 // Locate the summary for F.
4371 ValueInfo TheFnVI = findValueInfoForFunc(F, M, ImportSummary);
4372 // If not found, this could be an imported local (see comment in
4373 // findValueInfoForFunc). Skip for now as it will be cloned in its original
4374 // module (where it would have been promoted to global scope so should
4375 // satisfy any reference in this module).
4376 if (!TheFnVI)
4377 continue;
4378
4379 auto *GVSummary =
4380 ImportSummary->findSummaryInModule(TheFnVI, M.getModuleIdentifier());
4381 if (!GVSummary) {
4382 // Must have been imported; use the summary which matches the definition
4383 // (there might be multiple if this was a linkonce_odr).
4384 auto SrcModuleMD = F.getMetadata("thinlto_src_module");
4385 assert(SrcModuleMD &&
4386 "enable-import-metadata is needed to emit thinlto_src_module");
4387 StringRef SrcModule =
4388 dyn_cast<MDString>(SrcModuleMD->getOperand(0))->getString();
4389 for (auto &GVS : TheFnVI.getSummaryList()) {
4390 if (GVS->modulePath() == SrcModule) {
4391 GVSummary = GVS.get();
4392 break;
4393 }
4394 }
4395 assert(GVSummary && GVSummary->modulePath() == SrcModule);
4396 }
4397
4398 // If this was an imported alias skip it as we won't have the function
4399 // summary, and it should be cloned in the original module.
4400 if (isa<AliasSummary>(GVSummary))
4401 continue;
4402
4403 auto *FS = cast<FunctionSummary>(GVSummary->getBaseObject());
4404
4405 if (FS->allocs().empty() && FS->callsites().empty())
4406 continue;
4407
4408 auto SI = FS->callsites().begin();
4409 auto AI = FS->allocs().begin();
4410
4411 // To handle callsite infos synthesized for tail calls which have missing
4412 // frames in the profiled context, map callee VI to the synthesized callsite
4413 // info.
4414 DenseMap<ValueInfo, CallsiteInfo> MapTailCallCalleeVIToCallsite;
4415 // Iterate the callsites for this function in reverse, since we place all
4416 // those synthesized for tail calls at the end.
4417 for (auto CallsiteIt = FS->callsites().rbegin();
4418 CallsiteIt != FS->callsites().rend(); CallsiteIt++) {
4419 auto &Callsite = *CallsiteIt;
4420 // Stop as soon as we see a non-synthesized callsite info (see comment
4421 // above loop). All the entries added for discovered tail calls have empty
4422 // stack ids.
4423 if (!Callsite.StackIdIndices.empty())
4424 break;
4425 MapTailCallCalleeVIToCallsite.insert({Callsite.Callee, Callsite});
4426 }
4427
4428 // Keeps track of needed ICP for the function.
4429 SmallVector<ICallAnalysisData> ICallAnalysisInfo;
4430
4431 // Assume for now that the instructions are in the exact same order
4432 // as when the summary was created, but confirm this is correct by
4433 // matching the stack ids.
4434 for (auto &BB : F) {
4435 for (auto &I : BB) {
4436 auto *CB = dyn_cast<CallBase>(&I);
4437 // Same handling as when creating module summary.
4438 if (!mayHaveMemprofSummary(CB))
4439 continue;
4440
4441 auto *CalledValue = CB->getCalledOperand();
4442 auto *CalledFunction = CB->getCalledFunction();
4443 if (CalledValue && !CalledFunction) {
4444 CalledValue = CalledValue->stripPointerCasts();
4445 // Stripping pointer casts can reveal a called function.
4446 CalledFunction = dyn_cast<Function>(CalledValue);
4447 }
4448 // Check if this is an alias to a function. If so, get the
4449 // called aliasee for the checks below.
4450 if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
4451 assert(!CalledFunction &&
4452 "Expected null called function in callsite for alias");
4453 CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
4454 }
4455
4456 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
4457 I.getMetadata(LLVMContext::MD_callsite));
4458 auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof);
4459
4460 // Include allocs that were already assigned a memprof function
4461 // attribute in the statistics.
4462 if (CB->getAttributes().hasFnAttr("memprof")) {
4463 assert(!MemProfMD);
4464 CB->getAttributes().getFnAttr("memprof").getValueAsString() == "cold"
4465 ? AllocTypeColdThinBackend++
4466 : AllocTypeNotColdThinBackend++;
4467 OrigAllocsThinBackend++;
4468 AllocVersionsThinBackend++;
4469 if (!MaxAllocVersionsThinBackend)
4470 MaxAllocVersionsThinBackend = 1;
4471 continue;
4472 }
4473
4474 if (MemProfMD) {
4475 // Consult the next alloc node.
4476 assert(AI != FS->allocs().end());
4477 auto &AllocNode = *(AI++);
4478
4479 // Sanity check that the MIB stack ids match between the summary and
4480 // instruction metadata.
4481 auto MIBIter = AllocNode.MIBs.begin();
4482 for (auto &MDOp : MemProfMD->operands()) {
4483 assert(MIBIter != AllocNode.MIBs.end());
4484 LLVM_ATTRIBUTE_UNUSED auto StackIdIndexIter =
4485 MIBIter->StackIdIndices.begin();
4486 auto *MIBMD = cast<const MDNode>(MDOp);
4487 MDNode *StackMDNode = getMIBStackNode(MIBMD);
4488 assert(StackMDNode);
4489 CallStack<MDNode, MDNode::op_iterator> StackContext(StackMDNode);
4490 auto ContextIterBegin =
4491 StackContext.beginAfterSharedPrefix(CallsiteContext);
4492 // Skip the checking on the first iteration.
4493 uint64_t LastStackContextId =
4494 (ContextIterBegin != StackContext.end() &&
4495 *ContextIterBegin == 0)
4496 ? 1
4497 : 0;
4498 for (auto ContextIter = ContextIterBegin;
4499 ContextIter != StackContext.end(); ++ContextIter) {
4500 // If this is a direct recursion, simply skip the duplicate
4501 // entries, to be consistent with how the summary ids were
4502 // generated during ModuleSummaryAnalysis.
4503 if (LastStackContextId == *ContextIter)
4504 continue;
4505 LastStackContextId = *ContextIter;
4506 assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
4507 assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
4508 *ContextIter);
4509 StackIdIndexIter++;
4510 }
4511 MIBIter++;
4512 }
4513
4514 // Perform cloning if not yet done.
4515 CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size());
4516
4517 OrigAllocsThinBackend++;
4518 AllocVersionsThinBackend += AllocNode.Versions.size();
4519 if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
4520 MaxAllocVersionsThinBackend = AllocNode.Versions.size();
4521
4522 // If there is only one version that means we didn't end up
4523 // considering this function for cloning, and in that case the alloc
4524 // will still be none type or should have gotten the default NotCold.
4525 // Skip that after calling the clone helper, since it does some sanity
4526 // checks that confirm we haven't yet decided that we need cloning.
4527 // We might have a single version that is cold due to the
4528 // MinClonedColdBytePercent heuristic; make sure we don't skip in that
4529 // case.
4530 if (AllocNode.Versions.size() == 1 &&
4531 (AllocationType)AllocNode.Versions[0] != AllocationType::Cold) {
4532 assert((AllocationType)AllocNode.Versions[0] ==
4533 AllocationType::NotCold ||
4534 (AllocationType)AllocNode.Versions[0] ==
4535 AllocationType::None);
4536 UnclonableAllocsThinBackend++;
4537 continue;
4538 }
4539
4540 // All versions should have a singular allocation type.
4541 assert(llvm::none_of(AllocNode.Versions, [](uint8_t Type) {
4542 return Type == ((uint8_t)AllocationType::NotCold |
4543 (uint8_t)AllocationType::Cold);
4544 }));
4545
4546 // Update the allocation types per the summary info.
4547 for (unsigned J = 0; J < AllocNode.Versions.size(); J++) {
4548 // Ignore any that didn't get an assigned allocation type.
4549 if (AllocNode.Versions[J] == (uint8_t)AllocationType::None)
4550 continue;
4551 AllocationType AllocTy = (AllocationType)AllocNode.Versions[J];
4552 AllocTy == AllocationType::Cold ? AllocTypeColdThinBackend++
4553 : AllocTypeNotColdThinBackend++;
4554 std::string AllocTypeString = getAllocTypeAttributeString(AllocTy);
4555 auto A = llvm::Attribute::get(F.getContext(), "memprof",
4556 AllocTypeString);
4557 CallBase *CBClone;
4558 // Copy 0 is the original function.
4559 if (!J)
4560 CBClone = CB;
4561 else
4562 // Since VMaps are only created for new clones, we index with
4563 // clone J-1 (J==0 is the original clone and does not have a VMaps
4564 // entry).
4565 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
4566 CBClone->addFnAttr(A);
4567 ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", CBClone)
4568 << ore::NV("AllocationCall", CBClone) << " in clone "
4569 << ore::NV("Caller", CBClone->getFunction())
4570 << " marked with memprof allocation attribute "
4571 << ore::NV("Attribute", AllocTypeString));
4572 }
4573 } else if (!CallsiteContext.empty()) {
4574 if (!CalledFunction) {
4575#ifndef NDEBUG
4576 // We should have skipped inline assembly calls.
4577 auto *CI = dyn_cast<CallInst>(CB);
4578 assert(!CI || !CI->isInlineAsm());
4579#endif
4580 // We should have skipped direct calls via a Constant.
4581 assert(CalledValue && !isa<Constant>(CalledValue));
4582
4583 // This is an indirect call, see if we have profile information and
4584 // whether any clones were recorded for the profiled targets (that
4585 // we synthesized CallsiteInfo summary records for when building the
4586 // index).
4587 auto NumClones =
4588 recordICPInfo(CB, FS->callsites(), SI, ICallAnalysisInfo);
4589
4590 // Perform cloning if not yet done. This is done here in case
4591 // we don't need to do ICP, but might need to clone this
4592 // function as it is the target of other cloned calls.
4593 if (NumClones)
4594 CloneFuncIfNeeded(NumClones);
4595 }
4596
4597 else {
4598 // Consult the next callsite node.
4599 assert(SI != FS->callsites().end());
4600 auto &StackNode = *(SI++);
4601
4602#ifndef NDEBUG
4603 // Sanity check that the stack ids match between the summary and
4604 // instruction metadata.
4605 auto StackIdIndexIter = StackNode.StackIdIndices.begin();
4606 for (auto StackId : CallsiteContext) {
4607 assert(StackIdIndexIter != StackNode.StackIdIndices.end());
4608 assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
4609 StackId);
4610 StackIdIndexIter++;
4611 }
4612#endif
4613
4614 CloneCallsite(StackNode, CB, CalledFunction);
4615 }
4616 } else if (CB->isTailCall() && CalledFunction) {
4617 // Locate the synthesized callsite info for the callee VI, if any was
4618 // created, and use that for cloning.
4619 ValueInfo CalleeVI =
4620 findValueInfoForFunc(*CalledFunction, M, ImportSummary);
4621 if (CalleeVI && MapTailCallCalleeVIToCallsite.count(CalleeVI)) {
4622 auto Callsite = MapTailCallCalleeVIToCallsite.find(CalleeVI);
4623 assert(Callsite != MapTailCallCalleeVIToCallsite.end());
4624 CloneCallsite(Callsite->second, CB, CalledFunction);
4625 }
4626 }
4627 }
4628 }
4629
4630 // Now do any promotion required for cloning.
4631 performICP(M, FS->callsites(), VMaps, ICallAnalysisInfo, ORE);
4632 }
4633
4634 // We skip some of the functions and instructions above, so remove all the
4635 // metadata in a single sweep here.
4636 for (auto &F : M) {
4637 // We can skip memprof clones because createFunctionClones already strips
4638 // the metadata from the newly created clones.
4639 if (F.isDeclaration() || isMemProfClone(F))
4640 continue;
4641 for (auto &BB : F) {
4642 for (auto &I : BB) {
4643 if (!isa<CallBase>(I))
4644 continue;
4645 I.setMetadata(LLVMContext::MD_memprof, nullptr);
4646 I.setMetadata(LLVMContext::MD_callsite, nullptr);
4647 }
4648 }
4649 }
4650
4651 return Changed;
4652}
4653
4654unsigned MemProfContextDisambiguation::recordICPInfo(
4655 CallBase *CB, ArrayRef<CallsiteInfo> AllCallsites,
4656 ArrayRef<CallsiteInfo>::iterator &SI,
4657 SmallVector<ICallAnalysisData> &ICallAnalysisInfo) {
4658 // First see if we have profile information for this indirect call.
4659 uint32_t NumCandidates;
4660 uint64_t TotalCount;
4661 auto CandidateProfileData =
4662 ICallAnalysis->getPromotionCandidatesForInstruction(CB, TotalCount,
4663 NumCandidates);
4664 if (CandidateProfileData.empty())
4665 return 0;
4666
4667 // Iterate through all of the candidate profiled targets along with the
4668 // CallsiteInfo summary records synthesized for them when building the index,
4669 // and see if any are cloned and/or refer to clones.
4670 bool ICPNeeded = false;
4671 unsigned NumClones = 0;
4672 size_t CallsiteInfoStartIndex = std::distance(AllCallsites.begin(), SI);
4673 for (const auto &Candidate : CandidateProfileData) {
4674#ifndef NDEBUG
4675 auto CalleeValueInfo =
4676#endif
4677 ImportSummary->getValueInfo(Candidate.Value);
4678 // We might not have a ValueInfo if this is a distributed
4679 // ThinLTO backend and decided not to import that function.
4680 assert(!CalleeValueInfo || SI->Callee == CalleeValueInfo);
4681 assert(SI != AllCallsites.end());
4682 auto &StackNode = *(SI++);
4683 // See if any of the clones of the indirect callsite for this
4684 // profiled target should call a cloned version of the profiled
4685 // target. We only need to do the ICP here if so.
4686 ICPNeeded |= llvm::any_of(StackNode.Clones,
4687 [](unsigned CloneNo) { return CloneNo != 0; });
4688 // Every callsite in the same function should have been cloned the same
4689 // number of times.
4690 assert(!NumClones || NumClones == StackNode.Clones.size());
4691 NumClones = StackNode.Clones.size();
4692 }
4693 if (!ICPNeeded)
4694 return NumClones;
4695 // Save information for ICP, which is performed later to avoid messing up the
4696 // current function traversal.
4697 ICallAnalysisInfo.push_back({CB, CandidateProfileData.vec(), NumCandidates,
4698 TotalCount, CallsiteInfoStartIndex});
4699 return NumClones;
4700}
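// Worked example (illustrative, not from the source): if the three profiled
// targets of an indirect call have StackNode.Clones of {0, 0}, {0, 1} and
// {0, 0}, ICP is needed because caller clone 1 must call the second target's
// clone 1; recordICPInfo then returns 2 (the per-callsite clone count) and
// stashes the candidates in ICallAnalysisInfo for performICP to process.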
4701
4702void MemProfContextDisambiguation::performICP(
4703 Module &M, ArrayRef<CallsiteInfo> AllCallsites,
4704 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
4705 ArrayRef<ICallAnalysisData> ICallAnalysisInfo,
4706 OptimizationRemarkEmitter &ORE) {
4707 // Now do any promotion required for cloning. Specifically, for each
4708 // recorded ICP candidate (which was only recorded because one clone of that
4709 // candidate should call a cloned target), we perform ICP (speculative
4710 // devirtualization) for each clone of the callsite, and update its callee
4711 // to the appropriate clone. Note that the ICP compares against the original
4712 // version of the target, which is what is in the vtable.
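// For illustration (an assumption, not from the source): if value profiling
// names a target `callee` for an indirect call, each caller clone gets a
// guarded compare against the address of the original `callee`; only the
// direct call created by the promotion is then retargeted to the assigned
// clone (e.g. "callee.memprof.1", assuming the default clone-name suffix).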
4713 for (auto &Info : ICallAnalysisInfo) {
4714 auto *CB = Info.CB;
4715 auto CallsiteIndex = Info.CallsiteInfoStartIndex;
4716 auto TotalCount = Info.TotalCount;
4717 unsigned NumPromoted = 0;
4718 unsigned NumClones = 0;
4719
4720 for (auto &Candidate : Info.CandidateProfileData) {
4721 auto &StackNode = AllCallsites[CallsiteIndex++];
4722
4723 // All calls in the same function must have the same number of clones.
4724 assert(!NumClones || NumClones == StackNode.Clones.size());
4725 NumClones = StackNode.Clones.size();
4726
4727 // See if the target is in the module. If it wasn't imported, it is
4728 // possible that this profile could have been collected on a different
4729 // target (or version of the code), and we need to be conservative
4730 // (similar to what is done in the ICP pass).
4731 Function *TargetFunction = Symtab->getFunction(Candidate.Value);
4732 if (TargetFunction == nullptr ||
4733 // Any ThinLTO global dead symbol removal should have already
4734 // occurred, so it should be safe to promote when the target is a
4735 // declaration.
4736 // TODO: Remove internal option once more fully tested.
4737 (MemProfRequireDefinitionForPromotion &&
4738 TargetFunction->isDeclaration())) {
4739 ORE.emit([&]() {
4740 return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", CB)
4741 << "Memprof cannot promote indirect call: target with md5sum "
4742 << ore::NV("target md5sum", Candidate.Value) << " not found";
4743 });
4744 // FIXME: See if we can use the new declaration importing support to
4745 // at least get the declarations imported for this case. Hot indirect
4746 // targets should have been imported normally, however.
4747 continue;
4748 }
4749
4750 // Check if legal to promote
4751 const char *Reason = nullptr;
4752 if (!isLegalToPromote(*CB, TargetFunction, &Reason)) {
4753 ORE.emit([&]() {
4754 return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", CB)
4755 << "Memprof cannot promote indirect call to "
4756 << ore::NV("TargetFunction", TargetFunction)
4757 << " with count of " << ore::NV("TotalCount", TotalCount)
4758 << ": " << Reason;
4759 });
4760 continue;
4761 }
4762
4763 assert(!isMemProfClone(*TargetFunction));
4764
4765 // Handle each call clone, applying ICP so that each clone directly
4766 // calls the specified callee clone, guarded by the appropriate ICP
4767 // check.
4768 CallBase *CBClone = CB;
4769 for (unsigned J = 0; J < NumClones; J++) {
4770 // Copy 0 is the original function.
4771 if (J > 0)
4772 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
4773 // We do the promotion using the original name, so that the comparison
4774 // is against the name in the vtable. Then just below, change the new
4775 // direct call to call the cloned function.
4776 auto &DirectCall =
4777 pgo::promoteIndirectCall(*CBClone, TargetFunction, Candidate.Count,
4778 TotalCount, isSamplePGO, &ORE);
4779 auto *TargetToUse = TargetFunction;
4780 // Call original if this version calls the original version of its
4781 // callee.
4782 if (StackNode.Clones[J]) {
4783 TargetToUse =
4784 cast<Function>(M.getOrInsertFunction(
4785 getMemProfFuncName(TargetFunction->getName(),
4786 StackNode.Clones[J]),
4787 TargetFunction->getFunctionType())
4788 .getCallee());
4789 }
4790 DirectCall.setCalledFunction(TargetToUse);
4791 ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone)
4792 << ore::NV("Call", CBClone) << " in clone "
4793 << ore::NV("Caller", CBClone->getFunction())
4794 << " promoted and assigned to call function clone "
4795 << ore::NV("Callee", TargetToUse));
4796 }
4797
4798 // Update TotalCount (all clones should get same count above)
4799 TotalCount -= Candidate.Count;
4800 NumPromoted++;
4801 }
4802 // Adjust the MD.prof metadata for all clones, now that we have the new
4803 // TotalCount and the number promoted.
4804 CallBase *CBClone = CB;
4805 for (unsigned J = 0; J < NumClones; J++) {
4806 // Copy 0 is the original function.
4807 if (J > 0)
4808 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
4809 // First delete the old one.
4810 CBClone->setMetadata(LLVMContext::MD_prof, nullptr);
4811 // If all promoted, we don't need the MD.prof metadata.
4812 // Otherwise we need to update it with the un-promoted records.
4813 if (TotalCount != 0)
4814 annotateValueSite(
4815 M, *CBClone, ArrayRef(Info.CandidateProfileData).slice(NumPromoted),
4816 TotalCount, IPVK_IndirectCallTarget, Info.NumCandidates);
4817 }
4818 }
4819}
4820
4821template <typename DerivedCCG, typename FuncTy, typename CallTy>
4822bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
4823 if (DumpCCG) {
4824 dbgs() << "CCG before cloning:\n";
4825 dbgs() << *this;
4826 }
4827 if (ExportToDot)
4828 exportToDot("postbuild");
4829
4830 if (VerifyCCG) {
4831 check();
4832 }
4833
4834 identifyClones();
4835
4836 if (VerifyCCG) {
4837 check();
4838 }
4839
4840 if (DumpCCG) {
4841 dbgs() << "CCG after cloning:\n";
4842 dbgs() << *this;
4843 }
4844 if (ExportToDot)
4845 exportToDot("cloned");
4846
4847 bool Changed = assignFunctions();
4848
4849 if (DumpCCG) {
4850 dbgs() << "CCG after assigning function clones:\n";
4851 dbgs() << *this;
4852 }
4853 if (ExportToDot)
4854 exportToDot("clonefuncassign");
4855
4856 if (MemProfReportHintedSizes)
4857 printTotalSizes(errs());
4858
4859 return Changed;
4860}
4861
4862bool MemProfContextDisambiguation::processModule(
4863 Module &M,
4864 llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
4865
4866 // If we have an import summary, then the cloning decisions were made during
4867 // the thin link on the index. Apply them and return.
4868 if (ImportSummary)
4869 return applyImport(M);
4870
4871 // TODO: If/when other types of memprof cloning are enabled beyond just for
4872 // hot and cold, we will need to change this to individually control the
4873 // AllocationType passed to addStackNodesForMIB during CCG construction.
4874 // Note that we specifically check this after applying imports above, so that
4875 // the option isn't needed to be passed to distributed ThinLTO backend
4876 // clang processes, which won't necessarily have visibility into the linker
4877 // dependences. Instead the information is communicated from the LTO link to
4878 // the backends via the combined summary index.
4879 if (!SupportsHotColdNew)
4880 return false;
4881
4882 ModuleCallsiteContextGraph CCG(M, OREGetter);
4883 return CCG.process();
4884}
4885
4886MemProfContextDisambiguation::MemProfContextDisambiguation(
4887 const ModuleSummaryIndex *Summary, bool isSamplePGO)
4888 : ImportSummary(Summary), isSamplePGO(isSamplePGO) {
4889 if (ImportSummary) {
4890 // The MemProfImportSummary should only be used for testing ThinLTO
4891 // distributed backend handling via opt, in which case we don't have a
4892 // summary from the pass pipeline.
4893 assert(MemProfImportSummary.empty());
4894 return;
4895 }
4896 if (MemProfImportSummary.empty())
4897 return;
4898
4899 auto ReadSummaryFile =
4900 errorOrToExpected(MemoryBuffer::getFile(MemProfImportSummary, /*IsText=*/true));
4901 if (!ReadSummaryFile) {
4902 logAllUnhandledErrors(ReadSummaryFile.takeError(), errs(),
4903 "Error loading file '" + MemProfImportSummary +
4904 "': ");
4905 return;
4906 }
4907 auto ImportSummaryForTestingOrErr = getModuleSummaryIndex(**ReadSummaryFile);
4908 if (!ImportSummaryForTestingOrErr) {
4909 logAllUnhandledErrors(ImportSummaryForTestingOrErr.takeError(), errs(),
4910 "Error parsing file '" + MemProfImportSummary +
4911 "': ");
4912 return;
4913 }
4914 ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
4915 ImportSummary = ImportSummaryForTesting.get();
4916}
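// Example invocation for exercising the ThinLTO backend handling via opt
// (illustrative; the pass name and flags below are assumptions based on the
// options referenced above):
//   opt -passes=memprof-context-disambiguation \
//       -memprof-import-summary=distributed_index.thinlto.bc \
//       input.ll -S -o output.ll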
4917
4918PreservedAnalyses MemProfContextDisambiguation::run(Module &M,
4919 ModuleAnalysisManager &AM) {
4920 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
4921 auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & {
4922 return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
4923 };
4924 if (!processModule(M, OREGetter))
4925 return PreservedAnalyses::all();
4926 return PreservedAnalyses::none();
4927}
4928
4929void MemProfContextDisambiguation::run(
4930 ModuleSummaryIndex &Index,
4931 llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
4932 isPrevailing) {
4933 // TODO: If/when other types of memprof cloning are enabled beyond just for
4934 // hot and cold, we will need to change this to individually control the
4935 // AllocationType passed to addStackNodesForMIB during CCG construction.
4936 // The index was set from the option, so these should be in sync.
4937 assert(Index.withSupportsHotColdNew() == SupportsHotColdNew);
4938 if (!SupportsHotColdNew)
4939 return;
4940
4941 IndexCallsiteContextGraph CCG(Index, isPrevailing);
4942 CCG.process();
4943}