52#define DEBUG_TYPE "loop-interchange"
54STATISTIC(LoopsInterchanged,
"Number of loops interchanged");
58 cl::desc(
"Interchange if you gain more than this number"));
64 "Maximum number of load-store instructions that should be handled "
65 "in the dependency matrix. Higher value may lead to more interchanges "
66 "at the cost of compile-time"));
73using CharMatrix = std::vector<std::vector<char>>;
82 for (
auto &Row : DepMatrix) {
102 if (!isa<Instruction>(
I))
104 if (
auto *Ld = dyn_cast<LoadInst>(&
I)) {
108 }
else if (
auto *St = dyn_cast<StoreInst>(&
I)) {
111 MemInstr.push_back(&
I);
117 <<
" Loads and Stores to analyze\n");
123 L->getStartLoc(), L->getHeader())
124 <<
"Number of loads/stores exceeded, the supported maximum "
125 "can be increased with option "
126 "-loop-interchange-maxmeminstr-count.";
130 ValueVector::iterator
I, IE, J, JE;
133 for (
I = MemInstr.begin(), IE = MemInstr.end();
I != IE; ++
I) {
134 for (J =
I, JE = MemInstr.end(); J != JE; ++J) {
135 std::vector<char> Dep;
139 if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
142 if (
auto D = DI->
depends(Src, Dst,
true)) {
143 assert(
D->isOrdered() &&
"Expected an output, flow or anti dep.");
146 if (
D->normalize(SE))
149 D->isFlow() ?
"flow" :
D->isAnti() ?
"anti" :
"output";
150 dbgs() <<
"Found " << DepType
151 <<
" dependency between Src and Dst\n"
152 <<
" Src:" << *Src <<
"\n Dst:" << *Dst <<
'\n');
153 unsigned Levels =
D->getLevels();
155 for (
unsigned II = 1;
II <= Levels; ++
II) {
156 unsigned Dir =
D->getDirection(
II);
168 while (Dep.size() != Level) {
174 DepMatrix.push_back(Dep);
186 for (
unsigned I = 0, E = DepMatrix.size();
I < E; ++
I)
187 std::swap(DepMatrix[
I][ToIndx], DepMatrix[
I][FromIndx]);
206 unsigned InnerLoopId,
207 unsigned OuterLoopId) {
208 unsigned NumRows = DepMatrix.size();
209 std::vector<char> Cur;
211 for (
unsigned Row = 0; Row < NumRows; ++Row) {
214 Cur = DepMatrix[Row];
217 std::swap(Cur[InnerLoopId], Cur[OuterLoopId]);
226 << L.getHeader()->getParent()->getName() <<
" Loop: %"
227 << L.getHeader()->getName() <<
'\n');
228 assert(LoopList.empty() &&
"LoopList should initially be empty!");
229 Loop *CurrentLoop = &L;
230 const std::vector<Loop *> *Vec = &CurrentLoop->
getSubLoops();
231 while (!Vec->empty()) {
235 if (Vec->size() != 1) {
240 LoopList.push_back(CurrentLoop);
241 CurrentLoop = Vec->front();
244 LoopList.push_back(CurrentLoop);
248 unsigned LoopNestDepth = LoopList.
size();
249 if (LoopNestDepth < 2) {
250 LLVM_DEBUG(
dbgs() <<
"Loop doesn't contain minimum nesting level.\n");
258class LoopInterchangeLegality {
262 : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
265 bool canInterchangeLoops(
unsigned InnerLoopId,
unsigned OuterLoopId,
266 CharMatrix &DepMatrix);
274 bool isLoopStructureUnderstood();
276 bool currentLimitations();
279 return OuterInnerReductions;
283 return InnerLoopInductions;
287 bool tightlyNested(
Loop *Outer,
Loop *Inner);
288 bool containsUnsafeInstructions(
BasicBlock *BB);
294 bool findInductionAndReductions(
Loop *L,
316class LoopInterchangeProfitability {
320 : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
324 unsigned InnerLoopId,
unsigned OuterLoopId,
325 CharMatrix &DepMatrix,
327 std::unique_ptr<CacheCost> &
CC);
330 int getInstrOrderCost();
331 std::optional<bool> isProfitablePerLoopCacheAnalysis(
333 std::unique_ptr<CacheCost> &
CC);
334 std::optional<bool> isProfitablePerInstrOrderCost();
335 std::optional<bool> isProfitableForVectorization(
unsigned InnerLoopId,
336 unsigned OuterLoopId,
337 CharMatrix &DepMatrix);
349class LoopInterchangeTransform {
353 const LoopInterchangeLegality &LIL)
354 : OuterLoop(
Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
358 void restructureLoops(
Loop *NewInner,
Loop *NewOuter,
361 void removeChildLoop(
Loop *OuterLoop,
Loop *InnerLoop);
364 bool adjustLoopLinks();
365 bool adjustLoopBranches();
376 const LoopInterchangeLegality &LIL;
379struct LoopInterchange {
384 std::unique_ptr<CacheCost>
CC =
nullptr;
392 : SE(SE), LI(LI), DI(DI), DT(DT),
CC(
std::
move(
CC)), ORE(ORE) {}
395 if (
L->getParentLoop())
399 return processLoopList(LoopList);
404 for (
unsigned I = 1;
I < LoopList.size(); ++
I)
405 if (LoopList[
I]->getParentLoop() != LoopList[
I - 1])
407 return processLoopList(LoopList);
411 for (
Loop *L : LoopList) {
413 if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
417 if (
L->getNumBackEdges() != 1) {
421 if (!
L->getExitingBlock()) {
432 return LoopList.
size() - 1;
436 bool Changed =
false;
441 unsigned LoopNestDepth = LoopList.
size();
447 if (!isComputableLoopNest(LoopList)) {
448 LLVM_DEBUG(
dbgs() <<
"Not valid loop candidate for interchange\n");
452 LLVM_DEBUG(
dbgs() <<
"Processing LoopList of size = " << LoopNestDepth
455 CharMatrix DependencyMatrix;
456 Loop *OuterMostLoop = *(LoopList.
begin());
458 OuterMostLoop, DI, SE, ORE)) {
473 unsigned SelecLoopId = selectLoopForInterchange(LoopList);
484 const auto &LoopCosts =
CC->getLoopCosts();
485 for (
unsigned i = 0; i < LoopCosts.size(); i++) {
486 CostMap[LoopCosts[i].first] = i;
493 for (
unsigned j = SelecLoopId;
j > 0;
j--) {
494 bool ChangedPerIter =
false;
495 for (
unsigned i = SelecLoopId; i > SelecLoopId -
j; i--) {
496 bool Interchanged = processLoop(LoopList[i], LoopList[i - 1], i, i - 1,
497 DependencyMatrix, CostMap);
508 ChangedPerIter |= Interchanged;
509 Changed |= Interchanged;
519 bool processLoop(
Loop *InnerLoop,
Loop *OuterLoop,
unsigned InnerLoopId,
520 unsigned OuterLoopId,
521 std::vector<std::vector<char>> &DependencyMatrix,
524 <<
" and OuterLoopId = " << OuterLoopId <<
"\n");
525 LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
526 if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
527 LLVM_DEBUG(
dbgs() <<
"Not interchanging loops. Cannot prove legality.\n");
531 LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
532 if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
533 DependencyMatrix, CostMap, CC)) {
542 <<
"Loop interchanged with enclosing loop.";
545 LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
557bool LoopInterchangeLegality::containsUnsafeInstructions(
BasicBlock *BB) {
559 return I.mayHaveSideEffects() ||
I.mayReadFromMemory();
563bool LoopInterchangeLegality::tightlyNested(
Loop *OuterLoop,
Loop *InnerLoop) {
575 if (!OuterLoopHeaderBI)
579 if (Succ != InnerLoopPreHeader && Succ != InnerLoop->
getHeader() &&
580 Succ != OuterLoopLatch)
583 LLVM_DEBUG(
dbgs() <<
"Checking instructions in Loop header and Loop latch\n");
586 if (containsUnsafeInstructions(OuterLoopHeader) ||
587 containsUnsafeInstructions(OuterLoopLatch))
593 if (InnerLoopPreHeader != OuterLoopHeader &&
594 containsUnsafeInstructions(InnerLoopPreHeader))
602 if (&SuccInner != OuterLoopLatch) {
604 <<
" does not lead to the outer loop latch.\n";);
610 if (containsUnsafeInstructions(InnerLoopExit))
618bool LoopInterchangeLegality::isLoopStructureUnderstood() {
620 for (
PHINode *InnerInduction : InnerLoopInductions) {
621 unsigned Num = InnerInduction->getNumOperands();
622 for (
unsigned i = 0; i < Num; ++i) {
623 Value *Val = InnerInduction->getOperand(i);
624 if (isa<Constant>(Val))
633 if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
634 InnerLoopPreheader &&
654 Value *Op0 = InnerLoopCmp->getOperand(0);
655 Value *Op1 = InnerLoopCmp->getOperand(1);
666 std::function<
bool(
Value *)> IsPathToInnerIndVar;
667 IsPathToInnerIndVar = [
this, &IsPathToInnerIndVar](
const Value *
V) ->
bool {
670 if (isa<Constant>(V))
675 if (isa<CastInst>(
I))
676 return IsPathToInnerIndVar(
I->getOperand(0));
677 if (isa<BinaryOperator>(
I))
678 return IsPathToInnerIndVar(
I->getOperand(0)) &&
679 IsPathToInnerIndVar(
I->getOperand(1));
685 if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1))
690 if (IsPathToInnerIndVar(Op0) && !isa<Constant>(Op0)) {
693 }
else if (IsPathToInnerIndVar(Op1) && !isa<Constant>(Op1)) {
716 if (
PHI->getNumIncomingValues() != 1)
724 if (isa<Constant>(V))
729 if (
PHI->getNumIncomingValues() == 1)
745bool LoopInterchangeLegality::findInductionAndReductions(
747 if (!
L->getLoopLatch() || !
L->getLoopPredecessor())
757 if (!OuterInnerReductions.count(&
PHI)) {
759 "across the outer loop.\n");
763 assert(
PHI.getNumIncomingValues() == 2 &&
764 "Phis in loop header should have exactly 2 incoming values");
773 <<
"Failed to recognize PHI as an induction or reduction.\n");
776 OuterInnerReductions.insert(&
PHI);
777 OuterInnerReductions.insert(InnerRedPhi);
786bool LoopInterchangeLegality::currentLimitations() {
796 dbgs() <<
"Loops where the latch is not the exiting block are not"
797 <<
" supported currently.\n");
802 <<
"Loops where the latch is not the exiting block cannot be"
803 " interchange currently.";
809 if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
811 dbgs() <<
"Only outer loops with induction or reduction PHI nodes "
812 <<
"are supported currently.\n");
817 <<
"Only outer loops with induction or reduction PHI nodes can be"
818 " interchanged currently.";
827 Loop *CurLevelLoop = OuterLoop;
830 CurLevelLoop = CurLevelLoop->
getSubLoops().front();
831 if (!findInductionAndReductions(CurLevelLoop, Inductions,
nullptr)) {
833 dbgs() <<
"Only inner loops with induction or reduction PHI nodes "
834 <<
"are supported currently.\n");
839 <<
"Only inner loops with induction or reduction PHI nodes can be"
840 " interchange currently.";
847 if (!isLoopStructureUnderstood()) {
853 <<
"Inner loop structure not understood currently.";
861bool LoopInterchangeLegality::findInductions(
868 return !Inductions.
empty();
880 if (
PHI.getNumIncomingValues() > 1)
883 PHINode *PN = dyn_cast<PHINode>(U);
885 (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
903 for (
unsigned i = 0; i <
PHI.getNumIncomingValues(); i++) {
904 Instruction *IncomingI = dyn_cast<Instruction>(
PHI.getIncomingValue(i));
950 for (
auto *U :
PHI.users()) {
959bool LoopInterchangeLegality::canInterchangeLoops(
unsigned InnerLoopId,
960 unsigned OuterLoopId,
961 CharMatrix &DepMatrix) {
963 LLVM_DEBUG(
dbgs() <<
"Failed interchange InnerLoopId = " << InnerLoopId
964 <<
" and OuterLoopId = " << OuterLoopId
965 <<
" due to dependence\n");
970 <<
"Cannot interchange loops due to dependences.";
975 for (
auto *BB : OuterLoop->
blocks())
977 if (
CallInst *CI = dyn_cast<CallInst>(&
I)) {
979 if (CI->onlyWritesMemory())
982 dbgs() <<
"Loops with call instructions cannot be interchanged "
988 <<
"Cannot interchange loops due to call instruction.";
994 if (!findInductions(InnerLoop, InnerLoopInductions)) {
995 LLVM_DEBUG(
dbgs() <<
"Could not find inner loop induction variables.\n");
1000 LLVM_DEBUG(
dbgs() <<
"Found unsupported PHI nodes in inner loop latch.\n");
1005 <<
"Cannot interchange loops because unsupported PHI nodes found "
1006 "in inner loop latch.";
1013 if (currentLimitations()) {
1014 LLVM_DEBUG(
dbgs() <<
"Not legal because of current transform limitation\n");
1019 if (!tightlyNested(OuterLoop, InnerLoop)) {
1025 <<
"Cannot interchange loops because they are not tightly "
1032 OuterInnerReductions)) {
1033 LLVM_DEBUG(
dbgs() <<
"Found unsupported PHI nodes in inner loop exit.\n");
1038 <<
"Found unsupported PHI node in loop exit.";
1044 LLVM_DEBUG(
dbgs() <<
"Found unsupported PHI nodes in outer loop exit.\n");
1049 <<
"Found unsupported PHI node in loop exit.";
1057int LoopInterchangeProfitability::getInstrOrderCost() {
1058 unsigned GoodOrder, BadOrder;
1059 BadOrder = GoodOrder = 0;
1063 unsigned NumOp =
GEP->getNumOperands();
1064 bool FoundInnerInduction =
false;
1065 bool FoundOuterInduction =
false;
1066 for (
unsigned i = 0; i < NumOp; ++i) {
1081 if (AR->
getLoop() == InnerLoop) {
1084 FoundInnerInduction =
true;
1085 if (FoundOuterInduction) {
1095 if (AR->
getLoop() == OuterLoop) {
1098 FoundOuterInduction =
true;
1099 if (FoundInnerInduction) {
1108 return GoodOrder - BadOrder;
1112LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis(
1114 std::unique_ptr<CacheCost> &
CC) {
1119 unsigned InnerIndex = 0, OuterIndex = 0;
1120 InnerIndex = CostMap.
find(InnerLoop)->second;
1121 OuterIndex = CostMap.
find(OuterLoop)->second;
1123 <<
", OuterIndex = " << OuterIndex <<
"\n");
1124 if (InnerIndex < OuterIndex)
1125 return std::optional<bool>(
true);
1126 assert(InnerIndex != OuterIndex &&
"CostMap should assign unique "
1127 "numbers to each loop");
1128 if (
CC->getLoopCost(*OuterLoop) ==
CC->getLoopCost(*InnerLoop))
1129 return std::nullopt;
1130 return std::optional<bool>(
false);
1132 return std::nullopt;
1136LoopInterchangeProfitability::isProfitablePerInstrOrderCost() {
1140 int Cost = getInstrOrderCost();
1143 return std::optional<bool>(
true);
1145 return std::nullopt;
1148std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
1149 unsigned InnerLoopId,
unsigned OuterLoopId, CharMatrix &DepMatrix) {
1150 for (
auto &Row : DepMatrix) {
1154 if (Row[InnerLoopId] ==
'I' || Row[InnerLoopId] ==
'=')
1155 return std::optional<bool>(
false);
1160 if (Row[OuterLoopId] !=
'I' && Row[OuterLoopId] !=
'=')
1161 return std::optional<bool>(
false);
1166 return std::optional<bool>(!DepMatrix.empty());
1169bool LoopInterchangeProfitability::isProfitable(
1170 const Loop *InnerLoop,
const Loop *OuterLoop,
unsigned InnerLoopId,
1171 unsigned OuterLoopId, CharMatrix &DepMatrix,
1173 std::unique_ptr<CacheCost> &
CC) {
1182 std::optional<bool> shouldInterchange =
1183 isProfitablePerLoopCacheAnalysis(CostMap,
CC);
1184 if (!shouldInterchange.has_value()) {
1185 shouldInterchange = isProfitablePerInstrOrderCost();
1186 if (!shouldInterchange.has_value())
1188 isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
1190 if (!shouldInterchange.has_value()) {
1195 <<
"Insufficient information to calculate the cost of loop for "
1199 }
else if (!shouldInterchange.value()) {
1204 <<
"Interchanging loops is not considered to improve cache "
1205 "locality nor vectorization.";
1212void LoopInterchangeTransform::removeChildLoop(
Loop *OuterLoop,
1214 for (
Loop *L : *OuterLoop)
1215 if (L == InnerLoop) {
1216 OuterLoop->removeChildLoop(L);
1245void LoopInterchangeTransform::restructureLoops(
1255 if (OuterLoopParent) {
1257 removeChildLoop(OuterLoopParent, NewInner);
1258 removeChildLoop(NewInner, NewOuter);
1261 removeChildLoop(NewInner, NewOuter);
1286 if (BB == OuterHeader || BB == OuterLatch)
1301bool LoopInterchangeTransform::transform() {
1302 bool Transformed =
false;
1307 auto &InductionPHIs = LIL.getInnerLoopInductions();
1308 if (InductionPHIs.empty()) {
1309 LLVM_DEBUG(
dbgs() <<
"Failed to find the point to split loop latch \n");
1314 for (
PHINode *CurInductionPHI : InductionPHIs) {
1315 if (CurInductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1317 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(1)));
1320 dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(0)));
1333 auto MoveInstructions = [&i, &WorkList,
this, &InductionPHIs, NewLatch]() {
1334 for (; i < WorkList.
size(); i++) {
1340 "Moving instructions with side-effects may change behavior of "
1351 for (
Value *
Op : WorkList[i]->operands()) {
1369 for (
Instruction *InnerIndexVar : InnerIndexVarList)
1370 WorkList.
insert(cast<Instruction>(InnerIndexVar));
1387 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1388 if (InnerLoopPreHeader != OuterLoopHeader) {
1392 std::prev(InnerLoopPreHeader->
end()))))
1396 Transformed |= adjustLoopLinks();
1421 I->removeFromParent();
1436 std::vector<DominatorTree::UpdateType> &DTUpdates,
1437 bool MustUpdateOnce =
true) {
1438 assert((!MustUpdateOnce ||
1442 }) == 1) &&
"BI must jump to OldBB exactly once.");
1443 bool Changed =
false;
1451 DTUpdates.push_back(
1452 {DominatorTree::UpdateKind::Insert, BI->
getParent(), NewBB});
1453 DTUpdates.push_back(
1454 {DominatorTree::UpdateKind::Delete, BI->
getParent(), OldBB});
1456 assert(Changed &&
"Expected a successor to be updated");
1473 assert(
P.getNumIncomingValues() == 1 &&
1474 "Only loops with a single exit are supported!");
1477 auto IncI = cast<Instruction>(
P.getIncomingValueForBlock(InnerLatch));
1480 auto IncIInnerMost = cast<Instruction>(
followLCSSA(IncI));
1483 if (IncIInnerMost->getParent() != InnerLatch &&
1484 IncIInnerMost->
getParent() != InnerHeader)
1488 [OuterHeader, OuterExit, IncI, InnerHeader](
User *U) {
1489 return (cast<PHINode>(U)->getParent() == OuterHeader &&
1490 IncI->getParent() == InnerHeader) ||
1491 cast<PHINode>(U)->getParent() == OuterExit;
1493 "Can only replace phis iff the uses are in the loop nest exit or "
1494 "the incoming value is defined in the inner header (it will "
1495 "dominate all loop blocks after interchanging)");
1496 P.replaceAllUsesWith(IncI);
1497 P.eraseFromParent();
1527 if (
P.getNumIncomingValues() != 1)
1531 auto I = dyn_cast<Instruction>(
P.getIncomingValue(0));
1535 PHINode *NewPhi = dyn_cast<PHINode>(
P.clone());
1541 if (Pred == OuterLatch)
1546 P.setIncomingValue(0, NewPhi);
1556bool LoopInterchangeTransform::adjustLoopBranches() {
1558 std::vector<DominatorTree::UpdateType> DTUpdates;
1560 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1563 assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
1564 InnerLoopPreHeader != InnerLoop->
getHeader() && OuterLoopPreHeader &&
1565 InnerLoopPreHeader &&
"Guaranteed by loop-simplify form");
1570 if (isa<PHINode>(OuterLoopPreHeader->
begin()) ||
1572 OuterLoopPreHeader =
1574 if (InnerLoopPreHeader == OuterLoop->getHeader())
1575 InnerLoopPreHeader =
1580 BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1582 BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1598 if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
1599 !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
1604 dyn_cast<BranchInst>(InnerLoopLatchPredecessor->
getTerminator());
1606 dyn_cast<BranchInst>(OuterLoopPredecessor->
getTerminator());
1608 if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
1611 if (!InnerLoopHeaderSuccessor)
1619 InnerLoopPreHeader, DTUpdates,
false);
1629 InnerLoopHeaderSuccessor, DTUpdates,
1637 OuterLoopPreHeader, DTUpdates);
1640 if (InnerLoopLatchBI->
getSuccessor(0) == InnerLoopHeader)
1641 InnerLoopLatchSuccessor = InnerLoopLatchBI->
getSuccessor(1);
1643 InnerLoopLatchSuccessor = InnerLoopLatchBI->
getSuccessor(0);
1646 InnerLoopLatchSuccessor, DTUpdates);
1648 if (OuterLoopLatchBI->
getSuccessor(0) == OuterLoopHeader)
1649 OuterLoopLatchSuccessor = OuterLoopLatchBI->
getSuccessor(1);
1651 OuterLoopLatchSuccessor = OuterLoopLatchBI->
getSuccessor(0);
1654 OuterLoopLatchSuccessor, DTUpdates);
1655 updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
1659 restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
1660 OuterLoopPreHeader);
1662 moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
1663 OuterLoopHeader, OuterLoopLatch, InnerLoop->
getExitBlock(),
1669 auto &OuterInnerReductions = LIL.getOuterInnerReductions();
1673 if (OuterInnerReductions.contains(&
PHI))
1677 if (OuterInnerReductions.contains(&
PHI))
1686 assert(OuterInnerReductions.count(
PHI) &&
"Expected a reduction PHI node");
1691 assert(OuterInnerReductions.count(
PHI) &&
"Expected a reduction PHI node");
1713bool LoopInterchangeTransform::adjustLoopLinks() {
1715 bool Changed = adjustLoopBranches();
1720 BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1743 std::unique_ptr<CacheCost>
CC =
1746 if (!LoopInterchange(&AR.
SE, &AR.
LI, &DI, &AR.
DT,
CC, &ORE).run(LN))
1748 U.markLoopNestChanged(
true);
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Given that RA is a live propagate it s liveness to any other values it uses(according to Uses). void DeadArgumentEliminationPass
This file defines the interface for the loop cache analysis.
static bool hasMinimumLoopDepth(SmallVectorImpl< Loop * > &LoopList)
static cl::opt< unsigned int > MaxMemInstrCount("loop-interchange-max-meminstr-count", cl::init(64), cl::Hidden, cl::desc("Maximum number of load-store instructions that should be handled " "in the dependency matrix. Higher value may lead to more interchanges " "at the cost of compile-time"))
static cl::opt< int > LoopInterchangeCostThreshold("loop-interchange-threshold", cl::init(0), cl::Hidden, cl::desc("Interchange if you gain more than this number"))
static PHINode * findInnerReductionPhi(Loop *L, Value *V)
static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore)
Move all instructions except the terminator from FromBB right before InsertBefore.
static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, unsigned ToIndx)
static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader, BasicBlock *InnerLatch, BasicBlock *OuterHeader, BasicBlock *OuterLatch, BasicBlock *OuterExit, Loop *InnerLoop, LoopInfo *LI)
static const unsigned MaxLoopNestDepth
static void printDepMatrix(CharMatrix &DepMatrix)
static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2)
Swap instructions between BB1 and BB2 but keep terminators intact.
static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId, unsigned OuterLoopId)
static bool areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL, SmallPtrSetImpl< PHINode * > &Reductions)
static Value * followLCSSA(Value *SV)
static void populateWorklist(Loop &L, LoopVector &LoopList)
static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L, DependenceInfo *DI, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE)
static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, BasicBlock *NewBB, std::vector< DominatorTree::UpdateType > &DTUpdates, bool MustUpdateOnce=true)
static bool isLexicographicallyPositive(std::vector< char > &DV)
This file defines the interface for the loop nest analysis.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
uint64_t IntrinsicInst * II
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
static bool isProfitable(const SmallVector< std::unique_ptr< StableFunctionMap::StableFunctionEntry > > &SFS)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
StringSet - A set-like wrapper for the StringMap.
A container for analyses that lazily runs them and caches their results.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
void replacePhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
const Function * getParent() const
Return the enclosing method, or null if none.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
Value * getCondition() const
static std::unique_ptr< CacheCost > getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, std::optional< unsigned > TRT=std::nullopt)
Create a CacheCost for the loop nest rooted by Root.
This class represents a function call, abstracting a target machine's calling convention.
This class is the base class for the comparison instructions.
This class represents an Operation in the Expression.
iterator find(const_arg_type_t< KeyT > Val)
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
DependenceInfo - This class is the main dependence-analysis driver.
std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool PossiblyLoopIndependent)
depends - Tests for a dependence between the Src and Dst instructions.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
void removeBlockFromLoop(BlockT *BB)
This removes the specified basic block from the current loop, updating the Blocks as appropriate.
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
BlockT * getHeader() const
iterator_range< block_iterator > blocks() const
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
void addBlockEntry(BlockT *BB)
This adds a basic block directly to the basic block list.
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
void changeTopLevelLoop(LoopT *OldLoop, LoopT *NewLoop)
Replace the specified loop in the top-level loops list with the indicated loop.
void changeLoopFor(BlockT *BB, LoopT *L)
Change the top-level loop that contains BB to the specified loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
static const BasicBlock & skipEmptyBlockUntil(const BasicBlock *From, const BasicBlock *End, bool CheckUniquePred=false)
Recursivelly traverse all empty 'single successor' basic blocks of From (if there are any).
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Function * getParent() const
Return the function to which the loop-nest belongs.
Loop & getOutermostLoop() const
Return the outermost loop in the loop nest.
Represents a single loop in the control flow graph.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
void setIncomingBlock(unsigned i, BasicBlock *BB)
void setIncomingValue(unsigned i, Value *V)
static unsigned getIncomingValueNumForOperand(unsigned i)
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Instruction * getExactFPMathInst() const
Returns 1st non-reassociative FP instruction in the PHI node's use-chain.
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
This node represents a polynomial recurrence on the trip count of the specified loop.
const Loop * getLoop() const
This class represents an analyzed expression in the program.
The main scalar evolution driver.
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
StringSet - A wrapper for StringMap that provides set-like functionality.
std::pair< typename Base::iterator, bool > insert(StringRef key)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn't have a preheader, this method is called...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto successors(const MachineBasicBlock *BB)
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto map_range(ContainerTy &&C, FuncTy F)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Implement std::hash so that hash_code can be used in STL containers.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Direction
An enum for the direction of the loop.