52 GetIntOrFpInductionDescriptor,
59 if (!VPBB->getParent())
62 auto EndIter = Term ? Term->getIterator() : VPBB->end();
67 VPValue *VPV = Ingredient.getVPSingleValue();
76 const auto *II = GetIntOrFpInductionDescriptor(Phi);
90 Phi, Start, Step, &Plan.getVF(), *II, Flags,
91 Ingredient.getDebugLoc());
99 *Load, Ingredient.getOperand(0), nullptr,
100 false, false, *VPI,
101 Ingredient.getDebugLoc());
104 *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
105 nullptr, false, false, *VPI,
106 Ingredient.getDebugLoc());
109 Ingredient.getDebugLoc());
117 *VPI, CI->getDebugLoc());
120 *VPI, Ingredient.getDebugLoc());
123 CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI,
127 *VPI, Ingredient.getDebugLoc());
136 "Only recipes with zero or one defined value expected");
137 Ingredient.eraseFromParent();
154 if (A->getOpcode() != Instruction::Store ||
155 B->getOpcode() != Instruction::Store)
165 const APInt *Distance;
171 Type *TyA = TypeInfo.inferScalarType(A->getOperand(0));
173 Type *TyB = TypeInfo.inferScalarType(B->getOperand(0));
179 uint64_t MaxStoreSize = std::max(SizeA, SizeB);
181 auto VFs = B->getParent()->getPlan()->vectorFactors();
183 return Distance->abs().uge(
191 : ExcludeRecipes(ExcludeRecipes), GroupLeader(GroupLeader), PSE(PSE),
192 L(L), TypeInfo(TypeInfo) {}
199 return ExcludeRecipes.contains(&R) ||
200 (Store && isNoAliasViaDistance(Store, &GroupLeader));
213 std::optional<SinkStoreInfo> SinkInfo = {}) {
214 bool CheckReads = SinkInfo.has_value();
223 "Expected at most one successor in block chain");
226 if (SinkInfo && SinkInfo->shouldSkip(R))
230 if (!R.mayWriteToMemory() && !(CheckReads && R.mayReadFromMemory()))
241 if (CheckReads && R.mayReadFromMemory() &&
248 Loc->AATags.NoAlias))
268 if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
273 return RepR && RepR->getOpcode() == Instruction::Alloca;
282 auto InsertIfValidSinkCandidate = [ScalarVFOnly, &WorkList](
298 if (!ScalarVFOnly && RepR->isSingleScalar())
301 WorkList.insert({SinkTo, Candidate});
313 for (auto &Recipe : *VPBB)
315 InsertIfValidSinkCandidate(VPBB, Op);
319 for (unsigned I = 0; I != WorkList.size(); ++I) {
322 std::tie(SinkTo, SinkCandidate) = WorkList[I];
327 auto UsersOutsideSinkTo =
329 return cast<VPRecipeBase>(U)->getParent() != SinkTo;
331 if (any_of(UsersOutsideSinkTo, [SinkCandidate](VPUser *U) {
332 return !U->usesFirstLaneOnly(SinkCandidate);
335 bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
337 if (NeedsDuplicating) {
341 if (auto *SinkCandidateRepR =
347 nullptr, *SinkCandidateRepR,
351 Clone = SinkCandidate->clone();
361 InsertIfValidSinkCandidate(SinkTo, Op);
371 if (!EntryBB || EntryBB->size() != 1 ||
381 if (EntryBB->getNumSuccessors() != 2)
386 if (!Succ0 || !Succ1)
389 if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
391 if (Succ0->getSingleSuccessor() == Succ1)
393 if (Succ1->getSingleSuccessor() == Succ0)
410 if (!Region1->isReplicator())
412 auto *MiddleBasicBlock =
414 if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
419 if (!Region2 || !Region2->isReplicator())
424 if (!Mask1 || Mask1 != Mask2)
427 assert(Mask1 && Mask2 && "both regions must have conditions");
433 if (TransformedRegions.contains(Region1))
440 if (!Then1 || !Then2)
460 VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
466 if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
467 Phi1ToMove.eraseFromParent();
470 Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
484 TransformedRegions.insert(Region1);
487 return !TransformedRegions.empty();
494 std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str();
495 assert(Instr->getParent() && "Predicated instruction not in any basic block");
496 auto *BlockInMask = PredRecipe->getMask();
515 RecipeWithoutMask->getDebugLoc());
539 if (RepR->isPredicated())
558 if (ParentRegion && ParentRegion->getExiting() == CurrentBlock)
572 if (!VPBB->getParent())
576 if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
585 R.moveBefore(*PredVPBB, PredVPBB->end());
587 auto *ParentRegion = VPBB->getParent();
588 if (ParentRegion && ParentRegion->getExiting() == VPBB)
589 ParentRegion->setExiting(PredVPBB);
590 for (auto *Succ : to_vector(VPBB->successors())) {
596 return !WorkList.empty();
603 bool ShouldSimplify = true;
604 while (ShouldSimplify) {
620 if (!IV || IV->getTruncInst())
635 for (auto *U : FindMyCast->users()) {
637 if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
638 FoundUserCast = UserCast;
642 FindMyCast = FoundUserCast;
667 if (!WidenOriginalIV || !WidenOriginalIV->isCanonical())
680 WidenOriginalIV->dropPoisonGeneratingFlags();
693 bool IsConditionalAssume = RepR && RepR->isPredicated() &&
695 if (IsConditionalAssume)
698 if (R.mayHaveSideEffects())
702 return all_of(R.definedValues(),
703 [](VPValue *V) { return V->getNumUsers() == 0; });
719 if (!PhiR || PhiR->getNumOperands() != 2)
721 VPUser *PhiUser = PhiR->getSingleUser();
725 if (PhiUser != Incoming->getDefiningRecipe() ||
728 PhiR->replaceAllUsesWith(PhiR->getOperand(0));
729 PhiR->eraseFromParent();
730 Incoming->getDefiningRecipe()->eraseFromParent();
745 Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx");
755 BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy, DL);
761 if (ResultTy != StepTy) {
768 Builder.setInsertPoint(VecPreheader);
769 Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy, DL);
771 return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,
777 for (unsigned I = 0; I != Users.size(); ++I) {
782 Users.insert_range(V->users());
784 return Users.takeVector();
798 nullptr, StartV, StepV, PtrIV->getDebugLoc(), Builder);
835 Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
836 (RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
844 Def->operands(),
true,
846 Clone->insertAfter(Def);
847 Def->replaceAllUsesWith(Clone);
858 PtrIV->replaceAllUsesWith(PtrAdd);
865 if (HasOnlyVectorVFs && none_of(WideIV->users(), [WideIV](VPUser *U) {
866 return U->usesScalars(WideIV);
872 Plan, ID.getKind(), ID.getInductionOpcode(),
874 WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
875 WideIV->getDebugLoc(), Builder);
878 if (!HasOnlyVectorVFs) {
880 "plans containing a scalar VF cannot also include scalable VFs");
881 WideIV->replaceAllUsesWith(Steps);
884 WideIV->replaceUsesWithIf(Steps,
885 [WideIV, HasScalableVF](VPUser &U, unsigned) {
887 return U.usesFirstLaneOnly(WideIV);
888 return U.usesScalars(WideIV);
904 return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
909 if (!Def || Def->getNumOperands() != 2)
917 auto IsWideIVInc = [&]() {
918 auto &ID = WideIV->getInductionDescriptor();
921 VPValue *IVStep = WideIV->getStepValue();
922 switch (ID.getInductionOpcode()) {
923 case Instruction::Add:
925 case Instruction::FAdd:
928 case Instruction::FSub:
931 case Instruction::Sub: {
951 return IsWideIVInc() ? WideIV : nullptr;
971 if (WideIntOrFp && WideIntOrFp->getTruncInst())
984 FirstActiveLane = B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType,
985 FirstActiveLaneType, DL);
987 B.createNaryOp(Instruction::Add, {CanonicalIV, FirstActiveLane}, DL);
994 EndValue = B.createNaryOp(Instruction::Add, {EndValue, One}, DL);
997 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
999 VPValue *Start = WideIV->getStartValue();
1000 VPValue *Step = WideIV->getStepValue();
1001 EndValue = B.createDerivedIV(
1003 Start, EndValue, Step);
1023 assert(EndValue && "end value must have been pre-computed");
1033 VPValue *Step = WideIV->getStepValue();
1036 return B.createNaryOp(Instruction::Sub, {EndValue, Step},
1041 return B.createPtrAdd(EndValue,
1042 B.createNaryOp(Instruction::Sub, {Zero, Step}),
1046 const auto &ID = WideIV->getInductionDescriptor();
1047 return B.createNaryOp(
1048 ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
1050 : Instruction::FAdd,
1051 {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
1066 for (auto [Idx, PredVPBB] : enumerate(ExitVPBB->getPredecessors())) {
1068 if (PredVPBB == MiddleVPBB)
1070 ExitIRI->getOperand(Idx),
1074 Plan, TypeInfo, PredVPBB, ExitIRI->getOperand(Idx), PSE);
1076 ExitIRI->setOperand(Idx, Escape);
1093 const auto &[V, Inserted] = SCEV2VPV.try_emplace(ExpR->getSCEV(), ExpR);
1096 ExpR->replaceAllUsesWith(V->second);
1097 ExpR->eraseFromParent();
1106 while (!WorkList.empty()) {
1108 if (!Seen.insert(Cur).second)
1116 R->eraseFromParent();
1123static std::optional<std::pair<bool, unsigned>>
1126 std::optional<std::pair<bool, unsigned>>>(R)
1129 [](auto *I) { return std::make_pair(false, I->getOpcode()); })
1130 .Case<VPWidenIntrinsicRecipe>([](auto *I) {
1131 return std::make_pair(true, I->getVectorIntrinsicID());
1133 .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](auto *I) {
1137 return std::make_pair(false,
1140 .Default([](auto *) { return std::nullopt; });
1156 if (!Op->isLiveIn() || !Op->getLiveInIRValue())
1158 Ops.push_back(Op->getLiveInIRValue());
1161 auto FoldToIRValue = [&]() -> Value * {
1163 if (OpcodeOrIID->first) {
1164 if (R.getNumOperands() != 2)
1166 unsigned ID = OpcodeOrIID->second;
1167 return Folder.FoldBinaryIntrinsic(ID, Ops[0], Ops[1],
1170 unsigned Opcode = OpcodeOrIID->second;
1179 return Folder.FoldSelect(Ops[0], Ops[1],
1182 return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
1184 case Instruction::Select:
1185 return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
1186 case Instruction::ICmp:
1187 case Instruction::FCmp:
1190 case Instruction::GetElementPtr: {
1193 return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0],
1203 case Instruction::ExtractElement:
1210 if (Value *V = FoldToIRValue())
1211 return R.getParent()->getPlan()->getOrAddLiveIn(V);
1217 VPlan *Plan = Def->getParent()->getPlan();
1224 return Def->replaceAllUsesWith(V);
1230 PredPHI->replaceAllUsesWith(Op);
1238 if (TruncTy == ATy) {
1239 Def->replaceAllUsesWith(A);
1248 : Instruction::ZExt;
1251 if (auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
1253 Ext->setUnderlyingValue(UnderlyingExt);
1255 Def->replaceAllUsesWith(Ext);
1257 auto *Trunc = Builder.createWidenCast(Instruction::Trunc, A, TruncTy);
1258 Def->replaceAllUsesWith(Trunc);
1266 for (VPUser *U : A->users()) {
1268 for (VPValue *VPV : R->definedValues())
1282 Def->replaceAllUsesWith(X);
1283 Def->eraseFromParent();
1289 return Def->replaceAllUsesWith(Def->getOperand(Def->getOperand(0) == X));
1293 return Def->replaceAllUsesWith(X);
1297 return Def->replaceAllUsesWith(Def->getOperand(Def->getOperand(0) == X));
1301 return Def->replaceAllUsesWith(Def->getOperand(1));
1308 (!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||
1309 !Def->getOperand(1)->hasMoreThanOneUniqueUser()))
1310 return Def->replaceAllUsesWith(
1311 Builder.createLogicalAnd(X, Builder.createOr(Y, Z)));
1315 return Def->replaceAllUsesWith(Plan->getFalse());
1318 return Def->replaceAllUsesWith(X);
1323 Def->setOperand(0, C);
1324 Def->setOperand(1, Y);
1325 Def->setOperand(2, X);
1334 X->hasMoreThanOneUniqueUser())
1335 return Def->replaceAllUsesWith(
1336 Builder.createLogicalAnd(X, Builder.createLogicalAnd(Y, Z)));
1339 return Def->replaceAllUsesWith(A);
1342 return Def->replaceAllUsesWith(A);
1345 return Def->replaceAllUsesWith(
1346 Def->getOperand(0) == A ? Def->getOperand(1) : Def->getOperand(0));
1350 return Def->replaceAllUsesWith(A);
1365 R->setOperand(1, Y);
1366 R->setOperand(2, X);
1370 R->replaceAllUsesWith(Cmp);
1375 if (!Cmp->getDebugLoc() && Def->getDebugLoc())
1376 Cmp->setDebugLoc(Def->getDebugLoc());
1388 if (Op->getNumUsers() > 1 ||
1392 } else if (!UnpairedCmp) {
1393 UnpairedCmp = Op->getDefiningRecipe();
1397 UnpairedCmp = nullptr;
1404 if (NewOps.size() < Def->getNumOperands()) {
1406 return Def->replaceAllUsesWith(NewAnyOf);
1418 return Def->replaceAllUsesWith(NewCmp);
1426 return Def->replaceAllUsesWith(Def->getOperand(1));
1432 X = Builder.createWidenCast(Instruction::Trunc, X, WideStepTy);
1433 Def->replaceAllUsesWith(X);
1443 Def->setOperand(1, Def->getOperand(0));
1444 Def->setOperand(0, Y);
1449 if (Phi->getOperand(0) == Phi->getOperand(1))
1450 Phi->replaceAllUsesWith(Phi->getOperand(0));
1458 Def->replaceAllUsesWith(
1459 BuildVector->getOperand(BuildVector->getNumOperands() - 1));
1463 return Def->replaceAllUsesWith(A);
1469 Def->replaceAllUsesWith(
1470 BuildVector->getOperand(BuildVector->getNumOperands() - 2));
1477 Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
1482 Def->replaceAllUsesWith(
1492 "broadcast operand must be single-scalar");
1493 Def->setOperand(0, C);
1498 if (Phi->getNumOperands() == 1)
1499 Phi->replaceAllUsesWith(Phi->getOperand(0));
1512 if (Phi->getOperand(1) != Def && match(Phi->getOperand(0), m_ZeroInt()) &&
1513 Phi->getSingleUser() == Def) {
1514 Phi->setOperand(0, Y);
1515 Def->replaceAllUsesWith(Phi);
1524 return VPR->replaceAllUsesWith(VPR->getOperand(0));
1530 Steps->replaceAllUsesWith(Steps->getOperand(0));
1538 Def->replaceUsesWithIf(StartV, [](const VPUser &U, unsigned Idx) {
1540 return PhiR && PhiR->isInLoop();
1546 Def->replaceAllUsesWith(A);
1555 [Def, A](VPUser *U) { return U->usesScalars(A) || Def == U; })) {
1556 return Def->replaceAllUsesWith(A);
1560 return Def->replaceAllUsesWith(A);
1589 if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
1598 !WidenStoreR->isConsecutive()) {
1599 assert(!WidenStoreR->isReverse() &&
1600 "Non-consecutive memory recipes shouldn't be reversed");
1601 VPValue *Mask = WidenStoreR->getMask();
1610 {WidenStoreR->getOperand(1)});
1615 &WidenStoreR->getIngredient(), {Extract, WidenStoreR->getAddr()},
1616 true, nullptr, {},
1618 ScalarStore->insertBefore(WidenStoreR);
1619 WidenStoreR->eraseFromParent();
1627 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1628 true, nullptr, *RepR,
1629 *RepR, RepR->getDebugLoc());
1630 Clone->insertBefore(RepOrWidenR);
1632 VPValue *ExtractOp = Clone->getOperand(0);
1638 Clone->setOperand(0, ExtractOp);
1639 RepR->eraseFromParent();
1652 if (!all_of(RepOrWidenR->users(),
1653 [RepOrWidenR](const VPUser *U) {
1654 if (auto *VPI = dyn_cast<VPInstruction>(U)) {
1655 unsigned Opcode = VPI->getOpcode();
1656 if (Opcode == VPInstruction::ExtractLastLane ||
1657 Opcode == VPInstruction::ExtractLastPart ||
1658 Opcode == VPInstruction::ExtractPenultimateElement)
1662 return U->usesScalars(RepOrWidenR);
1665 if (Op->getSingleUser() != RepOrWidenR)
1669 bool LiveInNeedsBroadcast =
1670 Op->isLiveIn() && !isa<Constant>(Op->getLiveInIRValue());
1671 auto *OpR = dyn_cast<VPReplicateRecipe>(Op);
1672 return LiveInNeedsBroadcast || (OpR && OpR->isSingleScalar());
1677 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1678 true, nullptr, *RepOrWidenR);
1679 Clone->insertBefore(RepOrWidenR);
1680 RepOrWidenR->replaceAllUsesWith(Clone);
1682 RepOrWidenR->eraseFromParent();
1718 if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
1719 UniqueValues.insert(Blend->getIncomingValue(0));
1720 for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
1722 UniqueValues.insert(Blend->getIncomingValue(I));
1724 if (UniqueValues.size() == 1) {
1725 Blend->replaceAllUsesWith(*UniqueValues.begin());
1726 Blend->eraseFromParent();
1730 if (Blend->isNormalized())
1736 unsigned StartIndex = 0;
1737 for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
1742 if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
1749 OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
1751 for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
1752 if (I == StartIndex)
1754 OperandsWithMask.push_back(Blend->getIncomingValue(I));
1755 OperandsWithMask.push_back(Blend->getMask(I));
1760 OperandsWithMask, Blend->getDebugLoc());
1761 NewBlend->insertBefore(&R);
1763 VPValue *DeadMask = Blend->getMask(StartIndex);
1765 Blend->eraseFromParent();
1770 if (NewBlend->getNumOperands() == 3 &&
1772 VPValue *Inc0 = NewBlend->getOperand(0);
1773 VPValue *Inc1 = NewBlend->getOperand(1);
1774 VPValue *OldMask = NewBlend->getOperand(2);
1775 NewBlend->setOperand(0, Inc1);
1776 NewBlend->setOperand(1, Inc0);
1777 NewBlend->setOperand(2, NewMask);
1804 APInt MaxVal = AlignedTC - 1;
1807 unsigned NewBitWidth =
1813 bool MadeChange = false;
1822 if (!WideIV || !WideIV->isCanonical() ||
1823 WideIV->hasMoreThanOneUniqueUser() ||
1824 NewIVTy == WideIV->getScalarType())
1829 VPUser *SingleUser = WideIV->getSingleUser();
1838 WideIV->setStartValue(NewStart);
1840 WideIV->setStepValue(NewStep);
1846 Cmp->setOperand(1, NewBTC);
1860 return any_of(Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,
1862 return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, PSE);
1875 const SCEV *VectorTripCount =
1880 "Trip count SCEV must be computable");
1901 auto *Term = &ExitingVPBB->back();
1914 for (unsigned Part = 0; Part < UF; ++Part) {
1922 Extracts[Part] = Ext;
1934 match(Phi->getBackedgeValue(),
1936 assert(Index && "Expected index from ActiveLaneMask instruction");
1949 "Expected one VPActiveLaneMaskPHIRecipe for each unroll part");
1956 "Expected incoming values of Phi to be ActiveLaneMasks");
1961 EntryALM->setOperand(2, ALMMultiplier);
1962 LoopALM->setOperand(2, ALMMultiplier);
1966 ExtractFromALM(EntryALM, EntryExtracts);
1971 ExtractFromALM(LoopALM, LoopExtracts);
1973 Not->setOperand(0, LoopExtracts[0]);
1976 for (unsigned Part = 0; Part < UF; ++Part) {
1977 Phis[Part]->setStartValue(EntryExtracts[Part]);
1978 Phis[Part]->setBackedgeValue(LoopExtracts[Part]);
1991 auto *Term = &ExitingVPBB->back();
1998 const SCEV *VectorTripCount =
2004 "Trip count SCEV must be computable");
2029 if (auto *R = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi))
2030 return R->isCanonical();
2031 return isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe,
2032 VPFirstOrderRecurrencePHIRecipe, VPPhi>(&Phi);
2038 R->getScalarType());
2040 HeaderR.eraseFromParent();
2044 HeaderR.getVPSingleValue()->replaceAllUsesWith(Phi->getIncomingValue(0));
2045 HeaderR.eraseFromParent();
2055 B->setParent(nullptr);
2064 if (Exits.size() != 1) {
2066 "BranchOnTwoConds needs 2 remaining exits");
2068 Term->getOperand(0));
2077 Term->setOperand(1, Plan.getTrue());
2082 {}, {}, Term->getDebugLoc());
2086 Term->eraseFromParent();
2113 R.getVPSingleValue()->replaceAllUsesWith(Trunc);
2123 assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
2124 assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
2133 assert(Plan.getUF() == BestUF && "BestUF must match the Plan's UF");
2148 auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {
2151 if (SinkCandidate == Previous)
2155 !Seen.insert(SinkCandidate).second ||
2168 for (unsigned I = 0; I != WorkList.size(); ++I) {
2171 "only recipes with a single defined value expected");
2186 if (SinkCandidate == FOR)
2189 SinkCandidate->moveAfter(Previous);
2190 Previous = SinkCandidate;
2208 for (VPUser *U : FOR->users()) {
2214 [&VPDT, HoistPoint](VPUser *U) {
2215 auto *R = cast<VPRecipeBase>(U);
2216 return HoistPoint == R ||
2217 VPDT.properlyDominates(HoistPoint, R);
2219 "HoistPoint must dominate all users of FOR");
2221 auto NeedsHoisting = [HoistPoint, &VPDT,
2223 VPRecipeBase *HoistCandidate = HoistCandidateV->getDefiningRecipe();
2224 if (!HoistCandidate)
2229 HoistCandidate->getRegion() == EnclosingLoopRegion) &&
2230 "CFG in VPlan should still be flat, without replicate regions");
2232 if (!Visited.insert(HoistCandidate).second)
2244 return HoistCandidate;
2253 for (unsigned I = 0; I != HoistCandidates.size(); ++I) {
2256 "only recipes with a single defined value expected");
2268 if (auto *R = NeedsHoisting(Op)) {
2271 if (R->getNumDefinedValues() != 1)
2285 HoistCandidate->moveBefore(*HoistPoint->getParent(),
2304 VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
2307 while (auto *PrevPhi =
2309 assert(PrevPhi->getParent() == FOR->getParent());
2311 Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
2329 {FOR, FOR->getBackedgeValue()});
2331 FOR->replaceAllUsesWith(RecurSplice);
2334 RecurSplice->setOperand(0, FOR);
2340 for (VPUser *U : RecurSplice->users()) {
2351 B.createNaryOp(Instruction::Sub, {LastActiveLane, One});
2352 VPValue *PenultimateLastIter =
2354 {PenultimateIndex, FOR->getBackedgeValue()});
2359 VPValue *Sel = B.createSelect(Cmp, LastPrevIter, PenultimateLastIter);
2372 RecurKind RK = PhiR->getRecurrenceKind();
2379 RecWithFlags->dropPoisonGeneratingFlags();
2385 struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
2387 return Def == getEmptyKey() || Def == getTombstoneKey();
2398 return GEP->getSourceElementType();
2401 .Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
2402 [](auto *I) { return I->getSourceElementType(); })
2403 .Default([](auto *) { return nullptr; });
2407 static bool canHandle(const VPSingleDefRecipe *Def) {
2416 if (!C || (!C->first && (C->second == Instruction::InsertValue ||
2417 C->second == Instruction::ExtractValue)))
2423 return !Def->mayReadFromMemory();
2427 static unsigned getHashValue(const VPSingleDefRecipe *Def) {
2428 const VPlan *Plan = Def->getParent()->getPlan();
2429 VPTypeAnalysis TypeInfo(*Plan);
2432 getGEPSourceElementType(Def), TypeInfo.inferScalarType(Def),
2435 if (RFlags->hasPredicate())
2441 static bool isEqual(const VPSingleDefRecipe *L, const VPSingleDefRecipe *R) {
2444 if (L->getVPDefID() != R->getVPDefID() ||
2446 getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
2448 !equal(L->operands(), R->operands()))
2451 "must have valid opcode info for both recipes");
2453 if (LFlags->hasPredicate() &&
2454 LFlags->getPredicate() !=
2460 const VPRegionBlock *RegionL = L->getRegion();
2461 const VPRegionBlock *RegionR = R->getRegion();
2464 L->getParent() != R->getParent())
2466 const VPlan *Plan = L->getParent()->getPlan();
2467 VPTypeAnalysis TypeInfo(*Plan);
2468 return TypeInfo.inferScalarType(L) == TypeInfo.inferScalarType(R);
2483 if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
2487 if (!VPDT.dominates(V->getParent(), VPBB))
2492 Def->replaceAllUsesWith(V);
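// The loop above performs CSE over single-def recipes: VPCSEDenseMapInfo keys a
// DenseMap on the recipe's kind, opcode/intrinsic, types and operands, and a
// later recipe that compares equal to an already-seen, dominating one is
// replaced with it. A minimal standalone sketch of the same value-numbering
// idea, using ordinary STL containers and a hypothetical SimpleRecipe struct
// rather than the VPlan API (the dominance check is left out):
#include <map>
#include <utility>
#include <vector>
struct SimpleRecipe {
  unsigned Opcode;                // stand-in for the recipe kind/opcode
  std::vector<unsigned> Operands; // operand value IDs
  unsigned Result;                // value ID defined by this recipe
};
inline void cseSketch(std::vector<SimpleRecipe> &Block) {
  std::map<std::pair<unsigned, std::vector<unsigned>>, unsigned> Seen;
  std::map<unsigned, unsigned> Replacement; // old value ID -> canonical value ID
  for (SimpleRecipe &R : Block) {
    // Rewrite operands through earlier replacements (the "replaceAllUsesWith" step).
    for (unsigned &Op : R.Operands) {
      auto Found = Replacement.find(Op);
      if (Found != Replacement.end())
        Op = Found->second;
    }
    auto [It, Inserted] = Seen.try_emplace({R.Opcode, R.Operands}, R.Result);
    if (!Inserted)
      Replacement[R.Result] = It->second; // duplicate: forward to the first def
  }
}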
2511 "Expected vector preheader's successor to be the vector loop region");
2518 return !Op->isDefinedOutsideLoopRegions();
2521 R.moveBefore(*Preheader, Preheader->end());
2545 VPValue *ResultVPV = R.getVPSingleValue();
2547 unsigned NewResSizeInBits = MinBWs.lookup(UI);
2548 if (!NewResSizeInBits)
2561 (void)OldResSizeInBits;
2569 VPW->dropPoisonGeneratingFlags();
2571 if (OldResSizeInBits != NewResSizeInBits &&
2576 Ext->insertAfter(&R);
2578 Ext->setOperand(0, ResultVPV);
2579 assert(OldResSizeInBits > NewResSizeInBits && "Nothing to shrink?");
2582 "Only ICmps should not need extending the result.");
2591 for (unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
2592 auto *Op = R.getOperand(Idx);
2593 unsigned OpSizeInBits =
2595 if (OpSizeInBits == NewResSizeInBits)
2597 assert(OpSizeInBits > NewResSizeInBits && "nothing to truncate");
2598 auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.try_emplace(Op);
2600 R.setOperand(Idx, ProcessedIter->second);
2608 Builder.setInsertPoint(&R);
2610 Builder.createWidenCast(Instruction::Trunc, Op, NewResTy);
2611 ProcessedIter->second = NewOp;
2612 R.setOperand(Idx, NewOp);
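// The span above shrinks recipes to the minimal bit width recorded in MinBWs:
// operands are truncated to the narrow type, the operation is performed there,
// and the result is extended back where needed. A standalone sketch of that
// truncate-compute-extend pattern on plain integers (illustration only, not the
// VPlan builder API):
#include <cstdint>
inline uint32_t addInNarrowWidth(uint32_t A, uint32_t B, unsigned NewBits) {
  uint32_t Mask = NewBits >= 32 ? ~0u : ((1u << NewBits) - 1u); // low NewBits set
  uint32_t NarrowA = A & Mask;                                  // "Trunc" of each operand
  uint32_t NarrowB = B & Mask;
  uint32_t NarrowRes = (NarrowA + NarrowB) & Mask;              // compute in the narrow type
  return NarrowRes;                                             // "ZExt" back to the wide type
}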
2627 assert(VPBB->getNumSuccessors() == 2 &&
2628 "Two successors expected for BranchOnCond");
2629 unsigned RemovedIdx;
2640 "There must be a single edge between VPBB and its successor");
2649 VPBB->back().eraseFromParent();
2711 VPValue *StartV = CanonicalIVPHI->getStartValue();
2713 auto *CanonicalIVIncrement =
2717 CanonicalIVIncrement->dropPoisonGeneratingFlags();
2718 DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
2728 VPValue *TripCount, *IncrementValue;
2733 IncrementValue = CanonicalIVIncrement;
2739 IncrementValue = CanonicalIVPHI;
2743 auto *EntryIncrement = Builder.createOverflowingOp(
2751 {EntryIncrement, TC, ALMMultiplier}, DL,
2752 "active.lane.mask.entry");
2758 LaneMaskPhi->insertAfter(CanonicalIVPHI);
2763 Builder.setInsertPoint(OriginalTerminator);
2764 auto *InLoopIncrement =
2766 {IncrementValue}, {false, false}, DL);
2768 {InLoopIncrement, TripCount, ALMMultiplier},
2769 DL, "active.lane.mask.next");
2774 auto *NotMask = Builder.createNot(ALM, DL);
2787 auto *FoundWidenCanonicalIVUser = find_if(
2791 "Must have at most one VPWideCanonicalIVRecipe");
2792 if (FoundWidenCanonicalIVUser !=
2794 auto *WideCanonicalIV =
2796 WideCanonicalIVs.push_back(WideCanonicalIV);
2804 if (WidenOriginalIV && WidenOriginalIV->isCanonical())
2805 WideCanonicalIVs.push_back(WidenOriginalIV);
2811 for (auto *Wide : WideCanonicalIVs) {
2817 assert(VPI->getOperand(0) == Wide &&
2818 "WidenCanonicalIV must be the first operand of the compare");
2819 assert(!HeaderMask && "Multiple header masks found?");
2827 VPlan &Plan, bool UseActiveLaneMaskForControlFlow,
2830 UseActiveLaneMaskForControlFlow) &&
2831 "DataAndControlFlowWithoutRuntimeCheck implies "
2832 "UseActiveLaneMaskForControlFlow");
2835 auto *FoundWidenCanonicalIVUser = find_if(
2837 assert(FoundWidenCanonicalIVUser &&
2838 "Must have widened canonical IV when tail folding!");
2840 auto *WideCanonicalIV =
2843 if (UseActiveLaneMaskForControlFlow) {
2853 nullptr, "active.lane.mask");
2869 template <typename OpTy> bool match(OpTy *V) const {
2880 template <typename Op0_t, typename Op1_t>
2899 VPValue *Addr, *Mask, *EndPtr;
2902 auto AdjustEndPtr = [&CurRecipe, &EVL](VPValue *EndPtr) {
2904 EVLEndPtr->insertBefore(&CurRecipe);
2905 EVLEndPtr->setOperand(1, &EVL);
2909 if (match(&CurRecipe,
2923 LoadR->insertBefore(&CurRecipe);
2925 Intrinsic::experimental_vp_reverse, {LoadR, Plan->getTrue(), &EVL},
2934 StoredVal, EVL, Mask);
2936 if (match(&CurRecipe,
2942 Intrinsic::experimental_vp_reverse,
2943 {ReversedVal, Plan->getTrue(), &EVL},
2947 AdjustEndPtr(EndPtr), NewReverse, EVL,
2952 if (Rdx->isConditional() &&
2957 if (Interleave->getMask() &&
2962 if (match(&CurRecipe,
2971 Intrinsic::vp_merge, {Mask, LHS, RHS, &EVL},
2994 "User of VF that we can't transform to EVL.");
3000 [&LoopRegion, &Plan](VPUser *U) {
3002 m_c_Add(m_Specific(LoopRegion->getCanonicalIV()),
3003 m_Specific(&Plan.getVFxUF()))) ||
3004 isa<VPWidenPointerInductionRecipe>(U);
3006 "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
3007 "increment of the canonical induction.");
3027 MaxEVL = Builder.createScalarZExtOrTrunc(
3031 Builder.setInsertPoint(Header, Header->getFirstNonPhi());
3032 VPValue *PrevEVL = Builder.createScalarPhi(
3046 Intrinsic::experimental_vp_splice,
3047 {V1, V2, Imm, Plan.getTrue(), PrevEVL, &EVL},
3051 R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
3069 VPValue *EVLMask = Builder.createICmp(
3087 assert(NumDefVal == CurRecipe->getNumDefinedValues() &&
3088 "New recipe must define the same number of values as the "
3093 for (unsigned I = 0; I < NumDefVal; ++I) {
3094 VPValue *CurVPV = CurRecipe->getVPValue(I);
3106 R->eraseFromParent();
3156 VPlan &Plan, const std::optional<unsigned> &MaxSafeElements) {
3164 VPValue *StartV = CanonicalIVPHI->getStartValue();
3168 EVLPhi->insertAfter(CanonicalIVPHI);
3169 VPBuilder Builder(Header, Header->getFirstNonPhi());
3172 VPPhi *AVLPhi = Builder.createScalarPhi(
3176 if (MaxSafeElements) {
3186 auto *CanonicalIVIncrement =
3188 Builder.setInsertPoint(CanonicalIVIncrement);
3192 OpVPEVL = Builder.createScalarZExtOrTrunc(
3193 OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
3195 auto *NextEVLIV = Builder.createOverflowingOp(
3196 Instruction::Add, {OpVPEVL, EVLPhi},
3197 {CanonicalIVIncrement->hasNoUnsignedWrap(),
3198 CanonicalIVIncrement->hasNoSignedWrap()},
3199 CanonicalIVIncrement->getDebugLoc(),
"index.evl.next");
3200 EVLPhi->addOperand(NextEVLIV);
3202 VPValue *NextAVL = Builder.createOverflowingOp(
3203 Instruction::Sub, {AVLPhi, OpVPEVL}, {true, false},
3211 CanonicalIVPHI->replaceAllUsesWith(EVLPhi);
3212 CanonicalIVIncrement->setOperand(0, CanonicalIVPHI);
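// The recipes built above introduce explicit-vector-length (EVL) stripping of
// the loop: an AVL phi starts at the trip count, each iteration processes
// EVL = min(AVL, VF) lanes, and both the EVL-based IV and the AVL advance by
// EVL. A scalar sketch of that pattern (illustration only; in the real plan the
// per-iteration EVL comes from the explicit-vector-length intrinsic):
#include <algorithm>
#include <cstdint>
inline void evlLoopSketch(uint64_t TripCount, uint64_t VF) {
  uint64_t IV = 0;          // EVL-based induction ("index.evl")
  uint64_t AVL = TripCount; // remaining elements ("avl")
  while (AVL != 0) {
    uint64_t EVL = std::min(AVL, VF); // lanes handled this iteration
    // ... vector body operates on lanes [IV, IV + EVL) ...
    IV += EVL;  // "index.evl.next"
    AVL -= EVL; // "avl.next"
  }
}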
3226 assert(!EVLPhi && "Found multiple EVL PHIs. Only one expected");
3237 [[maybe_unused]] bool FoundAVL =
3240 assert(FoundAVL && "Didn't find AVL?");
3248 [[maybe_unused]] bool FoundAVLNext =
3251 assert(FoundAVLNext && "Didn't find AVL backedge?");
3262 VPValue *Backedge = CanonicalIV->getIncomingValue(1);
3265 "Unexpected canonical iv");
3271 CanonicalIV->eraseFromParent();
3285 "Expected BranchOnCond with ICmp comparing EVL increment with vector "
3290 LatchExitingBr->setOperand(0,
3302 return R->getRegion() ||
3306 for (const SCEV *Stride : StridesMap.values()) {
3309 const APInt *StrideConst;
3326 unsigned BW = U->getType()->getScalarSizeInBits();
3332 RewriteMap[StrideV] = PSE.getSCEV(StrideV);
3339 const SCEV *ScevExpr = ExpSCEV->getSCEV();
3342 if (NewSCEV != ScevExpr) {
3344 ExpSCEV->replaceAllUsesWith(NewExp);
3353 const std::function<bool(BasicBlock *)> &BlockNeedsPredication) {
3357 auto CollectPoisonGeneratingInstrsInBackwardSlice([&](VPRecipeBase *Root) {
3362 while (!Worklist.empty()) {
3365 if (!Visited.insert(CurRec).second)
3387 RecWithFlags->isDisjoint()) {
3390 Instruction::Add, {A, B}, {false, false},
3391 RecWithFlags->getDebugLoc());
3392 New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
3393 RecWithFlags->replaceAllUsesWith(New);
3394 RecWithFlags->eraseFromParent();
3397 RecWithFlags->dropPoisonGeneratingFlags();
3402 assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
3403 "found instruction with poison generating flags not covered by "
3404 "VPRecipeWithIRFlags");
3409 if (VPRecipeBase *OpDef = Operand->getDefiningRecipe())
3421 Instruction &UnderlyingInstr = WidenRec->getIngredient();
3422 VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
3423 if (AddrDef && WidenRec->isConsecutive() &&
3424 BlockNeedsPredication(UnderlyingInstr.getParent()))
3425 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3427 VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
3431 InterleaveRec->getInterleaveGroup();
3432 bool NeedPredication = false;
3434 I < NumMembers; ++I) {
3437 NeedPredication |= BlockNeedsPredication(Member->getParent());
3440 if (NeedPredication)
3441 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3453 if (InterleaveGroups.empty())
3460 for (const auto *IG : InterleaveGroups) {
3466 StoredValues.push_back(StoreR->getStoredValue());
3467 for (unsigned I = 1; I < IG->getFactor(); ++I) {
3474 StoredValues.push_back(StoreR->getStoredValue());
3478 bool NeedsMaskForGaps =
3479 (IG->requiresScalarEpilogue() && !ScalarEpilogueAllowed) ||
3480 (!StoredValues.empty() && !IG->isFull());
3492 VPValue *Addr = Start->getAddr();
3501 assert(IG->getIndex(IRInsertPos) != 0 &&
3502 "index of insert position shouldn't be zero");
3506 IG->getIndex(IRInsertPos),
3510 Addr = B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
3516 if (IG->isReverse()) {
3519 -(int64_t)IG->getFactor(), NW, InsertPos->getDebugLoc());
3520 ReversePtr->insertBefore(InsertPos);
3524 InsertPos->getMask(), NeedsMaskForGaps,
3525 InterleaveMD, InsertPos->getDebugLoc());
3526 VPIG->insertBefore(InsertPos);
3529 for (unsigned i = 0; i < IG->getFactor(); ++i)
3532 if (!Member->getType()->isVoidTy()) {
3591 AddOp = Instruction::Add;
3592 MulOp = Instruction::Mul;
3594 AddOp = ID.getInductionOpcode();
3595 MulOp = Instruction::FMul;
3603 Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty, DL);
3604 Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty, DL);
3606 Flags.dropPoisonGeneratingFlags();
3615 Init = Builder.createWidenCast(Instruction::UIToFP, Init, StepTy);
3620 Init = Builder.createNaryOp(MulOp, {Init, SplatStep}, Flags);
3621 Init = Builder.createNaryOp(AddOp, {SplatStart, Init}, Flags,
3627 WidePHI->insertBefore(WidenIVR);
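// The sequence above materializes the first vector value of a widened
// induction as SplatStart + <0, 1, ..., VF-1> * SplatStep. A standalone sketch
// of that formula on a plain array (illustration only, not the VPlan builder
// API):
#include <array>
template <unsigned VF>
std::array<long, VF> widenIVInitSketch(long Start, long Step) {
  std::array<long, VF> Lanes{};
  for (unsigned Lane = 0; Lane < VF; ++Lane)
    Lanes[Lane] = Start + static_cast<long>(Lane) * Step; // per-lane initial value
  return Lanes;
}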
3638 Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));
3642 VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,
3645 VF = Builder.createScalarZExtOrTrunc(VF, StepTy,
3648 Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);
3655 auto *Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
3658 WidePHI->addOperand(Next);
3686 VPlan *Plan = R->getParent()->getPlan();
3687 VPValue *Start = R->getStartValue();
3688 VPValue *Step = R->getStepValue();
3689 VPValue *VF = R->getVFValue();
3691 assert(R->getInductionDescriptor().getKind() ==
3693 "Not a pointer induction according to InductionDescriptor!");
3696 "Recipe should have been replaced");
3702 VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start, DL, "pointer.phi");
3706 Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
3709 Offset = Builder.createOverflowingOp(Instruction::Mul, {Offset, Step});
3710 VPValue *PtrAdd = Builder.createNaryOp(
3712 R->replaceAllUsesWith(PtrAdd);
3717 VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.inferScalarType(VF),
3719 VPValue *Inc = Builder.createOverflowingOp(Instruction::Mul, {Step, VF});
3722 Builder.createPtrAdd(ScalarPtrPhi, Inc, DL, "ptr.ind");
3731 if (!R->isReplicator())
3735 R->dissolveToCFGLoop();
3754 assert(Br->getNumOperands() == 2 &&
3755 "BranchOnTwoConds must have exactly 2 conditions");
3759 assert(Successors.size() == 3 &&
3760 "BranchOnTwoConds must have exactly 3 successors");
3765 VPValue *EarlyExitingCond = Br->getOperand(0);
3766 VPValue *LateExitingCond = Br->getOperand(1);
3775 VPValue *AnyExitTaken = Builder.createNaryOp(
3776 Instruction::Or, {EarlyExitingCond, LateExitingCond}, DL);
3786 Br->eraseFromParent();
3809 WidenIVR->replaceAllUsesWith(PtrAdd);
3822 for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
3823 Select = Builder.createSelect(Blend->getMask(I),
3824 Blend->getIncomingValue(I), Select,
3825 R.getDebugLoc(), "predphi");
3826 Blend->replaceAllUsesWith(Select);
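// The loop above lowers a VPBlendRecipe into a chain of selects, folding one
// (mask, incoming value) pair per iteration on top of the first incoming
// value. A standalone sketch of that lowering on plain data (hypothetical
// types, not the VPlan builder API):
#include <utility>
#include <vector>
inline int lowerBlendSketch(int FirstIncoming,
                            const std::vector<std::pair<bool, int>> &Rest) {
  int Select = FirstIncoming;
  for (const auto &[Mask, Value] : Rest)
    Select = Mask ? Value : Select; // one "select" per remaining incoming value
  return Select;
}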
3841 for (VPValue *Op : LastActiveL->operands()) {
3842 VPValue *NotMask = Builder.createNot(Op, LastActiveL->getDebugLoc());
3847 VPValue *FirstInactiveLane = Builder.createNaryOp(
3849 LastActiveL->getDebugLoc(), "first.inactive.lane");
3854 VPValue *LastLane = Builder.createNaryOp(
3855 Instruction::Sub, {FirstInactiveLane, One},
3856 LastActiveL->getDebugLoc(), "last.active.lane");
3867 DebugLoc DL = BranchOnCountInst->getDebugLoc();
3870 ToRemove.push_back(BranchOnCountInst);
3885 ? Instruction::UIToFP
3886 : Instruction::Trunc;
3887 VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
3893 Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
3898 Flags = {VPI->getFastMathFlags()};
3903 MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());
3905 VPI->replaceAllUsesWith(VectorStep);
3911 R->eraseFromParent();
3924 "unsupported early exit VPBB");
3935 "Terminator must be BranchOnCond");
3936 VPValue *CondOfEarlyExitingVPBB =
3938 auto *CondToEarlyExit = TrueSucc == EarlyExitVPBB
3939 ? CondOfEarlyExitingVPBB
3940 : Builder.createNot(CondOfEarlyExitingVPBB);
3954 VPBuilder EarlyExitB(VectorEarlyExitVPBB);
3959 unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
3960 if (ExitIRI->getNumOperands() != 1) {
3963 ExitIRI->extractLastLaneOfLastPartOfFirstOperand(MiddleBuilder);
3966 VPValue *IncomingFromEarlyExit = ExitIRI->getOperand(EarlyExitIdx);
3967 if (!IncomingFromEarlyExit->isLiveIn()) {
3975 ExitIRI->setOperand(EarlyExitIdx, IncomingFromEarlyExit);
3985 "Unexpected terminator");
3986 auto *IsLatchExitTaken =
3988 LatchExitingBranch->getOperand(1));
3990 DebugLoc LatchDL = LatchExitingBranch->getDebugLoc();
3991 LatchExitingBranch->eraseFromParent();
3993 Builder.setInsertPoint(LatchVPBB);
3995 {IsEarlyExitTaken, IsLatchExitTaken}, LatchDL);
3997 LatchVPBB->setSuccessors({VectorEarlyExitVPBB, MiddleVPBB, HeaderVPBB});
4008 Type *RedTy = Ctx.Types.inferScalarType(Red);
4009 VPValue *VecOp = Red->getVecOp();
4012 auto IsExtendedRedValidAndClampRange =
4024 if (Red->isPartialReduction()) {
4029 ExtRedCost = Ctx.TTI.getPartialReductionCost(
4030 Opcode, SrcTy, nullptr, RedTy, VF, ExtKind,
4033 ExtRedCost = Ctx.TTI.getExtendedReductionCost(
4034 Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
4035 Red->getFastMathFlags(), CostKind);
4037 return ExtRedCost.isValid() && ExtRedCost < ExtCost + RedCost;
4045 IsExtendedRedValidAndClampRange(
4048 Ctx.Types.inferScalarType(A)))
4066 if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
4069 Type *RedTy = Ctx.Types.inferScalarType(Red);
4072 auto IsMulAccValidAndClampRange =
4079 Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
4082 if (Red->isPartialReduction()) {
4084 Ext1 ? Ctx.Types.inferScalarType(Ext1->getOperand(0)) : nullptr;
4087 MulAccCost = Ctx.TTI.getPartialReductionCost(
4088 Opcode, SrcTy, SrcTy2, RedTy, VF,
4098 if (Ext0 && Ext1 && Ext0->getOpcode() != Ext1->getOpcode())
4102 !Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
4104 MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
4112 ExtCost += Ext0->computeCost(VF, Ctx);
4114 ExtCost += Ext1->computeCost(VF, Ctx);
4116 ExtCost += OuterExt->computeCost(VF, Ctx);
4118 return MulAccCost.isValid() &&
4119 MulAccCost < ExtCost + MulCost + RedCost;
4124 VPValue *VecOp = Red->getVecOp();
4142 if (!ExtA || ExtB || !ValB->isLiveIn())
4158 Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);
4159 Type *WideTy = Ctx.Types.inferScalarType(ExtA);
4160 ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);
4161 Mul->setOperand(1, ExtB);
4171 ExtendAndReplaceConstantOp(RecipeA, RecipeB, B, Mul);
4176 IsMulAccValidAndClampRange(Mul, RecipeA, RecipeB, nullptr)) {
4183 if (!Sub && IsMulAccValidAndClampRange(Mul, nullptr, nullptr, nullptr))
4200 ExtendAndReplaceConstantOp(Ext0, Ext1, B, Mul);
4209 (Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
4210 Ext0->getOpcode() == Ext1->getOpcode() &&
4211 IsMulAccValidAndClampRange(Mul, Ext0, Ext1, Ext) && Mul->hasOneUse()) {
4213 Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), nullptr,
4214 *Ext0, *Ext0, Ext0->getDebugLoc());
4215 NewExt0->insertBefore(Ext0);
4220 Ext->getResultType(), nullptr, *Ext1,
4221 *Ext1, Ext1->getDebugLoc());
4224 Mul->setOperand(0, NewExt0);
4225 Mul->setOperand(1, NewExt1);
4226 Red->setOperand(1, Mul);
4239 auto IP = std::next(Red->getIterator());
4240 auto *VPBB = Red->getParent();
4250 Red->replaceAllUsesWith(AbstractR);
4280 for (VPValue *VPV : VPValues) {
4282 (VPV->isLiveIn() && VPV->getLiveInIRValue() &&
4290 if (User->usesScalars(VPV))
4293 HoistPoint = HoistBlock->begin();
4297 "All users must be in the vector preheader or dominated by it");
4302 VPV->replaceUsesWithIf(Broadcast,
4303 [VPV, Broadcast](VPUser &U, unsigned Idx) {
4304 return Broadcast != &U && !U.usesScalars(VPV);
4321 if (RepR->isPredicated() || !RepR->isSingleScalar() ||
4322 RepR->getOpcode() != Instruction::Load)
4325 VPValue *Addr = RepR->getOperand(0);
4328 if (!Loc.AATags.Scope)
4333 if (R.mayWriteToMemory()) {
4335 if (!Loc || !Loc->AATags.Scope || !Loc->AATags.NoAlias)
4343 for (auto &[LoadRecipe, LoadLoc] : CandidateLoads) {
4347 const AAMDNodes &LoadAA = LoadLoc.AATags;
4363 return CommonMetadata;
4366 template <unsigned Opcode>
4371 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
4372 "Only Load and Store opcodes supported");
4373 constexpr bool IsLoad = (Opcode == Instruction::Load);
4383 if (!RepR || RepR->getOpcode() != Opcode || !RepR->isPredicated())
4387 VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);
4390 RecipesByAddress[AddrSCEV].push_back(RepR);
4397 return TypeInfo.inferScalarType(IsLoad ? Recipe : Recipe->getOperand(0));
4399 for (auto &[Addr, Recipes] : RecipesByAddress) {
4400 if (Recipes.size() < 2)
4408 VPValue *MaskI = RecipeI->getMask();
4409 Type *TypeI = GetLoadStoreValueType(RecipeI);
4415 bool HasComplementaryMask = false;
4420 VPValue *MaskJ = RecipeJ->getMask();
4421 Type *TypeJ = GetLoadStoreValueType(RecipeJ);
4422 if (TypeI == TypeJ) {
4432 if (HasComplementaryMask) {
4433 assert(Group.size() >= 2 && "must have at least 2 entries");
4443 template <typename InstType>
4463 for (auto &Group : Groups) {
4488 LoadWithMinAlign->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
4489 false, nullptr, *EarliestLoad,
4492 UnpredicatedLoad->insertBefore(EarliestLoad);
4496 Load->replaceAllUsesWith(UnpredicatedLoad);
4497 Load->eraseFromParent();
4507 if (!StoreLoc || !StoreLoc->AATags.Scope)
4513 StoresToSink.end());
4517 SinkStoreInfo SinkInfo(StoresToSinkSet, *StoresToSink[0], PSE, L, TypeInfo);
4532 for (auto &Group : Groups) {
4549 VPValue *SelectedValue = Group[0]->getOperand(0);
4552 for (unsigned I = 1; I < Group.size(); ++I) {
4553 VPValue *Mask = Group[I]->getMask();
4555 SelectedValue = Builder.createSelect(Mask, Value, SelectedValue,
4563 auto *UnpredicatedStore =
4565 {SelectedValue, LastStore->getOperand(1)},
4567 nullptr, *LastStore, CommonMetadata);
4568 UnpredicatedStore->insertBefore(*InsertBB, LastStore->getIterator());
4572 Store->eraseFromParent();
4579 assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
4580 assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
4614 auto *TCMO = Builder.createNaryOp(
4642 auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) {
4644 return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
4651 none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
4661 DefR->replaceUsesWithIf(
4662 BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
4664 return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
4678 for (VPValue *Def : R.definedValues()) {
4691 auto IsCandidateUnpackUser = [Def](VPUser *U) {
4693 return U->usesScalars(Def) &&
4696 if (none_of(Def->users(), IsCandidateUnpackUser))
4703 Unpack->insertAfter(&R);
4704 Def->replaceUsesWithIf(Unpack,
4705 [&IsCandidateUnpackUser](VPUser &U, unsigned) {
4706 return IsCandidateUnpackUser(&U);
4716 bool RequiresScalarEpilogue) {
4718 assert(VectorTC.isLiveIn() && "vector-trip-count must be a live-in");
4737 if (TailByMasking) {
4738 TC = Builder.createNaryOp(
4740 {TC, Builder.createNaryOp(Instruction::Sub,
4751 Builder.createNaryOp(Instruction::URem, {TC, Step},
4760 if (RequiresScalarEpilogue) {
4762 "requiring scalar epilogue is not supported with tail folding");
4765 R = Builder.createSelect(IsZero, Step, R);
4768 VPValue *Res = Builder.createNaryOp(
4787 Builder.createElementCount(TCTy, VFEC * Plan.getUF());
4794 VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);
4798 BC, [&VF](VPUser &U, unsigned) { return !U.usesScalars(&VF); });
4803 VPValue *MulByUF = Builder.createOverflowingOp(
4804 Instruction::Mul, {RuntimeVF, UF}, {true, false});
4813 BasicBlock *EntryBB = Entry->getIRBasicBlock();
4821 const SCEV *Expr = ExpSCEV->getSCEV();
4824 ExpandedSCEVs[ExpSCEV->getSCEV()] = Res;
4829 ExpSCEV->eraseFromParent();
4832 "VPExpandSCEVRecipes must be at the beginning of the entry block, "
4833 "after any VPIRInstructions");
4836 auto EI = Entry->begin();
4846 return ExpandedSCEVs;
4862 return Member0Op == OpV;
4864 return !W->getMask() && Member0Op == OpV;
4866 return IR->getInterleaveGroup()->isFull() && IR->getVPValue(Idx) == OpV;
4877 if (!InterleaveR || InterleaveR->getMask())
4880 Type *GroupElementTy = nullptr;
4884 [&TypeInfo, GroupElementTy](VPValue *Op) {
4885 return TypeInfo.inferScalarType(Op) == GroupElementTy;
4892 [&TypeInfo, GroupElementTy](VPValue *Op) {
4893 return TypeInfo.inferScalarType(Op) == GroupElementTy;
4902 return IG->getFactor() == VFMin && IG->getNumMembers() == VFMin &&
4903 GroupSize == VectorRegWidth;
4911 return RepR && RepR->isSingleScalar();
4918 auto *R = V->getDefiningRecipe();
4926 for (unsigned Idx = 0, E = WideMember0->getNumOperands(); Idx != E; ++Idx)
4927 WideMember0->setOperand(
4936 auto *LI = cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
4938 *LI, LoadGroup->getAddr(), LoadGroup->getMask(), true,
4939 false, {}, LoadGroup->getDebugLoc());
4940 L->insertBefore(LoadGroup);
4946 assert(RepR->isSingleScalar() &&
4948 "must be a single scalar load");
4949 NarrowedOps.insert(RepR);
4954 VPValue *PtrOp = WideLoad->getAddr();
4956 PtrOp = VecPtr->getOperand(0);
4961 nullptr, {}, *WideLoad);
4962 N->insertBefore(WideLoad);
4992 if (R.mayWriteToMemory() && !InterleaveR)
5014 if (InterleaveR->getStoredValues().empty())
5019 auto *Member0 = InterleaveR->getStoredValues()[0];
5021 all_of(InterleaveR->getStoredValues(),
5022 [Member0](VPValue *VPV) { return Member0 == VPV; })) {
5030 VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
5033 auto *IR = dyn_cast<VPInterleaveRecipe>(DefR);
5034 return IR && IR->getInterleaveGroup()->isFull() &&
5035 IR->getVPValue(Op.index()) == Op.value();
5047 for (const auto &[I, V] : enumerate(InterleaveR->getStoredValues())) {
5049 if (!R || R->getOpcode() != WideMember0->getOpcode() ||
5050 R->getNumOperands() > 2)
5053 [WideMember0, Idx = I](const auto &P) {
5054 const auto &[OpIdx, OpV] = P;
5055 return !canNarrowLoad(WideMember0, OpIdx, OpV, Idx);
5062 if (StoreGroups.empty())
5068 for (auto *StoreGroup : StoreGroups) {
5074 *SI, StoreGroup->getAddr(), Res, nullptr, true,
5075 false, {}, StoreGroup->getDebugLoc());
5076 S->insertBefore(StoreGroup);
5077 StoreGroup->eraseFromParent();
5092 Instruction::Mul, {VScale, UF}, {true, false});
5096 Inc->setOperand(1, UF);
5115 "must have a BranchOnCond");
5118 if (VF.isScalable() && VScaleForTuning.has_value())
5119 VectorStep *= *VScaleForTuning;
5120 assert(VectorStep > 0 && "trip count should not be zero");
5124 MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
5137 if (WideIntOrFp && WideIntOrFp->getTruncInst())
5144 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
5147 Start, VectorTC, Step);
5170 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
5180 IVEndValues[WideIVR] = EndValue;
5181 ResumePhiR->setOperand(0, EndValue);
5182 ResumePhiR->setName("bc.resume.val");
5189 "should only skip truncated wide inductions");
5197 auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue();
5199 "Cannot handle loops with uncountable early exits");
5205 "vector.recur.extract");
5207 ResumePhiR->setName(IsFOR ? "scalar.recur.init" : "bc.merge.rdx");
5208 ResumePhiR->setOperand(0, ResumeFromVectorLoop);
5217 VPBuilder ScalarPHBuilder(ScalarPHVPBB);
5218 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
5230 "Cannot handle loops with uncountable early exits");
5303 make_range(MiddleVPBB->getFirstNonPhi(), MiddleVPBB->end()))) {
5317 "vector.recur.extract.for.phi");
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
ReachingDefInfo InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static bool isSentinel(const DWARFDebugNames::AttributeEncoding &AE)
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Legalize the Machine IR a function s Machine IR
static bool mergeBlocksIntoPredecessors(Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution &SE)
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file provides utility analysis objects describing memory locations.
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
This is the interface for a metadata-based scoped no-alias analysis.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const X86InstrFMA3Group Groups[]
static const uint32_t IV[8]
Helper for extra no-alias checks via known-safe recipe and SCEV.
SinkStoreInfo(const SmallPtrSetImpl< VPRecipeBase * > &ExcludeRecipes, VPReplicateRecipe &GroupLeader, PredicatedScalarEvolution &PSE, const Loop &L, VPTypeAnalysis &TypeInfo)
bool shouldSkip(VPRecipeBase &R) const
Return true if R should be skipped during alias checking, either because it's in the exclude set or b...
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt abs() const
Get the absolute value.
unsigned getBitWidth() const
Return the number of bits in the APInt.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & back() const
back - Get the last element.
const T & front() const
front - Get the first element.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=true)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
static DebugLoc getCompilerGenerated()
static DebugLoc getUnknown()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Utility class for floating point operations which can have information about relaxed accuracy require...
Represents flags for the getelementptr instruction/expression.
GEPNoWrapFlags withoutNoUnsignedWrap() const
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
uint32_t getNumMembers() const
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
This class implements a map that also provides access to all stored values in a deterministic order.
ValueT lookup(const KeyT &Key) const
Representation for a specific memory location.
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
RegionT * getParent() const
Get the parent of the Region.
This class uses information about analyze scalars to rewrite expressions in canonical form.
LLVM_ABI Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToSCEVMapTy &Map)
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getUDivExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
LLVM_ABI const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
static LLVM_ABI bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias)
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static constexpr TypeSize get(ScalarTy Quantity, bool Scalable)
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
iterator begin()
Recipe iterator methods.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPRegionBlock * getEnclosingLoopRegion()
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
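A sketch using the VPBasicBlock accessors above to split a block just before its terminating branch; the helper name is illustrative.
static VPBasicBlock *splitBeforeTerminator(VPBasicBlock *VPBB) {
  VPRecipeBase *Term = VPBB->getTerminator();
  // Recipes from the split point onwards move to the newly created block.
  return VPBB->splitAt(Term ? Term->getIterator() : VPBB->end());
}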
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.

VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
size_t getNumPredecessors() const
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
const VPBlocksTy & getPredecessors() const
void clearSuccessors()
Remove all the successors of this block.
VPBlockBase * getSinglePredecessor() const
const VPBasicBlock * getEntryBasicBlock() const
void setParent(VPRegionBlock *P)
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
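A sketch of rewiring the VPlan CFG with the static helpers above (assumed to be provided by the VPBlockUtils utility class); it splices NewBB onto an existing edge, which insertOnEdge performs in a single call.
static void spliceOnEdge(VPBlockBase *Pred, VPBlockBase *Succ,
                         VPBlockBase *NewBB) {
  VPBlockUtils::disconnectBlocks(Pred, Succ);
  VPBlockUtils::connectBlocks(Pred, NewBB);
  VPBlockUtils::connectBlocks(NewBB, Succ);
}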
A recipe for generating conditional branches on the bits of a mask.
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPValue * createElementCount(Type *Ty, ElementCount EC)
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={})
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")
Convert the input value Current to the corresponding value of an induction with Start and Step values...
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL, const Twine &Name="")
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
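A sketch of emitting new VPInstructions with VPBuilder; A, B and Mask are assumed pre-existing VPValues, and the select-based blend is purely illustrative.
static VPValue *emitMaskedAdd(VPBasicBlock *VPBB, VPValue *A, VPValue *B,
                              VPValue *Mask) {
  VPBuilder Builder;
  Builder.setInsertPoint(VPBB); // append to the end of VPBB
  VPValue *Sum = Builder.createNaryOp(Instruction::Add, {A, B});
  return Builder.createNaryOp(Instruction::Select, {Mask, Sum, B});
}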
Canonical scalar induction phi of the vector loop.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
ArrayRef< VPValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
A recipe for converting the input value IV to the corresponding value of an IV with different s...
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A recipe for generating the phi node for the current index of elements, adjusted in accordance with E...
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a ...
A special type of VPBasicBlock that wraps an existing IR basic block.
BasicBlock * getIRBasicBlock() const
Class to record and manage LLVM IR flags.
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ExtractPenultimateElement
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
@ FirstOrderRecurrenceSplice
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed-width vectors each containing a ...
@ CanonicalIVIncrementForPart
@ CalculateTripCountMinusVF
const InterleaveGroup< Instruction > * getInterleaveGroup() const
VPValue * getMask() const
Return the mask used by this recipe.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
A recipe for interleaved memory operations with vector-predication intrinsics.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPRegionBlock * getRegion()
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Helper class to create VPRecipes from IR instructions.
VPRecipeBase * getRecipe(Instruction *I)
Return the recipe created for given ingredient.
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
A recipe to represent inloop, ordered or partial reduction operations.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
const VPBlockBase * getEntry() const
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
bool isSingleScalar() const
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
An analysis for type-inference for VPValues.
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
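A sketch of the type-inference analysis in use; the helper and its arguments are illustrative.
static unsigned scalarBitsOfFirstOperand(VPTypeAnalysis &TypeInfo,
                                         VPRecipeBase &R) {
  Type *Ty = TypeInfo.inferScalarType(R.getOperand(0));
  return Ty->getScalarSizeInBits();
}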
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void setOperand(unsigned I, VPValue *New)
VPValue * getOperand(unsigned N) const
void addOperand(VPValue *Operand)
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Value * getLiveInIRValue() const
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
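A sketch of selective use-replacement with replaceUsesWithIf: only uses located outside SinkTo are redirected to NewV (names are illustrative).
static void replaceUsesOutside(VPValue *OldV, VPValue *NewV,
                               VPBasicBlock *SinkTo) {
  OldV->replaceUsesWithIf(NewV, [SinkTo](VPUser &U, unsigned) {
    return cast<VPRecipeBase>(&U)->getParent() != SinkTo;
  });
}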
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
A Recipe for widening the canonical induction variable of the vector loop.
VPWidenCastRecipe is a recipe to create vector cast instructions.
Instruction::CastOps getOpcode() const
A recipe for handling GEP instructions.
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
PHINode * getPHINode() const
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
VPValue * getSplatVFValue()
A recipe for widening vector intrinsics.
A common base class for widening memory operations.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient output ...
bool hasVF(ElementCount VF) const
LLVMContext & getContext() const
VPBasicBlock * getEntry()
VPValue & getVectorTripCount()
The vector trip count.
bool hasScalableVF() const
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPValue & getVF()
Returns the VF of the vector loop region.
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getTrue()
Return a VPValue wrapping i1 true.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
bool hasUF(unsigned UF) const
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
VPValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPValue wrapping a ConstantInt with the given type and value.
void setVF(ElementCount VF)
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPValue * getFalse()
Return a VPValue wrapping i1 false.
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
VPValue * getLiveIn(Value *V) const
Return the live-in VPValue for V, if there is one or nullptr otherwise.
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
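A sketch of materializing common live-in values on a VPlan with the accessors above; the i64 zero constant is just an example.
static VPValue *getZeroI64(VPlan &Plan) {
  // Returns a live-in VPValue wrapping ConstantInt(i64 0);
  // getTrue()/getFalse() cover the common i1 constants.
  return Plan.getConstantInt(Type::getInt64Ty(Plan.getContext()), 0);
}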
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
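A sketch of how fixed and scalable element counts behave; the values are arbitrary.
static void elementCountExample() {
  ElementCount FixedVF = ElementCount::getFixed(8);       // exactly 8 lanes
  ElementCount ScalVF = ElementCount::getScalable(4);     // 4 * vscale lanes
  bool IsScalable = ScalVF.isScalable();                   // true
  unsigned MinLanes = ScalVF.getKnownMinValue();           // 4
  ElementCount Doubled = ScalVF.multiplyCoefficientBy(2);  // 8 * vscale lanes
  (void)FixedVF; (void)IsScalable; (void)MinLanes; (void)Doubled;
}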
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)
Return A unsigned-divided by B, rounded by the given rounding mode.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedStore Intrinsic.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ?
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedLoad Intrinsic.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
SpecificCmpClass_match< LHS, RHS, CmpInst > m_SpecificCmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches an Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
MatchFunctor< Val, Pattern > match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
bool match(const SCEV *S, const Pattern &P)
VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
Match a binary OR operation.
VPInstruction_match< VPInstruction::AnyOf > m_AnyOf()
AllRecipe_commutative_match< Opcode, Op0_t, Op1_t > m_c_Binary(const Op0_t &Op0, const Op1_t &Op1)
AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
GEPLikeRecipe_match< Op0_t, Op1_t > m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::LastActiveLane, Op0_t > m_LastActiveLane(const Op0_t &Op0)
VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_False()
VPDerivedIV_match< Op0_t, Op1_t, Op2_t > m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::ExtractLastLane, Op0_t > m_ExtractLastLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
specific_intval< 1 > m_True()
VectorEndPointerRecipe_match< Op0_t, Op1_t > m_VecEndPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector matches only its opcode, without matching its operands, as the number of operands is not fi...
VPInstruction_match< VPInstruction::ExtractPenultimateElement, Op0_t > m_ExtractPenultimateElement(const Op0_t &Op0)
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
bind_ty< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
VPInstruction_match< VPInstruction::ExtractLane, Op0_t, Op1_t > m_ExtractLane(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::Reverse, Op0_t > m_Reverse(const Op0_t &Op0)
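A sketch of the VPlan pattern matchers in use, recognizing an active-lane-mask computation and binding its operands; the capturing m_VPValue overload is assumed to exist alongside the wildcard form listed above.
static bool matchActiveLaneMask(VPValue *Def, VPValue *&IV, VPValue *&TC) {
  using namespace llvm::VPlanPatternMatch;
  return match(Def,
               m_ActiveLaneMask(m_VPValue(IV), m_VPValue(TC), m_VPValue()));
}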
NodeAddr< DefNode * > Def
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
bool isUniformAcrossVFsAndUFs(VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
std::optional< MemoryLocation > getMemoryLocation(const VPRecipeBase &R)
Return a MemoryLocation for R with noalias metadata populated from R, if the recipe is supported and ...
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
VPIRFlags getFlagsFromIndDesc(const InductionDescriptor &ID)
Extracts and returns NoWrap and FastMath flags from the induction binop in ID.
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
bool isHeaderMask(const VPValue *V, const VPlan &Plan)
Return true if V is a header mask in Plan.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
auto min_element(R &&Range)
Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
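A sketch of the usual erase-while-iterating idiom this enables: the iterator is advanced before the loop body runs, so the current recipe can be removed safely.
static void removeDeadRecipes(VPBasicBlock *VPBB) {
  for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
    if (R.getNumDefinedValues() == 1 &&
        R.getVPSingleValue()->getNumUsers() == 0 && !R.mayHaveSideEffects())
      R.eraseFromParent();
  }
}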
auto cast_or_null(const Y &Val)
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
iterator_range< po_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_post_order_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in post order while traversing through ...
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
@ DataAndControlFlowWithoutRuntimeCheck
Use predicate to control both data and control flow, but modify the trip count so that a runtime over...
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
@ Default
The result values are uniform if and only if all operands are uniform.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
RemoveMask_match(const Op0_t &In, Op1_t &Out)
bool match(OpTy *V) const
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
MDNode * Scope
The tag for alias scope specification (used with noalias).
MDNode * NoAlias
The tag specifying the noalias scope.
This struct is a compact representation of a valid (non-zero power of two) alignment.
An information struct used to provide DenseMap with the various necessary components for a given valu...
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
A recipe for handling first-order recurrence phis.
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
A recipe for widening load operations, using the address to load from and an optional mask.
A recipe for widening select instructions.
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
A recipe for widening store operations, using the stored value, the address to store to and an option...