1//===----------- VectorUtils.cpp - Vectorizer utility functions -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines vectorizer utilities.
10//
11//===----------------------------------------------------------------------===//
12
23#include "llvm/IR/Constants.h"
25#include "llvm/IR/IRBuilder.h"
28#include "llvm/IR/Value.h"
30
31#define DEBUG_TYPE "vectorutils"
32
33using namespace llvm;
34using namespace llvm::PatternMatch;
35
36/// Maximum factor for an interleaved memory access.
38 "max-interleave-group-factor", cl::Hidden,
39 cl::desc("Maximum factor for an interleaved access group (default = 8)"),
40 cl::init(8));
41
42/// Return true if all of the intrinsic's arguments and return type are scalars
43/// for the scalar form of the intrinsic, and vectors for the vector form of the
44/// intrinsic (except operands that are marked as always being scalar by
45/// isVectorIntrinsicWithScalarOpAtArg).
46bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
47 switch (ID) {
48 case Intrinsic::abs: // Begin integer bit-manipulation.
49 case Intrinsic::bswap:
50 case Intrinsic::bitreverse:
51 case Intrinsic::ctpop:
52 case Intrinsic::ctlz:
53 case Intrinsic::cttz:
54 case Intrinsic::fshl:
55 case Intrinsic::fshr:
56 case Intrinsic::smax:
57 case Intrinsic::smin:
58 case Intrinsic::umax:
59 case Intrinsic::umin:
60 case Intrinsic::sadd_sat:
61 case Intrinsic::ssub_sat:
62 case Intrinsic::uadd_sat:
63 case Intrinsic::usub_sat:
64 case Intrinsic::smul_fix:
65 case Intrinsic::smul_fix_sat:
66 case Intrinsic::umul_fix:
67 case Intrinsic::umul_fix_sat:
68 case Intrinsic::sqrt: // Begin floating-point.
69 case Intrinsic::asin:
70 case Intrinsic::acos:
71 case Intrinsic::atan:
72 case Intrinsic::atan2:
73 case Intrinsic::sin:
74 case Intrinsic::cos:
75 case Intrinsic::sincos:
76 case Intrinsic::sincospi:
77 case Intrinsic::tan:
78 case Intrinsic::sinh:
79 case Intrinsic::cosh:
80 case Intrinsic::tanh:
81 case Intrinsic::exp:
82 case Intrinsic::exp10:
83 case Intrinsic::exp2:
84 case Intrinsic::frexp:
85 case Intrinsic::ldexp:
86 case Intrinsic::log:
87 case Intrinsic::log10:
88 case Intrinsic::log2:
89 case Intrinsic::fabs:
90 case Intrinsic::minnum:
91 case Intrinsic::maxnum:
92 case Intrinsic::minimum:
93 case Intrinsic::maximum:
94 case Intrinsic::minimumnum:
95 case Intrinsic::maximumnum:
96 case Intrinsic::modf:
97 case Intrinsic::copysign:
98 case Intrinsic::floor:
99 case Intrinsic::ceil:
100 case Intrinsic::trunc:
101 case Intrinsic::rint:
102 case Intrinsic::nearbyint:
103 case Intrinsic::round:
104 case Intrinsic::roundeven:
105 case Intrinsic::pow:
106 case Intrinsic::fma:
107 case Intrinsic::fmuladd:
108 case Intrinsic::is_fpclass:
109 case Intrinsic::powi:
110 case Intrinsic::canonicalize:
111 case Intrinsic::fptosi_sat:
112 case Intrinsic::fptoui_sat:
113 case Intrinsic::lround:
114 case Intrinsic::llround:
115 case Intrinsic::lrint:
116 case Intrinsic::llrint:
117 case Intrinsic::ucmp:
118 case Intrinsic::scmp:
119 return true;
120 default:
121 return false;
122 }
123}
124
125bool llvm::isTriviallyScalarizable(Intrinsic::ID ID,
126 const TargetTransformInfo *TTI) {
127 if (isTriviallyVectorizable(ID))
128 return true;
129
130 if (TTI)
131 return TTI->isTargetIntrinsicTriviallyScalarizable(ID);
132
133 switch (ID) {
134 case Intrinsic::uadd_with_overflow:
135 case Intrinsic::sadd_with_overflow:
136 case Intrinsic::ssub_with_overflow:
137 case Intrinsic::usub_with_overflow:
138 case Intrinsic::umul_with_overflow:
139 case Intrinsic::smul_with_overflow:
140 return true;
141 }
142 return false;
143}
144
145/// Identifies if the vector form of the intrinsic has a scalar operand.
147 unsigned ScalarOpdIdx,
148 const TargetTransformInfo *TTI) {
149
151 return TTI->isTargetIntrinsicWithScalarOpAtArg(ID, ScalarOpdIdx);
152
153 // Vector predication intrinsics have the EVL as the last operand.
154 if (VPIntrinsic::getVectorLengthParamPos(ID) == ScalarOpdIdx)
155 return true;
156
157 switch (ID) {
158 case Intrinsic::abs:
159 case Intrinsic::vp_abs:
160 case Intrinsic::ctlz:
161 case Intrinsic::vp_ctlz:
162 case Intrinsic::cttz:
163 case Intrinsic::vp_cttz:
164 case Intrinsic::is_fpclass:
165 case Intrinsic::vp_is_fpclass:
166 case Intrinsic::powi:
167 case Intrinsic::vector_extract:
168 return (ScalarOpdIdx == 1);
169 case Intrinsic::smul_fix:
170 case Intrinsic::smul_fix_sat:
171 case Intrinsic::umul_fix:
172 case Intrinsic::umul_fix_sat:
173 return (ScalarOpdIdx == 2);
174 case Intrinsic::experimental_vp_splice:
175 return ScalarOpdIdx == 2 || ScalarOpdIdx == 4;
176 default:
177 return false;
178 }
179}
180
182 Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI) {
183 assert(ID != Intrinsic::not_intrinsic && "Not an intrinsic!");
184
186 return TTI->isTargetIntrinsicWithOverloadTypeAtArg(ID, OpdIdx);
187
189 return OpdIdx == -1 || OpdIdx == 0;
190
191 switch (ID) {
192 case Intrinsic::fptosi_sat:
193 case Intrinsic::fptoui_sat:
194 case Intrinsic::lround:
195 case Intrinsic::llround:
196 case Intrinsic::lrint:
197 case Intrinsic::llrint:
198 case Intrinsic::vp_lrint:
199 case Intrinsic::vp_llrint:
200 case Intrinsic::ucmp:
201 case Intrinsic::scmp:
202 case Intrinsic::vector_extract:
203 return OpdIdx == -1 || OpdIdx == 0;
204 case Intrinsic::modf:
205 case Intrinsic::sincos:
206 case Intrinsic::sincospi:
207 case Intrinsic::is_fpclass:
208 case Intrinsic::vp_is_fpclass:
209 return OpdIdx == 0;
210 case Intrinsic::powi:
211 case Intrinsic::ldexp:
212 return OpdIdx == -1 || OpdIdx == 1;
213 default:
214 return OpdIdx == -1;
215 }
216}
217
219 Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI) {
220
222 return TTI->isTargetIntrinsicWithStructReturnOverloadAtField(ID, RetIdx);
223
224 switch (ID) {
225 case Intrinsic::frexp:
226 return RetIdx == 0 || RetIdx == 1;
227 default:
228 return RetIdx == 0;
229 }
230}
231
232/// Returns intrinsic ID for call.
233/// For the input call instruction it finds the mapping intrinsic and returns
234/// its ID; if no such mapping is found, it returns not_intrinsic.
236 const TargetLibraryInfo *TLI) {
240
241 if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
242 ID == Intrinsic::lifetime_end || ID == Intrinsic::assume ||
243 ID == Intrinsic::experimental_noalias_scope_decl ||
244 ID == Intrinsic::sideeffect || ID == Intrinsic::pseudoprobe)
245 return ID;
247}
248
250 switch (ID) {
251 case Intrinsic::vector_interleave2:
252 return 2;
253 case Intrinsic::vector_interleave3:
254 return 3;
255 case Intrinsic::vector_interleave4:
256 return 4;
257 case Intrinsic::vector_interleave5:
258 return 5;
259 case Intrinsic::vector_interleave6:
260 return 6;
261 case Intrinsic::vector_interleave7:
262 return 7;
263 case Intrinsic::vector_interleave8:
264 return 8;
265 default:
266 return 0;
267 }
268}
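// Illustrative example (values chosen by the editor): passing
// Intrinsic::vector_interleave4 returns 4, while any ID that is not one of the
// vector_interleave* intrinsics falls through to the default case and returns
// 0. The deinterleave variant below follows the same pattern.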
269
271 switch (ID) {
272 case Intrinsic::vector_deinterleave2:
273 return 2;
274 case Intrinsic::vector_deinterleave3:
275 return 3;
276 case Intrinsic::vector_deinterleave4:
277 return 4;
278 case Intrinsic::vector_deinterleave5:
279 return 5;
280 case Intrinsic::vector_deinterleave6:
281 return 6;
282 case Intrinsic::vector_deinterleave7:
283 return 7;
284 case Intrinsic::vector_deinterleave8:
285 return 8;
286 default:
287 return 0;
288 }
289}
290
292 [[maybe_unused]] unsigned Factor =
294 ArrayRef<Type *> DISubtypes = DI->getType()->subtypes();
295 assert(Factor && Factor == DISubtypes.size() &&
296 "unexpected deinterleave factor or result type");
297 return cast<VectorType>(DISubtypes[0]);
298}
299
300/// Given a vector and an element number, see if the scalar value is
301/// already around as a register, for example if it were inserted then extracted
302/// from the vector.
303Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
304 assert(V->getType()->isVectorTy() && "Not looking at a vector?");
305 VectorType *VTy = cast<VectorType>(V->getType());
306 // For fixed-length vectors, return poison for out-of-range accesses.
307 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
308 unsigned Width = FVTy->getNumElements();
309 if (EltNo >= Width)
310 return PoisonValue::get(FVTy->getElementType());
311 }
312
313 if (Constant *C = dyn_cast<Constant>(V))
314 return C->getAggregateElement(EltNo);
315
316 if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
317 // If this is an insert to a variable element, we don't know what it is.
318 uint64_t IIElt;
319 if (!match(III->getOperand(2), m_ConstantInt(IIElt)))
320 return nullptr;
321
322 // If this is an insert to the element we are looking for, return the
323 // inserted value.
324 if (EltNo == IIElt)
325 return III->getOperand(1);
326
327 // Guard against infinite loop on malformed, unreachable IR.
328 if (III == III->getOperand(0))
329 return nullptr;
330
331 // Otherwise, the insertelement doesn't modify the value, recurse on its
332 // vector input.
333 return findScalarElement(III->getOperand(0), EltNo);
334 }
335
336 ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V);
337 // Restrict the following transformation to fixed-length vectors.
338 if (SVI && isa<FixedVectorType>(SVI->getType())) {
339 unsigned LHSWidth =
340 cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
341 int InEl = SVI->getMaskValue(EltNo);
342 if (InEl < 0)
343 return PoisonValue::get(VTy->getElementType());
344 if (InEl < (int)LHSWidth)
345 return findScalarElement(SVI->getOperand(0), InEl);
346 return findScalarElement(SVI->getOperand(1), InEl - LHSWidth);
347 }
348
349 // Extract a value from a vector add operation with a constant zero.
350 // TODO: Use getBinOpIdentity() to generalize this.
351 Value *Val; Constant *C;
352 if (match(V, m_Add(m_Value(Val), m_Constant(C))))
353 if (Constant *Elt = C->getAggregateElement(EltNo))
354 if (Elt->isNullValue())
355 return findScalarElement(Val, EltNo);
356
357 // If the vector is a splat then we can trivially find the scalar element.
359 if (Value *Splat = getSplatValue(V))
360 if (EltNo < VTy->getElementCount().getKnownMinValue())
361 return Splat;
362
363 // Otherwise, we don't know.
364 return nullptr;
365}
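// Illustrative example with hypothetical IR names: for
//   %v = insertelement <4 x i32> %w, i32 %s, i32 3
// findScalarElement(%v, 3) returns %s, findScalarElement(%v, 0) recurses into
// %w, and an out-of-range index on a fixed-length vector yields poison.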
366
368 int SplatIndex = -1;
369 for (int M : Mask) {
370 // Ignore invalid (undefined) mask elements.
371 if (M < 0)
372 continue;
373
374 // There can be only 1 non-negative mask element value if this is a splat.
375 if (SplatIndex != -1 && SplatIndex != M)
376 return -1;
377
378 // Initialize the splat index to the 1st non-negative mask element.
379 SplatIndex = M;
380 }
381 assert((SplatIndex == -1 || SplatIndex >= 0) && "Negative index?");
382 return SplatIndex;
383}
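// For example: a mask of <2, -1, 2, 2> returns 2; a mask such as <0, 1>
// returns -1 because two distinct non-negative values appear; an all-undef
// mask also returns -1.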
384
385/// Get splat value if the input is a splat vector or return nullptr.
386/// This function is not fully general. It checks only 2 cases:
387/// the input value is (1) a splat constant vector or (2) a sequence
388/// of instructions that broadcasts a scalar at element 0.
390 if (isa<VectorType>(V->getType()))
391 if (auto *C = dyn_cast<Constant>(V))
392 return C->getSplatValue();
393
394 // shuf (inselt ?, Splat, 0), ?, <0, undef, 0, ...>
395 Value *Splat;
396 if (match(V,
397 m_Shuffle(m_InsertElt(m_Value(), m_Value(Splat), m_ZeroInt()),
398 m_Value(), m_ZeroMask())))
399 return Splat;
400
401 return nullptr;
402}
403
404bool llvm::isSplatValue(const Value *V, int Index, unsigned Depth) {
405 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
406
407 if (isa<VectorType>(V->getType())) {
408 if (isa<UndefValue>(V))
409 return true;
410 // FIXME: We can allow undefs, but if Index was specified, we may want to
411 // check that the constant is defined at that index.
412 if (auto *C = dyn_cast<Constant>(V))
413 return C->getSplatValue() != nullptr;
414 }
415
416 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) {
417 // FIXME: We can safely allow undefs here. If Index was specified, we will
418 // check that the mask elt is defined at the required index.
419 if (!all_equal(Shuf->getShuffleMask()))
420 return false;
421
422 // Match any index.
423 if (Index == -1)
424 return true;
425
426 // Match a specific element. The mask should be defined at and match the
427 // specified index.
428 return Shuf->getMaskValue(Index) == Index;
429 }
430
431 // The remaining tests are all recursive, so bail out if we hit the limit.
432 if (Depth++ == MaxAnalysisRecursionDepth)
433 return false;
434
435 // If both operands of a binop are splats, the result is a splat.
436 Value *X, *Y, *Z;
437 if (match(V, m_BinOp(m_Value(X), m_Value(Y))))
438 return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth);
439
440 // If all operands of a select are splats, the result is a splat.
441 if (match(V, m_Select(m_Value(X), m_Value(Y), m_Value(Z))))
442 return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth) &&
443 isSplatValue(Z, Index, Depth);
444
445 // TODO: Add support for unary ops (fneg), casts, intrinsics (overflow ops).
446
447 return false;
448}
449
451 const APInt &DemandedElts, APInt &DemandedLHS,
452 APInt &DemandedRHS, bool AllowUndefElts) {
453 DemandedLHS = DemandedRHS = APInt::getZero(SrcWidth);
454
455 // Early out if we don't demand any elements.
456 if (DemandedElts.isZero())
457 return true;
458
459 // Simple case of a shuffle with zeroinitializer.
460 if (all_of(Mask, [](int Elt) { return Elt == 0; })) {
461 DemandedLHS.setBit(0);
462 return true;
463 }
464
465 for (unsigned I = 0, E = Mask.size(); I != E; ++I) {
466 int M = Mask[I];
467 assert((-1 <= M) && (M < (SrcWidth * 2)) &&
468 "Invalid shuffle mask constant");
469
470 if (!DemandedElts[I] || (AllowUndefElts && (M < 0)))
471 continue;
472
473 // For undef elements, we don't know anything about the common state of
474 // the shuffle result.
475 if (M < 0)
476 return false;
477
478 if (M < SrcWidth)
479 DemandedLHS.setBit(M);
480 else
481 DemandedRHS.setBit(M - SrcWidth);
482 }
483
484 return true;
485}
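// Illustrative example (values chosen by the editor): with SrcWidth = 4,
// Mask = {0, 5, -1, 7} and DemandedElts = 0b1011, DemandedLHS gets bit 0 and
// DemandedRHS gets bits 1 and 3; if the undef element 2 were also demanded,
// the function would return false unless AllowUndefElts is set.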
486
488 std::array<std::pair<int, int>, 2> &SrcInfo) {
489 const int SignalValue = NumElts * 2;
490 SrcInfo[0] = {-1, SignalValue};
491 SrcInfo[1] = {-1, SignalValue};
492 for (auto [i, M] : enumerate(Mask)) {
493 if (M < 0)
494 continue;
495 int Src = M >= NumElts;
496 int Diff = (int)i - (M % NumElts);
497 bool Match = false;
498 for (int j = 0; j < 2; j++) {
499 auto &[SrcE, DiffE] = SrcInfo[j];
500 if (SrcE == -1) {
501 assert(DiffE == SignalValue);
502 SrcE = Src;
503 DiffE = Diff;
504 }
505 if (SrcE == Src && DiffE == Diff) {
506 Match = true;
507 break;
508 }
509 }
510 if (!Match)
511 return false;
512 }
513 // Avoid all undef masks
514 return SrcInfo[0].first != -1;
515}
516
518 SmallVectorImpl<int> &ScaledMask) {
519 assert(Scale > 0 && "Unexpected scaling factor");
520
521 // Fast-path: if no scaling, then it is just a copy.
522 if (Scale == 1) {
523 ScaledMask.assign(Mask.begin(), Mask.end());
524 return;
525 }
526
527 ScaledMask.clear();
528 for (int MaskElt : Mask) {
529 if (MaskElt >= 0) {
530 assert(((uint64_t)Scale * MaskElt + (Scale - 1)) <= INT32_MAX &&
531 "Overflowed 32-bits");
532 }
533 for (int SliceElt = 0; SliceElt != Scale; ++SliceElt)
534 ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + SliceElt);
535 }
536}
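// Illustrative example (values chosen by the editor): narrowing {1, -1} with
// Scale = 2 produces {2, 3, -1, -1}; each defined element m expands to the
// Scale consecutive elements starting at Scale * m, and undef elements are
// simply repeated Scale times.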
537
539 SmallVectorImpl<int> &ScaledMask) {
540 assert(Scale > 0 && "Unexpected scaling factor");
541
542 // Fast-path: if no scaling, then it is just a copy.
543 if (Scale == 1) {
544 ScaledMask.assign(Mask.begin(), Mask.end());
545 return true;
546 }
547
548 // We must map the original elements down evenly to a type with fewer elements.
549 int NumElts = Mask.size();
550 if (NumElts % Scale != 0)
551 return false;
552
553 ScaledMask.clear();
554 ScaledMask.reserve(NumElts / Scale);
555
556 // Step through the input mask by splitting into Scale-sized slices.
557 do {
558 ArrayRef<int> MaskSlice = Mask.take_front(Scale);
559 assert((int)MaskSlice.size() == Scale && "Expected Scale-sized slice.");
560
561 // The first element of the slice determines how we evaluate this slice.
562 int SliceFront = MaskSlice.front();
563 if (SliceFront < 0) {
564 // Negative values (undef or other "sentinel" values) must be equal across
565 // the entire slice.
566 if (!all_equal(MaskSlice))
567 return false;
568 ScaledMask.push_back(SliceFront);
569 } else {
570 // A positive mask element must be cleanly divisible.
571 if (SliceFront % Scale != 0)
572 return false;
573 // Elements of the slice must be consecutive.
574 for (int i = 1; i < Scale; ++i)
575 if (MaskSlice[i] != SliceFront + i)
576 return false;
577 ScaledMask.push_back(SliceFront / Scale);
578 }
579 Mask = Mask.drop_front(Scale);
580 } while (!Mask.empty());
581
582 assert((int)ScaledMask.size() * Scale == NumElts && "Unexpected scaled mask");
583
584 // All elements of the original mask can be scaled down to map to the elements
585 // of a mask with wider elements.
586 return true;
587}
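// Illustrative example (values chosen by the editor): widening {2, 3, -1, -1}
// with Scale = 2 succeeds and produces {1, -1}, while a mask starting with the
// slice {0, 2} fails because the slice is not a consecutive run beginning at a
// multiple of Scale.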
588
590 SmallVectorImpl<int> &NewMask) {
591 unsigned NumElts = M.size();
592 if (NumElts % 2 != 0)
593 return false;
594
595 NewMask.clear();
596 for (unsigned i = 0; i < NumElts; i += 2) {
597 int M0 = M[i];
598 int M1 = M[i + 1];
599
600 // If both elements are undef, new mask is undef too.
601 if (M0 == -1 && M1 == -1) {
602 NewMask.push_back(-1);
603 continue;
604 }
605
606 if (M0 == -1 && M1 != -1 && (M1 % 2) == 1) {
607 NewMask.push_back(M1 / 2);
608 continue;
609 }
610
611 if (M0 != -1 && (M0 % 2) == 0 && ((M0 + 1) == M1 || M1 == -1)) {
612 NewMask.push_back(M0 / 2);
613 continue;
614 }
615
616 NewMask.clear();
617 return false;
618 }
619
620 assert(NewMask.size() == NumElts / 2 && "Incorrect size for mask!");
621 return true;
622}
623
624bool llvm::scaleShuffleMaskElts(unsigned NumDstElts, ArrayRef<int> Mask,
625 SmallVectorImpl<int> &ScaledMask) {
626 unsigned NumSrcElts = Mask.size();
627 assert(NumSrcElts > 0 && NumDstElts > 0 && "Unexpected scaling factor");
628
629 // Fast-path: if no scaling, then it is just a copy.
630 if (NumSrcElts == NumDstElts) {
631 ScaledMask.assign(Mask.begin(), Mask.end());
632 return true;
633 }
634
635 // Ensure we can find a whole scale factor.
636 assert(((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts) == 0) &&
637 "Unexpected scaling factor");
638
639 if (NumSrcElts > NumDstElts) {
640 int Scale = NumSrcElts / NumDstElts;
641 return widenShuffleMaskElts(Scale, Mask, ScaledMask);
642 }
643
644 int Scale = NumDstElts / NumSrcElts;
645 narrowShuffleMaskElts(Scale, Mask, ScaledMask);
646 return true;
647}
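// Illustrative example (values chosen by the editor): with NumDstElts = 2 and
// Mask = {0, 1, 6, 7} (NumSrcElts = 4), the mask is widened by a factor of 2
// to {0, 3} and the call returns true; with NumDstElts = 8 the mask would be
// narrowed instead, which always succeeds.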
648
650 SmallVectorImpl<int> &ScaledMask) {
651 std::array<SmallVector<int, 16>, 2> TmpMasks;
652 SmallVectorImpl<int> *Output = &TmpMasks[0], *Tmp = &TmpMasks[1];
653 ArrayRef<int> InputMask = Mask;
654 for (unsigned Scale = 2; Scale <= InputMask.size(); ++Scale) {
655 while (widenShuffleMaskElts(Scale, InputMask, *Output)) {
656 InputMask = *Output;
657 std::swap(Output, Tmp);
658 }
659 }
660 ScaledMask.assign(InputMask.begin(), InputMask.end());
661}
662
664 ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
665 unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
666 function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
667 function_ref<void(ArrayRef<int>, unsigned, unsigned, bool)>
668 ManyInputsAction) {
669 SmallVector<SmallVector<SmallVector<int>>> Res(NumOfDestRegs);
670 // Try to perform better estimation of the permutation.
671 // 1. Split the source/destination vectors into real registers.
672 // 2. Do the mask analysis to identify which real registers are
673 // permuted.
674 int Sz = Mask.size();
675 unsigned SzDest = Sz / NumOfDestRegs;
676 unsigned SzSrc = Sz / NumOfSrcRegs;
677 for (unsigned I = 0; I < NumOfDestRegs; ++I) {
678 auto &RegMasks = Res[I];
679 RegMasks.assign(2 * NumOfSrcRegs, {});
680 // Check that the values in dest registers are in the one src
681 // register.
682 for (unsigned K = 0; K < SzDest; ++K) {
683 int Idx = I * SzDest + K;
684 if (Idx == Sz)
685 break;
686 if (Mask[Idx] >= 2 * Sz || Mask[Idx] == PoisonMaskElem)
687 continue;
688 int MaskIdx = Mask[Idx] % Sz;
689 int SrcRegIdx = MaskIdx / SzSrc + (Mask[Idx] >= Sz ? NumOfSrcRegs : 0);
690 // Add a cost of PermuteTwoSrc for each new source register permute,
691 // if we have more than one source registers.
692 if (RegMasks[SrcRegIdx].empty())
693 RegMasks[SrcRegIdx].assign(SzDest, PoisonMaskElem);
694 RegMasks[SrcRegIdx][K] = MaskIdx % SzSrc;
695 }
696 }
697 // Process split mask.
698 for (unsigned I : seq<unsigned>(NumOfUsedRegs)) {
699 auto &Dest = Res[I];
700 int NumSrcRegs =
701 count_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
702 switch (NumSrcRegs) {
703 case 0:
704 // No input vectors were used!
705 NoInputAction();
706 break;
707 case 1: {
708 // Find the only mask with at least single undef mask elem.
709 auto *It =
710 find_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
711 unsigned SrcReg = std::distance(Dest.begin(), It);
712 SingleInputAction(*It, SrcReg, I);
713 break;
714 }
715 default: {
716 // The first mask is a permutation of a single register. Since we have >2
717 // input registers to shuffle, we merge the masks for the first 2 registers
718 // and generate a shuffle of 2 registers rather than reordering the first
719 // register and then shuffling with the second register. Next, we
720 // generate the shuffles of the resulting register + the remaining
721 // registers from the list.
722 auto &&CombineMasks = [](MutableArrayRef<int> FirstMask,
723 ArrayRef<int> SecondMask) {
724 for (int Idx = 0, VF = FirstMask.size(); Idx < VF; ++Idx) {
725 if (SecondMask[Idx] != PoisonMaskElem) {
726 assert(FirstMask[Idx] == PoisonMaskElem &&
727 "Expected undefined mask element.");
728 FirstMask[Idx] = SecondMask[Idx] + VF;
729 }
730 }
731 };
732 auto &&NormalizeMask = [](MutableArrayRef<int> Mask) {
733 for (int Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
734 if (Mask[Idx] != PoisonMaskElem)
735 Mask[Idx] = Idx;
736 }
737 };
738 int SecondIdx;
739 bool NewReg = true;
740 do {
741 int FirstIdx = -1;
742 SecondIdx = -1;
743 MutableArrayRef<int> FirstMask, SecondMask;
744 for (unsigned I : seq<unsigned>(2 * NumOfSrcRegs)) {
745 SmallVectorImpl<int> &RegMask = Dest[I];
746 if (RegMask.empty())
747 continue;
748
749 if (FirstIdx == SecondIdx) {
750 FirstIdx = I;
751 FirstMask = RegMask;
752 continue;
753 }
754 SecondIdx = I;
755 SecondMask = RegMask;
756 CombineMasks(FirstMask, SecondMask);
757 ManyInputsAction(FirstMask, FirstIdx, SecondIdx, NewReg);
758 NewReg = false;
759 NormalizeMask(FirstMask);
760 RegMask.clear();
761 SecondMask = FirstMask;
762 SecondIdx = FirstIdx;
763 }
764 if (FirstIdx != SecondIdx && SecondIdx >= 0) {
765 CombineMasks(SecondMask, FirstMask);
766 ManyInputsAction(SecondMask, SecondIdx, FirstIdx, NewReg);
767 NewReg = false;
768 Dest[FirstIdx].clear();
769 NormalizeMask(SecondMask);
770 }
771 } while (SecondIdx >= 0);
772 break;
773 }
774 }
775 }
776}
777
778void llvm::getHorizDemandedEltsForFirstOperand(unsigned VectorBitWidth,
779 const APInt &DemandedElts,
780 APInt &DemandedLHS,
781 APInt &DemandedRHS) {
782 assert(VectorBitWidth >= 128 && "Vectors smaller than 128 bit not supported");
783 int NumLanes = VectorBitWidth / 128;
784 int NumElts = DemandedElts.getBitWidth();
785 int NumEltsPerLane = NumElts / NumLanes;
786 int HalfEltsPerLane = NumEltsPerLane / 2;
787
788 DemandedLHS = APInt::getZero(NumElts);
789 DemandedRHS = APInt::getZero(NumElts);
790
791 // Map DemandedElts to the horizontal operands.
792 for (int Idx = 0; Idx != NumElts; ++Idx) {
793 if (!DemandedElts[Idx])
794 continue;
795 int LaneIdx = (Idx / NumEltsPerLane) * NumEltsPerLane;
796 int LocalIdx = Idx % NumEltsPerLane;
797 if (LocalIdx < HalfEltsPerLane) {
798 DemandedLHS.setBit(LaneIdx + 2 * LocalIdx);
799 } else {
800 LocalIdx -= HalfEltsPerLane;
801 DemandedRHS.setBit(LaneIdx + 2 * LocalIdx);
802 }
803 }
804}
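// Illustrative example (values chosen by the editor): for a single 128-bit
// lane holding 4 elements (HalfEltsPerLane = 2), demanding element 1 sets
// bit 2 of DemandedLHS, while demanding element 3 sets bit 2 of DemandedRHS.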
805
808 const TargetTransformInfo *TTI) {
809
810 // DemandedBits will give us every value's live-out bits. But we want
811 // to ensure no extra casts would need to be inserted, so every DAG
812 // of connected values must have the same minimum bitwidth.
818 SmallPtrSet<Instruction *, 4> InstructionSet;
820
821 // Determine the roots. We work bottom-up, from truncs or icmps.
822 bool SeenExtFromIllegalType = false;
823 for (auto *BB : Blocks)
824 for (auto &I : *BB) {
825 InstructionSet.insert(&I);
826
827 if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
828 !TTI->isTypeLegal(I.getOperand(0)->getType()))
829 SeenExtFromIllegalType = true;
830
831 // Only deal with non-vector integers up to 64-bits wide.
832 if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
833 !I.getType()->isVectorTy() &&
834 I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
835 // Don't make work for ourselves. If we know the loaded type is legal,
836 // don't add it to the worklist.
837 if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
838 continue;
839
840 Worklist.push_back(&I);
841 Roots.insert(&I);
842 }
843 }
844 // Early exit.
845 if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
846 return MinBWs;
847
848 // Now proceed breadth-first, unioning values together.
849 while (!Worklist.empty()) {
850 Instruction *I = Worklist.pop_back_val();
851 Value *Leader = ECs.getOrInsertLeaderValue(I);
852
853 if (!Visited.insert(I).second)
854 continue;
855
856 // If we encounter a type that is larger than 64 bits, we can't represent
857 // it so bail out.
858 if (DB.getDemandedBits(I).getBitWidth() > 64)
860
861 uint64_t V = DB.getDemandedBits(I).getZExtValue();
862 DBits[Leader] |= V;
863 DBits[I] = V;
864
865 // Casts, loads and instructions outside of our range terminate a chain
866 // successfully.
868 !InstructionSet.count(I))
869 continue;
870
871 // Unsafe casts terminate a chain unsuccessfully. We can't do anything
872 // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
873 // transform anything that relies on them.
875 !I->getType()->isIntegerTy()) {
876 DBits[Leader] |= ~0ULL;
877 continue;
878 }
879
880 // We don't modify the types of PHIs. Reductions will already have been
881 // truncated if possible, and inductions' sizes will have been chosen by
882 // indvars.
883 if (isa<PHINode>(I))
884 continue;
885
886 // Don't modify the types of operands of a call, as doing that would cause a
887 // signature mismatch.
888 if (isa<CallBase>(I))
889 continue;
890
891 if (DBits[Leader] == ~0ULL)
892 // All bits demanded, no point continuing.
893 continue;
894
895 for (Value *O : I->operands()) {
896 ECs.unionSets(Leader, O);
897 if (auto *OI = dyn_cast<Instruction>(O))
898 Worklist.push_back(OI);
899 }
900 }
901
902 // Now we've discovered all values, walk them to see if there are
903 // any users we didn't see. If there are, we can't optimize that
904 // chain.
905 for (auto &I : DBits)
906 for (auto *U : I.first->users())
907 if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
908 DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;
909
910 for (const auto &E : ECs) {
911 if (!E->isLeader())
912 continue;
913 uint64_t LeaderDemandedBits = 0;
914 for (Value *M : ECs.members(*E))
915 LeaderDemandedBits |= DBits[M];
916
917 uint64_t MinBW = llvm::bit_width(LeaderDemandedBits);
918 // Round up to a power of 2
919 MinBW = llvm::bit_ceil(MinBW);
920
921 // We don't modify the types of PHIs. Reductions will already have been
922 // truncated if possible, and inductions' sizes will have been chosen by
923 // indvars.
924 // If we are required to shrink a PHI, abandon this entire equivalence class.
925 bool Abort = false;
926 for (Value *M : ECs.members(*E))
927 if (isa<PHINode>(M) && MinBW < M->getType()->getScalarSizeInBits()) {
928 Abort = true;
929 break;
930 }
931 if (Abort)
932 continue;
933
934 for (Value *M : ECs.members(*E)) {
935 auto *MI = dyn_cast<Instruction>(M);
936 if (!MI)
937 continue;
938 Type *Ty = M->getType();
939 if (Roots.count(MI))
940 Ty = MI->getOperand(0)->getType();
941
942 if (MinBW >= Ty->getScalarSizeInBits())
943 continue;
944
945 // If any of M's operands demand more bits than MinBW then M cannot be
946 // performed safely in MinBW.
947 auto *Call = dyn_cast<CallBase>(MI);
948 auto Ops = Call ? Call->args() : MI->operands();
949 if (any_of(Ops, [&DB, MinBW](Use &U) {
950 auto *CI = dyn_cast<ConstantInt>(U);
952 // For constant shift amounts, check if the shift would result in
952 // poison.
953 if (CI &&
955 U.getOperandNo() == 1)
956 return CI->uge(MinBW);
957 uint64_t BW = bit_width(DB.getDemandedBits(&U).getZExtValue());
958 return bit_ceil(BW) > MinBW;
959 }))
960 continue;
961
962 MinBWs[MI] = MinBW;
963 }
964 }
965
966 return MinBWs;
967}
968
969/// Add all access groups in @p AccGroups to @p List.
970template <typename ListT>
971static void addToAccessGroupList(ListT &List, MDNode *AccGroups) {
972 // Interpret an access group as a list containing itself.
973 if (AccGroups->getNumOperands() == 0) {
974 assert(isValidAsAccessGroup(AccGroups) && "Node must be an access group");
975 List.insert(AccGroups);
976 return;
977 }
978
979 for (const auto &AccGroupListOp : AccGroups->operands()) {
980 auto *Item = cast<MDNode>(AccGroupListOp.get());
981 assert(isValidAsAccessGroup(Item) && "List item must be an access group");
982 List.insert(Item);
983 }
984}
985
986MDNode *llvm::uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2) {
987 if (!AccGroups1)
988 return AccGroups2;
989 if (!AccGroups2)
990 return AccGroups1;
991 if (AccGroups1 == AccGroups2)
992 return AccGroups1;
993
995 addToAccessGroupList(Union, AccGroups1);
996 addToAccessGroupList(Union, AccGroups2);
997
998 if (Union.size() == 0)
999 return nullptr;
1000 if (Union.size() == 1)
1001 return cast<MDNode>(Union.front());
1002
1003 LLVMContext &Ctx = AccGroups1->getContext();
1004 return MDNode::get(Ctx, Union.getArrayRef());
1005}
1006
1008 const Instruction *Inst2) {
1009 bool MayAccessMem1 = Inst1->mayReadOrWriteMemory();
1010 bool MayAccessMem2 = Inst2->mayReadOrWriteMemory();
1011
1012 if (!MayAccessMem1 && !MayAccessMem2)
1013 return nullptr;
1014 if (!MayAccessMem1)
1015 return Inst2->getMetadata(LLVMContext::MD_access_group);
1016 if (!MayAccessMem2)
1017 return Inst1->getMetadata(LLVMContext::MD_access_group);
1018
1019 MDNode *MD1 = Inst1->getMetadata(LLVMContext::MD_access_group);
1020 MDNode *MD2 = Inst2->getMetadata(LLVMContext::MD_access_group);
1021 if (!MD1 || !MD2)
1022 return nullptr;
1023 if (MD1 == MD2)
1024 return MD1;
1025
1026 // Use set for scalable 'contains' check.
1027 SmallPtrSet<Metadata *, 4> AccGroupSet2;
1028 addToAccessGroupList(AccGroupSet2, MD2);
1029
1030 SmallVector<Metadata *, 4> Intersection;
1031 if (MD1->getNumOperands() == 0) {
1032 assert(isValidAsAccessGroup(MD1) && "Node must be an access group");
1033 if (AccGroupSet2.count(MD1))
1034 Intersection.push_back(MD1);
1035 } else {
1036 for (const MDOperand &Node : MD1->operands()) {
1037 auto *Item = cast<MDNode>(Node.get());
1038 assert(isValidAsAccessGroup(Item) && "List item must be an access group");
1039 if (AccGroupSet2.count(Item))
1040 Intersection.push_back(Item);
1041 }
1042 }
1043
1044 if (Intersection.size() == 0)
1045 return nullptr;
1046 if (Intersection.size() == 1)
1047 return cast<MDNode>(Intersection.front());
1048
1049 LLVMContext &Ctx = Inst1->getContext();
1050 return MDNode::get(Ctx, Intersection);
1051}
1052
1053/// Add metadata from \p Inst to \p Metadata, if it can be preserved after
1054/// vectorization.
1056 Instruction *Inst,
1057 SmallVectorImpl<std::pair<unsigned, MDNode *>> &Metadata) {
1059 static const unsigned SupportedIDs[] = {
1060 LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
1061 LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
1062 LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load,
1063 LLVMContext::MD_access_group, LLVMContext::MD_mmra};
1064
1065 // Remove any unsupported metadata kinds from Metadata.
1066 for (unsigned Idx = 0; Idx != Metadata.size();) {
1067 if (is_contained(SupportedIDs, Metadata[Idx].first)) {
1068 ++Idx;
1069 } else {
1070 // Swap element to end and remove it.
1071 std::swap(Metadata[Idx], Metadata.back());
1072 Metadata.pop_back();
1073 }
1074 }
1075}
1076
1077/// \returns \p I after propagating metadata from \p VL.
1079 if (VL.empty())
1080 return Inst;
1083
1084 for (auto &[Kind, MD] : Metadata) {
1085 // Skip MMRA metadata if the instruction cannot have it.
1086 if (Kind == LLVMContext::MD_mmra && !canInstructionHaveMMRAs(*Inst))
1087 continue;
1088
1089 for (int J = 1, E = VL.size(); MD && J != E; ++J) {
1090 const Instruction *IJ = cast<Instruction>(VL[J]);
1091 MDNode *IMD = IJ->getMetadata(Kind);
1092
1093 switch (Kind) {
1094 case LLVMContext::MD_mmra: {
1095 MD = MMRAMetadata::combine(Inst->getContext(), MD, IMD);
1096 break;
1097 }
1098 case LLVMContext::MD_tbaa:
1099 MD = MDNode::getMostGenericTBAA(MD, IMD);
1100 break;
1101 case LLVMContext::MD_alias_scope:
1103 break;
1104 case LLVMContext::MD_fpmath:
1105 MD = MDNode::getMostGenericFPMath(MD, IMD);
1106 break;
1107 case LLVMContext::MD_noalias:
1108 case LLVMContext::MD_nontemporal:
1109 case LLVMContext::MD_invariant_load:
1110 MD = MDNode::intersect(MD, IMD);
1111 break;
1112 case LLVMContext::MD_access_group:
1113 MD = intersectAccessGroups(Inst, IJ);
1114 break;
1115 default:
1116 llvm_unreachable("unhandled metadata");
1117 }
1118 }
1119
1120 Inst->setMetadata(Kind, MD);
1121 }
1122
1123 return Inst;
1124}
1125
1126Constant *
1128 const InterleaveGroup<Instruction> &Group) {
1129 // All 1's means mask is not needed.
1130 if (Group.isFull())
1131 return nullptr;
1132
1133 // TODO: support reversed access.
1134 assert(!Group.isReverse() && "Reversed group not supported.");
1135
1137 for (unsigned i = 0; i < VF; i++)
1138 for (unsigned j = 0; j < Group.getFactor(); ++j) {
1139 unsigned HasMember = Group.getMember(j) ? 1 : 0;
1140 Mask.push_back(Builder.getInt1(HasMember));
1141 }
1142
1143 return ConstantVector::get(Mask);
1144}
1145
1147llvm::createReplicatedMask(unsigned ReplicationFactor, unsigned VF) {
1148 SmallVector<int, 16> MaskVec;
1149 for (unsigned i = 0; i < VF; i++)
1150 for (unsigned j = 0; j < ReplicationFactor; j++)
1151 MaskVec.push_back(i);
1152
1153 return MaskVec;
1154}
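// Illustrative example (values chosen by the editor): createReplicatedMask(3, 2)
// yields <0, 0, 0, 1, 1, 1>.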
1155
1157 unsigned NumVecs) {
1159 for (unsigned i = 0; i < VF; i++)
1160 for (unsigned j = 0; j < NumVecs; j++)
1161 Mask.push_back(j * VF + i);
1162
1163 return Mask;
1164}
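// Illustrative example (values chosen by the editor): createInterleaveMask(4, 2)
// yields <0, 4, 1, 5, 2, 6, 3, 7>, interleaving two vectors of four elements.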
1165
1167llvm::createStrideMask(unsigned Start, unsigned Stride, unsigned VF) {
1169 for (unsigned i = 0; i < VF; i++)
1170 Mask.push_back(Start + i * Stride);
1171
1172 return Mask;
1173}
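// Illustrative example (values chosen by the editor): createStrideMask(0, 2, 4)
// yields <0, 2, 4, 6>, selecting every other element starting at index 0.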
1174
1176 unsigned NumInts,
1177 unsigned NumUndefs) {
1179 for (unsigned i = 0; i < NumInts; i++)
1180 Mask.push_back(Start + i);
1181
1182 for (unsigned i = 0; i < NumUndefs; i++)
1183 Mask.push_back(-1);
1184
1185 return Mask;
1186}
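// Illustrative example (values chosen by the editor): createSequentialMask(0, 3, 2)
// yields <0, 1, 2, -1, -1>, i.e. three consecutive indices followed by two undefs.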
1187
1189 unsigned NumElts) {
1190 // Avoid casts in the loop and make sure we have a reasonable number.
1191 int NumEltsSigned = NumElts;
1192 assert(NumEltsSigned > 0 && "Expected smaller or non-zero element count");
1193
1194 // If the mask chooses an element from operand 1, reduce it to choose from the
1195 // corresponding element of operand 0. Undef mask elements are unchanged.
1196 SmallVector<int, 16> UnaryMask;
1197 for (int MaskElt : Mask) {
1198 assert((MaskElt < NumEltsSigned * 2) && "Expected valid shuffle mask");
1199 int UnaryElt = MaskElt >= NumEltsSigned ? MaskElt - NumEltsSigned : MaskElt;
1200 UnaryMask.push_back(UnaryElt);
1201 }
1202 return UnaryMask;
1203}
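// Illustrative example (values chosen by the editor): with NumElts = 4, the
// two-operand mask <0, 5, 2, 7> becomes the unary mask <0, 1, 2, 3>, since
// elements taken from operand 1 are remapped to the corresponding elements of
// operand 0.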
1204
1205/// A helper function for concatenating vectors. This function concatenates two
1206/// vectors having the same element type. If the second vector has fewer
1207/// elements than the first, it is padded with undefs.
1209 Value *V2) {
1210 VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType());
1211 VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
1212 assert(VecTy1 && VecTy2 &&
1213 VecTy1->getScalarType() == VecTy2->getScalarType() &&
1214 "Expect two vectors with the same element type");
1215
1216 unsigned NumElts1 = cast<FixedVectorType>(VecTy1)->getNumElements();
1217 unsigned NumElts2 = cast<FixedVectorType>(VecTy2)->getNumElements();
1218 assert(NumElts1 >= NumElts2 && "Unexpected: first vector has fewer elements");
1219
1220 if (NumElts1 > NumElts2) {
1221 // Extend with UNDEFs.
1222 V2 = Builder.CreateShuffleVector(
1223 V2, createSequentialMask(0, NumElts2, NumElts1 - NumElts2));
1224 }
1225
1226 return Builder.CreateShuffleVector(
1227 V1, V2, createSequentialMask(0, NumElts1 + NumElts2, 0));
1228}
1229
1231 ArrayRef<Value *> Vecs) {
1232 unsigned NumVecs = Vecs.size();
1233 assert(NumVecs > 1 && "Should be at least two vectors");
1234
1236 ResList.append(Vecs.begin(), Vecs.end());
1237 do {
1239 for (unsigned i = 0; i < NumVecs - 1; i += 2) {
1240 Value *V0 = ResList[i], *V1 = ResList[i + 1];
1241 assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&
1242 "Only the last vector may have a different type");
1243
1244 TmpList.push_back(concatenateTwoVectors(Builder, V0, V1));
1245 }
1246
1247 // Push the last vector if the total number of vectors is odd.
1248 if (NumVecs % 2 != 0)
1249 TmpList.push_back(ResList[NumVecs - 1]);
1250
1251 ResList = TmpList;
1252 NumVecs = ResList.size();
1253 } while (NumVecs > 1);
1254
1255 return ResList[0];
1256}
1257
1259 assert(isa<VectorType>(Mask->getType()) &&
1260 isa<IntegerType>(Mask->getType()->getScalarType()) &&
1261 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1262 1 &&
1263 "Mask must be a vector of i1");
1264
1265 auto *ConstMask = dyn_cast<Constant>(Mask);
1266 if (!ConstMask)
1267 return false;
1268 if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
1269 return true;
1270 if (isa<ScalableVectorType>(ConstMask->getType()))
1271 return false;
1272 for (unsigned
1273 I = 0,
1274 E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
1275 I != E; ++I) {
1276 if (auto *MaskElt = ConstMask->getAggregateElement(I))
1277 if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))
1278 continue;
1279 return false;
1280 }
1281 return true;
1282}
1283
1285 assert(isa<VectorType>(Mask->getType()) &&
1286 isa<IntegerType>(Mask->getType()->getScalarType()) &&
1287 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1288 1 &&
1289 "Mask must be a vector of i1");
1290
1291 auto *ConstMask = dyn_cast<Constant>(Mask);
1292 if (!ConstMask)
1293 return false;
1294 if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
1295 return true;
1296 if (isa<ScalableVectorType>(ConstMask->getType()))
1297 return false;
1298 for (unsigned
1299 I = 0,
1300 E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
1301 I != E; ++I) {
1302 if (auto *MaskElt = ConstMask->getAggregateElement(I))
1303 if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
1304 continue;
1305 return false;
1306 }
1307 return true;
1308}
1309
1311 assert(isa<VectorType>(Mask->getType()) &&
1312 isa<IntegerType>(Mask->getType()->getScalarType()) &&
1313 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1314 1 &&
1315 "Mask must be a vector of i1");
1316
1317 auto *ConstMask = dyn_cast<Constant>(Mask);
1318 if (!ConstMask)
1319 return false;
1320 if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
1321 return true;
1322 if (isa<ScalableVectorType>(ConstMask->getType()))
1323 return false;
1324 for (unsigned
1325 I = 0,
1326 E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
1327 I != E; ++I) {
1328 if (auto *MaskElt = ConstMask->getAggregateElement(I))
1329 if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
1330 return true;
1331 }
1332 return false;
1333}
1334
1335/// TODO: This is a lot like known bits, but for
1336/// vectors. Is there something we can common this with?
1338 assert(isa<FixedVectorType>(Mask->getType()) &&
1339 isa<IntegerType>(Mask->getType()->getScalarType()) &&
1340 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1341 1 &&
1342 "Mask must be a fixed width vector of i1");
1343
1344 const unsigned VWidth =
1345 cast<FixedVectorType>(Mask->getType())->getNumElements();
1346 APInt DemandedElts = APInt::getAllOnes(VWidth);
1347 if (auto *CV = dyn_cast<ConstantVector>(Mask))
1348 for (unsigned i = 0; i < VWidth; i++)
1349 if (CV->getAggregateElement(i)->isNullValue())
1350 DemandedElts.clearBit(i);
1351 return DemandedElts;
1352}
1353
1354bool InterleavedAccessInfo::isStrided(int Stride) {
1355 unsigned Factor = std::abs(Stride);
1356 return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
1357}
1358
1359void InterleavedAccessInfo::collectConstStrideAccesses(
1361 const DenseMap<Value*, const SCEV*> &Strides) {
1362 auto &DL = TheLoop->getHeader()->getDataLayout();
1363
1364 // Since it's desired that the load/store instructions be maintained in
1365 // "program order" for the interleaved access analysis, we have to visit the
1366 // blocks in the loop in reverse postorder (i.e., in a topological order).
1367 // Such an ordering will ensure that any load/store that may be executed
1368 // before a second load/store will precede the second load/store in
1369 // AccessStrideInfo.
1370 LoopBlocksDFS DFS(TheLoop);
1371 DFS.perform(LI);
1372 for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
1373 for (auto &I : *BB) {
1375 if (!Ptr)
1376 continue;
1377 Type *ElementTy = getLoadStoreType(&I);
1378
1379 // Currently, codegen doesn't support cases where the type size doesn't
1380 // match the alloc size. Skip them for now.
1381 uint64_t Size = DL.getTypeAllocSize(ElementTy);
1382 if (Size * 8 != DL.getTypeSizeInBits(ElementTy))
1383 continue;
1384
1385 // We don't check wrapping here because we don't know yet if Ptr will be
1386 // part of a full group or a group with gaps. Checking wrapping for all
1387 // pointers (even those that end up in groups with no gaps) will be overly
1388 // conservative. For full groups, wrapping should be ok since if we would
1389 // wrap around the address space we would do a memory access at nullptr
1390 // even without the transformation. The wrapping checks are therefore
1391 // deferred until after we've formed the interleaved groups.
1392 int64_t Stride = getPtrStride(PSE, ElementTy, Ptr, TheLoop, *DT, Strides,
1393 /*Assume=*/true, /*ShouldCheckWrap=*/false)
1394 .value_or(0);
1395
1396 const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
1397 AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size,
1399 }
1400}
1401
1402// Analyze interleaved accesses and collect them into interleaved load and
1403// store groups.
1404//
1405// When generating code for an interleaved load group, we effectively hoist all
1406// loads in the group to the location of the first load in program order. When
1407// generating code for an interleaved store group, we sink all stores to the
1408// location of the last store. This code motion can change the order of load
1409// and store instructions and may break dependences.
1410//
1411// The code generation strategy mentioned above ensures that we won't violate
1412// any write-after-read (WAR) dependences.
1413//
1414// E.g., for the WAR dependence: a = A[i]; // (1)
1415// A[i] = b; // (2)
1416//
1417// The store group of (2) is always inserted at or below (2), and the load
1418// group of (1) is always inserted at or above (1). Thus, the instructions will
1419// never be reordered. All other dependences are checked to ensure the
1420// correctness of the instruction reordering.
1421//
1422// The algorithm visits all memory accesses in the loop in bottom-up program
1423// order. Program order is established by traversing the blocks in the loop in
1424// reverse postorder when collecting the accesses.
1425//
1426// We visit the memory accesses in bottom-up order because it can simplify the
1427// construction of store groups in the presence of write-after-write (WAW)
1428// dependences.
1429//
1430// E.g., for the WAW dependence: A[i] = a; // (1)
1431// A[i] = b; // (2)
1432// A[i + 1] = c; // (3)
1433//
1434// We will first create a store group with (3) and (2). (1) can't be added to
1435// this group because it and (2) are dependent. However, (1) can be grouped
1436// with other accesses that may precede it in program order. Note that a
1437// bottom-up order does not imply that WAW dependences should not be checked.
1439 bool EnablePredicatedInterleavedMemAccesses) {
1440 LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
1441 const auto &Strides = LAI->getSymbolicStrides();
1442
1443 // Holds all accesses with a constant stride.
1445 collectConstStrideAccesses(AccessStrideInfo, Strides);
1446
1447 if (AccessStrideInfo.empty())
1448 return;
1449
1450 // Collect the dependences in the loop.
1451 collectDependences();
1452
1453 // Holds all interleaved store groups temporarily.
1455 // Holds all interleaved load groups temporarily.
1457 // Groups added to this set cannot have new members added.
1458 SmallPtrSet<InterleaveGroup<Instruction> *, 4> CompletedLoadGroups;
1459
1460 // Search in bottom-up program order for pairs of accesses (A and B) that can
1461 // form interleaved load or store groups. In the algorithm below, access A
1462 // precedes access B in program order. We initialize a group for B in the
1463 // outer loop of the algorithm, and then in the inner loop, we attempt to
1464 // insert each A into B's group if:
1465 //
1466 // 1. A and B have the same stride,
1467 // 2. A and B have the same memory object size, and
1468 // 3. A belongs in B's group according to its distance from B.
1469 //
1470 // Special care is taken to ensure group formation will not break any
1471 // dependences.
1472 for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
1473 BI != E; ++BI) {
1474 Instruction *B = BI->first;
1475 StrideDescriptor DesB = BI->second;
1476
1477 // Initialize a group for B if it has an allowable stride. Even if we don't
1478 // create a group for B, we continue with the bottom-up algorithm to ensure
1479 // we don't break any of B's dependences.
1480 InterleaveGroup<Instruction> *GroupB = nullptr;
1481 if (isStrided(DesB.Stride) &&
1482 (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
1483 GroupB = getInterleaveGroup(B);
1484 if (!GroupB) {
1485 LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
1486 << '\n');
1487 GroupB = createInterleaveGroup(B, DesB.Stride, DesB.Alignment);
1488 if (B->mayWriteToMemory())
1489 StoreGroups.insert(GroupB);
1490 else
1491 LoadGroups.insert(GroupB);
1492 }
1493 }
1494
1495 for (auto AI = std::next(BI); AI != E; ++AI) {
1496 Instruction *A = AI->first;
1497 StrideDescriptor DesA = AI->second;
1498
1499 // Our code motion strategy implies that we can't have dependences
1500 // between accesses in an interleaved group and other accesses located
1501 // between the first and last member of the group. Note that this also
1502 // means that a group can't have more than one member at a given offset.
1503 // The accesses in a group can have dependences with other accesses, but
1504 // we must ensure we don't extend the boundaries of the group such that
1505 // we encompass those dependent accesses.
1506 //
1507 // For example, assume we have the sequence of accesses shown below in a
1508 // stride-2 loop:
1509 //
1510 // (1, 2) is a group | A[i] = a; // (1)
1511 // | A[i-1] = b; // (2) |
1512 // A[i-3] = c; // (3)
1513 // A[i] = d; // (4) | (2, 4) is not a group
1514 //
1515 // Because accesses (2) and (3) are dependent, we can group (2) with (1)
1516 // but not with (4). If we did, the dependent access (3) would be within
1517 // the boundaries of the (2, 4) group.
1518 auto DependentMember = [&](InterleaveGroup<Instruction> *Group,
1519 StrideEntry *A) -> Instruction * {
1520 for (uint32_t Index = 0; Index < Group->getFactor(); ++Index) {
1521 Instruction *MemberOfGroupB = Group->getMember(Index);
1522 if (MemberOfGroupB && !canReorderMemAccessesForInterleavedGroups(
1523 A, &*AccessStrideInfo.find(MemberOfGroupB)))
1524 return MemberOfGroupB;
1525 }
1526 return nullptr;
1527 };
1528
1529 auto GroupA = getInterleaveGroup(A);
1530 // If A is a load, dependencies are tolerable, there's nothing to do here.
1531 // If both A and B belong to the same (store) group, they are independent,
1532 // even if dependencies have not been recorded.
1533 // If both GroupA and GroupB are null, there's nothing to do here.
1534 if (A->mayWriteToMemory() && GroupA != GroupB) {
1535 Instruction *DependentInst = nullptr;
1536 // If GroupB is a load group, we have to compare AI against all
1537 // members of GroupB because if any load within GroupB has a dependency
1538 // on AI, we need to mark GroupB as complete and also release the
1539 // store GroupA (if A belongs to one). The former prevents incorrect
1540 // hoisting of load B above store A while the latter prevents incorrect
1541 // sinking of store A below load B.
1542 if (GroupB && LoadGroups.contains(GroupB))
1543 DependentInst = DependentMember(GroupB, &*AI);
1544 else if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI))
1545 DependentInst = B;
1546
1547 if (DependentInst) {
1548 // A has a store dependence on B (or on some load within GroupB) and
1549 // is part of a store group. Release A's group to prevent illegal
1550 // sinking of A below B. A will then be free to form another group
1551 // with instructions that precede it.
1552 if (GroupA && StoreGroups.contains(GroupA)) {
1553 LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to "
1554 "dependence between "
1555 << *A << " and " << *DependentInst << '\n');
1556 StoreGroups.remove(GroupA);
1557 releaseGroup(GroupA);
1558 }
1559 // If B is a load and part of an interleave group, no earlier loads
1560 // can be added to B's interleave group, because this would mean the
1561 // DependentInst would move across store A. Mark the interleave group
1562 // as complete.
1563 if (GroupB && LoadGroups.contains(GroupB)) {
1564 LLVM_DEBUG(dbgs() << "LV: Marking interleave group for " << *B
1565 << " as complete.\n");
1566 CompletedLoadGroups.insert(GroupB);
1567 }
1568 }
1569 }
1570 if (CompletedLoadGroups.contains(GroupB)) {
1571 // Skip trying to add A to B, continue to look for other conflicting A's
1572 // in groups to be released.
1573 continue;
1574 }
1575
1576 // At this point, we've checked for illegal code motion. If either A or B
1577 // isn't strided, there's nothing left to do.
1578 if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
1579 continue;
1580
1581 // Ignore A if it's already in a group or isn't the same kind of memory
1582 // operation as B.
1583 // Note that mayReadFromMemory() isn't mutually exclusive with
1584 // mayWriteToMemory() in the case of atomic loads. We shouldn't see those
1585 // here; canVectorizeMemory() should have returned false - except for the
1586 // case where we asked for optimization remarks.
1587 if (isInterleaved(A) ||
1588 (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
1589 (A->mayWriteToMemory() != B->mayWriteToMemory()))
1590 continue;
1591
1592 // Check rules 1 and 2. Ignore A if its stride or size is different from
1593 // that of B.
1594 if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
1595 continue;
1596
1597 // Ignore A if the memory object of A and B don't belong to the same
1598 // address space
1599 if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B))
1600 continue;
1601
1602 // Calculate the distance from A to B.
1603 const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
1604 PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
1605 if (!DistToB)
1606 continue;
1607 int64_t DistanceToB = DistToB->getAPInt().getSExtValue();
1608
1609 // Check rule 3. Ignore A if its distance to B is not a multiple of the
1610 // size.
1611 if (DistanceToB % static_cast<int64_t>(DesB.Size))
1612 continue;
1613
1614 // All members of a predicated interleave-group must have the same predicate,
1615 // and currently must reside in the same BB.
1616 BasicBlock *BlockA = A->getParent();
1617 BasicBlock *BlockB = B->getParent();
1618 if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
1619 (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
1620 continue;
1621
1622 // The index of A is the index of B plus A's distance to B in multiples
1623 // of the size.
1624 int IndexA =
1625 GroupB->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
1626
1627 // Try to insert A into B's group.
1628 if (GroupB->insertMember(A, IndexA, DesA.Alignment)) {
1629 LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
1630 << " into the interleave group with" << *B
1631 << '\n');
1632 InterleaveGroupMap[A] = GroupB;
1633
1634 // Set the first load in program order as the insert position.
1635 if (A->mayReadFromMemory())
1636 GroupB->setInsertPos(A);
1637 }
1638 } // Iteration over A accesses.
1639 } // Iteration over B accesses.
1640
1641 auto InvalidateGroupIfMemberMayWrap = [&](InterleaveGroup<Instruction> *Group,
1642 int Index,
1643 const char *FirstOrLast) -> bool {
1644 Instruction *Member = Group->getMember(Index);
1645 assert(Member && "Group member does not exist");
1646 Value *MemberPtr = getLoadStorePointerOperand(Member);
1647 Type *AccessTy = getLoadStoreType(Member);
1648 if (getPtrStride(PSE, AccessTy, MemberPtr, TheLoop, *DT, Strides,
1649 /*Assume=*/false, /*ShouldCheckWrap=*/true)
1650 .value_or(0))
1651 return false;
1652 LLVM_DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
1653 << FirstOrLast
1654 << " group member potentially pointer-wrapping.\n");
1655 releaseGroup(Group);
1656 return true;
1657 };
1658
1659 // Remove interleaved groups with gaps whose memory
1660 // accesses may wrap around. We have to revisit the getPtrStride analysis,
1661 // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
1662 // not check wrapping (see documentation there).
1663 // FORNOW we use Assume=false;
1664 // TODO: Change to Assume=true but making sure we don't exceed the threshold
1665 // of runtime SCEV assumptions checks (thereby potentially failing to
1666 // vectorize altogether).
1667 // Additional optional optimizations:
1668 // TODO: If we are peeling the loop and we know that the first pointer doesn't
1669 // wrap then we can deduce that all pointers in the group don't wrap.
1670 // This means that we can forcefully peel the loop in order to only have to
1671 // check the first pointer for no-wrap. When we'll change to use Assume=true
1672 // we'll only need at most one runtime check per interleaved group.
1673 for (auto *Group : LoadGroups) {
1674 // Case 1: A full group. We can skip the checks; for full groups, if the wide
1675 // load would wrap around the address space we would do a memory access at
1676 // nullptr even without the transformation.
1677 if (Group->isFull())
1678 continue;
1679
1680 // Case 2: If first and last members of the group don't wrap this implies
1681 // that all the pointers in the group don't wrap.
1682 // So we check only group member 0 (which is always guaranteed to exist),
1683 // and group member Factor - 1; If the latter doesn't exist we rely on
1684 // peeling (if it is a non-reversed access -- see Case 3).
1685 if (InvalidateGroupIfMemberMayWrap(Group, 0, "first"))
1686 continue;
1687 if (Group->getMember(Group->getFactor() - 1))
1688 InvalidateGroupIfMemberMayWrap(Group, Group->getFactor() - 1, "last");
1689 else {
1690 // Case 3: A non-reversed interleaved load group with gaps: We need
1691 // to execute at least one scalar epilogue iteration. This will ensure
1692 // we don't speculatively access memory out-of-bounds. We only need
1693 // to look for a member at index factor - 1, since every group must have
1694 // a member at index zero.
1695 if (Group->isReverse()) {
1696 LLVM_DEBUG(
1697 dbgs() << "LV: Invalidate candidate interleaved group due to "
1698 "a reverse access with gaps.\n");
1699 releaseGroup(Group);
1700 continue;
1701 }
1702 LLVM_DEBUG(
1703 dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
1704 RequiresScalarEpilogue = true;
1705 }
1706 }
1707
1708 for (auto *Group : StoreGroups) {
1709 // Case 1: A full group. We can skip the checks; for full groups, if the wide
1710 // store would wrap around the address space we would do a memory access at
1711 // nullptr even without the transformation.
1712 if (Group->isFull())
1713 continue;
1714
1715 // Interleave-store-group with gaps is implemented using masked wide store.
1716 // Remove interleaved store groups with gaps if
1717 // masked-interleaved-accesses are not enabled by the target.
1718 if (!EnablePredicatedInterleavedMemAccesses) {
1719 LLVM_DEBUG(
1720 dbgs() << "LV: Invalidate candidate interleaved store group due "
1721 "to gaps.\n");
1722 releaseGroup(Group);
1723 continue;
1724 }
1725
1726 // Case 2: If first and last members of the group don't wrap this implies
1727 // that all the pointers in the group don't wrap.
1728 // So we check only group member 0 (which is always guaranteed to exist),
1729 // and the last group member. Case 3 (scalar epilog) is not relevant for
1730 // stores with gaps, which are implemented with masked-store (rather than
1731 // speculative access, as in loads).
1732 if (InvalidateGroupIfMemberMayWrap(Group, 0, "first"))
1733 continue;
1734 for (int Index = Group->getFactor() - 1; Index > 0; Index--)
1735 if (Group->getMember(Index)) {
1736 InvalidateGroupIfMemberMayWrap(Group, Index, "last");
1737 break;
1738 }
1739 }
1740}
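// A minimal, self-contained sketch (illustration only; it is not part of this
// file) of the mask shape used when a store group with gaps is emitted as a
// masked wide store: the wide store covers VF * Factor lanes, and a lane is
// enabled only if the group has a member at that lane's interleave index.
// The HasMember vector stands in for querying InterleaveGroup::getMember().
#include <cstdio>
#include <vector>

static std::vector<bool> gapMask(unsigned VF,
                                 const std::vector<bool> &HasMember) {
  std::vector<bool> Mask;
  Mask.reserve(VF * HasMember.size());
  for (unsigned I = 0; I < VF; ++I)
    for (bool Present : HasMember)
      Mask.push_back(Present); // Lanes of missing members stay disabled.
  return Mask;
}

int main() {
  // A factor-3 store group with a gap at interleave index 1, and VF = 4.
  const std::vector<bool> HasMember = {true, false, true};
  for (bool Lane : gapMask(/*VF=*/4, HasMember))
    std::printf("%d ", Lane ? 1 : 0);
  std::printf("\n"); // Prints: 1 0 1 1 0 1 1 0 1 1 0 1
  return 0;
}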
1741
1742void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {
1743 // If no group had triggered the requirement to create an epilogue loop,
1744 // there is nothing to do.
1745 if (!requiresScalarEpilogue())
1746 return;
1747
1748 // Release groups requiring scalar epilogues. Note that this also removes them
1749 // from InterleaveGroups.
1750 bool ReleasedGroup = InterleaveGroups.remove_if([&](auto *Group) {
1751 if (!Group->requiresScalarEpilogue())
1752 return false;
1753 LLVM_DEBUG(
1754 dbgs()
1755 << "LV: Invalidate candidate interleaved group due to gaps that "
1756 "require a scalar epilogue (not allowed under optsize) and cannot "
1757 "be masked (not enabled). \n");
1758 releaseGroupWithoutRemovingFromSet(Group);
1759 return true;
1760 });
1761 assert(ReleasedGroup && "At least one group must be invalidated, as a "
1762 "scalar epilogue was required");
1763 (void)ReleasedGroup;
1764 RequiresScalarEpilogue = false;
1765}
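// A minimal, self-contained sketch (illustration only; it is not part of this
// file) of the "release while removing" pattern used above: groups that
// require a scalar epilogue are destroyed as they are dropped from the owning
// container in a single remove_if pass. ToyGroup and its field are stand-ins
// for InterleaveGroup and requiresScalarEpilogue().
#include <algorithm>
#include <cassert>
#include <memory>
#include <vector>

struct ToyGroup {
  bool NeedsScalarEpilogue = false;
};

int main() {
  std::vector<std::unique_ptr<ToyGroup>> Groups;
  Groups.push_back(std::make_unique<ToyGroup>());
  Groups.back()->NeedsScalarEpilogue = true;
  Groups.push_back(std::make_unique<ToyGroup>());

  // Drop (and thereby destroy) every group that would need a scalar epilogue.
  Groups.erase(std::remove_if(Groups.begin(), Groups.end(),
                              [](const std::unique_ptr<ToyGroup> &G) {
                                return G->NeedsScalarEpilogue;
                              }),
               Groups.end());

  assert(Groups.size() == 1 && !Groups.front()->NeedsScalarEpilogue);
  return 0;
}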
1766
1767template <typename InstT>
1768void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const {
1769 llvm_unreachable("addMetadata can only be used for Instruction");
1770}
1771
1772namespace llvm {
1773template <>
1774void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const {
1775 SmallVector<Value *, 4> VL(make_second_range(Members));
1776 propagateMetadata(NewInst, VL);
1777}
1778} // namespace llvm
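// A minimal, self-contained sketch (illustration only; it is not part of this
// file) of the pattern used above: the primary template traps if it is ever
// reached, and an explicit specialization provides the real behavior for the
// one supported type. Wrapper and describe() are stand-in names.
#include <cassert>
#include <cstdio>
#include <string>

template <typename T> struct Wrapper {
  void describe() const;
};

// Primary definition: must never be called, mirroring the llvm_unreachable
// body of the generic addMetadata above.
template <typename T> void Wrapper<T>::describe() const {
  assert(false && "describe can only be used for std::string");
}

// Explicit specialization for the supported type does the actual work.
template <> void Wrapper<std::string>::describe() const {
  std::printf("specialized for std::string\n");
}

int main() {
  Wrapper<std::string>().describe(); // Uses the specialization.
  return 0;
}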