RISCVISelLowering.cpp
1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
19#include "RISCVRegisterInfo.h"
21#include "RISCVSubtarget.h"
22#include "llvm/ADT/SmallSet.h"
23#include "llvm/ADT/Statistic.h"
36#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
43#include "llvm/Support/Debug.h"
49#include <optional>
50
51using namespace llvm;
52
53#define DEBUG_TYPE "riscv-lower"
54
55STATISTIC(NumTailCalls, "Number of tail calls");
56
57 static cl::opt<unsigned> ExtensionMaxWebSize(
58 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
59 cl::desc("Give the maximum size (in number of nodes) of the web of "
60 "instructions that we will consider for VW expansion"),
61 cl::init(18));
62
63static cl::opt<bool>
64 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
65 cl::desc("Allow the formation of VW_W operations (e.g., "
66 "VWADD_W) with splat constants"),
67 cl::init(false));
68
69 static cl::opt<unsigned> NumRepeatedDivisors(
70 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
71 cl::desc("Set the minimum number of repetitions of a divisor to allow "
72 "transformation to multiplications by the reciprocal"),
73 cl::init(2));
74
75 static cl::opt<int>
76 FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
77 cl::desc("Give the maximum number of instructions that we will "
78 "use for creating a floating-point immediate value"),
79 cl::init(2));
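// Editorial note, not part of the upstream source: because each option name is
// built by concatenating DEBUG_TYPE ("riscv-lower") with a suffix, these knobs
// can be tuned per compilation from the llc command line. A hypothetical
// invocation, assuming a stock llc build that registers these cl::opt flags:
//
//   llc -mtriple=riscv64 -mattr=+v \
//       -riscv-lower-ext-max-web-size=32 \
//       -riscv-lower-fp-repeated-divisors=4 test.ll -o test.s
//
// This widens the web considered for VW combining and raises the repeated-
// divisor threshold without rebuilding the compiler.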
80
81 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
82 const RISCVSubtarget &STI)
83 : TargetLowering(TM), Subtarget(STI) {
84
85 RISCVABI::ABI ABI = Subtarget.getTargetABI();
86 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
87
88 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
89 !Subtarget.hasStdExtF()) {
90 errs() << "Hard-float 'f' ABI can't be used for a target that "
91 "doesn't support the F instruction set extension (ignoring "
92 "target-abi)\n";
94 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
95 !Subtarget.hasStdExtD()) {
96 errs() << "Hard-float 'd' ABI can't be used for a target that "
97 "doesn't support the D instruction set extension (ignoring "
98 "target-abi)\n";
100 }
101
102 switch (ABI) {
103 default:
104 report_fatal_error("Don't know how to lower this ABI");
113 break;
114 }
115
116 MVT XLenVT = Subtarget.getXLenVT();
117
118 // Set up the register classes.
119 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
120
121 if (Subtarget.hasStdExtZfhmin())
122 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
123 if (Subtarget.hasStdExtZfbfmin())
124 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
125 if (Subtarget.hasStdExtF())
126 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
127 if (Subtarget.hasStdExtD())
128 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
129 if (Subtarget.hasStdExtZhinxmin())
130 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
131 if (Subtarget.hasStdExtZfinx())
132 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
133 if (Subtarget.hasStdExtZdinx()) {
134 if (Subtarget.is64Bit())
135 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
136 else
137 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
138 }
139
140 static const MVT::SimpleValueType BoolVecVTs[] = {
141 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
142 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
143 static const MVT::SimpleValueType IntVecVTs[] = {
144 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
145 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
146 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
147 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
148 MVT::nxv4i64, MVT::nxv8i64};
149 static const MVT::SimpleValueType F16VecVTs[] = {
150 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
151 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
152 static const MVT::SimpleValueType BF16VecVTs[] = {
153 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
154 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
155 static const MVT::SimpleValueType F32VecVTs[] = {
156 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
157 static const MVT::SimpleValueType F64VecVTs[] = {
158 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
159 static const MVT::SimpleValueType VecTupleVTs[] = {
160 MVT::riscv_nxv1i8x2, MVT::riscv_nxv1i8x3, MVT::riscv_nxv1i8x4,
161 MVT::riscv_nxv1i8x5, MVT::riscv_nxv1i8x6, MVT::riscv_nxv1i8x7,
162 MVT::riscv_nxv1i8x8, MVT::riscv_nxv2i8x2, MVT::riscv_nxv2i8x3,
163 MVT::riscv_nxv2i8x4, MVT::riscv_nxv2i8x5, MVT::riscv_nxv2i8x6,
164 MVT::riscv_nxv2i8x7, MVT::riscv_nxv2i8x8, MVT::riscv_nxv4i8x2,
165 MVT::riscv_nxv4i8x3, MVT::riscv_nxv4i8x4, MVT::riscv_nxv4i8x5,
166 MVT::riscv_nxv4i8x6, MVT::riscv_nxv4i8x7, MVT::riscv_nxv4i8x8,
167 MVT::riscv_nxv8i8x2, MVT::riscv_nxv8i8x3, MVT::riscv_nxv8i8x4,
168 MVT::riscv_nxv8i8x5, MVT::riscv_nxv8i8x6, MVT::riscv_nxv8i8x7,
169 MVT::riscv_nxv8i8x8, MVT::riscv_nxv16i8x2, MVT::riscv_nxv16i8x3,
170 MVT::riscv_nxv16i8x4, MVT::riscv_nxv32i8x2};
171
172 if (Subtarget.hasVInstructions()) {
173 auto addRegClassForRVV = [this](MVT VT) {
174 // Disable the smallest fractional LMUL types if ELEN is less than
175 // RVVBitsPerBlock.
176 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
177 if (VT.getVectorMinNumElements() < MinElts)
178 return;
179
180 unsigned Size = VT.getSizeInBits().getKnownMinValue();
181 const TargetRegisterClass *RC;
182 if (Size <= RISCV::RVVBitsPerBlock)
183 RC = &RISCV::VRRegClass;
184 else if (Size == 2 * RISCV::RVVBitsPerBlock)
185 RC = &RISCV::VRM2RegClass;
186 else if (Size == 4 * RISCV::RVVBitsPerBlock)
187 RC = &RISCV::VRM4RegClass;
188 else if (Size == 8 * RISCV::RVVBitsPerBlock)
189 RC = &RISCV::VRM8RegClass;
190 else
191 llvm_unreachable("Unexpected size");
192
193 addRegisterClass(VT, RC);
194 };
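// Illustrative sketch (editorial, not upstream code): on a Zve32x target,
// ELEN = 32 and RVVBitsPerBlock = 64, so MinElts = 64 / 32 = 2 and nxv1i8
// (min element count 1) is skipped. For the types that are kept:
//   nxv8i8  -> known-min size  64 bits -> VRRegClass   (LMUL <= 1)
//   nxv16i8 -> known-min size 128 bits -> VRM2RegClass (LMUL == 2)
//   nxv64i8 -> known-min size 512 bits -> VRM8RegClass (LMUL == 8)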
195
196 for (MVT VT : BoolVecVTs)
197 addRegClassForRVV(VT);
198 for (MVT VT : IntVecVTs) {
199 if (VT.getVectorElementType() == MVT::i64 &&
200 !Subtarget.hasVInstructionsI64())
201 continue;
202 addRegClassForRVV(VT);
203 }
204
205 if (Subtarget.hasVInstructionsF16Minimal())
206 for (MVT VT : F16VecVTs)
207 addRegClassForRVV(VT);
208
209 if (Subtarget.hasVInstructionsBF16Minimal())
210 for (MVT VT : BF16VecVTs)
211 addRegClassForRVV(VT);
212
213 if (Subtarget.hasVInstructionsF32())
214 for (MVT VT : F32VecVTs)
215 addRegClassForRVV(VT);
216
217 if (Subtarget.hasVInstructionsF64())
218 for (MVT VT : F64VecVTs)
219 addRegClassForRVV(VT);
220
221 if (Subtarget.useRVVForFixedLengthVectors()) {
222 auto addRegClassForFixedVectors = [this](MVT VT) {
223 MVT ContainerVT = getContainerForFixedLengthVector(VT);
224 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
225 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
226 addRegisterClass(VT, TRI.getRegClass(RCID));
227 };
228 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
229 if (useRVVForFixedLengthVectorVT(VT))
230 addRegClassForFixedVectors(VT);
231
232 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
233 if (useRVVForFixedLengthVectorVT(VT))
234 addRegClassForFixedVectors(VT);
235 }
236
237 addRegisterClass(MVT::riscv_nxv1i8x2, &RISCV::VRN2M1RegClass);
238 addRegisterClass(MVT::riscv_nxv1i8x3, &RISCV::VRN3M1RegClass);
239 addRegisterClass(MVT::riscv_nxv1i8x4, &RISCV::VRN4M1RegClass);
240 addRegisterClass(MVT::riscv_nxv1i8x5, &RISCV::VRN5M1RegClass);
241 addRegisterClass(MVT::riscv_nxv1i8x6, &RISCV::VRN6M1RegClass);
242 addRegisterClass(MVT::riscv_nxv1i8x7, &RISCV::VRN7M1RegClass);
243 addRegisterClass(MVT::riscv_nxv1i8x8, &RISCV::VRN8M1RegClass);
244 addRegisterClass(MVT::riscv_nxv2i8x2, &RISCV::VRN2M1RegClass);
245 addRegisterClass(MVT::riscv_nxv2i8x3, &RISCV::VRN3M1RegClass);
246 addRegisterClass(MVT::riscv_nxv2i8x4, &RISCV::VRN4M1RegClass);
247 addRegisterClass(MVT::riscv_nxv2i8x5, &RISCV::VRN5M1RegClass);
248 addRegisterClass(MVT::riscv_nxv2i8x6, &RISCV::VRN6M1RegClass);
249 addRegisterClass(MVT::riscv_nxv2i8x7, &RISCV::VRN7M1RegClass);
250 addRegisterClass(MVT::riscv_nxv2i8x8, &RISCV::VRN8M1RegClass);
251 addRegisterClass(MVT::riscv_nxv4i8x2, &RISCV::VRN2M1RegClass);
252 addRegisterClass(MVT::riscv_nxv4i8x3, &RISCV::VRN3M1RegClass);
253 addRegisterClass(MVT::riscv_nxv4i8x4, &RISCV::VRN4M1RegClass);
254 addRegisterClass(MVT::riscv_nxv4i8x5, &RISCV::VRN5M1RegClass);
255 addRegisterClass(MVT::riscv_nxv4i8x6, &RISCV::VRN6M1RegClass);
256 addRegisterClass(MVT::riscv_nxv4i8x7, &RISCV::VRN7M1RegClass);
257 addRegisterClass(MVT::riscv_nxv4i8x8, &RISCV::VRN8M1RegClass);
258 addRegisterClass(MVT::riscv_nxv8i8x2, &RISCV::VRN2M1RegClass);
259 addRegisterClass(MVT::riscv_nxv8i8x3, &RISCV::VRN3M1RegClass);
260 addRegisterClass(MVT::riscv_nxv8i8x4, &RISCV::VRN4M1RegClass);
261 addRegisterClass(MVT::riscv_nxv8i8x5, &RISCV::VRN5M1RegClass);
262 addRegisterClass(MVT::riscv_nxv8i8x6, &RISCV::VRN6M1RegClass);
263 addRegisterClass(MVT::riscv_nxv8i8x7, &RISCV::VRN7M1RegClass);
264 addRegisterClass(MVT::riscv_nxv8i8x8, &RISCV::VRN8M1RegClass);
265 addRegisterClass(MVT::riscv_nxv16i8x2, &RISCV::VRN2M2RegClass);
266 addRegisterClass(MVT::riscv_nxv16i8x3, &RISCV::VRN3M2RegClass);
267 addRegisterClass(MVT::riscv_nxv16i8x4, &RISCV::VRN4M2RegClass);
268 addRegisterClass(MVT::riscv_nxv32i8x2, &RISCV::VRN2M4RegClass);
269 }
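// Editorial sketch: the tuple mappings above follow NF (number of fields) and
// the LMUL of the underlying field type, e.g. with RVVBitsPerBlock = 64:
//   riscv_nxv4i8x5  -> 5 fields of nxv4i8  (LMUL <= 1) -> VRN5M1RegClass
//   riscv_nxv16i8x3 -> 3 fields of nxv16i8 (LMUL == 2) -> VRN3M2RegClass
//   riscv_nxv32i8x2 -> 2 fields of nxv32i8 (LMUL == 4) -> VRN2M4RegClass
// NF * LMUL may not exceed 8, which is why the M2 and M4 tuples have fewer
// fields than the M1 ones.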
270
271 // Compute derived properties from the register classes.
273
275
277 MVT::i1, Promote);
278 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
280 MVT::i1, Promote);
281
282 // TODO: add all necessary setOperationAction calls.
284
289
294 if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
297 }
298
300
303
304 if (!Subtarget.hasVendorXTHeadBb())
306
308
309 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
310 !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
311 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
312
313 if (Subtarget.is64Bit()) {
315
318 MVT::i32, Custom);
320 if (!Subtarget.hasStdExtZbb())
323 Custom);
325 }
326 if (!Subtarget.hasStdExtZmmul()) {
328 } else if (Subtarget.is64Bit()) {
331 } else {
333 }
334
335 if (!Subtarget.hasStdExtM()) {
337 Expand);
338 } else if (Subtarget.is64Bit()) {
340 {MVT::i8, MVT::i16, MVT::i32}, Custom);
341 }
342
345 Expand);
346
348 Custom);
349
350 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
351 if (Subtarget.is64Bit())
353 } else if (Subtarget.hasVendorXTHeadBb()) {
354 if (Subtarget.is64Bit())
357 } else if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
359 } else {
361 }
362
363 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
364 // pattern match it directly in isel.
366 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
367 Subtarget.hasVendorXTHeadBb())
368 ? Legal
369 : Expand);
370
371 if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
373 } else {
374 // Zbkb can use rev8+brev8 to implement bitreverse.
376 Subtarget.hasStdExtZbkb() ? Custom : Expand);
377 }
378
379 if (Subtarget.hasStdExtZbb() ||
380 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
382 Legal);
383 }
384
385 if (Subtarget.hasStdExtZbb() ||
386 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
387 if (Subtarget.is64Bit())
389 } else {
391 }
392
393 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
394 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
395 // We need the custom lowering to make sure that the resulting sequence
396 // for the 32-bit case is efficient on 64-bit targets.
397 if (Subtarget.is64Bit())
399 } else {
401 }
402
403 if (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()) {
405 } else if (Subtarget.hasShortForwardBranchOpt()) {
406 // We can use PseudoCCSUB to implement ABS.
408 } else if (Subtarget.is64Bit()) {
410 }
411
412 if (!Subtarget.hasVendorXTHeadCondMov())
414
415 static const unsigned FPLegalNodeTypes[] = {
423
424 static const ISD::CondCode FPCCToExpand[] = {
428
429 static const unsigned FPOpToExpand[] = {
431 ISD::FREM};
432
433 static const unsigned FPRndMode[] = {
436
437 static const unsigned ZfhminZfbfminPromoteOps[] = {
447
448 if (Subtarget.hasStdExtZfbfmin()) {
454 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
461 }
462
463 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
464 if (Subtarget.hasStdExtZfhOrZhinx()) {
465 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
466 setOperationAction(FPRndMode, MVT::f16,
467 Subtarget.hasStdExtZfa() ? Legal : Custom);
470 Subtarget.hasStdExtZfa() ? Legal : Custom);
471 if (Subtarget.hasStdExtZfa())
473 } else {
474 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
479 setOperationAction(Op, MVT::f16, Custom);
485 }
486
488
491 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
495
497 ISD::FNEARBYINT, MVT::f16,
498 Subtarget.hasStdExtZfh() && Subtarget.hasStdExtZfa() ? Legal : Promote);
503 MVT::f16, Promote);
504
505 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
506 // complete support for all operations in LegalizeDAG.
511 MVT::f16, Promote);
512
513 // We need to custom promote this.
514 if (Subtarget.is64Bit())
516 }
517
518 if (Subtarget.hasStdExtFOrZfinx()) {
519 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
520 setOperationAction(FPRndMode, MVT::f32,
521 Subtarget.hasStdExtZfa() ? Legal : Custom);
522 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
526 setOperationAction(FPOpToExpand, MVT::f32, Expand);
527 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
528 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
529 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
530 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
534 Subtarget.isSoftFPABI() ? LibCall : Custom);
539
540 if (Subtarget.hasStdExtZfa()) {
544 } else {
546 }
547 }
548
549 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
551
552 if (Subtarget.hasStdExtDOrZdinx()) {
553 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
554
555 if (!Subtarget.is64Bit())
557
558 if (Subtarget.hasStdExtZfa()) {
560 setOperationAction(FPRndMode, MVT::f64, Legal);
563 } else {
564 if (Subtarget.is64Bit())
565 setOperationAction(FPRndMode, MVT::f64, Custom);
566
568 }
569
572 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
576 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
577 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
578 setOperationAction(FPOpToExpand, MVT::f64, Expand);
579 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
580 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
581 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
582 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
586 Subtarget.isSoftFPABI() ? LibCall : Custom);
591 }
592
593 if (Subtarget.is64Bit()) {
596 MVT::i32, Custom);
598 }
599
600 if (Subtarget.hasStdExtFOrZfinx()) {
602 Custom);
603
604 // f16/bf16 require custom handling.
606 Custom);
608 Custom);
609
612 }
613
616 XLenVT, Custom);
617
619
620 if (Subtarget.is64Bit())
622
623 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
624 // Unfortunately this can't be determined just from the ISA naming string.
626 Subtarget.is64Bit() ? Legal : Custom);
628 Subtarget.is64Bit() ? Legal : Custom);
629
630 if (Subtarget.is64Bit()) {
633 }
634
637 if (Subtarget.is64Bit())
639
640 if (Subtarget.hasStdExtZicbop()) {
642 }
643
644 if (Subtarget.hasStdExtA()) {
646 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
648 else
650 } else if (Subtarget.hasForcedAtomics()) {
652 } else {
654 }
655
657
659
660 if (getTargetMachine().getTargetTriple().isOSLinux()) {
661 // Custom lowering of llvm.clear_cache.
663 }
664
665 if (Subtarget.hasVInstructions()) {
667
669
670 // RVV intrinsics may have illegal operands.
671 // We also need to custom legalize vmv.x.s.
674 {MVT::i8, MVT::i16}, Custom);
675 if (Subtarget.is64Bit())
677 MVT::i32, Custom);
678 else
680 MVT::i64, Custom);
681
683 MVT::Other, Custom);
684
685 static const unsigned IntegerVPOps[] = {
686 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
687 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
688 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
689 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
690 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
691 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
692 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
693 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
694 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
695 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
696 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
697 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
698 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
699 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
700 ISD::EXPERIMENTAL_VP_SPLAT};
701
702 static const unsigned FloatingPointVPOps[] = {
703 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
704 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
705 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
706 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
707 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
708 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
709 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
710 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
711 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
712 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
713 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
714 ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
715 ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM,
716 ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};
717
718 static const unsigned IntegerVecReduceOps[] = {
722
723 static const unsigned FloatingPointVecReduceOps[] = {
726
727 static const unsigned FloatingPointLibCallOps[] = {
730
731 if (!Subtarget.is64Bit()) {
732 // We must custom-lower certain vXi64 operations on RV32 due to the vector
733 // element type being illegal.
735 MVT::i64, Custom);
736
737 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
738
739 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
740 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
741 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
742 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
743 MVT::i64, Custom);
744 }
745
746 for (MVT VT : BoolVecVTs) {
747 if (!isTypeLegal(VT))
748 continue;
749
751
752 // Mask VTs are custom-expanded into a series of standard nodes
756 VT, Custom);
757
759 Custom);
760
762 setOperationAction({ISD::SELECT_CC, ISD::VSELECT, ISD::VP_SELECT}, VT,
763 Expand);
764 setOperationAction(ISD::VP_MERGE, VT, Custom);
765
766 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
767 Custom);
768
769 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
770
773 Custom);
774
776 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
777 Custom);
778
779 // RVV has native int->float & float->int conversions where the
780 // element type sizes are within one power-of-two of each other. Any
781 // wider distances between type sizes have to be lowered as sequences
782 // which progressively narrow the gap in stages.
787 VT, Custom);
789 Custom);
790
791 // Expand all extending loads to types larger than this, and truncating
792 // stores from types larger than this.
794 setTruncStoreAction(VT, OtherVT, Expand);
796 OtherVT, Expand);
797 }
798
799 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
800 ISD::VP_TRUNCATE, ISD::VP_SETCC},
801 VT, Custom);
802
805
807
808 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
809 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
810
813 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
814 }
815
816 for (MVT VT : IntVecVTs) {
817 if (!isTypeLegal(VT))
818 continue;
819
822
823 // Vectors implement MULHS/MULHU.
825
826 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
827 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
829
831 Legal);
832
834
835 // Custom-lower extensions and truncations from/to mask types.
837 VT, Custom);
838
839 // RVV has native int->float & float->int conversions where the
840 // element type sizes are within one power-of-two of each other. Any
841 // wider distances between type sizes have to be lowered as sequences
842 // which progressively narrow the gap in stages.
847 VT, Custom);
849 Custom);
853 VT, Legal);
854
855 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
856 // nodes which truncate by one power of two at a time.
859 Custom);
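// Worked example (editorial): a truncate from nxv2i64 to nxv2i8 is not legal
// in one step; it becomes a chain of RISCVISD::TRUNCATE_VECTOR_VL nodes that
// each halve the element width,
//   nxv2i64 -> nxv2i32 -> nxv2i16 -> nxv2i8
// i.e. three back-to-back narrowing (vnsrl-style) operations.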
860
861 // Custom-lower insert/extract operations to simplify patterns.
863 Custom);
864
865 // Custom-lower reduction operations to set up the corresponding custom
866 // nodes' operands.
867 setOperationAction(IntegerVecReduceOps, VT, Custom);
868
869 setOperationAction(IntegerVPOps, VT, Custom);
870
872
874 VT, Custom);
875
877 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
878 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
879 VT, Custom);
880
883 VT, Custom);
884
887
889
891 setTruncStoreAction(VT, OtherVT, Expand);
893 OtherVT, Expand);
894 }
895
898
899 // Splice
901
902 if (Subtarget.hasStdExtZvkb()) {
904 setOperationAction(ISD::VP_BSWAP, VT, Custom);
905 } else {
906 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
908 }
909
910 if (Subtarget.hasStdExtZvbb()) {
912 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
913 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
914 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
915 VT, Custom);
916 } else {
917 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
919 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
920 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
921 VT, Expand);
922
923 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
924 // range of f32.
925 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
926 if (isTypeLegal(FloatVT)) {
928 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
929 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
930 VT, Custom);
931 }
932 }
933
935 }
936
937 for (MVT VT : VecTupleVTs) {
938 if (!isTypeLegal(VT))
939 continue;
940
942 }
943
944 // Expand various CCs to best match the RVV ISA, which natively supports UNE
945 // but no other unordered comparisons, and supports all ordered comparisons
946 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
947 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
948 // and we pattern-match those back to the "original", swapping operands once
949 // more. This way we catch both operations and both "vf" and "fv" forms with
950 // fewer patterns.
951 static const ISD::CondCode VFPCCToExpand[] = {
955 };
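// Worked example (editorial): SETOGT is one of the expanded codes, so
//   setcc ogt %a, %b   -->   setcc olt %b, %a
// during legalization, and the isel patterns for OLT accept either operand
// order, selecting vmflt.vv / vmflt.vf or the swapped "fv" form (vmfgt.vf).
// One small pattern set therefore covers GT/GE/OGT/OGE via LT/LE/OLT/OLE.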
956
957 // TODO: support more ops.
958 static const unsigned ZvfhminZvfbfminPromoteOps[] = {
966
967 // TODO: support more vp ops.
968 static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
969 ISD::VP_FADD,
970 ISD::VP_FSUB,
971 ISD::VP_FMUL,
972 ISD::VP_FDIV,
973 ISD::VP_FMA,
974 ISD::VP_REDUCE_FMIN,
975 ISD::VP_REDUCE_FMAX,
976 ISD::VP_SQRT,
977 ISD::VP_FMINNUM,
978 ISD::VP_FMAXNUM,
979 ISD::VP_FCEIL,
980 ISD::VP_FFLOOR,
981 ISD::VP_FROUND,
982 ISD::VP_FROUNDEVEN,
983 ISD::VP_FROUNDTOZERO,
984 ISD::VP_FRINT,
985 ISD::VP_FNEARBYINT,
986 ISD::VP_SETCC,
987 ISD::VP_FMINIMUM,
988 ISD::VP_FMAXIMUM,
989 ISD::VP_REDUCE_FMINIMUM,
990 ISD::VP_REDUCE_FMAXIMUM};
991
992 // Sets common operation actions on RVV floating-point vector types.
993 const auto SetCommonVFPActions = [&](MVT VT) {
995 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
996 // sizes are within one power-of-two of each other. Therefore conversions
997 // between vXf16 and vXf64 must be lowered as sequences which convert via
998 // vXf32.
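// Worked example (editorial): nxv2f16 -> nxv2f64 is more than one power of
// two apart, so the extension is emitted as two steps,
//   fpext nxv2f16 -> nxv2f32, then fpext nxv2f32 -> nxv2f64
// (two widening vfwcvt.f.f.v operations); the reverse f64 -> f16 rounding is
// likewise staged through f32.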
1001 // Custom-lower insert/extract operations to simplify patterns.
1003 Custom);
1004 // Expand various condition codes (explained above).
1005 setCondCodeAction(VFPCCToExpand, VT, Expand);
1006
1009
1013 VT, Custom);
1014
1015 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1016
1017 // Expand FP operations that need libcalls.
1018 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1019
1021
1023
1025 VT, Custom);
1026
1028 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1029 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1030 VT, Custom);
1031
1034
1037 VT, Custom);
1038
1041
1043
1044 setOperationAction(FloatingPointVPOps, VT, Custom);
1045
1047 Custom);
1050 VT, Legal);
1055 VT, Custom);
1056
1058 };
1059
1060 // Sets common extload/truncstore actions on RVV floating-point vector
1061 // types.
1062 const auto SetCommonVFPExtLoadTruncStoreActions =
1063 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1064 for (auto SmallVT : SmallerVTs) {
1065 setTruncStoreAction(VT, SmallVT, Expand);
1066 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1067 }
1068 };
1069
1070 // Sets common actions for f16 and bf16 for when there's only
1071 // zvfhmin/zvfbfmin and we need to promote to f32 for most operations.
1072 const auto SetCommonPromoteToF32Actions = [&](MVT VT) {
1075 Custom);
1076 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1077 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1078 Custom);
1080 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
1086 VT, Custom);
1087 MVT EltVT = VT.getVectorElementType();
1088 if (isTypeLegal(EltVT))
1089 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
1091 VT, Custom);
1092 else
1093 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
1094 EltVT, Custom);
1096 ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
1097 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1098 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1099 ISD::VP_SCATTER},
1100 VT, Custom);
1101
1105
1106 // Expand FP operations that need libcalls.
1107 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1108
1109 // Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.
1110 if (getLMUL(VT) == RISCVII::VLMUL::LMUL_8) {
1111 setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
1112 setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
1113 } else {
1114 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1115 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1116 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1117 }
1118 };
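// Illustrative sketch (editorial): with only Zvfhmin, an fadd on nxv4f16 is
// promoted to f32 by the hook above, conceptually
//   %a32 = fpext nxv4f16 %a to nxv4f32
//   %b32 = fpext nxv4f16 %b to nxv4f32
//   %s32 = fadd nxv4f32 %a32, %b32
//   %s   = fptrunc nxv4f32 %s32 to nxv4f16
// For nxv32[b]f16 the promoted type nxv32f32 does not exist, so the LMUL_8
// branch above instead custom-lowers by splitting the vector in half first.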
1119
1120 if (Subtarget.hasVInstructionsF16()) {
1121 for (MVT VT : F16VecVTs) {
1122 if (!isTypeLegal(VT))
1123 continue;
1124 SetCommonVFPActions(VT);
1125 }
1126 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1127 for (MVT VT : F16VecVTs) {
1128 if (!isTypeLegal(VT))
1129 continue;
1130 SetCommonPromoteToF32Actions(VT);
1131 }
1132 }
1133
1134 if (Subtarget.hasVInstructionsBF16Minimal()) {
1135 for (MVT VT : BF16VecVTs) {
1136 if (!isTypeLegal(VT))
1137 continue;
1138 SetCommonPromoteToF32Actions(VT);
1139 }
1140 }
1141
1142 if (Subtarget.hasVInstructionsF32()) {
1143 for (MVT VT : F32VecVTs) {
1144 if (!isTypeLegal(VT))
1145 continue;
1146 SetCommonVFPActions(VT);
1147 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1148 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1149 }
1150 }
1151
1152 if (Subtarget.hasVInstructionsF64()) {
1153 for (MVT VT : F64VecVTs) {
1154 if (!isTypeLegal(VT))
1155 continue;
1156 SetCommonVFPActions(VT);
1157 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1158 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1159 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1160 }
1161 }
1162
1163 if (Subtarget.useRVVForFixedLengthVectors()) {
1164 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
1165 if (!useRVVForFixedLengthVectorVT(VT))
1166 continue;
1167
1168 // By default everything must be expanded.
1169 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1172 setTruncStoreAction(VT, OtherVT, Expand);
1174 OtherVT, Expand);
1175 }
1176
1177 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1178 // expansion to a build_vector of 0s.
1180
1181 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1183 Custom);
1184
1187 Custom);
1188
1190 VT, Custom);
1191
1193
1195
1197
1199
1202 Custom);
1203
1205
1208 Custom);
1209
1211 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1212 Custom);
1213
1215 {
1224 },
1225 VT, Custom);
1227 Custom);
1228
1230
1231 // Operations below differ between mask vectors and other vectors.
1232 if (VT.getVectorElementType() == MVT::i1) {
1233 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1234 ISD::OR, ISD::XOR},
1235 VT, Custom);
1236
1237 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1238 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1239 VT, Custom);
1240
1241 setOperationAction(ISD::VP_MERGE, VT, Custom);
1242
1243 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1244 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1245 continue;
1246 }
1247
1248 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1249 // it before type legalization for i64 vectors on RV32. It will then be
1250 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1251 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1252 // improvements first.
1253 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1256 }
1257
1260
1261 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1262 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1263 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1264 ISD::VP_SCATTER},
1265 VT, Custom);
1266
1270 VT, Custom);
1271
1274
1276
1277 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1278 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1280
1284 VT, Custom);
1285
1287
1290
1291 // Custom-lower reduction operations to set up the corresponding custom
1292 // nodes' operands.
1296 VT, Custom);
1297
1298 setOperationAction(IntegerVPOps, VT, Custom);
1299
1300 if (Subtarget.hasStdExtZvkb())
1302
1303 if (Subtarget.hasStdExtZvbb()) {
1306 VT, Custom);
1307 } else {
1308 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
1309 // range of f32.
1310 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1311 if (isTypeLegal(FloatVT))
1314 Custom);
1315 }
1316
1318 }
1319
1320 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
1321 // There are no extending loads or truncating stores.
1322 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1323 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1324 setTruncStoreAction(VT, InnerVT, Expand);
1325 }
1326
1327 if (!useRVVForFixedLengthVectorVT(VT))
1328 continue;
1329
1330 // By default everything must be expanded.
1331 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1333
1334 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1335 // expansion to a build_vector of 0s.
1337
1342 VT, Custom);
1343
1346 VT, Custom);
1347 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER,
1348 ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1349 ISD::EXPERIMENTAL_VP_STRIDED_STORE},
1350 VT, Custom);
1351
1354 Custom);
1355
1356 if (VT.getVectorElementType() == MVT::f16 &&
1357 !Subtarget.hasVInstructionsF16()) {
1359 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1361 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1362 Custom);
1363 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT,
1364 Custom);
1365 if (Subtarget.hasStdExtZfhmin()) {
1367 } else {
1368 // We need to custom legalize f16 build vectors if Zfhmin isn't
1369 // available.
1371 }
1375 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1376 // Don't promote f16 vector operations to f32 if the f32 vector type is
1377 // not legal.
1378 // TODO: could split the f16 vector into two vectors and do promotion.
1379 if (!isTypeLegal(F32VecVT))
1380 continue;
1381 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1382 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1383 continue;
1384 }
1385
1386 if (VT.getVectorElementType() == MVT::bf16) {
1388 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1389 if (Subtarget.hasStdExtZfbfmin()) {
1391 } else {
1392 // We need to custom legalize bf16 build vectors if Zfbfmin isn't
1393 // available.
1395 }
1397 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1398 Custom);
1399 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1400 // Don't promote bf16 vector operations to f32 if the f32 vector type is
1401 // not legal.
1402 // TODO: could split the bf16 vector into two vectors and do promotion.
1403 if (!isTypeLegal(F32VecVT))
1404 continue;
1405 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1406 // TODO: Promote VP ops to fp32.
1407 continue;
1408 }
1409
1411 Custom);
1412
1417 VT, Custom);
1418
1421 VT, Custom);
1422
1423 setCondCodeAction(VFPCCToExpand, VT, Expand);
1424
1427
1429
1430 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1431
1432 setOperationAction(FloatingPointVPOps, VT, Custom);
1433
1440 VT, Custom);
1441 }
1442
1443 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1444 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32}, Custom);
1445 if (Subtarget.is64Bit())
1447 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1449 if (Subtarget.hasStdExtZfbfmin())
1451 if (Subtarget.hasStdExtFOrZfinx())
1453 if (Subtarget.hasStdExtDOrZdinx())
1455 }
1456 }
1457
1458 if (Subtarget.hasStdExtA())
1460
1461 if (Subtarget.hasForcedAtomics()) {
1462 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1468 XLenVT, LibCall);
1469 }
1470
1471 if (Subtarget.hasVendorXTHeadMemIdx()) {
1472 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1473 setIndexedLoadAction(im, MVT::i8, Legal);
1474 setIndexedStoreAction(im, MVT::i8, Legal);
1475 setIndexedLoadAction(im, MVT::i16, Legal);
1476 setIndexedStoreAction(im, MVT::i16, Legal);
1477 setIndexedLoadAction(im, MVT::i32, Legal);
1478 setIndexedStoreAction(im, MVT::i32, Legal);
1479
1480 if (Subtarget.is64Bit()) {
1481 setIndexedLoadAction(im, MVT::i64, Legal);
1482 setIndexedStoreAction(im, MVT::i64, Legal);
1483 }
1484 }
1485 }
1486
1487 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
1491
1495 }
1496
1497 // Function alignments.
1498 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1499 setMinFunctionAlignment(FunctionAlignment);
1500 // Set preferred alignments.
1503
1509
1510 if (Subtarget.hasStdExtFOrZfinx())
1512
1513 if (Subtarget.hasStdExtZbb())
1515
1516 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1517 Subtarget.hasVInstructions())
1519
1520 if (Subtarget.hasStdExtZbkb())
1522
1523 if (Subtarget.hasStdExtFOrZfinx())
1526 if (Subtarget.hasVInstructions())
1528 ISD::MSCATTER, ISD::VP_GATHER,
1529 ISD::VP_SCATTER, ISD::SRA,
1533 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_REVERSE,
1539 if (Subtarget.hasVendorXTHeadMemPair())
1541 if (Subtarget.useRVVForFixedLengthVectors())
1543
1544 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1545 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1546
1547 // Disable strict node mutation.
1548 IsStrictFPEnabled = true;
1549 EnableExtLdPromotion = true;
1550
1551 // Let the subtarget decide if a predictable select is more expensive than the
1552 // corresponding branch. This information is used in CGP/SelectOpt to decide
1553 // when to convert selects into branches.
1554 PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1555
1556 MaxStoresPerMemsetOptSize = Subtarget.getMaxStoresPerMemset(/*OptSize=*/true);
1557 MaxStoresPerMemset = Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
1558
1560 MaxStoresPerMemcpyOptSize = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/true);
1561 MaxStoresPerMemcpy = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/false);
1562
1564 Subtarget.getMaxStoresPerMemmove(/*OptSize=*/true);
1565 MaxStoresPerMemmove = Subtarget.getMaxStoresPerMemmove(/*OptSize=*/false);
1566
1567 MaxLoadsPerMemcmpOptSize = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/true);
1568 MaxLoadsPerMemcmp = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/false);
1569}
1570
1572 LLVMContext &Context,
1573 EVT VT) const {
1574 if (!VT.isVector())
1575 return getPointerTy(DL);
1576 if (Subtarget.hasVInstructions() &&
1577 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1578 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1580}
1581
1582MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1583 return Subtarget.getXLenVT();
1584}
1585
1586// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1587bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1588 unsigned VF,
1589 bool IsScalable) const {
1590 if (!Subtarget.hasVInstructions())
1591 return true;
1592
1593 if (!IsScalable)
1594 return true;
1595
1596 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1597 return true;
1598
1599 // Don't allow VF=1 if those types aren't legal.
1600 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1601 return true;
1602
1603 // VLEN=32 support is incomplete.
1604 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1605 return true;
1606
1607 // The maximum VF is for the smallest element width with LMUL=8.
1608 // VF must be a power of 2.
1609 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1610 return VF > MaxVF || !isPowerOf2_32(VF);
1611}
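// Worked example (editorial): with the standard RVVBitsPerBlock of 64, the
// smallest element width (8 bits) at LMUL=8 gives
//   MaxVF = (64 / 8) * 8 = 64
// so get_vector_length is expanded (return true) for VF = 96 or for any
// non-power-of-two VF, while VF = 32 can be lowered to a vsetvli.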
1612
1614 return !Subtarget.hasVInstructions() ||
1615 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1616}
1617
1619 const CallInst &I,
1620 MachineFunction &MF,
1621 unsigned Intrinsic) const {
1622 auto &DL = I.getDataLayout();
1623
1624 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1625 bool IsUnitStrided, bool UsePtrVal = false) {
1627 // We can't use ptrVal if the intrinsic can access memory before the
1628 // pointer. This means we can't use it for strided or indexed intrinsics.
1629 if (UsePtrVal)
1630 Info.ptrVal = I.getArgOperand(PtrOp);
1631 else
1632 Info.fallbackAddressSpace =
1633 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1634 Type *MemTy;
1635 if (IsStore) {
1636 // Store value is the first operand.
1637 MemTy = I.getArgOperand(0)->getType();
1638 } else {
1639 // Use the return type. If it's a segment load, the return type is a struct.
1640 MemTy = I.getType();
1641 if (MemTy->isStructTy())
1642 MemTy = MemTy->getStructElementType(0);
1643 }
1644 if (!IsUnitStrided)
1645 MemTy = MemTy->getScalarType();
1646
1647 Info.memVT = getValueType(DL, MemTy);
1648 if (MemTy->isTargetExtTy()) {
1650 // For a RISC-V vector tuple type, the alignment should be that of its element type.
1650 if (cast<TargetExtType>(MemTy)->getName() == "riscv.vector.tuple")
1651 MemTy = Type::getIntNTy(
1652 MemTy->getContext(),
1653 1 << cast<ConstantInt>(I.getArgOperand(I.arg_size() - 1))
1654 ->getZExtValue());
1655 Info.align = DL.getABITypeAlign(MemTy);
1656 } else {
1657 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1658 }
1660 Info.flags |=
1662 return true;
1663 };
1664
1665 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1667
1669 switch (Intrinsic) {
1670 default:
1671 return false;
1672 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1673 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1674 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1675 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1676 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1677 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1678 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1679 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1680 case Intrinsic::riscv_masked_cmpxchg_i32:
1682 Info.memVT = MVT::i32;
1683 Info.ptrVal = I.getArgOperand(0);
1684 Info.offset = 0;
1685 Info.align = Align(4);
1688 return true;
1689 case Intrinsic::riscv_seg2_load:
1690 case Intrinsic::riscv_seg3_load:
1691 case Intrinsic::riscv_seg4_load:
1692 case Intrinsic::riscv_seg5_load:
1693 case Intrinsic::riscv_seg6_load:
1694 case Intrinsic::riscv_seg7_load:
1695 case Intrinsic::riscv_seg8_load:
1696 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1697 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1698 case Intrinsic::riscv_seg2_store:
1699 case Intrinsic::riscv_seg3_store:
1700 case Intrinsic::riscv_seg4_store:
1701 case Intrinsic::riscv_seg5_store:
1702 case Intrinsic::riscv_seg6_store:
1703 case Intrinsic::riscv_seg7_store:
1704 case Intrinsic::riscv_seg8_store:
1705 // Operands are (vec, ..., vec, ptr, vl)
1706 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1707 /*IsStore*/ true,
1708 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1709 case Intrinsic::riscv_vle:
1710 case Intrinsic::riscv_vle_mask:
1711 case Intrinsic::riscv_vleff:
1712 case Intrinsic::riscv_vleff_mask:
1713 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1714 /*IsStore*/ false,
1715 /*IsUnitStrided*/ true,
1716 /*UsePtrVal*/ true);
1717 case Intrinsic::riscv_vse:
1718 case Intrinsic::riscv_vse_mask:
1719 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1720 /*IsStore*/ true,
1721 /*IsUnitStrided*/ true,
1722 /*UsePtrVal*/ true);
1723 case Intrinsic::riscv_vlse:
1724 case Intrinsic::riscv_vlse_mask:
1725 case Intrinsic::riscv_vloxei:
1726 case Intrinsic::riscv_vloxei_mask:
1727 case Intrinsic::riscv_vluxei:
1728 case Intrinsic::riscv_vluxei_mask:
1729 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1730 /*IsStore*/ false,
1731 /*IsUnitStrided*/ false);
1732 case Intrinsic::riscv_vsse:
1733 case Intrinsic::riscv_vsse_mask:
1734 case Intrinsic::riscv_vsoxei:
1735 case Intrinsic::riscv_vsoxei_mask:
1736 case Intrinsic::riscv_vsuxei:
1737 case Intrinsic::riscv_vsuxei_mask:
1738 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1739 /*IsStore*/ true,
1740 /*IsUnitStrided*/ false);
1741 case Intrinsic::riscv_vlseg2:
1742 case Intrinsic::riscv_vlseg3:
1743 case Intrinsic::riscv_vlseg4:
1744 case Intrinsic::riscv_vlseg5:
1745 case Intrinsic::riscv_vlseg6:
1746 case Intrinsic::riscv_vlseg7:
1747 case Intrinsic::riscv_vlseg8:
1748 case Intrinsic::riscv_vlseg2ff:
1749 case Intrinsic::riscv_vlseg3ff:
1750 case Intrinsic::riscv_vlseg4ff:
1751 case Intrinsic::riscv_vlseg5ff:
1752 case Intrinsic::riscv_vlseg6ff:
1753 case Intrinsic::riscv_vlseg7ff:
1754 case Intrinsic::riscv_vlseg8ff:
1755 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1756 /*IsStore*/ false,
1757 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1758 case Intrinsic::riscv_vlseg2_mask:
1759 case Intrinsic::riscv_vlseg3_mask:
1760 case Intrinsic::riscv_vlseg4_mask:
1761 case Intrinsic::riscv_vlseg5_mask:
1762 case Intrinsic::riscv_vlseg6_mask:
1763 case Intrinsic::riscv_vlseg7_mask:
1764 case Intrinsic::riscv_vlseg8_mask:
1765 case Intrinsic::riscv_vlseg2ff_mask:
1766 case Intrinsic::riscv_vlseg3ff_mask:
1767 case Intrinsic::riscv_vlseg4ff_mask:
1768 case Intrinsic::riscv_vlseg5ff_mask:
1769 case Intrinsic::riscv_vlseg6ff_mask:
1770 case Intrinsic::riscv_vlseg7ff_mask:
1771 case Intrinsic::riscv_vlseg8ff_mask:
1772 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1773 /*IsStore*/ false,
1774 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1775 case Intrinsic::riscv_vlsseg2:
1776 case Intrinsic::riscv_vlsseg3:
1777 case Intrinsic::riscv_vlsseg4:
1778 case Intrinsic::riscv_vlsseg5:
1779 case Intrinsic::riscv_vlsseg6:
1780 case Intrinsic::riscv_vlsseg7:
1781 case Intrinsic::riscv_vlsseg8:
1782 case Intrinsic::riscv_vloxseg2:
1783 case Intrinsic::riscv_vloxseg3:
1784 case Intrinsic::riscv_vloxseg4:
1785 case Intrinsic::riscv_vloxseg5:
1786 case Intrinsic::riscv_vloxseg6:
1787 case Intrinsic::riscv_vloxseg7:
1788 case Intrinsic::riscv_vloxseg8:
1789 case Intrinsic::riscv_vluxseg2:
1790 case Intrinsic::riscv_vluxseg3:
1791 case Intrinsic::riscv_vluxseg4:
1792 case Intrinsic::riscv_vluxseg5:
1793 case Intrinsic::riscv_vluxseg6:
1794 case Intrinsic::riscv_vluxseg7:
1795 case Intrinsic::riscv_vluxseg8:
1796 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1797 /*IsStore*/ false,
1798 /*IsUnitStrided*/ false);
1799 case Intrinsic::riscv_vlsseg2_mask:
1800 case Intrinsic::riscv_vlsseg3_mask:
1801 case Intrinsic::riscv_vlsseg4_mask:
1802 case Intrinsic::riscv_vlsseg5_mask:
1803 case Intrinsic::riscv_vlsseg6_mask:
1804 case Intrinsic::riscv_vlsseg7_mask:
1805 case Intrinsic::riscv_vlsseg8_mask:
1806 case Intrinsic::riscv_vloxseg2_mask:
1807 case Intrinsic::riscv_vloxseg3_mask:
1808 case Intrinsic::riscv_vloxseg4_mask:
1809 case Intrinsic::riscv_vloxseg5_mask:
1810 case Intrinsic::riscv_vloxseg6_mask:
1811 case Intrinsic::riscv_vloxseg7_mask:
1812 case Intrinsic::riscv_vloxseg8_mask:
1813 case Intrinsic::riscv_vluxseg2_mask:
1814 case Intrinsic::riscv_vluxseg3_mask:
1815 case Intrinsic::riscv_vluxseg4_mask:
1816 case Intrinsic::riscv_vluxseg5_mask:
1817 case Intrinsic::riscv_vluxseg6_mask:
1818 case Intrinsic::riscv_vluxseg7_mask:
1819 case Intrinsic::riscv_vluxseg8_mask:
1820 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 6,
1821 /*IsStore*/ false,
1822 /*IsUnitStrided*/ false);
1823 case Intrinsic::riscv_vsseg2:
1824 case Intrinsic::riscv_vsseg3:
1825 case Intrinsic::riscv_vsseg4:
1826 case Intrinsic::riscv_vsseg5:
1827 case Intrinsic::riscv_vsseg6:
1828 case Intrinsic::riscv_vsseg7:
1829 case Intrinsic::riscv_vsseg8:
1830 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1831 /*IsStore*/ true,
1832 /*IsUnitStrided*/ false);
1833 case Intrinsic::riscv_vsseg2_mask:
1834 case Intrinsic::riscv_vsseg3_mask:
1835 case Intrinsic::riscv_vsseg4_mask:
1836 case Intrinsic::riscv_vsseg5_mask:
1837 case Intrinsic::riscv_vsseg6_mask:
1838 case Intrinsic::riscv_vsseg7_mask:
1839 case Intrinsic::riscv_vsseg8_mask:
1840 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1841 /*IsStore*/ true,
1842 /*IsUnitStrided*/ false);
1843 case Intrinsic::riscv_vssseg2:
1844 case Intrinsic::riscv_vssseg3:
1845 case Intrinsic::riscv_vssseg4:
1846 case Intrinsic::riscv_vssseg5:
1847 case Intrinsic::riscv_vssseg6:
1848 case Intrinsic::riscv_vssseg7:
1849 case Intrinsic::riscv_vssseg8:
1850 case Intrinsic::riscv_vsoxseg2:
1851 case Intrinsic::riscv_vsoxseg3:
1852 case Intrinsic::riscv_vsoxseg4:
1853 case Intrinsic::riscv_vsoxseg5:
1854 case Intrinsic::riscv_vsoxseg6:
1855 case Intrinsic::riscv_vsoxseg7:
1856 case Intrinsic::riscv_vsoxseg8:
1857 case Intrinsic::riscv_vsuxseg2:
1858 case Intrinsic::riscv_vsuxseg3:
1859 case Intrinsic::riscv_vsuxseg4:
1860 case Intrinsic::riscv_vsuxseg5:
1861 case Intrinsic::riscv_vsuxseg6:
1862 case Intrinsic::riscv_vsuxseg7:
1863 case Intrinsic::riscv_vsuxseg8:
1864 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1865 /*IsStore*/ true,
1866 /*IsUnitStrided*/ false);
1867 case Intrinsic::riscv_vssseg2_mask:
1868 case Intrinsic::riscv_vssseg3_mask:
1869 case Intrinsic::riscv_vssseg4_mask:
1870 case Intrinsic::riscv_vssseg5_mask:
1871 case Intrinsic::riscv_vssseg6_mask:
1872 case Intrinsic::riscv_vssseg7_mask:
1873 case Intrinsic::riscv_vssseg8_mask:
1874 case Intrinsic::riscv_vsoxseg2_mask:
1875 case Intrinsic::riscv_vsoxseg3_mask:
1876 case Intrinsic::riscv_vsoxseg4_mask:
1877 case Intrinsic::riscv_vsoxseg5_mask:
1878 case Intrinsic::riscv_vsoxseg6_mask:
1879 case Intrinsic::riscv_vsoxseg7_mask:
1880 case Intrinsic::riscv_vsoxseg8_mask:
1881 case Intrinsic::riscv_vsuxseg2_mask:
1882 case Intrinsic::riscv_vsuxseg3_mask:
1883 case Intrinsic::riscv_vsuxseg4_mask:
1884 case Intrinsic::riscv_vsuxseg5_mask:
1885 case Intrinsic::riscv_vsuxseg6_mask:
1886 case Intrinsic::riscv_vsuxseg7_mask:
1887 case Intrinsic::riscv_vsuxseg8_mask:
1888 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1889 /*IsStore*/ true,
1890 /*IsUnitStrided*/ false);
1891 }
1892}
1893
1895 const AddrMode &AM, Type *Ty,
1896 unsigned AS,
1897 Instruction *I) const {
1898 // No global is ever allowed as a base.
1899 if (AM.BaseGV)
1900 return false;
1901
1902 // None of our addressing modes allows a scalable offset
1903 if (AM.ScalableOffset)
1904 return false;
1905
1906 // RVV instructions only support register addressing.
1907 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1908 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1909
1910 // Require a 12-bit signed offset.
1911 if (!isInt<12>(AM.BaseOffs))
1912 return false;
1913
1914 switch (AM.Scale) {
1915 case 0: // "r+i" or just "i", depending on HasBaseReg.
1916 break;
1917 case 1:
1918 if (!AM.HasBaseReg) // allow "r+i".
1919 break;
1920 return false; // disallow "r+r" or "r+r+i".
1921 default:
1922 return false;
1923 }
1924
1925 return true;
1926}
1927
1929 return isInt<12>(Imm);
1930}
1931
1933 return isInt<12>(Imm);
1934}
1935
1936// On RV32, 64-bit integers are split into their high and low parts and held
1937// in two different registers, so the trunc is free since the low register can
1938// just be used.
1939// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1940// isTruncateFree?
1942 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1943 return false;
1944 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1945 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1946 return (SrcBits == 64 && DestBits == 32);
1947}
1948
1950 // We consider i64->i32 free on RV64 since we have good selection of W
1951 // instructions that make promoting operations back to i64 free in many cases.
1952 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1953 !DstVT.isInteger())
1954 return false;
1955 unsigned SrcBits = SrcVT.getSizeInBits();
1956 unsigned DestBits = DstVT.getSizeInBits();
1957 return (SrcBits == 64 && DestBits == 32);
1958}
1959
1961 EVT SrcVT = Val.getValueType();
1962 // A truncate is free if it can be folded into a vnsrl or vnsra.
1963 if (Subtarget.hasVInstructions() &&
1964 (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
1965 SrcVT.isVector() && VT2.isVector()) {
1966 unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
1967 unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
1968 if (SrcBits == DestBits * 2) {
1969 return true;
1970 }
1971 }
1972 return TargetLowering::isTruncateFree(Val, VT2);
1973}
1974
1976 // Zexts are free if they can be combined with a load.
1977 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1978 // poorly with type legalization of compares preferring sext.
1979 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1980 EVT MemVT = LD->getMemoryVT();
1981 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1982 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1983 LD->getExtensionType() == ISD::ZEXTLOAD))
1984 return true;
1985 }
1986
1987 return TargetLowering::isZExtFree(Val, VT2);
1988}
1989
1991 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1992}
1993
1995 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1996}
1997
1999 return Subtarget.hasStdExtZbb() ||
2000 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
2001}
2002
2004 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
2005 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
2006}
2007
2009 const Instruction &AndI) const {
2010 // We expect to be able to match a bit extraction instruction if the Zbs
2011 // extension is supported and the mask is a power of two. However, we
2012 // conservatively return false if the mask would fit in an ANDI instruction,
2013 // on the basis that it's possible the sinking+duplication of the AND in
2014 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
2015 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
2016 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
2017 return false;
2018 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
2019 if (!Mask)
2020 return false;
2021 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
2022}
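// Worked example (editorial): with Zbs, a test like (x & 0x80000) != 0 has a
// power-of-two mask that does not fit a signed 12-bit immediate, so this hook
// returns true and the AND is sunk so isel can form BEXTI+BNEZ. For
// (x & 0x400) != 0 the mask fits ANDI; the hook returns false and the
// existing ANDI+BNEZ sequence is kept.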
2023
2025 EVT VT = Y.getValueType();
2026
2027 // FIXME: Support vectors once we have tests.
2028 if (VT.isVector())
2029 return false;
2030
2031 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
2032 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
2033}
2034
2036 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
2037 if (Subtarget.hasStdExtZbs())
2038 return X.getValueType().isScalarInteger();
2039 auto *C = dyn_cast<ConstantSDNode>(Y);
2040 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
2041 if (Subtarget.hasVendorXTHeadBs())
2042 return C != nullptr;
2043 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
2044 return C && C->getAPIntValue().ule(10);
2045}
2046
2048 EVT VT) const {
2049 // Only enable for rvv.
2050 if (!VT.isVector() || !Subtarget.hasVInstructions())
2051 return false;
2052
2053 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
2054 return false;
2055
2056 return true;
2057}
2058
2060 Type *Ty) const {
2061 assert(Ty->isIntegerTy());
2062
2063 unsigned BitSize = Ty->getIntegerBitWidth();
2064 if (BitSize > Subtarget.getXLen())
2065 return false;
2066
2067 // Fast path, assume 32-bit immediates are cheap.
2068 int64_t Val = Imm.getSExtValue();
2069 if (isInt<32>(Val))
2070 return true;
2071
2072 // A constant pool entry may be more aligned than the load we're trying to
2073 // replace. If we don't support unaligned scalar mem, prefer the constant
2074 // pool.
2075 // TODO: Can the caller pass down the alignment?
2076 if (!Subtarget.enableUnalignedScalarMem())
2077 return true;
2078
2079 // Prefer to keep the load if it would require many instructions.
2080 // This uses the same threshold we use for constant pools but doesn't
2081 // check useConstantPoolForLargeInts.
2082 // TODO: Should we keep the load only when we're definitely going to emit a
2083 // constant pool?
2084
2086 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
2087}
2088
2092 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
2093 SelectionDAG &DAG) const {
2094 // One interesting pattern that we'd want to form is 'bit extract':
2095 // ((1 >> Y) & 1) ==/!= 0
2096 // But we also need to be careful not to try to reverse that fold.
2097
2098 // Is this '((1 >> Y) & 1)'?
2099 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
2100 return false; // Keep the 'bit extract' pattern.
2101
2102 // Will this be '((1 >> Y) & 1)' after the transform?
2103 if (NewShiftOpcode == ISD::SRL && CC->isOne())
2104 return true; // Do form the 'bit extract' pattern.
2105
2106 // If 'X' is a constant, and we transform, then we will immediately
2107 // try to undo the fold, thus causing endless combine loop.
2108 // So only do the transform if X is not a constant. This matches the default
2109 // implementation of this function.
2110 return !XC;
2111}
2112
2114 unsigned Opc = VecOp.getOpcode();
2115
2116 // Assume target opcodes can't be scalarized.
2117 // TODO - do we have any exceptions?
2118 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
2119 return false;
2120
2121 // If the vector op is not supported, try to convert to scalar.
2122 EVT VecVT = VecOp.getValueType();
2123 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2124 return true;
2125
2126 // If the vector op is supported, but the scalar op is not, the transform may
2127 // not be worthwhile.
2128 // Permit a vector binary operation to be converted to a scalar binary
2129 // operation which is custom lowered with an illegal type.
2130 EVT ScalarVT = VecVT.getScalarType();
2131 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2132 isOperationCustom(Opc, ScalarVT);
2133}
2134
2136 const GlobalAddressSDNode *GA) const {
2137 // In order to maximise the opportunity for common subexpression elimination,
2138 // keep a separate ADD node for the global address offset instead of folding
2139 // it in the global address node. Later peephole optimisations may choose to
2140 // fold it back in when profitable.
2141 return false;
2142}
2143
2144// Returns 0-31 if the fli instruction is available for the type and this is
2145// legal FP immediate for the type. Returns -1 otherwise.
2147 if (!Subtarget.hasStdExtZfa())
2148 return -1;
2149
2150 bool IsSupportedVT = false;
2151 if (VT == MVT::f16) {
2152 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2153 } else if (VT == MVT::f32) {
2154 IsSupportedVT = true;
2155 } else if (VT == MVT::f64) {
2156 assert(Subtarget.hasStdExtD() && "Expect D extension");
2157 IsSupportedVT = true;
2158 }
2159
2160 if (!IsSupportedVT)
2161 return -1;
2162
2163 return RISCVLoadFPImm::getLoadFPImm(Imm);
2164}
2165
2167 bool ForCodeSize) const {
2168 bool IsLegalVT = false;
2169 if (VT == MVT::f16)
2170 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2171 else if (VT == MVT::f32)
2172 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2173 else if (VT == MVT::f64)
2174 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2175 else if (VT == MVT::bf16)
2176 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2177
2178 if (!IsLegalVT)
2179 return false;
2180
2181 if (getLegalZfaFPImm(Imm, VT) >= 0)
2182 return true;
2183
2184 // Cannot create a 64 bit floating-point immediate value for rv32.
2185 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2186 // td can handle +0.0 or -0.0 already.
2187 // -0.0 can be created by fmv + fneg.
2188 return Imm.isZero();
2189 }
2190
2191 // Special case: fmv + fneg
2192 if (Imm.isNegZero())
2193 return true;
2194
2195 // Building an integer and then converting requires a fmv at the end of
2196 // the integer sequence. The fmv is not required for Zfinx.
2197 const int FmvCost = Subtarget.hasStdExtZfinx() ? 0 : 1;
2198 const int Cost =
2199 FmvCost + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
2200 Subtarget.getXLen(), Subtarget);
2201 return Cost <= FPImmCost;
2202}
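// Worked example (illustrative, assuming the default fpimm-cost threshold of
// 2 and no Zfa/Zfinx): f64 1.0 has bit pattern 0x3FF0000000000000, which RV64
// can materialize with lui+slli (2 instructions) plus an fmv.d.x, so
// Cost = 3 > 2 and the constant is loaded from a constant pool instead. With
// Zfa, the earlier getLegalZfaFPImm check accepts 1.0 directly as an fli
// immediate.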
2203
2204// TODO: This is very conservative.
2206 unsigned Index) const {
2208 return false;
2209
2210 // Only support extracting a fixed from a fixed vector for now.
2211 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2212 return false;
2213
2214 EVT EltVT = ResVT.getVectorElementType();
2215 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2216
2217 // The smallest type we can slide is i8.
2218 // TODO: We can extract index 0 from a mask vector without a slide.
2219 if (EltVT == MVT::i1)
2220 return false;
2221
2222 unsigned ResElts = ResVT.getVectorNumElements();
2223 unsigned SrcElts = SrcVT.getVectorNumElements();
2224
2225 unsigned MinVLen = Subtarget.getRealMinVLen();
2226 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2227
2228 // If we're extracting only data from the first VLEN bits of the source
2229 // then we can always do this with an m1 vslidedown.vx. Restricting the
2230 // Index ensures we can use a vslidedown.vi.
2231 // TODO: We can generalize this when the exact VLEN is known.
2232 if (Index + ResElts <= MinVLMAX && Index < 31)
2233 return true;
2234
2235 // Conservatively only handle extracting half of a vector.
2236 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2237 // the upper half of a vector until we have more test coverage.
2238 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2239 // a cheap extract. However, this case is important in practice for
2240 // shuffled extracts of longer vectors. How do we resolve this?
2241 return (ResElts * 2) == SrcElts && (Index == 0 || Index == ResElts);
2242}
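// Worked examples (illustrative, assuming VLEN=128 so MinVLMAX=4 for i32):
//  - extracting v2i32 at index 2 from v8i32: 2 + 2 <= 4 and 2 < 31, so a
//    single m1 vslidedown.vi suffices and this returns true.
//  - extracting v4i32 at index 4 from v8i32: the first check fails (4 + 4 > 4)
//    but it is exactly the upper half, so the final check returns true.
//  - extracting v2i32 at index 6 from v8i32: neither check matches; false.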
2243
2246 EVT VT) const {
2247 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2248 // We might still end up using a GPR but that will be decided based on ABI.
2249 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2250 !Subtarget.hasStdExtZfhminOrZhinxmin())
2251 return MVT::f32;
2252
2253 MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2254
2255 return PartVT;
2256}
2257
2258unsigned
2260 std::optional<MVT> RegisterVT) const {
2261 // Pair inline assembly operand
2262 if (VT == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) && RegisterVT &&
2263 *RegisterVT == MVT::Untyped)
2264 return 1;
2265
2266 return TargetLowering::getNumRegisters(Context, VT, RegisterVT);
2267}
2268
2271 EVT VT) const {
2272 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2273 // We might still end up using a GPR but that will be decided based on ABI.
2274 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2275 !Subtarget.hasStdExtZfhminOrZhinxmin())
2276 return 1;
2277
2279}
2280
2282 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2283 unsigned &NumIntermediates, MVT &RegisterVT) const {
2285 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2286
2287 return NumRegs;
2288}
2289
2290// Changes the condition code and swaps operands if necessary, so the SetCC
2291// operation matches one of the comparisons supported directly by branches
2292// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2293// with 1/-1.
2294static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2295 ISD::CondCode &CC, SelectionDAG &DAG) {
2296 // If this is a single bit test that can't be handled by ANDI, shift the
2297 // bit to be tested to the MSB and perform a signed compare with 0.
2298 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2299 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2300 isa<ConstantSDNode>(LHS.getOperand(1))) {
2301 uint64_t Mask = LHS.getConstantOperandVal(1);
2302 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2303 unsigned ShAmt = 0;
2304 if (isPowerOf2_64(Mask)) {
2305 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2306 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2307 } else {
2308 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2309 }
2310
2311 LHS = LHS.getOperand(0);
2312 if (ShAmt != 0)
2313 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2314 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2315 return;
2316 }
2317 }
2318
2319 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2320 int64_t C = RHSC->getSExtValue();
2321 switch (CC) {
2322 default: break;
2323 case ISD::SETGT:
2324 // Convert X > -1 to X >= 0.
2325 if (C == -1) {
2326 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2327 CC = ISD::SETGE;
2328 return;
2329 }
2330 break;
2331 case ISD::SETLT:
2332 // Convert X < 1 to 0 >= X.
2333 if (C == 1) {
2334 RHS = LHS;
2335 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2336 CC = ISD::SETGE;
2337 return;
2338 }
2339 break;
2340 }
2341 }
2342
2343 switch (CC) {
2344 default:
2345 break;
2346 case ISD::SETGT:
2347 case ISD::SETLE:
2348 case ISD::SETUGT:
2349 case ISD::SETULE:
2351 std::swap(LHS, RHS);
2352 break;
2353 }
2354}
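// Worked example (illustrative): on RV64, '(X & 0x80000) != 0' has a mask that
// does not fit an ANDI immediate, so the tested bit is shifted into the sign
// position (ShAmt = 63 - 19 = 44) and the branch becomes the signed compare
// '(X << 44) < 0', i.e. a single SLLI followed by BLTZ.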
2355
2357 if (VT.isRISCVVectorTuple()) {
2358 if (VT.SimpleTy >= MVT::riscv_nxv1i8x2 &&
2359 VT.SimpleTy <= MVT::riscv_nxv1i8x8)
2360 return RISCVII::LMUL_F8;
2361 if (VT.SimpleTy >= MVT::riscv_nxv2i8x2 &&
2362 VT.SimpleTy <= MVT::riscv_nxv2i8x8)
2363 return RISCVII::LMUL_F4;
2364 if (VT.SimpleTy >= MVT::riscv_nxv4i8x2 &&
2365 VT.SimpleTy <= MVT::riscv_nxv4i8x8)
2366 return RISCVII::LMUL_F2;
2367 if (VT.SimpleTy >= MVT::riscv_nxv8i8x2 &&
2368 VT.SimpleTy <= MVT::riscv_nxv8i8x8)
2369 return RISCVII::LMUL_1;
2370 if (VT.SimpleTy >= MVT::riscv_nxv16i8x2 &&
2371 VT.SimpleTy <= MVT::riscv_nxv16i8x4)
2372 return RISCVII::LMUL_2;
2373 if (VT.SimpleTy == MVT::riscv_nxv32i8x2)
2374 return RISCVII::LMUL_4;
2375 llvm_unreachable("Invalid vector tuple type LMUL.");
2376 }
2377
2378 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2379 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2380 if (VT.getVectorElementType() == MVT::i1)
2381 KnownSize *= 8;
2382
2383 switch (KnownSize) {
2384 default:
2385 llvm_unreachable("Invalid LMUL.");
2386 case 8:
2388 case 16:
2390 case 32:
2392 case 64:
2394 case 128:
2396 case 256:
2398 case 512:
2400 }
2401}
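// Illustrative mapping (RVVBitsPerBlock is 64): nxv1i8 has a known minimum
// size of 8 bits and maps to LMUL_F8; nxv8i8, nxv4i16, nxv2i32 and nxv1i64
// are all 64 bits and map to LMUL_1; nxv16i8 (128 bits) maps to LMUL_2 and
// nxv64i8 (512 bits) to LMUL_8. Because i1 elements are scaled by 8 first,
// nxv8i1 also maps to LMUL_1.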
2402
2404 switch (LMul) {
2405 default:
2406 llvm_unreachable("Invalid LMUL.");
2411 return RISCV::VRRegClassID;
2413 return RISCV::VRM2RegClassID;
2415 return RISCV::VRM4RegClassID;
2417 return RISCV::VRM8RegClassID;
2418 }
2419}
2420
2421unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2422 RISCVII::VLMUL LMUL = getLMUL(VT);
2423 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2424 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2425 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2426 LMUL == RISCVII::VLMUL::LMUL_1) {
2427 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2428 "Unexpected subreg numbering");
2429 return RISCV::sub_vrm1_0 + Index;
2430 }
2431 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2432 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2433 "Unexpected subreg numbering");
2434 return RISCV::sub_vrm2_0 + Index;
2435 }
2436 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2437 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2438 "Unexpected subreg numbering");
2439 return RISCV::sub_vrm4_0 + Index;
2440 }
2441 llvm_unreachable("Invalid vector type.");
2442}
2443
2445 if (VT.isRISCVVectorTuple()) {
2446 unsigned NF = VT.getRISCVVectorTupleNumFields();
2447 unsigned RegsPerField =
2448 std::max(1U, (unsigned)VT.getSizeInBits().getKnownMinValue() /
2449 (NF * RISCV::RVVBitsPerBlock));
2450 switch (RegsPerField) {
2451 case 1:
2452 if (NF == 2)
2453 return RISCV::VRN2M1RegClassID;
2454 if (NF == 3)
2455 return RISCV::VRN3M1RegClassID;
2456 if (NF == 4)
2457 return RISCV::VRN4M1RegClassID;
2458 if (NF == 5)
2459 return RISCV::VRN5M1RegClassID;
2460 if (NF == 6)
2461 return RISCV::VRN6M1RegClassID;
2462 if (NF == 7)
2463 return RISCV::VRN7M1RegClassID;
2464 if (NF == 8)
2465 return RISCV::VRN8M1RegClassID;
2466 break;
2467 case 2:
2468 if (NF == 2)
2469 return RISCV::VRN2M2RegClassID;
2470 if (NF == 3)
2471 return RISCV::VRN3M2RegClassID;
2472 if (NF == 4)
2473 return RISCV::VRN4M2RegClassID;
2474 break;
2475 case 4:
2476 assert(NF == 2);
2477 return RISCV::VRN2M4RegClassID;
2478 default:
2479 break;
2480 }
2481 llvm_unreachable("Invalid vector tuple type RegClass.");
2482 }
2483
2484 if (VT.getVectorElementType() == MVT::i1)
2485 return RISCV::VRRegClassID;
2486 return getRegClassIDForLMUL(getLMUL(VT));
2487}
2488
2489// Attempt to decompose a subvector insert/extract between VecVT and
2490// SubVecVT via subregister indices. Returns the subregister index that
2491// can perform the subvector insert/extract with the given element index, as
2492// well as the index corresponding to any leftover subvectors that must be
2493// further inserted/extracted within the register class for SubVecVT.
2494std::pair<unsigned, unsigned>
2496 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2497 const RISCVRegisterInfo *TRI) {
2498 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2499 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2500 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2501 "Register classes not ordered");
2502 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2503 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2504
2505 // If VecVT is a vector tuple type, either it's a tuple type with the same
2506 // RegClass as SubVecVT, or SubVecVT is actually a subvector of VecVT.
2507 if (VecVT.isRISCVVectorTuple()) {
2508 if (VecRegClassID == SubRegClassID)
2509 return {RISCV::NoSubRegister, 0};
2510
2511 assert(SubVecVT.isScalableVector() &&
2512 "Only allow scalable vector subvector.");
2513 assert(getLMUL(VecVT) == getLMUL(SubVecVT) &&
2514 "Invalid vector tuple insert/extract for vector and subvector with "
2515 "different LMUL.");
2516 return {getSubregIndexByMVT(VecVT, InsertExtractIdx), 0};
2517 }
2518
2519 // Try to compose a subregister index that takes us from the incoming
2520 // LMUL>1 register class down to the outgoing one. At each step we halve
2521 // the LMUL:
2522 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2523 // Note that this is not guaranteed to find a subregister index, such as
2524 // when we are extracting from one VR type to another.
2525 unsigned SubRegIdx = RISCV::NoSubRegister;
2526 for (const unsigned RCID :
2527 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2528 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2529 VecVT = VecVT.getHalfNumVectorElementsVT();
2530 bool IsHi =
2531 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2532 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2533 getSubregIndexByMVT(VecVT, IsHi));
2534 if (IsHi)
2535 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2536 }
2537 return {SubRegIdx, InsertExtractIdx};
2538}
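// Expanding the nxv16i32@12 example above (illustrative): the walk visits
// VRM4, VRM2 and VR in turn. Element 12 lies in the upper half at the first
// step (sub_vrm4_1, remaining index 4), the upper half again at the second
// (sub_vrm2_1, remaining index 0) and the lower half at the last (sub_vrm1_0),
// composing to sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0 with leftover
// index 0.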
2539
2540// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2541// stores for those types.
2542bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2543 return !Subtarget.useRVVForFixedLengthVectors() ||
2544 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2545}
2546
2548 if (!ScalarTy.isSimple())
2549 return false;
2550 switch (ScalarTy.getSimpleVT().SimpleTy) {
2551 case MVT::iPTR:
2552 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2553 case MVT::i8:
2554 case MVT::i16:
2555 case MVT::i32:
2556 return true;
2557 case MVT::i64:
2558 return Subtarget.hasVInstructionsI64();
2559 case MVT::f16:
2560 return Subtarget.hasVInstructionsF16Minimal();
2561 case MVT::bf16:
2562 return Subtarget.hasVInstructionsBF16Minimal();
2563 case MVT::f32:
2564 return Subtarget.hasVInstructionsF32();
2565 case MVT::f64:
2566 return Subtarget.hasVInstructionsF64();
2567 default:
2568 return false;
2569 }
2570}
2571
2572
2573unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2574 return NumRepeatedDivisors;
2575}
2576
2578 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2579 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2580 "Unexpected opcode");
2581 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2582 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2584 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2585 if (!II)
2586 return SDValue();
2587 return Op.getOperand(II->VLOperand + 1 + HasChain);
2588}
2589
2591 const RISCVSubtarget &Subtarget) {
2592 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2593 if (!Subtarget.useRVVForFixedLengthVectors())
2594 return false;
2595
2596 // We only support a set of vector types with a consistent maximum fixed size
2597 // across all supported vector element types to avoid legalization issues.
2598 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2599 // fixed-length vector type we support is 1024 bytes.
2600 if (VT.getFixedSizeInBits() > 1024 * 8)
2601 return false;
2602
2603 unsigned MinVLen = Subtarget.getRealMinVLen();
2604
2605 MVT EltVT = VT.getVectorElementType();
2606
2607 // Don't use RVV for vectors we cannot scalarize if required.
2608 switch (EltVT.SimpleTy) {
2609 // i1 is supported but has different rules.
2610 default:
2611 return false;
2612 case MVT::i1:
2613 // Masks can only use a single register.
2614 if (VT.getVectorNumElements() > MinVLen)
2615 return false;
2616 MinVLen /= 8;
2617 break;
2618 case MVT::i8:
2619 case MVT::i16:
2620 case MVT::i32:
2621 break;
2622 case MVT::i64:
2623 if (!Subtarget.hasVInstructionsI64())
2624 return false;
2625 break;
2626 case MVT::f16:
2627 if (!Subtarget.hasVInstructionsF16Minimal())
2628 return false;
2629 break;
2630 case MVT::bf16:
2631 if (!Subtarget.hasVInstructionsBF16Minimal())
2632 return false;
2633 break;
2634 case MVT::f32:
2635 if (!Subtarget.hasVInstructionsF32())
2636 return false;
2637 break;
2638 case MVT::f64:
2639 if (!Subtarget.hasVInstructionsF64())
2640 return false;
2641 break;
2642 }
2643
2644 // Reject elements larger than ELEN.
2645 if (EltVT.getSizeInBits() > Subtarget.getELen())
2646 return false;
2647
2648 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2649 // Don't use RVV for types that don't fit.
2650 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2651 return false;
2652
2653 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2654 // the base fixed length RVV support in place.
2655 if (!VT.isPow2VectorType())
2656 return false;
2657
2658 return true;
2659}
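// Illustrative outcomes (assuming VLEN=128 and a maximum fixed-length LMUL of
// 8): v8i32 needs 256 bits, i.e. LMUL 2, and is accepted; v64i32 would need
// LMUL 16 and is rejected; v3i32 is rejected because it is not a power-of-2
// vector type.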
2660
2661bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2662 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2663}
2664
2665// Return the largest legal scalable vector type that matches VT's element type.
2667 const RISCVSubtarget &Subtarget) {
2668 // This may be called before legal types are setup.
2669 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2670 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2671 "Expected legal fixed length vector!");
2672
2673 unsigned MinVLen = Subtarget.getRealMinVLen();
2674 unsigned MaxELen = Subtarget.getELen();
2675
2676 MVT EltVT = VT.getVectorElementType();
2677 switch (EltVT.SimpleTy) {
2678 default:
2679 llvm_unreachable("unexpected element type for RVV container");
2680 case MVT::i1:
2681 case MVT::i8:
2682 case MVT::i16:
2683 case MVT::i32:
2684 case MVT::i64:
2685 case MVT::bf16:
2686 case MVT::f16:
2687 case MVT::f32:
2688 case MVT::f64: {
2689 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2690 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2691 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2692 unsigned NumElts =
2693 (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
2694 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2695 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2696 return MVT::getScalableVectorVT(EltVT, NumElts);
2697 }
2698 }
2699}
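// Illustrative container selection (assuming VLEN=128, ELEN=64): v4i32 maps
// to nxv2i32 (4 * 64 / 128 = 2 elements, i.e. LMUL=1), while v2i8 maps to
// nxv1i8 because NumElts is clamped to at least RVVBitsPerBlock / MaxELen = 1.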
2700
2702 const RISCVSubtarget &Subtarget) {
2704 Subtarget);
2705}
2706
2708 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2709}
2710
2711// Grow V to consume an entire RVV register.
2713 const RISCVSubtarget &Subtarget) {
2714 assert(VT.isScalableVector() &&
2715 "Expected to convert into a scalable vector!");
2716 assert(V.getValueType().isFixedLengthVector() &&
2717 "Expected a fixed length vector operand!");
2718 SDLoc DL(V);
2719 SDValue Zero = DAG.getVectorIdxConstant(0, DL);
2720 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2721}
2722
2723// Shrink V so it's just big enough to maintain a VT's worth of data.
2725 const RISCVSubtarget &Subtarget) {
2727 "Expected to convert into a fixed length vector!");
2728 assert(V.getValueType().isScalableVector() &&
2729 "Expected a scalable vector operand!");
2730 SDLoc DL(V);
2731 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2732 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2733}
2734
2735/// Return the type of the mask type suitable for masking the provided
2736/// vector type. This is simply an i1 element type vector of the same
2737/// (possibly scalable) length.
2738static MVT getMaskTypeFor(MVT VecVT) {
2739 assert(VecVT.isVector());
2741 return MVT::getVectorVT(MVT::i1, EC);
2742}
2743
2744/// Creates an all ones mask suitable for masking a vector of type VecTy with
2745/// vector length VL.
2746static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2747 SelectionDAG &DAG) {
2748 MVT MaskVT = getMaskTypeFor(VecVT);
2749 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2750}
2751
2752static std::pair<SDValue, SDValue>
2754 const RISCVSubtarget &Subtarget) {
2755 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2756 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2757 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2758 return {Mask, VL};
2759}
2760
2761static std::pair<SDValue, SDValue>
2762getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2763 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2764 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2765 SDValue VL = DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2766 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2767 return {Mask, VL};
2768}
2769
2770// Gets the two common "VL" operands: an all-ones mask and the vector length.
2771// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2772// the vector type that the fixed-length vector is contained in. Otherwise if
2773// VecVT is scalable, then ContainerVT should be the same as VecVT.
2774static std::pair<SDValue, SDValue>
2775getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2776 const RISCVSubtarget &Subtarget) {
2777 if (VecVT.isFixedLengthVector())
2778 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2779 Subtarget);
2780 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2781 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2782}
2783
2785 SelectionDAG &DAG) const {
2786 assert(VecVT.isScalableVector() && "Expected scalable vector");
2787 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2788 VecVT.getVectorElementCount());
2789}
2790
2791std::pair<unsigned, unsigned>
2793 const RISCVSubtarget &Subtarget) {
2794 assert(VecVT.isScalableVector() && "Expected scalable vector");
2795
2796 unsigned EltSize = VecVT.getScalarSizeInBits();
2797 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2798
2799 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2800 unsigned MaxVLMAX =
2801 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2802
2803 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2804 unsigned MinVLMAX =
2805 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2806
2807 return std::make_pair(MinVLMAX, MaxVLMAX);
2808}
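// Worked example (illustrative): nxv2i32 occupies one vector register
// (LMUL=1), so VLMAX = VLEN / 32. On a subtarget whose VLEN is known to lie
// between 128 and 256 bits this returns {4, 8}; nxv4i32 (LMUL=2) on the same
// subtarget yields {8, 16}.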
2809
2810// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2811// of either is (currently) supported. This can get us into an infinite loop
2812// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2813// as a ..., etc.
2814// Until either (or both) of these can reliably lower any node, reporting that
2815// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2816// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2817// which is not desirable.
2819 EVT VT, unsigned DefinedValues) const {
2820 return false;
2821}
2822
2824 // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
2825 // implementation-defined.
2826 if (!VT.isVector())
2828 unsigned DLenFactor = Subtarget.getDLenFactor();
2829 unsigned Cost;
2830 if (VT.isScalableVector()) {
2831 unsigned LMul;
2832 bool Fractional;
2833 std::tie(LMul, Fractional) =
2835 if (Fractional)
2836 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2837 else
2838 Cost = (LMul * DLenFactor);
2839 } else {
2840 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2841 }
2842 return Cost;
2843}
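// Illustrative costs (assuming DLEN == VLEN, i.e. DLenFactor = 1): an LMUL=4
// scalable type costs 4, a fractional LMUL=1/2 type costs 1, and a 256-bit
// fixed-length vector on a VLEN=128 subtarget costs ceil(256/128) = 2. With
// DLEN = VLEN/2 these become 8, 1 and 4 respectively.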
2844
2845
2846/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2847/// is generally quadratic in the number of vreg implied by LMUL. Note that
2848/// operand (index and possibly mask) are handled separately.
2850 return getLMULCost(VT) * getLMULCost(VT);
2851}
2852
2853/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2854/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2855/// or may track the vrgather.vv cost. It is implementation-dependent.
2857 return getLMULCost(VT);
2858}
2859
2860/// Return the cost of a vslidedown.vx or vslideup.vx instruction
2861/// for the type VT. (This does not cover the vslide1up or vslide1down
2862/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2863/// or may track the vrgather.vv cost. It is implementation-dependent.
2865 return getLMULCost(VT);
2866}
2867
2868/// Return the cost of a vslidedown.vi or vslideup.vi instruction
2869/// for the type VT. (This does not cover the vslide1up or vslide1down
2870/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2871/// or may track the vrgather.vv cost. It is implementation-dependent.
2873 return getLMULCost(VT);
2874}
2875
2877 const RISCVSubtarget &Subtarget) {
2878 // f16 conversions are promoted to f32 when Zfh/Zhinx are not supported.
2879 // bf16 conversions are always promoted to f32.
2880 if ((Op.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2881 Op.getValueType() == MVT::bf16) {
2882 bool IsStrict = Op->isStrictFPOpcode();
2883
2884 SDLoc DL(Op);
2885 if (IsStrict) {
2886 SDValue Val = DAG.getNode(Op.getOpcode(), DL, {MVT::f32, MVT::Other},
2887 {Op.getOperand(0), Op.getOperand(1)});
2888 return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
2889 {Op.getValueType(), MVT::Other},
2890 {Val.getValue(1), Val.getValue(0),
2891 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
2892 }
2893 return DAG.getNode(
2894 ISD::FP_ROUND, DL, Op.getValueType(),
2895 DAG.getNode(Op.getOpcode(), DL, MVT::f32, Op.getOperand(0)),
2896 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
2897 }
2898
2899 // Other operations are legal.
2900 return Op;
2901}
2902
2904 const RISCVSubtarget &Subtarget) {
2905 // RISC-V FP-to-int conversions saturate to the destination register size, but
2906 // don't produce 0 for nan. We can use a conversion instruction and fix the
2907 // nan case with a compare and a select.
2908 SDValue Src = Op.getOperand(0);
2909
2910 MVT DstVT = Op.getSimpleValueType();
2911 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2912
2913 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2914
2915 if (!DstVT.isVector()) {
2916 // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
2917 // the result.
2918 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2919 Src.getValueType() == MVT::bf16) {
2920 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2921 }
2922
2923 unsigned Opc;
2924 if (SatVT == DstVT)
2925 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2926 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2928 else
2929 return SDValue();
2930 // FIXME: Support other SatVTs by clamping before or after the conversion.
2931
2932 SDLoc DL(Op);
2933 SDValue FpToInt = DAG.getNode(
2934 Opc, DL, DstVT, Src,
2936
2937 if (Opc == RISCVISD::FCVT_WU_RV64)
2938 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2939
2940 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2941 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2943 }
2944
2945 // Vectors.
2946
2947 MVT DstEltVT = DstVT.getVectorElementType();
2948 MVT SrcVT = Src.getSimpleValueType();
2949 MVT SrcEltVT = SrcVT.getVectorElementType();
2950 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2951 unsigned DstEltSize = DstEltVT.getSizeInBits();
2952
2953 // Only handle saturating to the destination type.
2954 if (SatVT != DstEltVT)
2955 return SDValue();
2956
2957 MVT DstContainerVT = DstVT;
2958 MVT SrcContainerVT = SrcVT;
2959 if (DstVT.isFixedLengthVector()) {
2960 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2961 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2962 assert(DstContainerVT.getVectorElementCount() ==
2963 SrcContainerVT.getVectorElementCount() &&
2964 "Expected same element count");
2965 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2966 }
2967
2968 SDLoc DL(Op);
2969
2970 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2971
2972 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2973 {Src, Src, DAG.getCondCode(ISD::SETNE),
2974 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2975
2976 // Need to widen by more than 1 step, promote the FP type, then do a widening
2977 // convert.
2978 if (DstEltSize > (2 * SrcEltSize)) {
2979 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2980 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2981 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2982 }
2983
2984 MVT CvtContainerVT = DstContainerVT;
2985 MVT CvtEltVT = DstEltVT;
2986 if (SrcEltSize > (2 * DstEltSize)) {
2987 CvtEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
2988 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
2989 }
2990
2991 unsigned RVVOpc =
2993 SDValue Res = DAG.getNode(RVVOpc, DL, CvtContainerVT, Src, Mask, VL);
2994
2995 while (CvtContainerVT != DstContainerVT) {
2996 CvtEltVT = MVT::getIntegerVT(CvtEltVT.getSizeInBits() / 2);
2997 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
2998 // Rounding mode here is arbitrary since we aren't shifting out any bits.
2999 unsigned ClipOpc = IsSigned ? RISCVISD::TRUNCATE_VECTOR_VL_SSAT
3001 Res = DAG.getNode(ClipOpc, DL, CvtContainerVT, Res, Mask, VL);
3002 }
3003
3004 SDValue SplatZero = DAG.getNode(
3005 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
3006 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
3007 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
3008 Res, DAG.getUNDEF(DstContainerVT), VL);
3009
3010 if (DstVT.isFixedLengthVector())
3011 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3012
3013 return Res;
3014}
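// Illustrative scalar lowering (a sketch, not taken from this file): on RV64,
// 'fptosi.sat.i32.f32' typically becomes
//   fcvt.w.s a0, fa0, rtz   ; saturating convert with round-toward-zero
//   feq.s    a1, fa0, fa0   ; a1 == 0 iff the input was NaN
//   neg      a1, a1
//   and      a0, a0, a1     ; zero the result for the NaN case
// which is the FCVT plus compare-and-select structure built above.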
3015
3017 const RISCVSubtarget &Subtarget) {
3018 bool IsStrict = Op->isStrictFPOpcode();
3019 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3020
3021 // f16 conversions are promoted to f32 when Zfh/Zhinx is not enabled.
3022 // bf16 conversions are always promoted to f32.
3023 if ((SrcVal.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3024 SrcVal.getValueType() == MVT::bf16) {
3025 SDLoc DL(Op);
3026 if (IsStrict) {
3027 SDValue Ext =
3028 DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
3029 {Op.getOperand(0), SrcVal});
3030 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
3031 {Ext.getValue(1), Ext.getValue(0)});
3032 }
3033 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
3034 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, SrcVal));
3035 }
3036
3037 // Other operations are legal.
3038 return Op;
3039}
3040
3042 switch (Opc) {
3043 case ISD::FROUNDEVEN:
3045 case ISD::VP_FROUNDEVEN:
3046 return RISCVFPRndMode::RNE;
3047 case ISD::FTRUNC:
3048 case ISD::STRICT_FTRUNC:
3049 case ISD::VP_FROUNDTOZERO:
3050 return RISCVFPRndMode::RTZ;
3051 case ISD::FFLOOR:
3052 case ISD::STRICT_FFLOOR:
3053 case ISD::VP_FFLOOR:
3054 return RISCVFPRndMode::RDN;
3055 case ISD::FCEIL:
3056 case ISD::STRICT_FCEIL:
3057 case ISD::VP_FCEIL:
3058 return RISCVFPRndMode::RUP;
3059 case ISD::FROUND:
3060 case ISD::STRICT_FROUND:
3061 case ISD::VP_FROUND:
3062 return RISCVFPRndMode::RMM;
3063 case ISD::FRINT:
3064 case ISD::VP_FRINT:
3065 return RISCVFPRndMode::DYN;
3066 }
3067
3069}
3070
3071// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
3072// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
3073// the integer domain and back. Taking care to avoid converting values that are
3074// nan or already correct.
3075static SDValue
3077 const RISCVSubtarget &Subtarget) {
3078 MVT VT = Op.getSimpleValueType();
3079 assert(VT.isVector() && "Unexpected type");
3080
3081 SDLoc DL(Op);
3082
3083 SDValue Src = Op.getOperand(0);
3084
3085 MVT ContainerVT = VT;
3086 if (VT.isFixedLengthVector()) {
3087 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3088 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3089 }
3090
3091 SDValue Mask, VL;
3092 if (Op->isVPOpcode()) {
3093 Mask = Op.getOperand(1);
3094 if (VT.isFixedLengthVector())
3095 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3096 Subtarget);
3097 VL = Op.getOperand(2);
3098 } else {
3099 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3100 }
3101
3102 // Freeze the source since we are increasing the number of uses.
3103 Src = DAG.getFreeze(Src);
3104
3105 // We do the conversion on the absolute value and fix the sign at the end.
3106 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3107
3108 // Determine the largest integer that can be represented exactly. This and
3109 // values larger than it don't have any fractional bits so don't need to
3110 // be converted.
3111 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3112 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3113 APFloat MaxVal = APFloat(FltSem);
3114 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3115 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3116 SDValue MaxValNode =
3117 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3118 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3119 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3120
3121 // If abs(Src) was larger than MaxVal or nan, keep it.
3122 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3123 Mask =
3124 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3125 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3126 Mask, Mask, VL});
3127
3128 // Truncate to integer and convert back to FP.
3129 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3130 MVT XLenVT = Subtarget.getXLenVT();
3131 SDValue Truncated;
3132
3133 switch (Op.getOpcode()) {
3134 default:
3135 llvm_unreachable("Unexpected opcode");
3136 case ISD::FRINT:
3137 case ISD::VP_FRINT:
3138 case ISD::FCEIL:
3139 case ISD::VP_FCEIL:
3140 case ISD::FFLOOR:
3141 case ISD::VP_FFLOOR:
3142 case ISD::FROUND:
3143 case ISD::FROUNDEVEN:
3144 case ISD::VP_FROUND:
3145 case ISD::VP_FROUNDEVEN:
3146 case ISD::VP_FROUNDTOZERO: {
3149 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3150 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3151 break;
3152 }
3153 case ISD::FTRUNC:
3154 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3155 Mask, VL);
3156 break;
3157 case ISD::FNEARBYINT:
3158 case ISD::VP_FNEARBYINT:
3159 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3160 Mask, VL);
3161 break;
3162 }
3163
3164 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3165 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3166 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3167 Mask, VL);
3168
3169 // Restore the original sign so that -0.0 is preserved.
3170 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3171 Src, Src, Mask, VL);
3172
3173 if (!VT.isFixedLengthVector())
3174 return Truncated;
3175
3176 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3177}
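// Worked example (illustrative): for f32 elements the significand has 24 bits,
// so MaxVal = 2^23 = 8388608.0. Any lane with |x| >= 2^23 is already an
// integer (and NaN compares unordered), so such lanes fail the SETOLT test and
// are left untouched instead of being round-tripped through the integer
// domain.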
3178
3179// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
3180// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting any sNaN in the source
3181// to qNaN and converting the new source to integer and back to FP.
3182static SDValue
3184 const RISCVSubtarget &Subtarget) {
3185 SDLoc DL(Op);
3186 MVT VT = Op.getSimpleValueType();
3187 SDValue Chain = Op.getOperand(0);
3188 SDValue Src = Op.getOperand(1);
3189
3190 MVT ContainerVT = VT;
3191 if (VT.isFixedLengthVector()) {
3192 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3193 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3194 }
3195
3196 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3197
3198 // Freeze the source since we are increasing the number of uses.
3199 Src = DAG.getFreeze(Src);
3200
3201 // Convert sNaN to qNaN by executing x + x for all unordered elements x in Src.
3202 MVT MaskVT = Mask.getSimpleValueType();
3204 DAG.getVTList(MaskVT, MVT::Other),
3205 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3206 DAG.getUNDEF(MaskVT), Mask, VL});
3207 Chain = Unorder.getValue(1);
3209 DAG.getVTList(ContainerVT, MVT::Other),
3210 {Chain, Src, Src, Src, Unorder, VL});
3211 Chain = Src.getValue(1);
3212
3213 // We do the conversion on the absolute value and fix the sign at the end.
3214 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3215
3216 // Determine the largest integer that can be represented exactly. This and
3217 // values larger than it don't have any fractional bits so don't need to
3218 // be converted.
3219 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3220 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3221 APFloat MaxVal = APFloat(FltSem);
3222 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3223 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3224 SDValue MaxValNode =
3225 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3226 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3227 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3228
3229 // If abs(Src) was larger than MaxVal or nan, keep it.
3230 Mask = DAG.getNode(
3231 RISCVISD::SETCC_VL, DL, MaskVT,
3232 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3233
3234 // Truncate to integer and convert back to FP.
3235 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3236 MVT XLenVT = Subtarget.getXLenVT();
3237 SDValue Truncated;
3238
3239 switch (Op.getOpcode()) {
3240 default:
3241 llvm_unreachable("Unexpected opcode");
3242 case ISD::STRICT_FCEIL:
3243 case ISD::STRICT_FFLOOR:
3244 case ISD::STRICT_FROUND:
3248 Truncated = DAG.getNode(
3249 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3250 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3251 break;
3252 }
3253 case ISD::STRICT_FTRUNC:
3254 Truncated =
3256 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3257 break;
3260 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3261 Mask, VL);
3262 break;
3263 }
3264 Chain = Truncated.getValue(1);
3265
3266 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3267 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3268 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3269 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3270 Truncated, Mask, VL);
3271 Chain = Truncated.getValue(1);
3272 }
3273
3274 // Restore the original sign so that -0.0 is preserved.
3275 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3276 Src, Src, Mask, VL);
3277
3278 if (VT.isFixedLengthVector())
3279 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3280 return DAG.getMergeValues({Truncated, Chain}, DL);
3281}
3282
3283static SDValue
3285 const RISCVSubtarget &Subtarget) {
3286 MVT VT = Op.getSimpleValueType();
3287 if (VT.isVector())
3288 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3289
3290 if (DAG.shouldOptForSize())
3291 return SDValue();
3292
3293 SDLoc DL(Op);
3294 SDValue Src = Op.getOperand(0);
3295
3296 // Create an integer the size of the mantissa with the MSB set. This and all
3297 // values larger than it don't have any fractional bits so don't need to be
3298 // converted.
3299 const fltSemantics &FltSem = VT.getFltSemantics();
3300 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3301 APFloat MaxVal = APFloat(FltSem);
3302 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3303 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3304 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3305
3307 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3308 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3309}
3310
3311// Expand vector LRINT and LLRINT by converting to the integer domain.
3313 const RISCVSubtarget &Subtarget) {
3314 MVT VT = Op.getSimpleValueType();
3315 assert(VT.isVector() && "Unexpected type");
3316
3317 SDLoc DL(Op);
3318 SDValue Src = Op.getOperand(0);
3319 MVT ContainerVT = VT;
3320
3321 if (VT.isFixedLengthVector()) {
3322 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3323 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3324 }
3325
3326 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3327 SDValue Truncated = DAG.getNode(
3328 RISCVISD::VFCVT_RM_X_F_VL, DL, ContainerVT, Src, Mask,
3330 VL);
3331
3332 if (!VT.isFixedLengthVector())
3333 return Truncated;
3334
3335 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3336}
3337
3338static SDValue
3340 const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op,
3341 SDValue Offset, SDValue Mask, SDValue VL,
3343 if (Passthru.isUndef())
3345 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3346 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3347 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3348}
3349
3350static SDValue
3351getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3352 EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask,
3353 SDValue VL,
3355 if (Passthru.isUndef())
3357 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3358 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3359 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3360}
3361
3362static MVT getLMUL1VT(MVT VT) {
3364 "Unexpected vector MVT");
3368}
3369
3373 int64_t Addend;
3374};
3375
3376static std::optional<APInt> getExactInteger(const APFloat &APF,
3378 // We will use a SINT_TO_FP to materialize this constant so we should use a
3379 // signed APSInt here.
3380 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3381 // We use an arbitrary rounding mode here. If a floating-point is an exact
3382 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3383 // the rounding mode changes the output value, then it is not an exact
3384 // integer.
3386 bool IsExact;
3387 // If it is out of signed integer range, it will return an invalid operation.
3388 // If it is not an exact integer, IsExact is false.
3389 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3391 !IsExact)
3392 return std::nullopt;
3393 return ValInt.extractBits(BitWidth, 0);
3394}
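// Illustrative behaviour: 3.0 and -2.0 convert exactly and yield the APInts 3
// and -2; 2.5 is inexact and returns std::nullopt, as does a value such as
// 1e30 that does not fit the requested bit width.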
3395
3396// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3397// to the (non-zero) step S and start value X. This can be then lowered as the
3398// RVV sequence (VID * S) + X, for example.
3399// The step S is represented as an integer numerator divided by a positive
3400// denominator. Note that the implementation currently only identifies
3401// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3402// cannot detect 2/3, for example.
3403// Note that this method will also match potentially unappealing index
3404// sequences, like <i32 0, i32 50939494>; however, it is left to the caller to
3405// determine whether this is worth generating code for.
3406//
3407// EltSizeInBits is the size of the type that the sequence will be calculated
3408// in, i.e. SEW for build_vectors or XLEN for address calculations.
3409static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3410 unsigned EltSizeInBits) {
3411 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3412 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3413 return std::nullopt;
3414 bool IsInteger = Op.getValueType().isInteger();
3415
3416 std::optional<unsigned> SeqStepDenom;
3417 std::optional<APInt> SeqStepNum;
3418 std::optional<APInt> SeqAddend;
3419 std::optional<std::pair<APInt, unsigned>> PrevElt;
3420 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3421
3422 // First extract the ops into a list of constant integer values. This may not
3423 // be possible for floats if they're not all representable as integers.
3425 const unsigned OpSize = Op.getScalarValueSizeInBits();
3426 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3427 if (Elt.isUndef()) {
3428 Elts[Idx] = std::nullopt;
3429 continue;
3430 }
3431 if (IsInteger) {
3432 Elts[Idx] = Elt->getAsAPIntVal().trunc(OpSize).zext(EltSizeInBits);
3433 } else {
3434 auto ExactInteger =
3435 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3436 if (!ExactInteger)
3437 return std::nullopt;
3438 Elts[Idx] = *ExactInteger;
3439 }
3440 }
3441
3442 for (auto [Idx, Elt] : enumerate(Elts)) {
3443 // Assume undef elements match the sequence; we just have to be careful
3444 // when interpolating across them.
3445 if (!Elt)
3446 continue;
3447
3448 if (PrevElt) {
3449 // Calculate the step since the last non-undef element, and ensure
3450 // it's consistent across the entire sequence.
3451 unsigned IdxDiff = Idx - PrevElt->second;
3452 APInt ValDiff = *Elt - PrevElt->first;
3453
3454 // A zero value difference means that we're somewhere in the middle
3455 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3456 // step change before evaluating the sequence.
3457 if (ValDiff == 0)
3458 continue;
3459
3460 int64_t Remainder = ValDiff.srem(IdxDiff);
3461 // Normalize the step if it's greater than 1.
3462 if (Remainder != ValDiff.getSExtValue()) {
3463 // The difference must cleanly divide the element span.
3464 if (Remainder != 0)
3465 return std::nullopt;
3466 ValDiff = ValDiff.sdiv(IdxDiff);
3467 IdxDiff = 1;
3468 }
3469
3470 if (!SeqStepNum)
3471 SeqStepNum = ValDiff;
3472 else if (ValDiff != SeqStepNum)
3473 return std::nullopt;
3474
3475 if (!SeqStepDenom)
3476 SeqStepDenom = IdxDiff;
3477 else if (IdxDiff != *SeqStepDenom)
3478 return std::nullopt;
3479 }
3480
3481 // Record this non-undef element for later.
3482 if (!PrevElt || PrevElt->first != *Elt)
3483 PrevElt = std::make_pair(*Elt, Idx);
3484 }
3485
3486 // We need to have logged a step for this to count as a legal index sequence.
3487 if (!SeqStepNum || !SeqStepDenom)
3488 return std::nullopt;
3489
3490 // Loop back through the sequence and validate elements we might have skipped
3491 // while waiting for a valid step. While doing this, log any sequence addend.
3492 for (auto [Idx, Elt] : enumerate(Elts)) {
3493 if (!Elt)
3494 continue;
3495 APInt ExpectedVal =
3496 (APInt(EltSizeInBits, Idx, /*isSigned=*/false, /*implicitTrunc=*/true) *
3497 *SeqStepNum)
3498 .sdiv(*SeqStepDenom);
3499
3500 APInt Addend = *Elt - ExpectedVal;
3501 if (!SeqAddend)
3502 SeqAddend = Addend;
3503 else if (Addend != SeqAddend)
3504 return std::nullopt;
3505 }
3506
3507 assert(SeqAddend && "Must have an addend if we have a step");
3508
3509 return VIDSequence{SeqStepNum->getSExtValue(), *SeqStepDenom,
3510 SeqAddend->getSExtValue()};
3511}
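// Worked examples (illustrative): <1, 3, 5, 7> matches with StepNumerator=2,
// StepDenominator=1, Addend=1; <0, 0, 1, 1, 2, 2> matches with
// StepNumerator=1, StepDenominator=2, Addend=0; <0, 2, 5, 7> does not match
// because the element-to-element differences are inconsistent.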
3512
3513// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3514// and lower it as a VRGATHER_VX_VL from the source vector.
3515static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3516 SelectionDAG &DAG,
3517 const RISCVSubtarget &Subtarget) {
3518 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3519 return SDValue();
3520 SDValue Src = SplatVal.getOperand(0);
3521 // Don't perform this optimization for i1 vectors, or if the element types are
3522 // different.
3523 // FIXME: Support i1 vectors, maybe by promoting to i8?
3524 MVT EltTy = VT.getVectorElementType();
3525 MVT SrcVT = Src.getSimpleValueType();
3526 if (EltTy == MVT::i1 || EltTy != SrcVT.getVectorElementType())
3527 return SDValue();
3528 SDValue Idx = SplatVal.getOperand(1);
3529 // The index must be a legal type.
3530 if (Idx.getValueType() != Subtarget.getXLenVT())
3531 return SDValue();
3532
3533 // Check that we know Idx lies within VT
3534 if (!TypeSize::isKnownLE(SrcVT.getSizeInBits(), VT.getSizeInBits())) {
3535 auto *CIdx = dyn_cast<ConstantSDNode>(Idx);
3536 if (!CIdx || CIdx->getZExtValue() >= VT.getVectorMinNumElements())
3537 return SDValue();
3538 }
3539
3540 // Convert fixed length vectors to scalable
3541 MVT ContainerVT = VT;
3542 if (VT.isFixedLengthVector())
3543 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3544
3545 MVT SrcContainerVT = SrcVT;
3546 if (SrcVT.isFixedLengthVector()) {
3547 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3548 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3549 }
3550
3551 // Put Vec in a VT sized vector
3552 if (SrcContainerVT.getVectorMinNumElements() <
3553 ContainerVT.getVectorMinNumElements())
3554 Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
3555 DAG.getUNDEF(ContainerVT), Src,
3556 DAG.getVectorIdxConstant(0, DL));
3557 else
3558 Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Src,
3559 DAG.getVectorIdxConstant(0, DL));
3560
3561 // We checked that Idx fits inside VT earlier
3562 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3563 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Src,
3564 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3565 if (VT.isFixedLengthVector())
3566 Gather = convertFromScalableVector(VT, Gather, DAG, Subtarget);
3567 return Gather;
3568}
3569
3570/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3571/// which constitute a large proportion of the elements. In such cases we can
3572/// splat a vector with the dominant element and make up the shortfall with
3573/// INSERT_VECTOR_ELTs. Returns SDValue if not profitable.
3574/// Note that this includes vectors of 2 elements by association. The
3575/// upper-most element is the "dominant" one, allowing us to use a splat to
3576/// "insert" the upper element, and an insert of the lower element at position
3577/// 0, which improves codegen.
3579 const RISCVSubtarget &Subtarget) {
3580 MVT VT = Op.getSimpleValueType();
3581 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3582
3583 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3584
3585 SDLoc DL(Op);
3586 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3587
3588 MVT XLenVT = Subtarget.getXLenVT();
3589 unsigned NumElts = Op.getNumOperands();
3590
3591 SDValue DominantValue;
3592 unsigned MostCommonCount = 0;
3593 DenseMap<SDValue, unsigned> ValueCounts;
3594 unsigned NumUndefElts =
3595 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3596
3597 // Track the number of scalar loads we know we'd be inserting, estimated as
3598 // any non-zero floating-point constant. Other kinds of element are either
3599 // already in registers or are materialized on demand. The threshold at which
3600 // a vector load is more desirable than several scalar materializations and
3601 // vector-insertion instructions is not known.
3602 unsigned NumScalarLoads = 0;
3603
3604 for (SDValue V : Op->op_values()) {
3605 if (V.isUndef())
3606 continue;
3607
3608 unsigned &Count = ValueCounts[V];
3609 if (0 == Count)
3610 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3611 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3612
3613 // Is this value dominant? In case of a tie, prefer the highest element as
3614 // it's cheaper to insert near the beginning of a vector than it is at the
3615 // end.
3616 if (++Count >= MostCommonCount) {
3617 DominantValue = V;
3618 MostCommonCount = Count;
3619 }
3620 }
3621
3622 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3623 unsigned NumDefElts = NumElts - NumUndefElts;
3624 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3625
3626 // Don't perform this optimization when optimizing for size, since
3627 // materializing elements and inserting them tends to cause code bloat.
3628 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3629 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3630 ((MostCommonCount > DominantValueCountThreshold) ||
3631 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3632 // Start by splatting the most common element.
3633 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3634
3635 DenseSet<SDValue> Processed{DominantValue};
3636
3637 // We can handle an insert into the last element (of a splat) via
3638 // v(f)slide1down. This is slightly better than the vslideup insert
3639 // lowering as it avoids the need for a vector group temporary. It
3640 // is also better than using vmerge.vx as it avoids the need to
3641 // materialize the mask in a vector register.
3642 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3643 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3644 LastOp != DominantValue) {
3645 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3646 auto OpCode =
3648 if (!VT.isFloatingPoint())
3649 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3650 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3651 LastOp, Mask, VL);
3652 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3653 Processed.insert(LastOp);
3654 }
3655
3656 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3657 for (const auto &OpIdx : enumerate(Op->ops())) {
3658 const SDValue &V = OpIdx.value();
3659 if (V.isUndef() || !Processed.insert(V).second)
3660 continue;
3661 if (ValueCounts[V] == 1) {
3662 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3663 DAG.getVectorIdxConstant(OpIdx.index(), DL));
3664 } else {
3665 // Blend in all instances of this value using a VSELECT, using a
3666 // mask where each bit signals whether that element is the one
3667 // we're after.
3669 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3670 return DAG.getConstant(V == V1, DL, XLenVT);
3671 });
3672 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3673 DAG.getBuildVector(SelMaskTy, DL, Ops),
3674 DAG.getSplatBuildVector(VT, DL, V), Vec);
3675 }
3676 }
3677
3678 return Vec;
3679 }
3680
3681 return SDValue();
3682}
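// Illustrative case (a sketch, not from this file): v4i32 <7, 7, 7, 3> has
// dominant value 7 with count 3, above the threshold of NumDefElts - 2 = 2,
// so it is lowered as a splat of 7 plus a single insertion of 3; since 3 is
// the last element and occurs once, that insertion takes the vslide1down path
// rather than a vslideup-based insert.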
3683
3685 const RISCVSubtarget &Subtarget) {
3686 MVT VT = Op.getSimpleValueType();
3687 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3688
3689 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3690
3691 SDLoc DL(Op);
3692 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3693
3694 MVT XLenVT = Subtarget.getXLenVT();
3695 unsigned NumElts = Op.getNumOperands();
3696
3697 if (VT.getVectorElementType() == MVT::i1) {
3698 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3699 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3700 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3701 }
3702
3703 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3704 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3705 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3706 }
3707
3708 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3709 // scalar integer chunks whose bit-width depends on the number of mask
3710 // bits and XLEN.
3711 // First, determine the most appropriate scalar integer type to use. This
3712 // is at most XLenVT, but may be shrunk to a smaller vector element type
3713 // according to the size of the final vector - use i8 chunks rather than
3714 // XLenVT if we're producing a v8i1. This results in more consistent
3715 // codegen across RV32 and RV64.
3716 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3717 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3718 // If we have to use more than one INSERT_VECTOR_ELT then this
3719 // optimization is likely to increase code size; avoid performing it in
3720 // such a case. We can use a load from a constant pool in this case.
3721 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3722 return SDValue();
3723 // Now we can create our integer vector type. Note that it may be larger
3724 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3725 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3726 MVT IntegerViaVecVT =
3727 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3728 IntegerViaVecElts);
3729
3730 uint64_t Bits = 0;
3731 unsigned BitPos = 0, IntegerEltIdx = 0;
3732 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3733
3734 for (unsigned I = 0; I < NumElts;) {
3735 SDValue V = Op.getOperand(I);
3736 bool BitValue = !V.isUndef() && V->getAsZExtVal();
3737 Bits |= ((uint64_t)BitValue << BitPos);
3738 ++BitPos;
3739 ++I;
3740
3741 // Once we accumulate enough bits to fill our scalar type or process the
3742 // last element, insert into our vector and clear our accumulated data.
3743 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3744 if (NumViaIntegerBits <= 32)
3745 Bits = SignExtend64<32>(Bits);
3746 SDValue Elt = DAG.getSignedConstant(Bits, DL, XLenVT);
3747 Elts[IntegerEltIdx] = Elt;
3748 Bits = 0;
3749 BitPos = 0;
3750 IntegerEltIdx++;
3751 }
3752 }
3753
3754 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3755
3756 if (NumElts < NumViaIntegerBits) {
3757 // If we're producing a smaller vector than our minimum legal integer
3758 // type, bitcast to the equivalent (known-legal) mask type, and extract
3759 // our final mask.
3760 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3761 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3762 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3763 DAG.getConstant(0, DL, XLenVT));
3764 } else {
3765 // Else we must have produced an integer type with the same size as the
3766 // mask type; bitcast for the final result.
3767 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3768 Vec = DAG.getBitcast(VT, Vec);
3769 }
3770
3771 return Vec;
3772 }
3773
3774 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3775 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3777 if (!VT.isFloatingPoint())
3778 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3779 Splat =
3780 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3781 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3782 }
3783
3784 // Try and match index sequences, which we can lower to the vid instruction
3785 // with optional modifications. An all-undef vector is matched by
3786 // getSplatValue, above.
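// As a rough illustration, a constant vector such as <1, 3, 5, 7> matches a
// VID sequence with StepNumerator=2, StepDenominator=1 and Addend=1, and
// lowers approximately to: vid.v; vsll.vi by 1; vadd.vi by 1.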
3787 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3788 int64_t StepNumerator = SimpleVID->StepNumerator;
3789 unsigned StepDenominator = SimpleVID->StepDenominator;
3790 int64_t Addend = SimpleVID->Addend;
3791
3792 assert(StepNumerator != 0 && "Invalid step");
3793 bool Negate = false;
3794 int64_t SplatStepVal = StepNumerator;
3795 unsigned StepOpcode = ISD::MUL;
3796 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3797 // anyway as the shift of 63 won't fit in uimm5.
3798 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3799 isPowerOf2_64(std::abs(StepNumerator))) {
3800 Negate = StepNumerator < 0;
3801 StepOpcode = ISD::SHL;
3802 SplatStepVal = Log2_64(std::abs(StepNumerator));
3803 }
3804
3805 // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
3806 // threshold since it's the immediate value many RVV instructions accept.
3807 // There is no vmul.vi instruction, so ensure the multiply constant can fit
3808 // in a single addi instruction.
3809 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3810 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3811 isPowerOf2_32(StepDenominator) &&
3812 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3813 MVT VIDVT =
3814 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3815 MVT VIDContainerVT =
3816 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3817 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3818 // Convert right out of the scalable type so we can use standard ISD
3819 // nodes for the rest of the computation. If we used scalable types with
3820 // these, we'd lose the fixed-length vector info and generate worse
3821 // vsetvli code.
3822 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3823 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3824 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3825 SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
3826 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3827 }
3828 if (StepDenominator != 1) {
3829 SDValue SplatStep =
3830 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3831 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3832 }
3833 if (Addend != 0 || Negate) {
3834 SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
3835 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3836 VID);
3837 }
3838 if (VT.isFloatingPoint()) {
3839 // TODO: Use vfwcvt to reduce register pressure.
3840 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3841 }
3842 return VID;
3843 }
3844 }
3845
3846 // For very small build_vectors, use a single scalar insert of a constant.
3847 // TODO: Base this on constant rematerialization cost, not size.
3848 const unsigned EltBitSize = VT.getScalarSizeInBits();
3849 if (VT.getSizeInBits() <= 32 &&
3850 ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
3851 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3852 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3853 "Unexpected sequence type");
3854 // If we can use the original VL with the modified element type, this
3855 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3856 // be moved into InsertVSETVLI?
3857 unsigned ViaVecLen =
3858 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3859 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3860
3861 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3862 uint64_t SplatValue = 0;
3863 // Construct the amalgamated value at this larger vector type.
3864 for (const auto &OpIdx : enumerate(Op->op_values())) {
3865 const auto &SeqV = OpIdx.value();
3866 if (!SeqV.isUndef())
3867 SplatValue |=
3868 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3869 }
3870
3871 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3872 // achieve better constant materialization.
3873 // On RV32, we need to sign-extend to use getSignedConstant.
3874 if (ViaIntVT == MVT::i32)
3875 SplatValue = SignExtend64<32>(SplatValue);
3876
3877 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3878 DAG.getUNDEF(ViaVecVT),
3879 DAG.getSignedConstant(SplatValue, DL, XLenVT),
3880 DAG.getVectorIdxConstant(0, DL));
3881 if (ViaVecLen != 1)
3882 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3883 MVT::getVectorVT(ViaIntVT, 1), Vec,
3884 DAG.getConstant(0, DL, XLenVT));
3885 return DAG.getBitcast(VT, Vec);
3886 }
3887
3888
3889 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3890 // when re-interpreted as a vector with a larger element type. For example,
3891 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3892 // could be instead splat as
3893 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3894 // TODO: This optimization could also work on non-constant splats, but it
3895 // would require bit-manipulation instructions to construct the splat value.
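// Similarly, a repeated (non-splat) sequence such as
// v8i8 = build_vector 0, 1, 2, 3, 0, 1, 2, 3
// can be treated as a splat of the i32 value 0x03020100.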
3896 SmallVector<SDValue> Sequence;
3897 const auto *BV = cast<BuildVectorSDNode>(Op);
3898 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3899 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
3900 BV->getRepeatedSequence(Sequence) &&
3901 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3902 unsigned SeqLen = Sequence.size();
3903 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3904 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3905 ViaIntVT == MVT::i64) &&
3906 "Unexpected sequence type");
3907
3908 // If we can use the original VL with the modified element type, this
3909 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3910 // be moved into InsertVSETVLI?
3911 const unsigned RequiredVL = NumElts / SeqLen;
3912 const unsigned ViaVecLen =
3913 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3914 NumElts : RequiredVL;
3915 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3916
3917 unsigned EltIdx = 0;
3918 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3919 uint64_t SplatValue = 0;
3920 // Construct the amalgamated value which can be splatted as this larger
3921 // vector type.
3922 for (const auto &SeqV : Sequence) {
3923 if (!SeqV.isUndef())
3924 SplatValue |=
3925 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3926 EltIdx++;
3927 }
3928
3929 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3930 // achieve better constant materialization.
3931 // On RV32, we need to sign-extend to use getSignedConstant.
3932 if (ViaIntVT == MVT::i32)
3933 SplatValue = SignExtend64<32>(SplatValue);
3934
3935 // Since we can't introduce illegal i64 types at this stage, we can only
3936 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3937 // way we can use RVV instructions to splat.
3938 assert((ViaIntVT.bitsLE(XLenVT) ||
3939 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3940 "Unexpected bitcast sequence");
3941 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3942 SDValue ViaVL =
3943 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3944 MVT ViaContainerVT =
3945 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3946 SDValue Splat =
3947 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3948 DAG.getUNDEF(ViaContainerVT),
3949 DAG.getSignedConstant(SplatValue, DL, XLenVT), ViaVL);
3950 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3951 if (ViaVecLen != RequiredVL)
3952 Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3953 MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3954 DAG.getConstant(0, DL, XLenVT));
3955 return DAG.getBitcast(VT, Splat);
3956 }
3957 }
3958
3959 // If the number of signbits allows, see if we can lower as a <N x i8>.
3960 // Our main goal here is to reduce LMUL (and thus work) required to
3961 // build the constant, but we will also narrow if the resulting
3962 // narrow vector is known to materialize cheaply.
3963 // TODO: We really should be costing the smaller vector. There are
3964 // profitable cases this misses.
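// For instance, a v4i16 constant like <1, -2, 3, 4> has at most 8 significant
// bits per element, so it can be built as a v4i8 constant and sign-extended
// (vsext.vf2), reducing the LMUL needed to materialize the constant.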
3965 if (EltBitSize > 8 && VT.isInteger() &&
3966 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen()) &&
3967 DAG.ComputeMaxSignificantBits(Op) <= 8) {
3968 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3969 DL, Op->ops());
3970 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3971 Source, DAG, Subtarget);
3972 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3973 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3974 }
3975
3976 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3977 return Res;
3978
3979 // For constant vectors, use generic constant pool lowering. Otherwise,
3980 // we'd have to materialize constants in GPRs just to move them into the
3981 // vector.
3982 return SDValue();
3983}
3984
3985static unsigned getPACKOpcode(unsigned DestBW,
3986 const RISCVSubtarget &Subtarget) {
3987 switch (DestBW) {
3988 default:
3989 llvm_unreachable("Unsupported pack size");
3990 case 16:
3991 return RISCV::PACKH;
3992 case 32:
3993 return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
3994 case 64:
3995 assert(Subtarget.is64Bit());
3996 return RISCV::PACK;
3997 }
3998}
3999
4000/// Double the element size of the build vector to reduce the number
4001/// of vslide1down in the build vector chain. In the worst case, this
4002/// trades three scalar operations for 1 vector operation. Scalar
4003/// operations are generally lower latency, and for out-of-order cores
4004/// we also benefit from additional parallelism.
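/// As a rough sketch (assuming Zbkb is available), a v8i8 build_vector of
/// scalars a,b,c,d,e,f,g,h becomes a v4i16 build_vector of packh(a,b),
/// packh(c,d), packh(e,f), packh(g,h), halving the length of the vslide1down
/// chain; without Zbkb the same packing is done with and/shl/or below.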
4005 static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG,
4006 const RISCVSubtarget &Subtarget) {
4007 SDLoc DL(Op);
4008 MVT VT = Op.getSimpleValueType();
4009 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4010 MVT ElemVT = VT.getVectorElementType();
4011 if (!ElemVT.isInteger())
4012 return SDValue();
4013
4014 // TODO: Relax these architectural restrictions, possibly with costing
4015 // of the actual instructions required.
4016 if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
4017 return SDValue();
4018
4019 unsigned NumElts = VT.getVectorNumElements();
4020 unsigned ElemSizeInBits = ElemVT.getSizeInBits();
4021 if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||
4022 NumElts % 2 != 0)
4023 return SDValue();
4024
4025 // Produce [B,A] packed into a type twice as wide. Note that all
4026 // scalars are XLenVT, possibly masked (see below).
4027 MVT XLenVT = Subtarget.getXLenVT();
4028 SDValue Mask = DAG.getConstant(
4029 APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);
4030 auto pack = [&](SDValue A, SDValue B) {
4031 // Bias the scheduling of the inserted operations to near the
4032 // definition of the element - this tends to reduce register
4033 // pressure overall.
4034 SDLoc ElemDL(B);
4035 if (Subtarget.hasStdExtZbkb())
4036 // Note that we're relying on the high bits of the result being
4037 // don't care. For PACKW, the result is *sign* extended.
4038 return SDValue(
4039 DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),
4040 ElemDL, XLenVT, A, B),
4041 0);
4042
4043 A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask);
4044 B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask);
4045 SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
4046 return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,
4047 DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt),
4048 SDNodeFlags::Disjoint);
4049 };
4050
4051 SmallVector<SDValue> NewOperands;
4052 NewOperands.reserve(NumElts / 2);
4053 for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
4054 NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1)));
4055 assert(NumElts == NewOperands.size() * 2);
4056 MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2);
4057 MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2);
4058 return DAG.getNode(ISD::BITCAST, DL, VT,
4059 DAG.getBuildVector(WideVecVT, DL, NewOperands));
4060}
4061
4062 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
4063 const RISCVSubtarget &Subtarget) {
4064 MVT VT = Op.getSimpleValueType();
4065 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4066
4067 MVT EltVT = VT.getVectorElementType();
4068 MVT XLenVT = Subtarget.getXLenVT();
4069
4070 SDLoc DL(Op);
4071
4072 // Proper support for f16 requires Zvfh. bf16 always requires special
4073 // handling. We need to cast the scalar to integer and create an integer
4074 // build_vector.
4075 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) || EltVT == MVT::bf16) {
4076 MVT IVT = VT.changeVectorElementType(MVT::i16);
4077 SmallVector<SDValue, 16> NewOps(Op.getNumOperands());
4078 for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I) {
4079 SDValue Elem = Op.getOperand(I);
4080 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4081 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) {
4082 // Called by LegalizeDAG, we need to use XLenVT operations since we
4083 // can't create illegal types.
4084 if (auto *C = dyn_cast<ConstantFPSDNode>(Elem)) {
4085 // Manually constant fold so the integer build_vector can be lowered
4086 // better. Waiting for DAGCombine will be too late.
4087 APInt V =
4088 C->getValueAPF().bitcastToAPInt().sext(XLenVT.getSizeInBits());
4089 NewOps[I] = DAG.getConstant(V, DL, XLenVT);
4090 } else {
4091 NewOps[I] = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Elem);
4092 }
4093 } else {
4094 // Called by scalar type legalizer, we can use i16.
4095 NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
4096 }
4097 }
4098 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, IVT, NewOps);
4099 return DAG.getBitcast(VT, Res);
4100 }
4101
4102 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
4103 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
4104 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
4105
4106 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4107
4108 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4109
4110 if (VT.getVectorElementType() == MVT::i1) {
4111 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
4112 // vector type, we have a legal equivalently-sized i8 type, so we can use
4113 // that.
4114 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
4115 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
4116
4117 SDValue WideVec;
4118 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4119 // For a splat, perform a scalar truncate before creating the wider
4120 // vector.
4121 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
4122 DAG.getConstant(1, DL, Splat.getValueType()));
4123 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
4124 } else {
4125 SmallVector<SDValue, 8> Ops(Op->op_values());
4126 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
4127 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
4128 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
4129 }
4130
4131 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
4132 }
4133
4134 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4135 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
4136 return Gather;
4137 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
4138 : RISCVISD::VMV_V_X_VL;
4139 if (!VT.isFloatingPoint())
4140 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4141 Splat =
4142 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4143 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4144 }
4145
4146 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4147 return Res;
4148
4149 // If we're compiling for an exact VLEN value, we can split our work per
4150 // register in the register group.
4151 if (const auto VLen = Subtarget.getRealVLen();
4152 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
4153 MVT ElemVT = VT.getVectorElementType();
4154 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4155 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4156 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4157 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4158 assert(M1VT == getLMUL1VT(M1VT));
4159
4160 // The following semantically builds up a fixed length concat_vector
4161 // of the component build_vectors. We eagerly lower to scalable and
4162 // insert_subvector here to avoid DAG combining it back to a large
4163 // build_vector.
4164 SmallVector<SDValue> BuildVectorOps(Op->ops());
4165 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4166 SDValue Vec = DAG.getUNDEF(ContainerVT);
4167 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4168 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
4169 SDValue SubBV =
4170 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
4171 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
4172 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4173 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
4174 DAG.getVectorIdxConstant(InsertIdx, DL));
4175 }
4176 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4177 }
4178
4179 // If we're about to resort to vslide1down (or stack usage), pack our
4180 // elements into the widest scalar type we can. This will force a VL/VTYPE
4181 // toggle, but reduces the critical path, the number of vslide1down ops
4182 // required, and possibly enables scalar folds of the values.
4183 if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget))
4184 return Res;
4185
4186 // For m1 vectors, if we have non-undef values in both halves of our vector,
4187 // split the vector into low and high halves, build them separately, then
4188 // use a vselect to combine them. For long vectors, this cuts the critical
4189 // path of the vslide1down sequence in half, and gives us an opportunity
4190 // to special case each half independently. Note that we don't change the
4191 // length of the sub-vectors here, so if both fallback to the generic
4192 // vslide1down path, we should be able to fold the vselect into the final
4193 // vslidedown (for the undef tail) for the first half w/ masking.
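// For example, a v8i32 build_vector of a,b,c,d,e,f,g,h would be built roughly
// as SubVecA = <a,b,c,d,undef x4> and SubVecB = <undef x4,e,f,g,h>, combined
// with a vselect on the mask <1,1,1,1,0,0,0,0>.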
4194 unsigned NumElts = VT.getVectorNumElements();
4195 unsigned NumUndefElts =
4196 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4197 unsigned NumDefElts = NumElts - NumUndefElts;
4198 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4199 ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) {
4200 SmallVector<SDValue> SubVecAOps, SubVecBOps;
4201 SmallVector<SDValue> MaskVals;
4202 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4203 SubVecAOps.reserve(NumElts);
4204 SubVecBOps.reserve(NumElts);
4205 for (unsigned i = 0; i < NumElts; i++) {
4206 SDValue Elem = Op->getOperand(i);
4207 if (i < NumElts / 2) {
4208 SubVecAOps.push_back(Elem);
4209 SubVecBOps.push_back(UndefElem);
4210 } else {
4211 SubVecAOps.push_back(UndefElem);
4212 SubVecBOps.push_back(Elem);
4213 }
4214 bool SelectMaskVal = (i < NumElts / 2);
4215 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4216 }
4217 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4218 MaskVals.size() == NumElts);
4219
4220 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
4221 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
4222 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4223 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4224 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
4225 }
4226
4227 // Cap the cost at a value linear to the number of elements in the vector.
4228 // The default lowering is to use the stack. The vector store + scalar loads
4229 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
4230 // being (at least) linear in LMUL. As a result, using the vslidedown
4231 // lowering for every element ends up being VL*LMUL.
4232 // TODO: Should we be directly costing the stack alternative? Doing so might
4233 // give us a more accurate upper bound.
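// As an illustration, for a 16-element vector whose container is LMUL=2,
// LinearBudget starts at 32 and each slide costs 2, so a fully non-undef
// vector (16 slides) just fits within the budget.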
4234 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4235
4236 // TODO: unify with TTI getSlideCost.
4237 InstructionCost PerSlideCost = 1;
4238 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4239 default: break;
4240 case RISCVII::VLMUL::LMUL_2:
4241 PerSlideCost = 2;
4242 break;
4243 case RISCVII::VLMUL::LMUL_4:
4244 PerSlideCost = 4;
4245 break;
4246 case RISCVII::VLMUL::LMUL_8:
4247 PerSlideCost = 8;
4248 break;
4249 }
4250
4251 // TODO: Should we be using the build instseq then cost + evaluate scheme
4252 // we use for integer constants here?
4253 unsigned UndefCount = 0;
4254 for (const SDValue &V : Op->ops()) {
4255 if (V.isUndef()) {
4256 UndefCount++;
4257 continue;
4258 }
4259 if (UndefCount) {
4260 LinearBudget -= PerSlideCost;
4261 UndefCount = 0;
4262 }
4263 LinearBudget -= PerSlideCost;
4264 }
4265 if (UndefCount) {
4266 LinearBudget -= PerSlideCost;
4267 }
4268
4269 if (LinearBudget < 0)
4270 return SDValue();
4271
4272 assert((!VT.isFloatingPoint() ||
4273 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4274 "Illegal type which will result in reserved encoding");
4275
4276 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
4277
4278 SDValue Vec;
4279 UndefCount = 0;
4280 for (SDValue V : Op->ops()) {
4281 if (V.isUndef()) {
4282 UndefCount++;
4283 continue;
4284 }
4285
4286 // Start our sequence with a TA splat in the hopes that hardware is able to
4287 // recognize there's no dependency on the prior value of our temporary
4288 // register.
4289 if (!Vec) {
4290 Vec = DAG.getSplatVector(VT, DL, V);
4291 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4292 UndefCount = 0;
4293 continue;
4294 }
4295
4296 if (UndefCount) {
4297 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4298 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4299 Vec, Offset, Mask, VL, Policy);
4300 UndefCount = 0;
4301 }
4302 auto OpCode =
4303 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4304 if (!VT.isFloatingPoint())
4305 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4306 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4307 V, Mask, VL);
4308 }
4309 if (UndefCount) {
4310 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4311 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4312 Vec, Offset, Mask, VL, Policy);
4313 }
4314 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4315}
4316
4317static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4318 SDValue Lo, SDValue Hi, SDValue VL,
4319 SelectionDAG &DAG) {
4320 if (!Passthru)
4321 Passthru = DAG.getUNDEF(VT);
4322 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4323 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4324 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4325 // If Hi constant is all the same sign bit as Lo, lower this as a custom
4326 // node in order to try and match RVV vector/scalar instructions.
4327 if ((LoC >> 31) == HiC)
4328 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4329
4330 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
4331 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
4332 // vlmax vsetvli or vsetivli to change the VL.
4333 // FIXME: Support larger constants?
4334 // FIXME: Support non-constant VLs by saturating?
4335 if (LoC == HiC) {
4336 SDValue NewVL;
4337 if (isAllOnesConstant(VL) ||
4338 (isa<RegisterSDNode>(VL) &&
4339 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
4340 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4341 else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4342 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4343
4344 if (NewVL) {
4345 MVT InterVT =
4346 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4347 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4348 DAG.getUNDEF(InterVT), Lo, NewVL);
4349 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4350 }
4351 }
4352 }
4353
4354 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4355 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4356 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4357 Hi.getConstantOperandVal(1) == 31)
4358 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4359
4360 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4361 // even if it might be sign extended.
4362 if (Hi.isUndef())
4363 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4364
4365 // Fall back to a stack store and stride x0 vector load.
4366 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4367 Hi, VL);
4368}
4369
4370// Called by type legalization to handle splat of i64 on RV32.
4371// FIXME: We can optimize this when the type has sign or zero bits in one
4372// of the halves.
4373static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4374 SDValue Scalar, SDValue VL,
4375 SelectionDAG &DAG) {
4376 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4377 SDValue Lo, Hi;
4378 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4379 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4380}
4381
4382 // This function lowers a splat of a scalar operand Scalar with the vector
4383// length VL. It ensures the final sequence is type legal, which is useful when
4384// lowering a splat after type legalization.
4385static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4386 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4387 const RISCVSubtarget &Subtarget) {
4388 bool HasPassthru = Passthru && !Passthru.isUndef();
4389 if (!HasPassthru && !Passthru)
4390 Passthru = DAG.getUNDEF(VT);
4391
4392 MVT EltVT = VT.getVectorElementType();
4393 MVT XLenVT = Subtarget.getXLenVT();
4394
4395 if (VT.isFloatingPoint()) {
4396 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
4397 EltVT == MVT::bf16) {
4398 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4399 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
4400 Scalar = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Scalar);
4401 else
4402 Scalar = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Scalar);
4403 MVT IVT = VT.changeVectorElementType(MVT::i16);
4404 Passthru = DAG.getNode(ISD::BITCAST, DL, IVT, Passthru);
4405 SDValue Splat =
4406 lowerScalarSplat(Passthru, Scalar, VL, IVT, DL, DAG, Subtarget);
4407 return DAG.getNode(ISD::BITCAST, DL, VT, Splat);
4408 }
4409 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4410 }
4411
4412 // Simplest case is that the operand needs to be promoted to XLenVT.
4413 if (Scalar.getValueType().bitsLE(XLenVT)) {
4414 // If the operand is a constant, sign extend to increase our chances
4415 // of being able to use a .vi instruction. ANY_EXTEND would become a
4416 // zero extend and the simm5 check in isel would fail.
4417 // FIXME: Should we ignore the upper bits in isel instead?
4418 unsigned ExtOpc =
4419 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4420 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4421 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4422 }
4423
4424 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4425 "Unexpected scalar for splat lowering!");
4426
4427 if (isOneConstant(VL) && isNullConstant(Scalar))
4428 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4429 DAG.getConstant(0, DL, XLenVT), VL);
4430
4431 // Otherwise use the more complicated splatting algorithm.
4432 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4433}
4434
4435// This function lowers an insert of a scalar operand Scalar into lane
4436// 0 of the vector regardless of the value of VL. The contents of the
4437// remaining lanes of the result vector are unspecified. VL is assumed
4438// to be non-zero.
4439 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4440 const SDLoc &DL, SelectionDAG &DAG,
4441 const RISCVSubtarget &Subtarget) {
4442 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4443
4444 const MVT XLenVT = Subtarget.getXLenVT();
4445 SDValue Passthru = DAG.getUNDEF(VT);
4446
4447 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4448 isNullConstant(Scalar.getOperand(1))) {
4449 SDValue ExtractedVal = Scalar.getOperand(0);
4450 // The element types must be the same.
4451 if (ExtractedVal.getValueType().getVectorElementType() ==
4452 VT.getVectorElementType()) {
4453 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4454 MVT ExtractedContainerVT = ExtractedVT;
4455 if (ExtractedContainerVT.isFixedLengthVector()) {
4456 ExtractedContainerVT = getContainerForFixedLengthVector(
4457 DAG, ExtractedContainerVT, Subtarget);
4458 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4459 ExtractedVal, DAG, Subtarget);
4460 }
4461 if (ExtractedContainerVT.bitsLE(VT))
4462 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru,
4463 ExtractedVal, DAG.getVectorIdxConstant(0, DL));
4464 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
4465 DAG.getVectorIdxConstant(0, DL));
4466 }
4467 }
4468
4469
4470 if (VT.isFloatingPoint())
4471 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
4472 DAG.getUNDEF(VT), Scalar, VL);
4473
4474 // Avoid the tricky legalization cases by falling back to using the
4475 // splat code which already handles it gracefully.
4476 if (!Scalar.getValueType().bitsLE(XLenVT))
4477 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4478 DAG.getConstant(1, DL, XLenVT),
4479 VT, DL, DAG, Subtarget);
4480
4481 // If the operand is a constant, sign extend to increase our chances
4482 // of being able to use a .vi instruction. ANY_EXTEND would become a
4483 // zero extend and the simm5 check in isel would fail.
4484 // FIXME: Should we ignore the upper bits in isel instead?
4485 unsigned ExtOpc =
4486 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4487 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4488 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
4489 VL);
4490}
4491
4492// Can this shuffle be performed on exactly one (possibly larger) input?
4493static SDValue getSingleShuffleSrc(MVT VT, MVT ContainerVT, SDValue V1,
4494 SDValue V2) {
4495
4496 if (V2.isUndef() &&
4497 RISCVTargetLowering::getLMUL(ContainerVT) != RISCVII::VLMUL::LMUL_8)
4498 return V1;
4499
4500 // Both inputs must be extracts.
4501 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4502 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4503 return SDValue();
4504
4505 // Extracting from the same source.
4506 SDValue Src = V1.getOperand(0);
4507 if (Src != V2.getOperand(0))
4508 return SDValue();
4509
4510 // Src needs to have twice the number of elements.
4511 unsigned NumElts = VT.getVectorNumElements();
4512 if (Src.getValueType().getVectorNumElements() != (NumElts * 2))
4513 return SDValue();
4514
4515 // The extracts must extract the two halves of the source.
4516 if (V1.getConstantOperandVal(1) != 0 ||
4517 V2.getConstantOperandVal(1) != NumElts)
4518 return SDValue();
4519
4520 return Src;
4521}
4522
4523/// Is this shuffle interleaving contiguous elements from one vector into the
4524/// even elements and contiguous elements from another vector into the odd
4525/// elements. \p EvenSrc will contain the element that should be in the first
4526/// even element. \p OddSrc will contain the element that should be in the first
4527/// odd element. These can be the first element in a source or the element half
4528/// way through the source.
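/// For example, for a v8i8 shuffle the mask <0,8,1,9,2,10,3,11> interleaves
/// the low halves of the two sources (EvenSrc = 0, OddSrc = 8), while
/// <0,4,1,5,2,6,3,7> is a unary interleave of the low and high halves of the
/// first source (EvenSrc = 0, OddSrc = 4).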
4529static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4530 int &OddSrc, const RISCVSubtarget &Subtarget) {
4531 // We need to be able to widen elements to the next larger integer type.
4532 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4533 return false;
4534
4535 int Size = Mask.size();
4536 int NumElts = VT.getVectorNumElements();
4537 assert(Size == (int)NumElts && "Unexpected mask size");
4538
4539 SmallVector<unsigned, 2> StartIndexes;
4540 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4541 return false;
4542
4543 EvenSrc = StartIndexes[0];
4544 OddSrc = StartIndexes[1];
4545
4546 // One source should be low half of first vector.
4547 if (EvenSrc != 0 && OddSrc != 0)
4548 return false;
4549
4550 // Subvectors will be extracted from either the start of the two input
4551 // vectors, or from the start and middle of the first vector if it's a unary
4552 // interleave.
4553 // In both cases, HalfNumElts will be extracted.
4554 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4555 // we'll create an illegal extract_subvector.
4556 // FIXME: We could support other values using a slidedown first.
4557 int HalfNumElts = NumElts / 2;
4558 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4559}
4560
4561/// Match shuffles that concatenate two vectors, rotate the concatenation,
4562/// and then extract the original number of elements from the rotated result.
4563/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4564/// returned rotation amount is for a rotate right, where elements move from
4565/// higher elements to lower elements. \p LoSrc indicates the first source
4566/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4567/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4568/// 0 or 1 if a rotation is found.
4569///
4570/// NOTE: We talk about rotate to the right which matches how bit shift and
4571/// rotate instructions are described where LSBs are on the right, but LLVM IR
4572/// and the table below write vectors with the lowest elements on the left.
4573static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
4574 int Size = Mask.size();
4575
4576 // We need to detect various ways of spelling a rotation:
4577 // [11, 12, 13, 14, 15, 0, 1, 2]
4578 // [-1, 12, 13, 14, -1, -1, 1, -1]
4579 // [-1, -1, -1, -1, -1, -1, 1, 2]
4580 // [ 3, 4, 5, 6, 7, 8, 9, 10]
4581 // [-1, 4, 5, 6, -1, -1, 9, -1]
4582 // [-1, 4, 5, 6, -1, -1, -1, -1]
4583 int Rotation = 0;
4584 LoSrc = -1;
4585 HiSrc = -1;
4586 for (int i = 0; i != Size; ++i) {
4587 int M = Mask[i];
4588 if (M < 0)
4589 continue;
4590
4591 // Determine where a rotate vector would have started.
4592 int StartIdx = i - (M % Size);
4593 // The identity rotation isn't interesting, stop.
4594 if (StartIdx == 0)
4595 return -1;
4596
4597 // If we found the tail of a vector the rotation must be the missing
4598 // front. If we found the head of a vector, it must be how much of the
4599 // head.
4600 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
4601
4602 if (Rotation == 0)
4603 Rotation = CandidateRotation;
4604 else if (Rotation != CandidateRotation)
4605 // The rotations don't match, so we can't match this mask.
4606 return -1;
4607
4608 // Compute which value this mask is pointing at.
4609 int MaskSrc = M < Size ? 0 : 1;
4610
4611 // Compute which of the two target values this index should be assigned to.
4612 // This reflects whether the high elements are remaining or the low elements
4613 // are remaining.
4614 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4615
4616 // Either set up this value if we've not encountered it before, or check
4617 // that it remains consistent.
4618 if (TargetSrc < 0)
4619 TargetSrc = MaskSrc;
4620 else if (TargetSrc != MaskSrc)
4621 // This may be a rotation, but it pulls from the inputs in some
4622 // unsupported interleaving.
4623 return -1;
4624 }
4625
4626 // Check that we successfully analyzed the mask, and normalize the results.
4627 assert(Rotation != 0 && "Failed to locate a viable rotation!");
4628 assert((LoSrc >= 0 || HiSrc >= 0) &&
4629 "Failed to find a rotated input vector!");
4630
4631 return Rotation;
4632}
4633
4634// Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
4635// 2, 4, 8 and the integer type Factor-times larger than VT's
4636// element type must be a legal element type.
4637// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (Factor=2, Index=0)
4638// -> [p, q, r, s] (Factor=2, Index=1)
4639 static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT,
4640 SDValue Src, unsigned Factor,
4641 unsigned Index, SelectionDAG &DAG) {
4642 unsigned EltBits = VT.getScalarSizeInBits();
4643 ElementCount SrcEC = Src.getValueType().getVectorElementCount();
4644 MVT WideSrcVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor),
4645 SrcEC.divideCoefficientBy(Factor));
4646 MVT ResVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits),
4647 SrcEC.divideCoefficientBy(Factor));
4648 Src = DAG.getBitcast(WideSrcVT, Src);
4649
4650 unsigned Shift = Index * EltBits;
4651 SDValue Res = DAG.getNode(ISD::SRL, DL, WideSrcVT, Src,
4652 DAG.getConstant(Shift, DL, WideSrcVT));
4653 Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT, Res);
4654 MVT IntVT = VT.changeVectorElementTypeToInteger();
4655 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, IntVT, DAG.getUNDEF(IntVT), Res,
4656 DAG.getVectorIdxConstant(0, DL));
4657 return DAG.getBitcast(VT, Res);
4658}
4659
4660// Lower the following shuffle to vslidedown.
4661// a)
4662// t49: v8i8 = extract_subvector t13, Constant:i64<0>
4663// t109: v8i8 = extract_subvector t13, Constant:i64<8>
4664// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4665// b)
4666// t69: v16i16 = extract_subvector t68, Constant:i64<0>
4667// t23: v8i16 = extract_subvector t69, Constant:i64<0>
4668// t29: v4i16 = extract_subvector t23, Constant:i64<4>
4669// t26: v8i16 = extract_subvector t69, Constant:i64<8>
4670// t30: v4i16 = extract_subvector t26, Constant:i64<0>
4671// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
4672 static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
4673 SDValue V1, SDValue V2,
4674 ArrayRef<int> Mask,
4675 const RISCVSubtarget &Subtarget,
4676 SelectionDAG &DAG) {
4677 auto findNonEXTRACT_SUBVECTORParent =
4678 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4679 uint64_t Offset = 0;
4680 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4681 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
4682 // a scalable vector. But we don't want to match the case.
4683 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4684 Offset += Parent.getConstantOperandVal(1);
4685 Parent = Parent.getOperand(0);
4686 }
4687 return std::make_pair(Parent, Offset);
4688 };
4689
4690 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4691 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4692
4693 // Extracting from the same source.
4694 SDValue Src = V1Src;
4695 if (Src != V2Src)
4696 return SDValue();
4697
4698 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4699 SmallVector<int, 16> NewMask(Mask);
4700 for (size_t i = 0; i != NewMask.size(); ++i) {
4701 if (NewMask[i] == -1)
4702 continue;
4703
4704 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4705 NewMask[i] = NewMask[i] + V1IndexOffset;
4706 } else {
4707 // Minus NewMask.size() is needed. Otherwise, the b case would be
4708 // <5,6,7,12> instead of <5,6,7,8>.
4709 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4710 }
4711 }
4712
4713 // First index must be known and non-zero. It will be used as the slidedown
4714 // amount.
4715 if (NewMask[0] <= 0)
4716 return SDValue();
4717
4718 // NewMask must also be contiguous.
4719 for (unsigned i = 1; i != NewMask.size(); ++i)
4720 if (NewMask[i - 1] + 1 != NewMask[i])
4721 return SDValue();
4722
4723 MVT XLenVT = Subtarget.getXLenVT();
4724 MVT SrcVT = Src.getSimpleValueType();
4725 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4726 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
4727 SDValue Slidedown =
4728 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4729 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
4730 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
4731 return DAG.getNode(
4732 ISD::EXTRACT_SUBVECTOR, DL, VT,
4733 convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
4734 DAG.getConstant(0, DL, XLenVT));
4735}
4736
4737// Because vslideup leaves the destination elements at the start intact, we can
4738// use it to perform shuffles that insert subvectors:
4739//
4740// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4741// ->
4742// vsetvli zero, 8, e8, mf2, ta, ma
4743// vslideup.vi v8, v9, 4
4744//
4745// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4746// ->
4747// vsetvli zero, 5, e8, mf2, tu, ma
4748 // vslideup.vi v8, v9, 2
4749 static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
4750 SDValue V1, SDValue V2,
4751 ArrayRef<int> Mask,
4752 const RISCVSubtarget &Subtarget,
4753 SelectionDAG &DAG) {
4754 unsigned NumElts = VT.getVectorNumElements();
4755 int NumSubElts, Index;
4756 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
4757 Index))
4758 return SDValue();
4759
4760 bool OpsSwapped = Mask[Index] < (int)NumElts;
4761 SDValue InPlace = OpsSwapped ? V2 : V1;
4762 SDValue ToInsert = OpsSwapped ? V1 : V2;
4763
4764 MVT XLenVT = Subtarget.getXLenVT();
4765 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4766 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
4767 // We slide up by the index that the subvector is being inserted at, and set
4768 // VL to the index + the number of elements being inserted.
4769 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
4770 // If we're adding a suffix to the in place vector, i.e. inserting right
4771 // up to the very end of it, then we don't actually care about the tail.
4772 if (NumSubElts + Index >= (int)NumElts)
4773 Policy |= RISCVII::TAIL_AGNOSTIC;
4774
4775 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
4776 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
4777 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
4778
4779 SDValue Res;
4780 // If we're inserting into the lowest elements, use a tail undisturbed
4781 // vmv.v.v.
4782 if (Index == 0)
4783 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
4784 VL);
4785 else
4786 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
4787 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
4788 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4789}
4790
4791/// Match v(f)slide1up/down idioms. These operations involve sliding
4792/// N-1 elements to make room for an inserted scalar at one end.
4793 static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
4794 SDValue V1, SDValue V2,
4795 ArrayRef<int> Mask,
4796 const RISCVSubtarget &Subtarget,
4797 SelectionDAG &DAG) {
4798 bool OpsSwapped = false;
4799 if (!isa<BuildVectorSDNode>(V1)) {
4800 if (!isa<BuildVectorSDNode>(V2))
4801 return SDValue();
4802 std::swap(V1, V2);
4803 OpsSwapped = true;
4804 }
4805 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
4806 if (!Splat)
4807 return SDValue();
4808
4809 // Return true if the mask could describe a slide of Mask.size() - 1
4810 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
4811 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
4812 const unsigned S = (Offset > 0) ? 0 : -Offset;
4813 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
4814 for (unsigned i = S; i != E; ++i)
4815 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
4816 return false;
4817 return true;
4818 };
4819
4820 const unsigned NumElts = VT.getVectorNumElements();
4821 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4822 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4823 return SDValue();
4824
4825 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
4826 // Inserted lane must come from the splat; an undef scalar is legal but not profitable.
4827 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
4828 return SDValue();
4829
4830 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4831 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4832
4833 // zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h +
4834 // vslide1{down,up}.vx instead.
4835 if (VT.getVectorElementType() == MVT::bf16 ||
4836 (VT.getVectorElementType() == MVT::f16 &&
4837 !Subtarget.hasVInstructionsF16())) {
4838 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
4839 Splat =
4840 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Splat);
4841 V2 = DAG.getBitcast(
4842 IntVT, convertToScalableVector(ContainerVT, V2, DAG, Subtarget));
4843 SDValue Vec = DAG.getNode(
4844 IsVSlidedown ? RISCVISD::VSLIDE1DOWN_VL : RISCVISD::VSLIDE1UP_VL, DL,
4845 IntVT, DAG.getUNDEF(IntVT), V2, Splat, TrueMask, VL);
4846 Vec = DAG.getBitcast(ContainerVT, Vec);
4847 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4848 }
4849
4850 auto OpCode = IsVSlidedown ?
4851 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
4852 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
4853 if (!VT.isFloatingPoint())
4854 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
4855 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
4856 DAG.getUNDEF(ContainerVT),
4857 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
4858 Splat, TrueMask, VL);
4859 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4860}
4861
4862// Match a mask which "spreads" the leading elements of a vector evenly
4863// across the result. Factor is the spread amount, and Index is the
4864// offset applied. (on success, Index < Factor) This is the inverse
4865// of a deinterleave with the same Factor and Index. This is analogous
4866// to an interleave, except that all but one lane is undef.
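// For example, <0,-1,1,-1,2,-1,3,-1> is a spread with Factor = 2, Index = 0,
// and <-1,0,-1,1,-1,2,-1,3> is the same spread with Index = 1.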
4867static bool isSpreadMask(ArrayRef<int> Mask, unsigned Factor, unsigned &Index) {
4868 SmallVector<bool> LaneIsUndef(Factor, true);
4869 for (unsigned i = 0; i < Mask.size(); i++)
4870 LaneIsUndef[i % Factor] &= (Mask[i] == -1);
4871
4872 bool Found = false;
4873 for (unsigned i = 0; i < Factor; i++) {
4874 if (LaneIsUndef[i])
4875 continue;
4876 if (Found)
4877 return false;
4878 Index = i;
4879 Found = true;
4880 }
4881 if (!Found)
4882 return false;
4883
4884 for (unsigned i = 0; i < Mask.size() / Factor; i++) {
4885 unsigned j = i * Factor + Index;
4886 if (Mask[j] != -1 && (unsigned)Mask[j] != i)
4887 return false;
4888 }
4889 return true;
4890}
4891
4892// Given a vector a, b, c, d return a vector Factor times longer
4893// with Factor-1 undef's between elements. Ex:
4894// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
4895// undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
4896static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
4897 const SDLoc &DL, SelectionDAG &DAG) {
4898
4899 MVT VT = V.getSimpleValueType();
4900 unsigned EltBits = VT.getScalarSizeInBits();
4901 ElementCount EC = VT.getVectorElementCount();
4902 V = DAG.getBitcast(VT.changeTypeToInteger(), V);
4903
4904 MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);
4905
4906 SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, V);
4907 // TODO: On rv32, the constant becomes a splat_vector_parts which does not
4908 // allow the SHL to fold away if Index is 0.
4909 if (Index != 0)
4910 Result = DAG.getNode(ISD::SHL, DL, WideVT, Result,
4911 DAG.getConstant(EltBits * Index, DL, WideVT));
4912 // Make sure to use original element type
4913 MVT ResultVT = MVT::getVectorVT(VT.getVectorElementType(),
4914 EC.multiplyCoefficientBy(Factor));
4915 return DAG.getBitcast(ResultVT, Result);
4916}
4917
4918// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4919// to create an interleaved vector of <[vscale x] n*2 x ty>.
4920// This requires that the size of ty is less than the subtarget's maximum ELEN.
4921 static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
4922 const SDLoc &DL, SelectionDAG &DAG,
4923 const RISCVSubtarget &Subtarget) {
4924
4925 // FIXME: Not only does this optimize the code, it fixes some correctness
4926 // issues because MIR does not have freeze.
4927 if (EvenV.isUndef())
4928 return getWideningSpread(OddV, 2, 1, DL, DAG);
4929 if (OddV.isUndef())
4930 return getWideningSpread(EvenV, 2, 0, DL, DAG);
4931
4932 MVT VecVT = EvenV.getSimpleValueType();
4933 MVT VecContainerVT = VecVT; // <vscale x n x ty>
4934 // Convert fixed vectors to scalable if needed
4935 if (VecContainerVT.isFixedLengthVector()) {
4936 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
4937 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
4938 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
4939 }
4940
4941 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4942
4943 // We're working with a vector of the same size as the resulting
4944 // interleaved vector, but with half the number of elements and
4945 // twice the SEW (Hence the restriction on not using the maximum
4946 // ELEN)
4947 MVT WideVT =
4948 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
4949 VecVT.getVectorElementCount());
4950 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
4951 if (WideContainerVT.isFixedLengthVector())
4952 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
4953
4954 // Bitcast the input vectors to integers in case they are FP
4955 VecContainerVT = VecContainerVT.changeTypeToInteger();
4956 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
4957 OddV = DAG.getBitcast(VecContainerVT, OddV);
4958
4959 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
4960 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
4961
4962 SDValue Interleaved;
4963 if (Subtarget.hasStdExtZvbb()) {
4964 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4965 SDValue OffsetVec =
4966 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
4967 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
4968 OffsetVec, Passthru, Mask, VL);
4969 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4970 Interleaved, EvenV, Passthru, Mask, VL);
4971 } else {
4972 // FIXME: We should freeze the odd vector here. We already handled the case
4973 // of provably undef/poison above.
4974
4975 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
4976 // vwaddu.vv
4977 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
4978 OddV, Passthru, Mask, VL);
4979
4980 // Then get OddV * by 2^(VecVT.getScalarSizeInBits() - 1)
4981 SDValue AllOnesVec = DAG.getSplatVector(
4982 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
4983 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
4984 OddV, AllOnesVec, Passthru, Mask, VL);
4985
4986 // Add the two together so we get
4987 // (OddV * 0xff...ff) + (OddV + EvenV)
4988 // = (OddV * 0x100...00) + EvenV
4989 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
4990 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
4991 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
4992 Interleaved, OddsMul, Passthru, Mask, VL);
4993 }
4994
4995 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
4996 MVT ResultContainerVT = MVT::getVectorVT(
4997 VecVT.getVectorElementType(), // Make sure to use original type
4998 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
4999 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
5000
5001 // Convert back to a fixed vector if needed
5002 MVT ResultVT =
5003 MVT::getVectorVT(VecVT.getVectorElementType(),
5004 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
5005 if (ResultVT.isFixedLengthVector())
5006 Interleaved =
5007 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
5008
5009 return Interleaved;
5010}
5011
5012// If we have a vector of bits that we want to reverse, we can use a vbrev on a
5013// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
5014 static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
5015 SelectionDAG &DAG,
5016 const RISCVSubtarget &Subtarget) {
5017 SDLoc DL(SVN);
5018 MVT VT = SVN->getSimpleValueType(0);
5019 SDValue V = SVN->getOperand(0);
5020 unsigned NumElts = VT.getVectorNumElements();
5021
5022 assert(VT.getVectorElementType() == MVT::i1);
5023
5024 if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
5025 SVN->getMask().size()) ||
5026 !SVN->getOperand(1).isUndef())
5027 return SDValue();
5028
5029 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
5030 EVT ViaVT = EVT::getVectorVT(
5031 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
5032 EVT ViaBitVT =
5033 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
5034
5035 // If we don't have zvbb or the larger element type > ELEN, the operation will
5036 // be illegal.
5037 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
5038 ViaVT) ||
5039 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
5040 return SDValue();
5041
5042 // If the bit vector doesn't fit exactly into the larger element type, we need
5043 // to insert it into the larger vector and then shift up the reversed bits
5044 // afterwards to get rid of the gap introduced.
5045 if (ViaEltSize > NumElts)
5046 V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
5047 V, DAG.getVectorIdxConstant(0, DL));
5048
5049 SDValue Res =
5050 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
5051
5052 // Shift up the reversed bits if the vector didn't exactly fit into the larger
5053 // element type.
5054 if (ViaEltSize > NumElts)
5055 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
5056 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
5057
5058 Res = DAG.getBitcast(ViaBitVT, Res);
5059
5060 if (ViaEltSize > NumElts)
5061 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
5062 DAG.getVectorIdxConstant(0, DL));
5063 return Res;
5064}
5065
5066 static bool isLegalBitRotate(ShuffleVectorSDNode *SVN,
5067 SelectionDAG &DAG,
5068 const RISCVSubtarget &Subtarget,
5069 MVT &RotateVT, unsigned &RotateAmt) {
5070 SDLoc DL(SVN);
5071
5072 EVT VT = SVN->getValueType(0);
5073 unsigned NumElts = VT.getVectorNumElements();
5074 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5075 unsigned NumSubElts;
5076 if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
5077 NumElts, NumSubElts, RotateAmt))
5078 return false;
5079 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
5080 NumElts / NumSubElts);
5081
5082 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
5083 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
5084}
5085
5086// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
5087// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
5088// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
5089 static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
5090 SelectionDAG &DAG,
5091 const RISCVSubtarget &Subtarget) {
5092 SDLoc DL(SVN);
5093
5094 EVT VT = SVN->getValueType(0);
5095 unsigned RotateAmt;
5096 MVT RotateVT;
5097 if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
5098 return SDValue();
5099
5100 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
5101
5102 SDValue Rotate;
5103 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
5104 // so canonicalize to vrev8.
5105 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
5106 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
5107 else
5108 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
5109 DAG.getConstant(RotateAmt, DL, RotateVT));
5110
5111 return DAG.getBitcast(VT, Rotate);
5112}
5113
5114// If compiling with an exactly known VLEN, see if we can split a
5115// shuffle on m2 or larger into a small number of m1 sized shuffles
5116// which write each destination registers exactly once.
5117 static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
5118 SelectionDAG &DAG,
5119 const RISCVSubtarget &Subtarget) {
5120 SDLoc DL(SVN);
5121 MVT VT = SVN->getSimpleValueType(0);
5122 SDValue V1 = SVN->getOperand(0);
5123 SDValue V2 = SVN->getOperand(1);
5124 ArrayRef<int> Mask = SVN->getMask();
5125
5126 // If we don't know exact data layout, not much we can do. If this
5127 // is already m1 or smaller, no point in splitting further.
5128 const auto VLen = Subtarget.getRealVLen();
5129 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
5130 return SDValue();
5131
5132 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
5133 // expansion for.
5134 unsigned RotateAmt;
5135 MVT RotateVT;
5136 if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
5137 return SDValue();
5138
5139 MVT ElemVT = VT.getVectorElementType();
5140 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
5141
5142 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5143 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
5144 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
5145 assert(M1VT == getLMUL1VT(M1VT));
5146 unsigned NumOpElts = M1VT.getVectorMinNumElements();
5147 unsigned NumElts = ContainerVT.getVectorMinNumElements();
5148 unsigned NumOfSrcRegs = NumElts / NumOpElts;
5149 unsigned NumOfDestRegs = NumElts / NumOpElts;
5150 // The following semantically builds up a fixed length concat_vector
5151 // of the component shuffle_vectors. We eagerly lower to scalable here
5152 // to avoid DAG combining it back to a large shuffle_vector again.
5153 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5154 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
5155 SmallVector<SmallVector<std::tuple<unsigned, unsigned, SmallVector<int>>>>
5156 Operands;
5157 processShuffleMasks(
5158 Mask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs,
5159 [&]() { Operands.emplace_back(); },
5160 [&](ArrayRef<int> SrcSubMask, unsigned SrcVecIdx, unsigned DstVecIdx) {
5161 Operands.emplace_back().emplace_back(
5162 SrcVecIdx, UINT_MAX,
5163 SmallVector<int>(SrcSubMask.begin(), SrcSubMask.end()));
5164 },
5165 [&](ArrayRef<int> SrcSubMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
5166 if (NewReg)
5167 Operands.emplace_back();
5168 Operands.back().emplace_back(
5169 Idx1, Idx2, SmallVector<int>(SrcSubMask.begin(), SrcSubMask.end()));
5170 });
5171 assert(Operands.size() == NumOfDestRegs && "Whole vector must be processed");
5172 // Note: check that we do not emit too many shuffles here to prevent code
5173 // size explosion.
5174  // TODO: Investigate whether extra analysis of the masks can identify cases
5175  // where emitting more shuffles is still profitable.
5176 unsigned NumShuffles = std::accumulate(
5177 Operands.begin(), Operands.end(), 0u,
5178 [&](unsigned N,
5179 ArrayRef<std::tuple<unsigned, unsigned, SmallVector<int>>> Data) {
5180 if (Data.empty())
5181 return N;
5182 N += Data.size();
5183 for (const auto &P : Data) {
5184 unsigned Idx2 = std::get<1>(P);
5185 ArrayRef<int> Mask = std::get<2>(P);
5186 if (Idx2 != UINT_MAX)
5187 ++N;
5188 else if (ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
5189 --N;
5190 }
5191 return N;
5192 });
5193 if ((NumOfDestRegs > 2 && NumShuffles > NumOfDestRegs) ||
5194 (NumOfDestRegs <= 2 && NumShuffles >= 4))
5195 return SDValue();
5196 auto ExtractValue = [&, &DAG = DAG](SDValue SrcVec, unsigned ExtractIdx) {
5197 SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
5198 DAG.getVectorIdxConstant(ExtractIdx, DL));
5199 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
5200 return SubVec;
5201 };
5202 auto PerformShuffle = [&, &DAG = DAG](SDValue SubVec1, SDValue SubVec2,
5203                                        ArrayRef<int> Mask) {
5204    SDValue SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec1, SubVec2, Mask);
5205 return SubVec;
5206 };
5207 SDValue Vec = DAG.getUNDEF(ContainerVT);
5208 for (auto [I, Data] : enumerate(Operands)) {
5209 if (Data.empty())
5210 continue;
5211    SmallDenseMap<unsigned, SDValue, 4> Values;
5212    for (unsigned I : seq<unsigned>(Data.size())) {
5213 const auto &[Idx1, Idx2, _] = Data[I];
5214 if (Values.contains(Idx1)) {
5215 assert(Idx2 != UINT_MAX && Values.contains(Idx2) &&
5216 "Expected both indices to be extracted already.");
5217 break;
5218 }
5219 SDValue V = ExtractValue(Idx1 >= NumOfSrcRegs ? V2 : V1,
5220 (Idx1 % NumOfSrcRegs) * NumOpElts);
5221 Values[Idx1] = V;
5222 if (Idx2 != UINT_MAX)
5223 Values[Idx2] = ExtractValue(Idx2 >= NumOfSrcRegs ? V2 : V1,
5224 (Idx2 % NumOfSrcRegs) * NumOpElts);
5225 }
5226 SDValue V;
5227 for (const auto &[Idx1, Idx2, Mask] : Data) {
5228 SDValue V1 = Values.at(Idx1);
5229 SDValue V2 = Idx2 == UINT_MAX ? V1 : Values.at(Idx2);
5230 V = PerformShuffle(V1, V2, Mask);
5231 Values[Idx1] = V;
5232 }
5233
5234 unsigned InsertIdx = I * NumOpElts;
5235 V = convertToScalableVector(M1VT, V, DAG, Subtarget);
5236 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, V,
5237 DAG.getVectorIdxConstant(InsertIdx, DL));
5238 }
5239 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5240}
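// Illustrative example: with an exactly known VLEN of 128, a v8i64 shuffle
// occupies four vector registers (LMUL=4), so ElemsPerVReg == 2 and OneRegVT
// is v2i64. Each of the four destination registers is then produced by a
// small number of v2i64 (m1) shuffles of extracted source registers and
// inserted back into the m4 container; the NumShuffles check above gives up
// when the decomposition would need too many m1 shuffles.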
5241
5242// Matches a subset of compress masks with a contiguous prefix of output
5243// elements. This could be extended to allow gaps by deciding which
5244// source elements to spuriously demand.
5245static bool isCompressMask(ArrayRef<int> Mask) {
5246  int Last = -1;
5247 bool SawUndef = false;
5248 for (unsigned i = 0; i < Mask.size(); i++) {
5249 if (Mask[i] == -1) {
5250 SawUndef = true;
5251 continue;
5252 }
5253 if (SawUndef)
5254 return false;
5255 if (i > (unsigned)Mask[i])
5256 return false;
5257 if (Mask[i] <= Last)
5258 return false;
5259 Last = Mask[i];
5260 }
5261 return true;
5262}
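// For example, <0, 2, 5, -1> is accepted: the defined elements are strictly
// increasing, each source index is >= its destination slot, and the only
// undefs form a trailing gap. <1, 0, 2, 3> is rejected at i == 1 because the
// source index 0 is smaller than its destination position, and <0, 2, -1, 5>
// is rejected because a defined element follows an undef.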
5263
5264/// Given a shuffle where the indices are disjoint between the two sources,
5265/// e.g.:
5266///
5267/// t2:v4i8 = vector_shuffle t0:v4i8, t1:v4i8, <2, 7, 1, 4>
5268///
5269/// Merge the two sources into one and do a single source shuffle:
5270///
5271/// t2:v4i8 = vselect t1:v4i8, t0:v4i8, <0, 1, 0, 1>
5272/// t3:v4i8 = vector_shuffle t2:v4i8, undef, <2, 3, 1, 0>
5273///
5274/// A vselect will either be merged into a masked instruction or be lowered as a
5275/// vmerge.vvm, which is cheaper than a vrgather.vv.
5276static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN,
5277                                           SelectionDAG &DAG,
5278 const RISCVSubtarget &Subtarget) {
5279 MVT VT = SVN->getSimpleValueType(0);
5280 MVT XLenVT = Subtarget.getXLenVT();
5281 SDLoc DL(SVN);
5282
5283 const ArrayRef<int> Mask = SVN->getMask();
5284
5285 // Work out which source each lane will come from.
5286 SmallVector<int, 16> Srcs(Mask.size(), -1);
5287
5288 for (int Idx : Mask) {
5289 if (Idx == -1)
5290 continue;
5291 unsigned SrcIdx = Idx % Mask.size();
5292 int Src = (uint32_t)Idx < Mask.size() ? 0 : 1;
5293 if (Srcs[SrcIdx] == -1)
5294 // Mark this source as using this lane.
5295 Srcs[SrcIdx] = Src;
5296 else if (Srcs[SrcIdx] != Src)
5297 // The other source is using this lane: not disjoint.
5298 return SDValue();
5299 }
5300
5301 SmallVector<SDValue> SelectMaskVals;
5302 for (int Lane : Srcs) {
5303 if (Lane == -1)
5304 SelectMaskVals.push_back(DAG.getUNDEF(XLenVT));
5305 else
5306 SelectMaskVals.push_back(DAG.getConstant(Lane ? 0 : 1, DL, XLenVT));
5307 }
5308 MVT MaskVT = VT.changeVectorElementType(MVT::i1);
5309 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, SelectMaskVals);
5310 SDValue Select = DAG.getNode(ISD::VSELECT, DL, VT, SelectMask,
5311 SVN->getOperand(0), SVN->getOperand(1));
5312
5313 // Move all indices relative to the first source.
5314 SmallVector<int> NewMask(Mask.size());
5315 for (unsigned I = 0; I < Mask.size(); I++) {
5316 if (Mask[I] == -1)
5317 NewMask[I] = -1;
5318 else
5319 NewMask[I] = Mask[I] % Mask.size();
5320 }
5321
5322 return DAG.getVectorShuffle(VT, DL, Select, DAG.getUNDEF(VT), NewMask);
5323}
5324
5325/// Try to widen element type to get a new mask value for a better permutation
5326/// sequence. This doesn't try to inspect the widened mask for profitability;
5327/// we speculate the widened form is equal or better. This has the effect of
5328/// reducing mask constant sizes - allowing cheaper materialization sequences
5329/// - and index sequence sizes - reducing register pressure and materialization
5330/// cost, at the cost of (possibly) an extra VTYPE toggle.
5331static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG) {
5332  SDLoc DL(Op);
5333 MVT VT = Op.getSimpleValueType();
5334 MVT ScalarVT = VT.getVectorElementType();
5335 unsigned ElementSize = ScalarVT.getFixedSizeInBits();
5336 SDValue V0 = Op.getOperand(0);
5337 SDValue V1 = Op.getOperand(1);
5338 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
5339
5340 // Avoid wasted work leading to isTypeLegal check failing below
5341 if (ElementSize > 32)
5342 return SDValue();
5343
5344 SmallVector<int, 8> NewMask;
5345 if (!widenShuffleMaskElts(Mask, NewMask))
5346 return SDValue();
5347
5348 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(ElementSize * 2)
5349 : MVT::getIntegerVT(ElementSize * 2);
5350 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
5351 if (!DAG.getTargetLoweringInfo().isTypeLegal(NewVT))
5352 return SDValue();
5353 V0 = DAG.getBitcast(NewVT, V0);
5354 V1 = DAG.getBitcast(NewVT, V1);
5355 return DAG.getBitcast(VT, DAG.getVectorShuffle(NewVT, DL, V0, V1, NewMask));
5356}
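// Illustrative example: a v8i8 shuffle with mask <2, 3, 0, 1, 6, 7, 4, 5>
// widens to a v4i16 shuffle with mask <1, 0, 3, 2>: each pair of adjacent i8
// lanes moves together, so the same permutation needs half as many index
// elements at SEW=16.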
5357
5358static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
5359                                   const RISCVSubtarget &Subtarget) {
5360 SDValue V1 = Op.getOperand(0);
5361 SDValue V2 = Op.getOperand(1);
5362 SDLoc DL(Op);
5363 MVT XLenVT = Subtarget.getXLenVT();
5364 MVT VT = Op.getSimpleValueType();
5365 unsigned NumElts = VT.getVectorNumElements();
5366 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
5367
5368 if (VT.getVectorElementType() == MVT::i1) {
5369 // Lower to a vror.vi of a larger element type if possible before we promote
5370 // i1s to i8s.
5371 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5372 return V;
5373 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
5374 return V;
5375
5376 // Promote i1 shuffle to i8 shuffle.
5377 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
5378 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
5379 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
5380 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
5381 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
5382 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
5383 ISD::SETNE);
5384 }
5385
5386 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5387
5388 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5389
5390 if (SVN->isSplat()) {
5391 const int Lane = SVN->getSplatIndex();
5392 if (Lane >= 0) {
5393 MVT SVT = VT.getVectorElementType();
5394
5395 // Turn splatted vector load into a strided load with an X0 stride.
5396 SDValue V = V1;
5397 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
5398 // with undef.
5399 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
5400 int Offset = Lane;
5401 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
5402 int OpElements =
5403 V.getOperand(0).getSimpleValueType().getVectorNumElements();
5404 V = V.getOperand(Offset / OpElements);
5405 Offset %= OpElements;
5406 }
5407
5408 // We need to ensure the load isn't atomic or volatile.
5409 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
5410 auto *Ld = cast<LoadSDNode>(V);
5411 Offset *= SVT.getStoreSize();
5412 SDValue NewAddr = DAG.getMemBasePlusOffset(
5413 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
5414
5415 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
5416 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
5417 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5418 SDValue IntID =
5419 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
5420 SDValue Ops[] = {Ld->getChain(),
5421 IntID,
5422 DAG.getUNDEF(ContainerVT),
5423 NewAddr,
5424 DAG.getRegister(RISCV::X0, XLenVT),
5425 VL};
5426 SDValue NewLoad = DAG.getMemIntrinsicNode(
5427 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
5428            DAG.getMachineFunction().getMachineMemOperand(
5429                Ld->getMemOperand(), Offset, SVT.getStoreSize()));
5430 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
5431 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5432 }
5433
5434 MVT SplatVT = ContainerVT;
5435
5436 // f16 with zvfhmin and bf16 need to use an integer scalar load.
5437 if (SVT == MVT::bf16 ||
5438 (SVT == MVT::f16 && !Subtarget.hasStdExtZfh())) {
5439 SVT = MVT::i16;
5440 SplatVT = ContainerVT.changeVectorElementType(SVT);
5441 }
5442
5443 // Otherwise use a scalar load and splat. This will give the best
5444 // opportunity to fold a splat into the operation. ISel can turn it into
5445 // the x0 strided load if we aren't able to fold away the select.
5446 if (SVT.isFloatingPoint())
5447 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
5448 Ld->getPointerInfo().getWithOffset(Offset),
5449 Ld->getOriginalAlign(),
5450 Ld->getMemOperand()->getFlags());
5451 else
5452 V = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
5453 Ld->getPointerInfo().getWithOffset(Offset), SVT,
5454 Ld->getOriginalAlign(),
5455 Ld->getMemOperand()->getFlags());
5456        DAG.makeEquivalentMemoryOrdering(Ld, V);
5457
5458 unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
5459                                                   : RISCVISD::VMV_V_X_VL;
5460        SDValue Splat =
5461 DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
5462 Splat = DAG.getBitcast(ContainerVT, Splat);
5463 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
5464 }
5465
5466 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5467 assert(Lane < (int)NumElts && "Unexpected lane!");
5468 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
5469 V1, DAG.getConstant(Lane, DL, XLenVT),
5470 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5471 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5472 }
5473 }
5474
5475 // For exact VLEN m2 or greater, try to split to m1 operations if we
5476 // can split cleanly.
5477 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
5478 return V;
5479
5480 ArrayRef<int> Mask = SVN->getMask();
5481
5482 if (SDValue V =
5483 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5484 return V;
5485
5486 if (SDValue V =
5487 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5488 return V;
5489
5490 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5491 // available.
5492 if (Subtarget.hasStdExtZvkb())
5493 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5494 return V;
5495
5496 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
5497 // be undef which can be handled with a single SLIDEDOWN/UP.
5498 int LoSrc, HiSrc;
5499 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
5500 if (Rotation > 0) {
5501 SDValue LoV, HiV;
5502 if (LoSrc >= 0) {
5503 LoV = LoSrc == 0 ? V1 : V2;
5504 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
5505 }
5506 if (HiSrc >= 0) {
5507 HiV = HiSrc == 0 ? V1 : V2;
5508 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
5509 }
5510
5511 // We found a rotation. We need to slide HiV down by Rotation. Then we need
5512 // to slide LoV up by (NumElts - Rotation).
5513 unsigned InvRotate = NumElts - Rotation;
5514
5515 SDValue Res = DAG.getUNDEF(ContainerVT);
5516 if (HiV) {
5517      // Even though we could use a smaller VL, don't, in order to avoid an
5518      // extra vsetivli toggle.
5519 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
5520 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
5521 }
5522 if (LoV)
5523 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
5524 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
5525                      RISCVII::TAIL_AGNOSTIC);
5526
5527 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5528 }
5529
5530 if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef())
5531 return DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V1);
5532
5533 // If this is a deinterleave(2,4,8) and we can widen the vector, then we can
5534 // use shift and truncate to perform the shuffle.
5535 // TODO: For Factor=6, we can perform the first step of the deinterleave via
5536 // shift-and-trunc reducing total cost for everything except an mf8 result.
5537 // TODO: For Factor=4,8, we can do the same when the ratio isn't high enough
5538 // to do the entire operation.
5539 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5540 const unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5541 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5542 for (unsigned Factor = 2; Factor <= MaxFactor; Factor <<= 1) {
5543 unsigned Index = 0;
5544 if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index) &&
5545 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
5546 if (SDValue Src = getSingleShuffleSrc(VT, ContainerVT, V1, V2))
5547 return getDeinterleaveShiftAndTrunc(DL, VT, Src, Factor, Index, DAG);
5548 }
5549 }
5550 }
5551
5552 if (SDValue V =
5553 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
5554 return V;
5555
5556 // Detect an interleave shuffle and lower to
5557 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
5558 int EvenSrc, OddSrc;
5559 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
5560 // Extract the halves of the vectors.
5561 MVT HalfVT = VT.getHalfNumVectorElementsVT();
5562
5563 // Recognize if one half is actually undef; the matching above will
5564 // otherwise reuse the even stream for the undef one. This improves
5565 // spread(2) shuffles.
5566 bool LaneIsUndef[2] = { true, true};
5567 for (unsigned i = 0; i < Mask.size(); i++)
5568 LaneIsUndef[i % 2] &= (Mask[i] == -1);
5569
5570 int Size = Mask.size();
5571 SDValue EvenV, OddV;
5572 if (LaneIsUndef[0]) {
5573 EvenV = DAG.getUNDEF(HalfVT);
5574 } else {
5575 assert(EvenSrc >= 0 && "Undef source?");
5576 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
5577 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
5578 DAG.getVectorIdxConstant(EvenSrc % Size, DL));
5579 }
5580
5581 if (LaneIsUndef[1]) {
5582 OddV = DAG.getUNDEF(HalfVT);
5583 } else {
5584 assert(OddSrc >= 0 && "Undef source?");
5585 OddV = (OddSrc / Size) == 0 ? V1 : V2;
5586 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
5587 DAG.getVectorIdxConstant(OddSrc % Size, DL));
5588 }
5589
5590 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
5591 }
5592
5593
5594 // Handle any remaining single source shuffles
5595 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
5596 if (V2.isUndef()) {
5597 // We might be able to express the shuffle as a bitrotate. But even if we
5598 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
5599 // shifts and a vor will have a higher throughput than a vrgather.
5600 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5601 return V;
5602
5603 // Before hitting generic lowering fallbacks, try to widen the mask
5604 // to a wider SEW.
5605 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
5606 return V;
5607
5608 // Can we generate a vcompress instead of a vrgather? These scale better
5609 // at high LMUL, at the cost of not being able to fold a following select
5610 // into them. The mask constants are also smaller than the index vector
5611 // constants, and thus easier to materialize.
5612 if (isCompressMask(Mask)) {
5613 SmallVector<SDValue> MaskVals(NumElts,
5614 DAG.getConstant(false, DL, XLenVT));
5615 for (auto Idx : Mask) {
5616 if (Idx == -1)
5617 break;
5618 assert(Idx >= 0 && (unsigned)Idx < NumElts);
5619 MaskVals[Idx] = DAG.getConstant(true, DL, XLenVT);
5620 }
5621 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5622 SDValue CompressMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5623 return DAG.getNode(ISD::VECTOR_COMPRESS, DL, VT, V1, CompressMask,
5624 DAG.getUNDEF(VT));
5625 }
5626
5627 // Match a spread(4,8) which can be done via extend and shift. Spread(2)
5628 // is fully covered in interleave(2) above, so it is ignored here.
5629 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5630 unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5631 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5632 for (unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) {
5633 unsigned Index;
5634 if (isSpreadMask(Mask, Factor, Index)) {
5635 MVT NarrowVT =
5636 MVT::getVectorVT(VT.getVectorElementType(), NumElts / Factor);
5637 SDValue Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT, V1,
5638 DAG.getVectorIdxConstant(0, DL));
5639 return getWideningSpread(Src, Factor, Index, DL, DAG);
5640 }
5641 }
5642 }
5643
5644 if (VT.getScalarSizeInBits() == 8 &&
5645 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
5646 // On such a vector we're unable to use i8 as the index type.
5647 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
5648 // may involve vector splitting if we're already at LMUL=8, or our
5649 // user-supplied maximum fixed-length LMUL.
5650 return SDValue();
5651 }
5652
5653 // Base case for the two operand recursion below - handle the worst case
5654 // single source shuffle.
5655 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
5656 MVT IndexVT = VT.changeTypeToInteger();
5657 // Since we can't introduce illegal index types at this stage, use i16 and
5658 // vrgatherei16 if the corresponding index type for plain vrgather is greater
5659 // than XLenVT.
5660 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
5661 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5662 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5663 }
5664
5665 // If the mask allows, we can do all the index computation in 16 bits. This
5666 // requires less work and less register pressure at high LMUL, and creates
5667 // smaller constants which may be cheaper to materialize.
5668 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
5669 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
5670 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5671 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5672 }
5673
5674 MVT IndexContainerVT =
5675 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
5676
5677 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5678 SmallVector<SDValue> GatherIndicesLHS;
5679 for (int MaskIndex : Mask) {
5680 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
5681 GatherIndicesLHS.push_back(IsLHSIndex
5682 ? DAG.getConstant(MaskIndex, DL, XLenVT)
5683 : DAG.getUNDEF(XLenVT));
5684 }
5685 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
5686 LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG,
5687 Subtarget);
5688 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
5689 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5690 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5691 }
5692
5693 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
5694 // merged with a second vrgather.
5695 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
5696
5697 // Now construct the mask that will be used by the blended vrgather operation.
5698 // Construct the appropriate indices into each vector.
5699 for (int MaskIndex : Mask) {
5700 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
5701 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
5702 ? MaskIndex : -1);
5703 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
5704 }
5705
5706 // If the mask indices are disjoint between the two sources, we can lower it
5707 // as a vselect + a single source vrgather.vv. Don't do this if we think the
5708 // operands may end up being lowered to something cheaper than a vrgather.vv.
5709 if (!DAG.isSplatValue(V2) && !DAG.isSplatValue(V1) &&
5710 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskLHS.data(), VT) &&
5711 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskRHS.data(), VT) &&
5712 !ShuffleVectorInst::isIdentityMask(ShuffleMaskLHS, NumElts) &&
5713 !ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts))
5714 if (SDValue V = lowerDisjointIndicesShuffle(SVN, DAG, Subtarget))
5715 return V;
5716
5717 // Before hitting generic lowering fallbacks, try to widen the mask
5718 // to a wider SEW.
5719 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
5720 return V;
5721
5722 // Try to pick a profitable operand order.
5723 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
5724 SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);
5725
5726 // Recursively invoke lowering for each operand if we had two
5727 // independent single source shuffles, and then combine the result via a
5728 // vselect. Note that the vselect will likely be folded back into the
5729 // second permute (vrgather, or other) by the post-isel combine.
5730 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
5731 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
5732
5733 SmallVector<SDValue> MaskVals;
5734 for (int MaskIndex : Mask) {
5735 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
5736 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5737 }
5738
5739 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5740 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5741 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5742
5743 if (SwapOps)
5744 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
5745 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
5746}
5747
5748bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
5749  // Only support legal VTs for other shuffles for now.
5750 if (!isTypeLegal(VT))
5751 return false;
5752
5753 // Support splats for any type. These should type legalize well.
5754 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
5755 return true;
5756
5757 MVT SVT = VT.getSimpleVT();
5758
5759 // Not for i1 vectors.
5760 if (SVT.getScalarType() == MVT::i1)
5761 return false;
5762
5763 int Dummy1, Dummy2;
5764 return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
5765 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
5766}
5767
5768// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
5769// the exponent.
5770SDValue
5771RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
5772 SelectionDAG &DAG) const {
5773 MVT VT = Op.getSimpleValueType();
5774 unsigned EltSize = VT.getScalarSizeInBits();
5775 SDValue Src = Op.getOperand(0);
5776 SDLoc DL(Op);
5777 MVT ContainerVT = VT;
5778
5779 SDValue Mask, VL;
5780 if (Op->isVPOpcode()) {
5781 Mask = Op.getOperand(1);
5782 if (VT.isFixedLengthVector())
5783 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5784 Subtarget);
5785 VL = Op.getOperand(2);
5786 }
5787
5788  // We choose an FP type that can represent the value exactly when possible;
5789  // otherwise we use a round-toward-zero conversion so the result's exponent is correct.
5790 // TODO: Use f16 for i8 when possible?
5791 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
5792 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
5793 FloatEltVT = MVT::f32;
5794 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
5795
5796 // Legal types should have been checked in the RISCVTargetLowering
5797 // constructor.
5798 // TODO: Splitting may make sense in some cases.
5799 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
5800 "Expected legal float type!");
5801
5802 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
5803 // The trailing zero count is equal to log2 of this single bit value.
5804 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
5805 SDValue Neg = DAG.getNegative(Src, DL, VT);
5806 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
5807 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
5808 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
5809 Src, Mask, VL);
5810 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
5811 }
5812
5813 // We have a legal FP type, convert to it.
5814 SDValue FloatVal;
5815 if (FloatVT.bitsGT(VT)) {
5816 if (Op->isVPOpcode())
5817 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
5818 else
5819 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
5820 } else {
5821 // Use RTZ to avoid rounding influencing exponent of FloatVal.
5822 if (VT.isFixedLengthVector()) {
5823 ContainerVT = getContainerForFixedLengthVector(VT);
5824 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
5825 }
5826 if (!Op->isVPOpcode())
5827 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5828 SDValue RTZRM =
5829        DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, XLenVT);
5830    MVT ContainerFloatVT =
5831 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
5832 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
5833 Src, Mask, RTZRM, VL);
5834 if (VT.isFixedLengthVector())
5835 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
5836 }
5837 // Bitcast to integer and shift the exponent to the LSB.
5838 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
5839 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
5840 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
5841
5842 SDValue Exp;
5843 // Restore back to original type. Truncation after SRL is to generate vnsrl.
5844 if (Op->isVPOpcode()) {
5845 Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,
5846 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
5847 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
5848 } else {
5849 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
5850 DAG.getConstant(ShiftAmt, DL, IntVT));
5851 if (IntVT.bitsLT(VT))
5852 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
5853 else if (IntVT.bitsGT(VT))
5854 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
5855 }
5856
5857 // The exponent contains log2 of the value in biased form.
5858 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5859 // For trailing zeros, we just need to subtract the bias.
5860 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
5861 return DAG.getNode(ISD::SUB, DL, VT, Exp,
5862 DAG.getConstant(ExponentBias, DL, VT));
5863 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5864 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
5865 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
5866
5867 // For leading zeros, we need to remove the bias and convert from log2 to
5868 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
5869 unsigned Adjust = ExponentBias + (EltSize - 1);
5870 SDValue Res;
5871 if (Op->isVPOpcode())
5872 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
5873 Mask, VL);
5874 else
5875 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
5876
5877  // With a zero input, the result above equals Adjust, which is greater than
5878  // EltSize. Hence, we can use min(Res, EltSize) for CTLZ.
5879 if (Op.getOpcode() == ISD::CTLZ)
5880 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
5881 else if (Op.getOpcode() == ISD::VP_CTLZ)
5882 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
5883 DAG.getConstant(EltSize, DL, VT), Mask, VL);
5884 return Res;
5885}
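// Worked example (scalar sketch of the same trick, for illustration):
//   ctlz_zero_undef(i32 16): uint_to_fp gives 16.0f == 2^4, whose biased
//   exponent field is 127 + 4 == 131; after the >> 23 shift Exp == 131 and
//   Res == (127 + 31) - 131 == 27 == ctlz(16).
//   cttz_zero_undef(i32 24): 24 & -24 == 8 == 2^3, so the exponent field is
//   127 + 3 == 130 and Res == 130 - 127 == 3 == cttz(24).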
5886
5887SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
5888 SelectionDAG &DAG) const {
5889 SDLoc DL(Op);
5890 MVT XLenVT = Subtarget.getXLenVT();
5891 SDValue Source = Op->getOperand(0);
5892 MVT SrcVT = Source.getSimpleValueType();
5893 SDValue Mask = Op->getOperand(1);
5894 SDValue EVL = Op->getOperand(2);
5895
5896 if (SrcVT.isFixedLengthVector()) {
5897 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
5898 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
5899 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5900 Subtarget);
5901 SrcVT = ContainerVT;
5902 }
5903
5904 // Convert to boolean vector.
5905 if (SrcVT.getScalarType() != MVT::i1) {
5906 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
5907 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
5908 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
5909 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
5910 DAG.getUNDEF(SrcVT), Mask, EVL});
5911 }
5912
5913 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
5914 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
5915 // In this case, we can interpret poison as -1, so nothing to do further.
5916 return Res;
5917
5918 // Convert -1 to VL.
5919 SDValue SetCC =
5920 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
5921 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
5922 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
5923}
5924
5925// While RVV has alignment restrictions, we should always be able to load as a
5926// legal equivalently-sized byte-typed vector instead. This method is
5927// responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
5928// the load is already correctly-aligned, it returns SDValue().
5929SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
5930 SelectionDAG &DAG) const {
5931 auto *Load = cast<LoadSDNode>(Op);
5932 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
5933
5934  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5935                                     Load->getMemoryVT(),
5936 *Load->getMemOperand()))
5937 return SDValue();
5938
5939 SDLoc DL(Op);
5940 MVT VT = Op.getSimpleValueType();
5941 unsigned EltSizeBits = VT.getScalarSizeInBits();
5942 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5943 "Unexpected unaligned RVV load type");
5944 MVT NewVT =
5945 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5946 assert(NewVT.isValid() &&
5947 "Expecting equally-sized RVV vector types to be legal");
5948 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
5949 Load->getPointerInfo(), Load->getOriginalAlign(),
5950 Load->getMemOperand()->getFlags());
5951 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
5952}
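// For example, a v4i32 load with only 1-byte alignment (on a subtarget that
// requires element alignment for vector accesses) is re-expressed here as a
// v16i8 load of the same address followed by a bitcast back to v4i32, since
// byte loads have no alignment requirement.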
5953
5954// While RVV has alignment restrictions, we should always be able to store as a
5955// legal equivalently-sized byte-typed vector instead. This method is
5956// responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
5957// returns SDValue() if the store is already correctly aligned.
5958SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
5959 SelectionDAG &DAG) const {
5960 auto *Store = cast<StoreSDNode>(Op);
5961 assert(Store && Store->getValue().getValueType().isVector() &&
5962 "Expected vector store");
5963
5964  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5965                                     Store->getMemoryVT(),
5966 *Store->getMemOperand()))
5967 return SDValue();
5968
5969 SDLoc DL(Op);
5970 SDValue StoredVal = Store->getValue();
5971 MVT VT = StoredVal.getSimpleValueType();
5972 unsigned EltSizeBits = VT.getScalarSizeInBits();
5973 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5974 "Unexpected unaligned RVV store type");
5975 MVT NewVT =
5976 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5977 assert(NewVT.isValid() &&
5978 "Expecting equally-sized RVV vector types to be legal");
5979 StoredVal = DAG.getBitcast(NewVT, StoredVal);
5980 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
5981 Store->getPointerInfo(), Store->getOriginalAlign(),
5982 Store->getMemOperand()->getFlags());
5983}
5984
5985static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
5986                             const RISCVSubtarget &Subtarget) {
5987 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
5988
5989 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
5990
5991 // All simm32 constants should be handled by isel.
5992  // NOTE: The getMaxBuildIntsCost call below should return a value >= 2,
5993  // making this check redundant, but small immediates are common, so checking
5994  // here first is better for compile time.
5995 if (isInt<32>(Imm))
5996 return Op;
5997
5998 // We only need to cost the immediate, if constant pool lowering is enabled.
5999 if (!Subtarget.useConstantPoolForLargeInts())
6000 return Op;
6001
6002  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
6003  if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
6004 return Op;
6005
6006 // Optimizations below are disabled for opt size. If we're optimizing for
6007 // size, use a constant pool.
6008 if (DAG.shouldOptForSize())
6009 return SDValue();
6010
6011  // Special case: see if we can build the constant as (ADD (SLLI X, C), X);
6012  // do that if it will avoid a constant pool.
6013  // It will require an extra temporary register though.
6014  // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
6015  // the low and high 32 bits are the same and bits 31 and 63 are set.
6016 unsigned ShiftAmt, AddOpc;
6017 RISCVMatInt::InstSeq SeqLo =
6018 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
6019 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
6020 return Op;
6021
6022 return SDValue();
6023}
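// For example, a 64-bit constant whose low and high halves are identical,
// such as 0x0F0F0F0F0F0F0F0F, can be rebuilt from its low half X as
// (ADD (SLLI X, 32), X) (or ADD_UW with Zba when bits 31 and 63 are set),
// which is what the SeqLo.size() + 2 check above accounts for before falling
// back to a constant pool load.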
6024
6025SDValue RISCVTargetLowering::lowerConstantFP(SDValue Op,
6026 SelectionDAG &DAG) const {
6027 MVT VT = Op.getSimpleValueType();
6028 const APFloat &Imm = cast<ConstantFPSDNode>(Op)->getValueAPF();
6029
6030 // Can this constant be selected by a Zfa FLI instruction?
6031 bool Negate = false;
6032 int Index = getLegalZfaFPImm(Imm, VT);
6033
6034 // If the constant is negative, try negating.
6035 if (Index < 0 && Imm.isNegative()) {
6036 Index = getLegalZfaFPImm(-Imm, VT);
6037 Negate = true;
6038 }
6039
6040 // If we couldn't find a FLI lowering, fall back to generic code.
6041 if (Index < 0)
6042 return SDValue();
6043
6044 // Emit an FLI+FNEG. We use a custom node to hide from constant folding.
6045 SDLoc DL(Op);
6046 SDValue Const =
6047 DAG.getNode(RISCVISD::FLI, DL, VT,
6048 DAG.getTargetConstant(Index, DL, Subtarget.getXLenVT()));
6049 if (!Negate)
6050 return Const;
6051
6052 return DAG.getNode(ISD::FNEG, DL, VT, Const);
6053}
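// For example, with Zfa, 2.0 is in the FLI immediate table but -2.0 is not
// (the only negative entry is -1.0), so an f32 constant of -2.0 is lowered
// here as an fli.s of 2.0 followed by a negation.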
6054
6055static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
6056                                 const RISCVSubtarget &Subtarget) {
6057 SDLoc dl(Op);
6058 AtomicOrdering FenceOrdering =
6059 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
6060 SyncScope::ID FenceSSID =
6061 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
6062
6063 if (Subtarget.hasStdExtZtso()) {
6064 // The only fence that needs an instruction is a sequentially-consistent
6065 // cross-thread fence.
6066 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
6067 FenceSSID == SyncScope::System)
6068 return Op;
6069
6070 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6071 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6072 }
6073
6074 // singlethread fences only synchronize with signal handlers on the same
6075 // thread and thus only need to preserve instruction order, not actually
6076 // enforce memory ordering.
6077 if (FenceSSID == SyncScope::SingleThread)
6078 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6079 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6080
6081 return Op;
6082}
6083
6084SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
6085 SelectionDAG &DAG) const {
6086 SDLoc DL(Op);
6087 MVT VT = Op.getSimpleValueType();
6088 MVT XLenVT = Subtarget.getXLenVT();
6089 unsigned Check = Op.getConstantOperandVal(1);
6090 unsigned TDCMask = 0;
6091 if (Check & fcSNan)
6092 TDCMask |= RISCV::FPMASK_Signaling_NaN;
6093 if (Check & fcQNan)
6094 TDCMask |= RISCV::FPMASK_Quiet_NaN;
6095  if (Check & fcPosInf)
6096    TDCMask |= RISCV::FPMASK_Positive_Infinity;
6097  if (Check & fcNegInf)
6098    TDCMask |= RISCV::FPMASK_Negative_Infinity;
6099  if (Check & fcPosNormal)
6100    TDCMask |= RISCV::FPMASK_Positive_Normal;
6101  if (Check & fcNegNormal)
6102    TDCMask |= RISCV::FPMASK_Negative_Normal;
6103  if (Check & fcPosSubnormal)
6104    TDCMask |= RISCV::FPMASK_Positive_Subnormal;
6105  if (Check & fcNegSubnormal)
6106    TDCMask |= RISCV::FPMASK_Negative_Subnormal;
6107  if (Check & fcPosZero)
6108 TDCMask |= RISCV::FPMASK_Positive_Zero;
6109 if (Check & fcNegZero)
6110 TDCMask |= RISCV::FPMASK_Negative_Zero;
6111
6112 bool IsOneBitMask = isPowerOf2_32(TDCMask);
6113
6114 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
6115
6116 if (VT.isVector()) {
6117 SDValue Op0 = Op.getOperand(0);
6118 MVT VT0 = Op.getOperand(0).getSimpleValueType();
6119
6120 if (VT.isScalableVector()) {
6121      MVT DstVT = VT0.changeVectorElementTypeToInteger();
6122      auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
6123 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6124 Mask = Op.getOperand(2);
6125 VL = Op.getOperand(3);
6126 }
6127 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
6128 VL, Op->getFlags());
6129 if (IsOneBitMask)
6130 return DAG.getSetCC(DL, VT, FPCLASS,
6131                            DAG.getConstant(TDCMask, DL, DstVT),
6132                            ISD::SETEQ);
6133 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
6134 DAG.getConstant(TDCMask, DL, DstVT));
6135 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
6136 ISD::SETNE);
6137 }
6138
6139 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
6140 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6141 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
6142 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
6143 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6144 Mask = Op.getOperand(2);
6145 MVT MaskContainerVT =
6146 getContainerForFixedLengthVector(Mask.getSimpleValueType());
6147 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
6148 VL = Op.getOperand(3);
6149 }
6150 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
6151
6152 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
6153 Mask, VL, Op->getFlags());
6154
6155 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6156 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
6157 if (IsOneBitMask) {
6158 SDValue VMSEQ =
6159 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6160 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
6161 DAG.getUNDEF(ContainerVT), Mask, VL});
6162 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
6163 }
6164 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
6165 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
6166
6167 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
6168 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6169 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
6170
6171 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6172 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
6173 DAG.getUNDEF(ContainerVT), Mask, VL});
6174 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
6175 }
6176
6177 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
6178 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
6179  SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
6180                             ISD::SETNE);
6181 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6182}
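// For example, is.fpclass(x, fcPosInf) sets only FPMASK_Positive_Infinity, so
// TDCMask is a single bit: the vector path then emits one vmseq against the
// mask value instead of an AND followed by vmsne, while the scalar path tests
// the fclass result with a single AND + setcc.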
6183
6184// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
6185// operations propagate nans.
6187 const RISCVSubtarget &Subtarget) {
6188 SDLoc DL(Op);
6189 MVT VT = Op.getSimpleValueType();
6190
6191 SDValue X = Op.getOperand(0);
6192 SDValue Y = Op.getOperand(1);
6193
6194 if (!VT.isVector()) {
6195 MVT XLenVT = Subtarget.getXLenVT();
6196
6197    // If X is a NaN, replace Y with X. If Y is a NaN, replace X with Y. This
6198    // ensures that when one input is a NaN, the other will also be a NaN,
6199    // allowing the NaN to propagate. If both inputs are NaN, this will swap
6200    // the inputs, which is harmless.
6201
6202 SDValue NewY = Y;
6203 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
6204 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
6205 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
6206 }
6207
6208 SDValue NewX = X;
6209 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
6210 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
6211 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
6212 }
6213
6214 unsigned Opc =
6215 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
6216 return DAG.getNode(Opc, DL, VT, NewX, NewY);
6217 }
6218
6219  // Check for NaNs before converting the fixed-length vectors to scalable ones.
6220 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
6221 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
6222
6223 MVT ContainerVT = VT;
6224 if (VT.isFixedLengthVector()) {
6225 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
6226 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
6227 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
6228 }
6229
6230 SDValue Mask, VL;
6231 if (Op->isVPOpcode()) {
6232 Mask = Op.getOperand(2);
6233 if (VT.isFixedLengthVector())
6234 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6235 Subtarget);
6236 VL = Op.getOperand(3);
6237 } else {
6238 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6239 }
6240
6241 SDValue NewY = Y;
6242 if (!XIsNeverNan) {
6243 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6244 {X, X, DAG.getCondCode(ISD::SETOEQ),
6245 DAG.getUNDEF(ContainerVT), Mask, VL});
6246 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
6247 DAG.getUNDEF(ContainerVT), VL);
6248 }
6249
6250 SDValue NewX = X;
6251 if (!YIsNeverNan) {
6252 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6253 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
6254 DAG.getUNDEF(ContainerVT), Mask, VL});
6255 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
6256 DAG.getUNDEF(ContainerVT), VL);
6257 }
6258
6259 unsigned Opc =
6260      Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
6261          ? RISCVISD::VFMAX_VL
6262          : RISCVISD::VFMIN_VL;
6263 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
6264 DAG.getUNDEF(ContainerVT), Mask, VL);
6265 if (VT.isFixedLengthVector())
6266 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
6267 return Res;
6268}
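// Illustrative example: for fmaximum(x, y) with x == NaN and y == 1.0, the
// x == x test is false, so NewY becomes x (NaN) while NewX stays x; the
// resulting fmax(NaN, NaN) returns a canonical NaN, as fmaximum requires,
// whereas a plain fmax(x, y) would have returned 1.0.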
6269
6271 const RISCVSubtarget &Subtarget) {
6272 bool IsFABS = Op.getOpcode() == ISD::FABS;
6273 assert((IsFABS || Op.getOpcode() == ISD::FNEG) &&
6274 "Wrong opcode for lowering FABS or FNEG.");
6275
6276 MVT XLenVT = Subtarget.getXLenVT();
6277 MVT VT = Op.getSimpleValueType();
6278 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
6279
6280 SDLoc DL(Op);
6281 SDValue Fmv =
6282 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op.getOperand(0));
6283
6284 APInt Mask = IsFABS ? APInt::getSignedMaxValue(16) : APInt::getSignMask(16);
6285 Mask = Mask.sext(Subtarget.getXLen());
6286
6287 unsigned LogicOpc = IsFABS ? ISD::AND : ISD::XOR;
6288 SDValue Logic =
6289 DAG.getNode(LogicOpc, DL, XLenVT, Fmv, DAG.getConstant(Mask, DL, XLenVT));
6290 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, Logic);
6291}
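// Concretely: the f16/bf16 value is moved to an integer register with
// fmv.x.h; FABS then ANDs with 0x7fff and FNEG XORs with the sign-extended
// 0x8000 (only bit 15 matters, since the final fmv.h.x ignores the upper
// bits), before the result is moved back with fmv.h.x.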
6292
6294 const RISCVSubtarget &Subtarget) {
6295 assert(Op.getOpcode() == ISD::FCOPYSIGN && "Unexpected opcode");
6296
6297 MVT XLenVT = Subtarget.getXLenVT();
6298 MVT VT = Op.getSimpleValueType();
6299 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
6300
6301 SDValue Mag = Op.getOperand(0);
6302 SDValue Sign = Op.getOperand(1);
6303
6304 SDLoc DL(Op);
6305
6306 // Get sign bit into an integer value.
6307 SDValue SignAsInt;
6308 unsigned SignSize = Sign.getValueSizeInBits();
6309 if (SignSize == Subtarget.getXLen()) {
6310 SignAsInt = DAG.getNode(ISD::BITCAST, DL, XLenVT, Sign);
6311 } else if (SignSize == 16) {
6312 SignAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Sign);
6313 } else if (SignSize == 32) {
6314 SignAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, Sign);
6315 } else if (SignSize == 64) {
6316 assert(XLenVT == MVT::i32 && "Unexpected type");
6317 // Copy the upper word to integer.
6318 SignAsInt = DAG.getNode(RISCVISD::SplitF64, DL, {MVT::i32, MVT::i32}, Sign)
6319 .getValue(1);
6320 SignSize = 32;
6321 } else
6322 llvm_unreachable("Unexpected sign size");
6323
6324 // Get the signbit at the right position for MagAsInt.
6325 int ShiftAmount = (int)SignSize - (int)Mag.getValueSizeInBits();
6326 if (ShiftAmount > 0) {
6327 SignAsInt = DAG.getNode(ISD::SRL, DL, XLenVT, SignAsInt,
6328 DAG.getConstant(ShiftAmount, DL, XLenVT));
6329 } else if (ShiftAmount < 0) {
6330 SignAsInt = DAG.getNode(ISD::SHL, DL, XLenVT, SignAsInt,
6331 DAG.getConstant(-ShiftAmount, DL, XLenVT));
6332 }
6333
6334 // Mask the sign bit and any bits above it. The extra bits will be dropped
6335 // when we convert back to FP.
6336 SDValue SignMask = DAG.getConstant(
6337 APInt::getSignMask(16).sext(Subtarget.getXLen()), DL, XLenVT);
6338 SDValue SignBit = DAG.getNode(ISD::AND, DL, XLenVT, SignAsInt, SignMask);
6339
6340 // Transform Mag value to integer, and clear the sign bit.
6341 SDValue MagAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Mag);
6342 SDValue ClearSignMask = DAG.getConstant(
6343 APInt::getSignedMaxValue(16).sext(Subtarget.getXLen()), DL, XLenVT);
6344 SDValue ClearedSign =
6345 DAG.getNode(ISD::AND, DL, XLenVT, MagAsInt, ClearSignMask);
6346
6347  SDValue CopiedSign = DAG.getNode(ISD::OR, DL, XLenVT, ClearedSign, SignBit,
6348                                   SDNodeFlags::Disjoint);
6349
6350 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, CopiedSign);
6351}
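// Illustrative example: copysign of an f16 magnitude from an f32 sign has
// SignSize == 32 and a 16-bit Mag, so ShiftAmount == 16: the f32 bits are
// shifted right by 16 to line the sign up with bit 15, masked with the
// sign-extended 0x8000, ORed with the magnitude masked to 0x7fff, and the
// result is moved back with fmv.h.x.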
6352
6353/// Get a RISC-V target specified VL op for a given SDNode.
6354static unsigned getRISCVVLOp(SDValue Op) {
6355#define OP_CASE(NODE) \
6356 case ISD::NODE: \
6357 return RISCVISD::NODE##_VL;
6358#define VP_CASE(NODE) \
6359 case ISD::VP_##NODE: \
6360 return RISCVISD::NODE##_VL;
6361 // clang-format off
6362 switch (Op.getOpcode()) {
6363 default:
6364 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
6365 OP_CASE(ADD)
6366 OP_CASE(SUB)
6367 OP_CASE(MUL)
6368 OP_CASE(MULHS)
6369 OP_CASE(MULHU)
6370 OP_CASE(SDIV)
6371 OP_CASE(SREM)
6372 OP_CASE(UDIV)
6373 OP_CASE(UREM)
6374 OP_CASE(SHL)
6375 OP_CASE(SRA)
6376 OP_CASE(SRL)
6377 OP_CASE(ROTL)
6378 OP_CASE(ROTR)
6379 OP_CASE(BSWAP)
6380 OP_CASE(CTTZ)
6381 OP_CASE(CTLZ)
6382 OP_CASE(CTPOP)
6383 OP_CASE(BITREVERSE)
6384 OP_CASE(SADDSAT)
6385 OP_CASE(UADDSAT)
6386 OP_CASE(SSUBSAT)
6387 OP_CASE(USUBSAT)
6388 OP_CASE(AVGFLOORS)
6389 OP_CASE(AVGFLOORU)
6390 OP_CASE(AVGCEILS)
6391 OP_CASE(AVGCEILU)
6392 OP_CASE(FADD)
6393 OP_CASE(FSUB)
6394 OP_CASE(FMUL)
6395 OP_CASE(FDIV)
6396 OP_CASE(FNEG)
6397 OP_CASE(FABS)
6398 OP_CASE(FSQRT)
6399 OP_CASE(SMIN)
6400 OP_CASE(SMAX)
6401 OP_CASE(UMIN)
6402 OP_CASE(UMAX)
6403 OP_CASE(STRICT_FADD)
6404 OP_CASE(STRICT_FSUB)
6405 OP_CASE(STRICT_FMUL)
6406 OP_CASE(STRICT_FDIV)
6407 OP_CASE(STRICT_FSQRT)
6408 VP_CASE(ADD) // VP_ADD
6409 VP_CASE(SUB) // VP_SUB
6410 VP_CASE(MUL) // VP_MUL
6411 VP_CASE(SDIV) // VP_SDIV
6412 VP_CASE(SREM) // VP_SREM
6413 VP_CASE(UDIV) // VP_UDIV
6414 VP_CASE(UREM) // VP_UREM
6415 VP_CASE(SHL) // VP_SHL
6416 VP_CASE(FADD) // VP_FADD
6417 VP_CASE(FSUB) // VP_FSUB
6418 VP_CASE(FMUL) // VP_FMUL
6419 VP_CASE(FDIV) // VP_FDIV
6420 VP_CASE(FNEG) // VP_FNEG
6421 VP_CASE(FABS) // VP_FABS
6422 VP_CASE(SMIN) // VP_SMIN
6423 VP_CASE(SMAX) // VP_SMAX
6424 VP_CASE(UMIN) // VP_UMIN
6425 VP_CASE(UMAX) // VP_UMAX
6426 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
6427 VP_CASE(SETCC) // VP_SETCC
6428 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
6429 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
6430 VP_CASE(BITREVERSE) // VP_BITREVERSE
6431 VP_CASE(SADDSAT) // VP_SADDSAT
6432 VP_CASE(UADDSAT) // VP_UADDSAT
6433 VP_CASE(SSUBSAT) // VP_SSUBSAT
6434 VP_CASE(USUBSAT) // VP_USUBSAT
6435 VP_CASE(BSWAP) // VP_BSWAP
6436 VP_CASE(CTLZ) // VP_CTLZ
6437 VP_CASE(CTTZ) // VP_CTTZ
6438 VP_CASE(CTPOP) // VP_CTPOP
6439  case ISD::CTLZ_ZERO_UNDEF:
6440  case ISD::VP_CTLZ_ZERO_UNDEF:
6441 return RISCVISD::CTLZ_VL;
6442  case ISD::CTTZ_ZERO_UNDEF:
6443  case ISD::VP_CTTZ_ZERO_UNDEF:
6444 return RISCVISD::CTTZ_VL;
6445 case ISD::FMA:
6446 case ISD::VP_FMA:
6447 return RISCVISD::VFMADD_VL;
6448  case ISD::STRICT_FMA:
6449    return RISCVISD::STRICT_VFMADD_VL;
6450 case ISD::AND:
6451 case ISD::VP_AND:
6452 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6453 return RISCVISD::VMAND_VL;
6454 return RISCVISD::AND_VL;
6455 case ISD::OR:
6456 case ISD::VP_OR:
6457 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6458 return RISCVISD::VMOR_VL;
6459 return RISCVISD::OR_VL;
6460 case ISD::XOR:
6461 case ISD::VP_XOR:
6462 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6463 return RISCVISD::VMXOR_VL;
6464 return RISCVISD::XOR_VL;
6465 case ISD::VP_SELECT:
6466 case ISD::VP_MERGE:
6467 return RISCVISD::VMERGE_VL;
6468 case ISD::VP_SRA:
6469 return RISCVISD::SRA_VL;
6470 case ISD::VP_SRL:
6471 return RISCVISD::SRL_VL;
6472 case ISD::VP_SQRT:
6473 return RISCVISD::FSQRT_VL;
6474 case ISD::VP_SIGN_EXTEND:
6475 return RISCVISD::VSEXT_VL;
6476 case ISD::VP_ZERO_EXTEND:
6477 return RISCVISD::VZEXT_VL;
6478  case ISD::VP_FP_TO_SINT:
6479    return RISCVISD::VFCVT_RTZ_X_F_VL;
6480  case ISD::VP_FP_TO_UINT:
6481    return RISCVISD::VFCVT_RTZ_XU_F_VL;
6482 case ISD::FMINNUM:
6483 case ISD::VP_FMINNUM:
6484 return RISCVISD::VFMIN_VL;
6485 case ISD::FMAXNUM:
6486 case ISD::VP_FMAXNUM:
6487 return RISCVISD::VFMAX_VL;
6488 case ISD::LRINT:
6489 case ISD::VP_LRINT:
6490 case ISD::LLRINT:
6491 case ISD::VP_LLRINT:
6493 }
6494 // clang-format on
6495#undef OP_CASE
6496#undef VP_CASE
6497}
6498
6499/// Return true if a RISC-V target specified op has a passthru operand.
6500static bool hasPassthruOp(unsigned Opcode) {
6501 assert(Opcode > RISCVISD::FIRST_NUMBER &&
6503 "not a RISC-V target specific op");
6504 static_assert(
6507 "adding target specific op should update this function");
6508 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
6509 return true;
6510 if (Opcode == RISCVISD::FCOPYSIGN_VL)
6511 return true;
6512 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
6513 return true;
6514 if (Opcode == RISCVISD::SETCC_VL)
6515 return true;
6516 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
6517 return true;
6518 if (Opcode == RISCVISD::VMERGE_VL)
6519 return true;
6520 return false;
6521}
6522
6523/// Return true if a RISC-V target specified op has a mask operand.
6524static bool hasMaskOp(unsigned Opcode) {
6525 assert(Opcode > RISCVISD::FIRST_NUMBER &&
6527 "not a RISC-V target specific op");
6528 static_assert(
6531 "adding target specific op should update this function");
6532 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
6533 return true;
6534 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
6535 return true;
6536 if (Opcode >= RISCVISD::STRICT_FADD_VL &&
6537      Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL)
6538    return true;
6539 return false;
6540}
6541
6542static bool isPromotedOpNeedingSplit(SDValue Op,
6543                                     const RISCVSubtarget &Subtarget) {
6544 if (Op.getValueType() == MVT::nxv32f16 &&
6545 (Subtarget.hasVInstructionsF16Minimal() &&
6546 !Subtarget.hasVInstructionsF16()))
6547 return true;
6548 if (Op.getValueType() == MVT::nxv32bf16)
6549 return true;
6550 return false;
6551}
6552
6553static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
6554  auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6555 SDLoc DL(Op);
6556
6557  SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6558  SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6559
6560 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6561 if (!Op.getOperand(j).getValueType().isVector()) {
6562 LoOperands[j] = Op.getOperand(j);
6563 HiOperands[j] = Op.getOperand(j);
6564 continue;
6565 }
6566 std::tie(LoOperands[j], HiOperands[j]) =
6567 DAG.SplitVector(Op.getOperand(j), DL);
6568 }
6569
6570 SDValue LoRes =
6571 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6572 SDValue HiRes =
6573 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6574
6575 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6576}
6577
6578static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
6579  assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
6580 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6581 SDLoc DL(Op);
6582
6583  SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6584  SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6585
6586 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6587 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
6588 std::tie(LoOperands[j], HiOperands[j]) =
6589 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
6590 continue;
6591 }
6592 if (!Op.getOperand(j).getValueType().isVector()) {
6593 LoOperands[j] = Op.getOperand(j);
6594 HiOperands[j] = Op.getOperand(j);
6595 continue;
6596 }
6597 std::tie(LoOperands[j], HiOperands[j]) =
6598 DAG.SplitVector(Op.getOperand(j), DL);
6599 }
6600
6601 SDValue LoRes =
6602 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6603 SDValue HiRes =
6604 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6605
6606 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6607}
6608
6609static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
6610  SDLoc DL(Op);
6611
6612 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
6613 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
6614 auto [EVLLo, EVLHi] =
6615 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
6616
6617 SDValue ResLo =
6618 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6619 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
6620 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6621 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
6622}
6623
6624static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
6625
6626 assert(Op->isStrictFPOpcode());
6627
6628 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
6629
6630 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
6631 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
6632
6633 SDLoc DL(Op);
6634
6635  SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6636  SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6637
6638 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6639 if (!Op.getOperand(j).getValueType().isVector()) {
6640 LoOperands[j] = Op.getOperand(j);
6641 HiOperands[j] = Op.getOperand(j);
6642 continue;
6643 }
6644 std::tie(LoOperands[j], HiOperands[j]) =
6645 DAG.SplitVector(Op.getOperand(j), DL);
6646 }
6647
6648 SDValue LoRes =
6649 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
6650 HiOperands[0] = LoRes.getValue(1);
6651 SDValue HiRes =
6652 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
6653
6654 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
6655 LoRes.getValue(0), HiRes.getValue(0));
6656 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
6657}
6658
6659SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
6660                                            SelectionDAG &DAG) const {
6661 switch (Op.getOpcode()) {
6662 default:
6663 report_fatal_error("unimplemented operand");
6664 case ISD::ATOMIC_FENCE:
6665 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
6666 case ISD::GlobalAddress:
6667 return lowerGlobalAddress(Op, DAG);
6668 case ISD::BlockAddress:
6669 return lowerBlockAddress(Op, DAG);
6670 case ISD::ConstantPool:
6671 return lowerConstantPool(Op, DAG);
6672 case ISD::JumpTable:
6673 return lowerJumpTable(Op, DAG);
6674  case ISD::GlobalTLSAddress:
6675    return lowerGlobalTLSAddress(Op, DAG);
6676 case ISD::Constant:
6677 return lowerConstant(Op, DAG, Subtarget);
6678 case ISD::ConstantFP:
6679 return lowerConstantFP(Op, DAG);
6680 case ISD::SELECT:
6681 return lowerSELECT(Op, DAG);
6682 case ISD::BRCOND:
6683 return lowerBRCOND(Op, DAG);
6684 case ISD::VASTART:
6685 return lowerVASTART(Op, DAG);
6686 case ISD::FRAMEADDR:
6687 return lowerFRAMEADDR(Op, DAG);
6688 case ISD::RETURNADDR:
6689 return lowerRETURNADDR(Op, DAG);
6690 case ISD::SHL_PARTS:
6691 return lowerShiftLeftParts(Op, DAG);
6692 case ISD::SRA_PARTS:
6693 return lowerShiftRightParts(Op, DAG, true);
6694 case ISD::SRL_PARTS:
6695 return lowerShiftRightParts(Op, DAG, false);
6696 case ISD::ROTL:
6697 case ISD::ROTR:
6698 if (Op.getValueType().isFixedLengthVector()) {
6699 assert(Subtarget.hasStdExtZvkb());
6700 return lowerToScalableOp(Op, DAG);
6701 }
6702 assert(Subtarget.hasVendorXTHeadBb() &&
6703 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
6704 "Unexpected custom legalization");
6705 // XTHeadBb only supports rotate by constant.
6706 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6707 return SDValue();
6708 return Op;
6709 case ISD::BITCAST: {
6710 SDLoc DL(Op);
6711 EVT VT = Op.getValueType();
6712 SDValue Op0 = Op.getOperand(0);
6713 EVT Op0VT = Op0.getValueType();
6714 MVT XLenVT = Subtarget.getXLenVT();
6715 if (Op0VT == MVT::i16 &&
6716 ((VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
6717 (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
6718 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6719 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, NewOp0);
6720 }
6721 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
6722 Subtarget.hasStdExtFOrZfinx()) {
6723 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6724 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
6725 }
6726 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit() &&
6727 Subtarget.hasStdExtDOrZdinx()) {
6728 SDValue Lo, Hi;
6729 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
6730 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
6731 }
6732
6733 // Consider other scalar<->scalar casts as legal if the types are legal.
6734 // Otherwise expand them.
6735 if (!VT.isVector() && !Op0VT.isVector()) {
6736 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
6737 return Op;
6738 return SDValue();
6739 }
6740
6741 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
6742 "Unexpected types");
6743
6744 if (VT.isFixedLengthVector()) {
6745 // We can handle fixed length vector bitcasts with a simple replacement
6746 // in isel.
6747 if (Op0VT.isFixedLengthVector())
6748 return Op;
6749 // When bitcasting from scalar to fixed-length vector, insert the scalar
6750 // into a one-element vector of the result type, and perform a vector
6751 // bitcast.
6752 if (!Op0VT.isVector()) {
6753 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
6754 if (!isTypeLegal(BVT))
6755 return SDValue();
6756 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
6757 DAG.getUNDEF(BVT), Op0,
6758 DAG.getVectorIdxConstant(0, DL)));
6759 }
6760 return SDValue();
6761 }
6762 // Custom-legalize bitcasts from fixed-length vector types to scalar types
6763 // thus: bitcast the vector to a one-element vector type whose element type
6764 // is the same as the result type, and extract the first element.
6765 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
6766 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
6767 if (!isTypeLegal(BVT))
6768 return SDValue();
6769 SDValue BVec = DAG.getBitcast(BVT, Op0);
6770 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
6771 DAG.getVectorIdxConstant(0, DL));
6772 }
6773 return SDValue();
6774 }
6775 case ISD::INTRINSIC_WO_CHAIN:
6776 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
6777 case ISD::INTRINSIC_W_CHAIN:
6778 return LowerINTRINSIC_W_CHAIN(Op, DAG);
6779 case ISD::INTRINSIC_VOID:
6780 return LowerINTRINSIC_VOID(Op, DAG);
6781 case ISD::IS_FPCLASS:
6782 return LowerIS_FPCLASS(Op, DAG);
6783 case ISD::BITREVERSE: {
6784 MVT VT = Op.getSimpleValueType();
6785 if (VT.isFixedLengthVector()) {
6786 assert(Subtarget.hasStdExtZvbb());
6787 return lowerToScalableOp(Op, DAG);
6788 }
6789 SDLoc DL(Op);
6790 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
6791 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
6792 // Expand bitreverse to a bswap(rev8) followed by brev8.
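 // For an i32 value, for example, rev8 reverses the byte order and brev8 then
 // reverses the bits within each byte, which together reverse all 32 bits.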
6793 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
6794 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
6795 }
6796 case ISD::TRUNCATE:
6797 case ISD::TRUNCATE_SSAT_S:
6798 case ISD::TRUNCATE_USAT_U:
6799 // Only custom-lower vector truncates
6800 if (!Op.getSimpleValueType().isVector())
6801 return Op;
6802 return lowerVectorTruncLike(Op, DAG);
6803 case ISD::ANY_EXTEND:
6804 case ISD::ZERO_EXTEND:
6805 if (Op.getOperand(0).getValueType().isVector() &&
6806 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6807 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
6808 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
6809 case ISD::SIGN_EXTEND:
6810 if (Op.getOperand(0).getValueType().isVector() &&
6811 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6812 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
6813 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
6814 case ISD::SPLAT_VECTOR_PARTS:
6815 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
6816 case ISD::INSERT_VECTOR_ELT:
6817 return lowerINSERT_VECTOR_ELT(Op, DAG);
6818 case ISD::EXTRACT_VECTOR_ELT:
6819 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6820 case ISD::SCALAR_TO_VECTOR: {
6821 MVT VT = Op.getSimpleValueType();
6822 SDLoc DL(Op);
6823 SDValue Scalar = Op.getOperand(0);
6824 if (VT.getVectorElementType() == MVT::i1) {
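 // There is no direct scalar_to_vector for i1 elements, so materialize the
 // scalar in an i8 vector and truncate the result back to the i1 vector type.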
6825 MVT WideVT = VT.changeVectorElementType(MVT::i8);
6826 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
6827 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
6828 }
6829 MVT ContainerVT = VT;
6830 if (VT.isFixedLengthVector())
6831 ContainerVT = getContainerForFixedLengthVector(VT);
6832 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6833
6834 SDValue V;
6835 if (VT.isFloatingPoint()) {
6836 V = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, ContainerVT,
6837 DAG.getUNDEF(ContainerVT), Scalar, VL);
6838 } else {
6839 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
6840 V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
6841 DAG.getUNDEF(ContainerVT), Scalar, VL);
6842 }
6843 if (VT.isFixedLengthVector())
6844 V = convertFromScalableVector(VT, V, DAG, Subtarget);
6845 return V;
6846 }
6847 case ISD::VSCALE: {
6848 MVT XLenVT = Subtarget.getXLenVT();
6849 MVT VT = Op.getSimpleValueType();
6850 SDLoc DL(Op);
6851 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
6852 // We define our scalable vector types for lmul=1 to use a 64 bit known
6853 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
6854 // vscale as VLENB / 8.
6855 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
6856 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
6857 report_fatal_error("Support for VLEN==32 is incomplete.");
6858 // We assume VLENB is a multiple of 8. We manually choose the best shift
6859 // here because SimplifyDemandedBits isn't always able to simplify it.
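 // For example: (vscale x 16) becomes VLENB << 1, (vscale x 4) becomes
 // VLENB >> 1, and (vscale x 24) becomes VLENB * 3 (24 / 8), avoiding a
 // separate shift in the multiple-of-8 case.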
6860 uint64_t Val = Op.getConstantOperandVal(0);
6861 if (isPowerOf2_64(Val)) {
6862 uint64_t Log2 = Log2_64(Val);
6863 if (Log2 < 3)
6864 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6865 DAG.getConstant(3 - Log2, DL, VT));
6866 else if (Log2 > 3)
6867 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
6868 DAG.getConstant(Log2 - 3, DL, XLenVT));
6869 } else if ((Val % 8) == 0) {
6870 // If the multiplier is a multiple of 8, scale it down to avoid needing
6871 // to shift the VLENB value.
6872 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
6873 DAG.getConstant(Val / 8, DL, XLenVT));
6874 } else {
6875 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6876 DAG.getConstant(3, DL, XLenVT));
6877 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
6878 DAG.getConstant(Val, DL, XLenVT));
6879 }
6880 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6881 }
6882 case ISD::FPOWI: {
6883 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
6884 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
6885 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
6886 Op.getOperand(1).getValueType() == MVT::i32) {
6887 SDLoc DL(Op);
6888 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6889 SDValue Powi =
6890 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
6891 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
6892 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6893 }
6894 return SDValue();
6895 }
6896 case ISD::FMAXIMUM:
6897 case ISD::FMINIMUM:
6898 if (isPromotedOpNeedingSplit(Op, Subtarget))
6899 return SplitVectorOp(Op, DAG);
6900 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
6901 case ISD::FP_EXTEND:
6902 case ISD::FP_ROUND:
6903 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6904 case ISD::STRICT_FP_ROUND:
6905 case ISD::STRICT_FP_EXTEND:
6906 return lowerStrictFPExtendOrRoundLike(Op, DAG);
6907 case ISD::SINT_TO_FP:
6908 case ISD::UINT_TO_FP:
6909 if (Op.getValueType().isVector() &&
6910 ((Op.getValueType().getScalarType() == MVT::f16 &&
6911 (Subtarget.hasVInstructionsF16Minimal() &&
6912 !Subtarget.hasVInstructionsF16())) ||
6913 Op.getValueType().getScalarType() == MVT::bf16)) {
6914 if (isPromotedOpNeedingSplit(Op, Subtarget))
6915 return SplitVectorOp(Op, DAG);
6916 // int -> f32
6917 SDLoc DL(Op);
6918 MVT NVT =
6919 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6920 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6921 // f32 -> [b]f16
6922 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6923 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6924 }
6925 [[fallthrough]];
6926 case ISD::FP_TO_SINT:
6927 case ISD::FP_TO_UINT:
6928 if (SDValue Op1 = Op.getOperand(0);
6929 Op1.getValueType().isVector() &&
6930 ((Op1.getValueType().getScalarType() == MVT::f16 &&
6931 (Subtarget.hasVInstructionsF16Minimal() &&
6932 !Subtarget.hasVInstructionsF16())) ||
6933 Op1.getValueType().getScalarType() == MVT::bf16)) {
6934 if (isPromotedOpNeedingSplit(Op1, Subtarget))
6935 return SplitVectorOp(Op, DAG);
6936 // [b]f16 -> f32
6937 SDLoc DL(Op);
6938 MVT NVT = MVT::getVectorVT(MVT::f32,
6939 Op1.getValueType().getVectorElementCount());
6940 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6941 // f32 -> int
6942 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
6943 }
6944 [[fallthrough]];
6945 case ISD::STRICT_FP_TO_SINT:
6946 case ISD::STRICT_FP_TO_UINT:
6947 case ISD::STRICT_SINT_TO_FP:
6948 case ISD::STRICT_UINT_TO_FP: {
6949 // RVV can only do fp<->int conversions to types half or double the size of
6950 // the source. We custom-lower any conversion that needs two hops into a
6951 // sequence of two single-hop conversions.
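 // For example, i8 -> f32 is lowered as an i8 -> i16 extend followed by a
 // widening i16 -> f32 convert, and f64 -> i8 as a narrowing f64 -> i32
 // convert followed by an integer truncate.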
6952 MVT VT = Op.getSimpleValueType();
6953 if (VT.isScalarInteger())
6954 return lowerFP_TO_INT(Op, DAG, Subtarget);
6955 bool IsStrict = Op->isStrictFPOpcode();
6956 SDValue Src = Op.getOperand(0 + IsStrict);
6957 MVT SrcVT = Src.getSimpleValueType();
6958 if (SrcVT.isScalarInteger())
6959 return lowerINT_TO_FP(Op, DAG, Subtarget);
6960 if (!VT.isVector())
6961 return Op;
6962 SDLoc DL(Op);
6963 MVT EltVT = VT.getVectorElementType();
6964 MVT SrcEltVT = SrcVT.getVectorElementType();
6965 unsigned EltSize = EltVT.getSizeInBits();
6966 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
6967 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
6968 "Unexpected vector element types");
6969
6970 bool IsInt2FP = SrcEltVT.isInteger();
6971 // Widening conversions
6972 if (EltSize > (2 * SrcEltSize)) {
6973 if (IsInt2FP) {
6974 // Do a regular integer sign/zero extension then convert to float.
6975 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
6976 VT.getVectorElementCount());
6977 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
6978 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
6979 ? ISD::ZERO_EXTEND
6980 : ISD::SIGN_EXTEND;
6981 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
6982 if (IsStrict)
6983 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
6984 Op.getOperand(0), Ext);
6985 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
6986 }
6987 // FP2Int
6988 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
6989 // Do one doubling fp_extend then complete the operation by converting
6990 // to int.
6991 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6992 if (IsStrict) {
6993 auto [FExt, Chain] =
6994 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
6995 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
6996 }
6997 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
6998 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
6999 }
7000
7001 // Narrowing conversions
7002 if (SrcEltSize > (2 * EltSize)) {
7003 if (IsInt2FP) {
7004 // One narrowing int_to_fp, then an fp_round.
7005 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
7006 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
7007 if (IsStrict) {
7008 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
7009 DAG.getVTList(InterimFVT, MVT::Other),
7010 Op.getOperand(0), Src);
7011 SDValue Chain = Int2FP.getValue(1);
7012 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
7013 }
7014 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
7015 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
7016 }
7017 // FP2Int
7018 // One narrowing fp_to_int, then truncate the integer. If the float isn't
7019 // representable by the integer, the result is poison.
7020 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
7021 VT.getVectorElementCount());
7022 if (IsStrict) {
7023 SDValue FP2Int =
7024 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
7025 Op.getOperand(0), Src);
7026 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7027 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
7028 }
7029 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
7030 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7031 }
7032
7033 // Scalable vectors can exit here. Patterns will handle equally-sized
7034 // conversions halving/doubling ones.
7035 if (!VT.isFixedLengthVector())
7036 return Op;
7037
7038 // For fixed-length vectors we lower to a custom "VL" node.
7039 unsigned RVVOpc = 0;
7040 switch (Op.getOpcode()) {
7041 default:
7042 llvm_unreachable("Impossible opcode");
7043 case ISD::FP_TO_SINT:
7044 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
7045 break;
7046 case ISD::FP_TO_UINT:
7047 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
7048 break;
7049 case ISD::SINT_TO_FP:
7050 RVVOpc = RISCVISD::SINT_TO_FP_VL;
7051 break;
7052 case ISD::UINT_TO_FP:
7053 RVVOpc = RISCVISD::UINT_TO_FP_VL;
7054 break;
7055 case ISD::STRICT_FP_TO_SINT:
7056 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
7057 break;
7058 case ISD::STRICT_FP_TO_UINT:
7059 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
7060 break;
7061 case ISD::STRICT_SINT_TO_FP:
7062 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
7063 break;
7064 case ISD::STRICT_UINT_TO_FP:
7065 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
7066 break;
7067 }
7068
7069 MVT ContainerVT = getContainerForFixedLengthVector(VT);
7070 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
7071 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
7072 "Expected same element count");
7073
7074 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7075
7076 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
7077 if (IsStrict) {
7078 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
7079 Op.getOperand(0), Src, Mask, VL);
7080 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
7081 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
7082 }
7083 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
7084 return convertFromScalableVector(VT, Src, DAG, Subtarget);
7085 }
7086 case ISD::FP_TO_SINT_SAT:
7087 case ISD::FP_TO_UINT_SAT:
7088 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
7089 case ISD::FP_TO_BF16: {
7090 // Custom lower to ensure the libcall return is passed in an FPR on hard
7091 // float ABIs.
7092 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
7093 SDLoc DL(Op);
7094 MakeLibCallOptions CallOptions;
7095 RTLIB::Libcall LC =
7096 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
7097 SDValue Res =
7098 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
7099 if (Subtarget.is64Bit())
7100 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7101 return DAG.getBitcast(MVT::i32, Res);
7102 }
7103 case ISD::BF16_TO_FP: {
7104 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
7105 MVT VT = Op.getSimpleValueType();
7106 SDLoc DL(Op);
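 // bf16 has the same sign/exponent layout as the upper 16 bits of an f32, so
 // shifting the raw bits left by 16 and reinterpreting them as f32 performs
 // the extension exactly.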
7107 Op = DAG.getNode(
7108 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
7109 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
7110 SDValue Res = Subtarget.is64Bit()
7111 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
7112 : DAG.getBitcast(MVT::f32, Op);
7113 // fp_extend if the target VT is bigger than f32.
7114 if (VT != MVT::f32)
7115 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
7116 return Res;
7117 }
7118 case ISD::STRICT_FP_TO_FP16:
7119 case ISD::FP_TO_FP16: {
7120 // Custom lower to ensure the libcall return is passed in an FPR on hard
7121 // float ABIs.
7122 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7123 SDLoc DL(Op);
7124 MakeLibCallOptions CallOptions;
7125 bool IsStrict = Op->isStrictFPOpcode();
7126 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7127 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7128 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
7129 SDValue Res;
7130 std::tie(Res, Chain) =
7131 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
7132 if (Subtarget.is64Bit())
7133 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7134 SDValue Result = DAG.getBitcast(MVT::i32, IsStrict ? Res.getValue(0) : Res);
7135 if (IsStrict)
7136 return DAG.getMergeValues({Result, Chain}, DL);
7137 return Result;
7138 }
7139 case ISD::STRICT_FP16_TO_FP:
7140 case ISD::FP16_TO_FP: {
7141 // Custom lower to ensure the libcall argument is passed in an FPR on hard
7142 // float ABIs.
7143 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7144 SDLoc DL(Op);
7145 MakeLibCallOptions CallOptions;
7146 bool IsStrict = Op->isStrictFPOpcode();
7147 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7148 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7149 SDValue Arg = Subtarget.is64Bit()
7150 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op0)
7151 : DAG.getBitcast(MVT::f32, Op0);
7152 SDValue Res;
7153 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
7154 CallOptions, DL, Chain);
7155 if (IsStrict)
7156 return DAG.getMergeValues({Res, Chain}, DL);
7157 return Res;
7158 }
7159 case ISD::FTRUNC:
7160 case ISD::FCEIL:
7161 case ISD::FFLOOR:
7162 case ISD::FNEARBYINT:
7163 case ISD::FRINT:
7164 case ISD::FROUND:
7165 case ISD::FROUNDEVEN:
7166 if (isPromotedOpNeedingSplit(Op, Subtarget))
7167 return SplitVectorOp(Op, DAG);
7168 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7169 case ISD::LRINT:
7170 case ISD::LLRINT:
7171 if (Op.getValueType().isVector())
7172 return lowerVectorXRINT(Op, DAG, Subtarget);
7173 [[fallthrough]];
7174 case ISD::LROUND:
7175 case ISD::LLROUND: {
7176 assert(Op.getOperand(0).getValueType() == MVT::f16 &&
7177 "Unexpected custom legalisation");
7178 SDLoc DL(Op);
7179 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
7180 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
7181 }
7182 case ISD::STRICT_LRINT:
7183 case ISD::STRICT_LLRINT:
7184 case ISD::STRICT_LROUND:
7185 case ISD::STRICT_LLROUND: {
7186 assert(Op.getOperand(1).getValueType() == MVT::f16 &&
7187 "Unexpected custom legalisation");
7188 SDLoc DL(Op);
7189 SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
7190 {Op.getOperand(0), Op.getOperand(1)});
7191 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
7192 {Ext.getValue(1), Ext.getValue(0)});
7193 }
7194 case ISD::VECREDUCE_ADD:
7195 case ISD::VECREDUCE_UMAX:
7196 case ISD::VECREDUCE_SMAX:
7197 case ISD::VECREDUCE_UMIN:
7198 case ISD::VECREDUCE_SMIN:
7199 return lowerVECREDUCE(Op, DAG);
7200 case ISD::VECREDUCE_AND:
7201 case ISD::VECREDUCE_OR:
7202 case ISD::VECREDUCE_XOR:
7203 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7204 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
7205 return lowerVECREDUCE(Op, DAG);
7206 case ISD::VECREDUCE_FADD:
7207 case ISD::VECREDUCE_SEQ_FADD:
7208 case ISD::VECREDUCE_FMIN:
7209 case ISD::VECREDUCE_FMAX:
7210 case ISD::VECREDUCE_FMAXIMUM:
7211 case ISD::VECREDUCE_FMINIMUM:
7212 return lowerFPVECREDUCE(Op, DAG);
7213 case ISD::VP_REDUCE_ADD:
7214 case ISD::VP_REDUCE_UMAX:
7215 case ISD::VP_REDUCE_SMAX:
7216 case ISD::VP_REDUCE_UMIN:
7217 case ISD::VP_REDUCE_SMIN:
7218 case ISD::VP_REDUCE_FADD:
7219 case ISD::VP_REDUCE_SEQ_FADD:
7220 case ISD::VP_REDUCE_FMIN:
7221 case ISD::VP_REDUCE_FMAX:
7222 case ISD::VP_REDUCE_FMINIMUM:
7223 case ISD::VP_REDUCE_FMAXIMUM:
7224 if (isPromotedOpNeedingSplit(Op.getOperand(1), Subtarget))
7225 return SplitVectorReductionOp(Op, DAG);
7226 return lowerVPREDUCE(Op, DAG);
7227 case ISD::VP_REDUCE_AND:
7228 case ISD::VP_REDUCE_OR:
7229 case ISD::VP_REDUCE_XOR:
7230 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
7231 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
7232 return lowerVPREDUCE(Op, DAG);
7233 case ISD::VP_CTTZ_ELTS:
7234 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
7235 return lowerVPCttzElements(Op, DAG);
7236 case ISD::UNDEF: {
7237 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
7238 return convertFromScalableVector(Op.getSimpleValueType(),
7239 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
7240 }
7241 case ISD::INSERT_SUBVECTOR:
7242 return lowerINSERT_SUBVECTOR(Op, DAG);
7243 case ISD::EXTRACT_SUBVECTOR:
7244 return lowerEXTRACT_SUBVECTOR(Op, DAG);
7245 case ISD::VECTOR_DEINTERLEAVE:
7246 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
7247 case ISD::VECTOR_INTERLEAVE:
7248 return lowerVECTOR_INTERLEAVE(Op, DAG);
7249 case ISD::STEP_VECTOR:
7250 return lowerSTEP_VECTOR(Op, DAG);
7251 case ISD::VECTOR_REVERSE:
7252 return lowerVECTOR_REVERSE(Op, DAG);
7253 case ISD::VECTOR_SPLICE:
7254 return lowerVECTOR_SPLICE(Op, DAG);
7255 case ISD::BUILD_VECTOR:
7256 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
7257 case ISD::SPLAT_VECTOR: {
7258 MVT VT = Op.getSimpleValueType();
7259 MVT EltVT = VT.getVectorElementType();
7260 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
7261 EltVT == MVT::bf16) {
7262 SDLoc DL(Op);
7263 SDValue Elt;
7264 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
7265 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
7266 Elt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(),
7267 Op.getOperand(0));
7268 else
7269 Elt = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Op.getOperand(0));
7270 MVT IVT = VT.changeVectorElementType(MVT::i16);
7271 return DAG.getNode(ISD::BITCAST, DL, VT,
7272 DAG.getNode(ISD::SPLAT_VECTOR, DL, IVT, Elt));
7273 }
7274
7275 if (EltVT == MVT::i1)
7276 return lowerVectorMaskSplat(Op, DAG);
7277 return SDValue();
7278 }
7279 case ISD::VECTOR_SHUFFLE:
7280 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
7281 case ISD::CONCAT_VECTORS: {
7282 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
7283 // better than going through the stack, as the default expansion does.
7284 SDLoc DL(Op);
7285 MVT VT = Op.getSimpleValueType();
7286 MVT ContainerVT = VT;
7287 if (VT.isFixedLengthVector())
7288 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
7289
7290 // Recursively split concat_vectors with more than 2 operands:
7291 //
7292 // concat_vector op1, op2, op3, op4
7293 // ->
7294 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
7295 //
7296 // This reduces the length of the chain of vslideups and allows us to
7297 // perform the vslideups at a smaller LMUL, limited to MF2.
7298 if (Op.getNumOperands() > 2 &&
7299 ContainerVT.bitsGE(getLMUL1VT(ContainerVT))) {
7300 MVT HalfVT = VT.getHalfNumVectorElementsVT();
7301 assert(isPowerOf2_32(Op.getNumOperands()));
7302 size_t HalfNumOps = Op.getNumOperands() / 2;
7303 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
7304 Op->ops().take_front(HalfNumOps));
7305 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
7306 Op->ops().drop_front(HalfNumOps));
7307 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
7308 }
7309
7310 unsigned NumOpElts =
7311 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
7312 SDValue Vec = DAG.getUNDEF(VT);
7313 for (const auto &OpIdx : enumerate(Op->ops())) {
7314 SDValue SubVec = OpIdx.value();
7315 // Don't insert undef subvectors.
7316 if (SubVec.isUndef())
7317 continue;
7318 Vec =
7319 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
7320 DAG.getVectorIdxConstant(OpIdx.index() * NumOpElts, DL));
7321 }
7322 return Vec;
7323 }
7324 case ISD::LOAD: {
7325 auto *Load = cast<LoadSDNode>(Op);
7326 EVT VecTy = Load->getMemoryVT();
7327 // Handle normal vector tuple load.
7328 if (VecTy.isRISCVVectorTuple()) {
7329 SDLoc DL(Op);
7330 MVT XLenVT = Subtarget.getXLenVT();
7331 unsigned NF = VecTy.getRISCVVectorTupleNumFields();
7332 unsigned Sz = VecTy.getSizeInBits().getKnownMinValue();
7333 unsigned NumElts = Sz / (NF * 8);
7334 int Log2LMUL = Log2_64(NumElts) - 3;
7335
7336 auto Flag = SDNodeFlags();
7337 Flag.setNoUnsignedWrap(true);
7338 SDValue Ret = DAG.getUNDEF(VecTy);
7339 SDValue BasePtr = Load->getBasePtr();
7340 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7341 VROffset =
7342 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
7343 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
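 // VROffset is VLENB shifted by the (non-negative) log2 of LMUL, i.e. the
 // byte stride between consecutive fields of the tuple; the base pointer is
 // advanced by this amount after each field is loaded.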
7344 SmallVector<SDValue, 8> OutChains;
7345
7346 // Load NF vector registers and combine them to a vector tuple.
7347 for (unsigned i = 0; i < NF; ++i) {
7348 SDValue LoadVal = DAG.getLoad(
7349 MVT::getScalableVectorVT(MVT::i8, NumElts), DL, Load->getChain(),
7350 BasePtr, MachinePointerInfo(Load->getAddressSpace()), Align(8));
7351 OutChains.push_back(LoadVal.getValue(1));
7352 Ret = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VecTy, Ret, LoadVal,
7353 DAG.getVectorIdxConstant(i, DL));
7354 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
7355 }
7356 return DAG.getMergeValues(
7357 {Ret, DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains)}, DL);
7358 }
7359
7360 if (auto V = expandUnalignedRVVLoad(Op, DAG))
7361 return V;
7362 if (Op.getValueType().isFixedLengthVector())
7363 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
7364 return Op;
7365 }
7366 case ISD::STORE: {
7367 auto *Store = cast<StoreSDNode>(Op);
7368 SDValue StoredVal = Store->getValue();
7369 EVT VecTy = StoredVal.getValueType();
7370 // Handle normal vector tuple store.
7371 if (VecTy.isRISCVVectorTuple()) {
7372 SDLoc DL(Op);
7373 MVT XLenVT = Subtarget.getXLenVT();
7374 unsigned NF = VecTy.getRISCVVectorTupleNumFields();
7375 unsigned Sz = VecTy.getSizeInBits().getKnownMinValue();
7376 unsigned NumElts = Sz / (NF * 8);
7377 int Log2LMUL = Log2_64(NumElts) - 3;
7378
7379 auto Flag = SDNodeFlags();
7380 Flag.setNoUnsignedWrap(true);
7381 SDValue Ret;
7382 SDValue Chain = Store->getChain();
7383 SDValue BasePtr = Store->getBasePtr();
7384 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7385 VROffset =
7386 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
7387 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
7388
7389 // Extract subregisters in a vector tuple and store them individually.
7390 for (unsigned i = 0; i < NF; ++i) {
7391 auto Extract = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL,
7392 MVT::getScalableVectorVT(MVT::i8, NumElts),
7393 StoredVal, DAG.getVectorIdxConstant(i, DL));
7394 Ret = DAG.getStore(Chain, DL, Extract, BasePtr,
7395 MachinePointerInfo(Store->getAddressSpace()),
7396 Store->getOriginalAlign(),
7397 Store->getMemOperand()->getFlags());
7398 Chain = Ret.getValue(0);
7399 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
7400 }
7401 return Ret;
7402 }
7403
7404 if (auto V = expandUnalignedRVVStore(Op, DAG))
7405 return V;
7406 if (Op.getOperand(1).getValueType().isFixedLengthVector())
7407 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
7408 return Op;
7409 }
7410 case ISD::MLOAD:
7411 case ISD::VP_LOAD:
7412 return lowerMaskedLoad(Op, DAG);
7413 case ISD::MSTORE:
7414 case ISD::VP_STORE:
7415 return lowerMaskedStore(Op, DAG);
7416 case ISD::VECTOR_COMPRESS:
7417 return lowerVectorCompress(Op, DAG);
7418 case ISD::SELECT_CC: {
7419 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
7420 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
7421 // into separate SETCC+SELECT just like LegalizeDAG.
7422 SDValue Tmp1 = Op.getOperand(0);
7423 SDValue Tmp2 = Op.getOperand(1);
7424 SDValue True = Op.getOperand(2);
7425 SDValue False = Op.getOperand(3);
7426 EVT VT = Op.getValueType();
7427 SDValue CC = Op.getOperand(4);
7428 EVT CmpVT = Tmp1.getValueType();
7429 EVT CCVT =
7430 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
7431 SDLoc DL(Op);
7432 SDValue Cond =
7433 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
7434 return DAG.getSelect(DL, VT, Cond, True, False);
7435 }
7436 case ISD::SETCC: {
7437 MVT OpVT = Op.getOperand(0).getSimpleValueType();
7438 if (OpVT.isScalarInteger()) {
7439 MVT VT = Op.getSimpleValueType();
7440 SDValue LHS = Op.getOperand(0);
7441 SDValue RHS = Op.getOperand(1);
7442 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
7443 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
7444 "Unexpected CondCode");
7445
7446 SDLoc DL(Op);
7447
7448 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
7449 // convert this to the equivalent of (set(u)ge X, C+1) by using
7450 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
7451 // in a register.
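 // For example, (setgt X, 5) becomes (xori (slti X, 6), 1) and
 // (setugt X, 7) becomes (xori (sltiu X, 8), 1).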
7452 if (isa<ConstantSDNode>(RHS)) {
7453 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
7454 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
7455 // If this is an unsigned compare and the constant is -1, incrementing
7456 // the constant would change behavior. The result should be false.
7457 if (CCVal == ISD::SETUGT && Imm == -1)
7458 return DAG.getConstant(0, DL, VT);
7459 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
7460 CCVal = ISD::getSetCCSwappedOperands(CCVal);
7461 SDValue SetCC = DAG.getSetCC(
7462 DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal);
7463 return DAG.getLogicalNOT(DL, SetCC, VT);
7464 }
7465 }
7466
7467 // Not a constant we could handle, swap the operands and condition code to
7468 // SETLT/SETULT.
7469 CCVal = ISD::getSetCCSwappedOperands(CCVal);
7470 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
7471 }
7472
7473 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
7474 return SplitVectorOp(Op, DAG);
7475
7476 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
7477 }
7478 case ISD::ADD:
7479 case ISD::SUB:
7480 case ISD::MUL:
7481 case ISD::MULHS:
7482 case ISD::MULHU:
7483 case ISD::AND:
7484 case ISD::OR:
7485 case ISD::XOR:
7486 case ISD::SDIV:
7487 case ISD::SREM:
7488 case ISD::UDIV:
7489 case ISD::UREM:
7490 case ISD::BSWAP:
7491 case ISD::CTPOP:
7492 return lowerToScalableOp(Op, DAG);
7493 case ISD::SHL:
7494 case ISD::SRA:
7495 case ISD::SRL:
7496 if (Op.getSimpleValueType().isFixedLengthVector())
7497 return lowerToScalableOp(Op, DAG);
7498 // This can be called for an i32 shift amount that needs to be promoted.
7499 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
7500 "Unexpected custom legalisation");
7501 return SDValue();
7502 case ISD::FABS:
7503 case ISD::FNEG:
7504 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
7505 return lowerFABSorFNEG(Op, DAG, Subtarget);
7506 [[fallthrough]];
7507 case ISD::FADD:
7508 case ISD::FSUB:
7509 case ISD::FMUL:
7510 case ISD::FDIV:
7511 case ISD::FSQRT:
7512 case ISD::FMA:
7513 case ISD::FMINNUM:
7514 case ISD::FMAXNUM:
7515 if (isPromotedOpNeedingSplit(Op, Subtarget))
7516 return SplitVectorOp(Op, DAG);
7517 [[fallthrough]];
7518 case ISD::AVGFLOORS:
7519 case ISD::AVGFLOORU:
7520 case ISD::AVGCEILS:
7521 case ISD::AVGCEILU:
7522 case ISD::SMIN:
7523 case ISD::SMAX:
7524 case ISD::UMIN:
7525 case ISD::UMAX:
7526 case ISD::UADDSAT:
7527 case ISD::USUBSAT:
7528 case ISD::SADDSAT:
7529 case ISD::SSUBSAT:
7530 return lowerToScalableOp(Op, DAG);
7531 case ISD::ABDS:
7532 case ISD::ABDU: {
7533 SDLoc dl(Op);
7534 EVT VT = Op->getValueType(0);
7535 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
7536 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
7537 bool IsSigned = Op->getOpcode() == ISD::ABDS;
7538
7539 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
7540 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
7541 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
7542 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
7543 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
7544 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
7545 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
7546 }
7547 case ISD::ABS:
7548 case ISD::VP_ABS:
7549 return lowerABS(Op, DAG);
7550 case ISD::CTLZ:
7551 case ISD::CTLZ_ZERO_UNDEF:
7552 case ISD::CTTZ:
7553 case ISD::CTTZ_ZERO_UNDEF:
7554 if (Subtarget.hasStdExtZvbb())
7555 return lowerToScalableOp(Op, DAG);
7556 assert(Op.getOpcode() != ISD::CTTZ);
7557 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7558 case ISD::VSELECT:
7559 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
7560 case ISD::FCOPYSIGN:
7561 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
7562 return lowerFCOPYSIGN(Op, DAG, Subtarget);
7563 if (isPromotedOpNeedingSplit(Op, Subtarget))
7564 return SplitVectorOp(Op, DAG);
7565 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
7566 case ISD::STRICT_FADD:
7567 case ISD::STRICT_FSUB:
7568 case ISD::STRICT_FMUL:
7569 case ISD::STRICT_FDIV:
7570 case ISD::STRICT_FSQRT:
7571 case ISD::STRICT_FMA:
7572 if (isPromotedOpNeedingSplit(Op, Subtarget))
7573 return SplitStrictFPVectorOp(Op, DAG);
7574 return lowerToScalableOp(Op, DAG);
7575 case ISD::STRICT_FSETCC:
7576 case ISD::STRICT_FSETCCS:
7577 return lowerVectorStrictFSetcc(Op, DAG);
7578 case ISD::STRICT_FCEIL:
7579 case ISD::STRICT_FRINT:
7580 case ISD::STRICT_FFLOOR:
7581 case ISD::STRICT_FTRUNC:
7582 case ISD::STRICT_FNEARBYINT:
7583 case ISD::STRICT_FROUND:
7584 case ISD::STRICT_FROUNDEVEN:
7585 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7586 case ISD::MGATHER:
7587 case ISD::VP_GATHER:
7588 return lowerMaskedGather(Op, DAG);
7589 case ISD::MSCATTER:
7590 case ISD::VP_SCATTER:
7591 return lowerMaskedScatter(Op, DAG);
7592 case ISD::GET_ROUNDING:
7593 return lowerGET_ROUNDING(Op, DAG);
7594 case ISD::SET_ROUNDING:
7595 return lowerSET_ROUNDING(Op, DAG);
7596 case ISD::EH_DWARF_CFA:
7597 return lowerEH_DWARF_CFA(Op, DAG);
7598 case ISD::VP_MERGE:
7599 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7600 return lowerVPMergeMask(Op, DAG);
7601 [[fallthrough]];
7602 case ISD::VP_SELECT:
7603 case ISD::VP_ADD:
7604 case ISD::VP_SUB:
7605 case ISD::VP_MUL:
7606 case ISD::VP_SDIV:
7607 case ISD::VP_UDIV:
7608 case ISD::VP_SREM:
7609 case ISD::VP_UREM:
7610 case ISD::VP_UADDSAT:
7611 case ISD::VP_USUBSAT:
7612 case ISD::VP_SADDSAT:
7613 case ISD::VP_SSUBSAT:
7614 case ISD::VP_LRINT:
7615 case ISD::VP_LLRINT:
7616 return lowerVPOp(Op, DAG);
7617 case ISD::VP_AND:
7618 case ISD::VP_OR:
7619 case ISD::VP_XOR:
7620 return lowerLogicVPOp(Op, DAG);
7621 case ISD::VP_FADD:
7622 case ISD::VP_FSUB:
7623 case ISD::VP_FMUL:
7624 case ISD::VP_FDIV:
7625 case ISD::VP_FNEG:
7626 case ISD::VP_FABS:
7627 case ISD::VP_SQRT:
7628 case ISD::VP_FMA:
7629 case ISD::VP_FMINNUM:
7630 case ISD::VP_FMAXNUM:
7631 case ISD::VP_FCOPYSIGN:
7632 if (isPromotedOpNeedingSplit(Op, Subtarget))
7633 return SplitVPOp(Op, DAG);
7634 [[fallthrough]];
7635 case ISD::VP_SRA:
7636 case ISD::VP_SRL:
7637 case ISD::VP_SHL:
7638 return lowerVPOp(Op, DAG);
7639 case ISD::VP_IS_FPCLASS:
7640 return LowerIS_FPCLASS(Op, DAG);
7641 case ISD::VP_SIGN_EXTEND:
7642 case ISD::VP_ZERO_EXTEND:
7643 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7644 return lowerVPExtMaskOp(Op, DAG);
7645 return lowerVPOp(Op, DAG);
7646 case ISD::VP_TRUNCATE:
7647 return lowerVectorTruncLike(Op, DAG);
7648 case ISD::VP_FP_EXTEND:
7649 case ISD::VP_FP_ROUND:
7650 return lowerVectorFPExtendOrRoundLike(Op, DAG);
7651 case ISD::VP_SINT_TO_FP:
7652 case ISD::VP_UINT_TO_FP:
7653 if (Op.getValueType().isVector() &&
7654 ((Op.getValueType().getScalarType() == MVT::f16 &&
7655 (Subtarget.hasVInstructionsF16Minimal() &&
7656 !Subtarget.hasVInstructionsF16())) ||
7657 Op.getValueType().getScalarType() == MVT::bf16)) {
7658 if (isPromotedOpNeedingSplit(Op, Subtarget))
7659 return SplitVectorOp(Op, DAG);
7660 // int -> f32
7661 SDLoc DL(Op);
7662 MVT NVT =
7663 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
7664 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
7665 // f32 -> [b]f16
7666 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
7667 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7668 }
7669 [[fallthrough]];
7670 case ISD::VP_FP_TO_SINT:
7671 case ISD::VP_FP_TO_UINT:
7672 if (SDValue Op1 = Op.getOperand(0);
7673 Op1.getValueType().isVector() &&
7674 ((Op1.getValueType().getScalarType() == MVT::f16 &&
7675 (Subtarget.hasVInstructionsF16Minimal() &&
7676 !Subtarget.hasVInstructionsF16())) ||
7677 Op1.getValueType().getScalarType() == MVT::bf16)) {
7678 if (isPromotedOpNeedingSplit(Op1, Subtarget))
7679 return SplitVectorOp(Op, DAG);
7680 // [b]f16 -> f32
7681 SDLoc DL(Op);
7682 MVT NVT = MVT::getVectorVT(MVT::f32,
7683 Op1.getValueType().getVectorElementCount());
7684 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
7685 // f32 -> int
7686 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7687 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
7688 }
7689 return lowerVPFPIntConvOp(Op, DAG);
7690 case ISD::VP_SETCC:
7691 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
7692 return SplitVPOp(Op, DAG);
7693 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7694 return lowerVPSetCCMaskOp(Op, DAG);
7695 [[fallthrough]];
7696 case ISD::VP_SMIN:
7697 case ISD::VP_SMAX:
7698 case ISD::VP_UMIN:
7699 case ISD::VP_UMAX:
7700 case ISD::VP_BITREVERSE:
7701 case ISD::VP_BSWAP:
7702 return lowerVPOp(Op, DAG);
7703 case ISD::VP_CTLZ:
7704 case ISD::VP_CTLZ_ZERO_UNDEF:
7705 if (Subtarget.hasStdExtZvbb())
7706 return lowerVPOp(Op, DAG);
7707 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7708 case ISD::VP_CTTZ:
7709 case ISD::VP_CTTZ_ZERO_UNDEF:
7710 if (Subtarget.hasStdExtZvbb())
7711 return lowerVPOp(Op, DAG);
7712 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7713 case ISD::VP_CTPOP:
7714 return lowerVPOp(Op, DAG);
7715 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
7716 return lowerVPStridedLoad(Op, DAG);
7717 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
7718 return lowerVPStridedStore(Op, DAG);
7719 case ISD::VP_FCEIL:
7720 case ISD::VP_FFLOOR:
7721 case ISD::VP_FRINT:
7722 case ISD::VP_FNEARBYINT:
7723 case ISD::VP_FROUND:
7724 case ISD::VP_FROUNDEVEN:
7725 case ISD::VP_FROUNDTOZERO:
7726 if (isPromotedOpNeedingSplit(Op, Subtarget))
7727 return SplitVPOp(Op, DAG);
7728 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7729 case ISD::VP_FMAXIMUM:
7730 case ISD::VP_FMINIMUM:
7731 if (isPromotedOpNeedingSplit(Op, Subtarget))
7732 return SplitVPOp(Op, DAG);
7733 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7734 case ISD::EXPERIMENTAL_VP_SPLICE:
7735 return lowerVPSpliceExperimental(Op, DAG);
7736 case ISD::EXPERIMENTAL_VP_REVERSE:
7737 return lowerVPReverseExperimental(Op, DAG);
7738 case ISD::EXPERIMENTAL_VP_SPLAT:
7739 return lowerVPSplatExperimental(Op, DAG);
7740 case ISD::CLEAR_CACHE: {
7741 assert(getTargetMachine().getTargetTriple().isOSLinux() &&
7742 "llvm.clear_cache only needs custom lower on Linux targets");
7743 SDLoc DL(Op);
7744 SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT());
7745 return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
7746 Op.getOperand(2), Flags, DL);
7747 }
7748 case ISD::DYNAMIC_STACKALLOC:
7749 return lowerDYNAMIC_STACKALLOC(Op, DAG);
7750 case ISD::INIT_TRAMPOLINE:
7751 return lowerINIT_TRAMPOLINE(Op, DAG);
7752 case ISD::ADJUST_TRAMPOLINE:
7753 return lowerADJUST_TRAMPOLINE(Op, DAG);
7754 }
7755}
7756
7757SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
7758 SDValue Start, SDValue End,
7759 SDValue Flags, SDLoc DL) const {
7760 MakeLibCallOptions CallOptions;
7761 std::pair<SDValue, SDValue> CallResult =
7762 makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
7763 {Start, End, Flags}, CallOptions, DL, InChain);
7764
7765 // This function returns void so only the out chain matters.
7766 return CallResult.second;
7767}
7768
7769SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValue Op,
7770 SelectionDAG &DAG) const {
7771 if (!Subtarget.is64Bit())
7772 llvm::report_fatal_error("Trampolines only implemented for RV64");
7773
7774 // Create an MCCodeEmitter to encode instructions.
7775 TargetLoweringObjectFile *TLO = getTargetMachine().getObjFileLowering();
7776 assert(TLO);
7777 MCContext &MCCtx = TLO->getContext();
7778
7779 std::unique_ptr<MCCodeEmitter> CodeEmitter(
7780 createRISCVMCCodeEmitter(*getTargetMachine().getMCInstrInfo(), MCCtx));
7781
7782 SDValue Root = Op.getOperand(0);
7783 SDValue Trmp = Op.getOperand(1); // trampoline
7784 SDLoc dl(Op);
7785
7786 const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
7787
7788 // We store in the trampoline buffer the following instructions and data.
7789 // Offset:
7790 // 0: auipc t2, 0
7791 // 4: ld t0, 24(t2)
7792 // 8: ld t2, 16(t2)
7793 // 12: jalr t0
7794 // 16: <StaticChainOffset>
7795 // 24: <FunctionAddressOffset>
7796 // 32:
7797
7798 constexpr unsigned StaticChainOffset = 16;
7799 constexpr unsigned FunctionAddressOffset = 24;
7800
7801 const MCSubtargetInfo *STI = getTargetMachine().getMCSubtargetInfo();
7802 assert(STI);
7803 auto GetEncoding = [&](const MCInst &MC) {
7804 SmallVector<char, 32> CB;
7805 SmallVector<MCFixup> Fixups;
7806 CodeEmitter->encodeInstruction(MC, CB, Fixups, *STI);
7807 uint32_t Encoding = support::endian::read32le(CB.data());
7808 return Encoding;
7809 };
7810
7811 SDValue OutChains[6];
7812
7813 uint32_t Encodings[] = {
7814 // auipc t2, 0
7815 // Loads the current PC into t2.
7816 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X7).addImm(0)),
7817 // ld t0, 24(t2)
7818 // Loads the function address into t0. Note that we are using offsets
7819 // pc-relative to the first instruction of the trampoline.
7820 GetEncoding(
7821 MCInstBuilder(RISCV::LD).addReg(RISCV::X5).addReg(RISCV::X7).addImm(
7822 FunctionAddressOffset)),
7823 // ld t2, 16(t2)
7824 // Load the value of the static chain.
7825 GetEncoding(
7826 MCInstBuilder(RISCV::LD).addReg(RISCV::X7).addReg(RISCV::X7).addImm(
7827 StaticChainOffset)),
7828 // jalr t0
7829 // Jump to the function.
7830 GetEncoding(MCInstBuilder(RISCV::JALR)
7831 .addReg(RISCV::X0)
7832 .addReg(RISCV::X5)
7833 .addImm(0))};
7834
7835 // Store encoded instructions.
7836 for (auto [Idx, Encoding] : llvm::enumerate(Encodings)) {
7837 SDValue Addr = Idx > 0 ? DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
7838 DAG.getConstant(Idx * 4, dl, MVT::i64))
7839 : Trmp;
7840 OutChains[Idx] = DAG.getTruncStore(
7841 Root, dl, DAG.getConstant(Encoding, dl, MVT::i64), Addr,
7842 MachinePointerInfo(TrmpAddr, Idx * 4), MVT::i32);
7843 }
7844
7845 // Now store the variable part of the trampoline.
7846 SDValue FunctionAddress = Op.getOperand(2);
7847 SDValue StaticChain = Op.getOperand(3);
7848
7849 // Store the given static chain and function pointer in the trampoline buffer.
7850 struct OffsetValuePair {
7851 const unsigned Offset;
7852 const SDValue Value;
7853 SDValue Addr = SDValue(); // Used to cache the address.
7854 } OffsetValues[] = {
7855 {StaticChainOffset, StaticChain},
7856 {FunctionAddressOffset, FunctionAddress},
7857 };
7858 for (auto [Idx, OffsetValue] : llvm::enumerate(OffsetValues)) {
7859 SDValue Addr =
7860 DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
7861 DAG.getConstant(OffsetValue.Offset, dl, MVT::i64));
7862 OffsetValue.Addr = Addr;
7863 OutChains[Idx + 4] =
7864 DAG.getStore(Root, dl, OffsetValue.Value, Addr,
7865 MachinePointerInfo(TrmpAddr, OffsetValue.Offset));
7866 }
7867
7868 SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
7869
7870 // The end of the trampoline's instructions is the same as the static chain
7871 // address that we computed earlier.
7872 SDValue EndOfTrmp = OffsetValues[0].Addr;
7873
7874 // Call clear cache on the trampoline instructions.
7875 SDValue Chain = DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken,
7876 Trmp, EndOfTrmp);
7877
7878 return Chain;
7879}
7880
7881SDValue RISCVTargetLowering::lowerADJUST_TRAMPOLINE(SDValue Op,
7882 SelectionDAG &DAG) const {
7883 if (!Subtarget.is64Bit())
7884 llvm::report_fatal_error("Trampolines only implemented for RV64");
7885
7886 return Op.getOperand(0);
7887}
7888
7889 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
7890 SelectionDAG &DAG, unsigned Flags) {
7891 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
7892}
7893
7894 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
7895 SelectionDAG &DAG, unsigned Flags) {
7896 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
7897 Flags);
7898}
7899
7900 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
7901 SelectionDAG &DAG, unsigned Flags) {
7902 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
7903 N->getOffset(), Flags);
7904}
7905
7906 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
7907 SelectionDAG &DAG, unsigned Flags) {
7908 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
7909}
7910
7911 static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, SDLoc DL,
7912 EVT Ty, SelectionDAG &DAG) {
7913 RISCVConstantPoolValue *CPV = RISCVConstantPoolValue::Create(N->getGlobal());
7914 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
7915 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
7916 return DAG.getLoad(
7917 Ty, DL, DAG.getEntryNode(), LC,
7918 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
7919}
7920
7921 static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, SDLoc DL,
7922 EVT Ty, SelectionDAG &DAG) {
7923 RISCVConstantPoolValue *CPV =
7924 RISCVConstantPoolValue::Create(*DAG.getContext(), N->getSymbol());
7925 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
7926 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
7927 return DAG.getLoad(
7928 Ty, DL, DAG.getEntryNode(), LC,
7929 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
7930}
7931
7932template <class NodeTy>
7933SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
7934 bool IsLocal, bool IsExternWeak) const {
7935 SDLoc DL(N);
7936 EVT Ty = getPointerTy(DAG.getDataLayout());
7937
7938 // When HWASAN is used and tagging of global variables is enabled, they
7939 // should be accessed via the GOT, since the tagged address of a global
7940 // is incompatible with existing code models. This also applies to non-pic
7941 // mode.
7942 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
7943 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7944 if (IsLocal && !Subtarget.allowTaggedGlobals())
7945 // Use PC-relative addressing to access the symbol. This generates the
7946 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
7947 // %pcrel_lo(auipc)).
7948 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7949
7950 // Use PC-relative addressing to access the GOT for this symbol, then load
7951 // the address from the GOT. This generates the pattern (PseudoLGA sym),
7952 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7953 SDValue Load =
7954 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7960 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7961 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7962 return Load;
7963 }
7964
7965 switch (getTargetMachine().getCodeModel()) {
7966 default:
7967 report_fatal_error("Unsupported code model for lowering");
7968 case CodeModel::Small: {
7969 // Generate a sequence for accessing addresses within the first 2 GiB of
7970 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
7971 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
7972 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
7973 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7974 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
7975 }
7976 case CodeModel::Medium: {
7977 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7978 if (IsExternWeak) {
7979 // An extern weak symbol may be undefined, i.e. have value 0, which may
7980 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
7981 // symbol. This generates the pattern (PseudoLGA sym), which expands to
7982 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7983 SDValue Load =
7984 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7990 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7991 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7992 return Load;
7993 }
7994
7995 // Generate a sequence for accessing addresses within any 2GiB range within
7996 // the address space. This generates the pattern (PseudoLLA sym), which
7997 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
7998 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7999 }
8000 case CodeModel::Large: {
8001 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N))
8002 return getLargeGlobalAddress(G, DL, Ty, DAG);
8003
8004 // Use PC-relative addressing for other node types.
8005 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8006 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8007 }
8008 }
8009}
8010
8011SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
8012 SelectionDAG &DAG) const {
8013 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
8014 assert(N->getOffset() == 0 && "unexpected offset in global node");
8015 const GlobalValue *GV = N->getGlobal();
8016 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
8017}
8018
8019SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
8020 SelectionDAG &DAG) const {
8021 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
8022
8023 return getAddr(N, DAG);
8024}
8025
8026SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
8027 SelectionDAG &DAG) const {
8028 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
8029
8030 return getAddr(N, DAG);
8031}
8032
8033SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
8034 SelectionDAG &DAG) const {
8035 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
8036
8037 return getAddr(N, DAG);
8038}
8039
8040SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
8041 SelectionDAG &DAG,
8042 bool UseGOT) const {
8043 SDLoc DL(N);
8044 EVT Ty = getPointerTy(DAG.getDataLayout());
8045 const GlobalValue *GV = N->getGlobal();
8046 MVT XLenVT = Subtarget.getXLenVT();
8047
8048 if (UseGOT) {
8049 // Use PC-relative addressing to access the GOT for this TLS symbol, then
8050 // load the address from the GOT and add the thread pointer. This generates
8051 // the pattern (PseudoLA_TLS_IE sym), which expands to
8052 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
8053 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
8054 SDValue Load =
8055 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
8061 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
8062 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
8063
8064 // Add the thread pointer.
8065 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
8066 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
8067 }
8068
8069 // Generate a sequence for accessing the address relative to the thread
8070 // pointer, with the appropriate adjustment for the thread pointer offset.
8071 // This generates the pattern
8072 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
8073 SDValue AddrHi =
8074 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
8075 SDValue AddrAdd =
8076 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
8077 SDValue AddrLo =
8078 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
8079
8080 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
8081 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
8082 SDValue MNAdd =
8083 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
8084 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
8085}
8086
8087SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
8088 SelectionDAG &DAG) const {
8089 SDLoc DL(N);
8090 EVT Ty = getPointerTy(DAG.getDataLayout());
8091 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
8092 const GlobalValue *GV = N->getGlobal();
8093
8094 // Use a PC-relative addressing mode to access the global dynamic GOT address.
8095 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
8096 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
8097 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
8098 SDValue Load =
8099 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
8100
8101 // Prepare argument list to generate call.
8102 ArgListTy Args;
8103 ArgListEntry Entry;
8104 Entry.Node = Load;
8105 Entry.Ty = CallTy;
8106 Args.push_back(Entry);
8107
8108 // Setup call to __tls_get_addr.
8109 TargetLowering::CallLoweringInfo CLI(DAG);
8110 CLI.setDebugLoc(DL)
8111 .setChain(DAG.getEntryNode())
8112 .setLibCallee(CallingConv::C, CallTy,
8113 DAG.getExternalSymbol("__tls_get_addr", Ty),
8114 std::move(Args));
8115
8116 return LowerCallTo(CLI).first;
8117}
8118
8119SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
8120 SelectionDAG &DAG) const {
8121 SDLoc DL(N);
8122 EVT Ty = getPointerTy(DAG.getDataLayout());
8123 const GlobalValue *GV = N->getGlobal();
8124
8125 // Use a PC-relative addressing mode to access the global dynamic GOT address.
8126 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
8127 //
8128 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
8129 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
8130 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
8131 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
8132 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
8133 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
8134}
8135
8136SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
8137 SelectionDAG &DAG) const {
8138 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
8139 assert(N->getOffset() == 0 && "unexpected offset in global node");
8140
8141 if (DAG.getTarget().useEmulatedTLS())
8142 return LowerToTLSEmulatedModel(N, DAG);
8143
8144 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
8145
8146 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
8147 CallingConv::GHC)
8148 report_fatal_error("In GHC calling convention TLS is not supported");
8149
8150 SDValue Addr;
8151 switch (Model) {
8152 case TLSModel::LocalExec:
8153 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
8154 break;
8155 case TLSModel::InitialExec:
8156 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
8157 break;
8158 case TLSModel::LocalDynamic:
8159 case TLSModel::GeneralDynamic:
8160 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
8161 : getDynamicTLSAddr(N, DAG);
8162 break;
8163 }
8164
8165 return Addr;
8166}
8167
8168// Return true if Val is equal to (setcc LHS, RHS, CC).
8169// Return false if Val is the inverse of (setcc LHS, RHS, CC).
8170// Otherwise, return std::nullopt.
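 // For example, with Val = (setcc a, b, setge): matchSetCC(a, b, setlt, Val)
 // returns false since setge is the inverse of setlt, while
 // matchSetCC(a, b, setge, Val) returns true.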
8171static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
8172 ISD::CondCode CC, SDValue Val) {
8173 assert(Val->getOpcode() == ISD::SETCC);
8174 SDValue LHS2 = Val.getOperand(0);
8175 SDValue RHS2 = Val.getOperand(1);
8176 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
8177
8178 if (LHS == LHS2 && RHS == RHS2) {
8179 if (CC == CC2)
8180 return true;
8181 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
8182 return false;
8183 } else if (LHS == RHS2 && RHS == LHS2) {
8184 CC2 = ISD::getSetCCSwappedOperands(CC2);
8185 if (CC == CC2)
8186 return true;
8187 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
8188 return false;
8189 }
8190
8191 return std::nullopt;
8192}
8193
8194 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
8195 const RISCVSubtarget &Subtarget) {
8196 SDValue CondV = N->getOperand(0);
8197 SDValue TrueV = N->getOperand(1);
8198 SDValue FalseV = N->getOperand(2);
8199 MVT VT = N->getSimpleValueType(0);
8200 SDLoc DL(N);
8201
8202 if (!Subtarget.hasConditionalMoveFusion()) {
8203 // (select c, -1, y) -> -c | y
8204 if (isAllOnesConstant(TrueV)) {
8205 SDValue Neg = DAG.getNegative(CondV, DL, VT);
8206 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
8207 }
8208 // (select c, y, -1) -> (c-1) | y
8209 if (isAllOnesConstant(FalseV)) {
8210 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
8211 DAG.getAllOnesConstant(DL, VT));
8212 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
8213 }
8214
8215 // (select c, 0, y) -> (c-1) & y
8216 if (isNullConstant(TrueV)) {
8217 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
8218 DAG.getAllOnesConstant(DL, VT));
8219 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
8220 }
8221 // (select c, y, 0) -> -c & y
8222 if (isNullConstant(FalseV)) {
8223 SDValue Neg = DAG.getNegative(CondV, DL, VT);
8224 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
8225 }
8226 }
8227
8228 // select c, ~x, x --> xor -c, x
8229 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
8230 const APInt &TrueVal = TrueV->getAsAPIntVal();
8231 const APInt &FalseVal = FalseV->getAsAPIntVal();
8232 if (~TrueVal == FalseVal) {
8233 SDValue Neg = DAG.getNegative(CondV, DL, VT);
8234 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
8235 }
8236 }
8237
8238 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
8239 // when both truev and falsev are also setcc.
8240 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
8241 FalseV.getOpcode() == ISD::SETCC) {
8242 SDValue LHS = CondV.getOperand(0);
8243 SDValue RHS = CondV.getOperand(1);
8244 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8245
8246 // (select x, x, y) -> x | y
8247 // (select !x, x, y) -> x & y
8248 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
8249 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
8250 DAG.getFreeze(FalseV));
8251 }
8252 // (select x, y, x) -> x & y
8253 // (select !x, y, x) -> x | y
8254 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
8255 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
8256 DAG.getFreeze(TrueV), FalseV);
8257 }
8258 }
8259
8260 return SDValue();
8261}
8262
8263// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
8264// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
8265// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
8266// being `0` or `-1`. In such cases we can replace `select` with `and`.
8267// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
8268// than `c0`?
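 // For example, (and (select cond, X, 0), 1) becomes
 // (select cond, (and X, 1), 0), since and(0, 1) folds to 0; the resulting
 // select can then be lowered as a simple mask of the condition.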
8269static SDValue
8270 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
8271 const RISCVSubtarget &Subtarget) {
8272 if (Subtarget.hasShortForwardBranchOpt())
8273 return SDValue();
8274
8275 unsigned SelOpNo = 0;
8276 SDValue Sel = BO->getOperand(0);
8277 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
8278 SelOpNo = 1;
8279 Sel = BO->getOperand(1);
8280 }
8281
8282 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
8283 return SDValue();
8284
8285 unsigned ConstSelOpNo = 1;
8286 unsigned OtherSelOpNo = 2;
8287 if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
8288 ConstSelOpNo = 2;
8289 OtherSelOpNo = 1;
8290 }
8291 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
8292 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
8293 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
8294 return SDValue();
8295
8296 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
8297 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
8298 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
8299 return SDValue();
8300
8301 SDLoc DL(Sel);
8302 EVT VT = BO->getValueType(0);
8303
8304 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
8305 if (SelOpNo == 1)
8306 std::swap(NewConstOps[0], NewConstOps[1]);
8307
8308 SDValue NewConstOp =
8309 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
8310 if (!NewConstOp)
8311 return SDValue();
8312
8313 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
8314 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
8315 return SDValue();
8316
8317 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
8318 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
8319 if (SelOpNo == 1)
8320 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
8321 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
8322
8323 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
8324 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
8325 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
8326}
8327
8328SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
8329 SDValue CondV = Op.getOperand(0);
8330 SDValue TrueV = Op.getOperand(1);
8331 SDValue FalseV = Op.getOperand(2);
8332 SDLoc DL(Op);
8333 MVT VT = Op.getSimpleValueType();
8334 MVT XLenVT = Subtarget.getXLenVT();
8335
8336 // Lower vector SELECTs to VSELECTs by splatting the condition.
8337 if (VT.isVector()) {
8338 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
8339 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
8340 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
8341 }
8342
8343 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
8344 // nodes to implement the SELECT. Performing the lowering here allows for
8345 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
8346 // sequence or RISCVISD::SELECT_CC node (branch-based select).
8347 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
8348 VT.isScalarInteger()) {
8349 // (select c, t, 0) -> (czero_eqz t, c)
8350 if (isNullConstant(FalseV))
8351 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
8352 // (select c, 0, f) -> (czero_nez f, c)
8353 if (isNullConstant(TrueV))
8354 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
8355
8356 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
8357 if (TrueV.getOpcode() == ISD::AND &&
8358 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
8359 return DAG.getNode(
8360 ISD::OR, DL, VT, TrueV,
8361 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
8362 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
8363 if (FalseV.getOpcode() == ISD::AND &&
8364 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
8365 return DAG.getNode(
8366 ISD::OR, DL, VT, FalseV,
8367 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
8368
8369 // Try some other optimizations before falling back to generic lowering.
8370 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
8371 return V;
8372
8373 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
8374 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
8375 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
8376 const APInt &TrueVal = TrueV->getAsAPIntVal();
8377 const APInt &FalseVal = FalseV->getAsAPIntVal();
8378 const int TrueValCost = RISCVMatInt::getIntMatCost(
8379 TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
8380 const int FalseValCost = RISCVMatInt::getIntMatCost(
8381 FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
8382 bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
8383 SDValue LHSVal = DAG.getConstant(
8384 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
8385 SDValue RHSVal =
8386 DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
8387 SDValue CMOV =
8388 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
8389 DL, VT, LHSVal, CondV);
8390 return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
8391 }
8392
8393 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
8394 // Unless we have the short forward branch optimization.
8395 if (!Subtarget.hasConditionalMoveFusion())
8396 return DAG.getNode(
8397 ISD::OR, DL, VT,
8398 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
8399 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
8400 }
8401
8402 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
8403 return V;
8404
8405 if (Op.hasOneUse()) {
8406 unsigned UseOpc = Op->user_begin()->getOpcode();
8407 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
8408 SDNode *BinOp = *Op->user_begin();
8409 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
8410 DAG, Subtarget)) {
8411 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
8412 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
8413 // may return a constant node and cause a crash in lowerSELECT.
8414 if (NewSel.getOpcode() == ISD::SELECT)
8415 return lowerSELECT(NewSel, DAG);
8416 return NewSel;
8417 }
8418 }
8419 }
8420
8421 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
8422 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
8423 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
8424 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
8425 if (FPTV && FPFV) {
8426 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
8427 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
8428 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
8429 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
8430 DAG.getConstant(1, DL, XLenVT));
8431 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
8432 }
8433 }
8434
8435 // If the condition is not an integer SETCC which operates on XLenVT, we need
8436 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
8437 // (select condv, truev, falsev)
8438 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
8439 if (CondV.getOpcode() != ISD::SETCC ||
8440 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
8441 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
8442 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
8443
8444 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
8445
8446 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
8447 }
8448
8449 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
8450 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
8451 // advantage of the integer compare+branch instructions. i.e.:
8452 // (select (setcc lhs, rhs, cc), truev, falsev)
8453 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
8454 SDValue LHS = CondV.getOperand(0);
8455 SDValue RHS = CondV.getOperand(1);
8456 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8457
8458 // Special case for a select of 2 constants that have a difference of 1.
8459 // Normally this is done by DAGCombine, but if the select is introduced by
8460 // type legalization or op legalization, we miss it. Restricting to SETLT
8461 // case for now because that is what signed saturating add/sub need.
8462 // FIXME: We don't need the condition to be SETLT or even a SETCC,
8463 // but we would probably want to swap the true/false values if the condition
8464 // is SETGE/SETLE to avoid an XORI.
8465 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
8466 CCVal == ISD::SETLT) {
8467 const APInt &TrueVal = TrueV->getAsAPIntVal();
8468 const APInt &FalseVal = FalseV->getAsAPIntVal();
8469 if (TrueVal - 1 == FalseVal)
8470 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
8471 if (TrueVal + 1 == FalseVal)
8472 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
8473 }
8474
8475 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
8476 // 1 < x ? x : 1 -> 0 < x ? x : 1
8477 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
8478 RHS == TrueV && LHS == FalseV) {
8479 LHS = DAG.getConstant(0, DL, VT);
8480 // 0 <u x is the same as x != 0.
8481 if (CCVal == ISD::SETULT) {
8482 std::swap(LHS, RHS);
8483 CCVal = ISD::SETNE;
8484 }
8485 }
8486
8487 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
8488 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
8489 RHS == FalseV) {
8490 RHS = DAG.getConstant(0, DL, VT);
8491 }
8492
8493 SDValue TargetCC = DAG.getCondCode(CCVal);
8494
8495 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
8496 // (select (setcc lhs, rhs, CC), constant, falsev)
8497 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
8498 std::swap(TrueV, FalseV);
8499 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
8500 }
8501
8502 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
8503 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
8504}
8505
8506SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
8507 SDValue CondV = Op.getOperand(1);
8508 SDLoc DL(Op);
8509 MVT XLenVT = Subtarget.getXLenVT();
8510
8511 if (CondV.getOpcode() == ISD::SETCC &&
8512 CondV.getOperand(0).getValueType() == XLenVT) {
8513 SDValue LHS = CondV.getOperand(0);
8514 SDValue RHS = CondV.getOperand(1);
8515 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8516
8517 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
8518
8519 SDValue TargetCC = DAG.getCondCode(CCVal);
8520 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
8521 LHS, RHS, TargetCC, Op.getOperand(2));
8522 }
8523
8524 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
8525 CondV, DAG.getConstant(0, DL, XLenVT),
8526 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
8527}
8528
8529SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
8530 MachineFunction &MF = DAG.getMachineFunction();
8531 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
8532
8533 SDLoc DL(Op);
8534 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
8535 getPointerTy(MF.getDataLayout()));
8536
8537 // vastart just stores the address of the VarArgsFrameIndex slot into the
8538 // memory location argument.
8539 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
8540 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
8541 MachinePointerInfo(SV));
8542}
8543
8544SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
8545 SelectionDAG &DAG) const {
8546 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
8547 MachineFunction &MF = DAG.getMachineFunction();
8548 MachineFrameInfo &MFI = MF.getFrameInfo();
8549 MFI.setFrameAddressIsTaken(true);
8550 Register FrameReg = RI.getFrameRegister(MF);
8551 int XLenInBytes = Subtarget.getXLen() / 8;
8552
8553 EVT VT = Op.getValueType();
8554 SDLoc DL(Op);
8555 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
8556 unsigned Depth = Op.getConstantOperandVal(0);
8557 while (Depth--) {
8558 int Offset = -(XLenInBytes * 2);
8559 SDValue Ptr = DAG.getNode(
8560 ISD::ADD, DL, VT, FrameAddr,
8561 DAG.getSignedConstant(Offset, DL, VT));
8562 FrameAddr =
8563 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
8564 }
8565 return FrameAddr;
8566}
8567
8568SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
8569 SelectionDAG &DAG) const {
8570 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
8571 MachineFunction &MF = DAG.getMachineFunction();
8572 MachineFrameInfo &MFI = MF.getFrameInfo();
8573 MFI.setReturnAddressIsTaken(true);
8574 MVT XLenVT = Subtarget.getXLenVT();
8575 int XLenInBytes = Subtarget.getXLen() / 8;
8576
8577 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
8578 return SDValue();
8579
8580 EVT VT = Op.getValueType();
8581 SDLoc DL(Op);
8582 unsigned Depth = Op.getConstantOperandVal(0);
8583 if (Depth) {
8584 int Off = -XLenInBytes;
8585 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
8586 SDValue Offset = DAG.getSignedConstant(Off, DL, VT);
8587 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
8588 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
8589 MachinePointerInfo());
8590 }
8591
8592 // Return the value of the return address register, marking it an implicit
8593 // live-in.
8594 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
8595 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
8596}
8597
8598SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
8599 SelectionDAG &DAG) const {
8600 SDLoc DL(Op);
8601 SDValue Lo = Op.getOperand(0);
8602 SDValue Hi = Op.getOperand(1);
8603 SDValue Shamt = Op.getOperand(2);
8604 EVT VT = Lo.getValueType();
8605
8606 // if Shamt-XLEN < 0: // Shamt < XLEN
8607 // Lo = Lo << Shamt
8608 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
8609 // else:
8610 // Lo = 0
8611 // Hi = Lo << (Shamt-XLEN)
8612
8613 SDValue Zero = DAG.getConstant(0, DL, VT);
8614 SDValue One = DAG.getConstant(1, DL, VT);
8615 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
8616 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
8617 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
8618 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
8619
8620 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
8621 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
8622 SDValue ShiftRightLo =
8623 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
8624 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
8625 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
8626 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
8627
8628 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
8629
8630 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
8631 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
8632
8633 SDValue Parts[2] = {Lo, Hi};
8634 return DAG.getMergeValues(Parts, DL);
8635}
8636
8637SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
8638 bool IsSRA) const {
8639 SDLoc DL(Op);
8640 SDValue Lo = Op.getOperand(0);
8641 SDValue Hi = Op.getOperand(1);
8642 SDValue Shamt = Op.getOperand(2);
8643 EVT VT = Lo.getValueType();
8644
8645 // SRA expansion:
8646 // if Shamt-XLEN < 0: // Shamt < XLEN
8647 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
8648 // Hi = Hi >>s Shamt
8649 // else:
8650 // Lo = Hi >>s (Shamt-XLEN);
8651 // Hi = Hi >>s (XLEN-1)
8652 //
8653 // SRL expansion:
8654 // if Shamt-XLEN < 0: // Shamt < XLEN
8655 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
8656 // Hi = Hi >>u Shamt
8657 // else:
8658 // Lo = Hi >>u (Shamt-XLEN);
8659 // Hi = 0;
8660
8661 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
8662
8663 SDValue Zero = DAG.getConstant(0, DL, VT);
8664 SDValue One = DAG.getConstant(1, DL, VT);
8665 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
8666 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
8667 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
8668 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
8669
8670 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
8671 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
8672 SDValue ShiftLeftHi =
8673 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
8674 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
8675 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
8676 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
8677 SDValue HiFalse =
8678 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
8679
8680 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
8681
8682 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
8683 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
8684
8685 SDValue Parts[2] = {Lo, Hi};
8686 return DAG.getMergeValues(Parts, DL);
8687}
8688
8689// Lower splats of i1 types to SETCC. For each mask vector type, we have a
8690// legal equivalently-sized i8 type, so we can use that as a go-between.
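// Illustrative example: a non-constant splat of an i1 value %x to nxv4i1 is
// lowered roughly as
//   (setcc (splat_vector nxv4i8 (and %x, 1)), (splat_vector nxv4i8 0), ne)
// so the mask bit is recovered from the low bit of the i8 splat.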
8691SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
8692 SelectionDAG &DAG) const {
8693 SDLoc DL(Op);
8694 MVT VT = Op.getSimpleValueType();
8695 SDValue SplatVal = Op.getOperand(0);
8696 // All-zeros or all-ones splats are handled specially.
8697 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
8698 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
8699 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
8700 }
8701 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
8702 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
8703 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
8704 }
8705 MVT InterVT = VT.changeVectorElementType(MVT::i8);
8706 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
8707 DAG.getConstant(1, DL, SplatVal.getValueType()));
8708 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
8709 SDValue Zero = DAG.getConstant(0, DL, InterVT);
8710 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
8711}
8712
8713// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
8714// illegal (currently only vXi64 RV32).
8715// FIXME: We could also catch non-constant sign-extended i32 values and lower
8716// them to VMV_V_X_VL.
8717SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
8718 SelectionDAG &DAG) const {
8719 SDLoc DL(Op);
8720 MVT VecVT = Op.getSimpleValueType();
8721 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
8722 "Unexpected SPLAT_VECTOR_PARTS lowering");
8723
8724 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
8725 SDValue Lo = Op.getOperand(0);
8726 SDValue Hi = Op.getOperand(1);
8727
8728 MVT ContainerVT = VecVT;
8729 if (VecVT.isFixedLengthVector())
8730 ContainerVT = getContainerForFixedLengthVector(VecVT);
8731
8732 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
8733
8734 SDValue Res =
8735 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
8736
8737 if (VecVT.isFixedLengthVector())
8738 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
8739
8740 return Res;
8741}
8742
8743// Custom-lower extensions from mask vectors by using a vselect either with 1
8744// for zero/any-extension or -1 for sign-extension:
8745// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
8746// Note that any-extension is lowered identically to zero-extension.
8747SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
8748 int64_t ExtTrueVal) const {
8749 SDLoc DL(Op);
8750 MVT VecVT = Op.getSimpleValueType();
8751 SDValue Src = Op.getOperand(0);
8752 // Only custom-lower extensions from mask types
8753 assert(Src.getValueType().isVector() &&
8754 Src.getValueType().getVectorElementType() == MVT::i1);
8755
8756 if (VecVT.isScalableVector()) {
8757 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
8758 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, VecVT);
8759 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
8760 }
8761
8762 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
8763 MVT I1ContainerVT =
8764 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
8765
8766 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
8767
8768 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
8769
8770 MVT XLenVT = Subtarget.getXLenVT();
8771 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
8772 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, XLenVT);
8773
8774 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8775 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8776 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8777 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
8778 SDValue Select =
8779 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
8780 SplatZero, DAG.getUNDEF(ContainerVT), VL);
8781
8782 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
8783}
8784
8785SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
8786 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
8787 MVT ExtVT = Op.getSimpleValueType();
8788 // Only custom-lower extensions from fixed-length vector types.
8789 if (!ExtVT.isFixedLengthVector())
8790 return Op;
8791 MVT VT = Op.getOperand(0).getSimpleValueType();
8792 // Grab the canonical container type for the extended type. Infer the smaller
8793 // type from that to ensure the same number of vector elements, as we know
8794 // the LMUL will be sufficient to hold the smaller type.
8795 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
8796 // Get the extended container type manually to ensure the same number of
8797 // vector elements between source and dest.
8798 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
8799 ContainerExtVT.getVectorElementCount());
8800
8801 SDValue Op1 =
8802 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
8803
8804 SDLoc DL(Op);
8805 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8806
8807 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
8808
8809 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
8810}
8811
8812// Custom-lower truncations from vectors to mask vectors by using a mask and a
8813// setcc operation:
8814// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
8815SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
8816 SelectionDAG &DAG) const {
8817 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
8818 SDLoc DL(Op);
8819 EVT MaskVT = Op.getValueType();
8820 // Only expect to custom-lower truncations to mask types
8821 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
8822 "Unexpected type for vector mask lowering");
8823 SDValue Src = Op.getOperand(0);
8824 MVT VecVT = Src.getSimpleValueType();
8825 SDValue Mask, VL;
8826 if (IsVPTrunc) {
8827 Mask = Op.getOperand(1);
8828 VL = Op.getOperand(2);
8829 }
8830 // If this is a fixed vector, we need to convert it to a scalable vector.
8831 MVT ContainerVT = VecVT;
8832
8833 if (VecVT.isFixedLengthVector()) {
8834 ContainerVT = getContainerForFixedLengthVector(VecVT);
8835 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8836 if (IsVPTrunc) {
8837 MVT MaskContainerVT =
8838 getContainerForFixedLengthVector(Mask.getSimpleValueType());
8839 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
8840 }
8841 }
8842
8843 if (!IsVPTrunc) {
8844 std::tie(Mask, VL) =
8845 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8846 }
8847
8848 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
8849 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
8850
8851 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8852 DAG.getUNDEF(ContainerVT), SplatOne, VL);
8853 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8854 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8855
8856 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
8857 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
8858 DAG.getUNDEF(ContainerVT), Mask, VL);
8859 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
8860 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
8861 DAG.getUNDEF(MaskContainerVT), Mask, VL});
8862 if (MaskVT.isFixedLengthVector())
8863 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
8864 return Trunc;
8865}
8866
8867SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
8868 SelectionDAG &DAG) const {
8869 unsigned Opc = Op.getOpcode();
8870 bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
8871 SDLoc DL(Op);
8872
8873 MVT VT = Op.getSimpleValueType();
8874 // Only custom-lower vector truncates
8875 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8876
8877 // Truncates to mask types are handled differently
8878 if (VT.getVectorElementType() == MVT::i1)
8879 return lowerVectorMaskTruncLike(Op, DAG);
8880
8881 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
8882 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
8883 // truncate by one power of two at a time.
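// Illustrative example: truncating nxv2i64 to nxv2i8 is emitted as three
// such steps, i64 -> i32 -> i16 -> i8, each halving SEW.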
8884 MVT DstEltVT = VT.getVectorElementType();
8885
8886 SDValue Src = Op.getOperand(0);
8887 MVT SrcVT = Src.getSimpleValueType();
8888 MVT SrcEltVT = SrcVT.getVectorElementType();
8889
8890 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
8891 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
8892 "Unexpected vector truncate lowering");
8893
8894 MVT ContainerVT = SrcVT;
8895 SDValue Mask, VL;
8896 if (IsVPTrunc) {
8897 Mask = Op.getOperand(1);
8898 VL = Op.getOperand(2);
8899 }
8900 if (SrcVT.isFixedLengthVector()) {
8901 ContainerVT = getContainerForFixedLengthVector(SrcVT);
8902 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8903 if (IsVPTrunc) {
8904 MVT MaskVT = getMaskTypeFor(ContainerVT);
8905 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8906 }
8907 }
8908
8909 SDValue Result = Src;
8910 if (!IsVPTrunc) {
8911 std::tie(Mask, VL) =
8912 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8913 }
8914
8915 unsigned NewOpc;
8916 if (Opc == ISD::TRUNCATE_SSAT_S)
8917 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
8918 else if (Opc == ISD::TRUNCATE_USAT_U)
8919 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
8920 else
8921 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL;
8922
8923 do {
8924 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
8925 MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
8926 Result = DAG.getNode(NewOpc, DL, ResultVT, Result, Mask, VL);
8927 } while (SrcEltVT != DstEltVT);
8928
8929 if (SrcVT.isFixedLengthVector())
8930 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
8931
8932 return Result;
8933}
8934
8935SDValue
8936RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
8937 SelectionDAG &DAG) const {
8938 SDLoc DL(Op);
8939 SDValue Chain = Op.getOperand(0);
8940 SDValue Src = Op.getOperand(1);
8941 MVT VT = Op.getSimpleValueType();
8942 MVT SrcVT = Src.getSimpleValueType();
8943 MVT ContainerVT = VT;
8944 if (VT.isFixedLengthVector()) {
8945 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8946 ContainerVT =
8947 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8948 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8949 }
8950
8951 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8952
8953 // RVV can only widen/truncate fp to types double/half the size of the source.
8954 if ((VT.getVectorElementType() == MVT::f64 &&
8955 (SrcVT.getVectorElementType() == MVT::f16 ||
8956 SrcVT.getVectorElementType() == MVT::bf16)) ||
8957 ((VT.getVectorElementType() == MVT::f16 ||
8958 VT.getVectorElementType() == MVT::bf16) &&
8959 SrcVT.getVectorElementType() == MVT::f64)) {
8960 // For double rounding, the intermediate rounding should be round-to-odd.
8961 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8962 ? RISCVISD::STRICT_FP_EXTEND_VL
8963 : RISCVISD::STRICT_VFNCVT_ROD_VL;
8964 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8965 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
8966 Chain, Src, Mask, VL);
8967 Chain = Src.getValue(1);
8968 }
8969
8970 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8971 ? RISCVISD::STRICT_FP_EXTEND_VL
8972 : RISCVISD::STRICT_FP_ROUND_VL;
8973 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
8974 Chain, Src, Mask, VL);
8975 if (VT.isFixedLengthVector()) {
8976 // StrictFP operations have two result values. Their lowered result should
8977 // have the same result count.
8978 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
8979 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
8980 }
8981 return Res;
8982}
8983
8984SDValue
8985RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
8986 SelectionDAG &DAG) const {
8987 bool IsVP =
8988 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
8989 bool IsExtend =
8990 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
8991 // RVV can only truncate fp to types half the size of the source. We
8992 // custom-lower f64->f16 rounds via RVV's round-to-odd float
8993 // conversion instruction.
8994 SDLoc DL(Op);
8995 MVT VT = Op.getSimpleValueType();
8996
8997 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8998
8999 SDValue Src = Op.getOperand(0);
9000 MVT SrcVT = Src.getSimpleValueType();
9001
9002 bool IsDirectExtend =
9003 IsExtend && (VT.getVectorElementType() != MVT::f64 ||
9004 (SrcVT.getVectorElementType() != MVT::f16 &&
9005 SrcVT.getVectorElementType() != MVT::bf16));
9006 bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
9007 VT.getVectorElementType() != MVT::bf16) ||
9008 SrcVT.getVectorElementType() != MVT::f64);
9009
9010 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
9011
9012 // Prepare any fixed-length vector operands.
9013 MVT ContainerVT = VT;
9014 SDValue Mask, VL;
9015 if (IsVP) {
9016 Mask = Op.getOperand(1);
9017 VL = Op.getOperand(2);
9018 }
9019 if (VT.isFixedLengthVector()) {
9020 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
9021 ContainerVT =
9022 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
9023 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
9024 if (IsVP) {
9025 MVT MaskVT = getMaskTypeFor(ContainerVT);
9026 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9027 }
9028 }
9029
9030 if (!IsVP)
9031 std::tie(Mask, VL) =
9032 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
9033
9034 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
9035
9036 if (IsDirectConv) {
9037 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
9038 if (VT.isFixedLengthVector())
9039 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
9040 return Src;
9041 }
9042
9043 unsigned InterConvOpc =
9044 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
9045
9046 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
9047 SDValue IntermediateConv =
9048 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
9049 SDValue Result =
9050 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
9051 if (VT.isFixedLengthVector())
9052 return convertFromScalableVector(VT, Result, DAG, Subtarget);
9053 return Result;
9054}
9055
9056// Given a scalable vector type and an index into it, returns the type for the
9057// smallest subvector that the index fits in. This can be used to reduce LMUL
9058// for operations like vslidedown.
9059//
9060// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
9061static std::optional<MVT>
9062getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
9063 const RISCVSubtarget &Subtarget) {
9064 assert(VecVT.isScalableVector());
9065 const unsigned EltSize = VecVT.getScalarSizeInBits();
9066 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
9067 const unsigned MinVLMAX = VectorBitsMin / EltSize;
9068 MVT SmallerVT;
9069 if (MaxIdx < MinVLMAX)
9070 SmallerVT = getLMUL1VT(VecVT);
9071 else if (MaxIdx < MinVLMAX * 2)
9072 SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
9073 else if (MaxIdx < MinVLMAX * 4)
9074 SmallerVT = getLMUL1VT(VecVT)
9075 .getDoubleNumVectorElementsVT()
9076 .getDoubleNumVectorElementsVT();
9077 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
9078 return std::nullopt;
9079 return SmallerVT;
9080}
9081
9082// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
9083// first position of a vector, and that vector is slid up to the insert index.
9084// By limiting the active vector length to index+1 and merging with the
9085// original vector (with an undisturbed tail policy for elements >= VL), we
9086// achieve the desired result of leaving all elements untouched except the one
9087// at VL-1, which is replaced with the desired value.
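// As a rough instruction-level sketch (illustrative; registers and the exact
// vtype depend on the element type, LMUL and index), inserting at constant
// index 2 of an i32 vector looks like:
//   vmv.s.x     v9, a0                     ; value into element 0 of a temp
//   vsetivli    zero, 3, e32, m1, tu, ma   ; VL = index + 1, tail undisturbed
//   vslideup.vi v8, v9, 2                  ; slide it up to index 2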
9088SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
9089 SelectionDAG &DAG) const {
9090 SDLoc DL(Op);
9091 MVT VecVT = Op.getSimpleValueType();
9092 MVT XLenVT = Subtarget.getXLenVT();
9093 SDValue Vec = Op.getOperand(0);
9094 SDValue Val = Op.getOperand(1);
9095 MVT ValVT = Val.getSimpleValueType();
9096 SDValue Idx = Op.getOperand(2);
9097
9098 if (VecVT.getVectorElementType() == MVT::i1) {
9099 // FIXME: For now we just promote to an i8 vector and insert into that,
9100 // but this is probably not optimal.
9101 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
9102 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
9103 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
9104 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
9105 }
9106
9107 if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
9108 ValVT == MVT::bf16) {
9109 // If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first.
9110 MVT IntVT = VecVT.changeTypeToInteger();
9111 SDValue IntInsert = DAG.getNode(
9112 ISD::INSERT_VECTOR_ELT, DL, IntVT, DAG.getBitcast(IntVT, Vec),
9113 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Val), Idx);
9114 return DAG.getBitcast(VecVT, IntInsert);
9115 }
9116
9117 MVT ContainerVT = VecVT;
9118 // If the operand is a fixed-length vector, convert to a scalable one.
9119 if (VecVT.isFixedLengthVector()) {
9120 ContainerVT = getContainerForFixedLengthVector(VecVT);
9121 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9122 }
9123
9124 // If we know the index we're going to insert at, we can shrink Vec so that
9125 // we're performing the scalar inserts and slideup on a smaller LMUL.
9126 MVT OrigContainerVT = ContainerVT;
9127 SDValue OrigVec = Vec;
9128 SDValue AlignedIdx;
9129 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
9130 const unsigned OrigIdx = IdxC->getZExtValue();
9131 // Do we know an upper bound on LMUL?
9132 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
9133 DL, DAG, Subtarget)) {
9134 ContainerVT = *ShrunkVT;
9135 AlignedIdx = DAG.getVectorIdxConstant(0, DL);
9136 }
9137
9138 // If we're compiling for an exact VLEN value, we can always perform
9139 // the insert in m1 as we can determine the register corresponding to
9140 // the index in the register group.
9141 const MVT M1VT = getLMUL1VT(ContainerVT);
9142 if (auto VLEN = Subtarget.getRealVLen();
9143 VLEN && ContainerVT.bitsGT(M1VT)) {
9144 EVT ElemVT = VecVT.getVectorElementType();
9145 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
9146 unsigned RemIdx = OrigIdx % ElemsPerVReg;
9147 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
9148 unsigned ExtractIdx =
9149 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
9150 AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL);
9151 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
9152 ContainerVT = M1VT;
9153 }
9154
9155 if (AlignedIdx)
9156 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9157 AlignedIdx);
9158 }
9159
9160 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
9161 // Even i64-element vectors on RV32 can be lowered without scalar
9162 // legalization if the most-significant 32 bits of the value are not affected
9163 // by the sign-extension of the lower 32 bits.
9164 // TODO: We could also catch sign extensions of a 32-bit value.
9165 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
9166 const auto *CVal = cast<ConstantSDNode>(Val);
9167 if (isInt<32>(CVal->getSExtValue())) {
9168 IsLegalInsert = true;
9169 Val = DAG.getSignedConstant(CVal->getSExtValue(), DL, MVT::i32);
9170 }
9171 }
9172
9173 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9174
9175 SDValue ValInVec;
9176
9177 if (IsLegalInsert) {
9178 unsigned Opc =
9179 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
9180 if (isNullConstant(Idx)) {
9181 if (!VecVT.isFloatingPoint())
9182 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
9183 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
9184
9185 if (AlignedIdx)
9186 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
9187 Vec, AlignedIdx);
9188 if (!VecVT.isFixedLengthVector())
9189 return Vec;
9190 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
9191 }
9192 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
9193 } else {
9194 // On RV32, i64-element vectors must be specially handled to place the
9195 // value at element 0, by using two vslide1down instructions in sequence on
9196 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
9197 // this.
9198 SDValue ValLo, ValHi;
9199 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
9200 MVT I32ContainerVT =
9201 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
9202 SDValue I32Mask =
9203 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
9204 // Limit the active VL to two.
9205 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
9206 // If the Idx is 0 we can insert directly into the vector.
9207 if (isNullConstant(Idx)) {
9208 // First slide in the lo value, then slide the hi in above it. We use slide1down
9209 // to avoid the register group overlap constraint of vslide1up.
9210 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9211 Vec, Vec, ValLo, I32Mask, InsertI64VL);
9212 // If the source vector is undef don't pass along the tail elements from
9213 // the previous slide1down.
9214 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
9215 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9216 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
9217 // Bitcast back to the right container type.
9218 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
9219
9220 if (AlignedIdx)
9221 ValInVec =
9222 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
9223 ValInVec, AlignedIdx);
9224 if (!VecVT.isFixedLengthVector())
9225 return ValInVec;
9226 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
9227 }
9228
9229 // First slide in the lo value, then slide the hi in above it. We use slide1down
9230 // to avoid the register group overlap constraint of vslide1up.
9231 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9232 DAG.getUNDEF(I32ContainerVT),
9233 DAG.getUNDEF(I32ContainerVT), ValLo,
9234 I32Mask, InsertI64VL);
9235 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9236 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
9237 I32Mask, InsertI64VL);
9238 // Bitcast back to the right container type.
9239 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
9240 }
9241
9242 // Now that the value is in a vector, slide it into position.
9243 SDValue InsertVL =
9244 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
9245
9246 // Use tail agnostic policy if Idx is the last index of Vec.
9247 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9248 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
9249 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
9250 Policy = RISCVII::TAIL_AGNOSTIC;
9251 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
9252 Idx, Mask, InsertVL, Policy);
9253
9254 if (AlignedIdx)
9255 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
9256 Slideup, AlignedIdx);
9257 if (!VecVT.isFixedLengthVector())
9258 return Slideup;
9259 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
9260}
9261
9262// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
9263// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
9264// types this is done using VMV_X_S to allow us to glean information about the
9265// sign bits of the result.
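// Illustrative example: extracting element 5 of an integer vector becomes
// roughly
//   vslidedown.vi v8, v8, 5
//   vmv.x.s       a0, v8
// followed by a truncate of the GPR result to the element type if needed.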
9266SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
9267 SelectionDAG &DAG) const {
9268 SDLoc DL(Op);
9269 SDValue Idx = Op.getOperand(1);
9270 SDValue Vec = Op.getOperand(0);
9271 EVT EltVT = Op.getValueType();
9272 MVT VecVT = Vec.getSimpleValueType();
9273 MVT XLenVT = Subtarget.getXLenVT();
9274
9275 if (VecVT.getVectorElementType() == MVT::i1) {
9276 // Use vfirst.m to extract the first bit.
9277 if (isNullConstant(Idx)) {
9278 MVT ContainerVT = VecVT;
9279 if (VecVT.isFixedLengthVector()) {
9280 ContainerVT = getContainerForFixedLengthVector(VecVT);
9281 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9282 }
9283 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9284 SDValue Vfirst =
9285 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
9286 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
9287 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
9288 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
9289 }
9290 if (VecVT.isFixedLengthVector()) {
9291 unsigned NumElts = VecVT.getVectorNumElements();
9292 if (NumElts >= 8) {
9293 MVT WideEltVT;
9294 unsigned WidenVecLen;
9295 SDValue ExtractElementIdx;
9296 SDValue ExtractBitIdx;
9297 unsigned MaxEEW = Subtarget.getELen();
9298 MVT LargestEltVT = MVT::getIntegerVT(
9299 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
9300 if (NumElts <= LargestEltVT.getSizeInBits()) {
9301 assert(isPowerOf2_32(NumElts) &&
9302 "the number of elements should be power of 2");
9303 WideEltVT = MVT::getIntegerVT(NumElts);
9304 WidenVecLen = 1;
9305 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
9306 ExtractBitIdx = Idx;
9307 } else {
9308 WideEltVT = LargestEltVT;
9309 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
9310 // extract element index = index / element width
9311 ExtractElementIdx = DAG.getNode(
9312 ISD::SRL, DL, XLenVT, Idx,
9313 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
9314 // mask bit index = index % element width
9315 ExtractBitIdx = DAG.getNode(
9316 ISD::AND, DL, XLenVT, Idx,
9317 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
9318 }
9319 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
9320 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
9321 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
9322 Vec, ExtractElementIdx);
9323 // Extract the bit from GPR.
9324 SDValue ShiftRight =
9325 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
9326 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
9327 DAG.getConstant(1, DL, XLenVT));
9328 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
9329 }
9330 }
9331 // Otherwise, promote to an i8 vector and extract from that.
9332 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
9333 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
9334 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
9335 }
9336
9337 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
9338 EltVT == MVT::bf16) {
9339 // If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x
9340 MVT IntVT = VecVT.changeTypeToInteger();
9341 SDValue IntVec = DAG.getBitcast(IntVT, Vec);
9342 SDValue IntExtract =
9343 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, IntVec, Idx);
9344 return DAG.getNode(RISCVISD::FMV_H_X, DL, EltVT, IntExtract);
9345 }
9346
9347 // If this is a fixed vector, we need to convert it to a scalable vector.
9348 MVT ContainerVT = VecVT;
9349 if (VecVT.isFixedLengthVector()) {
9350 ContainerVT = getContainerForFixedLengthVector(VecVT);
9351 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9352 }
9353
9354 // If we're compiling for an exact VLEN value and we have a known
9355 // constant index, we can always perform the extract in m1 (or
9356 // smaller) as we can determine the register corresponding to
9357 // the index in the register group.
9358 const auto VLen = Subtarget.getRealVLen();
9359 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
9360 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
9361 MVT M1VT = getLMUL1VT(ContainerVT);
9362 unsigned OrigIdx = IdxC->getZExtValue();
9363 EVT ElemVT = VecVT.getVectorElementType();
9364 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
9365 unsigned RemIdx = OrigIdx % ElemsPerVReg;
9366 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
9367 unsigned ExtractIdx =
9368 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
9369 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
9370 DAG.getVectorIdxConstant(ExtractIdx, DL));
9371 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
9372 ContainerVT = M1VT;
9373 }
9374
9375 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
9376 // contains our index.
9377 std::optional<uint64_t> MaxIdx;
9378 if (VecVT.isFixedLengthVector())
9379 MaxIdx = VecVT.getVectorNumElements() - 1;
9380 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
9381 MaxIdx = IdxC->getZExtValue();
9382 if (MaxIdx) {
9383 if (auto SmallerVT =
9384 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
9385 ContainerVT = *SmallerVT;
9386 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9387 DAG.getConstant(0, DL, XLenVT));
9388 }
9389 }
9390
9391 // If after narrowing, the required slide is still greater than LMUL2,
9392 // fallback to generic expansion and go through the stack. This is done
9393 // for a subtle reason: extracting *all* elements out of a vector is
9394 // widely expected to be linear in vector size, but because vslidedown
9395 // is linear in LMUL, performing N extracts using vslidedown becomes
9396 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
9397 // seems to have the same problem (the store is linear in LMUL), but the
9398 // generic expansion *memoizes* the store, and thus for many extracts of
9399 // the same vector we end up with one store and a bunch of loads.
9400 // TODO: We don't have the same code for insert_vector_elt because we
9401 // have BUILD_VECTOR and handle the degenerate case there. Should we
9402 // consider adding an inverse BUILD_VECTOR node?
9403 MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
9404 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
9405 return SDValue();
9406
9407 // If the index is 0, the vector is already in the right position.
9408 if (!isNullConstant(Idx)) {
9409 // Use a VL of 1 to avoid processing more elements than we need.
9410 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
9411 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
9412 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
9413 }
9414
9415 if (!EltVT.isInteger()) {
9416 // Floating-point extracts are handled in TableGen.
9417 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
9418 DAG.getVectorIdxConstant(0, DL));
9419 }
9420
9421 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
9422 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
9423}
9424
9425// Some RVV intrinsics may claim that they want an integer operand to be
9426// promoted or expanded.
9427static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
9428 const RISCVSubtarget &Subtarget) {
9429 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
9430 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
9431 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
9432 "Unexpected opcode");
9433
9434 if (!Subtarget.hasVInstructions())
9435 return SDValue();
9436
9437 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
9438 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
9439 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
9440
9441 SDLoc DL(Op);
9442
9443 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
9444 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
9445 if (!II || !II->hasScalarOperand())
9446 return SDValue();
9447
9448 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
9449 assert(SplatOp < Op.getNumOperands());
9450
9451 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
9452 SDValue &ScalarOp = Operands[SplatOp];
9453 MVT OpVT = ScalarOp.getSimpleValueType();
9454 MVT XLenVT = Subtarget.getXLenVT();
9455
9456 // If this isn't a scalar, or its type is XLenVT we're done.
9457 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
9458 return SDValue();
9459
9460 // Simplest case is that the operand needs to be promoted to XLenVT.
9461 if (OpVT.bitsLT(XLenVT)) {
9462 // If the operand is a constant, sign extend to increase our chances
9463 // of being able to use a .vi instruction. ANY_EXTEND would become a
9464 // zero extend and the simm5 check in isel would fail.
9465 // FIXME: Should we ignore the upper bits in isel instead?
9466 unsigned ExtOpc =
9467 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
9468 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
9469 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9470 }
9471
9472 // Use the previous operand to get the vXi64 VT. The result might be a mask
9473 // VT for compares. Using the previous operand assumes that the previous
9474 // operand will never have a smaller element size than a scalar operand and
9475 // that a widening operation never uses SEW=64.
9476 // NOTE: If this fails the below assert, we can probably just find the
9477 // element count from any operand or result and use it to construct the VT.
9478 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
9479 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
9480
9481 // The more complex case is when the scalar is larger than XLenVT.
9482 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
9483 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
9484
9485 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
9486 // instruction to sign-extend since SEW>XLEN.
9487 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
9488 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
9489 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9490 }
9491
9492 switch (IntNo) {
9493 case Intrinsic::riscv_vslide1up:
9494 case Intrinsic::riscv_vslide1down:
9495 case Intrinsic::riscv_vslide1up_mask:
9496 case Intrinsic::riscv_vslide1down_mask: {
9497 // We need to special case these when the scalar is larger than XLen.
9498 unsigned NumOps = Op.getNumOperands();
9499 bool IsMasked = NumOps == 7;
9500
9501 // Convert the vector source to the equivalent nxvXi32 vector.
9502 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
9503 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
9504 SDValue ScalarLo, ScalarHi;
9505 std::tie(ScalarLo, ScalarHi) =
9506 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
9507
9508 // Double the VL since we halved SEW.
9509 SDValue AVL = getVLOperand(Op);
9510 SDValue I32VL;
9511
9512 // Optimize for constant AVL
9513 if (isa<ConstantSDNode>(AVL)) {
9514 const auto [MinVLMAX, MaxVLMAX] =
9515 RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
9516
9517 uint64_t AVLInt = AVL->getAsZExtVal();
9518 if (AVLInt <= MinVLMAX) {
9519 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
9520 } else if (AVLInt >= 2 * MaxVLMAX) {
9521 // Just set vl to VLMAX in this situation
9522 I32VL = DAG.getRegister(RISCV::X0, XLenVT);
9523 } else {
9524 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
9525 // is related to the hardware implementation.
9526 // So let the following code handle it.
9527 }
9528 }
9529 if (!I32VL) {
9530 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
9531 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
9532 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
9533 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
9534 SDValue SETVL =
9535 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
9536 // Use the vsetvli instruction to get the actually-used length, which is
9537 // related to the hardware implementation.
9538 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
9539 SEW, LMUL);
9540 I32VL =
9541 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
9542 }
9543
9544 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
9545
9546 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
9547 // instructions.
9548 SDValue Passthru;
9549 if (IsMasked)
9550 Passthru = DAG.getUNDEF(I32VT);
9551 else
9552 Passthru = DAG.getBitcast(I32VT, Operands[1]);
9553
9554 if (IntNo == Intrinsic::riscv_vslide1up ||
9555 IntNo == Intrinsic::riscv_vslide1up_mask) {
9556 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
9557 ScalarHi, I32Mask, I32VL);
9558 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
9559 ScalarLo, I32Mask, I32VL);
9560 } else {
9561 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
9562 ScalarLo, I32Mask, I32VL);
9563 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
9564 ScalarHi, I32Mask, I32VL);
9565 }
9566
9567 // Convert back to nxvXi64.
9568 Vec = DAG.getBitcast(VT, Vec);
9569
9570 if (!IsMasked)
9571 return Vec;
9572 // Apply mask after the operation.
9573 SDValue Mask = Operands[NumOps - 3];
9574 SDValue MaskedOff = Operands[1];
9575 // Assume Policy operand is the last operand.
9576 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
9577 // We don't need to select maskedoff if it's undef.
9578 if (MaskedOff.isUndef())
9579 return Vec;
9580 // TAMU
9581 if (Policy == RISCVII::TAIL_AGNOSTIC)
9582 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
9583 DAG.getUNDEF(VT), AVL);
9584 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
9585 // It's fine because vmerge does not care about the mask policy.
9586 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
9587 MaskedOff, AVL);
9588 }
9589 }
9590
9591 // We need to convert the scalar to a splat vector.
9592 SDValue VL = getVLOperand(Op);
9593 assert(VL.getValueType() == XLenVT);
9594 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
9595 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9596}
9597
9598// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
9599// scalable vector llvm.get.vector.length for now.
9600//
9601// We need to convert from a scalable VF to a vsetvli with VLMax equal to
9602// (vscale * VF). The vscale and VF are independent of element width. We use
9603// SEW=8 for the vsetvli because it is the only element width that supports all
9604 // fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
9605 // (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
9606// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
9607// SEW and LMUL are better for the surrounding vector instructions.
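// Illustrative example: with RISCV::RVVBitsPerBlock = 64, LMul1VF is
// 64 / 8 = 8, so a request with VF = 4 is fractional and is emitted as a
// vsetvli with SEW=8, LMUL=1/2, giving VLMax = (VLEN/8) * 1/2 = vscale * 4.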
9608static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
9609 const RISCVSubtarget &Subtarget) {
9610 MVT XLenVT = Subtarget.getXLenVT();
9611
9612 // The smallest LMUL is only valid for the smallest element width.
9613 const unsigned ElementWidth = 8;
9614
9615 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
9616 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
9617 // We don't support VF==1 with ELEN==32.
9618 [[maybe_unused]] unsigned MinVF =
9619 RISCV::RVVBitsPerBlock / Subtarget.getELen();
9620
9621 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
9622 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
9623 "Unexpected VF");
9624
9625 bool Fractional = VF < LMul1VF;
9626 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
9627 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
9628 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
9629
9630 SDLoc DL(N);
9631
9632 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
9633 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
9634
9635 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
9636
9637 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
9638 SDValue Res =
9639 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
9640 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
9641}
9642
9643static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
9644 const RISCVSubtarget &Subtarget) {
9645 SDValue Op0 = N->getOperand(1);
9646 MVT OpVT = Op0.getSimpleValueType();
9647 MVT ContainerVT = OpVT;
9648 if (OpVT.isFixedLengthVector()) {
9649 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
9650 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
9651 }
9652 MVT XLenVT = Subtarget.getXLenVT();
9653 SDLoc DL(N);
9654 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
9655 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
9656 if (isOneConstant(N->getOperand(2)))
9657 return Res;
9658
9659 // Convert -1 to VL.
9660 SDValue Setcc =
9661 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
9662 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
9663 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
9664}
9665
9666static inline void promoteVCIXScalar(const SDValue &Op,
9667 SmallVectorImpl<SDValue> &Operands,
9668 SelectionDAG &DAG) {
9669 const RISCVSubtarget &Subtarget =
9671
9672 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
9673 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
9674 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
9675 SDLoc DL(Op);
9676
9677 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
9678     RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
9679 if (!II || !II->hasScalarOperand())
9680 return;
9681
9682 unsigned SplatOp = II->ScalarOperand + 1;
9683 assert(SplatOp < Op.getNumOperands());
9684
9685 SDValue &ScalarOp = Operands[SplatOp];
9686 MVT OpVT = ScalarOp.getSimpleValueType();
9687 MVT XLenVT = Subtarget.getXLenVT();
9688
9689 // The code below is partially copied from lowerVectorIntrinsicScalars.
9690 // If this isn't a scalar, or its type is XLenVT we're done.
9691 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
9692 return;
9693
9694 // Manually emit promote operation for scalar operation.
9695 if (OpVT.bitsLT(XLenVT)) {
9696 unsigned ExtOpc =
9697 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
9698 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
9699 }
9700}
9701
9702 static void processVCIXOperands(SDValue &OrigOp,
9703                                 SmallVectorImpl<SDValue> &Operands,
9704                                 SelectionDAG &DAG) {
9705 promoteVCIXScalar(OrigOp, Operands, DAG);
9706 const RISCVSubtarget &Subtarget =
9707     DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9708 for (SDValue &V : Operands) {
9709 EVT ValType = V.getValueType();
9710 if (ValType.isVector() && ValType.isFloatingPoint()) {
9711 MVT InterimIVT =
9712 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
9713 ValType.getVectorElementCount());
9714 V = DAG.getBitcast(InterimIVT, V);
9715 }
9716 if (ValType.isFixedLengthVector()) {
9717 MVT OpContainerVT = getContainerForFixedLengthVector(
9718 DAG, V.getSimpleValueType(), Subtarget);
9719 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
9720 }
9721 }
9722}
9723
9724// LMUL * VLEN should be greater than or equal to EGS * SEW
9725static inline bool isValidEGW(int EGS, EVT VT,
9726 const RISCVSubtarget &Subtarget) {
9727 return (Subtarget.getRealMinVLen() *
9728         VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
9729        EGS * VT.getScalarSizeInBits();
9730}
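// e.g. with VLEN >= 128, an LMUL=1 type such as nxv2i32 satisfies
// isValidEGW(4, ...) because 128 * 1 >= 4 * 32, whereas a 4-element group of
// SEW=64 needs VLEN >= 256 or a larger LMUL.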
9731
9732SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
9733 SelectionDAG &DAG) const {
9734 unsigned IntNo = Op.getConstantOperandVal(0);
9735 SDLoc DL(Op);
9736 MVT XLenVT = Subtarget.getXLenVT();
9737
9738 switch (IntNo) {
9739 default:
9740 break; // Don't custom lower most intrinsics.
9741 case Intrinsic::riscv_tuple_insert: {
9742 SDValue Vec = Op.getOperand(1);
9743 SDValue SubVec = Op.getOperand(2);
9744 SDValue Index = Op.getOperand(3);
9745
9746 return DAG.getNode(RISCVISD::TUPLE_INSERT, DL, Op.getValueType(), Vec,
9747 SubVec, Index);
9748 }
9749 case Intrinsic::riscv_tuple_extract: {
9750 SDValue Vec = Op.getOperand(1);
9751 SDValue Index = Op.getOperand(2);
9752
9753 return DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, Op.getValueType(), Vec,
9754 Index);
9755 }
9756 case Intrinsic::thread_pointer: {
9757 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9758 return DAG.getRegister(RISCV::X4, PtrVT);
9759 }
9760 case Intrinsic::riscv_orc_b:
9761 case Intrinsic::riscv_brev8:
9762 case Intrinsic::riscv_sha256sig0:
9763 case Intrinsic::riscv_sha256sig1:
9764 case Intrinsic::riscv_sha256sum0:
9765 case Intrinsic::riscv_sha256sum1:
9766 case Intrinsic::riscv_sm3p0:
9767 case Intrinsic::riscv_sm3p1: {
9768 unsigned Opc;
9769 switch (IntNo) {
9770 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
9771 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
9772 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
9773 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
9774 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
9775 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
9776 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
9777 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
9778 }
9779
9780 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
9781 }
9782 case Intrinsic::riscv_sm4ks:
9783 case Intrinsic::riscv_sm4ed: {
9784 unsigned Opc =
9785 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
9786
9787 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
9788 Op.getOperand(3));
9789 }
9790 case Intrinsic::riscv_zip:
9791 case Intrinsic::riscv_unzip: {
9792 unsigned Opc =
9793 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
9794 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
9795 }
9796 case Intrinsic::riscv_mopr:
9797 return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1),
9798 Op.getOperand(2));
9799
9800 case Intrinsic::riscv_moprr: {
9801 return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1),
9802 Op.getOperand(2), Op.getOperand(3));
9803 }
9804 case Intrinsic::riscv_clmul:
9805 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
9806 Op.getOperand(2));
9807 case Intrinsic::riscv_clmulh:
9808 case Intrinsic::riscv_clmulr: {
9809 unsigned Opc =
9810 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
9811 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
9812 }
9813 case Intrinsic::experimental_get_vector_length:
9814 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
9815 case Intrinsic::experimental_cttz_elts:
9816 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
9817 case Intrinsic::riscv_vmv_x_s: {
9818 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
9819 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
9820 }
9821 case Intrinsic::riscv_vfmv_f_s:
9822 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
9823 Op.getOperand(1), DAG.getVectorIdxConstant(0, DL));
9824 case Intrinsic::riscv_vmv_v_x:
9825 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
9826 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
9827 Subtarget);
9828 case Intrinsic::riscv_vfmv_v_f:
9829 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
9830 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9831 case Intrinsic::riscv_vmv_s_x: {
9832 SDValue Scalar = Op.getOperand(2);
9833
9834 if (Scalar.getValueType().bitsLE(XLenVT)) {
9835 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
9836 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
9837 Op.getOperand(1), Scalar, Op.getOperand(3));
9838 }
9839
9840 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
9841
9842 // This is an i64 value that lives in two scalar registers. We have to
9843 // insert this in a convoluted way. First we build vXi64 splat containing
9844 // the two values that we assemble using some bit math. Next we'll use
9845 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
9846 // to merge element 0 from our splat into the source vector.
9847 // FIXME: This is probably not the best way to do this, but it is
9848 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
9849 // point.
9850 // sw lo, (a0)
9851 // sw hi, 4(a0)
9852 // vlse vX, (a0)
9853 //
9854 // vid.v vVid
9855 // vmseq.vx mMask, vVid, 0
9856 // vmerge.vvm vDest, vSrc, vVal, mMask
9857 MVT VT = Op.getSimpleValueType();
9858 SDValue Vec = Op.getOperand(1);
9859 SDValue VL = getVLOperand(Op);
9860
9861 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
9862 if (Op.getOperand(1).isUndef())
9863 return SplattedVal;
9864 SDValue SplattedIdx =
9865 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
9866 DAG.getConstant(0, DL, MVT::i32), VL);
9867
9868 MVT MaskVT = getMaskTypeFor(VT);
9869 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
9870 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
9871 SDValue SelectCond =
9872 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
9873 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
9874 DAG.getUNDEF(MaskVT), Mask, VL});
9875 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
9876 Vec, DAG.getUNDEF(VT), VL);
9877 }
9878 case Intrinsic::riscv_vfmv_s_f:
9879 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
9880 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9881 // EGS * EEW >= 128 bits
9882 case Intrinsic::riscv_vaesdf_vv:
9883 case Intrinsic::riscv_vaesdf_vs:
9884 case Intrinsic::riscv_vaesdm_vv:
9885 case Intrinsic::riscv_vaesdm_vs:
9886 case Intrinsic::riscv_vaesef_vv:
9887 case Intrinsic::riscv_vaesef_vs:
9888 case Intrinsic::riscv_vaesem_vv:
9889 case Intrinsic::riscv_vaesem_vs:
9890 case Intrinsic::riscv_vaeskf1:
9891 case Intrinsic::riscv_vaeskf2:
9892 case Intrinsic::riscv_vaesz_vs:
9893 case Intrinsic::riscv_vsm4k:
9894 case Intrinsic::riscv_vsm4r_vv:
9895 case Intrinsic::riscv_vsm4r_vs: {
9896 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9897 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9898 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9899 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9900 return Op;
9901 }
9902 // EGS * EEW >= 256 bits
9903 case Intrinsic::riscv_vsm3c:
9904 case Intrinsic::riscv_vsm3me: {
9905 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
9906 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
9907 report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
9908 return Op;
9909 }
9910 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
9911 case Intrinsic::riscv_vsha2ch:
9912 case Intrinsic::riscv_vsha2cl:
9913 case Intrinsic::riscv_vsha2ms: {
9914 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
9915 !Subtarget.hasStdExtZvknhb())
9916 report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
9917 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9918 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9919 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9920 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9921 return Op;
9922 }
9923 case Intrinsic::riscv_sf_vc_v_x:
9924 case Intrinsic::riscv_sf_vc_v_i:
9925 case Intrinsic::riscv_sf_vc_v_xv:
9926 case Intrinsic::riscv_sf_vc_v_iv:
9927 case Intrinsic::riscv_sf_vc_v_vv:
9928 case Intrinsic::riscv_sf_vc_v_fv:
9929 case Intrinsic::riscv_sf_vc_v_xvv:
9930 case Intrinsic::riscv_sf_vc_v_ivv:
9931 case Intrinsic::riscv_sf_vc_v_vvv:
9932 case Intrinsic::riscv_sf_vc_v_fvv:
9933 case Intrinsic::riscv_sf_vc_v_xvw:
9934 case Intrinsic::riscv_sf_vc_v_ivw:
9935 case Intrinsic::riscv_sf_vc_v_vvw:
9936 case Intrinsic::riscv_sf_vc_v_fvw: {
9937 MVT VT = Op.getSimpleValueType();
9938
9939 SmallVector<SDValue> Operands{Op->op_values()};
9940 processVCIXOperands(Op, Operands, DAG);
9941
9942 MVT RetVT = VT;
9943 if (VT.isFixedLengthVector())
9944   RetVT = getContainerForFixedLengthVector(VT);
9945 else if (VT.isFloatingPoint())
9946   RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9947                            VT.getVectorElementCount());
9948
9949 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
9950
9951 if (VT.isFixedLengthVector())
9952 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
9953 else if (VT.isFloatingPoint())
9954 NewNode = DAG.getBitcast(VT, NewNode);
9955
9956 if (Op == NewNode)
9957 break;
9958
9959 return NewNode;
9960 }
9961 }
9962
9963 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9964}
9965
9966 static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
9967                                     unsigned Type) {
9968 SDLoc DL(Op);
9969 SmallVector<SDValue> Operands{Op->op_values()};
9970 Operands.erase(Operands.begin() + 1);
9971
9972 const RISCVSubtarget &Subtarget =
9973     DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9974 MVT VT = Op.getSimpleValueType();
9975 MVT RetVT = VT;
9976 MVT FloatVT = VT;
9977
9978 if (VT.isFloatingPoint()) {
9979 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9980                          VT.getVectorElementCount());
9981 FloatVT = RetVT;
9982 }
9983 if (VT.isFixedLengthVector())
9984   RetVT = getContainerForFixedLengthVector(DAG, RetVT,
9985                                            Subtarget);
9986
9987 processVCIXOperands(Op, Operands, DAG);
9988
9989 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
9990 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
9991 SDValue Chain = NewNode.getValue(1);
9992
9993 if (VT.isFixedLengthVector())
9994 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
9995 if (VT.isFloatingPoint())
9996 NewNode = DAG.getBitcast(VT, NewNode);
9997
9998 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
9999
10000 return NewNode;
10001}
10002
10003 static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
10004                                   unsigned Type) {
10005 SmallVector<SDValue> Operands{Op->op_values()};
10006 Operands.erase(Operands.begin() + 1);
10007 processVCIXOperands(Op, Operands, DAG);
10008
10009 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
10010}
10011
10012SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
10013 SelectionDAG &DAG) const {
10014 unsigned IntNo = Op.getConstantOperandVal(1);
10015 switch (IntNo) {
10016 default:
10017 break;
10018 case Intrinsic::riscv_seg2_load:
10019 case Intrinsic::riscv_seg3_load:
10020 case Intrinsic::riscv_seg4_load:
10021 case Intrinsic::riscv_seg5_load:
10022 case Intrinsic::riscv_seg6_load:
10023 case Intrinsic::riscv_seg7_load:
10024 case Intrinsic::riscv_seg8_load: {
10025 SDLoc DL(Op);
10026 static const Intrinsic::ID VlsegInts[7] = {
10027 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
10028 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
10029 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
10030 Intrinsic::riscv_vlseg8};
10031 unsigned NF = Op->getNumValues() - 1;
10032 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
10033 MVT XLenVT = Subtarget.getXLenVT();
10034 MVT VT = Op->getSimpleValueType(0);
10035 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10036 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
10037 ContainerVT.getScalarSizeInBits();
10038 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
10039
10040 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
10041 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
10042 auto *Load = cast<MemIntrinsicSDNode>(Op);
10043
10044 SDVTList VTs = DAG.getVTList({VecTupTy, MVT::Other});
10045 SDValue Ops[] = {
10046 Load->getChain(),
10047 IntID,
10048 DAG.getUNDEF(VecTupTy),
10049 Op.getOperand(2),
10050 VL,
10051 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
10052 SDValue Result =
10053     DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
10054                             Load->getMemoryVT(), Load->getMemOperand());
10055 SmallVector<SDValue, 9> Results;
10056 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) {
10057 SDValue SubVec =
10058 DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT,
10059 Result.getValue(0), DAG.getVectorIdxConstant(RetIdx, DL));
10060 Results.push_back(convertFromScalableVector(VT, SubVec, DAG, Subtarget));
10061 }
10062 Results.push_back(Result.getValue(1));
10063 return DAG.getMergeValues(Results, DL);
10064 }
10065 case Intrinsic::riscv_sf_vc_v_x_se:
10066   return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
10067 case Intrinsic::riscv_sf_vc_v_i_se:
10068   return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
10069 case Intrinsic::riscv_sf_vc_v_xv_se:
10070   return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
10071 case Intrinsic::riscv_sf_vc_v_iv_se:
10072   return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
10073 case Intrinsic::riscv_sf_vc_v_vv_se:
10074   return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
10075 case Intrinsic::riscv_sf_vc_v_fv_se:
10076   return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
10077 case Intrinsic::riscv_sf_vc_v_xvv_se:
10078   return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
10079 case Intrinsic::riscv_sf_vc_v_ivv_se:
10080   return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
10081 case Intrinsic::riscv_sf_vc_v_vvv_se:
10082   return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
10083 case Intrinsic::riscv_sf_vc_v_fvv_se:
10084   return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
10085 case Intrinsic::riscv_sf_vc_v_xvw_se:
10086   return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
10087 case Intrinsic::riscv_sf_vc_v_ivw_se:
10088   return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
10089 case Intrinsic::riscv_sf_vc_v_vvw_se:
10090   return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
10091 case Intrinsic::riscv_sf_vc_v_fvw_se:
10092   return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
10093 }
10094
10095 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
10096}
10097
10098SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
10099 SelectionDAG &DAG) const {
10100 unsigned IntNo = Op.getConstantOperandVal(1);
10101 switch (IntNo) {
10102 default:
10103 break;
10104 case Intrinsic::riscv_seg2_store:
10105 case Intrinsic::riscv_seg3_store:
10106 case Intrinsic::riscv_seg4_store:
10107 case Intrinsic::riscv_seg5_store:
10108 case Intrinsic::riscv_seg6_store:
10109 case Intrinsic::riscv_seg7_store:
10110 case Intrinsic::riscv_seg8_store: {
10111 SDLoc DL(Op);
10112 static const Intrinsic::ID VssegInts[] = {
10113 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
10114 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
10115 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
10116 Intrinsic::riscv_vsseg8};
10117 // Operands are (chain, int_id, vec*, ptr, vl)
10118 unsigned NF = Op->getNumOperands() - 4;
10119 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
10120 MVT XLenVT = Subtarget.getXLenVT();
10121 MVT VT = Op->getOperand(2).getSimpleValueType();
10122 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10123 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
10124 ContainerVT.getScalarSizeInBits();
10125 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
10126
10127 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
10128 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
10129 SDValue Ptr = Op->getOperand(NF + 2);
10130
10131 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
10132
10133 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
10134 for (unsigned i = 0; i < NF; i++)
10135 StoredVal = DAG.getNode(
10136 RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
10137 convertToScalableVector(
10138     ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget),
10139 DAG.getVectorIdxConstant(i, DL));
10140
10141 SDValue Ops[] = {
10142 FixedIntrinsic->getChain(),
10143 IntID,
10144 StoredVal,
10145 Ptr,
10146 VL,
10147 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
10148
10149 return DAG.getMemIntrinsicNode(
10150 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
10151 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
10152 }
10153 case Intrinsic::riscv_sf_vc_xv_se:
10154   return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
10155 case Intrinsic::riscv_sf_vc_iv_se:
10156   return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
10157 case Intrinsic::riscv_sf_vc_vv_se:
10158   return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
10159 case Intrinsic::riscv_sf_vc_fv_se:
10160   return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
10161 case Intrinsic::riscv_sf_vc_xvv_se:
10162   return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
10163 case Intrinsic::riscv_sf_vc_ivv_se:
10164   return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
10165 case Intrinsic::riscv_sf_vc_vvv_se:
10166   return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
10167 case Intrinsic::riscv_sf_vc_fvv_se:
10168   return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
10169 case Intrinsic::riscv_sf_vc_xvw_se:
10170   return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
10171 case Intrinsic::riscv_sf_vc_ivw_se:
10172   return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
10173 case Intrinsic::riscv_sf_vc_vvw_se:
10174   return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
10175 case Intrinsic::riscv_sf_vc_fvw_se:
10176   return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
10177 }
10178
10179 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
10180}
10181
10182static unsigned getRVVReductionOp(unsigned ISDOpcode) {
10183 switch (ISDOpcode) {
10184 default:
10185 llvm_unreachable("Unhandled reduction");
10186 case ISD::VP_REDUCE_ADD:
10187 case ISD::VECREDUCE_ADD:
10188   return RISCVISD::VECREDUCE_ADD_VL;
10189 case ISD::VP_REDUCE_UMAX:
10190 case ISD::VECREDUCE_UMAX:
10191   return RISCVISD::VECREDUCE_UMAX_VL;
10192 case ISD::VP_REDUCE_SMAX:
10193 case ISD::VECREDUCE_SMAX:
10194   return RISCVISD::VECREDUCE_SMAX_VL;
10195 case ISD::VP_REDUCE_UMIN:
10196 case ISD::VECREDUCE_UMIN:
10197   return RISCVISD::VECREDUCE_UMIN_VL;
10198 case ISD::VP_REDUCE_SMIN:
10199 case ISD::VECREDUCE_SMIN:
10200   return RISCVISD::VECREDUCE_SMIN_VL;
10201 case ISD::VP_REDUCE_AND:
10202 case ISD::VECREDUCE_AND:
10203   return RISCVISD::VECREDUCE_AND_VL;
10204 case ISD::VP_REDUCE_OR:
10205 case ISD::VECREDUCE_OR:
10206   return RISCVISD::VECREDUCE_OR_VL;
10207 case ISD::VP_REDUCE_XOR:
10208 case ISD::VECREDUCE_XOR:
10209   return RISCVISD::VECREDUCE_XOR_VL;
10210 case ISD::VP_REDUCE_FADD:
10211   return RISCVISD::VECREDUCE_FADD_VL;
10212 case ISD::VP_REDUCE_SEQ_FADD:
10213   return RISCVISD::VECREDUCE_SEQ_FADD_VL;
10214 case ISD::VP_REDUCE_FMAX:
10215 case ISD::VP_REDUCE_FMAXIMUM:
10216   return RISCVISD::VECREDUCE_FMAX_VL;
10217 case ISD::VP_REDUCE_FMIN:
10218 case ISD::VP_REDUCE_FMINIMUM:
10219   return RISCVISD::VECREDUCE_FMIN_VL;
10220 }
10221
10222}
10223
10224SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
10225 SelectionDAG &DAG,
10226 bool IsVP) const {
10227 SDLoc DL(Op);
10228 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
10229 MVT VecVT = Vec.getSimpleValueType();
10230 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
10231 Op.getOpcode() == ISD::VECREDUCE_OR ||
10232 Op.getOpcode() == ISD::VECREDUCE_XOR ||
10233 Op.getOpcode() == ISD::VP_REDUCE_AND ||
10234 Op.getOpcode() == ISD::VP_REDUCE_OR ||
10235 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
10236 "Unexpected reduction lowering");
10237
10238 MVT XLenVT = Subtarget.getXLenVT();
10239
10240 MVT ContainerVT = VecVT;
10241 if (VecVT.isFixedLengthVector()) {
10242 ContainerVT = getContainerForFixedLengthVector(VecVT);
10243 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10244 }
10245
10246 SDValue Mask, VL;
10247 if (IsVP) {
10248 Mask = Op.getOperand(2);
10249 VL = Op.getOperand(3);
10250 } else {
10251 std::tie(Mask, VL) =
10252 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10253 }
10254
10255 ISD::CondCode CC;
10256 switch (Op.getOpcode()) {
10257 default:
10258 llvm_unreachable("Unhandled reduction");
10259 case ISD::VECREDUCE_AND:
10260 case ISD::VP_REDUCE_AND: {
10261 // vcpop ~x == 0
10262 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
10263 if (IsVP || VecVT.isFixedLengthVector())
10264 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
10265 else
10266 Vec = DAG.getNode(ISD::XOR, DL, ContainerVT, Vec, TrueMask);
10267 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
10268 CC = ISD::SETEQ;
10269 break;
10270 }
10271 case ISD::VECREDUCE_OR:
10272 case ISD::VP_REDUCE_OR:
10273 // vcpop x != 0
10274 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
10275 CC = ISD::SETNE;
10276 break;
10277 case ISD::VECREDUCE_XOR:
10278 case ISD::VP_REDUCE_XOR: {
10279 // ((vcpop x) & 1) != 0
10280 SDValue One = DAG.getConstant(1, DL, XLenVT);
10281 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
10282 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
10283 CC = ISD::SETNE;
10284 break;
10285 }
10286 }
10287
10288 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
10289 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
10290 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
10291
10292 if (!IsVP)
10293 return SetCC;
10294
10295 // Now include the start value in the operation.
10296 // Note that we must return the start value when no elements are operated
10297 // upon. The vcpop instructions we've emitted in each case above will return
10298 // 0 for an inactive vector, and so we've already received the neutral value:
10299 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
10300 // can simply include the start value.
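// For example, a vp.reduce.or with EVL=0 produces vcpop = 0, the SETNE
// comparison yields 0, and OR-ing that with the start value returns the start
// value unchanged.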
10301 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
10302 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
10303}
10304
10305static bool isNonZeroAVL(SDValue AVL) {
10306 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
10307 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
10308 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
10309 (ImmAVL && ImmAVL->getZExtValue() >= 1);
10310}
10311
10312/// Helper to lower a reduction sequence of the form:
10313/// scalar = reduce_op vec, scalar_start
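/// e.g. an integer VECREDUCE_ADD is lowered to a single vredsum.vs whose
/// scalar operand (element 0 of an LMUL=1 vector) carries the start value;
/// the final scalar is then read back out of element 0 of the result.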
10314static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
10315 SDValue StartValue, SDValue Vec, SDValue Mask,
10316 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
10317 const RISCVSubtarget &Subtarget) {
10318 const MVT VecVT = Vec.getSimpleValueType();
10319 const MVT M1VT = getLMUL1VT(VecVT);
10320 const MVT XLenVT = Subtarget.getXLenVT();
10321 const bool NonZeroAVL = isNonZeroAVL(VL);
10322
10323 // The reduction needs an LMUL1 input; do the splat at either LMUL1
10324 // or the original VT if fractional.
10325 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
10326 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
10327 // prove it is non-zero. For the AVL=0 case, we need the scalar to
10328 // be the result of the reduction operation.
10329 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
10330 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
10331 DAG, Subtarget);
10332 if (M1VT != InnerVT)
10333 InitialValue =
10334 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, DAG.getUNDEF(M1VT),
10335 InitialValue, DAG.getVectorIdxConstant(0, DL));
10336 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
10337 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
10338 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
10339 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
10340 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
10341 DAG.getVectorIdxConstant(0, DL));
10342}
10343
10344SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
10345 SelectionDAG &DAG) const {
10346 SDLoc DL(Op);
10347 SDValue Vec = Op.getOperand(0);
10348 EVT VecEVT = Vec.getValueType();
10349
10350 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
10351
10352 // Due to ordering in legalize types we may have a vector type that needs to
10353 // be split. Do that manually so we can get down to a legal type.
10354 while (getTypeAction(*DAG.getContext(), VecEVT) ==
10355 TargetLowering::TypeSplitVector) {
10356 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
10357 VecEVT = Lo.getValueType();
10358 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
10359 }
10360
10361 // TODO: The type may need to be widened rather than split. Or widened before
10362 // it can be split.
10363 if (!isTypeLegal(VecEVT))
10364 return SDValue();
10365
10366 MVT VecVT = VecEVT.getSimpleVT();
10367 MVT VecEltVT = VecVT.getVectorElementType();
10368 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
10369
10370 MVT ContainerVT = VecVT;
10371 if (VecVT.isFixedLengthVector()) {
10372 ContainerVT = getContainerForFixedLengthVector(VecVT);
10373 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10374 }
10375
10376 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10377
10378 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
10379 switch (BaseOpc) {
10380 case ISD::AND:
10381 case ISD::OR:
10382 case ISD::UMAX:
10383 case ISD::UMIN:
10384 case ISD::SMAX:
10385 case ISD::SMIN:
10386 StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
10387 DAG.getVectorIdxConstant(0, DL));
10388 }
10389 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
10390 Mask, VL, DL, DAG, Subtarget);
10391}
10392
10393// Given a reduction op, this function returns the matching reduction opcode,
10394// the vector SDValue and the scalar SDValue required to lower this to a
10395// RISCVISD node.
10396static std::tuple<unsigned, SDValue, SDValue>
10397 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
10398                                const RISCVSubtarget &Subtarget) {
10399 SDLoc DL(Op);
10400 auto Flags = Op->getFlags();
10401 unsigned Opcode = Op.getOpcode();
10402 switch (Opcode) {
10403 default:
10404 llvm_unreachable("Unhandled reduction");
10405 case ISD::VECREDUCE_FADD: {
10406 // Use positive zero if we can. It is cheaper to materialize.
10407 SDValue Zero =
10408 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
10409 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
10410 }
10411 case ISD::VECREDUCE_SEQ_FADD:
10412   return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
10413                          Op.getOperand(0));
10414 case ISD::VECREDUCE_FMINIMUM:
10415 case ISD::VECREDUCE_FMAXIMUM:
10416 case ISD::VECREDUCE_FMIN:
10417 case ISD::VECREDUCE_FMAX: {
10418 SDValue Front =
10419 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
10420 DAG.getVectorIdxConstant(0, DL));
10421 unsigned RVVOpc =
10422     (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
10423         ? RISCVISD::VECREDUCE_FMIN_VL
10424         : RISCVISD::VECREDUCE_FMAX_VL;
10425 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
10426 }
10427 }
10428}
10429
10430SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
10431 SelectionDAG &DAG) const {
10432 SDLoc DL(Op);
10433 MVT VecEltVT = Op.getSimpleValueType();
10434
10435 unsigned RVVOpcode;
10436 SDValue VectorVal, ScalarVal;
10437 std::tie(RVVOpcode, VectorVal, ScalarVal) =
10438 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
10439 MVT VecVT = VectorVal.getSimpleValueType();
10440
10441 MVT ContainerVT = VecVT;
10442 if (VecVT.isFixedLengthVector()) {
10443 ContainerVT = getContainerForFixedLengthVector(VecVT);
10444 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
10445 }
10446
10447 MVT ResVT = Op.getSimpleValueType();
10448 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10449 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
10450 VL, DL, DAG, Subtarget);
10451 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
10452 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
10453 return Res;
10454
10455 if (Op->getFlags().hasNoNaNs())
10456 return Res;
10457
10458 // Force output to NaN if any element is NaN.
10459 SDValue IsNan =
10460 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
10461 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
10462 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
10463 MVT XLenVT = Subtarget.getXLenVT();
10464 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
10465 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
10466 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10467 return DAG.getSelect(
10468 DL, ResVT, NoNaNs, Res,
10469 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
10470}
10471
10472SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
10473 SelectionDAG &DAG) const {
10474 SDLoc DL(Op);
10475 unsigned Opc = Op.getOpcode();
10476 SDValue Start = Op.getOperand(0);
10477 SDValue Vec = Op.getOperand(1);
10478 EVT VecEVT = Vec.getValueType();
10479 MVT XLenVT = Subtarget.getXLenVT();
10480
10481 // TODO: The type may need to be widened rather than split. Or widened before
10482 // it can be split.
10483 if (!isTypeLegal(VecEVT))
10484 return SDValue();
10485
10486 MVT VecVT = VecEVT.getSimpleVT();
10487 unsigned RVVOpcode = getRVVReductionOp(Opc);
10488
10489 if (VecVT.isFixedLengthVector()) {
10490 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
10491 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10492 }
10493
10494 SDValue VL = Op.getOperand(3);
10495 SDValue Mask = Op.getOperand(2);
10496 SDValue Res =
10497 lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
10498 Vec, Mask, VL, DL, DAG, Subtarget);
10499 if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
10500 Op->getFlags().hasNoNaNs())
10501 return Res;
10502
10503 // Propagate NaNs.
10504 MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
10505 // Check if any of the elements in Vec is NaN.
10506 SDValue IsNaN = DAG.getNode(
10507 RISCVISD::SETCC_VL, DL, PredVT,
10508 {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
10509 SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
10510 // Check if the start value is NaN.
10511 SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
10512 VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
10513 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
10514 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10515 MVT ResVT = Res.getSimpleValueType();
10516 return DAG.getSelect(
10517 DL, ResVT, NoNaNs, Res,
10518 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
10519}
10520
10521SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
10522 SelectionDAG &DAG) const {
10523 SDValue Vec = Op.getOperand(0);
10524 SDValue SubVec = Op.getOperand(1);
10525 MVT VecVT = Vec.getSimpleValueType();
10526 MVT SubVecVT = SubVec.getSimpleValueType();
10527
10528 SDLoc DL(Op);
10529 MVT XLenVT = Subtarget.getXLenVT();
10530 unsigned OrigIdx = Op.getConstantOperandVal(2);
10531 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10532
10533 if (OrigIdx == 0 && Vec.isUndef())
10534 return Op;
10535
10536 // We don't have the ability to slide mask vectors up indexed by their i1
10537 // elements; the smallest we can do is i8. Often we are able to bitcast to
10538 // equivalent i8 vectors. Note that when inserting a fixed-length vector
10539 // into a scalable one, we might not necessarily have enough scalable
10540 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
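// e.g. inserting a v16i1 subvector at index 8 of an nxv16i1 vector can be
// rewritten as inserting v2i8 at index 1 of nxv2i8.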
10541 if (SubVecVT.getVectorElementType() == MVT::i1) {
10542 if (VecVT.getVectorMinNumElements() >= 8 &&
10543 SubVecVT.getVectorMinNumElements() >= 8) {
10544 assert(OrigIdx % 8 == 0 && "Invalid index");
10545 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
10546 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
10547 "Unexpected mask vector lowering");
10548 OrigIdx /= 8;
10549 SubVecVT =
10550 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
10551 SubVecVT.isScalableVector());
10552 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
10553 VecVT.isScalableVector());
10554 Vec = DAG.getBitcast(VecVT, Vec);
10555 SubVec = DAG.getBitcast(SubVecVT, SubVec);
10556 } else {
10557 // We can't slide this mask vector up indexed by its i1 elements.
10558 // This poses a problem when we wish to insert a scalable vector which
10559 // can't be re-expressed as a larger type. Just choose the slow path and
10560 // extend to a larger type, then truncate back down.
10561 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
10562 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
10563 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
10564 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
10565 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
10566 Op.getOperand(2));
10567 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
10568 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
10569 }
10570 }
10571
10572 // If the subvector is a fixed-length type and we don't know VLEN
10573 // exactly, we cannot use subregister manipulation to simplify the codegen; we
10574 // don't know which register of a LMUL group contains the specific subvector
10575 // as we only know the minimum register size. Therefore we must slide the
10576 // vector group up the full amount.
10577 const auto VLen = Subtarget.getRealVLen();
10578 if (SubVecVT.isFixedLengthVector() && !VLen) {
10579 MVT ContainerVT = VecVT;
10580 if (VecVT.isFixedLengthVector()) {
10581 ContainerVT = getContainerForFixedLengthVector(VecVT);
10582 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10583 }
10584
10585 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
10586 DAG.getUNDEF(ContainerVT), SubVec,
10587 DAG.getVectorIdxConstant(0, DL));
10588
10589 SDValue Mask =
10590 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
10591 // Set the vector length to only the number of elements we care about. Note
10592 // that for slideup this includes the offset.
10593 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
10594 SDValue VL = DAG.getConstant(EndIndex, DL, XLenVT);
10595
10596 // Use tail agnostic policy if we're inserting over Vec's tail.
10597 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10598 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
10599 Policy = RISCVII::TAIL_AGNOSTIC;
10600
10601 // If we're inserting into the lowest elements, use a tail undisturbed
10602 // vmv.v.v.
10603 if (OrigIdx == 0) {
10604 SubVec =
10605 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
10606 } else {
10607 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
10608 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
10609 SlideupAmt, Mask, VL, Policy);
10610 }
10611
10612 if (VecVT.isFixedLengthVector())
10613 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10614 return DAG.getBitcast(Op.getValueType(), SubVec);
10615 }
10616
10617 MVT ContainerVecVT = VecVT;
10618 if (VecVT.isFixedLengthVector()) {
10619 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
10620 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
10621 }
10622
10623 MVT ContainerSubVecVT = SubVecVT;
10624 if (SubVecVT.isFixedLengthVector()) {
10625 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10626 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
10627 }
10628
10629 unsigned SubRegIdx;
10630 ElementCount RemIdx;
10631 // insert_subvector scales the index by vscale if the subvector is scalable,
10632 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10633 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10634 if (SubVecVT.isFixedLengthVector()) {
10635 assert(VLen);
10636 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10637 auto Decompose =
10638     RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10639         ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10640 SubRegIdx = Decompose.first;
10641 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10642 (OrigIdx % Vscale));
10643 } else {
10644 auto Decompose =
10645     RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10646         ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
10647 SubRegIdx = Decompose.first;
10648 RemIdx = ElementCount::getScalable(Decompose.second);
10649 }
10650
10651 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
10652 assert(isPowerOf2_64(
10653     Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
10654 bool ExactlyVecRegSized =
10655 Subtarget.expandVScale(SubVecVT.getSizeInBits())
10656 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
10657
10658 // 1. If the Idx has been completely eliminated and this subvector's size is
10659 // a vector register or a multiple thereof, or the surrounding elements are
10660 // undef, then this is a subvector insert which naturally aligns to a vector
10661 // register. These can easily be handled using subregister manipulation.
10662 // 2. If the subvector isn't an exact multiple of a valid register group size,
10663 // then the insertion must preserve the undisturbed elements of the register.
10664 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
10665 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
10666 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
10667 // of that LMUL=1 type back into the larger vector (resolving to another
10668 // subregister operation). See below for how our VSLIDEUP works. We go via a
10669 // LMUL=1 type to avoid allocating a large register group to hold our
10670 // subvector.
10671 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
10672 if (SubVecVT.isFixedLengthVector()) {
10673 // We may get NoSubRegister if inserting at index 0 and the subvec
10674 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
10675 if (SubRegIdx == RISCV::NoSubRegister) {
10676 assert(OrigIdx == 0);
10677 return Op;
10678 }
10679
10680 // Use an insert_subvector that will resolve to an insert subreg.
10681 assert(VLen);
10682 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10683 SDValue Insert =
10684 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
10685 DAG.getConstant(OrigIdx / Vscale, DL, XLenVT));
10686 if (VecVT.isFixedLengthVector())
10687 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
10688 return Insert;
10689 }
10690 return Op;
10691 }
10692
10693 // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
10694 // OFFSET<=i<VL set to the "subvector" and VL<=i<VLMAX set to the tail policy
10695 // (in our case undisturbed). This means we can set up a subvector insertion
10696 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
10697 // size of the subvector.
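// For example, inserting a 4-element subvector at offset 2 uses OFFSET=2 and
// VL=6: destination elements 0..1 stay undisturbed, elements 2..5 receive the
// subvector, and elements from 6 up to VLMAX follow the tail policy.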
10698 MVT InterSubVT = ContainerVecVT;
10699 SDValue AlignedExtract = Vec;
10700 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
10701 if (SubVecVT.isFixedLengthVector()) {
10702 assert(VLen);
10703 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
10704 }
10705 if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) {
10706 InterSubVT = getLMUL1VT(ContainerVecVT);
10707 // Extract a subvector equal to the nearest full vector register type. This
10708 // should resolve to a EXTRACT_SUBREG instruction.
10709 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
10710 DAG.getVectorIdxConstant(AlignedIdx, DL));
10711 }
10712
10713 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
10714 DAG.getUNDEF(InterSubVT), SubVec,
10715 DAG.getVectorIdxConstant(0, DL));
10716
10717 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
10718
10719 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
10720 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
10721
10722 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
10723 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10724 if (Subtarget.expandVScale(EndIndex) ==
10725 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
10726 Policy = RISCVII::TAIL_AGNOSTIC;
10727
10728 // If we're inserting into the lowest elements, use a tail undisturbed
10729 // vmv.v.v.
10730 if (RemIdx.isZero()) {
10731 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
10732 SubVec, VL);
10733 } else {
10734 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10735
10736 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
10737 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
10738
10739 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
10740 SlideupAmt, Mask, VL, Policy);
10741 }
10742
10743 // If required, insert this subvector back into the correct vector register.
10744 // This should resolve to an INSERT_SUBREG instruction.
10745 if (ContainerVecVT.bitsGT(InterSubVT))
10746 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
10747 DAG.getVectorIdxConstant(AlignedIdx, DL));
10748
10749 if (VecVT.isFixedLengthVector())
10750 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10751
10752 // We might have bitcast from a mask type: cast back to the original type if
10753 // required.
10754 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
10755}
10756
10757SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
10758 SelectionDAG &DAG) const {
10759 SDValue Vec = Op.getOperand(0);
10760 MVT SubVecVT = Op.getSimpleValueType();
10761 MVT VecVT = Vec.getSimpleValueType();
10762
10763 SDLoc DL(Op);
10764 MVT XLenVT = Subtarget.getXLenVT();
10765 unsigned OrigIdx = Op.getConstantOperandVal(1);
10766 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10767
10768 // With an index of 0 this is a cast-like subvector, which can be performed
10769 // with subregister operations.
10770 if (OrigIdx == 0)
10771 return Op;
10772
10773 // We don't have the ability to slide mask vectors down indexed by their i1
10774 // elements; the smallest we can do is i8. Often we are able to bitcast to
10775 // equivalent i8 vectors. Note that when extracting a fixed-length vector
10776 // from a scalable one, we might not necessarily have enough scalable
10777 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
10778 if (SubVecVT.getVectorElementType() == MVT::i1) {
10779 if (VecVT.getVectorMinNumElements() >= 8 &&
10780 SubVecVT.getVectorMinNumElements() >= 8) {
10781 assert(OrigIdx % 8 == 0 && "Invalid index");
10782 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
10783 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
10784 "Unexpected mask vector lowering");
10785 OrigIdx /= 8;
10786 SubVecVT =
10787 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
10788 SubVecVT.isScalableVector());
10789 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
10790 VecVT.isScalableVector());
10791 Vec = DAG.getBitcast(VecVT, Vec);
10792 } else {
10793 // We can't slide this mask vector down, indexed by its i1 elements.
10794 // This poses a problem when we wish to extract a scalable vector which
10795 // can't be re-expressed as a larger type. Just choose the slow path and
10796 // extend to a larger type, then truncate back down.
10797 // TODO: We could probably improve this when extracting certain fixed
10798 // from fixed, where we can extract as i8 and shift the correct element
10799 // right to reach the desired subvector?
10800 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
10801 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
10802 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
10803 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
10804 Op.getOperand(1));
10805 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
10806 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
10807 }
10808 }
10809
10810 const auto VLen = Subtarget.getRealVLen();
10811
10812 // If the subvector is a fixed-length type and we don't know VLEN
10813 // exactly, we cannot use subregister manipulation to simplify the codegen; we
10814 // don't know which register of a LMUL group contains the specific subvector
10815 // as we only know the minimum register size. Therefore we must slide the
10816 // vector group down the full amount.
10817 if (SubVecVT.isFixedLengthVector() && !VLen) {
10818 MVT ContainerVT = VecVT;
10819 if (VecVT.isFixedLengthVector()) {
10820 ContainerVT = getContainerForFixedLengthVector(VecVT);
10821 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10822 }
10823
10824 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
10825 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
10826 if (auto ShrunkVT =
10827 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
10828 ContainerVT = *ShrunkVT;
10829 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
10830 DAG.getVectorIdxConstant(0, DL));
10831 }
10832
10833 SDValue Mask =
10834 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
10835 // Set the vector length to only the number of elements we care about. This
10836 // avoids sliding down elements we're going to discard straight away.
10837 SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
10838 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
10839 SDValue Slidedown =
10840 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
10841 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
10842 // Now we can use a cast-like subvector extract to get the result.
10843 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10844 DAG.getVectorIdxConstant(0, DL));
10845 return DAG.getBitcast(Op.getValueType(), Slidedown);
10846 }
10847
10848 if (VecVT.isFixedLengthVector()) {
10849 VecVT = getContainerForFixedLengthVector(VecVT);
10850 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
10851 }
10852
10853 MVT ContainerSubVecVT = SubVecVT;
10854 if (SubVecVT.isFixedLengthVector())
10855 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10856
10857 unsigned SubRegIdx;
10858 ElementCount RemIdx;
10859 // extract_subvector scales the index by vscale if the subvector is scalable,
10860 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10861 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10862 if (SubVecVT.isFixedLengthVector()) {
10863 assert(VLen);
10864 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10865 auto Decompose =
10866     RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10867         VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10868 SubRegIdx = Decompose.first;
10869 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10870 (OrigIdx % Vscale));
10871 } else {
10872 auto Decompose =
10873     RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10874         VecVT, ContainerSubVecVT, OrigIdx, TRI);
10875 SubRegIdx = Decompose.first;
10876 RemIdx = ElementCount::getScalable(Decompose.second);
10877 }
10878
10879 // If the Idx has been completely eliminated then this is a subvector extract
10880 // which naturally aligns to a vector register. These can easily be handled
10881 // using subregister manipulation. We use an extract_subvector that will
10882 // resolve to an extract subreg.
10883 if (RemIdx.isZero()) {
10884 if (SubVecVT.isFixedLengthVector()) {
10885 assert(VLen);
10886 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10887 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerSubVecVT, Vec,
10888 DAG.getConstant(OrigIdx / Vscale, DL, XLenVT));
10889 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
10890 }
10891 return Op;
10892 }
10893
10894 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
10895 // was > M1 then the index would need to be a multiple of VLMAX, and so would
10896 // divide exactly.
10897 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
10898 getLMUL(ContainerSubVecVT) == RISCVII::VLMUL::LMUL_1);
10899
10900 // If the vector type is an LMUL-group type, extract a subvector equal to the
10901 // nearest full vector register type.
10902 MVT InterSubVT = VecVT;
10903 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
10904 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
10905 // we should have successfully decomposed the extract into a subregister.
10906 // We use an extract_subvector that will resolve to a subreg extract.
10907 assert(SubRegIdx != RISCV::NoSubRegister);
10908 (void)SubRegIdx;
10909 unsigned Idx = OrigIdx - RemIdx.getKnownMinValue();
10910 if (SubVecVT.isFixedLengthVector()) {
10911 assert(VLen);
10912 Idx /= *VLen / RISCV::RVVBitsPerBlock;
10913 }
10914 InterSubVT = getLMUL1VT(VecVT);
10915 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
10916 DAG.getConstant(Idx, DL, XLenVT));
10917 }
10918
10919 // Slide this vector register down by the desired number of elements in order
10920 // to place the desired subvector starting at element 0.
10921 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10922 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
10923 if (SubVecVT.isFixedLengthVector())
10924 VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
10925 SDValue Slidedown =
10926 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
10927 Vec, SlidedownAmt, Mask, VL);
10928
10929 // Now the vector is in the right position, extract our final subvector. This
10930 // should resolve to a COPY.
10931 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10932 DAG.getVectorIdxConstant(0, DL));
10933
10934 // We might have bitcast from a mask type: cast back to the original type if
10935 // required.
10936 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
10937}
10938
10939 // Widen a vector operation's operands to i8, then truncate its results back to the
10940// original type, typically i1. All operand and result types must be the same.
10941 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
10942                                   SelectionDAG &DAG) {
10943 MVT VT = N.getSimpleValueType();
10944 MVT WideVT = VT.changeVectorElementType(MVT::i8);
10945 SmallVector<SDValue, 4> WideOps;
10946 for (SDValue Op : N->ops()) {
10947 assert(Op.getSimpleValueType() == VT &&
10948 "Operands and result must be same type");
10949 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
10950 }
10951
10952 unsigned NumVals = N->getNumValues();
10953
10954 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
10955     NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
10956 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
10957 SmallVector<SDValue, 4> TruncVals;
10958 for (unsigned I = 0; I < NumVals; I++) {
10959 TruncVals.push_back(
10960 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
10961 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
10962 }
10963
10964 if (TruncVals.size() > 1)
10965 return DAG.getMergeValues(TruncVals, DL);
10966 return TruncVals.front();
10967}
10968
10969SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
10970 SelectionDAG &DAG) const {
10971 SDLoc DL(Op);
10972 MVT VecVT = Op.getSimpleValueType();
10973
10974 assert(VecVT.isScalableVector() &&
10975 "vector_interleave on non-scalable vector!");
10976
10977 // 1 bit element vectors need to be widened to e8
10978 if (VecVT.getVectorElementType() == MVT::i1)
10979 return widenVectorOpsToi8(Op, DL, DAG);
10980
10981 // If the VT is LMUL=8, we need to split and reassemble.
10982 if (VecVT.getSizeInBits().getKnownMinValue() ==
10983 (8 * RISCV::RVVBitsPerBlock)) {
10984 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10985 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10986 EVT SplitVT = Op0Lo.getValueType();
10987
10988 SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10989                             DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
10990 SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10991                             DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
10992
10993 SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10994 ResLo.getValue(0), ResHi.getValue(0));
10995 SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
10996 ResHi.getValue(1));
10997 return DAG.getMergeValues({Even, Odd}, DL);
10998 }
10999
11000 // Concatenate the two vectors as one vector to deinterleave
11001 MVT ConcatVT =
11002     MVT::getVectorVT(VecVT.getVectorElementType(),
11003                      VecVT.getVectorElementCount().multiplyCoefficientBy(2));
11004 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
11005 Op.getOperand(0), Op.getOperand(1));
11006
11007 // We can deinterleave through vnsrl.wi if the element type is smaller than
11008 // ELEN
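// (e.g. for i32 elements each pair is viewed as a single i64 and narrowed
// with a shift of 0 or 32 to produce the even and odd elements, respectively)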
11009 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
11010 SDValue Even = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 0, DAG);
11011 SDValue Odd = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 1, DAG);
11012 return DAG.getMergeValues({Even, Odd}, DL);
11013 }
11014
11015 // For the indices, use the vmv.v.x of an i8 constant to fill the largest
11016 // possible mask vector, then extract the required subvector. Doing this
11017 // (instead of a vid, vmsne sequence) reduces LMUL, and allows the mask
11018 // creation to be rematerialized during register allocation to reduce
11019 // register pressure if needed.
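// Each byte of the 0b01010101 splat supplies eight mask bits selecting lanes
// 0, 2, 4, ... (the even elements); the 0b10101010 splat selects the odd
// lanes.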
11020
11021 MVT MaskVT = ConcatVT.changeVectorElementType(MVT::i1);
11022
11023 SDValue EvenSplat = DAG.getConstant(0b01010101, DL, MVT::nxv8i8);
11024 EvenSplat = DAG.getBitcast(MVT::nxv64i1, EvenSplat);
11025 SDValue EvenMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MaskVT, EvenSplat,
11026 DAG.getVectorIdxConstant(0, DL));
11027
11028 SDValue OddSplat = DAG.getConstant(0b10101010, DL, MVT::nxv8i8);
11029 OddSplat = DAG.getBitcast(MVT::nxv64i1, OddSplat);
11030 SDValue OddMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MaskVT, OddSplat,
11031 DAG.getVectorIdxConstant(0, DL));
11032
11033 // vcompress the even and odd elements into two separate vectors
11034 SDValue EvenWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
11035 EvenMask, DAG.getUNDEF(ConcatVT));
11036 SDValue OddWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
11037 OddMask, DAG.getUNDEF(ConcatVT));
11038
11039 // Extract the result half of the gather for even and odd
11040 SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
11041 DAG.getVectorIdxConstant(0, DL));
11042 SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
11043 DAG.getVectorIdxConstant(0, DL));
11044
11045 return DAG.getMergeValues({Even, Odd}, DL);
11046}
11047
11048SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
11049 SelectionDAG &DAG) const {
11050 SDLoc DL(Op);
11051 MVT VecVT = Op.getSimpleValueType();
11052
11053 assert(VecVT.isScalableVector() &&
11054 "vector_interleave on non-scalable vector!");
11055
11056 // i1 vectors need to be widened to i8
11057 if (VecVT.getVectorElementType() == MVT::i1)
11058 return widenVectorOpsToi8(Op, DL, DAG);
11059
11060 MVT XLenVT = Subtarget.getXLenVT();
11061 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
11062
11063 // If the VT is LMUL=8, we need to split and reassemble.
11064 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
11065 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
11066 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
11067 EVT SplitVT = Op0Lo.getValueType();
11068
11069 SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
11070                             DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
11071 SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
11072                             DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
11073
11074 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
11075 ResLo.getValue(0), ResLo.getValue(1));
11076 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
11077 ResHi.getValue(0), ResHi.getValue(1));
11078 return DAG.getMergeValues({Lo, Hi}, DL);
11079 }
11080
11081 SDValue Interleaved;
11082
11083 // If the element type is smaller than ELEN, then we can interleave with
11084 // vwaddu.vv and vwmaccu.vx
11085 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
11086 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
11087 DAG, Subtarget);
11088 } else {
11089 // Otherwise, fallback to using vrgathere16.vv
11090 MVT ConcatVT =
11091     MVT::getVectorVT(VecVT.getVectorElementType(),
11092                      VecVT.getVectorElementCount().multiplyCoefficientBy(2));
11093 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
11094 Op.getOperand(0), Op.getOperand(1));
11095
11096 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
11097
11098 // 0 1 2 3 4 5 6 7 ...
11099 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
11100
11101 // 1 1 1 1 1 1 1 1 ...
11102 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
11103
11104 // 1 0 1 0 1 0 1 0 ...
11105 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
11106 OddMask = DAG.getSetCC(
11107 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
11108     DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
11109     ISD::CondCode::SETNE);
11110
11111 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
11112
11113 // Build up the index vector for interleaving the concatenated vector
11114 // 0 0 1 1 2 2 3 3 ...
11115 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
11116 // 0 n 1 n+1 2 n+2 3 n+3 ...
11117 Idx =
11118 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
11119
11120 // Then perform the interleave
11121 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
11122 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
11123 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
11124 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
11125 }
11126
11127 // Extract the two halves from the interleaved result
11128 SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
11129 DAG.getVectorIdxConstant(0, DL));
11130 SDValue Hi = DAG.getNode(
11131 ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
11132 DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));
11133
11134 return DAG.getMergeValues({Lo, Hi}, DL);
11135}
11136
11137// Lower step_vector to the vid instruction. Any non-identity step value must
11138 // be accounted for by manual expansion.
11139SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
11140 SelectionDAG &DAG) const {
11141 SDLoc DL(Op);
11142 MVT VT = Op.getSimpleValueType();
11143 assert(VT.isScalableVector() && "Expected scalable vector");
11144 MVT XLenVT = Subtarget.getXLenVT();
11145 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
11146 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
11147 uint64_t StepValImm = Op.getConstantOperandVal(0);
11148 if (StepValImm != 1) {
11149 if (isPowerOf2_64(StepValImm)) {
11150 SDValue StepVal =
11151 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
11152 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
11153 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
11154 } else {
11155 SDValue StepVal = lowerScalarSplat(
11156 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
11157 VL, VT, DL, DAG, Subtarget);
11158 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
11159 }
11160 }
11161 return StepVec;
11162}
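// For illustration (a step of 4 assumed): the lowering emits vid.v to form
// {0 1 2 3 ...} and shifts it left by Log2(4) = 2, giving {0 4 8 12 ...}; a
// non-power-of-two step such as 3 instead multiplies the vid result by a
// splat of 3.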
11163
11164// Implement vector_reverse using vrgather.vv with indices determined by
11165// subtracting the id of each element from (VLMAX-1). This will convert
11166// the indices like so:
11167// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
11168// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
11169SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
11170 SelectionDAG &DAG) const {
11171 SDLoc DL(Op);
11172 MVT VecVT = Op.getSimpleValueType();
11173 if (VecVT.getVectorElementType() == MVT::i1) {
11174 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
11175 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
11176 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
11177 return DAG.getSetCC(DL, VecVT, Op2,
11178 DAG.getConstant(0, DL, Op2.getValueType()), ISD::SETNE);
11179 }
11180
11181 MVT ContainerVT = VecVT;
11182 SDValue Vec = Op.getOperand(0);
11183 if (VecVT.isFixedLengthVector()) {
11184 ContainerVT = getContainerForFixedLengthVector(VecVT);
11185 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11186 }
11187
11188 MVT XLenVT = Subtarget.getXLenVT();
11189 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11190
11191 // On some uarchs vrgather.vv will read from every input register for each
11192 // output register, regardless of the indices. However, to reverse a vector,
11193 // each output register only needs to read from one input register, so decompose
11194 // it into LMUL * M1 vrgather.vvs to get O(LMUL) performance instead of
11195 // O(LMUL^2).
11196 //
11197 // vsetvli a1, zero, e64, m4, ta, ma
11198 // vrgatherei16.vv v12, v8, v16
11199 // ->
11200 // vsetvli a1, zero, e64, m1, ta, ma
11201 // vrgather.vv v15, v8, v16
11202 // vrgather.vv v14, v9, v16
11203 // vrgather.vv v13, v10, v16
11204 // vrgather.vv v12, v11, v16
11205 if (ContainerVT.bitsGT(getLMUL1VT(ContainerVT)) &&
11206 ContainerVT.getVectorElementCount().isKnownMultipleOf(2)) {
11207 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
11208 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, Lo.getSimpleValueType(), Lo);
11209 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, Hi.getSimpleValueType(), Hi);
11210 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ContainerVT, Hi, Lo);
11211
11212 // Fixed length vectors might not fit exactly into their container, and so
11213 // leave a gap in the front of the vector after being reversed. Slide this
11214 // away.
11215 //
11216 // x x x x 3 2 1 0 <- v4i16 @ vlen=128
11217 // 0 1 2 3 x x x x <- reverse
11218 // x x x x 0 1 2 3 <- vslidedown.vx
11219 if (VecVT.isFixedLengthVector()) {
11220 SDValue Offset = DAG.getNode(
11221 ISD::SUB, DL, XLenVT,
11222 DAG.getElementCount(DL, XLenVT, ContainerVT.getVectorElementCount()),
11223 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()));
11224 Concat =
11225 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
11226 DAG.getUNDEF(ContainerVT), Concat, Offset, Mask, VL);
11227 Concat = convertFromScalableVector(VecVT, Concat, DAG, Subtarget);
11228 }
11229 return Concat;
11230 }
11231
11232 unsigned EltSize = ContainerVT.getScalarSizeInBits();
11233 unsigned MinSize = ContainerVT.getSizeInBits().getKnownMinValue();
11234 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
11235 unsigned MaxVLMAX =
11236 VecVT.isFixedLengthVector()
11237 ? VecVT.getVectorNumElements()
11238 : RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
11239
11240 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
11241 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
11242
11243 // If this is SEW=8 and VLMAX is potentially more than 256, we need
11244 // to use vrgatherei16.vv.
11245 if (MaxVLMAX > 256 && EltSize == 8) {
11246 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
11247 // Reverse each half, then reassemble them in reverse order.
11248 // NOTE: It's also possible that, after splitting, VLMAX no longer
11249 // requires vrgatherei16.vv.
11250 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
11251 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
11252 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
11253 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
11254 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
11255 // Reassemble the low and high pieces reversed.
11256 // FIXME: This is a CONCAT_VECTORS.
11257 SDValue Res =
11258 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
11259 DAG.getVectorIdxConstant(0, DL));
11260 return DAG.getNode(
11261 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
11262 DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
11263 }
11264
11265 // Just promote the int type to i16 which will double the LMUL.
11266 IntVT = MVT::getVectorVT(MVT::i16, ContainerVT.getVectorElementCount());
11267 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11268 }
11269
11270 // At LMUL > 1, do the index computation in 16 bits to reduce register
11271 // pressure.
11272 if (IntVT.getScalarType().bitsGT(MVT::i16) &&
11273 IntVT.bitsGT(getLMUL1VT(IntVT))) {
11274 assert(isUInt<16>(MaxVLMAX - 1)); // Largest VLMAX is 65536 @ zvl65536b
11275 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11276 IntVT = IntVT.changeVectorElementType(MVT::i16);
11277 }
11278
11279 // Calculate VLMAX-1 for the desired SEW.
11280 SDValue VLMinus1 = DAG.getNode(
11281 ISD::SUB, DL, XLenVT,
11282 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()),
11283 DAG.getConstant(1, DL, XLenVT));
11284
11285 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
11286 bool IsRV32E64 =
11287 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
11288 SDValue SplatVL;
11289 if (!IsRV32E64)
11290 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
11291 else
11292 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
11293 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
11294
11295 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
11296 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
11297 DAG.getUNDEF(IntVT), Mask, VL);
11298
11299 SDValue Gather = DAG.getNode(GatherOpc, DL, ContainerVT, Vec, Indices,
11300 DAG.getUNDEF(ContainerVT), Mask, VL);
11301 if (VecVT.isFixedLengthVector())
11302 Gather = convertFromScalableVector(VecVT, Gather, DAG, Subtarget);
11303 return Gather;
11304}
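// Worked example for the gather path (VLMAX of 4 assumed): VID gives
// {0 1 2 3}, the splat of VLMAX-1 is {3 3 3 3}, so the gather indices are
// {3 2 1 0} and the vrgather produces the reversed vector.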
11305
11306SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
11307 SelectionDAG &DAG) const {
11308 SDLoc DL(Op);
11309 SDValue V1 = Op.getOperand(0);
11310 SDValue V2 = Op.getOperand(1);
11311 MVT XLenVT = Subtarget.getXLenVT();
11312 MVT VecVT = Op.getSimpleValueType();
11313
11314 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
11315
11316 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
11317 SDValue DownOffset, UpOffset;
11318 if (ImmValue >= 0) {
11319 // The operand is a TargetConstant, we need to rebuild it as a regular
11320 // constant.
11321 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
11322 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
11323 } else {
11324 // The operand is a TargetConstant, we need to rebuild it as a regular
11325 // constant rather than negating the original operand.
11326 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
11327 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
11328 }
11329
11330 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
11331
11332 SDValue SlideDown =
11333 getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
11334 DownOffset, TrueMask, UpOffset);
11335 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
11336 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
11337 RISCVII::TAIL_AGNOSTIC);
11338}
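// Offset example (VLMAX of 8 assumed): a splice immediate of +2 gives
// DownOffset = 2 and UpOffset = 6, so V1 is slid down by two elements and V2
// is slid up starting at element 6; an immediate of -2 instead gives
// UpOffset = 2 and DownOffset = 6, keeping the last two elements of V1 in
// front of V2.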
11339
11340SDValue
11341RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
11342 SelectionDAG &DAG) const {
11343 SDLoc DL(Op);
11344 auto *Load = cast<LoadSDNode>(Op);
11345
11346 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
11347 Load->getMemoryVT(),
11348 *Load->getMemOperand()) &&
11349 "Expecting a correctly-aligned load");
11350
11351 MVT VT = Op.getSimpleValueType();
11352 MVT XLenVT = Subtarget.getXLenVT();
11353 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11354
11355 // If we know the exact VLEN and our fixed length vector completely fills
11356 // the container, use a whole register load instead.
11357 const auto [MinVLMAX, MaxVLMAX] =
11358 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
11359 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
11360 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
11361 MachineMemOperand *MMO = Load->getMemOperand();
11362 SDValue NewLoad =
11363 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
11364 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
11365 MMO->getAAInfo(), MMO->getRanges());
11366 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
11367 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
11368 }
11369
11370 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
11371
11372 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
11373 SDValue IntID = DAG.getTargetConstant(
11374 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
11375 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
11376 if (!IsMaskOp)
11377 Ops.push_back(DAG.getUNDEF(ContainerVT));
11378 Ops.push_back(Load->getBasePtr());
11379 Ops.push_back(VL);
11380 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11381 SDValue NewLoad =
11382 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11383 Load->getMemoryVT(), Load->getMemOperand());
11384
11385 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
11386 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
11387}
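// For example (VLEN assumed to be exactly 128): a v4i32 load fills its m1
// container exactly, so the path above emits an ordinary load of the
// container type (typically selected as a whole-register vl1re32.v) rather
// than the vsetivli + vle32.v sequence of the generic path.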
11388
11389SDValue
11390RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
11391 SelectionDAG &DAG) const {
11392 SDLoc DL(Op);
11393 auto *Store = cast<StoreSDNode>(Op);
11394
11395 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
11396 Store->getMemoryVT(),
11397 *Store->getMemOperand()) &&
11398 "Expecting a correctly-aligned store");
11399
11400 SDValue StoreVal = Store->getValue();
11401 MVT VT = StoreVal.getSimpleValueType();
11402 MVT XLenVT = Subtarget.getXLenVT();
11403
11404 // If the size is less than a byte, we need to pad with zeros to make a byte.
11405 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
11406 VT = MVT::v8i1;
11407 StoreVal =
11408 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getConstant(0, DL, VT),
11409 StoreVal, DAG.getVectorIdxConstant(0, DL));
11410 }
11411
11412 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11413
11414 SDValue NewValue =
11415 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
11416
11417 // If we know the exact VLEN and our fixed length vector completely fills
11418 // the container, use a whole register store instead.
11419 const auto [MinVLMAX, MaxVLMAX] =
11420 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
11421 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
11422 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
11423 MachineMemOperand *MMO = Store->getMemOperand();
11424 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
11425 MMO->getPointerInfo(), MMO->getBaseAlign(),
11426 MMO->getFlags(), MMO->getAAInfo());
11427 }
11428
11429 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
11430
11431 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
11432 SDValue IntID = DAG.getTargetConstant(
11433 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
11434 return DAG.getMemIntrinsicNode(
11435 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
11436 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
11437 Store->getMemoryVT(), Store->getMemOperand());
11438}
11439
11440SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
11441 SelectionDAG &DAG) const {
11442 SDLoc DL(Op);
11443 MVT VT = Op.getSimpleValueType();
11444
11445 const auto *MemSD = cast<MemSDNode>(Op);
11446 EVT MemVT = MemSD->getMemoryVT();
11447 MachineMemOperand *MMO = MemSD->getMemOperand();
11448 SDValue Chain = MemSD->getChain();
11449 SDValue BasePtr = MemSD->getBasePtr();
11450
11451 SDValue Mask, PassThru, VL;
11452 bool IsExpandingLoad = false;
11453 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
11454 Mask = VPLoad->getMask();
11455 PassThru = DAG.getUNDEF(VT);
11456 VL = VPLoad->getVectorLength();
11457 } else {
11458 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
11459 Mask = MLoad->getMask();
11460 PassThru = MLoad->getPassThru();
11461 IsExpandingLoad = MLoad->isExpandingLoad();
11462 }
11463
11464 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11465
11466 MVT XLenVT = Subtarget.getXLenVT();
11467
11468 MVT ContainerVT = VT;
11469 if (VT.isFixedLengthVector()) {
11470 ContainerVT = getContainerForFixedLengthVector(VT);
11471 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
11472 if (!IsUnmasked) {
11473 MVT MaskVT = getMaskTypeFor(ContainerVT);
11474 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11475 }
11476 }
11477
11478 if (!VL)
11479 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11480
11481 SDValue ExpandingVL;
11482 if (!IsUnmasked && IsExpandingLoad) {
11483 ExpandingVL = VL;
11484 VL =
11485 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
11486 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
11487 }
11488
11489 unsigned IntID = IsUnmasked || IsExpandingLoad ? Intrinsic::riscv_vle
11490 : Intrinsic::riscv_vle_mask;
11491 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11492 if (IntID == Intrinsic::riscv_vle)
11493 Ops.push_back(DAG.getUNDEF(ContainerVT));
11494 else
11495 Ops.push_back(PassThru);
11496 Ops.push_back(BasePtr);
11497 if (IntID == Intrinsic::riscv_vle_mask)
11498 Ops.push_back(Mask);
11499 Ops.push_back(VL);
11500 if (IntID == Intrinsic::riscv_vle_mask)
11501 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
11502
11503 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11504
11505 SDValue Result =
11506 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
11507 Chain = Result.getValue(1);
11508 if (ExpandingVL) {
11509 MVT IndexVT = ContainerVT;
11510 if (ContainerVT.isFloatingPoint())
11511 IndexVT = ContainerVT.changeVectorElementTypeToInteger();
11512
11513 MVT IndexEltVT = IndexVT.getVectorElementType();
11514 bool UseVRGATHEREI16 = false;
11515 // If the index vector is an i8 vector and the element count exceeds 256, we
11516 // should change the element type of the index vector to i16 to avoid
11517 // overflow.
11518 if (IndexEltVT == MVT::i8 && VT.getVectorNumElements() > 256) {
11519 // FIXME: We need to do vector splitting manually for LMUL=8 cases.
11520 assert(getLMUL(IndexVT) != RISCVII::LMUL_8);
11521 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
11522 UseVRGATHEREI16 = true;
11523 }
11524
11525 SDValue Iota =
11526 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
11527 DAG.getConstant(Intrinsic::riscv_viota, DL, XLenVT),
11528 DAG.getUNDEF(IndexVT), Mask, ExpandingVL);
11529 Result =
11530 DAG.getNode(UseVRGATHEREI16 ? RISCVISD::VRGATHEREI16_VV_VL
11531 : RISCVISD::VRGATHER_VV_VL,
11532 DL, ContainerVT, Result, Iota, PassThru, Mask, ExpandingVL);
11533 }
11534
11535 if (VT.isFixedLengthVector())
11536 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11537
11538 return DAG.getMergeValues({Result, Chain}, DL);
11539}
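// Sketch of the expanding-load path above (mask value assumed): with mask
// {1 0 1 1}, vcpop gives a load VL of 3, the unmasked vle reads three
// contiguous elements {e0 e1 e2}, viota over the mask produces indices
// {0 1 1 2}, and the final masked vrgather places e0/e1/e2 into the active
// lanes while inactive lanes keep the passthru value.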
11540
11541SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
11542 SelectionDAG &DAG) const {
11543 SDLoc DL(Op);
11544
11545 const auto *MemSD = cast<MemSDNode>(Op);
11546 EVT MemVT = MemSD->getMemoryVT();
11547 MachineMemOperand *MMO = MemSD->getMemOperand();
11548 SDValue Chain = MemSD->getChain();
11549 SDValue BasePtr = MemSD->getBasePtr();
11550 SDValue Val, Mask, VL;
11551
11552 bool IsCompressingStore = false;
11553 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
11554 Val = VPStore->getValue();
11555 Mask = VPStore->getMask();
11556 VL = VPStore->getVectorLength();
11557 } else {
11558 const auto *MStore = cast<MaskedStoreSDNode>(Op);
11559 Val = MStore->getValue();
11560 Mask = MStore->getMask();
11561 IsCompressingStore = MStore->isCompressingStore();
11562 }
11563
11564 bool IsUnmasked =
11565 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
11566
11567 MVT VT = Val.getSimpleValueType();
11568 MVT XLenVT = Subtarget.getXLenVT();
11569
11570 MVT ContainerVT = VT;
11571 if (VT.isFixedLengthVector()) {
11572 ContainerVT = getContainerForFixedLengthVector(VT);
11573
11574 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11575 if (!IsUnmasked || IsCompressingStore) {
11576 MVT MaskVT = getMaskTypeFor(ContainerVT);
11577 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11578 }
11579 }
11580
11581 if (!VL)
11582 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11583
11584 if (IsCompressingStore) {
11585 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
11586 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
11587 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
11588 VL =
11589 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
11590 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
11591 }
11592
11593 unsigned IntID =
11594 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
11595 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11596 Ops.push_back(Val);
11597 Ops.push_back(BasePtr);
11598 if (!IsUnmasked)
11599 Ops.push_back(Mask);
11600 Ops.push_back(VL);
11601
11602 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
11603 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
11604}
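// Sketch of the compressing-store path above (mask value assumed): with mask
// {1 0 1 1}, vcompress packs the three active elements to the front of the
// source vector, vcpop recomputes VL as 3, and the unmasked vse then stores
// just those three contiguous elements.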
11605
11606SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op,
11607 SelectionDAG &DAG) const {
11608 SDLoc DL(Op);
11609 SDValue Val = Op.getOperand(0);
11610 SDValue Mask = Op.getOperand(1);
11611 SDValue Passthru = Op.getOperand(2);
11612
11613 MVT VT = Val.getSimpleValueType();
11614 MVT XLenVT = Subtarget.getXLenVT();
11615 MVT ContainerVT = VT;
11616 if (VT.isFixedLengthVector()) {
11617 ContainerVT = getContainerForFixedLengthVector(VT);
11618 MVT MaskVT = getMaskTypeFor(ContainerVT);
11619 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11620 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11621 Passthru = convertToScalableVector(ContainerVT, Passthru, DAG, Subtarget);
11622 }
11623
11624 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11625 SDValue Res =
11626 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
11627 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
11628 Passthru, Val, Mask, VL);
11629
11630 if (VT.isFixedLengthVector())
11631 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
11632
11633 return Res;
11634}
11635
11636SDValue
11637RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
11638 SelectionDAG &DAG) const {
11639 MVT InVT = Op.getOperand(0).getSimpleValueType();
11640 MVT ContainerVT = getContainerForFixedLengthVector(InVT);
11641
11642 MVT VT = Op.getSimpleValueType();
11643
11644 SDValue Op1 =
11645 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
11646 SDValue Op2 =
11647 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
11648
11649 SDLoc DL(Op);
11650 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
11651 DAG, Subtarget);
11652 MVT MaskVT = getMaskTypeFor(ContainerVT);
11653
11654 SDValue Cmp =
11655 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
11656 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
11657
11658 return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
11659}
11660
11661SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
11662 SelectionDAG &DAG) const {
11663 unsigned Opc = Op.getOpcode();
11664 SDLoc DL(Op);
11665 SDValue Chain = Op.getOperand(0);
11666 SDValue Op1 = Op.getOperand(1);
11667 SDValue Op2 = Op.getOperand(2);
11668 SDValue CC = Op.getOperand(3);
11669 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
11670 MVT VT = Op.getSimpleValueType();
11671 MVT InVT = Op1.getSimpleValueType();
11672
11673 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE
11674 // condition codes.
11675 if (Opc == ISD::STRICT_FSETCCS) {
11676 // Expand strict_fsetccs(x, oeq) to
11677 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
11678 SDVTList VTList = Op->getVTList();
11679 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
11680 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
11681 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
11682 Op2, OLECCVal);
11683 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
11684 Op1, OLECCVal);
11685 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
11686 Tmp1.getValue(1), Tmp2.getValue(1));
11687 // Tmp1 and Tmp2 might be the same node.
11688 if (Tmp1 != Tmp2)
11689 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
11690 return DAG.getMergeValues({Tmp1, OutChain}, DL);
11691 }
11692
11693 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
11694 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
11695 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
11696 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
11697 Op2, OEQCCVal);
11698 SDValue Res = DAG.getNOT(DL, OEQ, VT);
11699 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
11700 }
11701 }
11702
11703 MVT ContainerInVT = InVT;
11704 if (InVT.isFixedLengthVector()) {
11705 ContainerInVT = getContainerForFixedLengthVector(InVT);
11706 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
11707 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
11708 }
11709 MVT MaskVT = getMaskTypeFor(ContainerInVT);
11710
11711 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
11712
11713 SDValue Res;
11714 if (Opc == ISD::STRICT_FSETCC &&
11715 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
11716 CCVal == ISD::SETOLE)) {
11717 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
11718 // is only active when both input elements are ordered.
11719 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
11720 SDValue OrderMask1 = DAG.getNode(
11721 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
11722 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
11723 True, VL});
11724 SDValue OrderMask2 = DAG.getNode(
11725 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
11726 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
11727 True, VL});
11728 Mask =
11729 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
11730 // Use Mask as the passthru operand to let the result be 0 if either of the
11731 // inputs is unordered.
11732 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
11733 DAG.getVTList(MaskVT, MVT::Other),
11734 {Chain, Op1, Op2, CC, Mask, Mask, VL});
11735 } else {
11736 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
11737 : RISCVISD::STRICT_FSETCCS_VL;
11738 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
11739 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
11740 }
11741
11742 if (VT.isFixedLengthVector()) {
11743 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
11744 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
11745 }
11746 return Res;
11747}
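// The OEQ expansion above relies on the identity x == y <=> (x ole y) && (y ole x)
// for ordered inputs; both OLE compares still raise the invalid exception on a
// NaN input, which preserves the STRICT_FSETCCS semantics that a plain vmfeq
// (quiet on qNaN) would lose.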
11748
11749// Lower vector ABS to smax(X, sub(0, X)).
11750SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
11751 SDLoc DL(Op);
11752 MVT VT = Op.getSimpleValueType();
11753 SDValue X = Op.getOperand(0);
11754
11755 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
11756 "Unexpected type for ISD::ABS");
11757
11758 MVT ContainerVT = VT;
11759 if (VT.isFixedLengthVector()) {
11760 ContainerVT = getContainerForFixedLengthVector(VT);
11761 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
11762 }
11763
11764 SDValue Mask, VL;
11765 if (Op->getOpcode() == ISD::VP_ABS) {
11766 Mask = Op->getOperand(1);
11767 if (VT.isFixedLengthVector())
11768 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
11769 Subtarget);
11770 VL = Op->getOperand(2);
11771 } else
11772 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11773
11774 SDValue SplatZero = DAG.getNode(
11775 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
11776 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
11777 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
11778 DAG.getUNDEF(ContainerVT), Mask, VL);
11779 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
11780 DAG.getUNDEF(ContainerVT), Mask, VL);
11781
11782 if (VT.isFixedLengthVector())
11783 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
11784 return Max;
11785}
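// Worked example (lane values assumed): for X = {-3, 7}, NegX = 0 - X = {3, -7}
// and smax(X, NegX) = {3, 7}; INT_MIN maps to itself, matching the wrapping
// behaviour of integer abs.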
11786
11787SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
11788 SDValue Op, SelectionDAG &DAG) const {
11789 SDLoc DL(Op);
11790 MVT VT = Op.getSimpleValueType();
11791 SDValue Mag = Op.getOperand(0);
11792 SDValue Sign = Op.getOperand(1);
11793 assert(Mag.getValueType() == Sign.getValueType() &&
11794 "Can only handle COPYSIGN with matching types.");
11795
11796 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11797 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
11798 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
11799
11800 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11801
11802 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
11803 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
11804
11805 return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
11806}
11807
11808SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
11809 SDValue Op, SelectionDAG &DAG) const {
11810 MVT VT = Op.getSimpleValueType();
11811 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11812
11813 MVT I1ContainerVT =
11814 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
11815
11816 SDValue CC =
11817 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
11818 SDValue Op1 =
11819 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
11820 SDValue Op2 =
11821 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
11822
11823 SDLoc DL(Op);
11824 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11825
11826 SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1,
11827 Op2, DAG.getUNDEF(ContainerVT), VL);
11828
11829 return convertFromScalableVector(VT, Select, DAG, Subtarget);
11830}
11831
11832SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
11833 SelectionDAG &DAG) const {
11834 unsigned NewOpc = getRISCVVLOp(Op);
11835 bool HasPassthruOp = hasPassthruOp(NewOpc);
11836 bool HasMask = hasMaskOp(NewOpc);
11837
11838 MVT VT = Op.getSimpleValueType();
11839 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11840
11841 // Create list of operands by converting existing ones to scalable types.
11842 SmallVector<SDValue, 6> Ops;
11843 for (const SDValue &V : Op->op_values()) {
11844 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
11845
11846 // Pass through non-vector operands.
11847 if (!V.getValueType().isVector()) {
11848 Ops.push_back(V);
11849 continue;
11850 }
11851
11852 // "cast" fixed length vector to a scalable vector.
11853 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
11854 "Only fixed length vectors are supported!");
11855 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11856 }
11857
11858 SDLoc DL(Op);
11859 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11860 if (HasPassthruOp)
11861 Ops.push_back(DAG.getUNDEF(ContainerVT));
11862 if (HasMask)
11863 Ops.push_back(Mask);
11864 Ops.push_back(VL);
11865
11866 // StrictFP operations have two result values. Their lowered result should
11867 // have the same result count.
11868 if (Op->isStrictFPOpcode()) {
11869 SDValue ScalableRes =
11870 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
11871 Op->getFlags());
11872 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11873 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
11874 }
11875
11876 SDValue ScalableRes =
11877 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
11878 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11879}
11880
11881// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
11882// * Operands of each node are assumed to be in the same order.
11883// * The EVL operand is promoted from i32 to i64 on RV64.
11884// * Fixed-length vectors are converted to their scalable-vector container
11885// types.
11886SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
11887 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11888 bool HasPassthruOp = hasPassthruOp(RISCVISDOpc);
11889
11890 SDLoc DL(Op);
11891 MVT VT = Op.getSimpleValueType();
11892 SmallVector<SDValue, 16> Ops;
11893
11894 MVT ContainerVT = VT;
11895 if (VT.isFixedLengthVector())
11896 ContainerVT = getContainerForFixedLengthVector(VT);
11897
11898 for (const auto &OpIdx : enumerate(Op->ops())) {
11899 SDValue V = OpIdx.value();
11900 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
11901 // Add dummy passthru value before the mask. Or if there isn't a mask,
11902 // before EVL.
11903 if (HasPassthruOp) {
11904 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
11905 if (MaskIdx) {
11906 if (*MaskIdx == OpIdx.index())
11907 Ops.push_back(DAG.getUNDEF(ContainerVT));
11908 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
11909 OpIdx.index()) {
11910 if (Op.getOpcode() == ISD::VP_MERGE) {
11911 // For VP_MERGE, copy the false operand instead of an undef value.
11912 Ops.push_back(Ops.back());
11913 } else {
11914 assert(Op.getOpcode() == ISD::VP_SELECT);
11915 // For VP_SELECT, add an undef value.
11916 Ops.push_back(DAG.getUNDEF(ContainerVT));
11917 }
11918 }
11919 }
11920 // VFCVT_RM_X_F_VL requires a rounding mode to be injected before the VL.
11921 if (RISCVISDOpc == RISCVISD::VFCVT_RM_X_F_VL &&
11922 ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == OpIdx.index())
11923 Ops.push_back(DAG.getTargetConstant(RISCVFPRndMode::DYN, DL,
11924 Subtarget.getXLenVT()));
11925 // Pass through operands which aren't fixed-length vectors.
11926 if (!V.getValueType().isFixedLengthVector()) {
11927 Ops.push_back(V);
11928 continue;
11929 }
11930 // "cast" fixed length vector to a scalable vector.
11931 MVT OpVT = V.getSimpleValueType();
11932 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
11933 assert(useRVVForFixedLengthVectorVT(OpVT) &&
11934 "Only fixed length vectors are supported!");
11935 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11936 }
11937
11938 if (!VT.isFixedLengthVector())
11939 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
11940
11941 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
11942
11943 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
11944}
11945
11946SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
11947 SelectionDAG &DAG) const {
11948 SDLoc DL(Op);
11949 MVT VT = Op.getSimpleValueType();
11950
11951 SDValue Src = Op.getOperand(0);
11952 // NOTE: Mask is dropped.
11953 SDValue VL = Op.getOperand(2);
11954
11955 MVT ContainerVT = VT;
11956 if (VT.isFixedLengthVector()) {
11957 ContainerVT = getContainerForFixedLengthVector(VT);
11958 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
11959 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11960 }
11961
11962 MVT XLenVT = Subtarget.getXLenVT();
11963 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11964 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11965 DAG.getUNDEF(ContainerVT), Zero, VL);
11966
11967 SDValue SplatValue = DAG.getSignedConstant(
11968 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
11969 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11970 DAG.getUNDEF(ContainerVT), SplatValue, VL);
11971
11972 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
11973 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
11974 if (!VT.isFixedLengthVector())
11975 return Result;
11976 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11977}
11978
11979SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
11980 SelectionDAG &DAG) const {
11981 SDLoc DL(Op);
11982 MVT VT = Op.getSimpleValueType();
11983
11984 SDValue Op1 = Op.getOperand(0);
11985 SDValue Op2 = Op.getOperand(1);
11986 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
11987 // NOTE: Mask is dropped.
11988 SDValue VL = Op.getOperand(4);
11989
11990 MVT ContainerVT = VT;
11991 if (VT.isFixedLengthVector()) {
11992 ContainerVT = getContainerForFixedLengthVector(VT);
11993 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11994 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11995 }
11996
11997 SDValue Result;
11998 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
11999
12000 switch (Condition) {
12001 default:
12002 break;
12003 // X != Y --> (X^Y)
12004 case ISD::SETNE:
12005 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
12006 break;
12007 // X == Y --> ~(X^Y)
12008 case ISD::SETEQ: {
12009 SDValue Temp =
12010 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
12011 Result =
12012 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
12013 break;
12014 }
12015 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
12016 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
12017 case ISD::SETGT:
12018 case ISD::SETULT: {
12019 SDValue Temp =
12020 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
12021 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
12022 break;
12023 }
12024 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
12025 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
12026 case ISD::SETLT:
12027 case ISD::SETUGT: {
12028 SDValue Temp =
12029 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
12030 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
12031 break;
12032 }
12033 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
12034 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
12035 case ISD::SETGE:
12036 case ISD::SETULE: {
12037 SDValue Temp =
12038 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
12039 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
12040 break;
12041 }
12042 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
12043 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
12044 case ISD::SETLE:
12045 case ISD::SETUGE: {
12046 SDValue Temp =
12047 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
12048 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
12049 break;
12050 }
12051 }
12052
12053 if (!VT.isFixedLengthVector())
12054 return Result;
12055 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12056}
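// Quick check of one identity used above, reading i1 lanes as booleans: for
// X <u Y the only true case is X = 0, Y = 1, which is exactly ~X & Y; the
// other comparisons follow the same pattern with the operands swapped or the
// result inverted by an extra vmxor with the all-ones mask.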
12057
12058// Lower Floating-Point/Integer Type-Convert VP SDNodes
12059SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
12060 SelectionDAG &DAG) const {
12061 SDLoc DL(Op);
12062
12063 SDValue Src = Op.getOperand(0);
12064 SDValue Mask = Op.getOperand(1);
12065 SDValue VL = Op.getOperand(2);
12066 unsigned RISCVISDOpc = getRISCVVLOp(Op);
12067
12068 MVT DstVT = Op.getSimpleValueType();
12069 MVT SrcVT = Src.getSimpleValueType();
12070 if (DstVT.isFixedLengthVector()) {
12071 DstVT = getContainerForFixedLengthVector(DstVT);
12072 SrcVT = getContainerForFixedLengthVector(SrcVT);
12073 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
12074 MVT MaskVT = getMaskTypeFor(DstVT);
12075 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12076 }
12077
12078 unsigned DstEltSize = DstVT.getScalarSizeInBits();
12079 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
12080
12081 SDValue Result;
12082 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
12083 if (SrcVT.isInteger()) {
12084 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
12085
12086 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
12087 ? RISCVISD::VSEXT_VL
12088 : RISCVISD::VZEXT_VL;
12089
12090 // Do we need to do any pre-widening before converting?
12091 if (SrcEltSize == 1) {
12092 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
12093 MVT XLenVT = Subtarget.getXLenVT();
12094 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
12095 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
12096 DAG.getUNDEF(IntVT), Zero, VL);
12097 SDValue One = DAG.getSignedConstant(
12098 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
12099 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
12100 DAG.getUNDEF(IntVT), One, VL);
12101 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
12102 ZeroSplat, DAG.getUNDEF(IntVT), VL);
12103 } else if (DstEltSize > (2 * SrcEltSize)) {
12104 // Widen before converting.
12105 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
12106 DstVT.getVectorElementCount());
12107 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
12108 }
12109
12110 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
12111 } else {
12112 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
12113 "Wrong input/output vector types");
12114
12115 // Convert f16 to f32 then convert f32 to i64.
12116 if (DstEltSize > (2 * SrcEltSize)) {
12117 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
12118 MVT InterimFVT =
12119 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
12120 Src =
12121 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
12122 }
12123
12124 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
12125 }
12126 } else { // Narrowing + Conversion
12127 if (SrcVT.isInteger()) {
12128 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
12129 // First do a narrowing conversion to an FP type half the size, then round
12130 // it to a smaller FP type if needed.
12131
12132 MVT InterimFVT = DstVT;
12133 if (SrcEltSize > (2 * DstEltSize)) {
12134 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
12135 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
12136 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
12137 }
12138
12139 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
12140
12141 if (InterimFVT != DstVT) {
12142 Src = Result;
12143 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
12144 }
12145 } else {
12146 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
12147 "Wrong input/output vector types");
12148 // First do a narrowing conversion to an integer half the size, then
12149 // truncate if needed.
12150
12151 if (DstEltSize == 1) {
12152 // First convert to the same size integer, then convert to mask using
12153 // setcc.
12154 assert(SrcEltSize >= 16 && "Unexpected FP type!");
12155 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
12156 DstVT.getVectorElementCount());
12157 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
12158
12159 // Compare the integer result to 0. The integer should be 0 or 1/-1,
12160 // otherwise the conversion was undefined.
12161 MVT XLenVT = Subtarget.getXLenVT();
12162 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
12163 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
12164 DAG.getUNDEF(InterimIVT), SplatZero, VL);
12165 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
12166 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
12167 DAG.getUNDEF(DstVT), Mask, VL});
12168 } else {
12169 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
12170 DstVT.getVectorElementCount());
12171
12172 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
12173
12174 while (InterimIVT != DstVT) {
12175 SrcEltSize /= 2;
12176 Src = Result;
12177 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
12178 DstVT.getVectorElementCount());
12179 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
12180 Src, Mask, VL);
12181 }
12182 }
12183 }
12184 }
12185
12186 MVT VT = Op.getSimpleValueType();
12187 if (!VT.isFixedLengthVector())
12188 return Result;
12189 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12190}
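// Example of the narrowing integer loop above (types assumed): converting an
// f64 vector to i8 first narrows to i32 (typically a vfncvt), then truncates
// i32 -> i16 -> i8 with successive TRUNCATE_VECTOR_VL nodes, each usually
// selected as a vnsrl with a zero shift amount.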
12191
12192SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op,
12193 SelectionDAG &DAG) const {
12194 SDLoc DL(Op);
12195 MVT VT = Op.getSimpleValueType();
12196 MVT XLenVT = Subtarget.getXLenVT();
12197
12198 SDValue Mask = Op.getOperand(0);
12199 SDValue TrueVal = Op.getOperand(1);
12200 SDValue FalseVal = Op.getOperand(2);
12201 SDValue VL = Op.getOperand(3);
12202
12203 // Use default legalization if a vector of EVL type would be legal.
12204 EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), VL.getValueType(),
12205 VT.getVectorElementCount());
12206 if (isTypeLegal(EVLVecVT))
12207 return SDValue();
12208
12209 MVT ContainerVT = VT;
12210 if (VT.isFixedLengthVector()) {
12211 ContainerVT = getContainerForFixedLengthVector(VT);
12212 Mask = convertToScalableVector(ContainerVT, Mask, DAG, Subtarget);
12213 TrueVal = convertToScalableVector(ContainerVT, TrueVal, DAG, Subtarget);
12214 FalseVal = convertToScalableVector(ContainerVT, FalseVal, DAG, Subtarget);
12215 }
12216
12217 // Promote to a vector of i8.
12218 MVT PromotedVT = ContainerVT.changeVectorElementType(MVT::i8);
12219
12220 // Promote TrueVal and FalseVal using VLMax.
12221 // FIXME: Is there a better way to do this?
12222 SDValue VLMax = DAG.getRegister(RISCV::X0, XLenVT);
12223 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
12224 DAG.getUNDEF(PromotedVT),
12225 DAG.getConstant(1, DL, XLenVT), VLMax);
12226 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
12227 DAG.getUNDEF(PromotedVT),
12228 DAG.getConstant(0, DL, XLenVT), VLMax);
12229 TrueVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, TrueVal, SplatOne,
12230 SplatZero, DAG.getUNDEF(PromotedVT), VL);
12231 // Any element past VL uses FalseVal, so use VLMax
12232 FalseVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, FalseVal,
12233 SplatOne, SplatZero, DAG.getUNDEF(PromotedVT), VLMax);
12234
12235 // VP_MERGE the two promoted values.
12236 SDValue VPMerge = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, Mask,
12237 TrueVal, FalseVal, FalseVal, VL);
12238
12239 // Convert back to mask.
12240 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
12241 SDValue Result = DAG.getNode(
12242 RISCVISD::SETCC_VL, DL, ContainerVT,
12243 {VPMerge, DAG.getConstant(0, DL, PromotedVT), DAG.getCondCode(ISD::SETNE),
12244 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), TrueMask, VLMax});
12245
12246 if (VT.isFixedLengthVector())
12247 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12248 return Result;
12249}
12250
12251SDValue
12252RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
12253 SelectionDAG &DAG) const {
12254 SDLoc DL(Op);
12255
12256 SDValue Op1 = Op.getOperand(0);
12257 SDValue Op2 = Op.getOperand(1);
12258 SDValue Offset = Op.getOperand(2);
12259 SDValue Mask = Op.getOperand(3);
12260 SDValue EVL1 = Op.getOperand(4);
12261 SDValue EVL2 = Op.getOperand(5);
12262
12263 const MVT XLenVT = Subtarget.getXLenVT();
12264 MVT VT = Op.getSimpleValueType();
12265 MVT ContainerVT = VT;
12266 if (VT.isFixedLengthVector()) {
12267 ContainerVT = getContainerForFixedLengthVector(VT);
12268 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12269 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
12270 MVT MaskVT = getMaskTypeFor(ContainerVT);
12271 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12272 }
12273
12274 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
12275 if (IsMaskVector) {
12276 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
12277
12278 // Expand input operands
12279 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12280 DAG.getUNDEF(ContainerVT),
12281 DAG.getConstant(1, DL, XLenVT), EVL1);
12282 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12283 DAG.getUNDEF(ContainerVT),
12284 DAG.getConstant(0, DL, XLenVT), EVL1);
12285 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
12286 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
12287
12288 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12289 DAG.getUNDEF(ContainerVT),
12290 DAG.getConstant(1, DL, XLenVT), EVL2);
12291 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12292 DAG.getUNDEF(ContainerVT),
12293 DAG.getConstant(0, DL, XLenVT), EVL2);
12294 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
12295 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
12296 }
12297
12298 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
12299 SDValue DownOffset, UpOffset;
12300 if (ImmValue >= 0) {
12301 // The operand is a TargetConstant, we need to rebuild it as a regular
12302 // constant.
12303 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
12304 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
12305 } else {
12306 // The operand is a TargetConstant, we need to rebuild it as a regular
12307 // constant rather than negating the original operand.
12308 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
12309 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
12310 }
12311
12312 SDValue SlideDown =
12313 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
12314 Op1, DownOffset, Mask, UpOffset);
12315 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,
12316 UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC);
12317
12318 if (IsMaskVector) {
12319 // Truncate Result back to a mask vector (Result has same EVL as Op2)
12320 Result = DAG.getNode(
12321 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
12322 {Result, DAG.getConstant(0, DL, ContainerVT),
12323 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
12324 Mask, EVL2});
12325 }
12326
12327 if (!VT.isFixedLengthVector())
12328 return Result;
12329 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12330}
12331
12332SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
12333 SelectionDAG &DAG) const {
12334 SDLoc DL(Op);
12335 SDValue Val = Op.getOperand(0);
12336 SDValue Mask = Op.getOperand(1);
12337 SDValue VL = Op.getOperand(2);
12338 MVT VT = Op.getSimpleValueType();
12339
12340 MVT ContainerVT = VT;
12341 if (VT.isFixedLengthVector()) {
12342 ContainerVT = getContainerForFixedLengthVector(VT);
12343 MVT MaskVT = getMaskTypeFor(ContainerVT);
12344 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12345 }
12346
12347 SDValue Result =
12348 lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
12349
12350 if (!VT.isFixedLengthVector())
12351 return Result;
12352 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12353}
12354
12355SDValue
12356RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
12357 SelectionDAG &DAG) const {
12358 SDLoc DL(Op);
12359 MVT VT = Op.getSimpleValueType();
12360 MVT XLenVT = Subtarget.getXLenVT();
12361
12362 SDValue Op1 = Op.getOperand(0);
12363 SDValue Mask = Op.getOperand(1);
12364 SDValue EVL = Op.getOperand(2);
12365
12366 MVT ContainerVT = VT;
12367 if (VT.isFixedLengthVector()) {
12368 ContainerVT = getContainerForFixedLengthVector(VT);
12369 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12370 MVT MaskVT = getMaskTypeFor(ContainerVT);
12371 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12372 }
12373
12374 MVT GatherVT = ContainerVT;
12375 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
12376 // Check if we are working with mask vectors
12377 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
12378 if (IsMaskVector) {
12379 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
12380
12381 // Expand input operand
12382 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
12383 DAG.getUNDEF(IndicesVT),
12384 DAG.getConstant(1, DL, XLenVT), EVL);
12385 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
12386 DAG.getUNDEF(IndicesVT),
12387 DAG.getConstant(0, DL, XLenVT), EVL);
12388 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
12389 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
12390 }
12391
12392 unsigned EltSize = GatherVT.getScalarSizeInBits();
12393 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
12394 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
12395 unsigned MaxVLMAX =
12396 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
12397
12398 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
12399 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
12400 // to use vrgatherei16.vv.
12401 // TODO: It's also possible to use vrgatherei16.vv for other types to
12402 // decrease register width for the index calculation.
12403 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
12404 if (MaxVLMAX > 256 && EltSize == 8) {
12405 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
12406 // Split the vector in half and reverse each half using a full register
12407 // reverse.
12408 // Swap the halves and concatenate them.
12409 // Slide the concatenated result by (VLMax - VL).
12410 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
12411 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
12412 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
12413
12414 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
12415 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
12416
12417 // Reassemble the low and high pieces reversed.
12418 // NOTE: this Result is unmasked (because we do not need masks for
12419 // shuffles). If in the future this has to change, we can use a SELECT_VL
12420 // between Result and UNDEF using the mask originally passed to VP_REVERSE.
12421 SDValue Result =
12422 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
12423
12424 // Slide off any elements from past EVL that were reversed into the low
12425 // elements.
12426 unsigned MinElts = GatherVT.getVectorMinNumElements();
12427 SDValue VLMax =
12428 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
12429 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
12430
12431 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
12432 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
12433
12434 if (IsMaskVector) {
12435 // Truncate Result back to a mask vector
12436 Result =
12437 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
12438 {Result, DAG.getConstant(0, DL, GatherVT),
12439 DAG.getCondCode(ISD::SETNE),
12440 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
12441 }
12442
12443 if (!VT.isFixedLengthVector())
12444 return Result;
12445 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12446 }
12447
12448 // Just promote the int type to i16 which will double the LMUL.
12449 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
12450 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
12451 }
12452
12453 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
12454 SDValue VecLen =
12455 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
12456 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
12457 DAG.getUNDEF(IndicesVT), VecLen, EVL);
12458 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
12459 DAG.getUNDEF(IndicesVT), Mask, EVL);
12460 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
12461 DAG.getUNDEF(GatherVT), Mask, EVL);
12462
12463 if (IsMaskVector) {
12464 // Truncate Result back to a mask vector
12465 Result = DAG.getNode(
12466 RISCVISD::SETCC_VL, DL, ContainerVT,
12467 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
12468 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
12469 }
12470
12471 if (!VT.isFixedLengthVector())
12472 return Result;
12473 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12474}
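// Worked example for the gather path above (EVL of 4 assumed): VID gives
// {0 1 2 3 ...}, the splat of EVL-1 is {3 3 3 3 ...}, so the indices are
// {3 2 1 0 ...} and only the first EVL lanes of the gathered result are
// meaningful.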
12475
12476SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
12477 SelectionDAG &DAG) const {
12478 MVT VT = Op.getSimpleValueType();
12479 if (VT.getVectorElementType() != MVT::i1)
12480 return lowerVPOp(Op, DAG);
12481
12482 // It is safe to drop mask parameter as masked-off elements are undef.
12483 SDValue Op1 = Op->getOperand(0);
12484 SDValue Op2 = Op->getOperand(1);
12485 SDValue VL = Op->getOperand(3);
12486
12487 MVT ContainerVT = VT;
12488 const bool IsFixed = VT.isFixedLengthVector();
12489 if (IsFixed) {
12490 ContainerVT = getContainerForFixedLengthVector(VT);
12491 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12492 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
12493 }
12494
12495 SDLoc DL(Op);
12496 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
12497 if (!IsFixed)
12498 return Val;
12499 return convertFromScalableVector(VT, Val, DAG, Subtarget);
12500}
12501
12502SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
12503 SelectionDAG &DAG) const {
12504 SDLoc DL(Op);
12505 MVT XLenVT = Subtarget.getXLenVT();
12506 MVT VT = Op.getSimpleValueType();
12507 MVT ContainerVT = VT;
12508 if (VT.isFixedLengthVector())
12509 ContainerVT = getContainerForFixedLengthVector(VT);
12510
12511 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12512
12513 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
12514 // Check if the mask is known to be all ones
12515 SDValue Mask = VPNode->getMask();
12516 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12517
12518 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
12519 : Intrinsic::riscv_vlse_mask,
12520 DL, XLenVT);
12521 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
12522 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
12523 VPNode->getStride()};
12524 if (!IsUnmasked) {
12525 if (VT.isFixedLengthVector()) {
12526 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
12527 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12528 }
12529 Ops.push_back(Mask);
12530 }
12531 Ops.push_back(VPNode->getVectorLength());
12532 if (!IsUnmasked) {
12533 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
12534 Ops.push_back(Policy);
12535 }
12536
12537 SDValue Result =
12538 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
12539 VPNode->getMemoryVT(), VPNode->getMemOperand());
12540 SDValue Chain = Result.getValue(1);
12541
12542 if (VT.isFixedLengthVector())
12543 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12544
12545 return DAG.getMergeValues({Result, Chain}, DL);
12546}
12547
12548SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
12549 SelectionDAG &DAG) const {
12550 SDLoc DL(Op);
12551 MVT XLenVT = Subtarget.getXLenVT();
12552
12553 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
12554 SDValue StoreVal = VPNode->getValue();
12555 MVT VT = StoreVal.getSimpleValueType();
12556 MVT ContainerVT = VT;
12557 if (VT.isFixedLengthVector()) {
12558 ContainerVT = getContainerForFixedLengthVector(VT);
12559 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
12560 }
12561
12562 // Check if the mask is known to be all ones
12563 SDValue Mask = VPNode->getMask();
12564 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12565
12566 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
12567 : Intrinsic::riscv_vsse_mask,
12568 DL, XLenVT);
12569 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
12570 VPNode->getBasePtr(), VPNode->getStride()};
12571 if (!IsUnmasked) {
12572 if (VT.isFixedLengthVector()) {
12573 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
12574 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12575 }
12576 Ops.push_back(Mask);
12577 }
12578 Ops.push_back(VPNode->getVectorLength());
12579
12580 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
12581 Ops, VPNode->getMemoryVT(),
12582 VPNode->getMemOperand());
12583}
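// The store path mirrors the load: an all-ones mask selects the unmasked
// Intrinsic::riscv_vsse form, which is later matched to a strided store such
// as vsse32.v (an illustrative mapping for i32 elements).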
12584
12585// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
12586// matched to a RVV indexed load. The RVV indexed load instructions only
12587// support the "unsigned unscaled" addressing mode; indices are implicitly
12588// zero-extended or truncated to XLEN and are treated as byte offsets. Any
12589// signed or scaled indexing is extended to the XLEN value type and scaled
12590// accordingly.
12591SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
12592 SelectionDAG &DAG) const {
12593 SDLoc DL(Op);
12594 MVT VT = Op.getSimpleValueType();
12595
12596 const auto *MemSD = cast<MemSDNode>(Op.getNode());
12597 EVT MemVT = MemSD->getMemoryVT();
12598 MachineMemOperand *MMO = MemSD->getMemOperand();
12599 SDValue Chain = MemSD->getChain();
12600 SDValue BasePtr = MemSD->getBasePtr();
12601
12602 [[maybe_unused]] ISD::LoadExtType LoadExtType;
12603 SDValue Index, Mask, PassThru, VL;
12604
12605 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
12606 Index = VPGN->getIndex();
12607 Mask = VPGN->getMask();
12608 PassThru = DAG.getUNDEF(VT);
12609 VL = VPGN->getVectorLength();
12610 // VP doesn't support extending loads.
12611 LoadExtType = ISD::NON_EXTLOAD;
12612 } else {
12613 // Else it must be a MGATHER.
12614 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
12615 Index = MGN->getIndex();
12616 Mask = MGN->getMask();
12617 PassThru = MGN->getPassThru();
12618 LoadExtType = MGN->getExtensionType();
12619 }
12620
12621 MVT IndexVT = Index.getSimpleValueType();
12622 MVT XLenVT = Subtarget.getXLenVT();
12623
12624 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
12625        "Unexpected VTs!");
12626 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
12627 // Targets have to explicitly opt-in for extending vector loads.
12628 assert(LoadExtType == ISD::NON_EXTLOAD &&
12629 "Unexpected extending MGATHER/VP_GATHER");
12630
12631 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
12632 // the selection of the masked intrinsics doesn't do this for us.
12633 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12634
12635 MVT ContainerVT = VT;
12636 if (VT.isFixedLengthVector()) {
12637 ContainerVT = getContainerForFixedLengthVector(VT);
12638 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
12639 ContainerVT.getVectorElementCount());
12640
12641 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
12642
12643 if (!IsUnmasked) {
12644 MVT MaskVT = getMaskTypeFor(ContainerVT);
12645 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12646 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
12647 }
12648 }
12649
12650 if (!VL)
12651 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12652
12653 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
12654 IndexVT = IndexVT.changeVectorElementType(XLenVT);
12655 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
12656 }
12657
12658 unsigned IntID =
12659 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
12660 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12661 if (IsUnmasked)
12662 Ops.push_back(DAG.getUNDEF(ContainerVT));
12663 else
12664 Ops.push_back(PassThru);
12665 Ops.push_back(BasePtr);
12666 Ops.push_back(Index);
12667 if (!IsUnmasked)
12668 Ops.push_back(Mask);
12669 Ops.push_back(VL);
12670 if (!IsUnmasked)
12671 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
12672
12673 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12674 SDValue Result =
12675 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
12676 Chain = Result.getValue(1);
12677
12678 if (VT.isFixedLengthVector())
12679 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12680
12681 return DAG.getMergeValues({Result, Chain}, DL);
12682}
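// Example (sketch): a v4i32 VP_GATHER whose index vector is v4i64 on RV32
// first truncates the indices to XLEN (they are treated as byte offsets),
// converts fixed-length operands to their scalable container, and is emitted
// as Intrinsic::riscv_vluxei_mask (riscv_vluxei when the mask is all ones),
// which later selects to an indexed-unordered load such as vluxei32.v.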
12683
12684// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
12685// matched to a RVV indexed store. The RVV indexed store instructions only
12686// support the "unsigned unscaled" addressing mode; indices are implicitly
12687// zero-extended or truncated to XLEN and are treated as byte offsets. Any
12688// signed or scaled indexing is extended to the XLEN value type and scaled
12689// accordingly.
12690SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
12691 SelectionDAG &DAG) const {
12692 SDLoc DL(Op);
12693 const auto *MemSD = cast<MemSDNode>(Op.getNode());
12694 EVT MemVT = MemSD->getMemoryVT();
12695 MachineMemOperand *MMO = MemSD->getMemOperand();
12696 SDValue Chain = MemSD->getChain();
12697 SDValue BasePtr = MemSD->getBasePtr();
12698
12699 [[maybe_unused]] bool IsTruncatingStore = false;
12700 SDValue Index, Mask, Val, VL;
12701
12702 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
12703 Index = VPSN->getIndex();
12704 Mask = VPSN->getMask();
12705 Val = VPSN->getValue();
12706 VL = VPSN->getVectorLength();
12707 // VP doesn't support truncating stores.
12708 IsTruncatingStore = false;
12709 } else {
12710 // Else it must be a MSCATTER.
12711 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
12712 Index = MSN->getIndex();
12713 Mask = MSN->getMask();
12714 Val = MSN->getValue();
12715 IsTruncatingStore = MSN->isTruncatingStore();
12716 }
12717
12718 MVT VT = Val.getSimpleValueType();
12719 MVT IndexVT = Index.getSimpleValueType();
12720 MVT XLenVT = Subtarget.getXLenVT();
12721
12722 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
12723        "Unexpected VTs!");
12724 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
12725 // Targets have to explicitly opt-in for extending vector loads and
12726 // truncating vector stores.
12727 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
12728
12729 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
12730 // the selection of the masked intrinsics doesn't do this for us.
12731 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12732
12733 MVT ContainerVT = VT;
12734 if (VT.isFixedLengthVector()) {
12735 ContainerVT = getContainerForFixedLengthVector(VT);
12736 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
12737 ContainerVT.getVectorElementCount());
12738
12739 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
12740 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
12741
12742 if (!IsUnmasked) {
12743 MVT MaskVT = getMaskTypeFor(ContainerVT);
12744 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12745 }
12746 }
12747
12748 if (!VL)
12749 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12750
12751 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
12752 IndexVT = IndexVT.changeVectorElementType(XLenVT);
12753 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
12754 }
12755
12756 unsigned IntID =
12757 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
12758 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12759 Ops.push_back(Val);
12760 Ops.push_back(BasePtr);
12761 Ops.push_back(Index);
12762 if (!IsUnmasked)
12763 Ops.push_back(Mask);
12764 Ops.push_back(VL);
12765
12766 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
12767 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
12768}
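// The scatter path follows the same pattern but uses the ordered
// indexed-store intrinsics riscv_vsoxei/riscv_vsoxei_mask (e.g. selecting to
// vsoxei32.v), so overlapping element writes keep their element order
// (a sketch of the intended mapping).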
12769
12770SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
12771 SelectionDAG &DAG) const {
12772 const MVT XLenVT = Subtarget.getXLenVT();
12773 SDLoc DL(Op);
12774 SDValue Chain = Op->getOperand(0);
12775 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
12776 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
12777 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
12778
12779 // Encoding used for rounding mode in RISC-V differs from that used in
12780 // FLT_ROUNDS. To convert it, the RISC-V rounding mode is used as an index into
12781 // a table, which consists of a sequence of 4-bit fields, each representing the
12782 // corresponding FLT_ROUNDS mode.
12783 static const int Table =
12784 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
12785 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
12786 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
12787 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
12788 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
12789
12790 SDValue Shift =
12791 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
12792 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
12793 DAG.getConstant(Table, DL, XLenVT), Shift);
12794 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
12795 DAG.getConstant(7, DL, XLenVT));
12796
12797 return DAG.getMergeValues({Masked, Chain}, DL);
12798}
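// Worked example (assuming the FLT_ROUNDS numbering where 0 is toward-zero):
// with frm = 1 (RTZ), Shift = 1 << 2 = 4, so (Table >> 4) & 7 selects the RTZ
// field of the table, i.e. int(RoundingMode::TowardZero) = 0, the FLT_ROUNDS
// value for toward-zero, without any branches.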
12799
12800SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
12801 SelectionDAG &DAG) const {
12802 const MVT XLenVT = Subtarget.getXLenVT();
12803 SDLoc DL(Op);
12804 SDValue Chain = Op->getOperand(0);
12805 SDValue RMValue = Op->getOperand(1);
12806 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
12807
12808 // Encoding used for rounding mode in RISC-V differs from that used in
12809 // FLT_ROUNDS. To convert it, the C rounding mode is used as an index into
12810 // a table, which consists of a sequence of 4-bit fields, each representing the
12811 // corresponding RISC-V mode.
12812 static const unsigned Table =
12813 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
12814 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
12815 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
12816 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
12817 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
12818
12819 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
12820
12821 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
12822 DAG.getConstant(2, DL, XLenVT));
12823 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
12824 DAG.getConstant(Table, DL, XLenVT), Shift);
12825 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
12826 DAG.getConstant(0x7, DL, XLenVT));
12827 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
12828 RMValue);
12829}
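// Worked example (same encoding assumptions as in lowerGET_ROUNDING): setting
// the rounding mode to FLT_ROUNDS 0 (toward-zero) shifts the table right by 0
// and masks to 3 bits, producing RISCVFPRndMode::RTZ, which WRITE_CSR then
// stores to frm.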
12830
12831SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
12832 SelectionDAG &DAG) const {
12833 MachineFunction &MF = DAG.getMachineFunction();
12834
12835 bool isRISCV64 = Subtarget.is64Bit();
12836 EVT PtrVT = getPointerTy(DAG.getDataLayout());
12837
12838 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
12839 return DAG.getFrameIndex(FI, PtrVT);
12840}
12841
12842// Returns the opcode of the target-specific SDNode that implements the 32-bit
12843// form of the given Opcode.
12844static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
12845 switch (Opcode) {
12846 default:
12847 llvm_unreachable("Unexpected opcode");
12848 case ISD::SHL:
12849 return RISCVISD::SLLW;
12850 case ISD::SRA:
12851 return RISCVISD::SRAW;
12852 case ISD::SRL:
12853 return RISCVISD::SRLW;
12854 case ISD::SDIV:
12855 return RISCVISD::DIVW;
12856 case ISD::UDIV:
12857 return RISCVISD::DIVUW;
12858 case ISD::UREM:
12859 return RISCVISD::REMUW;
12860 case ISD::ROTL:
12861 return RISCVISD::ROLW;
12862 case ISD::ROTR:
12863 return RISCVISD::RORW;
12864 }
12865}
12866
12867// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
12868// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
12869// otherwise be promoted to i64, making it difficult to select the
12870 // SLLW/DIVUW/.../*W later on, because the fact that the operation was originally of
12871// type i8/i16/i32 is lost.
12872 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
12873 unsigned ExtOpc = ISD::ANY_EXTEND) {
12874 SDLoc DL(N);
12875 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
12876 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
12877 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
12878 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
12879 // ReplaceNodeResults requires we maintain the same type for the return value.
12880 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
12881}
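// Sketch of the transform this performs for a shift reaching
// ReplaceNodeResults on RV64:
//   (i32 (sra %x, %y))
//     -> (i32 (trunc (RISCVISD::SRAW (any_extend %x), (any_extend %y))))
// which keeps the "operate on the low 32 bits, then sign-extend" semantics of
// sraw visible in the DAG.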
12882
12883 // Converts the given 32-bit operation to an i64 operation with sign-extension
12884 // semantics, reducing the number of sign-extension instructions needed.
12885 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
12886 SDLoc DL(N);
12887 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12888 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12889 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
12890 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
12891 DAG.getValueType(MVT::i32));
12892 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
12893}
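// Sketch: (i32 (add %a, %b)) on RV64 becomes
//   (i32 (trunc (sign_extend_inreg (add (any_extend %a), (any_extend %b)),
//                                  i32)))
// and the sign_extend_inreg is expected to fold into an addw during
// selection, so the 64-bit result is known to be sign-extended.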
12894
12895 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
12896 SmallVectorImpl<SDValue> &Results,
12897 SelectionDAG &DAG) const {
12898 SDLoc DL(N);
12899 switch (N->getOpcode()) {
12900 default:
12901 llvm_unreachable("Don't know how to custom type legalize this operation!");
12902 case ISD::STRICT_FP_TO_SINT:
12903 case ISD::STRICT_FP_TO_UINT:
12904 case ISD::FP_TO_SINT:
12905 case ISD::FP_TO_UINT: {
12906 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12907 "Unexpected custom legalisation");
12908 bool IsStrict = N->isStrictFPOpcode();
12909 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
12910 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
12911 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
12912 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12913 TargetLowering::TypeSoftenFloat) {
12914 if (!isTypeLegal(Op0.getValueType()))
12915 return;
12916 if (IsStrict) {
12917 SDValue Chain = N->getOperand(0);
12918 // In absence of Zfh, promote f16 to f32, then convert.
12919 if (Op0.getValueType() == MVT::f16 &&
12920 !Subtarget.hasStdExtZfhOrZhinx()) {
12921 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
12922 {Chain, Op0});
12923 Chain = Op0.getValue(1);
12924 }
12925 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
12926 : RISCVISD::STRICT_FCVT_WU_RV64;
12927 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
12928 SDValue Res = DAG.getNode(
12929 Opc, DL, VTs, Chain, Op0,
12930 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12931 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12932 Results.push_back(Res.getValue(1));
12933 return;
12934 }
12935 // For bf16, or f16 in absence of Zfh, promote [b]f16 to f32 and then
12936 // convert.
12937 if ((Op0.getValueType() == MVT::f16 &&
12938 !Subtarget.hasStdExtZfhOrZhinx()) ||
12939 Op0.getValueType() == MVT::bf16)
12940 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12941
12942 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
12943 SDValue Res =
12944 DAG.getNode(Opc, DL, MVT::i64, Op0,
12945 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12946 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12947 return;
12948 }
12949 // If the FP type needs to be softened, emit a library call using the 'si'
12950 // version. If we left it to default legalization we'd end up with 'di'. If
12951 // the FP type doesn't need to be softened just let generic type
12952 // legalization promote the result type.
12953 RTLIB::Libcall LC;
12954 if (IsSigned)
12955 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
12956 else
12957 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
12958 MakeLibCallOptions CallOptions;
12959 EVT OpVT = Op0.getValueType();
12960 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
12961 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
12962 SDValue Result;
12963 std::tie(Result, Chain) =
12964 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
12965 Results.push_back(Result);
12966 if (IsStrict)
12967 Results.push_back(Chain);
12968 break;
12969 }
12970 case ISD::LROUND: {
12971 SDValue Op0 = N->getOperand(0);
12972 EVT Op0VT = Op0.getValueType();
12973 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12974 TargetLowering::TypeSoftenFloat) {
12975 if (!isTypeLegal(Op0VT))
12976 return;
12977
12978 // In absence of Zfh, promote f16 to f32, then convert.
12979 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
12980 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12981
12982 SDValue Res =
12983 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
12984 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
12985 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12986 return;
12987 }
12988 // If the FP type needs to be softened, emit a library call to lround. We'll
12989 // need to truncate the result. We assume any value that doesn't fit in i32
12990 // is allowed to return an unspecified value.
12991 RTLIB::Libcall LC =
12992 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
12993 MakeLibCallOptions CallOptions;
12994 EVT OpVT = Op0.getValueType();
12995 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
12996 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
12997 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
12998 Results.push_back(Result);
12999 break;
13000 }
13001 case ISD::READCYCLECOUNTER:
13002 case ISD::READSTEADYCOUNTER: {
13003 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
13004 "has custom type legalization on riscv32");
13005
13006 SDValue LoCounter, HiCounter;
13007 MVT XLenVT = Subtarget.getXLenVT();
13008 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
13009 LoCounter = DAG.getTargetConstant(RISCVSysReg::cycle, DL, XLenVT);
13010 HiCounter = DAG.getTargetConstant(RISCVSysReg::cycleh, DL, XLenVT);
13011 } else {
13012 LoCounter = DAG.getTargetConstant(RISCVSysReg::time, DL, XLenVT);
13013 HiCounter = DAG.getTargetConstant(RISCVSysReg::timeh, DL, XLenVT);
13014 }
13015 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
13016 SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
13017 N->getOperand(0), LoCounter, HiCounter);
13018
13019 Results.push_back(
13020 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
13021 Results.push_back(RCW.getValue(2));
13022 break;
13023 }
13024 case ISD::LOAD: {
13025 if (!ISD::isNON_EXTLoad(N))
13026 return;
13027
13028 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
13029 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
13030 LoadSDNode *Ld = cast<LoadSDNode>(N);
13031
13032 SDLoc dl(N);
13033 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
13034 Ld->getBasePtr(), Ld->getMemoryVT(),
13035 Ld->getMemOperand());
13036 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
13037 Results.push_back(Res.getValue(1));
13038 return;
13039 }
13040 case ISD::MUL: {
13041 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
13042 unsigned XLen = Subtarget.getXLen();
13043 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
13044 if (Size > XLen) {
13045 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
13046 SDValue LHS = N->getOperand(0);
13047 SDValue RHS = N->getOperand(1);
13048 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
13049
13050 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
13051 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
13052 // We need exactly one side to be unsigned.
13053 if (LHSIsU == RHSIsU)
13054 return;
13055
13056 auto MakeMULPair = [&](SDValue S, SDValue U) {
13057 MVT XLenVT = Subtarget.getXLenVT();
13058 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
13059 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
13060 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
13061 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
13062 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
13063 };
13064
13065 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
13066 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
13067
13068 // The other operand should be signed, but still prefer MULH when
13069 // possible.
13070 if (RHSIsU && LHSIsS && !RHSIsS)
13071 Results.push_back(MakeMULPair(LHS, RHS));
13072 else if (LHSIsU && RHSIsS && !LHSIsS)
13073 Results.push_back(MakeMULPair(RHS, LHS));
13074
13075 return;
13076 }
13077 [[fallthrough]];
13078 }
13079 case ISD::ADD:
13080 case ISD::SUB:
13081 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13082 "Unexpected custom legalisation");
13083 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
13084 break;
13085 case ISD::SHL:
13086 case ISD::SRA:
13087 case ISD::SRL:
13088 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13089 "Unexpected custom legalisation");
13090 if (N->getOperand(1).getOpcode() != ISD::Constant) {
13091 // If we can use a BSET instruction, allow default promotion to apply.
13092 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
13093 isOneConstant(N->getOperand(0)))
13094 break;
13095 Results.push_back(customLegalizeToWOp(N, DAG));
13096 break;
13097 }
13098
13099 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
13100 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
13101 // shift amount.
13102 if (N->getOpcode() == ISD::SHL) {
13103 SDLoc DL(N);
13104 SDValue NewOp0 =
13105 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13106 SDValue NewOp1 =
13107 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
13108 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
13109 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
13110 DAG.getValueType(MVT::i32));
13111 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
13112 }
13113
13114 break;
13115 case ISD::ROTL:
13116 case ISD::ROTR:
13117 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13118 "Unexpected custom legalisation");
13119 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
13120 Subtarget.hasVendorXTHeadBb()) &&
13121 "Unexpected custom legalization");
13122 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
13123 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
13124 return;
13125 Results.push_back(customLegalizeToWOp(N, DAG));
13126 break;
13127 case ISD::CTTZ:
13128 case ISD::CTTZ_ZERO_UNDEF:
13129 case ISD::CTLZ:
13130 case ISD::CTLZ_ZERO_UNDEF: {
13131 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13132 "Unexpected custom legalisation");
13133
13134 SDValue NewOp0 =
13135 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13136 bool IsCTZ =
13137 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
13138 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
13139 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
13140 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13141 return;
13142 }
13143 case ISD::SDIV:
13144 case ISD::UDIV:
13145 case ISD::UREM: {
13146 MVT VT = N->getSimpleValueType(0);
13147 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
13148 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
13149 "Unexpected custom legalisation");
13150 // Don't promote division/remainder by constant since we should expand those
13151 // to multiply by magic constant.
13152 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
13153 if (N->getOperand(1).getOpcode() == ISD::Constant &&
13154 !isIntDivCheap(N->getValueType(0), Attr))
13155 return;
13156
13157 // If the input is i32, use ANY_EXTEND since the W instructions don't read
13158 // the upper 32 bits. For other types we need to sign or zero extend
13159 // based on the opcode.
13160 unsigned ExtOpc = ISD::ANY_EXTEND;
13161 if (VT != MVT::i32)
13162 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
13163 : ISD::ZERO_EXTEND;
13164
13165 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
13166 break;
13167 }
13168 case ISD::SADDO: {
13169 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13170 "Unexpected custom legalisation");
13171
13172 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
13173 // use the default legalization.
13174 if (!isa<ConstantSDNode>(N->getOperand(1)))
13175 return;
13176
13177 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
13178 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
13179 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
13180 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
13181 DAG.getValueType(MVT::i32));
13182
13183 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
13184
13185 // For an addition, the result should be less than one of the operands (LHS)
13186 // if and only if the other operand (RHS) is negative, otherwise there will
13187 // be overflow.
13188 // For a subtraction, the result should be less than one of the operands
13189 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
13190 // otherwise there will be overflow.
13191 EVT OType = N->getValueType(1);
13192 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
13193 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
13194
13195 SDValue Overflow =
13196 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
13197 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13198 Results.push_back(Overflow);
13199 return;
13200 }
13201 case ISD::UADDO:
13202 case ISD::USUBO: {
13203 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13204 "Unexpected custom legalisation");
13205 bool IsAdd = N->getOpcode() == ISD::UADDO;
13206 // Create an ADDW or SUBW.
13207 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13208 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13209 SDValue Res =
13210 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
13211 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
13212 DAG.getValueType(MVT::i32));
13213
13214 SDValue Overflow;
13215 if (IsAdd && isOneConstant(RHS)) {
13216 // Special case uaddo X, 1 overflowed if the addition result is 0.
13217 // The general case (X + C) < C is not necessarily beneficial. Although we
13218 // reduce the live range of X, we may introduce the materialization of
13219 // constant C, especially when the setcc result is used by a branch. We have
13220 // no compare-with-constant-and-branch instructions.
13221 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
13222 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
13223 } else if (IsAdd && isAllOnesConstant(RHS)) {
13224 // Special case uaddo X, -1 overflowed if X != 0.
13225 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
13226 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
13227 } else {
13228 // Sign extend the LHS and perform an unsigned compare with the ADDW
13229 // result. Since the inputs are sign extended from i32, this is equivalent
13230 // to comparing the lower 32 bits.
13231 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
13232 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
13233 IsAdd ? ISD::SETULT : ISD::SETUGT);
13234 }
13235
13236 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13237 Results.push_back(Overflow);
13238 return;
13239 }
13240 case ISD::UADDSAT:
13241 case ISD::USUBSAT: {
13242 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13243 !Subtarget.hasStdExtZbb() && "Unexpected custom legalisation");
13244 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
13245 // promotion for UADDO/USUBO.
13246 Results.push_back(expandAddSubSat(N, DAG));
13247 return;
13248 }
13249 case ISD::SADDSAT:
13250 case ISD::SSUBSAT: {
13251 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13252 "Unexpected custom legalisation");
13253 Results.push_back(expandAddSubSat(N, DAG));
13254 return;
13255 }
13256 case ISD::ABS: {
13257 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13258 "Unexpected custom legalisation");
13259
13260 if (Subtarget.hasStdExtZbb()) {
13261 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
13262 // This allows us to remember that the result is sign extended. Expanding
13263 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
13264 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
13265 N->getOperand(0));
13266 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
13267 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
13268 return;
13269 }
13270
13271 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
13272 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13273
13274 // Freeze the source so we can increase its use count.
13275 Src = DAG.getFreeze(Src);
13276
13277 // Copy sign bit to all bits using the sraiw pattern.
13278 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
13279 DAG.getValueType(MVT::i32));
13280 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
13281 DAG.getConstant(31, DL, MVT::i64));
13282
13283 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
13284 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
13285
13286 // NOTE: The result is only required to be anyextended, but sext is
13287 // consistent with type legalization of sub.
13288 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
13289 DAG.getValueType(MVT::i32));
13290 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
13291 return;
13292 }
13293 case ISD::BITCAST: {
13294 EVT VT = N->getValueType(0);
13295 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
13296 SDValue Op0 = N->getOperand(0);
13297 EVT Op0VT = Op0.getValueType();
13298 MVT XLenVT = Subtarget.getXLenVT();
13299 if (VT == MVT::i16 &&
13300 ((Op0VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
13301 (Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
13302 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
13303 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
13304 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
13305 Subtarget.hasStdExtFOrZfinx()) {
13306 SDValue FPConv =
13307 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
13308 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
13309 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && !Subtarget.is64Bit() &&
13310 Subtarget.hasStdExtDOrZdinx()) {
13311 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
13312 DAG.getVTList(MVT::i32, MVT::i32), Op0);
13313 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
13314 NewReg.getValue(0), NewReg.getValue(1));
13315 Results.push_back(RetReg);
13316 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
13317 isTypeLegal(Op0VT)) {
13318 // Custom-legalize bitcasts from fixed-length vector types to illegal
13319 // scalar types in order to improve codegen. Bitcast the vector to a
13320 // one-element vector type whose element type is the same as the result
13321 // type, and extract the first element.
13322 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
13323 if (isTypeLegal(BVT)) {
13324 SDValue BVec = DAG.getBitcast(BVT, Op0);
13325 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
13326 DAG.getVectorIdxConstant(0, DL)));
13327 }
13328 }
13329 break;
13330 }
13331 case RISCVISD::BREV8:
13332 case RISCVISD::ORC_B: {
13333 MVT VT = N->getSimpleValueType(0);
13334 MVT XLenVT = Subtarget.getXLenVT();
13335 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
13336 "Unexpected custom legalisation");
13337 assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
13338 (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
13339 "Unexpected extension");
13340 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
13341 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
13342 // ReplaceNodeResults requires we maintain the same type for the return
13343 // value.
13344 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
13345 break;
13346 }
13347 case ISD::EXTRACT_VECTOR_ELT: {
13348 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
13349 // type is illegal (currently only vXi64 RV32).
13350 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
13351 // transferred to the destination register. We issue two of these from the
13352 // upper- and lower- halves of the SEW-bit vector element, slid down to the
13353 // first element.
13354 SDValue Vec = N->getOperand(0);
13355 SDValue Idx = N->getOperand(1);
13356
13357 // The vector type hasn't been legalized yet so we can't issue target
13358 // specific nodes if it needs legalization.
13359 // FIXME: We would manually legalize if it's important.
13360 if (!isTypeLegal(Vec.getValueType()))
13361 return;
13362
13363 MVT VecVT = Vec.getSimpleValueType();
13364
13365 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
13366 VecVT.getVectorElementType() == MVT::i64 &&
13367 "Unexpected EXTRACT_VECTOR_ELT legalization");
13368
13369 // If this is a fixed vector, we need to convert it to a scalable vector.
13370 MVT ContainerVT = VecVT;
13371 if (VecVT.isFixedLengthVector()) {
13372 ContainerVT = getContainerForFixedLengthVector(VecVT);
13373 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
13374 }
13375
13376 MVT XLenVT = Subtarget.getXLenVT();
13377
13378 // Use a VL of 1 to avoid processing more elements than we need.
13379 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
13380
13381 // Unless the index is known to be 0, we must slide the vector down to get
13382 // the desired element into index 0.
13383 if (!isNullConstant(Idx)) {
13384 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
13385 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
13386 }
13387
13388 // Extract the lower XLEN bits of the correct vector element.
13389 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
13390
13391 // To extract the upper XLEN bits of the vector element, shift the first
13392 // element right by 32 bits and re-extract the lower XLEN bits.
13393 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13394 DAG.getUNDEF(ContainerVT),
13395 DAG.getConstant(32, DL, XLenVT), VL);
13396 SDValue LShr32 =
13397 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
13398 DAG.getUNDEF(ContainerVT), Mask, VL);
13399
13400 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
13401
13402 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
13403 break;
13404 }
13405 case ISD::INTRINSIC_WO_CHAIN: {
13406 unsigned IntNo = N->getConstantOperandVal(0);
13407 switch (IntNo) {
13408 default:
13410 "Don't know how to custom type legalize this intrinsic!");
13411 case Intrinsic::experimental_get_vector_length: {
13412 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
13413 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13414 return;
13415 }
13416 case Intrinsic::experimental_cttz_elts: {
13417 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
13418 Results.push_back(
13419 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
13420 return;
13421 }
13422 case Intrinsic::riscv_orc_b:
13423 case Intrinsic::riscv_brev8:
13424 case Intrinsic::riscv_sha256sig0:
13425 case Intrinsic::riscv_sha256sig1:
13426 case Intrinsic::riscv_sha256sum0:
13427 case Intrinsic::riscv_sha256sum1:
13428 case Intrinsic::riscv_sm3p0:
13429 case Intrinsic::riscv_sm3p1: {
13430 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13431 return;
13432 unsigned Opc;
13433 switch (IntNo) {
13434 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
13435 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
13436 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
13437 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
13438 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
13439 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
13440 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
13441 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
13442 }
13443
13444 SDValue NewOp =
13445 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13446 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
13447 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13448 return;
13449 }
13450 case Intrinsic::riscv_sm4ks:
13451 case Intrinsic::riscv_sm4ed: {
13452 unsigned Opc =
13453 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
13454 SDValue NewOp0 =
13455 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13456 SDValue NewOp1 =
13457 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13458 SDValue Res =
13459 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
13460 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13461 return;
13462 }
13463 case Intrinsic::riscv_mopr: {
13464 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13465 return;
13466 SDValue NewOp =
13467 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13468 SDValue Res = DAG.getNode(
13469 RISCVISD::MOPR, DL, MVT::i64, NewOp,
13470 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
13471 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13472 return;
13473 }
13474 case Intrinsic::riscv_moprr: {
13475 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13476 return;
13477 SDValue NewOp0 =
13478 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13479 SDValue NewOp1 =
13480 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13481 SDValue Res = DAG.getNode(
13482 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
13483 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
13484 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13485 return;
13486 }
13487 case Intrinsic::riscv_clmul: {
13488 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13489 return;
13490
13491 SDValue NewOp0 =
13492 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13493 SDValue NewOp1 =
13494 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13495 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
13496 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13497 return;
13498 }
13499 case Intrinsic::riscv_clmulh:
13500 case Intrinsic::riscv_clmulr: {
13501 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13502 return;
13503
13504 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
13505 // to the full 128-bit clmul result of multiplying two xlen values.
13506 // Perform clmulr or clmulh on the shifted values. Finally, extract the
13507 // upper 32 bits.
13508 //
13509 // The alternative is to mask the inputs to 32 bits and use clmul, but
13510 // that requires two shifts to mask each input without zext.w.
13511 // FIXME: If the inputs are known zero extended or could be freely
13512 // zero extended, the mask form would be better.
13513 SDValue NewOp0 =
13514 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13515 SDValue NewOp1 =
13516 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13517 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
13518 DAG.getConstant(32, DL, MVT::i64));
13519 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
13520 DAG.getConstant(32, DL, MVT::i64));
13521 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
13522 : RISCVISD::CLMULR;
13523 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
13524 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
13525 DAG.getConstant(32, DL, MVT::i64));
13526 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13527 return;
13528 }
13529 case Intrinsic::riscv_vmv_x_s: {
13530 EVT VT = N->getValueType(0);
13531 MVT XLenVT = Subtarget.getXLenVT();
13532 if (VT.bitsLT(XLenVT)) {
13533 // Simple case just extract using vmv.x.s and truncate.
13534 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
13535 Subtarget.getXLenVT(), N->getOperand(1));
13536 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
13537 return;
13538 }
13539
13540 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
13541 "Unexpected custom legalization");
13542
13543 // We need to do the move in two steps.
13544 SDValue Vec = N->getOperand(1);
13545 MVT VecVT = Vec.getSimpleValueType();
13546
13547 // First extract the lower XLEN bits of the element.
13548 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
13549
13550 // To extract the upper XLEN bits of the vector element, shift the first
13551 // element right by 32 bits and re-extract the lower XLEN bits.
13552 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
13553
13554 SDValue ThirtyTwoV =
13555 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
13556 DAG.getConstant(32, DL, XLenVT), VL);
13557 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
13558 DAG.getUNDEF(VecVT), Mask, VL);
13559 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
13560
13561 Results.push_back(
13562 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
13563 break;
13564 }
13565 }
13566 break;
13567 }
13568 case ISD::VECREDUCE_ADD:
13569 case ISD::VECREDUCE_AND:
13570 case ISD::VECREDUCE_OR:
13571 case ISD::VECREDUCE_XOR:
13572 case ISD::VECREDUCE_SMAX:
13573 case ISD::VECREDUCE_UMAX:
13574 case ISD::VECREDUCE_SMIN:
13575 case ISD::VECREDUCE_UMIN:
13576 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
13577 Results.push_back(V);
13578 break;
13579 case ISD::VP_REDUCE_ADD:
13580 case ISD::VP_REDUCE_AND:
13581 case ISD::VP_REDUCE_OR:
13582 case ISD::VP_REDUCE_XOR:
13583 case ISD::VP_REDUCE_SMAX:
13584 case ISD::VP_REDUCE_UMAX:
13585 case ISD::VP_REDUCE_SMIN:
13586 case ISD::VP_REDUCE_UMIN:
13587 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
13588 Results.push_back(V);
13589 break;
13590 case ISD::GET_ROUNDING: {
13591 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
13592 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
13593 Results.push_back(Res.getValue(0));
13594 Results.push_back(Res.getValue(1));
13595 break;
13596 }
13597 }
13598}
13599
13600/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
13601/// which corresponds to it.
13602static unsigned getVecReduceOpcode(unsigned Opc) {
13603 switch (Opc) {
13604 default:
13605 llvm_unreachable("Unhandled binary to transform reduction");
13606 case ISD::ADD:
13607 return ISD::VECREDUCE_ADD;
13608 case ISD::UMAX:
13609 return ISD::VECREDUCE_UMAX;
13610 case ISD::SMAX:
13611 return ISD::VECREDUCE_SMAX;
13612 case ISD::UMIN:
13613 return ISD::VECREDUCE_UMIN;
13614 case ISD::SMIN:
13615 return ISD::VECREDUCE_SMIN;
13616 case ISD::AND:
13617 return ISD::VECREDUCE_AND;
13618 case ISD::OR:
13619 return ISD::VECREDUCE_OR;
13620 case ISD::XOR:
13621 return ISD::VECREDUCE_XOR;
13622 case ISD::FADD:
13623 // Note: This is the associative form of the generic reduction opcode.
13624 return ISD::VECREDUCE_FADD;
13625 }
13626}
13627
13628/// Perform two related transforms whose purpose is to incrementally recognize
13629/// an explode_vector followed by scalar reduction as a vector reduction node.
13630/// This exists to recover from a deficiency in SLP which can't handle
13631/// forests with multiple roots sharing common nodes. In some cases, one
13632/// of the trees will be vectorized, and the other will remain (unprofitably)
13633/// scalarized.
13634static SDValue
13635 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
13636 const RISCVSubtarget &Subtarget) {
13637
13638 // These transforms need to run before all integer types have been legalized
13639 // to i64 (so that the vector element type matches the add type), and while
13640 // it's safe to introduce odd sized vector types.
13641 if (DAG.NewNodesMustHaveLegalTypes)
13642 return SDValue();
13643
13644 // Without V, this transform isn't useful. We could form the (illegal)
13645 // operations and let them be scalarized again, but there's really no point.
13646 if (!Subtarget.hasVInstructions())
13647 return SDValue();
13648
13649 const SDLoc DL(N);
13650 const EVT VT = N->getValueType(0);
13651 const unsigned Opc = N->getOpcode();
13652
13653 // For FADD, we only handle the case with reassociation allowed. We
13654 // could handle strict reduction order, but at the moment, there's no
13655 // known reason to, and the complexity isn't worth it.
13656 // TODO: Handle fminnum and fmaxnum here
13657 if (!VT.isInteger() &&
13658 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
13659 return SDValue();
13660
13661 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
13662 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
13663 "Inconsistent mappings");
13664 SDValue LHS = N->getOperand(0);
13665 SDValue RHS = N->getOperand(1);
13666
13667 if (!LHS.hasOneUse() || !RHS.hasOneUse())
13668 return SDValue();
13669
13670 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
13671 std::swap(LHS, RHS);
13672
13673 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
13674 !isa<ConstantSDNode>(RHS.getOperand(1)))
13675 return SDValue();
13676
13677 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
13678 SDValue SrcVec = RHS.getOperand(0);
13679 EVT SrcVecVT = SrcVec.getValueType();
13680 assert(SrcVecVT.getVectorElementType() == VT);
13681 if (SrcVecVT.isScalableVector())
13682 return SDValue();
13683
13684 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
13685 return SDValue();
13686
13687 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
13688 // reduce_op (extract_subvector [2 x VT] from V). This will form the
13689 // root of our reduction tree. TODO: We could extend this to any two
13690 // adjacent aligned constant indices if desired.
13691 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13692 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
13693 uint64_t LHSIdx =
13694 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
13695 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
13696 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
13697 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
13698 DAG.getVectorIdxConstant(0, DL));
13699 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
13700 }
13701 }
13702
13703 // Match (binop (reduce (extract_subvector V, 0),
13704 // (extract_vector_elt V, sizeof(SubVec))))
13705 // into a reduction of one more element from the original vector V.
13706 if (LHS.getOpcode() != ReduceOpc)
13707 return SDValue();
13708
13709 SDValue ReduceVec = LHS.getOperand(0);
13710 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
13711 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
13712 isNullConstant(ReduceVec.getOperand(1)) &&
13713 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
13714 // For illegal types (e.g. 3xi32), most will be combined again into a
13715 // wider (hopefully legal) type. If this is a terminal state, we are
13716 // relying on type legalization here to produce something reasonable
13717 // and this lowering quality could probably be improved. (TODO)
13718 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
13719 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
13720 DAG.getVectorIdxConstant(0, DL));
13721 return DAG.getNode(ReduceOpc, DL, VT, Vec,
13722 ReduceVec->getFlags() & N->getFlags());
13723 }
13724
13725 return SDValue();
13726}
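// Worked example of the two matches above (illustrative):
//   (add (extractelt %v, 0), (extractelt %v, 1))
//     -> (vecreduce_add (extract_subvector %v, 0))      ; 2-element subvector
//   (add (vecreduce_add (extract_subvector %v, 0)),     ; 2-element subvector
//        (extractelt %v, 2))
//     -> (vecreduce_add (extract_subvector %v, 0))      ; 3-element subvector
// so repeated application grows the reduction one extracted lane at a time.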
13727
13728
13729// Try to fold (<bop> x, (reduction.<bop> vec, start))
13730 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
13731 const RISCVSubtarget &Subtarget) {
13732 auto BinOpToRVVReduce = [](unsigned Opc) {
13733 switch (Opc) {
13734 default:
13735 llvm_unreachable("Unhandled binary to transform reduction");
13736 case ISD::ADD:
13737 return RISCVISD::VECREDUCE_ADD_VL;
13738 case ISD::UMAX:
13739 return RISCVISD::VECREDUCE_UMAX_VL;
13740 case ISD::SMAX:
13741 return RISCVISD::VECREDUCE_SMAX_VL;
13742 case ISD::UMIN:
13743 return RISCVISD::VECREDUCE_UMIN_VL;
13744 case ISD::SMIN:
13745 return RISCVISD::VECREDUCE_SMIN_VL;
13746 case ISD::AND:
13747 return RISCVISD::VECREDUCE_AND_VL;
13748 case ISD::OR:
13749 return RISCVISD::VECREDUCE_OR_VL;
13750 case ISD::XOR:
13751 return RISCVISD::VECREDUCE_XOR_VL;
13752 case ISD::FADD:
13753 return RISCVISD::VECREDUCE_FADD_VL;
13754 case ISD::FMAXNUM:
13755 return RISCVISD::VECREDUCE_FMAX_VL;
13756 case ISD::FMINNUM:
13757 return RISCVISD::VECREDUCE_FMIN_VL;
13758 }
13759 };
13760
13761 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
13762 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13763 isNullConstant(V.getOperand(1)) &&
13764 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
13765 };
13766
13767 unsigned Opc = N->getOpcode();
13768 unsigned ReduceIdx;
13769 if (IsReduction(N->getOperand(0), Opc))
13770 ReduceIdx = 0;
13771 else if (IsReduction(N->getOperand(1), Opc))
13772 ReduceIdx = 1;
13773 else
13774 return SDValue();
13775
13776 // Skip if FADD disallows reassociation but the combiner needs it.
13777 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
13778 return SDValue();
13779
13780 SDValue Extract = N->getOperand(ReduceIdx);
13781 SDValue Reduce = Extract.getOperand(0);
13782 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
13783 return SDValue();
13784
13785 SDValue ScalarV = Reduce.getOperand(2);
13786 EVT ScalarVT = ScalarV.getValueType();
13787 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
13788 ScalarV.getOperand(0)->isUndef() &&
13789 isNullConstant(ScalarV.getOperand(2)))
13790 ScalarV = ScalarV.getOperand(1);
13791
13792 // Make sure that ScalarV is a splat with VL=1.
13793 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
13794 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
13795 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
13796 return SDValue();
13797
13798 if (!isNonZeroAVL(ScalarV.getOperand(2)))
13799 return SDValue();
13800
13801 // Check the scalar of ScalarV is neutral element
13802 // TODO: Deal with value other than neutral element.
13803 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
13804 0))
13805 return SDValue();
13806
13807 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
13808 // FIXME: We might be able to improve this if operand 0 is undef.
13809 if (!isNonZeroAVL(Reduce.getOperand(5)))
13810 return SDValue();
13811
13812 SDValue NewStart = N->getOperand(1 - ReduceIdx);
13813
13814 SDLoc DL(N);
13815 SDValue NewScalarV =
13816 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
13817 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
13818
13819 // If we looked through an INSERT_SUBVECTOR we need to restore it.
13820 if (ScalarVT != ScalarV.getValueType())
13821 NewScalarV =
13822 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
13823 NewScalarV, DAG.getVectorIdxConstant(0, DL));
13824
13825 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
13826 NewScalarV, Reduce.getOperand(3),
13827 Reduce.getOperand(4), Reduce.getOperand(5)};
13828 SDValue NewReduce =
13829 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
13830 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
13831 Extract.getOperand(1));
13832}
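// Illustrative sketch of the fold: for
//   (add %x, (extractelt (<reduce> ..., (splat 0) /*neutral start*/, ...), 0))
// the neutral start value of the reduction is replaced by a splat of %x and
// the outer add disappears, leaving
//   (extractelt (<reduce> ..., (splat %x), ...), 0)
// (operand positions are schematic; see the Ops array built above).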
13833
13834// Optimize (add (shl x, c0), (shl y, c1)) ->
13835 // (SLLI (SH*ADD x, y), c0), if c1-c0 equals 1, 2, or 3.
13836 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
13837 const RISCVSubtarget &Subtarget) {
13838 // Perform this optimization only in the zba extension.
13839 if (!Subtarget.hasStdExtZba())
13840 return SDValue();
13841
13842 // Skip for vector types and larger types.
13843 EVT VT = N->getValueType(0);
13844 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
13845 return SDValue();
13846
13847 // The two operand nodes must be SHL and have no other use.
13848 SDValue N0 = N->getOperand(0);
13849 SDValue N1 = N->getOperand(1);
13850 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
13851 !N0->hasOneUse() || !N1->hasOneUse())
13852 return SDValue();
13853
13854 // Check c0 and c1.
13855 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13856 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
13857 if (!N0C || !N1C)
13858 return SDValue();
13859 int64_t C0 = N0C->getSExtValue();
13860 int64_t C1 = N1C->getSExtValue();
13861 if (C0 <= 0 || C1 <= 0)
13862 return SDValue();
13863
13864 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
13865 int64_t Bits = std::min(C0, C1);
13866 int64_t Diff = std::abs(C0 - C1);
13867 if (Diff != 1 && Diff != 2 && Diff != 3)
13868 return SDValue();
13869
13870 // Build nodes.
13871 SDLoc DL(N);
13872 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
13873 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
13874 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
13875 DAG.getConstant(Diff, DL, VT), NS);
13876 return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
13877}
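// Worked example: (add (shl %x, 1), (shl %y, 3)) has C0 = 1, C1 = 3, so
// Diff = 2 and Bits = 1, giving
//   (shl (SHL_ADD %y, 2, %x), 1)
// i.e. a sh2add followed by a single slli instead of two shifts and an add.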
13878
13879// Combine a constant select operand into its use:
13880//
13881// (and (select cond, -1, c), x)
13882// -> (select cond, x, (and x, c)) [AllOnes=1]
13883// (or (select cond, 0, c), x)
13884// -> (select cond, x, (or x, c)) [AllOnes=0]
13885// (xor (select cond, 0, c), x)
13886// -> (select cond, x, (xor x, c)) [AllOnes=0]
13887// (add (select cond, 0, c), x)
13888// -> (select cond, x, (add x, c)) [AllOnes=0]
13889// (sub x, (select cond, 0, c))
13890// -> (select cond, x, (sub x, c)) [AllOnes=0]
13891 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
13892 SelectionDAG &DAG, bool AllOnes,
13893 const RISCVSubtarget &Subtarget) {
13894 EVT VT = N->getValueType(0);
13895
13896 // Skip vectors.
13897 if (VT.isVector())
13898 return SDValue();
13899
13900 if (!Subtarget.hasConditionalMoveFusion()) {
13901 // (select cond, x, (and x, c)) has custom lowering with Zicond.
13902 if ((!Subtarget.hasStdExtZicond() &&
13903 !Subtarget.hasVendorXVentanaCondOps()) ||
13904 N->getOpcode() != ISD::AND)
13905 return SDValue();
13906
13907 // Maybe harmful when the condition code has multiple uses.
13908 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
13909 return SDValue();
13910
13911 // Maybe harmful when VT is wider than XLen.
13912 if (VT.getSizeInBits() > Subtarget.getXLen())
13913 return SDValue();
13914 }
13915
13916 if ((Slct.getOpcode() != ISD::SELECT &&
13917 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
13918 !Slct.hasOneUse())
13919 return SDValue();
13920
13921 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
13922 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
13923 };
13924
13925 bool SwapSelectOps;
13926 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
13927 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
13928 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
13929 SDValue NonConstantVal;
13930 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
13931 SwapSelectOps = false;
13932 NonConstantVal = FalseVal;
13933 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
13934 SwapSelectOps = true;
13935 NonConstantVal = TrueVal;
13936 } else
13937 return SDValue();
13938
13939 // Slct is now known to be the desired identity constant when CC is true.
13940 TrueVal = OtherOp;
13941 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
13942 // Unless SwapSelectOps says the condition should be false.
13943 if (SwapSelectOps)
13944 std::swap(TrueVal, FalseVal);
13945
13946 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
13947 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
13948 {Slct.getOperand(0), Slct.getOperand(1),
13949 Slct.getOperand(2), TrueVal, FalseVal});
13950
13951 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
13952 {Slct.getOperand(0), TrueVal, FalseVal});
13953}
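// Concrete instance of the AllOnes=1 pattern above (a sketch):
//   (and (select %cond, -1, 6), %x) -> (select %cond, %x, (and %x, 6))
// which, with Zicond or XVentanaCondOps, can then use the custom select
// lowering mentioned at the top of this function.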
13954
13955// Attempt combineSelectAndUse on each operand of a commutative operator N.
13956 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
13957 bool AllOnes,
13958 const RISCVSubtarget &Subtarget) {
13959 SDValue N0 = N->getOperand(0);
13960 SDValue N1 = N->getOperand(1);
13961 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
13962 return Result;
13963 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
13964 return Result;
13965 return SDValue();
13966}
13967
13968// Transform (add (mul x, c0), c1) ->
13969// (add (mul (add x, c1/c0), c0), c1%c0).
13970// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
13971// that should be excluded is when c0*(c1/c0) is simm12, which will lead
13972// to an infinite loop in DAGCombine if transformed.
13973// Or transform (add (mul x, c0), c1) ->
13974// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
13975// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
13976// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
13977// lead to an infinite loop in DAGCombine if transformed.
13978// Or transform (add (mul x, c0), c1) ->
13979// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
13980// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
13981// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
13982// lead to an infinite loop in DAGCombine if transformed.
13983// Or transform (add (mul x, c0), c1) ->
13984// (mul (add x, c1/c0), c0).
13985// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
13986 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
13987 const RISCVSubtarget &Subtarget) {
13988 // Skip for vector types and larger types.
13989 EVT VT = N->getValueType(0);
13990 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
13991 return SDValue();
13992 // The first operand node must be a MUL and has no other use.
13993 SDValue N0 = N->getOperand(0);
13994 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
13995 return SDValue();
13996 // Check if c0 and c1 match above conditions.
13997 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13998 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
13999 if (!N0C || !N1C)
14000 return SDValue();
14001 // If N0C has multiple uses it's possible one of the cases in
14002 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
14003 // in an infinite loop.
14004 if (!N0C->hasOneUse())
14005 return SDValue();
14006 int64_t C0 = N0C->getSExtValue();
14007 int64_t C1 = N1C->getSExtValue();
14008 int64_t CA, CB;
14009 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
14010 return SDValue();
14011 // Search for proper CA (non-zero) and CB that both are simm12.
14012 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
14013 !isInt<12>(C0 * (C1 / C0))) {
14014 CA = C1 / C0;
14015 CB = C1 % C0;
14016 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
14017 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
14018 CA = C1 / C0 + 1;
14019 CB = C1 % C0 - C0;
14020 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
14021 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
14022 CA = C1 / C0 - 1;
14023 CB = C1 % C0 + C0;
14024 } else
14025 return SDValue();
14026 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
14027 SDLoc DL(N);
14028 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
14029 DAG.getSignedConstant(CA, DL, VT));
14030 SDValue New1 =
14031 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getSignedConstant(C0, DL, VT));
14032 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getSignedConstant(CB, DL, VT));
14033}
14034
14035// add (zext, zext) -> zext (add (zext, zext))
14036// sub (zext, zext) -> sext (sub (zext, zext))
14037// mul (zext, zext) -> zext (mul (zext, zext))
14038// sdiv (zext, zext) -> zext (sdiv (zext, zext))
14039// udiv (zext, zext) -> zext (udiv (zext, zext))
14040// srem (zext, zext) -> zext (srem (zext, zext))
14041// urem (zext, zext) -> zext (urem (zext, zext))
14042//
14043// where the sum of the extend widths matches, and the range of the bin op
14044// fits inside the width of the narrower bin op. (For profitability on rvv, we
14045// use a power of two for both inner and outer extend.)
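// For example, (add (zext nxv2i8 a to nxv2i32), (zext nxv2i8 b to nxv2i32))
// becomes (zext (add (zext a to nxv2i16), (zext b to nxv2i16)) to nxv2i32):
// the sum of two i8 values always fits in 16 bits, so the narrower add loses
// nothing.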
14046static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
14047
14048 EVT VT = N->getValueType(0);
14049 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
14050 return SDValue();
14051
14052 SDValue N0 = N->getOperand(0);
14053 SDValue N1 = N->getOperand(1);
14054 if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
14055 return SDValue();
14056 if (!N0.hasOneUse() || !N1.hasOneUse())
14057 return SDValue();
14058
14059 SDValue Src0 = N0.getOperand(0);
14060 SDValue Src1 = N1.getOperand(0);
14061 EVT SrcVT = Src0.getValueType();
14062 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
14063 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
14064 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
14065 return SDValue();
14066
14067 LLVMContext &C = *DAG.getContext();
14068 EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);
14069 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
14070
14071 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
14072 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
14073
14074 // Src0 and Src1 are zero extended, so they're always positive if signed.
14075 //
14076 // sub can produce a negative from two positive operands, so it needs to be
14077 // sign extended. Other nodes produce a positive from two positive operands, so
14078 // zero extend instead.
14079 unsigned OuterExtend =
14080 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
14081
14082 return DAG.getNode(
14083 OuterExtend, SDLoc(N), VT,
14084 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
14085}
14086
14087// Try to turn (add (xor bool, 1) -1) into (neg bool).
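// Since bool is known to be 0 or 1, (xor bool, 1) == 1 - bool, and therefore
// (add (xor bool, 1), -1) == (1 - bool) - 1 == -bool, i.e. (neg bool).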
14088static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
14089 SDValue N0 = N->getOperand(0);
14090 SDValue N1 = N->getOperand(1);
14091 EVT VT = N->getValueType(0);
14092 SDLoc DL(N);
14093
14094 // RHS should be -1.
14095 if (!isAllOnesConstant(N1))
14096 return SDValue();
14097
14098 // Look for (xor X, 1).
14099 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
14100 return SDValue();
14101
14102 // First xor input should be 0 or 1.
14103 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
14104 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
14105 return SDValue();
14106
14107 // Emit a negate of the setcc.
14108 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
14109 N0.getOperand(0));
14110}
14111
14112static SDValue performADDCombine(SDNode *N,
14113 TargetLowering::DAGCombinerInfo &DCI,
14114 const RISCVSubtarget &Subtarget) {
14115 SelectionDAG &DAG = DCI.DAG;
14116 if (SDValue V = combineAddOfBooleanXor(N, DAG))
14117 return V;
14118 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
14119 return V;
14120 if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer())
14121 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
14122 return V;
14123 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14124 return V;
14125 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14126 return V;
14127 if (SDValue V = combineBinOpOfZExt(N, DAG))
14128 return V;
14129
14130 // fold (add (select lhs, rhs, cc, 0, y), x) ->
14131 // (select lhs, rhs, cc, x, (add x, y))
14132 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
14133}
14134
14135// Try to turn a sub boolean RHS and constant LHS into an addi.
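// For example, (sub 5, (setcc x, y, eq)) becomes (add (setcc x, y, ne), 4):
// for b in {0, 1}, C - b == (1 - b) + (C - 1), and 1 - b is the inverted setcc.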
14136static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
14137 SDValue N0 = N->getOperand(0);
14138 SDValue N1 = N->getOperand(1);
14139 EVT VT = N->getValueType(0);
14140 SDLoc DL(N);
14141
14142 // Require a constant LHS.
14143 auto *N0C = dyn_cast<ConstantSDNode>(N0);
14144 if (!N0C)
14145 return SDValue();
14146
14147 // All our optimizations involve subtracting 1 from the immediate and forming
14148 // an ADDI. Make sure the new immediate is valid for an ADDI.
14149 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
14150 if (!ImmValMinus1.isSignedIntN(12))
14151 return SDValue();
14152
14153 SDValue NewLHS;
14154 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
14155 // (sub constant, (setcc x, y, eq/neq)) ->
14156 // (add (setcc x, y, neq/eq), constant - 1)
14157 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
14158 EVT SetCCOpVT = N1.getOperand(0).getValueType();
14159 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
14160 return SDValue();
14161 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
14162 NewLHS =
14163 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
14164 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
14165 N1.getOperand(0).getOpcode() == ISD::SETCC) {
14166 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
14167 // Since setcc returns a bool the xor is equivalent to 1-setcc.
14168 NewLHS = N1.getOperand(0);
14169 } else
14170 return SDValue();
14171
14172 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
14173 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
14174}
14175
14176// Looks for (sub (shl X, 8-Y), (shr X, Y)) where the Y-th bit in each byte is
14177// potentially set. It is fine for Y to be 0, meaning that (sub (shl X, 8), X)
14178// is also valid. Replace with (orc.b X). For example, 0b0000_1000_0000_1000 is
14179// valid with Y=3, while 0b0000_1000_0000_0100 is not.
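// Intuition for the Y=0 case: (sub (shl X, 8), X) == X * 255. If every byte of
// X is 0 or 1, each byte of the product is 0x00 or 0xFF with no carry between
// bytes, which is exactly what orc.b computes.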
14180static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
14181 const RISCVSubtarget &Subtarget) {
14182 if (!Subtarget.hasStdExtZbb())
14183 return SDValue();
14184
14185 EVT VT = N->getValueType(0);
14186
14187 if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
14188 return SDValue();
14189
14190 SDValue N0 = N->getOperand(0);
14191 SDValue N1 = N->getOperand(1);
14192
14193 if (N0->getOpcode() != ISD::SHL)
14194 return SDValue();
14195
14196 auto *ShAmtCLeft = dyn_cast<ConstantSDNode>(N0.getOperand(1));
14197 if (!ShAmtCLeft)
14198 return SDValue();
14199 unsigned ShiftedAmount = 8 - ShAmtCLeft->getZExtValue();
14200
14201 if (ShiftedAmount >= 8)
14202 return SDValue();
14203
14204 SDValue LeftShiftOperand = N0->getOperand(0);
14205 SDValue RightShiftOperand = N1;
14206
14207 if (ShiftedAmount != 0) { // Right operand must be a right shift.
14208 if (N1->getOpcode() != ISD::SRL)
14209 return SDValue();
14210 auto *ShAmtCRight = dyn_cast<ConstantSDNode>(N1.getOperand(1));
14211 if (!ShAmtCRight || ShAmtCRight->getZExtValue() != ShiftedAmount)
14212 return SDValue();
14213 RightShiftOperand = N1.getOperand(0);
14214 }
14215
14216 // At least one shift should have a single use.
14217 if (!N0.hasOneUse() && (ShiftedAmount == 0 || !N1.hasOneUse()))
14218 return SDValue();
14219
14220 if (LeftShiftOperand != RightShiftOperand)
14221 return SDValue();
14222
14223 APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0x1));
14224 Mask <<= ShiftedAmount;
14225 // Check that X indeed has the right shape (only the Y-th bit can be set in
14226 // every byte).
14227 if (!DAG.MaskedValueIsZero(LeftShiftOperand, ~Mask))
14228 return SDValue();
14229
14230 return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, LeftShiftOperand);
14231}
14232
14233static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
14234 const RISCVSubtarget &Subtarget) {
14235 if (SDValue V = combineSubOfBoolean(N, DAG))
14236 return V;
14237
14238 EVT VT = N->getValueType(0);
14239 SDValue N0 = N->getOperand(0);
14240 SDValue N1 = N->getOperand(1);
14241 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
14242 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
14243 isNullConstant(N1.getOperand(1))) {
14244 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
14245 if (CCVal == ISD::SETLT) {
14246 SDLoc DL(N);
14247 unsigned ShAmt = N0.getValueSizeInBits() - 1;
14248 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
14249 DAG.getConstant(ShAmt, DL, VT));
14250 }
14251 }
14252
14253 if (SDValue V = combineBinOpOfZExt(N, DAG))
14254 return V;
14255 if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
14256 return V;
14257
14258 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
14259 // (select lhs, rhs, cc, x, (sub x, y))
14260 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
14261}
14262
14263// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
14264// Legalizing setcc can introduce xors like this. Doing this transform reduces
14265// the number of xors and may allow the xor to fold into a branch condition.
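// For example, with X and Y known to be 0/1, (and (xor X, 1), (xor Y, 1))
// becomes (xor (or X, Y), 1), replacing two xors with one.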
14266static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
14267 SDValue N0 = N->getOperand(0);
14268 SDValue N1 = N->getOperand(1);
14269 bool IsAnd = N->getOpcode() == ISD::AND;
14270
14271 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
14272 return SDValue();
14273
14274 if (!N0.hasOneUse() || !N1.hasOneUse())
14275 return SDValue();
14276
14277 SDValue N01 = N0.getOperand(1);
14278 SDValue N11 = N1.getOperand(1);
14279
14280 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
14281 // (xor X, -1) based on the upper bits of the other operand being 0. If the
14282 // operation is And, allow one of the Xors to use -1.
14283 if (isOneConstant(N01)) {
14284 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
14285 return SDValue();
14286 } else if (isOneConstant(N11)) {
14287 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
14288 if (!(IsAnd && isAllOnesConstant(N01)))
14289 return SDValue();
14290 } else
14291 return SDValue();
14292
14293 EVT VT = N->getValueType(0);
14294
14295 SDValue N00 = N0.getOperand(0);
14296 SDValue N10 = N1.getOperand(0);
14297
14298 // The LHS of the xors needs to be 0/1.
14299 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
14300 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
14301 return SDValue();
14302
14303 // Invert the opcode and insert a new xor.
14304 SDLoc DL(N);
14305 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
14306 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
14307 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
14308}
14309
14310// Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to
14311// (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
14312// value to an unsigned value. This will be lowered to vmax and a series of
14313// vnclipu instructions later. This can be extended to truncated types other
14314// than i8 by replacing 256 and 255 with the equivalent constants for the
14315// type.
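// For i8, e.g., a source value of 300 saturates to 255 and -5 saturates to 0,
// matching the behaviour of the original select-based pattern.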
14316static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) {
14317 EVT VT = N->getValueType(0);
14318 SDValue N0 = N->getOperand(0);
14319 EVT SrcVT = N0.getValueType();
14320
14321 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14322 if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
14323 return SDValue();
14324
14325 if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())
14326 return SDValue();
14327
14328 SDValue Cond = N0.getOperand(0);
14329 SDValue True = N0.getOperand(1);
14330 SDValue False = N0.getOperand(2);
14331
14332 if (Cond.getOpcode() != ISD::SETCC)
14333 return SDValue();
14334
14335 // FIXME: Support the version of this pattern with the select operands
14336 // swapped.
14337 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
14338 if (CCVal != ISD::SETULT)
14339 return SDValue();
14340
14341 SDValue CondLHS = Cond.getOperand(0);
14342 SDValue CondRHS = Cond.getOperand(1);
14343
14344 if (CondLHS != True)
14345 return SDValue();
14346
14347 unsigned ScalarBits = VT.getScalarSizeInBits();
14348
14349 // FIXME: Support other constants.
14350 ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS);
14351 if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
14352 return SDValue();
14353
14354 if (False.getOpcode() != ISD::SIGN_EXTEND)
14355 return SDValue();
14356
14357 False = False.getOperand(0);
14358
14359 if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True)
14360 return SDValue();
14361
14362 ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1));
14363 if (!FalseRHSC || !FalseRHSC->isZero())
14364 return SDValue();
14365
14366 ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();
14367 if (CCVal2 != ISD::SETGT)
14368 return SDValue();
14369
14370 // Emit the signed to unsigned saturation pattern.
14371 SDLoc DL(N);
14372 SDValue Max =
14373 DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT));
14374 SDValue Min =
14375 DAG.getNode(ISD::SMIN, DL, SrcVT, Max,
14376 DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT));
14377 return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
14378}
14379
14380static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
14381 const RISCVSubtarget &Subtarget) {
14382 SDValue N0 = N->getOperand(0);
14383 EVT VT = N->getValueType(0);
14384
14385 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
14386 // extending X. This is safe since we only need the LSB after the shift and
14387 // shift amounts larger than 31 would produce poison. If we wait until
14388 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
14389 // to use a BEXT instruction.
14390 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
14391 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
14392 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
14393 SDLoc DL(N0);
14394 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
14395 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
14396 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
14397 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
14398 }
14399
14400 return combineTruncSelectToSMaxUSat(N, DAG);
14401}
14402
14403// Combines two comparison operations and a logic operation into one selection
14404// operation (min, max) and a logic operation. Returns the newly constructed
14405// node if the conditions for the optimization are satisfied.
14406static SDValue performANDCombine(SDNode *N,
14407 TargetLowering::DAGCombinerInfo &DCI,
14408 const RISCVSubtarget &Subtarget) {
14409 SelectionDAG &DAG = DCI.DAG;
14410
14411 SDValue N0 = N->getOperand(0);
14412 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
14413 // extending X. This is safe since we only need the LSB after the shift and
14414 // shift amounts larger than 31 would produce poison. If we wait until
14415 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
14416 // to use a BEXT instruction.
14417 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
14418 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
14419 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
14420 N0.hasOneUse()) {
14421 SDLoc DL(N);
14422 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
14423 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
14424 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
14425 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
14426 DAG.getConstant(1, DL, MVT::i64));
14427 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
14428 }
14429
14430 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14431 return V;
14432 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14433 return V;
14434
14435 if (DCI.isAfterLegalizeDAG())
14436 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
14437 return V;
14438
14439 // fold (and (select lhs, rhs, cc, -1, y), x) ->
14440 // (select lhs, rhs, cc, x, (and x, y))
14441 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
14442}
14443
14444// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
14445// FIXME: Generalize to other binary operators with same operand.
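// For example, (or (czero_eqz (xor a, 1), c), (czero_nez (xor b, 1), c))
// becomes (xor (or (czero_eqz a, c), (czero_nez b, c)), 1), leaving a single
// xor after the select idiom.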
14446static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
14447 SelectionDAG &DAG) {
14448 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
14449
14450 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
14451 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
14452 !N0.hasOneUse() || !N1.hasOneUse())
14453 return SDValue();
14454
14455 // Should have the same condition.
14456 SDValue Cond = N0.getOperand(1);
14457 if (Cond != N1.getOperand(1))
14458 return SDValue();
14459
14460 SDValue TrueV = N0.getOperand(0);
14461 SDValue FalseV = N1.getOperand(0);
14462
14463 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
14464 TrueV.getOperand(1) != FalseV.getOperand(1) ||
14465 !isOneConstant(TrueV.getOperand(1)) ||
14466 !TrueV.hasOneUse() || !FalseV.hasOneUse())
14467 return SDValue();
14468
14469 EVT VT = N->getValueType(0);
14470 SDLoc DL(N);
14471
14472 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
14473 Cond);
14474 SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
14475 Cond);
14476 SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
14477 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
14478}
14479
14480static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
14481 const RISCVSubtarget &Subtarget) {
14482 SelectionDAG &DAG = DCI.DAG;
14483
14484 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14485 return V;
14486 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14487 return V;
14488
14489 if (DCI.isAfterLegalizeDAG())
14490 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
14491 return V;
14492
14493 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
14494 // We may be able to pull a common operation out of the true and false value.
14495 SDValue N0 = N->getOperand(0);
14496 SDValue N1 = N->getOperand(1);
14497 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
14498 return V;
14499 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
14500 return V;
14501
14502 // fold (or (select cond, 0, y), x) ->
14503 // (select cond, x, (or x, y))
14504 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
14505}
14506
14507static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
14508 const RISCVSubtarget &Subtarget) {
14509 SDValue N0 = N->getOperand(0);
14510 SDValue N1 = N->getOperand(1);
14511
14512 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
14513 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
14514 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
14515 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
14516 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
14517 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
14518 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
14519 SDLoc DL(N);
14520 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
14521 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
14522 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
14523 SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
14524 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
14525 }
14526
14527 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
14528 // NOTE: Assumes ROL being legal means ROLW is legal.
14529 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14530 if (N0.getOpcode() == RISCVISD::SLLW &&
14531 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
14532 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
14533 SDLoc DL(N);
14534 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
14535 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
14536 }
14537
14538 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
14539 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
14540 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
14541 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
14542 if (ConstN00 && CC == ISD::SETLT) {
14543 EVT VT = N0.getValueType();
14544 SDLoc DL(N0);
14545 const APInt &Imm = ConstN00->getAPIntValue();
14546 if ((Imm + 1).isSignedIntN(12))
14547 return DAG.getSetCC(DL, VT, N0.getOperand(1),
14548 DAG.getConstant(Imm + 1, DL, VT), CC);
14549 }
14550 }
14551
14552 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14553 return V;
14554 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14555 return V;
14556
14557 // fold (xor (select cond, 0, y), x) ->
14558 // (select cond, x, (xor x, y))
14559 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
14560}
14561
14562// Try to expand a scalar multiply to a faster sequence.
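// A few illustrative foldings, assuming shNadd is available via Zba or
// XTheadBa:
//   mul x, 20 (== 5 * 4)     -> (shl (sh2add x, x), 2)
//   mul x, 25 (== 5 * 5)     -> (sh2add (sh2add x, x), (sh2add x, x))
//   mul x, 11 (== 5 * 2 + 1) -> (sh1add (sh2add x, x), x)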
14563static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
14564 TargetLowering::DAGCombinerInfo &DCI,
14565 const RISCVSubtarget &Subtarget) {
14566
14567 EVT VT = N->getValueType(0);
14568
14569 // LI + MUL is usually smaller than the alternative sequence.
14570 if (DAG.getMachineFunction().getFunction().hasMinSize())
14571 return SDValue();
14572
14573 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14574 return SDValue();
14575
14576 if (VT != Subtarget.getXLenVT())
14577 return SDValue();
14578
14579 const bool HasShlAdd =
14580 Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa();
14581
14582 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
14583 if (!CNode)
14584 return SDValue();
14585 uint64_t MulAmt = CNode->getZExtValue();
14586
14587 // WARNING: The code below is knowingly incorrect with regards to undef semantics.
14588 // We're adding additional uses of X here, and in principle, we should be freezing
14589 // X before doing so. However, adding freeze here causes real regressions, and no
14590 // other target properly freezes X in these cases either.
14591 SDValue X = N->getOperand(0);
14592
14593 if (HasShlAdd) {
14594 for (uint64_t Divisor : {3, 5, 9}) {
14595 if (MulAmt % Divisor != 0)
14596 continue;
14597 uint64_t MulAmt2 = MulAmt / Divisor;
14598 // 3/5/9 * 2^N -> shl (shXadd X, X), N
14599 if (isPowerOf2_64(MulAmt2)) {
14600 SDLoc DL(N);
14601 SDValue X = N->getOperand(0);
14602 // Put the shift first if we can fold a zext into the
14603 // shift forming a slli.uw.
14604 if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
14605 X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
14606 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
14607 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
14608 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
14609 DAG.getConstant(Log2_64(Divisor - 1), DL, VT),
14610 Shl);
14611 }
14612 // Otherwise, put the shl second so that it can fold with following
14613 // instructions (e.g. sext or add).
14614 SDValue Mul359 =
14615 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14616 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14617 return DAG.getNode(ISD::SHL, DL, VT, Mul359,
14618 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
14619 }
14620
14621 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
14622 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
14623 SDLoc DL(N);
14624 SDValue Mul359 =
14625 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14626 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14627 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
14628 DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
14629 Mul359);
14630 }
14631 }
14632
14633 // If this is a power of 2 + 2/4/8, we can use a shift followed by a single
14634 // shXadd. First check if this is a sum of two powers of 2 because that's
14635 // easy. Then count how many trailing zeros there are up to the first set bit.
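// For example, mul x, 10 (== 8 + 2) -> (sh1add x, (shl x, 3)).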
14636 if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
14637 unsigned ScaleShift = llvm::countr_zero(MulAmt);
14638 if (ScaleShift >= 1 && ScaleShift < 4) {
14639 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
14640 SDLoc DL(N);
14641 SDValue Shift1 =
14642 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
14643 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14644 DAG.getConstant(ScaleShift, DL, VT), Shift1);
14645 }
14646 }
14647
14648 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
14649 // This is the two-instruction form; there are also three-instruction
14650 // variants we could implement, e.g.
14651 // (2^(1,2,3) * 3,5,9 + 1) << C2
14652 // 2^(C1>3) * 3,5,9 +/- 1
14653 for (uint64_t Divisor : {3, 5, 9}) {
14654 uint64_t C = MulAmt - 1;
14655 if (C <= Divisor)
14656 continue;
14657 unsigned TZ = llvm::countr_zero(C);
14658 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
14659 SDLoc DL(N);
14660 SDValue Mul359 =
14661 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14662 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14663 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
14664 DAG.getConstant(TZ, DL, VT), X);
14665 }
14666 }
14667
14668 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
14669 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
14670 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
14671 if (ScaleShift >= 1 && ScaleShift < 4) {
14672 unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
14673 SDLoc DL(N);
14674 SDValue Shift1 =
14675 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
14676 return DAG.getNode(ISD::ADD, DL, VT, Shift1,
14677 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14678 DAG.getConstant(ScaleShift, DL, VT), X));
14679 }
14680 }
14681
14682 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x))
14683 for (uint64_t Offset : {3, 5, 9}) {
14684 if (isPowerOf2_64(MulAmt + Offset)) {
14685 SDLoc DL(N);
14686 SDValue Shift1 =
14687 DAG.getNode(ISD::SHL, DL, VT, X,
14688 DAG.getConstant(Log2_64(MulAmt + Offset), DL, VT));
14689 SDValue Mul359 =
14690 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14691 DAG.getConstant(Log2_64(Offset - 1), DL, VT), X);
14692 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
14693 }
14694 }
14695 }
14696
14697 // 2^N - 2^M -> (sub (shl X, C1), (shl X, C2))
14698 uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
14699 if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
14700 uint64_t ShiftAmt1 = MulAmt + MulAmtLowBit;
14701 SDLoc DL(N);
14702 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
14703 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
14704 SDValue Shift2 =
14705 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
14706 DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
14707 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Shift2);
14708 }
14709
14710 if (HasShlAdd) {
14711 for (uint64_t Divisor : {3, 5, 9}) {
14712 if (MulAmt % Divisor != 0)
14713 continue;
14714 uint64_t MulAmt2 = MulAmt / Divisor;
14715 // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
14716 // of 25 which happen to be quite common.
14717 for (uint64_t Divisor2 : {3, 5, 9}) {
14718 if (MulAmt2 % Divisor2 != 0)
14719 continue;
14720 uint64_t MulAmt3 = MulAmt2 / Divisor2;
14721 if (isPowerOf2_64(MulAmt3)) {
14722 SDLoc DL(N);
14723 SDValue Mul359A =
14724 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14725 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14726 SDValue Mul359B = DAG.getNode(
14727 RISCVISD::SHL_ADD, DL, VT, Mul359A,
14728 DAG.getConstant(Log2_64(Divisor2 - 1), DL, VT), Mul359A);
14729 return DAG.getNode(ISD::SHL, DL, VT, Mul359B,
14730 DAG.getConstant(Log2_64(MulAmt3), DL, VT));
14731 }
14732 }
14733 }
14734 }
14735
14736 return SDValue();
14737}
14738
14739// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
14740// (bitcast (sra (v2Xi16 (bitcast X)), 15))
14741// Same for other equivalent types with other equivalent constants.
14743 EVT VT = N->getValueType(0);
14744 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14745
14746 // Do this for legal vectors unless they are i1 or i8 vectors.
14747 if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
14748 return SDValue();
14749
14750 if (N->getOperand(0).getOpcode() != ISD::AND ||
14751 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
14752 return SDValue();
14753
14754 SDValue And = N->getOperand(0);
14755 SDValue Srl = And.getOperand(0);
14756
14757 APInt V1, V2, V3;
14758 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
14759 !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
14760 !ISD::isConstantSplatVector(Srl.getOperand(1).getNode(), V3))
14761 return SDValue();
14762
14763 unsigned HalfSize = VT.getScalarSizeInBits() / 2;
14764 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
14765 V3 != (HalfSize - 1))
14766 return SDValue();
14767
14768 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
14769 EVT::getIntegerVT(*DAG.getContext(), HalfSize),
14770 VT.getVectorElementCount() * 2);
14771 SDLoc DL(N);
14772 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0));
14773 SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast,
14774 DAG.getConstant(HalfSize - 1, DL, HalfVT));
14775 return DAG.getNode(ISD::BITCAST, DL, VT, Sra);
14776}
14777
14778static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
14779 TargetLowering::DAGCombinerInfo &DCI,
14780 const RISCVSubtarget &Subtarget) {
14781 EVT VT = N->getValueType(0);
14782 if (!VT.isVector())
14783 return expandMul(N, DAG, DCI, Subtarget);
14784
14785 SDLoc DL(N);
14786 SDValue N0 = N->getOperand(0);
14787 SDValue N1 = N->getOperand(1);
14788 SDValue MulOper;
14789 unsigned AddSubOpc;
14790
14791 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
14792 // (mul x, add (y, 1)) -> (add x, (mul x, y))
14793 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
14794 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
14795 auto IsAddSubWith1 = [&](SDValue V) -> bool {
14796 AddSubOpc = V->getOpcode();
14797 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
14798 SDValue Opnd = V->getOperand(1);
14799 MulOper = V->getOperand(0);
14800 if (AddSubOpc == ISD::SUB)
14801 std::swap(Opnd, MulOper);
14802 if (isOneOrOneSplat(Opnd))
14803 return true;
14804 }
14805 return false;
14806 };
14807
14808 if (IsAddSubWith1(N0)) {
14809 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
14810 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
14811 }
14812
14813 if (IsAddSubWith1(N1)) {
14814 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
14815 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
14816 }
14817
14818 if (SDValue V = combineBinOpOfZExt(N, DAG))
14819 return V;
14820
14822 return V;
14823
14824 return SDValue();
14825}
14826
14827/// According to the property that indexed load/store instructions zero-extend
14828/// their indices, try to narrow the type of the index operand.
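/// For example, a constant index build_vector whose elements all fit in 8 bits
/// can simply be truncated to i8 elements, and (shl (zext X from i8), splat 2)
/// only needs 10 bits, so its extension can be narrowed to i16 elements.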
14829static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
14830 if (isIndexTypeSigned(IndexType))
14831 return false;
14832
14833 if (!N->hasOneUse())
14834 return false;
14835
14836 EVT VT = N.getValueType();
14837 SDLoc DL(N);
14838
14839 // In general, what we're doing here is seeing if we can sink a truncate to
14840 // a smaller element type into the expression tree building our index.
14841 // TODO: We can generalize this and handle a bunch more cases if useful.
14842
14843 // Narrow a buildvector to the narrowest element type. This requires less
14844 // work and less register pressure at high LMUL, and creates smaller constants
14845 // which may be cheaper to materialize.
14846 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
14847 KnownBits Known = DAG.computeKnownBits(N);
14848 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
14849 LLVMContext &C = *DAG.getContext();
14850 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
14851 if (ResultVT.bitsLT(VT.getVectorElementType())) {
14852 N = DAG.getNode(ISD::TRUNCATE, DL,
14853 VT.changeVectorElementType(ResultVT), N);
14854 return true;
14855 }
14856 }
14857
14858 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
14859 if (N.getOpcode() != ISD::SHL)
14860 return false;
14861
14862 SDValue N0 = N.getOperand(0);
14863 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
14864 N0.getOpcode() != RISCVISD::VZEXT_VL)
14865 return false;
14866 if (!N0->hasOneUse())
14867 return false;
14868
14869 APInt ShAmt;
14870 SDValue N1 = N.getOperand(1);
14871 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
14872 return false;
14873
14874 SDValue Src = N0.getOperand(0);
14875 EVT SrcVT = Src.getValueType();
14876 unsigned SrcElen = SrcVT.getScalarSizeInBits();
14877 unsigned ShAmtV = ShAmt.getZExtValue();
14878 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
14879 NewElen = std::max(NewElen, 8U);
14880
14881 // Skip if NewElen is not narrower than the original extended type.
14882 if (NewElen >= N0.getValueType().getScalarSizeInBits())
14883 return false;
14884
14885 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
14886 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
14887
14888 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
14889 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
14890 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
14891 return true;
14892}
14893
14894// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
14895// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
14896// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
14897// can become a sext.w instead of a shift pair.
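// For example, (seteq (and X, 0xffffffff), 0x80000000) becomes
// (seteq (sext_inreg X, i32), 0xffffffff80000000): the sign-extended constant
// can be materialized with a single LUI and the sext_inreg with a sext.w.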
14898static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
14899 const RISCVSubtarget &Subtarget) {
14900 SDValue N0 = N->getOperand(0);
14901 SDValue N1 = N->getOperand(1);
14902 EVT VT = N->getValueType(0);
14903 EVT OpVT = N0.getValueType();
14904
14905 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
14906 return SDValue();
14907
14908 // RHS needs to be a constant.
14909 auto *N1C = dyn_cast<ConstantSDNode>(N1);
14910 if (!N1C)
14911 return SDValue();
14912
14913 // LHS needs to be (and X, 0xffffffff).
14914 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
14915 !isa<ConstantSDNode>(N0.getOperand(1)) ||
14916 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
14917 return SDValue();
14918
14919 // Looking for an equality compare.
14920 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
14921 if (!isIntEqualitySetCC(Cond))
14922 return SDValue();
14923
14924 // Don't do this if the sign bit is provably zero, it will be turned back into
14925 // an AND.
14926 APInt SignMask = APInt::getOneBitSet(64, 31);
14927 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
14928 return SDValue();
14929
14930 const APInt &C1 = N1C->getAPIntValue();
14931
14932 SDLoc dl(N);
14933 // If the constant is larger than 2^32 - 1 it is impossible for both sides
14934 // to be equal.
14935 if (C1.getActiveBits() > 32)
14936 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
14937
14938 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
14939 N0.getOperand(0), DAG.getValueType(MVT::i32));
14940 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
14941 dl, OpVT), Cond);
14942}
14943
14944static SDValue
14945performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
14946 const RISCVSubtarget &Subtarget) {
14947 SDValue Src = N->getOperand(0);
14948 EVT VT = N->getValueType(0);
14949 EVT SrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14950 unsigned Opc = Src.getOpcode();
14951
14952 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
14953 // Don't do this with Zhinx. We need to explicitly sign extend the GPR.
14954 if (Opc == RISCVISD::FMV_X_ANYEXTH && SrcVT.bitsGE(MVT::i16) &&
14955 Subtarget.hasStdExtZfhmin())
14956 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
14957 Src.getOperand(0));
14958
14959 // Fold (sext_inreg (shl X, Y), i32) -> (sllw X, Y) iff Y u< 32
14960 if (Opc == ISD::SHL && Subtarget.is64Bit() && SrcVT == MVT::i32 &&
14961 VT == MVT::i64 && !isa<ConstantSDNode>(Src.getOperand(1)) &&
14962 DAG.computeKnownBits(Src.getOperand(1)).countMaxActiveBits() <= 5)
14963 return DAG.getNode(RISCVISD::SLLW, SDLoc(N), VT, Src.getOperand(0),
14964 Src.getOperand(1));
14965
14966 return SDValue();
14967}
14968
14969namespace {
14970// Forward declaration of the structure holding the necessary information to
14971// apply a combine.
14972struct CombineResult;
14973
14974enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
14975/// Helper class for folding sign/zero extensions.
14976/// In particular, this class is used for the following combines:
14977/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
14978/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
14979/// mul | mul_vl -> vwmul(u) | vwmul_su
14980/// shl | shl_vl -> vwsll
14981/// fadd -> vfwadd | vfwadd_w
14982/// fsub -> vfwsub | vfwsub_w
14983/// fmul -> vfwmul
14984/// An object of this class represents an operand of the operation we want to
14985/// combine.
14986/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
14987/// NodeExtensionHelper for `a` and one for `b`.
14988///
14989/// This class abstracts away how the extension is materialized and
14990/// how its number of users affects the combines.
14991///
14992/// In particular:
14993/// - VWADD_W is conceptually == add(op0, sext(op1))
14994/// - VWADDU_W == add(op0, zext(op1))
14995/// - VWSUB_W == sub(op0, sext(op1))
14996/// - VWSUBU_W == sub(op0, zext(op1))
14997/// - VFWADD_W == fadd(op0, fpext(op1))
14998/// - VFWSUB_W == fsub(op0, fpext(op1))
14999/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
15000/// zext|sext(smaller_value).
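/// For example, when combining (add_vl (vzext_vl a), (vzext_vl b)), the two
/// helpers report SupportsZExt for `a` and `b`, which lets the caller rebuild
/// the node as (vwaddu_vl a, b) operating on the narrower source type.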
15001struct NodeExtensionHelper {
15002 /// Records if this operand is like being zero extended.
15003 bool SupportsZExt;
15004 /// Records if this operand is like being sign extended.
15005 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
15006 /// instance, a splat constant (e.g., 3) would support being both sign and
15007 /// zero extended.
15008 bool SupportsSExt;
15009 /// Records if this operand is like being floating-point extended.
15010 bool SupportsFPExt;
15011 /// This boolean captures whether we care if this operand would still be
15012 /// around after the folding happens.
15013 bool EnforceOneUse;
15014 /// Original value that this NodeExtensionHelper represents.
15015 SDValue OrigOperand;
15016
15017 /// Get the value feeding the extension or the value itself.
15018 /// E.g., for zext(a), this would return a.
15019 SDValue getSource() const {
15020 switch (OrigOperand.getOpcode()) {
15021 case ISD::ZERO_EXTEND:
15022 case ISD::SIGN_EXTEND:
15023 case RISCVISD::VSEXT_VL:
15024 case RISCVISD::VZEXT_VL:
15025 case RISCVISD::FP_EXTEND_VL:
15026 return OrigOperand.getOperand(0);
15027 default:
15028 return OrigOperand;
15029 }
15030 }
15031
15032 /// Check if this instance represents a splat.
15033 bool isSplat() const {
15034 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
15035 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
15036 }
15037
15038 /// Get the extended opcode.
15039 unsigned getExtOpc(ExtKind SupportsExt) const {
15040 switch (SupportsExt) {
15041 case ExtKind::SExt:
15042 return RISCVISD::VSEXT_VL;
15043 case ExtKind::ZExt:
15044 return RISCVISD::VZEXT_VL;
15045 case ExtKind::FPExt:
15046 return RISCVISD::FP_EXTEND_VL;
15047 }
15048 llvm_unreachable("Unknown ExtKind enum");
15049 }
15050
15051 /// Get or create a value that can feed \p Root with the given extension \p
15052 /// SupportsExt. If \p SExt is std::nullopt, this returns the source of this
15053 /// operand. \see ::getSource().
15054 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
15055 const RISCVSubtarget &Subtarget,
15056 std::optional<ExtKind> SupportsExt) const {
15057 if (!SupportsExt.has_value())
15058 return OrigOperand;
15059
15060 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
15061
15062 SDValue Source = getSource();
15063 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
15064 if (Source.getValueType() == NarrowVT)
15065 return Source;
15066
15067 // vfmadd_vl -> vfwmadd_vl can take bf16 operands
15068 if (Source.getValueType().getVectorElementType() == MVT::bf16) {
15069 assert(Root->getSimpleValueType(0).getVectorElementType() == MVT::f32 &&
15070 Root->getOpcode() == RISCVISD::VFMADD_VL);
15071 return Source;
15072 }
15073
15074 unsigned ExtOpc = getExtOpc(*SupportsExt);
15075
15076 // If we need an extension, we should be changing the type.
15077 SDLoc DL(OrigOperand);
15078 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
15079 switch (OrigOperand.getOpcode()) {
15080 case ISD::ZERO_EXTEND:
15081 case ISD::SIGN_EXTEND:
15082 case RISCVISD::VSEXT_VL:
15083 case RISCVISD::VZEXT_VL:
15084 case RISCVISD::FP_EXTEND_VL:
15085 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
15086 case ISD::SPLAT_VECTOR:
15087 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
15088 case RISCVISD::VMV_V_X_VL:
15089 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
15090 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
15091 case RISCVISD::VFMV_V_F_VL:
15092 Source = Source.getOperand(1);
15093 assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source");
15094 Source = Source.getOperand(0);
15095 assert(Source.getValueType() == NarrowVT.getVectorElementType());
15096 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, NarrowVT,
15097 DAG.getUNDEF(NarrowVT), Source, VL);
15098 default:
15099 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
15100 // and that operand should already have the right NarrowVT so no
15101 // extension should be required at this point.
15102 llvm_unreachable("Unsupported opcode");
15103 }
15104 }
15105
15106 /// Helper function to get the narrow type for \p Root.
15107 /// The narrow type is the type of \p Root where we divided the size of each
15108 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
15109 /// \pre Both the narrow type and the original type should be legal.
15110 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
15111 MVT VT = Root->getSimpleValueType(0);
15112
15113 // Determine the narrow size.
15114 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
15115
15116 MVT EltVT = SupportsExt == ExtKind::FPExt
15117 ? MVT::getFloatingPointVT(NarrowSize)
15118 : MVT::getIntegerVT(NarrowSize);
15119
15120 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
15121 "Trying to extend something we can't represent");
15122 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
15123 return NarrowVT;
15124 }
15125
15126 /// Get the opcode to materialize:
15127 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
15128 static unsigned getSExtOpcode(unsigned Opcode) {
15129 switch (Opcode) {
15130 case ISD::ADD:
15131 case RISCVISD::ADD_VL:
15134 case ISD::OR:
15135 return RISCVISD::VWADD_VL;
15136 case ISD::SUB:
15137 case RISCVISD::SUB_VL:
15140 return RISCVISD::VWSUB_VL;
15141 case ISD::MUL:
15142 case RISCVISD::MUL_VL:
15143 return RISCVISD::VWMUL_VL;
15144 default:
15145 llvm_unreachable("Unexpected opcode");
15146 }
15147 }
15148
15149 /// Get the opcode to materialize:
15150 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
15151 static unsigned getZExtOpcode(unsigned Opcode) {
15152 switch (Opcode) {
15153 case ISD::ADD:
15154 case RISCVISD::ADD_VL:
15157 case ISD::OR:
15158 return RISCVISD::VWADDU_VL;
15159 case ISD::SUB:
15160 case RISCVISD::SUB_VL:
15163 return RISCVISD::VWSUBU_VL;
15164 case ISD::MUL:
15165 case RISCVISD::MUL_VL:
15166 return RISCVISD::VWMULU_VL;
15167 case ISD::SHL:
15168 case RISCVISD::SHL_VL:
15169 return RISCVISD::VWSLL_VL;
15170 default:
15171 llvm_unreachable("Unexpected opcode");
15172 }
15173 }
15174
15175 /// Get the opcode to materialize:
15176 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
15177 static unsigned getFPExtOpcode(unsigned Opcode) {
15178 switch (Opcode) {
15179 case RISCVISD::FADD_VL:
15181 return RISCVISD::VFWADD_VL;
15182 case RISCVISD::FSUB_VL:
15184 return RISCVISD::VFWSUB_VL;
15185 case RISCVISD::FMUL_VL:
15186 return RISCVISD::VFWMUL_VL;
15188 return RISCVISD::VFWMADD_VL;
15190 return RISCVISD::VFWMSUB_VL;
15192 return RISCVISD::VFWNMADD_VL;
15194 return RISCVISD::VFWNMSUB_VL;
15195 default:
15196 llvm_unreachable("Unexpected opcode");
15197 }
15198 }
15199
15200 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
15201 /// newOpcode(a, b).
15202 static unsigned getSUOpcode(unsigned Opcode) {
15203 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
15204 "SU is only supported for MUL");
15205 return RISCVISD::VWMULSU_VL;
15206 }
15207
15208 /// Get the opcode to materialize
15209 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
15210 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
15211 switch (Opcode) {
15212 case ISD::ADD:
15213 case RISCVISD::ADD_VL:
15214 case ISD::OR:
15215 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
15216 : RISCVISD::VWADDU_W_VL;
15217 case ISD::SUB:
15218 case RISCVISD::SUB_VL:
15219 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
15220 : RISCVISD::VWSUBU_W_VL;
15221 case RISCVISD::FADD_VL:
15222 return RISCVISD::VFWADD_W_VL;
15223 case RISCVISD::FSUB_VL:
15224 return RISCVISD::VFWSUB_W_VL;
15225 default:
15226 llvm_unreachable("Unexpected opcode");
15227 }
15228 }
15229
15230 using CombineToTry = std::function<std::optional<CombineResult>(
15231 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
15232 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
15233 const RISCVSubtarget &)>;
15234
15235 /// Check if this node needs to be fully folded or extended for all users.
15236 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
15237
15238 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
15239 const RISCVSubtarget &Subtarget) {
15240 unsigned Opc = OrigOperand.getOpcode();
15241 MVT VT = OrigOperand.getSimpleValueType();
15242
15243 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
15244 "Unexpected Opcode");
15245
15246 // The passthru must be undef for tail agnostic.
15247 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
15248 return;
15249
15250 // Get the scalar value.
15251 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
15252 : OrigOperand.getOperand(1);
15253
15254 // See if we have enough sign bits or zero bits in the scalar to use a
15255 // widening opcode by splatting to smaller element size.
15256 unsigned EltBits = VT.getScalarSizeInBits();
15257 unsigned ScalarBits = Op.getValueSizeInBits();
15258 // If we're not getting all bits from the element, we need special handling.
15259 if (ScalarBits < EltBits) {
15260 // This should only occur on RV32.
15261 assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
15262 !Subtarget.is64Bit() && "Unexpected splat");
15263 // vmv.v.x sign extends narrow inputs.
15264 SupportsSExt = true;
15265
15266 // If the input is positive, then sign extend is also zero extend.
15267 if (DAG.SignBitIsZero(Op))
15268 SupportsZExt = true;
15269
15270 EnforceOneUse = false;
15271 return;
15272 }
15273
15274 unsigned NarrowSize = EltBits / 2;
15275 // If the narrow type cannot be expressed with a legal VMV,
15276 // this is not a valid candidate.
15277 if (NarrowSize < 8)
15278 return;
15279
15280 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
15281 SupportsSExt = true;
15282
15283 if (DAG.MaskedValueIsZero(Op,
15284 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
15285 SupportsZExt = true;
15286
15287 EnforceOneUse = false;
15288 }
15289
15290 bool isSupportedFPExtend(SDNode *Root, MVT NarrowEltVT,
15291 const RISCVSubtarget &Subtarget) {
15292 // Any f16 extension will need zvfh
15293 if (NarrowEltVT == MVT::f16 && !Subtarget.hasVInstructionsF16())
15294 return false;
15295 // The only bf16 extension we can do is vfmadd_vl -> vfwmadd_vl with
15296 // zvfbfwma
15297 if (NarrowEltVT == MVT::bf16 && (!Subtarget.hasStdExtZvfbfwma() ||
15298 Root->getOpcode() != RISCVISD::VFMADD_VL))
15299 return false;
15300 return true;
15301 }
15302
15303 /// Helper method to set the various fields of this struct based on the
15304 /// type of \p Root.
15305 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
15306 const RISCVSubtarget &Subtarget) {
15307 SupportsZExt = false;
15308 SupportsSExt = false;
15309 SupportsFPExt = false;
15310 EnforceOneUse = true;
15311 unsigned Opc = OrigOperand.getOpcode();
15312 // For the nodes we handle below, we end up using their inputs directly: see
15313 // getSource(). However since they either don't have a passthru or we check
15314 // that their passthru is undef, we can safely ignore their mask and VL.
15315 switch (Opc) {
15316 case ISD::ZERO_EXTEND:
15317 case ISD::SIGN_EXTEND: {
15318 MVT VT = OrigOperand.getSimpleValueType();
15319 if (!VT.isVector())
15320 break;
15321
15322 SDValue NarrowElt = OrigOperand.getOperand(0);
15323 MVT NarrowVT = NarrowElt.getSimpleValueType();
15324 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
15325 if (NarrowVT.getVectorElementType() == MVT::i1)
15326 break;
15327
15328 SupportsZExt = Opc == ISD::ZERO_EXTEND;
15329 SupportsSExt = Opc == ISD::SIGN_EXTEND;
15330 break;
15331 }
15332 case RISCVISD::VZEXT_VL:
15333 SupportsZExt = true;
15334 break;
15335 case RISCVISD::VSEXT_VL:
15336 SupportsSExt = true;
15337 break;
15338 case RISCVISD::FP_EXTEND_VL: {
15339 MVT NarrowEltVT =
15340 OrigOperand.getOperand(0).getSimpleValueType().getVectorElementType();
15341 if (!isSupportedFPExtend(Root, NarrowEltVT, Subtarget))
15342 break;
15343 SupportsFPExt = true;
15344 break;
15345 }
15346 case ISD::SPLAT_VECTOR:
15347 case RISCVISD::VMV_V_X_VL:
15348 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
15349 break;
15350 case RISCVISD::VFMV_V_F_VL: {
15351 MVT VT = OrigOperand.getSimpleValueType();
15352
15353 if (!OrigOperand.getOperand(0).isUndef())
15354 break;
15355
15356 SDValue Op = OrigOperand.getOperand(1);
15357 if (Op.getOpcode() != ISD::FP_EXTEND)
15358 break;
15359
15360 if (!isSupportedFPExtend(Root, Op.getOperand(0).getSimpleValueType(),
15361 Subtarget))
15362 break;
15363
15364 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
15365 unsigned ScalarBits = Op.getOperand(0).getValueSizeInBits();
15366 if (NarrowSize != ScalarBits)
15367 break;
15368
15369 SupportsFPExt = true;
15370 break;
15371 }
15372 default:
15373 break;
15374 }
15375 }
15376
15377 /// Check if \p Root supports any extension folding combines.
15378 static bool isSupportedRoot(const SDNode *Root,
15379 const RISCVSubtarget &Subtarget) {
15380 switch (Root->getOpcode()) {
15381 case ISD::ADD:
15382 case ISD::SUB:
15383 case ISD::MUL: {
15384 return Root->getValueType(0).isScalableVector();
15385 }
15386 case ISD::OR: {
15387 return Root->getValueType(0).isScalableVector() &&
15388 Root->getFlags().hasDisjoint();
15389 }
15390 // Vector Widening Integer Add/Sub/Mul Instructions
15391 case RISCVISD::ADD_VL:
15392 case RISCVISD::MUL_VL:
15395 case RISCVISD::SUB_VL:
15398 // Vector Widening Floating-Point Add/Sub/Mul Instructions
15399 case RISCVISD::FADD_VL:
15400 case RISCVISD::FSUB_VL:
15401 case RISCVISD::FMUL_VL:
15404 return true;
15405 case ISD::SHL:
15406 return Root->getValueType(0).isScalableVector() &&
15407 Subtarget.hasStdExtZvbb();
15408 case RISCVISD::SHL_VL:
15409 return Subtarget.hasStdExtZvbb();
15414 return true;
15415 default:
15416 return false;
15417 }
15418 }
15419
15420 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
15421 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
15422 const RISCVSubtarget &Subtarget) {
15423 assert(isSupportedRoot(Root, Subtarget) &&
15424 "Trying to build a helper with an "
15425 "unsupported root");
15426 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
15428 OrigOperand = Root->getOperand(OperandIdx);
15429
15430 unsigned Opc = Root->getOpcode();
15431 switch (Opc) {
15432 // We consider
15433 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
15434 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
15435 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
15436 case RISCVISD::VWADD_W_VL:
15437 case RISCVISD::VWADDU_W_VL:
15438 case RISCVISD::VWSUB_W_VL:
15439 case RISCVISD::VWSUBU_W_VL:
15440 case RISCVISD::VFWADD_W_VL:
15441 case RISCVISD::VFWSUB_W_VL:
15442 if (OperandIdx == 1) {
15443 SupportsZExt =
15444 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
15445 SupportsSExt =
15446 Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWSUB_W_VL;
15447 SupportsFPExt =
15448 Opc == RISCVISD::VFWADD_W_VL || Opc == RISCVISD::VFWSUB_W_VL;
15449 // There's no existing extension here, so we don't have to worry about
15450 // making sure it gets removed.
15451 EnforceOneUse = false;
15452 break;
15453 }
15454 [[fallthrough]];
15455 default:
15456 fillUpExtensionSupport(Root, DAG, Subtarget);
15457 break;
15458 }
15459 }
15460
15461 /// Helper function to get the Mask and VL from \p Root.
15462 static std::pair<SDValue, SDValue>
15463 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
15464 const RISCVSubtarget &Subtarget) {
15465 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
15466 switch (Root->getOpcode()) {
15467 case ISD::ADD:
15468 case ISD::SUB:
15469 case ISD::MUL:
15470 case ISD::OR:
15471 case ISD::SHL: {
15472 SDLoc DL(Root);
15473 MVT VT = Root->getSimpleValueType(0);
15474 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
15475 }
15476 default:
15477 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
15478 }
15479 }
15480
15481 /// Helper function to check if \p N is commutative with respect to the
15482 /// foldings that are supported by this class.
15483 static bool isCommutative(const SDNode *N) {
15484 switch (N->getOpcode()) {
15485 case ISD::ADD:
15486 case ISD::MUL:
15487 case ISD::OR:
15488 case RISCVISD::ADD_VL:
15489 case RISCVISD::MUL_VL:
15492 case RISCVISD::FADD_VL:
15493 case RISCVISD::FMUL_VL:
15499 return true;
15500 case ISD::SUB:
15501 case RISCVISD::SUB_VL:
15504 case RISCVISD::FSUB_VL:
15506 case ISD::SHL:
15507 case RISCVISD::SHL_VL:
15508 return false;
15509 default:
15510 llvm_unreachable("Unexpected opcode");
15511 }
15512 }
15513
15514 /// Get a list of combine to try for folding extensions in \p Root.
15515 /// Note that each returned CombineToTry function doesn't actually modify
15516 /// anything. Instead they produce an optional CombineResult that if not None,
15517 /// need to be materialized for the combine to be applied.
15518 /// \see CombineResult::materialize.
15519 /// If the related CombineToTry function returns std::nullopt, that means the
15520 /// combine didn't match.
15521 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
15522};
15523
15524/// Helper structure that holds all the necessary information to materialize a
15525/// combine that does some extension folding.
15526struct CombineResult {
15527 /// Opcode to be generated when materializing the combine.
15528 unsigned TargetOpcode;
15529 // No value means no extension is needed.
15530 std::optional<ExtKind> LHSExt;
15531 std::optional<ExtKind> RHSExt;
15532 /// Root of the combine.
15533 SDNode *Root;
15534 /// LHS of the TargetOpcode.
15535 NodeExtensionHelper LHS;
15536 /// RHS of the TargetOpcode.
15537 NodeExtensionHelper RHS;
15538
15539 CombineResult(unsigned TargetOpcode, SDNode *Root,
15540 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
15541 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
15542 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
15543 LHS(LHS), RHS(RHS) {}
15544
15545 /// Return a value that uses TargetOpcode and that can be used to replace
15546 /// Root.
15547 /// The actual replacement is *not* done in that method.
15548 SDValue materialize(SelectionDAG &DAG,
15549 const RISCVSubtarget &Subtarget) const {
15550 SDValue Mask, VL, Passthru;
15551 std::tie(Mask, VL) =
15552 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
15553 switch (Root->getOpcode()) {
15554 default:
15555 Passthru = Root->getOperand(2);
15556 break;
15557 case ISD::ADD:
15558 case ISD::SUB:
15559 case ISD::MUL:
15560 case ISD::OR:
15561 case ISD::SHL:
15562 Passthru = DAG.getUNDEF(Root->getValueType(0));
15563 break;
15564 }
15565 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
15566 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
15567 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
15568 Passthru, Mask, VL);
15569 }
15570};
15571
15572/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
15573/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
15574/// are zext) and LHS and RHS can be folded into Root.
15575/// AllowExtMask defines which forms `ext` can take in this pattern.
15576///
15577/// \note If the pattern can match with both zext and sext, the returned
15578/// CombineResult will feature the zext result.
15579///
15580/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15581/// can be used to apply the pattern.
15582static std::optional<CombineResult>
15583canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
15584 const NodeExtensionHelper &RHS,
15585 uint8_t AllowExtMask, SelectionDAG &DAG,
15586 const RISCVSubtarget &Subtarget) {
15587 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
15588 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
15589 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
15590 /*RHSExt=*/{ExtKind::ZExt});
15591 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
15592 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
15593 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
15594 /*RHSExt=*/{ExtKind::SExt});
15595 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
15596 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
15597 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
15598 /*RHSExt=*/{ExtKind::FPExt});
15599 return std::nullopt;
15600}
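// For example (illustrative): if Root is a vector add whose operands are both
// sign extensions of a narrower type, this returns a CombineResult selecting
// vwadd.vv; matching zero extensions map to vwaddu.vv (and win when both forms
// match, per the \note above), and matching fpexts map to vfwadd.vv.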
15601
15602/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
15603/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
15604/// are zext) and LHS and RHS can be folded into Root.
15605///
15606/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15607/// can be used to apply the pattern.
15608static std::optional<CombineResult>
15609canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
15610 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15611 const RISCVSubtarget &Subtarget) {
15612 return canFoldToVWWithSameExtensionImpl(
15613 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
15614 Subtarget);
15615}
15616
15617/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
15618///
15619/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15620/// can be used to apply the pattern.
15621static std::optional<CombineResult>
15622canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
15623 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15624 const RISCVSubtarget &Subtarget) {
15625 if (RHS.SupportsFPExt)
15626 return CombineResult(
15627 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
15628 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
15629
15630 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
15631 // sext/zext?
15632 // Control this behavior behind an option (AllowSplatInVW_W) for testing
15633 // purposes.
15634 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
15635 return CombineResult(
15636 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
15637 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
15638 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
15639 return CombineResult(
15640 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
15641 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
15642 return std::nullopt;
15643}
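// For example (illustrative): (add_vl wide, (zext_vl narrow)) matches the
// RHS.SupportsZExt case and becomes a vwaddu.wv-style node, where only the
// second operand is implicitly widened and the explicit zext disappears.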
15644
15645/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
15646///
15647/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15648/// can be used to apply the pattern.
15649static std::optional<CombineResult>
15650canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15651 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15652 const RISCVSubtarget &Subtarget) {
15653 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
15654 Subtarget);
15655}
15656
15657/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
15658///
15659/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15660/// can be used to apply the pattern.
15661static std::optional<CombineResult>
15662canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15663 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15664 const RISCVSubtarget &Subtarget) {
15665 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
15666 Subtarget);
15667}
15668
15669/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
15670///
15671/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15672/// can be used to apply the pattern.
15673static std::optional<CombineResult>
15674canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15675 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15676 const RISCVSubtarget &Subtarget) {
15677 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
15678 Subtarget);
15679}
15680
15681/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
15682///
15683/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15684/// can be used to apply the pattern.
15685static std::optional<CombineResult>
15686canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
15687 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15688 const RISCVSubtarget &Subtarget) {
15689
15690 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
15691 return std::nullopt;
15692 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
15693 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
15694 /*RHSExt=*/{ExtKind::ZExt});
15695}
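// For example (illustrative): (mul (sext a), (zext b)) becomes a vwmulsu.vv,
// which treats a as signed and b as unsigned. Only the sext-LHS/zext-RHS
// orientation matches here; the caller retries with swapped operands when the
// root is commutative.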
15696
15697 SmallVector<NodeExtensionHelper::CombineToTry>
15698NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
15699 SmallVector<CombineToTry> Strategies;
15700 switch (Root->getOpcode()) {
15701 case ISD::ADD:
15702 case ISD::SUB:
15703 case ISD::OR:
15704 case RISCVISD::ADD_VL:
15705 case RISCVISD::SUB_VL:
15706 case RISCVISD::FADD_VL:
15707 case RISCVISD::FSUB_VL:
15708 // add|sub|fadd|fsub -> vwadd(u)|vwsub(u)|vfwadd|vfwsub
15709 Strategies.push_back(canFoldToVWWithSameExtension);
15710 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w|vfwadd_w|vfwsub_w
15711 Strategies.push_back(canFoldToVW_W);
15712 break;
15713 case RISCVISD::FMUL_VL:
15718 Strategies.push_back(canFoldToVWWithSameExtension);
15719 break;
15720 case ISD::MUL:
15721 case RISCVISD::MUL_VL:
15722 // mul -> vwmul(u)
15723 Strategies.push_back(canFoldToVWWithSameExtension);
15724 // mul -> vwmulsu
15725 Strategies.push_back(canFoldToVW_SU);
15726 break;
15727 case ISD::SHL:
15728 case RISCVISD::SHL_VL:
15729 // shl -> vwsll
15730 Strategies.push_back(canFoldToVWWithZEXT);
15731 break;
15734 // vwadd_w|vwsub_w -> vwadd|vwsub
15735 Strategies.push_back(canFoldToVWWithSEXT);
15736 break;
15739 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
15740 Strategies.push_back(canFoldToVWWithZEXT);
15741 break;
15744 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
15745 Strategies.push_back(canFoldToVWWithFPEXT);
15746 break;
15747 default:
15748 llvm_unreachable("Unexpected opcode");
15749 }
15750 return Strategies;
15751}
15752} // End anonymous namespace.
15753
15754/// Combine a binary or FMA operation to its equivalent VW or VW_W form.
15755/// The supported combines are:
15756/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
15757/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
15758/// mul | mul_vl -> vwmul(u) | vwmul_su
15759/// shl | shl_vl -> vwsll
15760/// fadd_vl -> vfwadd | vfwadd_w
15761/// fsub_vl -> vfwsub | vfwsub_w
15762/// fmul_vl -> vfwmul
15763/// vwadd_w(u) -> vwadd(u)
15764/// vwsub_w(u) -> vwsub(u)
15765/// vfwadd_w -> vfwadd
15766/// vfwsub_w -> vfwsub
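/// For example (illustrative), with i8 sources a and b:
///   add (sext a to i16), (sext b to i16)  --> vwadd.vv a, b
///   add wide_i16, (zext b to i16)         --> vwaddu.wv wide_i16, b
/// so the explicit extension nodes are removed from the DAG.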
15767static SDValue combineOp_VLToVWOp_VL(SDNode *N,
15768 TargetLowering::DAGCombinerInfo &DCI,
15769 const RISCVSubtarget &Subtarget) {
15770 SelectionDAG &DAG = DCI.DAG;
15771 if (DCI.isBeforeLegalize())
15772 return SDValue();
15773
15774 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
15775 return SDValue();
15776
15777 SmallVector<SDNode *> Worklist;
15778 SmallSet<SDNode *, 8> Inserted;
15779 Worklist.push_back(N);
15780 Inserted.insert(N);
15781 SmallVector<CombineResult> CombinesToApply;
15782
15783 while (!Worklist.empty()) {
15784 SDNode *Root = Worklist.pop_back_val();
15785
15786 NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
15787 NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
15788 auto AppendUsersIfNeeded = [&Worklist, &Subtarget,
15789 &Inserted](const NodeExtensionHelper &Op) {
15790 if (Op.needToPromoteOtherUsers()) {
15791 for (SDUse &Use : Op.OrigOperand->uses()) {
15792 SDNode *TheUser = Use.getUser();
15793 if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
15794 return false;
15795 // We only support the first 2 operands of FMA.
15796 if (Use.getOperandNo() >= 2)
15797 return false;
15798 if (Inserted.insert(TheUser).second)
15799 Worklist.push_back(TheUser);
15800 }
15801 }
15802 return true;
15803 };
15804
15805 // Control the compile time by limiting the number of nodes we look at in
15806 // total.
15807 if (Inserted.size() > ExtensionMaxWebSize)
15808 return SDValue();
15809
15810 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
15811 NodeExtensionHelper::getSupportedFoldings(Root);
15812
15813 assert(!FoldingStrategies.empty() && "Nothing to be folded");
15814 bool Matched = false;
15815 for (int Attempt = 0;
15816 (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
15817 ++Attempt) {
15818
15819 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
15820 FoldingStrategies) {
15821 std::optional<CombineResult> Res =
15822 FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
15823 if (Res) {
15824 Matched = true;
15825 CombinesToApply.push_back(*Res);
15826 // All the inputs that are extended need to be folded, otherwise
15827 // we would be leaving the old input (since it may still be used),
15828 // and the new one.
15829 if (Res->LHSExt.has_value())
15830 if (!AppendUsersIfNeeded(LHS))
15831 return SDValue();
15832 if (Res->RHSExt.has_value())
15833 if (!AppendUsersIfNeeded(RHS))
15834 return SDValue();
15835 break;
15836 }
15837 }
15838 std::swap(LHS, RHS);
15839 }
15840 // Right now we do an all-or-nothing approach.
15841 if (!Matched)
15842 return SDValue();
15843 }
15844 // Store the value for the replacement of the input node separately.
15845 SDValue InputRootReplacement;
15846 // We do the RAUW after we materialize all the combines, because some replaced
15847 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
15848 // some of these nodes may appear in the NodeExtensionHelpers of some of the
15849 // yet-to-be-visited CombinesToApply roots.
15851 ValuesToReplace.reserve(CombinesToApply.size());
15852 for (CombineResult Res : CombinesToApply) {
15853 SDValue NewValue = Res.materialize(DAG, Subtarget);
15854 if (!InputRootReplacement) {
15855 assert(Res.Root == N &&
15856 "First element is expected to be the current node");
15857 InputRootReplacement = NewValue;
15858 } else {
15859 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
15860 }
15861 }
15862 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
15863 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
15864 DCI.AddToWorklist(OldNewValues.second.getNode());
15865 }
15866 return InputRootReplacement;
15867}
15868
15869// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
15870// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
15871// y will be the Passthru and cond will be the Mask.
15872static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
15873 unsigned Opc = N->getOpcode();
15876
15877 SDValue Y = N->getOperand(0);
15878 SDValue MergeOp = N->getOperand(1);
15879 unsigned MergeOpc = MergeOp.getOpcode();
15880
15881 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
15882 return SDValue();
15883
15884 SDValue X = MergeOp->getOperand(1);
15885
15886 if (!MergeOp.hasOneUse())
15887 return SDValue();
15888
15889 // Passthru should be undef
15890 SDValue Passthru = N->getOperand(2);
15891 if (!Passthru.isUndef())
15892 return SDValue();
15893
15894 // Mask should be all ones
15895 SDValue Mask = N->getOperand(3);
15896 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
15897 return SDValue();
15898
15899 // False value of MergeOp should be all zeros
15900 SDValue Z = MergeOp->getOperand(2);
15901
15902 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
15903 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
15904 Z = Z.getOperand(1);
15905
15906 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
15907 return SDValue();
15908
15909 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
15910 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
15911 N->getFlags());
15912}
15913
15916 const RISCVSubtarget &Subtarget) {
15917 [[maybe_unused]] unsigned Opc = N->getOpcode();
15920
15921 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
15922 return V;
15923
15924 return combineVWADDSUBWSelect(N, DCI.DAG);
15925}
15926
15927// Helper function for performMemPairCombine.
15928// Try to combine the memory loads/stores LSNode1 and LSNode2
15929// into a single memory pair operation.
15930static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
15931 LSBaseSDNode *LSNode2, SDValue BasePtr,
15932 uint64_t Imm) {
15934 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
15935
15936 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
15937 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
15938 return SDValue();
15939
15941 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
15942
15943 // The new operation has twice the width.
15944 MVT XLenVT = Subtarget.getXLenVT();
15945 EVT MemVT = LSNode1->getMemoryVT();
15946 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
15947 MachineMemOperand *MMO = LSNode1->getMemOperand();
15949 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
15950
15951 if (LSNode1->getOpcode() == ISD::LOAD) {
15952 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
15953 unsigned Opcode;
15954 if (MemVT == MVT::i32)
15955 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
15956 else
15957 Opcode = RISCVISD::TH_LDD;
15958
15959 SDValue Res = DAG.getMemIntrinsicNode(
15960 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
15961 {LSNode1->getChain(), BasePtr,
15962 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
15963 NewMemVT, NewMMO);
15964
15965 SDValue Node1 =
15966 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
15967 SDValue Node2 =
15968 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
15969
15970 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
15971 return Node1;
15972 } else {
15973 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
15974
15975 SDValue Res = DAG.getMemIntrinsicNode(
15976 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
15977 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
15978 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
15979 NewMemVT, NewMMO);
15980
15981 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
15982 return Res;
15983 }
15984}
15985
15986// Try to combine two adjacent loads/stores to a single pair instruction from
15987// the XTHeadMemPair vendor extension.
15988static SDValue performMemPairCombine(SDNode *N,
15989 TargetLowering::DAGCombinerInfo &DCI) {
15990 SelectionDAG &DAG = DCI.DAG;
15992 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
15993
15994 // Target does not support load/store pair.
15995 if (!Subtarget.hasVendorXTHeadMemPair())
15996 return SDValue();
15997
15998 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
15999 EVT MemVT = LSNode1->getMemoryVT();
16000 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
16001
16002 // No volatile, indexed or atomic loads/stores.
16003 if (!LSNode1->isSimple() || LSNode1->isIndexed())
16004 return SDValue();
16005
16006 // Function to get a base + constant representation from a memory value.
16007 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
16008 if (Ptr->getOpcode() == ISD::ADD)
16009 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
16010 return {Ptr->getOperand(0), C1->getZExtValue()};
16011 return {Ptr, 0};
16012 };
16013
16014 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
16015
16016 SDValue Chain = N->getOperand(0);
16017 for (SDUse &Use : Chain->uses()) {
16018 if (Use.getUser() != N && Use.getResNo() == 0 &&
16019 Use.getUser()->getOpcode() == N->getOpcode()) {
16020 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
16021
16022 // No volatile, indexed or atomic loads/stores.
16023 if (!LSNode2->isSimple() || LSNode2->isIndexed())
16024 continue;
16025
16026 // Check if LSNode1 and LSNode2 have the same type and extension.
16027 if (LSNode1->getOpcode() == ISD::LOAD)
16028 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
16029 cast<LoadSDNode>(LSNode1)->getExtensionType())
16030 continue;
16031
16032 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
16033 continue;
16034
16035 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
16036
16037 // Check if the base pointer is the same for both instructions.
16038 if (Base1 != Base2)
16039 continue;
16040
16041 // Check if the offsets match the XTHeadMemPair encoding constraints.
16042 bool Valid = false;
16043 if (MemVT == MVT::i32) {
16044 // Check for adjacent i32 values and a 2-bit index.
16045 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
16046 Valid = true;
16047 } else if (MemVT == MVT::i64) {
16048 // Check for adjacent i64 values and a 2-bit index.
16049 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
16050 Valid = true;
16051 }
16052
16053 if (!Valid)
16054 continue;
16055
16056 // Try to combine.
16057 if (SDValue Res =
16058 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
16059 return Res;
16060 }
16061 }
16062
16063 return SDValue();
16064}
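// For example (illustrative): two simple i32 loads from a0+0 and a0+4 that
// share the same chain and base pointer can be merged into a single
// XTHeadMemPair th.lwd, filling both destination registers with one access.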
16065
16066// Fold
16067// (fp_to_int (froundeven X)) -> fcvt X, rne
16068// (fp_to_int (ftrunc X)) -> fcvt X, rtz
16069// (fp_to_int (ffloor X)) -> fcvt X, rdn
16070// (fp_to_int (fceil X)) -> fcvt X, rup
16071// (fp_to_int (fround X)) -> fcvt X, rmm
16072// (fp_to_int (frint X)) -> fcvt X
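// For example (illustrative, scalar case): (fp_to_sint (ffloor x)) becomes a
// single fcvt instruction using the rdn rounding mode, instead of rounding to
// an FP integer first and converting afterwards.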
16075 const RISCVSubtarget &Subtarget) {
16076 SelectionDAG &DAG = DCI.DAG;
16077 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16078 MVT XLenVT = Subtarget.getXLenVT();
16079
16080 SDValue Src = N->getOperand(0);
16081
16082 // Don't do this for strict-fp Src.
16083 if (Src->isStrictFPOpcode())
16084 return SDValue();
16085
16086 // Ensure the FP type is legal.
16087 if (!TLI.isTypeLegal(Src.getValueType()))
16088 return SDValue();
16089
16090 // Don't do this for f16 with Zfhmin and not Zfh.
16091 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
16092 return SDValue();
16093
16094 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
16095 // If the result is invalid, we didn't find a foldable instruction.
16096 if (FRM == RISCVFPRndMode::Invalid)
16097 return SDValue();
16098
16099 SDLoc DL(N);
16100 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
16101 EVT VT = N->getValueType(0);
16102
16103 if (VT.isVector() && TLI.isTypeLegal(VT)) {
16104 MVT SrcVT = Src.getSimpleValueType();
16105 MVT SrcContainerVT = SrcVT;
16106 MVT ContainerVT = VT.getSimpleVT();
16107 SDValue XVal = Src.getOperand(0);
16108
16109 // For widening and narrowing conversions we just combine it into a
16110 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
16111 // end up getting lowered to their appropriate pseudo instructions based on
16112 // their operand types
16113 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
16114 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
16115 return SDValue();
16116
16117 // Make fixed-length vectors scalable first
16118 if (SrcVT.isFixedLengthVector()) {
16119 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
16120 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
16121 ContainerVT =
16122 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
16123 }
16124
16125 auto [Mask, VL] =
16126 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
16127
16128 SDValue FpToInt;
16129 if (FRM == RISCVFPRndMode::RTZ) {
16130 // Use the dedicated trunc static rounding mode if we're truncating so we
16131 // don't need to generate calls to fsrmi/fsrm
16132 unsigned Opc =
16134 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
16135 } else {
16136 unsigned Opc =
16138 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
16139 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
16140 }
16141
16142 // If converted from fixed-length to scalable, convert back
16143 if (VT.isFixedLengthVector())
16144 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
16145
16146 return FpToInt;
16147 }
16148
16149 // Only handle XLen or i32 types. Other types narrower than XLen will
16150 // eventually be legalized to XLenVT.
16151 if (VT != MVT::i32 && VT != XLenVT)
16152 return SDValue();
16153
16154 unsigned Opc;
16155 if (VT == XLenVT)
16156 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
16157 else
16159
16160 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
16161 DAG.getTargetConstant(FRM, DL, XLenVT));
16162 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
16163}
16164
16165// Fold
16166// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
16167// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
16168// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
16169// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
16170// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
16171// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
16174 const RISCVSubtarget &Subtarget) {
16175 SelectionDAG &DAG = DCI.DAG;
16176 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16177 MVT XLenVT = Subtarget.getXLenVT();
16178
16179 // Only handle XLen types. Other types narrower than XLen will eventually be
16180 // legalized to XLenVT.
16181 EVT DstVT = N->getValueType(0);
16182 if (DstVT != XLenVT)
16183 return SDValue();
16184
16185 SDValue Src = N->getOperand(0);
16186
16187 // Don't do this for strict-fp Src.
16188 if (Src->isStrictFPOpcode())
16189 return SDValue();
16190
16191 // Ensure the FP type is also legal.
16192 if (!TLI.isTypeLegal(Src.getValueType()))
16193 return SDValue();
16194
16195 // Don't do this for f16 with Zfhmin and not Zfh.
16196 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
16197 return SDValue();
16198
16199 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
16200
16201 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
16202 if (FRM == RISCVFPRndMode::Invalid)
16203 return SDValue();
16204
16205 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
16206
16207 unsigned Opc;
16208 if (SatVT == DstVT)
16209 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
16210 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
16212 else
16213 return SDValue();
16214 // FIXME: Support other SatVTs by clamping before or after the conversion.
16215
16216 Src = Src.getOperand(0);
16217
16218 SDLoc DL(N);
16219 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
16220 DAG.getTargetConstant(FRM, DL, XLenVT));
16221
16222 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
16223 // extend.
16224 if (Opc == RISCVISD::FCVT_WU_RV64)
16225 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
16226
16227 // RISC-V FP-to-int conversions saturate to the destination register size, but
16228 // don't produce 0 for nan.
16229 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
16230 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
16231}
16232
16233// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
16234// smaller than XLenVT.
16236 const RISCVSubtarget &Subtarget) {
16237 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
16238
16239 SDValue Src = N->getOperand(0);
16240 if (Src.getOpcode() != ISD::BSWAP)
16241 return SDValue();
16242
16243 EVT VT = N->getValueType(0);
16244 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
16245 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
16246 return SDValue();
16247
16248 SDLoc DL(N);
16249 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
16250}
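// For example (illustrative): for an i16 value on RV64, bswap swaps the two
// bytes and bitreverse then reverses all 16 bits, which nets out to reversing
// the bits within each byte, i.e. a single BREV8.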
16251
16253 const RISCVSubtarget &Subtarget) {
16254 // Fold:
16255 // vp.reverse(vp.load(ADDR, MASK)) -> vp.strided.load(ADDR, -1, MASK)
16256
16257 // Check if its first operand is a vp.load.
16258 auto *VPLoad = dyn_cast<VPLoadSDNode>(N->getOperand(0));
16259 if (!VPLoad)
16260 return SDValue();
16261
16262 EVT LoadVT = VPLoad->getValueType(0);
16263 // We do not have a strided_load version for masks, and the evl of vp.reverse
16264 // and vp.load should always be the same.
16265 if (!LoadVT.getVectorElementType().isByteSized() ||
16266 N->getOperand(2) != VPLoad->getVectorLength() ||
16267 !N->getOperand(0).hasOneUse())
16268 return SDValue();
16269
16270 // Check if the mask of the outer vp.reverse is all 1's.
16271 if (!isOneOrOneSplat(N->getOperand(1)))
16272 return SDValue();
16273
16274 SDValue LoadMask = VPLoad->getMask();
16275 // If Mask is all ones, then load is unmasked and can be reversed.
16276 if (!isOneOrOneSplat(LoadMask)) {
16277 // If the mask is not all ones, we can reverse the load if the mask was also
16278 // reversed by an unmasked vp.reverse with the same EVL.
16279 if (LoadMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
16280 !isOneOrOneSplat(LoadMask.getOperand(1)) ||
16281 LoadMask.getOperand(2) != VPLoad->getVectorLength())
16282 return SDValue();
16283 LoadMask = LoadMask.getOperand(0);
16284 }
16285
16286 // Base = LoadAddr + (NumElem - 1) * ElemWidthByte
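// For example (illustrative): with an EVL of 4 and i32 elements loaded from p,
// the strided load starts at p + 12 with a stride of -4, so the lanes come
// back already reversed.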
16287 SDLoc DL(N);
16288 MVT XLenVT = Subtarget.getXLenVT();
16289 SDValue NumElem = VPLoad->getVectorLength();
16290 uint64_t ElemWidthByte = VPLoad->getValueType(0).getScalarSizeInBits() / 8;
16291
16292 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
16293 DAG.getConstant(1, DL, XLenVT));
16294 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
16295 DAG.getConstant(ElemWidthByte, DL, XLenVT));
16296 SDValue Base = DAG.getNode(ISD::ADD, DL, XLenVT, VPLoad->getBasePtr(), Temp2);
16297 SDValue Stride = DAG.getConstant(-ElemWidthByte, DL, XLenVT);
16298
16300 MachinePointerInfo PtrInfo(VPLoad->getAddressSpace());
16302 PtrInfo, VPLoad->getMemOperand()->getFlags(),
16303 LocationSize::beforeOrAfterPointer(), VPLoad->getAlign());
16304
16305 SDValue Ret = DAG.getStridedLoadVP(
16306 LoadVT, DL, VPLoad->getChain(), Base, Stride, LoadMask,
16307 VPLoad->getVectorLength(), MMO, VPLoad->isExpandingLoad());
16308
16309 DAG.ReplaceAllUsesOfValueWith(SDValue(VPLoad, 1), Ret.getValue(1));
16310
16311 return Ret;
16312}
16313
16315 const RISCVSubtarget &Subtarget) {
16316 // Fold:
16317 // vp.store(vp.reverse(VAL), ADDR, MASK) -> vp.strided.store(VAL, NEW_ADDR,
16318 // -1, MASK)
16319 auto *VPStore = cast<VPStoreSDNode>(N);
16320
16321 if (VPStore->getValue().getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE)
16322 return SDValue();
16323
16324 SDValue VPReverse = VPStore->getValue();
16325 EVT ReverseVT = VPReverse->getValueType(0);
16326
16327 // We do not have a strided_store version for masks, and the evl of vp.reverse
16328 // and vp.store should always be the same.
16329 if (!ReverseVT.getVectorElementType().isByteSized() ||
16330 VPStore->getVectorLength() != VPReverse.getOperand(2) ||
16331 !VPReverse.hasOneUse())
16332 return SDValue();
16333
16334 SDValue StoreMask = VPStore->getMask();
16335 // If Mask is all ones, then the store is unmasked and can be reversed.
16336 if (!isOneOrOneSplat(StoreMask)) {
16337 // If the mask is not all ones, we can reverse the store if the mask was
16338 // also reversed by an unmasked vp.reverse with the same EVL.
16339 if (StoreMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
16340 !isOneOrOneSplat(StoreMask.getOperand(1)) ||
16341 StoreMask.getOperand(2) != VPStore->getVectorLength())
16342 return SDValue();
16343 StoreMask = StoreMask.getOperand(0);
16344 }
16345
16346 // Base = StoreAddr + (NumElem - 1) * ElemWidthByte
16347 SDLoc DL(N);
16348 MVT XLenVT = Subtarget.getXLenVT();
16349 SDValue NumElem = VPStore->getVectorLength();
16350 uint64_t ElemWidthByte = VPReverse.getValueType().getScalarSizeInBits() / 8;
16351
16352 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
16353 DAG.getConstant(1, DL, XLenVT));
16354 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
16355 DAG.getConstant(ElemWidthByte, DL, XLenVT));
16356 SDValue Base =
16357 DAG.getNode(ISD::ADD, DL, XLenVT, VPStore->getBasePtr(), Temp2);
16358 SDValue Stride = DAG.getConstant(-ElemWidthByte, DL, XLenVT);
16359
16361 MachinePointerInfo PtrInfo(VPStore->getAddressSpace());
16363 PtrInfo, VPStore->getMemOperand()->getFlags(),
16364 LocationSize::beforeOrAfterPointer(), VPStore->getAlign());
16365
16366 return DAG.getStridedStoreVP(
16367 VPStore->getChain(), DL, VPReverse.getOperand(0), Base,
16368 VPStore->getOffset(), Stride, StoreMask, VPStore->getVectorLength(),
16369 VPStore->getMemoryVT(), MMO, VPStore->getAddressingMode(),
16370 VPStore->isTruncatingStore(), VPStore->isCompressingStore());
16371}
16372
16373// Convert from one FMA opcode to another based on whether we are negating the
16374// multiply result and/or the accumulator.
16375// NOTE: Only supports RVV operations with VL.
16376static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
16377 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
16378 if (NegMul) {
16379 // clang-format off
16380 switch (Opcode) {
16381 default: llvm_unreachable("Unexpected opcode");
16382 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
16383 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
16384 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
16385 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
16390 }
16391 // clang-format on
16392 }
16393
16394 // Negating the accumulator changes ADD<->SUB.
16395 if (NegAcc) {
16396 // clang-format off
16397 switch (Opcode) {
16398 default: llvm_unreachable("Unexpected opcode");
16399 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
16400 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
16401 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
16402 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
16407 }
16408 // clang-format on
16409 }
16410
16411 return Opcode;
16412}
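// For example (illustrative): starting from VFMADD_VL (a * b + c), NegMul
// yields VFNMSUB_VL (-(a * b) + c), and additionally applying NegAcc yields
// VFNMADD_VL (-(a * b) - c).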
16413
16415 // Fold FNEG_VL into FMA opcodes.
16416 // The first operand of strict-fp is chain.
16417 bool IsStrict =
16418 DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode());
16419 unsigned Offset = IsStrict ? 1 : 0;
16420 SDValue A = N->getOperand(0 + Offset);
16421 SDValue B = N->getOperand(1 + Offset);
16422 SDValue C = N->getOperand(2 + Offset);
16423 SDValue Mask = N->getOperand(3 + Offset);
16424 SDValue VL = N->getOperand(4 + Offset);
16425
16426 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
16427 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
16428 V.getOperand(2) == VL) {
16429 // Return the negated input.
16430 V = V.getOperand(0);
16431 return true;
16432 }
16433
16434 return false;
16435 };
16436
16437 bool NegA = invertIfNegative(A);
16438 bool NegB = invertIfNegative(B);
16439 bool NegC = invertIfNegative(C);
16440
16441 // If no operands are negated, we're done.
16442 if (!NegA && !NegB && !NegC)
16443 return SDValue();
16444
16445 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
16446 if (IsStrict)
16447 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
16448 {N->getOperand(0), A, B, C, Mask, VL});
16449 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
16450 VL);
16451}
16452
16455 const RISCVSubtarget &Subtarget) {
16456 SelectionDAG &DAG = DCI.DAG;
16457
16459 return V;
16460
16461 // FIXME: Ignore strict opcodes for now.
16462 if (DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode()))
16463 return SDValue();
16464
16465 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
16466}
16467
16469 const RISCVSubtarget &Subtarget) {
16470 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
16471
16472 EVT VT = N->getValueType(0);
16473
16474 if (VT != Subtarget.getXLenVT())
16475 return SDValue();
16476
16477 if (!isa<ConstantSDNode>(N->getOperand(1)))
16478 return SDValue();
16479 uint64_t ShAmt = N->getConstantOperandVal(1);
16480
16481 SDValue N0 = N->getOperand(0);
16482
16483 // Combine (sra (sext_inreg (shl X, C1), iX), C2) ->
16484 // (sra (shl X, C1+(XLen-iX)), C2+(XLen-iX)) so it gets selected as SLLI+SRAI.
16485 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse()) {
16486 unsigned ExtSize =
16487 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
16488 if (ShAmt < ExtSize && N0.getOperand(0).getOpcode() == ISD::SHL &&
16489 N0.getOperand(0).hasOneUse() &&
16490 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
16491 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
16492 if (LShAmt < ExtSize) {
16493 unsigned Size = VT.getSizeInBits();
16494 SDLoc ShlDL(N0.getOperand(0));
16495 SDValue Shl =
16496 DAG.getNode(ISD::SHL, ShlDL, VT, N0.getOperand(0).getOperand(0),
16497 DAG.getConstant(LShAmt + (Size - ExtSize), ShlDL, VT));
16498 SDLoc DL(N);
16499 return DAG.getNode(ISD::SRA, DL, VT, Shl,
16500 DAG.getConstant(ShAmt + (Size - ExtSize), DL, VT));
16501 }
16502 }
16503 }
16504
16505 if (ShAmt > 32 || VT != MVT::i64)
16506 return SDValue();
16507
16508 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
16509 // FIXME: Should this be a generic combine? There's a similar combine on X86.
16510 //
16511 // Also try these folds where an add or sub is in the middle.
16512 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1)), C)
16513 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X)), C)
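// Worked example (illustrative): with ShAmt == 27, (sra (shl X, 32), 27) sign
// extends the low 32 bits of X and shifts the result left by 32 - 27 == 5, so
// it becomes (shl (sext_inreg X, i32), 5).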
16514 SDValue Shl;
16515 ConstantSDNode *AddC = nullptr;
16516
16517 // We might have an ADD or SUB between the SRA and SHL.
16518 bool IsAdd = N0.getOpcode() == ISD::ADD;
16519 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
16520 // Other operand needs to be a constant we can modify.
16521 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
16522 if (!AddC)
16523 return SDValue();
16524
16525 // AddC needs to have at least 32 trailing zeros.
16526 if (llvm::countr_zero(AddC->getZExtValue()) < 32)
16527 return SDValue();
16528
16529 // All users should be a shift by constant less than or equal to 32. This
16530 // ensures we'll do this optimization for each of them to produce an
16531 // add/sub+sext_inreg they can all share.
16532 for (SDNode *U : N0->users()) {
16533 if (U->getOpcode() != ISD::SRA ||
16534 !isa<ConstantSDNode>(U->getOperand(1)) ||
16535 U->getConstantOperandVal(1) > 32)
16536 return SDValue();
16537 }
16538
16539 Shl = N0.getOperand(IsAdd ? 0 : 1);
16540 } else {
16541 // Not an ADD or SUB.
16542 Shl = N0;
16543 }
16544
16545 // Look for a shift left by 32.
16546 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
16547 Shl.getConstantOperandVal(1) != 32)
16548 return SDValue();
16549
16550 // If we didn't look through an add/sub, then the shl should have one use.
16551 // If we did look through an add/sub, the sext_inreg we create is free so
16552 // we're only creating 2 new instructions. It's enough to only remove the
16553 // original sra+add/sub.
16554 if (!AddC && !Shl.hasOneUse())
16555 return SDValue();
16556
16557 SDLoc DL(N);
16558 SDValue In = Shl.getOperand(0);
16559
16560 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
16561 // constant.
16562 if (AddC) {
16563 SDValue ShiftedAddC =
16564 DAG.getConstant(AddC->getZExtValue() >> 32, DL, MVT::i64);
16565 if (IsAdd)
16566 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
16567 else
16568 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
16569 }
16570
16571 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
16572 DAG.getValueType(MVT::i32));
16573 if (ShAmt == 32)
16574 return SExt;
16575
16576 return DAG.getNode(
16577 ISD::SHL, DL, MVT::i64, SExt,
16578 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
16579}
16580
16581// Invert (and/or (setcc X, Y), (xor Z, 1)) to (or/and (setcc !cc X, Y), Z) if
16582// the result is used as the condition of a br_cc or select_cc we can invert,
16583// inverting the setcc is free, and Z is 0/1. Caller will invert the
16584// br_cc/select_cc.
16585static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
16586 bool IsAnd = Cond.getOpcode() == ISD::AND;
16587 if (!IsAnd && Cond.getOpcode() != ISD::OR)
16588 return SDValue();
16589
16590 if (!Cond.hasOneUse())
16591 return SDValue();
16592
16593 SDValue Setcc = Cond.getOperand(0);
16594 SDValue Xor = Cond.getOperand(1);
16595 // Canonicalize setcc to LHS.
16596 if (Setcc.getOpcode() != ISD::SETCC)
16597 std::swap(Setcc, Xor);
16598 // LHS should be a setcc and RHS should be an xor.
16599 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
16600 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
16601 return SDValue();
16602
16603 // If the condition is an And, SimplifyDemandedBits may have changed
16604 // (xor Z, 1) to (not Z).
16605 SDValue Xor1 = Xor.getOperand(1);
16606 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
16607 return SDValue();
16608
16609 EVT VT = Cond.getValueType();
16610 SDValue Xor0 = Xor.getOperand(0);
16611
16612 // The LHS of the xor needs to be 0/1.
16614 if (!DAG.MaskedValueIsZero(Xor0, Mask))
16615 return SDValue();
16616
16617 // We can only invert integer setccs.
16618 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
16619 if (!SetCCOpVT.isScalarInteger())
16620 return SDValue();
16621
16622 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
16623 if (ISD::isIntEqualitySetCC(CCVal)) {
16624 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
16625 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
16626 Setcc.getOperand(1), CCVal);
16627 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
16628 // Invert (setlt 0, X) by converting to (setlt X, 1).
16629 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
16630 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
16631 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
16632 // Invert (setlt X, 1) by converting to (setlt 0, X).
16633 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
16634 DAG.getConstant(0, SDLoc(Setcc), VT),
16635 Setcc.getOperand(0), CCVal);
16636 } else
16637 return SDValue();
16638
16639 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
16640 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
16641}
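// For example (illustrative): a branch taken when
// (and (setcc a, b, seteq), (xor z, 1)) is nonzero is rewritten to a branch
// taken when (or (setcc a, b, setne), z) is zero, removing the xor; the caller
// in combine_CC below performs the matching condition inversion.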
16642
16643// Perform common combines for BR_CC and SELECT_CC conditions.
16644static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
16645 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
16646 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
16647
16648 // Since an arithmetic right shift always preserves the sign bit, the
16649 // shift can be omitted.
16650 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
16651 // setge (sra X, N), 0 -> setge X, 0
16652 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
16653 LHS.getOpcode() == ISD::SRA) {
16654 LHS = LHS.getOperand(0);
16655 return true;
16656 }
16657
16658 if (!ISD::isIntEqualitySetCC(CCVal))
16659 return false;
16660
16661 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
16662 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
16663 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
16664 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
16665 // If we're looking for eq 0 instead of ne 0, we need to invert the
16666 // condition.
16667 bool Invert = CCVal == ISD::SETEQ;
16668 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
16669 if (Invert)
16670 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
16671
16672 RHS = LHS.getOperand(1);
16673 LHS = LHS.getOperand(0);
16674 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
16675
16676 CC = DAG.getCondCode(CCVal);
16677 return true;
16678 }
16679
16680 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
16681 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
16682 RHS = LHS.getOperand(1);
16683 LHS = LHS.getOperand(0);
16684 return true;
16685 }
16686
16687 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
16688 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
16689 LHS.getOperand(1).getOpcode() == ISD::Constant) {
16690 SDValue LHS0 = LHS.getOperand(0);
16691 if (LHS0.getOpcode() == ISD::AND &&
16692 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
16693 uint64_t Mask = LHS0.getConstantOperandVal(1);
16694 uint64_t ShAmt = LHS.getConstantOperandVal(1);
16695 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
16696 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
16697 CC = DAG.getCondCode(CCVal);
16698
16699 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
16700 LHS = LHS0.getOperand(0);
16701 if (ShAmt != 0)
16702 LHS =
16703 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
16704 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
16705 return true;
16706 }
16707 }
16708 }
16709
16710 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
16711 // This can occur when legalizing some floating point comparisons.
16712 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
16713 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
16714 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
16715 CC = DAG.getCondCode(CCVal);
16716 RHS = DAG.getConstant(0, DL, LHS.getValueType());
16717 return true;
16718 }
16719
16720 if (isNullConstant(RHS)) {
16721 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
16722 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
16723 CC = DAG.getCondCode(CCVal);
16724 LHS = NewCond;
16725 return true;
16726 }
16727 }
16728
16729 return false;
16730}
16731
16732// Fold
16733// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
16734// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
16735// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
16736// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
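// For example (illustrative): (select c, (add y, x), y) becomes
// (add y, (select c, x, 0)), and the inner select of x against 0 can then be
// lowered to a conditional-zero operation (e.g. czero.eqz with Zicond).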
16737static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
16738 SDValue TrueVal, SDValue FalseVal,
16739 bool Swapped) {
16740 bool Commutative = true;
16741 unsigned Opc = TrueVal.getOpcode();
16742 switch (Opc) {
16743 default:
16744 return SDValue();
16745 case ISD::SHL:
16746 case ISD::SRA:
16747 case ISD::SRL:
16748 case ISD::SUB:
16749 Commutative = false;
16750 break;
16751 case ISD::ADD:
16752 case ISD::OR:
16753 case ISD::XOR:
16754 break;
16755 }
16756
16757 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
16758 return SDValue();
16759
16760 unsigned OpToFold;
16761 if (FalseVal == TrueVal.getOperand(0))
16762 OpToFold = 0;
16763 else if (Commutative && FalseVal == TrueVal.getOperand(1))
16764 OpToFold = 1;
16765 else
16766 return SDValue();
16767
16768 EVT VT = N->getValueType(0);
16769 SDLoc DL(N);
16770 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
16771 EVT OtherOpVT = OtherOp.getValueType();
16772 SDValue IdentityOperand =
16773 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
16774 if (!Commutative)
16775 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
16776 assert(IdentityOperand && "No identity operand!");
16777
16778 if (Swapped)
16779 std::swap(OtherOp, IdentityOperand);
16780 SDValue NewSel =
16781 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
16782 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
16783}
16784
16785// This tries to get rid of the `select` and `icmp` that are used to handle
16786// targets that do not support `cttz(0)`/`ctlz(0)`.
16787static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
16788 SDValue Cond = N->getOperand(0);
16789
16790 // This represents either CTTZ or CTLZ instruction.
16791 SDValue CountZeroes;
16792
16793 SDValue ValOnZero;
16794
16795 if (Cond.getOpcode() != ISD::SETCC)
16796 return SDValue();
16797
16798 if (!isNullConstant(Cond->getOperand(1)))
16799 return SDValue();
16800
16801 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
16802 if (CCVal == ISD::CondCode::SETEQ) {
16803 CountZeroes = N->getOperand(2);
16804 ValOnZero = N->getOperand(1);
16805 } else if (CCVal == ISD::CondCode::SETNE) {
16806 CountZeroes = N->getOperand(1);
16807 ValOnZero = N->getOperand(2);
16808 } else {
16809 return SDValue();
16810 }
16811
16812 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
16813 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
16814 CountZeroes = CountZeroes.getOperand(0);
16815
16816 if (CountZeroes.getOpcode() != ISD::CTTZ &&
16817 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
16818 CountZeroes.getOpcode() != ISD::CTLZ &&
16819 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
16820 return SDValue();
16821
16822 if (!isNullConstant(ValOnZero))
16823 return SDValue();
16824
16825 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
16826 if (Cond->getOperand(0) != CountZeroesArgument)
16827 return SDValue();
16828
16829 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
16830 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
16831 CountZeroes.getValueType(), CountZeroesArgument);
16832 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
16833 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
16834 CountZeroes.getValueType(), CountZeroesArgument);
16835 }
16836
16837 unsigned BitWidth = CountZeroes.getValueSizeInBits();
16838 SDValue BitWidthMinusOne =
16839 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
16840
16841 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
16842 CountZeroes, BitWidthMinusOne);
16843 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
16844}
16845
16846static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
16847 const RISCVSubtarget &Subtarget) {
16848 SDValue Cond = N->getOperand(0);
16849 SDValue True = N->getOperand(1);
16850 SDValue False = N->getOperand(2);
16851 SDLoc DL(N);
16852 EVT VT = N->getValueType(0);
16853 EVT CondVT = Cond.getValueType();
16854
16855 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
16856 return SDValue();
16857
16858 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C)) to generate
16859 // BEXTI, where C is a power of 2.
16860 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
16861 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
16862 SDValue LHS = Cond.getOperand(0);
16863 SDValue RHS = Cond.getOperand(1);
16864 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16865 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
16866 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
16867 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
16868 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
16869 return DAG.getSelect(DL, VT,
16870 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
16871 False, True);
16872 }
16873 }
16874 return SDValue();
16875}
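// For example (illustrative): with Zbs and Zicond,
//   (select (seteq (and x, 0x1000), 0), t, f)
// is rewritten to (select (setne (and x, 0x1000), 0), f, t), so the test can
// be selected as a single-bit extract (bexti x, 12) feeding the conditional.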
16876
16878 const RISCVSubtarget &Subtarget) {
16879 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
16880 return Folded;
16881
16882 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
16883 return V;
16884
16885 if (Subtarget.hasConditionalMoveFusion())
16886 return SDValue();
16887
16888 SDValue TrueVal = N->getOperand(1);
16889 SDValue FalseVal = N->getOperand(2);
16890 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
16891 return V;
16892 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
16893}
16894
16895/// If we have a build_vector where each lane is binop X, C, where C
16896/// is a constant (but not necessarily the same constant on all lanes),
16897/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ...).
16898/// We assume that materializing a constant build vector will be no more
16899/// expensive than performing O(n) binops.
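/// For example (illustrative):
///   build_vector (add a, 1), (add b, 2), (add c, 3), (add d, 4)
/// becomes add (build_vector a, b, c, d), (build_vector 1, 2, 3, 4), trading
/// four scalar adds for one vector add plus a constant vector.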
16901 const RISCVSubtarget &Subtarget,
16902 const RISCVTargetLowering &TLI) {
16903 SDLoc DL(N);
16904 EVT VT = N->getValueType(0);
16905
16906 assert(!VT.isScalableVector() && "unexpected build vector");
16907
16908 if (VT.getVectorNumElements() == 1)
16909 return SDValue();
16910
16911 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
16912 if (!TLI.isBinOp(Opcode))
16913 return SDValue();
16914
16915 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
16916 return SDValue();
16917
16918 // This BUILD_VECTOR involves an implicit truncation, and sinking
16919 // truncates through binops is non-trivial.
16920 if (N->op_begin()->getValueType() != VT.getVectorElementType())
16921 return SDValue();
16922
16923 SmallVector<SDValue> LHSOps;
16924 SmallVector<SDValue> RHSOps;
16925 for (SDValue Op : N->ops()) {
16926 if (Op.isUndef()) {
16927 // We can't form a divide or remainder from undef.
16928 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
16929 return SDValue();
16930
16931 LHSOps.push_back(Op);
16932 RHSOps.push_back(Op);
16933 continue;
16934 }
16935
16936 // TODO: We can handle operations which have a neutral rhs value
16937 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
16938 // of profit in a more explicit manner.
16939 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
16940 return SDValue();
16941
16942 LHSOps.push_back(Op.getOperand(0));
16943 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
16944 !isa<ConstantFPSDNode>(Op.getOperand(1)))
16945 return SDValue();
16946 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
16947 // have different LHS and RHS types.
16948 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
16949 return SDValue();
16950
16951 RHSOps.push_back(Op.getOperand(1));
16952 }
16953
16954 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
16955 DAG.getBuildVector(VT, DL, RHSOps));
16956}
16957
16959 const RISCVSubtarget &Subtarget,
16960 const RISCVTargetLowering &TLI) {
16961 SDValue InVec = N->getOperand(0);
16962 SDValue InVal = N->getOperand(1);
16963 SDValue EltNo = N->getOperand(2);
16964 SDLoc DL(N);
16965
16966 EVT VT = InVec.getValueType();
16967 if (VT.isScalableVector())
16968 return SDValue();
16969
16970 if (!InVec.hasOneUse())
16971 return SDValue();
16972
16973 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
16974 // move the insert_vector_elts into the arms of the binop. Note that
16975 // the new RHS must be a constant.
16976 const unsigned InVecOpcode = InVec->getOpcode();
16977 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
16978 InVal.hasOneUse()) {
16979 SDValue InVecLHS = InVec->getOperand(0);
16980 SDValue InVecRHS = InVec->getOperand(1);
16981 SDValue InValLHS = InVal->getOperand(0);
16982 SDValue InValRHS = InVal->getOperand(1);
16983
16985 return SDValue();
16986 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
16987 return SDValue();
16988 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
16989 // have different LHS and RHS types.
16990 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
16991 return SDValue();
16993 InVecLHS, InValLHS, EltNo);
16995 InVecRHS, InValRHS, EltNo);
16996 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
16997 }
16998
16999 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
17000 // move the insert_vector_elt to the source operand of the concat_vector.
17001 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
17002 return SDValue();
17003
17004 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
17005 if (!IndexC)
17006 return SDValue();
17007 unsigned Elt = IndexC->getZExtValue();
17008
17009 EVT ConcatVT = InVec.getOperand(0).getValueType();
17010 if (ConcatVT.getVectorElementType() != InVal.getValueType())
17011 return SDValue();
17012 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
17013 SDValue NewIdx = DAG.getVectorIdxConstant(Elt % ConcatNumElts, DL);
17014
17015 unsigned ConcatOpIdx = Elt / ConcatNumElts;
17016 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
17017 ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,
17018 ConcatOp, InVal, NewIdx);
17019
17020 SmallVector<SDValue> ConcatOps;
17021 ConcatOps.append(InVec->op_begin(), InVec->op_end());
17022 ConcatOps[ConcatOpIdx] = ConcatOp;
17023 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
17024}
17025
17026// If we're concatenating a series of vector loads like
17027// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
17028// Then we can turn this into a strided load by widening the vector elements
17029// vlse32 p, stride=n
17031 const RISCVSubtarget &Subtarget,
17032 const RISCVTargetLowering &TLI) {
17033 SDLoc DL(N);
17034 EVT VT = N->getValueType(0);
17035
17036 // Only perform this combine on legal MVTs.
17037 if (!TLI.isTypeLegal(VT))
17038 return SDValue();
17039
17040 // TODO: Potentially extend this to scalable vectors
17041 if (VT.isScalableVector())
17042 return SDValue();
17043
17044 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
17045 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
17046 !SDValue(BaseLd, 0).hasOneUse())
17047 return SDValue();
17048
17049 EVT BaseLdVT = BaseLd->getValueType(0);
17050
17051 // Go through the loads and check that they're strided
17053 Lds.push_back(BaseLd);
17054 Align Align = BaseLd->getAlign();
17055 for (SDValue Op : N->ops().drop_front()) {
17056 auto *Ld = dyn_cast<LoadSDNode>(Op);
17057 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
17058 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
17059 Ld->getValueType(0) != BaseLdVT)
17060 return SDValue();
17061
17062 Lds.push_back(Ld);
17063
17064 // The common alignment is the most restrictive (smallest) of all the loads
17065 Align = std::min(Align, Ld->getAlign());
17066 }
17067
17068 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
17069 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
17070 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
17071 // If the load ptrs can be decomposed into a common (Base + Index) with a
17072 // common constant stride, then return the constant stride.
17073 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
17074 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
17075 if (BIO1.equalBaseIndex(BIO2, DAG))
17076 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
17077
17078 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
17079 SDValue P1 = Ld1->getBasePtr();
17080 SDValue P2 = Ld2->getBasePtr();
17081 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
17082 return {{P2.getOperand(1), false}};
17083 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
17084 return {{P1.getOperand(1), true}};
17085
17086 return std::nullopt;
17087 };
17088
17089 // Get the distance between the first and second loads
17090 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
17091 if (!BaseDiff)
17092 return SDValue();
17093
17094 // Check all the loads are the same distance apart
17095 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
17096 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
17097 return SDValue();
17098
17099 // TODO: At this point, we've successfully matched a generalized gather
17100 // load. Maybe we should emit that, and then move the specialized
17101 // matchers above and below into a DAG combine?
17102
17103 // Get the widened scalar type, e.g. v4i8 -> i64
17104 unsigned WideScalarBitWidth =
17105 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
17106 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
17107
17108 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
17109 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
17110 if (!TLI.isTypeLegal(WideVecVT))
17111 return SDValue();
17112
17113 // Check that the operation is legal
17114 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
17115 return SDValue();
17116
17117 auto [StrideVariant, MustNegateStride] = *BaseDiff;
17118 SDValue Stride =
17119 std::holds_alternative<SDValue>(StrideVariant)
17120 ? std::get<SDValue>(StrideVariant)
17121 : DAG.getSignedConstant(std::get<int64_t>(StrideVariant), DL,
17122 Lds[0]->getOffset().getValueType());
17123 if (MustNegateStride)
17124 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
17125
17126 SDValue AllOneMask =
17127 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
17128 DAG.getConstant(1, DL, MVT::i1));
17129
17130 uint64_t MemSize;
17131 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
17132 ConstStride && ConstStride->getSExtValue() >= 0)
17133 // total size = (elsize * n) + (stride - elsize) * (n-1)
17134 // = elsize + stride * (n-1)
17135 MemSize = WideScalarVT.getSizeInBits() +
17136 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
17137 else
17138 // If Stride isn't constant, then we can't know how much it will load
17139 MemSize = MemoryLocation::UnknownSize;
17140
17141 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
17142 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
17143 Align);
17144
17145 SDValue StridedLoad = DAG.getStridedLoadVP(
17146 WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
17147 AllOneMask,
17148 DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);
17149
17150 for (SDValue Ld : N->ops())
17151 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
17152
17153 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
17154}
17155
17156/// Custom legalize <N x i128> or <N x i256> to <M x ELEN>. This runs
17157/// during the combine phase before type legalization, and relies on
17158/// DAGCombine not undoing the transform if isShuffleMaskLegal returns false
17159/// for the source mask.
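/// For example (illustrative): on a target with ELEN=64, a v4i128 shuffle whose
/// mask is not already legal is rewritten as a v8i64 shuffle, each original mask
/// element being split into two adjacent i64 mask elements.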
17160 static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG,
17161 const RISCVSubtarget &Subtarget,
17162 const RISCVTargetLowering &TLI) {
17163 SDLoc DL(N);
17164 EVT VT = N->getValueType(0);
17165 const unsigned ElementSize = VT.getScalarSizeInBits();
17166 SDValue V1 = N->getOperand(0);
17167 SDValue V2 = N->getOperand(1);
17168 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
17169
17170 if (TLI.isTypeLegal(VT) || ElementSize <= Subtarget.getELen() ||
17171 !isPowerOf2_64(ElementSize) || VT.getVectorNumElements() % 2 != 0 ||
17172 VT.isFloatingPoint() || TLI.isShuffleMaskLegal(Mask, VT))
17173 return SDValue();
17174
17175 SmallVector<int, 8> NewMask;
17176 narrowShuffleMaskElts(2, Mask, NewMask);
17177
17178 LLVMContext &C = *DAG.getContext();
17179 EVT NewEltVT = EVT::getIntegerVT(C, ElementSize / 2);
17180 EVT NewVT = EVT::getVectorVT(C, NewEltVT, VT.getVectorNumElements() * 2);
17181 SDValue Res = DAG.getVectorShuffle(NewVT, DL, DAG.getBitcast(NewVT, V1),
17182 DAG.getBitcast(NewVT, V2), NewMask);
17183 return DAG.getBitcast(VT, Res);
17184}
17185
17186
17187 static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
17188 const RISCVSubtarget &Subtarget) {
17189
17190 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
17191
17192 if (N->getValueType(0).isFixedLengthVector())
17193 return SDValue();
17194
17195 SDValue Addend = N->getOperand(0);
17196 SDValue MulOp = N->getOperand(1);
17197
17198 if (N->getOpcode() == RISCVISD::ADD_VL) {
17199 SDValue AddPassthruOp = N->getOperand(2);
17200 if (!AddPassthruOp.isUndef())
17201 return SDValue();
17202 }
17203
17204 auto IsVWMulOpc = [](unsigned Opc) {
17205 switch (Opc) {
17206 case RISCVISD::VWMUL_VL:
17207 case RISCVISD::VWMULU_VL:
17208 case RISCVISD::VWMULSU_VL:
17209 return true;
17210 default:
17211 return false;
17212 }
17213 };
17214
17215 if (!IsVWMulOpc(MulOp.getOpcode()))
17216 std::swap(Addend, MulOp);
17217
17218 if (!IsVWMulOpc(MulOp.getOpcode()))
17219 return SDValue();
17220
17221 SDValue MulPassthruOp = MulOp.getOperand(2);
17222
17223 if (!MulPassthruOp.isUndef())
17224 return SDValue();
17225
17226 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
17227 const RISCVSubtarget &Subtarget) {
17228 if (N->getOpcode() == ISD::ADD) {
17229 SDLoc DL(N);
17230 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
17231 Subtarget);
17232 }
17233 return std::make_pair(N->getOperand(3), N->getOperand(4));
17234 }(N, DAG, Subtarget);
17235
17236 SDValue MulMask = MulOp.getOperand(3);
17237 SDValue MulVL = MulOp.getOperand(4);
17238
17239 if (AddMask != MulMask || AddVL != MulVL)
17240 return SDValue();
17241
17242 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
17243 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
17244 "Unexpected opcode after VWMACC_VL");
17245 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
17246 "Unexpected opcode after VWMACC_VL!");
17247 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
17248 "Unexpected opcode after VWMUL_VL!");
17249 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
17250 "Unexpected opcode after VWMUL_VL!");
17251
17252 SDLoc DL(N);
17253 EVT VT = N->getValueType(0);
17254 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
17255 AddVL};
17256 return DAG.getNode(Opc, DL, VT, Ops);
17257}
17258
17259 static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
17260 ISD::MemIndexType &IndexType,
17261 RISCVTargetLowering::DAGCombinerInfo &DCI) {
17262 if (!DCI.isBeforeLegalize())
17263 return false;
17264
17265 SelectionDAG &DAG = DCI.DAG;
17266 const MVT XLenVT =
17267 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
17268
17269 const EVT IndexVT = Index.getValueType();
17270
17271 // RISC-V indexed loads only support the "unsigned unscaled" addressing
17272 // mode, so anything else must be manually legalized.
17273 if (!isIndexTypeSigned(IndexType))
17274 return false;
17275
17276 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
17277 // Any index legalization should first promote to XLenVT, so we don't lose
17278 // bits when scaling. This may create an illegal index type so we let
17279 // LLVM's legalization take care of the splitting.
17280 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
17281 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
17282 IndexVT.changeVectorElementType(XLenVT), Index);
17283 }
17284 IndexType = ISD::UNSIGNED_SCALED;
17285 return true;
17286}
17287
17288/// Match the index vector of a scatter or gather node as the shuffle mask
17289/// which performs the rearrangement if possible. Will only match if
17290/// all lanes are touched, and thus replacing the scatter or gather with
17291/// a unit strided access and shuffle is legal.
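/// For example (illustrative): a gather of v4i32 with an all-ones mask and
/// constant byte offsets <4, 0, 12, 8> touches every lane exactly once, so it is
/// equivalent to a unit-strided load followed by the shuffle mask <1, 0, 3, 2>.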
17292static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
17293 SmallVector<int> &ShuffleMask) {
17294 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
17295 return false;
17296 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
17297 return false;
17298
17299 const unsigned ElementSize = VT.getScalarStoreSize();
17300 const unsigned NumElems = VT.getVectorNumElements();
17301
17302 // Create the shuffle mask and check all bits active
17303 assert(ShuffleMask.empty());
17304 BitVector ActiveLanes(NumElems);
17305 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
17306 // TODO: An undef index element here is UB, so we could be
17307 // more aggressive if desired.
17308 if (Index->getOperand(i)->isUndef())
17309 return false;
17310 uint64_t C = Index->getConstantOperandVal(i);
17311 if (C % ElementSize != 0)
17312 return false;
17313 C = C / ElementSize;
17314 if (C >= NumElems)
17315 return false;
17316 ShuffleMask.push_back(C);
17317 ActiveLanes.set(C);
17318 }
17319 return ActiveLanes.all();
17320}
17321
17322/// Match the index of a gather or scatter operation as an operation
17323/// with twice the element width and half the number of elements. This is
17324/// generally profitable (if legal) because these operations are linear
17325 /// in VL, so even if we cause some extra VTYPE/VL toggles, we still
17326/// come out ahead.
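/// For example (illustrative): an i32 gather with constant byte offsets
/// <0, 4, 16, 20> reads two adjacent i32 pairs at offsets 0 and 16, so (given
/// sufficient alignment) it can be done as an i64 gather of half as many elements.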
17327static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
17328 Align BaseAlign, const RISCVSubtarget &ST) {
17329 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
17330 return false;
17331 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
17332 return false;
17333
17334 // Attempt a doubling. If we can use an element type 4x or 8x in
17335 // size, this will happen via multiple iterations of the transform.
17336 const unsigned NumElems = VT.getVectorNumElements();
17337 if (NumElems % 2 != 0)
17338 return false;
17339
17340 const unsigned ElementSize = VT.getScalarStoreSize();
17341 const unsigned WiderElementSize = ElementSize * 2;
17342 if (WiderElementSize > ST.getELen()/8)
17343 return false;
17344
17345 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
17346 return false;
17347
17348 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
17349 // TODO: An undef index element here is UB, so we could be
17350 // more aggressive if desired.
17351 if (Index->getOperand(i)->isUndef())
17352 return false;
17353 // TODO: This offset check is too strict if we support fully
17354 // misaligned memory operations.
17355 uint64_t C = Index->getConstantOperandVal(i);
17356 if (i % 2 == 0) {
17357 if (C % WiderElementSize != 0)
17358 return false;
17359 continue;
17360 }
17361 uint64_t Last = Index->getConstantOperandVal(i-1);
17362 if (C != Last + ElementSize)
17363 return false;
17364 }
17365 return true;
17366}
17367
17368// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
17369 // This benefits cases where X and Y are both the same low-precision vector
17370 // type. Since the truncate would be lowered into
17371 // n levels of TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate
17372 // restriction, such a pattern would be expanded into a series of "vsetvli"
17373// and "vnsrl" instructions later to reach this point.
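// For example (illustrative): with X and Y both of type v8i8,
//   trunc (sra (sext X to v8i32), (zext Y to v8i32))
// becomes (sra X, (smin Y, 7)), avoiding the widening and the chain of
// "vnsrl" narrowing steps.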
17374 static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG) {
17375 SDValue Mask = N->getOperand(1);
17376 SDValue VL = N->getOperand(2);
17377
17378 bool IsVLMAX = isAllOnesConstant(VL) ||
17379 (isa<RegisterSDNode>(VL) &&
17380 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
17381 if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||
17382 Mask.getOperand(0) != VL)
17383 return SDValue();
17384
17385 auto IsTruncNode = [&](SDValue V) {
17386 return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
17387 V.getOperand(1) == Mask && V.getOperand(2) == VL;
17388 };
17389
17390 SDValue Op = N->getOperand(0);
17391
17392 // We first need to find the innermost TRUNCATE_VECTOR_VL node
17393 // to identify this pattern.
17394 while (IsTruncNode(Op)) {
17395 if (!Op.hasOneUse())
17396 return SDValue();
17397 Op = Op.getOperand(0);
17398 }
17399
17400 if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())
17401 return SDValue();
17402
17403 SDValue N0 = Op.getOperand(0);
17404 SDValue N1 = Op.getOperand(1);
17405 if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||
17406 N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())
17407 return SDValue();
17408
17409 SDValue N00 = N0.getOperand(0);
17410 SDValue N10 = N1.getOperand(0);
17411 if (!N00.getValueType().isVector() ||
17412 N00.getValueType() != N10.getValueType() ||
17413 N->getValueType(0) != N10.getValueType())
17414 return SDValue();
17415
17416 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
17417 SDValue SMin =
17418 DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
17419 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
17420 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
17421}
17422
17423// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
17424// maximum value for the truncated type.
17425// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
17426// is the signed maximum value for the truncated type and C2 is the signed
17427// minimum value.
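// For example (illustrative): a truncate of v4i16 to v4i8 whose source was
// first clamped with (umin X, 255) can be emitted as a single vnclipu
// (with a zero shift amount) instead of a separate min and narrowing shift.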
17428 static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
17429 const RISCVSubtarget &Subtarget) {
17430 assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);
17431
17432 MVT VT = N->getSimpleValueType(0);
17433
17434 SDValue Mask = N->getOperand(1);
17435 SDValue VL = N->getOperand(2);
17436
17437 auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,
17438 APInt &SplatVal) {
17439 if (V.getOpcode() != Opc &&
17440 !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
17441 V.getOperand(3) == Mask && V.getOperand(4) == VL))
17442 return SDValue();
17443
17444 SDValue Op = V.getOperand(1);
17445
17446 // Peek through conversion between fixed and scalable vectors.
17447 if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
17448 isNullConstant(Op.getOperand(2)) &&
17449 Op.getOperand(1).getValueType().isFixedLengthVector() &&
17450 Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
17451 Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() &&
17452 isNullConstant(Op.getOperand(1).getOperand(1)))
17453 Op = Op.getOperand(1).getOperand(0);
17454
17455 if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))
17456 return V.getOperand(0);
17457
17458 if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() &&
17459 Op.getOperand(2) == VL) {
17460 if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
17461 SplatVal =
17462 Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());
17463 return V.getOperand(0);
17464 }
17465 }
17466
17467 return SDValue();
17468 };
17469
17470 SDLoc DL(N);
17471
17472 auto DetectUSatPattern = [&](SDValue V) {
17473 APInt LoC, HiC;
17474
17475 // Simple case, V is a UMIN.
17476 if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
17477 if (HiC.isMask(VT.getScalarSizeInBits()))
17478 return UMinOp;
17479
17480 // If we have an SMAX that removes negative numbers first, then we can match
17481 // SMIN instead of UMIN.
17482 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17483 if (SDValue SMaxOp =
17484 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17485 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
17486 return SMinOp;
17487
17488 // If we have an SMIN before an SMAX and the SMAX constant is less than or
17489 // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
17490 // first.
17491 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17492 if (SDValue SMinOp =
17493 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17494 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
17495 HiC.uge(LoC))
17496 return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
17497 V.getOperand(1), DAG.getUNDEF(V.getValueType()),
17498 Mask, VL);
17499
17500 return SDValue();
17501 };
17502
17503 auto DetectSSatPattern = [&](SDValue V) {
17504 unsigned NumDstBits = VT.getScalarSizeInBits();
17505 unsigned NumSrcBits = V.getScalarValueSizeInBits();
17506 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
17507 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
17508
17509 APInt HiC, LoC;
17510 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17511 if (SDValue SMaxOp =
17512 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17513 if (HiC == SignedMax && LoC == SignedMin)
17514 return SMaxOp;
17515
17516 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17517 if (SDValue SMinOp =
17518 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17519 if (HiC == SignedMax && LoC == SignedMin)
17520 return SMinOp;
17521
17522 return SDValue();
17523 };
17524
17525 SDValue Src = N->getOperand(0);
17526
17527 // Look through multiple layers of truncates.
17528 while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
17529 Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
17530 Src.hasOneUse())
17531 Src = Src.getOperand(0);
17532
17533 SDValue Val;
17534 unsigned ClipOpc;
17535 if ((Val = DetectUSatPattern(Src)))
17536 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
17537 else if ((Val = DetectSSatPattern(Src)))
17538 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
17539 else
17540 return SDValue();
17541
17542 MVT ValVT = Val.getSimpleValueType();
17543
17544 do {
17545 MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
17546 ValVT = ValVT.changeVectorElementType(ValEltVT);
17547 Val = DAG.getNode(ClipOpc, DL, ValVT, Val, Mask, VL);
17548 } while (ValVT != VT);
17549
17550 return Val;
17551}
17552
17553// Convert
17554// (iX ctpop (bitcast (vXi1 A)))
17555// ->
17556// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
17557// FIXME: It's complicated to match all the variations of this after type
17558// legalization so we only handle the pre-type legalization pattern, but that
17559// requires the fixed vector type to be legal.
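// For example (illustrative): (i32 ctpop (zext (bitcast v8i1 A to i8)))
// becomes a vcpop.m on the mask container for A, zero-extended to i32.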
17560 static SDValue combineScalarCTPOPToVCPOP(SDNode *N, SelectionDAG &DAG,
17561 const RISCVSubtarget &Subtarget) {
17562 EVT VT = N->getValueType(0);
17563 if (!VT.isScalarInteger())
17564 return SDValue();
17565
17566 SDValue Src = N->getOperand(0);
17567
17568 // Peek through zero_extend. It doesn't change the count.
17569 if (Src.getOpcode() == ISD::ZERO_EXTEND)
17570 Src = Src.getOperand(0);
17571
17572 if (Src.getOpcode() != ISD::BITCAST)
17573 return SDValue();
17574
17575 Src = Src.getOperand(0);
17576 EVT SrcEVT = Src.getValueType();
17577 if (!SrcEVT.isSimple())
17578 return SDValue();
17579
17580 MVT SrcMVT = SrcEVT.getSimpleVT();
17581 // Make sure the input is an i1 vector.
17582 if (!SrcMVT.isVector() || SrcMVT.getVectorElementType() != MVT::i1)
17583 return SDValue();
17584
17585 if (!useRVVForFixedLengthVectorVT(SrcMVT, Subtarget))
17586 return SDValue();
17587
17588 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcMVT, Subtarget);
17589 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
17590
17591 SDLoc DL(N);
17592 auto [Mask, VL] = getDefaultVLOps(SrcMVT, ContainerVT, DL, DAG, Subtarget);
17593
17594 MVT XLenVT = Subtarget.getXLenVT();
17595 SDValue Pop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Src, Mask, VL);
17596 return DAG.getZExtOrTrunc(Pop, DL, VT);
17597}
17598
17599 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
17600 DAGCombinerInfo &DCI) const {
17601 SelectionDAG &DAG = DCI.DAG;
17602 const MVT XLenVT = Subtarget.getXLenVT();
17603 SDLoc DL(N);
17604
17605 // Helper to call SimplifyDemandedBits on an operand of N where only some low
17606 // bits are demanded. N will be added to the Worklist if it was not deleted.
17607 // Caller should return SDValue(N, 0) if this returns true.
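// For example (illustrative): for RISCVISD::SRLW below, only the low 32 bits of
// operand 0 and the low 5 bits of operand 1 are read, so the helper is invoked
// as SimplifyDemandedLowBitsHelper(0, 32) and SimplifyDemandedLowBitsHelper(1, 5).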
17608 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
17609 SDValue Op = N->getOperand(OpNo);
17610 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
17611 if (!SimplifyDemandedBits(Op, Mask, DCI))
17612 return false;
17613
17614 if (N->getOpcode() != ISD::DELETED_NODE)
17615 DCI.AddToWorklist(N);
17616 return true;
17617 };
17618
17619 switch (N->getOpcode()) {
17620 default:
17621 break;
17622 case RISCVISD::SplitF64: {
17623 SDValue Op0 = N->getOperand(0);
17624 // If the input to SplitF64 is just BuildPairF64 then the operation is
17625 // redundant. Instead, use BuildPairF64's operands directly.
17626 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
17627 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
17628
17629 if (Op0->isUndef()) {
17630 SDValue Lo = DAG.getUNDEF(MVT::i32);
17631 SDValue Hi = DAG.getUNDEF(MVT::i32);
17632 return DCI.CombineTo(N, Lo, Hi);
17633 }
17634
17635 // It's cheaper to materialise two 32-bit integers than to load a double
17636 // from the constant pool and transfer it to integer registers through the
17637 // stack.
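// For example (illustrative): SplitF64 of the f64 constant 1.0
// (0x3FF0000000000000) yields Lo = 0x00000000 and Hi = 0x3FF00000, both of
// which are cheap to materialise as integers.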
17638 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
17639 APInt V = C->getValueAPF().bitcastToAPInt();
17640 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
17641 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
17642 return DCI.CombineTo(N, Lo, Hi);
17643 }
17644
17645 // This is a target-specific version of a DAGCombine performed in
17646 // DAGCombiner::visitBITCAST. It performs the equivalent of:
17647 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
17648 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
17649 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
17650 !Op0.getNode()->hasOneUse() || Subtarget.hasStdExtZdinx())
17651 break;
17652 SDValue NewSplitF64 =
17653 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
17654 Op0.getOperand(0));
17655 SDValue Lo = NewSplitF64.getValue(0);
17656 SDValue Hi = NewSplitF64.getValue(1);
17657 APInt SignBit = APInt::getSignMask(32);
17658 if (Op0.getOpcode() == ISD::FNEG) {
17659 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
17660 DAG.getConstant(SignBit, DL, MVT::i32));
17661 return DCI.CombineTo(N, Lo, NewHi);
17662 }
17663 assert(Op0.getOpcode() == ISD::FABS);
17664 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
17665 DAG.getConstant(~SignBit, DL, MVT::i32));
17666 return DCI.CombineTo(N, Lo, NewHi);
17667 }
17668 case RISCVISD::SLLW:
17669 case RISCVISD::SRAW:
17670 case RISCVISD::SRLW:
17671 case RISCVISD::RORW:
17672 case RISCVISD::ROLW: {
17673 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
17674 if (SimplifyDemandedLowBitsHelper(0, 32) ||
17675 SimplifyDemandedLowBitsHelper(1, 5))
17676 return SDValue(N, 0);
17677
17678 break;
17679 }
17680 case RISCVISD::CLZW:
17681 case RISCVISD::CTZW: {
17682 // Only the lower 32 bits of the first operand are read
17683 if (SimplifyDemandedLowBitsHelper(0, 32))
17684 return SDValue(N, 0);
17685 break;
17686 }
17687 case RISCVISD::FMV_W_X_RV64: {
17688 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
17689 // conversion is unnecessary and can be replaced with the
17690 // FMV_X_ANYEXTW_RV64 operand.
17691 SDValue Op0 = N->getOperand(0);
17693 return Op0.getOperand(0);
17694 break;
17695 }
17696 case RISCVISD::FMV_X_ANYEXTH:
17697 case RISCVISD::FMV_X_ANYEXTW_RV64: {
17698 SDLoc DL(N);
17699 SDValue Op0 = N->getOperand(0);
17700 MVT VT = N->getSimpleValueType(0);
17701
17702 // Constant fold.
17703 if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op0)) {
17704 APInt Val = CFP->getValueAPF().bitcastToAPInt().sext(VT.getSizeInBits());
17705 return DAG.getConstant(Val, DL, VT);
17706 }
17707
17708 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
17709 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
17710 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
17711 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
17712 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
17713 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
17714 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
17715 assert(Op0.getOperand(0).getValueType() == VT &&
17716 "Unexpected value type!");
17717 return Op0.getOperand(0);
17718 }
17719
17720 if (ISD::isNormalLoad(Op0.getNode()) && Op0.hasOneUse() &&
17721 cast<LoadSDNode>(Op0)->isSimple()) {
17722 MVT IVT = MVT::getIntegerVT(Op0.getValueSizeInBits());
17723 auto *LN0 = cast<LoadSDNode>(Op0);
17724 SDValue Load =
17725 DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(),
17726 LN0->getBasePtr(), IVT, LN0->getMemOperand());
17727 DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
17728 return Load;
17729 }
17730
17731 // This is a target-specific version of a DAGCombine performed in
17732 // DAGCombiner::visitBITCAST. It performs the equivalent of:
17733 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
17734 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
17735 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
17736 !Op0.getNode()->hasOneUse())
17737 break;
17738 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
17739 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
17740 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
17741 if (Op0.getOpcode() == ISD::FNEG)
17742 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
17743 DAG.getConstant(SignBit, DL, VT));
17744
17745 assert(Op0.getOpcode() == ISD::FABS);
17746 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
17747 DAG.getConstant(~SignBit, DL, VT));
17748 }
17749 case ISD::ABS: {
17750 EVT VT = N->getValueType(0);
17751 SDValue N0 = N->getOperand(0);
17752 // abs (sext) -> zext (abs)
17753 // abs (zext) -> zext (handled elsewhere)
17754 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
17755 SDValue Src = N0.getOperand(0);
17756 SDLoc DL(N);
17757 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
17758 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
17759 }
17760 break;
17761 }
17762 case ISD::ADD: {
17763 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17764 return V;
17765 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
17766 return V;
17767 return performADDCombine(N, DCI, Subtarget);
17768 }
17769 case ISD::SUB: {
17770 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17771 return V;
17772 return performSUBCombine(N, DAG, Subtarget);
17773 }
17774 case ISD::AND:
17775 return performANDCombine(N, DCI, Subtarget);
17776 case ISD::OR: {
17777 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17778 return V;
17779 return performORCombine(N, DCI, Subtarget);
17780 }
17781 case ISD::XOR:
17782 return performXORCombine(N, DAG, Subtarget);
17783 case ISD::MUL:
17784 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17785 return V;
17786 return performMULCombine(N, DAG, DCI, Subtarget);
17787 case ISD::SDIV:
17788 case ISD::UDIV:
17789 case ISD::SREM:
17790 case ISD::UREM:
17791 if (SDValue V = combineBinOpOfZExt(N, DAG))
17792 return V;
17793 break;
17794 case ISD::FMUL: {
17795 // fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
17796 SDValue N0 = N->getOperand(0);
17797 SDValue N1 = N->getOperand(1);
17798 if (N0->getOpcode() != ISD::FCOPYSIGN)
17799 std::swap(N0, N1);
17800 if (N0->getOpcode() != ISD::FCOPYSIGN)
17801 return SDValue();
17802 ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0->getOperand(0));
17803 if (!C || !C->getValueAPF().isExactlyValue(+1.0))
17804 return SDValue();
17805 EVT VT = N->getValueType(0);
17806 if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
17807 return SDValue();
17808 SDValue Sign = N0->getOperand(1);
17809 if (Sign.getValueType() != VT)
17810 return SDValue();
17811 return DAG.getNode(RISCVISD::FSGNJX, SDLoc(N), VT, N1, N0->getOperand(1));
17812 }
17813 case ISD::FADD:
17814 case ISD::UMAX:
17815 case ISD::UMIN:
17816 case ISD::SMAX:
17817 case ISD::SMIN:
17818 case ISD::FMAXNUM:
17819 case ISD::FMINNUM: {
17820 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
17821 return V;
17822 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
17823 return V;
17824 return SDValue();
17825 }
17826 case ISD::SETCC:
17827 return performSETCCCombine(N, DAG, Subtarget);
17828 case ISD::SIGN_EXTEND_INREG:
17829 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
17830 case ISD::ZERO_EXTEND:
17831 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
17832 // type legalization. This is safe because fp_to_uint produces poison if
17833 // it overflows.
17834 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
17835 SDValue Src = N->getOperand(0);
17836 if (Src.getOpcode() == ISD::FP_TO_UINT &&
17837 isTypeLegal(Src.getOperand(0).getValueType()))
17838 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
17839 Src.getOperand(0));
17840 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
17841 isTypeLegal(Src.getOperand(1).getValueType())) {
17842 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
17843 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
17844 Src.getOperand(0), Src.getOperand(1));
17845 DCI.CombineTo(N, Res);
17846 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
17847 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
17848 return SDValue(N, 0); // Return N so it doesn't get rechecked.
17849 }
17850 }
17851 return SDValue();
17852 case RISCVISD::TRUNCATE_VECTOR_VL:
17853 if (SDValue V = combineTruncOfSraSext(N, DAG))
17854 return V;
17855 return combineTruncToVnclip(N, DAG, Subtarget);
17856 case ISD::TRUNCATE:
17857 return performTRUNCATECombine(N, DAG, Subtarget);
17858 case ISD::SELECT:
17859 return performSELECTCombine(N, DAG, Subtarget);
17860 case RISCVISD::CZERO_EQZ:
17861 case RISCVISD::CZERO_NEZ: {
17862 SDValue Val = N->getOperand(0);
17863 SDValue Cond = N->getOperand(1);
17864
17865 unsigned Opc = N->getOpcode();
17866
17867 // czero_eqz x, x -> x
17868 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
17869 return Val;
17870
17871 unsigned InvOpc =
17872 Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ;
17873
17874 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
17875 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
17876 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
17877 SDValue NewCond = Cond.getOperand(0);
17878 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
17879 if (DAG.MaskedValueIsZero(NewCond, Mask))
17880 return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
17881 }
17882 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
17883 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
17884 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
17885 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
17886 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
17887 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
17888 if (ISD::isIntEqualitySetCC(CCVal))
17889 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
17890 N->getValueType(0), Val, Cond.getOperand(0));
17891 }
17892 return SDValue();
17893 }
17894 case RISCVISD::SELECT_CC: {
17895 // Transform the select_cc into simpler, branch-free sequences where possible.
17896 SDValue LHS = N->getOperand(0);
17897 SDValue RHS = N->getOperand(1);
17898 SDValue CC = N->getOperand(2);
17899 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
17900 SDValue TrueV = N->getOperand(3);
17901 SDValue FalseV = N->getOperand(4);
17902 SDLoc DL(N);
17903 EVT VT = N->getValueType(0);
17904
17905 // If the True and False values are the same, we don't need a select_cc.
17906 if (TrueV == FalseV)
17907 return TrueV;
17908
17909 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
17910 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
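// For example (illustrative) on RV64: (select (x < 0), 5, 3) becomes
// ((x >> 63) & (5 - 3)) + 3, i.e. srai + andi + addi, with no branch.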
17911 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
17912 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
17913 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
17914 if (CCVal == ISD::CondCode::SETGE)
17915 std::swap(TrueV, FalseV);
17916
17917 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
17918 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
17919 // Only handle simm12; if it is not in this range, it can be considered as
17920 // a register.
17921 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
17922 isInt<12>(TrueSImm - FalseSImm)) {
17923 SDValue SRA =
17924 DAG.getNode(ISD::SRA, DL, VT, LHS,
17925 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
17926 SDValue AND =
17927 DAG.getNode(ISD::AND, DL, VT, SRA,
17928 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
17929 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
17930 }
17931
17932 if (CCVal == ISD::CondCode::SETGE)
17933 std::swap(TrueV, FalseV);
17934 }
17935
17936 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
17937 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
17938 {LHS, RHS, CC, TrueV, FalseV});
17939
17940 if (!Subtarget.hasConditionalMoveFusion()) {
17941 // (select c, -1, y) -> -c | y
17942 if (isAllOnesConstant(TrueV)) {
17943 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
17944 SDValue Neg = DAG.getNegative(C, DL, VT);
17945 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
17946 }
17947 // (select c, y, -1) -> -!c | y
17948 if (isAllOnesConstant(FalseV)) {
17949 SDValue C =
17950 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
17951 SDValue Neg = DAG.getNegative(C, DL, VT);
17952 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
17953 }
17954
17955 // (select c, 0, y) -> -!c & y
17956 if (isNullConstant(TrueV)) {
17957 SDValue C =
17958 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
17959 SDValue Neg = DAG.getNegative(C, DL, VT);
17960 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
17961 }
17962 // (select c, y, 0) -> -c & y
17963 if (isNullConstant(FalseV)) {
17964 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
17965 SDValue Neg = DAG.getNegative(C, DL, VT);
17966 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
17967 }
17968 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
17969 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
17970 if (((isOneConstant(FalseV) && LHS == TrueV &&
17971 CCVal == ISD::CondCode::SETNE) ||
17972 (isOneConstant(TrueV) && LHS == FalseV &&
17973 CCVal == ISD::CondCode::SETEQ)) &&
17974 isNullConstant(RHS)) {
17975 // freeze it to be safe.
17976 LHS = DAG.getFreeze(LHS);
17977 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
17978 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
17979 }
17980 }
17981
17982 // If both true/false are an xor with 1, pull through the select.
17983 // This can occur after op legalization if both operands are setccs that
17984 // require an xor to invert.
17985 // FIXME: Generalize to other binary ops with identical operand?
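// For example (illustrative):
//   (select_cc lhs, rhs, cc, (xor a, 1), (xor b, 1))
//   -> (xor (select_cc lhs, rhs, cc, a, b), 1)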
17986 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
17987 TrueV.getOperand(1) == FalseV.getOperand(1) &&
17988 isOneConstant(TrueV.getOperand(1)) &&
17989 TrueV.hasOneUse() && FalseV.hasOneUse()) {
17990 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
17991 TrueV.getOperand(0), FalseV.getOperand(0));
17992 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
17993 }
17994
17995 return SDValue();
17996 }
17997 case RISCVISD::BR_CC: {
17998 SDValue LHS = N->getOperand(1);
17999 SDValue RHS = N->getOperand(2);
18000 SDValue CC = N->getOperand(3);
18001 SDLoc DL(N);
18002
18003 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
18004 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
18005 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
18006
18007 return SDValue();
18008 }
18009 case ISD::BITREVERSE:
18010 return performBITREVERSECombine(N, DAG, Subtarget);
18011 case ISD::FP_TO_SINT:
18012 case ISD::FP_TO_UINT:
18013 return performFP_TO_INTCombine(N, DCI, Subtarget);
18014 case ISD::FP_TO_SINT_SAT:
18015 case ISD::FP_TO_UINT_SAT:
18016 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
18017 case ISD::FCOPYSIGN: {
18018 EVT VT = N->getValueType(0);
18019 if (!VT.isVector())
18020 break;
18021 // There is a form of VFSGNJ which injects the negated sign of its second
18022 // operand. Try to bubble any FNEG up after the extend/round to produce
18023 // this optimized pattern. Avoid modifying cases where the FP_ROUND is a
18024 // truncating round (TRUNC=1).
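// For example (illustrative): (fcopysign X, (fpext (fneg Y))) is rewritten as
// (fcopysign X, (fneg (fpext Y))), which can later be selected to vfsgnjn.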
18025 SDValue In2 = N->getOperand(1);
18026 // Avoid cases where the extend/round has multiple uses, as duplicating
18027 // those is typically more expensive than removing a fneg.
18028 if (!In2.hasOneUse())
18029 break;
18030 if (In2.getOpcode() != ISD::FP_EXTEND &&
18031 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
18032 break;
18033 In2 = In2.getOperand(0);
18034 if (In2.getOpcode() != ISD::FNEG)
18035 break;
18036 SDLoc DL(N);
18037 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
18038 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
18039 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
18040 }
18041 case ISD::MGATHER: {
18042 const auto *MGN = cast<MaskedGatherSDNode>(N);
18043 const EVT VT = N->getValueType(0);
18044 SDValue Index = MGN->getIndex();
18045 SDValue ScaleOp = MGN->getScale();
18046 ISD::MemIndexType IndexType = MGN->getIndexType();
18047 assert(!MGN->isIndexScaled() &&
18048 "Scaled gather/scatter should not be formed");
18049
18050 SDLoc DL(N);
18051 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
18052 return DAG.getMaskedGather(
18053 N->getVTList(), MGN->getMemoryVT(), DL,
18054 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
18055 MGN->getBasePtr(), Index, ScaleOp},
18056 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
18057
18058 if (narrowIndex(Index, IndexType, DAG))
18059 return DAG.getMaskedGather(
18060 N->getVTList(), MGN->getMemoryVT(), DL,
18061 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
18062 MGN->getBasePtr(), Index, ScaleOp},
18063 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
18064
18065 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
18066 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
18067 // The sequence will be XLenVT, not the type of Index. Tell
18068 // isSimpleVIDSequence this so we avoid overflow.
18069 if (std::optional<VIDSequence> SimpleVID =
18070 isSimpleVIDSequence(Index, Subtarget.getXLen());
18071 SimpleVID && SimpleVID->StepDenominator == 1) {
18072 const int64_t StepNumerator = SimpleVID->StepNumerator;
18073 const int64_t Addend = SimpleVID->Addend;
18074
18075 // Note: We don't need to check alignment here since (by assumption
18076 // from the existence of the gather), our offsets must be sufficiently
18077 // aligned.
18078
18079 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
18080 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
18081 assert(IndexType == ISD::UNSIGNED_SCALED);
18082 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
18083 DAG.getSignedConstant(Addend, DL, PtrVT));
18084
18085 SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(),
18086 VT.getVectorElementCount());
18087 SDValue StridedLoad = DAG.getStridedLoadVP(
18088 VT, DL, MGN->getChain(), BasePtr,
18089 DAG.getSignedConstant(StepNumerator, DL, XLenVT), MGN->getMask(),
18090 EVL, MGN->getMemOperand());
18091 SDValue VPSelect = DAG.getNode(ISD::VP_SELECT, DL, VT, MGN->getMask(),
18092 StridedLoad, MGN->getPassThru(), EVL);
18093 return DAG.getMergeValues({VPSelect, SDValue(StridedLoad.getNode(), 1)},
18094 DL);
18095 }
18096 }
18097
18098 SmallVector<int> ShuffleMask;
18099 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
18100 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
18101 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
18102 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
18103 MGN->getMask(), DAG.getUNDEF(VT),
18104 MGN->getMemoryVT(), MGN->getMemOperand(),
18105 ISD::UNINDEXED, ISD::NON_EXTLOAD);
18106 SDValue Shuffle =
18107 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
18108 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
18109 }
18110
18111 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
18112 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
18113 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
18114 SmallVector<SDValue> NewIndices;
18115 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
18116 NewIndices.push_back(Index.getOperand(i));
18117 EVT IndexVT = Index.getValueType()
18118 .getHalfNumVectorElementsVT(*DAG.getContext());
18119 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
18120
18121 unsigned ElementSize = VT.getScalarStoreSize();
18122 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
18123 auto EltCnt = VT.getVectorElementCount();
18124 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
18125 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
18126 EltCnt.divideCoefficientBy(2));
18127 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
18128 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
18129 EltCnt.divideCoefficientBy(2));
18130 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
18131
18132 SDValue Gather =
18133 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
18134 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
18135 Index, ScaleOp},
18136 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
18137 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
18138 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
18139 }
18140 break;
18141 }
18142 case ISD::MSCATTER:{
18143 const auto *MSN = cast<MaskedScatterSDNode>(N);
18144 SDValue Index = MSN->getIndex();
18145 SDValue ScaleOp = MSN->getScale();
18146 ISD::MemIndexType IndexType = MSN->getIndexType();
18147 assert(!MSN->isIndexScaled() &&
18148 "Scaled gather/scatter should not be formed");
18149
18150 SDLoc DL(N);
18151 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
18152 return DAG.getMaskedScatter(
18153 N->getVTList(), MSN->getMemoryVT(), DL,
18154 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
18155 Index, ScaleOp},
18156 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
18157
18158 if (narrowIndex(Index, IndexType, DAG))
18159 return DAG.getMaskedScatter(
18160 N->getVTList(), MSN->getMemoryVT(), DL,
18161 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
18162 Index, ScaleOp},
18163 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
18164
18165 EVT VT = MSN->getValue()->getValueType(0);
18166 SmallVector<int> ShuffleMask;
18167 if (!MSN->isTruncatingStore() &&
18168 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
18169 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
18170 DAG.getUNDEF(VT), ShuffleMask);
18171 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
18172 DAG.getUNDEF(XLenVT), MSN->getMask(),
18173 MSN->getMemoryVT(), MSN->getMemOperand(),
18174 ISD::UNINDEXED, false);
18175 }
18176 break;
18177 }
18178 case ISD::VP_GATHER: {
18179 const auto *VPGN = cast<VPGatherSDNode>(N);
18180 SDValue Index = VPGN->getIndex();
18181 SDValue ScaleOp = VPGN->getScale();
18182 ISD::MemIndexType IndexType = VPGN->getIndexType();
18183 assert(!VPGN->isIndexScaled() &&
18184 "Scaled gather/scatter should not be formed");
18185
18186 SDLoc DL(N);
18187 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
18188 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
18189 {VPGN->getChain(), VPGN->getBasePtr(), Index,
18190 ScaleOp, VPGN->getMask(),
18191 VPGN->getVectorLength()},
18192 VPGN->getMemOperand(), IndexType);
18193
18194 if (narrowIndex(Index, IndexType, DAG))
18195 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
18196 {VPGN->getChain(), VPGN->getBasePtr(), Index,
18197 ScaleOp, VPGN->getMask(),
18198 VPGN->getVectorLength()},
18199 VPGN->getMemOperand(), IndexType);
18200
18201 break;
18202 }
18203 case ISD::VP_SCATTER: {
18204 const auto *VPSN = cast<VPScatterSDNode>(N);
18205 SDValue Index = VPSN->getIndex();
18206 SDValue ScaleOp = VPSN->getScale();
18207 ISD::MemIndexType IndexType = VPSN->getIndexType();
18208 assert(!VPSN->isIndexScaled() &&
18209 "Scaled gather/scatter should not be formed");
18210
18211 SDLoc DL(N);
18212 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
18213 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
18214 {VPSN->getChain(), VPSN->getValue(),
18215 VPSN->getBasePtr(), Index, ScaleOp,
18216 VPSN->getMask(), VPSN->getVectorLength()},
18217 VPSN->getMemOperand(), IndexType);
18218
18219 if (narrowIndex(Index, IndexType, DAG))
18220 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
18221 {VPSN->getChain(), VPSN->getValue(),
18222 VPSN->getBasePtr(), Index, ScaleOp,
18223 VPSN->getMask(), VPSN->getVectorLength()},
18224 VPSN->getMemOperand(), IndexType);
18225 break;
18226 }
18227 case RISCVISD::SHL_VL:
18228 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18229 return V;
18230 [[fallthrough]];
18231 case RISCVISD::SRA_VL:
18232 case RISCVISD::SRL_VL: {
18233 SDValue ShAmt = N->getOperand(1);
18234 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
18235 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
18236 SDLoc DL(N);
18237 SDValue VL = N->getOperand(4);
18238 EVT VT = N->getValueType(0);
18239 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
18240 ShAmt.getOperand(1), VL);
18241 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
18242 N->getOperand(2), N->getOperand(3), N->getOperand(4));
18243 }
18244 break;
18245 }
18246 case ISD::SRA:
18247 if (SDValue V = performSRACombine(N, DAG, Subtarget))
18248 return V;
18249 [[fallthrough]];
18250 case ISD::SRL:
18251 case ISD::SHL: {
18252 if (N->getOpcode() == ISD::SHL) {
18253 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18254 return V;
18255 }
18256 SDValue ShAmt = N->getOperand(1);
18257 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
18258 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
18259 SDLoc DL(N);
18260 EVT VT = N->getValueType(0);
18261 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
18262 ShAmt.getOperand(1),
18263 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
18264 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
18265 }
18266 break;
18267 }
18268 case RISCVISD::ADD_VL:
18269 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18270 return V;
18271 return combineToVWMACC(N, DAG, Subtarget);
18272 case RISCVISD::VWADD_W_VL:
18273 case RISCVISD::VWADDU_W_VL:
18274 case RISCVISD::VWSUB_W_VL:
18275 case RISCVISD::VWSUBU_W_VL:
18276 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
18277 case RISCVISD::SUB_VL:
18278 case RISCVISD::MUL_VL:
18279 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
18280 case RISCVISD::VFMADD_VL:
18281 case RISCVISD::VFNMADD_VL:
18282 case RISCVISD::VFMSUB_VL:
18283 case RISCVISD::VFNMSUB_VL:
18284 case RISCVISD::STRICT_VFMADD_VL:
18285 case RISCVISD::STRICT_VFNMADD_VL:
18286 case RISCVISD::STRICT_VFMSUB_VL:
18287 case RISCVISD::STRICT_VFNMSUB_VL:
18288 return performVFMADD_VLCombine(N, DCI, Subtarget);
18289 case RISCVISD::FADD_VL:
18290 case RISCVISD::FSUB_VL:
18291 case RISCVISD::FMUL_VL:
18292 case RISCVISD::VFWADD_W_VL:
18293 case RISCVISD::VFWSUB_W_VL:
18294 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
18295 case ISD::LOAD:
18296 case ISD::STORE: {
18297 if (DCI.isAfterLegalizeDAG())
18298 if (SDValue V = performMemPairCombine(N, DCI))
18299 return V;
18300
18301 if (N->getOpcode() != ISD::STORE)
18302 break;
18303
18304 auto *Store = cast<StoreSDNode>(N);
18305 SDValue Chain = Store->getChain();
18306 EVT MemVT = Store->getMemoryVT();
18307 SDValue Val = Store->getValue();
18308 SDLoc DL(N);
18309
18310 bool IsScalarizable =
18311 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
18312 Store->isSimple() &&
18313 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
18314 isPowerOf2_64(MemVT.getSizeInBits()) &&
18315 MemVT.getSizeInBits() <= Subtarget.getXLen();
18316
18317 // If sufficiently aligned we can scalarize stores of constant vectors of
18318 // any power-of-two size up to XLen bits, provided that they aren't too
18319 // expensive to materialize.
18320 // vsetivli zero, 2, e8, m1, ta, ma
18321 // vmv.v.i v8, 4
18322 // vse8.v v8, (a0)
18323 // ->
18324 // li a1, 1028
18325 // sh a1, 0(a0)
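// (Illustrative: 1028 == 0x0404, i.e. the two i8 elements of value 4 packed
// into a single halfword.)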
18326 if (DCI.isBeforeLegalize() && IsScalarizable &&
18327 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
18328 // Get the constant vector bits
18329 APInt NewC(Val.getValueSizeInBits(), 0);
18330 uint64_t EltSize = Val.getScalarValueSizeInBits();
18331 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
18332 if (Val.getOperand(i).isUndef())
18333 continue;
18334 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
18335 i * EltSize);
18336 }
18337 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
18338
18339 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
18340 true) <= 2 &&
18341 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
18342 NewVT, *Store->getMemOperand())) {
18343 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
18344 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
18345 Store->getPointerInfo(), Store->getOriginalAlign(),
18346 Store->getMemOperand()->getFlags());
18347 }
18348 }
18349
18350 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
18351 // vsetivli zero, 2, e16, m1, ta, ma
18352 // vle16.v v8, (a0)
18353 // vse16.v v8, (a1)
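// -> (illustrative, for the 2 x i16 copy above)
// lw a2, 0(a0)
// sw a2, 0(a1)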
18354 if (auto *L = dyn_cast<LoadSDNode>(Val);
18355 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
18356 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
18357 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
18358 L->getMemoryVT() == MemVT) {
18359 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
18360 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
18361 NewVT, *Store->getMemOperand()) &&
18362 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
18363 NewVT, *L->getMemOperand())) {
18364 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
18365 L->getPointerInfo(), L->getOriginalAlign(),
18366 L->getMemOperand()->getFlags());
18367 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
18368 Store->getPointerInfo(), Store->getOriginalAlign(),
18369 Store->getMemOperand()->getFlags());
18370 }
18371 }
18372
18373 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
18374 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
18375 // any illegal types.
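// For example (illustrative): (store (vmv.x.s v8), (a0)) becomes a vse of v8
// with VL = 1, avoiding the move through a scalar register.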
18376 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
18377 (DCI.isAfterLegalizeDAG() &&
18378 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18379 isNullConstant(Val.getOperand(1)))) {
18380 SDValue Src = Val.getOperand(0);
18381 MVT VecVT = Src.getSimpleValueType();
18382 // VecVT should be scalable and memory VT should match the element type.
18383 if (!Store->isIndexed() && VecVT.isScalableVector() &&
18384 MemVT == VecVT.getVectorElementType()) {
18385 SDLoc DL(N);
18386 MVT MaskVT = getMaskTypeFor(VecVT);
18387 return DAG.getStoreVP(
18388 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
18389 DAG.getConstant(1, DL, MaskVT),
18390 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
18391 Store->getMemOperand(), Store->getAddressingMode(),
18392 Store->isTruncatingStore(), /*IsCompress*/ false);
18393 }
18394 }
18395
18396 break;
18397 }
18398 case ISD::SPLAT_VECTOR: {
18399 EVT VT = N->getValueType(0);
18400 // Only perform this combine on legal MVT types.
18401 if (!isTypeLegal(VT))
18402 break;
18403 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
18404 DAG, Subtarget))
18405 return Gather;
18406 break;
18407 }
18408 case ISD::BUILD_VECTOR:
18409 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
18410 return V;
18411 break;
18412 case ISD::CONCAT_VECTORS:
18413 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
18414 return V;
18415 break;
18416 case ISD::VECTOR_SHUFFLE:
18417 if (SDValue V = performVECTOR_SHUFFLECombine(N, DAG, Subtarget, *this))
18418 return V;
18419 break;
18420 case ISD::INSERT_VECTOR_ELT:
18421 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
18422 return V;
18423 break;
18424 case RISCVISD::VFMV_V_F_VL: {
18425 const MVT VT = N->getSimpleValueType(0);
18426 SDValue Passthru = N->getOperand(0);
18427 SDValue Scalar = N->getOperand(1);
18428 SDValue VL = N->getOperand(2);
18429
18430 // If VL is 1, we can use vfmv.s.f.
18431 if (isOneConstant(VL))
18432 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
18433 break;
18434 }
18435 case RISCVISD::VMV_V_X_VL: {
18436 const MVT VT = N->getSimpleValueType(0);
18437 SDValue Passthru = N->getOperand(0);
18438 SDValue Scalar = N->getOperand(1);
18439 SDValue VL = N->getOperand(2);
18440
18441 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
18442 // scalar input.
18443 unsigned ScalarSize = Scalar.getValueSizeInBits();
18444 unsigned EltWidth = VT.getScalarSizeInBits();
18445 if (ScalarSize > EltWidth && Passthru.isUndef())
18446 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
18447 return SDValue(N, 0);
18448
18449 // If VL is 1 and the scalar value won't benefit from immediate, we can
18450 // use vmv.s.x.
18451 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
18452 if (isOneConstant(VL) &&
18453 (!Const || Const->isZero() ||
18454 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
18455 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
18456
18457 break;
18458 }
18459 case RISCVISD::VFMV_S_F_VL: {
18460 SDValue Src = N->getOperand(1);
18461 // Try to remove vector->scalar->vector if the scalar->vector is inserting
18462 // into an undef vector.
18463 // TODO: Could use a vslide or vmv.v.v for non-undef.
18464 if (N->getOperand(0).isUndef() &&
18465 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18466 isNullConstant(Src.getOperand(1)) &&
18467 Src.getOperand(0).getValueType().isScalableVector()) {
18468 EVT VT = N->getValueType(0);
18469 EVT SrcVT = Src.getOperand(0).getValueType();
18470 assert(SrcVT.getVectorElementType() == VT.getVectorElementType());
18471 // Widths match, just return the original vector.
18472 if (SrcVT == VT)
18473 return Src.getOperand(0);
18474 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
18475 }
18476 [[fallthrough]];
18477 }
18478 case RISCVISD::VMV_S_X_VL: {
18479 const MVT VT = N->getSimpleValueType(0);
18480 SDValue Passthru = N->getOperand(0);
18481 SDValue Scalar = N->getOperand(1);
18482 SDValue VL = N->getOperand(2);
18483
18484 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
18485 Scalar.getOperand(0).getValueType() == N->getValueType(0))
18486 return Scalar.getOperand(0);
18487
18488 // Use M1 or smaller to avoid over-constraining register allocation.
18489 const MVT M1VT = getLMUL1VT(VT);
18490 if (M1VT.bitsLT(VT)) {
18491 SDValue M1Passthru =
18492 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
18493 DAG.getVectorIdxConstant(0, DL));
18494 SDValue Result =
18495 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
18496 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
18497 DAG.getVectorIdxConstant(0, DL));
18498 return Result;
18499 }
18500
18501 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
18502 // higher would involve overly constraining the register allocator for
18503 // no purpose.
18504 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
18505 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
18506 VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
18507 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
18508
18509 break;
18510 }
18511 case RISCVISD::VMV_X_S: {
18512 SDValue Vec = N->getOperand(0);
18513 MVT VecVT = N->getOperand(0).getSimpleValueType();
18514 const MVT M1VT = getLMUL1VT(VecVT);
18515 if (M1VT.bitsLT(VecVT)) {
18516 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
18517 DAG.getVectorIdxConstant(0, DL));
18518 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
18519 }
18520 break;
18521 }
18522 case ISD::INTRINSIC_VOID:
18523 case ISD::INTRINSIC_W_CHAIN:
18524 case ISD::INTRINSIC_WO_CHAIN: {
18525 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
18526 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
18527 switch (IntNo) {
18528 // By default we do not combine any intrinsic.
18529 default:
18530 return SDValue();
18531 case Intrinsic::riscv_vcpop:
18532 case Intrinsic::riscv_vcpop_mask:
18533 case Intrinsic::riscv_vfirst:
18534 case Intrinsic::riscv_vfirst_mask: {
18535 SDValue VL = N->getOperand(2);
18536 if (IntNo == Intrinsic::riscv_vcpop_mask ||
18537 IntNo == Intrinsic::riscv_vfirst_mask)
18538 VL = N->getOperand(3);
18539 if (!isNullConstant(VL))
18540 return SDValue();
18541 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
18542 SDLoc DL(N);
18543 EVT VT = N->getValueType(0);
18544 if (IntNo == Intrinsic::riscv_vfirst ||
18545 IntNo == Intrinsic::riscv_vfirst_mask)
18546 return DAG.getAllOnesConstant(DL, VT);
18547 return DAG.getConstant(0, DL, VT);
18548 }
18549 }
18550 }
18551 case ISD::EXPERIMENTAL_VP_REVERSE:
18552 return performVP_REVERSECombine(N, DAG, Subtarget);
18553 case ISD::VP_STORE:
18554 return performVP_STORECombine(N, DAG, Subtarget);
18555 case ISD::BITCAST: {
18557 SDValue N0 = N->getOperand(0);
18558 EVT VT = N->getValueType(0);
18559 EVT SrcVT = N0.getValueType();
18560 if (VT.isRISCVVectorTuple() && N0->getOpcode() == ISD::SPLAT_VECTOR) {
18561 unsigned NF = VT.getRISCVVectorTupleNumFields();
18562 unsigned NumScalElts = VT.getSizeInBits().getKnownMinValue() / (NF * 8);
18563 SDValue EltVal = DAG.getConstant(0, DL, Subtarget.getXLenVT());
18564 MVT ScalTy = MVT::getScalableVectorVT(MVT::getIntegerVT(8), NumScalElts);
18565
18566 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, ScalTy, EltVal);
18567
18568 SDValue Result = DAG.getUNDEF(VT);
18569 for (unsigned i = 0; i < NF; ++i)
18570 Result = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Result, Splat,
18571 DAG.getVectorIdxConstant(i, DL));
18572 return Result;
18573 }
18574 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
18575 // type, widen both sides to avoid a trip through memory.
18576 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
18577 VT.isScalarInteger()) {
18578 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
18579 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
18580 Ops[0] = N0;
18581 SDLoc DL(N);
18582 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
18583 N0 = DAG.getBitcast(MVT::i8, N0);
18584 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
18585 }
18586
18587 return SDValue();
18588 }
18589 case ISD::CTPOP:
18590 if (SDValue V = combineScalarCTPOPToVCPOP(N, DAG, Subtarget))
18591 return V;
18592 break;
18593 }
18594
18595 return SDValue();
18596}
18597
18598 bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
18599 EVT XVT, unsigned KeptBits) const {
18600 // For vectors, we don't have a preference.
18601 if (XVT.isVector())
18602 return false;
18603
18604 if (XVT != MVT::i32 && XVT != MVT::i64)
18605 return false;
18606
18607 // We can use sext.w for RV64 or an srai 31 on RV32.
18608 if (KeptBits == 32 || KeptBits == 64)
18609 return true;
18610
18611 // With Zbb we can use sext.h/sext.b.
18612 return Subtarget.hasStdExtZbb() &&
18613 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
18614 KeptBits == 16);
18615}
18616
18617 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
18618 const SDNode *N, CombineLevel Level) const {
18619 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
18620 N->getOpcode() == ISD::SRL) &&
18621 "Expected shift op");
18622
18623 // The following folds are only desirable if `(OP _, c1 << c2)` can be
18624 // materialised in fewer instructions than `(OP _, c1)`:
18625 //
18626 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
18627 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
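// For example (illustrative) on RV64 with c1 = 2047 and c2 = 3: c1 fits an ADDI
// immediate but c1 << c2 = 16376 does not, so performing the fold would require
// extra instructions and is rejected below.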
18628 SDValue N0 = N->getOperand(0);
18629 EVT Ty = N0.getValueType();
18630
18631 // Loads and stores can fold a constant offset themselves, so when the ADD node
18632 // is used only by loads/stores, the folding optimization performed above can
18633 // still be completed there.
18634 auto isUsedByLdSt = [](const SDNode *X, const SDNode *User) {
18635 for (SDNode *Use : X->users()) {
18636 // This use is the one we're on right now. Skip it.
18637 if (Use == User || Use->getOpcode() == ISD::SELECT)
18638 continue;
18639 if (!isa<StoreSDNode>(Use) && !isa<LoadSDNode>(Use))
18640 return false;
18641 }
18642 return true;
18643 };
18644
18645 if (Ty.isScalarInteger() &&
18646 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
18647 if (N0.getOpcode() == ISD::ADD && !N0->hasOneUse())
18648 return isUsedByLdSt(N0.getNode(), N);
18649
18650 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
18651 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
18652
18653 // Bail if we might break a sh{1,2,3}add pattern.
18654 if (Subtarget.hasStdExtZba() && C2 && C2->getZExtValue() >= 1 &&
18655 C2->getZExtValue() <= 3 && N->hasOneUse() &&
18656 N->user_begin()->getOpcode() == ISD::ADD &&
18657 !isUsedByLdSt(*N->user_begin(), nullptr) &&
18658 !isa<ConstantSDNode>(N->user_begin()->getOperand(1)))
18659 return false;
18660
18661 if (C1 && C2) {
18662 const APInt &C1Int = C1->getAPIntValue();
18663 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
18664
18665 // We can materialise `c1 << c2` into an add immediate, so it's "free",
18666 // and the combine should happen, to potentially allow further combines
18667 // later.
18668 if (ShiftedC1Int.getSignificantBits() <= 64 &&
18669 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
18670 return true;
18671
18672 // We can materialise `c1` in an add immediate, so it's "free", and the
18673 // combine should be prevented.
18674 if (C1Int.getSignificantBits() <= 64 &&
18676 isLegalAddImmediate(C1Int.getSExtValue()))
18677 return false;
18678 // Neither constant will fit into an immediate, so find materialisation
18679 // costs.
18680 int C1Cost =
18681 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
18682 /*CompressionCost*/ true);
18683 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
18684 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
18685 /*CompressionCost*/ true);
18686
18687 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
18688 // combine should be prevented.
18689 if (C1Cost < ShiftedC1Cost)
18690 return false;
18691 }
18692 }
18693
18694 if (!N0->hasOneUse())
18695 return false;
18696
18697 if (N0->getOpcode() == ISD::SIGN_EXTEND &&
18698 N0->getOperand(0)->getOpcode() == ISD::ADD &&
18699 !N0->getOperand(0)->hasOneUse())
18700 return isUsedByLdSt(N0->getOperand(0).getNode(), N0.getNode());
18701
18702 return true;
18703}
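// Worked example (illustrative, not from the upstream source): for
// (shl (add x, 2047), 4) the shifted constant 2047 << 4 = 32752 is no longer a
// valid ADDI immediate and needs a LUI/ADDI pair, while 2047 itself is a
// single ADDI, so the hook above returns false and the add stays inside the
// shift.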
18704
18706 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
18707 TargetLoweringOpt &TLO) const {
18708 // Delay this optimization as late as possible.
18709 if (!TLO.LegalOps)
18710 return false;
18711
18712 EVT VT = Op.getValueType();
18713 if (VT.isVector())
18714 return false;
18715
18716 unsigned Opcode = Op.getOpcode();
18717 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
18718 return false;
18719
18720 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
18721 if (!C)
18722 return false;
18723
18724 const APInt &Mask = C->getAPIntValue();
18725
18726 // Clear all non-demanded bits initially.
18727 APInt ShrunkMask = Mask & DemandedBits;
18728
18729 // Try to make a smaller immediate by setting undemanded bits.
18730
18731 APInt ExpandedMask = Mask | ~DemandedBits;
18732
18733 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
18734 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
18735 };
18736 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
18737 if (NewMask == Mask)
18738 return true;
18739 SDLoc DL(Op);
18740 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
18741 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
18742 Op.getOperand(0), NewC);
18743 return TLO.CombineTo(Op, NewOp);
18744 };
18745
18746 // If the shrunk mask fits in sign extended 12 bits, let the target
18747 // independent code apply it.
18748 if (ShrunkMask.isSignedIntN(12))
18749 return false;
18750
18751 // And has a few special cases for zext.
18752 if (Opcode == ISD::AND) {
18753 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
18754 // otherwise use SLLI + SRLI.
18755 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
18756 if (IsLegalMask(NewMask))
18757 return UseMask(NewMask);
18758
18759 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
18760 if (VT == MVT::i64) {
18761 APInt NewMask = APInt(64, 0xffffffff);
18762 if (IsLegalMask(NewMask))
18763 return UseMask(NewMask);
18764 }
18765 }
18766
18767 // For the remaining optimizations, we need to be able to make a negative
18768 // number through a combination of mask and undemanded bits.
18769 if (!ExpandedMask.isNegative())
18770 return false;
18771
18772 // What is the fewest number of bits we need to represent the negative number.
18773 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
18774
18775 // Try to make a 12 bit negative immediate. If that fails try to make a 32
18776 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
18777 // If we can't create a simm12, we shouldn't change opaque constants.
18778 APInt NewMask = ShrunkMask;
18779 if (MinSignedBits <= 12)
18780 NewMask.setBitsFrom(11);
18781 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
18782 NewMask.setBitsFrom(31);
18783 else
18784 return false;
18785
18786 // Check that our new mask is a subset of the demanded mask.
18787 assert(IsLegalMask(NewMask));
18788 return UseMask(NewMask);
18789}
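// Worked example (illustrative only): for (and X, 0xFFC0) with demanded bits
// 0xFF00, the shrunk mask 0xFF00 does not fit in a simm12, but the undemanded
// bits allow expanding it to 0xFFFF, which selects to zext.h (Zbb) or
// SLLI+SRLI instead of materialising 0xFF00 separately.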
18790
18791static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
18792 static const uint64_t GREVMasks[] = {
18793 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
18794 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
18795
18796 for (unsigned Stage = 0; Stage != 6; ++Stage) {
18797 unsigned Shift = 1 << Stage;
18798 if (ShAmt & Shift) {
18799 uint64_t Mask = GREVMasks[Stage];
18800 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
18801 if (IsGORC)
18802 Res |= x;
18803 x = Res;
18804 }
18805 }
18806
18807 return x;
18808}
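// For illustration (not part of the upstream source): a control value of 7
// models brev8/orc.b, e.g.
//   computeGREVOrGORC(0x0000001200340000, 7, /*IsGORC=*/true)  -> 0x000000ff00ff0000
//   computeGREVOrGORC(0x0000000000000001, 7, /*IsGORC=*/false) -> 0x0000000000000080
// orc.b turns every non-zero byte into 0xff; brev8 reverses the bits within
// each byte.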
18809
18811 KnownBits &Known,
18812 const APInt &DemandedElts,
18813 const SelectionDAG &DAG,
18814 unsigned Depth) const {
18815 unsigned BitWidth = Known.getBitWidth();
18816 unsigned Opc = Op.getOpcode();
18817 assert((Opc >= ISD::BUILTIN_OP_END ||
18818 Opc == ISD::INTRINSIC_WO_CHAIN ||
18819 Opc == ISD::INTRINSIC_W_CHAIN ||
18820 Opc == ISD::INTRINSIC_VOID) &&
18821 "Should use MaskedValueIsZero if you don't know whether Op"
18822 " is a target node!");
18823
18824 Known.resetAll();
18825 switch (Opc) {
18826 default: break;
18827 case RISCVISD::SELECT_CC: {
18828 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
18829 // If we don't know any bits, early out.
18830 if (Known.isUnknown())
18831 break;
18832 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
18833
18834 // Only known if known in both the LHS and RHS.
18835 Known = Known.intersectWith(Known2);
18836 break;
18837 }
18838 case RISCVISD::CZERO_EQZ:
18839 case RISCVISD::CZERO_NEZ:
18840 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18841 // Result is either all zero or operand 0. We can propagate zeros, but not
18842 // ones.
18843 Known.One.clearAllBits();
18844 break;
18845 case RISCVISD::REMUW: {
18846 KnownBits Known2;
18847 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
18848 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
18849 // We only care about the lower 32 bits.
18850 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
18851 // Restore the original width by sign extending.
18852 Known = Known.sext(BitWidth);
18853 break;
18854 }
18855 case RISCVISD::DIVUW: {
18856 KnownBits Known2;
18857 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
18858 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
18859 // We only care about the lower 32 bits.
18860 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
18861 // Restore the original width by sign extending.
18862 Known = Known.sext(BitWidth);
18863 break;
18864 }
18865 case RISCVISD::SLLW: {
18866 KnownBits Known2;
18867 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
18868 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
18869 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
18870 // Restore the original width by sign extending.
18871 Known = Known.sext(BitWidth);
18872 break;
18873 }
18874 case RISCVISD::CTZW: {
18875 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18876 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
18877 unsigned LowBits = llvm::bit_width(PossibleTZ);
18878 Known.Zero.setBitsFrom(LowBits);
18879 break;
18880 }
18881 case RISCVISD::CLZW: {
18882 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18883 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
18884 unsigned LowBits = llvm::bit_width(PossibleLZ);
18885 Known.Zero.setBitsFrom(LowBits);
18886 break;
18887 }
18888 case RISCVISD::BREV8:
18889 case RISCVISD::ORC_B: {
18890 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
18891 // control value of 7 is equivalent to brev8 and orc.b.
18892 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18893 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
18894 // To compute zeros, we need to invert the value and invert it back after.
18895 Known.Zero =
18896 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
18897 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
18898 break;
18899 }
18900 case RISCVISD::READ_VLENB: {
18901 // We can use the minimum and maximum VLEN values to bound VLENB. We
18902 // know VLEN must be a power of two.
18903 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
18904 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
18905 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
18906 Known.Zero.setLowBits(Log2_32(MinVLenB));
18907 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
18908 if (MaxVLenB == MinVLenB)
18909 Known.One.setBit(Log2_32(MinVLenB));
18910 break;
18911 }
18912 case RISCVISD::FCLASS: {
18913 // fclass will only set one of the low 10 bits.
18914 Known.Zero.setBitsFrom(10);
18915 break;
18916 }
18917 case ISD::INTRINSIC_W_CHAIN:
18918 case ISD::INTRINSIC_WO_CHAIN: {
18919 unsigned IntNo =
18920 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
18921 switch (IntNo) {
18922 default:
18923 // We can't do anything for most intrinsics.
18924 break;
18925 case Intrinsic::riscv_vsetvli:
18926 case Intrinsic::riscv_vsetvlimax: {
18927 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
18928 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
18929 RISCVII::VLMUL VLMUL =
18930 static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
18931 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
18932 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
18933 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
18934 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
18935
18936 // Result of vsetvli must be not larger than AVL.
18937 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
18938 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
18939
18940 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
18941 if (BitWidth > KnownZeroFirstBit)
18942 Known.Zero.setBitsFrom(KnownZeroFirstBit);
18943 break;
18944 }
18945 }
18946 break;
18947 }
18948 }
18949}
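// Example of the vsetvli bound above (illustrative only): with a maximum VLEN
// of 512 bits, SEW=32 and LMUL=2 give MaxVL = (512 / 32) * 2 = 32, so every
// bit of the returned VL from bit 6 upwards is known to be zero.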
18950
18951unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
18952 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
18953 unsigned Depth) const {
18954 switch (Op.getOpcode()) {
18955 default:
18956 break;
18957 case RISCVISD::SELECT_CC: {
18958 unsigned Tmp =
18959 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
18960 if (Tmp == 1) return 1; // Early out.
18961 unsigned Tmp2 =
18962 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
18963 return std::min(Tmp, Tmp2);
18964 }
18965 case RISCVISD::CZERO_EQZ:
18966 case RISCVISD::CZERO_NEZ:
18967 // Output is either all zero or operand 0. We can propagate sign bit count
18968 // from operand 0.
18969 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
18970 case RISCVISD::ABSW: {
18971 // We expand this at isel to negw+max. The result will have 33 sign bits
18972 // if the input has at least 33 sign bits.
18973 unsigned Tmp =
18974 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
18975 if (Tmp < 33) return 1;
18976 return 33;
18977 }
18978 case RISCVISD::SLLW:
18979 case RISCVISD::SRAW:
18980 case RISCVISD::SRLW:
18981 case RISCVISD::DIVW:
18982 case RISCVISD::DIVUW:
18983 case RISCVISD::REMUW:
18984 case RISCVISD::ROLW:
18985 case RISCVISD::RORW:
18986 case RISCVISD::FCVT_W_RV64:
18987 case RISCVISD::FCVT_WU_RV64:
18988 case RISCVISD::STRICT_FCVT_W_RV64:
18989 case RISCVISD::STRICT_FCVT_WU_RV64:
18990 // TODO: As the result is sign-extended, this is conservatively correct. A
18991 // more precise answer could be calculated for SRAW depending on known
18992 // bits in the shift amount.
18993 return 33;
18994 case RISCVISD::VMV_X_S: {
18995 // The number of sign bits of the scalar result is computed by obtaining the
18996 // element type of the input vector operand, subtracting its width from the
18997 // XLEN, and then adding one (sign bit within the element type). If the
18998 // element type is wider than XLen, the least-significant XLEN bits are
18999 // taken.
19000 unsigned XLen = Subtarget.getXLen();
19001 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
19002 if (EltBits <= XLen)
19003 return XLen - EltBits + 1;
19004 break;
19005 }
19006 case ISD::INTRINSIC_W_CHAIN: {
19007 unsigned IntNo = Op.getConstantOperandVal(1);
19008 switch (IntNo) {
19009 default:
19010 break;
19011 case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
19012 case Intrinsic::riscv_masked_atomicrmw_add_i64:
19013 case Intrinsic::riscv_masked_atomicrmw_sub_i64:
19014 case Intrinsic::riscv_masked_atomicrmw_nand_i64:
19015 case Intrinsic::riscv_masked_atomicrmw_max_i64:
19016 case Intrinsic::riscv_masked_atomicrmw_min_i64:
19017 case Intrinsic::riscv_masked_atomicrmw_umax_i64:
19018 case Intrinsic::riscv_masked_atomicrmw_umin_i64:
19019 case Intrinsic::riscv_masked_cmpxchg_i64:
19020 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
19021 // narrow atomic operation. These are implemented using atomic
19022 // operations at the minimum supported atomicrmw/cmpxchg width whose
19023 // result is then sign extended to XLEN. With +A, the minimum width is
19024 // 32 for both 64 and 32.
19025 assert(Subtarget.getXLen() == 64);
19027 assert(Subtarget.hasStdExtA());
19028 return 33;
19029 }
19030 break;
19031 }
19032 }
19033
19034 return 1;
19035}
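// Example for the VMV_X_S case above (illustrative only): extracting an i8
// element into a 64-bit scalar yields XLen - EltBits + 1 = 64 - 8 + 1 = 57
// known sign bits, because the element is sign-extended to XLEN.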
19036
19037bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
19038 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
19039 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
19040
19041 // TODO: Add more target nodes.
19042 switch (Op.getOpcode()) {
19043 case RISCVISD::SELECT_CC:
19044 // Integer select_cc cannot create poison.
19045 // TODO: What are the FP poison semantics?
19046 // TODO: This instruction blocks poison from the unselected operand, can
19047 // we do anything with that?
19048 return !Op.getValueType().isInteger();
19049 }
19050 return TargetLowering::canCreateUndefOrPoisonForTargetNode(
19051 Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
19052}
19053
19054const Constant *
19055RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
19056 assert(Ld && "Unexpected null LoadSDNode");
19057 if (!ISD::isNormalLoad(Ld))
19058 return nullptr;
19059
19060 SDValue Ptr = Ld->getBasePtr();
19061
19062 // Only constant pools with no offset are supported.
19063 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
19064 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
19065 if (!CNode || CNode->isMachineConstantPoolEntry() ||
19066 CNode->getOffset() != 0)
19067 return nullptr;
19068
19069 return CNode;
19070 };
19071
19072 // Simple case, LLA.
19073 if (Ptr.getOpcode() == RISCVISD::LLA) {
19074 auto *CNode = GetSupportedConstantPool(Ptr);
19075 if (!CNode || CNode->getTargetFlags() != 0)
19076 return nullptr;
19077
19078 return CNode->getConstVal();
19079 }
19080
19081 // Look for a HI and ADD_LO pair.
19082 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
19083 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
19084 return nullptr;
19085
19086 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
19087 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
19088
19089 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
19090 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
19091 return nullptr;
19092
19093 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
19094 return nullptr;
19095
19096 return CNodeLo->getConstVal();
19097}
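// The HI/ADD_LO pair recognised above corresponds to the usual %hi/%lo
// constant-pool addressing sequence (sketch, register choice illustrative):
//   lui  a0, %hi(.LCPI0_0)
//   addi a0, a0, %lo(.LCPI0_0)
// with both relocations required to refer to the same constant-pool entry.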
19098
19099static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
19100 MachineBasicBlock *BB) {
19101 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
19102
19103 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
19104 // Should the count have wrapped while it was being read, we need to try
19105 // again.
19106 // For example:
19107 // ```
19108 // read:
19109 // csrrs x3, counterh # load high word of counter
19110 // csrrs x2, counter # load low word of counter
19111 // csrrs x4, counterh # load high word of counter
19112 // bne x3, x4, read # check if high word reads match, otherwise try again
19113 // ```
19114
19115 MachineFunction &MF = *BB->getParent();
19116 const BasicBlock *LLVMBB = BB->getBasicBlock();
19117 MachineFunction::iterator It = ++BB->getIterator();
19118
19119 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
19120 MF.insert(It, LoopMBB);
19121
19122 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
19123 MF.insert(It, DoneMBB);
19124
19125 // Transfer the remainder of BB and its successor edges to DoneMBB.
19126 DoneMBB->splice(DoneMBB->begin(), BB,
19127 std::next(MachineBasicBlock::iterator(MI)), BB->end());
19128 DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
19129
19130 BB->addSuccessor(LoopMBB);
19131
19132 MachineRegisterInfo &RegInfo = MF.getRegInfo();
19133 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
19134 Register LoReg = MI.getOperand(0).getReg();
19135 Register HiReg = MI.getOperand(1).getReg();
19136 int64_t LoCounter = MI.getOperand(2).getImm();
19137 int64_t HiCounter = MI.getOperand(3).getImm();
19138 DebugLoc DL = MI.getDebugLoc();
19139
19140 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
19141 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
19142 .addImm(HiCounter)
19143 .addReg(RISCV::X0);
19144 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
19145 .addImm(LoCounter)
19146 .addReg(RISCV::X0);
19147 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
19148 .addImm(HiCounter)
19149 .addReg(RISCV::X0);
19150
19151 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
19152 .addReg(HiReg)
19153 .addReg(ReadAgainReg)
19154 .addMBB(LoopMBB);
19155
19156 LoopMBB->addSuccessor(LoopMBB);
19157 LoopMBB->addSuccessor(DoneMBB);
19158
19159 MI.eraseFromParent();
19160
19161 return DoneMBB;
19162}
19163
19164static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
19165 MachineBasicBlock *BB,
19166 const RISCVSubtarget &Subtarget) {
19167 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
19168
19169 MachineFunction &MF = *BB->getParent();
19170 DebugLoc DL = MI.getDebugLoc();
19173 Register LoReg = MI.getOperand(0).getReg();
19174 Register HiReg = MI.getOperand(1).getReg();
19175 Register SrcReg = MI.getOperand(2).getReg();
19176
19177 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
19178 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
19179
19180 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
19181 RI, Register());
19182 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
19183 MachineMemOperand *MMOLo =
19184 MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
19185 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
19186 MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
19187 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
19188 .addFrameIndex(FI)
19189 .addImm(0)
19190 .addMemOperand(MMOLo);
19191 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
19192 .addFrameIndex(FI)
19193 .addImm(4)
19194 .addMemOperand(MMOHi);
19195 MI.eraseFromParent(); // The pseudo instruction is gone now.
19196 return BB;
19197}
19198
19199static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
19200 MachineBasicBlock *BB,
19201 const RISCVSubtarget &Subtarget) {
19202 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
19203 "Unexpected instruction");
19204
19205 MachineFunction &MF = *BB->getParent();
19206 DebugLoc DL = MI.getDebugLoc();
19209 Register DstReg = MI.getOperand(0).getReg();
19210 Register LoReg = MI.getOperand(1).getReg();
19211 Register HiReg = MI.getOperand(2).getReg();
19212
19213 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
19214 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
19215
19216 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
19217 MachineMemOperand *MMOLo =
19218 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
19219 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
19220 MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
19221 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
19222 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
19223 .addFrameIndex(FI)
19224 .addImm(0)
19225 .addMemOperand(MMOLo);
19226 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
19227 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
19228 .addFrameIndex(FI)
19229 .addImm(4)
19230 .addMemOperand(MMOHi);
19231 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
19232 MI.eraseFromParent(); // The pseudo instruction is gone now.
19233 return BB;
19234}
19235
19236static bool isSelectPseudo(MachineInstr &MI) {
19237 switch (MI.getOpcode()) {
19238 default:
19239 return false;
19240 case RISCV::Select_GPR_Using_CC_GPR:
19241 case RISCV::Select_GPR_Using_CC_Imm:
19242 case RISCV::Select_FPR16_Using_CC_GPR:
19243 case RISCV::Select_FPR16INX_Using_CC_GPR:
19244 case RISCV::Select_FPR32_Using_CC_GPR:
19245 case RISCV::Select_FPR32INX_Using_CC_GPR:
19246 case RISCV::Select_FPR64_Using_CC_GPR:
19247 case RISCV::Select_FPR64INX_Using_CC_GPR:
19248 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
19249 return true;
19250 }
19251}
19252
19253static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
19254 unsigned RelOpcode, unsigned EqOpcode,
19255 const RISCVSubtarget &Subtarget) {
19256 DebugLoc DL = MI.getDebugLoc();
19257 Register DstReg = MI.getOperand(0).getReg();
19258 Register Src1Reg = MI.getOperand(1).getReg();
19259 Register Src2Reg = MI.getOperand(2).getReg();
19261 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19263
19264 // Save the current FFLAGS.
19265 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
19266
19267 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
19268 .addReg(Src1Reg)
19269 .addReg(Src2Reg);
19272
19273 // Restore the FFLAGS.
19274 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
19275 .addReg(SavedFFlags, RegState::Kill);
19276
19277 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
19278 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
19279 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
19280 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
19283
19284 // Erase the pseudoinstruction.
19285 MI.eraseFromParent();
19286 return BB;
19287}
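// Sketch of the expansion above for PseudoQuietFLE_S (registers illustrative):
//   frflags t0              # save FFLAGS
//   fle.s   a0, fa0, fa1    # signalling compare, may set NV for any NaN
//   fsflags t0              # restore FFLAGS, dropping that NV
//   feq.s   zero, fa0, fa1  # quiet compare, sets NV only for signalling NaNs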
19288
19289static MachineBasicBlock *
19290EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
19291 MachineBasicBlock *ThisMBB,
19292 const RISCVSubtarget &Subtarget) {
19293 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5)
19294 // Without this, custom-inserter would have generated:
19295 //
19296 // A
19297 // | \
19298 // | B
19299 // | /
19300 // C
19301 // | \
19302 // | D
19303 // | /
19304 // E
19305 //
19306 // A: X = ...; Y = ...
19307 // B: empty
19308 // C: Z = PHI [X, A], [Y, B]
19309 // D: empty
19310 // E: PHI [X, C], [Z, D]
19311 //
19312 // If we lower both Select_FPRX_ in a single step, we can instead generate:
19313 //
19314 // A
19315 // | \
19316 // | C
19317 // | /|
19318 // |/ |
19319 // | |
19320 // | D
19321 // | /
19322 // E
19323 //
19324 // A: X = ...; Y = ...
19325 // D: empty
19326 // E: PHI [X, A], [X, C], [Y, D]
19327
19328 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19329 const DebugLoc &DL = First.getDebugLoc();
19330 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
19331 MachineFunction *F = ThisMBB->getParent();
19332 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
19333 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
19334 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
19335 MachineFunction::iterator It = ++ThisMBB->getIterator();
19336 F->insert(It, FirstMBB);
19337 F->insert(It, SecondMBB);
19338 F->insert(It, SinkMBB);
19339
19340 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
19341 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
19343 ThisMBB->end());
19344 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
19345
19346 // Fallthrough block for ThisMBB.
19347 ThisMBB->addSuccessor(FirstMBB);
19348 // Fallthrough block for FirstMBB.
19349 FirstMBB->addSuccessor(SecondMBB);
19350 ThisMBB->addSuccessor(SinkMBB);
19351 FirstMBB->addSuccessor(SinkMBB);
19352 // This is fallthrough.
19353 SecondMBB->addSuccessor(SinkMBB);
19354
19355 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
19356 Register FLHS = First.getOperand(1).getReg();
19357 Register FRHS = First.getOperand(2).getReg();
19358 // Insert appropriate branch.
19359 BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
19360 .addReg(FLHS)
19361 .addReg(FRHS)
19362 .addMBB(SinkMBB);
19363
19364 Register SLHS = Second.getOperand(1).getReg();
19365 Register SRHS = Second.getOperand(2).getReg();
19366 Register Op1Reg4 = First.getOperand(4).getReg();
19367 Register Op1Reg5 = First.getOperand(5).getReg();
19368
19369 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
19370 // Insert appropriate branch.
19371 BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
19372 .addReg(SLHS)
19373 .addReg(SRHS)
19374 .addMBB(SinkMBB);
19375
19376 Register DestReg = Second.getOperand(0).getReg();
19377 Register Op2Reg4 = Second.getOperand(4).getReg();
19378 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
19379 .addReg(Op2Reg4)
19380 .addMBB(ThisMBB)
19381 .addReg(Op1Reg4)
19382 .addMBB(FirstMBB)
19383 .addReg(Op1Reg5)
19384 .addMBB(SecondMBB);
19385
19386 // Now remove the Select_FPRX_s.
19387 First.eraseFromParent();
19388 Second.eraseFromParent();
19389 return SinkMBB;
19390}
19391
19392static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
19393 MachineBasicBlock *BB,
19394 const RISCVSubtarget &Subtarget) {
19395 // To "insert" Select_* instructions, we actually have to insert the triangle
19396 // control-flow pattern. The incoming instructions know the destination vreg
19397 // to set, the condition code register to branch on, the true/false values to
19398 // select between, and the condcode to use to select the appropriate branch.
19399 //
19400 // We produce the following control flow:
19401 // HeadMBB
19402 // | \
19403 // | IfFalseMBB
19404 // | /
19405 // TailMBB
19406 //
19407 // When we find a sequence of selects we attempt to optimize their emission
19408 // by sharing the control flow. Currently we only handle cases where we have
19409 // multiple selects with the exact same condition (same LHS, RHS and CC).
19410 // The selects may be interleaved with other instructions if the other
19411 // instructions meet some requirements we deem safe:
19412 // - They are not pseudo instructions.
19413 // - They are debug instructions. Otherwise,
19414 // - They do not have side-effects, do not access memory and their inputs do
19415 // not depend on the results of the select pseudo-instructions.
19416 // The TrueV/FalseV operands of the selects cannot depend on the result of
19417 // previous selects in the sequence.
19418 // These conditions could be further relaxed. See the X86 target for a
19419 // related approach and more information.
19420 //
19421 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
19422 // is checked here and handled by a separate function -
19423 // EmitLoweredCascadedSelect.
19424
19425 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
19426 if ((MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
19427 MI.getOpcode() != RISCV::Select_GPR_Using_CC_Imm) &&
19428 Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
19429 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
19430 Next->getOperand(5).isKill())
19431 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
19432
19433 Register LHS = MI.getOperand(1).getReg();
19434 Register RHS;
19435 if (MI.getOperand(2).isReg())
19436 RHS = MI.getOperand(2).getReg();
19437 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
19438
19439 SmallVector<MachineInstr *, 4> SelectDebugValues;
19440 SmallSet<Register, 4> SelectDests;
19441 SelectDests.insert(MI.getOperand(0).getReg());
19442
19443 MachineInstr *LastSelectPseudo = &MI;
19444 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
19445 SequenceMBBI != E; ++SequenceMBBI) {
19446 if (SequenceMBBI->isDebugInstr())
19447 continue;
19448 if (isSelectPseudo(*SequenceMBBI)) {
19449 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
19450 !SequenceMBBI->getOperand(2).isReg() ||
19451 SequenceMBBI->getOperand(2).getReg() != RHS ||
19452 SequenceMBBI->getOperand(3).getImm() != CC ||
19453 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
19454 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
19455 break;
19456 LastSelectPseudo = &*SequenceMBBI;
19457 SequenceMBBI->collectDebugValues(SelectDebugValues);
19458 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
19459 continue;
19460 }
19461 if (SequenceMBBI->hasUnmodeledSideEffects() ||
19462 SequenceMBBI->mayLoadOrStore() ||
19463 SequenceMBBI->usesCustomInsertionHook())
19464 break;
19465 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
19466 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
19467 }))
19468 break;
19469 }
19470
19471 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19472 const BasicBlock *LLVM_BB = BB->getBasicBlock();
19473 DebugLoc DL = MI.getDebugLoc();
19474 MachineFunction::iterator I = ++BB->getIterator();
19475
19476 MachineBasicBlock *HeadMBB = BB;
19477 MachineFunction *F = BB->getParent();
19478 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
19479 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
19480
19481 F->insert(I, IfFalseMBB);
19482 F->insert(I, TailMBB);
19483
19484 // Set the call frame size on entry to the new basic blocks.
19485 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
19486 IfFalseMBB->setCallFrameSize(CallFrameSize);
19487 TailMBB->setCallFrameSize(CallFrameSize);
19488
19489 // Transfer debug instructions associated with the selects to TailMBB.
19490 for (MachineInstr *DebugInstr : SelectDebugValues) {
19491 TailMBB->push_back(DebugInstr->removeFromParent());
19492 }
19493
19494 // Move all instructions after the sequence to TailMBB.
19495 TailMBB->splice(TailMBB->end(), HeadMBB,
19496 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
19497 // Update machine-CFG edges by transferring all successors of the current
19498 // block to the new block which will contain the Phi nodes for the selects.
19499 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
19500 // Set the successors for HeadMBB.
19501 HeadMBB->addSuccessor(IfFalseMBB);
19502 HeadMBB->addSuccessor(TailMBB);
19503
19504 // Insert appropriate branch.
19505 if (MI.getOperand(2).isImm())
19506 BuildMI(HeadMBB, DL, TII.getBrCond(CC, MI.getOperand(2).isImm()))
19507 .addReg(LHS)
19508 .addImm(MI.getOperand(2).getImm())
19509 .addMBB(TailMBB);
19510 else
19511 BuildMI(HeadMBB, DL, TII.getBrCond(CC))
19512 .addReg(LHS)
19513 .addReg(RHS)
19514 .addMBB(TailMBB);
19515
19516 // IfFalseMBB just falls through to TailMBB.
19517 IfFalseMBB->addSuccessor(TailMBB);
19518
19519 // Create PHIs for all of the select pseudo-instructions.
19520 auto SelectMBBI = MI.getIterator();
19521 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
19522 auto InsertionPoint = TailMBB->begin();
19523 while (SelectMBBI != SelectEnd) {
19524 auto Next = std::next(SelectMBBI);
19525 if (isSelectPseudo(*SelectMBBI)) {
19526 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
19527 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
19528 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
19529 .addReg(SelectMBBI->getOperand(4).getReg())
19530 .addMBB(HeadMBB)
19531 .addReg(SelectMBBI->getOperand(5).getReg())
19532 .addMBB(IfFalseMBB);
19533 SelectMBBI->eraseFromParent();
19534 }
19535 SelectMBBI = Next;
19536 }
19537
19538 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
19539 return TailMBB;
19540}
19541
19542// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
19543static const RISCV::RISCVMaskedPseudoInfo *
19544lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW) {
19545 const RISCVVInversePseudosTable::PseudoInfo *Inverse =
19546 RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
19547 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
19548 const RISCV::RISCVMaskedPseudoInfo *Masked =
19549 RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
19550 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
19551 return Masked;
19552}
19553
19554static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
19555 MachineBasicBlock *BB,
19556 unsigned CVTXOpc) {
19557 DebugLoc DL = MI.getDebugLoc();
19558
19560
19562 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19563
19564 // Save the old value of FFLAGS.
19565 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
19566
19567 assert(MI.getNumOperands() == 7);
19568
19569 // Emit a VFCVT_X_F
19570 const TargetRegisterInfo *TRI =
19572 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
19573 Register Tmp = MRI.createVirtualRegister(RC);
19574 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
19575 .add(MI.getOperand(1))
19576 .add(MI.getOperand(2))
19577 .add(MI.getOperand(3))
19578 .add(MachineOperand::CreateImm(7)) // frm = DYN
19579 .add(MI.getOperand(4))
19580 .add(MI.getOperand(5))
19581 .add(MI.getOperand(6))
19582 .add(MachineOperand::CreateReg(RISCV::FRM,
19583 /*IsDef*/ false,
19584 /*IsImp*/ true));
19585
19586 // Emit a VFCVT_F_X
19587 RISCVII::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
19588 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
19589 // There is no E8 variant for VFCVT_F_X.
19590 assert(Log2SEW >= 4);
19591 unsigned CVTFOpc =
19592 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
19593 ->MaskedPseudo;
19594
19595 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
19596 .add(MI.getOperand(0))
19597 .add(MI.getOperand(1))
19598 .addReg(Tmp)
19599 .add(MI.getOperand(3))
19600 .add(MachineOperand::CreateImm(7)) // frm = DYN
19601 .add(MI.getOperand(4))
19602 .add(MI.getOperand(5))
19603 .add(MI.getOperand(6))
19604 .add(MachineOperand::CreateReg(RISCV::FRM,
19605 /*IsDef*/ false,
19606 /*IsImp*/ true));
19607
19608 // Restore FFLAGS.
19609 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
19610 .addReg(SavedFFLAGS, RegState::Kill);
19611
19612 // Erase the pseudoinstruction.
19613 MI.eraseFromParent();
19614 return BB;
19615}
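// Net effect of the expansion above: a masked VFCVT_X_F followed by the
// matching masked VFCVT_F_X, both with the dynamic rounding mode, bracketed
// by an FFLAGS save/restore so that the round-trip leaves no floating-point
// exception flags behind.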
19616
19617static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
19618 const RISCVSubtarget &Subtarget) {
19619 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
19620 const TargetRegisterClass *RC;
19621 switch (MI.getOpcode()) {
19622 default:
19623 llvm_unreachable("Unexpected opcode");
19624 case RISCV::PseudoFROUND_H:
19625 CmpOpc = RISCV::FLT_H;
19626 F2IOpc = RISCV::FCVT_W_H;
19627 I2FOpc = RISCV::FCVT_H_W;
19628 FSGNJOpc = RISCV::FSGNJ_H;
19629 FSGNJXOpc = RISCV::FSGNJX_H;
19630 RC = &RISCV::FPR16RegClass;
19631 break;
19632 case RISCV::PseudoFROUND_H_INX:
19633 CmpOpc = RISCV::FLT_H_INX;
19634 F2IOpc = RISCV::FCVT_W_H_INX;
19635 I2FOpc = RISCV::FCVT_H_W_INX;
19636 FSGNJOpc = RISCV::FSGNJ_H_INX;
19637 FSGNJXOpc = RISCV::FSGNJX_H_INX;
19638 RC = &RISCV::GPRF16RegClass;
19639 break;
19640 case RISCV::PseudoFROUND_S:
19641 CmpOpc = RISCV::FLT_S;
19642 F2IOpc = RISCV::FCVT_W_S;
19643 I2FOpc = RISCV::FCVT_S_W;
19644 FSGNJOpc = RISCV::FSGNJ_S;
19645 FSGNJXOpc = RISCV::FSGNJX_S;
19646 RC = &RISCV::FPR32RegClass;
19647 break;
19648 case RISCV::PseudoFROUND_S_INX:
19649 CmpOpc = RISCV::FLT_S_INX;
19650 F2IOpc = RISCV::FCVT_W_S_INX;
19651 I2FOpc = RISCV::FCVT_S_W_INX;
19652 FSGNJOpc = RISCV::FSGNJ_S_INX;
19653 FSGNJXOpc = RISCV::FSGNJX_S_INX;
19654 RC = &RISCV::GPRF32RegClass;
19655 break;
19656 case RISCV::PseudoFROUND_D:
19657 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
19658 CmpOpc = RISCV::FLT_D;
19659 F2IOpc = RISCV::FCVT_L_D;
19660 I2FOpc = RISCV::FCVT_D_L;
19661 FSGNJOpc = RISCV::FSGNJ_D;
19662 FSGNJXOpc = RISCV::FSGNJX_D;
19663 RC = &RISCV::FPR64RegClass;
19664 break;
19665 case RISCV::PseudoFROUND_D_INX:
19666 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
19667 CmpOpc = RISCV::FLT_D_INX;
19668 F2IOpc = RISCV::FCVT_L_D_INX;
19669 I2FOpc = RISCV::FCVT_D_L_INX;
19670 FSGNJOpc = RISCV::FSGNJ_D_INX;
19671 FSGNJXOpc = RISCV::FSGNJX_D_INX;
19672 RC = &RISCV::GPRRegClass;
19673 break;
19674 }
19675
19676 const BasicBlock *BB = MBB->getBasicBlock();
19677 DebugLoc DL = MI.getDebugLoc();
19678 MachineFunction *F = MBB->getParent();
19679 MachineFunction::iterator I = ++MBB->getIterator();
19680
19681 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
19682 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
19683
19684 F->insert(I, CvtMBB);
19685 F->insert(I, DoneMBB);
19686 // Move all instructions after the sequence to DoneMBB.
19687 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
19688 MBB->end());
19689 // Update machine-CFG edges by transferring all successors of the current
19690 // block to the new block which will contain the Phi nodes for the selects.
19692 // Set the successors for MBB.
19693 MBB->addSuccessor(CvtMBB);
19694 MBB->addSuccessor(DoneMBB);
19695
19696 Register DstReg = MI.getOperand(0).getReg();
19697 Register SrcReg = MI.getOperand(1).getReg();
19698 Register MaxReg = MI.getOperand(2).getReg();
19699 int64_t FRM = MI.getOperand(3).getImm();
19700
19701 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19702 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
19703
19704 Register FabsReg = MRI.createVirtualRegister(RC);
19705 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
19706
19707 // Compare the FP value to the max value.
19708 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19709 auto MIB =
19710 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
19713
19714 // Insert branch.
19715 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
19716 .addReg(CmpReg)
19717 .addReg(RISCV::X0)
19718 .addMBB(DoneMBB);
19719
19720 CvtMBB->addSuccessor(DoneMBB);
19721
19722 // Convert to integer.
19723 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19724 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
19727
19728 // Convert back to FP.
19729 Register I2FReg = MRI.createVirtualRegister(RC);
19730 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
19733
19734 // Restore the sign bit.
19735 Register CvtReg = MRI.createVirtualRegister(RC);
19736 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
19737
19738 // Merge the results.
19739 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
19740 .addReg(SrcReg)
19741 .addMBB(MBB)
19742 .addReg(CvtReg)
19743 .addMBB(CvtMBB);
19744
19745 MI.eraseFromParent();
19746 return DoneMBB;
19747}
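// Rough shape of the emitted code for PseudoFROUND_S (registers and the
// bound operand are illustrative):
//   fsgnjx.s ft0, fa0, fa0   # ft0 = |x|
//   flt.s    t0, ft0, fa1    # values >= the bound are already integral
//   beq      t0, zero, done
//   fcvt.w.s t1, fa0, <frm>
//   fcvt.s.w ft1, t1, <frm>
//   fsgnj.s  ft1, ft1, fa0   # reapply the original sign
// done:                      # result = PHI(x, converted value)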
19748
19749MachineBasicBlock *
19750RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
19751 MachineBasicBlock *BB) const {
19752 switch (MI.getOpcode()) {
19753 default:
19754 llvm_unreachable("Unexpected instr type to insert");
19755 case RISCV::ReadCounterWide:
19756 assert(!Subtarget.is64Bit() &&
19757 "ReadCounterWide is only to be used on riscv32");
19758 return emitReadCounterWidePseudo(MI, BB);
19759 case RISCV::Select_GPR_Using_CC_GPR:
19760 case RISCV::Select_GPR_Using_CC_Imm:
19761 case RISCV::Select_FPR16_Using_CC_GPR:
19762 case RISCV::Select_FPR16INX_Using_CC_GPR:
19763 case RISCV::Select_FPR32_Using_CC_GPR:
19764 case RISCV::Select_FPR32INX_Using_CC_GPR:
19765 case RISCV::Select_FPR64_Using_CC_GPR:
19766 case RISCV::Select_FPR64INX_Using_CC_GPR:
19767 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
19768 return emitSelectPseudo(MI, BB, Subtarget);
19769 case RISCV::BuildPairF64Pseudo:
19770 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
19771 case RISCV::SplitF64Pseudo:
19772 return emitSplitF64Pseudo(MI, BB, Subtarget);
19773 case RISCV::PseudoQuietFLE_H:
19774 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
19775 case RISCV::PseudoQuietFLE_H_INX:
19776 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
19777 case RISCV::PseudoQuietFLT_H:
19778 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
19779 case RISCV::PseudoQuietFLT_H_INX:
19780 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
19781 case RISCV::PseudoQuietFLE_S:
19782 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
19783 case RISCV::PseudoQuietFLE_S_INX:
19784 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
19785 case RISCV::PseudoQuietFLT_S:
19786 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
19787 case RISCV::PseudoQuietFLT_S_INX:
19788 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
19789 case RISCV::PseudoQuietFLE_D:
19790 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
19791 case RISCV::PseudoQuietFLE_D_INX:
19792 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
19793 case RISCV::PseudoQuietFLE_D_IN32X:
19794 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
19795 Subtarget);
19796 case RISCV::PseudoQuietFLT_D:
19797 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
19798 case RISCV::PseudoQuietFLT_D_INX:
19799 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
19800 case RISCV::PseudoQuietFLT_D_IN32X:
19801 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
19802 Subtarget);
19803
19804 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
19805 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
19806 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
19807 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
19808 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
19809 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
19810 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
19811 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
19812 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
19813 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
19814 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
19815 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
19816 case RISCV::PseudoFROUND_H:
19817 case RISCV::PseudoFROUND_H_INX:
19818 case RISCV::PseudoFROUND_S:
19819 case RISCV::PseudoFROUND_S_INX:
19820 case RISCV::PseudoFROUND_D:
19821 case RISCV::PseudoFROUND_D_INX:
19822 case RISCV::PseudoFROUND_D_IN32X:
19823 return emitFROUND(MI, BB, Subtarget);
19824 case RISCV::PROBED_STACKALLOC_DYN:
19825 return emitDynamicProbedAlloc(MI, BB);
19826 case TargetOpcode::STATEPOINT:
19827 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
19828 // while jal call instruction (where statepoint will be lowered at the end)
19829 // has implicit def. This def is early-clobber as it will be set at
19830 // the moment of the call and earlier than any use is read.
19831 // Add this implicit dead def here as a workaround.
19832 MI.addOperand(*MI.getMF(),
19833 MachineOperand::CreateReg(
19834 RISCV::X1, /*isDef*/ true,
19835 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
19836 /*isUndef*/ false, /*isEarlyClobber*/ true));
19837 [[fallthrough]];
19838 case TargetOpcode::STACKMAP:
19839 case TargetOpcode::PATCHPOINT:
19840 if (!Subtarget.is64Bit())
19841 report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
19842 "supported on 64-bit targets");
19843 return emitPatchPoint(MI, BB);
19844 }
19845}
19846
19847void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
19848 SDNode *Node) const {
19849 // Add FRM dependency to any instructions with dynamic rounding mode.
19850 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
19851 if (Idx < 0) {
19852 // Vector pseudos have FRM index indicated by TSFlags.
19853 Idx = RISCVII::getFRMOpNum(MI.getDesc());
19854 if (Idx < 0)
19855 return;
19856 }
19857 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
19858 return;
19859 // If the instruction already reads FRM, don't add another read.
19860 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
19861 return;
19862 MI.addOperand(
19863 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
19864}
19865
19866void RISCVTargetLowering::analyzeInputArgs(
19867 MachineFunction &MF, CCState &CCInfo,
19868 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
19869 RISCVCCAssignFn Fn) const {
19870 unsigned NumArgs = Ins.size();
19871 FunctionType *FType = MF.getFunction().getFunctionType();
19872
19873 for (unsigned i = 0; i != NumArgs; ++i) {
19874 MVT ArgVT = Ins[i].VT;
19875 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
19876
19877 Type *ArgTy = nullptr;
19878 if (IsRet)
19879 ArgTy = FType->getReturnType();
19880 else if (Ins[i].isOrigArg())
19881 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
19882
19883 if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo,
19884 /*IsFixed=*/true, IsRet, ArgTy)) {
19885 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
19886 << ArgVT << '\n');
19887 llvm_unreachable(nullptr);
19888 }
19889 }
19890}
19891
19892void RISCVTargetLowering::analyzeOutputArgs(
19893 MachineFunction &MF, CCState &CCInfo,
19894 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
19895 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
19896 unsigned NumArgs = Outs.size();
19897
19898 for (unsigned i = 0; i != NumArgs; i++) {
19899 MVT ArgVT = Outs[i].VT;
19900 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
19901 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
19902
19903 if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo,
19904 Outs[i].IsFixed, IsRet, OrigTy)) {
19905 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
19906 << ArgVT << "\n");
19907 llvm_unreachable(nullptr);
19908 }
19909 }
19910}
19911
19912// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
19913// values.
19914static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
19915 const CCValAssign &VA, const SDLoc &DL,
19916 const RISCVSubtarget &Subtarget) {
19917 if (VA.needsCustom()) {
19918 if (VA.getLocVT().isInteger() &&
19919 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
19920 return DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
19921 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
19922 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
19923 if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
19924 return convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
19925 llvm_unreachable("Unexpected Custom handling.");
19926 }
19927
19928 switch (VA.getLocInfo()) {
19929 default:
19930 llvm_unreachable("Unexpected CCValAssign::LocInfo");
19931 case CCValAssign::Full:
19932 break;
19933 case CCValAssign::BCvt:
19934 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
19935 break;
19936 }
19937 return Val;
19938}
19939
19940// The caller is responsible for loading the full value if the argument is
19941// passed with CCValAssign::Indirect.
19942static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
19943 const CCValAssign &VA, const SDLoc &DL,
19944 const ISD::InputArg &In,
19945 const RISCVTargetLowering &TLI) {
19946 MachineFunction &MF = DAG.getMachineFunction();
19947 MachineRegisterInfo &RegInfo = MF.getRegInfo();
19948 EVT LocVT = VA.getLocVT();
19949 SDValue Val;
19950 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
19951 Register VReg = RegInfo.createVirtualRegister(RC);
19952 RegInfo.addLiveIn(VA.getLocReg(), VReg);
19953 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
19954
19955 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
19956 if (In.isOrigArg()) {
19957 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
19958 if (OrigArg->getType()->isIntegerTy()) {
19959 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
19960 // An input zero extended from i31 can also be considered sign extended.
19961 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
19962 (BitWidth < 32 && In.Flags.isZExt())) {
19963 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
19964 RVFI->addSExt32Register(VReg);
19965 }
19966 }
19967 }
19968
19969 if (VA.getLocInfo() == CCValAssign::Indirect)
19970 return Val;
19971
19972 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
19973}
19974
19975static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
19976 const CCValAssign &VA, const SDLoc &DL,
19977 const RISCVSubtarget &Subtarget) {
19978 EVT LocVT = VA.getLocVT();
19979
19980 if (VA.needsCustom()) {
19981 if (LocVT.isInteger() &&
19982 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
19983 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
19984 if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32)
19985 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
19986 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
19987 return convertToScalableVector(LocVT, Val, DAG, Subtarget);
19988 llvm_unreachable("Unexpected Custom handling.");
19989 }
19990
19991 switch (VA.getLocInfo()) {
19992 default:
19993 llvm_unreachable("Unexpected CCValAssign::LocInfo");
19994 case CCValAssign::Full:
19995 break;
19996 case CCValAssign::BCvt:
19997 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
19998 break;
19999 }
20000 return Val;
20001}
20002
20003// The caller is responsible for loading the full value if the argument is
20004// passed with CCValAssign::Indirect.
20005static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
20006 const CCValAssign &VA, const SDLoc &DL) {
20007 MachineFunction &MF = DAG.getMachineFunction();
20008 MachineFrameInfo &MFI = MF.getFrameInfo();
20009 EVT LocVT = VA.getLocVT();
20010 EVT ValVT = VA.getValVT();
20012 if (VA.getLocInfo() == CCValAssign::Indirect) {
20013 // When the value is a scalable vector, we save the pointer which points to
20014 // the scalable vector value in the stack. The ValVT will be the pointer
20015 // type, instead of the scalable vector type.
20016 ValVT = LocVT;
20017 }
20018 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
20019 /*IsImmutable=*/true);
20020 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
20021 SDValue Val;
20022
20024 switch (VA.getLocInfo()) {
20025 default:
20026 llvm_unreachable("Unexpected CCValAssign::LocInfo");
20027 case CCValAssign::Full:
20028 case CCValAssign::Indirect:
20029 case CCValAssign::BCvt:
20030 break;
20031 }
20032 Val = DAG.getExtLoad(
20033 ExtType, DL, LocVT, Chain, FIN,
20034 MachinePointerInfo::getFixedStack(MF, FI));
20035 return Val;
20036}
20037
20038static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
20039 const CCValAssign &VA,
20040 const CCValAssign &HiVA,
20041 const SDLoc &DL) {
20042 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
20043 "Unexpected VA");
20045 MachineFrameInfo &MFI = MF.getFrameInfo();
20046 MachineRegisterInfo &RegInfo = MF.getRegInfo();
20047
20048 assert(VA.isRegLoc() && "Expected register VA assignment");
20049
20050 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
20051 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
20052 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
20053 SDValue Hi;
20054 if (HiVA.isMemLoc()) {
20055 // Second half of f64 is passed on the stack.
20056 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
20057 /*IsImmutable=*/true);
20058 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
20059 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
20060 MachinePointerInfo::getFixedStack(MF, FI));
20061 } else {
20062 // Second half of f64 is passed in another GPR.
20063 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
20064 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
20065 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
20066 }
20067 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
20068}
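// Example (illustrative only): under an RV32 soft-float-double ABI, an f64
// whose low word was assigned to a GPR arrives with the high word either in
// the next GPR or, if the argument registers ran out, in a 4-byte stack slot;
// the two i32 halves are then recombined with BuildPairF64.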
20069
20070// Transform physical registers into virtual registers.
20071SDValue RISCVTargetLowering::LowerFormalArguments(
20072 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
20073 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
20074 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
20075 MachineFunction &MF = DAG.getMachineFunction();
20077
20078 switch (CallConv) {
20079 default:
20080 report_fatal_error("Unsupported calling convention");
20081 case CallingConv::C:
20082 case CallingConv::Fast:
20083 case CallingConv::SPIR_KERNEL:
20084 case CallingConv::GRAAL:
20085 case CallingConv::RISCV_VectorCall:
20086 break;
20087 case CallingConv::GHC:
20088 if (Subtarget.hasStdExtE())
20089 report_fatal_error("GHC calling convention is not supported on RVE!");
20090 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
20091 report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
20092 "(Zdinx/D) instruction set extensions");
20093 }
20094
20095 const Function &Func = MF.getFunction();
20096 if (Func.hasFnAttribute("interrupt")) {
20097 if (!Func.arg_empty())
20099 "Functions with the interrupt attribute cannot have arguments!");
20100
20101 StringRef Kind =
20102 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
20103
20104 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
20106 "Function interrupt attribute argument not supported!");
20107 }
20108
20109 EVT PtrVT = getPointerTy(DAG.getDataLayout());
20110 MVT XLenVT = Subtarget.getXLenVT();
20111 unsigned XLenInBytes = Subtarget.getXLen() / 8;
20112 // Used with varargs to accumulate store chains.
20113 std::vector<SDValue> OutChains;
20114
20115 // Assign locations to all of the incoming arguments.
20116 SmallVector<CCValAssign, 16> ArgLocs;
20118
20119 if (CallConv == CallingConv::GHC)
20120 CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
20121 else
20122 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
20123 CallConv == CallingConv::Fast ? CC_RISCV_FastCC
20124 : CC_RISCV);
20125
20126 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
20127 CCValAssign &VA = ArgLocs[i];
20128 SDValue ArgValue;
20129 // Passing f64 on RV32D with a soft float ABI must be handled as a special
20130 // case.
20131 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20132 assert(VA.needsCustom());
20133 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
20134 } else if (VA.isRegLoc())
20135 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
20136 else
20137 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
20138
20139 if (VA.getLocInfo() == CCValAssign::Indirect) {
20140 // If the original argument was split and passed by reference (e.g. i128
20141 // on RV32), we need to load all parts of it here (using the same
20142 // address). Vectors may be partly split to registers and partly to the
20143 // stack, in which case the base address is partly offset and subsequent
20144 // stores are relative to that.
20145 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
20146 MachinePointerInfo()));
20147 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
20148 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
20149 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
20150 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
20151 CCValAssign &PartVA = ArgLocs[i + 1];
20152 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
20153 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
20154 if (PartVA.getValVT().isScalableVector())
20155 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
20156 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
20157 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
20158 MachinePointerInfo()));
20159 ++i;
20160 ++InsIdx;
20161 }
20162 continue;
20163 }
20164 InVals.push_back(ArgValue);
20165 }
20166
20167 if (any_of(ArgLocs,
20168 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
20169 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
20170
20171 if (IsVarArg) {
20172 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
20173 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
20174 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
20175 MachineFrameInfo &MFI = MF.getFrameInfo();
20176 MachineRegisterInfo &RegInfo = MF.getRegInfo();
20177 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
20178
20179 // Size of the vararg save area. For now, the varargs save area is either
20180 // zero or large enough to hold a0-a7.
20181 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
20182 int FI;
20183
20184 // If all registers are allocated, then all varargs must be passed on the
20185 // stack and we don't need to save any argregs.
20186 if (VarArgsSaveSize == 0) {
20187 int VaArgOffset = CCInfo.getStackSize();
20188 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
20189 } else {
20190 int VaArgOffset = -VarArgsSaveSize;
20191 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
20192
20193 // If saving an odd number of registers then create an extra stack slot to
20194 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
20195 // offsets to even-numbered registered remain 2*XLEN-aligned.
20196 if (Idx % 2) {
20197 MFI.CreateFixedObject(
20198 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
20199 VarArgsSaveSize += XLenInBytes;
20200 }
20201
20202 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
20203
20204 // Copy the integer registers that may have been used for passing varargs
20205 // to the vararg save area.
20206 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
20207 const Register Reg = RegInfo.createVirtualRegister(RC);
20208 RegInfo.addLiveIn(ArgRegs[I], Reg);
20209 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
20210 SDValue Store = DAG.getStore(
20211 Chain, DL, ArgValue, FIN,
20212 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
20213 OutChains.push_back(Store);
20214 FIN =
20215 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
20216 }
20217 }
20218
20219 // Record the frame index of the first variable argument
20220 // which is a value necessary to VASTART.
20221 RVFI->setVarArgsFrameIndex(FI);
20222 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
20223 }
20224
20225 // All stores are grouped in one node to allow the matching between
20226 // the size of Ins and InVals. This only happens for vararg functions.
20227 if (!OutChains.empty()) {
20228 OutChains.push_back(Chain);
20229 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
20230 }
20231
20232 return Chain;
20233}
20234
20235/// isEligibleForTailCallOptimization - Check whether the call is eligible
20236/// for tail call optimization.
20237/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
20238bool RISCVTargetLowering::isEligibleForTailCallOptimization(
20239 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
20240 const SmallVector<CCValAssign, 16> &ArgLocs) const {
20241
20242 auto CalleeCC = CLI.CallConv;
20243 auto &Outs = CLI.Outs;
20244 auto &Caller = MF.getFunction();
20245 auto CallerCC = Caller.getCallingConv();
20246
20247 // Exception-handling functions need a special set of instructions to
20248 // indicate a return to the hardware. Tail-calling another function would
20249 // probably break this.
20250 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
20251 // should be expanded as new function attributes are introduced.
20252 if (Caller.hasFnAttribute("interrupt"))
20253 return false;
20254
20255 // Do not tail call opt if the stack is used to pass parameters.
20256 if (CCInfo.getStackSize() != 0)
20257 return false;
20258
20259 // Do not tail call opt if any parameters need to be passed indirectly.
20260 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
20261 // passed indirectly. So the address of the value will be passed in a
20262 // register, or if not available, then the address is put on the stack. In
20263 // order to pass indirectly, space on the stack often needs to be allocated
20264 // in order to store the value. In this case the CCInfo.getNextStackOffset()
20265   // in order to store the value. In this case the CCInfo.getStackSize() != 0
20266   // check is not enough and we need to check if any CCValAssign in ArgLocs
20267   // is passed CCValAssign::Indirect.
20268 if (VA.getLocInfo() == CCValAssign::Indirect)
20269 return false;
20270
20271 // Do not tail call opt if either caller or callee uses struct return
20272 // semantics.
20273 auto IsCallerStructRet = Caller.hasStructRetAttr();
20274 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
20275 if (IsCallerStructRet || IsCalleeStructRet)
20276 return false;
20277
20278 // The callee has to preserve all registers the caller needs to preserve.
20279 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
20280 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
20281 if (CalleeCC != CallerCC) {
20282 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
20283 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
20284 return false;
20285 }
20286
20287 // Byval parameters hand the function a pointer directly into the stack area
20288 // we want to reuse during a tail call. Working around this *is* possible
20289 // but less efficient and uglier in LowerCall.
20290 for (auto &Arg : Outs)
20291 if (Arg.Flags.isByVal())
20292 return false;
20293
20294 return true;
20295}
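// Illustrative example of the checks above (hypothetical C code, not taken
// from this file): with everything in registers and matching conventions,
//
//   int leaf(int a, int b);
//   int caller(int a, int b) { return leaf(a, b); }   // eligible -> TAIL
//
// whereas passing a large aggregate byval, needing stack space for arguments,
// or returning via sret makes this hook return false and a normal CALL plus
// RET sequence is emitted instead.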
20296
20297 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
20298   return DAG.getDataLayout().getPrefTypeAlign(
20299 VT.getTypeForEVT(*DAG.getContext()));
20300}
20301
20302// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
20303// and output parameter nodes.
20304 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
20305                                        SmallVectorImpl<SDValue> &InVals) const {
20306 SelectionDAG &DAG = CLI.DAG;
20307 SDLoc &DL = CLI.DL;
20308   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
20309   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
20310   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
20311   SDValue Chain = CLI.Chain;
20312 SDValue Callee = CLI.Callee;
20313 bool &IsTailCall = CLI.IsTailCall;
20314 CallingConv::ID CallConv = CLI.CallConv;
20315 bool IsVarArg = CLI.IsVarArg;
20316 EVT PtrVT = getPointerTy(DAG.getDataLayout());
20317 MVT XLenVT = Subtarget.getXLenVT();
20318
20319   MachineFunction &MF = DAG.getMachineFunction();
20320
20321 // Analyze the operands of the call, assigning locations to each operand.
20322   SmallVector<CCValAssign, 16> ArgLocs;
20323   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
20324
20325 if (CallConv == CallingConv::GHC) {
20326 if (Subtarget.hasStdExtE())
20327 report_fatal_error("GHC calling convention is not supported on RVE!");
20328 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
20329 } else
20330 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
20331                       CallConv == CallingConv::Fast ? CC_RISCV_FastCC
20332                                                     : CC_RISCV);
20333
20334 // Check if it's really possible to do a tail call.
20335 if (IsTailCall)
20336 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
20337
20338 if (IsTailCall)
20339 ++NumTailCalls;
20340 else if (CLI.CB && CLI.CB->isMustTailCall())
20341 report_fatal_error("failed to perform tail call elimination on a call "
20342 "site marked musttail");
20343
20344 // Get a count of how many bytes are to be pushed on the stack.
20345 unsigned NumBytes = ArgCCInfo.getStackSize();
20346
20347 // Create local copies for byval args
20348 SmallVector<SDValue, 8> ByValArgs;
20349 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
20350 ISD::ArgFlagsTy Flags = Outs[i].Flags;
20351 if (!Flags.isByVal())
20352 continue;
20353
20354 SDValue Arg = OutVals[i];
20355 unsigned Size = Flags.getByValSize();
20356 Align Alignment = Flags.getNonZeroByValAlign();
20357
20358 int FI =
20359 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
20360 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
20361 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
20362
20363 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
20364 /*IsVolatile=*/false,
20365 /*AlwaysInline=*/false, /*CI*/ nullptr, IsTailCall,
20366                           MachinePointerInfo(), MachinePointerInfo());
20367     ByValArgs.push_back(FIPtr);
20368 }
20369
20370 if (!IsTailCall)
20371 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
20372
20373 // Copy argument values to their designated locations.
20374   SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
20375   SmallVector<SDValue, 8> MemOpChains;
20376 SDValue StackPtr;
20377 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
20378 ++i, ++OutIdx) {
20379 CCValAssign &VA = ArgLocs[i];
20380 SDValue ArgValue = OutVals[OutIdx];
20381 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
20382
20383 // Handle passing f64 on RV32D with a soft float ABI as a special case.
20384 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20385 assert(VA.isRegLoc() && "Expected register VA assignment");
20386 assert(VA.needsCustom());
20387 SDValue SplitF64 = DAG.getNode(
20388 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
20389 SDValue Lo = SplitF64.getValue(0);
20390 SDValue Hi = SplitF64.getValue(1);
20391
20392 Register RegLo = VA.getLocReg();
20393 RegsToPass.push_back(std::make_pair(RegLo, Lo));
20394
20395 // Get the CCValAssign for the Hi part.
20396 CCValAssign &HiVA = ArgLocs[++i];
20397
20398 if (HiVA.isMemLoc()) {
20399 // Second half of f64 is passed on the stack.
20400 if (!StackPtr.getNode())
20401 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
20402         SDValue Address =
20403             DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
20404 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
20405 // Emit the store.
20406 MemOpChains.push_back(DAG.getStore(
20407             Chain, DL, Hi, Address,
20408             MachinePointerInfo::getStack(MF, HiVA.getLocMemOffset())));
20409 } else {
20410 // Second half of f64 is passed in another GPR.
20411 Register RegHigh = HiVA.getLocReg();
20412 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
20413 }
20414 continue;
20415 }
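    // Rough sketch of the f64-on-RV32 soft-float case handled above (register
    // assignment shown is the usual one, not guaranteed): SplitF64 yields the
    // low and high i32 halves, which travel as
    //
    //   lo -> VA.getLocReg()   (e.g. a0)
    //   hi -> HiVA.getLocReg() (e.g. a1), or a stack slot when HiVA.isMemLoc()
    //
    // and BuildPairF64 reassembles them on the return path further below.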
20416
20417 // Promote the value if needed.
20418 // For now, only handle fully promoted and indirect arguments.
20419 if (VA.getLocInfo() == CCValAssign::Indirect) {
20420 // Store the argument in a stack slot and pass its address.
20421 Align StackAlign =
20422 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
20423 getPrefTypeAlign(ArgValue.getValueType(), DAG));
20424 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
20425 // If the original argument was split (e.g. i128), we need
20426 // to store the required parts of it here (and pass just one address).
20427 // Vectors may be partly split to registers and partly to the stack, in
20428 // which case the base address is partly offset and subsequent stores are
20429 // relative to that.
20430 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
20431 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
20432 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
20433 // Calculate the total size to store. We don't have access to what we're
20434 // actually storing other than performing the loop and collecting the
20435 // info.
20436       SmallVector<std::pair<SDValue, SDValue>> Parts;
20437       while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
20438 SDValue PartValue = OutVals[OutIdx + 1];
20439 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
20440 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
20441 EVT PartVT = PartValue.getValueType();
20442 if (PartVT.isScalableVector())
20443 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
20444 StoredSize += PartVT.getStoreSize();
20445 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
20446 Parts.push_back(std::make_pair(PartValue, Offset));
20447 ++i;
20448 ++OutIdx;
20449 }
20450 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
20451 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
20452 MemOpChains.push_back(
20453           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
20454                        MachinePointerInfo::getFixedStack(MF, FI)));
20455 for (const auto &Part : Parts) {
20456 SDValue PartValue = Part.first;
20457 SDValue PartOffset = Part.second;
20458         SDValue Address =
20459             DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
20460 MemOpChains.push_back(
20461             DAG.getStore(Chain, DL, PartValue, Address,
20462                          MachinePointerInfo::getFixedStack(MF, FI)));
20463 }
20464 ArgValue = SpillSlot;
20465 } else {
20466 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
20467 }
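    // Illustrative note on the indirect case above (types assumed for the
    // example): an i128 argument on RV64 is marked CCValAssign::Indirect, so
    // both i64 parts are stored into the stack temporary created here and only
    // the temporary's address is placed in the argument register; the callee
    // then loads the value through that pointer.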
20468
20469 // Use local copy if it is a byval arg.
20470 if (Flags.isByVal())
20471 ArgValue = ByValArgs[j++];
20472
20473 if (VA.isRegLoc()) {
20474 // Queue up the argument copies and emit them at the end.
20475 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
20476 } else {
20477 assert(VA.isMemLoc() && "Argument not register or memory");
20478 assert(!IsTailCall && "Tail call not allowed if stack is used "
20479 "for passing parameters");
20480
20481 // Work out the address of the stack slot.
20482 if (!StackPtr.getNode())
20483 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
20484       SDValue Address =
20485           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
20486                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
20487
20488 // Emit the store.
20489 MemOpChains.push_back(
20490           DAG.getStore(Chain, DL, ArgValue, Address,
20491                        MachinePointerInfo::getStack(MF, VA.getLocMemOffset())));
20492 }
20493 }
20494
20495 // Join the stores, which are independent of one another.
20496 if (!MemOpChains.empty())
20497 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
20498
20499 SDValue Glue;
20500
20501 // Build a sequence of copy-to-reg nodes, chained and glued together.
20502 for (auto &Reg : RegsToPass) {
20503 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
20504 Glue = Chain.getValue(1);
20505 }
20506
20507   // Validate that none of the argument registers have been marked as
20508   // reserved; if so, report an error. Do the same for the return address if
20509   // this is not a tailcall.
20510   validateCCReservedRegs(RegsToPass, MF);
20511   if (!IsTailCall && MF.getSubtarget().isRegisterReservedByUser(RISCV::X1))
20512     MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
20513         MF.getFunction(),
20514 "Return address register required, but has been reserved."});
20515
20516 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
20517 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
20518 // split it and then direct call can be matched by PseudoCALL.
20519 bool CalleeIsLargeExternalSymbol = false;
20520   if (getTargetMachine().getCodeModel() == CodeModel::Large) {
20521     if (auto *S = dyn_cast<GlobalAddressSDNode>(Callee))
20522 Callee = getLargeGlobalAddress(S, DL, PtrVT, DAG);
20523 else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
20524 Callee = getLargeExternalSymbol(S, DL, PtrVT, DAG);
20525 CalleeIsLargeExternalSymbol = true;
20526 }
20527 } else if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
20528 const GlobalValue *GV = S->getGlobal();
20529 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
20530 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
20531 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
20532 }
20533
20534 // The first call operand is the chain and the second is the target address.
20535   SmallVector<SDValue, 8> Ops;
20536   Ops.push_back(Chain);
20537 Ops.push_back(Callee);
20538
20539 // Add argument registers to the end of the list so that they are
20540 // known live into the call.
20541 for (auto &Reg : RegsToPass)
20542 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
20543
20544 // Add a register mask operand representing the call-preserved registers.
20545 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
20546 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
20547 assert(Mask && "Missing call preserved mask for calling convention");
20548 Ops.push_back(DAG.getRegisterMask(Mask));
20549
20550 // Glue the call to the argument copies, if any.
20551 if (Glue.getNode())
20552 Ops.push_back(Glue);
20553
20554 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
20555 "Unexpected CFI type for a direct call");
20556
20557 // Emit the call.
20558 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
20559
20560   // Use a software-guarded branch for large code model non-indirect calls.
20561   // A tail call to an external symbol will have a null CLI.CB, so we need
20562   // another way to determine the callsite type.
20563   bool NeedSWGuarded = false;
20564   if (getTargetMachine().getCodeModel() == CodeModel::Large &&
20565       Subtarget.hasStdExtZicfilp() &&
20566 ((CLI.CB && !CLI.CB->isIndirectCall()) || CalleeIsLargeExternalSymbol))
20567 NeedSWGuarded = true;
20568
20569 if (IsTailCall) {
20570     MF.getFrameInfo().setHasTailCall();
20571     unsigned CallOpc =
20572 NeedSWGuarded ? RISCVISD::SW_GUARDED_TAIL : RISCVISD::TAIL;
20573 SDValue Ret = DAG.getNode(CallOpc, DL, NodeTys, Ops);
20574 if (CLI.CFIType)
20575 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
20576 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
20577 return Ret;
20578 }
20579
20580 unsigned CallOpc = NeedSWGuarded ? RISCVISD::SW_GUARDED_CALL : RISCVISD::CALL;
20581 Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
20582 if (CLI.CFIType)
20583 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
20584 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
20585 Glue = Chain.getValue(1);
20586
20587 // Mark the end of the call, which is glued to the call itself.
20588 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
20589 Glue = Chain.getValue(1);
20590
20591 // Assign locations to each value returned by this call.
20592   SmallVector<CCValAssign, 16> RVLocs;
20593   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
20594 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);
20595
20596 // Copy all of the result registers out of their specified physreg.
20597 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
20598 auto &VA = RVLocs[i];
20599 // Copy the value out
20600 SDValue RetValue =
20601 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
20602 // Glue the RetValue to the end of the call sequence
20603 Chain = RetValue.getValue(1);
20604 Glue = RetValue.getValue(2);
20605
20606 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20607 assert(VA.needsCustom());
20608 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
20609 MVT::i32, Glue);
20610 Chain = RetValue2.getValue(1);
20611 Glue = RetValue2.getValue(2);
20612 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
20613 RetValue2);
20614 } else
20615 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
20616
20617 InVals.push_back(RetValue);
20618 }
20619
20620 return Chain;
20621}
20622
20623 bool RISCVTargetLowering::CanLowerReturn(
20624     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
20625 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
20626 const Type *RetTy) const {
20627   SmallVector<CCValAssign, 16> RVLocs;
20628   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
20629
20630 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
20631 MVT VT = Outs[i].VT;
20632 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
20633 if (CC_RISCV(i, VT, VT, CCValAssign::Full, ArgFlags, CCInfo,
20634 /*IsFixed=*/true, /*IsRet=*/true, nullptr))
20635 return false;
20636 }
20637 return true;
20638}
20639
20640SDValue
20641 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
20642                                  bool IsVarArg,
20643                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
20644                                  const SmallVectorImpl<SDValue> &OutVals,
20645 const SDLoc &DL, SelectionDAG &DAG) const {
20646   MachineFunction &MF = DAG.getMachineFunction();
20647   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
20648
20649 // Stores the assignment of the return value to a location.
20650   SmallVector<CCValAssign, 16> RVLocs;
20651
20652 // Info about the registers and stack slot.
20653 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
20654 *DAG.getContext());
20655
20656 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
20657 nullptr, CC_RISCV);
20658
20659 if (CallConv == CallingConv::GHC && !RVLocs.empty())
20660 report_fatal_error("GHC functions return void only");
20661
20662 SDValue Glue;
20663 SmallVector<SDValue, 4> RetOps(1, Chain);
20664
20665 // Copy the result values into the output registers.
20666 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
20667 SDValue Val = OutVals[OutIdx];
20668 CCValAssign &VA = RVLocs[i];
20669 assert(VA.isRegLoc() && "Can only return in registers!");
20670
20671 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20672 // Handle returning f64 on RV32D with a soft float ABI.
20673 assert(VA.isRegLoc() && "Expected return via registers");
20674 assert(VA.needsCustom());
20675 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
20676 DAG.getVTList(MVT::i32, MVT::i32), Val);
20677 SDValue Lo = SplitF64.getValue(0);
20678 SDValue Hi = SplitF64.getValue(1);
20679 Register RegLo = VA.getLocReg();
20680 Register RegHi = RVLocs[++i].getLocReg();
20681
20682 if (STI.isRegisterReservedByUser(RegLo) ||
20683 STI.isRegisterReservedByUser(RegHi))
20684         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
20685             MF.getFunction(),
20686 "Return value register required, but has been reserved."});
20687
20688 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
20689 Glue = Chain.getValue(1);
20690 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
20691 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
20692 Glue = Chain.getValue(1);
20693 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
20694 } else {
20695 // Handle a 'normal' return.
20696 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
20697 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
20698
20699 if (STI.isRegisterReservedByUser(VA.getLocReg()))
20700         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
20701             MF.getFunction(),
20702 "Return value register required, but has been reserved."});
20703
20704 // Guarantee that all emitted copies are stuck together.
20705 Glue = Chain.getValue(1);
20706 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
20707 }
20708 }
20709
20710 RetOps[0] = Chain; // Update chain.
20711
20712 // Add the glue node if we have it.
20713 if (Glue.getNode()) {
20714 RetOps.push_back(Glue);
20715 }
20716
20717 if (any_of(RVLocs,
20718 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
20719 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
20720
20721 unsigned RetOpc = RISCVISD::RET_GLUE;
20722 // Interrupt service routines use different return instructions.
20723 const Function &Func = DAG.getMachineFunction().getFunction();
20724 if (Func.hasFnAttribute("interrupt")) {
20725 if (!Func.getReturnType()->isVoidTy())
20726       report_fatal_error(
20727           "Functions with the interrupt attribute must have void return type!");
20728
20729     MachineFunction &MF = DAG.getMachineFunction();
20730     StringRef Kind =
20731 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
20732
20733 if (Kind == "supervisor")
20734 RetOpc = RISCVISD::SRET_GLUE;
20735 else
20736 RetOpc = RISCVISD::MRET_GLUE;
20737 }
20738
20739 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
20740}
20741
20742void RISCVTargetLowering::validateCCReservedRegs(
20743 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
20744 MachineFunction &MF) const {
20745 const Function &F = MF.getFunction();
20746 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
20747
20748 if (llvm::any_of(Regs, [&STI](auto Reg) {
20749 return STI.isRegisterReservedByUser(Reg.first);
20750 }))
20751 F.getContext().diagnose(DiagnosticInfoUnsupported{
20752 F, "Argument register required, but has been reserved."});
20753}
20754
20755// Check if the result of the node is only used as a return value, as
20756// otherwise we can't perform a tail-call.
20757 bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
20758   if (N->getNumValues() != 1)
20759 return false;
20760 if (!N->hasNUsesOfValue(1, 0))
20761 return false;
20762
20763 SDNode *Copy = *N->user_begin();
20764
20765 if (Copy->getOpcode() == ISD::BITCAST) {
20766 return isUsedByReturnOnly(Copy, Chain);
20767 }
20768
20769 // TODO: Handle additional opcodes in order to support tail-calling libcalls
20770 // with soft float ABIs.
20771 if (Copy->getOpcode() != ISD::CopyToReg) {
20772 return false;
20773 }
20774
20775 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
20776 // isn't safe to perform a tail call.
20777 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
20778 return false;
20779
20780 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
20781 bool HasRet = false;
20782 for (SDNode *Node : Copy->users()) {
20783 if (Node->getOpcode() != RISCVISD::RET_GLUE)
20784 return false;
20785 HasRet = true;
20786 }
20787 if (!HasRet)
20788 return false;
20789
20790 Chain = Copy->getOperand(0);
20791 return true;
20792}
20793
20794 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
20795   return CI->isTailCall();
20796}
20797
20798const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
20799#define NODE_NAME_CASE(NODE) \
20800 case RISCVISD::NODE: \
20801 return "RISCVISD::" #NODE;
20802 // clang-format off
20803 switch ((RISCVISD::NodeType)Opcode) {
20804   case RISCVISD::FIRST_NUMBER:
20805     break;
20806 NODE_NAME_CASE(RET_GLUE)
20807 NODE_NAME_CASE(SRET_GLUE)
20808 NODE_NAME_CASE(MRET_GLUE)
20809 NODE_NAME_CASE(CALL)
20810 NODE_NAME_CASE(TAIL)
20811 NODE_NAME_CASE(SELECT_CC)
20812 NODE_NAME_CASE(BR_CC)
20813 NODE_NAME_CASE(BuildGPRPair)
20814 NODE_NAME_CASE(SplitGPRPair)
20815 NODE_NAME_CASE(BuildPairF64)
20816 NODE_NAME_CASE(SplitF64)
20817 NODE_NAME_CASE(ADD_LO)
20818 NODE_NAME_CASE(HI)
20819 NODE_NAME_CASE(LLA)
20820 NODE_NAME_CASE(ADD_TPREL)
20821 NODE_NAME_CASE(MULHSU)
20822 NODE_NAME_CASE(SHL_ADD)
20823 NODE_NAME_CASE(SLLW)
20824 NODE_NAME_CASE(SRAW)
20825 NODE_NAME_CASE(SRLW)
20826 NODE_NAME_CASE(DIVW)
20827 NODE_NAME_CASE(DIVUW)
20828 NODE_NAME_CASE(REMUW)
20829 NODE_NAME_CASE(ROLW)
20830 NODE_NAME_CASE(RORW)
20831 NODE_NAME_CASE(CLZW)
20832 NODE_NAME_CASE(CTZW)
20833 NODE_NAME_CASE(ABSW)
20834 NODE_NAME_CASE(FMV_H_X)
20835 NODE_NAME_CASE(FMV_X_ANYEXTH)
20836 NODE_NAME_CASE(FMV_X_SIGNEXTH)
20837 NODE_NAME_CASE(FMV_W_X_RV64)
20838 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
20839 NODE_NAME_CASE(FCVT_X)
20840 NODE_NAME_CASE(FCVT_XU)
20841 NODE_NAME_CASE(FCVT_W_RV64)
20842 NODE_NAME_CASE(FCVT_WU_RV64)
20843 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
20844 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
20845 NODE_NAME_CASE(FROUND)
20846 NODE_NAME_CASE(FCLASS)
20847 NODE_NAME_CASE(FSGNJX)
20848 NODE_NAME_CASE(FMAX)
20849 NODE_NAME_CASE(FMIN)
20850 NODE_NAME_CASE(FLI)
20851 NODE_NAME_CASE(READ_COUNTER_WIDE)
20852 NODE_NAME_CASE(BREV8)
20853 NODE_NAME_CASE(ORC_B)
20854 NODE_NAME_CASE(ZIP)
20855 NODE_NAME_CASE(UNZIP)
20856 NODE_NAME_CASE(CLMUL)
20857 NODE_NAME_CASE(CLMULH)
20858 NODE_NAME_CASE(CLMULR)
20859 NODE_NAME_CASE(MOPR)
20860 NODE_NAME_CASE(MOPRR)
20861 NODE_NAME_CASE(SHA256SIG0)
20862 NODE_NAME_CASE(SHA256SIG1)
20863 NODE_NAME_CASE(SHA256SUM0)
20864 NODE_NAME_CASE(SHA256SUM1)
20865 NODE_NAME_CASE(SM4KS)
20866 NODE_NAME_CASE(SM4ED)
20867 NODE_NAME_CASE(SM3P0)
20868 NODE_NAME_CASE(SM3P1)
20869 NODE_NAME_CASE(TH_LWD)
20870 NODE_NAME_CASE(TH_LWUD)
20871 NODE_NAME_CASE(TH_LDD)
20872 NODE_NAME_CASE(TH_SWD)
20873 NODE_NAME_CASE(TH_SDD)
20874 NODE_NAME_CASE(VMV_V_V_VL)
20875 NODE_NAME_CASE(VMV_V_X_VL)
20876 NODE_NAME_CASE(VFMV_V_F_VL)
20877 NODE_NAME_CASE(VMV_X_S)
20878 NODE_NAME_CASE(VMV_S_X_VL)
20879 NODE_NAME_CASE(VFMV_S_F_VL)
20880 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
20881 NODE_NAME_CASE(READ_VLENB)
20882 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
20883 NODE_NAME_CASE(TRUNCATE_VECTOR_VL_SSAT)
20884 NODE_NAME_CASE(TRUNCATE_VECTOR_VL_USAT)
20885 NODE_NAME_CASE(VSLIDEUP_VL)
20886 NODE_NAME_CASE(VSLIDE1UP_VL)
20887 NODE_NAME_CASE(VSLIDEDOWN_VL)
20888 NODE_NAME_CASE(VSLIDE1DOWN_VL)
20889 NODE_NAME_CASE(VFSLIDE1UP_VL)
20890 NODE_NAME_CASE(VFSLIDE1DOWN_VL)
20891 NODE_NAME_CASE(VID_VL)
20892 NODE_NAME_CASE(VFNCVT_ROD_VL)
20893 NODE_NAME_CASE(VECREDUCE_ADD_VL)
20894 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
20895 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
20896 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
20897 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
20898 NODE_NAME_CASE(VECREDUCE_AND_VL)
20899 NODE_NAME_CASE(VECREDUCE_OR_VL)
20900 NODE_NAME_CASE(VECREDUCE_XOR_VL)
20901 NODE_NAME_CASE(VECREDUCE_FADD_VL)
20902 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
20903 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
20904 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
20905 NODE_NAME_CASE(ADD_VL)
20906 NODE_NAME_CASE(AND_VL)
20907 NODE_NAME_CASE(MUL_VL)
20908 NODE_NAME_CASE(OR_VL)
20909 NODE_NAME_CASE(SDIV_VL)
20910 NODE_NAME_CASE(SHL_VL)
20911 NODE_NAME_CASE(SREM_VL)
20912 NODE_NAME_CASE(SRA_VL)
20913 NODE_NAME_CASE(SRL_VL)
20914 NODE_NAME_CASE(ROTL_VL)
20915 NODE_NAME_CASE(ROTR_VL)
20916 NODE_NAME_CASE(SUB_VL)
20917 NODE_NAME_CASE(UDIV_VL)
20918 NODE_NAME_CASE(UREM_VL)
20919 NODE_NAME_CASE(XOR_VL)
20920 NODE_NAME_CASE(AVGFLOORS_VL)
20921 NODE_NAME_CASE(AVGFLOORU_VL)
20922 NODE_NAME_CASE(AVGCEILS_VL)
20923 NODE_NAME_CASE(AVGCEILU_VL)
20924 NODE_NAME_CASE(SADDSAT_VL)
20925 NODE_NAME_CASE(UADDSAT_VL)
20926 NODE_NAME_CASE(SSUBSAT_VL)
20927 NODE_NAME_CASE(USUBSAT_VL)
20928 NODE_NAME_CASE(FADD_VL)
20929 NODE_NAME_CASE(FSUB_VL)
20930 NODE_NAME_CASE(FMUL_VL)
20931 NODE_NAME_CASE(FDIV_VL)
20932 NODE_NAME_CASE(FNEG_VL)
20933 NODE_NAME_CASE(FABS_VL)
20934 NODE_NAME_CASE(FSQRT_VL)
20935 NODE_NAME_CASE(FCLASS_VL)
20936 NODE_NAME_CASE(VFMADD_VL)
20937 NODE_NAME_CASE(VFNMADD_VL)
20938 NODE_NAME_CASE(VFMSUB_VL)
20939 NODE_NAME_CASE(VFNMSUB_VL)
20940 NODE_NAME_CASE(VFWMADD_VL)
20941 NODE_NAME_CASE(VFWNMADD_VL)
20942 NODE_NAME_CASE(VFWMSUB_VL)
20943 NODE_NAME_CASE(VFWNMSUB_VL)
20944 NODE_NAME_CASE(FCOPYSIGN_VL)
20945 NODE_NAME_CASE(SMIN_VL)
20946 NODE_NAME_CASE(SMAX_VL)
20947 NODE_NAME_CASE(UMIN_VL)
20948 NODE_NAME_CASE(UMAX_VL)
20949 NODE_NAME_CASE(BITREVERSE_VL)
20950 NODE_NAME_CASE(BSWAP_VL)
20951 NODE_NAME_CASE(CTLZ_VL)
20952 NODE_NAME_CASE(CTTZ_VL)
20953 NODE_NAME_CASE(CTPOP_VL)
20954 NODE_NAME_CASE(VFMIN_VL)
20955 NODE_NAME_CASE(VFMAX_VL)
20956 NODE_NAME_CASE(MULHS_VL)
20957 NODE_NAME_CASE(MULHU_VL)
20958 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
20959 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
20960 NODE_NAME_CASE(VFCVT_RM_X_F_VL)
20961 NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
20962 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
20963 NODE_NAME_CASE(SINT_TO_FP_VL)
20964 NODE_NAME_CASE(UINT_TO_FP_VL)
20965 NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
20966 NODE_NAME_CASE(VFCVT_RM_F_X_VL)
20967 NODE_NAME_CASE(FP_EXTEND_VL)
20968 NODE_NAME_CASE(FP_ROUND_VL)
20969 NODE_NAME_CASE(STRICT_FADD_VL)
20970 NODE_NAME_CASE(STRICT_FSUB_VL)
20971 NODE_NAME_CASE(STRICT_FMUL_VL)
20972 NODE_NAME_CASE(STRICT_FDIV_VL)
20973 NODE_NAME_CASE(STRICT_FSQRT_VL)
20974 NODE_NAME_CASE(STRICT_VFMADD_VL)
20975 NODE_NAME_CASE(STRICT_VFNMADD_VL)
20976 NODE_NAME_CASE(STRICT_VFMSUB_VL)
20977 NODE_NAME_CASE(STRICT_VFNMSUB_VL)
20978 NODE_NAME_CASE(STRICT_FP_ROUND_VL)
20979 NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
20980 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
20981 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
20982 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
20983 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
20984 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
20985 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
20986 NODE_NAME_CASE(STRICT_FSETCC_VL)
20987 NODE_NAME_CASE(STRICT_FSETCCS_VL)
20988 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
20989 NODE_NAME_CASE(VWMUL_VL)
20990 NODE_NAME_CASE(VWMULU_VL)
20991 NODE_NAME_CASE(VWMULSU_VL)
20992 NODE_NAME_CASE(VWADD_VL)
20993 NODE_NAME_CASE(VWADDU_VL)
20994 NODE_NAME_CASE(VWSUB_VL)
20995 NODE_NAME_CASE(VWSUBU_VL)
20996 NODE_NAME_CASE(VWADD_W_VL)
20997 NODE_NAME_CASE(VWADDU_W_VL)
20998 NODE_NAME_CASE(VWSUB_W_VL)
20999 NODE_NAME_CASE(VWSUBU_W_VL)
21000 NODE_NAME_CASE(VWSLL_VL)
21001 NODE_NAME_CASE(VFWMUL_VL)
21002 NODE_NAME_CASE(VFWADD_VL)
21003 NODE_NAME_CASE(VFWSUB_VL)
21004 NODE_NAME_CASE(VFWADD_W_VL)
21005 NODE_NAME_CASE(VFWSUB_W_VL)
21006 NODE_NAME_CASE(VWMACC_VL)
21007 NODE_NAME_CASE(VWMACCU_VL)
21008 NODE_NAME_CASE(VWMACCSU_VL)
21009 NODE_NAME_CASE(SETCC_VL)
21010 NODE_NAME_CASE(VMERGE_VL)
21011 NODE_NAME_CASE(VMAND_VL)
21012 NODE_NAME_CASE(VMOR_VL)
21013 NODE_NAME_CASE(VMXOR_VL)
21014 NODE_NAME_CASE(VMCLR_VL)
21015 NODE_NAME_CASE(VMSET_VL)
21016 NODE_NAME_CASE(VRGATHER_VX_VL)
21017 NODE_NAME_CASE(VRGATHER_VV_VL)
21018 NODE_NAME_CASE(VRGATHEREI16_VV_VL)
21019 NODE_NAME_CASE(VSEXT_VL)
21020 NODE_NAME_CASE(VZEXT_VL)
21021 NODE_NAME_CASE(VCPOP_VL)
21022 NODE_NAME_CASE(VFIRST_VL)
21023 NODE_NAME_CASE(READ_CSR)
21024 NODE_NAME_CASE(WRITE_CSR)
21025 NODE_NAME_CASE(SWAP_CSR)
21026 NODE_NAME_CASE(CZERO_EQZ)
21027 NODE_NAME_CASE(CZERO_NEZ)
21028 NODE_NAME_CASE(SW_GUARDED_BRIND)
21029 NODE_NAME_CASE(SW_GUARDED_CALL)
21030 NODE_NAME_CASE(SW_GUARDED_TAIL)
21031 NODE_NAME_CASE(TUPLE_INSERT)
21032 NODE_NAME_CASE(TUPLE_EXTRACT)
21033 NODE_NAME_CASE(SF_VC_XV_SE)
21034 NODE_NAME_CASE(SF_VC_IV_SE)
21035 NODE_NAME_CASE(SF_VC_VV_SE)
21036 NODE_NAME_CASE(SF_VC_FV_SE)
21037 NODE_NAME_CASE(SF_VC_XVV_SE)
21038 NODE_NAME_CASE(SF_VC_IVV_SE)
21039 NODE_NAME_CASE(SF_VC_VVV_SE)
21040 NODE_NAME_CASE(SF_VC_FVV_SE)
21041 NODE_NAME_CASE(SF_VC_XVW_SE)
21042 NODE_NAME_CASE(SF_VC_IVW_SE)
21043 NODE_NAME_CASE(SF_VC_VVW_SE)
21044 NODE_NAME_CASE(SF_VC_FVW_SE)
21045 NODE_NAME_CASE(SF_VC_V_X_SE)
21046 NODE_NAME_CASE(SF_VC_V_I_SE)
21047 NODE_NAME_CASE(SF_VC_V_XV_SE)
21048 NODE_NAME_CASE(SF_VC_V_IV_SE)
21049 NODE_NAME_CASE(SF_VC_V_VV_SE)
21050 NODE_NAME_CASE(SF_VC_V_FV_SE)
21051 NODE_NAME_CASE(SF_VC_V_XVV_SE)
21052 NODE_NAME_CASE(SF_VC_V_IVV_SE)
21053 NODE_NAME_CASE(SF_VC_V_VVV_SE)
21054 NODE_NAME_CASE(SF_VC_V_FVV_SE)
21055 NODE_NAME_CASE(SF_VC_V_XVW_SE)
21056 NODE_NAME_CASE(SF_VC_V_IVW_SE)
21057 NODE_NAME_CASE(SF_VC_V_VVW_SE)
21058 NODE_NAME_CASE(SF_VC_V_FVW_SE)
21059 NODE_NAME_CASE(PROBED_ALLOCA)
21060 }
21061 // clang-format on
21062 return nullptr;
21063#undef NODE_NAME_CASE
21064}
21065
21066/// getConstraintType - Given a constraint letter, return the type of
21067/// constraint it is for this target.
21068 RISCVTargetLowering::ConstraintType
21069 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
21070   if (Constraint.size() == 1) {
21071 switch (Constraint[0]) {
21072 default:
21073 break;
21074 case 'f':
21075 case 'R':
21076 return C_RegisterClass;
21077 case 'I':
21078 case 'J':
21079 case 'K':
21080 return C_Immediate;
21081 case 'A':
21082 return C_Memory;
21083 case 's':
21084 case 'S': // A symbolic address
21085 return C_Other;
21086 }
21087 } else {
21088 if (Constraint == "vr" || Constraint == "vd" || Constraint == "vm")
21089 return C_RegisterClass;
21090 if (Constraint == "cr" || Constraint == "cf")
21091 return C_RegisterClass;
21092 }
21093 return TargetLowering::getConstraintType(Constraint);
21094}
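// Hedged usage sketch (user-written inline asm, not part of this file): the
// classifications above cover snippets such as
//
//   asm volatile("addi %0, %1, %2" : "=r"(y) : "r"(x), "I"(1));   // immediate
//   asm volatile("fmv.x.w %0, %1"  : "=r"(y) : "f"(f));           // FP regclass
//   asm volatile("lr.w %0, %1"     : "=r"(y) : "A"(*p));          // memory 'A'
//
// Anything longer than one letter ("vr", "vd", "vm", "cr", "cf") is treated as
// a register-class constraint; the rest falls back to the generic handling.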
21095
21096std::pair<unsigned, const TargetRegisterClass *>
21097 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
21098                                                   StringRef Constraint,
21099 MVT VT) const {
21100 // First, see if this is a constraint that directly corresponds to a RISC-V
21101 // register class.
21102 if (Constraint.size() == 1) {
21103 switch (Constraint[0]) {
21104 case 'r':
21105 // TODO: Support fixed vectors up to XLen for P extension?
21106 if (VT.isVector())
21107 break;
21108 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
21109 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
21110 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
21111 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
21112 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
21113 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
21114 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
21115 case 'f':
21116 if (VT == MVT::f16) {
21117 if (Subtarget.hasStdExtZfhmin())
21118 return std::make_pair(0U, &RISCV::FPR16RegClass);
21119 if (Subtarget.hasStdExtZhinxmin())
21120 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
21121 } else if (VT == MVT::f32) {
21122 if (Subtarget.hasStdExtF())
21123 return std::make_pair(0U, &RISCV::FPR32RegClass);
21124 if (Subtarget.hasStdExtZfinx())
21125 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
21126 } else if (VT == MVT::f64) {
21127 if (Subtarget.hasStdExtD())
21128 return std::make_pair(0U, &RISCV::FPR64RegClass);
21129 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
21130 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
21131 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
21132 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
21133 }
21134 break;
21135 case 'R':
21136 if (VT == MVT::f64 && !Subtarget.is64Bit() && Subtarget.hasStdExtZdinx())
21137 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
21138 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
21139 default:
21140 break;
21141 }
21142 } else if (Constraint == "vr") {
21143 for (const auto *RC :
21144 {&RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,
21145 &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN3M1RegClass,
21146 &RISCV::VRN4M1RegClass, &RISCV::VRN5M1RegClass,
21147 &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass,
21148 &RISCV::VRN8M1RegClass, &RISCV::VRN2M2RegClass,
21149 &RISCV::VRN3M2RegClass, &RISCV::VRN4M2RegClass,
21150 &RISCV::VRN2M4RegClass}) {
21151 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
21152 return std::make_pair(0U, RC);
21153 }
21154 } else if (Constraint == "vd") {
21155 for (const auto *RC :
21156 {&RISCV::VRNoV0RegClass, &RISCV::VRM2NoV0RegClass,
21157 &RISCV::VRM4NoV0RegClass, &RISCV::VRM8NoV0RegClass,
21158 &RISCV::VRN2M1NoV0RegClass, &RISCV::VRN3M1NoV0RegClass,
21159 &RISCV::VRN4M1NoV0RegClass, &RISCV::VRN5M1NoV0RegClass,
21160 &RISCV::VRN6M1NoV0RegClass, &RISCV::VRN7M1NoV0RegClass,
21161 &RISCV::VRN8M1NoV0RegClass, &RISCV::VRN2M2NoV0RegClass,
21162 &RISCV::VRN3M2NoV0RegClass, &RISCV::VRN4M2NoV0RegClass,
21163 &RISCV::VRN2M4NoV0RegClass}) {
21164 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
21165 return std::make_pair(0U, RC);
21166 }
21167 } else if (Constraint == "vm") {
21168 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
21169 return std::make_pair(0U, &RISCV::VMV0RegClass);
21170 } else if (Constraint == "cr") {
21171 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
21172 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
21173 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
21174 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
21175 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
21176 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
21177 if (!VT.isVector())
21178 return std::make_pair(0U, &RISCV::GPRCRegClass);
21179 } else if (Constraint == "cf") {
21180 if (VT == MVT::f16) {
21181 if (Subtarget.hasStdExtZfhmin())
21182 return std::make_pair(0U, &RISCV::FPR16CRegClass);
21183 if (Subtarget.hasStdExtZhinxmin())
21184 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
21185 } else if (VT == MVT::f32) {
21186 if (Subtarget.hasStdExtF())
21187 return std::make_pair(0U, &RISCV::FPR32CRegClass);
21188 if (Subtarget.hasStdExtZfinx())
21189 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
21190 } else if (VT == MVT::f64) {
21191 if (Subtarget.hasStdExtD())
21192 return std::make_pair(0U, &RISCV::FPR64CRegClass);
21193 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
21194 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
21195 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
21196 return std::make_pair(0U, &RISCV::GPRCRegClass);
21197 }
21198 }
21199
21200 // Clang will correctly decode the usage of register name aliases into their
21201 // official names. However, other frontends like `rustc` do not. This allows
21202 // users of these frontends to use the ABI names for registers in LLVM-style
21203 // register constraints.
21204 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
21205 .Case("{zero}", RISCV::X0)
21206 .Case("{ra}", RISCV::X1)
21207 .Case("{sp}", RISCV::X2)
21208 .Case("{gp}", RISCV::X3)
21209 .Case("{tp}", RISCV::X4)
21210 .Case("{t0}", RISCV::X5)
21211 .Case("{t1}", RISCV::X6)
21212 .Case("{t2}", RISCV::X7)
21213 .Cases("{s0}", "{fp}", RISCV::X8)
21214 .Case("{s1}", RISCV::X9)
21215 .Case("{a0}", RISCV::X10)
21216 .Case("{a1}", RISCV::X11)
21217 .Case("{a2}", RISCV::X12)
21218 .Case("{a3}", RISCV::X13)
21219 .Case("{a4}", RISCV::X14)
21220 .Case("{a5}", RISCV::X15)
21221 .Case("{a6}", RISCV::X16)
21222 .Case("{a7}", RISCV::X17)
21223 .Case("{s2}", RISCV::X18)
21224 .Case("{s3}", RISCV::X19)
21225 .Case("{s4}", RISCV::X20)
21226 .Case("{s5}", RISCV::X21)
21227 .Case("{s6}", RISCV::X22)
21228 .Case("{s7}", RISCV::X23)
21229 .Case("{s8}", RISCV::X24)
21230 .Case("{s9}", RISCV::X25)
21231 .Case("{s10}", RISCV::X26)
21232 .Case("{s11}", RISCV::X27)
21233 .Case("{t3}", RISCV::X28)
21234 .Case("{t4}", RISCV::X29)
21235 .Case("{t5}", RISCV::X30)
21236 .Case("{t6}", RISCV::X31)
21237 .Default(RISCV::NoRegister);
21238 if (XRegFromAlias != RISCV::NoRegister)
21239 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
21240
21241 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
21242 // TableGen record rather than the AsmName to choose registers for InlineAsm
21243 // constraints, plus we want to match those names to the widest floating point
21244 // register type available, manually select floating point registers here.
21245 //
21246 // The second case is the ABI name of the register, so that frontends can also
21247 // use the ABI names in register constraint lists.
21248 if (Subtarget.hasStdExtF()) {
21249 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
21250 .Cases("{f0}", "{ft0}", RISCV::F0_F)
21251 .Cases("{f1}", "{ft1}", RISCV::F1_F)
21252 .Cases("{f2}", "{ft2}", RISCV::F2_F)
21253 .Cases("{f3}", "{ft3}", RISCV::F3_F)
21254 .Cases("{f4}", "{ft4}", RISCV::F4_F)
21255 .Cases("{f5}", "{ft5}", RISCV::F5_F)
21256 .Cases("{f6}", "{ft6}", RISCV::F6_F)
21257 .Cases("{f7}", "{ft7}", RISCV::F7_F)
21258 .Cases("{f8}", "{fs0}", RISCV::F8_F)
21259 .Cases("{f9}", "{fs1}", RISCV::F9_F)
21260 .Cases("{f10}", "{fa0}", RISCV::F10_F)
21261 .Cases("{f11}", "{fa1}", RISCV::F11_F)
21262 .Cases("{f12}", "{fa2}", RISCV::F12_F)
21263 .Cases("{f13}", "{fa3}", RISCV::F13_F)
21264 .Cases("{f14}", "{fa4}", RISCV::F14_F)
21265 .Cases("{f15}", "{fa5}", RISCV::F15_F)
21266 .Cases("{f16}", "{fa6}", RISCV::F16_F)
21267 .Cases("{f17}", "{fa7}", RISCV::F17_F)
21268 .Cases("{f18}", "{fs2}", RISCV::F18_F)
21269 .Cases("{f19}", "{fs3}", RISCV::F19_F)
21270 .Cases("{f20}", "{fs4}", RISCV::F20_F)
21271 .Cases("{f21}", "{fs5}", RISCV::F21_F)
21272 .Cases("{f22}", "{fs6}", RISCV::F22_F)
21273 .Cases("{f23}", "{fs7}", RISCV::F23_F)
21274 .Cases("{f24}", "{fs8}", RISCV::F24_F)
21275 .Cases("{f25}", "{fs9}", RISCV::F25_F)
21276 .Cases("{f26}", "{fs10}", RISCV::F26_F)
21277 .Cases("{f27}", "{fs11}", RISCV::F27_F)
21278 .Cases("{f28}", "{ft8}", RISCV::F28_F)
21279 .Cases("{f29}", "{ft9}", RISCV::F29_F)
21280 .Cases("{f30}", "{ft10}", RISCV::F30_F)
21281 .Cases("{f31}", "{ft11}", RISCV::F31_F)
21282 .Default(RISCV::NoRegister);
21283 if (FReg != RISCV::NoRegister) {
21284 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
21285 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
21286 unsigned RegNo = FReg - RISCV::F0_F;
21287 unsigned DReg = RISCV::F0_D + RegNo;
21288 return std::make_pair(DReg, &RISCV::FPR64RegClass);
21289 }
21290 if (VT == MVT::f32 || VT == MVT::Other)
21291 return std::make_pair(FReg, &RISCV::FPR32RegClass);
21292 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
21293 unsigned RegNo = FReg - RISCV::F0_F;
21294 unsigned HReg = RISCV::F0_H + RegNo;
21295 return std::make_pair(HReg, &RISCV::FPR16RegClass);
21296 }
21297 }
21298 }
21299
21300 if (Subtarget.hasVInstructions()) {
21301 Register VReg = StringSwitch<Register>(Constraint.lower())
21302 .Case("{v0}", RISCV::V0)
21303 .Case("{v1}", RISCV::V1)
21304 .Case("{v2}", RISCV::V2)
21305 .Case("{v3}", RISCV::V3)
21306 .Case("{v4}", RISCV::V4)
21307 .Case("{v5}", RISCV::V5)
21308 .Case("{v6}", RISCV::V6)
21309 .Case("{v7}", RISCV::V7)
21310 .Case("{v8}", RISCV::V8)
21311 .Case("{v9}", RISCV::V9)
21312 .Case("{v10}", RISCV::V10)
21313 .Case("{v11}", RISCV::V11)
21314 .Case("{v12}", RISCV::V12)
21315 .Case("{v13}", RISCV::V13)
21316 .Case("{v14}", RISCV::V14)
21317 .Case("{v15}", RISCV::V15)
21318 .Case("{v16}", RISCV::V16)
21319 .Case("{v17}", RISCV::V17)
21320 .Case("{v18}", RISCV::V18)
21321 .Case("{v19}", RISCV::V19)
21322 .Case("{v20}", RISCV::V20)
21323 .Case("{v21}", RISCV::V21)
21324 .Case("{v22}", RISCV::V22)
21325 .Case("{v23}", RISCV::V23)
21326 .Case("{v24}", RISCV::V24)
21327 .Case("{v25}", RISCV::V25)
21328 .Case("{v26}", RISCV::V26)
21329 .Case("{v27}", RISCV::V27)
21330 .Case("{v28}", RISCV::V28)
21331 .Case("{v29}", RISCV::V29)
21332 .Case("{v30}", RISCV::V30)
21333 .Case("{v31}", RISCV::V31)
21334 .Default(RISCV::NoRegister);
21335 if (VReg != RISCV::NoRegister) {
21336 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
21337 return std::make_pair(VReg, &RISCV::VMRegClass);
21338 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
21339 return std::make_pair(VReg, &RISCV::VRRegClass);
21340 for (const auto *RC :
21341 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
21342 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
21343 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
21344 return std::make_pair(VReg, RC);
21345 }
21346 }
21347 }
21348 }
21349
21350 std::pair<Register, const TargetRegisterClass *> Res =
21351       TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
21352
21353 // If we picked one of the Zfinx register classes, remap it to the GPR class.
21354 // FIXME: When Zfinx is supported in CodeGen this will need to take the
21355 // Subtarget into account.
21356 if (Res.second == &RISCV::GPRF16RegClass ||
21357 Res.second == &RISCV::GPRF32RegClass ||
21358 Res.second == &RISCV::GPRPairRegClass)
21359 return std::make_pair(Res.first, &RISCV::GPRRegClass);
21360
21361 return Res;
21362}
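// Hedged example of the explicit register constraints resolved above (shown
// only for illustration, as frontends like rustc emit them): "{a0}" maps to
// RISCV::X10 in the GPR class, "{fs0}"/"{f8}" map to F8_D when the D extension
// is available (F8_F otherwise, F8_H for f16 with Zfhmin), and "{v8}" is
// matched to the widest legal vector register class for the requested type.
// An illustrative LLVM IR call using such constraints might look like
//
//   %ret = call i64 asm sideeffect "ecall", "={a0},{a7}"(i64 64)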
21363
21364 InlineAsm::ConstraintCode
21365 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
21366   // Currently only support length 1 constraints.
21367 if (ConstraintCode.size() == 1) {
21368 switch (ConstraintCode[0]) {
21369 case 'A':
21370       return InlineAsm::ConstraintCode::A;
21371     default:
21372 break;
21373 }
21374 }
21375
21376 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
21377}
21378
21379 void RISCVTargetLowering::LowerAsmOperandForConstraint(
21380     SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
21381 SelectionDAG &DAG) const {
21382 // Currently only support length 1 constraints.
21383 if (Constraint.size() == 1) {
21384 switch (Constraint[0]) {
21385 case 'I':
21386 // Validate & create a 12-bit signed immediate operand.
21387 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
21388 uint64_t CVal = C->getSExtValue();
21389 if (isInt<12>(CVal))
21390 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
21391 Subtarget.getXLenVT()));
21392 }
21393 return;
21394 case 'J':
21395 // Validate & create an integer zero operand.
21396 if (isNullConstant(Op))
21397 Ops.push_back(
21398 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
21399 return;
21400 case 'K':
21401 // Validate & create a 5-bit unsigned immediate operand.
21402 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
21403 uint64_t CVal = C->getZExtValue();
21404 if (isUInt<5>(CVal))
21405 Ops.push_back(
21406 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
21407 }
21408 return;
21409 case 'S':
21410       TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
21411       return;
21412 default:
21413 break;
21414 }
21415 }
21416 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
21417}
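// Hedged usage sketch (user inline asm, illustrative only): the immediate
// constraints validated above accept, for example,
//
//   asm volatile("addi %0, %1, %2" : "=r"(y) : "r"(x), "I"(-2048));  // simm12
//   asm volatile("slli %0, %1, %2" : "=r"(y) : "r"(x), "K"(31));     // uimm5
//
// while an out-of-range constant (say "I"(4096)) produces no operand here and
// is rejected later with the usual invalid-operand diagnostic.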
21418
21419 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
21420                                                    Instruction *Inst,
21421 AtomicOrdering Ord) const {
21422 if (Subtarget.hasStdExtZtso()) {
21423 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
21424 return Builder.CreateFence(Ord);
21425 return nullptr;
21426 }
21427
21428 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
21429 return Builder.CreateFence(Ord);
21430 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
21431 return Builder.CreateFence(AtomicOrdering::Release);
21432 return nullptr;
21433}
21434
21435 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
21436                                                     Instruction *Inst,
21437 AtomicOrdering Ord) const {
21438 if (Subtarget.hasStdExtZtso()) {
21439 if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
21440 return Builder.CreateFence(Ord);
21441 return nullptr;
21442 }
21443
21444 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
21445 return Builder.CreateFence(AtomicOrdering::Acquire);
21446   if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Inst) &&
21447       Ord == AtomicOrdering::SequentiallyConsistent)
21448     return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
21449   return nullptr;
21450}
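// Hedged summary of the fences requested above under the default (WMO) memory
// model; the exact instructions come from later selection and are shown only
// as the expected mapping:
//
//   load  seq_cst  ->  fence rw,rw ; l{b|h|w|d} ; fence r,rw
//   load  acquire  ->  l{b|h|w|d} ; fence r,rw
//   store release  ->  fence rw,w ; s{b|h|w|d}
//   store seq_cst  ->  fence rw,w ; s{b|h|w|d} [; fence rw,rw with the
//                      trailing-seq-cst-fence tuning]
//
// With Ztso, a seq_cst load keeps only its leading fence and a seq_cst store
// keeps only a trailing fence; everything else needs no fence.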
21451
21452 TargetLowering::AtomicExpansionKind
21453 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
21454   // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
21455 // point operations can't be used in an lr/sc sequence without breaking the
21456 // forward-progress guarantee.
21457   if (AI->isFloatingPointOperation() ||
21458       AI->getOperation() == AtomicRMWInst::UIncWrap ||
21459       AI->getOperation() == AtomicRMWInst::UDecWrap ||
21460       AI->getOperation() == AtomicRMWInst::USubCond ||
21461       AI->getOperation() == AtomicRMWInst::USubSat)
21462     return AtomicExpansionKind::CmpXChg;
21463
21464 // Don't expand forced atomics, we want to have __sync libcalls instead.
21465 if (Subtarget.hasForcedAtomics())
21466     return AtomicExpansionKind::None;
21467
21468 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
21469 if (AI->getOperation() == AtomicRMWInst::Nand) {
21470 if (Subtarget.hasStdExtZacas() &&
21471 (Size >= 32 || Subtarget.hasStdExtZabha()))
21472       return AtomicExpansionKind::CmpXChg;
21473     if (Size < 32)
21474       return AtomicExpansionKind::MaskedIntrinsic;
21475   }
21476
21477 if (Size < 32 && !Subtarget.hasStdExtZabha())
21478     return AtomicExpansionKind::MaskedIntrinsic;
21479
21480   return AtomicExpansionKind::None;
21481 }
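// Hedged example of the decision above (IR shown for illustration): with only
// the A extension on RV64, "atomicrmw add ptr %p, i8 1 seq_cst" is narrower
// than 32 bits and Zabha is absent, so MaskedIntrinsic is returned and the
// operation is rewritten onto the containing aligned 32-bit word; with +zabha
// the hook returns None and an amoadd.b is selected directly.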
21482
21483static Intrinsic::ID
21484 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
21485   if (XLen == 32) {
21486 switch (BinOp) {
21487 default:
21488 llvm_unreachable("Unexpected AtomicRMW BinOp");
21489     case AtomicRMWInst::Xchg:
21490       return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
21491 case AtomicRMWInst::Add:
21492 return Intrinsic::riscv_masked_atomicrmw_add_i32;
21493 case AtomicRMWInst::Sub:
21494 return Intrinsic::riscv_masked_atomicrmw_sub_i32;
21495     case AtomicRMWInst::Nand:
21496       return Intrinsic::riscv_masked_atomicrmw_nand_i32;
21497 case AtomicRMWInst::Max:
21498 return Intrinsic::riscv_masked_atomicrmw_max_i32;
21499 case AtomicRMWInst::Min:
21500 return Intrinsic::riscv_masked_atomicrmw_min_i32;
21501     case AtomicRMWInst::UMax:
21502       return Intrinsic::riscv_masked_atomicrmw_umax_i32;
21503     case AtomicRMWInst::UMin:
21504       return Intrinsic::riscv_masked_atomicrmw_umin_i32;
21505 }
21506 }
21507
21508 if (XLen == 64) {
21509 switch (BinOp) {
21510 default:
21511 llvm_unreachable("Unexpected AtomicRMW BinOp");
21512     case AtomicRMWInst::Xchg:
21513       return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
21514 case AtomicRMWInst::Add:
21515 return Intrinsic::riscv_masked_atomicrmw_add_i64;
21516 case AtomicRMWInst::Sub:
21517 return Intrinsic::riscv_masked_atomicrmw_sub_i64;
21518     case AtomicRMWInst::Nand:
21519       return Intrinsic::riscv_masked_atomicrmw_nand_i64;
21520 case AtomicRMWInst::Max:
21521 return Intrinsic::riscv_masked_atomicrmw_max_i64;
21522 case AtomicRMWInst::Min:
21523 return Intrinsic::riscv_masked_atomicrmw_min_i64;
21524     case AtomicRMWInst::UMax:
21525       return Intrinsic::riscv_masked_atomicrmw_umax_i64;
21526     case AtomicRMWInst::UMin:
21527       return Intrinsic::riscv_masked_atomicrmw_umin_i64;
21528 }
21529 }
21530
21531 llvm_unreachable("Unexpected XLen\n");
21532}
21533
21534 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
21535     IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
21536 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
21537 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
21538 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
21539 // mask, as this produces better code than the LR/SC loop emitted by
21540 // int_riscv_masked_atomicrmw_xchg.
21541 if (AI->getOperation() == AtomicRMWInst::Xchg &&
21542 isa<ConstantInt>(AI->getValOperand())) {
21543 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
21544 if (CVal->isZero())
21545 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
21546 Builder.CreateNot(Mask, "Inv_Mask"),
21547 AI->getAlign(), Ord);
21548 if (CVal->isMinusOne())
21549 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
21550 AI->getAlign(), Ord);
21551 }
21552
21553 unsigned XLen = Subtarget.getXLen();
21554 Value *Ordering =
21555 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
21556 Type *Tys[] = {AlignedAddr->getType()};
21557   Function *LrwOpScwLoop = Intrinsic::getOrInsertDeclaration(
21558       AI->getModule(),
21559       getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
21560
21561 if (XLen == 64) {
21562 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
21563 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
21564 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
21565 }
21566
21567 Value *Result;
21568
21569 // Must pass the shift amount needed to sign extend the loaded value prior
21570 // to performing a signed comparison for min/max. ShiftAmt is the number of
21571 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
21572 // is the number of bits to left+right shift the value in order to
21573 // sign-extend.
21574 if (AI->getOperation() == AtomicRMWInst::Min ||
21576 const DataLayout &DL = AI->getDataLayout();
21577 unsigned ValWidth =
21578 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
21579 Value *SextShamt =
21580 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
21581 Result = Builder.CreateCall(LrwOpScwLoop,
21582 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
21583 } else {
21584 Result =
21585 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
21586 }
21587
21588 if (XLen == 64)
21589 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
21590 return Result;
21591}
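// Hedged sketch of the expansion driven by the masked-intrinsic path (value
// names and exact types are illustrative): AtomicExpandPass computes the
// aligned address, Mask and ShiftAmt, and an i8 "atomicrmw add" on RV64
// becomes roughly
//
//   %res = call i64 @llvm.riscv.masked.atomicrmw.add.i64(
//              ptr %aligned, i64 %incr, i64 %mask, i64 %ordering)
//
// which is later lowered to an LR.W/SC.W retry loop that updates only the
// masked byte lanes.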
21592
21593 TargetLowering::AtomicExpansionKind
21594 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
21595     AtomicCmpXchgInst *CI) const {
21596   // Don't expand forced atomics, we want to have __sync libcalls instead.
21597   if (Subtarget.hasForcedAtomics())
21598     return AtomicExpansionKind::None;
21599
21600   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
21601   if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
21602       (Size == 8 || Size == 16))
21603     return AtomicExpansionKind::MaskedIntrinsic;
21604   return AtomicExpansionKind::None;
21605 }
21606
21607 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
21608     IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
21609 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
21610 unsigned XLen = Subtarget.getXLen();
21611 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
21612 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
21613 if (XLen == 64) {
21614 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
21615 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
21616 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
21617 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
21618 }
21619 Type *Tys[] = {AlignedAddr->getType()};
21620 Value *Result = Builder.CreateIntrinsic(
21621 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
21622 if (XLen == 64)
21623 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
21624 return Result;
21625}
21626
21627 bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
21628                                                         EVT DataVT) const {
21629 // We have indexed loads for all supported EEW types. Indices are always
21630 // zero extended.
21631 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
21632 isTypeLegal(Extend.getValueType()) &&
21633 isTypeLegal(Extend.getOperand(0).getValueType()) &&
21634 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
21635}
21636
21637 bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
21638                                                EVT VT) const {
21639 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
21640 return false;
21641
21642 switch (FPVT.getSimpleVT().SimpleTy) {
21643 case MVT::f16:
21644 return Subtarget.hasStdExtZfhmin();
21645 case MVT::f32:
21646 return Subtarget.hasStdExtF();
21647 case MVT::f64:
21648 return Subtarget.hasStdExtD();
21649 default:
21650 return false;
21651 }
21652}
21653
21654 unsigned RISCVTargetLowering::getJumpTableEncoding() const {
21655   // If we are using the small code model, we can reduce the size of each jump
21656   // table entry to 4 bytes.
21657   if (Subtarget.is64Bit() && !isPositionIndependent() &&
21658       getTargetMachine().getCodeModel() == CodeModel::Small) {
21659     return MachineJumpTableInfo::EK_Custom32;
21660   }
21661   return TargetLowering::getJumpTableEncoding();
21662 }
21663
21664 const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
21665     const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
21666 unsigned uid, MCContext &Ctx) const {
21667 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
21668          getTargetMachine().getCodeModel() == CodeModel::Small);
21669   return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
21670}
21671
21672 bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
21673   // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
21674 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
21675 // a power of two as well.
21676 // FIXME: This doesn't work for zve32, but that's already broken
21677 // elsewhere for the same reason.
21678 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
21679 static_assert(RISCV::RVVBitsPerBlock == 64,
21680 "RVVBitsPerBlock changed, audit needed");
21681 return true;
21682}
21683
21684 bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
21685                                                  SDValue &Offset,
21686                                                  ISD::MemIndexedMode &AM,
21687                                                  SelectionDAG &DAG) const {
21688 // Target does not support indexed loads.
21689 if (!Subtarget.hasVendorXTHeadMemIdx())
21690 return false;
21691
21692 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
21693 return false;
21694
21695 Base = Op->getOperand(0);
21696 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
21697 int64_t RHSC = RHS->getSExtValue();
21698 if (Op->getOpcode() == ISD::SUB)
21699 RHSC = -(uint64_t)RHSC;
21700
21701 // The constants that can be encoded in the THeadMemIdx instructions
21702 // are of the form (sign_extend(imm5) << imm2).
21703 bool isLegalIndexedOffset = false;
21704 for (unsigned i = 0; i < 4; i++)
21705 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
21706 isLegalIndexedOffset = true;
21707 break;
21708 }
21709
21710 if (!isLegalIndexedOffset)
21711 return false;
21712
21713 Offset = Op->getOperand(1);
21714 return true;
21715 }
21716
21717 return false;
21718}
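// Hedged examples of offsets accepted by the loop above (XTHeadMemIdx encodes
// the increment as sign_extend(imm5) << imm2; values are illustrative):
//
//   +12 -> imm5 =  3, imm2 = 2   accepted
//   -16 -> imm5 = -2, imm2 = 3   accepted
//   +17 -> no (imm5, imm2) pair  rejected, so no pre/post-increment is formed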
21719
21720 bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
21721                                                     SDValue &Offset,
21722                                                     ISD::MemIndexedMode &AM,
21723                                                     SelectionDAG &DAG) const {
21724 EVT VT;
21725 SDValue Ptr;
21726 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
21727 VT = LD->getMemoryVT();
21728 Ptr = LD->getBasePtr();
21729 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
21730 VT = ST->getMemoryVT();
21731 Ptr = ST->getBasePtr();
21732 } else
21733 return false;
21734
21735 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
21736 return false;
21737
21738 AM = ISD::PRE_INC;
21739 return true;
21740}
21741
21742 bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
21743                                                      SDValue &Base,
21744                                                      SDValue &Offset,
21745                                                      ISD::MemIndexedMode &AM,
21746                                                      SelectionDAG &DAG) const {
21747 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
21748 if (Op->getOpcode() != ISD::ADD)
21749 return false;
21750
21751 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N))
21752 Base = LS->getBasePtr();
21753 else
21754 return false;
21755
21756 if (Base == Op->getOperand(0))
21757 Offset = Op->getOperand(1);
21758 else if (Base == Op->getOperand(1))
21759 Offset = Op->getOperand(0);
21760 else
21761 return false;
21762
21763 AM = ISD::POST_INC;
21764 return true;
21765 }
21766
21767 EVT VT;
21768 SDValue Ptr;
21769 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
21770 VT = LD->getMemoryVT();
21771 Ptr = LD->getBasePtr();
21772 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
21773 VT = ST->getMemoryVT();
21774 Ptr = ST->getBasePtr();
21775 } else
21776 return false;
21777
21778 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
21779 return false;
21780 // Post-indexing updates the base, so it's not a valid transform
21781 // if that's not the same as the load's pointer.
21782 if (Ptr != Base)
21783 return false;
21784
21785 AM = ISD::POST_INC;
21786 return true;
21787}
21788
21789 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
21790                                                      EVT VT) const {
21791 EVT SVT = VT.getScalarType();
21792
21793 if (!SVT.isSimple())
21794 return false;
21795
21796 switch (SVT.getSimpleVT().SimpleTy) {
21797 case MVT::f16:
21798 return VT.isVector() ? Subtarget.hasVInstructionsF16()
21799 : Subtarget.hasStdExtZfhOrZhinx();
21800 case MVT::f32:
21801 return Subtarget.hasStdExtFOrZfinx();
21802 case MVT::f64:
21803 return Subtarget.hasStdExtDOrZdinx();
21804 default:
21805 break;
21806 }
21807
21808 return false;
21809}
21810
21811 ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const {
21812   // Zacas will use amocas.w which does not require extension.
21813 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
21814}
21815
21816 Register RISCVTargetLowering::getExceptionPointerRegister(
21817     const Constant *PersonalityFn) const {
21818 return RISCV::X10;
21819}
21820
21821 Register RISCVTargetLowering::getExceptionSelectorRegister(
21822     const Constant *PersonalityFn) const {
21823 return RISCV::X11;
21824}
21825
21826 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
21827   // Return false to suppress the unnecessary extensions if the LibCall
21828 // arguments or return value is a float narrower than XLEN on a soft FP ABI.
21829 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
21830 Type.getSizeInBits() < Subtarget.getXLen()))
21831 return false;
21832
21833 return true;
21834}
21835
21836 bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(Type *Ty,
21837                                                         bool IsSigned) const {
21838 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
21839 return true;
21840
21841 return IsSigned;
21842}
21843
21844 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
21845                                                  SDValue C) const {
21846 // Check integral scalar types.
21847 if (!VT.isScalarInteger())
21848 return false;
21849
21850   // Omit the optimization if the subtarget has the Zmmul extension and the
21851   // data size exceeds XLen.
21852 const bool HasZmmul = Subtarget.hasStdExtZmmul();
21853 if (HasZmmul && VT.getSizeInBits() > Subtarget.getXLen())
21854 return false;
21855
21856 auto *ConstNode = cast<ConstantSDNode>(C);
21857 const APInt &Imm = ConstNode->getAPIntValue();
21858
21859 // Break the MUL to a SLLI and an ADD/SUB.
21860 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
21861 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
21862 return true;
21863
21864 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
21865 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
21866 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
21867 (Imm - 8).isPowerOf2()))
21868 return true;
21869
21870 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
21871 // a pair of LUI/ADDI.
21872 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
21873 ConstNode->hasOneUse()) {
21874 APInt ImmS = Imm.ashr(Imm.countr_zero());
21875 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
21876 (1 - ImmS).isPowerOf2())
21877 return true;
21878 }
21879
21880 return false;
21881}
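// For example, x * 7 can be lowered as (x << 3) - x (slli + sub); with Zba,
// x * 4100 can become sh2add(x, x << 12) since 4100 - 4 is a power of two;
// and x * 196608 (0x30000), whose constant would otherwise need an LUI/ADDI
// pair, can be formed as (x << 17) + (x << 16).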
21882
21884 SDValue ConstNode) const {
21885 // Let the DAGCombiner decide for vectors.
21886 EVT VT = AddNode.getValueType();
21887 if (VT.isVector())
21888 return true;
21889
21890 // Let the DAGCombiner decide for larger types.
21891 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
21892 return true;
21893
21894 // It is worse if c1 is simm12 while c1*c2 is not.
21895 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
21896 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
21897 const APInt &C1 = C1Node->getAPIntValue();
21898 const APInt &C2 = C2Node->getAPIntValue();
21899 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
21900 return false;
21901
21902 // Default to true and let the DAGCombiner decide.
21903 return true;
21904}
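// For example, (x + 3) * 4096 is kept as-is: the addend 3 fits in a 12-bit
// immediate, but 3 * 4096 = 12288 does not, so distributing the multiply
// would trade a cheap addi for a constant that must be materialized
// separately.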
21905
21907 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
21908 unsigned *Fast) const {
21909 if (!VT.isVector()) {
21910 if (Fast)
21911 *Fast = Subtarget.enableUnalignedScalarMem();
21912 return Subtarget.enableUnalignedScalarMem();
21913 }
21914
21915 // All vector implementations must support element alignment
21916 EVT ElemVT = VT.getVectorElementType();
21917 if (Alignment >= ElemVT.getStoreSize()) {
21918 if (Fast)
21919 *Fast = 1;
21920 return true;
21921 }
21922
21923 // Note: We lower an unmasked unaligned vector access to an equally sized
21924 // e8 element type access. Given this, we effectively support all unmasked
21925 // misaligned accesses. TODO: Work through the codegen implications of
21926 // allowing such accesses to be formed and considered fast.
21927 if (Fast)
21928 *Fast = Subtarget.enableUnalignedVectorMem();
21929 return Subtarget.enableUnalignedVectorMem();
21930}
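// For example, a v4i32 load with 4-byte alignment is always treated as legal
// and fast (element alignment), while the same load with 1-byte alignment is
// only legal when unaligned vector memory is enabled, in which case it is
// effectively performed as a v16i8 access.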
21931
21932
21934 const AttributeList &FuncAttributes) const {
21935 if (!Subtarget.hasVInstructions())
21936 return MVT::Other;
21937
21938 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
21939 return MVT::Other;
21940
21941 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
21942 // has an expansion threshold, and we want the number of hardware memory
21943 // operations to correspond roughly to that threshold. LMUL>1 operations
21944 // are typically expanded linearly internally, and thus correspond to more
21945 // than one actual memory operation. Note that store merging and load
21946 // combining will typically form larger LMUL operations from the LMUL1
21947 // operations emitted here, and that's okay because combining isn't
21948 // introducing new memory operations; it's just merging existing ones.
21949 const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8;
21950 if (Op.size() < MinVLenInBytes)
21951 // TODO: Figure out short memops. For the moment, do the default thing
21952 // which ends up using scalar sequences.
21953 return MVT::Other;
21954
21955 // If the minimum VLEN is less than RISCV::RVVBitsPerBlock we don't support
21956 // fixed vectors.
21957 if (MinVLenInBytes <= RISCV::RVVBitsPerBlock / 8)
21958 return MVT::Other;
21959
21960 // Prefer i8 for non-zero memset as it allows us to avoid materializing
21961 // a large scalar constant and instead use vmv.v.x/i to do the
21962 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
21963 // maximize the chance we can encode the size in the vsetvli.
21964 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
21965 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
21966
21967 // Do we have sufficient alignment for our preferred VT? If not, revert
21968 // to the largest size allowed by our alignment criteria.
21969 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
21970 Align RequiredAlign(PreferredVT.getStoreSize());
21971 if (Op.isFixedDstAlign())
21972 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
21973 if (Op.isMemcpy())
21974 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
21975 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
21976 }
21977 return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
21978}
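// For example, with VLEN = 128 and ELEN = 64 this would typically return
// v16i8 for a large non-zero memset (so the fill byte can be splatted with
// vmv.v.x) and v2i64 for a sufficiently aligned memcpy, i.e. roughly one
// LMUL1-sized memory operation per 16 bytes.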
21979
21981 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
21982 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
21983 bool IsABIRegCopy = CC.has_value();
21984 EVT ValueVT = Val.getValueType();
21985
21986 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
21987 if ((ValueVT == PairVT ||
21988 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
21989 ValueVT == MVT::f64)) &&
21990 NumParts == 1 && PartVT == MVT::Untyped) {
21991 // Pairs in inline assembly, f64 in inline assembly on rv32_zdinx
21992 MVT XLenVT = Subtarget.getXLenVT();
21993 if (ValueVT == MVT::f64)
21994 Val = DAG.getBitcast(MVT::i64, Val);
21995 auto [Lo, Hi] = DAG.SplitScalar(Val, DL, XLenVT, XLenVT);
21996 // Always creating an MVT::Untyped part, so always use
21997 // RISCVISD::BuildGPRPair.
21998 Parts[0] = DAG.getNode(RISCVISD::BuildGPRPair, DL, PartVT, Lo, Hi);
21999 return true;
22000 }
22001
22002 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
22003 PartVT == MVT::f32) {
22004 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
22005 // NaN, and cast to f32.
22006 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
22007 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
22008 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
22009 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
22010 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
22011 Parts[0] = Val;
22012 return true;
22013 }
22014
22015 if (ValueVT.isRISCVVectorTuple() && PartVT.isRISCVVectorTuple()) {
22016#ifndef NDEBUG
22017 unsigned ValNF = ValueVT.getRISCVVectorTupleNumFields();
22018 [[maybe_unused]] unsigned ValLMUL =
22020 ValNF * RISCV::RVVBitsPerBlock);
22021 unsigned PartNF = PartVT.getRISCVVectorTupleNumFields();
22022 [[maybe_unused]] unsigned PartLMUL =
22024 PartNF * RISCV::RVVBitsPerBlock);
22025 assert(ValNF == PartNF && ValLMUL == PartLMUL &&
22026 "RISC-V vector tuple type only accepts same register class type "
22027 "TUPLE_INSERT");
22028#endif
22029
22030 Val = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, PartVT, DAG.getUNDEF(PartVT),
22031 Val, DAG.getVectorIdxConstant(0, DL));
22032 Parts[0] = Val;
22033 return true;
22034 }
22035
22036 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
22037 LLVMContext &Context = *DAG.getContext();
22038 EVT ValueEltVT = ValueVT.getVectorElementType();
22039 EVT PartEltVT = PartVT.getVectorElementType();
22040 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
22041 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
22042 if (PartVTBitSize % ValueVTBitSize == 0) {
22043 assert(PartVTBitSize >= ValueVTBitSize);
22044 // If the element types are different, bitcast to the same element type of
22045 // PartVT first.
22046 // For example, to copy a <vscale x 1 x i8> value into a
22047 // <vscale x 4 x i16> part, we first widen <vscale x 1 x i8> to
22048 // <vscale x 8 x i8> with an insert_subvector, and then bitcast the
22049 // result to <vscale x 4 x i16>.
22050 if (ValueEltVT != PartEltVT) {
22051 if (PartVTBitSize > ValueVTBitSize) {
22052 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
22053 assert(Count != 0 && "The number of elements should not be zero.");
22054 EVT SameEltTypeVT =
22055 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
22056 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
22057 DAG.getUNDEF(SameEltTypeVT), Val,
22058 DAG.getVectorIdxConstant(0, DL));
22059 }
22060 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
22061 } else {
22062 Val =
22063 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
22064 Val, DAG.getVectorIdxConstant(0, DL));
22065 }
22066 Parts[0] = Val;
22067 return true;
22068 }
22069 }
22070
22071 return false;
22072}
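// For example, passing the half value 1.0 (bit pattern 0x3C00) in an f32 ABI
// register produces 0xFFFF3C00, which is a NaN when viewed as f32; the
// receiving side (below) simply truncates the low 16 bits back out.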
22073
22075 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
22076 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
22077 bool IsABIRegCopy = CC.has_value();
22078
22079 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
22080 if ((ValueVT == PairVT ||
22081 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
22082 ValueVT == MVT::f64)) &&
22083 NumParts == 1 && PartVT == MVT::Untyped) {
22084 // Pairs in inline assembly, f64 in inline assembly on rv32_zdinx
22085 MVT XLenVT = Subtarget.getXLenVT();
22086
22087 SDValue Val = Parts[0];
22088 // Always starting with an MVT::Untyped part, so always use
22089 // RISCVISD::SplitGPRPair
22090 Val = DAG.getNode(RISCVISD::SplitGPRPair, DL, DAG.getVTList(XLenVT, XLenVT),
22091 Val);
22092 Val = DAG.getNode(ISD::BUILD_PAIR, DL, PairVT, Val.getValue(0),
22093 Val.getValue(1));
22094 if (ValueVT == MVT::f64)
22095 Val = DAG.getBitcast(ValueVT, Val);
22096 return Val;
22097 }
22098
22099 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
22100 PartVT == MVT::f32) {
22101 SDValue Val = Parts[0];
22102
22103 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
22104 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
22105 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
22106 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
22107 return Val;
22108 }
22109
22110 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
22111 LLVMContext &Context = *DAG.getContext();
22112 SDValue Val = Parts[0];
22113 EVT ValueEltVT = ValueVT.getVectorElementType();
22114 EVT PartEltVT = PartVT.getVectorElementType();
22115 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
22116 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
22117 if (PartVTBitSize % ValueVTBitSize == 0) {
22118 assert(PartVTBitSize >= ValueVTBitSize);
22119 EVT SameEltTypeVT = ValueVT;
22120 // If the element types are different, convert it to the same element type
22121 // of PartVT.
22122 // For example, to copy a <vscale x 1 x i8> value out of a
22123 // <vscale x 4 x i16> part, we first bitcast <vscale x 4 x i16> to
22124 // <vscale x 8 x i8>, and then extract the <vscale x 1 x i8>
22125 // subvector from it.
22126 if (ValueEltVT != PartEltVT) {
22127 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
22128 assert(Count != 0 && "The number of elements should not be zero.");
22129 SameEltTypeVT =
22130 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
22131 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
22132 }
22133 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
22134 DAG.getVectorIdxConstant(0, DL));
22135 return Val;
22136 }
22137 }
22138 return SDValue();
22139}
22140
22142 // When aggressively optimizing for code size, we prefer to use a div
22143 // instruction, as it is usually smaller than the alternative sequence.
22144 // TODO: Add vector division?
22145 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
22146 return OptSize && !VT.isVector();
22147}
22148
22150 // Scalarizing zero_ext and sign_ext might prevent them from being matched to
22151 // a widening instruction in some situations.
22152 unsigned Opc = N->getOpcode();
22153 if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
22154 return false;
22155 return true;
22156}
22157
22158static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
22159 Module *M = IRB.GetInsertBlock()->getModule();
22160 Function *ThreadPointerFunc =
22161 Intrinsic::getOrInsertDeclaration(M, Intrinsic::thread_pointer);
22162 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
22163 IRB.CreateCall(ThreadPointerFunc), Offset);
22164}
22165
22167 // Fuchsia provides a fixed TLS slot for the stack cookie.
22168 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
22169 if (Subtarget.isTargetFuchsia())
22170 return useTpOffset(IRB, -0x10);
22171
22172 // Android provides a fixed TLS slot for the stack cookie. See the definition
22173 // of TLS_SLOT_STACK_GUARD in
22174 // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h
22175 if (Subtarget.isTargetAndroid())
22176 return useTpOffset(IRB, -0x18);
22177
22178 Module *M = IRB.GetInsertBlock()->getModule();
22179
22180 if (M->getStackProtectorGuard() == "tls") {
22181 // Users must specify the offset explicitly
22182 int Offset = M->getStackProtectorGuardOffset();
22183 return useTpOffset(IRB, Offset);
22184 }
22185
22186 return TargetLowering::getIRStackGuard(IRB);
22187}
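// For example, on Android the stack cookie ends up being read roughly as
//   %tp = call ptr @llvm.thread.pointer()
//   %slot = getelementptr i8, ptr %tp, i32 -24
// i.e. a tp-relative slot rather than a load from a __stack_chk_guard global.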
22188
22190 VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
22191 const DataLayout &DL) const {
22192 EVT VT = getValueType(DL, VTy);
22193 // Don't lower vlseg/vsseg for vector types that can't be split.
22194 if (!isTypeLegal(VT))
22195 return false;
22196
22198 !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
22199 Alignment))
22200 return false;
22201
22202 MVT ContainerVT = VT.getSimpleVT();
22203
22204 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
22205 if (!Subtarget.useRVVForFixedLengthVectors())
22206 return false;
22207 // Sometimes the interleaved access pass picks up splats as interleaves of
22208 // one element. Don't lower these.
22209 if (FVTy->getNumElements() < 2)
22210 return false;
22211
22213 } else {
22214 // The intrinsics for scalable vectors are not overloaded on pointer type
22215 // and can only handle the default address space.
22216 if (AddrSpace)
22217 return false;
22218 }
22219
22220 // Need to make sure that EMUL * NFIELDS ≤ 8
22221 auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
22222 if (Fractional)
22223 return true;
22224 return Factor * LMUL <= 8;
22225}
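// For example, <vscale x 2 x i32> (LMUL = 1) supports interleave factors up
// to 8, while <vscale x 4 x i32> (LMUL = 2) supports at most factor 4,
// because the whole segment must fit in EMUL * NFIELDS <= 8 vector registers.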
22226
22228 Align Alignment) const {
22229 if (!Subtarget.hasVInstructions())
22230 return false;
22231
22232 // Only support fixed vectors if we know the minimum vector size.
22233 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
22234 return false;
22235
22236 EVT ScalarType = DataType.getScalarType();
22237 if (!isLegalElementTypeForRVV(ScalarType))
22238 return false;
22239
22240 if (!Subtarget.enableUnalignedVectorMem() &&
22241 Alignment < ScalarType.getStoreSize())
22242 return false;
22243
22244 return true;
22245}
22246
22248 Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
22249 Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
22250 Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
22251 Intrinsic::riscv_seg8_load};
22252
22253/// Lower an interleaved load into a vlsegN intrinsic.
22254///
22255/// E.g. Lower an interleaved load (Factor = 2):
22256/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
22257/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
22258/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
22259///
22260/// Into:
22261/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
22262/// %ptr, i64 4)
22263/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
22264/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
22267 ArrayRef<unsigned> Indices, unsigned Factor) const {
22268 assert(Indices.size() == Shuffles.size());
22269
22270 IRBuilder<> Builder(LI);
22271
22272 auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
22273 if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
22275 LI->getDataLayout()))
22276 return false;
22277
22278 auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
22279
22280 // If the segment load is going to be performed one segment at a time anyway
22281 // and there's only one element used, use a strided load instead. This
22282 // will be equally fast, and create less vector register pressure.
22283 if (Indices.size() == 1 && !Subtarget.hasOptimizedSegmentLoadStore(Factor)) {
22284 unsigned ScalarSizeInBytes = VTy->getScalarSizeInBits() / 8;
22285 Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
22286 Value *Offset = ConstantInt::get(XLenTy, Indices[0] * ScalarSizeInBytes);
22287 Value *BasePtr = Builder.CreatePtrAdd(LI->getPointerOperand(), Offset);
22288 Value *Mask = Builder.getAllOnesMask(VTy->getElementCount());
22289 Value *VL = Builder.getInt32(VTy->getNumElements());
22290
22291 CallInst *CI =
22292 Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_load,
22293 {VTy, BasePtr->getType(), Stride->getType()},
22294 {BasePtr, Stride, Mask, VL});
22295 CI->addParamAttr(
22297 Shuffles[0]->replaceAllUsesWith(CI);
22298 return true;
22299 }
22300
22301 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
22302
22303 CallInst *VlsegN = Builder.CreateIntrinsic(
22304 FixedVlsegIntrIds[Factor - 2], {VTy, LI->getPointerOperandType(), XLenTy},
22305 {LI->getPointerOperand(), VL});
22306
22307 for (unsigned i = 0; i < Shuffles.size(); i++) {
22308 Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
22309 Shuffles[i]->replaceAllUsesWith(SubVec);
22310 }
22311
22312 return true;
22313}
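// For example, if only the even elements of a factor-2 interleaved <8 x i32>
// load are actually used, the code above emits a single
// llvm.experimental.vp.strided.load with an 8-byte stride rather than a
// vlseg2 that would also materialize the unused odd field.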
22314
22316 Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
22317 Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
22318 Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
22319 Intrinsic::riscv_seg8_store};
22320
22321/// Lower an interleaved store into a vssegN intrinsic.
22322///
22323/// E.g. Lower an interleaved store (Factor = 3):
22324/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
22325/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
22326/// store <12 x i32> %i.vec, <12 x i32>* %ptr
22327///
22328/// Into:
22329/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
22330/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
22331/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
22332/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
22333/// %ptr, i32 4)
22334///
22335/// Note that the new shufflevectors will be removed and we'll only generate one
22336/// vsseg3 instruction in CodeGen.
22338 ShuffleVectorInst *SVI,
22339 unsigned Factor) const {
22340 IRBuilder<> Builder(SI);
22341 auto Mask = SVI->getShuffleMask();
22342 auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
22343 // Given SVI : <n*factor x ty>, then VTy : <n x ty>
22344 auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
22345 ShuffleVTy->getNumElements() / Factor);
22346 if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
22347 SI->getPointerAddressSpace(),
22348 SI->getDataLayout()))
22349 return false;
22350
22351 auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
22352
22353 unsigned Index;
22354 // If the segment store only has one active lane (i.e. the interleave is
22355 // just a spread shuffle), we can use a strided store instead. This will
22356 // be equally fast, and create less vector register pressure.
22357 if (!Subtarget.hasOptimizedSegmentLoadStore(Factor) &&
22358 isSpreadMask(Mask, Factor, Index)) {
22359 unsigned ScalarSizeInBytes = ShuffleVTy->getScalarSizeInBits() / 8;
22360 Value *Data = SVI->getOperand(0);
22361 auto *DataVTy = cast<FixedVectorType>(Data->getType());
22362 Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
22363 Value *Offset = ConstantInt::get(XLenTy, Index * ScalarSizeInBytes);
22364 Value *BasePtr = Builder.CreatePtrAdd(SI->getPointerOperand(), Offset);
22365 Value *Mask = Builder.getAllOnesMask(DataVTy->getElementCount());
22366 Value *VL = Builder.getInt32(VTy->getNumElements());
22367
22368 CallInst *CI = Builder.CreateIntrinsic(
22369 Intrinsic::experimental_vp_strided_store,
22370 {Data->getType(), BasePtr->getType(), Stride->getType()},
22371 {Data, BasePtr, Stride, Mask, VL});
22372 CI->addParamAttr(
22373 1, Attribute::getWithAlignment(CI->getContext(), SI->getAlign()));
22374
22375 return true;
22376 }
22377
22379 SI->getModule(), FixedVssegIntrIds[Factor - 2],
22380 {VTy, SI->getPointerOperandType(), XLenTy});
22381
22383
22384 for (unsigned i = 0; i < Factor; i++) {
22385 Value *Shuffle = Builder.CreateShuffleVector(
22386 SVI->getOperand(0), SVI->getOperand(1),
22387 createSequentialMask(Mask[i], VTy->getNumElements(), 0));
22388 Ops.push_back(Shuffle);
22389 }
22390 // This VL should be OK (should be executable in one vsseg instruction,
22391 // potentially under larger LMULs) because we checked that the fixed vector
22392 // type fits in isLegalInterleavedAccessType
22393 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
22394 Ops.append({SI->getPointerOperand(), VL});
22395
22396 Builder.CreateCall(VssegNFunc, Ops);
22397
22398 return true;
22399}
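// For example, a factor-3 "spread" store where only one field carries real
// data (the other two lanes of each group are undef) can be emitted as a
// single llvm.experimental.vp.strided.store with a 12-byte stride for i32
// elements instead of a full vsseg3.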
22400
22402 IntrinsicInst *DI, LoadInst *LI,
22403 SmallVectorImpl<Instruction *> &DeadInsts) const {
22404 assert(LI->isSimple());
22405 IRBuilder<> Builder(LI);
22406
22407 // Only deinterleave2 supported at present.
22408 if (DI->getIntrinsicID() != Intrinsic::vector_deinterleave2)
22409 return false;
22410
22411 const unsigned Factor = 2;
22412
22413 VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));
22414 const DataLayout &DL = LI->getDataLayout();
22415
22416 if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
22417 LI->getPointerAddressSpace(), DL))
22418 return false;
22419
22420 Value *Return;
22421 Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
22422
22423 if (auto *FVTy = dyn_cast<FixedVectorType>(ResVTy)) {
22424 Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
22425 Return =
22426 Builder.CreateIntrinsic(FixedVlsegIntrIds[Factor - 2],
22427 {ResVTy, LI->getPointerOperandType(), XLenTy},
22428 {LI->getPointerOperand(), VL});
22429 } else {
22430 static const Intrinsic::ID IntrIds[] = {
22431 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
22432 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
22433 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
22434 Intrinsic::riscv_vlseg8};
22435
22436 unsigned SEW = DL.getTypeSizeInBits(ResVTy->getElementType());
22437 unsigned NumElts = ResVTy->getElementCount().getKnownMinValue();
22438 Type *VecTupTy = TargetExtType::get(
22439 LI->getContext(), "riscv.vector.tuple",
22441 NumElts * SEW / 8),
22442 Factor);
22443
22444 Value *VL = Constant::getAllOnesValue(XLenTy);
22445
22446 Value *Vlseg = Builder.CreateIntrinsic(
22447 IntrIds[Factor - 2], {VecTupTy, XLenTy},
22448 {PoisonValue::get(VecTupTy), LI->getPointerOperand(), VL,
22449 ConstantInt::get(XLenTy, Log2_64(SEW))});
22450
22451 SmallVector<Type *, 2> AggrTypes{Factor, ResVTy};
22452 Return = PoisonValue::get(StructType::get(LI->getContext(), AggrTypes));
22453 for (unsigned i = 0; i < Factor; ++i) {
22454 Value *VecExtract = Builder.CreateIntrinsic(
22455 Intrinsic::riscv_tuple_extract, {ResVTy, VecTupTy},
22456 {Vlseg, Builder.getInt32(i)});
22457 Return = Builder.CreateInsertValue(Return, VecExtract, i);
22458 }
22459 }
22460
22461 DI->replaceAllUsesWith(Return);
22462
22463 return true;
22464}
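// For example, a fixed-length
//   {<4 x i32>, <4 x i32>} llvm.vector.deinterleave2(<8 x i32> %wide)
// fed by a plain load is lowered to riscv.seg2.load, while the scalable form
// uses riscv.vlseg2 on a riscv.vector.tuple value and pulls each field out
// with riscv.tuple.extract, as above.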
22465
22468 SmallVectorImpl<Instruction *> &DeadInsts) const {
22469 assert(SI->isSimple());
22470 IRBuilder<> Builder(SI);
22471
22472 // Only interleave2 supported at present.
22473 if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
22474 return false;
22475
22476 const unsigned Factor = 2;
22477
22478 VectorType *InVTy = cast<VectorType>(II->getArgOperand(0)->getType());
22479 const DataLayout &DL = SI->getDataLayout();
22480
22481 if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
22482 SI->getPointerAddressSpace(), DL))
22483 return false;
22484
22485 Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
22486
22487 if (auto *FVTy = dyn_cast<FixedVectorType>(InVTy)) {
22488 Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
22489 Builder.CreateIntrinsic(FixedVssegIntrIds[Factor - 2],
22490 {InVTy, SI->getPointerOperandType(), XLenTy},
22491 {II->getArgOperand(0), II->getArgOperand(1),
22492 SI->getPointerOperand(), VL});
22493 } else {
22494 static const Intrinsic::ID IntrIds[] = {
22495 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
22496 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
22497 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
22498 Intrinsic::riscv_vsseg8};
22499
22500 unsigned SEW = DL.getTypeSizeInBits(InVTy->getElementType());
22501 unsigned NumElts = InVTy->getElementCount().getKnownMinValue();
22502 Type *VecTupTy = TargetExtType::get(
22503 SI->getContext(), "riscv.vector.tuple",
22504 ScalableVectorType::get(Type::getInt8Ty(SI->getContext()),
22505 NumElts * SEW / 8),
22506 Factor);
22507
22509 SI->getModule(), IntrIds[Factor - 2], {VecTupTy, XLenTy});
22510
22511 Value *VL = Constant::getAllOnesValue(XLenTy);
22512
22513 Value *StoredVal = PoisonValue::get(VecTupTy);
22514 for (unsigned i = 0; i < Factor; ++i)
22515 StoredVal = Builder.CreateIntrinsic(
22516 Intrinsic::riscv_tuple_insert, {VecTupTy, InVTy},
22517 {StoredVal, II->getArgOperand(i), Builder.getInt32(i)});
22518
22519 Builder.CreateCall(VssegNFunc, {StoredVal, SI->getPointerOperand(), VL,
22520 ConstantInt::get(XLenTy, Log2_64(SEW))});
22521 }
22522
22523 return true;
22524}
22525
22529 const TargetInstrInfo *TII) const {
22530 assert(MBBI->isCall() && MBBI->getCFIType() &&
22531 "Invalid call instruction for a KCFI check");
22532 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
22533 MBBI->getOpcode()));
22534
22535 MachineOperand &Target = MBBI->getOperand(0);
22536 Target.setIsRenamable(false);
22537
22538 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
22539 .addReg(Target.getReg())
22540 .addImm(MBBI->getCFIType())
22541 .getInstr();
22542}
22543
22544#define GET_REGISTER_MATCHER
22545#include "RISCVGenAsmMatcher.inc"
22546
22549 const MachineFunction &MF) const {
22551 if (Reg == RISCV::NoRegister)
22553 if (Reg == RISCV::NoRegister)
22555 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
22556 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
22557 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
22558 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
22559 StringRef(RegName) + "\"."));
22560 return Reg;
22561}
22562
22565 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
22566
22567 if (NontemporalInfo == nullptr)
22569
22570 // 1 (the default value) -> __RISCV_NTLH_ALL
22571 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
22572 // 3 -> __RISCV_NTLH_ALL_PRIVATE
22573 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
22574 // 5 -> __RISCV_NTLH_ALL
22575 int NontemporalLevel = 5;
22576 const MDNode *RISCVNontemporalInfo =
22577 I.getMetadata("riscv-nontemporal-domain");
22578 if (RISCVNontemporalInfo != nullptr)
22579 NontemporalLevel =
22580 cast<ConstantInt>(
22581 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
22582 ->getValue())
22583 ->getZExtValue();
22584
22585 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
22586 "RISC-V target doesn't support this non-temporal domain.");
22587
22588 NontemporalLevel -= 2;
22589 MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
22590 if (NontemporalLevel & 0b1)
22591 Flags |= MONontemporalBit0;
22592 if (NontemporalLevel & 0b10)
22593 Flags |= MONontemporalBit1;
22594
22595 return Flags;
22596}
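// For example, a store tagged with !nontemporal plus a
// riscv-nontemporal-domain value of 3 (__RISCV_NTLH_ALL_PRIVATE) maps to
// level 3 - 2 = 1, so only MONontemporalBit0 is set, and instruction
// selection can later emit the matching Zihintntl hint before the store.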
22597
22600
22601 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
22602 MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
22603 TargetFlags |= (NodeFlags & MONontemporalBit0);
22604 TargetFlags |= (NodeFlags & MONontemporalBit1);
22605 return TargetFlags;
22606}
22607
22609 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
22610 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
22611}
22612
22614 if (VT.isScalableVector())
22615 return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
22616 if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
22617 return true;
22618 return Subtarget.hasStdExtZbb() &&
22619 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
22620}
22621
22623 ISD::CondCode Cond) const {
22624 return isCtpopFast(VT) ? 0 : 1;
22625}
22626
22628 const Instruction *I) const {
22629 if (Subtarget.hasStdExtZalasr()) {
22630 if (Subtarget.hasStdExtZtso()) {
22631 // Zalasr + TSO means that atomic_load_acquire and atomic_store_release
22632 // should be lowered to plain load/store. The easiest way to do this is
22633 // to say we should insert fences for them, and the fence insertion code
22634 // will just not insert any fences
22635 auto *LI = dyn_cast<LoadInst>(I);
22636 auto *SI = dyn_cast<StoreInst>(I);
22637 if ((LI &&
22638 (LI->getOrdering() == AtomicOrdering::SequentiallyConsistent)) ||
22639 (SI &&
22640 (SI->getOrdering() == AtomicOrdering::SequentiallyConsistent))) {
22641 // Here, this is a load or store which is seq_cst and needs a .aq or
22642 // .rl, so we shouldn't try to insert fences.
22643 return false;
22644 }
22645 // Here, we are a TSO inst that isn't a seq_cst load/store
22646 return isa<LoadInst>(I) || isa<StoreInst>(I);
22647 }
22648 return false;
22649 }
22650 // Note that one specific case requires fence insertion for an
22651 // AtomicCmpXchgInst but is handled via the RISCVZacasABIFix pass rather
22652 // than this hook due to limitations in the interface here.
22653 return isa<LoadInst>(I) || isa<StoreInst>(I);
22654}
22655
22657
22658 // GISel support is in progress or complete for these opcodes.
22659 unsigned Op = Inst.getOpcode();
22660 if (Op == Instruction::Add || Op == Instruction::Sub ||
22661 Op == Instruction::And || Op == Instruction::Or ||
22662 Op == Instruction::Xor || Op == Instruction::InsertElement ||
22663 Op == Instruction::ShuffleVector || Op == Instruction::Load ||
22664 Op == Instruction::Freeze || Op == Instruction::Store)
22665 return false;
22666
22667 if (Inst.getType()->isScalableTy())
22668 return true;
22669
22670 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
22671 if (Inst.getOperand(i)->getType()->isScalableTy() &&
22672 !isa<ReturnInst>(&Inst))
22673 return true;
22674
22675 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
22676 if (AI->getAllocatedType()->isScalableTy())
22677 return true;
22678 }
22679
22680 return false;
22681}
22682
22683SDValue
22684RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
22685 SelectionDAG &DAG,
22686 SmallVectorImpl<SDNode *> &Created) const {
22688 if (isIntDivCheap(N->getValueType(0), Attr))
22689 return SDValue(N, 0); // Lower SDIV as SDIV
22690
22691 // Only perform this transform if short forward branch opt is supported.
22692 if (!Subtarget.hasShortForwardBranchOpt())
22693 return SDValue();
22694 EVT VT = N->getValueType(0);
22695 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
22696 return SDValue();
22697
22698 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
22699 if (Divisor.sgt(2048) || Divisor.slt(-2048))
22700 return SDValue();
22701 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
22702}
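// For example, for x / 8 this builds (x < 0 ? x + 7 : x) >> 3, where the
// conditional add of the bias 7 is implemented with a short forward branch,
// instead of the generic branchless srai/srli/add/srai expansion.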
22703
22704bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
22705 EVT VT, const APInt &AndMask) const {
22706 if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
22707 return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
22708 return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
22709}
22710
22711unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
22712 return Subtarget.getMinimumJumpTableEntries();
22713}
22714
22717 int JTI,
22718 SelectionDAG &DAG) const {
22719 if (Subtarget.hasStdExtZicfilp()) {
22720 // When Zicfilp is enabled, we need to use a software-guarded branch for the
22721 // jump table branch.
22722 SDValue Chain = Value;
22723 // Jump table debug info is only needed if CodeView is enabled.
22725 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
22726 return DAG.getNode(RISCVISD::SW_GUARDED_BRIND, dl, MVT::Other, Chain, Addr);
22727 }
22728 return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG);
22729}
22730
22731 // If an output pattern produces multiple instructions, tablegen may pick an
22732 // arbitrary type from an instruction's destination register class to use for the
22733// VT of that MachineSDNode. This VT may be used to look up the representative
22734// register class. If the type isn't legal, the default implementation will
22735// not find a register class.
22736//
22737// Some integer types smaller than XLen are listed in the GPR register class to
22738// support isel patterns for GISel, but are not legal in SelectionDAG. The
22739// arbitrary type tablegen picks may be one of these smaller types.
22740//
22741// f16 and bf16 are both valid for the FPR16 or GPRF16 register class. It's
22742// possible for tablegen to pick bf16 as the arbitrary type for an f16 pattern.
22743std::pair<const TargetRegisterClass *, uint8_t>
22744RISCVTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
22745 MVT VT) const {
22746 switch (VT.SimpleTy) {
22747 default:
22748 break;
22749 case MVT::i8:
22750 case MVT::i16:
22751 case MVT::i32:
22753 case MVT::bf16:
22754 case MVT::f16:
22756 }
22757
22759}
22760
22762
22763#define GET_RISCVVIntrinsicsTable_IMPL
22764#include "RISCVGenSearchableTables.inc"
22765
22766} // namespace llvm::RISCVVIntrinsicsTable
22767
22769
22770 // If the function specifically requests inline stack probes, emit them.
22771 if (MF.getFunction().hasFnAttribute("probe-stack"))
22772 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
22773 "inline-asm";
22774
22775 return false;
22776}
22777
22779 Align StackAlign) const {
22780 // The default stack probe size is 4096 if the function has no
22781 // stack-probe-size attribute.
22782 const Function &Fn = MF.getFunction();
22783 unsigned StackProbeSize =
22784 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
22785 // Round down to the stack alignment.
22786 StackProbeSize = alignDown(StackProbeSize, StackAlign.value());
22787 return StackProbeSize ? StackProbeSize : StackAlign.value();
22788}
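// For example, a function attribute "stack-probe-size"="5000" with a 16-byte
// stack alignment is rounded down to a 4992-byte probe interval; without the
// attribute the interval defaults to 4096 bytes.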
22789
22790SDValue RISCVTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
22791 SelectionDAG &DAG) const {
22793 if (!hasInlineStackProbe(MF))
22794 return SDValue();
22795
22796 MVT XLenVT = Subtarget.getXLenVT();
22797 // Get the inputs.
22798 SDValue Chain = Op.getOperand(0);
22799 SDValue Size = Op.getOperand(1);
22800
22802 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
22803 SDLoc dl(Op);
22804 EVT VT = Op.getValueType();
22805
22806 // Construct the new SP value in a GPR.
22807 SDValue SP = DAG.getCopyFromReg(Chain, dl, RISCV::X2, XLenVT);
22808 Chain = SP.getValue(1);
22809 SP = DAG.getNode(ISD::SUB, dl, XLenVT, SP, Size);
22810 if (Align)
22811 SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
22812 DAG.getSignedConstant(-(uint64_t)Align->value(), dl, VT));
22813
22814 // Set the real SP to the new value with a probing loop.
22815 Chain = DAG.getNode(RISCVISD::PROBED_ALLOCA, dl, MVT::Other, Chain, SP);
22816 return DAG.getMergeValues({SP, Chain}, dl);
22817}
22818
22821 MachineBasicBlock *MBB) const {
22822 MachineFunction &MF = *MBB->getParent();
22823 MachineBasicBlock::iterator MBBI = MI.getIterator();
22825 Register TargetReg = MI.getOperand(1).getReg();
22826
22827 const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
22828 bool IsRV64 = Subtarget.is64Bit();
22829 Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
22830 const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
22831 uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
22832
22833 MachineFunction::iterator MBBInsertPoint = std::next(MBB->getIterator());
22834 MachineBasicBlock *LoopTestMBB =
22836 MF.insert(MBBInsertPoint, LoopTestMBB);
22838 MF.insert(MBBInsertPoint, ExitMBB);
22839 Register SPReg = RISCV::X2;
22840 Register ScratchReg =
22841 MF.getRegInfo().createVirtualRegister(&RISCV::GPRRegClass);
22842
22843 // ScratchReg = ProbeSize
22844 TII->movImm(*MBB, MBBI, DL, ScratchReg, ProbeSize, MachineInstr::NoFlags);
22845
22846 // LoopTest:
22847 // SUB SP, SP, ProbeSize
22848 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB), SPReg)
22849 .addReg(SPReg)
22850 .addReg(ScratchReg);
22851
22852 // s[d|w] zero, 0(sp)
22853 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
22854 TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
22855 .addReg(RISCV::X0)
22856 .addReg(SPReg)
22857 .addImm(0);
22858
22859 // BLT TargetReg, SP, LoopTest
22860 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BLT))
22861 .addReg(TargetReg)
22862 .addReg(SPReg)
22863 .addMBB(LoopTestMBB);
22864
22865 // Adjust with: MV SP, TargetReg.
22866 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(RISCV::ADDI), SPReg)
22867 .addReg(TargetReg)
22868 .addImm(0);
22869
22870 ExitMBB->splice(ExitMBB->end(), MBB, std::next(MBBI), MBB->end());
22872
22873 LoopTestMBB->addSuccessor(ExitMBB);
22874 LoopTestMBB->addSuccessor(LoopTestMBB);
22875 MBB->addSuccessor(LoopTestMBB);
22876
22877 MI.eraseFromParent();
22878 MF.getInfo<RISCVMachineFunctionInfo>()->setDynamicAllocation();
22879 return ExitMBB->begin()->getParent();
22880}
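// The probing loop emitted above looks roughly like:
//   li     <scratch>, <probe size>
// LoopTest:
//   sub    sp, sp, <scratch>
//   s[d|w] zero, 0(sp)
//   blt    <target>, sp, LoopTest
//   mv     sp, <target>
// so the stack pointer is moved down one probe-size step at a time, touching
// each step, until it reaches the requested target.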
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
#define NODE_NAME_CASE(node)
static bool isConstant(const MachineInstr &MI)
AMDGPU Register Bank Select
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
uint64_t Addr
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define Check(C,...)
#define im(i)
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define RegName(no)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
loop Loop Strength Reduction
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static StringRef getName(Value *V)
static constexpr Register SPReg
static StringRef getExtensionType(StringRef Ext)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, unsigned RelOpcode, unsigned EqOpcode, const RISCVSubtarget &Subtarget)
static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef< int > Mask)
Match shuffles that concatenate two vectors, rotate the concatenation, and then extract the original ...
static const Intrinsic::ID FixedVlsegIntrIds[]
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MVT getLMUL1VT(MVT VT)
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match v(f)slide1up/down idioms.
static bool hasPassthruOp(unsigned Opcode)
Return true if a RISC-V target specified op has a passthru operand.
static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< APInt > getExactInteger(const APFloat &APF, uint32_t BitWidth)
static bool isInterleaveShuffle(ArrayRef< int > Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget)
Is this shuffle interleaving contiguous elements from one vector into the even elements and contiguou...
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG)
According to the property that indexed load/store instructions zero-extend their indices,...
static unsigned getPACKOpcode(unsigned DestBW, const RISCVSubtarget &Subtarget)
static void promoteVCIXScalar(const SDValue &Op, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(2))
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static const RISCV::RISCVMaskedPseudoInfo * lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW)
static SDValue expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, Align BaseAlign, const RISCVSubtarget &ST)
Match the index of a gather or scatter operation as an operation with twice the element width and hal...
static bool isLegalBitRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, MVT &RotateVT, unsigned &RotateAmt)
static SDValue combineOp_VLToVWOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Combine a binary or FMA operation to its equivalent VW or VW_W form.
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Given a shuffle where the indices are disjoint between the two sources, e.g.:
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG)
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG)
static SDValue combineScalarCTPOPToVCPOP(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool hasMaskOp(unsigned Opcode)
Return true if a RISC-V target specified op has a mask operand.
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isSpreadMask(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target specified VL op for a given SDNode.
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isPromotedOpNeedingSplit(SDValue Op, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVLOperand(SDValue Op)
static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
Custom legalize <N x i128> or <N x i256> to <M x ELEN>.
static SDValue performVP_STORECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT, SDValue Src, unsigned Factor, unsigned Index, SelectionDAG &DAG)
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
static bool isSelectPseudo(MachineInstr &MI)
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG)
static SDValue getSingleShuffleSrc(MVT VT, MVT ContainerVT, SDValue V1, SDValue V2)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc)
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
#define VP_CASE(NODE)
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangeme...
static SDValue performVFMADD_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void processVCIXOperands(SDValue &OrigOp, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isNonZeroAVL(SDValue AVL)
#define DEBUG_TYPE
static SDValue lowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index, const SDLoc &DL, SelectionDAG &DAG)
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed...
static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Double the element size of the build vector to reduce the number of vslide1down in the build vector c...
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try and optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large pro...
static bool isCompressMask(ArrayRef< int > Mask)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily t...
#define OP_CASE(NODE)
static const Intrinsic::ID FixedVssegIntrIds[]
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static LLT getMaskTypeFor(LLT VecTy)
Return the mask type suitable for masking the provided vector type.
static unsigned getRISCVWOpcode(unsigned Opcode)
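The helpers listed above include computeGREVOrGORC, which evaluates a generalized bit-reverse (GREV) or bit-or-combine (GORC) at compile time. As a rough illustration of the butterfly network such an evaluator walks (a self-contained sketch, not necessarily the exact body in this file; the name grevOrGorc64 is invented for the example):

#include <cstdint>

// Evaluate a generalized bit-reverse (GREV) or bit-or-combine (GORC) of X.
// Each set bit of ShAmt enables one stage of the butterfly network; GORC
// ORs the swapped halves back in instead of replacing them.
static uint64_t grevOrGorc64(uint64_t X, unsigned ShAmt, bool IsGORC) {
  static const uint64_t Masks[] = {
      0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
      0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
  for (unsigned Stage = 0; Stage != 6; ++Stage) {
    unsigned Shift = 1u << Stage;
    if (ShAmt & Shift) {
      uint64_t Swapped =
          ((X & Masks[Stage]) << Shift) | ((X >> Shift) & Masks[Stage]);
      X = IsGORC ? (X | Swapped) : Swapped;
    }
  }
  return X;
}

With this formulation, a shift amount of 7 reverses the bits within each byte, while 56 reverses the byte order of a 64-bit value.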
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1334
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1326
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition: APFloat.h:1111
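The APFloat entries above are the conversion helpers used when folding integer/float conversions. A minimal sketch (not taken from this file) of checking whether an integer converts to an IEEE single without rounding, using only the API listed here:

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
using namespace llvm;

// Returns true when Int converts to an IEEE single without any rounding;
// convertFromAPInt reports rounding through its opStatus result.
static bool convertsExactlyToF32(const APInt &Int, bool IsSigned) {
  APFloat F(APFloat::IEEEsingle());
  APFloat::opStatus S =
      F.convertFromAPInt(Int, IsSigned, APFloat::rmNearestTiesToEven);
  return S == APFloat::opOK;
}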
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1520
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1386
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1492
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:910
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1330
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1201
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:371
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:209
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:329
APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
Definition: APInt.cpp:1618
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1397
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:435
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:624
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:219
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1511
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:370
APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition: APInt.cpp:1710
bool isMask(unsigned numBits) const
Definition: APInt.h:488
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:334
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:959
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:306
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1130
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:296
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1389
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:455
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:286
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:239
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1542
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1221
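Much of the constant reasoning in lowering code is phrased through the APInt predicates and factories listed above. A small illustrative sketch; the 12-bit width and the splatByte helper are chosen purely for the example:

#include "llvm/ADT/APInt.h"
#include <cstdint>
using namespace llvm;

// Does Imm fit a signed 12-bit immediate field? (The width is illustrative.)
static bool fitsInSImm12(const APInt &Imm) {
  return Imm.isSignedIntN(12);
}

// Broadcast an 8-bit pattern across BitWidth bits (BitWidth is assumed to
// be a multiple of 8), the kind of splat reasoning constant lowering uses.
static APInt splatByte(unsigned BitWidth, uint8_t Pattern) {
  return APInt::getSplat(BitWidth, APInt(8, Pattern));
}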
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
an instruction to allocate memory on the stack
Definition: Instructions.h:63
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:157
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
iterator begin() const
Definition: ArrayRef.h:156
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:198
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:501
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:704
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:827
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:716
@ Add
*p = old + v
Definition: Instructions.h:720
@ USubCond
Subtract only if no unsigned overflow.
Definition: Instructions.h:764
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:734
@ Or
*p = old | v
Definition: Instructions.h:728
@ Sub
*p = old - v
Definition: Instructions.h:722
@ And
*p = old & v
Definition: Instructions.h:724
@ USubSat
*p = usub.sat(old, v); usub.sat matches the behavior of llvm.usub.sat.
Definition: Instructions.h:768
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:756
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:732
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:738
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:736
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:760
@ Nand
*p = ~(old & v)
Definition: Instructions.h:726
bool isFloatingPointOperation() const
Definition: Instructions.h:882
BinOp getOperation() const
Definition: Instructions.h:805
Value * getValOperand()
Definition: Instructions.h:874
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
Definition: Instructions.h:847
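The AtomicRMWInst accessors above are what atomic-expansion hooks inspect when deciding how to legalize an atomicrmw. A hedged sketch of such a classification; the helper name and the particular BinOp grouping are illustrative, not this file's logic:

#include "llvm/IR/Instructions.h"
using namespace llvm;

// Is this atomicrmw one of the integer min/max flavours? Expansion hooks
// ask questions of this shape before picking a lowering strategy.
static bool isIntegerMinMaxRMW(const AtomicRMWInst &RMW) {
  switch (RMW.getOperation()) {
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
  case AtomicRMWInst::UMax:
  case AtomicRMWInst::UMin:
    return true;
  default:
    return false;
  }
}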
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:392
static Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
Definition: Attributes.cpp:234
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:292
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & set()
Definition: BitVector.h:351
bool all() const
all - Returns true if all bits are set.
Definition: BitVector.h:175
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
int64_t getLocMemOffset() const
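CCState and CCValAssign drive argument and return-value assignment during call lowering. The usual walk looks roughly like the sketch below; the CCAssignFn is passed in because the concrete assignment function is target-specific (an assumption of the example, not a statement about this file):

#include "llvm/CodeGen/CallingConvLower.h"
using namespace llvm;

// Assign the outgoing arguments Outs and report how many stack bytes they
// need; Fn is whatever CCAssignFn the target registers.
static uint64_t assignOutgoingArgs(CallingConv::ID CC, bool IsVarArg,
                                   MachineFunction &MF, LLVMContext &Ctx,
                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                                   CCAssignFn Fn) {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, IsVarArg, MF, ArgLocs, Ctx);
  CCInfo.AnalyzeCallOperands(Outs, Fn);

  for (const CCValAssign &VA : ArgLocs) {
    if (VA.isRegLoc()) {
      // The value is passed in VA.getLocReg().
    } else if (VA.isMemLoc()) {
      // The value is stored at VA.getLocMemOffset() in the outgoing area.
    }
  }
  // getStackSize() is the size of the outgoing argument area just computed.
  return CCInfo.getStackSize();
}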
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
bool isIndirectCall() const
Return true if the callsite is an indirect call.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Definition: InstrTypes.h:1494
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:220
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:208
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:420
This class represents an Operation in the Expression.
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits. FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:364
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:847
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
Definition: DenseMap.h:202
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:147
Implements a dense probed hash-table based set.
Definition: DenseSet.h:278
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition: TypeSize.h:314
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:311
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:791
Class to represent function types.
Definition: DerivedTypes.h:105
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:137
Type * getReturnType() const
Definition: DerivedTypes.h:126
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:216
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:766
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:778
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:704
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition: Function.h:277
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:353
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:369
Argument * getArg(unsigned i) const
Definition: Function.h:886
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:38
bool isDSOLocal() const
Definition: GlobalValue.h:306
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:530
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Store the specified register of the given register class to the specified stack frame index.
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Load the specified register of the given register class from the specified stack frame index.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:113
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1887
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2562
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2555
FenceInst * CreateFence(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System, const Twine &Name="")
Definition: IRBuilder.h:1842
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2045
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:545
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1987
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:193
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:550
Value * getAllOnesMask(ElementCount NumElts)
Return an all true boolean vector (mask) with NumElts lanes.
Definition: IRBuilder.h:867
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:900
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:505
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1757
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1387
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:516
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2533
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2449
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1862
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2019
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:535
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2705
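The IRBuilder helpers above are used by the IR-level hooks (fences, masked atomics, interleaved load/store lowering). A small, purely illustrative sketch that emits a fence and builds the all-true mask commonly passed to masked or vector-predicated intrinsics:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Emit a seq_cst fence at the current insertion point and return an
// all-true <NumElts x i1> mask, the usual "no masking" operand for masked
// and vector-predicated intrinsics.
static Value *emitFenceAndAllOnesMask(IRBuilderBase &Builder,
                                      ElementCount NumElts) {
  Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
  return Builder.getAllOnesMask(NumElts);
}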
static InstructionCost getInvalid(CostType Val=0)
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:68
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:76
Class to represent integer types.
Definition: DerivedTypes.h:42
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:55
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
Definition: Instructions.h:176
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:261
Value * getPointerOperand()
Definition: Instructions.h:255
Type * getPointerOperandType() const
Definition: Instructions.h:258
bool isSimple() const
Definition: Instructions.h:247
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:211
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:185
MCContext & getContext() const
Generic base class for all target subtargets.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:398
Metadata node.
Definition: Metadata.h:1073
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1434
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
bool isRISCVVectorTuple() const
Return true if this is a RISCV vector tuple type where the runtime length is machine dependent.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
static MVT getRISCVVectorTupleVT(unsigned Sz, unsigned NFields)
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
unsigned getRISCVVectorTupleNumFields() const
Given a RISC-V vector tuple type, return the num_fields.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:237
bool bitsLT(MVT VT) const
Return true if this has fewer bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
Definition: ValueTypes.cpp:307
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no fewer bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
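MVT is the machine-level value type used throughout the lowering code; the factories above build fixed-length and scalable vector types. A tiny sketch, where the element type, element count, and header path are assumptions of the example rather than anything taken from this file:

#include "llvm/CodeGenTypes/MachineValueType.h" // MVT (header path in recent LLVM)
#include <utility>
using namespace llvm;

// Build the fixed-length <4 x f32> type and its scalable
// <vscale x 4 x f32> counterpart.
static std::pair<MVT, MVT> exampleVectorTypes() {
  MVT Fixed = MVT::getVectorVT(MVT::f32, 4);
  MVT Scalable = MVT::getScalableVectorVT(MVT::f32, 4);
  return {Fixed, Scalable};
}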
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor bloc...
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
Instructions::iterator instr_iterator
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
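MachineInstrBuilder is the fluent interface behind BuildMI, which custom-inserter code uses to materialize machine instructions. A generic sketch of the pattern; the instruction description would normally come from TII->get(<target opcode>), and the helper name is invented for the example:

#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;

// Emit "Dst = <Desc> Src, Imm" immediately before MBBI.
static void emitRegRegImm(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          const DebugLoc &DL, const MCInstrDesc &Desc,
                          Register Dst, Register Src, int64_t Imm) {
  BuildMI(MBB, MBBI, DL, Desc, Dst).addReg(Src).addImm(Imm);
}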
Representation of each machine instruction.
Definition: MachineInstr.h:69
void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
Definition: MachineInstr.h:404
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
This is an abstract virtual class for memory operations.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1878
A RISCV-specific constant pool value.
static RISCVConstantPoolValue * Create(const GlobalValue *GV)
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
RISCVABI::ABI getTargetABI() const
unsigned getMinimumJumpTableEntries() const
bool hasStdExtCOrZca() const
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
unsigned getMaxStoresPerMemcpy(bool OptSize) const
bool hasStdExtDOrZdinx() const
unsigned getMaxLoadsPerMemcmp(bool OptSize) const
bool hasStdExtZfhOrZhinx() const
unsigned getRealMinVLen() const
unsigned getMaxStoresPerMemset(bool OptSize) const
Quantity expandVScale(Quantity X) const
If the ElementCount or TypeSize X is scalable and VScale (VLEN) is exactly known, returns X converted...
bool useRVVForFixedLengthVectors() const
bool isTargetFuchsia() const
bool hasVInstructionsBF16Minimal() const
unsigned getDLenFactor() const
unsigned getMaxStoresPerMemmove(bool OptSize) const
bool hasVInstructionsF16Minimal() const
unsigned getMaxGluedStoresPerMemcpy() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool hasVInstructionsF16() const
unsigned getMaxBuildIntsCost() const
Align getPrefLoopAlignment() const
bool hasVInstructions() const
bool isRegisterReservedByUser(Register i) const override
std::optional< unsigned > getRealVLen() const
bool hasOptimizedSegmentLoadStore(unsigned NF) const
bool useConstantPoolForLargeInts() const
Align getPrefFunctionAlignment() const
bool hasStdExtZfhminOrZhinxmin() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
unsigned getELen() const
bool isTargetAndroid() const
bool hasStdExtFOrZfinx() const
bool isSoftFPABI() const
const RISCVFrameLowering * getFrameLowering() const override
unsigned getFLen() const
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool preferScalarizeSplat(SDNode *N) const override
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is a legal add immediate, that is, the target has add instruction...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y -> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II, StoreInst *SI, SmallVectorImpl< Instruction * > &DeadInsts) const override
Lower an interleave intrinsic to a target specific store intrinsic.
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vlsegN intrinsic.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineBasicBlock * emitDynamicProbedAlloc(MachineInstr &MI, MachineBasicBlock *MBB) const
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II, LoadInst *LI, SmallVectorImpl< Instruction * > &DeadInsts) const override
Lower a deinterleave intrinsic to a target specific load intrinsic.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether the given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this function.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul)
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is a legal icmp immediate, that is, the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vssegN intrinsic.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
static RISCVII::VLMUL getLMUL(MVT VT)
int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, const DataLayout &) const
Returns whether or not generating an interleaved load/store intrinsic for this type will be legal.
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const override
Expands target specific indirect branch for the case of JumpTable expansion.
unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const override
Return the number of registers for a given MVT, for inline assembly.
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return true if the target supports combining a chain like: ...
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a stride load store of the given result type and alignment is legal.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Return true if the node is an UNDEF node.
iterator_range< user_iterator > users()
op_iterator op_end() const
op_iterator op_begin() const
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
static ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
Definition: Type.cpp:812
virtual bool isTargetStrictFPOpcode(unsigned Opcode) const
Returns true if a node with the given target-specific opcode has strict floating-point semantics.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:750
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition: SelectionDAG.h:801
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding=false)
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
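A minimal usage sketch, assuming DAG, DL, XLenVT and an integer SDValue X are in scope inside a lowering routine: test whether X is negative with the getSetCC helper.
SDValue Zero  = DAG.getConstant(0, DL, XLenVT);                 // integer 0 of XLen type
SDValue IsNeg = DAG.getSetCC(DL, XLenVT, X, Zero, ISD::SETLT);  // X < 0 (signed)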
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getRegister(Register Reg, EVT VT)
SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:503
SDValue getStridedStoreVP(SDValue Chain, const SDLoc &DL, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
Definition: SelectionDAG.h:397
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:760
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:856
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition: SelectionDAG.h:827
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:497
const SelectionDAGTargetInfo & getSelectionDAGInfo() const
Definition: SelectionDAG.h:505
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:712
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
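A minimal sketch, assuming DAG, DL, Chain, Ptr and DstPtr are in scope: load an i32 and store it back out, threading the load's output chain into the store.
SDValue Ld = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo());
SDValue St = DAG.getStore(Ld.getValue(1), DL, Ld, DstPtr,       // value 1 of a load is its chain
                          MachinePointerInfo(), Align(4));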
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:890
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:498
std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
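A minimal sketch, assuming DAG, DL and two integer SDValues LHS and RHS are in scope: a signed minimum expressed through the SelectCC helper.
SDValue Min = DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, ISD::SETLT); // LHS < RHS ? LHS : RHS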
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
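A minimal sketch, assuming DAG, DL, an integer EVT VT and an SDValue X are in scope: building (X + 1) & 0xff from the node and constant constructors above.
SDValue One = DAG.getConstant(1, DL, VT);
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, X, One);
SDValue Res = DAG.getNode(ISD::AND, DL, VT, Add,
                          DAG.getConstant(0xff, DL, VT));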
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:700
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:796
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:492
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:873
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
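A minimal sketch, assuming DAG and a 64-bit SDValue Op are in scope: two equivalent ways of asking whether the upper 32 bits of Op are known to be zero.
KnownBits Known = DAG.computeKnownBits(Op);
bool UpperZero1 = Known.countMinLeadingZeros() >= 32;
bool UpperZero2 = DAG.MaskedValueIsZero(Op, APInt::getHighBitsSet(64, 32));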
LLVMContext * getContext() const
Definition: SelectionDAG.h:510
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:767
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:580
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:906
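A minimal sketch, assuming DAG, DL, a vector EVT ContainerVT and a scalar SDValue Scalar are in scope: getSplat chooses BUILD_VECTOR for fixed-length vectors and SPLAT_VECTOR for scalable ones.
SDValue Splat = DAG.getSplat(ContainerVT, DL, Scalar); // Scalar broadcast to every lane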
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
This instruction constructs a fixed permutation of two input vectors.
static bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
ArrayRef< int > getMask() const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
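A minimal sketch, assuming a container Regs of unsigned register numbers is in scope: SmallSet::insert reports via its bool result whether the element was newly inserted.
SmallSet<unsigned, 4> Seen;
for (unsigned Reg : Regs)
  if (!Seen.insert(Reg).second)
    break; // duplicate register encountered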
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:937
void reserve(size_type N)
Definition: SmallVector.h:663
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:683
void push_back(const T &Elt)
Definition: SmallVector.h:413
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:286
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
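A minimal sketch, assuming DAG, DL, XLenVT and an SDValue Chain are in scope: the usual pattern of collecting operands in a SmallVector before building a node.
SmallVector<SDValue, 8> Ops;
Ops.reserve(3);
Ops.push_back(Chain);
Ops.push_back(DAG.getConstant(0, DL, XLenVT));
Ops.push_back(DAG.getRegister(RISCV::X10, XLenVT));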
An instruction for storing to memory.
Definition: Instructions.h:292
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:150
std::string lower() const
Definition: StringRef.cpp:113
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition: StringSwitch.h:90
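A minimal sketch with illustrative strings and values only (not taken from this file): mapping an ABI name to a small integer code with StringSwitch.
static unsigned parseABICode(StringRef Name) {
  return StringSwitch<unsigned>(Name)
      .Case("ilp32", 0)
      .Cases("ilp32f", "ilp32d", 1)
      .Cases("lp64", "lp64f", 2)
      .Default(~0u); // unknown name
}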
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:406
static TargetExtType * get(LLVMContext &Context, StringRef Name, ArrayRef< Type * > Types={}, ArrayRef< unsigned > Ints={})
Return a target extension type having the specified name and optional type and integer parameters.
Definition: Type.cpp:895
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
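A minimal sketch of calls as they would appear inside a TargetLowering constructor; the opcode, type and action combinations here are purely illustrative.
setOperationAction(ISD::BSWAP, MVT::i32, Legal);    // natively supported
setOperationAction(ISD::SDIV, MVT::i64, Expand);    // expanded into other nodes
setOperationAction(ISD::SELECT, MVT::f64, Custom);  // handled in LowerOperation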
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
unsigned MaxGluedStoresPerMemcpy
Specify max number of store instructions to glue in inlined memcpy.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const
Return the number of registers that this ValueType will eventually require.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
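A minimal sketch (illustrative types), again assumed to run inside a TargetLowering constructor: when no f32-to-f64 extending load or f64-to-f32 truncating store is natively available, both are marked Expand.
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);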
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren 1...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
const MCSubtargetInfo * getMCSubtargetInfo() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual TargetLoweringObjectFile * getObjFileLowering() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual bool isRegisterReservedByUser(Register R) const
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition: Triple.h:752
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:348
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
Type * getStructElementType(unsigned N) const
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:258
bool isTargetExtTy() const
Return true if this is a target extension type.
Definition: Type.h:203
bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
static IntegerType * getInt8Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition: Type.h:384
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:31
Value * getOperand(unsigned i) const
Definition: User.h:228
unsigned getNumOperands() const
Definition: User.h:250
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
Base class of all SIMD vector types.
Definition: DerivedTypes.h:427
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition: DerivedTypes.h:665
Type * getElementType() const
Definition: DerivedTypes.h:460
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition: TypeSize.h:183
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:232
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition: TypeSize.h:258
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr bool isZero() const
Definition: TypeSize.h:156
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:254
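A minimal sketch of scalable TypeSize arithmetic, using the RISCV::RVVBitsPerBlock constant listed further below.
TypeSize VecBits  = TypeSize::getScalable(RISCV::RVVBitsPerBlock); // vscale x 64 bits
TypeSize HalfBits = VecBits.divideCoefficientBy(2);                // vscale x 32 bits
bool Fits = TypeSize::isKnownLE(HalfBits, VecBits);                // true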
self_iterator getIterator()
Definition: ilist_node.h:132
#define INT64_MIN
Definition: DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ Entry
Definition: COFF.h:844
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
Definition: CallingConv.h:268
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
Definition: CallingConv.h:255
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:780
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1197
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1193
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:753
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:491
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
Definition: ISDOpcodes.h:1417
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
Definition: ISDOpcodes.h:1360
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1450
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1340
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:574
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:744
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1226
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1342
@ STRICT_FCEIL
Definition: ISDOpcodes.h:441
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1343
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1102
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:498
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:841
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:558
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1435
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1439
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:717
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1299
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1304
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1449
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:492
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:964
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1338
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:954
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1339
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
Definition: ISDOpcodes.h:1270
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
Definition: ISDOpcodes.h:997
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:418
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1494
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:936
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:685
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:465
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:635
@ READSTEADYCOUNTER
READSTEADYCOUNTER - This corresponds to the readsteadycounter intrinsic.
Definition: ISDOpcodes.h:1259
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1432
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:752
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1292
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1436
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1059
@ STRICT_LROUND
Definition: ISDOpcodes.h:446
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:981
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1148
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1341
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1127
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the same...
Definition: ISDOpcodes.h:601
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:661
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:515
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:522
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:757
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1451
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:642
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1222
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:445
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1444
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition: ISDOpcodes.h:931
@ STRICT_FP_TO_FP16
Definition: ISDOpcodes.h:967
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:674
@ STRICT_FP16_TO_FP
Definition: ISDOpcodes.h:966
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:615
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1336
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:588
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:550
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1282
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:907
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:772
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1407
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1319
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1344
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1031
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1112
@ STRICT_LRINT
Definition: ISDOpcodes.h:448
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:849
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:697
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition: ISDOpcodes.h:606
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:939
@ STRICT_FROUND
Definition: ISDOpcodes.h:443
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:766
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:464
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
Definition: ISDOpcodes.h:1372
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1452
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:442
@ STRICT_FROUNDEVEN
Definition: ISDOpcodes.h:444
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:135
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:973
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1334
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:458
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:480
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:457
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1050
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1335
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:887
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1253
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:485
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1279
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:680
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition: ISDOpcodes.h:651
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:539
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ STRICT_LLRINT
Definition: ISDOpcodes.h:449
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:627
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1333
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
Definition: ISDOpcodes.h:1004
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:920
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
Definition: ISDOpcodes.h:669
@ STRICT_FLDEXP
Definition: ISDOpcodes.h:421
@ STRICT_LLROUND
Definition: ISDOpcodes.h:447
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:438
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:906
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1440
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1217
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1141
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:794
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:508
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ STRICT_FRINT
Definition: ISDOpcodes.h:437
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the sa...
Definition: ISDOpcodes.h:595
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
Definition: ISDOpcodes.h:1055
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition: ISDOpcodes.h:832
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:692
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition: ISDOpcodes.h:1276
@ TRUNCATE_USAT_U
Definition: ISDOpcodes.h:836
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:530
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1572
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1572
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1559
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1610
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1590
bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1655
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:731
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ TAIL_UNDISTURBED_MASK_UNDISTURBED
static VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
@ SplitF64
Turns an f64 into a pair of i32s.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #3 and #4) ...
@ BuildPairF64
Turns a pair of i32s into an f64.
@ BuildGPRPair
Turn a pair of i<xlen>s into an even-odd register pair (untyped).
@ SplitGPRPair
Turn an even-odd register pair (untyped) into a pair of i<xlen>s.
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
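A minimal sketch, assuming an int64_t Imm and a RISCVSubtarget Subtarget are in scope; these helpers live in the RISCVMatInt namespace in the backend.
RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
unsigned NumInsts = Seq.size(); // instructions needed to materialize Imm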
static unsigned decodeVSEW(unsigned VSEW)
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
static RISCVII::VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned encodeSEW(unsigned SEW)
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
static constexpr unsigned FPMASK_Positive_Infinity
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex)
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:57
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
uint32_t read32le(const void *P)
Definition: Endian.h:425
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
@ Offset
Definition: DWP.cpp:480
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
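A minimal sketch, assuming N is an SDNode* for a binary operation: the usual constant-operand checks used by DAG combines.
SDValue RHS = N->getOperand(1);
bool AddOfZero      = N->getOpcode() == ISD::ADD && isNullConstant(RHS);
bool XorWithAllOnes = N->getOpcode() == ISD::XOR && isAllOnesConstant(RHS);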
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2448
MCCodeEmitter * createRISCVMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx)
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition: bit.h:317
static const MachineMemOperand::Flags MONontemporalBit0
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and congruent to Skew modulo Align.
Definition: MathExtras.h:557
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:298
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1547
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:348
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:396
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1952
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:342
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:293
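A minimal sketch, assuming DAG, DL, VT, an SDValue X and an unsigned Divisor are in scope: replacing an unsigned divide by a power of two with a right shift.
if (isPowerOf2_32(Divisor)) {
  unsigned ShAmt = Log2_32(Divisor);
  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X,
                            DAG.getShiftAmountConstant(ShAmt, VT, DL));
}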
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:275
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:405
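A one-liner sketch of divideCeil (and of isMask_64, a few entries up); the register-count framing is illustrative only:
  #include "llvm/Support/MathExtras.h"
  #include <cassert>

  void divideCeilSketch() {
    // How many 64-bit chunks are needed to hold 100 bits.
    assert(llvm::divideCeil(100u, 64u) == 2u);
    // 0xFF is a contiguous run of ones starting at bit 0.
    assert(llvm::isMask_64(0xFF));
  }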
CombineLevel
Definition: DAGCombine.h:15
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
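The RoundingMode enumerators above map naturally onto the RISC-V static rounding-mode mnemonics. A hedged sketch of such a mapping; the encoding actually used by the backend lives elsewhere (e.g. in the RISCVFPRndMode helpers), so this table is illustrative only:
  #include "llvm/ADT/FloatingPointMode.h"

  const char *roundingModeName(llvm::RoundingMode RM) {
    switch (RM) {
    case llvm::RoundingMode::NearestTiesToEven: return "rne"; // nearest, ties to even
    case llvm::RoundingMode::TowardZero:        return "rtz"; // toward zero
    case llvm::RoundingMode::TowardNegative:    return "rdn"; // round down
    case llvm::RoundingMode::TowardPositive:    return "rup"; // round up
    case llvm::RoundingMode::NearestTiesToAway: return "rmm"; // nearest, ties away
    default:                                    return "dyn"; // dynamic / unsupported
    }
  }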
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
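A minimal sketch of the two ordering predicates (isReleaseOrStronger above, isAcquireOrStronger here); the fence-placement framing is illustrative and not this backend's actual policy:
  #include "llvm/Support/AtomicOrdering.h"

  // A leading fence is conservatively required for release semantics,
  // a trailing fence for acquire semantics.
  bool needsLeadingFence(llvm::AtomicOrdering AO) {
    return llvm::isReleaseOrStronger(AO);
  }
  bool needsTrailingFence(llvm::AtomicOrdering AO) {
    return llvm::isAcquireOrStronger(AO);
  }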
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1945
bool RISCVCCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
RISCVCCAssignFn - This target-specific function extends the default CCValAssign with additional infor...
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
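A small sketch of count_if and is_contained on a shuffle-mask-like container; the data is made up:
  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallVector.h"

  bool maskQuerySketch() {
    llvm::SmallVector<int, 8> Mask = {0, 2, 4, -1, 6};
    // Count the undef (-1) entries and check membership without explicit iterators.
    auto NumUndef = llvm::count_if(Mask, [](int M) { return M < 0; });
    return NumUndef == 1 && llvm::is_contained(Mask, 4);
  }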
void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
InstructionCost Cost
bool CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
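A hedged sketch of the shuffle-mask utilities listed on this page (createSequentialMask here, widenShuffleMaskElts further up); the masks are arbitrary examples:
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Analysis/VectorUtils.h"

  void shuffleMaskSketch() {
    // {2, 3, 4, 5, -1, -1}: four sequential indices starting at 2, then two undefs.
    llvm::SmallVector<int, 16> Mask = llvm::createSequentialMask(2, 4, 2);

    // Widening {0, 1, 2, 3} by a factor of 2 succeeds and yields {0, 1},
    // because each adjacent pair addresses one widened element.
    llvm::SmallVector<int, 8> Wide;
    bool Widened = llvm::widenShuffleMaskElts(2, {0, 1, 2, 3}, Wide);
    (void)Mask;
    (void)Widened;
  }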
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
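The SDValue constant predicates collected above (isNullConstant, isOneConstant, isAllOnesConstant) are typically used to classify operands during combines. A sketch; classifyConstant is a made-up helper and Op is any value produced by a SelectionDAG:
  #include "llvm/CodeGen/SelectionDAGNodes.h"

  // Returns 0, 1 or -1 for the matching constant, and 2 for anything else.
  static int classifyConstant(llvm::SDValue Op) {
    if (llvm::isNullConstant(Op))
      return 0;
    if (llvm::isOneConstant(Op))
      return 1;
    if (llvm::isAllOnesConstant(Op))
      return -1;
    return 2;
  }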
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
unsigned StepDenominator
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:302
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:315
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
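A short sketch tying together the Align entries above (the struct, value(), and the Log2(Align) helper listed earlier); the constant 16 is arbitrary:
  #include "llvm/Support/Alignment.h"
  #include <cassert>

  void alignSketch() {
    llvm::Align A(16);          // must be a non-zero power of two
    assert(llvm::Log2(A) == 4); // log2 of the alignment
    assert(A.value() == 16);    // raw byte value; see the caveat on value() above
  }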
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
uint64_t getScalarStoreSize() const
Definition: ValueTypes.h:397
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:279
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:295
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:345
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:238
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:354
unsigned getRISCVVectorTupleNumFields() const
Given a RISCV vector tuple type, return the num_fields.
Definition: ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:425
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
bool isRISCVVectorTuple() const
Return true if this is a RISCV vector tuple type.
Definition: ValueTypes.h:179
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
bool isFixedLengthVector() const
Definition: ValueTypes.h:181
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition: ValueTypes.h:414
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition: ValueTypes.h:287
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:210
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:303
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
Definition: KnownBits.cpp:1049
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:65
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition: KnownBits.h:266
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:153
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:43
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:164
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:73
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:288
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:303
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:172
static KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
Definition: KnownBits.cpp:1009
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition: KnownBits.h:272
static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
Definition: KnownBits.cpp:285
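A brief sketch of a few of the KnownBits operations above, for an 8-bit value whose two low bits are known to be zero; knownBitsSketch is a made-up name:
  #include "llvm/Support/KnownBits.h"

  unsigned knownBitsSketch() {
    llvm::KnownBits Known(8);
    Known.Zero.setLowBits(2); // bits [1:0] are known to be 0

    // No bit is known to be one, so up to all 8 trailing bits may still be zero.
    unsigned MaxTZ = Known.countMaxTrailingZeros();

    // Zero-extend the tracked value to 16 bits.
    llvm::KnownBits Wide = Known.zext(16);
    return MaxTZ + Wide.getBitWidth(); // 8 + 16
  }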
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
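A minimal sketch of the MachinePointerInfo factories above, as they are typically used when lowering stack arguments; frameSlotInfo and its parameters are made-up names supplied by the caller:
  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineMemOperand.h"

  llvm::MachinePointerInfo frameSlotInfo(llvm::MachineFunction &MF, int FI,
                                         int64_t Offset) {
    // Describe a frame-index slot, then re-base the description at a byte offset.
    return llvm::MachinePointerInfo::getFixedStack(MF, FI).getWithOffset(Offset);
  }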
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
BitVector getReservedRegs(const MachineFunction &MF) const override
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasDisjoint() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)