LLVM 20.0.0git
TargetLowering.cpp
Go to the documentation of this file.
1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/STLExtras.h"
25#include "llvm/IR/DataLayout.h"
28#include "llvm/IR/LLVMContext.h"
29#include "llvm/MC/MCAsmInfo.h"
30#include "llvm/MC/MCExpr.h"
36#include <cctype>
37using namespace llvm;
38
39/// NOTE: The TargetMachine owns TLOF.
41 : TargetLoweringBase(tm) {}
42
43const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
44 return nullptr;
45}
46
49}
50
51/// Check whether a given call node is in tail position within its function. If
52/// so, it sets Chain to the input chain of the tail call.
54 SDValue &Chain) const {
56
57 // First, check if tail calls have been disabled in this function.
58 if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
59 return false;
60
61 // Conservatively require the attributes of the call to match those of
62 // the return. Ignore following attributes because they don't affect the
63 // call sequence.
64 AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
65 for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
66 Attribute::DereferenceableOrNull, Attribute::NoAlias,
67 Attribute::NonNull, Attribute::NoUndef,
68 Attribute::Range, Attribute::NoFPClass})
69 CallerAttrs.removeAttribute(Attr);
70
71 if (CallerAttrs.hasAttributes())
72 return false;
73
74 // It's not safe to eliminate the sign / zero extension of the return value.
75 if (CallerAttrs.contains(Attribute::ZExt) ||
76 CallerAttrs.contains(Attribute::SExt))
77 return false;
78
79 // Check if the only use is a function return node.
80 return isUsedByReturnOnly(Node, Chain);
81}
82
84 const uint32_t *CallerPreservedMask,
85 const SmallVectorImpl<CCValAssign> &ArgLocs,
86 const SmallVectorImpl<SDValue> &OutVals) const {
87 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
88 const CCValAssign &ArgLoc = ArgLocs[I];
89 if (!ArgLoc.isRegLoc())
90 continue;
91 MCRegister Reg = ArgLoc.getLocReg();
92 // Only look at callee saved registers.
93 if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
94 continue;
95 // Check that we pass the value used for the caller.
96 // (We look for a CopyFromReg reading a virtual register that is used
97 // for the function live-in value of register Reg)
98 SDValue Value = OutVals[I];
99 if (Value->getOpcode() == ISD::AssertZext)
100 Value = Value.getOperand(0);
101 if (Value->getOpcode() != ISD::CopyFromReg)
102 return false;
103 Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
104 if (MRI.getLiveInPhysReg(ArgReg) != Reg)
105 return false;
106 }
107 return true;
108}
109
110/// Set CallLoweringInfo attribute flags based on a call instruction
111/// and called function attributes.
113 unsigned ArgIdx) {
114 IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
115 IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
116 IsNoExt = Call->paramHasAttr(ArgIdx, Attribute::NoExt);
117 IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
118 IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
119 IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
120 IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
121 IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
122 IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
123 IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
124 IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
125 IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
126 IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
127 Alignment = Call->getParamStackAlign(ArgIdx);
128 IndirectType = nullptr;
130 "multiple ABI attributes?");
131 if (IsByVal) {
132 IndirectType = Call->getParamByValType(ArgIdx);
133 if (!Alignment)
134 Alignment = Call->getParamAlign(ArgIdx);
135 }
136 if (IsPreallocated)
137 IndirectType = Call->getParamPreallocatedType(ArgIdx);
138 if (IsInAlloca)
139 IndirectType = Call->getParamInAllocaType(ArgIdx);
140 if (IsSRet)
141 IndirectType = Call->getParamStructRetType(ArgIdx);
142}
143
144/// Generate a libcall taking the given operands as arguments and returning a
145/// result of type RetVT.
146std::pair<SDValue, SDValue>
149 MakeLibCallOptions CallOptions,
150 const SDLoc &dl,
151 SDValue InChain) const {
152 if (!InChain)
153 InChain = DAG.getEntryNode();
154
156 Args.reserve(Ops.size());
157
159 for (unsigned i = 0; i < Ops.size(); ++i) {
160 SDValue NewOp = Ops[i];
161 Entry.Node = NewOp;
162 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
163 Entry.IsSExt =
164 shouldSignExtendTypeInLibCall(Entry.Ty, CallOptions.IsSigned);
165 Entry.IsZExt = !Entry.IsSExt;
166
167 if (CallOptions.IsSoften &&
169 Entry.IsSExt = Entry.IsZExt = false;
170 }
171 Args.push_back(Entry);
172 }
173
174 if (LC == RTLIB::UNKNOWN_LIBCALL)
175 report_fatal_error("Unsupported library call operation!");
178
179 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
181 bool signExtend = shouldSignExtendTypeInLibCall(RetTy, CallOptions.IsSigned);
182 bool zeroExtend = !signExtend;
183
184 if (CallOptions.IsSoften &&
186 signExtend = zeroExtend = false;
187 }
188
189 CLI.setDebugLoc(dl)
190 .setChain(InChain)
191 .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
192 .setNoReturn(CallOptions.DoesNotReturn)
195 .setSExtResult(signExtend)
196 .setZExtResult(zeroExtend);
197 return LowerCallTo(CLI);
198}
199
201 std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
202 unsigned SrcAS, const AttributeList &FuncAttributes) const {
203 if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
204 Op.getSrcAlign() < Op.getDstAlign())
205 return false;
206
207 EVT VT = getOptimalMemOpType(Op, FuncAttributes);
208
209 if (VT == MVT::Other) {
210 // Use the largest integer type whose alignment constraints are satisfied.
211 // We only need to check DstAlign here as SrcAlign is always greater or
212 // equal to DstAlign (or zero).
213 VT = MVT::LAST_INTEGER_VALUETYPE;
214 if (Op.isFixedDstAlign())
215 while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
216 !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
218 assert(VT.isInteger());
219
220 // Find the largest legal integer type.
221 MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
222 while (!isTypeLegal(LVT))
223 LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
224 assert(LVT.isInteger());
225
226 // If the type we've chosen is larger than the largest legal integer type
227 // then use that instead.
228 if (VT.bitsGT(LVT))
229 VT = LVT;
230 }
231
232 unsigned NumMemOps = 0;
233 uint64_t Size = Op.size();
234 while (Size) {
235 unsigned VTSize = VT.getSizeInBits() / 8;
236 while (VTSize > Size) {
237 // For now, only use non-vector load / store's for the left-over pieces.
238 EVT NewVT = VT;
239 unsigned NewVTSize;
240
241 bool Found = false;
242 if (VT.isVector() || VT.isFloatingPoint()) {
243 NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
246 Found = true;
247 else if (NewVT == MVT::i64 &&
249 isSafeMemOpType(MVT::f64)) {
250 // i64 is usually not legal on 32-bit targets, but f64 may be.
251 NewVT = MVT::f64;
252 Found = true;
253 }
254 }
255
256 if (!Found) {
257 do {
258 NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
259 if (NewVT == MVT::i8)
260 break;
261 } while (!isSafeMemOpType(NewVT.getSimpleVT()));
262 }
263 NewVTSize = NewVT.getSizeInBits() / 8;
264
265 // If the new VT cannot cover all of the remaining bits, then consider
266 // issuing a (or a pair of) unaligned and overlapping load / store.
267 unsigned Fast;
268 if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
270 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
272 Fast)
273 VTSize = Size;
274 else {
275 VT = NewVT;
276 VTSize = NewVTSize;
277 }
278 }
279
280 if (++NumMemOps > Limit)
281 return false;
282
283 MemOps.push_back(VT);
284 Size -= VTSize;
285 }
286
287 return true;
288}
289
290/// Soften the operands of a comparison. This code is shared among BR_CC,
291/// SELECT_CC, and SETCC handlers.
293 SDValue &NewLHS, SDValue &NewRHS,
294 ISD::CondCode &CCCode,
295 const SDLoc &dl, const SDValue OldLHS,
296 const SDValue OldRHS) const {
297 SDValue Chain;
298 return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
299 OldRHS, Chain);
300}
301
303 SDValue &NewLHS, SDValue &NewRHS,
304 ISD::CondCode &CCCode,
305 const SDLoc &dl, const SDValue OldLHS,
306 const SDValue OldRHS,
307 SDValue &Chain,
308 bool IsSignaling) const {
309 // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
310 // not supporting it. We can update this code when libgcc provides such
311 // functions.
312
313 assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
314 && "Unsupported setcc type!");
315
316 // Expand into one or more soft-fp libcall(s).
317 RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
318 bool ShouldInvertCC = false;
319 switch (CCCode) {
320 case ISD::SETEQ:
321 case ISD::SETOEQ:
322 LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
323 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
324 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
325 break;
326 case ISD::SETNE:
327 case ISD::SETUNE:
328 LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
329 (VT == MVT::f64) ? RTLIB::UNE_F64 :
330 (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
331 break;
332 case ISD::SETGE:
333 case ISD::SETOGE:
334 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
335 (VT == MVT::f64) ? RTLIB::OGE_F64 :
336 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
337 break;
338 case ISD::SETLT:
339 case ISD::SETOLT:
340 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
341 (VT == MVT::f64) ? RTLIB::OLT_F64 :
342 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
343 break;
344 case ISD::SETLE:
345 case ISD::SETOLE:
346 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
347 (VT == MVT::f64) ? RTLIB::OLE_F64 :
348 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
349 break;
350 case ISD::SETGT:
351 case ISD::SETOGT:
352 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
353 (VT == MVT::f64) ? RTLIB::OGT_F64 :
354 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
355 break;
356 case ISD::SETO:
357 ShouldInvertCC = true;
358 [[fallthrough]];
359 case ISD::SETUO:
360 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
361 (VT == MVT::f64) ? RTLIB::UO_F64 :
362 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
363 break;
364 case ISD::SETONE:
365 // SETONE = O && UNE
366 ShouldInvertCC = true;
367 [[fallthrough]];
368 case ISD::SETUEQ:
369 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
370 (VT == MVT::f64) ? RTLIB::UO_F64 :
371 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
372 LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
373 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
374 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
375 break;
376 default:
377 // Invert CC for unordered comparisons
378 ShouldInvertCC = true;
379 switch (CCCode) {
380 case ISD::SETULT:
381 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
382 (VT == MVT::f64) ? RTLIB::OGE_F64 :
383 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
384 break;
385 case ISD::SETULE:
386 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
387 (VT == MVT::f64) ? RTLIB::OGT_F64 :
388 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
389 break;
390 case ISD::SETUGT:
391 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
392 (VT == MVT::f64) ? RTLIB::OLE_F64 :
393 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
394 break;
395 case ISD::SETUGE:
396 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
397 (VT == MVT::f64) ? RTLIB::OLT_F64 :
398 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
399 break;
400 default: llvm_unreachable("Do not know how to soften this setcc!");
401 }
402 }
403
404 // Use the target specific return value for comparison lib calls.
406 SDValue Ops[2] = {NewLHS, NewRHS};
408 EVT OpsVT[2] = { OldLHS.getValueType(),
409 OldRHS.getValueType() };
410 CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
411 auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
412 NewLHS = Call.first;
413 NewRHS = DAG.getConstant(0, dl, RetVT);
414
415 CCCode = getCmpLibcallCC(LC1);
416 if (ShouldInvertCC) {
417 assert(RetVT.isInteger());
418 CCCode = getSetCCInverse(CCCode, RetVT);
419 }
420
421 if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
422 // Update Chain.
423 Chain = Call.second;
424 } else {
425 EVT SetCCVT =
426 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
427 SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
428 auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
429 CCCode = getCmpLibcallCC(LC2);
430 if (ShouldInvertCC)
431 CCCode = getSetCCInverse(CCCode, RetVT);
432 NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
433 if (Chain)
434 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
435 Call2.second);
436 NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
437 Tmp.getValueType(), Tmp, NewLHS);
438 NewRHS = SDValue();
439 }
440}
441
442/// Return the entry encoding for a jump table in the current function. The
443/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
445 // In non-pic modes, just use the address of a block.
446 if (!isPositionIndependent())
448
449 // In PIC mode, if the target supports a GPRel32 directive, use it.
450 if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
452
453 // Otherwise, use a label difference.
455}
456
458 SelectionDAG &DAG) const {
459 // If our PIC model is GP relative, use the global offset table as the base.
460 unsigned JTEncoding = getJumpTableEncoding();
461
465
466 return Table;
467}
468
469/// This returns the relocation base for the given PIC jumptable, the same as
470/// getPICJumpTableRelocBase, but as an MCExpr.
471const MCExpr *
473 unsigned JTI,MCContext &Ctx) const{
474 // The normal PIC reloc base is the label at the start of the jump table.
475 return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
476}
477
479 SDValue Addr, int JTI,
480 SelectionDAG &DAG) const {
481 SDValue Chain = Value;
482 // Jump table debug info is only needed if CodeView is enabled.
484 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
485 }
486 return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
487}
488
489bool
491 const TargetMachine &TM = getTargetMachine();
492 const GlobalValue *GV = GA->getGlobal();
493
494 // If the address is not even local to this DSO we will have to load it from
495 // a got and then add the offset.
496 if (!TM.shouldAssumeDSOLocal(GV))
497 return false;
498
499 // If the code is position independent we will have to add a base register.
500 if (isPositionIndependent())
501 return false;
502
503 // Otherwise we can do it.
504 return true;
505}
506
507//===----------------------------------------------------------------------===//
508// Optimization Methods
509//===----------------------------------------------------------------------===//
510
511/// If the specified instruction has a constant integer operand and there are
512/// bits set in that constant that are not demanded, then clear those bits and
513/// return true.
515 const APInt &DemandedBits,
516 const APInt &DemandedElts,
517 TargetLoweringOpt &TLO) const {
518 SDLoc DL(Op);
519 unsigned Opcode = Op.getOpcode();
520
521 // Early-out if we've ended up calling an undemanded node, leave this to
522 // constant folding.
523 if (DemandedBits.isZero() || DemandedElts.isZero())
524 return false;
525
526 // Do target-specific constant optimization.
527 if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
528 return TLO.New.getNode();
529
530 // FIXME: ISD::SELECT, ISD::SELECT_CC
531 switch (Opcode) {
532 default:
533 break;
534 case ISD::XOR:
535 case ISD::AND:
536 case ISD::OR: {
537 auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
538 if (!Op1C || Op1C->isOpaque())
539 return false;
540
541 // If this is a 'not' op, don't touch it because that's a canonical form.
542 const APInt &C = Op1C->getAPIntValue();
543 if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
544 return false;
545
546 if (!C.isSubsetOf(DemandedBits)) {
547 EVT VT = Op.getValueType();
548 SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
549 SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
550 Op->getFlags());
551 return TLO.CombineTo(Op, NewOp);
552 }
553
554 break;
555 }
556 }
557
558 return false;
559}
560
562 const APInt &DemandedBits,
563 TargetLoweringOpt &TLO) const {
564 EVT VT = Op.getValueType();
565 APInt DemandedElts = VT.isVector()
567 : APInt(1, 1);
568 return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
569}
570
571/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
572/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
573/// but it could be generalized for targets with other types of implicit
574/// widening casts.
576 const APInt &DemandedBits,
577 TargetLoweringOpt &TLO) const {
578 assert(Op.getNumOperands() == 2 &&
579 "ShrinkDemandedOp only supports binary operators!");
580 assert(Op.getNode()->getNumValues() == 1 &&
581 "ShrinkDemandedOp only supports nodes with one result!");
582
583 EVT VT = Op.getValueType();
584 SelectionDAG &DAG = TLO.DAG;
585 SDLoc dl(Op);
586
587 // Early return, as this function cannot handle vector types.
588 if (VT.isVector())
589 return false;
590
591 assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
592 Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
593 "ShrinkDemandedOp only supports operands that have the same size!");
594
595 // Don't do this if the node has another user, which may require the
596 // full value.
597 if (!Op.getNode()->hasOneUse())
598 return false;
599
600 // Search for the smallest integer type with free casts to and from
601 // Op's type. For expedience, just check power-of-2 integer types.
602 unsigned DemandedSize = DemandedBits.getActiveBits();
603 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
604 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
605 EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
606 if (isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT)) {
607 // We found a type with free casts.
608
609 // If the operation has the 'disjoint' flag, then the
610 // operands on the new node are also disjoint.
611 SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint
613 SDValue X = DAG.getNode(
614 Op.getOpcode(), dl, SmallVT,
615 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
616 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)), Flags);
617 assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
618 SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
619 return TLO.CombineTo(Op, Z);
620 }
621 }
622 return false;
623}
624
626 DAGCombinerInfo &DCI) const {
627 SelectionDAG &DAG = DCI.DAG;
628 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
629 !DCI.isBeforeLegalizeOps());
630 KnownBits Known;
631
632 bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
633 if (Simplified) {
634 DCI.AddToWorklist(Op.getNode());
636 }
637 return Simplified;
638}
639
641 const APInt &DemandedElts,
642 DAGCombinerInfo &DCI) const {
643 SelectionDAG &DAG = DCI.DAG;
644 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
645 !DCI.isBeforeLegalizeOps());
646 KnownBits Known;
647
648 bool Simplified =
649 SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
650 if (Simplified) {
651 DCI.AddToWorklist(Op.getNode());
653 }
654 return Simplified;
655}
656
658 KnownBits &Known,
660 unsigned Depth,
661 bool AssumeSingleUse) const {
662 EVT VT = Op.getValueType();
663
664 // Since the number of lanes in a scalable vector is unknown at compile time,
665 // we track one bit which is implicitly broadcast to all lanes. This means
666 // that all lanes in a scalable vector are considered demanded.
667 APInt DemandedElts = VT.isFixedLengthVector()
669 : APInt(1, 1);
670 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
671 AssumeSingleUse);
672}
673
674// TODO: Under what circumstances can we create nodes? Constant folding?
676 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
677 SelectionDAG &DAG, unsigned Depth) const {
678 EVT VT = Op.getValueType();
679
680 // Limit search depth.
682 return SDValue();
683
684 // Ignore UNDEFs.
685 if (Op.isUndef())
686 return SDValue();
687
688 // Not demanding any bits/elts from Op.
689 if (DemandedBits == 0 || DemandedElts == 0)
690 return DAG.getUNDEF(VT);
691
692 bool IsLE = DAG.getDataLayout().isLittleEndian();
693 unsigned NumElts = DemandedElts.getBitWidth();
694 unsigned BitWidth = DemandedBits.getBitWidth();
695 KnownBits LHSKnown, RHSKnown;
696 switch (Op.getOpcode()) {
697 case ISD::BITCAST: {
698 if (VT.isScalableVector())
699 return SDValue();
700
701 SDValue Src = peekThroughBitcasts(Op.getOperand(0));
702 EVT SrcVT = Src.getValueType();
703 EVT DstVT = Op.getValueType();
704 if (SrcVT == DstVT)
705 return Src;
706
707 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
708 unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
709 if (NumSrcEltBits == NumDstEltBits)
710 if (SDValue V = SimplifyMultipleUseDemandedBits(
711 Src, DemandedBits, DemandedElts, DAG, Depth + 1))
712 return DAG.getBitcast(DstVT, V);
713
714 if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
715 unsigned Scale = NumDstEltBits / NumSrcEltBits;
716 unsigned NumSrcElts = SrcVT.getVectorNumElements();
717 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
718 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
719 for (unsigned i = 0; i != Scale; ++i) {
720 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
721 unsigned BitOffset = EltOffset * NumSrcEltBits;
722 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
723 if (!Sub.isZero()) {
724 DemandedSrcBits |= Sub;
725 for (unsigned j = 0; j != NumElts; ++j)
726 if (DemandedElts[j])
727 DemandedSrcElts.setBit((j * Scale) + i);
728 }
729 }
730
731 if (SDValue V = SimplifyMultipleUseDemandedBits(
732 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
733 return DAG.getBitcast(DstVT, V);
734 }
735
736 // TODO - bigendian once we have test coverage.
737 if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
738 unsigned Scale = NumSrcEltBits / NumDstEltBits;
739 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
740 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
741 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
742 for (unsigned i = 0; i != NumElts; ++i)
743 if (DemandedElts[i]) {
744 unsigned Offset = (i % Scale) * NumDstEltBits;
745 DemandedSrcBits.insertBits(DemandedBits, Offset);
746 DemandedSrcElts.setBit(i / Scale);
747 }
748
749 if (SDValue V = SimplifyMultipleUseDemandedBits(
750 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
751 return DAG.getBitcast(DstVT, V);
752 }
753
754 break;
755 }
756 case ISD::FREEZE: {
757 SDValue N0 = Op.getOperand(0);
758 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
759 /*PoisonOnly=*/false))
760 return N0;
761 break;
762 }
763 case ISD::AND: {
764 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
765 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
766
767 // If all of the demanded bits are known 1 on one side, return the other.
768 // These bits cannot contribute to the result of the 'and' in this
769 // context.
770 if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
771 return Op.getOperand(0);
772 if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
773 return Op.getOperand(1);
774 break;
775 }
776 case ISD::OR: {
777 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
778 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
779
780 // If all of the demanded bits are known zero on one side, return the
781 // other. These bits cannot contribute to the result of the 'or' in this
782 // context.
783 if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
784 return Op.getOperand(0);
785 if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
786 return Op.getOperand(1);
787 break;
788 }
789 case ISD::XOR: {
790 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
791 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
792
793 // If all of the demanded bits are known zero on one side, return the
794 // other.
795 if (DemandedBits.isSubsetOf(RHSKnown.Zero))
796 return Op.getOperand(0);
797 if (DemandedBits.isSubsetOf(LHSKnown.Zero))
798 return Op.getOperand(1);
799 break;
800 }
801 case ISD::ADD: {
802 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
803 if (RHSKnown.isZero())
804 return Op.getOperand(0);
805
806 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
807 if (LHSKnown.isZero())
808 return Op.getOperand(1);
809 break;
810 }
811 case ISD::SHL: {
812 // If we are only demanding sign bits then we can use the shift source
813 // directly.
814 if (std::optional<uint64_t> MaxSA =
815 DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
816 SDValue Op0 = Op.getOperand(0);
817 unsigned ShAmt = *MaxSA;
818 unsigned NumSignBits =
819 DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
820 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
821 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
822 return Op0;
823 }
824 break;
825 }
826 case ISD::SRL: {
827 // If we are only demanding sign bits then we can use the shift source
828 // directly.
829 if (std::optional<uint64_t> MaxSA =
830 DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
831 SDValue Op0 = Op.getOperand(0);
832 unsigned ShAmt = *MaxSA;
833 // Must already be signbits in DemandedBits bounds, and can't demand any
834 // shifted in zeroes.
835 if (DemandedBits.countl_zero() >= ShAmt) {
836 unsigned NumSignBits =
837 DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
838 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
839 return Op0;
840 }
841 }
842 break;
843 }
844 case ISD::SETCC: {
845 SDValue Op0 = Op.getOperand(0);
846 SDValue Op1 = Op.getOperand(1);
847 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
848 // If (1) we only need the sign-bit, (2) the setcc operands are the same
849 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
850 // -1, we may be able to bypass the setcc.
851 if (DemandedBits.isSignMask() &&
855 // If we're testing X < 0, then this compare isn't needed - just use X!
856 // FIXME: We're limiting to integer types here, but this should also work
857 // if we don't care about FP signed-zero. The use of SETLT with FP means
858 // that we don't care about NaNs.
859 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
861 return Op0;
862 }
863 break;
864 }
866 // If none of the extended bits are demanded, eliminate the sextinreg.
867 SDValue Op0 = Op.getOperand(0);
868 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
869 unsigned ExBits = ExVT.getScalarSizeInBits();
870 if (DemandedBits.getActiveBits() <= ExBits &&
872 return Op0;
873 // If the input is already sign extended, just drop the extension.
874 unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
875 if (NumSignBits >= (BitWidth - ExBits + 1))
876 return Op0;
877 break;
878 }
882 if (VT.isScalableVector())
883 return SDValue();
884
885 // If we only want the lowest element and none of extended bits, then we can
886 // return the bitcasted source vector.
887 SDValue Src = Op.getOperand(0);
888 EVT SrcVT = Src.getValueType();
889 EVT DstVT = Op.getValueType();
890 if (IsLE && DemandedElts == 1 &&
891 DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
892 DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
893 return DAG.getBitcast(DstVT, Src);
894 }
895 break;
896 }
898 if (VT.isScalableVector())
899 return SDValue();
900
901 // If we don't demand the inserted element, return the base vector.
902 SDValue Vec = Op.getOperand(0);
903 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
904 EVT VecVT = Vec.getValueType();
905 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
906 !DemandedElts[CIdx->getZExtValue()])
907 return Vec;
908 break;
909 }
911 if (VT.isScalableVector())
912 return SDValue();
913
914 SDValue Vec = Op.getOperand(0);
915 SDValue Sub = Op.getOperand(1);
916 uint64_t Idx = Op.getConstantOperandVal(2);
917 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
918 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
919 // If we don't demand the inserted subvector, return the base vector.
920 if (DemandedSubElts == 0)
921 return Vec;
922 break;
923 }
924 case ISD::VECTOR_SHUFFLE: {
926 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
927
928 // If all the demanded elts are from one operand and are inline,
929 // then we can use the operand directly.
930 bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
931 for (unsigned i = 0; i != NumElts; ++i) {
932 int M = ShuffleMask[i];
933 if (M < 0 || !DemandedElts[i])
934 continue;
935 AllUndef = false;
936 IdentityLHS &= (M == (int)i);
937 IdentityRHS &= ((M - NumElts) == i);
938 }
939
940 if (AllUndef)
941 return DAG.getUNDEF(Op.getValueType());
942 if (IdentityLHS)
943 return Op.getOperand(0);
944 if (IdentityRHS)
945 return Op.getOperand(1);
946 break;
947 }
948 default:
949 // TODO: Probably okay to remove after audit; here to reduce change size
950 // in initial enablement patch for scalable vectors
951 if (VT.isScalableVector())
952 return SDValue();
953
954 if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
955 if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
956 Op, DemandedBits, DemandedElts, DAG, Depth))
957 return V;
958 break;
959 }
960 return SDValue();
961}
962
965 unsigned Depth) const {
966 EVT VT = Op.getValueType();
967 // Since the number of lanes in a scalable vector is unknown at compile time,
968 // we track one bit which is implicitly broadcast to all lanes. This means
969 // that all lanes in a scalable vector are considered demanded.
970 APInt DemandedElts = VT.isFixedLengthVector()
972 : APInt(1, 1);
973 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
974 Depth);
975}
976
978 SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
979 unsigned Depth) const {
980 APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
981 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
982 Depth);
983}
984
 985// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
 986// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
//
// Returns the replacement value on success, or an empty SDValue if the
// pattern does not match or the transform is unprofitable/illegal.
// NOTE(review): the opening of this signature (original lines 987-988,
// presumably "static SDValue combineShiftToAVG(SDValue Op,
// TargetLowering::TargetLoweringOpt &TLO,") was lost in extraction --
// confirm against upstream.
 989 const TargetLowering &TLI,
 990 const APInt &DemandedBits,
 991 const APInt &DemandedElts, unsigned Depth) {
 992 assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
 993 "SRL or SRA node is required here!");
 994 // Is the right shift using an immediate value of 1?
 995 ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
 996 if (!N1C || !N1C->isOne())
 997 return SDValue();
 998
 999 // We are looking for an avgfloor
 1000 // add(ext, ext)
 1001 // or one of these as a avgceil
 1002 // add(add(ext, ext), 1)
 1003 // add(add(ext, 1), ext)
 1004 // add(ext, add(ext, 1))
 1005 SDValue Add = Op.getOperand(0);
 1006 if (Add.getOpcode() != ISD::ADD)
 1007 return SDValue();
 1008
 1009 SDValue ExtOpA = Add.getOperand(0);
 1010 SDValue ExtOpB = Add.getOperand(1);
 // Add2 records the inner add of an avgceil match (empty for avgfloor); it
 // is re-checked for overflow later if we must widen the transform.
 1011 SDValue Add2;
 // Matches one commutation of add(Op1, add-like pair containing a "+1").
 // On success, rebinds ExtOpA/ExtOpB to the two averaged operands and
 // remembers the inner add node in Add2.
 1012 auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
 1013 ConstantSDNode *ConstOp;
 1014 if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
 1015 ConstOp->isOne()) {
 1016 ExtOpA = Op1;
 1017 ExtOpB = Op3;
 1018 Add2 = A;
 1019 return true;
 1020 }
 1021 if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
 1022 ConstOp->isOne()) {
 1023 ExtOpA = Op1;
 1024 ExtOpB = Op2;
 1025 Add2 = A;
 1026 return true;
 1027 }
 1028 return false;
 1029 };
 // Try both placements of the inner "+1" add: add(add(x, y), 1) forms.
 // Note MatchOperands mutates ExtOpA/ExtOpB/Add2 as a side effect.
 1030 bool IsCeil =
 1031 (ExtOpA.getOpcode() == ISD::ADD &&
 1032 MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
 1033 (ExtOpB.getOpcode() == ISD::ADD &&
 1034 MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));
 1035
 1036 // If the shift is signed (sra):
 1037 // - Needs >= 2 sign bit for both operands.
 1038 // - Needs >= 2 zero bits.
 1039 // If the shift is unsigned (srl):
 1040 // - Needs >= 1 zero bit for both operands.
 1041 // - Needs 1 demanded bit zero and >= 2 sign bits.
 1042 SelectionDAG &DAG = TLO.DAG;
 1043 unsigned ShiftOpc = Op.getOpcode();
 1044 bool IsSigned = false;
 // Number of known-redundant high bits we may narrow away (set below).
 1045 unsigned KnownBits;
 1046 unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
 1047 unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
 // NOTE(review): the -1 presumably accounts for the add consuming one bit
 // of headroom (carry into the top sign bit) -- confirm.
 1048 unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
 1049 unsigned NumZeroA =
 1050 DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
 1051 unsigned NumZeroB =
 1052 DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
 1053 unsigned NumZero = std::min(NumZeroA, NumZeroB);
 1054
 // Decide signed vs. unsigned avg, preferring whichever (zero vs. sign
 // bits) gives more headroom, per the requirements commented above.
 1055 switch (ShiftOpc) {
 1056 default:
 1057 llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
 1058 case ISD::SRA: {
 1059 if (NumZero >= 2 && NumSigned < NumZero) {
 1060 IsSigned = false;
 1061 KnownBits = NumZero;
 1062 break;
 1063 }
 1064 if (NumSigned >= 1) {
 1065 IsSigned = true;
 1066 KnownBits = NumSigned;
 1067 break;
 1068 }
 1069 return SDValue();
 1070 }
 1071 case ISD::SRL: {
 1072 if (NumZero >= 1 && NumSigned < NumZero) {
 1073 IsSigned = false;
 1074 KnownBits = NumZero;
 1075 break;
 1076 }
 // srl can still use a signed avg if the sign bit is not demanded.
 1077 if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
 1078 IsSigned = true;
 1079 KnownBits = NumSigned;
 1080 break;
 1081 }
 1082 return SDValue();
 1083 }
 1084 }
 1085
 1086 unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
 1087 : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
 1088
 1089 // Find the smallest power-2 type that is legal for this vector size and
 1090 // operation, given the original type size and the number of known sign/zero
 1091 // bits.
 1092 EVT VT = Op.getValueType();
 // Clamp to at least i8; narrower integer types are not useful targets.
 1093 unsigned MinWidth =
 1094 std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
 1095 EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
 // NOTE(review): the guard condition for this early return (original line
 // 1096) is on an elided line -- it presumably rejects NVT wider than VT;
 // confirm against upstream.
 1097 return SDValue();
 1098 if (VT.isVector())
 1099 NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
 1100 if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
 1101 // If we could not transform, and (both) adds are nuw/nsw, we can use the
 1102 // larger type size to do the transform.
 1103 if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
 1104 return SDValue();
 // Widening is only safe if neither add can wrap in the original type.
 1105 if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
 1106 Add.getOperand(1)) &&
 1107 (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
 1108 Add2.getOperand(1))))
 1109 NVT = VT;
 1110 else
 1111 return SDValue();
 1112 }
 1113
 1114 // Don't create a AVGFLOOR node with a scalar constant unless its legal as
 1115 // this is likely to stop other folds (reassociation, value tracking etc.)
 1116 if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
 1117 (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
 1118 return SDValue();
 1119
 // Build avg in the narrowed (or original, if widened above) type, then
 // extend/truncate back to the original result type.
 1120 SDLoc DL(Op);
 1121 SDValue ResultAVG =
 1122 DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
 1123 DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
 1124 return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
 1125}
1126
1127/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1128/// result of Op are ever used downstream. If we can use this information to
1129/// simplify Op, create a new simplified DAG node and return true, returning the
1130/// original and new nodes in Old and New. Otherwise, analyze the expression and
1131/// return a mask of Known bits for the expression (used to simplify the
1132/// caller). The Known bits may only be accurate for those bits in the
1133/// OriginalDemandedBits and OriginalDemandedElts.
1135 SDValue Op, const APInt &OriginalDemandedBits,
1136 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1137 unsigned Depth, bool AssumeSingleUse) const {
1138 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1139 assert(Op.getScalarValueSizeInBits() == BitWidth &&
1140 "Mask size mismatches value type size!");
1141
1142 // Don't know anything.
1143 Known = KnownBits(BitWidth);
1144
1145 EVT VT = Op.getValueType();
1146 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1147 unsigned NumElts = OriginalDemandedElts.getBitWidth();
1148 assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1149 "Unexpected vector size");
1150
1151 APInt DemandedBits = OriginalDemandedBits;
1152 APInt DemandedElts = OriginalDemandedElts;
1153 SDLoc dl(Op);
1154
1155 // Undef operand.
1156 if (Op.isUndef())
1157 return false;
1158
1159 // We can't simplify target constants.
1160 if (Op.getOpcode() == ISD::TargetConstant)
1161 return false;
1162
1163 if (Op.getOpcode() == ISD::Constant) {
1164 // We know all of the bits for a constant!
1165 Known = KnownBits::makeConstant(Op->getAsAPIntVal());
1166 return false;
1167 }
1168
1169 if (Op.getOpcode() == ISD::ConstantFP) {
1170 // We know all of the bits for a floating point constant!
1172 cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1173 return false;
1174 }
1175
1176 // Other users may use these bits.
1177 bool HasMultiUse = false;
1178 if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1180 // Limit search depth.
1181 return false;
1182 }
1183 // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1185 DemandedElts = APInt::getAllOnes(NumElts);
1186 HasMultiUse = true;
1187 } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1188 // Not demanding any bits/elts from Op.
1189 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1190 } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1191 // Limit search depth.
1192 return false;
1193 }
1194
1195 KnownBits Known2;
1196 switch (Op.getOpcode()) {
1197 case ISD::SCALAR_TO_VECTOR: {
1198 if (VT.isScalableVector())
1199 return false;
1200 if (!DemandedElts[0])
1201 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1202
1203 KnownBits SrcKnown;
1204 SDValue Src = Op.getOperand(0);
1205 unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1206 APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1207 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1208 return true;
1209
1210 // Upper elements are undef, so only get the knownbits if we just demand
1211 // the bottom element.
1212 if (DemandedElts == 1)
1213 Known = SrcKnown.anyextOrTrunc(BitWidth);
1214 break;
1215 }
1216 case ISD::BUILD_VECTOR:
1217 // Collect the known bits that are shared by every demanded element.
1218 // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1219 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1220 return false; // Don't fall through, will infinitely loop.
1221 case ISD::SPLAT_VECTOR: {
1222 SDValue Scl = Op.getOperand(0);
1223 APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
1224 KnownBits KnownScl;
1225 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1226 return true;
1227
1228 // Implicitly truncate the bits to match the official semantics of
1229 // SPLAT_VECTOR.
1230 Known = KnownScl.trunc(BitWidth);
1231 break;
1232 }
1233 case ISD::LOAD: {
1234 auto *LD = cast<LoadSDNode>(Op);
1235 if (getTargetConstantFromLoad(LD)) {
1236 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1237 return false; // Don't fall through, will infinitely loop.
1238 }
1239 if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1240 // If this is a ZEXTLoad and we are looking at the loaded value.
1241 EVT MemVT = LD->getMemoryVT();
1242 unsigned MemBits = MemVT.getScalarSizeInBits();
1243 Known.Zero.setBitsFrom(MemBits);
1244 return false; // Don't fall through, will infinitely loop.
1245 }
1246 break;
1247 }
1249 if (VT.isScalableVector())
1250 return false;
1251 SDValue Vec = Op.getOperand(0);
1252 SDValue Scl = Op.getOperand(1);
1253 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1254 EVT VecVT = Vec.getValueType();
1255
1256 // If index isn't constant, assume we need all vector elements AND the
1257 // inserted element.
1258 APInt DemandedVecElts(DemandedElts);
1259 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1260 unsigned Idx = CIdx->getZExtValue();
1261 DemandedVecElts.clearBit(Idx);
1262
1263 // Inserted element is not required.
1264 if (!DemandedElts[Idx])
1265 return TLO.CombineTo(Op, Vec);
1266 }
1267
1268 KnownBits KnownScl;
1269 unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1270 APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1271 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1272 return true;
1273
1274 Known = KnownScl.anyextOrTrunc(BitWidth);
1275
1276 KnownBits KnownVec;
1277 if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1278 Depth + 1))
1279 return true;
1280
1281 if (!!DemandedVecElts)
1282 Known = Known.intersectWith(KnownVec);
1283
1284 return false;
1285 }
1286 case ISD::INSERT_SUBVECTOR: {
1287 if (VT.isScalableVector())
1288 return false;
1289 // Demand any elements from the subvector and the remainder from the src its
1290 // inserted into.
1291 SDValue Src = Op.getOperand(0);
1292 SDValue Sub = Op.getOperand(1);
1293 uint64_t Idx = Op.getConstantOperandVal(2);
1294 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1295 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1296 APInt DemandedSrcElts = DemandedElts;
1297 DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
1298
1299 KnownBits KnownSub, KnownSrc;
1300 if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1301 Depth + 1))
1302 return true;
1303 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1304 Depth + 1))
1305 return true;
1306
1307 Known.Zero.setAllBits();
1308 Known.One.setAllBits();
1309 if (!!DemandedSubElts)
1310 Known = Known.intersectWith(KnownSub);
1311 if (!!DemandedSrcElts)
1312 Known = Known.intersectWith(KnownSrc);
1313
1314 // Attempt to avoid multi-use src if we don't need anything from it.
1315 if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1316 !DemandedSrcElts.isAllOnes()) {
1317 SDValue NewSub = SimplifyMultipleUseDemandedBits(
1318 Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1319 SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1320 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1321 if (NewSub || NewSrc) {
1322 NewSub = NewSub ? NewSub : Sub;
1323 NewSrc = NewSrc ? NewSrc : Src;
1324 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1325 Op.getOperand(2));
1326 return TLO.CombineTo(Op, NewOp);
1327 }
1328 }
1329 break;
1330 }
1332 if (VT.isScalableVector())
1333 return false;
1334 // Offset the demanded elts by the subvector index.
1335 SDValue Src = Op.getOperand(0);
1336 if (Src.getValueType().isScalableVector())
1337 break;
1338 uint64_t Idx = Op.getConstantOperandVal(1);
1339 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1340 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1341
1342 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1343 Depth + 1))
1344 return true;
1345
1346 // Attempt to avoid multi-use src if we don't need anything from it.
1347 if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1348 SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1349 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1350 if (DemandedSrc) {
1351 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1352 Op.getOperand(1));
1353 return TLO.CombineTo(Op, NewOp);
1354 }
1355 }
1356 break;
1357 }
1358 case ISD::CONCAT_VECTORS: {
1359 if (VT.isScalableVector())
1360 return false;
1361 Known.Zero.setAllBits();
1362 Known.One.setAllBits();
1363 EVT SubVT = Op.getOperand(0).getValueType();
1364 unsigned NumSubVecs = Op.getNumOperands();
1365 unsigned NumSubElts = SubVT.getVectorNumElements();
1366 for (unsigned i = 0; i != NumSubVecs; ++i) {
1367 APInt DemandedSubElts =
1368 DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1369 if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1370 Known2, TLO, Depth + 1))
1371 return true;
1372 // Known bits are shared by every demanded subvector element.
1373 if (!!DemandedSubElts)
1374 Known = Known.intersectWith(Known2);
1375 }
1376 break;
1377 }
1378 case ISD::VECTOR_SHUFFLE: {
1379 assert(!VT.isScalableVector());
1380 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1381
1382 // Collect demanded elements from shuffle operands..
1383 APInt DemandedLHS, DemandedRHS;
1384 if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1385 DemandedRHS))
1386 break;
1387
1388 if (!!DemandedLHS || !!DemandedRHS) {
1389 SDValue Op0 = Op.getOperand(0);
1390 SDValue Op1 = Op.getOperand(1);
1391
1392 Known.Zero.setAllBits();
1393 Known.One.setAllBits();
1394 if (!!DemandedLHS) {
1395 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1396 Depth + 1))
1397 return true;
1398 Known = Known.intersectWith(Known2);
1399 }
1400 if (!!DemandedRHS) {
1401 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1402 Depth + 1))
1403 return true;
1404 Known = Known.intersectWith(Known2);
1405 }
1406
1407 // Attempt to avoid multi-use ops if we don't need anything from them.
1408 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1409 Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1410 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1411 Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1412 if (DemandedOp0 || DemandedOp1) {
1413 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1414 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1415 SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1416 return TLO.CombineTo(Op, NewOp);
1417 }
1418 }
1419 break;
1420 }
1421 case ISD::AND: {
1422 SDValue Op0 = Op.getOperand(0);
1423 SDValue Op1 = Op.getOperand(1);
1424
1425 // If the RHS is a constant, check to see if the LHS would be zero without
1426 // using the bits from the RHS. Below, we use knowledge about the RHS to
1427 // simplify the LHS, here we're using information from the LHS to simplify
1428 // the RHS.
1429 if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
1430 // Do not increment Depth here; that can cause an infinite loop.
1431 KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1432 // If the LHS already has zeros where RHSC does, this 'and' is dead.
1433 if ((LHSKnown.Zero & DemandedBits) ==
1434 (~RHSC->getAPIntValue() & DemandedBits))
1435 return TLO.CombineTo(Op, Op0);
1436
1437 // If any of the set bits in the RHS are known zero on the LHS, shrink
1438 // the constant.
1439 if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1440 DemandedElts, TLO))
1441 return true;
1442
1443 // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1444 // constant, but if this 'and' is only clearing bits that were just set by
1445 // the xor, then this 'and' can be eliminated by shrinking the mask of
1446 // the xor. For example, for a 32-bit X:
1447 // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1448 if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1449 LHSKnown.One == ~RHSC->getAPIntValue()) {
1450 SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1451 return TLO.CombineTo(Op, Xor);
1452 }
1453 }
1454
1455 // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1456 // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1457 if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1458 (Op0.getOperand(0).isUndef() ||
1460 Op0->hasOneUse()) {
1461 unsigned NumSubElts =
1463 unsigned SubIdx = Op0.getConstantOperandVal(2);
1464 APInt DemandedSub =
1465 APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1466 KnownBits KnownSubMask =
1467 TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1468 if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1469 SDValue NewAnd =
1470 TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1471 SDValue NewInsert =
1472 TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1473 Op0.getOperand(1), Op0.getOperand(2));
1474 return TLO.CombineTo(Op, NewInsert);
1475 }
1476 }
1477
1478 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1479 Depth + 1))
1480 return true;
1481 if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1482 Known2, TLO, Depth + 1))
1483 return true;
1484
1485 // If all of the demanded bits are known one on one side, return the other.
1486 // These bits cannot contribute to the result of the 'and'.
1487 if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1488 return TLO.CombineTo(Op, Op0);
1489 if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1490 return TLO.CombineTo(Op, Op1);
1491 // If all of the demanded bits in the inputs are known zeros, return zero.
1492 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1493 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1494 // If the RHS is a constant, see if we can simplify it.
1495 if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1496 TLO))
1497 return true;
1498 // If the operation can be done in a smaller type, do so.
1499 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1500 return true;
1501
1502 // Attempt to avoid multi-use ops if we don't need anything from them.
1503 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1504 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1505 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1506 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1507 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1508 if (DemandedOp0 || DemandedOp1) {
1509 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1510 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1511 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1512 return TLO.CombineTo(Op, NewOp);
1513 }
1514 }
1515
1516 Known &= Known2;
1517 break;
1518 }
1519 case ISD::OR: {
1520 SDValue Op0 = Op.getOperand(0);
1521 SDValue Op1 = Op.getOperand(1);
1522 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1523 Depth + 1)) {
1524 Op->dropFlags(SDNodeFlags::Disjoint);
1525 return true;
1526 }
1527
1528 if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1529 Known2, TLO, Depth + 1)) {
1530 Op->dropFlags(SDNodeFlags::Disjoint);
1531 return true;
1532 }
1533
1534 // If all of the demanded bits are known zero on one side, return the other.
1535 // These bits cannot contribute to the result of the 'or'.
1536 if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1537 return TLO.CombineTo(Op, Op0);
1538 if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1539 return TLO.CombineTo(Op, Op1);
1540 // If the RHS is a constant, see if we can simplify it.
1541 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1542 return true;
1543 // If the operation can be done in a smaller type, do so.
1544 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1545 return true;
1546
1547 // Attempt to avoid multi-use ops if we don't need anything from them.
1548 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1549 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1550 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1551 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1552 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1553 if (DemandedOp0 || DemandedOp1) {
1554 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1555 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1556 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1557 return TLO.CombineTo(Op, NewOp);
1558 }
1559 }
1560
1561 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1562 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1563 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1564 Op0->hasOneUse() && Op1->hasOneUse()) {
1565 // Attempt to match all commutations - m_c_Or would've been useful!
1566 for (int I = 0; I != 2; ++I) {
1567 SDValue X = Op.getOperand(I).getOperand(0);
1568 SDValue C1 = Op.getOperand(I).getOperand(1);
1569 SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1570 SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1571 if (Alt.getOpcode() == ISD::OR) {
1572 for (int J = 0; J != 2; ++J) {
1573 if (X == Alt.getOperand(J)) {
1574 SDValue Y = Alt.getOperand(1 - J);
1575 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1576 {C1, C2})) {
1577 SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1578 SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1579 return TLO.CombineTo(
1580 Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1581 }
1582 }
1583 }
1584 }
1585 }
1586 }
1587
1588 Known |= Known2;
1589 break;
1590 }
1591 case ISD::XOR: {
1592 SDValue Op0 = Op.getOperand(0);
1593 SDValue Op1 = Op.getOperand(1);
1594
1595 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1596 Depth + 1))
1597 return true;
1598 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1599 Depth + 1))
1600 return true;
1601
1602 // If all of the demanded bits are known zero on one side, return the other.
1603 // These bits cannot contribute to the result of the 'xor'.
1604 if (DemandedBits.isSubsetOf(Known.Zero))
1605 return TLO.CombineTo(Op, Op0);
1606 if (DemandedBits.isSubsetOf(Known2.Zero))
1607 return TLO.CombineTo(Op, Op1);
1608 // If the operation can be done in a smaller type, do so.
1609 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1610 return true;
1611
1612 // If all of the unknown bits are known to be zero on one side or the other
1613 // turn this into an *inclusive* or.
1614 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1615 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1616 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1617
1618 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1619 if (C) {
1620 // If one side is a constant, and all of the set bits in the constant are
1621 // also known set on the other side, turn this into an AND, as we know
1622 // the bits will be cleared.
1623 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1624 // NB: it is okay if more bits are known than are requested
1625 if (C->getAPIntValue() == Known2.One) {
1626 SDValue ANDC =
1627 TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1628 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1629 }
1630
1631 // If the RHS is a constant, see if we can change it. Don't alter a -1
1632 // constant because that's a 'not' op, and that is better for combining
1633 // and codegen.
1634 if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1635 // We're flipping all demanded bits. Flip the undemanded bits too.
1636 SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1637 return TLO.CombineTo(Op, New);
1638 }
1639
1640 unsigned Op0Opcode = Op0.getOpcode();
1641 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1642 if (ConstantSDNode *ShiftC =
1643 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1644 // Don't crash on an oversized shift. We can not guarantee that a
1645 // bogus shift has been simplified to undef.
1646 if (ShiftC->getAPIntValue().ult(BitWidth)) {
1647 uint64_t ShiftAmt = ShiftC->getZExtValue();
1649 Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1650 : Ones.lshr(ShiftAmt);
1651 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1652 isDesirableToCommuteXorWithShift(Op.getNode())) {
1653 // If the xor constant is a demanded mask, do a 'not' before the
1654 // shift:
1655 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1656 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1657 SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1658 return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1659 Op0.getOperand(1)));
1660 }
1661 }
1662 }
1663 }
1664 }
1665
1666 // If we can't turn this into a 'not', try to shrink the constant.
1667 if (!C || !C->isAllOnes())
1668 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1669 return true;
1670
1671 // Attempt to avoid multi-use ops if we don't need anything from them.
1672 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1673 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1674 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1675 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1676 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1677 if (DemandedOp0 || DemandedOp1) {
1678 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1679 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1680 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1681 return TLO.CombineTo(Op, NewOp);
1682 }
1683 }
1684
1685 Known ^= Known2;
1686 break;
1687 }
1688 case ISD::SELECT:
1689 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1690 Known, TLO, Depth + 1))
1691 return true;
1692 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1693 Known2, TLO, Depth + 1))
1694 return true;
1695
1696 // If the operands are constants, see if we can simplify them.
1697 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1698 return true;
1699
1700 // Only known if known in both the LHS and RHS.
1701 Known = Known.intersectWith(Known2);
1702 break;
1703 case ISD::VSELECT:
1704 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1705 Known, TLO, Depth + 1))
1706 return true;
1707 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1708 Known2, TLO, Depth + 1))
1709 return true;
1710
1711 // Only known if known in both the LHS and RHS.
1712 Known = Known.intersectWith(Known2);
1713 break;
1714 case ISD::SELECT_CC:
1715 if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1716 Known, TLO, Depth + 1))
1717 return true;
1718 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1719 Known2, TLO, Depth + 1))
1720 return true;
1721
1722 // If the operands are constants, see if we can simplify them.
1723 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1724 return true;
1725
1726 // Only known if known in both the LHS and RHS.
1727 Known = Known.intersectWith(Known2);
1728 break;
1729 case ISD::SETCC: {
1730 SDValue Op0 = Op.getOperand(0);
1731 SDValue Op1 = Op.getOperand(1);
1732 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1733 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1734 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1735 // -1, we may be able to bypass the setcc.
1736 if (DemandedBits.isSignMask() &&
1740 // If we're testing X < 0, then this compare isn't needed - just use X!
1741 // FIXME: We're limiting to integer types here, but this should also work
1742 // if we don't care about FP signed-zero. The use of SETLT with FP means
1743 // that we don't care about NaNs.
1744 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1746 return TLO.CombineTo(Op, Op0);
1747
1748 // TODO: Should we check for other forms of sign-bit comparisons?
1749 // Examples: X <= -1, X >= 0
1750 }
1751 if (getBooleanContents(Op0.getValueType()) ==
1753 BitWidth > 1)
1754 Known.Zero.setBitsFrom(1);
1755 break;
1756 }
1757 case ISD::SHL: {
1758 SDValue Op0 = Op.getOperand(0);
1759 SDValue Op1 = Op.getOperand(1);
1760 EVT ShiftVT = Op1.getValueType();
1761
1762 if (std::optional<uint64_t> KnownSA =
1763 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1764 unsigned ShAmt = *KnownSA;
1765 if (ShAmt == 0)
1766 return TLO.CombineTo(Op, Op0);
1767
1768 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1769 // single shift. We can do this if the bottom bits (which are shifted
1770 // out) are never demanded.
1771 // TODO - support non-uniform vector amounts.
1772 if (Op0.getOpcode() == ISD::SRL) {
1773 if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1774 if (std::optional<uint64_t> InnerSA =
1775 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1776 unsigned C1 = *InnerSA;
1777 unsigned Opc = ISD::SHL;
1778 int Diff = ShAmt - C1;
1779 if (Diff < 0) {
1780 Diff = -Diff;
1781 Opc = ISD::SRL;
1782 }
1783 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1784 return TLO.CombineTo(
1785 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1786 }
1787 }
1788 }
1789
1790 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1791 // are not demanded. This will likely allow the anyext to be folded away.
1792 // TODO - support non-uniform vector amounts.
1793 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1794 SDValue InnerOp = Op0.getOperand(0);
1795 EVT InnerVT = InnerOp.getValueType();
1796 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1797 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1798 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1799 SDValue NarrowShl = TLO.DAG.getNode(
1800 ISD::SHL, dl, InnerVT, InnerOp,
1801 TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1802 return TLO.CombineTo(
1803 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1804 }
1805
1806 // Repeat the SHL optimization above in cases where an extension
1807 // intervenes: (shl (anyext (shr x, c1)), c2) to
1808 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1809 // aren't demanded (as above) and that the shifted upper c1 bits of
1810 // x aren't demanded.
1811 // TODO - support non-uniform vector amounts.
1812 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1813 InnerOp.hasOneUse()) {
1814 if (std::optional<uint64_t> SA2 = TLO.DAG.getValidShiftAmount(
1815 InnerOp, DemandedElts, Depth + 2)) {
1816 unsigned InnerShAmt = *SA2;
1817 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1818 DemandedBits.getActiveBits() <=
1819 (InnerBits - InnerShAmt + ShAmt) &&
1820 DemandedBits.countr_zero() >= ShAmt) {
1821 SDValue NewSA =
1822 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1823 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1824 InnerOp.getOperand(0));
1825 return TLO.CombineTo(
1826 Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1827 }
1828 }
1829 }
1830 }
1831
1832 APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1833 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1834 Depth + 1)) {
1835 // Disable the nsw and nuw flags. We can no longer guarantee that we
1836 // won't wrap after simplification.
1837 Op->dropFlags(SDNodeFlags::NoWrap);
1838 return true;
1839 }
1840 Known.Zero <<= ShAmt;
1841 Known.One <<= ShAmt;
1842 // low bits known zero.
1843 Known.Zero.setLowBits(ShAmt);
1844
1845 // Attempt to avoid multi-use ops if we don't need anything from them.
1846 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1847 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1848 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1849 if (DemandedOp0) {
1850 SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1851 return TLO.CombineTo(Op, NewOp);
1852 }
1853 }
1854
1855 // TODO: Can we merge this fold with the one below?
1856 // Try shrinking the operation as long as the shift amount will still be
1857 // in range.
1858 if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1859 Op.getNode()->hasOneUse()) {
1860 // Search for the smallest integer type with free casts to and from
1861 // Op's type. For expedience, just check power-of-2 integer types.
1862 unsigned DemandedSize = DemandedBits.getActiveBits();
1863 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
1864 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
1865 EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
1866 if (isNarrowingProfitable(Op.getNode(), VT, SmallVT) &&
1867 isTypeDesirableForOp(ISD::SHL, SmallVT) &&
1868 isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
1869 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
1870 assert(DemandedSize <= SmallVTBits &&
1871 "Narrowed below demanded bits?");
1872 // We found a type with free casts.
1873 SDValue NarrowShl = TLO.DAG.getNode(
1874 ISD::SHL, dl, SmallVT,
1875 TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
1876 TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
1877 return TLO.CombineTo(
1878 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1879 }
1880 }
1881 }
1882
1883 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1884 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1885 // Only do this if we demand the upper half so the knownbits are correct.
1886 unsigned HalfWidth = BitWidth / 2;
1887 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1888 DemandedBits.countLeadingOnes() >= HalfWidth) {
1889 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1890 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
1891 isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1892 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1893 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1894 // If we're demanding the upper bits at all, we must ensure
1895 // that the upper bits of the shift result are known to be zero,
1896 // which is equivalent to the narrow shift being NUW.
1897 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1898 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1899 SDNodeFlags Flags;
1900 Flags.setNoSignedWrap(IsNSW);
1901 Flags.setNoUnsignedWrap(IsNUW);
1902 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1903 SDValue NewShiftAmt =
1904 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1905 SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1906 NewShiftAmt, Flags);
1907 SDValue NewExt =
1908 TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1909 return TLO.CombineTo(Op, NewExt);
1910 }
1911 }
1912 }
1913 } else {
1914 // This is a variable shift, so we can't shift the demand mask by a known
1915 // amount. But if we are not demanding high bits, then we are not
1916 // demanding those bits from the pre-shifted operand either.
1917 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1918 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1919 if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1920 Depth + 1)) {
1921 // Disable the nsw and nuw flags. We can no longer guarantee that we
1922 // won't wrap after simplification.
1923 Op->dropFlags(SDNodeFlags::NoWrap);
1924 return true;
1925 }
1926 Known.resetAll();
1927 }
1928 }
1929
1930 // If we are only demanding sign bits then we can use the shift source
1931 // directly.
1932 if (std::optional<uint64_t> MaxSA =
1933 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
1934 unsigned ShAmt = *MaxSA;
1935 unsigned NumSignBits =
1936 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1937 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1938 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1939 return TLO.CombineTo(Op, Op0);
1940 }
1941 break;
1942 }
1943 case ISD::SRL: {
1944 SDValue Op0 = Op.getOperand(0);
1945 SDValue Op1 = Op.getOperand(1);
1946 EVT ShiftVT = Op1.getValueType();
1947
1948 if (std::optional<uint64_t> KnownSA =
1949 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1950 unsigned ShAmt = *KnownSA;
1951 if (ShAmt == 0)
1952 return TLO.CombineTo(Op, Op0);
1953
1954 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1955 // single shift. We can do this if the top bits (which are shifted out)
1956 // are never demanded.
1957 // TODO - support non-uniform vector amounts.
1958 if (Op0.getOpcode() == ISD::SHL) {
1959 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1960 if (std::optional<uint64_t> InnerSA =
1961 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1962 unsigned C1 = *InnerSA;
1963 unsigned Opc = ISD::SRL;
1964 int Diff = ShAmt - C1;
1965 if (Diff < 0) {
1966 Diff = -Diff;
1967 Opc = ISD::SHL;
1968 }
1969 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1970 return TLO.CombineTo(
1971 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1972 }
1973 }
1974 }
1975
1976 // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
1977 // single sra. We can do this if the top bits are never demanded.
1978 if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
1979 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1980 if (std::optional<uint64_t> InnerSA =
1981 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1982 unsigned C1 = *InnerSA;
1983 // Clamp the combined shift amount if it exceeds the bit width.
1984 unsigned Combined = std::min(C1 + ShAmt, BitWidth - 1);
1985 SDValue NewSA = TLO.DAG.getConstant(Combined, dl, ShiftVT);
1986 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRA, dl, VT,
1987 Op0.getOperand(0), NewSA));
1988 }
1989 }
1990 }
1991
1992 APInt InDemandedMask = (DemandedBits << ShAmt);
1993
1994 // If the shift is exact, then it does demand the low bits (and knows that
1995 // they are zero).
1996 if (Op->getFlags().hasExact())
1997 InDemandedMask.setLowBits(ShAmt);
1998
1999 // Narrow shift to lower half - similar to ShrinkDemandedOp.
2000 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
2001 if ((BitWidth % 2) == 0 && !VT.isVector()) {
2003 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
2004 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
2005 isTypeDesirableForOp(ISD::SRL, HalfVT) &&
2006 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
2007 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
2008 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
2009 TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
2010 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
2011 SDValue NewShiftAmt =
2012 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
2013 SDValue NewShift =
2014 TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
2015 return TLO.CombineTo(
2016 Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
2017 }
2018 }
2019
2020 // Compute the new bits that are at the top now.
2021 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2022 Depth + 1))
2023 return true;
2024 Known.Zero.lshrInPlace(ShAmt);
2025 Known.One.lshrInPlace(ShAmt);
2026 // High bits known zero.
2027 Known.Zero.setHighBits(ShAmt);
2028
2029 // Attempt to avoid multi-use ops if we don't need anything from them.
2030 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2031 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2032 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2033 if (DemandedOp0) {
2034 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
2035 return TLO.CombineTo(Op, NewOp);
2036 }
2037 }
2038 } else {
2039 // Use generic knownbits computation as it has support for non-uniform
2040 // shift amounts.
2041 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2042 }
2043
2044 // If we are only demanding sign bits then we can use the shift source
2045 // directly.
2046 if (std::optional<uint64_t> MaxSA =
2047 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
2048 unsigned ShAmt = *MaxSA;
2049 // Must already be signbits in DemandedBits bounds, and can't demand any
2050 // shifted in zeroes.
2051 if (DemandedBits.countl_zero() >= ShAmt) {
2052 unsigned NumSignBits =
2053 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
2054 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
2055 return TLO.CombineTo(Op, Op0);
2056 }
2057 }
2058
2059 // Try to match AVG patterns (after shift simplification).
2060 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2061 DemandedElts, Depth + 1))
2062 return TLO.CombineTo(Op, AVG);
2063
2064 break;
2065 }
2066 case ISD::SRA: {
2067 SDValue Op0 = Op.getOperand(0);
2068 SDValue Op1 = Op.getOperand(1);
2069 EVT ShiftVT = Op1.getValueType();
2070
2071 // If we only want bits that already match the signbit then we don't need
2072 // to shift.
2073 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2074 if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
2075 NumHiDemandedBits)
2076 return TLO.CombineTo(Op, Op0);
2077
2078 // If this is an arithmetic shift right and only the low-bit is set, we can
2079 // always convert this into a logical shr, even if the shift amount is
2080 // variable. The low bit of the shift cannot be an input sign bit unless
2081 // the shift amount is >= the size of the datatype, which is undefined.
2082 if (DemandedBits.isOne())
2083 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2084
2085 if (std::optional<uint64_t> KnownSA =
2086 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
2087 unsigned ShAmt = *KnownSA;
2088 if (ShAmt == 0)
2089 return TLO.CombineTo(Op, Op0);
2090
2091 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2092 // supports sext_inreg.
2093 if (Op0.getOpcode() == ISD::SHL) {
2094 if (std::optional<uint64_t> InnerSA =
2095 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2096 unsigned LowBits = BitWidth - ShAmt;
2097 EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits);
2098 if (VT.isVector())
2099 ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtVT,
2101
2102 if (*InnerSA == ShAmt) {
2103 if (!TLO.LegalOperations() ||
2105 return TLO.CombineTo(
2106 Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2107 Op0.getOperand(0),
2108 TLO.DAG.getValueType(ExtVT)));
2109
2110 // Even if we can't convert to sext_inreg, we might be able to
2111 // remove this shift pair if the input is already sign extended.
2112 unsigned NumSignBits =
2113 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2114 if (NumSignBits > ShAmt)
2115 return TLO.CombineTo(Op, Op0.getOperand(0));
2116 }
2117 }
2118 }
2119
2120 APInt InDemandedMask = (DemandedBits << ShAmt);
2121
2122 // If the shift is exact, then it does demand the low bits (and knows that
2123 // they are zero).
2124 if (Op->getFlags().hasExact())
2125 InDemandedMask.setLowBits(ShAmt);
2126
2127 // If any of the demanded bits are produced by the sign extension, we also
2128 // demand the input sign bit.
2129 if (DemandedBits.countl_zero() < ShAmt)
2130 InDemandedMask.setSignBit();
2131
2132 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2133 Depth + 1))
2134 return true;
2135 Known.Zero.lshrInPlace(ShAmt);
2136 Known.One.lshrInPlace(ShAmt);
2137
2138 // If the input sign bit is known to be zero, or if none of the top bits
2139 // are demanded, turn this into an unsigned shift right.
2140 if (Known.Zero[BitWidth - ShAmt - 1] ||
2141 DemandedBits.countl_zero() >= ShAmt) {
2142 SDNodeFlags Flags;
2143 Flags.setExact(Op->getFlags().hasExact());
2144 return TLO.CombineTo(
2145 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2146 }
2147
2148 int Log2 = DemandedBits.exactLogBase2();
2149 if (Log2 >= 0) {
2150 // The bit must come from the sign.
2151 SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2152 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2153 }
2154
2155 if (Known.One[BitWidth - ShAmt - 1])
2156 // New bits are known one.
2157 Known.One.setHighBits(ShAmt);
2158
2159 // Attempt to avoid multi-use ops if we don't need anything from them.
2160 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2161 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2162 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2163 if (DemandedOp0) {
2164 SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2165 return TLO.CombineTo(Op, NewOp);
2166 }
2167 }
2168 }
2169
2170 // Try to match AVG patterns (after shift simplification).
2171 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2172 DemandedElts, Depth + 1))
2173 return TLO.CombineTo(Op, AVG);
2174
2175 break;
2176 }
2177 case ISD::FSHL:
2178 case ISD::FSHR: {
2179 SDValue Op0 = Op.getOperand(0);
2180 SDValue Op1 = Op.getOperand(1);
2181 SDValue Op2 = Op.getOperand(2);
2182 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2183
2184 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2185 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2186
2187 // For fshl, 0-shift returns the 1st arg.
2188 // For fshr, 0-shift returns the 2nd arg.
2189 if (Amt == 0) {
2190 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2191 Known, TLO, Depth + 1))
2192 return true;
2193 break;
2194 }
2195
2196 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2197 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2198 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2199 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2200 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2201 Depth + 1))
2202 return true;
2203 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2204 Depth + 1))
2205 return true;
2206
2207 Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
2208 Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
2209 Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2210 Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2211 Known = Known.unionWith(Known2);
2212
2213 // Attempt to avoid multi-use ops if we don't need anything from them.
2214 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2215 !DemandedElts.isAllOnes()) {
2216 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2217 Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2218 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2219 Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2220 if (DemandedOp0 || DemandedOp1) {
2221 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2222 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2223 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2224 DemandedOp1, Op2);
2225 return TLO.CombineTo(Op, NewOp);
2226 }
2227 }
2228 }
2229
2230 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2231 if (isPowerOf2_32(BitWidth)) {
2232 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2233 if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2234 Known2, TLO, Depth + 1))
2235 return true;
2236 }
2237 break;
2238 }
2239 case ISD::ROTL:
2240 case ISD::ROTR: {
2241 SDValue Op0 = Op.getOperand(0);
2242 SDValue Op1 = Op.getOperand(1);
2243 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2244
2245 // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
2246 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2247 return TLO.CombineTo(Op, Op0);
2248
2249 if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2250 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2251 unsigned RevAmt = BitWidth - Amt;
2252
2253 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2254 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2255 APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2256 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2257 Depth + 1))
2258 return true;
2259
2260 // rot*(x, 0) --> x
2261 if (Amt == 0)
2262 return TLO.CombineTo(Op, Op0);
2263
2264 // See if we don't demand either half of the rotated bits.
2265 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2266 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2267 Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2268 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2269 }
2270 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2271 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2272 Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2273 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2274 }
2275 }
2276
2277 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2278 if (isPowerOf2_32(BitWidth)) {
2279 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2280 if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2281 Depth + 1))
2282 return true;
2283 }
2284 break;
2285 }
2286 case ISD::SMIN:
2287 case ISD::SMAX:
2288 case ISD::UMIN:
2289 case ISD::UMAX: {
2290 unsigned Opc = Op.getOpcode();
2291 SDValue Op0 = Op.getOperand(0);
2292 SDValue Op1 = Op.getOperand(1);
2293
2294 // If we're only demanding signbits, then we can simplify to OR/AND node.
2295 unsigned BitOp =
2296 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2297 unsigned NumSignBits =
2298 std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2299 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2300 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2301 if (NumSignBits >= NumDemandedUpperBits)
2302 return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
2303
2304 // Check if one arg is always less/greater than (or equal) to the other arg.
2305 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2306 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2307 switch (Opc) {
2308 case ISD::SMIN:
2309 if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2310 return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2311 if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2312 return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2313 Known = KnownBits::smin(Known0, Known1);
2314 break;
2315 case ISD::SMAX:
2316 if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2317 return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2318 if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2319 return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2320 Known = KnownBits::smax(Known0, Known1);
2321 break;
2322 case ISD::UMIN:
2323 if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2324 return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2325 if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2326 return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2327 Known = KnownBits::umin(Known0, Known1);
2328 break;
2329 case ISD::UMAX:
2330 if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2331 return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2332 if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2333 return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2334 Known = KnownBits::umax(Known0, Known1);
2335 break;
2336 }
2337 break;
2338 }
2339 case ISD::BITREVERSE: {
2340 SDValue Src = Op.getOperand(0);
2341 APInt DemandedSrcBits = DemandedBits.reverseBits();
2342 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2343 Depth + 1))
2344 return true;
2345 Known.One = Known2.One.reverseBits();
2346 Known.Zero = Known2.Zero.reverseBits();
2347 break;
2348 }
2349 case ISD::BSWAP: {
2350 SDValue Src = Op.getOperand(0);
2351
2352 // If the only bits demanded come from one byte of the bswap result,
2353 // just shift the input byte into position to eliminate the bswap.
2354 unsigned NLZ = DemandedBits.countl_zero();
2355 unsigned NTZ = DemandedBits.countr_zero();
2356
2357 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2358 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2359 // have 14 leading zeros, round to 8.
2360 NLZ = alignDown(NLZ, 8);
2361 NTZ = alignDown(NTZ, 8);
2362 // If we need exactly one byte, we can do this transformation.
2363 if (BitWidth - NLZ - NTZ == 8) {
2364 // Replace this with either a left or right shift to get the byte into
2365 // the right place.
2366 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2367 if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2368 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2369 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2370 SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2371 return TLO.CombineTo(Op, NewOp);
2372 }
2373 }
2374
2375 APInt DemandedSrcBits = DemandedBits.byteSwap();
2376 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2377 Depth + 1))
2378 return true;
2379 Known.One = Known2.One.byteSwap();
2380 Known.Zero = Known2.Zero.byteSwap();
2381 break;
2382 }
2383 case ISD::CTPOP: {
2384 // If only 1 bit is demanded, replace with PARITY as long as we're before
2385 // op legalization.
2386 // FIXME: Limit to scalars for now.
2387 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2388 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2389 Op.getOperand(0)));
2390
2391 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2392 break;
2393 }
2395 SDValue Op0 = Op.getOperand(0);
2396 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2397 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2398
2399 // If we only care about the highest bit, don't bother shifting right.
2400 if (DemandedBits.isSignMask()) {
2401 unsigned MinSignedBits =
2402 TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2403 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2404 // However if the input is already sign extended we expect the sign
2405 // extension to be dropped altogether later and do not simplify.
2406 if (!AlreadySignExtended) {
2407 // Compute the correct shift amount type, which must be getShiftAmountTy
2408 // for scalar types after legalization.
2409 SDValue ShiftAmt =
2410 TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2411 return TLO.CombineTo(Op,
2412 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2413 }
2414 }
2415
2416 // If none of the extended bits are demanded, eliminate the sextinreg.
2417 if (DemandedBits.getActiveBits() <= ExVTBits)
2418 return TLO.CombineTo(Op, Op0);
2419
2420 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2421
2422 // Since the sign extended bits are demanded, we know that the sign
2423 // bit is demanded.
2424 InputDemandedBits.setBit(ExVTBits - 1);
2425
2426 if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2427 Depth + 1))
2428 return true;
2429
2430 // If the sign bit of the input is known set or clear, then we know the
2431 // top bits of the result.
2432
2433 // If the input sign bit is known zero, convert this into a zero extension.
2434 if (Known.Zero[ExVTBits - 1])
2435 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2436
2437 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2438 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2439 Known.One.setBitsFrom(ExVTBits);
2440 Known.Zero &= Mask;
2441 } else { // Input sign bit unknown
2442 Known.Zero &= Mask;
2443 Known.One &= Mask;
2444 }
2445 break;
2446 }
2447 case ISD::BUILD_PAIR: {
2448 EVT HalfVT = Op.getOperand(0).getValueType();
2449 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2450
2451 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2452 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2453
2454 KnownBits KnownLo, KnownHi;
2455
2456 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2457 return true;
2458
2459 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2460 return true;
2461
2462 Known = KnownHi.concat(KnownLo);
2463 break;
2464 }
2466 if (VT.isScalableVector())
2467 return false;
2468 [[fallthrough]];
2469 case ISD::ZERO_EXTEND: {
2470 SDValue Src = Op.getOperand(0);
2471 EVT SrcVT = Src.getValueType();
2472 unsigned InBits = SrcVT.getScalarSizeInBits();
2473 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2474 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2475
2476 // If none of the top bits are demanded, convert this into an any_extend.
2477 if (DemandedBits.getActiveBits() <= InBits) {
2478 // If we only need the non-extended bits of the bottom element
2479 // then we can just bitcast to the result.
2480 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2481 VT.getSizeInBits() == SrcVT.getSizeInBits())
2482 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2483
2484 unsigned Opc =
2486 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2487 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2488 }
2489
2490 APInt InDemandedBits = DemandedBits.trunc(InBits);
2491 APInt InDemandedElts = DemandedElts.zext(InElts);
2492 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2493 Depth + 1)) {
2494 Op->dropFlags(SDNodeFlags::NonNeg);
2495 return true;
2496 }
2497 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2498 Known = Known.zext(BitWidth);
2499
2500 // Attempt to avoid multi-use ops if we don't need anything from them.
2501 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2502 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2503 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2504 break;
2505 }
2507 if (VT.isScalableVector())
2508 return false;
2509 [[fallthrough]];
2510 case ISD::SIGN_EXTEND: {
2511 SDValue Src = Op.getOperand(0);
2512 EVT SrcVT = Src.getValueType();
2513 unsigned InBits = SrcVT.getScalarSizeInBits();
2514 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2515 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2516
2517 APInt InDemandedElts = DemandedElts.zext(InElts);
2518 APInt InDemandedBits = DemandedBits.trunc(InBits);
2519
2520 // Since some of the sign extended bits are demanded, we know that the sign
2521 // bit is demanded.
2522 InDemandedBits.setBit(InBits - 1);
2523
2524 // If none of the top bits are demanded, convert this into an any_extend.
2525 if (DemandedBits.getActiveBits() <= InBits) {
2526 // If we only need the non-extended bits of the bottom element
2527 // then we can just bitcast to the result.
2528 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2529 VT.getSizeInBits() == SrcVT.getSizeInBits())
2530 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2531
2532 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2534 TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2535 InBits) {
2536 unsigned Opc =
2538 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2539 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2540 }
2541 }
2542
2543 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2544 Depth + 1))
2545 return true;
2546 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2547
2548 // If the sign bit is known one, the top bits match.
2549 Known = Known.sext(BitWidth);
2550
2551 // If the sign bit is known zero, convert this to a zero extend.
2552 if (Known.isNonNegative()) {
2553 unsigned Opc =
2555 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2556 SDNodeFlags Flags;
2557 if (!IsVecInReg)
2558 Flags |= SDNodeFlags::NonNeg;
2559 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2560 }
2561 }
2562
2563 // Attempt to avoid multi-use ops if we don't need anything from them.
2564 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2565 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2566 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2567 break;
2568 }
2570 if (VT.isScalableVector())
2571 return false;
2572 [[fallthrough]];
2573 case ISD::ANY_EXTEND: {
2574 SDValue Src = Op.getOperand(0);
2575 EVT SrcVT = Src.getValueType();
2576 unsigned InBits = SrcVT.getScalarSizeInBits();
2577 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2578 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2579
2580 // If we only need the bottom element then we can just bitcast.
2581 // TODO: Handle ANY_EXTEND?
2582 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2583 VT.getSizeInBits() == SrcVT.getSizeInBits())
2584 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2585
2586 APInt InDemandedBits = DemandedBits.trunc(InBits);
2587 APInt InDemandedElts = DemandedElts.zext(InElts);
2588 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2589 Depth + 1))
2590 return true;
2591 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2592 Known = Known.anyext(BitWidth);
2593
2594 // Attempt to avoid multi-use ops if we don't need anything from them.
2595 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2596 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2597 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2598 break;
2599 }
2600 case ISD::TRUNCATE: {
2601 SDValue Src = Op.getOperand(0);
2602
2603 // Simplify the input, using demanded bit information, and compute the known
2604 // zero/one bits live out.
2605 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2606 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2607 if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2608 Depth + 1)) {
2609 // Disable the nsw and nuw flags. We can no longer guarantee that we
2610 // won't wrap after simplification.
2611 Op->dropFlags(SDNodeFlags::NoWrap);
2612 return true;
2613 }
2614 Known = Known.trunc(BitWidth);
2615
2616 // Attempt to avoid multi-use ops if we don't need anything from them.
2617 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2618 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2619 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2620
2621 // If the input is only used by this truncate, see if we can shrink it based
2622 // on the known demanded bits.
2623 switch (Src.getOpcode()) {
2624 default:
2625 break;
2626 case ISD::SRL:
2627 // Shrink SRL by a constant if none of the high bits shifted in are
2628 // demanded.
2629 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2630 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2631 // undesirable.
2632 break;
2633
2634 if (Src.getNode()->hasOneUse()) {
2635 if (isTruncateFree(Src, VT) &&
2636 !isTruncateFree(Src.getValueType(), VT)) {
2637 // If truncate is only free at trunc(srl), do not turn it into
2638 // srl(trunc). The check is done by first check the truncate is free
2639 // at Src's opcode(srl), then check the truncate is not done by
2640 // referencing sub-register. In test, if both trunc(srl) and
2641 // srl(trunc)'s trunc are free, srl(trunc) performs better. If only
2642 // trunc(srl)'s trunc is free, trunc(srl) is better.
2643 break;
2644 }
2645
2646 std::optional<uint64_t> ShAmtC =
2647 TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2);
2648 if (!ShAmtC || *ShAmtC >= BitWidth)
2649 break;
2650 uint64_t ShVal = *ShAmtC;
2651
2652 APInt HighBits =
2653 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2654 HighBits.lshrInPlace(ShVal);
2655 HighBits = HighBits.trunc(BitWidth);
2656 if (!(HighBits & DemandedBits)) {
2657 // None of the shifted in bits are needed. Add a truncate of the
2658 // shift input, then shift it.
2659 SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2660 SDValue NewTrunc =
2661 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2662 return TLO.CombineTo(
2663 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2664 }
2665 }
2666 break;
2667 }
2668
2669 break;
2670 }
2671 case ISD::AssertZext: {
2672 // AssertZext demands all of the high bits, plus any of the low bits
2673 // demanded by its users.
2674 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2676 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2677 TLO, Depth + 1))
2678 return true;
2679
2680 Known.Zero |= ~InMask;
2681 Known.One &= (~Known.Zero);
2682 break;
2683 }
2685 SDValue Src = Op.getOperand(0);
2686 SDValue Idx = Op.getOperand(1);
2687 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2688 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2689
2690 if (SrcEltCnt.isScalable())
2691 return false;
2692
2693 // Demand the bits from every vector element without a constant index.
2694 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2695 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2696 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2697 if (CIdx->getAPIntValue().ult(NumSrcElts))
2698 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2699
2700 // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2701 // anything about the extended bits.
2702 APInt DemandedSrcBits = DemandedBits;
2703 if (BitWidth > EltBitWidth)
2704 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2705
2706 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2707 Depth + 1))
2708 return true;
2709
2710 // Attempt to avoid multi-use ops if we don't need anything from them.
2711 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2712 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2713 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2714 SDValue NewOp =
2715 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2716 return TLO.CombineTo(Op, NewOp);
2717 }
2718 }
2719
2720 Known = Known2;
2721 if (BitWidth > EltBitWidth)
2722 Known = Known.anyext(BitWidth);
2723 break;
2724 }
2725 case ISD::BITCAST: {
2726 if (VT.isScalableVector())
2727 return false;
2728 SDValue Src = Op.getOperand(0);
2729 EVT SrcVT = Src.getValueType();
2730 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2731
2732 // If this is an FP->Int bitcast and if the sign bit is the only
2733 // thing demanded, turn this into a FGETSIGN.
2734 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2735 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2736 SrcVT.isFloatingPoint()) {
2737 bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2738 bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2739 if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2740 SrcVT != MVT::f128) {
2741 // Cannot eliminate/lower SHL for f128 yet.
2742 EVT Ty = OpVTLegal ? VT : MVT::i32;
2743 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2744 // place. We expect the SHL to be eliminated by other optimizations.
2745 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2746 unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2747 if (!OpVTLegal && OpVTSizeInBits > 32)
2748 Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2749 unsigned ShVal = Op.getValueSizeInBits() - 1;
2750 SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2751 return TLO.CombineTo(Op,
2752 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2753 }
2754 }
2755
2756 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2757 // Demand the elt/bit if any of the original elts/bits are demanded.
2758 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2759 unsigned Scale = BitWidth / NumSrcEltBits;
2760 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2761 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2762 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2763 for (unsigned i = 0; i != Scale; ++i) {
2764 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2765 unsigned BitOffset = EltOffset * NumSrcEltBits;
2766 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2767 if (!Sub.isZero()) {
2768 DemandedSrcBits |= Sub;
2769 for (unsigned j = 0; j != NumElts; ++j)
2770 if (DemandedElts[j])
2771 DemandedSrcElts.setBit((j * Scale) + i);
2772 }
2773 }
2774
2775 APInt KnownSrcUndef, KnownSrcZero;
2776 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2777 KnownSrcZero, TLO, Depth + 1))
2778 return true;
2779
2780 KnownBits KnownSrcBits;
2781 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2782 KnownSrcBits, TLO, Depth + 1))
2783 return true;
2784 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2785 // TODO - bigendian once we have test coverage.
2786 unsigned Scale = NumSrcEltBits / BitWidth;
2787 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2788 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2789 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2790 for (unsigned i = 0; i != NumElts; ++i)
2791 if (DemandedElts[i]) {
2792 unsigned Offset = (i % Scale) * BitWidth;
2793 DemandedSrcBits.insertBits(DemandedBits, Offset);
2794 DemandedSrcElts.setBit(i / Scale);
2795 }
2796
2797 if (SrcVT.isVector()) {
2798 APInt KnownSrcUndef, KnownSrcZero;
2799 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2800 KnownSrcZero, TLO, Depth + 1))
2801 return true;
2802 }
2803
2804 KnownBits KnownSrcBits;
2805 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2806 KnownSrcBits, TLO, Depth + 1))
2807 return true;
2808
2809 // Attempt to avoid multi-use ops if we don't need anything from them.
2810 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2811 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2812 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2813 SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2814 return TLO.CombineTo(Op, NewOp);
2815 }
2816 }
2817 }
2818
2819 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2820 // recursive call where Known may be useful to the caller.
2821 if (Depth > 0) {
2822 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2823 return false;
2824 }
2825 break;
2826 }
2827 case ISD::MUL:
2828 if (DemandedBits.isPowerOf2()) {
2829 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2830 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2831 // odd (has LSB set), then the left-shifted low bit of X is the answer.
2832 unsigned CTZ = DemandedBits.countr_zero();
2833 ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2834 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2835 SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2836 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2837 return TLO.CombineTo(Op, Shl);
2838 }
2839 }
2840 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2841 // X * X is odd iff X is odd.
2842 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2843 if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2844 SDValue One = TLO.DAG.getConstant(1, dl, VT);
2845 SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2846 return TLO.CombineTo(Op, And1);
2847 }
2848 [[fallthrough]];
2849 case ISD::ADD:
2850 case ISD::SUB: {
2851 // Add, Sub, and Mul don't demand any bits in positions beyond that
2852 // of the highest bit demanded of them.
2853 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2854 SDNodeFlags Flags = Op.getNode()->getFlags();
2855 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2856 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2857 KnownBits KnownOp0, KnownOp1;
2858 auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2859 const KnownBits &KnownRHS) {
2860 if (Op.getOpcode() == ISD::MUL)
2861 Demanded.clearHighBits(KnownRHS.countMinTrailingZeros());
2862 return Demanded;
2863 };
2864 if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2865 Depth + 1) ||
2866 SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
2867 DemandedElts, KnownOp0, TLO, Depth + 1) ||
2868 // See if the operation should be performed at a smaller bit width.
2869 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2870 // Disable the nsw and nuw flags. We can no longer guarantee that we
2871 // won't wrap after simplification.
2872 Op->dropFlags(SDNodeFlags::NoWrap);
2873 return true;
2874 }
2875
2876 // neg x with only low bit demanded is simply x.
2877 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2878 isNullConstant(Op0))
2879 return TLO.CombineTo(Op, Op1);
2880
2881 // Attempt to avoid multi-use ops if we don't need anything from them.
2882 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2883 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2884 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2885 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2886 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2887 if (DemandedOp0 || DemandedOp1) {
2888 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2889 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2890 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
2891 Flags & ~SDNodeFlags::NoWrap);
2892 return TLO.CombineTo(Op, NewOp);
2893 }
2894 }
2895
2896 // If we have a constant operand, we may be able to turn it into -1 if we
2897 // do not demand the high bits. This can make the constant smaller to
2898 // encode, allow more general folding, or match specialized instruction
2899 // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2900 // is probably not useful (and could be detrimental).
2902 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2903 if (C && !C->isAllOnes() && !C->isOne() &&
2904 (C->getAPIntValue() | HighMask).isAllOnes()) {
2905 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2906 // Disable the nsw and nuw flags. We can no longer guarantee that we
2907 // won't wrap after simplification.
2908 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1,
2909 Flags & ~SDNodeFlags::NoWrap);
2910 return TLO.CombineTo(Op, NewOp);
2911 }
2912
2913 // Match a multiply with a disguised negated-power-of-2 and convert to a
2914 // an equivalent shift-left amount.
2915 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2916 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2917 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2918 return 0;
2919
2920 // Don't touch opaque constants. Also, ignore zero and power-of-2
2921 // multiplies. Those will get folded later.
2922 ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2923 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2924 !MulC->getAPIntValue().isPowerOf2()) {
2925 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2926 if (UnmaskedC.isNegatedPowerOf2())
2927 return (-UnmaskedC).logBase2();
2928 }
2929 return 0;
2930 };
2931
2932 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2933 unsigned ShlAmt) {
2934 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
2935 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2936 SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2937 return TLO.CombineTo(Op, Res);
2938 };
2939
2941 if (Op.getOpcode() == ISD::ADD) {
2942 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2943 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2944 return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2945 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2946 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2947 return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2948 }
2949 if (Op.getOpcode() == ISD::SUB) {
2950 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2951 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2952 return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2953 }
2954 }
2955
2956 if (Op.getOpcode() == ISD::MUL) {
2957 Known = KnownBits::mul(KnownOp0, KnownOp1);
2958 } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
2960 Op.getOpcode() == ISD::ADD, Flags.hasNoSignedWrap(),
2961 Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
2962 }
2963 break;
2964 }
2965 default:
2966 // We also ask the target about intrinsics (which could be specific to it).
2967 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2968 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2969 // TODO: Probably okay to remove after audit; here to reduce change size
2970 // in initial enablement patch for scalable vectors
2971 if (Op.getValueType().isScalableVector())
2972 break;
2973 if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2974 Known, TLO, Depth))
2975 return true;
2976 break;
2977 }
2978
2979 // Just use computeKnownBits to compute output bits.
2980 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2981 break;
2982 }
2983
2984 // If we know the value of all of the demanded bits, return this as a
2985 // constant.
2986 if (!isTargetCanonicalConstantNode(Op) &&
2987 DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2988 // Avoid folding to a constant if any OpaqueConstant is involved.
2989 if (llvm::any_of(Op->ops(), [](SDValue V) {
2990 auto *C = dyn_cast<ConstantSDNode>(V);
2991 return C && C->isOpaque();
2992 }))
2993 return false;
2994 if (VT.isInteger())
2995 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2996 if (VT.isFloatingPoint())
2997 return TLO.CombineTo(
2998 Op, TLO.DAG.getConstantFP(APFloat(VT.getFltSemantics(), Known.One),
2999 dl, VT));
3000 }
3001
3002 // A multi use 'all demanded elts' simplify failed to find any knownbits.
3003 // Try again just for the original demanded elts.
3004 // Ensure we do this AFTER constant folding above.
3005 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
3006 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
3007
3008 return false;
3009}
3010
3012 const APInt &DemandedElts,
3013 DAGCombinerInfo &DCI) const {
3014 SelectionDAG &DAG = DCI.DAG;
3015 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
3016 !DCI.isBeforeLegalizeOps());
3017
3018 APInt KnownUndef, KnownZero;
3019 bool Simplified =
3020 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
3021 if (Simplified) {
3022 DCI.AddToWorklist(Op.getNode());
3023 DCI.CommitTargetLoweringOpt(TLO);
3024 }
3025
3026 return Simplified;
3027}
3028
3029/// Given a vector binary operation and known undefined elements for each input
3030/// operand, compute whether each element of the output is undefined.
3032 const APInt &UndefOp0,
3033 const APInt &UndefOp1) {
3034 EVT VT = BO.getValueType();
3036 "Vector binop only");
3037
3038 EVT EltVT = VT.getVectorElementType();
3039 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
3040 assert(UndefOp0.getBitWidth() == NumElts &&
3041 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3042
3043 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3044 const APInt &UndefVals) {
3045 if (UndefVals[Index])
3046 return DAG.getUNDEF(EltVT);
3047
3048 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
3049 // Try hard to make sure that the getNode() call is not creating temporary
3050 // nodes. Ignore opaque integers because they do not constant fold.
3051 SDValue Elt = BV->getOperand(Index);
3052 auto *C = dyn_cast<ConstantSDNode>(Elt);
3053 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3054 return Elt;
3055 }
3056
3057 return SDValue();
3058 };
3059
3060 APInt KnownUndef = APInt::getZero(NumElts);
3061 for (unsigned i = 0; i != NumElts; ++i) {
3062 // If both inputs for this element are either constant or undef and match
3063 // the element type, compute the constant/undef result for this element of
3064 // the vector.
3065 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3066 // not handle FP constants. The code within getNode() should be refactored
3067 // to avoid the danger of creating a bogus temporary node here.
3068 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
3069 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
3070 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3071 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
3072 KnownUndef.setBit(i);
3073 }
3074 return KnownUndef;
3075}
3076
3078 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3079 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3080 bool AssumeSingleUse) const {
3081 EVT VT = Op.getValueType();
3082 unsigned Opcode = Op.getOpcode();
3083 APInt DemandedElts = OriginalDemandedElts;
3084 unsigned NumElts = DemandedElts.getBitWidth();
3085 assert(VT.isVector() && "Expected vector op");
3086
3087 KnownUndef = KnownZero = APInt::getZero(NumElts);
3088
3089 if (!shouldSimplifyDemandedVectorElts(Op, TLO))
3090 return false;
3091
3092 // TODO: For now we assume we know nothing about scalable vectors.
3093 if (VT.isScalableVector())
3094 return false;
3095
3096 assert(VT.getVectorNumElements() == NumElts &&
3097 "Mask size mismatches value type element count!");
3098
3099 // Undef operand.
3100 if (Op.isUndef()) {
3101 KnownUndef.setAllBits();
3102 return false;
3103 }
3104
3105 // If Op has other users, assume that all elements are needed.
3106 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3107 DemandedElts.setAllBits();
3108
3109 // Not demanding any elements from Op.
3110 if (DemandedElts == 0) {
3111 KnownUndef.setAllBits();
3112 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3113 }
3114
3115 // Limit search depth.
3117 return false;
3118
3119 SDLoc DL(Op);
3120 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3121 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3122
3123 // Helper for demanding the specified elements and all the bits of both binary
3124 // operands.
3125 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3126 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3127 TLO.DAG, Depth + 1);
3128 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3129 TLO.DAG, Depth + 1);
3130 if (NewOp0 || NewOp1) {
3131 SDValue NewOp =
3132 TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3133 NewOp1 ? NewOp1 : Op1, Op->getFlags());
3134 return TLO.CombineTo(Op, NewOp);
3135 }
3136 return false;
3137 };
3138
3139 switch (Opcode) {
3140 case ISD::SCALAR_TO_VECTOR: {
3141 if (!DemandedElts[0]) {
3142 KnownUndef.setAllBits();
3143 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3144 }
3145 SDValue ScalarSrc = Op.getOperand(0);
3146 if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
3147 SDValue Src = ScalarSrc.getOperand(0);
3148 SDValue Idx = ScalarSrc.getOperand(1);
3149 EVT SrcVT = Src.getValueType();
3150
3151 ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
3152
3153 if (SrcEltCnt.isScalable())
3154 return false;
3155
3156 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
3157 if (isNullConstant(Idx)) {
3158 APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
3159 APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
3160 APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
3161 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3162 TLO, Depth + 1))
3163 return true;
3164 }
3165 }
3166 KnownUndef.setHighBits(NumElts - 1);
3167 break;
3168 }
3169 case ISD::BITCAST: {
3170 SDValue Src = Op.getOperand(0);
3171 EVT SrcVT = Src.getValueType();
3172
3173 // We only handle vectors here.
3174 // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
3175 if (!SrcVT.isVector())
3176 break;
3177
3178 // Fast handling of 'identity' bitcasts.
3179 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3180 if (NumSrcElts == NumElts)
3181 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3182 KnownZero, TLO, Depth + 1);
3183
3184 APInt SrcDemandedElts, SrcZero, SrcUndef;
3185
3186 // Bitcast from 'large element' src vector to 'small element' vector, we
3187 // must demand a source element if any DemandedElt maps to it.
3188 if ((NumElts % NumSrcElts) == 0) {
3189 unsigned Scale = NumElts / NumSrcElts;
3190 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3191 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3192 TLO, Depth + 1))
3193 return true;
3194
3195 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3196 // of the large element.
3197 // TODO - bigendian once we have test coverage.
3198 if (IsLE) {
3199 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3200 APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3201 for (unsigned i = 0; i != NumElts; ++i)
3202 if (DemandedElts[i]) {
3203 unsigned Ofs = (i % Scale) * EltSizeInBits;
3204 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3205 }
3206
3207 KnownBits Known;
3208 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3209 TLO, Depth + 1))
3210 return true;
3211
3212 // The bitcast has split each wide element into a number of
3213 // narrow subelements. We have just computed the Known bits
3214 // for wide elements. See if element splitting results in
3215 // some subelements being zero. Only for demanded elements!
3216 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3217 if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3218 .isAllOnes())
3219 continue;
3220 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3221 unsigned Elt = Scale * SrcElt + SubElt;
3222 if (DemandedElts[Elt])
3223 KnownZero.setBit(Elt);
3224 }
3225 }
3226 }
3227
3228 // If the src element is zero/undef then all the output elements will be -
3229 // only demanded elements are guaranteed to be correct.
3230 for (unsigned i = 0; i != NumSrcElts; ++i) {
3231 if (SrcDemandedElts[i]) {
3232 if (SrcZero[i])
3233 KnownZero.setBits(i * Scale, (i + 1) * Scale);
3234 if (SrcUndef[i])
3235 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3236 }
3237 }
3238 }
3239
3240 // Bitcast from 'small element' src vector to 'large element' vector, we
3241 // demand all smaller source elements covered by the larger demanded element
3242 // of this vector.
3243 if ((NumSrcElts % NumElts) == 0) {
3244 unsigned Scale = NumSrcElts / NumElts;
3245 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3246 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3247 TLO, Depth + 1))
3248 return true;
3249
3250 // If all the src elements covering an output element are zero/undef, then
3251 // the output element will be as well, assuming it was demanded.
3252 for (unsigned i = 0; i != NumElts; ++i) {
3253 if (DemandedElts[i]) {
3254 if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3255 KnownZero.setBit(i);
3256 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3257 KnownUndef.setBit(i);
3258 }
3259 }
3260 }
3261 break;
3262 }
3263 case ISD::FREEZE: {
3264 SDValue N0 = Op.getOperand(0);
3265 if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
3266 /*PoisonOnly=*/false))
3267 return TLO.CombineTo(Op, N0);
3268
3269 // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3270 // freeze(op(x, ...)) -> op(freeze(x), ...).
3271 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3272 return TLO.CombineTo(
3274 TLO.DAG.getFreeze(N0.getOperand(0))));
3275 break;
3276 }
3277 case ISD::BUILD_VECTOR: {
3278 // Check all elements and simplify any unused elements with UNDEF.
3279 if (!DemandedElts.isAllOnes()) {
3280 // Don't simplify BROADCASTS.
3281 if (llvm::any_of(Op->op_values(),
3282 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3283 SmallVector<SDValue, 32> Ops(Op->ops());
3284 bool Updated = false;
3285 for (unsigned i = 0; i != NumElts; ++i) {
3286 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3287 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3288 KnownUndef.setBit(i);
3289 Updated = true;
3290 }
3291 }
3292 if (Updated)
3293 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3294 }
3295 }
3296 for (unsigned i = 0; i != NumElts; ++i) {
3297 SDValue SrcOp = Op.getOperand(i);
3298 if (SrcOp.isUndef()) {
3299 KnownUndef.setBit(i);
3300 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3302 KnownZero.setBit(i);
3303 }
3304 }
3305 break;
3306 }
3307 case ISD::CONCAT_VECTORS: {
3308 EVT SubVT = Op.getOperand(0).getValueType();
3309 unsigned NumSubVecs = Op.getNumOperands();
3310 unsigned NumSubElts = SubVT.getVectorNumElements();
3311 for (unsigned i = 0; i != NumSubVecs; ++i) {
3312 SDValue SubOp = Op.getOperand(i);
3313 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3314 APInt SubUndef, SubZero;
3315 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3316 Depth + 1))
3317 return true;
3318 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3319 KnownZero.insertBits(SubZero, i * NumSubElts);
3320 }
3321
3322 // Attempt to avoid multi-use ops if we don't need anything from them.
3323 if (!DemandedElts.isAllOnes()) {
3324 bool FoundNewSub = false;
3325 SmallVector<SDValue, 2> DemandedSubOps;
3326 for (unsigned i = 0; i != NumSubVecs; ++i) {
3327 SDValue SubOp = Op.getOperand(i);
3328 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3329 SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3330 SubOp, SubElts, TLO.DAG, Depth + 1);
3331 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3332 FoundNewSub = NewSubOp ? true : FoundNewSub;
3333 }
3334 if (FoundNewSub) {
3335 SDValue NewOp =
3336 TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3337 return TLO.CombineTo(Op, NewOp);
3338 }
3339 }
3340 break;
3341 }
3342 case ISD::INSERT_SUBVECTOR: {
3343 // Demand any elements from the subvector and the remainder from the src its
3344 // inserted into.
3345 SDValue Src = Op.getOperand(0);
3346 SDValue Sub = Op.getOperand(1);
3347 uint64_t Idx = Op.getConstantOperandVal(2);
3348 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3349 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3350 APInt DemandedSrcElts = DemandedElts;
3351 DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
3352
3353 APInt SubUndef, SubZero;
3354 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3355 Depth + 1))
3356 return true;
3357
3358 // If none of the src operand elements are demanded, replace it with undef.
3359 if (!DemandedSrcElts && !Src.isUndef())
3360 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3361 TLO.DAG.getUNDEF(VT), Sub,
3362 Op.getOperand(2)));
3363
3364 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3365 TLO, Depth + 1))
3366 return true;
3367 KnownUndef.insertBits(SubUndef, Idx);
3368 KnownZero.insertBits(SubZero, Idx);
3369
3370 // Attempt to avoid multi-use ops if we don't need anything from them.
3371 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3372 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3373 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3374 SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3375 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3376 if (NewSrc || NewSub) {
3377 NewSrc = NewSrc ? NewSrc : Src;
3378 NewSub = NewSub ? NewSub : Sub;
3379 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3380 NewSub, Op.getOperand(2));
3381 return TLO.CombineTo(Op, NewOp);
3382 }
3383 }
3384 break;
3385 }
3387 // Offset the demanded elts by the subvector index.
3388 SDValue Src = Op.getOperand(0);
3389 if (Src.getValueType().isScalableVector())
3390 break;
3391 uint64_t Idx = Op.getConstantOperandVal(1);
3392 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3393 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3394
3395 APInt SrcUndef, SrcZero;
3396 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3397 Depth + 1))
3398 return true;
3399 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3400 KnownZero = SrcZero.extractBits(NumElts, Idx);
3401
3402 // Attempt to avoid multi-use ops if we don't need anything from them.
3403 if (!DemandedElts.isAllOnes()) {
3404 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3405 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3406 if (NewSrc) {
3407 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3408 Op.getOperand(1));
3409 return TLO.CombineTo(Op, NewOp);
3410 }
3411 }
3412 break;
3413 }
3415 SDValue Vec = Op.getOperand(0);
3416 SDValue Scl = Op.getOperand(1);
3417 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3418
3419 // For a legal, constant insertion index, if we don't need this insertion
3420 // then strip it, else remove it from the demanded elts.
3421 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3422 unsigned Idx = CIdx->getZExtValue();
3423 if (!DemandedElts[Idx])
3424 return TLO.CombineTo(Op, Vec);
3425
3426 APInt DemandedVecElts(DemandedElts);
3427 DemandedVecElts.clearBit(Idx);
3428 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3429 KnownZero, TLO, Depth + 1))
3430 return true;
3431
3432 KnownUndef.setBitVal(Idx, Scl.isUndef());
3433
3434 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3435 break;
3436 }
3437
3438 APInt VecUndef, VecZero;
3439 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3440 Depth + 1))
3441 return true;
3442 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3443 break;
3444 }
3445 case ISD::VSELECT: {
3446 SDValue Sel = Op.getOperand(0);
3447 SDValue LHS = Op.getOperand(1);
3448 SDValue RHS = Op.getOperand(2);
3449
3450 // Try to transform the select condition based on the current demanded
3451 // elements.
3452 APInt UndefSel, ZeroSel;
3453 if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3454 Depth + 1))
3455 return true;
3456
3457 // See if we can simplify either vselect operand.
3458 APInt DemandedLHS(DemandedElts);
3459 APInt DemandedRHS(DemandedElts);
3460 APInt UndefLHS, ZeroLHS;
3461 APInt UndefRHS, ZeroRHS;
3462 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3463 Depth + 1))
3464 return true;
3465 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3466 Depth + 1))
3467 return true;
3468
3469 KnownUndef = UndefLHS & UndefRHS;
3470 KnownZero = ZeroLHS & ZeroRHS;
3471
3472 // If we know that the selected element is always zero, we don't need the
3473 // select value element.
3474 APInt DemandedSel = DemandedElts & ~KnownZero;
3475 if (DemandedSel != DemandedElts)
3476 if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3477 Depth + 1))
3478 return true;
3479
3480 break;
3481 }
3482 case ISD::VECTOR_SHUFFLE: {
3483 SDValue LHS = Op.getOperand(0);
3484 SDValue RHS = Op.getOperand(1);
3485 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3486
3487 // Collect demanded elements from shuffle operands..
3488 APInt DemandedLHS(NumElts, 0);
3489 APInt DemandedRHS(NumElts, 0);
3490 for (unsigned i = 0; i != NumElts; ++i) {
3491 int M = ShuffleMask[i];
3492 if (M < 0 || !DemandedElts[i])
3493 continue;
3494 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3495 if (M < (int)NumElts)
3496 DemandedLHS.setBit(M);
3497 else
3498 DemandedRHS.setBit(M - NumElts);
3499 }
3500
3501 // See if we can simplify either shuffle operand.
3502 APInt UndefLHS, ZeroLHS;
3503 APInt UndefRHS, ZeroRHS;
3504 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3505 Depth + 1))
3506 return true;
3507 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3508 Depth + 1))
3509 return true;
3510
3511 // Simplify mask using undef elements from LHS/RHS.
3512 bool Updated = false;
3513 bool IdentityLHS = true, IdentityRHS = true;
3514 SmallVector<int, 32> NewMask(ShuffleMask);
3515 for (unsigned i = 0; i != NumElts; ++i) {
3516 int &M = NewMask[i];
3517 if (M < 0)
3518 continue;
3519 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3520 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3521 Updated = true;
3522 M = -1;
3523 }
3524 IdentityLHS &= (M < 0) || (M == (int)i);
3525 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3526 }
3527
3528 // Update legal shuffle masks based on demanded elements if it won't reduce
3529 // to Identity which can cause premature removal of the shuffle mask.
3530 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3531 SDValue LegalShuffle =
3532 buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3533 if (LegalShuffle)
3534 return TLO.CombineTo(Op, LegalShuffle);
3535 }
3536
3537 // Propagate undef/zero elements from LHS/RHS.
3538 for (unsigned i = 0; i != NumElts; ++i) {
3539 int M = ShuffleMask[i];
3540 if (M < 0) {
3541 KnownUndef.setBit(i);
3542 } else if (M < (int)NumElts) {
3543 if (UndefLHS[M])
3544 KnownUndef.setBit(i);
3545 if (ZeroLHS[M])
3546 KnownZero.setBit(i);
3547 } else {
3548 if (UndefRHS[M - NumElts])
3549 KnownUndef.setBit(i);
3550 if (ZeroRHS[M - NumElts])
3551 KnownZero.setBit(i);
3552 }
3553 }
3554 break;
3555 }
3559 APInt SrcUndef, SrcZero;
3560 SDValue Src = Op.getOperand(0);
3561 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3562 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3563 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3564 Depth + 1))
3565 return true;
3566 KnownZero = SrcZero.zextOrTrunc(NumElts);
3567 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3568
3569 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3570 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3571 DemandedSrcElts == 1) {
3572 // aext - if we just need the bottom element then we can bitcast.
3573 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3574 }
3575
3576 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3577 // zext(undef) upper bits are guaranteed to be zero.
3578 if (DemandedElts.isSubsetOf(KnownUndef))
3579 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3580 KnownUndef.clearAllBits();
3581
3582 // zext - if we just need the bottom element then we can mask:
3583 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3584 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3585 Op->isOnlyUserOf(Src.getNode()) &&
3586 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3587 SDLoc DL(Op);
3588 EVT SrcVT = Src.getValueType();
3589 EVT SrcSVT = SrcVT.getScalarType();
3590 SmallVector<SDValue> MaskElts;
3591 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3592 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3593 SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3594 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3595 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3596 Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3597 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3598 }
3599 }
3600 }
3601 break;
3602 }
3603
3604 // TODO: There are more binop opcodes that could be handled here - MIN,
3605 // MAX, saturated math, etc.
3606 case ISD::ADD: {
3607 SDValue Op0 = Op.getOperand(0);
3608 SDValue Op1 = Op.getOperand(1);
3609 if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3610 APInt UndefLHS, ZeroLHS;
3611 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3612 Depth + 1, /*AssumeSingleUse*/ true))
3613 return true;
3614 }
3615 [[fallthrough]];
3616 }
3617 case ISD::AVGCEILS:
3618 case ISD::AVGCEILU:
3619 case ISD::AVGFLOORS:
3620 case ISD::AVGFLOORU:
3621 case ISD::OR:
3622 case ISD::XOR:
3623 case ISD::SUB:
3624 case ISD::FADD:
3625 case ISD::FSUB:
3626 case ISD::FMUL:
3627 case ISD::FDIV:
3628 case ISD::FREM: {
3629 SDValue Op0 = Op.getOperand(0);
3630 SDValue Op1 = Op.getOperand(1);
3631
3632 APInt UndefRHS, ZeroRHS;
3633 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3634 Depth + 1))
3635 return true;
3636 APInt UndefLHS, ZeroLHS;
3637 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3638 Depth + 1))
3639 return true;
3640
3641 KnownZero = ZeroLHS & ZeroRHS;
3642 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3643
3644 // Attempt to avoid multi-use ops if we don't need anything from them.
3645 // TODO - use KnownUndef to relax the demandedelts?
3646 if (!DemandedElts.isAllOnes())
3647 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3648 return true;
3649 break;
3650 }
3651 case ISD::SHL:
3652 case ISD::SRL:
3653 case ISD::SRA:
3654 case ISD::ROTL:
3655 case ISD::ROTR: {
3656 SDValue Op0 = Op.getOperand(0);
3657 SDValue Op1 = Op.getOperand(1);
3658
3659 APInt UndefRHS, ZeroRHS;
3660 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3661 Depth + 1))
3662 return true;
3663 APInt UndefLHS, ZeroLHS;
3664 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3665 Depth + 1))
3666 return true;
3667
3668 KnownZero = ZeroLHS;
3669 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3670
3671 // Attempt to avoid multi-use ops if we don't need anything from them.
3672 // TODO - use KnownUndef to relax the demandedelts?
3673 if (!DemandedElts.isAllOnes())
3674 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3675 return true;
3676 break;
3677 }
3678 case ISD::MUL:
3679 case ISD::MULHU:
3680 case ISD::MULHS:
3681 case ISD::AND: {
3682 SDValue Op0 = Op.getOperand(0);
3683 SDValue Op1 = Op.getOperand(1);
3684
3685 APInt SrcUndef, SrcZero;
3686 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3687 Depth + 1))
3688 return true;
3689 // If we know that a demanded element was zero in Op1 we don't need to
3690 // demand it in Op0 - its guaranteed to be zero.
3691 APInt DemandedElts0 = DemandedElts & ~SrcZero;
3692 if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3693 TLO, Depth + 1))
3694 return true;
3695
3696 KnownUndef &= DemandedElts0;
3697 KnownZero &= DemandedElts0;
3698
3699 // If every element pair has a zero/undef then just fold to zero.
3700 // fold (and x, undef) -> 0 / (and x, 0) -> 0
3701 // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3702 if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3703 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3704
3705 // If either side has a zero element, then the result element is zero, even
3706 // if the other is an UNDEF.
3707 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3708 // and then handle 'and' nodes with the rest of the binop opcodes.
3709 KnownZero |= SrcZero;
3710 KnownUndef &= SrcUndef;
3711 KnownUndef &= ~KnownZero;
3712
3713 // Attempt to avoid multi-use ops if we don't need anything from them.
3714 if (!DemandedElts.isAllOnes())
3715 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3716 return true;
3717 break;
3718 }
3719 case ISD::TRUNCATE:
3720 case ISD::SIGN_EXTEND:
3721 case ISD::ZERO_EXTEND:
3722 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3723 KnownZero, TLO, Depth + 1))
3724 return true;
3725
3726 if (!DemandedElts.isAllOnes())
3727 if (SDValue NewOp = SimplifyMultipleUseDemandedVectorElts(
3728 Op.getOperand(0), DemandedElts, TLO.DAG, Depth + 1))
3729 return TLO.CombineTo(Op, TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp));
3730
3731 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3732 // zext(undef) upper bits are guaranteed to be zero.
3733 if (DemandedElts.isSubsetOf(KnownUndef))
3734 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3735 KnownUndef.clearAllBits();
3736 }
3737 break;
3738 case ISD::SINT_TO_FP:
3739 case ISD::UINT_TO_FP:
3740 case ISD::FP_TO_SINT:
3741 case ISD::FP_TO_UINT:
3742 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3743 KnownZero, TLO, Depth + 1))
3744 return true;
3745 // Don't fall through to generic undef -> undef handling.
3746 return false;
3747 default: {
3748 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3749 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3750 KnownZero, TLO, Depth))
3751 return true;
3752 } else {
3753 KnownBits Known;
3754 APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3755 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3756 TLO, Depth, AssumeSingleUse))
3757 return true;
3758 }
3759 break;
3760 }
3761 }
3762 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3763
3764 // Constant fold all undef cases.
3765 // TODO: Handle zero cases as well.
3766 if (DemandedElts.isSubsetOf(KnownUndef))
3767 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3768
3769 return false;
3770}
3771
3772/// Determine which of the bits specified in Mask are known to be either zero or
3773/// one and return them in the Known.
3775 KnownBits &Known,
3776 const APInt &DemandedElts,
3777 const SelectionDAG &DAG,
3778 unsigned Depth) const {
3779 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3780 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3781 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3782 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3783 "Should use MaskedValueIsZero if you don't know whether Op"
3784 " is a target node!");
3785 Known.resetAll();
3786}
3787
3790 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3791 unsigned Depth) const {
3792 Known.resetAll();
3793}
3794
3796 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3797 // The low bits are known zero if the pointer is aligned.
3798 Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3799}
3800
3803 unsigned Depth) const {
3804 return Align(1);
3805}
3806
3807/// This method can be implemented by targets that want to expose additional
3808/// information about sign bits to the DAG Combiner.
3810 const APInt &,
3811 const SelectionDAG &,
3812 unsigned Depth) const {
3813 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3814 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3815 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3816 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3817 "Should use ComputeNumSignBits if you don't know whether Op"
3818 " is a target node!");
3819 return 1;
3820}
3821
3823 GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
3824 const MachineRegisterInfo &MRI, unsigned Depth) const {
3825 return 1;
3826}
3827
3829 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3830 TargetLoweringOpt &TLO, unsigned Depth) const {
3831 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3832 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3833 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3834 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3835 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3836 " is a target node!");
3837 return false;
3838}
3839
3841 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3842 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3843 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3844 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3845 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3846 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3847 "Should use SimplifyDemandedBits if you don't know whether Op"
3848 " is a target node!");
3849 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3850 return false;
3851}
3852
3854 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3855 SelectionDAG &DAG, unsigned Depth) const {
3856 assert(
3857 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3858 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3859 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3860 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3861 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3862 " is a target node!");
3863 return SDValue();
3864}
3865
3866SDValue
3869 SelectionDAG &DAG) const {
3870 bool LegalMask = isShuffleMaskLegal(Mask, VT);
3871 if (!LegalMask) {
3872 std::swap(N0, N1);
3874 LegalMask = isShuffleMaskLegal(Mask, VT);
3875 }
3876
3877 if (!LegalMask)
3878 return SDValue();
3879
3880 return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3881}
3882
3884 return nullptr;
3885}
3886
3888 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3889 bool PoisonOnly, unsigned Depth) const {
3890 assert(
3891 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3892 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3893 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3894 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3895 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3896 " is a target node!");
3897
3898 // If Op can't create undef/poison and none of its operands are undef/poison
3899 // then Op is never undef/poison.
3900 return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
3901 /*ConsiderFlags*/ true, Depth) &&
3902 all_of(Op->ops(), [&](SDValue V) {
3903 return DAG.isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly,
3904 Depth + 1);
3905 });
3906}
3907
3909 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3910 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
3911 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3912 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3913 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3914 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3915 "Should use canCreateUndefOrPoison if you don't know whether Op"
3916 " is a target node!");
3917 // Be conservative and return true.
3918 return true;
3919}
3920
3922 const SelectionDAG &DAG,
3923 bool SNaN,
3924 unsigned Depth) const {
3925 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3926 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3927 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3928 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3929 "Should use isKnownNeverNaN if you don't know whether Op"
3930 " is a target node!");
3931 return false;
3932}
3933
3935 const APInt &DemandedElts,
3936 APInt &UndefElts,
3937 const SelectionDAG &DAG,
3938 unsigned Depth) const {
3939 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3940 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3941 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3942 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3943 "Should use isSplatValue if you don't know whether Op"
3944 " is a target node!");
3945 return false;
3946}
3947
3948// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3949// work with truncating build vectors and vectors with elements of less than
3950// 8 bits.
3952 if (!N)
3953 return false;
3954
3955 unsigned EltWidth;
3956 APInt CVal;
3957 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
3958 /*AllowTruncation=*/true)) {
3959 CVal = CN->getAPIntValue();
3960 EltWidth = N.getValueType().getScalarSizeInBits();
3961 } else
3962 return false;
3963
3964 // If this is a truncating splat, truncate the splat value.
3965 // Otherwise, we may fail to match the expected values below.
3966 if (EltWidth < CVal.getBitWidth())
3967 CVal = CVal.trunc(EltWidth);
3968
3969 switch (getBooleanContents(N.getValueType())) {
3971 return CVal[0];
3973 return CVal.isOne();
3975 return CVal.isAllOnes();
3976 }
3977
3978 llvm_unreachable("Invalid boolean contents");
3979}
3980
3982 if (!N)
3983 return false;
3984
3985 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3986 if (!CN) {
3987 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3988 if (!BV)
3989 return false;
3990
3991 // Only interested in constant splats, we don't care about undef
3992 // elements in identifying boolean constants and getConstantSplatNode
3993 // returns NULL if all ops are undef;
3994 CN = BV->getConstantSplatNode();
3995 if (!CN)
3996 return false;
3997 }
3998
3999 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
4000 return !CN->getAPIntValue()[0];
4001
4002 return CN->isZero();
4003}
4004
4006 bool SExt) const {
4007 if (VT == MVT::i1)
4008 return N->isOne();
4009
4011 switch (Cnt) {
4013 // An extended value of 1 is always true, unless its original type is i1,
4014 // in which case it will be sign extended to -1.
4015 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
4018 return N->isAllOnes() && SExt;
4019 }
4020 llvm_unreachable("Unexpected enumeration.");
4021}
4022
4023/// This helper function of SimplifySetCC tries to optimize the comparison when
4024/// either operand of the SetCC node is a bitwise-and instruction.
4025SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
4026 ISD::CondCode Cond, const SDLoc &DL,
4027 DAGCombinerInfo &DCI) const {
4028 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
4029 std::swap(N0, N1);
4030
4031 SelectionDAG &DAG = DCI.DAG;
4032 EVT OpVT = N0.getValueType();
4033 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
4034 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4035 return SDValue();
4036
4037 // (X & Y) != 0 --> zextOrTrunc(X & Y)
4038 // iff everything but LSB is known zero:
4039 if (Cond == ISD::SETNE && isNullConstant(N1) &&
4042 unsigned NumEltBits = OpVT.getScalarSizeInBits();
4043 APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
4044 if (DAG.MaskedValueIsZero(N0, UpperBits))
4045 return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
4046 }
4047
4048 // Try to eliminate a power-of-2 mask constant by converting to a signbit
4049 // test in a narrow type that we can truncate to with no cost. Examples:
4050 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
4051 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
4052 // TODO: This conservatively checks for type legality on the source and
4053 // destination types. That may inhibit optimizations, but it also
4054 // allows setcc->shift transforms that may be more beneficial.
4055 auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4056 if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
4057 isTypeLegal(OpVT) && N0.hasOneUse()) {
4058 EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
4059 AndC->getAPIntValue().getActiveBits());
4060 if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
4061 SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
4062 SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
4063 return DAG.getSetCC(DL, VT, Trunc, Zero,
4065 }
4066 }
4067
4068 // Match these patterns in any of their permutations:
4069 // (X & Y) == Y
4070 // (X & Y) != Y
4071 SDValue X, Y;
4072 if (N0.getOperand(0) == N1) {
4073 X = N0.getOperand(1);
4074 Y = N0.getOperand(0);
4075 } else if (N0.getOperand(1) == N1) {
4076 X = N0.getOperand(0);
4077 Y = N0.getOperand(1);
4078 } else {
4079 return SDValue();
4080 }
4081
4082 // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
4083 // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
4084 // its liable to create and infinite loop.
4085 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4086 if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
4088 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
4089 // Note that where Y is variable and is known to have at most one bit set
4090 // (for example, if it is Z & 1) we cannot do this; the expressions are not
4091 // equivalent when Y == 0.
4092 assert(OpVT.isInteger());
4094 if (DCI.isBeforeLegalizeOps() ||
4096 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
4097 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
4098 // If the target supports an 'and-not' or 'and-complement' logic operation,
4099 // try to use that to make a comparison operation more efficient.
4100 // But don't do this transform if the mask is a single bit because there are
4101 // more efficient ways to deal with that case (for example, 'bt' on x86 or
4102 // 'rlwinm' on PPC).
4103
4104 // Bail out if the compare operand that we want to turn into a zero is
4105 // already a zero (otherwise, infinite loop).
4106 if (isNullConstant(Y))
4107 return SDValue();
4108
4109 // Transform this into: ~X & Y == 0.
4110 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
4111 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
4112 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
4113 }
4114
4115 return SDValue();
4116}
4117
4118/// There are multiple IR patterns that could be checking whether certain
4119/// truncation of a signed number would be lossy or not. The pattern which is
4120/// best at IR level, may not lower optimally. Thus, we want to unfold it.
4121/// We are looking for the following pattern: (KeptBits is a constant)
4122/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4123/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
4124/// KeptBits also can't be 1, that would have been folded to %x dstcond 0
4125/// We will unfold it into the natural trunc+sext pattern:
4126/// ((%x << C) a>> C) dstcond %x
4127/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
4128SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4129 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4130 const SDLoc &DL) const {
4131 // We must be comparing with a constant.
4132 ConstantSDNode *C1;
4133 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
4134 return SDValue();
4135
4136 // N0 should be: add %x, (1 << (KeptBits-1))
4137 if (N0->getOpcode() != ISD::ADD)
4138 return SDValue();
4139
4140 // And we must be 'add'ing a constant.
4141 ConstantSDNode *C01;
4142 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
4143 return SDValue();
4144
4145 SDValue X = N0->getOperand(0);
4146 EVT XVT = X.getValueType();
4147
4148 // Validate constants ...
4149
4150 APInt I1 = C1->getAPIntValue();
4151
4152 ISD::CondCode NewCond;
4153 if (Cond == ISD::CondCode::SETULT) {
4154 NewCond = ISD::CondCode::SETEQ;
4155 } else if (Cond == ISD::CondCode::SETULE) {
4156 NewCond = ISD::CondCode::SETEQ;
4157 // But need to 'canonicalize' the constant.
4158 I1 += 1;
4159 } else if (Cond == ISD::CondCode::SETUGT) {
4160 NewCond = ISD::CondCode::SETNE;
4161 // But need to 'canonicalize' the constant.
4162 I1 += 1;
4163 } else if (Cond == ISD::CondCode::SETUGE) {
4164 NewCond = ISD::CondCode::SETNE;
4165 } else
4166 return SDValue();
4167
4168 APInt I01 = C01->getAPIntValue();
4169
4170 auto checkConstants = [&I1, &I01]() -> bool {
4171 // Both of them must be power-of-two, and the constant from setcc is bigger.
4172 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
4173 };
4174
4175 if (checkConstants()) {
4176 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
4177 } else {
4178 // What if we invert constants? (and the target predicate)
4179 I1.negate();
4180 I01.negate();
4181 assert(XVT.isInteger());
4182 NewCond = getSetCCInverse(NewCond, XVT);
4183 if (!checkConstants())
4184 return SDValue();
4185 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4186 }
4187
4188 // They are power-of-two, so which bit is set?
4189 const unsigned KeptBits = I1.logBase2();
4190 const unsigned KeptBitsMinusOne = I01.logBase2();
4191
4192 // Magic!
4193 if (KeptBits != (KeptBitsMinusOne + 1))
4194 return SDValue();
4195 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
4196
4197 // We don't want to do this in every single case.
4198 SelectionDAG &DAG = DCI.DAG;
4199 if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
4200 return SDValue();
4201
4202 // Unfold into: sext_inreg(%x) cond %x
4203 // Where 'cond' will be either 'eq' or 'ne'.
4204 SDValue SExtInReg = DAG.getNode(
4206 DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
4207 return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
4208}
4209
4210// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
4211SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4212 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4213 DAGCombinerInfo &DCI, const SDLoc &DL) const {
4215 "Should be a comparison with 0.");
4216 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4217 "Valid only for [in]equality comparisons.");
4218
4219 unsigned NewShiftOpcode;
4220 SDValue X, C, Y;
4221
4222 SelectionDAG &DAG = DCI.DAG;
4223
4224 // Look for '(C l>>/<< Y)'.
4225 auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
4226 // The shift should be one-use.
4227 if (!V.hasOneUse())
4228 return false;
4229 unsigned OldShiftOpcode = V.getOpcode();
4230 switch (OldShiftOpcode) {
4231 case ISD::SHL:
4232 NewShiftOpcode = ISD::SRL;
4233 break;
4234 case ISD::SRL:
4235 NewShiftOpcode = ISD::SHL;
4236 break;
4237 default:
4238 return false; // must be a logical shift.
4239 }
4240 // We should be shifting a constant.
4241 // FIXME: best to use isConstantOrConstantVector().
4242 C = V.getOperand(0);
4244 isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4245 if (!CC)
4246 return false;
4247 Y = V.getOperand(1);
4248
4250 isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4252 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4253 };
4254
4255 // LHS of comparison should be an one-use 'and'.
4256 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
4257 return SDValue();
4258
4259 X = N0.getOperand(0);
4260 SDValue Mask = N0.getOperand(1);
4261
4262 // 'and' is commutative!
4263 if (!Match(Mask)) {
4264 std::swap(X, Mask);
4265 if (!Match(Mask))
4266 return SDValue();
4267 }
4268
4269 EVT VT = X.getValueType();
4270
4271 // Produce:
4272 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4273 SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
4274 SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
4275 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
4276 return T2;
4277}
4278
4279/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4280/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4281/// handle the commuted versions of these patterns.
4282SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4283 ISD::CondCode Cond, const SDLoc &DL,
4284 DAGCombinerInfo &DCI) const {
4285 unsigned BOpcode = N0.getOpcode();
4286 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4287 "Unexpected binop");
4288 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4289
4290 // (X + Y) == X --> Y == 0
4291 // (X - Y) == X --> Y == 0
4292 // (X ^ Y) == X --> Y == 0
4293 SelectionDAG &DAG = DCI.DAG;
4294 EVT OpVT = N0.getValueType();
4295 SDValue X = N0.getOperand(0);
4296 SDValue Y = N0.getOperand(1);
4297 if (X == N1)
4298 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4299
4300 if (Y != N1)
4301 return SDValue();
4302
4303 // (X + Y) == Y --> X == 0
4304 // (X ^ Y) == Y --> X == 0
4305 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4306 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4307
4308 // The shift would not be valid if the operands are boolean (i1).
4309 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4310 return SDValue();
4311
4312 // (X - Y) == Y --> X == Y << 1
4313 SDValue One = DAG.getShiftAmountConstant(1, OpVT, DL);
4314 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4315 if (!DCI.isCalledByLegalizer())
4316 DCI.AddToWorklist(YShl1.getNode());
4317 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4318}
4319
4321 SDValue N0, const APInt &C1,
4322 ISD::CondCode Cond, const SDLoc &dl,
4323 SelectionDAG &DAG) {
4324 // Look through truncs that don't change the value of a ctpop.
4325 // FIXME: Add vector support? Need to be careful with setcc result type below.
4326 SDValue CTPOP = N0;
4327 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4329 CTPOP = N0.getOperand(0);
4330
4331 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4332 return SDValue();
4333
4334 EVT CTVT = CTPOP.getValueType();
4335 SDValue CTOp = CTPOP.getOperand(0);
4336
4337 // Expand a power-of-2-or-zero comparison based on ctpop:
4338 // (ctpop x) u< 2 -> (x & x-1) == 0
4339 // (ctpop x) u> 1 -> (x & x-1) != 0
4340 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4341 // Keep the CTPOP if it is a cheap vector op.
4342 if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
4343 return SDValue();
4344
4345 unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4346 if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4347 return SDValue();
4348 if (C1 == 0 && (Cond == ISD::SETULT))
4349 return SDValue(); // This is handled elsewhere.
4350
4351 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4352
4353 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4354 SDValue Result = CTOp;
4355 for (unsigned i = 0; i < Passes; i++) {
4356 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4357 Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4358 }
4360 return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4361 }
4362
4363 // Expand a power-of-2 comparison based on ctpop
4364 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4365 // Keep the CTPOP if it is cheap.
4366 if (TLI.isCtpopFast(CTVT))
4367 return SDValue();
4368
4369 SDValue Zero = DAG.getConstant(0, dl, CTVT);
4370 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4371 assert(CTVT.isInteger());
4372 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4373
4374 // Its not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4375 // check before emitting a potentially unnecessary op.
4376 if (DAG.isKnownNeverZero(CTOp)) {
4377 // (ctpop x) == 1 --> (x & x-1) == 0
4378 // (ctpop x) != 1 --> (x & x-1) != 0
4379 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4380 SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4381 return RHS;
4382 }
4383
4384 // (ctpop x) == 1 --> (x ^ x-1) > x-1
4385 // (ctpop x) != 1 --> (x ^ x-1) <= x-1
4386 SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
4388 return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
4389 }
4390
4391 return SDValue();
4392}
4393
4395 ISD::CondCode Cond, const SDLoc &dl,
4396 SelectionDAG &DAG) {
4397 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4398 return SDValue();
4399
4400 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4401 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4402 return SDValue();
4403
4404 auto getRotateSource = [](SDValue X) {
4405 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4406 return X.getOperand(0);
4407 return SDValue();
4408 };
4409
4410 // Peek through a rotated value compared against 0 or -1:
4411 // (rot X, Y) == 0/-1 --> X == 0/-1
4412 // (rot X, Y) != 0/-1 --> X != 0/-1
4413 if (SDValue R = getRotateSource(N0))
4414 return DAG.getSetCC(dl, VT, R, N1, Cond);
4415
4416 // Peek through an 'or' of a rotated value compared against 0:
4417 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4418 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4419 //
4420 // TODO: Add the 'and' with -1 sibling.
4421 // TODO: Recurse through a series of 'or' ops to find the rotate.
4422 EVT OpVT = N0.getValueType();
4423 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4424 if (SDValue R = getRotateSource(N0.getOperand(0))) {
4425 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4426 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4427 }
4428 if (SDValue R = getRotateSource(N0.getOperand(1))) {
4429 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4430 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4431 }
4432 }
4433
4434 return SDValue();
4435}
4436
4438 ISD::CondCode Cond, const SDLoc &dl,
4439 SelectionDAG &DAG) {
4440 // If we are testing for all-bits-clear, we might be able to do that with
4441 // less shifting since bit-order does not matter.
4442 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4443 return SDValue();
4444
4445 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4446 if (!C1 || !C1->isZero())
4447 return SDValue();
4448
4449 if (!N0.hasOneUse() ||
4450 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4451 return SDValue();
4452
4453 unsigned BitWidth = N0.getScalarValueSizeInBits();
4454 auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4455 if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
4456 return SDValue();
4457
4458 // Canonicalize fshr as fshl to reduce pattern-matching.
4459 unsigned ShAmt = ShAmtC->getZExtValue();
4460 if (N0.getOpcode() == ISD::FSHR)
4461 ShAmt = BitWidth - ShAmt;
4462
4463 // Match an 'or' with a specific operand 'Other' in either commuted variant.
4464 SDValue X, Y;
4465 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4466 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4467 return false;
4468 if (Or.getOperand(0) == Other) {
4469 X = Or.getOperand(0);
4470 Y = Or.getOperand(1);
4471 return true;
4472 }
4473 if (Or.getOperand(1) == Other) {
4474 X = Or.getOperand(1);
4475 Y = Or.getOperand(0);
4476 return true;
4477 }
4478 return false;
4479 };
4480
4481 EVT OpVT = N0.getValueType();
4482 EVT ShAmtVT = N0.getOperand(2).getValueType();
4483 SDValue F0 = N0.getOperand(0);
4484 SDValue F1 = N0.getOperand(1);
4485 if (matchOr(F0, F1)) {
4486 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4487 SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4488 SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4489 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4490 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4491 }
4492 if (matchOr(F1, F0)) {
4493 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4494 SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4495 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4496 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4497 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4498 }
4499
4500 return SDValue();
4501}
4502
4503/// Try to simplify a setcc built with the specified operands and cc. If it is
4504/// unable to simplify it, return a null SDValue.
4506 ISD::CondCode Cond, bool foldBooleans,
4507 DAGCombinerInfo &DCI,
4508 const SDLoc &dl) const {
4509 SelectionDAG &DAG = DCI.DAG;
4510 const DataLayout &Layout = DAG.getDataLayout();
4511 EVT OpVT = N0.getValueType();
4513
4514 // Constant fold or commute setcc.
4515 if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4516 return Fold;
4517
4518 bool N0ConstOrSplat =
4519 isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4520 bool N1ConstOrSplat =
4521 isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4522
4523 // Canonicalize toward having the constant on the RHS.
4524 // TODO: Handle non-splat vector constants. All undef causes trouble.
4525 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4526 // infinite loop here when we encounter one.
4528 if (N0ConstOrSplat && !N1ConstOrSplat &&
4529 (DCI.isBeforeLegalizeOps() ||
4530 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4531 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4532
4533 // If we have a subtract with the same 2 non-constant operands as this setcc
4534 // -- but in reverse order -- then try to commute the operands of this setcc
4535 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4536 // instruction on some targets.
4537 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4538 (DCI.isBeforeLegalizeOps() ||
4539 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4540 DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4541 !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4542 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4543
4544 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4545 return V;
4546
4547 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4548 return V;
4549
4550 if (auto *N1C = isConstOrConstSplat(N1)) {
4551 const APInt &C1 = N1C->getAPIntValue();
4552
4553 // Optimize some CTPOP cases.
4554 if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4555 return V;
4556
4557 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4558 // X * Y == 0 --> (X == 0) || (Y == 0)
4559 // X * Y != 0 --> (X != 0) && (Y != 0)
4560 // TODO: This bails out if minsize is set, but if the target doesn't have a
4561 // single instruction multiply for this type, it would likely be
4562 // smaller to decompose.
4563 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4564 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4565 (N0->getFlags().hasNoUnsignedWrap() ||
4566 N0->getFlags().hasNoSignedWrap()) &&
4567 !Attr.hasFnAttr(Attribute::MinSize)) {
4568 SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4569 SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4570 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4571 return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4572 }
4573
4574 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4575 // equality comparison, then we're just comparing whether X itself is
4576 // zero.
4577 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4578 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4579 llvm::has_single_bit<uint32_t>(N0.getScalarValueSizeInBits())) {
4580 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4581 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4582 ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4583 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4584 // (srl (ctlz x), 5) == 0 -> X != 0
4585 // (srl (ctlz x), 5) != 1 -> X != 0
4586 Cond = ISD::SETNE;
4587 } else {
4588 // (srl (ctlz x), 5) != 0 -> X == 0
4589 // (srl (ctlz x), 5) == 1 -> X == 0
4590 Cond = ISD::SETEQ;
4591 }
4592 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4593 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4594 Cond);
4595 }
4596 }
4597 }
4598 }
4599
4600 // FIXME: Support vectors.
4601 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4602 const APInt &C1 = N1C->getAPIntValue();
4603
4604 // (zext x) == C --> x == (trunc C)
4605 // (sext x) == C --> x == (trunc C)
4606 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4607 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4608 unsigned MinBits = N0.getValueSizeInBits();
4609 SDValue PreExt;
4610 bool Signed = false;
4611 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4612 // ZExt
4613 MinBits = N0->getOperand(0).getValueSizeInBits();
4614 PreExt = N0->getOperand(0);
4615 } else if (N0->getOpcode() == ISD::AND) {
4616 // DAGCombine turns costly ZExts into ANDs
4617 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4618 if ((C->getAPIntValue()+1).isPowerOf2()) {
4619 MinBits = C->getAPIntValue().countr_one();
4620 PreExt = N0->getOperand(0);
4621 }
4622 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4623 // SExt
4624 MinBits = N0->getOperand(0).getValueSizeInBits();
4625 PreExt = N0->getOperand(0);
4626 Signed = true;
4627 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4628 // ZEXTLOAD / SEXTLOAD
4629 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4630 MinBits = LN0->getMemoryVT().getSizeInBits();
4631 PreExt = N0;
4632 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4633 Signed = true;
4634 MinBits = LN0->getMemoryVT().getSizeInBits();
4635 PreExt = N0;
4636 }
4637 }
4638
4639 // Figure out how many bits we need to preserve this constant.
4640 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4641
4642 // Make sure we're not losing bits from the constant.
4643 if (MinBits > 0 &&
4644 MinBits < C1.getBitWidth() &&
4645 MinBits >= ReqdBits) {
4646 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4647 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4648 // Will get folded away.
4649 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4650 if (MinBits == 1 && C1 == 1)
4651 // Invert the condition.
4652 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4654 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4655 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4656 }
4657
4658 // If truncating the setcc operands is not desirable, we can still
4659 // simplify the expression in some cases:
4660 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4661 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4662 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4663 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4664 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4665 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4666 SDValue TopSetCC = N0->getOperand(0);
4667 unsigned N0Opc = N0->getOpcode();
4668 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4669 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4670 TopSetCC.getOpcode() == ISD::SETCC &&
4671 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4672 (isConstFalseVal(N1) ||
4673 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4674
4675 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4676 (!N1C->isZero() && Cond == ISD::SETNE);
4677
4678 if (!Inverse)
4679 return TopSetCC;
4680
4682 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4683 TopSetCC.getOperand(0).getValueType());
4684 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4685 TopSetCC.getOperand(1),
4686 InvCond);
4687 }
4688 }
4689 }
4690
4691 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4692 // equality or unsigned, and all 1 bits of the const are in the same
4693 // partial word, see if we can shorten the load.
4694 if (DCI.isBeforeLegalize() &&
4696 N0.getOpcode() == ISD::AND && C1 == 0 &&
4697 N0.getNode()->hasOneUse() &&
4698 isa<LoadSDNode>(N0.getOperand(0)) &&
4699 N0.getOperand(0).getNode()->hasOneUse() &&
4700 isa<ConstantSDNode>(N0.getOperand(1))) {
4701 auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
4702 APInt bestMask;
4703 unsigned bestWidth = 0, bestOffset = 0;
4704 if (Lod->isSimple() && Lod->isUnindexed() &&
4705 (Lod->getMemoryVT().isByteSized() ||
4706 isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
4707 unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4708 unsigned origWidth = N0.getValueSizeInBits();
4709 unsigned maskWidth = origWidth;
4710 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4711 // 8 bits, but have to be careful...
4712 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4713 origWidth = Lod->getMemoryVT().getSizeInBits();
4714 const APInt &Mask = N0.getConstantOperandAPInt(1);
4715 // Only consider power-of-2 widths (and at least one byte) as candiates
4716 // for the narrowed load.
4717 for (unsigned width = 8; width < origWidth; width *= 2) {
4718 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
4719 if (!shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT))
4720 continue;
4721 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4722 // Avoid accessing any padding here for now (we could use memWidth
4723 // instead of origWidth here otherwise).
4724 unsigned maxOffset = origWidth - width;
4725 for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4726 if (Mask.isSubsetOf(newMask)) {
4727 unsigned ptrOffset =
4728 Layout.isLittleEndian() ? offset : memWidth - width - offset;
4729 unsigned IsFast = 0;
4730 Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
4732 *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
4733 NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4734 IsFast) {
4735 bestOffset = ptrOffset / 8;
4736 bestMask = Mask.lshr(offset);
4737 bestWidth = width;
4738 break;
4739 }
4740 }
4741 newMask <<= 8;
4742 }
4743 if (bestWidth)
4744 break;
4745 }
4746 }
4747 if (bestWidth) {
4748 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4749 SDValue Ptr = Lod->getBasePtr();
4750 if (bestOffset != 0)
4751 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
4752 SDValue NewLoad =
4753 DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4754 Lod->getPointerInfo().getWithOffset(bestOffset),
4755 Lod->getOriginalAlign());
4756 SDValue And =
4757 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4758 DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
4759 return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
4760 }
4761 }
4762
4763 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4764 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4765 unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4766
4767 // If the comparison constant has bits in the upper part, the
4768 // zero-extended value could never match.
4770 C1.getBitWidth() - InSize))) {
4771 switch (Cond) {
4772 case ISD::SETUGT:
4773 case ISD::SETUGE:
4774 case ISD::SETEQ:
4775 return DAG.getConstant(0, dl, VT);
4776 case ISD::SETULT:
4777 case ISD::SETULE:
4778 case ISD::SETNE:
4779 return DAG.getConstant(1, dl, VT);
4780 case ISD::SETGT:
4781 case ISD::SETGE:
4782 // True if the sign bit of C1 is set.
4783 return DAG.getConstant(C1.isNegative(), dl, VT);
4784 case ISD::SETLT:
4785 case ISD::SETLE:
4786 // True if the sign bit of C1 isn't set.
4787 return DAG.getConstant(C1.isNonNegative(), dl, VT);
4788 default:
4789 break;
4790 }
4791 }
4792
4793 // Otherwise, we can perform the comparison with the low bits.
4794 switch (Cond) {
4795 case ISD::SETEQ:
4796 case ISD::SETNE:
4797 case ISD::SETUGT:
4798 case ISD::SETUGE:
4799 case ISD::SETULT:
4800 case ISD::SETULE: {
4801 EVT newVT = N0.getOperand(0).getValueType();
4802 // FIXME: Should use isNarrowingProfitable.
4803 if (DCI.isBeforeLegalizeOps() ||
4804 (isOperationLegal(ISD::SETCC, newVT) &&
4805 isCondCodeLegal(Cond, newVT.getSimpleVT()) &&
4806 isTypeDesirableForOp(ISD::SETCC, newVT))) {
4807 EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
4808 SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
4809
4810 SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
4811 NewConst, Cond);
4812 return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
4813 }
4814 break;
4815 }
4816 default:
4817 break; // todo, be more careful with signed comparisons
4818 }
4819 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4820 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4821 !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
4822 OpVT)) {
4823 EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
4824 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4825 EVT ExtDstTy = N0.getValueType();
4826 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4827
4828 // If the constant doesn't fit into the number of bits for the source of
4829 // the sign extension, it is impossible for both sides to be equal.
4830 if (C1.getSignificantBits() > ExtSrcTyBits)
4831 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
4832
4833 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4834 ExtDstTy != ExtSrcTy && "Unexpected types!");
4835 APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
4836 SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
4837 DAG.getConstant(Imm, dl, ExtDstTy));
4838 if (!DCI.isCalledByLegalizer())
4839 DCI.AddToWorklist(ZextOp.getNode());
4840 // Otherwise, make this a use of a zext.
4841 return DAG.getSetCC(dl, VT, ZextOp,
4842 DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
4843 } else if ((N1C->isZero() || N1C->isOne()) &&
4844 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4845 // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
4846 // excluded as they are handled below whilst checking for foldBooleans.
4847 if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
4848 isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
4849 (N0.getValueType() == MVT::i1 ||
4853 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
4854 if (TrueWhenTrue)
4855 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
4856 // Invert the condition.
4857 if (N0.getOpcode() == ISD::SETCC) {
4858 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
4860 if (DCI.isBeforeLegalizeOps() ||
4862 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
4863 }
4864 }
4865
4866 if ((N0.getOpcode() == ISD::XOR ||
4867 (N0.getOpcode() == ISD::AND &&
4868 N0.getOperand(0).getOpcode() == ISD::XOR &&
4869 N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
4870 isOneConstant(N0.getOperand(1))) {
4871 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
4872 // can only do this if the top bits are known zero.
4873 unsigned BitWidth = N0.getValueSizeInBits();
4874 if (DAG.MaskedValueIsZero(N0,
4876 BitWidth-1))) {
4877 // Okay, get the un-inverted input value.
4878 SDValue Val;
4879 if (N0.getOpcode() == ISD::XOR) {
4880 Val = N0.getOperand(0);
4881 } else {
4882 assert(N0.getOpcode() == ISD::AND &&
4883 N0.getOperand(0).getOpcode() == ISD::XOR);
4884 // ((X^1)&1)^1 -> X & 1
4885 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
4886 N0.getOperand(0).getOperand(0),
4887 N0.getOperand(1));
4888 }
4889
4890 return DAG.getSetCC(dl, VT, Val, N1,
4892 }
4893 } else if (N1C->isOne()) {
4894 SDValue Op0 = N0;
4895 if (Op0.getOpcode() == ISD::TRUNCATE)
4896 Op0 = Op0.getOperand(0);
4897
4898 if ((Op0.getOpcode() == ISD::XOR) &&
4899 Op0.getOperand(0).getOpcode() == ISD::SETCC &&
4900 Op0.getOperand(1).getOpcode() == ISD::SETCC) {
4901 SDValue XorLHS = Op0.getOperand(0);
4902 SDValue XorRHS = Op0.getOperand(1);
4903 // Ensure that the input setccs return an i1 type or 0/1 value.
4904 if (Op0.getValueType() == MVT::i1 ||
4909 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
4911 return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
4912 }
4913 }
4914 if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
4915 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
4916 if (Op0.getValueType().bitsGT(VT))
4917 Op0 = DAG.getNode(ISD::AND, dl, VT,
4918 DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
4919 DAG.getConstant(1, dl, VT));
4920 else if (Op0.getValueType().bitsLT(VT))
4921 Op0 = DAG.getNode(ISD::AND, dl, VT,
4922 DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
4923 DAG.getConstant(1, dl, VT));
4924
4925 return DAG.getSetCC(dl, VT, Op0,
4926 DAG.getConstant(0, dl, Op0.getValueType()),
4928 }
4929 if (Op0.getOpcode() == ISD::AssertZext &&
4930 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
4931 return DAG.getSetCC(dl, VT, Op0,
4932 DAG.getConstant(0, dl, Op0.getValueType()),
4934 }
4935 }
4936
4937 // Given:
4938 // icmp eq/ne (urem %x, %y), 0
4939 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
4940 // icmp eq/ne %x, 0
4941 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
4942 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4943 KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
4944 KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
4945 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
4946 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4947 }
4948
4949 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
4950 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
4951 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4952 N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
4953 N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
4954 N1C->isAllOnes()) {
4955 return DAG.getSetCC(dl, VT, N0.getOperand(0),
4956 DAG.getConstant(0, dl, OpVT),
4958 }
4959
4960 if (SDValue V =
4961 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
4962 return V;
4963 }
4964
4965 // These simplifications apply to splat vectors as well.
4966 // TODO: Handle more splat vector cases.
4967 if (auto *N1C = isConstOrConstSplat(N1)) {
4968 const APInt &C1 = N1C->getAPIntValue();
4969
4970 APInt MinVal, MaxVal;
4971 unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
4973 MinVal = APInt::getSignedMinValue(OperandBitSize);
4974 MaxVal = APInt::getSignedMaxValue(OperandBitSize);
4975 } else {
4976 MinVal = APInt::getMinValue(OperandBitSize);
4977 MaxVal = APInt::getMaxValue(OperandBitSize);
4978 }
4979
4980 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
4981 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
4982 // X >= MIN --> true
4983 if (C1 == MinVal)
4984 return DAG.getBoolConstant(true, dl, VT, OpVT);
4985
4986 if (!VT.isVector()) { // TODO: Support this for vectors.
4987 // X >= C0 --> X > (C0 - 1)
4988 APInt C = C1 - 1;
4990 if ((DCI.isBeforeLegalizeOps() ||
4991 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
4992 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4993 isLegalICmpImmediate(C.getSExtValue())))) {
4994 return DAG.getSetCC(dl, VT, N0,
4995 DAG.getConstant(C, dl, N1.getValueType()),
4996 NewCC);
4997 }
4998 }
4999 }
5000
5001 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
5002 // X <= MAX --> true
5003 if (C1 == MaxVal)
5004 return DAG.getBoolConstant(true, dl, VT, OpVT);
5005
5006 // X <= C0 --> X < (C0 + 1)
5007 if (!VT.isVector()) { // TODO: Support this for vectors.
5008 APInt C = C1 + 1;
5010 if ((DCI.isBeforeLegalizeOps() ||
5011 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5012 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5013 isLegalICmpImmediate(C.getSExtValue())))) {
5014 return DAG.getSetCC(dl, VT, N0,
5015 DAG.getConstant(C, dl, N1.getValueType()),
5016 NewCC);
5017 }
5018 }
5019 }
5020
5021 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
5022 if (C1 == MinVal)
5023 return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
5024
5025 // TODO: Support this for vectors after legalize ops.
5026 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5027 // Canonicalize setlt X, Max --> setne X, Max
5028 if (C1 == MaxVal)
5029 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5030
5031 // If we have setult X, 1, turn it into seteq X, 0
5032 if (C1 == MinVal+1)
5033 return DAG.getSetCC(dl, VT, N0,
5034 DAG.getConstant(MinVal, dl, N0.getValueType()),
5035 ISD::SETEQ);
5036 }
5037 }
5038
5039 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
5040 if (C1 == MaxVal)
5041 return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
5042
5043 // TODO: Support this for vectors after legalize ops.
5044 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5045 // Canonicalize setgt X, Min --> setne X, Min
5046 if (C1 == MinVal)
5047 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5048
5049 // If we have setugt X, Max-1, turn it into seteq X, Max
5050 if (C1 == MaxVal-1)
5051 return DAG.getSetCC(dl, VT, N0,
5052 DAG.getConstant(MaxVal, dl, N0.getValueType()),
5053 ISD::SETEQ);
5054 }
5055 }
5056
5057 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
5058 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5059 if (C1.isZero())
5060 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5061 VT, N0, N1, Cond, DCI, dl))
5062 return CC;
5063
5064 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5065 // For example, when high 32-bits of i64 X are known clear:
5066 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
5067 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
5068 bool CmpZero = N1C->isZero();
5069 bool CmpNegOne = N1C->isAllOnes();
5070 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5071 // Match or(lo,shl(hi,bw/2)) pattern.
5072 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5073 unsigned EltBits = V.getScalarValueSizeInBits();
5074 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5075 return false;
5076 SDValue LHS = V.getOperand(0);
5077 SDValue RHS = V.getOperand(1);
5078 APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
5079 // Unshifted element must have zero upperbits.
5080 if (RHS.getOpcode() == ISD::SHL &&
5081 isa<ConstantSDNode>(RHS.getOperand(1)) &&
5082 RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5083 DAG.MaskedValueIsZero(LHS, HiBits)) {
5084 Lo = LHS;
5085 Hi = RHS.getOperand(0);
5086 return true;
5087 }
5088 if (LHS.getOpcode() == ISD::SHL &&
5089 isa<ConstantSDNode>(LHS.getOperand(1)) &&
5090 LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5091 DAG.MaskedValueIsZero(RHS, HiBits)) {
5092 Lo = RHS;
5093 Hi = LHS.getOperand(0);
5094 return true;
5095 }
5096 return false;
5097 };
5098
5099 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5100 unsigned EltBits = N0.getScalarValueSizeInBits();
5101 unsigned HalfBits = EltBits / 2;
5102 APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
5103 SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
5104 SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
5105 SDValue NewN0 =
5106 DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
5107 SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
5108 return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
5109 };
5110
5111 SDValue Lo, Hi;
5112 if (IsConcat(N0, Lo, Hi))
5113 return MergeConcat(Lo, Hi);
5114
5115 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5116 SDValue Lo0, Lo1, Hi0, Hi1;
5117 if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
5118 IsConcat(N0.getOperand(1), Lo1, Hi1)) {
5119 return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
5120 DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5121 }
5122 }
5123 }
5124 }
5125
5126 // If we have "setcc X, C0", check to see if we can shrink the immediate
5127 // by changing cc.
5128 // TODO: Support this for vectors after legalize ops.
5129 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5130 // SETUGT X, SINTMAX -> SETLT X, 0
5131 // SETUGE X, SINTMIN -> SETLT X, 0
5132 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5133 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5134 return DAG.getSetCC(dl, VT, N0,
5135 DAG.getConstant(0, dl, N1.getValueType()),
5136 ISD::SETLT);
5137
5138 // SETULT X, SINTMIN -> SETGT X, -1
5139 // SETULE X, SINTMAX -> SETGT X, -1
5140 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5141 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5142 return DAG.getSetCC(dl, VT, N0,
5143 DAG.getAllOnesConstant(dl, N1.getValueType()),
5144 ISD::SETGT);
5145 }
5146 }
5147
5148 // Back to non-vector simplifications.
5149 // TODO: Can we do these for vector splats?
5150 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5151 const APInt &C1 = N1C->getAPIntValue();
5152 EVT ShValTy = N0.getValueType();
5153
5154 // Fold bit comparisons when we can. This will result in an
5155 // incorrect value when boolean false is negative one, unless
5156 // the bitsize is 1 in which case the false value is the same
5157 // in practice regardless of the representation.
5158 if ((VT.getSizeInBits() == 1 ||
5160 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5161 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5162 N0.getOpcode() == ISD::AND) {
5163 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5164 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5165 // Perform the xform if the AND RHS is a single bit.
5166 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5167 if (AndRHS->getAPIntValue().isPowerOf2() &&
5168 !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5169 return DAG.getNode(
5170 ISD::TRUNCATE, dl, VT,
5171 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5172 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5173 }
5174 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5175 // (X & 8) == 8 --> (X & 8) >> 3
5176 // Perform the xform if C1 is a single bit.
5177 unsigned ShCt = C1.logBase2();
5178 if (C1.isPowerOf2() && !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5179 return DAG.getNode(
5180 ISD::TRUNCATE, dl, VT,
5181 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5182 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5183 }
5184 }
5185 }
5186 }
5187
5188 if (C1.getSignificantBits() <= 64 &&
5190 // (X & -256) == 256 -> (X >> 8) == 1
5191 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5192 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5193 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5194 const APInt &AndRHSC = AndRHS->getAPIntValue();
5195 if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(AndRHSC)) {
5196 unsigned ShiftBits = AndRHSC.countr_zero();
5197 if (!shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5198 SDValue Shift = DAG.getNode(
5199 ISD::SRL, dl, ShValTy, N0.getOperand(0),
5200 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5201 SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
5202 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5203 }
5204 }
5205 }
5206 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5207 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5208 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5209 // X < 0x100000000 -> (X >> 32) < 1
5210 // X >= 0x100000000 -> (X >> 32) >= 1
5211 // X <= 0x0ffffffff -> (X >> 32) < 1
5212 // X > 0x0ffffffff -> (X >> 32) >= 1
5213 unsigned ShiftBits;
5214 APInt NewC = C1;
5215 ISD::CondCode NewCond = Cond;
5216 if (AdjOne) {
5217 ShiftBits = C1.countr_one();
5218 NewC = NewC + 1;
5219 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5220 } else {
5221 ShiftBits = C1.countr_zero();
5222 }
5223 NewC.lshrInPlace(ShiftBits);
5224 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5226 !shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5227 SDValue Shift =
5228 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5229 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5230 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5231 return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5232 }
5233 }
5234 }
5235 }
5236
5237 if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
5238 auto *CFP = cast<ConstantFPSDNode>(N1);
5239 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5240
5241 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5242 // constant if knowing that the operand is non-nan is enough. We prefer to
5243 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5244 // materialize 0.0.
5245 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5246 return DAG.getSetCC(dl, VT, N0, N0, Cond);
5247
5248 // setcc (fneg x), C -> setcc swap(pred) x, -C
5249 if (N0.getOpcode() == ISD::FNEG) {
5251 if (DCI.isBeforeLegalizeOps() ||
5252 isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5253 SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5254 return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5255 }
5256 }
5257
5258 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5260 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5261 bool IsFabs = N0.getOpcode() == ISD::FABS;
5262 SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5263 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5264 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5265 : (IsFabs ? fcInf : fcPosInf);
5266 if (Cond == ISD::SETUEQ)
5267 Flag |= fcNan;
5268 return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5269 DAG.getTargetConstant(Flag, dl, MVT::i32));
5270 }
5271 }
5272
5273 // If the condition is not legal, see if we can find an equivalent one
5274 // which is legal.
5276 // If the comparison was an awkward floating-point == or != and one of
5277 // the comparison operands is infinity or negative infinity, convert the
5278 // condition to a less-awkward <= or >=.
5279 if (CFP->getValueAPF().isInfinity()) {
5280 bool IsNegInf = CFP->getValueAPF().isNegative();
5282 switch (Cond) {
5283 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5284 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5285 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5286 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5287 default: break;
5288 }
5289 if (NewCond != ISD::SETCC_INVALID &&
5290 isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5291 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5292 }
5293 }
5294 }
5295
5296 if (N0 == N1) {
5297 // The sext(setcc()) => setcc() optimization relies on the appropriate
5298 // constant being emitted.
5299 assert(!N0.getValueType().isInteger() &&
5300 "Integer types should be handled by FoldSetCC");
5301
5302 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5303 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5304 if (UOF == 2) // FP operators that are undefined on NaNs.
5305 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5306 if (UOF == unsigned(EqTrue))
5307 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5308 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5309 // if it is not already.
5310 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5311 if (NewCond != Cond &&
5312 (DCI.isBeforeLegalizeOps() ||
5313 isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5314 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5315 }
5316
5317 // ~X > ~Y --> Y > X
5318 // ~X < ~Y --> Y < X
5319 // ~X < C --> X > ~C
5320 // ~X > C --> X < ~C
5321 if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5322 N0.getValueType().isInteger()) {
5323 if (isBitwiseNot(N0)) {
5324 if (isBitwiseNot(N1))
5325 return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5326
5329 SDValue Not = DAG.getNOT(dl, N1, OpVT);
5330 return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5331 }
5332 }
5333 }
5334
5335 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5336 N0.getValueType().isInteger()) {
5337 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5338 N0.getOpcode() == ISD::XOR) {
5339 // Simplify (X+Y) == (X+Z) --> Y == Z
5340 if (N0.getOpcode() == N1.getOpcode()) {
5341 if (N0.getOperand(0) == N1.getOperand(0))
5342 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5343 if (N0.getOperand(1) == N1.getOperand(1))
5344 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5345 if (isCommutativeBinOp(N0.getOpcode())) {
5346 // If X op Y == Y op X, try other combinations.
5347 if (N0.getOperand(0) == N1.getOperand(1))
5348 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5349 Cond);
5350 if (N0.getOperand(1) == N1.getOperand(0))
5351 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5352 Cond);
5353 }
5354 }
5355
5356 // If RHS is a legal immediate value for a compare instruction, we need
5357 // to be careful about increasing register pressure needlessly.
5358 bool LegalRHSImm = false;
5359
5360 if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5361 if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5362 // Turn (X+C1) == C2 --> X == C2-C1
5363 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5364 return DAG.getSetCC(
5365 dl, VT, N0.getOperand(0),
5366 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5367 dl, N0.getValueType()),
5368 Cond);
5369
5370 // Turn (X^C1) == C2 --> X == C1^C2
5371 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5372 return DAG.getSetCC(
5373 dl, VT, N0.getOperand(0),
5374 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5375 dl, N0.getValueType()),
5376 Cond);
5377 }
5378
5379 // Turn (C1-X) == C2 --> X == C1-C2
5380 if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5381 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5382 return DAG.getSetCC(
5383 dl, VT, N0.getOperand(1),
5384 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5385 dl, N0.getValueType()),
5386 Cond);
5387
5388 // Could RHSC fold directly into a compare?
5389 if (RHSC->getValueType(0).getSizeInBits() <= 64)
5390 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5391 }
5392
5393 // (X+Y) == X --> Y == 0 and similar folds.
5394 // Don't do this if X is an immediate that can fold into a cmp
5395 // instruction and X+Y has other uses. It could be an induction variable
5396 // chain, and the transform would increase register pressure.
5397 if (!LegalRHSImm || N0.hasOneUse())
5398 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5399 return V;
5400 }
5401
5402 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5403 N1.getOpcode() == ISD::XOR)
5404 if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5405 return V;
5406
5407 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5408 return V;
5409 }
5410
5411 // Fold remainder of division by a constant.
5412 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5413 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5414 // When division is cheap or optimizing for minimum size,
5415 // fall through to DIVREM creation by skipping this fold.
5416 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5417 if (N0.getOpcode() == ISD::UREM) {
5418 if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5419 return Folded;
5420 } else if (N0.getOpcode() == ISD::SREM) {
5421 if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5422 return Folded;
5423 }
5424 }
5425 }
5426
5427 // Fold away ALL boolean setcc's.
5428 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5429 SDValue Temp;
5430 switch (Cond) {
5431 default: llvm_unreachable("Unknown integer setcc!");
5432 case ISD::SETEQ: // X == Y -> ~(X^Y)
5433 Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5434 N0 = DAG.getNOT(dl, Temp, OpVT);
5435 if (!DCI.isCalledByLegalizer())
5436 DCI.AddToWorklist(Temp.getNode());
5437 break;
5438 case ISD::SETNE: // X != Y --> (X^Y)
5439 N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5440 break;
5441 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5442 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5443 Temp = DAG.getNOT(dl, N0, OpVT);
5444 N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5445 if (!DCI.isCalledByLegalizer())
5446 DCI.AddToWorklist(Temp.getNode());
5447 break;
5448 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5449 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5450 Temp = DAG.getNOT(dl, N1, OpVT);
5451 N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5452 if (!DCI.isCalledByLegalizer())
5453 DCI.AddToWorklist(Temp.getNode());
5454 break;
5455 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5456 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5457 Temp = DAG.getNOT(dl, N0, OpVT);
5458 N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5459 if (!DCI.isCalledByLegalizer())
5460 DCI.AddToWorklist(Temp.getNode());
5461 break;
5462 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5463 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5464 Temp = DAG.getNOT(dl, N1, OpVT);
5465 N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5466 break;
5467 }
5468 if (VT.getScalarType() != MVT::i1) {
5469 if (!DCI.isCalledByLegalizer())
5470 DCI.AddToWorklist(N0.getNode());
5471 // FIXME: If running after legalize, we probably can't do this.
5473 N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5474 }
5475 return N0;
5476 }
5477
5478 // Could not fold it.
5479 return SDValue();
5480}
5481
5482/// Returns true (and the GlobalValue and the offset) if the node is a
5483/// GlobalAddress + offset.
5485 int64_t &Offset) const {
5486
5487 SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
5488
5489 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
5490 GA = GASD->getGlobal();
5491 Offset += GASD->getOffset();
5492 return true;
5493 }
5494
5495 if (N->getOpcode() == ISD::ADD) {
5496 SDValue N1 = N->getOperand(0);
5497 SDValue N2 = N->getOperand(1);
5498 if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
5499 if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
5500 Offset += V->getSExtValue();
5501 return true;
5502 }
5503 } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
5504 if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
5505 Offset += V->getSExtValue();
5506 return true;
5507 }
5508 }
5509 }
5510
5511 return false;
5512}
5513
5515 DAGCombinerInfo &DCI) const {
5516 // Default implementation: no optimization.
5517 return SDValue();
5518}
5519
5520//===----------------------------------------------------------------------===//
5521// Inline Assembler Implementation Methods
5522//===----------------------------------------------------------------------===//
5523
5526 unsigned S = Constraint.size();
5527
5528 if (S == 1) {
5529 switch (Constraint[0]) {
5530 default: break;
5531 case 'r':
5532 return C_RegisterClass;
5533 case 'm': // memory
5534 case 'o': // offsetable
5535 case 'V': // not offsetable
5536 return C_Memory;
5537 case 'p': // Address.
5538 return C_Address;
5539 case 'n': // Simple Integer
5540 case 'E': // Floating Point Constant
5541 case 'F': // Floating Point Constant
5542 return C_Immediate;
5543 case 'i': // Simple Integer or Relocatable Constant
5544 case 's': // Relocatable Constant
5545 case 'X': // Allow ANY value.
5546 case 'I': // Target registers.
5547 case 'J':
5548 case 'K':
5549 case 'L':
5550 case 'M':
5551 case 'N':
5552 case 'O':
5553 case 'P':
5554 case '<':
5555 case '>':
5556 return C_Other;
5557 }
5558 }
5559
5560 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5561 if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
5562 return C_Memory;
5563 return C_Register;
5564 }
5565 return C_Unknown;
5566}
5567
5568/// Try to replace an X constraint, which matches anything, with another that
5569/// has more specific requirements based on the type of the corresponding
5570/// operand.
5571const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5572 if (ConstraintVT.isInteger())
5573 return "r";
5574 if (ConstraintVT.isFloatingPoint())
5575 return "f"; // works for many targets
5576 return nullptr;
5577}
5578
5580 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5581 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
5582 return SDValue();
5583}
5584
5585/// Lower the specified operand into the Ops vector.
5586/// If it is invalid, don't add anything to Ops.
5588 StringRef Constraint,
5589 std::vector<SDValue> &Ops,
5590 SelectionDAG &DAG) const {
5591
5592 if (Constraint.size() > 1)
5593 return;
5594
5595 char ConstraintLetter = Constraint[0];
5596 switch (ConstraintLetter) {
5597 default: break;
5598 case 'X': // Allows any operand
5599 case 'i': // Simple Integer or Relocatable Constant
5600 case 'n': // Simple Integer
5601 case 's': { // Relocatable Constant
5602
5604 uint64_t Offset = 0;
5605
5606 // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
5607 // etc., since getelementpointer is variadic. We can't use
5608 // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
5609 // while in this case the GA may be furthest from the root node which is
5610 // likely an ISD::ADD.
5611 while (true) {
5612 if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
5613 // gcc prints these as sign extended. Sign extend value to 64 bits
5614 // now; without this it would get ZExt'd later in
5615 // ScheduleDAGSDNodes::EmitNode, which is very generic.
5616 bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
5617 BooleanContent BCont = getBooleanContents(MVT::i64);
5618 ISD::NodeType ExtOpc =
5619 IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
5620 int64_t ExtVal =
5621 ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5622 Ops.push_back(
5623 DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
5624 return;
5625 }
5626 if (ConstraintLetter != 'n') {
5627 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5628 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
5629 GA->getValueType(0),
5630 Offset + GA->getOffset()));
5631 return;
5632 }
5633 if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
5634 Ops.push_back(DAG.getTargetBlockAddress(
5635 BA->getBlockAddress(), BA->getValueType(0),
5636 Offset + BA->getOffset(), BA->getTargetFlags()));
5637 return;
5638 }
5639 if (isa<BasicBlockSDNode>(Op)) {
5640 Ops.push_back(Op);
5641 return;
5642 }
5643 }
5644 const unsigned OpCode = Op.getOpcode();
5645 if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
5646 if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
5647 Op = Op.getOperand(1);
5648 // Subtraction is not commutative.
5649 else if (OpCode == ISD::ADD &&
5650 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
5651 Op = Op.getOperand(0);
5652 else
5653 return;
5654 Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
5655 continue;
5656 }
5657 return;
5658 }
5659 break;
5660 }
5661 }
5662}
5663
5665 const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
5666}
5667
5668std::pair<unsigned, const TargetRegisterClass *>
5670 StringRef Constraint,
5671 MVT VT) const {
5672 if (!Constraint.starts_with("{"))
5673 return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
5674 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5675
5676 // Remove the braces from around the name.
5677 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5678
5679 std::pair<unsigned, const TargetRegisterClass *> R =
5680 std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
5681
5682 // Figure out which register class contains this reg.
5683 for (const TargetRegisterClass *RC : RI->regclasses()) {
5684 // If none of the value types for this register class are valid, we
5685 // can't use it. For example, 64-bit reg classes on 32-bit targets.
5686 if (!isLegalRC(*RI, *RC))
5687 continue;
5688
5689 for (const MCPhysReg &PR : *RC) {
5690 if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5691 std::pair<unsigned, const TargetRegisterClass *> S =
5692 std::make_pair(PR, RC);
5693
5694 // If this register class has the requested value type, return it,
5695 // otherwise keep searching and return the first class found
5696 // if no other is found which explicitly has the requested type.
5697 if (RI->isTypeLegalForClass(*RC, VT))
5698 return S;
5699 if (!R.second)
5700 R = S;
5701 }
5702 }
5703 }
5704
5705 return R;
5706}
5707
5708//===----------------------------------------------------------------------===//
5709// Constraint Selection.
5710
5711/// Return true of this is an input operand that is a matching constraint like
5712/// "4".
5714 assert(!ConstraintCode.empty() && "No known constraint!");
5715 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5716}
5717
5718/// If this is an input matching constraint, this method returns the output
5719/// operand it matches.
5721 assert(!ConstraintCode.empty() && "No known constraint!");
5722 return atoi(ConstraintCode.c_str());
5723}
5724
5725/// Split up the constraint string from the inline assembly value into the
5726/// specific constraints and their prefixes, and also tie in the associated
5727/// operand values.
5728/// If this returns an empty vector, and if the constraint string itself
5729/// isn't empty, there was an error parsing.
5732 const TargetRegisterInfo *TRI,
5733 const CallBase &Call) const {
5734 /// Information about all of the constraints.
5735 AsmOperandInfoVector ConstraintOperands;
5736 const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
5737 unsigned maCount = 0; // Largest number of multiple alternative constraints.
5738
5739 // Do a prepass over the constraints, canonicalizing them, and building up the
5740 // ConstraintOperands list.
5741 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
5742 unsigned ResNo = 0; // ResNo - The result number of the next output.
5743 unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.
5744
5745 for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
5746 ConstraintOperands.emplace_back(std::move(CI));
5747 AsmOperandInfo &OpInfo = ConstraintOperands.back();
5748
5749 // Update multiple alternative constraint count.
5750 if (OpInfo.multipleAlternatives.size() > maCount)
5751 maCount = OpInfo.multipleAlternatives.size();
5752
5753 OpInfo.ConstraintVT = MVT::Other;
5754
5755 // Compute the value type for each operand.
5756 switch (OpInfo.Type) {
5758 // Indirect outputs just consume an argument.
5759 if (OpInfo.isIndirect) {
5760 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
5761 break;
5762 }
5763
5764 // The return value of the call is this value. As such, there is no
5765 // corresponding argument.
5766 assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
5767 if (auto *STy = dyn_cast<StructType>(Call.getType())) {
5768 OpInfo.ConstraintVT =
5769 getAsmOperandValueType(DL, STy->getElementType(ResNo))
5770 .getSimpleVT();
5771 } else {
5772 assert(ResNo == 0 && "Asm only has one result!");
5773 OpInfo.ConstraintVT =
5774 getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
5775 }
5776 ++ResNo;
5777 break;
5778 case InlineAsm::isInput:
5779 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
5780 break;
5781 case InlineAsm::isLabel:
5782 OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
5783 ++LabelNo;
5784 continue;
5786 // Nothing to do.
5787 break;
5788 }
5789
5790 if (OpInfo.CallOperandVal) {
5791 llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
5792 if (OpInfo.isIndirect) {
5793 OpTy = Call.getParamElementType(ArgNo);
5794 assert(OpTy && "Indirect operand must have elementtype attribute");
5795 }
5796
5797 // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
5798 if (StructType *STy = dyn_cast<StructType>(OpTy))
5799 if (STy->getNumElements() == 1)
5800 OpTy = STy->getElementType(0);
5801
5802 // If OpTy is not a single value, it may be a struct/union that we
5803 // can tile with integers.
5804 if (!OpTy->isSingleValueType() && OpTy->isSized()) {
5805 unsigned BitSize = DL.getTypeSizeInBits(OpTy);
5806 switch (BitSize) {
5807 default: break;
5808 case 1:
5809 case 8:
5810 case 16:
5811 case 32:
5812 case 64:
5813 case 128:
5814 OpTy = IntegerType::get(OpTy->getContext(), BitSize);
5815 break;
5816 }
5817 }
5818
5819 EVT VT = getAsmOperandValueType(DL, OpTy, true);
5820 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
5821 ArgNo++;
5822 }
5823 }
5824
5825 // If we have multiple alternative constraints, select the best alternative.
5826 if (!ConstraintOperands.empty()) {
5827 if (maCount) {
5828 unsigned bestMAIndex = 0;
5829 int bestWeight = -1;
5830 // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
5831 int weight = -1;
5832 unsigned maIndex;
5833 // Compute the sums of the weights for each alternative, keeping track
5834 // of the best (highest weight) one so far.
5835 for (maIndex = 0; maIndex < maCount; ++maIndex) {
5836 int weightSum = 0;
5837 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
5838 cIndex != eIndex; ++cIndex) {
5839 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
5840 if (OpInfo.Type == InlineAsm::isClobber)
5841 continue;
5842
5843 // If this is an output operand with a matching input operand,
5844 // look up the matching input. If their types mismatch, e.g. one
5845 // is an integer, the other is floating point, or their sizes are
5846 // different, flag it as an maCantMatch.
5847 if (OpInfo.hasMatchingInput()) {
5848 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
5849 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
5850 if ((OpInfo.ConstraintVT.isInteger() !=
5851 Input.ConstraintVT.isInteger()) ||
5852 (OpInfo.ConstraintVT.getSizeInBits() !=
5853 Input.ConstraintVT.getSizeInBits())) {
5854 weightSum = -1; // Can't match.
5855 break;
5856 }
5857 }
5858 }
5859 weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
5860 if (weight == -1) {
5861 weightSum = -1;
5862 break;
5863 }
5864 weightSum += weight;
5865 }
5866 // Update best.
5867 if (weightSum > bestWeight) {
5868 bestWeight = weightSum;
5869 bestMAIndex = maIndex;
5870 }
5871 }
5872
5873 // Now select chosen alternative in each constraint.
5874 for (AsmOperandInfo &cInfo : ConstraintOperands)
5875 if (cInfo.Type != InlineAsm::isClobber)
5876 cInfo.selectAlternative(bestMAIndex);
5877 }
5878 }
5879
5880 // Check and hook up tied operands, choose constraint code to use.
5881 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
5882 cIndex != eIndex; ++cIndex) {
5883 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
5884
5885 // If this is an output operand with a matching input operand, look up the
5886 // matching input. If their types mismatch, e.g. one is an integer, the
5887 // other is floating point, or their sizes are different, flag it as an
5888 // error.
5889 if (OpInfo.hasMatchingInput()) {
5890 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
5891
5892 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
5893 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
5894 getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
5895 OpInfo.ConstraintVT);
5896 std::pair<unsigned, const TargetRegisterClass *> InputRC =
5897 getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
5898 Input.ConstraintVT);
5899 const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() ||
5901 const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() ||
5903 if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
5904 (MatchRC.second != InputRC.second)) {
5905 report_fatal_error("Unsupported asm: input constraint"
5906 " with a matching output constraint of"
5907 " incompatible type!");
5908 }
5909 }
5910 }
5911 }
5912
5913 return ConstraintOperands;
5914}
5915
5916/// Return a number indicating our preference for chosing a type of constraint
5917/// over another, for the purpose of sorting them. Immediates are almost always
5918/// preferrable (when they can be emitted). A higher return value means a
5919/// stronger preference for one constraint type relative to another.
5920/// FIXME: We should prefer registers over memory but doing so may lead to
5921/// unrecoverable register exhaustion later.
5922/// https://github.com/llvm/llvm-project/issues/20571
5924 switch (CT) {
5927 return 4;
5930 return 3;
5932 return 2;
5934 return 1;
5936 return 0;
5937 }
5938 llvm_unreachable("Invalid constraint type");
5939}
5940
5941/// Examine constraint type and operand type and determine a weight value.
5942/// This object must already have been set up with the operand type
5943/// and the current alternative constraint selected.
5946 AsmOperandInfo &info, int maIndex) const {
5948 if (maIndex >= (int)info.multipleAlternatives.size())
5949 rCodes = &info.Codes;
5950 else
5951 rCodes = &info.multipleAlternatives[maIndex].Codes;
5952 ConstraintWeight BestWeight = CW_Invalid;
5953
5954 // Loop over the options, keeping track of the most general one.
5955 for (const std::string &rCode : *rCodes) {
5956 ConstraintWeight weight =
5957 getSingleConstraintMatchWeight(info, rCode.c_str());
5958 if (weight > BestWeight)
5959 BestWeight = weight;
5960 }
5961
5962 return BestWeight;
5963}
5964
5965/// Examine constraint type and operand type and determine a weight value.
5966/// This object must already have been set up with the operand type
5967/// and the current alternative constraint selected.
5970 AsmOperandInfo &info, const char *constraint) const {
5971 ConstraintWeight weight = CW_Invalid;
5972 Value *CallOperandVal = info.CallOperandVal;
5973 // If we don't have a value, we can't do a match,
5974 // but allow it at the lowest weight.
5975 if (!CallOperandVal)
5976 return CW_Default;
5977 // Look at the constraint type.
5978 switch (*constraint) {
5979 case 'i': // immediate integer.
5980 case 'n': // immediate integer with a known value.
5981 if (isa<ConstantInt>(CallOperandVal))
5982 weight = CW_Constant;
5983 break;
5984 case 's': // non-explicit intregal immediate.
5985 if (isa<GlobalValue>(CallOperandVal))
5986 weight = CW_Constant;
5987 break;
5988 case 'E': // immediate float if host format.
5989 case 'F': // immediate float.
5990 if (isa<ConstantFP>(CallOperandVal))
5991 weight = CW_Constant;
5992 break;
5993 case '<': // memory operand with autodecrement.
5994 case '>': // memory operand with autoincrement.
5995 case 'm': // memory operand.
5996 case 'o': // offsettable memory operand
5997 case 'V': // non-offsettable memory operand
5998 weight = CW_Memory;
5999 break;
6000 case 'r': // general register.
6001 case 'g': // general register, memory operand or immediate integer.
6002 // note: Clang converts "g" to "imr".
6003 if (CallOperandVal->getType()->isIntegerTy())
6004 weight = CW_Register;
6005 break;
6006 case 'X': // any operand.
6007 default:
6008 weight = CW_Default;
6009 break;
6010 }
6011 return weight;
6012}
6013
6014/// If there are multiple different constraints that we could pick for this
6015/// operand (e.g. "imr") try to pick the 'best' one.
6016/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
6017/// into seven classes:
6018/// Register -> one specific register
6019/// RegisterClass -> a group of regs
6020/// Memory -> memory
6021/// Address -> a symbolic memory reference
6022/// Immediate -> immediate values
6023/// Other -> magic values (such as "Flag Output Operands")
6024/// Unknown -> something we don't recognize yet and can't handle
6025/// Ideally, we would pick the most specific constraint possible: if we have
6026/// something that fits into a register, we would pick it. The problem here
6027/// is that if we have something that could either be in a register or in
6028/// memory that use of the register could cause selection of *other*
6029/// operands to fail: they might only succeed if we pick memory. Because of
6030/// this the heuristic we use is:
6031///
6032/// 1) If there is an 'other' constraint, and if the operand is valid for
6033/// that constraint, use it. This makes us take advantage of 'i'
6034/// constraints when available.
6035/// 2) Otherwise, pick the most general constraint present. This prefers
6036/// 'm' over 'r', for example.
6037///
6039 TargetLowering::AsmOperandInfo &OpInfo) const {
6040 ConstraintGroup Ret;
6041
6042 Ret.reserve(OpInfo.Codes.size());
6043 for (StringRef Code : OpInfo.Codes) {
6044 TargetLowering::ConstraintType CType = getConstraintType(Code);
6045
6046 // Indirect 'other' or 'immediate' constraints are not allowed.
6047 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
6048 CType == TargetLowering::C_Register ||
6050 continue;
6051
6052 // Things with matching constraints can only be registers, per gcc
6053 // documentation. This mainly affects "g" constraints.
6054 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6055 continue;
6056
6057 Ret.emplace_back(Code, CType);
6058 }
6059
6060 std::stable_sort(
6061 Ret.begin(), Ret.end(), [](ConstraintPair a, ConstraintPair b) {
6062 return getConstraintPiority(a.second) > getConstraintPiority(b.second);
6063 });
6064
6065 return Ret;
6066}
6067
6068/// If we have an immediate, see if we can lower it. Return true if we can,
6069/// false otherwise.
6071 SDValue Op, SelectionDAG *DAG,
6072 const TargetLowering &TLI) {
6073
6074 assert((P.second == TargetLowering::C_Other ||
6075 P.second == TargetLowering::C_Immediate) &&
6076 "need immediate or other");
6077
6078 if (!Op.getNode())
6079 return false;
6080
6081 std::vector<SDValue> ResultOps;
6082 TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
6083 return !ResultOps.empty();
6084}
6085
6086/// Determines the constraint code and constraint type to use for the specific
6087/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
6089 SDValue Op,
6090 SelectionDAG *DAG) const {
6091 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
6092
6093 // Single-letter constraints ('r') are very common.
6094 if (OpInfo.Codes.size() == 1) {
6095 OpInfo.ConstraintCode = OpInfo.Codes[0];
6096 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6097 } else {
6098 ConstraintGroup G = getConstraintPreferences(OpInfo);
6099 if (G.empty())
6100 return;
6101
6102 unsigned BestIdx = 0;
6103 for (const unsigned E = G.size();
6104 BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
6105 G[BestIdx].second == TargetLowering::C_Immediate);
6106 ++BestIdx) {
6107 if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
6108 break;
6109 // If we're out of constraints, just pick the first one.
6110 if (BestIdx + 1 == E) {
6111 BestIdx = 0;
6112 break;
6113 }
6114 }
6115
6116 OpInfo.ConstraintCode = G[BestIdx].first;
6117 OpInfo.ConstraintType = G[BestIdx].second;
6118 }
6119
6120 // 'X' matches anything.
6121 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
6122 // Constants are handled elsewhere. For Functions, the type here is the
6123 // type of the result, which is not what we want to look at; leave them
6124 // alone.
6125 Value *v = OpInfo.CallOperandVal;
6126 if (isa<ConstantInt>(v) || isa<Function>(v)) {
6127 return;
6128 }
6129
6130 if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
6131 OpInfo.ConstraintCode = "i";
6132 return;
6133 }
6134
6135 // Otherwise, try to resolve it to something we know about by looking at
6136 // the actual operand type.
6137 if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
6138 OpInfo.ConstraintCode = Repl;
6139 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6140 }
6141 }
6142}
6143
6144/// Given an exact SDIV by a constant, create a multiplication
6145/// with the multiplicative inverse of the constant.
6146/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
// Strategy: write each divisor as D0 * 2^Shift with D0 odd. Because the
// division is exact (no remainder), X sdiv D == (X >>s Shift) * inv(D0),
// where inv(D0) is the multiplicative inverse of D0 modulo 2^BitWidth.
// NOTE(review): the opening line of this definition is not visible in this
// excerpt; 'N' is the SDIV node and 'TLI' the TargetLowering — confirm
// against the full source.
 6148 const SDLoc &dl, SelectionDAG &DAG,
 6149 SmallVectorImpl<SDNode *> &Created) {
 6150 SDValue Op0 = N->getOperand(0);
 6151 SDValue Op1 = N->getOperand(1);
 6152 EVT VT = N->getValueType(0);
 6153 EVT SVT = VT.getScalarType();
 6154 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
 6155 EVT ShSVT = ShVT.getScalarType();
 6156
 6157 bool UseSRA = false;
 6158 SmallVector<SDValue, 16> Shifts, Factors;
 6159
// Per-lane worker: rejects a zero divisor, peels the power-of-two part off
// into Shifts, and records the inverse of the remaining odd factor.
 6160 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
 6161 if (C->isZero())
 6162 return false;
 6163 APInt Divisor = C->getAPIntValue();
 6164 unsigned Shift = Divisor.countr_zero();
 6165 if (Shift) {
// Exactness lets us drop the low zero bits with an exact arithmetic shift.
 6166 Divisor.ashrInPlace(Shift);
 6167 UseSRA = true;
 6168 }
// Divisor is odd here, so it is invertible modulo 2^BitWidth.
 6169 APInt Factor = Divisor.multiplicativeInverse();
 6170 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
 6171 Factors.push_back(DAG.getConstant(Factor, dl, SVT));
 6172 return true;
 6173 };
 6174
 6175 // Collect all magic values from the build vector.
 6176 if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
 6177 return SDValue();
 6178
// Re-materialize the per-lane constants in the same shape as the divisor
// operand: fixed vector, scalable splat, or plain scalar.
 6179 SDValue Shift, Factor;
 6180 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
 6181 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
 6182 Factor = DAG.getBuildVector(VT, dl, Factors);
 6183 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
 6184 assert(Shifts.size() == 1 && Factors.size() == 1 &&
 6185 "Expected matchUnaryPredicate to return one element for scalable "
 6186 "vectors");
 6187 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
 6188 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
 6189 } else {
 6190 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
 6191 Shift = Shifts[0];
 6192 Factor = Factors[0];
 6193 }
 6194
// Emit: (mul (sra exact Op0, Shift), Factor). The SRA is skipped entirely
// when every divisor was odd (UseSRA stayed false).
 6195 SDValue Res = Op0;
 6196 if (UseSRA) {
 6197 Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, SDNodeFlags::Exact);
 6198 Created.push_back(Res.getNode());
 6199 }
 6200
 6201 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
 6202}
6203
6204/// Given an exact UDIV by a constant, create a multiplication
6205/// with the multiplicative inverse of the constant.
6206/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
// Unsigned twin of BuildExactSDIV: D = D0 * 2^Shift with D0 odd, and an
// exact udiv becomes a logical shift right followed by a multiply with
// inv(D0) modulo 2^BitWidth.
// NOTE(review): the opening line of this definition is not visible in this
// excerpt — confirm the parameter list against the full source.
 6208 const SDLoc &dl, SelectionDAG &DAG,
 6209 SmallVectorImpl<SDNode *> &Created) {
 6210 EVT VT = N->getValueType(0);
 6211 EVT SVT = VT.getScalarType();
 6212 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
 6213 EVT ShSVT = ShVT.getScalarType();
 6214
 6215 bool UseSRL = false;
 6216 SmallVector<SDValue, 16> Shifts, Factors;
 6217
// Per-lane worker: rejects a zero divisor, strips trailing zeros into
// Shifts, and records the inverse of the remaining odd factor.
 6218 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
 6219 if (C->isZero())
 6220 return false;
 6221 APInt Divisor = C->getAPIntValue();
 6222 unsigned Shift = Divisor.countr_zero();
 6223 if (Shift) {
 6224 Divisor.lshrInPlace(Shift);
 6225 UseSRL = true;
 6226 }
 6227 // Calculate the multiplicative inverse modulo BW.
 6228 APInt Factor = Divisor.multiplicativeInverse();
 6229 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
 6230 Factors.push_back(DAG.getConstant(Factor, dl, SVT));
 6231 return true;
 6232 };
 6233
 6234 SDValue Op1 = N->getOperand(1);
 6235
 6236 // Collect all magic values from the build vector.
 6237 if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern))
 6238 return SDValue();
 6239
// Re-materialize the per-lane constants in the same shape as the divisor
// operand: fixed vector, scalable splat, or plain scalar.
 6240 SDValue Shift, Factor;
 6241 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
 6242 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
 6243 Factor = DAG.getBuildVector(VT, dl, Factors);
 6244 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
 6245 assert(Shifts.size() == 1 && Factors.size() == 1 &&
 6246 "Expected matchUnaryPredicate to return one element for scalable "
 6247 "vectors");
 6248 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
 6249 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
 6250 } else {
 6251 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
 6252 Shift = Shifts[0];
 6253 Factor = Factors[0];
 6254 }
 6255
// Emit: (mul (srl exact N0, Shift), Factor); skip the SRL when every
// divisor was odd.
 6256 SDValue Res = N->getOperand(0);
 6257 if (UseSRL) {
 6258 Res = DAG.getNode(ISD::SRL, dl, VT, Res, Shift, SDNodeFlags::Exact);
 6259 Created.push_back(Res.getNode());
 6260 }
 6261
 6262 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
 6263}
6264
6266 SelectionDAG &DAG,
6267 SmallVectorImpl<SDNode *> &Created) const {
6269 if (isIntDivCheap(N->getValueType(0), Attr))
6270 return SDValue(N, 0); // Lower SDIV as SDIV
6271 return SDValue();
6272}
6273
6274SDValue
// Default target hook for srem-by-power-of-2 expansion, mirroring
// BuildSDIVPow2: keep the SREM when integer division is cheap, otherwise
// decline so generic expansion runs.
// NOTE(review): part of the signature and the line computing 'Attr' are
// not visible in this excerpt — confirm against the full source.
 6276 SelectionDAG &DAG,
 6277 SmallVectorImpl<SDNode *> &Created) const {
 6279 if (isIntDivCheap(N->getValueType(0), Attr))
 6280 return SDValue(N, 0); // Lower SREM as SREM
 6281 return SDValue();
 6282}
6283
6284/// Build sdiv by power-of-2 with conditional move instructions
6285/// Ref: "Hacker's Delight" by Henry Warren 10-1
6286/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6287/// bgez x, label
6288/// add x, x, 2**k-1
6289/// label:
6290/// sra res, x, k
6291/// neg res, res (when the divisor is negative)
// Signed division by 2^k truncates toward zero, but SRA rounds toward
// negative infinity; adding 2^k-1 to negative dividends first corrects the
// rounding. The select below implements the conditional add.
 6293 SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
 6294 SmallVectorImpl<SDNode *> &Created) const {
// Lg2 = k, the log2 of |Divisor| (Divisor is +/-2^k by precondition).
 6295 unsigned Lg2 = Divisor.countr_zero();
 6296 EVT VT = N->getValueType(0);
 6297
 6298 SDLoc DL(N);
 6299 SDValue N0 = N->getOperand(0);
 6300 SDValue Zero = DAG.getConstant(0, DL, VT);
 6301 APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
 6302 SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
 6303
 6304 // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
 6305 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 6306 SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
 6307 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
 6308 SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
 6309
 6310 Created.push_back(Cmp.getNode());
 6311 Created.push_back(Add.getNode());
 6312 Created.push_back(CMov.getNode());
 6313
 6314 // Divide by pow2.
 6315 SDValue SRA =
 6316 DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, VT));
 6317
 6318 // If we're dividing by a positive value, we're done. Otherwise, we must
 6319 // negate the result.
 6320 if (Divisor.isNonNegative())
 6321 return SRA;
 6322
 6323 Created.push_back(SRA.getNode());
// Negate via (0 - SRA) to handle the -2^k divisor case.
 6324 return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
 6325}
6326
6327/// Given an ISD::SDIV node expressing a divide by constant,
6328/// return a DAG expression to select that will generate the same value by
6329/// multiplying by a magic number.
6330/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
// Classic signed magic-number division: N/D == sra(mulhs(N, Magic) +
// Factor*N, Shift) + sign-bit correction. Per-lane constants are collected
// by a predicate so vector (fixed and scalable splat) divisors work too.
// NOTE(review): the signature line is not visible in this excerpt.
 6332 bool IsAfterLegalization,
 6333 bool IsAfterLegalTypes,
 6334 SmallVectorImpl<SDNode *> &Created) const {
 6335 SDLoc dl(N);
 6336 EVT VT = N->getValueType(0);
 6337 EVT SVT = VT.getScalarType();
 6338 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
 6339 EVT ShSVT = ShVT.getScalarType();
 6340 unsigned EltBits = VT.getScalarSizeInBits();
 6341 EVT MulVT;
 6342
 6343 // Check to see if we can do this.
 6344 // FIXME: We should be more aggressive here.
 6345 if (!isTypeLegal(VT)) {
 6346 // Limit this to simple scalars for now.
 6347 if (VT.isVector() || !VT.isSimple())
 6348 return SDValue();
 6349
 6350 // If this type will be promoted to a large enough type with a legal
 6351 // multiply operation, we can go ahead and do this transform.
// NOTE(review): the condition guarding this early-out (line 6352 of the
// original) is missing from this excerpt.
 6353 return SDValue();
 6354
// MulVT is the promoted type; it must be at least double-width so the high
// half of the product can be recovered with a plain MUL + SRL.
 6355 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
 6356 if (MulVT.getSizeInBits() < (2 * EltBits) ||
 6357 !isOperationLegal(ISD::MUL, MulVT))
 6358 return SDValue();
 6359 }
 6360
 6361 // If the sdiv has an 'exact' bit we can use a simpler lowering.
 6362 if (N->getFlags().hasExact())
 6363 return BuildExactSDIV(*this, N, dl, DAG, Created);
 6364
 6365 SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
 6366
// Per-lane worker: computes the magic multiplier, an optional +/-1
// numerator factor, the post-shift amount, and a mask that disables the
// sign-bit fixup for the trivial +/-1 divisors.
 6367 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
 6368 if (C->isZero())
 6369 return false;
 6370
 6371 const APInt &Divisor = C->getAPIntValue();
// NOTE(review): the line computing 'magics' (SignedDivisionByConstantInfo,
// line 6372 of the original) is missing from this excerpt.
 6373 int NumeratorFactor = 0;
 6374 int ShiftMask = -1;
 6375
 6376 if (Divisor.isOne() || Divisor.isAllOnes()) {
 6377 // If d is +1/-1, we just multiply the numerator by +1/-1.
 6378 NumeratorFactor = Divisor.getSExtValue();
 6379 magics.Magic = 0;
 6380 magics.ShiftAmount = 0;
 6381 ShiftMask = 0;
 6382 } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
 6383 // If d > 0 and m < 0, add the numerator.
 6384 NumeratorFactor = 1;
 6385 } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
 6386 // If d < 0 and m > 0, subtract the numerator.
 6387 NumeratorFactor = -1;
 6388 }
 6389
 6390 MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
 6391 Factors.push_back(DAG.getSignedConstant(NumeratorFactor, dl, SVT));
 6392 Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
 6393 ShiftMasks.push_back(DAG.getSignedConstant(ShiftMask, dl, SVT));
 6394 return true;
 6395 };
 6396
 6397 SDValue N0 = N->getOperand(0);
 6398 SDValue N1 = N->getOperand(1);
 6399
 6400 // Collect the shifts / magic values from each element.
 6401 if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
 6402 return SDValue();
 6403
// Re-materialize per-lane constants in the divisor's shape: fixed vector,
// scalable splat, or scalar.
 6404 SDValue MagicFactor, Factor, Shift, ShiftMask;
 6405 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
 6406 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
 6407 Factor = DAG.getBuildVector(VT, dl, Factors);
 6408 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
 6409 ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
 6410 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
 6411 assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
 6412 Shifts.size() == 1 && ShiftMasks.size() == 1 &&
 6413 "Expected matchUnaryPredicate to return one element for scalable "
 6414 "vectors");
 6415 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
 6416 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
 6417 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
 6418 ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
 6419 } else {
 6420 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
 6421 MagicFactor = MagicFactors[0];
 6422 Factor = Factors[0];
 6423 Shift = Shifts[0];
 6424 ShiftMask = ShiftMasks[0];
 6425 }
 6426
 6427 // Multiply the numerator (operand 0) by the magic value.
 6428 // FIXME: We should support doing a MUL in a wider type.
// Helper returning the signed high half of X*Y, trying in order: promoted
// wide MUL (illegal VT), MULHS, SMUL_LOHI, then a double-width MUL + SRL.
// Returns empty SDValue when no form is available.
 6429 auto GetMULHS = [&](SDValue X, SDValue Y) {
 6430 // If the type isn't legal, use a wider mul of the type calculated
 6431 // earlier.
 6432 if (!isTypeLegal(VT)) {
 6433 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
 6434 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
 6435 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
 6436 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
 6437 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
 6438 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
 6439 }
 6440
 6441 if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
 6442 return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
 6443 if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
 6444 SDValue LoHi =
 6445 DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
// Result number 1 of SMUL_LOHI is the high half of the product.
 6446 return SDValue(LoHi.getNode(), 1);
 6447 }
 6448 // If type twice as wide legal, widen and use a mul plus a shift.
 6449 unsigned Size = VT.getScalarSizeInBits();
 6450 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
 6451 if (VT.isVector())
 6452 WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
 6454 // Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
 6455 // custom lowered. This is very expensive so avoid it at all costs for
 6456 // constant divisors.
// NOTE(review): part of this condition (lines 6458-6459 of the original,
// checking legality of the wide MUL) is missing from this excerpt.
 6457 if ((!IsAfterLegalTypes && isOperationExpand(ISD::SDIV, VT) &&
 6460 X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X);
 6461 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y);
 6462 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
 6463 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
 6464 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
 6465 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
 6466 }
 6467 return SDValue();
 6468 };
 6469
 6470 SDValue Q = GetMULHS(N0, MagicFactor);
 6471 if (!Q)
 6472 return SDValue();
 6473
 6474 Created.push_back(Q.getNode());
 6475
 6476 // (Optionally) Add/subtract the numerator using Factor.
// Factor is -1/0/+1 per lane, so this MUL+ADD applies the algorithm's
// "add/subtract numerator" correction only where required.
 6477 Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
 6478 Created.push_back(Factor.getNode());
 6479 Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
 6480 Created.push_back(Q.getNode());
 6481
 6482 // Shift right algebraic by shift value.
 6483 Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
 6484 Created.push_back(Q.getNode());
 6485
 6486 // Extract the sign bit, mask it and add it to the quotient.
// Adding the sign bit rounds the quotient toward zero for negative
// dividends; ShiftMask is 0 on +/-1 divisor lanes to suppress the fixup.
 6487 SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
 6488 SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
 6489 Created.push_back(T.getNode());
 6490 T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
 6491 Created.push_back(T.getNode());
 6492 return DAG.getNode(ISD::ADD, dl, VT, Q, T);
 6493}
6494
6495/// Given an ISD::UDIV node expressing a divide by constant,
6496/// return a DAG expression to select that will generate the same value by
6497/// multiplying by a magic number.
6498/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
// Unsigned magic-number division: Q = srl(mulhu(srl(N, PreShift), Magic),
// PostShift), with an optional "NPQ" add-back step when the magic constant
// would not fit (IsAdd). A final select handles divide-by-one lanes.
// NOTE(review): the signature line is not visible in this excerpt.
 6500 bool IsAfterLegalization,
 6501 bool IsAfterLegalTypes,
 6502 SmallVectorImpl<SDNode *> &Created) const {
 6503 SDLoc dl(N);
 6504 EVT VT = N->getValueType(0);
 6505 EVT SVT = VT.getScalarType();
 6506 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
 6507 EVT ShSVT = ShVT.getScalarType();
 6508 unsigned EltBits = VT.getScalarSizeInBits();
 6509 EVT MulVT;
 6510
 6511 // Check to see if we can do this.
 6512 // FIXME: We should be more aggressive here.
 6513 if (!isTypeLegal(VT)) {
 6514 // Limit this to simple scalars for now.
 6515 if (VT.isVector() || !VT.isSimple())
 6516 return SDValue();
 6517
 6518 // If this type will be promoted to a large enough type with a legal
 6519 // multiply operation, we can go ahead and do this transform.
// NOTE(review): the condition guarding this early-out (line 6520 of the
// original) is missing from this excerpt.
 6521 return SDValue();
 6522
// MulVT must be at least double-width so the high half of the product can
// be recovered with a plain MUL + SRL.
 6523 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
 6524 if (MulVT.getSizeInBits() < (2 * EltBits) ||
 6525 !isOperationLegal(ISD::MUL, MulVT))
 6526 return SDValue();
 6527 }
 6528
 6529 // If the udiv has an 'exact' bit we can use a simpler lowering.
 6530 if (N->getFlags().hasExact())
 6531 return BuildExactUDIV(*this, N, dl, DAG, Created);
 6532
 6533 SDValue N0 = N->getOperand(0);
 6534 SDValue N1 = N->getOperand(1);
 6535
 6536 // Try to use leading zeros of the dividend to reduce the multiplier and
 6537 // avoid expensive fixups.
 6538 unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
 6539
 6540 bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
 6541 SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
 6542
// Per-lane worker: computes pre/post shifts, the magic multiplier and the
// NPQ factor; divide-by-one lanes get UNDEF placeholders and are patched
// by the final select.
 6543 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
 6544 if (C->isZero())
 6545 return false;
 6546 const APInt& Divisor = C->getAPIntValue();
 6547
 6548 SDValue PreShift, MagicFactor, NPQFactor, PostShift;
 6549
 6550 // Magic algorithm doesn't work for division by 1. We need to emit a select
 6551 // at the end.
 6552 if (Divisor.isOne()) {
 6553 PreShift = PostShift = DAG.getUNDEF(ShSVT);
 6554 MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
 6555 } else {
// NOTE(review): the lines computing 'magics'
// (UnsignedDivisionByConstantInfo::get, lines 6556-6557 of the original)
// are missing from this excerpt.
 6558 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
 6559
 6560 MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);
 6561
 6562 assert(magics.PreShift < Divisor.getBitWidth() &&
 6563 "We shouldn't generate an undefined shift!");
 6564 assert(magics.PostShift < Divisor.getBitWidth() &&
 6565 "We shouldn't generate an undefined shift!");
 6566 assert((!magics.IsAdd || magics.PreShift == 0) &&
 6567 "Unexpected pre-shift");
 6568 PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
 6569 PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
// NPQFactor of 2^(EltBits-1) makes the vector MULHU below act as an
// SRL-by-1 on lanes that need the add-back; zero disables it.
 6570 NPQFactor = DAG.getConstant(
 6571 magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1)
 6572 : APInt::getZero(EltBits),
 6573 dl, SVT);
 6574 UseNPQ |= magics.IsAdd;
 6575 UsePreShift |= magics.PreShift != 0;
 6576 UsePostShift |= magics.PostShift != 0;
 6577 }
 6578
 6579 PreShifts.push_back(PreShift);
 6580 MagicFactors.push_back(MagicFactor);
 6581 NPQFactors.push_back(NPQFactor);
 6582 PostShifts.push_back(PostShift);
 6583 return true;
 6584 };
 6585
 6586 // Collect the shifts/magic values from each element.
 6587 if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
 6588 return SDValue();
 6589
// Re-materialize per-lane constants in the divisor's shape: fixed vector,
// scalable splat, or scalar.
 6590 SDValue PreShift, PostShift, MagicFactor, NPQFactor;
 6591 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
 6592 PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
 6593 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
 6594 NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
 6595 PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
 6596 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
 6597 assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
 6598 NPQFactors.size() == 1 && PostShifts.size() == 1 &&
 6599 "Expected matchUnaryPredicate to return one for scalable vectors");
 6600 PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
 6601 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
 6602 NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
 6603 PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
 6604 } else {
 6605 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
 6606 PreShift = PreShifts[0];
 6607 MagicFactor = MagicFactors[0];
 6608 PostShift = PostShifts[0];
 6609 }
 6610
 6611 SDValue Q = N0;
 6612 if (UsePreShift) {
 6613 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
 6614 Created.push_back(Q.getNode());
 6615 }
 6616
 6617 // FIXME: We should support doing a MUL in a wider type.
// Helper returning the unsigned high half of X*Y, trying in order:
// promoted wide MUL (illegal VT), MULHU, UMUL_LOHI, then a double-width
// MUL + SRL. Returns empty SDValue when no form is available.
 6618 auto GetMULHU = [&](SDValue X, SDValue Y) {
 6619 // If the type isn't legal, use a wider mul of the type calculated
 6620 // earlier.
 6621 if (!isTypeLegal(VT)) {
 6622 X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
 6623 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
 6624 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
 6625 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
 6626 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
 6627 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
 6628 }
 6629
 6630 if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
 6631 return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
 6632 if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
 6633 SDValue LoHi =
 6634 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
// Result number 1 of UMUL_LOHI is the high half of the product.
 6635 return SDValue(LoHi.getNode(), 1);
 6636 }
 6637 // If type twice as wide legal, widen and use a mul plus a shift.
 6638 unsigned Size = VT.getScalarSizeInBits();
 6639 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
 6640 if (VT.isVector())
 6641 WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
 6643 // Some targets like AMDGPU try to go from UDIV to UDIVREM which is then
 6644 // custom lowered. This is very expensive so avoid it at all costs for
 6645 // constant divisors.
// NOTE(review): part of this condition (lines 6647-6648 of the original,
// checking legality of the wide MUL) is missing from this excerpt.
 6646 if ((!IsAfterLegalTypes && isOperationExpand(ISD::UDIV, VT) &&
 6649 X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X);
 6650 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y);
 6651 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
 6652 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
 6653 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
 6654 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
 6655 }
 6656 return SDValue(); // No mulhu or equivalent
 6657 };
 6658
 6659 // Multiply the numerator (operand 0) by the magic value.
 6660 Q = GetMULHU(Q, MagicFactor);
 6661 if (!Q)
 6662 return SDValue();
 6663
 6664 Created.push_back(Q.getNode());
 6665
// NPQ add-back ("Hacker's Delight" overflow fixup):
// Q = ((N - Q) >> 1 + Q) for lanes whose magic constant overflowed.
 6666 if (UseNPQ) {
 6667 SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
 6668 Created.push_back(NPQ.getNode());
 6669
 6670 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
 6671 // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
 6672 if (VT.isVector())
 6673 NPQ = GetMULHU(NPQ, NPQFactor);
 6674 else
 6675 NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
 6676
 6677 Created.push_back(NPQ.getNode());
 6678
 6679 Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
 6680 Created.push_back(Q.getNode());
 6681 }
 6682
 6683 if (UsePostShift) {
 6684 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
 6685 Created.push_back(Q.getNode());
 6686 }
 6687
 6688 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 6689
// Patch divide-by-one lanes (which carried UNDEF magic values) back to the
// original numerator.
 6690 SDValue One = DAG.getConstant(1, dl, VT);
 6691 SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
 6692 return DAG.getSelect(dl, VT, IsOne, N0, Q);
 6693}
6694
6695/// If all values in Values that *don't* match the predicate are same 'splat'
6696/// value, then replace all values with that splat value.
6697/// Else, if AlternativeReplacement was provided, then replace all values that
6698/// do match predicate with AlternativeReplacement value.
// Used below to canonicalize per-lane constant arrays: lanes holding bogus
// placeholder values (matched by Predicate) are rewritten so the whole
// array becomes splattable when possible.
// NOTE(review): the line carrying the function name and first parameter
// (line 6700 of the original) is not visible in this excerpt.
6699static void
 6701 std::function<bool(SDValue)> Predicate,
 6702 SDValue AlternativeReplacement = SDValue()) {
 6703 SDValue Replacement;
 6704 // Is there a value for which the Predicate does *NOT* match? What is it?
 6705 auto SplatValue = llvm::find_if_not(Values, Predicate);
 6706 if (SplatValue != Values.end()) {
 6707 // Does Values consist only of SplatValue's and values matching Predicate?
 6708 if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
 6709 return Value == *SplatValue || Predicate(Value);
 6710 })) // Then we shall replace values matching predicate with SplatValue.
 6711 Replacement = *SplatValue;
 6712 }
 6713 if (!Replacement) {
 6714 // Oops, we did not find the "baseline" splat value.
 6715 if (!AlternativeReplacement)
 6716 return; // Nothing to do.
 6717 // Let's replace with provided value then.
 6718 Replacement = AlternativeReplacement;
 6719 }
 6720 std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
 6721}
6722
6723/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6724/// where the divisor is constant and the comparison target is zero,
6725/// return a DAG expression that will generate the same comparison result
6726/// using only multiplications, additions and shifts/rotations.
6727/// Ref: "Hacker's Delight" 10-17.
// Thin wrapper: delegates to prepareUREMEqFold and, on success, queues the
// newly built nodes on the DAG combiner worklist.
6728SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
 6729 SDValue CompTargetNode,
// NOTE(review): the Cond parameter line and the local 'Built' vector
// declaration (lines 6730 and 6733 of the original) are not visible in
// this excerpt.
 6731 DAGCombinerInfo &DCI,
 6732 const SDLoc &DL) const {
 6734 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
 6735 DCI, DL, Built)) {
 6736 for (SDNode *N : Built)
 6737 DCI.AddToWorklist(N);
 6738 return Folded;
 6739 }
 6740
 6741 return SDValue();
 6742}
6743
6744SDValue
6745TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
 6746 SDValue CompTargetNode, ISD::CondCode Cond,
 6747 DAGCombinerInfo &DCI, const SDLoc &DL,
 6748 SmallVectorImpl<SDNode *> &Created) const {
 6749 // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
 6750 // - D must be constant, with D = D0 * 2^K where D0 is odd
 6751 // - P is the multiplicative inverse of D0 modulo 2^W
 6752 // - Q = floor(((2^W) - 1) / D)
 6753 // where W is the width of the common type of N and D.
 6754 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
 6755 "Only applicable for (in)equality comparisons.");
 6756
 6757 SelectionDAG &DAG = DCI.DAG;
 6758
 6759 EVT VT = REMNode.getValueType();
 6760 EVT SVT = VT.getScalarType();
 6761 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
 6762 EVT ShSVT = ShVT.getScalarType();
 6763
 6764 // If MUL is unavailable, we cannot proceed in any case.
 6765 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
 6766 return SDValue();
 6767
// Lane-classification flags accumulated by the worker below; they decide
// whether the fold is profitable and which fixups are needed afterwards.
 6768 bool ComparingWithAllZeros = true;
 6769 bool AllComparisonsWithNonZerosAreTautological = true;
 6770 bool HadTautologicalLanes = false;
 6771 bool AllLanesAreTautological = true;
 6772 bool HadEvenDivisor = false;
 6773 bool AllDivisorsArePowerOfTwo = true;
 6774 bool HadTautologicalInvertedLanes = false;
 6775 SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;
 6776
// Per-lane worker over (divisor, comparison-target) pairs: computes P
// (inverse of the odd part), K (trailing-zero count) and Q (comparison
// bound) for each lane.
 6777 auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
 6778 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
 6779 if (CDiv->isZero())
 6780 return false;
 6781
 6782 const APInt &D = CDiv->getAPIntValue();
 6783 const APInt &Cmp = CCmp->getAPIntValue();
 6784
 6785 ComparingWithAllZeros &= Cmp.isZero();
 6786
 6787 // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
 6788 // if C2 is not less than C1, the comparison is always false.
 6789 // But we will only be able to produce the comparison that will give the
 6790 // opposive tautological answer. So this lane would need to be fixed up.
 6791 bool TautologicalInvertedLane = D.ule(Cmp);
 6792 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
 6793
 6794 // If all lanes are tautological (either all divisors are ones, or divisor
 6795 // is not greater than the constant we are comparing with),
 6796 // we will prefer to avoid the fold.
 6797 bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
 6798 HadTautologicalLanes |= TautologicalLane;
 6799 AllLanesAreTautological &= TautologicalLane;
 6800
 6801 // If we are comparing with non-zero, we need'll need to subtract said
 6802 // comparison value from the LHS. But there is no point in doing that if
 6803 // every lane where we are comparing with non-zero is tautological..
 6804 if (!Cmp.isZero())
 6805 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
 6806
 6807 // Decompose D into D0 * 2^K
 6808 unsigned K = D.countr_zero();
 6809 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
 6810 APInt D0 = D.lshr(K);
 6811
 6812 // D is even if it has trailing zeros.
 6813 HadEvenDivisor |= (K != 0);
 6814 // D is a power-of-two if D0 is one.
 6815 // If all divisors are power-of-two, we will prefer to avoid the fold.
 6816 AllDivisorsArePowerOfTwo &= D0.isOne();
 6817
 6818 // P = inv(D0, 2^W)
 6819 // 2^W requires W + 1 bits, so we have to extend and then truncate.
 6820 unsigned W = D.getBitWidth();
// NOTE(review): the line computing P (line 6821 of the original) is not
// visible in this excerpt.
 6822 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
 6823
 6824 // Q = floor((2^W - 1) u/ D)
 6825 // R = ((2^W - 1) u% D)
 6826 APInt Q, R;
// NOTE(review): the APInt::udivrem call (line 6827 of the original) is not
// visible in this excerpt.
 6828
 6829 // If we are comparing with zero, then that comparison constant is okay,
 6830 // else it may need to be one less than that.
 6831 if (Cmp.ugt(R))
 6832 Q -= 1;
 6833
// NOTE(review): part of this assert (line 6834 of the original) is not
// visible in this excerpt.
 6835 "We are expecting that K is always less than all-ones for ShSVT");
 6836
 6837 // If the lane is tautological the result can be constant-folded.
 6838 if (TautologicalLane) {
 6839 // Set P and K amount to a bogus values so we can try to splat them.
 6840 P = 0;
 6841 K = -1;
 6842 // And ensure that comparison constant is tautological,
 6843 // it will always compare true/false.
 6844 Q = -1;
 6845 }
 6846
 6847 PAmts.push_back(DAG.getConstant(P, DL, SVT));
 6848 KAmts.push_back(
 6849 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
 6850 /*implicitTrunc=*/true),
 6851 DL, ShSVT));
 6852 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
 6853 return true;
 6854 };
 6855
 6856 SDValue N = REMNode.getOperand(0);
 6857 SDValue D = REMNode.getOperand(1);
 6858
 6859 // Collect the values from each element.
 6860 if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
 6861 return SDValue();
 6862
 6863 // If all lanes are tautological, the result can be constant-folded.
 6864 if (AllLanesAreTautological)
 6865 return SDValue();
 6866
 6867 // If this is a urem by a powers-of-two, avoid the fold since it can be
 6868 // best implemented as a bit test.
 6869 if (AllDivisorsArePowerOfTwo)
 6870 return SDValue();
 6871
 6872 SDValue PVal, KVal, QVal;
 6873 if (D.getOpcode() == ISD::BUILD_VECTOR) {
 6874 if (HadTautologicalLanes) {
 6875 // Try to turn PAmts into a splat, since we don't care about the values
 6876 // that are currently '0'. If we can't, just keep '0'`s.
// NOTE(review): the turnVectorIntoSplatVector call for PAmts (line 6877 of
// the original) is not visible in this excerpt.
 6878 // Try to turn KAmts into a splat, since we don't care about the values
 6879 // that are currently '-1'. If we can't, change them to '0'`s.
// NOTE(review): the first line of the turnVectorIntoSplatVector call for
// KAmts (line 6880 of the original) is not visible in this excerpt.
 6881 DAG.getConstant(0, DL, ShSVT));
 6882 }
 6883
 6884 PVal = DAG.getBuildVector(VT, DL, PAmts);
 6885 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
 6886 QVal = DAG.getBuildVector(VT, DL, QAmts);
 6887 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
 6888 assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
 6889 "Expected matchBinaryPredicate to return one element for "
 6890 "SPLAT_VECTORs");
 6891 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
 6892 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
 6893 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
 6894 } else {
 6895 PVal = PAmts[0];
 6896 KVal = KAmts[0];
 6897 QVal = QAmts[0];
 6898 }
 6899
// Non-zero comparison targets are folded into the LHS first:
// (x u% D) == C  <=>  ((x - C) u% D) == 0 for the lanes that matter.
 6900 if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
 6901 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
 6902 return SDValue(); // FIXME: Could/should use `ISD::ADD`?
 6903 assert(CompTargetNode.getValueType() == N.getValueType() &&
 6904 "Expecting that the types on LHS and RHS of comparisons match.");
 6905 N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
 6906 }
 6907
 6908 // (mul N, P)
 6909 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
 6910 Created.push_back(Op0.getNode());
 6911
 6912 // Rotate right only if any divisor was even. We avoid rotates for all-odd
 6913 // divisors as a performance improvement, since rotating by 0 is a no-op.
 6914 if (HadEvenDivisor) {
 6915 // We need ROTR to do this.
 6916 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
 6917 return SDValue();
 6918 // UREM: (rotr (mul N, P), K)
 6919 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
 6920 Created.push_back(Op0.getNode());
 6921 }
 6922
 6923 // UREM: (setule/setugt (rotr (mul N, P), K), Q)
// NOTE(review): the condition-code argument of this getSetCC (line 6926 of
// the original) is not visible in this excerpt.
 6924 SDValue NewCC =
 6925 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
 6927 if (!HadTautologicalInvertedLanes)
 6928 return NewCC;
 6929
 6930 // If any lanes previously compared always-false, the NewCC will give
 6931 // always-true result for them, so we need to fixup those lanes.
 6932 // Or the other way around for inequality predicate.
 6933 assert(VT.isVector() && "Can/should only get here for vectors.");
 6934 Created.push_back(NewCC.getNode());
 6935
 6936 // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
 6937 // if C2 is not less than C1, the comparison is always false.
 6938 // But we have produced the comparison that will give the
 6939 // opposive tautological answer. So these lanes would need to be fixed up.
 6940 SDValue TautologicalInvertedChannels =
 6941 DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
 6942 Created.push_back(TautologicalInvertedChannels.getNode());
 6943
 6944 // NOTE: we avoid letting illegal types through even if we're before legalize
 6945 // ops – legalization has a hard time producing good code for this.
 6946 if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
 6947 // If we have a vector select, let's replace the comparison results in the
 6948 // affected lanes with the correct tautological result.
 6949 SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
 6950 DL, SETCCVT, SETCCVT);
 6951 return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
 6952 Replacement, NewCC);
 6953 }
 6954
 6955 // Else, we can just invert the comparison result in the appropriate lanes.
 6956 //
 6957 // NOTE: see the note above VSELECT above.
 6958 if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
 6959 return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
 6960 TautologicalInvertedChannels);
 6961
 6962 return SDValue(); // Don't know how to lower.
 6963}
6964
6965/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
6966/// where the divisor is constant and the comparison target is zero,
6967/// return a DAG expression that will generate the same comparison result
6968/// using only multiplications, additions and shifts/rotations.
6969/// Ref: "Hacker's Delight" 10-17.
// Thin wrapper: delegates to prepareSREMEqFold and, on success, queues the
// newly built nodes on the DAG combiner worklist. The assert documents the
// upper bound on how many nodes the preparation may create.
6970SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
 6971 SDValue CompTargetNode,
// NOTE(review): the Cond parameter line and the local 'Built' vector
// declaration (lines 6972 and 6975 of the original) are not visible in
// this excerpt.
 6973 DAGCombinerInfo &DCI,
 6974 const SDLoc &DL) const {
 6977 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
 6978 DCI, DL, Built)) {
 6979 assert(Built.size() <= 7 && "Max size prediction failed.");
 6980 for (SDNode *N : Built)
 6981 DCI.AddToWorklist(N);
 6982 return Folded;
 6983 }
 6984
 6985 return SDValue();
 6986}
6986
// Builds the multiply+rotate+unsigned-compare replacement for an
// (srem N, D) ==/!= 0 test. On success, all newly created nodes are recorded
// in `Created` (so the combiner can revisit them) and the final
// setcc/vselect is returned; returns the empty SDValue() when the fold does
// not apply or would not be profitable.
// NOTE(review): this listing is a rendered doc-page extraction; several
// original source lines (7078, 7082, 7094, 7096, 7102, 7150, 7153, 7157,
// 7210, 7225-7227, 7233, 7235, 7237) are elided below, so some values
// (e.g. P and A) appear without their visible declarations.
6987SDValue
6988TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
6989 SDValue CompTargetNode, ISD::CondCode Cond,
6990 DAGCombinerInfo &DCI, const SDLoc &DL,
6991 SmallVectorImpl<SDNode *> &Created) const {
6992 // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
6993 // Fold:
6994 // (seteq/ne (srem N, D), 0)
6995 // To:
6996 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
6997 //
6998 // - D must be constant, with D = D0 * 2^K where D0 is odd
6999 // - P is the multiplicative inverse of D0 modulo 2^W
7000 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
7001 // - Q = floor((2 * A) / (2^K))
7002 // where W is the width of the common type of N and D.
7003 //
7004 // When D is a power of two (and thus D0 is 1), the normal
7005 // formula for A and Q don't apply, because the derivation
7006 // depends on D not dividing 2^(W-1), and thus theorem ZRS
7007 // does not apply. This specifically fails when N = INT_MIN.
7008 //
7009 // Instead, for power-of-two D, we use:
7010 // - A = 2^(W-1)
7011 // |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
7012 // - Q = 2^(W-K) - 1
7013 // |-> Test that the top K bits are zero after rotation
7014 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7015 "Only applicable for (in)equality comparisons.");
7016
7017 SelectionDAG &DAG = DCI.DAG;
7018
7019 EVT VT = REMNode.getValueType();
7020 EVT SVT = VT.getScalarType();
7021 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7022 EVT ShSVT = ShVT.getScalarType();
7023
7024 // If we are after ops legalization, and MUL is unavailable, we can not
7025 // proceed.
7026 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
7027 return SDValue();
7028
7029 // TODO: Could support comparing with non-zero too.
7030 ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
7031 if (!CompTarget || !CompTarget->isZero())
7032 return SDValue();
7033
7034 bool HadIntMinDivisor = false;
7035 bool HadOneDivisor = false;
7036 bool AllDivisorsAreOnes = true;
7037 bool HadEvenDivisor = false;
7038 bool NeedToApplyOffset = false;
7039 bool AllDivisorsArePowerOfTwo = true;
7040 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
7041
7042 auto BuildSREMPattern = [&](ConstantSDNode *C) {
7043 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7044 if (C->isZero())
7045 return false;
7046
7047 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
7048
7049 // WARNING: this fold is only valid for positive divisors!
7050 APInt D = C->getAPIntValue();
7051 if (D.isNegative())
7052 D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
7053
7054 HadIntMinDivisor |= D.isMinSignedValue();
7055
7056 // If all divisors are ones, we will prefer to avoid the fold.
7057 HadOneDivisor |= D.isOne();
7058 AllDivisorsAreOnes &= D.isOne();
7059
7060 // Decompose D into D0 * 2^K
7061 unsigned K = D.countr_zero();
7062 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7063 APInt D0 = D.lshr(K);
7064
7065 if (!D.isMinSignedValue()) {
7066 // D is even if it has trailing zeros; unless it's INT_MIN, in which case
7067 // we don't care about this lane in this fold, we'll special-handle it.
7068 HadEvenDivisor |= (K != 0);
7069 }
7070
7071 // D is a power-of-two if D0 is one. This includes INT_MIN.
7072 // If all divisors are power-of-two, we will prefer to avoid the fold.
7073 AllDivisorsArePowerOfTwo &= D0.isOne();
7074
7075 // P = inv(D0, 2^W)
7076 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7077 unsigned W = D.getBitWidth();
 // (The declaration of P — D0's multiplicative inverse mod 2^W — is on a
 // line elided from this listing; the assert below sanity-checks it.)
7079 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7080
7081 // A = floor((2^(W - 1) - 1) / D0) & -2^K
 // (A's declaration is likewise elided; clearLowBits(K) applies the -2^K
 // mask from the formula above.)
7083 A.clearLowBits(K);
7084
7085 if (!D.isMinSignedValue()) {
7086 // If divisor INT_MIN, then we don't care about this lane in this fold,
7087 // we'll special-handle it.
7088 NeedToApplyOffset |= A != 0;
7089 }
7090
7091 // Q = floor((2 * A) / (2^K))
7092 APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
7093
7095 "We are expecting that A is always less than all-ones for SVT");
7097 "We are expecting that K is always less than all-ones for ShSVT");
7098
7099 // If D was a power of two, apply the alternate constant derivation.
7100 if (D0.isOne()) {
7101 // A = 2^(W-1)
7103 // - Q = 2^(W-K) - 1
7104 Q = APInt::getAllOnes(W - K).zext(W);
7105 }
7106
7107 // If the divisor is 1 the result can be constant-folded. Likewise, we
7108 // don't care about INT_MIN lanes, those can be set to undef if appropriate.
7109 if (D.isOne()) {
7110 // Set P, A and K to a bogus values so we can try to splat them.
7111 P = 0;
7112 A = -1;
7113 K = -1;
7114
7115 // x ?% 1 == 0 <--> true <--> x u<= -1
7116 Q = -1;
7117 }
7118
7119 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7120 AAmts.push_back(DAG.getConstant(A, DL, SVT));
7121 KAmts.push_back(
7122 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
7123 /*implicitTrunc=*/true),
7124 DL, ShSVT));
7125 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7126 return true;
7127 };
7128
7129 SDValue N = REMNode.getOperand(0);
7130 SDValue D = REMNode.getOperand(1);
7131
7132 // Collect the values from each element.
7133 if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
7134 return SDValue();
7135
7136 // If this is a srem by a one, avoid the fold since it can be constant-folded.
7137 if (AllDivisorsAreOnes)
7138 return SDValue();
7139
7140 // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
7141 // since it can be best implemented as a bit test.
7142 if (AllDivisorsArePowerOfTwo)
7143 return SDValue();
7144
7145 SDValue PVal, AVal, KVal, QVal;
7146 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7147 if (HadOneDivisor) {
7148 // Try to turn PAmts into a splat, since we don't care about the values
7149 // that are currently '0'. If we can't, just keep '0'`s.
7151 // Try to turn AAmts into a splat, since we don't care about the
7152 // values that are currently '-1'. If we can't, change them to '0'`s.
7154 DAG.getConstant(0, DL, SVT));
7155 // Try to turn KAmts into a splat, since we don't care about the values
7156 // that are currently '-1'. If we can't, change them to '0'`s.
7158 DAG.getConstant(0, DL, ShSVT));
7159 }
7160
7161 PVal = DAG.getBuildVector(VT, DL, PAmts);
7162 AVal = DAG.getBuildVector(VT, DL, AAmts);
7163 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7164 QVal = DAG.getBuildVector(VT, DL, QAmts);
7165 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7166 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7167 QAmts.size() == 1 &&
7168 "Expected matchUnaryPredicate to return one element for scalable "
7169 "vectors");
7170 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7171 AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
7172 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7173 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7174 } else {
7175 assert(isa<ConstantSDNode>(D) && "Expected a constant");
7176 PVal = PAmts[0];
7177 AVal = AAmts[0];
7178 KVal = KAmts[0];
7179 QVal = QAmts[0];
7180 }
7181
7182 // (mul N, P)
7183 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7184 Created.push_back(Op0.getNode());
7185
7186 if (NeedToApplyOffset) {
7187 // We need ADD to do this.
7188 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
7189 return SDValue();
7190
7191 // (add (mul N, P), A)
7192 Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
7193 Created.push_back(Op0.getNode());
7194 }
7195
7196 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7197 // divisors as a performance improvement, since rotating by 0 is a no-op.
7198 if (HadEvenDivisor) {
7199 // We need ROTR to do this.
7200 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7201 return SDValue();
7202 // SREM: (rotr (add (mul N, P), A), K)
7203 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7204 Created.push_back(Op0.getNode());
7205 }
7206
7207 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7208 SDValue Fold =
7209 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7211
7212 // If we didn't have lanes with INT_MIN divisor, then we're done.
7213 if (!HadIntMinDivisor)
7214 return Fold;
7215
7216 // That fold is only valid for positive divisors. Which effectively means,
7217 // it is invalid for INT_MIN divisors. So if we have such a lane,
7218 // we must fix-up results for said lanes.
7219 assert(VT.isVector() && "Can/should only get here for vectors.");
7220
7221 // NOTE: we avoid letting illegal types through even if we're before legalize
7222 // ops – legalization has a hard time producing good code for the code that
7223 // follows.
7224 if (!isOperationLegalOrCustom(ISD::SETCC, SETCCVT) ||
7228 return SDValue();
7229
7230 Created.push_back(Fold.getNode());
7231
7232 SDValue IntMin = DAG.getConstant(
7234 SDValue IntMax = DAG.getConstant(
7236 SDValue Zero =
7238
7239 // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7240 SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
7241 Created.push_back(DivisorIsIntMin.getNode());
7242
7243 // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
7244 SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
7245 Created.push_back(Masked.getNode());
7246 SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
7247 Created.push_back(MaskedIsZero.getNode());
7248
7249 // To produce final result we need to blend 2 vectors: 'SetCC' and
7250 // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
7251 // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
7252 // constant-folded, select can get lowered to a shuffle with constant mask.
7253 SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
7254 MaskedIsZero, Fold);
7255
7256 return Blended;
7257}
7258
// Body of a verification helper whose signature (original lines 7259-7260)
// is elided in this listing — presumably
// TargetLowering::verifyReturnAddressArgumentIsConstant; confirm against the
// full source. Returns true (after emitting a context error) when operand 0
// of Op is not a compile-time constant, false when the argument is valid.
7261 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
 // The diagnostic goes through the LLVMContext so the failure surfaces to
 // the frontend rather than asserting in the backend.
7262 DAG.getContext()->emitError("argument to '__builtin_return_address' must "
7263 "be a constant integer");
7264 return true;
7265 }
7266
7267 return false;
7268}
7269
// Body of the "is this FP input (effectively) zero?" test builder (the first
// signature line, original 7270, is elided in this listing). Returns a setcc
// node, in the target's setcc result type, that is true when Op would be
// treated as zero under the given denormal mode:
// - if denormal inputs are flushed (PreserveSign/PositiveZero), a plain
//   X == 0.0 compare suffices;
// - otherwise denormal inputs must also be caught, so test
//   fabs(X) < smallest-normalized-value instead.
7271 const DenormalMode &Mode) const {
7272 SDLoc DL(Op);
7273 EVT VT = Op.getValueType();
7274 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7275 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
7276
7277 // This is specifically a check for the handling of denormal inputs, not the
7278 // result.
7279 if (Mode.Input == DenormalMode::PreserveSign ||
7280 Mode.Input == DenormalMode::PositiveZero) {
7281 // Test = X == 0.0
7282 return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
7283 }
7284
7285 // Testing it with denormal inputs to avoid wrong estimate.
7286 //
7287 // Test = fabs(X) < SmallestNormal
7288 const fltSemantics &FltSem = VT.getFltSemantics();
7289 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
7290 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
7291 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
7292 return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
7293}
7294
// Body of TargetLowering::getNegatedExpression (the first signature line,
// original 7295, is elided in this listing): recursively builds a negated
// form of Op when that is possible and profitable, reporting the relative
// cost through the out-parameter `Cost`; returns SDValue() otherwise.
// NOTE(review): this rendered listing elides roughly twenty original lines
// (the recursion-depth guard condition, the NegatibleCost local
// declarations, several Created/ops declarations, etc.), so some locals
// below (CostX, CostY, CostZ, Ops, ...) appear without visible declarations.
7296 bool LegalOps, bool OptForSize,
7298 unsigned Depth) const {
7299 // fneg is removable even if it has multiple uses.
7300 if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
7302 return Op.getOperand(0);
7303 }
7304
7305 // Don't recurse exponentially.
7307 return SDValue();
7308
7309 // Pre-increment recursion depth for use in recursive calls.
7310 ++Depth;
7311 const SDNodeFlags Flags = Op->getFlags();
7312 const TargetOptions &Options = DAG.getTarget().Options;
7313 EVT VT = Op.getValueType();
7314 unsigned Opcode = Op.getOpcode();
7315
7316 // Don't allow anything with multiple uses unless we know it is free.
7317 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7318 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7319 isFPExtFree(VT, Op.getOperand(0).getValueType());
7320 if (!IsFreeExtend)
7321 return SDValue();
7322 }
7323
 // Helper: drop a speculatively built negation that ended up unused, so we
 // don't leave orphan nodes in the DAG.
7324 auto RemoveDeadNode = [&](SDValue N) {
7325 if (N && N.getNode()->use_empty())
7326 DAG.RemoveDeadNode(N.getNode());
7327 };
7328
7329 SDLoc DL(Op);
7330
7331 // Because getNegatedExpression can delete nodes we need a handle to keep
7332 // temporary nodes alive in case the recursion manages to create an identical
7333 // node.
7334 std::list<HandleSDNode> Handles;
7335
7336 switch (Opcode) {
7337 case ISD::ConstantFP: {
7338 // Don't invert constant FP values after legalization unless the target says
7339 // the negated constant is legal.
7340 bool IsOpLegal =
7342 isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
7343 OptForSize);
7344
7345 if (LegalOps && !IsOpLegal)
7346 break;
7347
7348 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
7349 V.changeSign();
7350 SDValue CFP = DAG.getConstantFP(V, DL, VT);
7351
7352 // If we already have the use of the negated floating constant, it is free
7353 // to negate it even it has multiple uses.
7354 if (!Op.hasOneUse() && CFP.use_empty())
7355 break;
7357 return CFP;
7358 }
7359 case ISD::BUILD_VECTOR: {
7360 // Only permit BUILD_VECTOR of constants.
7361 if (llvm::any_of(Op->op_values(), [&](SDValue N) {
7362 return !N.isUndef() && !isa<ConstantFPSDNode>(N);
7363 }))
7364 break;
7365
7366 bool IsOpLegal =
7369 llvm::all_of(Op->op_values(), [&](SDValue N) {
7370 return N.isUndef() ||
7371 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
7372 OptForSize);
7373 });
7374
7375 if (LegalOps && !IsOpLegal)
7376 break;
7377
 // Negate every constant element (undef elements pass through unchanged).
7379 for (SDValue C : Op->op_values()) {
7380 if (C.isUndef()) {
7381 Ops.push_back(C);
7382 continue;
7383 }
7384 APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
7385 V.changeSign();
7386 Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
7387 }
7389 return DAG.getBuildVector(VT, DL, Ops);
7390 }
7391 case ISD::FADD: {
7392 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7393 break;
7394
7395 // After operation legalization, it might not be legal to create new FSUBs.
7396 if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
7397 break;
7398 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7399
7400 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7402 SDValue NegX =
7403 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7404 // Prevent this node from being deleted by the next call.
7405 if (NegX)
7406 Handles.emplace_back(NegX);
7407
7408 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7410 SDValue NegY =
7411 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7412
7413 // We're done with the handles.
7414 Handles.clear();
7415
7416 // Negate the X if its cost is less or equal than Y.
7417 if (NegX && (CostX <= CostY)) {
7418 Cost = CostX;
7419 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
7420 if (NegY != N)
7421 RemoveDeadNode(NegY);
7422 return N;
7423 }
7424
7425 // Negate the Y if it is not expensive.
7426 if (NegY) {
7427 Cost = CostY;
7428 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
7429 if (NegX != N)
7430 RemoveDeadNode(NegX);
7431 return N;
7432 }
7433 break;
7434 }
7435 case ISD::FSUB: {
7436 // We can't turn -(A-B) into B-A when we honor signed zeros.
7437 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7438 break;
7439
7440 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7441 // fold (fneg (fsub 0, Y)) -> Y
7442 if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
7443 if (C->isZero()) {
7445 return Y;
7446 }
7447
7448 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
7450 return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
7451 }
7452 case ISD::FMUL:
7453 case ISD::FDIV: {
7454 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7455
7456 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7458 SDValue NegX =
7459 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7460 // Prevent this node from being deleted by the next call.
7461 if (NegX)
7462 Handles.emplace_back(NegX);
7463
7464 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7466 SDValue NegY =
7467 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7468
7469 // We're done with the handles.
7470 Handles.clear();
7471
7472 // Negate the X if its cost is less or equal than Y.
7473 if (NegX && (CostX <= CostY)) {
7474 Cost = CostX;
7475 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
7476 if (NegY != N)
7477 RemoveDeadNode(NegY);
7478 return N;
7479 }
7480
7481 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
7482 if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
7483 if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
7484 break;
7485
7486 // Negate the Y if it is not expensive.
7487 if (NegY) {
7488 Cost = CostY;
7489 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
7490 if (NegX != N)
7491 RemoveDeadNode(NegX);
7492 return N;
7493 }
7494 break;
7495 }
7496 case ISD::FMA:
7497 case ISD::FMAD: {
7498 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7499 break;
7500
7501 SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
7503 SDValue NegZ =
7504 getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
7505 // Give up if fail to negate the Z.
7506 if (!NegZ)
7507 break;
7508
7509 // Prevent this node from being deleted by the next two calls.
7510 Handles.emplace_back(NegZ);
7511
7512 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7514 SDValue NegX =
7515 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7516 // Prevent this node from being deleted by the next call.
7517 if (NegX)
7518 Handles.emplace_back(NegX);
7519
7520 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7522 SDValue NegY =
7523 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7524
7525 // We're done with the handles.
7526 Handles.clear();
7527
7528 // Negate the X if its cost is less or equal than Y.
7529 if (NegX && (CostX <= CostY)) {
7530 Cost = std::min(CostX, CostZ);
7531 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
7532 if (NegY != N)
7533 RemoveDeadNode(NegY);
7534 return N;
7535 }
7536
7537 // Negate the Y if it is not expensive.
7538 if (NegY) {
7539 Cost = std::min(CostY, CostZ);
7540 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
7541 if (NegX != N)
7542 RemoveDeadNode(NegX);
7543 return N;
7544 }
7545 break;
7546 }
7547
7548 case ISD::FP_EXTEND:
7549 case ISD::FSIN:
 // sin(-x) == -sin(x) and fpext(-x) == -fpext(x): push the negation into
 // the single operand.
7550 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7551 OptForSize, Cost, Depth))
7552 return DAG.getNode(Opcode, DL, VT, NegV);
7553 break;
7554 case ISD::FP_ROUND:
7555 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7556 OptForSize, Cost, Depth))
7557 return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
7558 break;
7559 case ISD::SELECT:
7560 case ISD::VSELECT: {
7561 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7562 // iff at least one cost is cheaper and the other is neutral/cheaper
7563 SDValue LHS = Op.getOperand(1);
7565 SDValue NegLHS =
7566 getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
7567 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
7568 RemoveDeadNode(NegLHS);
7569 break;
7570 }
7571
7572 // Prevent this node from being deleted by the next call.
7573 Handles.emplace_back(NegLHS);
7574
7575 SDValue RHS = Op.getOperand(2);
7577 SDValue NegRHS =
7578 getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);
7579
7580 // We're done with the handles.
7581 Handles.clear();
7582
7583 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
7584 (CostLHS != NegatibleCost::Cheaper &&
7585 CostRHS != NegatibleCost::Cheaper)) {
7586 RemoveDeadNode(NegLHS);
7587 RemoveDeadNode(NegRHS);
7588 break;
7589 }
7590
7591 Cost = std::min(CostLHS, CostRHS);
7592 return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
7593 }
7594 }
7595
7596 return SDValue();
7597}
7598
7599//===----------------------------------------------------------------------===//
7600// Legalization Utilities
7601//===----------------------------------------------------------------------===//
7602
// Expands a wide MUL / UMUL_LOHI / SMUL_LOHI into operations on the
// half-width type HiLoVT (schoolbook decomposition of the operands into
// low/high halves). The low/high result parts are appended to `Result`;
// returns false when no usable half-width multiply primitive is available.
// Callers may pre-supply the operand halves LL/LH/RL/RH (all four or none).
// NOTE(review): this rendered listing elides several continuation lines
// (original 7605, 7613-7619, 7650, 7690-7691, 7738), including the
// `Result` parameter declaration and the isOperationLegalOrCustom
// continuations of the Has* initializers.
7603bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7604 SDValue LHS, SDValue RHS,
7606 EVT HiLoVT, SelectionDAG &DAG,
7607 MulExpansionKind Kind, SDValue LL,
7608 SDValue LH, SDValue RL, SDValue RH) const {
7609 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
7610 Opcode == ISD::SMUL_LOHI);
7611
7612 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
7614 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
7616 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7618 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7620
7621 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7622 return false;
7623
7624 unsigned OuterBitSize = VT.getScalarSizeInBits();
7625 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7626
7627 // LL, LH, RL, and RH must be either all NULL or all set to a value.
7628 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
7629 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
7630
 // Helper: produce a half-width Lo/Hi product of L and R, preferring a
 // fused [SU]MUL_LOHI node and falling back to MUL + MULH[SU].
7631 SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
7632 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7633 bool Signed) -> bool {
7634 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
7635 Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
7636 Hi = SDValue(Lo.getNode(), 1);
7637 return true;
7638 }
7639 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
7640 Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
7641 Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
7642 return true;
7643 }
7644 return false;
7645 };
7646
7647 SDValue Lo, Hi;
7648
7649 if (!LL.getNode() && !RL.getNode() &&
7651 LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
7652 RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
7653 }
7654
7655 if (!LL.getNode())
7656 return false;
7657
7658 APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
7659 if (DAG.MaskedValueIsZero(LHS, HighMask) &&
7660 DAG.MaskedValueIsZero(RHS, HighMask)) {
7661 // The inputs are both zero-extended.
7662 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
7663 Result.push_back(Lo);
7664 Result.push_back(Hi);
7665 if (Opcode != ISD::MUL) {
7666 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7667 Result.push_back(Zero);
7668 Result.push_back(Zero);
7669 }
7670 return true;
7671 }
7672 }
7673
7674 if (!VT.isVector() && Opcode == ISD::MUL &&
7675 DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
7676 DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
7677 // The input values are both sign-extended.
7678 // TODO non-MUL case?
7679 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
7680 Result.push_back(Lo);
7681 Result.push_back(Hi);
7682 return true;
7683 }
7684 }
7685
7686 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
7687 SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
7688
7689 if (!LH.getNode() && !RH.getNode() &&
7692 LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
7693 LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
7694 RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
7695 RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
7696 }
7697
7698 if (!LH.getNode())
7699 return false;
7700
7701 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
7702 return false;
7703
7704 Result.push_back(Lo);
7705
7706 if (Opcode == ISD::MUL) {
 // Plain MUL only needs the low OuterBitSize bits: LL*RL plus the two
 // cross terms shifted into the high half.
7707 RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
7708 LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
7709 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
7710 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
7711 Result.push_back(Hi);
7712 return true;
7713 }
7714
7715 // Compute the full width result.
7716 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
7717 Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
7718 Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7719 Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
7720 return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
7721 };
7722
7723 SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7724 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
7725 return false;
7726
7727 // This is effectively the add part of a multiply-add of half-sized operands,
7728 // so it cannot overflow.
7729 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7730
7731 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
7732 return false;
7733
7734 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7735 EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7736
 // Prefer the legacy glued ADDC/ADDE carry chain when the target supports
 // it; otherwise use UADDO_CARRY with an explicit boolean carry.
7737 bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
7739 if (UseGlue)
7740 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
7741 Merge(Lo, Hi));
7742 else
7743 Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
7744 Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
7745
7746 SDValue Carry = Next.getValue(1);
7747 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7748 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7749
7750 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
7751 return false;
7752
7753 if (UseGlue)
7754 Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
7755 Carry);
7756 else
7757 Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
7758 Zero, Carry);
7759
7760 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7761
7762 if (Opcode == ISD::SMUL_LOHI) {
 // Signed fix-up: when an operand's high half is negative, subtract the
 // other operand's (zero-extended) low half from the running high part.
7763 SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7764 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
7765 Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
7766
7767 NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7768 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
7769 Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
7770 }
7771
7772 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7773 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7774 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7775 return true;
7776}
7777
// Convenience wrapper around expandMUL_LOHI for a single MUL node N: expands
// it via the half-width machinery above and unpacks the two result parts
// into Lo/Hi. Returns false when the expansion is not possible.
// NOTE(review): the first signature line (original 7778) and the local
// `Result` vector declaration (original 7782) are elided in this listing.
7779 SelectionDAG &DAG, MulExpansionKind Kind,
7780 SDValue LL, SDValue LH, SDValue RL,
7781 SDValue RH) const {
7783 bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
7784 N->getOperand(0), N->getOperand(1), Result, HiLoVT,
7785 DAG, Kind, LL, LH, RL, RH);
7786 if (Ok) {
 // expandMUL_LOHI appends exactly lo then hi for a plain MUL.
7787 assert(Result.size() == 2);
7788 Lo = Result[0];
7789 Hi = Result[1];
7790 }
7791 return Ok;
7792}
7793
7794// Optimize unsigned division or remainder by constants for types twice as large
7795// as a legal VT.
7796//
7797// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
7798// can be computed
7799// as:
7800// Sum += __builtin_uadd_overflow(Lo, High, &Sum);
7801// Remainder = Sum % Constant
7802// This is based on "Remainder by Summing Digits" from Hacker's Delight.
7803//
7804// For division, we can compute the remainder using the algorithm described
7805// above, subtract it from the dividend to get an exact multiple of Constant.
7806// Then multiply that exact multiple by the multiplicative inverse modulo
7807// (1 << (BitWidth / 2)) to get the quotient.
7808
7809// If Constant is even, we can shift right the dividend and the divisor by the
7810// number of trailing zeros in Constant before applying the remainder algorithm.
7811// If we're after the quotient, we can subtract this value from the shifted
7812// dividend and multiply by the multiplicative inverse of the shifted divisor.
7813// If we want the remainder, we shift the value left by the number of trailing
7814// zeros and add the bits that were shifted out of the dividend.
// Expands an unsigned UDIV/UREM/UDIVREM by a constant, for a type twice as
// wide as the legal HiLoVT, using the "remainder by summing digits" scheme
// described in the comment block above this function. Quotient and/or
// remainder halves are appended to `Result`; returns false when the
// expansion does not apply (signed ops, non-constant or too-large divisor,
// no high multiply, optimizing for size, or divisor 0/1).
// NOTE(review): the first signature lines (original 7815-7816) and a few
// continuation lines (7836, 7847, 7902, 7913) are elided in this listing.
7817 EVT HiLoVT, SelectionDAG &DAG,
7818 SDValue LL, SDValue LH) const {
7819 unsigned Opcode = N->getOpcode();
7820 EVT VT = N->getValueType(0);
7821
7822 // TODO: Support signed division/remainder.
7823 if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
7824 return false;
7825 assert(
7826 (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
7827 "Unexpected opcode");
7828
7829 auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
7830 if (!CN)
7831 return false;
7832
7833 APInt Divisor = CN->getAPIntValue();
7834 unsigned BitWidth = Divisor.getBitWidth();
7835 unsigned HBitWidth = BitWidth / 2;
7837 HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
7838
7839 // Divisor needs to be less than (1 << HBitWidth).
7840 APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
7841 if (Divisor.uge(HalfMaxPlus1))
7842 return false;
7843
7844 // We depend on the UREM by constant optimization in DAGCombiner that requires
7845 // high multiply.
7846 if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
7848 return false;
7849
7850 // Don't expand if optimizing for size.
7851 if (DAG.shouldOptForSize())
7852 return false;
7853
7854 // Early out for 0 or 1 divisors.
7855 if (Divisor.ule(1))
7856 return false;
7857
7858 // If the divisor is even, shift it until it becomes odd.
7859 unsigned TrailingZeros = 0;
7860 if (!Divisor[0]) {
7861 TrailingZeros = Divisor.countr_zero();
7862 Divisor.lshrInPlace(TrailingZeros);
7863 }
7864
7865 SDLoc dl(N);
7866 SDValue Sum;
7867 SDValue PartialRem;
7868
7869 // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
7870 // then add in the carry.
7871 // TODO: If we can't split it in half, we might be able to split into 3 or
7872 // more pieces using a smaller bit width.
7873 if (HalfMaxPlus1.urem(Divisor).isOne()) {
7874 assert(!LL == !LH && "Expected both input halves or no input halves!");
7875 if (!LL)
7876 std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
7877
7878 // Shift the input by the number of TrailingZeros in the divisor. The
7879 // shifted out bits will be added to the remainder later.
7880 if (TrailingZeros) {
7881 // Save the shifted off bits if we need the remainder.
7882 if (Opcode != ISD::UDIV) {
7883 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
7884 PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
7885 DAG.getConstant(Mask, dl, HiLoVT));
7886 }
7887
 // Funnel-shift the 2*HBitWidth value right by TrailingZeros across the
 // LL/LH halves.
7888 LL = DAG.getNode(
7889 ISD::OR, dl, HiLoVT,
7890 DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
7891 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
7892 DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
7893 DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
7894 HiLoVT, dl)));
7895 LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
7896 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
7897 }
7898
7899 // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
7900 EVT SetCCType =
7901 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
7903 SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
7904 Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
7905 Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
7906 DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
7907 } else {
7908 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
7909 SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
7910 // If the boolean for the target is 0 or 1, we can add the setcc result
7911 // directly.
7912 if (getBooleanContents(HiLoVT) ==
7914 Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
7915 else
7916 Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
7917 DAG.getConstant(0, dl, HiLoVT));
7918 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
7919 }
7920 }
7921
7922 // If we didn't find a sum, we can't do the expansion.
7923 if (!Sum)
7924 return false;
7925
7926 // Perform a HiLoVT urem on the Sum using truncated divisor.
7927 SDValue RemL =
7928 DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
7929 DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
7930 SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
7931
7932 if (Opcode != ISD::UREM) {
7933 // Subtract the remainder from the shifted dividend.
7934 SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
7935 SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
7936
7937 Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
7938
7939 // Multiply by the multiplicative inverse of the divisor modulo
7940 // (1 << BitWidth).
7941 APInt MulFactor = Divisor.multiplicativeInverse();
7942
7943 SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
7944 DAG.getConstant(MulFactor, dl, VT));
7945
7946 // Split the quotient into low and high parts.
7947 SDValue QuotL, QuotH;
7948 std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
7949 Result.push_back(QuotL);
7950 Result.push_back(QuotH);
7951 }
7952
7953 if (Opcode != ISD::UDIV) {
7954 // If we shifted the input, shift the remainder left and add the bits we
7955 // shifted off the input.
7956 if (TrailingZeros) {
7957 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
7958 RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
7959 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
7960 RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
7961 }
7962 Result.push_back(RemL);
7963 Result.push_back(DAG.getConstant(0, dl, HiLoVT));
7964 }
7965
7966 return true;
7967}
7968
7969// Check that (every element of) Z is undef or not an exact multiple of BW.
7970static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
 // (The `return ISD::matchUnaryPredicate(` line, original 7971, is elided
 // from this listing.) A null C corresponds to an undef element, which is
 // accepted; the trailing `true` is matchUnaryPredicate's AllowUndefs flag.
7972 Z,
7973 [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
7974 true);
7975}
7976
// Body of the VP_FSHL/VP_FSHR (vector-predicated funnel shift) expansion;
// the function signature line (original 7977) is elided in this listing.
// Node operands: X, Y (the two data vectors), Z (shift amount), then the
// VP mask and explicit vector length, which are threaded through every
// emitted VP node. Lowers to predicated shl/srl combined with VP_OR.
7978 EVT VT = Node->getValueType(0);
7979 SDValue ShX, ShY;
7980 SDValue ShAmt, InvShAmt;
7981 SDValue X = Node->getOperand(0);
7982 SDValue Y = Node->getOperand(1);
7983 SDValue Z = Node->getOperand(2);
7984 SDValue Mask = Node->getOperand(3);
7985 SDValue VL = Node->getOperand(4);
7986
7987 unsigned BW = VT.getScalarSizeInBits();
7988 bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
7989 SDLoc DL(SDValue(Node, 0));
7990
7991 EVT ShVT = Z.getValueType();
7992 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7993 // fshl: X << C | Y >> (BW - C)
7994 // fshr: X << (BW - C) | Y >> C
7995 // where C = Z % BW is not zero
7996 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
7997 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
7998 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
7999 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
8000 VL);
8001 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
8002 VL);
8003 } else {
 // Z may be a multiple of BW, so shifting by (BW - Z%BW) directly would
 // be an out-of-range (undefined) shift; split it into a constant 1-bit
 // shift plus a shift by (BW - 1 - Z%BW), which is always in range.
8004 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8005 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8006 SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
8007 if (isPowerOf2_32(BW)) {
8008 // Z % BW -> Z & (BW - 1)
8009 ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
8010 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8011 SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
8012 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
8013 InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
8014 } else {
8015 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8016 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8017 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
8018 }
8019
8020 SDValue One = DAG.getConstant(1, DL, ShVT);
8021 if (IsFSHL) {
8022 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
8023 SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL);
8024 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL);
8025 } else {
8026 SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
8027 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
8028 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL);
8029 }
8030 }
8031 return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
8032}
8033
// expandFunnelShift (signature start elided in this extraction): lowers
// ISD::FSHL / ISD::FSHR into shifts, masks and OR. Returns SDValue() when
// the expansion is not possible (vector type without the needed legal ops).
8035 SelectionDAG &DAG) const {
// VP opcodes have their own expansion that threads mask/EVL operands.
8036 if (Node->isVPOpcode())
8037 return expandVPFunnelShift(Node, DAG);
8038
8039 EVT VT = Node->getValueType(0);
8040
// NOTE(review): lines 8042-8044 (the remaining legality conditions of this
// if) are missing from this extraction.
8041 if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
8045 return SDValue();
8046
8047 SDValue X = Node->getOperand(0);
8048 SDValue Y = Node->getOperand(1);
8049 SDValue Z = Node->getOperand(2);
8050
8051 unsigned BW = VT.getScalarSizeInBits();
8052 bool IsFSHL = Node->getOpcode() == ISD::FSHL;
8053 SDLoc DL(SDValue(Node, 0));
8054
8055 EVT ShVT = Z.getValueType();
8056
8057 // If a funnel shift in the other direction is more supported, use it.
8058 unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
8059 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8060 isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
8061 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8062 // fshl X, Y, Z -> fshr X, Y, -Z
8063 // fshr X, Y, Z -> fshl X, Y, -Z
8064 SDValue Zero = DAG.getConstant(0, DL, ShVT);
// NOTE(review): the SUB is created with VT rather than ShVT — presumably
// relies on Z's type matching VT here; confirm against upstream.
8065 Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
8066 } else {
// Shift-by-one pre-steps keep the reversed funnel shift's amount in range
// even when Z % BW could be zero.
8067 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
8068 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
8069 SDValue One = DAG.getConstant(1, DL, ShVT);
8070 if (IsFSHL) {
8071 Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8072 X = DAG.getNode(ISD::SRL, DL, VT, X, One);
8073 } else {
8074 X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8075 Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
8076 }
8077 Z = DAG.getNOT(DL, Z, ShVT);
8078 }
8079 return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
8080 }
8081
8082 SDValue ShX, ShY;
8083 SDValue ShAmt, InvShAmt;
// Same two-path expansion as the VP variant above: a direct pair of shifts
// when Z % BW is provably non-zero, otherwise a shift-by-one split so no
// single shift amount can reach BW.
8084 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8085 // fshl: X << C | Y >> (BW - C)
8086 // fshr: X << (BW - C) | Y >> C
8087 // where C = Z % BW is not zero
8088 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8089 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8090 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
8091 ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
8092 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
8093 } else {
8094 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8095 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8096 SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
8097 if (isPowerOf2_32(BW)) {
8098 // Z % BW -> Z & (BW - 1)
8099 ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
8100 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8101 InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
8102 } else {
8103 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8104 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8105 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
8106 }
8107
8108 SDValue One = DAG.getConstant(1, DL, ShVT);
8109 if (IsFSHL) {
8110 ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
8111 SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
8112 ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
8113 } else {
8114 SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
8115 ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
8116 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
8117 }
8118 }
8119 return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
8120}
8121
8122// TODO: Merge with expandFunnelShift.
// Expands ISD::ROTL / ISD::ROTR into shifts and OR. When AllowVectorOps is
// false, vector types additionally require the component shift/logic ops to
// be legal (condition partially elided below). Returns SDValue() on failure.
8123 SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
8124 SelectionDAG &DAG) const {
8125 EVT VT = Node->getValueType(0);
8126 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8127 bool IsLeft = Node->getOpcode() == ISD::ROTL;
8128 SDValue Op0 = Node->getOperand(0);
8129 SDValue Op1 = Node->getOperand(1);
8130 SDLoc DL(SDValue(Node, 0));
8131
8132 EVT ShVT = Op1.getValueType();
8133 SDValue Zero = DAG.getConstant(0, DL, ShVT);
8134
8135 // If a rotate in the other direction is more supported, use it.
// rot(x, c) == revrot(x, -c) only when the width is a power of two, hence
// the isPowerOf2_32 guard.
8136 unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
8137 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8138 isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
8139 SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8140 return DAG.getNode(RevRot, DL, VT, Op0, Sub);
8141 }
8142
// NOTE(review): lines 8144-8148 (the remaining legality conditions of this
// if) are missing from this extraction.
8143 if (!AllowVectorOps && VT.isVector() &&
8149 return SDValue();
8150
// ShOpc shifts in the rotate direction, HsOpc recovers the bits that wrap
// around from the other end.
8151 unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8152 unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8153 SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
8154 SDValue ShVal;
8155 SDValue HsVal;
8156 if (isPowerOf2_32(EltSizeInBits)) {
8157 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8158 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8159 SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8160 SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
8161 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8162 SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
8163 HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
8164 } else {
// Non-power-of-two width: shift-by-one split keeps the wrap-around shift
// amount strictly below the bit width even when (c % w) == 0.
8165 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8166 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8167 SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
8168 SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
8169 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8170 SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
8171 SDValue One = DAG.getConstant(1, DL, ShVT);
8172 HsVal =
8173 DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
8174 }
8175 return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
8176}
8177
// Body of expandShiftParts (signature start elided in this extraction).
// Expands SHL_PARTS / SRL_PARTS / SRA_PARTS of a double-width value into
// FSHL/FSHR plus plain shifts and selects, writing the two result halves to
// the Lo/Hi reference out-parameters assigned at the bottom.
8179 SelectionDAG &DAG) const {
8180 assert(Node->getNumOperands() == 3 && "Not a double-shift!");
8181 EVT VT = Node->getValueType(0);
8182 unsigned VTBits = VT.getScalarSizeInBits();
8183 assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
8184
8185 bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
8186 bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
8187 SDValue ShOpLo = Node->getOperand(0);
8188 SDValue ShOpHi = Node->getOperand(1);
8189 SDValue ShAmt = Node->getOperand(2);
8190 EVT ShAmtVT = ShAmt.getValueType();
8191 EVT ShAmtCCVT =
8192 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
8193 SDLoc dl(Node);
8194
8195 // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
8196 // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
8197 // away during isel.
8198 SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8199 DAG.getConstant(VTBits - 1, dl, ShAmtVT));
// For arithmetic shifts the overflow value is the sign-fill of the high
// part; otherwise it is zero.
8200 SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
8201 DAG.getConstant(VTBits - 1, dl, ShAmtVT))
8202 : DAG.getConstant(0, dl, VT);
8203
// Tmp2 = funnel-shift result (valid for amounts < VTBits); Tmp3 = the
// single-part shift used for the other half / for large amounts.
8204 SDValue Tmp2, Tmp3;
8205 if (IsSHL) {
8206 Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
8207 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
8208 } else {
8209 Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
8210 Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
8211 }
8212
8213 // If the shift amount is larger or equal than the width of a part we don't
8214 // use the result from the FSHL/FSHR. Insert a test and select the appropriate
8215 // values for large shift amounts.
// Since VTBits is a power of two, (ShAmt & VTBits) != 0 <=> ShAmt >= VTBits.
8216 SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8217 DAG.getConstant(VTBits, dl, ShAmtVT));
8218 SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
8219 DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);
8220
8221 if (IsSHL) {
8222 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8223 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8224 } else {
8225 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8226 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8227 }
8228}
8229
// Body of expandFP_TO_SINT (signature start elided in this extraction).
// Expands an f32 -> i64 FP_TO_SINT into integer bit manipulation, writing
// the lowered value to the Result out-parameter and returning true on
// success, false when the expansion does not apply.
8231 SelectionDAG &DAG) const {
8232 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8233 SDValue Src = Node->getOperand(OpNo);
8234 EVT SrcVT = Src.getValueType();
8235 EVT DstVT = Node->getValueType(0);
8236 SDLoc dl(SDValue(Node, 0));
8237
8238 // FIXME: Only f32 to i64 conversions are supported.
8239 if (SrcVT != MVT::f32 || DstVT != MVT::i64)
8240 return false;
8241
8242 if (Node->isStrictFPOpcode())
8243 // When a NaN is converted to an integer a trap is allowed. We can't
8244 // use this expansion here because it would eliminate that trap. Other
8245 // traps are also allowed and cannot be eliminated. See
8246 // IEEE 754-2008 sec 5.8.
8247 return false;
8248
8249 // Expand f32 -> i64 conversion
8250 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8251 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
8252 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
8253 EVT IntVT = SrcVT.changeTypeToInteger();
8254 EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
8255
// IEEE-754 single-precision field masks: 8 exponent bits at [30:23], bias
// 127, 23 mantissa bits, sign in bit 31.
8256 SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
8257 SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
8258 SDValue Bias = DAG.getConstant(127, dl, IntVT);
8259 SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
8260 SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
8261 SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
8262
8263 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
8264
// Unbiased exponent = ((Bits & ExponentMask) >> 23) - 127.
8265 SDValue ExponentBits = DAG.getNode(
8266 ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
8267 DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
8268 SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
8269
// Sign is arithmetically broadcast to all bits: 0 for positive, -1 for
// negative; used for the sign-magnitude -> two's-complement fixup below.
8270 SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
8271 DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
8272 DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
8273 Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
8274
// Reconstruct the full significand by re-attaching the implicit leading 1
// (0x00800000) to the stored mantissa.
8275 SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
8276 DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
8277 DAG.getConstant(0x00800000, dl, IntVT));
8278
8279 R = DAG.getZExtOrTrunc(R, dl, DstVT);
8280
// Scale the significand by 2^(Exponent - 23): shift left when the exponent
// exceeds the mantissa width, right otherwise.
8281 R = DAG.getSelectCC(
8282 dl, Exponent, ExponentLoBit,
8283 DAG.getNode(ISD::SHL, dl, DstVT, R,
8284 DAG.getZExtOrTrunc(
8285 DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
8286 dl, IntShVT)),
8287 DAG.getNode(ISD::SRL, dl, DstVT, R,
8288 DAG.getZExtOrTrunc(
8289 DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
8290 dl, IntShVT)),
8291 ISD::SETGT);
8292
// (R ^ Sign) - Sign negates R when Sign == -1 and is a no-op when Sign == 0.
8293 SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
8294 DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
8295
// |Src| < 1 (negative unbiased exponent) truncates to 0.
8296 Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
8297 DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
8298 return true;
8299}
8300
// Body of expandFP_TO_UINT (signature start elided in this extraction).
// Lowers (STRICT_)FP_TO_UINT via FP_TO_SINT plus a range split around the
// destination sign-mask value, writing the lowered value to Result (and the
// strict chain to Chain). Returns true on success.
8302 SDValue &Chain,
8303 SelectionDAG &DAG) const {
8304 SDLoc dl(SDValue(Node, 0));
8305 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8306 SDValue Src = Node->getOperand(OpNo);
8307
8308 EVT SrcVT = Src.getValueType();
8309 EVT DstVT = Node->getValueType(0);
8310 EVT SetCCVT =
8311 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8312 EVT DstSetCCVT =
8313 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8314
8315 // Only expand vector types if we have the appropriate vector bit operations.
// NOTE(review): lines 8317 and 8319 (the non-strict opcode alternative and
// the rest of this condition) are missing from this extraction.
8316 unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
8318 if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
8320 return false;
8321
8322 // If the maximum float value is smaller then the signed integer range,
8323 // the destination signmask can't be represented by the float, so we can
8324 // just use FP_TO_SINT directly.
8325 const fltSemantics &APFSem = SrcVT.getFltSemantics();
8326 APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
8327 APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
// NOTE(review): line 8328 (the APFloat::opOverflow comparison prefix of this
// condition) is missing from this extraction.
8329 APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
8330 if (Node->isStrictFPOpcode()) {
8331 Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8332 { Node->getOperand(0), Src });
8333 Chain = Result.getValue(1);
8334 } else
8335 Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8336 return true;
8337 }
8338
8339 // Don't expand it if there isn't cheap fsub instruction.
// NOTE(review): line 8340 (the isOperationLegalOrCustom( prefix of this
// condition) is missing from this extraction.
8341 Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
8342 return false;
8343
// Sel is true when Src is below the sign-mask threshold, i.e. the value
// fits in the signed range directly.
8344 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8345 SDValue Sel;
8346
8347 if (Node->isStrictFPOpcode()) {
8348 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8349 Node->getOperand(0), /*IsSignaling*/ true);
8350 Chain = Sel.getValue(1);
8351 } else {
8352 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
8353 }
8354
8355 bool Strict = Node->isStrictFPOpcode() ||
8356 shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
8357
8358 if (Strict) {
8359 // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
8360 // signmask then offset (the result of which should be fully representable).
8361 // Sel = Src < 0x8000000000000000
8362 // FltOfs = select Sel, 0, 0x8000000000000000
8363 // IntOfs = select Sel, 0, 0x8000000000000000
8364 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8365
8366 // TODO: Should any fast-math-flags be set for the FSUB?
8367 SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
8368 DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8369 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8370 SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
8371 DAG.getConstant(0, dl, DstVT),
8372 DAG.getConstant(SignMask, dl, DstVT));
8373 SDValue SInt;
8374 if (Node->isStrictFPOpcode()) {
8375 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
8376 { Chain, Src, FltOfs });
8377 SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8378 { Val.getValue(1), Val });
8379 Chain = SInt.getValue(1);
8380 } else {
8381 SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
8382 SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
8383 }
// XOR with the sign mask re-adds the offset for the out-of-signed-range
// case (IntOfs is 0 in the in-range case).
8384 Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8385 } else {
8386 // Expand based on maximum range of FP_TO_SINT:
8387 // True = fp_to_sint(Src)
8388 // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
8389 // Result = select (Src < 0x8000000000000000), True, False
8390
8391 SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8392 // TODO: Should any fast-math-flags be set for the FSUB?
8393 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
8394 DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
8395 False = DAG.getNode(ISD::XOR, dl, DstVT, False,
8396 DAG.getConstant(SignMask, dl, DstVT));
8397 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8398 Result = DAG.getSelect(dl, DstVT, Sel, True, False);
8399 }
8400 return true;
8401}
8402
// Body of expandUINT_TO_FP (signature start elided in this extraction).
// Lowers a scalar/vector i64 -> f64 UINT_TO_FP using the split-halves
// algorithm from compiler-rt's __floatundidf, writing the lowered value to
// Result. Returns true on success, false when the expansion does not apply.
8404 SDValue &Chain, SelectionDAG &DAG) const {
8405 // This transform is not correct for converting 0 when rounding mode is set
8406 // to round toward negative infinity which will produce -0.0. So disable
8407 // under strictfp.
8408 if (Node->isStrictFPOpcode())
8409 return false;
8410
8411 SDValue Src = Node->getOperand(0);
8412 EVT SrcVT = Src.getValueType();
8413 EVT DstVT = Node->getValueType(0);
8414
8415 // If the input is known to be non-negative and SINT_TO_FP is legal then use
8416 // it.
// NOTE(review): line 8418 (the legality check completing this condition) is
// missing from this extraction.
8417 if (Node->getFlags().hasNonNeg() &&
8419 Result =
8420 DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), DstVT, Node->getOperand(0));
8421 return true;
8422 }
8423
8424 if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
8425 return false;
8426
8427 // Only expand vector types if we have the appropriate vector bit
8428 // operations.
// NOTE(review): lines 8430-8433 (the remaining legality conditions of this
// if) are missing from this extraction.
8429 if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
8434 return false;
8435
8436 SDLoc dl(SDValue(Node, 0));
8437
8438 // Implementation of unsigned i64 to f64 following the algorithm in
8439 // __floatundidf in compiler_rt. This implementation performs rounding
8440 // correctly in all rounding modes with the exception of converting 0
8441 // when rounding toward negative infinity. In that case the fsub will
8442 // produce -0.0. This will be added to +0.0 and produce -0.0 which is
8443 // incorrect.
// 0x43300...0 is 2^52 as an f64 bit pattern; 0x45300...0 is 2^84. OR-ing
// the 32-bit halves into these patterns turns them into exact doubles
// (2^52 + lo) and (2^84 + 2^32*hi) without any rounding.
8444 SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
8445 SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
8446 llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
8447 SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
8448 SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
8449 SDValue HiShift = DAG.getShiftAmountConstant(32, SrcVT, dl);
8450
8451 SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
8452 SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
8453 SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
8454 SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
8455 SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
8456 SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
// Subtracting (2^84 + 2^52) removes both exponent offsets in one exactly-
// representable step; the final FADD performs the single rounding.
8457 SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
8458 Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
8459 return true;
8460}
8461
// createSelectForFMINNUM_FMAXNUM (name line elided in this extraction):
// when the node is known NaN-free, lowers FMINNUM/FMAXNUM (and their strict
// variants) to selectcc(Op1 < / > Op2, Op1, Op2). Returns SDValue() when
// the transform is unsafe or the condition code is not usable.
8462 SDValue
8464 SelectionDAG &DAG) const {
8465 unsigned Opcode = Node->getOpcode();
8466 assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
8467 Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
8468 "Wrong opcode");
8469
// Only valid with the no-NaNs flag: with a NaN operand, SETLT/SETGT is
// false and the select would not implement minnum/maxnum semantics.
8470 if (Node->getFlags().hasNoNaNs()) {
8471 ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8472 EVT VT = Node->getValueType(0);
// NOTE(review): line 8474 (the second half of this condition) is missing
// from this extraction.
8473 if ((!isCondCodeLegal(Pred, VT.getSimpleVT()) ||
8475 VT.isVector())
8476 return SDValue();
8477 SDValue Op1 = Node->getOperand(0);
8478 SDValue Op2 = Node->getOperand(1);
8479 SDValue SelCC = DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred);
8480 // Copy FMF flags, but always set the no-signed-zeros flag
8481 // as this is implied by the FMINNUM/FMAXNUM semantics.
8482 SelCC->setFlags(Node->getFlags() | SDNodeFlags::NoSignedZeros);
8483 return SelCC;
8484 }
8485
8486 return SDValue();
8487}
8488
// Body of expandFMINNUM_FMAXNUM (signature start elided in this
// extraction). Tries, in order: splitting vector ops, the _IEEE variants
// (quieting sNaN operands first), FMINIMUM/FMAXIMUM when NaN/zero concerns
// allow, and finally a compare+select. Returns SDValue() on failure.
8490 SelectionDAG &DAG) const {
8491 if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
8492 return Expanded;
8493
8494 EVT VT = Node->getValueType(0);
// NOTE(review): line 8496 (the report_fatal_error call prefix) is missing
// from this extraction.
8495 if (VT.isScalableVector())
8497 "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
8498
8499 SDLoc dl(Node);
8500 unsigned NewOp =
8501 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8502
8503 if (isOperationLegalOrCustom(NewOp, VT)) {
8504 SDValue Quiet0 = Node->getOperand(0);
8505 SDValue Quiet1 = Node->getOperand(1);
8506
8507 if (!Node->getFlags().hasNoNaNs()) {
8508 // Insert canonicalizes if it's possible we need to quiet to get correct
8509 // sNaN behavior.
8510 if (!DAG.isKnownNeverSNaN(Quiet0)) {
8511 Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
8512 Node->getFlags());
8513 }
8514 if (!DAG.isKnownNeverSNaN(Quiet1)) {
8515 Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
8516 Node->getFlags());
8517 }
8518 }
8519
8520 return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
8521 }
8522
8523 // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
8524 // instead if there are no NaNs and there can't be an incompatible zero
8525 // compare: at least one operand isn't +/-0, or there are no signed-zeros.
8526 if ((Node->getFlags().hasNoNaNs() ||
8527 (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
8528 DAG.isKnownNeverNaN(Node->getOperand(1)))) &&
8529 (Node->getFlags().hasNoSignedZeros() ||
8530 DAG.isKnownNeverZeroFloat(Node->getOperand(0)) ||
8531 DAG.isKnownNeverZeroFloat(Node->getOperand(1)))) {
8532 unsigned IEEE2018Op =
8533 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8534 if (isOperationLegalOrCustom(IEEE2018Op, VT))
8535 return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
8536 Node->getOperand(1), Node->getFlags());
8537 }
8538
// Last resort: a plain compare+select (only valid with no-NaNs; see helper).
8539 if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
8540 return SelCC;
8541
8542 return SDValue();
8543}
8544
// Body of expandFMINIMUM_FMAXIMUM (signature start elided in this
// extraction). Lowers FMINIMUM/FMAXIMUM (IEEE-754-2019 semantics: NaN is
// propagated and -0.0 < +0.0) via a native min/max or compare+select, then
// patches in NaN propagation and signed-zero ordering as needed.
8546 SelectionDAG &DAG) const {
8547 if (SDValue Expanded = expandVectorNaryOpBySplitting(N, DAG))
8548 return Expanded;
8549
8550 SDLoc DL(N);
8551 SDValue LHS = N->getOperand(0);
8552 SDValue RHS = N->getOperand(1);
8553 unsigned Opc = N->getOpcode();
8554 EVT VT = N->getValueType(0);
8555 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8556 bool IsMax = Opc == ISD::FMAXIMUM;
8557 SDNodeFlags Flags = N->getFlags();
8558
8559 // First, implement comparison not propagating NaN. If no native fmin or fmax
8560 // available, use plain select with setcc instead.
// NOTE(review): line 8561 (declaring the MinMax SDValue assigned below) is
// missing from this extraction.
8562 unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
8563 unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
8564
8565 // FIXME: We should probably define fminnum/fmaxnum variants with correct
8566 // signed zero behavior.
8567 bool MinMaxMustRespectOrderedZero = false;
8568
8569 if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
8570 MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags);
8571 MinMaxMustRespectOrderedZero = true;
8572 } else if (isOperationLegalOrCustom(CompOpc, VT)) {
8573 MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
8574 } else {
// NOTE(review): line 8575 (the vector-legality check guarding the unroll)
// is missing from this extraction.
8576 return DAG.UnrollVectorOp(N);
8577
8578 // NaN (if exists) will be propagated later, so orderness doesn't matter.
8579 SDValue Compare =
8580 DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETOGT : ISD::SETOLT);
8581 MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
8582 }
8583
8584 // Propagate any NaN of both operands
// NOTE(review): line 8588 (the APFloat::getQNaN argument to ConstantFP::get)
// is missing from this extraction.
8585 if (!N->getFlags().hasNoNaNs() &&
8586 (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
8587 ConstantFP *FPNaN = ConstantFP::get(*DAG.getContext(),
8589 MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
8590 DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
8591 }
8592
8593 // fminimum/fmaximum requires -0.0 less than +0.0
// NOTE(review): line 8595 (the remainder of this condition) is missing from
// this extraction. When the computed min/max is a (sign-ambiguous) zero,
// prefer the operand that is the correctly-signed zero for this op.
8594 if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
8596 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
8597 DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
8598 SDValue TestZero =
8599 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
8600 SDValue LCmp = DAG.getSelect(
8601 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
8602 MinMax, Flags);
8603 SDValue RCmp = DAG.getSelect(
8604 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
8605 LCmp, Flags);
8606 MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
8607 }
8608
8609 return MinMax;
8610}
8611
// Body of expandFMINIMUMNUM_FMAXIMUMNUM (signature start elided in this
// extraction). Lowers FMINIMUMNUM/FMAXIMUMNUM (IEEE-754-2019: NaN operands
// are replaced by the other operand; -0.0 ordered below +0.0) by delegating
// to a suitable native min/max when semantics allow, otherwise building the
// result from selects with explicit NaN and signed-zero fixups.
8613 SelectionDAG &DAG) const {
8614 SDLoc DL(Node);
8615 SDValue LHS = Node->getOperand(0);
8616 SDValue RHS = Node->getOperand(1);
8617 unsigned Opc = Node->getOpcode();
8618 EVT VT = Node->getValueType(0);
8619 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8620 bool IsMax = Opc == ISD::FMAXIMUMNUM;
8621 const TargetOptions &Options = DAG.getTarget().Options;
8622 SDNodeFlags Flags = Node->getFlags();
8623
// NOTE(review): line 8625 (the _IEEE opcode selection expression) is
// missing from this extraction.
8624 unsigned NewOp =
8626
8627 if (isOperationLegalOrCustom(NewOp, VT)) {
8628 if (!Flags.hasNoNaNs()) {
8629 // Insert canonicalizes if it's possible we need to quiet to get correct
8630 // sNaN behavior.
8631 if (!DAG.isKnownNeverSNaN(LHS)) {
8632 LHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, LHS, Flags);
8633 }
8634 if (!DAG.isKnownNeverSNaN(RHS)) {
8635 RHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, RHS, Flags);
8636 }
8637 }
8638
8639 return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags);
8640 }
8641
8642 // We can use FMINIMUM/FMAXIMUM if there is no NaN, since it has
8643 // same behaviors for all of other cases: +0.0 vs -0.0 included.
// NOTE(review): line 8647 (the FMINIMUM/FMAXIMUM opcode selection) is
// missing from this extraction.
8644 if (Flags.hasNoNaNs() ||
8645 (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS))) {
8646 unsigned IEEE2019Op =
8648 if (isOperationLegalOrCustom(IEEE2019Op, VT))
8649 return DAG.getNode(IEEE2019Op, DL, VT, LHS, RHS, Flags);
8650 }
8651
8652 // FMINNUM/FMAXMUM returns qNaN if either operand is sNaN, and it may return
8653 // either one for +0.0 vs -0.0.
8654 if ((Flags.hasNoNaNs() ||
8655 (DAG.isKnownNeverSNaN(LHS) && DAG.isKnownNeverSNaN(RHS))) &&
8656 (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
8657 DAG.isKnownNeverZeroFloat(RHS))) {
8658 unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
8659 if (isOperationLegalOrCustom(IEEE2008Op, VT))
8660 return DAG.getNode(IEEE2008Op, DL, VT, LHS, RHS, Flags);
8661 }
8662
// NOTE(review): line 8663 (the vector-legality check guarding the unroll)
// is missing from this extraction.
8664 return DAG.UnrollVectorOp(Node);
8665
8666 // If only one operand is NaN, override it with another operand.
// selectcc(X, X, ..., SETUO) is true exactly when X is NaN.
8667 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS)) {
8668 LHS = DAG.getSelectCC(DL, LHS, LHS, RHS, LHS, ISD::SETUO);
8669 }
8670 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS)) {
8671 RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO);
8672 }
8673
8674 SDValue MinMax =
8675 DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
8676 // If MinMax is NaN, let's quiet it.
8677 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS) &&
8678 !DAG.isKnownNeverNaN(RHS)) {
8679 MinMax = DAG.getNode(ISD::FCANONICALIZE, DL, VT, MinMax, Flags);
8680 }
8681
8682 // Fixup signed zero behavior.
// NOTE(review): line 8684 (the remainder of this early-out condition) is
// missing from this extraction.
8683 if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros() ||
8685 return MinMax;
8686 }
// If the result is a zero of ambiguous sign, pick whichever operand is the
// correctly-signed zero (+0.0 for max, -0.0 for min).
8687 SDValue TestZero =
8688 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
8689 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
8690 DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
8691 SDValue LCmp = DAG.getSelect(
8692 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
8693 MinMax, Flags);
8694 SDValue RCmp = DAG.getSelect(
8695 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS, LCmp,
8696 Flags);
8697 return DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
8698}
8699
8700/// Returns a true value if this FPClassTest can be performed with an ordered
8701/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
8702/// std::nullopt if it cannot be performed as a compare with 0.
8703static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
8704 const fltSemantics &Semantics,
8705 const MachineFunction &MF) {
8706 FPClassTest OrderedMask = Test & ~fcNan;
8707 FPClassTest NanTest = Test & fcNan;
8708 bool IsOrdered = NanTest == fcNone;
8709 bool IsUnordered = NanTest == fcNan;
8710
8711 // Skip cases that are testing for only a qnan or snan.
8712 if (!IsOrdered && !IsUnordered)
8713 return std::nullopt;
8714
8715 if (OrderedMask == fcZero &&
8716 MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
8717 return IsOrdered;
8718 if (OrderedMask == (fcZero | fcSubnormal) &&
8719 MF.getDenormalMode(Semantics).inputsAreZero())
8720 return IsOrdered;
8721 return std::nullopt;
8722}
8723
8725 const FPClassTest OrigTestMask,
8726 SDNodeFlags Flags, const SDLoc &DL,
8727 SelectionDAG &DAG) const {
8728 EVT OperandVT = Op.getValueType();
8729 assert(OperandVT.isFloatingPoint());
8730 FPClassTest Test = OrigTestMask;
8731
8732 // Degenerated cases.
8733 if (Test == fcNone)
8734 return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
8735 if (Test == fcAllFlags)
8736 return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
8737
8738 // PPC double double is a pair of doubles, of which the higher part determines
8739 // the value class.
8740 if (OperandVT == MVT::ppcf128) {
8741 Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
8742 DAG.getConstant(1, DL, MVT::i32));
8743 OperandVT = MVT::f64;
8744 }
8745
8746 // Floating-point type properties.
8747 EVT ScalarFloatVT = OperandVT.getScalarType();
8748 const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
8749 const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
8750 bool IsF80 = (ScalarFloatVT == MVT::f80);
8751
8752 // Some checks can be implemented using float comparisons, if floating point
8753 // exceptions are ignored.
8754 if (Flags.hasNoFPExcept() &&
8756 FPClassTest FPTestMask = Test;
8757 bool IsInvertedFP = false;
8758
8759 if (FPClassTest InvertedFPCheck =
8760 invertFPClassTestIfSimpler(FPTestMask, true)) {
8761 FPTestMask = InvertedFPCheck;
8762 IsInvertedFP = true;
8763 }
8764
8765 ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
8766 ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
8767
8768 // See if we can fold an | fcNan into an unordered compare.
8769 FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
8770
8771 // Can't fold the ordered check if we're only testing for snan or qnan
8772 // individually.
8773 if ((FPTestMask & fcNan) != fcNan)
8774 OrderedFPTestMask = FPTestMask;
8775
8776 const bool IsOrdered = FPTestMask == OrderedFPTestMask;
8777
8778 if (std::optional<bool> IsCmp0 =
8779 isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
8780 IsCmp0 && (isCondCodeLegalOrCustom(
8781 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
8782 OperandVT.getScalarType().getSimpleVT()))) {
8783
8784 // If denormals could be implicitly treated as 0, this is not equivalent
8785 // to a compare with 0 since it will also be true for denormals.
8786 return DAG.getSetCC(DL, ResultVT, Op,
8787 DAG.getConstantFP(0.0, DL, OperandVT),
8788 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
8789 }
8790
8791 if (FPTestMask == fcNan &&
8793 OperandVT.getScalarType().getSimpleVT()))
8794 return DAG.getSetCC(DL, ResultVT, Op, Op,
8795 IsInvertedFP ? ISD::SETO : ISD::SETUO);
8796
8797 bool IsOrderedInf = FPTestMask == fcInf;
8798 if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
8799 isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
8800 : UnorderedCmpOpcode,
8801 OperandVT.getScalarType().getSimpleVT()) &&
8804 (OperandVT.isVector() &&
8806 // isinf(x) --> fabs(x) == inf
8807 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
8808 SDValue Inf =
8809 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
8810 return DAG.getSetCC(DL, ResultVT, Abs, Inf,
8811 IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
8812 }
8813
8814 if ((OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) &&
8815 isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode
8816 : UnorderedCmpOpcode,
8817 OperandVT.getSimpleVT())) {
8818 // isposinf(x) --> x == inf
8819 // isneginf(x) --> x == -inf
8820 // isposinf(x) || nan --> x u== inf
8821 // isneginf(x) || nan --> x u== -inf
8822
8823 SDValue Inf = DAG.getConstantFP(
8824 APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL,
8825 OperandVT);
8826 return DAG.getSetCC(DL, ResultVT, Op, Inf,
8827 IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
8828 }
8829
8830 if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
8831 // TODO: Could handle ordered case, but it produces worse code for
8832 // x86. Maybe handle ordered if fabs is free?
8833
8834 ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
8835 ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
8836
8837 if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
8838 OperandVT.getScalarType().getSimpleVT())) {
8839 // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal
8840
8841 // TODO: Maybe only makes sense if fabs is free. Integer test of
8842 // exponent bits seems better for x86.
8843 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
8844 SDValue SmallestNormal = DAG.getConstantFP(
8845 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
8846 return DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal,
8847 IsOrdered ? OrderedOp : UnorderedOp);
8848 }
8849 }
8850
8851 if (FPTestMask == fcNormal) {
8852 // TODO: Handle unordered
8853 ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
8854 ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;
8855
8856 if (isCondCodeLegalOrCustom(IsFiniteOp,
8857 OperandVT.getScalarType().getSimpleVT()) &&
8858 isCondCodeLegalOrCustom(IsNormalOp,
8859 OperandVT.getScalarType().getSimpleVT()) &&
8860 isFAbsFree(OperandVT)) {
8861 // isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
8862 SDValue Inf =
8863 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
8864 SDValue SmallestNormal = DAG.getConstantFP(
8865 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
8866
8867 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
8868 SDValue IsFinite = DAG.getSetCC(DL, ResultVT, Abs, Inf, IsFiniteOp);
8869 SDValue IsNormal =
8870 DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal, IsNormalOp);
8871 unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
8872 return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal);
8873 }
8874 }
8875 }
8876
8877 // Some checks may be represented as inversion of simpler check, for example
8878 // "inf|normal|subnormal|zero" => !"nan".
8879 bool IsInverted = false;
8880
8881 if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
8882 Test = InvertedCheck;
8883 IsInverted = true;
8884 }
8885
8886 // In the general case use integer operations.
8887 unsigned BitSize = OperandVT.getScalarSizeInBits();
8888 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
8889 if (OperandVT.isVector())
8890 IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT,
8891 OperandVT.getVectorElementCount());
8892 SDValue OpAsInt = DAG.getBitcast(IntVT, Op);
8893
8894 // Various masks.
8895 APInt SignBit = APInt::getSignMask(BitSize);
8896 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
8897 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
8898 const unsigned ExplicitIntBitInF80 = 63;
8899 APInt ExpMask = Inf;
8900 if (IsF80)
8901 ExpMask.clearBit(ExplicitIntBitInF80);
8902 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
8903 APInt QNaNBitMask =
8904 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
8905 APInt InvertionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());
8906
8907 SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
8908 SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
8909 SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
8910 SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
8911 SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
8912 SDValue ResultInvertionMask = DAG.getConstant(InvertionMask, DL, ResultVT);
8913
8914 SDValue Res;
8915 const auto appendResult = [&](SDValue PartialRes) {
8916 if (PartialRes) {
8917 if (Res)
8918 Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
8919 else
8920 Res = PartialRes;
8921 }
8922 };
8923
8924 SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
8925 const auto getIntBitIsSet = [&]() -> SDValue {
8926 if (!IntBitIsSetV) {
8927 APInt IntBitMask(BitSize, 0);
8928 IntBitMask.setBit(ExplicitIntBitInF80);
8929 SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
8930 SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
8931 IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
8932 }
8933 return IntBitIsSetV;
8934 };
8935
8936 // Split the value into sign bit and absolute value.
8937 SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
8938 SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
8939 DAG.getConstant(0, DL, IntVT), ISD::SETLT);
8940
8941 // Tests that involve more than one class should be processed first.
8942 SDValue PartialRes;
8943
8944 if (IsF80)
8945 ; // Detect finite numbers of f80 by checking individual classes because
8946 // they have different settings of the explicit integer bit.
8947 else if ((Test & fcFinite) == fcFinite) {
8948 // finite(V) ==> abs(V) < exp_mask
8949 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
8950 Test &= ~fcFinite;
8951 } else if ((Test & fcFinite) == fcPosFinite) {
8952 // finite(V) && V > 0 ==> V < exp_mask
8953 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
8954 Test &= ~fcPosFinite;
8955 } else if ((Test & fcFinite) == fcNegFinite) {
8956 // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
8957 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
8958 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
8959 Test &= ~fcNegFinite;
8960 }
8961 appendResult(PartialRes);
8962
8963 if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
8964 // fcZero | fcSubnormal => test all exponent bits are 0
8965 // TODO: Handle sign bit specific cases
8966 if (PartialCheck == (fcZero | fcSubnormal)) {
8967 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
8968 SDValue ExpIsZero =
8969 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
8970 appendResult(ExpIsZero);
8971 Test &= ~PartialCheck & fcAllFlags;
8972 }
8973 }
8974
8975 // Check for individual classes.
8976
8977 if (unsigned PartialCheck = Test & fcZero) {
8978 if (PartialCheck == fcPosZero)
8979 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
8980 else if (PartialCheck == fcZero)
8981 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
8982 else // ISD::fcNegZero
8983 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
8984 appendResult(PartialRes);
8985 }
8986
8987 if (unsigned PartialCheck = Test & fcSubnormal) {
8988 // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
8989 // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
8990 SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
8991 SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
8992 SDValue VMinusOneV =
8993 DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
8994 PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
8995 if (PartialCheck == fcNegSubnormal)
8996 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
8997 appendResult(PartialRes);
8998 }
8999
9000 if (unsigned PartialCheck = Test & fcInf) {
9001 if (PartialCheck == fcPosInf)
9002 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
9003 else if (PartialCheck == fcInf)
9004 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
9005 else { // ISD::fcNegInf
9006 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
9007 SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
9008 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
9009 }
9010 appendResult(PartialRes);
9011 }
9012
9013 if (unsigned PartialCheck = Test & fcNan) {
9014 APInt InfWithQnanBit = Inf | QNaNBitMask;
9015 SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
9016 if (PartialCheck == fcNan) {
9017 // isnan(V) ==> abs(V) > int(inf)
9018 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
9019 if (IsF80) {
9020 // Recognize unsupported values as NaNs for compatibility with glibc.
9021 // In them (exp(V)==0) == int_bit.
9022 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
9023 SDValue ExpIsZero =
9024 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
9025 SDValue IsPseudo =
9026 DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
9027 PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
9028 }
9029 } else if (PartialCheck == fcQNan) {
9030 // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
9031 PartialRes =
9032 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
9033 } else { // ISD::fcSNan
9034 // issignaling(V) ==> abs(V) > unsigned(Inf) &&
9035 // abs(V) < (unsigned(Inf) | quiet_bit)
9036 SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
9037 SDValue IsNotQnan =
9038 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
9039 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
9040 }
9041 appendResult(PartialRes);
9042 }
9043
9044 if (unsigned PartialCheck = Test & fcNormal) {
9045 // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
9046 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
9047 SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
9048 SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
9049 APInt ExpLimit = ExpMask - ExpLSB;
9050 SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
9051 PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
9052 if (PartialCheck == fcNegNormal)
9053 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9054 else if (PartialCheck == fcPosNormal) {
9055 SDValue PosSignV =
9056 DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInvertionMask);
9057 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
9058 }
9059 if (IsF80)
9060 PartialRes =
9061 DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
9062 appendResult(PartialRes);
9063 }
9064
9065 if (!Res)
9066 return DAG.getConstant(IsInverted, DL, ResultVT);
9067 if (IsInverted)
9068 Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInvertionMask);
9069 return Res;
9070}
9071
9072// Only expand vector types if we have the appropriate vector bit operations.
9073static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
9074 assert(VT.isVector() && "Expected vector type");
9075 unsigned Len = VT.getScalarSizeInBits();
9076 return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
9079 (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
9081}
9082
9084 SDLoc dl(Node);
9085 EVT VT = Node->getValueType(0);
9086 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9087 SDValue Op = Node->getOperand(0);
9088 unsigned Len = VT.getScalarSizeInBits();
9089 assert(VT.isInteger() && "CTPOP not implemented for this type.");
9090
9091 // TODO: Add support for irregular type lengths.
9092 if (!(Len <= 128 && Len % 8 == 0))
9093 return SDValue();
9094
9095 // Only expand vector types if we have the appropriate vector bit operations.
9096 if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
9097 return SDValue();
9098
9099 // This is the "best" algorithm from
9100 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9101 SDValue Mask55 =
9102 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9103 SDValue Mask33 =
9104 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9105 SDValue Mask0F =
9106 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9107
9108 // v = v - ((v >> 1) & 0x55555555...)
9109 Op = DAG.getNode(ISD::SUB, dl, VT, Op,
9110 DAG.getNode(ISD::AND, dl, VT,
9111 DAG.getNode(ISD::SRL, dl, VT, Op,
9112 DAG.getConstant(1, dl, ShVT)),
9113 Mask55));
9114 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9115 Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
9116 DAG.getNode(ISD::AND, dl, VT,
9117 DAG.getNode(ISD::SRL, dl, VT, Op,
9118 DAG.getConstant(2, dl, ShVT)),
9119 Mask33));
9120 // v = (v + (v >> 4)) & 0x0F0F0F0F...
9121 Op = DAG.getNode(ISD::AND, dl, VT,
9122 DAG.getNode(ISD::ADD, dl, VT, Op,
9123 DAG.getNode(ISD::SRL, dl, VT, Op,
9124 DAG.getConstant(4, dl, ShVT))),
9125 Mask0F);
9126
9127 if (Len <= 8)
9128 return Op;
9129
9130 // Avoid the multiply if we only have 2 bytes to add.
9131 // TODO: Only doing this for scalars because vectors weren't as obviously
9132 // improved.
9133 if (Len == 16 && !VT.isVector()) {
9134 // v = (v + (v >> 8)) & 0x00FF;
9135 return DAG.getNode(ISD::AND, dl, VT,
9136 DAG.getNode(ISD::ADD, dl, VT, Op,
9137 DAG.getNode(ISD::SRL, dl, VT, Op,
9138 DAG.getConstant(8, dl, ShVT))),
9139 DAG.getConstant(0xFF, dl, VT));
9140 }
9141
9142 // v = (v * 0x01010101...) >> (Len - 8)
9143 SDValue V;
9146 SDValue Mask01 =
9147 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9148 V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
9149 } else {
9150 V = Op;
9151 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9152 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9153 V = DAG.getNode(ISD::ADD, dl, VT, V,
9154 DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
9155 }
9156 }
9157 return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
9158}
9159
9161 SDLoc dl(Node);
9162 EVT VT = Node->getValueType(0);
9163 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9164 SDValue Op = Node->getOperand(0);
9165 SDValue Mask = Node->getOperand(1);
9166 SDValue VL = Node->getOperand(2);
9167 unsigned Len = VT.getScalarSizeInBits();
9168 assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
9169
9170 // TODO: Add support for irregular type lengths.
9171 if (!(Len <= 128 && Len % 8 == 0))
9172 return SDValue();
9173
9174 // This is same algorithm of expandCTPOP from
9175 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9176 SDValue Mask55 =
9177 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9178 SDValue Mask33 =
9179 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9180 SDValue Mask0F =
9181 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9182
9183 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
9184
9185 // v = v - ((v >> 1) & 0x55555555...)
9186 Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
9187 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9188 DAG.getConstant(1, dl, ShVT), Mask, VL),
9189 Mask55, Mask, VL);
9190 Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
9191
9192 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9193 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
9194 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
9195 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9196 DAG.getConstant(2, dl, ShVT), Mask, VL),
9197 Mask33, Mask, VL);
9198 Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
9199
9200 // v = (v + (v >> 4)) & 0x0F0F0F0F...
9201 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
9202 Mask, VL),
9203 Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
9204 Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
9205
9206 if (Len <= 8)
9207 return Op;
9208
9209 // v = (v * 0x01010101...) >> (Len - 8)
9210 SDValue V;
9212 ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
9213 SDValue Mask01 =
9214 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9215 V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
9216 } else {
9217 V = Op;
9218 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9219 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9220 V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
9221 DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
9222 Mask, VL);
9223 }
9224 }
9225 return DAG.getNode(ISD::VP_SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT),
9226 Mask, VL);
9227}
9228
9230 SDLoc dl(Node);
9231 EVT VT = Node->getValueType(0);
9232 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9233 SDValue Op = Node->getOperand(0);
9234 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9235
9236 // If the non-ZERO_UNDEF version is supported we can use that instead.
9237 if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
9239 return DAG.getNode(ISD::CTLZ, dl, VT, Op);
9240
9241 // If the ZERO_UNDEF version is supported use that and handle the zero case.
9243 EVT SetCCVT =
9244 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9245 SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
9246 SDValue Zero = DAG.getConstant(0, dl, VT);
9247 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
9248 return DAG.getSelect(dl, VT, SrcIsZero,
9249 DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
9250 }
9251
9252 // Only expand vector types if we have the appropriate vector bit operations.
9253 // This includes the operations needed to expand CTPOP if it isn't supported.
9254 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
9256 !canExpandVectorCTPOP(*this, VT)) ||
9259 return SDValue();
9260
9261 // for now, we do this:
9262 // x = x | (x >> 1);
9263 // x = x | (x >> 2);
9264 // ...
9265 // x = x | (x >>16);
9266 // x = x | (x >>32); // for 64-bit input
9267 // return popcount(~x);
9268 //
9269 // Ref: "Hacker's Delight" by Henry Warren
9270 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9271 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9272 Op = DAG.getNode(ISD::OR, dl, VT, Op,
9273 DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
9274 }
9275 Op = DAG.getNOT(dl, Op, VT);
9276 return DAG.getNode(ISD::CTPOP, dl, VT, Op);
9277}
9278
9280 SDLoc dl(Node);
9281 EVT VT = Node->getValueType(0);
9282 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9283 SDValue Op = Node->getOperand(0);
9284 SDValue Mask = Node->getOperand(1);
9285 SDValue VL = Node->getOperand(2);
9286 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9287
9288 // do this:
9289 // x = x | (x >> 1);
9290 // x = x | (x >> 2);
9291 // ...
9292 // x = x | (x >>16);
9293 // x = x | (x >>32); // for 64-bit input
9294 // return popcount(~x);
9295 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9296 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9297 Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
9298 DAG.getNode(ISD::VP_SRL, dl, VT, Op, Tmp, Mask, VL), Mask,
9299 VL);
9300 }
9301 Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getAllOnesConstant(dl, VT),
9302 Mask, VL);
9303 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
9304}
9305
9307 const SDLoc &DL, EVT VT, SDValue Op,
9308 unsigned BitWidth) const {
9309 if (BitWidth != 32 && BitWidth != 64)
9310 return SDValue();
9311 APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
9312 : APInt(64, 0x0218A392CD3D5DBFULL);
9313 const DataLayout &TD = DAG.getDataLayout();
9314 MachinePointerInfo PtrInfo =
9316 unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
9317 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
9318 SDValue Lookup = DAG.getNode(
9319 ISD::SRL, DL, VT,
9320 DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
9321 DAG.getConstant(DeBruijn, DL, VT)),
9322 DAG.getConstant(ShiftAmt, DL, VT));
9324
9326 for (unsigned i = 0; i < BitWidth; i++) {
9327 APInt Shl = DeBruijn.shl(i);
9328 APInt Lshr = Shl.lshr(ShiftAmt);
9329 Table[Lshr.getZExtValue()] = i;
9330 }
9331
9332 // Create a ConstantArray in Constant Pool
9333 auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
9334 SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
9335 TD.getPrefTypeAlign(CA->getType()));
9336 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
9337 DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
9338 PtrInfo, MVT::i8);
9339 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
9340 return ExtLoad;
9341
9342 EVT SetCCVT =
9343 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9344 SDValue Zero = DAG.getConstant(0, DL, VT);
9345 SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
9346 return DAG.getSelect(DL, VT, SrcIsZero,
9347 DAG.getConstant(BitWidth, DL, VT), ExtLoad);
9348}
9349
9351 SDLoc dl(Node);
9352 EVT VT = Node->getValueType(0);
9353 SDValue Op = Node->getOperand(0);
9354 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9355
9356 // If the non-ZERO_UNDEF version is supported we can use that instead.
9357 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
9359 return DAG.getNode(ISD::CTTZ, dl, VT, Op);
9360
9361 // If the ZERO_UNDEF version is supported use that and handle the zero case.
9363 EVT SetCCVT =
9364 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9365 SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
9366 SDValue Zero = DAG.getConstant(0, dl, VT);
9367 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
9368 return DAG.getSelect(dl, VT, SrcIsZero,
9369 DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
9370 }
9371
9372 // Only expand vector types if we have the appropriate vector bit operations.
9373 // This includes the operations needed to expand CTPOP if it isn't supported.
9374 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
9377 !canExpandVectorCTPOP(*this, VT)) ||
9381 return SDValue();
9382
9383 // Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
9384 if (!VT.isVector() && isOperationExpand(ISD::CTPOP, VT) &&
9386 if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
9387 return V;
9388
9389 // for now, we use: { return popcount(~x & (x - 1)); }
9390 // unless the target has ctlz but not ctpop, in which case we use:
9391 // { return 32 - nlz(~x & (x-1)); }
9392 // Ref: "Hacker's Delight" by Henry Warren
9393 SDValue Tmp = DAG.getNode(
9394 ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
9395 DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
9396
9397 // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
9399 return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
9400 DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
9401 }
9402
9403 return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
9404}
9405
9407 SDValue Op = Node->getOperand(0);
9408 SDValue Mask = Node->getOperand(1);
9409 SDValue VL = Node->getOperand(2);
9410 SDLoc dl(Node);
9411 EVT VT = Node->getValueType(0);
9412
9413 // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
9414 SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
9415 DAG.getAllOnesConstant(dl, VT), Mask, VL);
9416 SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
9417 DAG.getConstant(1, dl, VT), Mask, VL);
9418 SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
9419 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
9420}
9421
9423 SelectionDAG &DAG) const {
9424 // %cond = to_bool_vec %source
9425 // %splat = splat /*val=*/VL
9426 // %tz = step_vector
9427 // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
9428 // %r = vp.reduce.umin %v
9429 SDLoc DL(N);
9430 SDValue Source = N->getOperand(0);
9431 SDValue Mask = N->getOperand(1);
9432 SDValue EVL = N->getOperand(2);
9433 EVT SrcVT = Source.getValueType();
9434 EVT ResVT = N->getValueType(0);
9435 EVT ResVecVT =
9436 EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());
9437
9438 // Convert to boolean vector.
9439 if (SrcVT.getScalarType() != MVT::i1) {
9440 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
9441 SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
9442 SrcVT.getVectorElementCount());
9443 Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero,
9444 DAG.getCondCode(ISD::SETNE), Mask, EVL);
9445 }
9446
9447 SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT);
9448 SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL);
9449 SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
9450 SDValue Select =
9451 DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL);
9452 return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
9453}
9454
9456 SelectionDAG &DAG) const {
9457 SDLoc DL(N);
9458 SDValue Mask = N->getOperand(0);
9459 EVT MaskVT = Mask.getValueType();
9460 EVT BoolVT = MaskVT.getScalarType();
9461
9462 // Find a suitable type for a stepvector.
9463 ConstantRange VScaleRange(1, /*isFullSet=*/true); // Fixed length default.
9464 if (MaskVT.isScalableVector())
9465 VScaleRange = getVScaleRange(&DAG.getMachineFunction().getFunction(), 64);
9466 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9467 unsigned EltWidth = TLI.getBitWidthForCttzElements(
9468 BoolVT.getTypeForEVT(*DAG.getContext()), MaskVT.getVectorElementCount(),
9469 /*ZeroIsPoison=*/true, &VScaleRange);
9470 EVT StepVT = MVT::getIntegerVT(EltWidth);
9471 EVT StepVecVT = MaskVT.changeVectorElementType(StepVT);
9472
9473 // If promotion is required to make the type legal, do it here; promotion
9474 // of integers within LegalizeVectorOps is looking for types of the same
9475 // size but with a smaller number of larger elements, not the usual larger
9476 // size with the same number of larger elements.
9477 if (TLI.getTypeAction(StepVecVT.getSimpleVT()) ==
9479 StepVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), StepVecVT);
9480 StepVT = StepVecVT.getVectorElementType();
9481 }
9482
9483 // Zero out lanes with inactive elements, then find the highest remaining
9484 // value from the stepvector.
9485 SDValue Zeroes = DAG.getConstant(0, DL, StepVecVT);
9486 SDValue StepVec = DAG.getStepVector(DL, StepVecVT);
9487 SDValue ActiveElts = DAG.getSelect(DL, StepVecVT, Mask, StepVec, Zeroes);
9488 SDValue HighestIdx = DAG.getNode(ISD::VECREDUCE_UMAX, DL, StepVT, ActiveElts);
9489 return DAG.getZExtOrTrunc(HighestIdx, DL, N->getValueType(0));
9490}
9491
9493 bool IsNegative) const {
9494 SDLoc dl(N);
9495 EVT VT = N->getValueType(0);
9496 SDValue Op = N->getOperand(0);
9497
9498 // abs(x) -> smax(x,sub(0,x))
9499 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9501 SDValue Zero = DAG.getConstant(0, dl, VT);
9502 Op = DAG.getFreeze(Op);
9503 return DAG.getNode(ISD::SMAX, dl, VT, Op,
9504 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9505 }
9506
9507 // abs(x) -> umin(x,sub(0,x))
9508 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9510 SDValue Zero = DAG.getConstant(0, dl, VT);
9511 Op = DAG.getFreeze(Op);
9512 return DAG.getNode(ISD::UMIN, dl, VT, Op,
9513 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9514 }
9515
9516 // 0 - abs(x) -> smin(x, sub(0,x))
9517 if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
9519 SDValue Zero = DAG.getConstant(0, dl, VT);
9520 Op = DAG.getFreeze(Op);
9521 return DAG.getNode(ISD::SMIN, dl, VT, Op,
9522 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9523 }
9524
9525 // Only expand vector types if we have the appropriate vector operations.
9526 if (VT.isVector() &&
9528 (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
9529 (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
9531 return SDValue();
9532
9533 Op = DAG.getFreeze(Op);
9534 SDValue Shift = DAG.getNode(
9535 ISD::SRA, dl, VT, Op,
9536 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
9537 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
9538
9539 // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
9540 if (!IsNegative)
9541 return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
9542
9543 // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
9544 return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
9545}
9546
9548 SDLoc dl(N);
9549 EVT VT = N->getValueType(0);
9550 SDValue LHS = DAG.getFreeze(N->getOperand(0));
9551 SDValue RHS = DAG.getFreeze(N->getOperand(1));
9552 bool IsSigned = N->getOpcode() == ISD::ABDS;
9553
9554 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
9555 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
9556 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
9557 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
9558 if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
9559 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
9560 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
9561 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
9562 }
9563
9564 // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
9565 if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT))
9566 return DAG.getNode(ISD::OR, dl, VT,
9567 DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
9568 DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
9569
9570 // If the subtract doesn't overflow then just use abs(sub())
9571 // NOTE: don't use frozen operands for value tracking.
9572 bool IsNonNegative = DAG.SignBitIsZero(N->getOperand(1)) &&
9573 DAG.SignBitIsZero(N->getOperand(0));
9574
9575 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, N->getOperand(0),
9576 N->getOperand(1)))
9577 return DAG.getNode(ISD::ABS, dl, VT,
9578 DAG.getNode(ISD::SUB, dl, VT, LHS, RHS));
9579
9580 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, N->getOperand(1),
9581 N->getOperand(0)))
9582 return DAG.getNode(ISD::ABS, dl, VT,
9583 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
9584
9585 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9587 SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
9588
9589 // Branchless expansion iff cmp result is allbits:
9590 // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
9591 // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
9592 if (CCVT == VT && getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
9593 SDValue Diff = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
9594 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Cmp);
9595 return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
9596 }
9597
9598 // Similar to the branchless expansion, use the (sign-extended) usubo overflow
9599 // flag if the (scalar) type is illegal as this is more likely to legalize
9600 // cleanly:
9601 // abdu(lhs, rhs) -> sub(xor(sub(lhs, rhs), uof(lhs, rhs)), uof(lhs, rhs))
9602 if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT)) {
9603 SDValue USubO =
9604 DAG.getNode(ISD::USUBO, dl, DAG.getVTList(VT, MVT::i1), {LHS, RHS});
9605 SDValue Cmp = DAG.getNode(ISD::SIGN_EXTEND, dl, VT, USubO.getValue(1));
9606 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, USubO.getValue(0), Cmp);
9607 return DAG.getNode(ISD::SUB, dl, VT, Xor, Cmp);
9608 }
9609
9610 // FIXME: Should really try to split the vector in case it's legal on a
9611 // subvector.
9613 return DAG.UnrollVectorOp(N);
9614
9615 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9616 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9617 return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
9618 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
9619}
9620
9622 SDLoc dl(N);
9623 EVT VT = N->getValueType(0);
9624 SDValue LHS = N->getOperand(0);
9625 SDValue RHS = N->getOperand(1);
9626
9627 unsigned Opc = N->getOpcode();
9628 bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
9629 bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
9630 unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
9631 unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
9632 unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
9633 unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9634 assert((Opc == ISD::AVGFLOORS || Opc == ISD::AVGCEILS ||
9635 Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
9636 "Unknown AVG node");
9637
9638 // If the operands are already extended, we can add+shift.
9639 bool IsExt =
9640 (IsSigned && DAG.ComputeNumSignBits(LHS) >= 2 &&
9641 DAG.ComputeNumSignBits(RHS) >= 2) ||
9642 (!IsSigned && DAG.computeKnownBits(LHS).countMinLeadingZeros() >= 1 &&
9644 if (IsExt) {
9645 SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
9646 if (!IsFloor)
9647 Sum = DAG.getNode(ISD::ADD, dl, VT, Sum, DAG.getConstant(1, dl, VT));
9648 return DAG.getNode(ShiftOpc, dl, VT, Sum,
9649 DAG.getShiftAmountConstant(1, VT, dl));
9650 }
9651
9652 // For scalars, see if we can efficiently extend/truncate to use add+shift.
9653 if (VT.isScalarInteger()) {
9654 unsigned BW = VT.getScalarSizeInBits();
9655 EVT ExtVT = VT.getIntegerVT(*DAG.getContext(), 2 * BW);
9656 if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) {
9657 LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
9658 RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
9659 SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
9660 if (!IsFloor)
9661 Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
9662 DAG.getConstant(1, dl, ExtVT));
9663 // Just use SRL as we will be truncating away the extended sign bits.
9664 Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
9665 DAG.getShiftAmountConstant(1, ExtVT, dl));
9666 return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
9667 }
9668 }
9669
9670 // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
9671 if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT)) {
9672 SDValue UAddWithOverflow =
9673 DAG.getNode(ISD::UADDO, dl, DAG.getVTList(VT, MVT::i1), {RHS, LHS});
9674
9675 SDValue Sum = UAddWithOverflow.getValue(0);
9676 SDValue Overflow = UAddWithOverflow.getValue(1);
9677
9678 // Right shift the sum by 1
9679 SDValue LShrVal = DAG.getNode(ISD::SRL, dl, VT, Sum,
9680 DAG.getShiftAmountConstant(1, VT, dl));
9681
9682 SDValue ZeroExtOverflow = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Overflow);
9683 SDValue OverflowShl = DAG.getNode(
9684 ISD::SHL, dl, VT, ZeroExtOverflow,
9685 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
9686
9687 return DAG.getNode(ISD::OR, dl, VT, LShrVal, OverflowShl);
9688 }
9689
9690 // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
9691 // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
9692 // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
9693 // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
9694 LHS = DAG.getFreeze(LHS);
9695 RHS = DAG.getFreeze(RHS);
9696 SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
9697 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
9698 SDValue Shift =
9699 DAG.getNode(ShiftOpc, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
9700 return DAG.getNode(SumOpc, dl, VT, Sign, Shift);
9701}
9702
9704 SDLoc dl(N);
9705 EVT VT = N->getValueType(0);
9706 SDValue Op = N->getOperand(0);
9707
9708 if (!VT.isSimple())
9709 return SDValue();
9710
9711 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9712 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9713 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9714 default:
9715 return SDValue();
9716 case MVT::i16:
9717 // Use a rotate by 8. This can be further expanded if necessary.
9718 return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9719 case MVT::i32:
9720 Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9721 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
9722 DAG.getConstant(0xFF00, dl, VT));
9723 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
9724 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9725 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
9726 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9727 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
9728 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
9729 return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
9730 case MVT::i64:
9731 Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
9732 Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
9733 DAG.getConstant(255ULL<<8, dl, VT));
9734 Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
9735 Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
9736 DAG.getConstant(255ULL<<16, dl, VT));
9737 Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
9738 Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
9739 DAG.getConstant(255ULL<<24, dl, VT));
9740 Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
9741 Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9742 Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
9743 DAG.getConstant(255ULL<<24, dl, VT));
9744 Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9745 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
9746 DAG.getConstant(255ULL<<16, dl, VT));
9747 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
9748 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
9749 DAG.getConstant(255ULL<<8, dl, VT));
9750 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
9751 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
9752 Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
9753 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
9754 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
9755 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
9756 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
9757 return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
9758 }
9759}
9760
9762 SDLoc dl(N);
9763 EVT VT = N->getValueType(0);
9764 SDValue Op = N->getOperand(0);
9765 SDValue Mask = N->getOperand(1);
9766 SDValue EVL = N->getOperand(2);
9767
9768 if (!VT.isSimple())
9769 return SDValue();
9770
9771 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9772 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9773 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9774 default:
9775 return SDValue();
9776 case MVT::i16:
9777 Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9778 Mask, EVL);
9779 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9780 Mask, EVL);
9781 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
9782 case MVT::i32:
9783 Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9784 Mask, EVL);
9785 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
9786 Mask, EVL);
9787 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
9788 Mask, EVL);
9789 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9790 Mask, EVL);
9791 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9792 DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
9793 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9794 Mask, EVL);
9795 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
9796 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
9797 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
9798 case MVT::i64:
9799 Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
9800 Mask, EVL);
9801 Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9802 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
9803 Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
9804 Mask, EVL);
9805 Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9806 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
9807 Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
9808 Mask, EVL);
9809 Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9810 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
9811 Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
9812 Mask, EVL);
9813 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9814 Mask, EVL);
9815 Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
9816 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
9817 Tmp3 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9818 Mask, EVL);
9819 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
9820 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
9821 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
9822 Mask, EVL);
9823 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9824 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
9825 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
9826 Mask, EVL);
9827 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
9828 Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
9829 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
9830 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
9831 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
9832 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
9833 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
9834 }
9835}
9836
9838 SDLoc dl(N);
9839 EVT VT = N->getValueType(0);
9840 SDValue Op = N->getOperand(0);
9841 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9842 unsigned Sz = VT.getScalarSizeInBits();
9843
9844 SDValue Tmp, Tmp2, Tmp3;
9845
9846 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
9847 // and finally the i1 pairs.
9848 // TODO: We can easily support i4/i2 legal types if any target ever does.
9849 if (Sz >= 8 && isPowerOf2_32(Sz)) {
9850 // Create the masks - repeating the pattern every byte.
9851 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
9852 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
9853 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
9854
9855 // BSWAP if the type is wider than a single byte.
9856 Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
9857
9858 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
9859 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
9860 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
9861 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
9862 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
9863 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9864
9865 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
9866 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
9867 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
9868 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
9869 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
9870 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9871
9872 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
9873 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
9874 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
9875 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
9876 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
9877 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9878 return Tmp;
9879 }
9880
9881 Tmp = DAG.getConstant(0, dl, VT);
9882 for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
9883 if (I < J)
9884 Tmp2 =
9885 DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
9886 else
9887 Tmp2 =
9888 DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
9889
9890 APInt Shift = APInt::getOneBitSet(Sz, J);
9891 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
9892 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
9893 }
9894
9895 return Tmp;
9896}
9897
9899 assert(N->getOpcode() == ISD::VP_BITREVERSE);
9900
9901 SDLoc dl(N);
9902 EVT VT = N->getValueType(0);
9903 SDValue Op = N->getOperand(0);
9904 SDValue Mask = N->getOperand(1);
9905 SDValue EVL = N->getOperand(2);
9906 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9907 unsigned Sz = VT.getScalarSizeInBits();
9908
9909 SDValue Tmp, Tmp2, Tmp3;
9910
9911 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
9912 // and finally the i1 pairs.
9913 // TODO: We can easily support i4/i2 legal types if any target ever does.
9914 if (Sz >= 8 && isPowerOf2_32(Sz)) {
9915 // Create the masks - repeating the pattern every byte.
9916 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
9917 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
9918 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
9919
9920 // BSWAP if the type is wider than a single byte.
9921 Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
9922
9923 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
9924 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
9925 Mask, EVL);
9926 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9927 DAG.getConstant(Mask4, dl, VT), Mask, EVL);
9928 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
9929 Mask, EVL);
9930 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
9931 Mask, EVL);
9932 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9933
9934 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
9935 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
9936 Mask, EVL);
9937 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9938 DAG.getConstant(Mask2, dl, VT), Mask, EVL);
9939 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
9940 Mask, EVL);
9941 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
9942 Mask, EVL);
9943 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9944
9945 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
9946 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
9947 Mask, EVL);
9948 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9949 DAG.getConstant(Mask1, dl, VT), Mask, EVL);
9950 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
9951 Mask, EVL);
9952 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
9953 Mask, EVL);
9954 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9955 return Tmp;
9956 }
9957 return SDValue();
9958}
9959
9960std::pair<SDValue, SDValue>
9962 SelectionDAG &DAG) const {
9963 SDLoc SL(LD);
9964 SDValue Chain = LD->getChain();
9965 SDValue BasePTR = LD->getBasePtr();
9966 EVT SrcVT = LD->getMemoryVT();
9967 EVT DstVT = LD->getValueType(0);
9968 ISD::LoadExtType ExtType = LD->getExtensionType();
9969
9970 if (SrcVT.isScalableVector())
9971 report_fatal_error("Cannot scalarize scalable vector loads");
9972
9973 unsigned NumElem = SrcVT.getVectorNumElements();
9974
9975 EVT SrcEltVT = SrcVT.getScalarType();
9976 EVT DstEltVT = DstVT.getScalarType();
9977
9978 // A vector must always be stored in memory as-is, i.e. without any padding
9979 // between the elements, since various code depend on it, e.g. in the
9980 // handling of a bitcast of a vector type to int, which may be done with a
9981 // vector store followed by an integer load. A vector that does not have
9982 // elements that are byte-sized must therefore be stored as an integer
9983 // built out of the extracted vector elements.
9984 if (!SrcEltVT.isByteSized()) {
9985 unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
9986 EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
9987
9988 unsigned NumSrcBits = SrcVT.getSizeInBits();
9989 EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
9990
9991 unsigned SrcEltBits = SrcEltVT.getSizeInBits();
9992 SDValue SrcEltBitMask = DAG.getConstant(
9993 APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
9994
9995 // Load the whole vector and avoid masking off the top bits as it makes
9996 // the codegen worse.
9997 SDValue Load =
9998 DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
9999 LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
10000 LD->getMemOperand()->getFlags(), LD->getAAInfo());
10001
10003 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10004 unsigned ShiftIntoIdx =
10005 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
10006 SDValue ShiftAmount = DAG.getShiftAmountConstant(
10007 ShiftIntoIdx * SrcEltVT.getSizeInBits(), LoadVT, SL);
10008 SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
10009 SDValue Elt =
10010 DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
10011 SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
10012
10013 if (ExtType != ISD::NON_EXTLOAD) {
10014 unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
10015 Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
10016 }
10017
10018 Vals.push_back(Scalar);
10019 }
10020
10021 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
10022 return std::make_pair(Value, Load.getValue(1));
10023 }
10024
10025 unsigned Stride = SrcEltVT.getSizeInBits() / 8;
10026 assert(SrcEltVT.isByteSized());
10027
10029 SmallVector<SDValue, 8> LoadChains;
10030
10031 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10032 SDValue ScalarLoad =
10033 DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
10034 LD->getPointerInfo().getWithOffset(Idx * Stride),
10035 SrcEltVT, LD->getOriginalAlign(),
10036 LD->getMemOperand()->getFlags(), LD->getAAInfo());
10037
10038 BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));
10039
10040 Vals.push_back(ScalarLoad.getValue(0));
10041 LoadChains.push_back(ScalarLoad.getValue(1));
10042 }
10043
10044 SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
10045 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
10046
10047 return std::make_pair(Value, NewChain);
10048}
10049
10051 SelectionDAG &DAG) const {
10052 SDLoc SL(ST);
10053
10054 SDValue Chain = ST->getChain();
10055 SDValue BasePtr = ST->getBasePtr();
10056 SDValue Value = ST->getValue();
10057 EVT StVT = ST->getMemoryVT();
10058
10059 if (StVT.isScalableVector())
10060 report_fatal_error("Cannot scalarize scalable vector stores");
10061
10062 // The type of the data we want to save
10063 EVT RegVT = Value.getValueType();
10064 EVT RegSclVT = RegVT.getScalarType();
10065
10066 // The type of data as saved in memory.
10067 EVT MemSclVT = StVT.getScalarType();
10068
10069 unsigned NumElem = StVT.getVectorNumElements();
10070
10071 // A vector must always be stored in memory as-is, i.e. without any padding
10072 // between the elements, since various code depend on it, e.g. in the
10073 // handling of a bitcast of a vector type to int, which may be done with a
10074 // vector store followed by an integer load. A vector that does not have
10075 // elements that are byte-sized must therefore be stored as an integer
10076 // built out of the extracted vector elements.
10077 if (!MemSclVT.isByteSized()) {
10078 unsigned NumBits = StVT.getSizeInBits();
10079 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
10080
10081 SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
10082
10083 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10084 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
10085 DAG.getVectorIdxConstant(Idx, SL));
10086 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
10087 SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
10088 unsigned ShiftIntoIdx =
10089 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
10090 SDValue ShiftAmount =
10091 DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
10092 SDValue ShiftedElt =
10093 DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
10094 CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
10095 }
10096
10097 return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
10098 ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
10099 ST->getAAInfo());
10100 }
10101
10102 // Store Stride in bytes
10103 unsigned Stride = MemSclVT.getSizeInBits() / 8;
10104 assert(Stride && "Zero stride!");
10105 // Extract each of the elements from the original vector and save them into
10106 // memory individually.
10108 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10109 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
10110 DAG.getVectorIdxConstant(Idx, SL));
10111
10112 SDValue Ptr =
10113 DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));
10114
10115 // This scalar TruncStore may be illegal, but we legalize it later.
10116 SDValue Store = DAG.getTruncStore(
10117 Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
10118 MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
10119 ST->getAAInfo());
10120
10121 Stores.push_back(Store);
10122 }
10123
10124 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
10125}
10126
10127std::pair<SDValue, SDValue>
10129 assert(LD->getAddressingMode() == ISD::UNINDEXED &&
10130 "unaligned indexed loads not implemented!");
10131 SDValue Chain = LD->getChain();
10132 SDValue Ptr = LD->getBasePtr();
10133 EVT VT = LD->getValueType(0);
10134 EVT LoadedVT = LD->getMemoryVT();
10135 SDLoc dl(LD);
10136 auto &MF = DAG.getMachineFunction();
10137
10138 if (VT.isFloatingPoint() || VT.isVector()) {
10139 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
10140 if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
10141 if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
10142 LoadedVT.isVector()) {
10143 // Scalarize the load and let the individual components be handled.
10144 return scalarizeVectorLoad(LD, DAG);
10145 }
10146
10147 // Expand to a (misaligned) integer load of the same size,
10148 // then bitconvert to floating point or vector.
10149 SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
10150 LD->getMemOperand());
10151 SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
10152 if (LoadedVT != VT)
10153 Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
10154 ISD::ANY_EXTEND, dl, VT, Result);
10155
10156 return std::make_pair(Result, newLoad.getValue(1));
10157 }
10158
10159 // Copy the value to a (aligned) stack slot using (unaligned) integer
10160 // loads and stores, then do a (aligned) load from the stack slot.
10161 MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
10162 unsigned LoadedBytes = LoadedVT.getStoreSize();
10163 unsigned RegBytes = RegVT.getSizeInBits() / 8;
10164 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
10165
10166 // Make sure the stack slot is also aligned for the register type.
10167 SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
10168 auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
10170 SDValue StackPtr = StackBase;
10171 unsigned Offset = 0;
10172
10173 EVT PtrVT = Ptr.getValueType();
10174 EVT StackPtrVT = StackPtr.getValueType();
10175
10176 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
10177 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
10178
10179 // Do all but one copies using the full register width.
10180 for (unsigned i = 1; i < NumRegs; i++) {
10181 // Load one integer register's worth from the original location.
10182 SDValue Load = DAG.getLoad(
10183 RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
10184 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
10185 LD->getAAInfo());
10186 // Follow the load with a store to the stack slot. Remember the store.
10187 Stores.push_back(DAG.getStore(
10188 Load.getValue(1), dl, Load, StackPtr,
10189 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
10190 // Increment the pointers.
10191 Offset += RegBytes;
10192
10193 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
10194 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
10195 }
10196
10197 // The last copy may be partial. Do an extending load.
10198 EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
10199 8 * (LoadedBytes - Offset));
10200 SDValue Load =
10201 DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
10202 LD->getPointerInfo().getWithOffset(Offset), MemVT,
10203 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
10204 LD->getAAInfo());
10205 // Follow the load with a store to the stack slot. Remember the store.
10206 // On big-endian machines this requires a truncating store to ensure
10207 // that the bits end up in the right place.
10208 Stores.push_back(DAG.getTruncStore(
10209 Load.getValue(1), dl, Load, StackPtr,
10210 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
10211
10212 // The order of the stores doesn't matter - say it with a TokenFactor.
10213 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
10214
10215 // Finally, perform the original load only redirected to the stack slot.
10216 Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
10217 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
10218 LoadedVT);
10219
10220 // Callers expect a MERGE_VALUES node.
10221 return std::make_pair(Load, TF);
10222 }
10223
10224 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
10225 "Unaligned load of unsupported type.");
10226
10227 // Compute the new VT that is half the size of the old one. This is an
10228 // integer MVT.
10229 unsigned NumBits = LoadedVT.getSizeInBits();
10230 EVT NewLoadedVT;
10231 NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
10232 NumBits >>= 1;
10233
10234 Align Alignment = LD->getOriginalAlign();
10235 unsigned IncrementSize = NumBits / 8;
10236 ISD::LoadExtType HiExtType = LD->getExtensionType();
10237
10238 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
10239 if (HiExtType == ISD::NON_EXTLOAD)
10240 HiExtType = ISD::ZEXTLOAD;
10241
10242 // Load the value in two parts
10243 SDValue Lo, Hi;
10244 if (DAG.getDataLayout().isLittleEndian()) {
10245 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
10246 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10247 LD->getAAInfo());
10248
10249 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10250 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
10251 LD->getPointerInfo().getWithOffset(IncrementSize),
10252 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10253 LD->getAAInfo());
10254 } else {
10255 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
10256 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10257 LD->getAAInfo());
10258
10259 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10260 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
10261 LD->getPointerInfo().getWithOffset(IncrementSize),
10262 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10263 LD->getAAInfo());
10264 }
10265
10266 // aggregate the two parts
10267 SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
10268 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
10269 Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
10270
10271 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
10272 Hi.getValue(1));
10273
10274 return std::make_pair(Result, TF);
10275}
10276
10278 SelectionDAG &DAG) const {
10279 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
10280 "unaligned indexed stores not implemented!");
10281 SDValue Chain = ST->getChain();
10282 SDValue Ptr = ST->getBasePtr();
10283 SDValue Val = ST->getValue();
10284 EVT VT = Val.getValueType();
10285 Align Alignment = ST->getOriginalAlign();
10286 auto &MF = DAG.getMachineFunction();
10287 EVT StoreMemVT = ST->getMemoryVT();
10288
10289 SDLoc dl(ST);
10290 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
10291 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
10292 if (isTypeLegal(intVT)) {
10293 if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
10294 StoreMemVT.isVector()) {
10295 // Scalarize the store and let the individual components be handled.
10296 SDValue Result = scalarizeVectorStore(ST, DAG);
10297 return Result;
10298 }
10299 // Expand to a bitconvert of the value to the integer type of the
10300 // same size, then a (misaligned) int store.
10301 // FIXME: Does not handle truncating floating point stores!
10302 SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
10303 Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
10304 Alignment, ST->getMemOperand()->getFlags());
10305 return Result;
10306 }
10307 // Do a (aligned) store to a stack slot, then copy from the stack slot
10308 // to the final destination using (unaligned) integer loads and stores.
10309 MVT RegVT = getRegisterType(
10310 *DAG.getContext(),
10311 EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
10312 EVT PtrVT = Ptr.getValueType();
10313 unsigned StoredBytes = StoreMemVT.getStoreSize();
10314 unsigned RegBytes = RegVT.getSizeInBits() / 8;
10315 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
10316
10317 // Make sure the stack slot is also aligned for the register type.
10318 SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
10319 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
10320
10321 // Perform the original store, only redirected to the stack slot.
10322 SDValue Store = DAG.getTruncStore(
10323 Chain, dl, Val, StackPtr,
10324 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
10325
10326 EVT StackPtrVT = StackPtr.getValueType();
10327
10328 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
10329 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
10331 unsigned Offset = 0;
10332
10333 // Do all but one copies using the full register width.
10334 for (unsigned i = 1; i < NumRegs; i++) {
10335 // Load one integer register's worth from the stack slot.
10336 SDValue Load = DAG.getLoad(
10337 RegVT, dl, Store, StackPtr,
10338 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
10339 // Store it to the final location. Remember the store.
10340 Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
10341 ST->getPointerInfo().getWithOffset(Offset),
10342 ST->getOriginalAlign(),
10343 ST->getMemOperand()->getFlags()));
10344 // Increment the pointers.
10345 Offset += RegBytes;
10346 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
10347 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
10348 }
10349
10350 // The last store may be partial. Do a truncating store. On big-endian
10351 // machines this requires an extending load from the stack slot to ensure
10352 // that the bits are in the right place.
10353 EVT LoadMemVT =
10354 EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
10355
10356 // Load from the stack slot.
10357 SDValue Load = DAG.getExtLoad(
10358 ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
10359 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
10360
10361 Stores.push_back(
10362 DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
10363 ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
10364 ST->getOriginalAlign(),
10365 ST->getMemOperand()->getFlags(), ST->getAAInfo()));
10366 // The order of the stores doesn't matter - say it with a TokenFactor.
10367 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
10368 return Result;
10369 }
10370
10371 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
10372 "Unaligned store of unknown type.");
10373 // Get the half-size VT
10374 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
10375 unsigned NumBits = NewStoredVT.getFixedSizeInBits();
10376 unsigned IncrementSize = NumBits / 8;
10377
10378 // Divide the stored value in two parts.
10379 SDValue ShiftAmount =
10380 DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
10381 SDValue Lo = Val;
10382 // If Val is a constant, replace the upper bits with 0. The SRL will constant
10383 // fold and not use the upper bits. A smaller constant may be easier to
10384 // materialize.
10385 if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
10386 Lo = DAG.getNode(
10387 ISD::AND, dl, VT, Lo,
10388 DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
10389 VT));
10390 SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
10391
10392 // Store the two parts
10393 SDValue Store1, Store2;
10394 Store1 = DAG.getTruncStore(Chain, dl,
10395 DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
10396 Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
10397 ST->getMemOperand()->getFlags());
10398
10399 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10400 Store2 = DAG.getTruncStore(
10401 Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
10402 ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
10403 ST->getMemOperand()->getFlags(), ST->getAAInfo());
10404
10405 SDValue Result =
10406 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
10407 return Result;
10408}
10409
10410SDValue
10412 const SDLoc &DL, EVT DataVT,
10413 SelectionDAG &DAG,
10414 bool IsCompressedMemory) const {
10415 SDValue Increment;
10416 EVT AddrVT = Addr.getValueType();
10417 EVT MaskVT = Mask.getValueType();
10418 assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
10419 "Incompatible types of Data and Mask");
10420 if (IsCompressedMemory) {
10421 if (DataVT.isScalableVector())
10423 "Cannot currently handle compressed memory with scalable vectors");
10424 // Incrementing the pointer according to number of '1's in the mask.
10425 EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
10426 SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
10427 if (MaskIntVT.getSizeInBits() < 32) {
10428 MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
10429 MaskIntVT = MVT::i32;
10430 }
10431
10432 // Count '1's with POPCNT.
10433 Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
10434 Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
10435 // Scale is an element size in bytes.
10436 SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
10437 AddrVT);
10438 Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
10439 } else if (DataVT.isScalableVector()) {
10440 Increment = DAG.getVScale(DL, AddrVT,
10441 APInt(AddrVT.getFixedSizeInBits(),
10442 DataVT.getStoreSize().getKnownMinValue()));
10443 } else
10444 Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
10445
10446 return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
10447}
10448
10450 EVT VecVT, const SDLoc &dl,
10451 ElementCount SubEC) {
10452 assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
10453 "Cannot index a scalable vector within a fixed-width vector");
10454
10455 unsigned NElts = VecVT.getVectorMinNumElements();
10456 unsigned NumSubElts = SubEC.getKnownMinValue();
10457 EVT IdxVT = Idx.getValueType();
10458
10459 if (VecVT.isScalableVector() && !SubEC.isScalable()) {
10460 // If this is a constant index and we know the value plus the number of the
10461 // elements in the subvector minus one is less than the minimum number of
10462 // elements then it's safe to return Idx.
10463 if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
10464 if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
10465 return Idx;
10466 SDValue VS =
10467 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
10468 unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
10469 SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
10470 DAG.getConstant(NumSubElts, dl, IdxVT));
10471 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
10472 }
10473 if (isPowerOf2_32(NElts) && NumSubElts == 1) {
10474 APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
10475 return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
10476 DAG.getConstant(Imm, dl, IdxVT));
10477 }
10478 unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
10479 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
10480 DAG.getConstant(MaxIndex, dl, IdxVT));
10481}
10482
10484 SDValue VecPtr, EVT VecVT,
10485 SDValue Index) const {
10486 return getVectorSubVecPointer(
10487 DAG, VecPtr, VecVT,
10489 Index);
10490}
10491
10493 SDValue VecPtr, EVT VecVT,
10494 EVT SubVecVT,
10495 SDValue Index) const {
10496 SDLoc dl(Index);
10497 // Make sure the index type is big enough to compute in.
10498 Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
10499
10500 EVT EltVT = VecVT.getVectorElementType();
10501
10502 // Calculate the element offset and add it to the pointer.
10503 unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
10504 assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
10505 "Converting bits to bytes lost precision");
10506 assert(SubVecVT.getVectorElementType() == EltVT &&
10507 "Sub-vector must be a vector with matching element type");
10508 Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
10509 SubVecVT.getVectorElementCount());
10510
10511 EVT IdxVT = Index.getValueType();
10512 if (SubVecVT.isScalableVector())
10513 Index =
10514 DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10515 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
10516
10517 Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10518 DAG.getConstant(EltSize, dl, IdxVT));
10519 return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
10520}
10521
10522//===----------------------------------------------------------------------===//
10523// Implementation of Emulated TLS Model
10524//===----------------------------------------------------------------------===//
10525
10527 SelectionDAG &DAG) const {
10528 // Access to address of TLS varialbe xyz is lowered to a function call:
10529 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
10530 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10531 PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
10532 SDLoc dl(GA);
10533
10534 ArgListTy Args;
10535 ArgListEntry Entry;
10536 const GlobalValue *GV =
10537 cast<GlobalValue>(GA->getGlobal()->stripPointerCastsAndAliases());
10538 SmallString<32> NameString("__emutls_v.");
10539 NameString += GV->getName();
10540 StringRef EmuTlsVarName(NameString);
10541 const GlobalVariable *EmuTlsVar =
10542 GV->getParent()->getNamedGlobal(EmuTlsVarName);
10543 assert(EmuTlsVar && "Cannot find EmuTlsVar ");
10544 Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
10545 Entry.Ty = VoidPtrType;
10546 Args.push_back(Entry);
10547
10548 SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
10549
10551 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
10552 CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
10553 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
10554
10555 // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
10556 // At last for X86 targets, maybe good for other targets too?
10558 MFI.setAdjustsStack(true); // Is this only for X86 target?
10559 MFI.setHasCalls(true);
10560
10561 assert((GA->getOffset() == 0) &&
10562 "Emulated TLS must have zero offset in GlobalAddressSDNode");
10563 return CallResult.first;
10564}
10565
10567 SelectionDAG &DAG) const {
10568 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
10569 if (!isCtlzFast())
10570 return SDValue();
10571 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10572 SDLoc dl(Op);
10573 if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
10574 EVT VT = Op.getOperand(0).getValueType();
10575 SDValue Zext = Op.getOperand(0);
10576 if (VT.bitsLT(MVT::i32)) {
10577 VT = MVT::i32;
10578 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
10579 }
10580 unsigned Log2b = Log2_32(VT.getSizeInBits());
10581 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
10582 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
10583 DAG.getConstant(Log2b, dl, MVT::i32));
10584 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
10585 }
10586 return SDValue();
10587}
10588
10590 SDValue Op0 = Node->getOperand(0);
10591 SDValue Op1 = Node->getOperand(1);
10592 EVT VT = Op0.getValueType();
10593 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10594 unsigned Opcode = Node->getOpcode();
10595 SDLoc DL(Node);
10596
10597 // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
10598 if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
10600 Op0 = DAG.getFreeze(Op0);
10601 SDValue Zero = DAG.getConstant(0, DL, VT);
10602 return DAG.getNode(ISD::SUB, DL, VT, Op0,
10603 DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
10604 }
10605
10606 // umin(x,y) -> sub(x,usubsat(x,y))
10607 // TODO: Missing freeze(Op0)?
10608 if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
10610 return DAG.getNode(ISD::SUB, DL, VT, Op0,
10611 DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
10612 }
10613
10614 // umax(x,y) -> add(x,usubsat(y,x))
10615 // TODO: Missing freeze(Op0)?
10616 if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
10618 return DAG.getNode(ISD::ADD, DL, VT, Op0,
10619 DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
10620 }
10621
10622 // FIXME: Should really try to split the vector in case it's legal on a
10623 // subvector.
10625 return DAG.UnrollVectorOp(Node);
10626
10627 // Attempt to find an existing SETCC node that we can reuse.
10628 // TODO: Do we need a generic doesSETCCNodeExist?
10629 // TODO: Missing freeze(Op0)/freeze(Op1)?
10630 auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
10631 ISD::CondCode PrefCommuteCC,
10632 ISD::CondCode AltCommuteCC) {
10633 SDVTList BoolVTList = DAG.getVTList(BoolVT);
10634 for (ISD::CondCode CC : {PrefCC, AltCC}) {
10635 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10636 {Op0, Op1, DAG.getCondCode(CC)})) {
10637 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10638 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10639 }
10640 }
10641 for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
10642 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10643 {Op0, Op1, DAG.getCondCode(CC)})) {
10644 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10645 return DAG.getSelect(DL, VT, Cond, Op1, Op0);
10646 }
10647 }
10648 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
10649 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10650 };
10651
10652 // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
10653 // -> Y = (A < B) ? B : A
10654 // -> Y = (A >= B) ? A : B
10655 // -> Y = (A <= B) ? B : A
10656 switch (Opcode) {
10657 case ISD::SMAX:
10658 return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
10659 case ISD::SMIN:
10660 return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
10661 case ISD::UMAX:
10662 return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
10663 case ISD::UMIN:
10664 return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
10665 }
10666
10667 llvm_unreachable("How did we get here?");
10668}
10669
10671 unsigned Opcode = Node->getOpcode();
10672 SDValue LHS = Node->getOperand(0);
10673 SDValue RHS = Node->getOperand(1);
10674 EVT VT = LHS.getValueType();
10675 SDLoc dl(Node);
10676
10677 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10678 assert(VT.isInteger() && "Expected operands to be integers");
10679
10680 // usub.sat(a, b) -> umax(a, b) - b
10681 if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
10682 SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
10683 return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
10684 }
10685
10686 // uadd.sat(a, b) -> umin(a, ~b) + b
10687 if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
10688 SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
10689 SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
10690 return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
10691 }
10692
10693 unsigned OverflowOp;
10694 switch (Opcode) {
10695 case ISD::SADDSAT:
10696 OverflowOp = ISD::SADDO;
10697 break;
10698 case ISD::UADDSAT:
10699 OverflowOp = ISD::UADDO;
10700 break;
10701 case ISD::SSUBSAT:
10702 OverflowOp = ISD::SSUBO;
10703 break;
10704 case ISD::USUBSAT:
10705 OverflowOp = ISD::USUBO;
10706 break;
10707 default:
10708 llvm_unreachable("Expected method to receive signed or unsigned saturation "
10709 "addition or subtraction node.");
10710 }
10711
10712 // FIXME: Should really try to split the vector in case it's legal on a
10713 // subvector.
10715 return DAG.UnrollVectorOp(Node);
10716
10717 unsigned BitWidth = LHS.getScalarValueSizeInBits();
10718 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10719 SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
10720 SDValue SumDiff = Result.getValue(0);
10721 SDValue Overflow = Result.getValue(1);
10722 SDValue Zero = DAG.getConstant(0, dl, VT);
10723 SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
10724
10725 if (Opcode == ISD::UADDSAT) {
10727 // (LHS + RHS) | OverflowMask
10728 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
10729 return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
10730 }
10731 // Overflow ? 0xffff.... : (LHS + RHS)
10732 return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
10733 }
10734
10735 if (Opcode == ISD::USUBSAT) {
10737 // (LHS - RHS) & ~OverflowMask
10738 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
10739 SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
10740 return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
10741 }
10742 // Overflow ? 0 : (LHS - RHS)
10743 return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
10744 }
10745
10746 if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
10749
10750 KnownBits KnownLHS = DAG.computeKnownBits(LHS);
10751 KnownBits KnownRHS = DAG.computeKnownBits(RHS);
10752
10753 // If either of the operand signs are known, then they are guaranteed to
10754 // only saturate in one direction. If non-negative they will saturate
10755 // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
10756 //
10757 // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
10758 // sign of 'y' has to be flipped.
10759
10760 bool LHSIsNonNegative = KnownLHS.isNonNegative();
10761 bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
10762 : KnownRHS.isNegative();
10763 if (LHSIsNonNegative || RHSIsNonNegative) {
10764 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
10765 return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
10766 }
10767
10768 bool LHSIsNegative = KnownLHS.isNegative();
10769 bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
10770 : KnownRHS.isNonNegative();
10771 if (LHSIsNegative || RHSIsNegative) {
10772 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10773 return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
10774 }
10775 }
10776
10777 // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
10779 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10780 SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
10781 DAG.getConstant(BitWidth - 1, dl, VT));
10782 Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
10783 return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
10784}
10785
10787 unsigned Opcode = Node->getOpcode();
10788 SDValue LHS = Node->getOperand(0);
10789 SDValue RHS = Node->getOperand(1);
10790 EVT VT = LHS.getValueType();
10791 EVT ResVT = Node->getValueType(0);
10792 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10793 SDLoc dl(Node);
10794
10795 auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
10796 auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
10797 SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
10798 SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
10799
10800 // We can't perform arithmetic on i1 values. Extending them would
10801 // probably result in worse codegen, so let's just use two selects instead.
10802 // Some targets are also just better off using selects rather than subtraction
10803 // because one of the conditions can be merged with one of the selects.
10804 // And finally, if we don't know the contents of high bits of a boolean value
10805 // we can't perform any arithmetic either.
10806 if (shouldExpandCmpUsingSelects(VT) || BoolVT.getScalarSizeInBits() == 1 ||
10808 SDValue SelectZeroOrOne =
10809 DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
10810 DAG.getConstant(0, dl, ResVT));
10811 return DAG.getSelect(dl, ResVT, IsLT, DAG.getAllOnesConstant(dl, ResVT),
10812 SelectZeroOrOne);
10813 }
10814
10816 std::swap(IsGT, IsLT);
10817 return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
10818 ResVT);
10819}
10820
10822 unsigned Opcode = Node->getOpcode();
10823 bool IsSigned = Opcode == ISD::SSHLSAT;
10824 SDValue LHS = Node->getOperand(0);
10825 SDValue RHS = Node->getOperand(1);
10826 EVT VT = LHS.getValueType();
10827 SDLoc dl(Node);
10828
10829 assert((Node->getOpcode() == ISD::SSHLSAT ||
10830 Node->getOpcode() == ISD::USHLSAT) &&
10831 "Expected a SHLSAT opcode");
10832 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10833 assert(VT.isInteger() && "Expected operands to be integers");
10834
10836 return DAG.UnrollVectorOp(Node);
10837
10838 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
10839
10840 unsigned BW = VT.getScalarSizeInBits();
10841 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10842 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
10843 SDValue Orig =
10844 DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
10845
10846 SDValue SatVal;
10847 if (IsSigned) {
10848 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
10849 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
10850 SDValue Cond =
10851 DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
10852 SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
10853 } else {
10854 SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
10855 }
10856 SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
10857 return DAG.getSelect(dl, VT, Cond, SatVal, Result);
10858}
10859
10861 bool Signed, EVT WideVT,
10862 const SDValue LL, const SDValue LH,
10863 const SDValue RL, const SDValue RH,
10864 SDValue &Lo, SDValue &Hi) const {
10865 // We can fall back to a libcall with an illegal type for the MUL if we
10866 // have a libcall big enough.
10867 // Also, we can fall back to a division in some cases, but that's a big
10868 // performance hit in the general case.
10869 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
10870 if (WideVT == MVT::i16)
10871 LC = RTLIB::MUL_I16;
10872 else if (WideVT == MVT::i32)
10873 LC = RTLIB::MUL_I32;
10874 else if (WideVT == MVT::i64)
10875 LC = RTLIB::MUL_I64;
10876 else if (WideVT == MVT::i128)
10877 LC = RTLIB::MUL_I128;
10878
10879 if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(LC)) {
10880 // We'll expand the multiplication by brute force because we have no other
10881 // options. This is a trivially-generalized version of the code from
10882 // Hacker's Delight (itself derived from Knuth's Algorithm M from section
10883 // 4.3.1).
10884 EVT VT = LL.getValueType();
10885 unsigned Bits = VT.getSizeInBits();
10886 unsigned HalfBits = Bits >> 1;
10887 SDValue Mask =
10888 DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
10889 SDValue LLL = DAG.getNode(ISD::AND, dl, VT, LL, Mask);
10890 SDValue RLL = DAG.getNode(ISD::AND, dl, VT, RL, Mask);
10891
10892 SDValue T = DAG.getNode(ISD::MUL, dl, VT, LLL, RLL);
10893 SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
10894
10895 SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
10896 SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
10897 SDValue LLH = DAG.getNode(ISD::SRL, dl, VT, LL, Shift);
10898 SDValue RLH = DAG.getNode(ISD::SRL, dl, VT, RL, Shift);
10899
10900 SDValue U = DAG.getNode(ISD::ADD, dl, VT,
10901 DAG.getNode(ISD::MUL, dl, VT, LLH, RLL), TH);
10902 SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
10903 SDValue UH = DAG.getNode(ISD::SRL, dl, VT, U, Shift);
10904
10905 SDValue V = DAG.getNode(ISD::ADD, dl, VT,
10906 DAG.getNode(ISD::MUL, dl, VT, LLL, RLH), UL);
10907 SDValue VH = DAG.getNode(ISD::SRL, dl, VT, V, Shift);
10908
10909 SDValue W =
10910 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LLH, RLH),
10911 DAG.getNode(ISD::ADD, dl, VT, UH, VH));
10912 Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
10913 DAG.getNode(ISD::SHL, dl, VT, V, Shift));
10914
10915 Hi = DAG.getNode(ISD::ADD, dl, VT, W,
10916 DAG.getNode(ISD::ADD, dl, VT,
10917 DAG.getNode(ISD::MUL, dl, VT, RH, LL),
10918 DAG.getNode(ISD::MUL, dl, VT, RL, LH)));
10919 } else {
10920 // Attempt a libcall.
10921 SDValue Ret;
10923 CallOptions.setIsSigned(Signed);
10924 CallOptions.setIsPostTypeLegalization(true);
10925 if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
10926 // Halves of WideVT are packed into registers in different order
10927 // depending on platform endianness. This is usually handled by
10928 // the C calling convention, but we can't defer to it in
10929 // the legalizer.
10930 SDValue Args[] = {LL, LH, RL, RH};
10931 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
10932 } else {
10933 SDValue Args[] = {LH, LL, RH, RL};
10934 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
10935 }
10936 assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
10937 "Ret value is a collection of constituent nodes holding result.");
10938 if (DAG.getDataLayout().isLittleEndian()) {
10939 // Same as above.
10940 Lo = Ret.getOperand(0);
10941 Hi = Ret.getOperand(1);
10942 } else {
10943 Lo = Ret.getOperand(1);
10944 Hi = Ret.getOperand(0);
10945 }
10946 }
10947}
10948
10950 bool Signed, const SDValue LHS,
10951 const SDValue RHS, SDValue &Lo,
10952 SDValue &Hi) const {
10953 EVT VT = LHS.getValueType();
10954 assert(RHS.getValueType() == VT && "Mismatching operand types");
10955
10956 SDValue HiLHS;
10957 SDValue HiRHS;
10958 if (Signed) {
10959 // The high part is obtained by SRA'ing all but one of the bits of low
10960 // part.
10961 unsigned LoSize = VT.getFixedSizeInBits();
10962 SDValue Shift = DAG.getShiftAmountConstant(LoSize - 1, VT, dl);
10963 HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, Shift);
10964 HiRHS = DAG.getNode(ISD::SRA, dl, VT, RHS, Shift);
10965 } else {
10966 HiLHS = DAG.getConstant(0, dl, VT);
10967 HiRHS = DAG.getConstant(0, dl, VT);
10968 }
10969 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
10970 forceExpandWideMUL(DAG, dl, Signed, WideVT, LHS, HiLHS, RHS, HiRHS, Lo, Hi);
10971}
10972
10973SDValue
10975 assert((Node->getOpcode() == ISD::SMULFIX ||
10976 Node->getOpcode() == ISD::UMULFIX ||
10977 Node->getOpcode() == ISD::SMULFIXSAT ||
10978 Node->getOpcode() == ISD::UMULFIXSAT) &&
10979 "Expected a fixed point multiplication opcode");
10980
10981 SDLoc dl(Node);
10982 SDValue LHS = Node->getOperand(0);
10983 SDValue RHS = Node->getOperand(1);
10984 EVT VT = LHS.getValueType();
10985 unsigned Scale = Node->getConstantOperandVal(2);
10986 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
10987 Node->getOpcode() == ISD::UMULFIXSAT);
10988 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
10989 Node->getOpcode() == ISD::SMULFIXSAT);
10990 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10991 unsigned VTSize = VT.getScalarSizeInBits();
10992
10993 if (!Scale) {
10994 // [us]mul.fix(a, b, 0) -> mul(a, b)
10995 if (!Saturating) {
10997 return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
10998 } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
10999 SDValue Result =
11000 DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
11001 SDValue Product = Result.getValue(0);
11002 SDValue Overflow = Result.getValue(1);
11003 SDValue Zero = DAG.getConstant(0, dl, VT);
11004
11005 APInt MinVal = APInt::getSignedMinValue(VTSize);
11006 APInt MaxVal = APInt::getSignedMaxValue(VTSize);
11007 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
11008 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
11009 // Xor the inputs, if resulting sign bit is 0 the product will be
11010 // positive, else negative.
11011 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
11012 SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
11013 Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
11014 return DAG.getSelect(dl, VT, Overflow, Result, Product);
11015 } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
11016 SDValue Result =
11017 DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
11018 SDValue Product = Result.getValue(0);
11019 SDValue Overflow = Result.getValue(1);
11020
11021 APInt MaxVal = APInt::getMaxValue(VTSize);
11022 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
11023 return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
11024 }
11025 }
11026
11027 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
11028 "Expected scale to be less than the number of bits if signed or at "
11029 "most the number of bits if unsigned.");
11030 assert(LHS.getValueType() == RHS.getValueType() &&
11031 "Expected both operands to be the same type");
11032
11033 // Get the upper and lower bits of the result.
11034 SDValue Lo, Hi;
11035 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
11036 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
11037 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2);
11038 if (VT.isVector())
11039 WideVT =
11041 if (isOperationLegalOrCustom(LoHiOp, VT)) {
11042 SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
11043 Lo = Result.getValue(0);
11044 Hi = Result.getValue(1);
11045 } else if (isOperationLegalOrCustom(HiOp, VT)) {
11046 Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11047 Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
11048 } else if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
11049 // Try for a multiplication using a wider type.
11050 unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
11051 SDValue LHSExt = DAG.getNode(Ext, dl, WideVT, LHS);
11052 SDValue RHSExt = DAG.getNode(Ext, dl, WideVT, RHS);
11053 SDValue Res = DAG.getNode(ISD::MUL, dl, WideVT, LHSExt, RHSExt);
11054 Lo = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
11055 SDValue Shifted =
11056 DAG.getNode(ISD::SRA, dl, WideVT, Res,
11057 DAG.getShiftAmountConstant(VTSize, WideVT, dl));
11058 Hi = DAG.getNode(ISD::TRUNCATE, dl, VT, Shifted);
11059 } else if (VT.isVector()) {
11060 return SDValue();
11061 } else {
11062 forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
11063 }
11064
11065 if (Scale == VTSize)
11066 // Result is just the top half since we'd be shifting by the width of the
11067 // operand. Overflow impossible so this works for both UMULFIX and
11068 // UMULFIXSAT.
11069 return Hi;
11070
11071 // The result will need to be shifted right by the scale since both operands
11072 // are scaled. The result is given to us in 2 halves, so we only want part of
11073 // both in the result.
11074 SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
11075 DAG.getShiftAmountConstant(Scale, VT, dl));
11076 if (!Saturating)
11077 return Result;
11078
11079 if (!Signed) {
11080 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
11081 // widened multiplication) aren't all zeroes.
11082
11083 // Saturate to max if ((Hi >> Scale) != 0),
11084 // which is the same as if (Hi > ((1 << Scale) - 1))
11085 APInt MaxVal = APInt::getMaxValue(VTSize);
11086 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
11087 dl, VT);
11088 Result = DAG.getSelectCC(dl, Hi, LowMask,
11089 DAG.getConstant(MaxVal, dl, VT), Result,
11090 ISD::SETUGT);
11091
11092 return Result;
11093 }
11094
11095 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
11096 // widened multiplication) aren't all ones or all zeroes.
11097
11098 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
11099 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
11100
11101 if (Scale == 0) {
11102 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
11103 DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
11104 SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
11105 // Saturated to SatMin if wide product is negative, and SatMax if wide
11106 // product is positive ...
11107 SDValue Zero = DAG.getConstant(0, dl, VT);
11108 SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
11109 ISD::SETLT);
11110 // ... but only if we overflowed.
11111 return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
11112 }
11113
11114 // We handled Scale==0 above so all the bits to examine is in Hi.
11115
11116 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
11117 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
11118 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
11119 dl, VT);
11120 Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
11121 // Saturate to min if (Hi >> (Scale - 1)) < -1),
11122 // which is the same as if (HI < (-1 << (Scale - 1))
11123 SDValue HighMask =
11124 DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
11125 dl, VT);
11126 Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
11127 return Result;
11128}
11129
11130SDValue
11132 SDValue LHS, SDValue RHS,
11133 unsigned Scale, SelectionDAG &DAG) const {
11134 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
11135 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
11136 "Expected a fixed point division opcode");
11137
11138 EVT VT = LHS.getValueType();
11139 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
11140 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
11141 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11142
11143 // If there is enough room in the type to upscale the LHS or downscale the
11144 // RHS before the division, we can perform it in this type without having to
11145 // resize. For signed operations, the LHS headroom is the number of
11146 // redundant sign bits, and for unsigned ones it is the number of zeroes.
11147 // The headroom for the RHS is the number of trailing zeroes.
11148 unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
11150 unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
11151
11152 // For signed saturating operations, we need to be able to detect true integer
11153 // division overflow; that is, when you have MIN / -EPS. However, this
11154 // is undefined behavior and if we emit divisions that could take such
11155 // values it may cause undesired behavior (arithmetic exceptions on x86, for
11156 // example).
11157 // Avoid this by requiring an extra bit so that we never get this case.
11158 // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
11159 // signed saturating division, we need to emit a whopping 32-bit division.
11160 if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
11161 return SDValue();
11162
11163 unsigned LHSShift = std::min(LHSLead, Scale);
11164 unsigned RHSShift = Scale - LHSShift;
11165
11166 // At this point, we know that if we shift the LHS up by LHSShift and the
11167 // RHS down by RHSShift, we can emit a regular division with a final scaling
11168 // factor of Scale.
11169
11170 if (LHSShift)
11171 LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
11172 DAG.getShiftAmountConstant(LHSShift, VT, dl));
11173 if (RHSShift)
11174 RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
11175 DAG.getShiftAmountConstant(RHSShift, VT, dl));
11176
11177 SDValue Quot;
11178 if (Signed) {
11179 // For signed operations, if the resulting quotient is negative and the
11180 // remainder is nonzero, subtract 1 from the quotient to round towards
11181 // negative infinity.
11182 SDValue Rem;
11183 // FIXME: Ideally we would always produce an SDIVREM here, but if the
11184 // type isn't legal, SDIVREM cannot be expanded. There is no reason why
11185 // we couldn't just form a libcall, but the type legalizer doesn't do it.
11186 if (isTypeLegal(VT) &&
11188 Quot = DAG.getNode(ISD::SDIVREM, dl,
11189 DAG.getVTList(VT, VT),
11190 LHS, RHS);
11191 Rem = Quot.getValue(1);
11192 Quot = Quot.getValue(0);
11193 } else {
11194 Quot = DAG.getNode(ISD::SDIV, dl, VT,
11195 LHS, RHS);
11196 Rem = DAG.getNode(ISD::SREM, dl, VT,
11197 LHS, RHS);
11198 }
11199 SDValue Zero = DAG.getConstant(0, dl, VT);
11200 SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
11201 SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
11202 SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
11203 SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
11204 SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
11205 DAG.getConstant(1, dl, VT));
11206 Quot = DAG.getSelect(dl, VT,
11207 DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
11208 Sub1, Quot);
11209 } else
11210 Quot = DAG.getNode(ISD::UDIV, dl, VT,
11211 LHS, RHS);
11212
11213 return Quot;
11214}
11215
11217 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
// Expand an unsigned add/sub-with-overflow node (UADDO/USUBO) into target
// operations: prefer the carry-flavoured node when it is legal/custom,
// otherwise emit a plain ADD/SUB plus a SETCC that derives the overflow bit.
// Results are returned through the Result/Overflow out-parameters.
11218 SDLoc dl(Node);
11219 SDValue LHS = Node->getOperand(0);
11220 SDValue RHS = Node->getOperand(1);
11221 bool IsAdd = Node->getOpcode() == ISD::UADDO;
11222
11223 // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
11224 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
11225 if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
// Feed a zero carry-in; the carry node's two results map directly onto
// the UADDO/USUBO results (value, overflow).
11226 SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
11227 SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
11228 { LHS, RHS, CarryIn });
11229 Result = SDValue(NodeCarry.getNode(), 0);
11230 Overflow = SDValue(NodeCarry.getNode(), 1);
11231 return;
11232 }
11233
11234 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11235 LHS.getValueType(), LHS, RHS);
11236
11237 EVT ResultType = Node->getValueType(1);
11238 EVT SetCCType = getSetCCResultType(
11239 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11240 SDValue SetCC;
11241 if (IsAdd && isOneConstant(RHS)) {
11242 // Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces
11243 // the live range of X. We assume comparing with 0 is cheap.
11244 // The general case (X + C) < C is not necessarily beneficial. Although we
11245 // reduce the live range of X, we may introduce the materialization of
11246 // constant C.
11247 SetCC =
11248 DAG.getSetCC(dl, SetCCType, Result,
11249 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
11250 } else if (IsAdd && isAllOnesConstant(RHS)) {
11251 // Special case: uaddo X, -1 overflows if X != 0.
11252 SetCC =
11253 DAG.getSetCC(dl, SetCCType, LHS,
11254 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
11255 } else {
// General case: compare Result against LHS with an unsigned condition
// code CC. NOTE(review): CC is defined immediately above this use —
// presumably SETULT for add / SETUGT for sub; confirm against upstream.
11257 SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
11258 }
// Widen/truncate the boolean to the node's declared overflow type.
11259 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11260}
11261
11263 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
// Expand a signed add/sub-with-overflow node (SADDO/SSUBO). If the matching
// saturating operation is legal we detect overflow by comparing the wrapped
// result against the saturated one; otherwise we use the classic sign-based
// rule below. Results come back via the Result/Overflow out-parameters.
11264 SDLoc dl(Node);
11265 SDValue LHS = Node->getOperand(0);
11266 SDValue RHS = Node->getOperand(1);
11267 bool IsAdd = Node->getOpcode() == ISD::SADDO;
11268
11269 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11270 LHS.getValueType(), LHS, RHS);
11271
11272 EVT ResultType = Node->getValueType(1);
11273 EVT OType = getSetCCResultType(
11274 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11275
11276 // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
11277 unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
11278 if (isOperationLegal(OpcSat, LHS.getValueType())) {
// Overflow occurred iff wrapping and saturating results disagree.
11279 SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
11280 SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
11281 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11282 return;
11283 }
11284
11285 SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
11286
11287 // For an addition, the result should be less than one of the operands (LHS)
11288 // if and only if the other operand (RHS) is negative, otherwise there will
11289 // be overflow.
11290 // For a subtraction, the result should be less than one of the operands
11291 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
11292 // otherwise there will be overflow.
11293 SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
11294 SDValue ConditionRHS =
11295 DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
11296
// XOR of the two predicates is exactly the "iff" mismatch described above.
11297 Overflow = DAG.getBoolExtOrTrunc(
11298 DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
11299 ResultType, ResultType);
11300}
11301
11303 SDValue &Overflow, SelectionDAG &DAG) const {
// Expand a multiply-with-overflow node (SMULO/UMULO). Strategies, in order:
// shift-based expansion for power-of-two constants, MULH*/[SU]MUL_LOHI when
// available, a widened MUL when the double-width type is legal, and finally
// a forced wide-multiply expansion (scalar only). Returns false only when no
// strategy applies (vector type with none of the operations available).
11304 SDLoc dl(Node);
11305 EVT VT = Node->getValueType(0);
11306 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11307 SDValue LHS = Node->getOperand(0);
11308 SDValue RHS = Node->getOperand(1);
11309 bool isSigned = Node->getOpcode() == ISD::SMULO;
11310
11311 // For power-of-two multiplications we can use a simpler shift expansion.
11312 if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
11313 const APInt &C = RHSC->getAPIntValue();
11314 // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
11315 if (C.isPowerOf2()) {
11316 // smulo(x, signed_min) is same as umulo(x, signed_min).
11317 bool UseArithShift = isSigned && !C.isMinSignedValue();
11318 SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
11319 Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
// Overflow iff shifting back does not recover the original LHS.
11320 Overflow = DAG.getSetCC(dl, SetCCVT,
11321 DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
11322 dl, VT, Result, ShiftAmt),
11323 LHS, ISD::SETNE);
11324 return true;
11325 }
11326 }
11327
// Double-width integer type used by the widening strategy below.
11328 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
11329 if (VT.isVector())
11330 WideVT =
11332
11333 SDValue BottomHalf;
11334 SDValue TopHalf;
// Opcode table indexed by signedness: [0] = high-half multiply (MULH*),
// [1] = combined lo/hi multiply, [2] = the matching extension opcode.
// NOTE(review): initializer is defined here in the original source —
// confirm the exact MULHU/UMUL_LOHI/ZERO_EXTEND vs MULHS/SMUL_LOHI/
// SIGN_EXTEND ordering against upstream.
11335 static const unsigned Ops[2][3] =
11338 if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
11339 BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11340 TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
11341 } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
11342 BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
11343 RHS);
11344 TopHalf = BottomHalf.getValue(1);
11345 } else if (isTypeLegal(WideVT)) {
// Widen both operands, multiply once, then split into halves.
11346 LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
11347 RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
11348 SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
11349 BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
11350 SDValue ShiftAmt =
11351 DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
11352 TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
11353 DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
11354 } else {
11355 if (VT.isVector())
11356 return false;
11357
11358 forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
11359 }
11360
11361 Result = BottomHalf;
11362 if (isSigned) {
// Signed overflow: the top half must equal the sign-extension of the
// bottom half, i.e. all copies of its sign bit.
11363 SDValue ShiftAmt = DAG.getShiftAmountConstant(
11364 VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
11365 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
11366 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
11367 } else {
// Unsigned overflow: any nonzero bit in the top half.
11368 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
11369 DAG.getConstant(0, dl, VT), ISD::SETNE);
11370 }
11371
11372 // Truncate the result if SetCC returns a larger type than needed.
11373 EVT RType = Node->getValueType(1);
11374 if (RType.bitsLT(Overflow.getValueType()))
11375 Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
11376
11377 assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
11378 "Unexpected result type for S/UMULO legalization");
11379 return true;
11380}
11381
// Expand a VECREDUCE_* node: first halve the vector with pairwise shuffle
// reductions while the base opcode stays legal on the half-size type, then
// finish by extracting the remaining elements and folding them with scalar
// ops. Scalable vectors cannot be expanded this way and are rejected.
11383 SDLoc dl(Node);
11384 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11385 SDValue Op = Node->getOperand(0);
11386 EVT VT = Op.getValueType();
11387
11388 if (VT.isScalableVector())
11390 "Expanding reductions for scalable vectors is undefined.");
11391
11392 // Try to use a shuffle reduction for power of two vectors.
11393 if (VT.isPow2VectorType()) {
11394 while (VT.getVectorNumElements() > 1) {
11395 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
11396 if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
11397 break;
11398
// Combine the two halves element-wise, halving the problem size.
11399 SDValue Lo, Hi;
11400 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
11401 Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());
11402 VT = HalfVT;
11403 }
11404 }
11405
11406 EVT EltVT = VT.getVectorElementType();
11407 unsigned NumElts = VT.getVectorNumElements();
11408
11410 DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
11411
// Linear scalar fold over whatever elements remain.
11412 SDValue Res = Ops[0];
11413 for (unsigned i = 1; i < NumElts; i++)
11414 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
11415
11416 // Result type may be wider than element type.
11417 if (EltVT != Node->getValueType(0))
11418 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
11419 return Res;
11420}
11421
// Expand a sequential (ordered) vector reduction: starting from the scalar
// accumulator operand, fold each vector element in order with the base
// scalar opcode. Ordering must be preserved (e.g. for strict FP semantics),
// so no shuffle-tree shortcut is attempted. Scalable vectors are rejected.
11423 SDLoc dl(Node);
11424 SDValue AccOp = Node->getOperand(0);
11425 SDValue VecOp = Node->getOperand(1);
11426 SDNodeFlags Flags = Node->getFlags();
11427
11428 EVT VT = VecOp.getValueType();
11429 EVT EltVT = VT.getVectorElementType();
11430
11431 if (VT.isScalableVector())
11433 "Expanding reductions for scalable vectors is undefined.");
11434
11435 unsigned NumElts = VT.getVectorNumElements();
11436
11438 DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
11439
11440 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11441
// Strictly in-order fold: Res = (((Acc op e0) op e1) op ...).
11442 SDValue Res = AccOp;
11443 for (unsigned i = 0; i < NumElts; i++)
11444 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
11445
11446 return Res;
11447}
11448
11450 SelectionDAG &DAG) const {
// Expand SREM/UREM. Prefer the combined DIVREM node (remainder is its
// second result); otherwise synthesize the remainder from the quotient as
// X - (X/Y)*Y. Returns false when neither the divrem nor the plain divide
// is legal/custom for this type, leaving expansion to other strategies.
11451 EVT VT = Node->getValueType(0);
11452 SDLoc dl(Node);
11453 bool isSigned = Node->getOpcode() == ISD::SREM;
11454 unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
11455 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
11456 SDValue Dividend = Node->getOperand(0);
11457 SDValue Divisor = Node->getOperand(1);
11458 if (isOperationLegalOrCustom(DivRemOpc, VT)) {
11459 SDVTList VTs = DAG.getVTList(VT, VT);
11460 Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
11461 return true;
11462 }
11463 if (isOperationLegalOrCustom(DivOpc, VT)) {
11464 // X % Y -> X-X/Y*Y
11465 SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
11466 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
11467 Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
11468 return true;
11469 }
11470 return false;
11471}
11472
11474 SelectionDAG &DAG) const {
// Expand FP_TO_SINT_SAT / FP_TO_UINT_SAT: convert a float to an integer,
// clamping out-of-range inputs to the min/max of the saturation width and
// mapping NaN to zero. Uses FMINNUM/FMAXNUM clamping when the integer
// bounds are exactly representable in the source FP type and those ops are
// legal; otherwise falls back to compare+select around a raw FP_TO_XINT.
11475 bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
11476 SDLoc dl(SDValue(Node, 0));
11477 SDValue Src = Node->getOperand(0);
11478
11479 // DstVT is the result type, while SatVT is the size to which we saturate
11480 EVT SrcVT = Src.getValueType();
11481 EVT DstVT = Node->getValueType(0);
11482
11483 EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
11484 unsigned SatWidth = SatVT.getScalarSizeInBits();
11485 unsigned DstWidth = DstVT.getScalarSizeInBits();
11486 assert(SatWidth <= DstWidth &&
11487 "Expected saturation width smaller than result width");
11488
11489 // Determine minimum and maximum integer values and their corresponding
11490 // floating-point values.
11491 APInt MinInt, MaxInt;
11492 if (IsSigned) {
11493 MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
11494 MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
11495 } else {
11496 MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
11497 MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
11498 }
11499
11500 // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
11501 // libcall emission cannot handle this. Large result types will fail.
11502 if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
11503 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
11504 SrcVT = Src.getValueType();
11505 }
11506
11507 const fltSemantics &Sem = SrcVT.getFltSemantics();
11508 APFloat MinFloat(Sem);
11509 APFloat MaxFloat(Sem);
11510
// Round-toward-zero conversion; inexactness tells us the bound is not
// exactly representable in the source FP format.
11511 APFloat::opStatus MinStatus =
11512 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
11513 APFloat::opStatus MaxStatus =
11514 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
11515 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
11516 !(MaxStatus & APFloat::opStatus::opInexact);
11517
11518 SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
11519 SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
11520
11521 // If the integer bounds are exactly representable as floats and min/max are
11522 // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
11523 // of comparisons and selects.
// NOTE(review): the second conjunct of MinMaxLegal (next original line,
// not visible here) presumably checks FMAXNUM legality — confirm upstream.
11524 bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
11526 if (AreExactFloatBounds && MinMaxLegal) {
11527 SDValue Clamped = Src;
11528
11529 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
11530 Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
11531 // Clamp by MaxFloat from above. NaN cannot occur.
11532 Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
11533 // Convert clamped value to integer.
11534 SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
11535 dl, DstVT, Clamped);
11536
11537 // In the unsigned case we're done, because we mapped NaN to MinFloat,
11538 // which will cast to zero.
11539 if (!IsSigned)
11540 return FpToInt;
11541
11542 // Otherwise, select 0 if Src is NaN.
11543 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
11544 EVT SetCCVT =
11545 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
// SETUO (unordered) is true exactly when Src is NaN.
11546 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
11547 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
11548 }
11549
11550 SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
11551 SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
11552
11553 // Result of direct conversion. The assumption here is that the operation is
11554 // non-trapping and it's fine to apply it to an out-of-range value if we
11555 // select it away later.
11556 SDValue FpToInt =
11557 DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);
11558
11559 SDValue Select = FpToInt;
11560
11561 EVT SetCCVT =
11562 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
11563
11564 // If Src ULT MinFloat, select MinInt. In particular, this also selects
11565 // MinInt if Src is NaN.
11566 SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
11567 Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
11568 // If Src OGT MaxFloat, select MaxInt.
11569 SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
11570 Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);
11571
11572 // In the unsigned case we are done, because we mapped NaN to MinInt, which
11573 // is already zero.
11574 if (!IsSigned)
11575 return Select;
11576
11577 // Otherwise, select 0 if Src is NaN.
11578 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
11579 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
11580 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
11581}
11582
11584 const SDLoc &dl,
11585 SelectionDAG &DAG) const {
// Round Op from a wider FP type down to ResultVT using round-to-odd,
// which makes a subsequent second rounding step safe (no double-rounding
// error). Implements the technique from Boldo & Melquiond cited below:
// work on the absolute value, detect whether narrowing was exact/odd/NaN,
// and otherwise force the low bit odd; finally re-attach the sign bit via
// integer bit manipulation.
11586 EVT OperandVT = Op.getValueType();
11587 if (OperandVT.getScalarType() == ResultVT.getScalarType())
11588 return Op;
11589 EVT ResultIntVT = ResultVT.changeTypeToInteger();
11590 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11591 // can induce double-rounding which may alter the results. We can
11592 // correct for this using a trick explained in: Boldo, Sylvie, and
11593 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11594 // World Congress. 2005.
11595 unsigned BitSize = OperandVT.getScalarSizeInBits();
11596 EVT WideIntVT = OperandVT.changeTypeToInteger();
11597 SDValue OpAsInt = DAG.getBitcast(WideIntVT, Op);
// Isolate the sign bit now; it is re-applied at the end.
11598 SDValue SignBit =
11599 DAG.getNode(ISD::AND, dl, WideIntVT, OpAsInt,
11600 DAG.getConstant(APInt::getSignMask(BitSize), dl, WideIntVT));
11601 SDValue AbsWide;
11602 if (isOperationLegalOrCustom(ISD::FABS, OperandVT)) {
11603 AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
11604 } else {
// No FABS: clear the sign bit with integer masking instead.
11605 SDValue ClearedSign = DAG.getNode(
11606 ISD::AND, dl, WideIntVT, OpAsInt,
11607 DAG.getConstant(APInt::getSignedMaxValue(BitSize), dl, WideIntVT));
11608 AbsWide = DAG.getBitcast(OperandVT, ClearedSign);
11609 }
// Round-trip narrow-then-widen to measure the rounding error.
11610 SDValue AbsNarrow = DAG.getFPExtendOrRound(AbsWide, dl, ResultVT);
11611 SDValue AbsNarrowAsWide = DAG.getFPExtendOrRound(AbsNarrow, dl, OperandVT);
11612
11613 // We can keep the narrow value as-is if narrowing was exact (no
11614 // rounding error), the wide value was NaN (the narrow value is also
11615 // NaN and should be preserved) or if we rounded to the odd value.
11616 SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, AbsNarrow);
11617 SDValue One = DAG.getConstant(1, dl, ResultIntVT);
11618 SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
11619 SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
11620 EVT ResultIntVTCCVT = getSetCCResultType(
11621 DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
11622 SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
11623 // The result is already odd so we don't need to do anything.
11624 SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);
11625
11626 EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11627 AbsWide.getValueType());
11628 // We keep results which are exact, odd or NaN.
// SETUEQ is unordered-or-equal: true when exact (equal) or when NaN.
11629 SDValue KeepNarrow =
11630 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETUEQ);
11631 KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
11632 // We morally performed a round-down if AbsNarrow is smaller than
11633 // AbsWide.
11634 SDValue NarrowIsRd =
11635 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
11636 // If the narrow value is odd or exact, pick it.
11637 // Otherwise, narrow is even and corresponds to either the rounded-up
11638 // or rounded-down value. If narrow is the rounded-down value, we want
11639 // the rounded-up value as it will be odd.
11640 SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
11641 SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
11642 Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
// Shift the preserved wide sign bit into the narrow type's sign position
// and OR it back in before bitcasting to the FP result type.
11643 int ShiftAmount = BitSize - ResultVT.getScalarSizeInBits();
11644 SDValue ShiftCnst = DAG.getShiftAmountConstant(ShiftAmount, WideIntVT, dl);
11645 SignBit = DAG.getNode(ISD::SRL, dl, WideIntVT, SignBit, ShiftCnst);
11646 SignBit = DAG.getNode(ISD::TRUNCATE, dl, ResultIntVT, SignBit);
11647 Op = DAG.getNode(ISD::OR, dl, ResultIntVT, Op, SignBit);
11648 return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
11649}
11650
// Expand FP_ROUND. Only the bf16 destination case is handled here: when the
// truncating flag (operand 1) is set, a single FP_TO_BF16 suffices; otherwise
// round to f32 with round-to-odd (avoiding double rounding), then perform
// round-to-nearest-even to bf16 via integer arithmetic, quieting NaNs.
// Returns an empty SDValue for all non-bf16 destinations.
11652 assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
11653 SDValue Op = Node->getOperand(0);
11654 EVT VT = Node->getValueType(0);
11655 SDLoc dl(Node);
11656 if (VT.getScalarType() == MVT::bf16) {
11657 if (Node->getConstantOperandVal(1) == 1) {
11658 return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
11659 }
11660 EVT OperandVT = Op.getValueType();
// SETUO on (Op, Op) detects NaN before we start mangling bits.
11661 SDValue IsNaN = DAG.getSetCC(
11662 dl,
11663 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
11664 Op, Op, ISD::SETUO);
11665
11666 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11667 // can induce double-rounding which may alter the results. We can
11668 // correct for this using a trick explained in: Boldo, Sylvie, and
11669 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11670 // World Congress. 2005.
11671 EVT F32 = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
11672 EVT I32 = F32.changeTypeToInteger();
11673 Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
11674 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
11675
11676 // Conversions should set NaN's quiet bit. This also prevents NaNs from
11677 // turning into infinities.
11678 SDValue NaN =
11679 DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));
11680
11681 // Factor in the contribution of the low 16 bits.
// Round-to-nearest-even: bias is 0x7fff plus the lsb of the kept part.
11682 SDValue One = DAG.getConstant(1, dl, I32);
11683 SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
11684 DAG.getShiftAmountConstant(16, I32, dl));
11685 Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
11686 SDValue RoundingBias =
11687 DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb);
11688 SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);
11689
11690 // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
11691 // 0x80000000.
11692 Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);
11693
11694 // Now that we have rounded, shift the bits into position.
11695 Op = DAG.getNode(ISD::SRL, dl, I32, Op,
11696 DAG.getShiftAmountConstant(16, I32, dl));
11697 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
11698 EVT I16 = I32.isVector() ? I32.changeVectorElementType(MVT::i16) : MVT::i16;
11699 Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
11700 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
11701 }
11702 return SDValue();
11703}
11704
11706 SelectionDAG &DAG) const {
// Expand VECTOR_SPLICE for scalable vectors by spilling both operands to a
// stack temporary sized for CONCAT(V1, V2) and loading the spliced result
// back at the computed offset (see the pseudo-code sketch below). Fixed
// vectors are expected to have been lowered as shuffles instead.
11707 assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
11708 assert(Node->getValueType(0).isScalableVector() &&
11709 "Fixed length vector types expected to use SHUFFLE_VECTOR!");
11710
11711 EVT VT = Node->getValueType(0);
11712 SDValue V1 = Node->getOperand(0);
11713 SDValue V2 = Node->getOperand(1);
11714 int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
11715 SDLoc DL(Node);
11716
11717 // Expand through memory thusly:
11718 // Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
11719 // Store V1, Ptr
11720 // Store V2, Ptr + sizeof(V1)
11721 // If (Imm < 0)
11722 // TrailingElts = -Imm
11723 // Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
11724 // else
11725 // Ptr = Ptr + (Imm * sizeof(VT.Elt))
11726 // Res = Load Ptr
11727
11728 Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
11729
// Stack slot big enough for both vectors back-to-back.
11731 VT.getVectorElementCount() * 2);
11732 SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
11733 EVT PtrVT = StackPtr.getValueType();
11734 auto &MF = DAG.getMachineFunction();
11735 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11736 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
11737
11738 // Store the lo part of CONCAT_VECTORS(V1, V2)
11739 SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
11740 // Store the hi part of CONCAT_VECTORS(V1, V2)
// Offset is vscale-scaled since V1's store size is scalable.
11741 SDValue OffsetToV2 = DAG.getVScale(
11742 DL, PtrVT,
11744 SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
11745 SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);
11746
11747 if (Imm >= 0) {
11748 // Load back the required element. getVectorElementPointer takes care of
11749 // clamping the index if it's out-of-bounds.
11750 StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
11751 // Load the spliced result
11752 return DAG.getLoad(VT, DL, StoreV2, StackPtr,
11754 }
11755
11756 uint64_t TrailingElts = -Imm;
11757
11758 // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
11759 TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
11760 SDValue TrailingBytes =
11761 DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);
11762
11763 if (TrailingElts > VT.getVectorMinNumElements()) {
// Runtime clamp: trailing bytes may exceed V1's actual (vscale-scaled)
// size, so take the minimum against the true vector length in bytes.
11764 SDValue VLBytes =
11765 DAG.getVScale(DL, PtrVT,
11766 APInt(PtrVT.getFixedSizeInBits(),
11768 TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
11769 }
11770
11771 // Calculate the start address of the spliced result.
11772 StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
11773
11774 // Load the spliced result
11775 return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
11777}
11778
11780 SelectionDAG &DAG) const {
// Expand VECTOR_COMPRESS through a stack temporary: optionally pre-store
// the passthru vector, then for each lane store the element and advance the
// output position only when the mask bit is set; finally reload the whole
// vector. Extra care is taken to restore the correct passthru element at
// the last written slot when not all lanes were selected.
11781 SDLoc DL(Node);
11782 SDValue Vec = Node->getOperand(0);
11783 SDValue Mask = Node->getOperand(1);
11784 SDValue Passthru = Node->getOperand(2);
11785
11786 EVT VecVT = Vec.getValueType();
11787 EVT ScalarVT = VecVT.getScalarType();
11788 EVT MaskVT = Mask.getValueType();
11789 EVT MaskScalarVT = MaskVT.getScalarType();
11790
11791 // Needs to be handled by targets that have scalable vector types.
11792 if (VecVT.isScalableVector())
11793 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
11794
11795 SDValue StackPtr = DAG.CreateStackTemporary(
11796 VecVT.getStoreSize(), DAG.getReducedAlign(VecVT, /*UseABI=*/false));
11797 int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11798 MachinePointerInfo PtrInfo =
11800
11801 MVT PositionVT = getVectorIdxTy(DAG.getDataLayout());
11802 SDValue Chain = DAG.getEntryNode();
11803 SDValue OutPos = DAG.getConstant(0, DL, PositionVT);
11804
11805 bool HasPassthru = !Passthru.isUndef();
11806
11807 // If we have a passthru vector, store it on the stack, overwrite the matching
11808 // positions and then re-write the last element that was potentially
11809 // overwritten even though mask[i] = false.
11810 if (HasPassthru)
11811 Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo);
11812
11813 SDValue LastWriteVal;
11814 APInt PassthruSplatVal;
11815 bool IsSplatPassthru =
11816 ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);
11817
11818 if (IsSplatPassthru) {
11819 // As we do not know which position we wrote to last, we cannot simply
11820 // access that index from the passthru vector. So we first check if passthru
11821 // is a splat vector, to use any element ...
11822 LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT);
11823 } else if (HasPassthru) {
11824 // ... if it is not a splat vector, we need to get the passthru value at
11825 // position = popcount(mask) and re-load it from the stack before it is
11826 // overwritten in the loop below.
11827 EVT PopcountVT = ScalarVT.changeTypeToInteger();
11828 SDValue Popcount = DAG.getNode(
11829 ISD::TRUNCATE, DL, MaskVT.changeVectorElementType(MVT::i1), Mask);
11830 Popcount =
11832 MaskVT.changeVectorElementType(PopcountVT), Popcount);
11833 Popcount = DAG.getNode(ISD::VECREDUCE_ADD, DL, PopcountVT, Popcount);
11834 SDValue LastElmtPtr =
11835 getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
11836 LastWriteVal = DAG.getLoad(
11837 ScalarVT, DL, Chain, LastElmtPtr,
11839 Chain = LastWriteVal.getValue(1);
11840 }
11841
11842 unsigned NumElms = VecVT.getVectorNumElements();
11843 for (unsigned I = 0; I < NumElms; I++) {
11845
// Unconditionally store lane I at the current output slot; OutPos only
// advances when the mask bit is set, so unselected stores get overwritten.
11846 SDValue ValI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Vec, Idx);
11847 SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
11848 Chain = DAG.getStore(
11849 Chain, DL, ValI, OutPtr,
11851
11852 // Get the mask value and add it to the current output position. This
11853 // either increments by 1 if MaskI is true or adds 0 otherwise.
11854 // Freeze in case we have poison/undef mask entries.
11855 SDValue MaskI = DAG.getFreeze(
11856 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MaskScalarVT, Mask, Idx))
11857 MaskI = DAG.getFreeze(MaskI);
11858 MaskI = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, MaskI);
11859 MaskI = DAG.getNode(ISD::ZERO_EXTEND, DL, PositionVT, MaskI);
11860 OutPos = DAG.getNode(ISD::ADD, DL, PositionVT, OutPos, MaskI);
11861
11862 if (HasPassthru && I == NumElms - 1) {
11863 SDValue EndOfVector =
11864 DAG.getConstant(VecVT.getVectorNumElements() - 1, DL, PositionVT);
11865 SDValue AllLanesSelected =
11866 DAG.getSetCC(DL, MVT::i1, OutPos, EndOfVector, ISD::CondCode::SETUGT);
// Clamp so the final fix-up store stays within the vector.
11867 OutPos = DAG.getNode(ISD::UMIN, DL, PositionVT, OutPos, EndOfVector);
11868 OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
11869
11870 // Re-write the last ValI if all lanes were selected. Otherwise,
11871 // overwrite the last write it with the passthru value.
11872 LastWriteVal = DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI,
11873 LastWriteVal, SDNodeFlags::Unpredictable);
11874 Chain = DAG.getStore(
11875 Chain, DL, LastWriteVal, OutPtr,
11877 }
11878 }
11879
11880 return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
11881}
11882
11884 SDValue &LHS, SDValue &RHS,
11885 SDValue &CC, SDValue Mask,
11886 SDValue EVL, bool &NeedInvert,
11887 const SDLoc &dl, SDValue &Chain,
11888 bool IsSignaling) const {
11889 MVT OpVT = LHS.getSimpleValueType();
11890 ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
11891 NeedInvert = false;
11892 assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
11893 bool IsNonVP = !EVL;
11894 switch (getCondCodeAction(CCCode, OpVT)) {
11895 default:
11896 llvm_unreachable("Unknown condition code action!");
11898 // Nothing to do.
11899 break;
11902 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
11903 std::swap(LHS, RHS);
11904 CC = DAG.getCondCode(InvCC);
11905 return true;
11906 }
11907 // Swapping operands didn't work. Try inverting the condition.
11908 bool NeedSwap = false;
11909 InvCC = getSetCCInverse(CCCode, OpVT);
11910 if (!isCondCodeLegalOrCustom(InvCC, OpVT)) {
11911 // If inverting the condition is not enough, try swapping operands
11912 // on top of it.
11913 InvCC = ISD::getSetCCSwappedOperands(InvCC);
11914 NeedSwap = true;
11915 }
11916 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
11917 CC = DAG.getCondCode(InvCC);
11918 NeedInvert = true;
11919 if (NeedSwap)
11920 std::swap(LHS, RHS);
11921 return true;
11922 }
11923
11924 // Special case: expand i1 comparisons using logical operations.
11925 if (OpVT == MVT::i1) {
11926 SDValue Ret;
11927 switch (CCCode) {
11928 default:
11929 llvm_unreachable("Unknown integer setcc!");
11930 case ISD::SETEQ: // X == Y --> ~(X ^ Y)
11931 Ret = DAG.getNOT(dl, DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS),
11932 MVT::i1);
11933 break;
11934 case ISD::SETNE: // X != Y --> (X ^ Y)
11935 Ret = DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS);
11936 break;
11937 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
11938 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
11939 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, RHS,
11940 DAG.getNOT(dl, LHS, MVT::i1));
11941 break;
11942 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
11943 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
11944 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, LHS,
11945 DAG.getNOT(dl, RHS, MVT::i1));
11946 break;
11947 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
11948 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
11949 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, RHS,
11950 DAG.getNOT(dl, LHS, MVT::i1));
11951 break;
11952 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
11953 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
11954 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, LHS,
11955 DAG.getNOT(dl, RHS, MVT::i1));
11956 break;
11957 }
11958
11959 LHS = DAG.getZExtOrTrunc(Ret, dl, VT);
11960 RHS = SDValue();
11961 CC = SDValue();
11962 return true;
11963 }
11964
11966 unsigned Opc = 0;
11967 switch (CCCode) {
11968 default:
11969 llvm_unreachable("Don't know how to expand this condition!");
11970 case ISD::SETUO:
11971 if (isCondCodeLegal(ISD::SETUNE, OpVT)) {
11972 CC1 = ISD::SETUNE;
11973 CC2 = ISD::SETUNE;
11974 Opc = ISD::OR;
11975 break;
11976 }
11978 "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
11979 NeedInvert = true;
11980 [[fallthrough]];
11981 case ISD::SETO:
11983 "If SETO is expanded, SETOEQ must be legal!");
11984 CC1 = ISD::SETOEQ;
11985 CC2 = ISD::SETOEQ;
11986 Opc = ISD::AND;
11987 break;
11988 case ISD::SETONE:
11989 case ISD::SETUEQ:
11990 // If the SETUO or SETO CC isn't legal, we might be able to use
11991 // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
11992 // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
11993 // the operands.
11994 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
11995 if (!isCondCodeLegal(CC2, OpVT) && (isCondCodeLegal(ISD::SETOGT, OpVT) ||
11996 isCondCodeLegal(ISD::SETOLT, OpVT))) {
11997 CC1 = ISD::SETOGT;
11998 CC2 = ISD::SETOLT;
11999 Opc = ISD::OR;
12000 NeedInvert = ((unsigned)CCCode & 0x8U);
12001 break;
12002 }
12003 [[fallthrough]];
12004 case ISD::SETOEQ:
12005 case ISD::SETOGT:
12006 case ISD::SETOGE:
12007 case ISD::SETOLT:
12008 case ISD::SETOLE:
12009 case ISD::SETUNE:
12010 case ISD::SETUGT:
12011 case ISD::SETUGE:
12012 case ISD::SETULT:
12013 case ISD::SETULE:
12014 // If we are floating point, assign and break, otherwise fall through.
12015 if (!OpVT.isInteger()) {
12016 // We can use the 4th bit to tell if we are the unordered
12017 // or ordered version of the opcode.
12018 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
12019 Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
12020 CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
12021 break;
12022 }
12023 // Fallthrough if we are unsigned integer.
12024 [[fallthrough]];
12025 case ISD::SETLE:
12026 case ISD::SETGT:
12027 case ISD::SETGE:
12028 case ISD::SETLT:
12029 case ISD::SETNE:
12030 case ISD::SETEQ:
12031 // If all combinations of inverting the condition and swapping operands
12032 // didn't work then we have no means to expand the condition.
12033 llvm_unreachable("Don't know how to expand this condition!");
12034 }
12035
12036 SDValue SetCC1, SetCC2;
12037 if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
12038 // If we aren't the ordered or unorder operation,
12039 // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
12040 if (IsNonVP) {
12041 SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
12042 SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
12043 } else {
12044 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
12045 SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
12046 }
12047 } else {
12048 // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
12049 if (IsNonVP) {
12050 SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
12051 SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
12052 } else {
12053 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
12054 SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
12055 }
12056 }
12057 if (Chain)
12058 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
12059 SetCC2.getValue(1));
12060 if (IsNonVP)
12061 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
12062 else {
12063 // Transform the binary opcode to the VP equivalent.
12064 assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
12065 Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
12066 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
12067 }
12068 RHS = SDValue();
12069 CC = SDValue();
12070 return true;
12071 }
12072 }
12073 return false;
12074}
12075
12077 SelectionDAG &DAG) const {
12078 EVT VT = Node->getValueType(0);
12079 // Despite its documentation, GetSplitDestVTs will assert if VT cannot be
12080 // split into two equal parts.
12081 if (!VT.isVector() || !VT.getVectorElementCount().isKnownMultipleOf(2))
12082 return SDValue();
12083
12084 // Restrict expansion to cases where both parts can be concatenated.
12085 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
12086 if (LoVT != HiVT || !isTypeLegal(LoVT))
12087 return SDValue();
12088
12089 SDLoc DL(Node);
12090 unsigned Opcode = Node->getOpcode();
12091
12092 // Don't expand if the result is likely to be unrolled anyway.
12093 if (!isOperationLegalOrCustomOrPromote(Opcode, LoVT))
12094 return SDValue();
12095
12096 SmallVector<SDValue, 4> LoOps, HiOps;
12097 for (const SDValue &V : Node->op_values()) {
12098 auto [Lo, Hi] = DAG.SplitVector(V, DL, LoVT, HiVT);
12099 LoOps.push_back(Lo);
12100 HiOps.push_back(Hi);
12101 }
12102
12103 SDValue SplitOpLo = DAG.getNode(Opcode, DL, LoVT, LoOps);
12104 SDValue SplitOpHi = DAG.getNode(Opcode, DL, HiVT, HiOps);
12105 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SplitOpLo, SplitOpHi);
12106}
unsigned const MachineRegisterInfo * MRI
static const LLT F32
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
basic Basic Alias true
block Block Frequency Analysis
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
static bool isUndef(ArrayRef< int > Mask)
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, const APInt &Demanded)
Check to see if the specified operand of the specified instruction is a constant integer.
#define RegName(no)
static LVOptions Options
Definition: LVOptions.cpp:25
lazy value info
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:557
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
#define T1
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
Function const char * Passes
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P, SDValue Op, SelectionDAG *DAG, const TargetLowering &TLI)
If we have an immediate, see if we can lower it.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG)
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, const APInt &UndefOp0, const APInt &UndefOp1)
Given a vector binary operation and known undefined elements for each input operand,...
static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact UDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, EVT VecVT, const SDLoc &dl, ElementCount SubEC)
static unsigned getConstraintPiority(TargetLowering::ConstraintType CT)
Return a number indicating our preference for chosing a type of constraint over another,...
static std::optional< bool > isFCmpEqualZero(FPClassTest Test, const fltSemantics &Semantics, const MachineFunction &MF)
Returns a true value if if this FPClassTest can be performed with an ordered fcmp to 0,...
static void turnVectorIntoSplatVector(MutableArrayRef< SDValue > Values, std::function< bool(SDValue)> Predicate, SDValue AlternativeReplacement=SDValue())
If all values in Values that don't match the predicate are same 'splat' value, then replace all value...
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT)
static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact SDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, SDValue N0, const APInt &C1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue combineShiftToAVG(SDValue Op, TargetLowering::TargetLoweringOpt &TLO, const TargetLowering &TLI, const APInt &DemandedBits, const APInt &DemandedElts, unsigned Depth)
This file describes how to lower LLVM code to machine code.
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT, SelectionDAG &DAG)
Scalarize a vector store, bitcasting to TargetVT to determine the scalar type.
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1334
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition: APFloat.h:1160
APInt bitcastToAPInt() const
Definition: APFloat.h:1351
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition: APFloat.h:1140
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition: APFloat.h:1100
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition: APFloat.h:1111
Class for arbitrary precision integers.
Definition: APInt.h:78
APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition: APInt.cpp:1547
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:234
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition: APInt.cpp:1732
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1407
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:449
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:986
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:229
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition: APInt.h:423
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1520
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition: APInt.h:1392
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1386
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1007
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1492
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:910
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:206
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1330
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:371
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1182
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:258
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1640
void setSignBit()
Set the sign bit to 1.
Definition: APInt.h:1340
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1468
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:209
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition: APInt.h:216
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:329
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1249
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1397
APInt reverseBits() const
Definition: APInt.cpp:741
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:834
void negate()
Negate this APInt in place.
Definition: APInt.h:1450
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1618
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1577
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:624
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:219
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1511
unsigned countLeadingZeros() const
Definition: APInt.h:1585
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition: APInt.h:356
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:370
unsigned logBase2() const
Definition: APInt.h:1739
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:475
void setAllBits()
Set every bit to 1.
Definition: APInt.h:1319
APInt multiplicativeInverse() const
Definition: APInt.cpp:1248
bool isMaxSignedValue() const
Determine if this is the largest signed value.
Definition: APInt.h:405
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:334
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition: APInt.h:1150
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:959
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
Definition: APInt.h:1367
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:873
APInt byteSwap() const
Definition: APInt.cpp:719
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:306
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:296
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:200
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1389
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:455
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:389
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:286
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:239
void clearHighBits(unsigned hiBits)
Set top hiBits bits to 0.
Definition: APInt.h:1424
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1542
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:858
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:851
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1635
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1221
void setBitVal(unsigned BitPosition, bool BitValue)
Set a given bit to a given value.
Definition: APInt.h:1343
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
bool hasAttributes() const
Return true if the builder has IR-level attributes.
Definition: Attributes.h:1123
bool contains(Attribute::AttrKind A) const
Return true if the builder has the specified attribute.
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
A "pseudo-class" with methods for operating on BUILD_VECTORs.
ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1112
This class represents a function call, abstracting a target machine's calling convention.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:709
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:271
This class represents a range of values.
Definition: ConstantRange.h:47
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:197
bool isBigEndian() const
Definition: DataLayout.h:198
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:847
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:353
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:657
std::vector< std::string > ConstraintCodeVector
Definition: InlineAsm.h:102
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:311
void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:398
Machine Value Type.
SimpleValueType SimpleTy
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
MCSymbol * getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate=false) const
getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
Function & getFunction()
Return the LLVM function that this machine code represents.
@ EK_GPRel32BlockAddress
EK_GPRel32BlockAddress - Each entry is an address of block, encoded with a relocation as gp-relative,...
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
@ EK_GPRel64BlockAddress
EK_GPRel64BlockAddress - Each entry is an address of block, encoded with a relocation as gp-relative,...
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
Definition: Module.h:462
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:310
iterator end() const
Definition: ArrayRef.h:360
iterator begin() const
Definition: ArrayRef.h:359
Class to represent pointers.
Definition: DerivedTypes.h:670
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setFlags(SDNodeFlags NewFlags)
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:750
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
Definition: SelectionDAG.h:982
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
bool isKnownNeverSNaN(SDValue Op, unsigned Depth=0) const
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
bool shouldOptForSize() const
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:503
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:458
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:856
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:497
bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:890
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:498
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
bool isKnownNeverZeroFloat(SDValue Op) const
Test whether the given floating point SDValue is known to never be positive or negative zero.
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:700
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:796
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:492
std::optional< uint64_t > getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has shift amounts that are all less than the element bit-width of the shift n...
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
std::optional< uint64_t > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:510
SDValue getSetCCVP(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Mask, SDValue EVL)
Helper function to make it easier to build VP_SETCCs if you just have an ISD::CondCode instead of an ...
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:580
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:906
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:683
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:571
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:265
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:150
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:144
iterator end() const
Definition: StringRef.h:118
Class to represent struct types.
Definition: DerivedTypes.h:218
void setAttributes(const CallBase *Call, unsigned ArgIdx)
Set CallLoweringInfo attribute flags based on a call instruction and called function attributes.
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool shouldRemoveRedundantExtend(SDValue Op) const
Return true (the default) if it is profitable to remove a sext_inreg(x) where the sext is redundant,...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
virtual bool isSafeMemOpType(MVT) const
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const
Indicates if any padding is guaranteed to go at the most significant bits when storing the type to me...
virtual EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &) const
Returns the target specific optimal type for load and store operations as a result of memset,...
LegalizeAction getCondCodeAction(ISD::CondCode CC, MVT VT) const
Return how the condition code should be treated: either it is legal, needs to be expanded to some oth...
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual MVT::SimpleValueType getCmpLibcallReturnType() const
Return the ValueType for comparison libcalls.
unsigned getBitWidthForCttzElements(Type *RetTy, ElementCount EC, bool ZeroIsPoison, const ConstantRange *VScaleRange) const
Return the minimum number of bits required to hold the maximum possible number of trailing zero vecto...
virtual bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool isLegalRC(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC) const
Return true if the value types that can be represented by the specified register class are all legal.
virtual bool shouldExpandCmpUsingSelects(EVT VT) const
Should we expand [US]CMP nodes using two selects and two compares, or by doing arithmetic on boolean ...
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool shouldExtendTypeInLibCall(EVT Type) const
Returns true if arguments should be extended in lib calls.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
BooleanContent
Enum that describes how the target represents true/false values.
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
virtual bool hasAndNotCompare(SDValue Y) const
Return true if the target should transform: (X & Y) == Y —> (~X & Y) == 0 (X & Y) !...
virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool isCtlzFast() const
Return true if ctlz instruction is fast.
virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, bool IsSigned) const
Return true if it is more correct/profitable to use strict FP_TO_INT conversion operations - canonica...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const
Get the CondCode that's to be used to test the result of the comparison libcall against zero.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal or custom for a comparison of the specified type...
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MulExpansionKind
Enum that specifies when a multiplication should be expanded.
static ISD::NodeType getExtendForContent(BooleanContent Content)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
virtual ConstraintWeight getMultipleConstraintMatchWeight(AsmOperandInfo &info, int maIndex) const
Examine constraint type and operand type and determine a weight value.
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes.
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]MULO.
bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL into two nodes.
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isUsedByReturnOnly(SDNode *, SDValue &) const
Return true if result of the specified node is used by a return node only.
virtual void computeKnownBitsForFrameIndex(int FIOp, KnownBits &Known, const MachineFunction &MF) const
Determine which of the bits of FrameIndex FIOp are known to be 0.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
virtual unsigned computeNumSignBitsForTargetInstr(GISelKnownBits &Analysis, Register R, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand VP_BSWAP nodes.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand CTLZ/CTLZ_ZERO_UNDEF nodes.
SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand BITREVERSE nodes.
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand CTTZ/CTTZ_ZERO_UNDEF nodes.
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
SDValue expandABD(SDNode *N, SelectionDAG &DAG) const
Expand ABDS/ABDU nodes.
virtual Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine the known alignment for the pointer value R.
std::vector< AsmOperandInfo > AsmOperandInfoVector
SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]SHLSAT.
SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test, SDNodeFlags Flags, const SDLoc &DL, SelectionDAG &DAG) const
Expand check for floating point class.
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const
Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all bits from only some vector eleme...
virtual bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::S(ADD|SUB)O.
SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand VP_BITREVERSE nodes.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_* into an explicit calculation.
bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
Check to see if the specified operand of the specified instruction is a constant integer.
SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ_ELTS/VP_CTTZ_ELTS_ZERO_UNDEF nodes.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual const char * getTargetNodeName(unsigned Opcode) const
This method returns the name of a target specific DAG node.
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand float to UINT conversion.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded vector elements, returning true on success...
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const
Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.
SDValue expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimumnum/fmaximumnum into multiple comparison with selects.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::VECTOR_SPLICE.
virtual const char * LowerXConstraint(EVT ConstraintVT) const
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand CTPOP nodes.
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
SDValue expandVectorNaryOpBySplitting(SDNode *Node, SelectionDAG &DAG) const
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand BSWAP nodes.
SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimum/fmaximum into multiple comparison with selects.
SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned NumBitsPerElt) const
Expand CTTZ via Table Lookup.
virtual bool isKnownNeverNaNForTargetNode(SDValue Op, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const
If SNaN is false, returns true if Op is known to never be any NaN; if SNaN is true, returns true if Op is known to never be a signaling NaN.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit urem by constant and other arit...
SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, EVT SubVecVT, SDValue Index) const
Get a pointer to a sub-vector of type SubVecVT at index Idx located in memory for a vector of type Ve...
virtual void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool isPositionIndependent() const
std::pair< StringRef, TargetLowering::ConstraintType > ConstraintPair
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const
Given an OpInfo with list of constraints codes as strings, return a sorted Vector of pairs of constra...
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue, const SDLoc &DL, const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed, EVT WideVT, const SDValue LL, const SDValue LH, const SDValue RL, const SDValue RH, SDValue &Lo, SDValue &Hi) const
forceExpandWideMUL - Unconditionally expand a MUL into either a libcall or brute force via a wide mul...
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, EVT DataVT, SelectionDAG &DAG, bool IsCompressedMemory) const
Increments memory address Addr according to the type of the value DataVT that should be stored.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, const SelectionDAG &DAG, unsigned Depth=0) const
Return true if vector Op has the same value across all DemandedElts, indicating any elements which ma...
SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL, SelectionDAG &DAG) const
Truncate Op to ResultVT.
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const
Expand funnel shift.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling=false) const
Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC on the current target.
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const
Return if N is a True value when extended to VT.
bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &DemandedBits, TargetLoweringOpt &TLO) const
Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTPOP nodes.
SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS, SDValue RHS, unsigned Scale, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]DIVFIX[SAT].
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes.
SDValue expandVECTOR_COMPRESS(SDNode *Node, SelectionDAG &DAG) const
Expand a vector VECTOR_COMPRESS into a sequence of extract element, store temporarily,...
virtual const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const
This method returns the constant pool value that will be loaded by LD.
SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const
Expand round(fp) to fp conversion.
SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const
Try to convert the fminnum/fmaxnum to a compare/select sequence.
SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const
Expand rotations.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
SDValue expandCMP(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]CMP.
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT].
SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][MIN|MAX].
SDValue expandVectorFindLastActive(SDNode *N, SelectionDAG &DAG) const
Expand VECTOR_FIND_LAST_ACTIVE nodes.
virtual void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::U(ADD|SUB)O.
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand UINT(i64) to double(f64) conversion.
bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, respectively,...
SDValue expandAVG(SDNode *N, SelectionDAG &DAG) const
Expand vector/scalar AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
bool isPositionIndependent() const
const Triple & getTargetTriple() const
TargetOptions Options
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
iterator_range< regclass_iterator > regclasses() const
virtual StringRef getRegAsmName(MCRegister Reg) const
Return the assembly name for Reg.
bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const
Return true if the given TargetRegisterClass has the ValueType T.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition: Triple.h:752
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
const fltSemantics & getFltSemantics() const
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition: Type.h:295
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:310
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
const Value * stripPointerCastsAndAliases() const
Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
Definition: Value.cpp:698
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition: TypeSize.h:183
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by.
Definition: APInt.cpp:2982
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:780
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:753
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition: ISDOpcodes.h:512
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:574
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:744
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition: ISDOpcodes.h:374
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:276
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:502
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1102
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:380
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:498
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:841
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:558
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:717
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:871
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:954
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
Definition: ISDOpcodes.h:387
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1494
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:685
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:635
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:752
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:981
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:334
@ BRIND
BRIND - Indirect branch.
Definition: ISDOpcodes.h:1123
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:515
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:522
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:757
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1451
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:229
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:642
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:215
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1444
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:674
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:615
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:588
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:1044
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:439
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:550
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:440
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:772
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1031
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition: ISDOpcodes.h:366
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:338
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:860
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:849
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:697
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:393
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:939
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:766
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:310
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:457
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1050
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:887
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition: ISDOpcodes.h:164
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:680
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:286
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition: ISDOpcodes.h:223
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:539
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:627
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:920
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:882
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:906
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:794
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ AssertZext
Definition: ISDOpcodes.h:62
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
Definition: ISDOpcodes.h:1055
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:692
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:530
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
NodeType getExtForLoadExtType(bool IsFP, LoadExtType)
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantSDNode predicate.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isTrueWhenEqual(CondCode Cond)
Return true if the specified condition returns true if the two operands to the condition are equal.
Definition: ISDOpcodes.h:1668
unsigned getUnorderedFlavor(CondCode Cond)
This function returns 0 if the condition is always false if an operand is a NaN, 1 if the condition i...
Definition: ISDOpcodes.h:1673
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1643
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1610
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1590
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp)
Evaluates if the specified FP class test is better performed as the inverse (i.e.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition: MathExtras.h:557
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition: bit.h:342
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:342
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:293
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
auto find_if_not(R &&Range, UnaryPredicate P)
Definition: STLExtras.h:1771
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isConstFalseVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Definition: Utils.cpp:1625
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition: APFloat.h:1540
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:384
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:302
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:306
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:318
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Represent subnormal handling kind for floating point instruction inputs and outputs.
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environme...
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
@ PositiveZero
Denormals are flushed to positive zero.
@ IEEE
IEEE-754 denormal numbers preserved.
constexpr bool inputsAreZero() const
Return true if input denormals must be implicitly treated as 0.
Extended Value Type.
Definition: ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition: ValueTypes.h:121
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:279
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:295
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:345
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:238
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:354
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:425
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition: ValueTypes.h:465
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition: ValueTypes.h:407
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
bool isFixedLengthVector() const
Definition: ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:210
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:102
const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
Definition: ValueTypes.cpp:320
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:303
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:448
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
ConstraintPrefix Type
Type - The basic type of the constraint: input/output/clobber/label.
Definition: InlineAsm.h:126
int MatchingInput
MatchingInput - If this is not -1, this is an output constraint where an input constraint is required...
Definition: InlineAsm.h:136
ConstraintCodeVector Codes
Code - The constraint code, either the register name (in braces) or the constraint letter/number.
Definition: InlineAsm.h:154
SubConstraintInfoVector multipleAlternatives
multipleAlternatives - If there are multiple alternative constraints, this array will contain them.
Definition: InlineAsm.h:161
bool isIndirect
isIndirect - True if this operand is an indirect operand.
Definition: InlineAsm.h:150
bool hasMatchingInput() const
hasMatchingInput - Return true if this is an output constraint that has a matching input constraint.
Definition: InlineAsm.h:140
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition: KnownBits.h:293
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition: KnownBits.h:178
unsigned countMinSignBits() const
Returns the number of times the sign bit is replicated into the other bits.
Definition: KnownBits.h:247
static KnownBits smax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smax(LHS, RHS).
Definition: KnownBits.cpp:211
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition: KnownBits.h:100
bool isZero() const
Returns true if value is all zero.
Definition: KnownBits.h:79
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:234
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:65
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:153
static std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
Definition: KnownBits.cpp:536
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition: KnownBits.h:281
KnownBits concat(const KnownBits &Lo) const
Concatenate the bits from Lo onto the bottom of *this.
Definition: KnownBits.h:225
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:43
static KnownBits umax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umax(LHS, RHS).
Definition: KnownBits.cpp:187
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:164
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:73
KnownBits unionWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for either this or RHS or both.
Definition: KnownBits.h:313
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:303
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:172
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:240
static KnownBits smin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smin(LHS, RHS).
Definition: KnownBits.cpp:215
static std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
Definition: KnownBits.cpp:502
static std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
Definition: KnownBits.cpp:542
static KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
Definition: KnownBits.cpp:60
static std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
Definition: KnownBits.cpp:518
static std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
Definition: KnownBits.cpp:522
bool isNegative() const
Returns true if this value is known to be negative.
Definition: KnownBits.h:97
static KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
Definition: KnownBits.cpp:804
KnownBits anyext(unsigned BitWidth) const
Return known bits for an "any" extension of the value we're tracking, where we don't know anything ab...
Definition: KnownBits.h:159
static std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
Definition: KnownBits.cpp:546
static std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
Definition: KnownBits.cpp:526
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition: KnownBits.h:278
static std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
Definition: KnownBits.cpp:512
static KnownBits umin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umin(LHS, RHS).
Definition: KnownBits.cpp:205
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Magic data for optimising signed division by a constant.
static SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequence of multiplies, adds and shifts.
This contains information for each constraint that we are lowering.
MVT ConstraintVT
The ValueType for the operand value.
TargetLowering::ConstraintType ConstraintType
Information about the constraint code, e.g. Register, RegisterClass, Memory, Other, or Unknown.
std::string ConstraintCode
This contains the actual string for the code, like "m".
Value * CallOperandVal
If this is the result output operand or a clobber, this is null, otherwise it is the incoming operand to the CallInst.
unsigned getMatchedOperand() const
If this is an input matching constraint, this method returns the output operand it matches.
bool isMatchingInputConstraint() const
Return true of this is an input operand that is a matching constraint like "4".
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
CallLoweringInfo & setChain(SDValue InChain)
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setIsPostTypeLegalization(bool Value=true)
MakeLibCallOptions & setIsSigned(bool Value=true)
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetLowering to its clients that want to combine.
bool CombineTo(SDValue O, SDValue N)
Magic data for optimising unsigned division by a constant.
static UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a sequence of multiplies, adds and shifts.