LLVM 22.0.0git
DAGCombiner.cpp
1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/APSInt.h"
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/DenseMap.h"
24#include "llvm/ADT/STLExtras.h"
25#include "llvm/ADT/SetVector.h"
28#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/Statistic.h"
52#include "llvm/IR/Attributes.h"
53#include "llvm/IR/Constant.h"
54#include "llvm/IR/DataLayout.h"
56#include "llvm/IR/Function.h"
57#include "llvm/IR/Metadata.h"
62#include "llvm/Support/Debug.h"
70#include <algorithm>
71#include <cassert>
72#include <cstdint>
73#include <functional>
74#include <iterator>
75#include <optional>
76#include <string>
77#include <tuple>
78#include <utility>
79#include <variant>
80
81#include "MatchContext.h"
82
83using namespace llvm;
84using namespace llvm::SDPatternMatch;
85
86#define DEBUG_TYPE "dagcombine"
87
88STATISTIC(NodesCombined , "Number of dag nodes combined");
89STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
90STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
91STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
92STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
93STATISTIC(SlicedLoads, "Number of load sliced");
94STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
95
96DEBUG_COUNTER(DAGCombineCounter, "dagcombine",
97 "Controls whether a DAG combine is performed for a node");
98
99static cl::opt<bool>
100CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
101 cl::desc("Enable DAG combiner's use of IR alias analysis"));
102
103static cl::opt<bool>
104UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
105 cl::desc("Enable DAG combiner's use of TBAA"));
106
107#ifndef NDEBUG
108static cl::opt<std::string>
109CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
110 cl::desc("Only use DAG-combiner alias analysis in this"
111 " function"));
112#endif
113
114/// Hidden option to stress test load slicing, i.e., when this option
115/// is enabled, load slicing bypasses most of its profitability guards.
116static cl::opt<bool>
117StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
118 cl::desc("Bypass the profitability model of load slicing"),
119 cl::init(false));
120
121static cl::opt<bool>
122 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
123 cl::desc("DAG combiner may split indexing from loads"));
124
125static cl::opt<bool>
126 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
127 cl::desc("DAG combiner enable merging multiple stores "
128 "into a wider store"));
129
131 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
132 cl::desc("Limit the number of operands to inline for Token Factors"));
133
135 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
136 cl::desc("Limit the number of times for the same StoreNode and RootNode "
137 "to bail out in store merging dependence check"));
138
140 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
141 cl::desc("DAG combiner enable reducing the width of load/op/store "
142 "sequence"));
144 "combiner-reduce-load-op-store-width-force-narrowing-profitable",
145 cl::Hidden, cl::init(false),
146 cl::desc("DAG combiner force override the narrowing profitable check when "
147 "reducing the width of load/op/store sequences"));
148
150 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
151 cl::desc("DAG combiner enable load/<replace bytes>/store with "
152 "a narrower store"));
153
154static cl::opt<bool> DisableCombines("combiner-disabled", cl::Hidden,
155 cl::init(false),
156 cl::desc("Disable the DAG combiner"));
157
158namespace {
159
160 class DAGCombiner {
161 SelectionDAG &DAG;
162 const TargetLowering &TLI;
163 const SelectionDAGTargetInfo *STI;
164 CombineLevel Level = BeforeLegalizeTypes;
165 CodeGenOptLevel OptLevel;
166 bool LegalDAG = false;
167 bool LegalOperations = false;
168 bool LegalTypes = false;
169 bool ForCodeSize;
170 bool DisableGenericCombines;
171
172 /// Worklist of all of the nodes that need to be simplified.
173 ///
174 /// This must behave as a stack -- new nodes to process are pushed onto the
175 /// back and when processing we pop off of the back.
176 ///
177 /// The worklist will not contain duplicates but may contain null entries
178 /// due to nodes being deleted from the underlying DAG. For fast lookup and
179 /// deduplication, the index of the node in this vector is stored in the
180 /// node in SDNode::CombinerWorklistIndex.
181 SmallVector<SDNode *, 64> Worklist;
182
183 /// This records all nodes attempted to be added to the worklist since we
184 /// considered a new worklist entry. As we do not add duplicate nodes
185 /// to the worklist, this is different from the tail of the worklist.
186 SmallSetVector<SDNode *, 32> PruningList;
187
188 /// Map from candidate StoreNode to the pair of RootNode and count.
189 /// The count is used to track how many times we have seen the StoreNode
190 /// with the same RootNode bail out in dependence check. If we have seen
191 /// the bail out for the same pair many times over a limit, we won't
192 /// consider the StoreNode with the same RootNode as store merging
193 /// candidate again.
194 DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
195
196 // BatchAA - Used for DAG load/store alias analysis.
197 BatchAAResults *BatchAA;
198
199 /// This caches all chains that have already been processed in
200 /// DAGCombiner::getStoreMergeCandidates() and found to have no mergeable
201 /// stores candidates.
202 SmallPtrSet<SDNode *, 4> ChainsWithoutMergeableStores;
203
204 /// When an instruction is simplified, add all users of the instruction to
205 /// the work lists because they might get more simplified now.
206 void AddUsersToWorklist(SDNode *N) {
207 for (SDNode *Node : N->users())
208 AddToWorklist(Node);
209 }
210
211 /// Convenient shorthand to add a node and all of its users to the worklist.
212 void AddToWorklistWithUsers(SDNode *N) {
213 AddUsersToWorklist(N);
214 AddToWorklist(N);
215 }
216
217 // Prune potentially dangling nodes. This is called after
218 // any visit to a node, but should also be called during a visit after any
219 // failed combine which may have created a DAG node.
220 void clearAddedDanglingWorklistEntries() {
221 // Check any nodes added to the worklist to see if they are prunable.
222 while (!PruningList.empty()) {
223 auto *N = PruningList.pop_back_val();
224 if (N->use_empty())
225 recursivelyDeleteUnusedNodes(N);
226 }
227 }
228
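 /// Pop the next node to process off the back of the worklist, first deleting
 /// any dead (use-empty) nodes queued for pruning. Null worklist slots left by
 /// removed nodes are skipped; returns nullptr once the worklist is empty.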
229 SDNode *getNextWorklistEntry() {
230 // Before we do any work, remove nodes that are not in use.
231 clearAddedDanglingWorklistEntries();
232 SDNode *N = nullptr;
233 // The Worklist holds the SDNodes in order, but it may contain null
234 // entries.
235 while (!N && !Worklist.empty()) {
236 N = Worklist.pop_back_val();
237 }
238
239 if (N) {
240 assert(N->getCombinerWorklistIndex() >= 0 &&
241 "Found a worklist entry without a corresponding map entry!");
242 // Set to -2 to indicate that we combined the node.
243 N->setCombinerWorklistIndex(-2);
244 }
245 return N;
246 }
247
248 /// Call the node-specific routine that folds each particular type of node.
249 SDValue visit(SDNode *N);
250
251 public:
252 DAGCombiner(SelectionDAG &D, BatchAAResults *BatchAA, CodeGenOptLevel OL)
253 : DAG(D), TLI(D.getTargetLoweringInfo()),
254 STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL),
255 BatchAA(BatchAA) {
256 ForCodeSize = DAG.shouldOptForSize();
257 DisableGenericCombines =
258 DisableCombines || (STI && STI->disableGenericCombines(OptLevel));
259
260 MaximumLegalStoreInBits = 0;
261 // We use the minimum store size here, since that's all we can guarantee
262 // for the scalable vector types.
263 for (MVT VT : MVT::all_valuetypes())
264 if (EVT(VT).isSimple() && VT != MVT::Other &&
265 TLI.isTypeLegal(EVT(VT)) &&
266 VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
267 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
268 }
269
270 void ConsiderForPruning(SDNode *N) {
271 // Mark this for potential pruning.
272 PruningList.insert(N);
273 }
274
275 /// Add to the worklist making sure its instance is at the back (next to be
276 /// processed).
277 void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true,
278 bool SkipIfCombinedBefore = false) {
279 assert(N->getOpcode() != ISD::DELETED_NODE &&
280 "Deleted Node added to Worklist");
281
282 // Skip handle nodes as they can't usefully be combined and confuse the
283 // zero-use deletion strategy.
284 if (N->getOpcode() == ISD::HANDLENODE)
285 return;
286
287 if (SkipIfCombinedBefore && N->getCombinerWorklistIndex() == -2)
288 return;
289
290 if (IsCandidateForPruning)
291 ConsiderForPruning(N);
292
293 if (N->getCombinerWorklistIndex() < 0) {
294 N->setCombinerWorklistIndex(Worklist.size());
295 Worklist.push_back(N);
296 }
297 }
298
299 /// Remove all instances of N from the worklist.
300 void removeFromWorklist(SDNode *N) {
301 PruningList.remove(N);
302 StoreRootCountMap.erase(N);
303
304 int WorklistIndex = N->getCombinerWorklistIndex();
305 // If not in the worklist, the index might be -1 or -2 (was combined
306 // before). As the node gets deleted anyway, there's no need to update
307 // the index.
308 if (WorklistIndex < 0)
309 return; // Not in the worklist.
310
311 // Null out the entry rather than erasing it to avoid a linear operation.
312 Worklist[WorklistIndex] = nullptr;
313 N->setCombinerWorklistIndex(-1);
314 }
315
316 void deleteAndRecombine(SDNode *N);
317 bool recursivelyDeleteUnusedNodes(SDNode *N);
318
319 /// Replaces all uses of the results of one DAG node with new values.
320 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
321 bool AddTo = true);
322
323 /// Replaces all uses of the results of one DAG node with new values.
324 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
325 return CombineTo(N, &Res, 1, AddTo);
326 }
327
328 /// Replaces all uses of the results of one DAG node with new values.
329 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
330 bool AddTo = true) {
331 SDValue To[] = { Res0, Res1 };
332 return CombineTo(N, To, 2, AddTo);
333 }
334
335 SDValue CombineTo(SDNode *N, SmallVectorImpl<SDValue> *To,
336 bool AddTo = true) {
337 return CombineTo(N, To->data(), To->size(), AddTo);
338 }
339
340 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
341
342 private:
343 unsigned MaximumLegalStoreInBits;
344
345 /// Check the specified integer node value to see if it can be simplified or
346 /// if things it uses can be simplified by bit propagation.
347 /// If so, return true.
348 bool SimplifyDemandedBits(SDValue Op) {
349 unsigned BitWidth = Op.getScalarValueSizeInBits();
350 APInt DemandedBits = APInt::getAllOnes(BitWidth);
351 return SimplifyDemandedBits(Op, DemandedBits);
352 }
353
354 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
355 EVT VT = Op.getValueType();
356 APInt DemandedElts = VT.isFixedLengthVector()
357 ? APInt::getAllOnes(VT.getVectorNumElements())
358 : APInt(1, 1);
359 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, false);
360 }
361
362 /// Check the specified vector node value to see if it can be simplified or
363 /// if things it uses can be simplified as it only uses some of the
364 /// elements. If so, return true.
365 bool SimplifyDemandedVectorElts(SDValue Op) {
366 // TODO: For now just pretend it cannot be simplified.
367 if (Op.getValueType().isScalableVector())
368 return false;
369
370 unsigned NumElts = Op.getValueType().getVectorNumElements();
371 APInt DemandedElts = APInt::getAllOnes(NumElts);
372 return SimplifyDemandedVectorElts(Op, DemandedElts);
373 }
374
375 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
376 const APInt &DemandedElts,
377 bool AssumeSingleUse = false);
378 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
379 bool AssumeSingleUse = false);
380
381 bool CombineToPreIndexedLoadStore(SDNode *N);
382 bool CombineToPostIndexedLoadStore(SDNode *N);
383 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
384 bool SliceUpLoad(SDNode *N);
385
386 // Looks up the chain to find a unique (unaliased) store feeding the passed
387 // load. If no such store is found, returns a nullptr.
388 // Note: This will look past a CALLSEQ_START if the load is chained to it so
389 // that it can find stack stores for byval params.
390 StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
391 // Scalars have size 0 to distinguish from singleton vectors.
392 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
393 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
394 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
395
396 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
397 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
398 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
399 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
400 SDValue PromoteIntBinOp(SDValue Op);
401 SDValue PromoteIntShiftOp(SDValue Op);
402 SDValue PromoteExtend(SDValue Op);
403 bool PromoteLoad(SDValue Op);
404
405 SDValue foldShiftToAvg(SDNode *N, const SDLoc &DL);
406 // Fold `a bitwiseop (~b +/- c)` -> `a bitwiseop ~(b -/+ c)`
407 SDValue foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT);
408
409 SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
410 SDValue RHS, SDValue True, SDValue False,
411 ISD::CondCode CC);
412
413 /// Call the node-specific routine that knows how to fold each
414 /// particular type of node. If that doesn't do anything, try the
415 /// target-specific DAG combines.
416 SDValue combine(SDNode *N);
417
418 // Visitation implementation - Implement dag node combining for different
419 // node types. The semantics are as follows:
420 // Return Value:
421 // SDValue.getNode() == 0 - No change was made
422 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
423 // otherwise - N should be replaced by the returned Operand.
424 //
425 SDValue visitTokenFactor(SDNode *N);
426 SDValue visitMERGE_VALUES(SDNode *N);
427 SDValue visitADD(SDNode *N);
428 SDValue visitADDLike(SDNode *N);
429 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1,
430 SDNode *LocReference);
431 SDValue visitPTRADD(SDNode *N);
432 SDValue visitSUB(SDNode *N);
433 SDValue visitADDSAT(SDNode *N);
434 SDValue visitSUBSAT(SDNode *N);
435 SDValue visitADDC(SDNode *N);
436 SDValue visitADDO(SDNode *N);
437 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
438 SDValue visitSUBC(SDNode *N);
439 SDValue visitSUBO(SDNode *N);
440 SDValue visitADDE(SDNode *N);
441 SDValue visitUADDO_CARRY(SDNode *N);
442 SDValue visitSADDO_CARRY(SDNode *N);
443 SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
444 SDNode *N);
445 SDValue visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
446 SDNode *N);
447 SDValue visitSUBE(SDNode *N);
448 SDValue visitUSUBO_CARRY(SDNode *N);
449 SDValue visitSSUBO_CARRY(SDNode *N);
450 template <class MatchContextClass> SDValue visitMUL(SDNode *N);
451 SDValue visitMULFIX(SDNode *N);
452 SDValue useDivRem(SDNode *N);
453 SDValue visitSDIV(SDNode *N);
454 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
455 SDValue visitUDIV(SDNode *N);
456 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
457 SDValue visitREM(SDNode *N);
458 SDValue visitMULHU(SDNode *N);
459 SDValue visitMULHS(SDNode *N);
460 SDValue visitAVG(SDNode *N);
461 SDValue visitABD(SDNode *N);
462 SDValue visitSMUL_LOHI(SDNode *N);
463 SDValue visitUMUL_LOHI(SDNode *N);
464 SDValue visitMULO(SDNode *N);
465 SDValue visitIMINMAX(SDNode *N);
466 SDValue visitAND(SDNode *N);
467 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
468 SDValue visitOR(SDNode *N);
469 SDValue visitORLike(SDValue N0, SDValue N1, const SDLoc &DL);
470 SDValue visitXOR(SDNode *N);
471 SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
472 SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
473 SDValue visitSHL(SDNode *N);
474 SDValue visitSRA(SDNode *N);
475 SDValue visitSRL(SDNode *N);
476 SDValue visitFunnelShift(SDNode *N);
477 SDValue visitSHLSAT(SDNode *N);
478 SDValue visitRotate(SDNode *N);
479 SDValue visitABS(SDNode *N);
480 SDValue visitBSWAP(SDNode *N);
481 SDValue visitBITREVERSE(SDNode *N);
482 SDValue visitCTLZ(SDNode *N);
483 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
484 SDValue visitCTTZ(SDNode *N);
485 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
486 SDValue visitCTPOP(SDNode *N);
487 SDValue visitSELECT(SDNode *N);
488 SDValue visitVSELECT(SDNode *N);
489 SDValue visitVP_SELECT(SDNode *N);
490 SDValue visitSELECT_CC(SDNode *N);
491 SDValue visitSETCC(SDNode *N);
492 SDValue visitSETCCCARRY(SDNode *N);
493 SDValue visitSIGN_EXTEND(SDNode *N);
494 SDValue visitZERO_EXTEND(SDNode *N);
495 SDValue visitANY_EXTEND(SDNode *N);
496 SDValue visitAssertExt(SDNode *N);
497 SDValue visitAssertAlign(SDNode *N);
498 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
499 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
500 SDValue visitTRUNCATE(SDNode *N);
501 SDValue visitTRUNCATE_USAT_U(SDNode *N);
502 SDValue visitBITCAST(SDNode *N);
503 SDValue visitFREEZE(SDNode *N);
504 SDValue visitBUILD_PAIR(SDNode *N);
505 SDValue visitFADD(SDNode *N);
506 SDValue visitVP_FADD(SDNode *N);
507 SDValue visitVP_FSUB(SDNode *N);
508 SDValue visitSTRICT_FADD(SDNode *N);
509 SDValue visitFSUB(SDNode *N);
510 SDValue visitFMUL(SDNode *N);
511 template <class MatchContextClass> SDValue visitFMA(SDNode *N);
512 SDValue visitFMAD(SDNode *N);
513 SDValue visitFMULADD(SDNode *N);
514 SDValue visitFDIV(SDNode *N);
515 SDValue visitFREM(SDNode *N);
516 SDValue visitFSQRT(SDNode *N);
517 SDValue visitFCOPYSIGN(SDNode *N);
518 SDValue visitFPOW(SDNode *N);
519 SDValue visitFCANONICALIZE(SDNode *N);
520 SDValue visitSINT_TO_FP(SDNode *N);
521 SDValue visitUINT_TO_FP(SDNode *N);
522 SDValue visitFP_TO_SINT(SDNode *N);
523 SDValue visitFP_TO_UINT(SDNode *N);
524 SDValue visitXROUND(SDNode *N);
525 SDValue visitFP_ROUND(SDNode *N);
526 SDValue visitFP_EXTEND(SDNode *N);
527 SDValue visitFNEG(SDNode *N);
528 SDValue visitFABS(SDNode *N);
529 SDValue visitFCEIL(SDNode *N);
530 SDValue visitFTRUNC(SDNode *N);
531 SDValue visitFFREXP(SDNode *N);
532 SDValue visitFFLOOR(SDNode *N);
533 SDValue visitFMinMax(SDNode *N);
534 SDValue visitBRCOND(SDNode *N);
535 SDValue visitBR_CC(SDNode *N);
536 SDValue visitLOAD(SDNode *N);
537
538 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
539 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
540 SDValue replaceStoreOfInsertLoad(StoreSDNode *ST);
541
542 bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
543
544 SDValue visitSTORE(SDNode *N);
545 SDValue visitATOMIC_STORE(SDNode *N);
546 SDValue visitLIFETIME_END(SDNode *N);
547 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
548 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
549 SDValue visitBUILD_VECTOR(SDNode *N);
550 SDValue visitCONCAT_VECTORS(SDNode *N);
551 SDValue visitVECTOR_INTERLEAVE(SDNode *N);
552 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
553 SDValue visitVECTOR_SHUFFLE(SDNode *N);
554 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
555 SDValue visitINSERT_SUBVECTOR(SDNode *N);
556 SDValue visitVECTOR_COMPRESS(SDNode *N);
557 SDValue visitMLOAD(SDNode *N);
558 SDValue visitMSTORE(SDNode *N);
559 SDValue visitMGATHER(SDNode *N);
560 SDValue visitMSCATTER(SDNode *N);
561 SDValue visitMHISTOGRAM(SDNode *N);
562 SDValue visitPARTIAL_REDUCE_MLA(SDNode *N);
563 SDValue visitVPGATHER(SDNode *N);
564 SDValue visitVPSCATTER(SDNode *N);
565 SDValue visitVP_STRIDED_LOAD(SDNode *N);
566 SDValue visitVP_STRIDED_STORE(SDNode *N);
567 SDValue visitFP_TO_FP16(SDNode *N);
568 SDValue visitFP16_TO_FP(SDNode *N);
569 SDValue visitFP_TO_BF16(SDNode *N);
570 SDValue visitBF16_TO_FP(SDNode *N);
571 SDValue visitVECREDUCE(SDNode *N);
572 SDValue visitVPOp(SDNode *N);
573 SDValue visitGET_FPENV_MEM(SDNode *N);
574 SDValue visitSET_FPENV_MEM(SDNode *N);
575
576 template <class MatchContextClass>
577 SDValue visitFADDForFMACombine(SDNode *N);
578 template <class MatchContextClass>
579 SDValue visitFSUBForFMACombine(SDNode *N);
580 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
581
582 SDValue XformToShuffleWithZero(SDNode *N);
583 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
584 const SDLoc &DL,
585 SDNode *N,
586 SDValue N0,
587 SDValue N1);
588 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
589 SDValue N1, SDNodeFlags Flags);
590 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
591 SDValue N1, SDNodeFlags Flags);
592 SDValue reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL,
593 EVT VT, SDValue N0, SDValue N1,
594 SDNodeFlags Flags = SDNodeFlags());
595
596 SDValue visitShiftByConstant(SDNode *N);
597
598 SDValue foldSelectOfConstants(SDNode *N);
599 SDValue foldVSelectOfConstants(SDNode *N);
600 SDValue foldBinOpIntoSelect(SDNode *BO);
601 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
602 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
603 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
604 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
605 SDValue N2, SDValue N3, ISD::CondCode CC,
606 bool NotExtCompare = false);
607 SDValue convertSelectOfFPConstantsToLoadOffset(
608 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
609 ISD::CondCode CC);
610 SDValue foldSignChangeInBitcast(SDNode *N);
611 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
612 SDValue N2, SDValue N3, ISD::CondCode CC);
613 SDValue foldSelectOfBinops(SDNode *N);
614 SDValue foldSextSetcc(SDNode *N);
615 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
616 const SDLoc &DL);
617 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL);
618 SDValue foldABSToABD(SDNode *N, const SDLoc &DL);
619 SDValue foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
620 SDValue False, ISD::CondCode CC, const SDLoc &DL);
621 SDValue foldSelectToUMin(SDValue LHS, SDValue RHS, SDValue True,
622 SDValue False, ISD::CondCode CC, const SDLoc &DL);
623 SDValue unfoldMaskedMerge(SDNode *N);
624 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
625 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
626 const SDLoc &DL, bool foldBooleans);
627 SDValue rebuildSetCC(SDValue N);
628
629 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
630 SDValue &CC, bool MatchStrict = false) const;
631 bool isOneUseSetCC(SDValue N) const;
632
633 SDValue foldAddToAvg(SDNode *N, const SDLoc &DL);
634 SDValue foldSubToAvg(SDNode *N, const SDLoc &DL);
635
636 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
637 unsigned HiOp);
638 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
639 SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
640 const TargetLowering &TLI);
641 SDValue foldPartialReduceMLAMulOp(SDNode *N);
642 SDValue foldPartialReduceAdd(SDNode *N);
643
644 SDValue CombineExtLoad(SDNode *N);
645 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
646 SDValue combineRepeatedFPDivisors(SDNode *N);
647 SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
648 SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf);
649 SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
650 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
651 SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
652 SDValue BuildSDIV(SDNode *N);
653 SDValue BuildSDIVPow2(SDNode *N);
654 SDValue BuildUDIV(SDNode *N);
655 SDValue BuildSREMPow2(SDNode *N);
656 SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
657 SDValue BuildLogBase2(SDValue V, const SDLoc &DL,
658 bool KnownNeverZero = false,
659 bool InexpensiveOnly = false,
660 std::optional<EVT> OutVT = std::nullopt);
661 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
662 SDValue buildRsqrtEstimate(SDValue Op);
663 SDValue buildSqrtEstimate(SDValue Op);
664 SDValue buildSqrtEstimateImpl(SDValue Op, bool Recip);
665 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
666 bool Reciprocal);
667 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
668 bool Reciprocal);
669 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
670 bool DemandHighBits = true);
671 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
672 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
673 SDValue InnerPos, SDValue InnerNeg, bool FromAdd,
674 bool HasPos, unsigned PosOpcode,
675 unsigned NegOpcode, const SDLoc &DL);
676 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
677 SDValue InnerPos, SDValue InnerNeg, bool FromAdd,
678 bool HasPos, unsigned PosOpcode,
679 unsigned NegOpcode, const SDLoc &DL);
680 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL,
681 bool FromAdd);
682 SDValue MatchLoadCombine(SDNode *N);
683 SDValue mergeTruncStores(StoreSDNode *N);
684 SDValue reduceLoadWidth(SDNode *N);
685 SDValue ReduceLoadOpStoreWidth(SDNode *N);
686 SDValue splitMergedValStore(StoreSDNode *ST);
687 SDValue TransformFPLoadStorePair(SDNode *N);
688 SDValue convertBuildVecZextToZext(SDNode *N);
689 SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
690 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
691 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
692 SDValue reduceBuildVecToShuffle(SDNode *N);
693 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
694 ArrayRef<int> VectorMask, SDValue VecIn1,
695 SDValue VecIn2, unsigned LeftIdx,
696 bool DidSplitVec);
697 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
698
699 /// Walk up chain skipping non-aliasing memory nodes,
700 /// looking for aliasing nodes and adding them to the Aliases vector.
701 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
702 SmallVectorImpl<SDValue> &Aliases);
703
704 /// Return true if there is any possibility that the two addresses overlap.
705 bool mayAlias(SDNode *Op0, SDNode *Op1) const;
706
707 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
708 /// chain (aliasing node.)
709 SDValue FindBetterChain(SDNode *N, SDValue Chain);
710
711 /// Try to replace a store and any possibly adjacent stores on
712 /// consecutive chains with better chains. Return true only if St is
713 /// replaced.
714 ///
715 /// Notice that other chains may still be replaced even if the function
716 /// returns false.
717 bool findBetterNeighborChains(StoreSDNode *St);
718
719 // Helper for findBetterNeighborChains. Walk up store chain add additional
720 // chained stores that do not overlap and can be parallelized.
721 bool parallelizeChainedStores(StoreSDNode *St);
722
723 /// Holds a pointer to an LSBaseSDNode as well as information on where it
724 /// is located in a sequence of memory operations connected by a chain.
725 struct MemOpLink {
726 // Ptr to the mem node.
727 LSBaseSDNode *MemNode;
728
729 // Offset from the base ptr.
730 int64_t OffsetFromBase;
731
732 MemOpLink(LSBaseSDNode *N, int64_t Offset)
733 : MemNode(N), OffsetFromBase(Offset) {}
734 };
735
736 // Classify the origin of a stored value.
737 enum class StoreSource { Unknown, Constant, Extract, Load };
738 StoreSource getStoreSource(SDValue StoreVal) {
739 switch (StoreVal.getOpcode()) {
740 case ISD::Constant:
741 case ISD::ConstantFP:
742 return StoreSource::Constant;
743 case ISD::BUILD_VECTOR:
744 if (ISD::isBuildVectorOfConstantSDNodes(StoreVal.getNode()) ||
745 ISD::isBuildVectorOfConstantFPSDNodes(StoreVal.getNode()))
746 return StoreSource::Constant;
747 return StoreSource::Unknown;
748 case ISD::EXTRACT_VECTOR_ELT:
749 case ISD::EXTRACT_SUBVECTOR:
750 return StoreSource::Extract;
751 case ISD::LOAD:
752 return StoreSource::Load;
753 default:
754 return StoreSource::Unknown;
755 }
756 }
757
758 /// This is a helper function for visitMUL to check the profitability
759 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
760 /// MulNode is the original multiply, AddNode is (add x, c1),
761 /// and ConstNode is c2.
762 bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
763 SDValue ConstNode);
764
765 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
766 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
767 /// the type of the loaded value to be extended.
768 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
769 EVT LoadResultTy, EVT &ExtVT);
770
771 /// Helper function to calculate whether the given Load/Store can have its
772 /// width reduced to ExtVT.
773 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
774 EVT &MemVT, unsigned ShAmt = 0);
775
776 /// Used by BackwardsPropagateMask to find suitable loads.
777 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
778 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
779 ConstantSDNode *Mask, SDNode *&NodeToMask);
780 /// Attempt to propagate a given AND node back to load leaves so that they
781 /// can be combined into narrow loads.
782 bool BackwardsPropagateMask(SDNode *N);
783
784 /// Helper function for mergeConsecutiveStores which merges the component
785 /// store chains.
786 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
787 unsigned NumStores);
788
789 /// Helper function for mergeConsecutiveStores which checks if all the store
790 /// nodes have the same underlying object. We can still reuse the first
791 /// store's pointer info if all the stores are from the same object.
792 bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);
793
794 /// This is a helper function for mergeConsecutiveStores. When the source
795 /// elements of the consecutive stores are all constants or all extracted
796 /// vector elements, try to merge them into one larger store introducing
797 /// bitcasts if necessary. \return True if a merged store was created.
798 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
799 EVT MemVT, unsigned NumStores,
800 bool IsConstantSrc, bool UseVector,
801 bool UseTrunc);
802
803 /// This is a helper function for mergeConsecutiveStores. Stores that
804 /// potentially may be merged with St are placed in StoreNodes. On success,
805 /// returns a chain predecessor to all store candidates.
806 SDNode *getStoreMergeCandidates(StoreSDNode *St,
807 SmallVectorImpl<MemOpLink> &StoreNodes);
808
809 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
810 /// have indirect dependency through their operands. RootNode is the
811 /// predecessor to all stores calculated by getStoreMergeCandidates and is
812 /// used to prune the dependency check. \return True if safe to merge.
813 bool checkMergeStoreCandidatesForDependencies(
814 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
815 SDNode *RootNode);
816
817 /// Helper function for tryStoreMergeOfLoads. Checks if the load/store
818 /// chain has a call in it. \return True if a call is found.
819 bool hasCallInLdStChain(StoreSDNode *St, LoadSDNode *Ld);
820
821 /// This is a helper function for mergeConsecutiveStores. Given a list of
822 /// store candidates, find the first N that are consecutive in memory.
823 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
824 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
825 int64_t ElementSizeBytes) const;
826
827 /// This is a helper function for mergeConsecutiveStores. It is used for
828 /// store chains that are composed entirely of constant values.
829 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
830 unsigned NumConsecutiveStores,
831 EVT MemVT, SDNode *Root, bool AllowVectors);
832
833 /// This is a helper function for mergeConsecutiveStores. It is used for
834 /// store chains that are composed entirely of extracted vector elements.
835 /// When extracting multiple vector elements, try to store them in one
836 /// vector store rather than a sequence of scalar stores.
837 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
838 unsigned NumConsecutiveStores, EVT MemVT,
839 SDNode *Root);
840
841 /// This is a helper function for mergeConsecutiveStores. It is used for
842 /// store chains that are composed entirely of loaded values.
843 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
844 unsigned NumConsecutiveStores, EVT MemVT,
845 SDNode *Root, bool AllowVectors,
846 bool IsNonTemporalStore, bool IsNonTemporalLoad);
847
848 /// Merge consecutive store operations into a wide store.
849 /// This optimization uses wide integers or vectors when possible.
850 /// \return true if stores were merged.
851 bool mergeConsecutiveStores(StoreSDNode *St);
852
853 /// Try to transform a truncation where C is a constant:
854 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
855 ///
856 /// \p N needs to be a truncation and its first operand an AND. Other
857 /// requirements are checked by the function (e.g. that trunc is
858 /// single-use); if they are not met, an empty SDValue is returned.
859 SDValue distributeTruncateThroughAnd(SDNode *N);
860
861 /// Helper function to determine whether the target supports the operation
862 /// given by \p Opcode for type \p VT, that is, whether the operation
863 /// is legal or custom before legalizing operations, and whether it is
864 /// legal (but not custom) after legalization.
865 bool hasOperation(unsigned Opcode, EVT VT) {
866 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
867 }
868
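 /// Returns true if ISD::UMIN is legal or custom on the legalized form of
 /// \p VT (the type itself if legal, or its promoted integer type).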
869 bool hasUMin(EVT VT) const {
870 auto LK = TLI.getTypeConversion(*DAG.getContext(), VT);
871 return (LK.first == TargetLoweringBase::TypeLegal ||
872 LK.first == TargetLoweringBase::TypePromoteInteger) &&
873 TLI.isOperationLegalOrCustom(ISD::UMIN, LK.second);
874 }
875
876 public:
877 /// Runs the dag combiner on all nodes in the work list
878 void Run(CombineLevel AtLevel);
879
880 SelectionDAG &getDAG() const { return DAG; }
881
882 /// Convenience wrapper around TargetLowering::getShiftAmountTy.
883 EVT getShiftAmountTy(EVT LHSTy) {
884 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout());
885 }
886
887 /// This method returns true if we are running before type legalization or
888 /// if the specified VT is legal.
889 bool isTypeLegal(const EVT &VT) {
890 if (!LegalTypes) return true;
891 return TLI.isTypeLegal(VT);
892 }
893
894 /// Convenience wrapper around TargetLowering::getSetCCResultType
895 EVT getSetCCResultType(EVT VT) const {
896 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
897 }
898
899 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
900 SDValue OrigLoad, SDValue ExtLoad,
901 ISD::NodeType ExtType);
902 };
903
904/// This class is a DAGUpdateListener that removes any deleted
905/// nodes from the worklist.
906class WorklistRemover : public SelectionDAG::DAGUpdateListener {
907 DAGCombiner &DC;
908
909public:
910 explicit WorklistRemover(DAGCombiner &dc)
911 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
912
913 void NodeDeleted(SDNode *N, SDNode *E) override {
914 DC.removeFromWorklist(N);
915 }
916};
917
918class WorklistInserter : public SelectionDAG::DAGUpdateListener {
919 DAGCombiner &DC;
920
921public:
922 explicit WorklistInserter(DAGCombiner &dc)
923 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
924
925 // FIXME: Ideally we could add N to the worklist, but this causes exponential
926 // compile time costs in large DAGs, e.g. Halide.
927 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
928};
929
930} // end anonymous namespace
931
932//===----------------------------------------------------------------------===//
933// TargetLowering::DAGCombinerInfo implementation
934//===----------------------------------------------------------------------===//
935
936void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
937 ((DAGCombiner*)DC)->AddToWorklist(N);
938}
939
940SDValue TargetLowering::DAGCombinerInfo::
941CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
942 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
943}
944
945SDValue TargetLowering::DAGCombinerInfo::
946CombineTo(SDNode *N, SDValue Res, bool AddTo) {
947 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
948}
949
950SDValue TargetLowering::DAGCombinerInfo::
951CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
952 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
953}
954
955bool TargetLowering::DAGCombinerInfo::
956recursivelyDeleteUnusedNodes(SDNode *N) {
957 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
958}
959
960void TargetLowering::DAGCombinerInfo::
961CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
962 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
963}
964
965//===----------------------------------------------------------------------===//
966// Helper Functions
967//===----------------------------------------------------------------------===//
968
969void DAGCombiner::deleteAndRecombine(SDNode *N) {
970 removeFromWorklist(N);
971
972 // If the operands of this node are only used by the node, they will now be
973 // dead. Make sure to re-visit them and recursively delete dead nodes.
974 for (const SDValue &Op : N->ops())
975 // For an operand generating multiple values, one of the values may
976 // become dead allowing further simplification (e.g. split index
977 // arithmetic from an indexed load).
978 if (Op->hasOneUse() || Op->getNumValues() > 1)
979 AddToWorklist(Op.getNode());
980
981 DAG.DeleteNode(N);
982}
983
984 // APInts must be the same size for most operations; this helper
985// function zero extends the shorter of the pair so that they match.
986// We provide an Offset so that we can create bitwidths that won't overflow.
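// For example, a 16-bit LHS and a 24-bit RHS both become (24 + Offset)-bit values.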
987static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
988 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
989 LHS = LHS.zext(Bits);
990 RHS = RHS.zext(Bits);
991}
992
993// Return true if this node is a setcc, or is a select_cc
994// that selects between the target values used for true and false, making it
995// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
996// the appropriate nodes based on the type of node we are checking. This
997// simplifies life a bit for the callers.
998bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
999 SDValue &CC, bool MatchStrict) const {
1000 if (N.getOpcode() == ISD::SETCC) {
1001 LHS = N.getOperand(0);
1002 RHS = N.getOperand(1);
1003 CC = N.getOperand(2);
1004 return true;
1005 }
1006
1007 if (MatchStrict &&
1008 (N.getOpcode() == ISD::STRICT_FSETCC ||
1009 N.getOpcode() == ISD::STRICT_FSETCCS)) {
1010 LHS = N.getOperand(1);
1011 RHS = N.getOperand(2);
1012 CC = N.getOperand(3);
1013 return true;
1014 }
1015
1016 if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
1017 !TLI.isConstFalseVal(N.getOperand(3)))
1018 return false;
1019
1020 if (TLI.getBooleanContents(N.getValueType()) ==
1021 TargetLowering::UndefinedBooleanContent)
1022 return false;
1023
1024 LHS = N.getOperand(0);
1025 RHS = N.getOperand(1);
1026 CC = N.getOperand(4);
1027 return true;
1028}
1029
1030/// Return true if this is a SetCC-equivalent operation with only one use.
1031/// If this is true, it allows the users to invert the operation for free when
1032/// it is profitable to do so.
1033bool DAGCombiner::isOneUseSetCC(SDValue N) const {
1034 SDValue N0, N1, N2;
1035 if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
1036 return true;
1037 return false;
1038}
1039
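/// Returns true if \p N is a constant splat/build vector whose splat value is
/// exactly the all-ones mask for the width of \p ScalarTy (i8, i16 or i32),
/// i.e. 0xFF, 0xFFFF or 0xFFFFFFFF respectively.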
1040static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
1041 if (!ScalarTy.isSimple())
1042 return false;
1043
1044 uint64_t MaskForTy = 0ULL;
1045 switch (ScalarTy.getSimpleVT().SimpleTy) {
1046 case MVT::i8:
1047 MaskForTy = 0xFFULL;
1048 break;
1049 case MVT::i16:
1050 MaskForTy = 0xFFFFULL;
1051 break;
1052 case MVT::i32:
1053 MaskForTy = 0xFFFFFFFFULL;
1054 break;
1055 default:
1056 return false;
1057 break;
1058 }
1059
1060 APInt Val;
1061 if (ISD::isConstantSplatVector(N, Val))
1062 return Val.getLimitedValue() == MaskForTy;
1063
1064 return false;
1065}
1066
1067// Determines if it is a constant integer or a splat/build vector of constant
1068// integers (and undefs).
1069// Do not permit build vector implicit truncation unless AllowTruncation is set.
1070static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false,
1071 bool AllowTruncation = false) {
1072 if (auto *Const = dyn_cast<ConstantSDNode>(N))
1073 return !(Const->isOpaque() && NoOpaques);
1074 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
1075 return false;
1076 unsigned BitWidth = N.getScalarValueSizeInBits();
1077 for (const SDValue &Op : N->op_values()) {
1078 if (Op.isUndef())
1079 continue;
1080 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
1081 if (!Const || (Const->isOpaque() && NoOpaques))
1082 return false;
1083 // When AllowTruncation is true, allow constants that have been promoted
1084 // during type legalization as long as the value fits in the target type.
1085 if ((AllowTruncation &&
1086 Const->getAPIntValue().getActiveBits() > BitWidth) ||
1087 (!AllowTruncation && Const->getAPIntValue().getBitWidth() != BitWidth))
1088 return false;
1089 }
1090 return true;
1091}
1092
1093// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
1094// undef's.
1095static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
1096 if (V.getOpcode() != ISD::BUILD_VECTOR)
1097 return false;
1098 return isConstantOrConstantVector(V, NoOpaques) ||
1099 ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
1100}
1101
1102// Determine if this is an indexed load with an opaque target constant index.
1103static bool canSplitIdx(LoadSDNode *LD) {
1104 return MaySplitLoadIndex &&
1105 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
1106 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
1107}
1108
1109bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1110 const SDLoc &DL,
1111 SDNode *N,
1112 SDValue N0,
1113 SDValue N1) {
1114 // Currently this only tries to ensure we don't undo the GEP splits done by
1115 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1116 // we check if the following transformation would be problematic:
1117 // (load/store (add, (add, x, offset1), offset2)) ->
1118 // (load/store (add, x, offset1+offset2)).
1119
1120 // (load/store (add, (add, x, y), offset2)) ->
1121 // (load/store (add, (add, x, offset2), y)).
1122
1123 if (!N0.isAnyAdd())
1124 return false;
1125
1126 // Check for vscale addressing modes.
1127 // (load/store (add/sub (add x, y), vscale))
1128 // (load/store (add/sub (add x, y), (lsl vscale, C)))
1129 // (load/store (add/sub (add x, y), (mul vscale, C)))
1130 if ((N1.getOpcode() == ISD::VSCALE ||
1131 ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::MUL) &&
1132 N1.getOperand(0).getOpcode() == ISD::VSCALE &&
1133 isa<ConstantSDNode>(N1.getOperand(1)))) &&
1134 N1.getValueType().getFixedSizeInBits() <= 64) {
1135 int64_t ScalableOffset = N1.getOpcode() == ISD::VSCALE
1136 ? N1.getConstantOperandVal(0)
1137 : (N1.getOperand(0).getConstantOperandVal(0) *
1138 (N1.getOpcode() == ISD::SHL
1139 ? (1LL << N1.getConstantOperandVal(1))
1140 : N1.getConstantOperandVal(1)));
1141 if (Opc == ISD::SUB)
1142 ScalableOffset = -ScalableOffset;
1143 if (all_of(N->users(), [&](SDNode *Node) {
1144 if (auto *LoadStore = dyn_cast<MemSDNode>(Node);
1145 LoadStore && LoadStore->getBasePtr().getNode() == N) {
1146 TargetLoweringBase::AddrMode AM;
1147 AM.HasBaseReg = true;
1148 AM.ScalableOffset = ScalableOffset;
1149 EVT VT = LoadStore->getMemoryVT();
1150 unsigned AS = LoadStore->getAddressSpace();
1151 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1152 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy,
1153 AS);
1154 }
1155 return false;
1156 }))
1157 return true;
1158 }
1159
1160 if (Opc != ISD::ADD && Opc != ISD::PTRADD)
1161 return false;
1162
1163 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1164 if (!C2)
1165 return false;
1166
1167 const APInt &C2APIntVal = C2->getAPIntValue();
1168 if (C2APIntVal.getSignificantBits() > 64)
1169 return false;
1170
1171 if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
1172 if (N0.hasOneUse())
1173 return false;
1174
1175 const APInt &C1APIntVal = C1->getAPIntValue();
1176 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1177 if (CombinedValueIntVal.getSignificantBits() > 64)
1178 return false;
1179 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1180
1181 for (SDNode *Node : N->users()) {
1182 if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
1183 // Is x[offset2] already not a legal addressing mode? If so then
1184 // reassociating the constants breaks nothing (we test offset2 because
1185 // that's the one we hope to fold into the load or store).
1186 TargetLoweringBase::AddrMode AM;
1187 AM.HasBaseReg = true;
1188 AM.BaseOffs = C2APIntVal.getSExtValue();
1189 EVT VT = LoadStore->getMemoryVT();
1190 unsigned AS = LoadStore->getAddressSpace();
1191 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1192 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1193 continue;
1194
1195 // Would x[offset1+offset2] still be a legal addressing mode?
1196 AM.BaseOffs = CombinedValue;
1197 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1198 return true;
1199 }
1200 }
1201 } else {
1202 if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
1203 if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1204 return false;
1205
1206 for (SDNode *Node : N->users()) {
1207 auto *LoadStore = dyn_cast<MemSDNode>(Node);
1208 if (!LoadStore)
1209 return false;
1210
1211 // Is x[offset2] a legal addressing mode? If so then
1212 // reassociating the constants breaks the address pattern.
1213 TargetLoweringBase::AddrMode AM;
1214 AM.HasBaseReg = true;
1215 AM.BaseOffs = C2APIntVal.getSExtValue();
1216 EVT VT = LoadStore->getMemoryVT();
1217 unsigned AS = LoadStore->getAddressSpace();
1218 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1219 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1220 return false;
1221 }
1222 return true;
1223 }
1224
1225 return false;
1226}
1227
1228/// Helper for DAGCombiner::reassociateOps. Try to reassociate (Opc N0, N1) if
1229/// \p N0 is the same kind of operation as \p Opc.
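/// For example, (add (add x, c1), c2) can be reassociated to (add x, (add c1, c2)).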
1230SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1231 SDValue N0, SDValue N1,
1232 SDNodeFlags Flags) {
1233 EVT VT = N0.getValueType();
1234
1235 if (N0.getOpcode() != Opc)
1236 return SDValue();
1237
1238 SDValue N00 = N0.getOperand(0);
1239 SDValue N01 = N0.getOperand(1);
1240
1242 SDNodeFlags NewFlags;
1243 if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
1244 Flags.hasNoUnsignedWrap())
1245 NewFlags |= SDNodeFlags::NoUnsignedWrap;
1246
1246
1247 if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1248 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1249 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1})) {
1250 NewFlags.setDisjoint(Flags.hasDisjoint() &&
1251 N0->getFlags().hasDisjoint());
1252 return DAG.getNode(Opc, DL, VT, N00, OpNode, NewFlags);
1253 }
1254 return SDValue();
1255 }
1256 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1257 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1258 // iff (op x, c1) has one use
1259 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
1260 return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
1261 }
1262 }
1263
1264 // Check for repeated operand logic simplifications.
1265 if (Opc == ISD::AND || Opc == ISD::OR) {
1266 // (N00 & N01) & N00 --> N00 & N01
1267 // (N00 & N01) & N01 --> N00 & N01
1268 // (N00 | N01) | N00 --> N00 | N01
1269 // (N00 | N01) | N01 --> N00 | N01
1270 if (N1 == N00 || N1 == N01)
1271 return N0;
1272 }
1273 if (Opc == ISD::XOR) {
1274 // (N00 ^ N01) ^ N00 --> N01
1275 if (N1 == N00)
1276 return N01;
1277 // (N00 ^ N01) ^ N01 --> N00
1278 if (N1 == N01)
1279 return N00;
1280 }
1281
1282 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1283 if (N1 != N01) {
1284 // Reassociate if (op N00, N1) already exists
1285 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
1286 // if (Op (Op N00, N1), N01) already exists,
1287 // we need to stop reassociating to avoid an infinite loop
1288 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
1289 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
1290 }
1291 }
1292
1293 if (N1 != N00) {
1294 // Reassociate if (op N01, N1) already exists
1295 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
1296 // if (Op (Op N01, N1), N00) already exists,
1297 // we need to stop reassociating to avoid an infinite loop
1298 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
1299 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
1300 }
1301 }
1302
1303 // Reassociate the operands from (OR/AND (OR/AND(N00, N01)), N1) to (OR/AND
1304 // (OR/AND(N00, N1)), N01) when N00 and N1 are comparisons with the same
1305 // predicate or to (OR/AND (OR/AND(N1, N01)), N00) when N01 and N1 are
1306 // comparisons with the same predicate. This enables optimizations such as
1307 // the following:
1308 // CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
1309 // CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)
1310 if (Opc == ISD::AND || Opc == ISD::OR) {
1311 if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
1312 N01->getOpcode() == ISD::SETCC) {
1313 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
1314 ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
1315 ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
1316 if (CC1 == CC00 && CC1 != CC01) {
1317 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags);
1318 return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags);
1319 }
1320 if (CC1 == CC01 && CC1 != CC00) {
1321 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags);
1322 return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags);
1323 }
1324 }
1325 }
1326 }
1327
1328 return SDValue();
1329}
1330
1331/// Try to reassociate commutative (Opc N0, N1) if either \p N0 or \p N1 is the
1332/// same kind of operation as \p Opc.
1333SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1334 SDValue N1, SDNodeFlags Flags) {
1335 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1336
1337 // Floating-point reassociation is not allowed without loose FP math.
1338 if (N0.getValueType().isFloatingPoint() ||
1339 N1.getValueType().isFloatingPoint())
1340 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1341 return SDValue();
1342
1343 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1, Flags))
1344 return Combined;
1345 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0, Flags))
1346 return Combined;
1347 return SDValue();
1348}
1349
1350// Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
1351// Note that we only expect Flags to be passed from FP operations. For integer
1352// operations they need to be dropped.
1353SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
1354 const SDLoc &DL, EVT VT, SDValue N0,
1355 SDValue N1, SDNodeFlags Flags) {
1356 if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
1357 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
1358 N0->hasOneUse() && N1->hasOneUse() &&
1359 hasOperation(Opc, N0.getOperand(0).getValueType()) &&
1360 TLI.shouldReassociateReduction(RedOpc, N0.getOperand(0).getValueType())) {
1361 SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
1362 return DAG.getNode(RedOpc, DL, VT,
1363 DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
1364 N0.getOperand(0), N1.getOperand(0)));
1365 }
1366
1367 // Reassociate op(op(vecreduce(a), b), op(vecreduce(c), d)) into
1368 // op(vecreduce(op(a, c)), op(b, d)), to combine the reductions into a
1369 // single node.
1370 SDValue A, B, C, D, RedA, RedB;
1371 if (sd_match(N0, m_OneUse(m_c_BinOp(
1372 Opc,
1373 m_AllOf(m_OneUse(m_UnaryOp(RedOpc, m_Value(A))),
1374 m_Value(RedA)),
1375 m_Value(B)))) &&
1376 sd_match(N1, m_OneUse(m_c_BinOp(
1377 Opc,
1378 m_AllOf(m_OneUse(m_UnaryOp(RedOpc, m_Value(C))),
1379 m_Value(RedB)),
1380 m_Value(D)))) &&
1381 !sd_match(B, m_UnaryOp(RedOpc, m_Value())) &&
1382 !sd_match(D, m_UnaryOp(RedOpc, m_Value())) &&
1383 A.getValueType() == C.getValueType() &&
1384 hasOperation(Opc, A.getValueType()) &&
1385 TLI.shouldReassociateReduction(RedOpc, VT)) {
1386 if ((Opc == ISD::FADD || Opc == ISD::FMUL) &&
1387 (!N0->getFlags().hasAllowReassociation() ||
1388 !N1->getFlags().hasAllowReassociation() ||
1389 !RedA->getFlags().hasAllowReassociation() ||
1390 !RedB->getFlags().hasAllowReassociation()))
1391 return SDValue();
1392 SelectionDAG::FlagInserter FlagsInserter(
1393 DAG, Flags & N0->getFlags() & N1->getFlags() & RedA->getFlags() &
1394 RedB->getFlags());
1395 SDValue Op = DAG.getNode(Opc, DL, A.getValueType(), A, C);
1396 SDValue Red = DAG.getNode(RedOpc, DL, VT, Op);
1397 SDValue Op2 = DAG.getNode(Opc, DL, VT, B, D);
1398 return DAG.getNode(Opc, DL, VT, Red, Op2);
1399 }
1400 return SDValue();
1401}
1402
1403SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1404 bool AddTo) {
1405 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1406 ++NodesCombined;
1407 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1408 To[0].dump(&DAG);
1409 dbgs() << " and " << NumTo - 1 << " other values\n");
1410 for (unsigned i = 0, e = NumTo; i != e; ++i)
1411 assert((!To[i].getNode() ||
1412 N->getValueType(i) == To[i].getValueType()) &&
1413 "Cannot combine value to value of different type!");
1414
1415 WorklistRemover DeadNodes(*this);
1416 DAG.ReplaceAllUsesWith(N, To);
1417 if (AddTo) {
1418 // Push the new nodes and any users onto the worklist
1419 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1420 if (To[i].getNode())
1421 AddToWorklistWithUsers(To[i].getNode());
1422 }
1423 }
1424
1425 // Finally, if the node is now dead, remove it from the graph. The node
1426 // may not be dead if the replacement process recursively simplified to
1427 // something else needing this node.
1428 if (N->use_empty())
1429 deleteAndRecombine(N);
1430 return SDValue(N, 0);
1431}
1432
1433void DAGCombiner::
1434CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1435 // Replace the old value with the new one.
1436 ++NodesCombined;
1437 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1438 dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1439
1440 // Replace all uses.
1441 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1442
1443 // Push the new node and any (possibly new) users onto the worklist.
1444 AddToWorklistWithUsers(TLO.New.getNode());
1445
1446 // Finally, if the node is now dead, remove it from the graph.
1447 recursivelyDeleteUnusedNodes(TLO.Old.getNode());
1448}
1449
1450/// Check the specified integer node value to see if it can be simplified or if
1451/// things it uses can be simplified by bit propagation. If so, return true.
1452bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1453 const APInt &DemandedElts,
1454 bool AssumeSingleUse) {
1455 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1456 KnownBits Known;
1457 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1458 AssumeSingleUse))
1459 return false;
1460
1461 // Revisit the node.
1462 AddToWorklist(Op.getNode());
1463
1464 CommitTargetLoweringOpt(TLO);
1465 return true;
1466}
1467
1468/// Check the specified vector node value to see if it can be simplified or
1469/// if things it uses can be simplified as it only uses some of the elements.
1470/// If so, return true.
1471bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1472 const APInt &DemandedElts,
1473 bool AssumeSingleUse) {
1474 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1475 APInt KnownUndef, KnownZero;
1476 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1477 TLO, 0, AssumeSingleUse))
1478 return false;
1479
1480 // Revisit the node.
1481 AddToWorklist(Op.getNode());
1482
1483 CommitTargetLoweringOpt(TLO);
1484 return true;
1485}
1486
1487void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1488 SDLoc DL(Load);
1489 EVT VT = Load->getValueType(0);
1490 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1491
1492 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1493 Trunc.dump(&DAG); dbgs() << '\n');
1494
1495 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1496 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1497
1498 AddToWorklist(Trunc.getNode());
1499 recursivelyDeleteUnusedNodes(Load);
1500}
1501
1502SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1503 Replace = false;
1504 SDLoc DL(Op);
1505 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1506 LoadSDNode *LD = cast<LoadSDNode>(Op);
1507 EVT MemVT = LD->getMemoryVT();
1508 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1509 : LD->getExtensionType();
1510 Replace = true;
1511 return DAG.getExtLoad(ExtType, DL, PVT,
1512 LD->getChain(), LD->getBasePtr(),
1513 MemVT, LD->getMemOperand());
1514 }
1515
1516 unsigned Opc = Op.getOpcode();
1517 switch (Opc) {
1518 default: break;
1519 case ISD::AssertSext:
1520 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1521 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1522 break;
1523 case ISD::AssertZext:
1524 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1525 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1526 break;
1527 case ISD::Constant: {
1528 unsigned ExtOpc =
1529 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1530 return DAG.getNode(ExtOpc, DL, PVT, Op);
1531 }
1532 }
1533
1534 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1535 return SDValue();
1536 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1537}
1538
1539 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1540 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1541 return SDValue();
1542 EVT OldVT = Op.getValueType();
1543 SDLoc DL(Op);
1544 bool Replace = false;
1545 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1546 if (!NewOp.getNode())
1547 return SDValue();
1548 AddToWorklist(NewOp.getNode());
1549
1550 if (Replace)
1551 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1552 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1553 DAG.getValueType(OldVT));
1554}
1555
1556SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1557 EVT OldVT = Op.getValueType();
1558 SDLoc DL(Op);
1559 bool Replace = false;
1560 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1561 if (!NewOp.getNode())
1562 return SDValue();
1563 AddToWorklist(NewOp.getNode());
1564
1565 if (Replace)
1566 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1567 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1568}
1569
1570/// Promote the specified integer binary operation if the target indicates it is
1571/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1572/// i32 since i16 instructions are longer.
1573SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1574 if (!LegalOperations)
1575 return SDValue();
1576
1577 EVT VT = Op.getValueType();
1578 if (VT.isVector() || !VT.isInteger())
1579 return SDValue();
1580
1581 // If operation type is 'undesirable', e.g. i16 on x86, consider
1582 // promoting it.
1583 unsigned Opc = Op.getOpcode();
1584 if (TLI.isTypeDesirableForOp(Opc, VT))
1585 return SDValue();
1586
1587 EVT PVT = VT;
1588 // Consult target whether it is a good idea to promote this operation and
1589 // what's the right type to promote it to.
1590 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1591 assert(PVT != VT && "Don't know what type to promote to!");
1592
1593 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1594
1595 bool Replace0 = false;
1596 SDValue N0 = Op.getOperand(0);
1597 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1598
1599 bool Replace1 = false;
1600 SDValue N1 = Op.getOperand(1);
1601 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1602 SDLoc DL(Op);
1603
1604 SDValue RV =
1605 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1606
1607 // We are always replacing N0/N1's use in N and only need additional
1608 // replacements if there are additional uses.
1609 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1610 // (SDValue) here because the node may reference multiple values
1611 // (for example, the chain value of a load node).
1612 Replace0 &= !N0->hasOneUse();
1613 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1614
1615 // Combine Op here so it is preserved past replacements.
1616 CombineTo(Op.getNode(), RV);
1617
1618 // If operands have a use ordering, make sure we deal with
1619 // predecessor first.
1620 if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1621 std::swap(N0, N1);
1622 std::swap(NN0, NN1);
1623 }
1624
1625 if (Replace0) {
1626 AddToWorklist(NN0.getNode());
1627 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1628 }
1629 if (Replace1) {
1630 AddToWorklist(NN1.getNode());
1631 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1632 }
1633 return Op;
1634 }
1635 return SDValue();
1636}
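// For example, on a target where i16 is undesirable but i32 is legal, an i16
// addition such as
//   t3: i16 = add t1, t2
// is rebuilt as
//   t6: i32 = add (any_extend t1), (any_extend t2)
//   t7: i16 = truncate t6
// and single-use loads feeding the operands are widened to extending i32
// loads via ReplaceLoadWithPromotedLoad.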
1637
1638/// Promote the specified integer shift operation if the target indicates it is
1639/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1640/// i32 since i16 instructions are longer.
1641SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1642 if (!LegalOperations)
1643 return SDValue();
1644
1645 EVT VT = Op.getValueType();
1646 if (VT.isVector() || !VT.isInteger())
1647 return SDValue();
1648
1649 // If operation type is 'undesirable', e.g. i16 on x86, consider
1650 // promoting it.
1651 unsigned Opc = Op.getOpcode();
1652 if (TLI.isTypeDesirableForOp(Opc, VT))
1653 return SDValue();
1654
1655 EVT PVT = VT;
1656 // Consult target whether it is a good idea to promote this operation and
1657 // what's the right type to promote it to.
1658 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1659 assert(PVT != VT && "Don't know what type to promote to!");
1660
1661 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1662
1663 bool Replace = false;
1664 SDValue N0 = Op.getOperand(0);
1665 if (Opc == ISD::SRA)
1666 N0 = SExtPromoteOperand(N0, PVT);
1667 else if (Opc == ISD::SRL)
1668 N0 = ZExtPromoteOperand(N0, PVT);
1669 else
1670 N0 = PromoteOperand(N0, PVT, Replace);
1671
1672 if (!N0.getNode())
1673 return SDValue();
1674
1675 SDLoc DL(Op);
1676 SDValue N1 = Op.getOperand(1);
1677 SDValue RV =
1678 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1679
1680 if (Replace)
1681 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1682
1683 // Deal with Op being deleted.
1684 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1685 return RV;
1686 }
1687 return SDValue();
1688}
1689
1690SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1691 if (!LegalOperations)
1692 return SDValue();
1693
1694 EVT VT = Op.getValueType();
1695 if (VT.isVector() || !VT.isInteger())
1696 return SDValue();
1697
1698 // If operation type is 'undesirable', e.g. i16 on x86, consider
1699 // promoting it.
1700 unsigned Opc = Op.getOpcode();
1701 if (TLI.isTypeDesirableForOp(Opc, VT))
1702 return SDValue();
1703
1704 EVT PVT = VT;
1705 // Consult target whether it is a good idea to promote this operation and
1706 // what's the right type to promote it to.
1707 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1708 assert(PVT != VT && "Don't know what type to promote to!");
1709 // fold (aext (aext x)) -> (aext x)
1710 // fold (aext (zext x)) -> (zext x)
1711 // fold (aext (sext x)) -> (sext x)
1712 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1713 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1714 }
1715 return SDValue();
1716}
1717
1718bool DAGCombiner::PromoteLoad(SDValue Op) {
1719 if (!LegalOperations)
1720 return false;
1721
1722 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1723 return false;
1724
1725 EVT VT = Op.getValueType();
1726 if (VT.isVector() || !VT.isInteger())
1727 return false;
1728
1729 // If operation type is 'undesirable', e.g. i16 on x86, consider
1730 // promoting it.
1731 unsigned Opc = Op.getOpcode();
1732 if (TLI.isTypeDesirableForOp(Opc, VT))
1733 return false;
1734
1735 EVT PVT = VT;
1736 // Consult target whether it is a good idea to promote this operation and
1737 // what's the right type to promote it to.
1738 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1739 assert(PVT != VT && "Don't know what type to promote to!");
1740
1741 SDLoc DL(Op);
1742 SDNode *N = Op.getNode();
1743 LoadSDNode *LD = cast<LoadSDNode>(N);
1744 EVT MemVT = LD->getMemoryVT();
1745 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1746 : LD->getExtensionType();
1747 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1748 LD->getChain(), LD->getBasePtr(),
1749 MemVT, LD->getMemOperand());
1750 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1751
1752 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1753 Result.dump(&DAG); dbgs() << '\n');
1754
1755 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1756 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1757
1758 AddToWorklist(Result.getNode());
1759 recursivelyDeleteUnusedNodes(N);
1760 return true;
1761 }
1762
1763 return false;
1764}
1765
1766/// Recursively delete a node which has no uses and any operands for
1767/// which it is the only use.
1768///
1769/// Note that this both deletes the nodes and removes them from the worklist.
1770 /// It also adds any nodes that have had a user deleted to the worklist, as they
1771 /// may now have only one use and be subject to other combines.
1772bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1773 if (!N->use_empty())
1774 return false;
1775
1776 SmallSetVector<SDNode *, 16> Nodes;
1777 Nodes.insert(N);
1778 do {
1779 N = Nodes.pop_back_val();
1780 if (!N)
1781 continue;
1782
1783 if (N->use_empty()) {
1784 for (const SDValue &ChildN : N->op_values())
1785 Nodes.insert(ChildN.getNode());
1786
1787 removeFromWorklist(N);
1788 DAG.DeleteNode(N);
1789 } else {
1790 AddToWorklist(N);
1791 }
1792 } while (!Nodes.empty());
1793 return true;
1794}
1795
1796//===----------------------------------------------------------------------===//
1797// Main DAG Combiner implementation
1798//===----------------------------------------------------------------------===//
1799
1800void DAGCombiner::Run(CombineLevel AtLevel) {
1801 // Set the instance variables, so that the various visit routines may use them.
1802 Level = AtLevel;
1803 LegalDAG = Level >= AfterLegalizeDAG;
1804 LegalOperations = Level >= AfterLegalizeVectorOps;
1805 LegalTypes = Level >= AfterLegalizeTypes;
1806
1807 WorklistInserter AddNodes(*this);
1808
1809 // Add all the dag nodes to the worklist.
1810 //
1811 // Note: Not all nodes are added to PruningList here. Only nodes with no uses
1812 // can be deleted, and all other nodes that would otherwise be added to the
1813 // worklist by the first call to getNextWorklistEntry are already present in
1814 // it.
1815 for (SDNode &Node : DAG.allnodes())
1816 AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());
1817
1818 // Create a dummy node (which is not added to allnodes), that adds a reference
1819 // to the root node, preventing it from being deleted, and tracking any
1820 // changes of the root.
1821 HandleSDNode Dummy(DAG.getRoot());
1822
1823 // While we have a valid worklist entry node, try to combine it.
1824 while (SDNode *N = getNextWorklistEntry()) {
1825 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1826 // N is deleted from the DAG, since they too may now be dead or may have a
1827 // reduced number of uses, allowing other xforms.
1828 if (recursivelyDeleteUnusedNodes(N))
1829 continue;
1830
1831 WorklistRemover DeadNodes(*this);
1832
1833 // If this combine is running after legalizing the DAG, re-legalize any
1834 // nodes pulled off the worklist.
1835 if (LegalDAG) {
1836 SmallSetVector<SDNode *, 16> UpdatedNodes;
1837 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1838
1839 for (SDNode *LN : UpdatedNodes)
1840 AddToWorklistWithUsers(LN);
1841
1842 if (!NIsValid)
1843 continue;
1844 }
1845
1846 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1847
1848 // Add any operands of the new node which have not yet been combined to the
1849 // worklist as well. getNextWorklistEntry flags nodes that have been
1850 // combined before. Because the worklist uniques things already, this won't
1851 // repeatedly process the same operand.
1852 for (const SDValue &ChildN : N->op_values())
1853 AddToWorklist(ChildN.getNode(), /*IsCandidateForPruning=*/true,
1854 /*SkipIfCombinedBefore=*/true);
1855
1856 SDValue RV = combine(N);
1857
1858 if (!RV.getNode())
1859 continue;
1860
1861 ++NodesCombined;
1862
1863 // Invalidate cached info.
1864 ChainsWithoutMergeableStores.clear();
1865
1866 // If we get back the same node we passed in, rather than a new node or
1867 // zero, we know that the node must have defined multiple values and
1868 // CombineTo was used. Since CombineTo takes care of the worklist
1869 // mechanics for us, we have no work to do in this case.
1870 if (RV.getNode() == N)
1871 continue;
1872
1873 assert(N->getOpcode() != ISD::DELETED_NODE &&
1874 RV.getOpcode() != ISD::DELETED_NODE &&
1875 "Node was deleted but visit returned new node!");
1876
1877 LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
1878
1879 if (N->getNumValues() == RV->getNumValues())
1880 DAG.ReplaceAllUsesWith(N, RV.getNode());
1881 else {
1882 assert(N->getValueType(0) == RV.getValueType() &&
1883 N->getNumValues() == 1 && "Type mismatch");
1884 DAG.ReplaceAllUsesWith(N, &RV);
1885 }
1886
1887 // Push the new node and any users onto the worklist. Omit this if the
1888 // new node is the EntryToken (e.g. if a store managed to get optimized
1889 // out), because re-visiting the EntryToken and its users will not uncover
1890 // any additional opportunities, but there may be a large number of such
1891 // users, potentially causing compile time explosion.
1892 if (RV.getOpcode() != ISD::EntryToken)
1893 AddToWorklistWithUsers(RV.getNode());
1894
1895 // Finally, if the node is now dead, remove it from the graph. The node
1896 // may not be dead if the replacement process recursively simplified to
1897 // something else needing this node. This will also take care of adding any
1898 // operands which have lost a user to the worklist.
1899 recursivelyDeleteUnusedNodes(N);
1900 }
1901
1902 // If the root changed (e.g. it was a dead load), update the root.
1903 DAG.setRoot(Dummy.getValue());
1904 DAG.RemoveDeadNodes();
1905}
1906
1907SDValue DAGCombiner::visit(SDNode *N) {
1908 // clang-format off
1909 switch (N->getOpcode()) {
1910 default: break;
1911 case ISD::TokenFactor: return visitTokenFactor(N);
1912 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1913 case ISD::ADD: return visitADD(N);
1914 case ISD::PTRADD: return visitPTRADD(N);
1915 case ISD::SUB: return visitSUB(N);
1916 case ISD::SADDSAT:
1917 case ISD::UADDSAT: return visitADDSAT(N);
1918 case ISD::SSUBSAT:
1919 case ISD::USUBSAT: return visitSUBSAT(N);
1920 case ISD::ADDC: return visitADDC(N);
1921 case ISD::SADDO:
1922 case ISD::UADDO: return visitADDO(N);
1923 case ISD::SUBC: return visitSUBC(N);
1924 case ISD::SSUBO:
1925 case ISD::USUBO: return visitSUBO(N);
1926 case ISD::ADDE: return visitADDE(N);
1927 case ISD::UADDO_CARRY: return visitUADDO_CARRY(N);
1928 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1929 case ISD::SUBE: return visitSUBE(N);
1930 case ISD::USUBO_CARRY: return visitUSUBO_CARRY(N);
1931 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1932 case ISD::SMULFIX:
1933 case ISD::SMULFIXSAT:
1934 case ISD::UMULFIX:
1935 case ISD::UMULFIXSAT: return visitMULFIX(N);
1936 case ISD::MUL: return visitMUL<EmptyMatchContext>(N);
1937 case ISD::SDIV: return visitSDIV(N);
1938 case ISD::UDIV: return visitUDIV(N);
1939 case ISD::SREM:
1940 case ISD::UREM: return visitREM(N);
1941 case ISD::MULHU: return visitMULHU(N);
1942 case ISD::MULHS: return visitMULHS(N);
1943 case ISD::AVGFLOORS:
1944 case ISD::AVGFLOORU:
1945 case ISD::AVGCEILS:
1946 case ISD::AVGCEILU: return visitAVG(N);
1947 case ISD::ABDS:
1948 case ISD::ABDU: return visitABD(N);
1949 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1950 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1951 case ISD::SMULO:
1952 case ISD::UMULO: return visitMULO(N);
1953 case ISD::SMIN:
1954 case ISD::SMAX:
1955 case ISD::UMIN:
1956 case ISD::UMAX: return visitIMINMAX(N);
1957 case ISD::AND: return visitAND(N);
1958 case ISD::OR: return visitOR(N);
1959 case ISD::XOR: return visitXOR(N);
1960 case ISD::SHL: return visitSHL(N);
1961 case ISD::SRA: return visitSRA(N);
1962 case ISD::SRL: return visitSRL(N);
1963 case ISD::ROTR:
1964 case ISD::ROTL: return visitRotate(N);
1965 case ISD::FSHL:
1966 case ISD::FSHR: return visitFunnelShift(N);
1967 case ISD::SSHLSAT:
1968 case ISD::USHLSAT: return visitSHLSAT(N);
1969 case ISD::ABS: return visitABS(N);
1970 case ISD::BSWAP: return visitBSWAP(N);
1971 case ISD::BITREVERSE: return visitBITREVERSE(N);
1972 case ISD::CTLZ: return visitCTLZ(N);
1973 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1974 case ISD::CTTZ: return visitCTTZ(N);
1975 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1976 case ISD::CTPOP: return visitCTPOP(N);
1977 case ISD::SELECT: return visitSELECT(N);
1978 case ISD::VSELECT: return visitVSELECT(N);
1979 case ISD::SELECT_CC: return visitSELECT_CC(N);
1980 case ISD::SETCC: return visitSETCC(N);
1981 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1982 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1983 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1984 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1985 case ISD::AssertSext:
1986 case ISD::AssertZext: return visitAssertExt(N);
1987 case ISD::AssertAlign: return visitAssertAlign(N);
1988 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1989 case ISD::SIGN_EXTEND_VECTOR_INREG:
1990 case ISD::ZERO_EXTEND_VECTOR_INREG:
1991 case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1992 case ISD::TRUNCATE: return visitTRUNCATE(N);
1993 case ISD::TRUNCATE_USAT_U: return visitTRUNCATE_USAT_U(N);
1994 case ISD::BITCAST: return visitBITCAST(N);
1995 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1996 case ISD::FADD: return visitFADD(N);
1997 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1998 case ISD::FSUB: return visitFSUB(N);
1999 case ISD::FMUL: return visitFMUL(N);
2000 case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
2001 case ISD::FMAD: return visitFMAD(N);
2002 case ISD::FMULADD: return visitFMULADD(N);
2003 case ISD::FDIV: return visitFDIV(N);
2004 case ISD::FREM: return visitFREM(N);
2005 case ISD::FSQRT: return visitFSQRT(N);
2006 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
2007 case ISD::FPOW: return visitFPOW(N);
2008 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
2009 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
2010 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
2011 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
2012 case ISD::LROUND:
2013 case ISD::LLROUND:
2014 case ISD::LRINT:
2015 case ISD::LLRINT: return visitXROUND(N);
2016 case ISD::FP_ROUND: return visitFP_ROUND(N);
2017 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
2018 case ISD::FNEG: return visitFNEG(N);
2019 case ISD::FABS: return visitFABS(N);
2020 case ISD::FFLOOR: return visitFFLOOR(N);
2021 case ISD::FMINNUM:
2022 case ISD::FMAXNUM:
2023 case ISD::FMINIMUM:
2024 case ISD::FMAXIMUM:
2025 case ISD::FMINIMUMNUM:
2026 case ISD::FMAXIMUMNUM: return visitFMinMax(N);
2027 case ISD::FCEIL: return visitFCEIL(N);
2028 case ISD::FTRUNC: return visitFTRUNC(N);
2029 case ISD::FFREXP: return visitFFREXP(N);
2030 case ISD::BRCOND: return visitBRCOND(N);
2031 case ISD::BR_CC: return visitBR_CC(N);
2032 case ISD::LOAD: return visitLOAD(N);
2033 case ISD::STORE: return visitSTORE(N);
2034 case ISD::ATOMIC_STORE: return visitATOMIC_STORE(N);
2035 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
2036 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
2037 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
2038 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
2039 case ISD::VECTOR_INTERLEAVE: return visitVECTOR_INTERLEAVE(N);
2040 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
2041 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
2042 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
2043 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
2044 case ISD::MGATHER: return visitMGATHER(N);
2045 case ISD::MLOAD: return visitMLOAD(N);
2046 case ISD::MSCATTER: return visitMSCATTER(N);
2047 case ISD::MSTORE: return visitMSTORE(N);
2048 case ISD::EXPERIMENTAL_VECTOR_HISTOGRAM: return visitMHISTOGRAM(N);
2050 case ISD::PARTIAL_REDUCE_SMLA:
2051 case ISD::PARTIAL_REDUCE_UMLA:
2052 case ISD::PARTIAL_REDUCE_SUMLA:
2053 return visitPARTIAL_REDUCE_MLA(N);
2054 case ISD::VECTOR_COMPRESS: return visitVECTOR_COMPRESS(N);
2055 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
2056 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
2057 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
2058 case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
2059 case ISD::BF16_TO_FP: return visitBF16_TO_FP(N);
2060 case ISD::FREEZE: return visitFREEZE(N);
2061 case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
2062 case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
2063 case ISD::FCANONICALIZE: return visitFCANONICALIZE(N);
2064 case ISD::VECREDUCE_FADD:
2065 case ISD::VECREDUCE_FMUL:
2066 case ISD::VECREDUCE_ADD:
2067 case ISD::VECREDUCE_MUL:
2068 case ISD::VECREDUCE_AND:
2069 case ISD::VECREDUCE_OR:
2070 case ISD::VECREDUCE_XOR:
2071 case ISD::VECREDUCE_SMAX:
2072 case ISD::VECREDUCE_SMIN:
2073 case ISD::VECREDUCE_UMAX:
2074 case ISD::VECREDUCE_UMIN:
2075 case ISD::VECREDUCE_FMAX:
2076 case ISD::VECREDUCE_FMIN:
2077 case ISD::VECREDUCE_FMAXIMUM:
2078 case ISD::VECREDUCE_FMINIMUM: return visitVECREDUCE(N);
2079#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
2080#include "llvm/IR/VPIntrinsics.def"
2081 return visitVPOp(N);
2082 }
2083 // clang-format on
2084 return SDValue();
2085}
2086
2087SDValue DAGCombiner::combine(SDNode *N) {
2088 if (!DebugCounter::shouldExecute(DAGCombineCounter))
2089 return SDValue();
2090
2091 SDValue RV;
2092 if (!DisableGenericCombines)
2093 RV = visit(N);
2094
2095 // If nothing happened, try a target-specific DAG combine.
2096 if (!RV.getNode()) {
2097 assert(N->getOpcode() != ISD::DELETED_NODE &&
2098 "Node was deleted but visit returned NULL!");
2099
2100 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
2101 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
2102
2103 // Expose the DAG combiner to the target combiner impls.
2104 TargetLowering::DAGCombinerInfo
2105 DagCombineInfo(DAG, Level, false, this);
2106
2107 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
2108 }
2109 }
2110
2111 // If nothing happened still, try promoting the operation.
2112 if (!RV.getNode()) {
2113 switch (N->getOpcode()) {
2114 default: break;
2115 case ISD::ADD:
2116 case ISD::SUB:
2117 case ISD::MUL:
2118 case ISD::AND:
2119 case ISD::OR:
2120 case ISD::XOR:
2121 RV = PromoteIntBinOp(SDValue(N, 0));
2122 break;
2123 case ISD::SHL:
2124 case ISD::SRA:
2125 case ISD::SRL:
2126 RV = PromoteIntShiftOp(SDValue(N, 0));
2127 break;
2128 case ISD::SIGN_EXTEND:
2129 case ISD::ZERO_EXTEND:
2130 case ISD::ANY_EXTEND:
2131 RV = PromoteExtend(SDValue(N, 0));
2132 break;
2133 case ISD::LOAD:
2134 if (PromoteLoad(SDValue(N, 0)))
2135 RV = SDValue(N, 0);
2136 break;
2137 }
2138 }
2139
2140 // If N is a commutative binary node, try to eliminate it if the commuted
2141 // version is already present in the DAG.
2142 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
2143 SDValue N0 = N->getOperand(0);
2144 SDValue N1 = N->getOperand(1);
2145
2146 // Constant operands are canonicalized to RHS.
2147 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
2148 SDValue Ops[] = {N1, N0};
2149 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
2150 N->getFlags());
2151 if (CSENode)
2152 return SDValue(CSENode, 0);
2153 }
2154 }
2155
2156 return RV;
2157}
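// For example, if the DAG already contains a node (add t2, t1), a newly
// visited (add t1, t2) with the same value types and flags is CSE'd to that
// existing node here instead of surviving as a duplicate.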
2158
2159/// Given a node, return its input chain if it has one, otherwise return a null
2160/// sd operand.
2161 static SDValue getInputChainForNode(SDNode *N) {
2162 if (unsigned NumOps = N->getNumOperands()) {
2163 if (N->getOperand(0).getValueType() == MVT::Other)
2164 return N->getOperand(0);
2165 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
2166 return N->getOperand(NumOps-1);
2167 for (unsigned i = 1; i < NumOps-1; ++i)
2168 if (N->getOperand(i).getValueType() == MVT::Other)
2169 return N->getOperand(i);
2170 }
2171 return SDValue();
2172}
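// For example, for a load node (ch, ptr) this returns operand 0, the
// MVT::Other chain value; for a node with no chain-typed operand it returns a
// null SDValue.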
2173
2174SDValue DAGCombiner::visitFCANONICALIZE(SDNode *N) {
2175 SDValue Operand = N->getOperand(0);
2176 EVT VT = Operand.getValueType();
2177 SDLoc dl(N);
2178
2179 // Canonicalize undef to quiet NaN.
2180 if (Operand.isUndef()) {
2181 APFloat CanonicalQNaN = APFloat::getQNaN(VT.getFltSemantics());
2182 return DAG.getConstantFP(CanonicalQNaN, dl, VT);
2183 }
2184 return SDValue();
2185}
2186
2187SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
2188 // If N has two operands, where one has an input chain equal to the other,
2189 // the 'other' chain is redundant.
2190 if (N->getNumOperands() == 2) {
2191 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
2192 return N->getOperand(0);
2193 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
2194 return N->getOperand(1);
2195 }
2196
2197 // Don't simplify token factors if optnone.
2198 if (OptLevel == CodeGenOptLevel::None)
2199 return SDValue();
2200
2201 // Don't simplify the token factor if the node itself has too many operands.
2202 if (N->getNumOperands() > TokenFactorInlineLimit)
2203 return SDValue();
2204
2205 // If the sole user is a token factor, we should make sure we have a
2206 // chance to merge them together. This prevents TF chains from inhibiting
2207 // optimizations.
2208 if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::TokenFactor)
2209 AddToWorklist(*(N->user_begin()));
2210
2211 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
2212 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
2213 SmallPtrSet<SDNode*, 16> SeenOps;
2214 bool Changed = false; // If we should replace this token factor.
2215
2216 // Start out with this token factor.
2217 TFs.push_back(N);
2218
2219 // Iterate through token factors. The TFs list grows as new token factors
2220 // are encountered.
2221 for (unsigned i = 0; i < TFs.size(); ++i) {
2222 // Limit number of nodes to inline, to avoid quadratic compile times.
2223 // We have to add the outstanding Token Factors to Ops, otherwise we might
2224 // drop Ops from the resulting Token Factors.
2225 if (Ops.size() > TokenFactorInlineLimit) {
2226 for (unsigned j = i; j < TFs.size(); j++)
2227 Ops.emplace_back(TFs[j], 0);
2228 // Drop unprocessed Token Factors from TFs, so we do not add them to the
2229 // combiner worklist later.
2230 TFs.resize(i);
2231 break;
2232 }
2233
2234 SDNode *TF = TFs[i];
2235 // Check each of the operands.
2236 for (const SDValue &Op : TF->op_values()) {
2237 switch (Op.getOpcode()) {
2238 case ISD::EntryToken:
2239 // Entry tokens don't need to be added to the list. They are
2240 // redundant.
2241 Changed = true;
2242 break;
2243
2244 case ISD::TokenFactor:
2245 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
2246 // Queue up for processing.
2247 TFs.push_back(Op.getNode());
2248 Changed = true;
2249 break;
2250 }
2251 [[fallthrough]];
2252
2253 default:
2254 // Only add if it isn't already in the list.
2255 if (SeenOps.insert(Op.getNode()).second)
2256 Ops.push_back(Op);
2257 else
2258 Changed = true;
2259 break;
2260 }
2261 }
2262 }
2263
2264 // Re-visit inlined Token Factors, to clean them up in case they have been
2265 // removed. Skip the first Token Factor, as this is the current node.
2266 for (unsigned i = 1, e = TFs.size(); i < e; i++)
2267 AddToWorklist(TFs[i]);
2268
2269 // Remove Nodes that are chained to another node in the list. Do so
2270 // by walking up chains breadth-first, stopping when we've seen
2271 // another operand. In general we must climb to the EntryNode, but we can exit
2272 // early if we find all remaining work is associated with just one operand as
2273 // no further pruning is possible.
2274
2275 // List of nodes to search through and original Ops from which they originate.
2276 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
2277 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2278 SmallPtrSet<SDNode *, 16> SeenChains;
2279 bool DidPruneOps = false;
2280
2281 unsigned NumLeftToConsider = 0;
2282 for (const SDValue &Op : Ops) {
2283 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2284 OpWorkCount.push_back(1);
2285 }
2286
2287 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2288 // If this is an Op, we can remove the op from the list. Re-mark any
2289 // search associated with it as from the current OpNumber.
2290 if (SeenOps.contains(Op)) {
2291 Changed = true;
2292 DidPruneOps = true;
2293 unsigned OrigOpNumber = 0;
2294 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2295 OrigOpNumber++;
2296 assert((OrigOpNumber != Ops.size()) &&
2297 "expected to find TokenFactor Operand");
2298 // Re-mark worklist from OrigOpNumber to OpNumber
2299 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2300 if (Worklist[i].second == OrigOpNumber) {
2301 Worklist[i].second = OpNumber;
2302 }
2303 }
2304 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2305 OpWorkCount[OrigOpNumber] = 0;
2306 NumLeftToConsider--;
2307 }
2308 // Add if it's a new chain
2309 if (SeenChains.insert(Op).second) {
2310 OpWorkCount[OpNumber]++;
2311 Worklist.push_back(std::make_pair(Op, OpNumber));
2312 }
2313 };
2314
2315 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2316 // We need to consider at least 2 Ops to prune.
2317 if (NumLeftToConsider <= 1)
2318 break;
2319 auto CurNode = Worklist[i].first;
2320 auto CurOpNumber = Worklist[i].second;
2321 assert((OpWorkCount[CurOpNumber] > 0) &&
2322 "Node should not appear in worklist");
2323 switch (CurNode->getOpcode()) {
2324 case ISD::EntryToken:
2325 // Hitting EntryToken is the only way for the search to terminate without
2326 // hitting
2327 // another operand's search. Prevent us from marking this operand
2328 // considered.
2329 NumLeftToConsider++;
2330 break;
2331 case ISD::TokenFactor:
2332 for (const SDValue &Op : CurNode->op_values())
2333 AddToWorklist(i, Op.getNode(), CurOpNumber);
2334 break;
2336 case ISD::LIFETIME_END:
2337 case ISD::CopyFromReg:
2338 case ISD::CopyToReg:
2339 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2340 break;
2341 default:
2342 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2343 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2344 break;
2345 }
2346 OpWorkCount[CurOpNumber]--;
2347 if (OpWorkCount[CurOpNumber] == 0)
2348 NumLeftToConsider--;
2349 }
2350
2351 // If we've changed things around then replace token factor.
2352 if (Changed) {
2353 SDValue Result;
2354 if (Ops.empty()) {
2355 // The entry token is the only possible outcome.
2356 Result = DAG.getEntryNode();
2357 } else {
2358 if (DidPruneOps) {
2359 SmallVector<SDValue, 8> PrunedOps;
2360 //
2361 for (const SDValue &Op : Ops) {
2362 if (SeenChains.count(Op.getNode()) == 0)
2363 PrunedOps.push_back(Op);
2364 }
2365 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2366 } else {
2367 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2368 }
2369 }
2370 return Result;
2371 }
2372 return SDValue();
2373}
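// For example, given
//   t5: ch = TokenFactor t3, EntryToken
//   t6: ch = TokenFactor t5, t4        (t5 has a single use)
// the inner token factor is inlined and the redundant EntryToken operand is
// dropped, so t6 becomes TokenFactor t3, t4. Operands whose chains are already
// reachable from another operand are then pruned by the breadth-first walk.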
2374
2375/// MERGE_VALUES can always be eliminated.
2376SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2377 WorklistRemover DeadNodes(*this);
2378 // Replacing results may cause a different MERGE_VALUES to suddenly
2379 // be CSE'd with N, and carry its uses with it. Iterate until no
2380 // uses remain, to ensure that the node can be safely deleted.
2381 // First add the users of this node to the work list so that they
2382 // can be tried again once they have new operands.
2383 AddUsersToWorklist(N);
2384 do {
2385 // Do as a single replacement to avoid rewalking use lists.
2386 SmallVector<SDValue, 8> Ops(N->op_begin(), N->op_end());
2387 DAG.ReplaceAllUsesWith(N, Ops.data());
2388 } while (!N->use_empty());
2389 deleteAndRecombine(N);
2390 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2391}
2392
2393/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2394/// ConstantSDNode pointer else nullptr.
2395 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2396 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2397 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2398}
2399
2400// isTruncateOf - If N is a truncate of some other value, return true, record
2401// the value being truncated in Op and which of Op's bits are zero/one in Known.
2402// This function computes KnownBits to avoid a duplicated call to
2403// computeKnownBits in the caller.
2404 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
2405 KnownBits &Known) {
2406 if (N->getOpcode() == ISD::TRUNCATE) {
2407 Op = N->getOperand(0);
2408 Known = DAG.computeKnownBits(Op);
2409 if (N->getFlags().hasNoUnsignedWrap())
2410 Known.Zero.setBitsFrom(N.getScalarValueSizeInBits());
2411 return true;
2412 }
2413
2414 if (N.getValueType().getScalarType() != MVT::i1 ||
2415 !sd_match(
2416 N, m_SetCC(m_Value(Op), m_Zero(), m_SpecificCondCode(ISD::SETNE))))
2417 return false;
2418
2419 Known = DAG.computeKnownBits(Op);
2420 return (Known.Zero | 1).isAllOnes();
2421}
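// For example, N = (truncate i8 t1:i32) yields Op = t1 plus t1's known bits,
// while an i1 N = (setcc ne t1, 0) only qualifies when the known bits prove
// that at most bit 0 of t1 can be set, i.e. the setcc is effectively a
// truncation to i1.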
2422
2423/// Return true if 'Use' is a load or a store that uses N as its base pointer
2424/// and that N may be folded in the load / store addressing mode.
2425 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2426 const TargetLowering &TLI) {
2427 EVT VT;
2428 unsigned AS;
2429
2430 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2431 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2432 return false;
2433 VT = LD->getMemoryVT();
2434 AS = LD->getAddressSpace();
2435 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2436 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2437 return false;
2438 VT = ST->getMemoryVT();
2439 AS = ST->getAddressSpace();
2440 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2441 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2442 return false;
2443 VT = LD->getMemoryVT();
2444 AS = LD->getAddressSpace();
2445 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2446 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2447 return false;
2448 VT = ST->getMemoryVT();
2449 AS = ST->getAddressSpace();
2450 } else {
2451 return false;
2452 }
2453
2454 TargetLowering::AddrMode AM;
2455 if (N->isAnyAdd()) {
2456 AM.HasBaseReg = true;
2457 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2458 if (Offset)
2459 // [reg +/- imm]
2460 AM.BaseOffs = Offset->getSExtValue();
2461 else
2462 // [reg +/- reg]
2463 AM.Scale = 1;
2464 } else if (N->getOpcode() == ISD::SUB) {
2465 AM.HasBaseReg = true;
2466 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2467 if (Offset)
2468 // [reg +/- imm]
2469 AM.BaseOffs = -Offset->getSExtValue();
2470 else
2471 // [reg +/- reg]
2472 AM.Scale = 1;
2473 } else {
2474 return false;
2475 }
2476
2477 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2478 VT.getTypeForEVT(*DAG.getContext()), AS);
2479}
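// For example, if N is (add %base, 16) and Use is a load addressed by N, the
// query is AM = {HasBaseReg = true, BaseOffs = 16}, and the fold is allowed
// only if the target reports [reg + 16] as a legal addressing mode for the
// loaded type in that address space.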
2480
2481/// This inverts a canonicalization in IR that replaces a variable select arm
2482/// with an identity constant. Codegen improves if we re-use the variable
2483/// operand rather than load a constant. This can also be converted into a
2484/// masked vector operation if the target supports it.
2485 static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
2486 bool ShouldCommuteOperands) {
2487 SDValue N0 = N->getOperand(0);
2488 SDValue N1 = N->getOperand(1);
2489
2490 // Match a select as operand 1. The identity constant that we are looking for
2491 // is only valid as operand 1 of a non-commutative binop.
2492 if (ShouldCommuteOperands)
2493 std::swap(N0, N1);
2494
2495 SDValue Cond, TVal, FVal;
2497 m_Value(FVal)))))
2498 return SDValue();
2499
2500 // We can't hoist all instructions because of immediate UB (not speculatable).
2501 // For example div/rem by zero.
2502 if (!DAG.isSafeToSpeculativelyExecuteNode(N))
2503 return SDValue();
2504
2505 unsigned SelOpcode = N1.getOpcode();
2506 unsigned Opcode = N->getOpcode();
2507 EVT VT = N->getValueType(0);
2508 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2509
2510 // This transform increases uses of N0, so freeze it to be safe.
2511 // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2512 unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
2513 if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo) &&
2514 TLI.shouldFoldSelectWithIdentityConstant(Opcode, VT, SelOpcode, N0,
2515 FVal)) {
2516 SDValue F0 = DAG.getFreeze(N0);
2517 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2518 return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2519 }
2520 // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2521 if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo) &&
2522 TLI.shouldFoldSelectWithIdentityConstant(Opcode, VT, SelOpcode, N0,
2523 TVal)) {
2524 SDValue F0 = DAG.getFreeze(N0);
2525 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2526 return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2527 }
2528
2529 return SDValue();
2530}
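// For example, with the additive identity 0 as the true arm:
//   add X, (vselect Cond, 0, Y) --> vselect Cond, freeze(X), (add freeze(X), Y)
// which reuses X instead of materializing a constant vector and can later be
// lowered as a masked add on targets that support it.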
2531
2532SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2533 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2534 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2535 "Unexpected binary operator");
2536
2537 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2538 return Sel;
2539
2540 if (TLI.isCommutativeBinOp(BO->getOpcode()))
2541 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2542 return Sel;
2543
2544 // Don't do this unless the old select is going away. We want to eliminate the
2545 // binary operator, not replace a binop with a select.
2546 // TODO: Handle ISD::SELECT_CC.
2547 unsigned SelOpNo = 0;
2548 SDValue Sel = BO->getOperand(0);
2549 auto BinOpcode = BO->getOpcode();
2550 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2551 SelOpNo = 1;
2552 Sel = BO->getOperand(1);
2553
2554 // Peek through trunc to shift amount type.
2555 if ((BinOpcode == ISD::SHL || BinOpcode == ISD::SRA ||
2556 BinOpcode == ISD::SRL) && Sel.hasOneUse()) {
2557 // This is valid when the truncated bits of x are already zero.
2558 SDValue Op;
2559 KnownBits Known;
2560 if (isTruncateOf(DAG, Sel, Op, Known) &&
2561 Known.countMaxActiveBits() <= Sel.getScalarValueSizeInBits())
2562 Sel = Op;
2563 }
2564 }
2565
2566 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2567 return SDValue();
2568
2569 SDValue CT = Sel.getOperand(1);
2570 if (!isConstantOrConstantVector(CT, true) &&
2571 !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2572 return SDValue();
2573
2574 SDValue CF = Sel.getOperand(2);
2575 if (!isConstantOrConstantVector(CF, true) &&
2576 !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2577 return SDValue();
2578
2579 // Bail out if any constants are opaque because we can't constant fold those.
2580 // The exception is "and" and "or" with either 0 or -1 in which case we can
2581 // propagate non constant operands into select. I.e.:
2582 // and (select Cond, 0, -1), X --> select Cond, 0, X
2583 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2584 bool CanFoldNonConst =
2585 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2586 ((isNullOrNullSplat(CT) && isAllOnesOrAllOnesSplat(CF)) ||
2587 (isNullOrNullSplat(CF) && isAllOnesOrAllOnesSplat(CT)));
2588
2589 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2590 if (!CanFoldNonConst &&
2591 !isConstantOrConstantVector(CBO, true) &&
2592 !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2593 return SDValue();
2594
2595 SDLoc DL(Sel);
2596 SDValue NewCT, NewCF;
2597 EVT VT = BO->getValueType(0);
2598
2599 if (CanFoldNonConst) {
2600 // If CBO is an opaque constant, we can't rely on getNode to constant fold.
2601 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
2602 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
2603 NewCT = CT;
2604 else
2605 NewCT = CBO;
2606
2607 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
2608 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
2609 NewCF = CF;
2610 else
2611 NewCF = CBO;
2612 } else {
2613 // We have a select-of-constants followed by a binary operator with a
2614 // constant. Eliminate the binop by pulling the constant math into the
2615 // select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT +
2616 // CBO, CF + CBO
2617 NewCT = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CT})
2618 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CT, CBO});
2619 if (!NewCT)
2620 return SDValue();
2621
2622 NewCF = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CF})
2623 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CF, CBO});
2624 if (!NewCF)
2625 return SDValue();
2626 }
2627
2628 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF, BO->getFlags());
2629}
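// Worked examples for the two paths above:
//   add (select Cond, 1, 2), 10 --> select Cond, 11, 12
//   and (select Cond, 0, -1), X --> select Cond, 0, X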
2630
2631 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL,
2632 SelectionDAG &DAG) {
2633 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2634 "Expecting add or sub");
2635
2636 // Match a constant operand and a zext operand for the math instruction:
2637 // add Z, C
2638 // sub C, Z
2639 bool IsAdd = N->getOpcode() == ISD::ADD;
2640 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2641 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2642 auto *CN = dyn_cast<ConstantSDNode>(C);
2643 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2644 return SDValue();
2645
2646 // Match the zext operand as a setcc of a boolean.
2647 if (Z.getOperand(0).getValueType() != MVT::i1)
2648 return SDValue();
2649
2650 // Match the compare as: setcc (X & 1), 0, eq.
2651 if (!sd_match(Z.getOperand(0), m_SetCC(m_And(m_Value(), m_One()), m_Zero(),
2652 m_SpecificCondCode(ISD::SETEQ))))
2653 return SDValue();
2654
2655 // We are adding/subtracting a constant and an inverted low bit. Turn that
2656 // into a subtract/add of the low bit with incremented/decremented constant:
2657 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2658 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2659 EVT VT = C.getValueType();
2660 SDValue LowBit = DAG.getZExtOrTrunc(Z.getOperand(0).getOperand(0), DL, VT);
2661 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT)
2662 : DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2663 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2664}
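// Worked example with C = 5:
//   add (zext i1 (seteq (X & 1), 0)), 5 --> sub 6, (zext (X & 1))
// Both sides evaluate to 6 when X is even and to 5 when X is odd.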
2665
2666// Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
2667SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
2668 SDValue N0 = N->getOperand(0);
2669 EVT VT = N0.getValueType();
2670 SDValue A, B;
2671
2672 if ((!LegalOperations || hasOperation(ISD::AVGCEILU, VT)) &&
2673 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2674 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2675 return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
2676 }
2677 if ((!LegalOperations || hasOperation(ISD::AVGCEILS, VT)) &&
2678 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2679 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2680 return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
2681 }
2682 return SDValue();
2683}
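// The identity behind this match: (A | B) - ((A ^ B) >> 1) == (A + B + 1) >> 1,
// a rounding-up average. For A = 5, B = 6:
// (5 | 6) - ((5 ^ 6) >> 1) = 7 - 1 = 6 == avgceilu(5, 6).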
2684
2685/// Try to fold a pointer arithmetic node.
2686/// This needs to be done separately from normal addition, because pointer
2687/// addition is not commutative.
2688SDValue DAGCombiner::visitPTRADD(SDNode *N) {
2689 SDValue N0 = N->getOperand(0);
2690 SDValue N1 = N->getOperand(1);
2691 EVT PtrVT = N0.getValueType();
2692 EVT IntVT = N1.getValueType();
2693 SDLoc DL(N);
2694
2695 // This is already ensured by an assert in SelectionDAG::getNode(). Several
2696 // combines here depend on this assumption.
2697 assert(PtrVT == IntVT &&
2698 "PTRADD with different operand types is not supported");
2699
2700 // fold (ptradd x, 0) -> x
2701 if (isNullConstant(N1))
2702 return N0;
2703
2704 // fold (ptradd 0, x) -> x
2705 if (PtrVT == IntVT && isNullConstant(N0))
2706 return N1;
2707
2708 if (N0.getOpcode() == ISD::PTRADD &&
2709 !reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1)) {
2710 SDValue X = N0.getOperand(0);
2711 SDValue Y = N0.getOperand(1);
2712 SDValue Z = N1;
2713 bool N0OneUse = N0.hasOneUse();
2714 bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2715 bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2716
2717 // (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
2718 // * y is a constant and (ptradd x, y) has one use; or
2719 // * y and z are both constants.
2720 if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
2721 // If both additions in the original were NUW, the new ones are as well.
2722 SDNodeFlags Flags =
2723 (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2724 SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
2725 AddToWorklist(Add.getNode());
2726 // We can't set InBounds even if both original ptradds were InBounds and
2727 // NUW: SDAG usually represents pointers as integers, therefore, the
2728 // matched pattern behaves as if it had implicit casts:
2729 // (ptradd inbounds (inttoptr (ptrtoint (ptradd inbounds x, y))), z)
2730 // The outer inbounds ptradd might therefore rely on a provenance that x
2731 // does not have.
2732 return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
2733 }
2734 }
2735
2736 // The following combines can turn in-bounds pointer arithmetic out of bounds.
2737 // That is problematic for settings like AArch64's CPA, which checks that
2738 // intermediate results of pointer arithmetic remain in bounds. The target
2739 // therefore needs to opt-in to enable them.
2741 DAG.getMachineFunction().getFunction(), PtrVT))
2742 return SDValue();
2743
2744 if (N0.getOpcode() == ISD::PTRADD && isa<ConstantSDNode>(N1)) {
2745 // Fold (ptradd (ptradd GA, v), c) -> (ptradd (ptradd GA, c) v) with
2746 // global address GA and constant c, such that c can be folded into GA.
2747 // TODO: Support constant vector splats.
2748 SDValue GAValue = N0.getOperand(0);
2750 if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(GAValue)) {
2751 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2752 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2753 // If both additions in the original were NUW, reassociation preserves
2754 // that.
2755 SDNodeFlags Flags =
2756 (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2757 // We can't set InBounds even if both original ptradds were InBounds and
2758 // NUW: SDAG usually represents pointers as integers, therefore, the
2759 // matched pattern behaves as if it had implicit casts:
2760 // (ptradd inbounds (inttoptr (ptrtoint (ptradd inbounds GA, v))), c)
2761 // The outer inbounds ptradd might therefore rely on a provenance that
2762 // GA does not have.
2763 SDValue Inner = DAG.getMemBasePlusOffset(GAValue, N1, DL, Flags);
2764 AddToWorklist(Inner.getNode());
2765 return DAG.getMemBasePlusOffset(Inner, N0.getOperand(1), DL, Flags);
2766 }
2767 }
2768 }
2769
2770 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse()) {
2771 // (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
2772 // y is not, and (add y, z) is used only once.
2773 // (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
2774 // z is not, and (add y, z) is used only once.
2775 // The goal is to move constant offsets to the outermost ptradd, to create
2776 // more opportunities to fold offsets into memory instructions.
2777 // Together with another combine above, this also implements
2778 // (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
2779 SDValue X = N0;
2780 SDValue Y = N1.getOperand(0);
2781 SDValue Z = N1.getOperand(1);
2782 bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2783 bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2784
2785 // If both additions in the original were NUW, reassociation preserves that.
2786 SDNodeFlags CommonFlags = N->getFlags() & N1->getFlags();
2787 SDNodeFlags ReassocFlags = CommonFlags & SDNodeFlags::NoUnsignedWrap;
2788 if (CommonFlags.hasNoUnsignedWrap()) {
2789 // If both operations are NUW and the PTRADD is inbounds, the offsets are
2790 // both non-negative, so the reassociated PTRADDs are also inbounds.
2791 ReassocFlags |= N->getFlags() & SDNodeFlags::InBounds;
2792 }
2793
2794 if (ZIsConstant != YIsConstant) {
2795 if (YIsConstant)
2796 std::swap(Y, Z);
2797 SDValue Inner = DAG.getMemBasePlusOffset(X, Y, DL, ReassocFlags);
2798 AddToWorklist(Inner.getNode());
2799 return DAG.getMemBasePlusOffset(Inner, Z, DL, ReassocFlags);
2800 }
2801 }
2802
2803 // Transform (ptradd a, b) -> (or disjoint a, b) if it is equivalent and if
2804 // that transformation can't block an offset folding at any use of the ptradd.
2805 // This should be done late, after legalization, so that it doesn't block
2806 // other ptradd combines that could enable more offset folding.
2807 if (LegalOperations && DAG.haveNoCommonBitsSet(N0, N1)) {
2808 bool TransformCannotBreakAddrMode = none_of(N->users(), [&](SDNode *User) {
2809 return canFoldInAddressingMode(N, User, DAG, TLI);
2810 });
2811
2812 if (TransformCannotBreakAddrMode)
2813 return DAG.getNode(ISD::OR, DL, PtrVT, N0, N1, SDNodeFlags::Disjoint);
2814 }
2815
2816 return SDValue();
2817}
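// For example, with constant offsets,
//   (ptradd (ptradd x, 8), 16)  --> (ptradd x, 24)
// and with a mixed index,
//   (ptradd x, (add %i, 16))    --> (ptradd (ptradd x, %i), 16)
// moving the constant to the outermost ptradd so later combines can fold it
// into the addressing mode of a memory access.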
2818
2819/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2820/// a shift and add with a different constant.
2822 SelectionDAG &DAG) {
2823 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2824 "Expecting add or sub");
2825
2826 // We need a constant operand for the add/sub, and the other operand is a
2827 // logical shift right: add (srl), C or sub C, (srl).
2828 bool IsAdd = N->getOpcode() == ISD::ADD;
2829 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2830 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2831 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2832 ShiftOp.getOpcode() != ISD::SRL)
2833 return SDValue();
2834
2835 // The shift must be of a 'not' value.
2836 SDValue Not = ShiftOp.getOperand(0);
2837 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2838 return SDValue();
2839
2840 // The shift must be moving the sign bit to the least-significant-bit.
2841 EVT VT = ShiftOp.getValueType();
2842 SDValue ShAmt = ShiftOp.getOperand(1);
2843 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2844 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2845 return SDValue();
2846
2847 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2848 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2849 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2850 if (SDValue NewC = DAG.FoldConstantArithmetic(
2851 IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2852 {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2853 SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2854 Not.getOperand(0), ShAmt);
2855 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2856 }
2857
2858 return SDValue();
2859}
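// Worked example for i32 and C = 7:
//   add (srl (not X), 31), 7 --> add (sra X, 31), 8
// For X >= 0 both sides evaluate to 8; for X < 0 both evaluate to 7, so the
// 'not' is removed at the cost of adjusting the constant.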
2860
2861static bool
2862 areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) {
2863 return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
2864 (isBitwiseNot(Op1) && Op1.getOperand(0) == Op0);
2865}
2866
2867/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2868/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2869/// are no common bits set in the operands).
2870SDValue DAGCombiner::visitADDLike(SDNode *N) {
2871 SDValue N0 = N->getOperand(0);
2872 SDValue N1 = N->getOperand(1);
2873 EVT VT = N0.getValueType();
2874 SDLoc DL(N);
2875
2876 // fold (add x, undef) -> undef
2877 if (N0.isUndef())
2878 return N0;
2879 if (N1.isUndef())
2880 return N1;
2881
2882 // fold (add c1, c2) -> c1+c2
2883 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2884 return C;
2885
2886 // canonicalize constant to RHS
2887 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2888 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2889 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2890
2891 if (areBitwiseNotOfEachother(N0, N1))
2892 return DAG.getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()), DL, VT);
2893
2894 // fold vector ops
2895 if (VT.isVector()) {
2896 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2897 return FoldedVOp;
2898
2899 // fold (add x, 0) -> x, vector edition
2900 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2901 return N0;
2902 }
2903
2904 // fold (add x, 0) -> x
2905 if (isNullConstant(N1))
2906 return N0;
2907
2908 if (N0.getOpcode() == ISD::SUB) {
2909 SDValue N00 = N0.getOperand(0);
2910 SDValue N01 = N0.getOperand(1);
2911
2912 // fold ((A-c1)+c2) -> (A+(c2-c1))
2913 if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2914 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2915
2916 // fold ((c1-A)+c2) -> (c1+c2)-A
2917 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2918 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2919 }
2920
2921 // add (sext i1 X), 1 -> zext (not i1 X)
2922 // We don't transform this pattern:
2923 // add (zext i1 X), -1 -> sext (not i1 X)
2924 // because most (?) targets generate better code for the zext form.
2925 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2926 isOneOrOneSplat(N1)) {
2927 SDValue X = N0.getOperand(0);
2928 if ((!LegalOperations ||
2929 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2930 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2931 X.getScalarValueSizeInBits() == 1) {
2932 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2933 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2934 }
2935 }
2936
2937 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2938 // iff (or x, c0) is equivalent to (add x, c0).
2939 // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2940 // iff (xor x, c0) is equivalent to (add x, c0).
2941 if (DAG.isADDLike(N0)) {
2942 SDValue N01 = N0.getOperand(1);
2943 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2944 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2945 }
2946
2947 if (SDValue NewSel = foldBinOpIntoSelect(N))
2948 return NewSel;
2949
2950 // reassociate add
2951 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2952 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2953 return RADD;
2954
2955 // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2956 // equivalent to (add x, c).
2957 // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
2958 // equivalent to (add x, c).
2959 // Do this optimization only when adding c does not introduce instructions
2960 // for adding carries.
2961 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2962 if (DAG.isADDLike(N0) && N0.hasOneUse() &&
2963 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2964 // If N0's type does not split or is a sign mask, it does not introduce
2965 // add carry.
2966 auto TyActn = TLI.getTypeAction(*DAG.getContext(), N0.getValueType());
2967 bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
2968 TyActn == TargetLoweringBase::TypePromoteInteger ||
2969 isMinSignedConstant(N0.getOperand(1));
2970 if (NoAddCarry)
2971 return DAG.getNode(
2972 ISD::ADD, DL, VT,
2973 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2974 N0.getOperand(1));
2975 }
2976 return SDValue();
2977 };
2978 if (SDValue Add = ReassociateAddOr(N0, N1))
2979 return Add;
2980 if (SDValue Add = ReassociateAddOr(N1, N0))
2981 return Add;
2982
2983 // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
2984 if (SDValue SD =
2985 reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
2986 return SD;
2987 }
2988
2989 SDValue A, B, C, D;
2990
2991 // fold ((0-A) + B) -> B-A
2992 if (sd_match(N0, m_Neg(m_Value(A))))
2993 return DAG.getNode(ISD::SUB, DL, VT, N1, A);
2994
2995 // fold (A + (0-B)) -> A-B
2996 if (sd_match(N1, m_Neg(m_Value(B))))
2997 return DAG.getNode(ISD::SUB, DL, VT, N0, B);
2998
2999 // fold (A+(B-A)) -> B
3000 if (sd_match(N1, m_Sub(m_Value(B), m_Specific(N0))))
3001 return B;
3002
3003 // fold ((B-A)+A) -> B
3004 if (sd_match(N0, m_Sub(m_Value(B), m_Specific(N1))))
3005 return B;
3006
3007 // fold ((A-B)+(C-A)) -> (C-B)
3008 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
3009 sd_match(N1, m_Sub(m_Value(C), m_Specific(A))))
3010 return DAG.getNode(ISD::SUB, DL, VT, C, B);
3011
3012 // fold ((A-B)+(B-C)) -> (A-C)
3013 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
3014 sd_match(N1, m_Sub(m_Specific(B), m_Value(C))))
3015 return DAG.getNode(ISD::SUB, DL, VT, A, C);
3016
3017 // fold (A+(B-(A+C))) to (B-C)
3018 // fold (A+(B-(C+A))) to (B-C)
3019 if (sd_match(N1, m_Sub(m_Value(B), m_Add(m_Specific(N0), m_Value(C)))))
3020 return DAG.getNode(ISD::SUB, DL, VT, B, C);
3021
3022 // fold (A+((B-A)+or-C)) to (B+or-C)
3023 if (sd_match(N1,
3024 m_AnyOf(m_Add(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)),
3025 m_Sub(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)))))
3026 return DAG.getNode(N1.getOpcode(), DL, VT, B, C);
3027
3028 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
3029 if (sd_match(N0, m_OneUse(m_Sub(m_Value(A), m_Value(B)))) &&
3030 sd_match(N1, m_OneUse(m_Sub(m_Value(C), m_Value(D)))) &&
3031 (isConstantOrConstantVector(A) || isConstantOrConstantVector(C)))
3032 return DAG.getNode(ISD::SUB, DL, VT,
3033 DAG.getNode(ISD::ADD, SDLoc(N0), VT, A, C),
3034 DAG.getNode(ISD::ADD, SDLoc(N1), VT, B, D));
3035
3036 // fold (add (umax X, C), -C) --> (usubsat X, C)
3037 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
3038 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
3039 return (!Max && !Op) ||
3040 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
3041 };
3042 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
3043 /*AllowUndefs*/ true))
3044 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
3045 N0.getOperand(1));
3046 }
3047
3048 if (SimplifyDemandedBits(SDValue(N, 0)))
3049 return SDValue(N, 0);
3050
3051 if (isOneOrOneSplat(N1)) {
3052 // fold (add (xor a, -1), 1) -> (sub 0, a)
3053 if (isBitwiseNot(N0))
3054 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
3055 N0.getOperand(0));
3056
3057 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
3058 if (N0.getOpcode() == ISD::ADD) {
3059 SDValue A, Xor;
3060
3061 if (isBitwiseNot(N0.getOperand(0))) {
3062 A = N0.getOperand(1);
3063 Xor = N0.getOperand(0);
3064 } else if (isBitwiseNot(N0.getOperand(1))) {
3065 A = N0.getOperand(0);
3066 Xor = N0.getOperand(1);
3067 }
3068
3069 if (Xor)
3070 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
3071 }
3072
3073 // Look for:
3074 // add (add x, y), 1
3075 // And if the target does not like this form then turn into:
3076 // sub y, (xor x, -1)
3077 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3078 N0.hasOneUse() &&
3079 // Limit this to after legalization if the add has wrap flags
3080 (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
3081 !N->getFlags().hasNoSignedWrap()))) {
3082 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3083 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
3084 }
3085 }
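// Illustrative note (editorial, not part of the upstream source): these +1
// folds all rely on the two's-complement identity ~a == -a - 1, e.g.
//   (xor a, -1) + 1  == -a         (sub 0, a)
//   (add x, y) + 1   == y - (~x)   (sub y, (xor x, -1))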
3086
3087 // (x - y) + -1 -> add (xor y, -1), x
3088 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
3089 isAllOnesOrAllOnesSplat(N1, /*AllowUndefs=*/true)) {
3090 SDValue Not = DAG.getNOT(DL, N0.getOperand(1), VT);
3091 return DAG.getNode(ISD::ADD, DL, VT, Not, N0.getOperand(0));
3092 }
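// Illustrative note (editorial, not part of the upstream source): with
// ~y == -y - 1, (x - y) + (-1) == x + (-y - 1) == (xor y, -1) + x, so the
// subtract-then-decrement becomes a NOT feeding a single add.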
3093
3094 // Fold add(mul(add(A, CA), CM), CB) -> add(mul(A, CM), CM*CA+CB).
3095 // This can help if the inner add has multiple uses.
3096 APInt CM, CA;
3097 if (ConstantSDNode *CB = dyn_cast<ConstantSDNode>(N1)) {
3098 if (VT.getScalarSizeInBits() <= 64) {
3099 if (sd_match(N0, m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
3100 m_ConstInt(CM)))) &&
3101 TLI.isLegalAddImmediate(
3102 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
3103 SDNodeFlags Flags;
3104 // If all the inputs are nuw, the outputs can be nuw. If all the inputs
3105 // are _also_ nsw, the outputs can be too.
3106 if (N->getFlags().hasNoUnsignedWrap() &&
3107 N0->getFlags().hasNoUnsignedWrap() &&
3108 N0.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
3109 Flags.setNoUnsignedWrap(true);
3110 if (N->getFlags().hasNoSignedWrap() &&
3111 N0->getFlags().hasNoSignedWrap() &&
3112 N0.getOperand(0)->getFlags().hasNoSignedWrap())
3113 Flags.setNoSignedWrap(true);
3114 }
3115 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
3116 DAG.getConstant(CM, DL, VT), Flags);
3117 return DAG.getNode(
3118 ISD::ADD, DL, VT, Mul,
3119 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
3120 }
3121 // Also look in case there is an intermediate add.
3122 if (sd_match(N0, m_OneUse(m_Add(
3123 m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
3124 m_ConstInt(CM))),
3125 m_Value(B)))) &&
3126 TLI.isLegalAddImmediate(
3127 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
3128 SDNodeFlags Flags;
3129 // If all the inputs are nuw, the outputs can be nuw. If all the inputs
3130 // are _also_ nsw, the outputs can be too.
3131 SDValue OMul =
3132 N0.getOperand(0) == B ? N0.getOperand(1) : N0.getOperand(0);
3133 if (N->getFlags().hasNoUnsignedWrap() &&
3134 N0->getFlags().hasNoUnsignedWrap() &&
3135 OMul->getFlags().hasNoUnsignedWrap() &&
3136 OMul.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
3137 Flags.setNoUnsignedWrap(true);
3138 if (N->getFlags().hasNoSignedWrap() &&
3139 N0->getFlags().hasNoSignedWrap() &&
3140 OMul->getFlags().hasNoSignedWrap() &&
3141 OMul.getOperand(0)->getFlags().hasNoSignedWrap())
3142 Flags.setNoSignedWrap(true);
3143 }
3144 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
3145 DAG.getConstant(CM, DL, VT), Flags);
3146 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N1), VT, Mul, B, Flags);
3147 return DAG.getNode(
3148 ISD::ADD, DL, VT, Add,
3149 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
3150 }
3151 }
3152 }
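// Illustrative note (editorial, not part of the upstream source): the constant
// folding above is plain distribution, e.g. with CA = 3, CM = 5, CB = 7:
//   ((A + 3) * 5) + 7 == (A * 5) + (3 * 5 + 7) == (A * 5) + 22
// which keeps a single use of A even when the inner add has other users.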
3153
3154 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
3155 return Combined;
3156
3157 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
3158 return Combined;
3159
3160 return SDValue();
3161}
3162
3163// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
3164// Attempt to form avgfloor(A, B) from ((A >> 1) + (B >> 1)) + (A & B & 1)
3165SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
3166 SDValue N0 = N->getOperand(0);
3167 EVT VT = N0.getValueType();
3168 SDValue A, B;
3169
3170 if ((!LegalOperations || hasOperation(ISD::AVGFLOORU, VT)) &&
3171 (sd_match(N,
3173 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One()))) ||
3176 m_Srl(m_Deferred(A), m_One()),
3177 m_Srl(m_Deferred(B), m_One()))))) {
3178 return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
3179 }
3180 if ((!LegalOperations || hasOperation(ISD::AVGFLOORS, VT)) &&
3181 (sd_match(N,
3183 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One()))) ||
3186 m_Sra(m_Deferred(A), m_One()),
3187 m_Sra(m_Deferred(B), m_One()))))) {
3188 return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
3189 }
3190
3191 return SDValue();
3192}
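// Illustrative note (editorial, not part of the upstream source): the first
// pattern works because A + B == 2*(A & B) + (A ^ B), so the overflow-free
// floor average is (A & B) + ((A ^ B) >> 1). For example A = 6, B = 3:
//   A & B = 2, A ^ B = 5, 2 + (5 >> 1) = 4 == (6 + 3) / 2 rounded down.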
3193
3194SDValue DAGCombiner::visitADD(SDNode *N) {
3195 SDValue N0 = N->getOperand(0);
3196 SDValue N1 = N->getOperand(1);
3197 EVT VT = N0.getValueType();
3198 SDLoc DL(N);
3199
3200 if (SDValue Combined = visitADDLike(N))
3201 return Combined;
3202
3203 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
3204 return V;
3205
3206 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
3207 return V;
3208
3209 if (SDValue V = MatchRotate(N0, N1, SDLoc(N), /*FromAdd=*/true))
3210 return V;
3211
3212 // Try to match AVGFLOOR fixedwidth pattern
3213 if (SDValue V = foldAddToAvg(N, DL))
3214 return V;
3215
3216 // fold (a+b) -> (a|b) iff a and b share no bits.
3217 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
3218 DAG.haveNoCommonBitsSet(N0, N1))
3219 return DAG.getNode(ISD::OR, DL, VT, N0, N1, SDNodeFlags::Disjoint);
3220
3221 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
3222 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
3223 const APInt &C0 = N0->getConstantOperandAPInt(0);
3224 const APInt &C1 = N1->getConstantOperandAPInt(0);
3225 return DAG.getVScale(DL, VT, C0 + C1);
3226 }
3227
3228 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
3229 if (N0.getOpcode() == ISD::ADD &&
3230 N0.getOperand(1).getOpcode() == ISD::VSCALE &&
3231 N1.getOpcode() == ISD::VSCALE) {
3232 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3233 const APInt &VS1 = N1->getConstantOperandAPInt(0);
3234 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
3235 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
3236 }
3237
3238 // Fold (add step_vector(c1), step_vector(c2)) to step_vector(c1+c2)
3239 if (N0.getOpcode() == ISD::STEP_VECTOR &&
3240 N1.getOpcode() == ISD::STEP_VECTOR) {
3241 const APInt &C0 = N0->getConstantOperandAPInt(0);
3242 const APInt &C1 = N1->getConstantOperandAPInt(0);
3243 APInt NewStep = C0 + C1;
3244 return DAG.getStepVector(DL, VT, NewStep);
3245 }
3246
3247 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
3248 if (N0.getOpcode() == ISD::ADD &&
3249 N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR &&
3250 N1.getOpcode() == ISD::STEP_VECTOR) {
3251 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3252 const APInt &SV1 = N1->getConstantOperandAPInt(0);
3253 APInt NewStep = SV0 + SV1;
3254 SDValue SV = DAG.getStepVector(DL, VT, NewStep);
3255 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
3256 }
3257
3258 return SDValue();
3259}
3260
3261SDValue DAGCombiner::visitADDSAT(SDNode *N) {
3262 unsigned Opcode = N->getOpcode();
3263 SDValue N0 = N->getOperand(0);
3264 SDValue N1 = N->getOperand(1);
3265 EVT VT = N0.getValueType();
3266 bool IsSigned = Opcode == ISD::SADDSAT;
3267 SDLoc DL(N);
3268
3269 // fold (add_sat x, undef) -> -1
3270 if (N0.isUndef() || N1.isUndef())
3271 return DAG.getAllOnesConstant(DL, VT);
3272
3273 // fold (add_sat c1, c2) -> c3
3274 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
3275 return C;
3276
3277 // canonicalize constant to RHS
3278 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3279 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3280 return DAG.getNode(Opcode, DL, VT, N1, N0);
3281
3282 // fold vector ops
3283 if (VT.isVector()) {
3284 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3285 return FoldedVOp;
3286
3287 // fold (add_sat x, 0) -> x, vector edition
3288 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3289 return N0;
3290 }
3291
3292 // fold (add_sat x, 0) -> x
3293 if (isNullConstant(N1))
3294 return N0;
3295
3296 // If it cannot overflow, transform into an add.
3297 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3298 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
3299
3300 return SDValue();
3301}
3302
3303 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V,
3304 bool ForceCarryReconstruction = false) {
3305 bool Masked = false;
3306
3307 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
3308 while (true) {
3309 if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
3310 return V;
3311
3312 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
3313 V = V.getOperand(0);
3314 continue;
3315 }
3316
3317 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
3318 if (ForceCarryReconstruction)
3319 return V;
3320
3321 Masked = true;
3322 V = V.getOperand(0);
3323 continue;
3324 }
3325
3326 break;
3327 }
3328
3329 // If this is not a carry, return.
3330 if (V.getResNo() != 1)
3331 return SDValue();
3332
3333 if (V.getOpcode() != ISD::UADDO_CARRY && V.getOpcode() != ISD::USUBO_CARRY &&
3334 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
3335 return SDValue();
3336
3337 EVT VT = V->getValueType(0);
3338 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
3339 return SDValue();
3340
3341 // If the result is masked, then no matter what kind of bool it is we can
3342 // return. If it isn't, then we need to make sure the bool type is either 0 or
3343 // 1 and not other values.
3344 if (Masked ||
3345 TLI.getBooleanContents(V.getValueType()) ==
3346 TargetLowering::ZeroOrOneBooleanContent)
3347 return V;
3348
3349 return SDValue();
3350}
3351
3352/// Given the operands of an add/sub operation, see if the 2nd operand is a
3353/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
3354/// the opcode and bypass the mask operation.
3355static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
3356 SelectionDAG &DAG, const SDLoc &DL) {
3357 if (N1.getOpcode() == ISD::ZERO_EXTEND)
3358 N1 = N1.getOperand(0);
3359
3360 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
3361 return SDValue();
3362
3363 EVT VT = N0.getValueType();
3364 SDValue N10 = N1.getOperand(0);
3365 if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
3366 N10 = N10.getOperand(0);
3367
3368 if (N10.getValueType() != VT)
3369 return SDValue();
3370
3371 if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
3372 return SDValue();
3373
3374 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
3375 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
3376 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
3377}
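// Illustrative note (editorial, not part of the upstream source): the sign-bit
// check above proves the masked source is either 0 or -1 (all ones). Then the
// masked value is 0 or 1 and N0 + (X & 1) == N0 - X, e.g. X = -1 gives N0 + 1
// in both forms, and X = 0 gives N0 in both forms.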
3378
3379/// Helper for doing combines based on N0 and N1 being added to each other.
3380SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
3381 SDNode *LocReference) {
3382 EVT VT = N0.getValueType();
3383 SDLoc DL(LocReference);
3384
3385 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
3386 SDValue Y, N;
3387 if (sd_match(N1, m_Shl(m_Neg(m_Value(Y)), m_Value(N))))
3388 return DAG.getNode(ISD::SUB, DL, VT, N0,
3389 DAG.getNode(ISD::SHL, DL, VT, Y, N));
3390
3391 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
3392 return V;
3393
3394 // Look for:
3395 // add (add x, 1), y
3396 // And if the target does not like this form then turn into:
3397 // sub y, (xor x, -1)
3398 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3399 N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
3400 // Limit this to after legalization if the add has wrap flags
3401 (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
3402 !N0->getFlags().hasNoSignedWrap()))) {
3403 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3404 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
3405 }
3406
3407 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
3408 // Hoist one-use subtraction by non-opaque constant:
3409 // (x - C) + y -> (x + y) - C
3410 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3411 if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3412 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
3413 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
3414 }
3415 // Hoist one-use subtraction from non-opaque constant:
3416 // (C - x) + y -> (y - x) + C
3417 if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3418 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
3419 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
3420 }
3421 }
3422
3423 // add (mul x, C), x -> mul x, C+1
3424 if (N0.getOpcode() == ISD::MUL && N0.getOperand(0) == N1 &&
3425 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true) &&
3426 N0.hasOneUse()) {
3427 SDValue NewC = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
3428 DAG.getConstant(1, DL, VT));
3429 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), NewC);
3430 }
3431
3432 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
3433 // rather than 'add 0/-1' (the zext should get folded).
3434 // add (sext i1 Y), X --> sub X, (zext i1 Y)
3435 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
3436 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
3437 TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
3438 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
3439 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
3440 }
3441
3442 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
3443 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3444 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3445 if (TN->getVT() == MVT::i1) {
3446 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3447 DAG.getConstant(1, DL, VT));
3448 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
3449 }
3450 }
3451
3452 // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3453 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1)) &&
3454 N1.getResNo() == 0)
3455 return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
3456 N0, N1.getOperand(0), N1.getOperand(2));
3457
3458 // (add X, Carry) -> (uaddo_carry X, 0, Carry)
3459 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3460 if (SDValue Carry = getAsCarry(TLI, N1))
3461 return DAG.getNode(ISD::UADDO_CARRY, DL,
3462 DAG.getVTList(VT, Carry.getValueType()), N0,
3463 DAG.getConstant(0, DL, VT), Carry);
3464
3465 return SDValue();
3466}
3467
3468SDValue DAGCombiner::visitADDC(SDNode *N) {
3469 SDValue N0 = N->getOperand(0);
3470 SDValue N1 = N->getOperand(1);
3471 EVT VT = N0.getValueType();
3472 SDLoc DL(N);
3473
3474 // If the flag result is dead, turn this into an ADD.
3475 if (!N->hasAnyUseOfValue(1))
3476 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3477 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3478
3479 // canonicalize constant to RHS.
3480 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3481 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3482 if (N0C && !N1C)
3483 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
3484
3485 // fold (addc x, 0) -> x + no carry out
3486 if (isNullConstant(N1))
3487 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
3488 DL, MVT::Glue));
3489
3490 // If it cannot overflow, transform into an add.
3492 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3493 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3494
3495 return SDValue();
3496}
3497
3498/**
3499 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
3500 * then the flip also occurs if computing the inverse is the same cost.
3501 * This function returns an empty SDValue in case it cannot flip the boolean
3502 * without increasing the cost of the computation. If you want to flip a boolean
3503 * no matter what, use DAG.getLogicalNOT.
3504 */
3506 const TargetLowering &TLI,
3507 bool Force) {
3508 if (Force && isa<ConstantSDNode>(V))
3509 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3510
3511 if (V.getOpcode() != ISD::XOR)
3512 return SDValue();
3513
3514 if (DAG.isBoolConstant(V.getOperand(1)) == true)
3515 return V.getOperand(0);
3516 if (Force && isConstOrConstSplat(V.getOperand(1), false))
3517 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3518 return SDValue();
3519}
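// Illustrative note (editorial, not part of the upstream source): a boolean
// that is already computed as (xor b, true-constant) is the negation of b, so
// the flip is free; returning b avoids materialising an extra logical NOT.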
3520
3521SDValue DAGCombiner::visitADDO(SDNode *N) {
3522 SDValue N0 = N->getOperand(0);
3523 SDValue N1 = N->getOperand(1);
3524 EVT VT = N0.getValueType();
3525 bool IsSigned = (ISD::SADDO == N->getOpcode());
3526
3527 EVT CarryVT = N->getValueType(1);
3528 SDLoc DL(N);
3529
3530 // If the flag result is dead, turn this into an ADD.
3531 if (!N->hasAnyUseOfValue(1))
3532 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3533 DAG.getUNDEF(CarryVT));
3534
3535 // canonicalize constant to RHS.
3536 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3537 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3538 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
3539
3540 // fold (addo x, 0) -> x + no carry out
3541 if (isNullOrNullSplat(N1))
3542 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3543
3544 // If it cannot overflow, transform into an add.
3545 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3546 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3547 DAG.getConstant(0, DL, CarryVT));
3548
3549 if (IsSigned) {
3550 // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
3551 if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
3552 return DAG.getNode(ISD::SSUBO, DL, N->getVTList(),
3553 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3554 } else {
3555 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
3556 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
3557 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
3558 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3559 return CombineTo(
3560 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3561 }
3562
3563 if (SDValue Combined = visitUADDOLike(N0, N1, N))
3564 return Combined;
3565
3566 if (SDValue Combined = visitUADDOLike(N1, N0, N))
3567 return Combined;
3568 }
3569
3570 return SDValue();
3571}
3572
3573SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
3574 EVT VT = N0.getValueType();
3575 if (VT.isVector())
3576 return SDValue();
3577
3578 // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3579 // If Y + 1 cannot overflow.
3580 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1))) {
3581 SDValue Y = N1.getOperand(0);
3582 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
3583 if (DAG.computeOverflowForUnsignedAdd(Y, One) == SelectionDAG::OFK_Never)
3584 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
3585 N1.getOperand(2));
3586 }
3587
3588 // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
3589 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3590 if (SDValue Carry = getAsCarry(TLI, N1))
3591 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
3592 DAG.getConstant(0, SDLoc(N), VT), Carry);
3593
3594 return SDValue();
3595}
3596
3597SDValue DAGCombiner::visitADDE(SDNode *N) {
3598 SDValue N0 = N->getOperand(0);
3599 SDValue N1 = N->getOperand(1);
3600 SDValue CarryIn = N->getOperand(2);
3601
3602 // canonicalize constant to RHS
3603 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3604 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3605 if (N0C && !N1C)
3606 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3607 N1, N0, CarryIn);
3608
3609 // fold (adde x, y, false) -> (addc x, y)
3610 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3611 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3612
3613 return SDValue();
3614}
3615
3616SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
3617 SDValue N0 = N->getOperand(0);
3618 SDValue N1 = N->getOperand(1);
3619 SDValue CarryIn = N->getOperand(2);
3620 SDLoc DL(N);
3621
3622 // canonicalize constant to RHS
3623 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3624 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3625 if (N0C && !N1C)
3626 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3627
3628 // fold (uaddo_carry x, y, false) -> (uaddo x, y)
3629 if (isNullConstant(CarryIn)) {
3630 if (!LegalOperations ||
3631 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3632 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3633 }
3634
3635 // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3636 if (isNullConstant(N0) && isNullConstant(N1)) {
3637 EVT VT = N0.getValueType();
3638 EVT CarryVT = CarryIn.getValueType();
3639 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3640 AddToWorklist(CarryExt.getNode());
3641 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3642 DAG.getConstant(1, DL, VT)),
3643 DAG.getConstant(0, DL, CarryVT));
3644 }
3645
3646 if (SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn, N))
3647 return Combined;
3648
3649 if (SDValue Combined = visitUADDO_CARRYLike(N1, N0, CarryIn, N))
3650 return Combined;
3651
3652 // We want to avoid useless duplication.
3653 // TODO: This is done automatically for binary operations. As UADDO_CARRY is
3654 // not a binary operation, it is not really possible to leverage this
3655 // existing mechanism for it. However, if more operations require the same
3656 // deduplication logic, then it may be worth generalizing.
3657 SDValue Ops[] = {N1, N0, CarryIn};
3658 SDNode *CSENode =
3659 DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
3660 if (CSENode)
3661 return SDValue(CSENode, 0);
3662
3663 return SDValue();
3664}
3665
3666/**
3667 * If we are facing some sort of diamond carry propagation pattern try to
3668 * break it up to generate something like:
3669 * (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
3670 *
3671 * The end result is usually an increase in the number of operations required, but
3672 * because the
3672 * carry is now linearized, other transforms can kick in and optimize the DAG.
3673 *
3674 * Patterns typically look something like
3675 * (uaddo A, B)
3676 * / \
3677 * Carry Sum
3678 * | \
3679 * | (uaddo_carry *, 0, Z)
3680 * | /
3681 * \ Carry
3682 * | /
3683 * (uaddo_carry X, *, *)
3684 *
3685 * But numerous variations exist. Our goal is to identify A, B, X and Z and
3686 * produce a combine with a single path for carry propagation.
3687 */
3688 static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner,
3689 SelectionDAG &DAG, SDValue X,
3690 SDValue Carry0, SDValue Carry1,
3691 SDNode *N) {
3692 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3693 return SDValue();
3694 if (Carry1.getOpcode() != ISD::UADDO)
3695 return SDValue();
3696
3697 SDValue Z;
3698
3699 /**
3700 * First look for a suitable Z. It will present itself in the form of
3701 * (uaddo_carry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3702 */
3703 if (Carry0.getOpcode() == ISD::UADDO_CARRY &&
3704 isNullConstant(Carry0.getOperand(1))) {
3705 Z = Carry0.getOperand(2);
3706 } else if (Carry0.getOpcode() == ISD::UADDO &&
3707 isOneConstant(Carry0.getOperand(1))) {
3708 EVT VT = Carry0->getValueType(1);
3709 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3710 } else {
3711 // We couldn't find a suitable Z.
3712 return SDValue();
3713 }
3714
3715
3716 auto cancelDiamond = [&](SDValue A,SDValue B) {
3717 SDLoc DL(N);
3718 SDValue NewY =
3719 DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
3720 Combiner.AddToWorklist(NewY.getNode());
3721 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
3722 DAG.getConstant(0, DL, X.getValueType()),
3723 NewY.getValue(1));
3724 };
3725
3726 /**
3727 * (uaddo A, B)
3728 * |
3729 * Sum
3730 * |
3731 * (uaddo_carry *, 0, Z)
3732 */
3733 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3734 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3735 }
3736
3737 /**
3738 * (uaddo_carry A, 0, Z)
3739 * |
3740 * Sum
3741 * |
3742 * (uaddo *, B)
3743 */
3744 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3745 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3746 }
3747
3748 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3749 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3750 }
3751
3752 return SDValue();
3753}
3754
3755// If we are facing some sort of diamond carry/borrow in/out pattern try to
3756// match patterns like:
3757//
3758// (uaddo A, B) CarryIn
3759// | \ |
3760// | \ |
3761// PartialSum PartialCarryOutX /
3762// | | /
3763// | ____|____________/
3764// | / |
3765// (uaddo *, *) \________
3766// | \ \
3767// | \ |
3768// | PartialCarryOutY |
3769// | \ |
3770// | \ /
3771// AddCarrySum | ______/
3772// | /
3773// CarryOut = (or *, *)
3774//
3775// And generate UADDO_CARRY (or USUBO_CARRY) with two result values:
3776//
3777// {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn)
3778//
3779// Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY
3780// with a single path for carry/borrow out propagation.
3781 static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
3782 SDValue N0, SDValue N1, SDNode *N) {
3783 SDValue Carry0 = getAsCarry(TLI, N0);
3784 if (!Carry0)
3785 return SDValue();
3786 SDValue Carry1 = getAsCarry(TLI, N1);
3787 if (!Carry1)
3788 return SDValue();
3789
3790 unsigned Opcode = Carry0.getOpcode();
3791 if (Opcode != Carry1.getOpcode())
3792 return SDValue();
3793 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3794 return SDValue();
3795 // Guarantee identical type of CarryOut
3796 EVT CarryOutType = N->getValueType(0);
3797 if (CarryOutType != Carry0.getValue(1).getValueType() ||
3798 CarryOutType != Carry1.getValue(1).getValueType())
3799 return SDValue();
3800
3801 // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3802 // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3803 if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3804 std::swap(Carry0, Carry1);
3805
3806 // Check if nodes are connected in expected way.
3807 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3808 Carry1.getOperand(1) != Carry0.getValue(0))
3809 return SDValue();
3810
3811 // The carry in value must be on the righthand side for subtraction.
3812 unsigned CarryInOperandNum =
3813 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3814 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3815 return SDValue();
3816 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3817
3818 unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
3819 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3820 return SDValue();
3821
3822 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3823 CarryIn = getAsCarry(TLI, CarryIn, true);
3824 if (!CarryIn)
3825 return SDValue();
3826
3827 SDLoc DL(N);
3828 CarryIn = DAG.getBoolExtOrTrunc(CarryIn, DL, Carry1->getValueType(1),
3829 Carry1->getValueType(0));
3830 SDValue Merged =
3831 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3832 Carry0.getOperand(1), CarryIn);
3833
3834 // Please note that because we have proven that the result of the UADDO/USUBO
3835 // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
3836 // therefore prove that if the first UADDO/USUBO overflows, the second
3837 // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
3838 // maximum value.
3839 //
3840 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3841 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3842 //
3843 // This is important because it means that OR and XOR can be used to merge
3844 // carry flags; and that AND can return a constant zero.
3845 //
3846 // TODO: match other operations that can merge flags (ADD, etc)
3847 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3848 if (N->getOpcode() == ISD::AND)
3849 return DAG.getConstant(0, DL, CarryOutType);
3850 return Merged.getValue(1);
3851}
3852
3853SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
3854 SDValue CarryIn, SDNode *N) {
3855 // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
3856 // carry.
3857 if (isBitwiseNot(N0))
3858 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3859 SDLoc DL(N);
3860 SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
3861 N0.getOperand(0), NotC);
3862 return CombineTo(
3863 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3864 }
3865
3866 // Iff the flag result is dead:
3867 // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
3868 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3869 // or the dependency between the instructions.
3870 if ((N0.getOpcode() == ISD::ADD ||
3871 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3872 N0.getValue(1) != CarryIn)) &&
3873 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3874 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
3875 N0.getOperand(0), N0.getOperand(1), CarryIn);
3876
3877 /**
3878 * When one of the uaddo_carry argument is itself a carry, we may be facing
3879 * a diamond carry propagation. In which case we try to transform the DAG
3880 * to ensure linear carry propagation if that is possible.
3881 */
3882 if (auto Y = getAsCarry(TLI, N1)) {
3883 // Because both are carries, Y and Z can be swapped.
3884 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3885 return R;
3886 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3887 return R;
3888 }
3889
3890 return SDValue();
3891}
3892
3893SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1,
3894 SDValue CarryIn, SDNode *N) {
3895 // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
3896 if (isBitwiseNot(N0)) {
3897 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true))
3898 return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1,
3899 N0.getOperand(0), NotC);
3900 }
3901
3902 return SDValue();
3903}
3904
3905SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3906 SDValue N0 = N->getOperand(0);
3907 SDValue N1 = N->getOperand(1);
3908 SDValue CarryIn = N->getOperand(2);
3909 SDLoc DL(N);
3910
3911 // canonicalize constant to RHS
3912 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3913 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3914 if (N0C && !N1C)
3915 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3916
3917 // fold (saddo_carry x, y, false) -> (saddo x, y)
3918 if (isNullConstant(CarryIn)) {
3919 if (!LegalOperations ||
3920 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3921 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3922 }
3923
3924 if (SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn, N))
3925 return Combined;
3926
3927 if (SDValue Combined = visitSADDO_CARRYLike(N1, N0, CarryIn, N))
3928 return Combined;
3929
3930 return SDValue();
3931}
3932
3933// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3934// clamp/truncation if necessary.
3935 static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3936 SDValue RHS, SelectionDAG &DAG,
3937 const SDLoc &DL) {
3938 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3939 "Illegal truncation");
3940
3941 if (DstVT == SrcVT)
3942 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3943
3944 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3945 // clamping RHS.
3946 APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3947 DstVT.getScalarSizeInBits());
3948 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3949 return SDValue();
3950
3951 SDValue SatLimit =
3952 DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3953 DstVT.getScalarSizeInBits()),
3954 DL, SrcVT);
3955 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3956 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3957 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3958 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3959}
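// Illustrative note (editorial, not part of the upstream source): when an i32
// LHS is known to fit in i8 (upper 24 bits zero), e.g. LHS = 200:
//   RHS = 300 -> umin(300, 255) = 255, usubsat.i8(200, 255) = 0
//   RHS = 50  -> umin(50, 255)  = 50,  usubsat.i8(200, 50)  = 150
// matching the full-width usubsat result truncated to i8.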
3960
3961// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3962// usubsat(a,b), optionally as a truncated type.
3963SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL) {
3964 if (N->getOpcode() != ISD::SUB ||
3965 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3966 return SDValue();
3967
3968 EVT SubVT = N->getValueType(0);
3969 SDValue Op0 = N->getOperand(0);
3970 SDValue Op1 = N->getOperand(1);
3971
3972 // Try to find umax(a,b) - b or a - umin(a,b) patterns
3973 // that may be converted to usubsat(a,b).
3974 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3975 SDValue MaxLHS = Op0.getOperand(0);
3976 SDValue MaxRHS = Op0.getOperand(1);
3977 if (MaxLHS == Op1)
3978 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, DL);
3979 if (MaxRHS == Op1)
3980 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, DL);
3981 }
3982
3983 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3984 SDValue MinLHS = Op1.getOperand(0);
3985 SDValue MinRHS = Op1.getOperand(1);
3986 if (MinLHS == Op0)
3987 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, DL);
3988 if (MinRHS == Op0)
3989 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, DL);
3990 }
3991
3992 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3993 if (Op1.getOpcode() == ISD::TRUNCATE &&
3994 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3995 Op1.getOperand(0).hasOneUse()) {
3996 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3997 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3998 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3999 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
4000 DAG, DL);
4001 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
4002 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
4003 DAG, DL);
4004 }
4005
4006 return SDValue();
4007}
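// Illustrative note (editorial, not part of the upstream source):
// umax(a,b) - b is usubsat(a,b) by case split: if a >= b the result is a - b,
// otherwise umax(a,b) == b and the result is 0. The a - umin(a,b) form is the
// symmetric case.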
4008
4009// Refinement of DAG/Type Legalisation (promotion) when CTLZ is used for
4010 // counting leading ones. Broadly, it replaces the subtraction with a left
4011// shift.
4012//
4013// * DAG Legalisation Pattern:
4014//
4015// (sub (ctlz (zeroextend (not Src)))
4016// BitWidthDiff)
4017//
4018// if BitWidthDiff == BitWidth(Node) - BitWidth(Src)
4019// -->
4020//
4021// (ctlz_zero_undef (not (shl (anyextend Src)
4022// BitWidthDiff)))
4023//
4024// * Type Legalisation Pattern:
4025//
4026// (sub (ctlz (and (xor Src XorMask)
4027// AndMask))
4028// BitWidthDiff)
4029//
4030// if AndMask has only trailing ones
4031// and MaskBitWidth(AndMask) == BitWidth(Node) - BitWidthDiff
4032// and XorMask has more trailing ones than AndMask
4033// -->
4034//
4035// (ctlz_zero_undef (not (shl Src BitWidthDiff)))
4036template <class MatchContextClass>
4038 const SDLoc DL(N);
4039 SDValue N0 = N->getOperand(0);
4040 EVT VT = N0.getValueType();
4041 unsigned BitWidth = VT.getScalarSizeInBits();
4042
4043 MatchContextClass Matcher(DAG, DAG.getTargetLoweringInfo(), N);
4044
4045 APInt AndMask;
4046 APInt XorMask;
4047 APInt BitWidthDiff;
4048
4049 SDValue CtlzOp;
4050 SDValue Src;
4051
4052 if (!sd_context_match(
4053 N, Matcher, m_Sub(m_Ctlz(m_Value(CtlzOp)), m_ConstInt(BitWidthDiff))))
4054 return SDValue();
4055
4056 if (sd_context_match(CtlzOp, Matcher, m_ZExt(m_Not(m_Value(Src))))) {
4057 // DAG Legalisation Pattern:
4058 // (sub (ctlz (zero_extend (not Op)) BitWidthDiff))
4059 if ((BitWidth - Src.getValueType().getScalarSizeInBits()) != BitWidthDiff)
4060 return SDValue();
4061
4062 Src = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Src);
4063 } else if (sd_context_match(CtlzOp, Matcher,
4064 m_And(m_Xor(m_Value(Src), m_ConstInt(XorMask)),
4065 m_ConstInt(AndMask)))) {
4066 // Type Legalisation Pattern:
4067 // (sub (ctlz (and (xor Op XorMask) AndMask)) BitWidthDiff)
4068 if (BitWidthDiff.getZExtValue() >= BitWidth)
4069 return SDValue();
4070 unsigned AndMaskWidth = BitWidth - BitWidthDiff.getZExtValue();
4071 if (!(AndMask.isMask(AndMaskWidth) && XorMask.countr_one() >= AndMaskWidth))
4072 return SDValue();
4073 } else
4074 return SDValue();
4075
4076 SDValue ShiftConst = DAG.getShiftAmountConstant(BitWidthDiff, VT, DL);
4077 SDValue LShift = Matcher.getNode(ISD::SHL, DL, VT, Src, ShiftConst);
4078 SDValue Not =
4079 Matcher.getNode(ISD::XOR, DL, VT, LShift, DAG.getAllOnesConstant(DL, VT));
4080
4081 return Matcher.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, Not);
4082}
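// Illustrative note (editorial, not part of the upstream source): counting the
// leading ones of an i8 value X promoted to i32 normally costs
//   ctlz(zext(~X)) - 24.
// Shifting X into the top byte instead, ~(X << 24) has its low 24 bits all set,
// so ctlz_zero_undef stops inside the top byte and already equals the number of
// leading ones of X; the trailing subtraction disappears.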
4083
4084// Fold sub(x, mul(divrem(x,y)[0], y)) to divrem(x, y)[1]
4085 static SDValue foldRemainderIdiom(SDNode *N, SelectionDAG &DAG,
4086 const SDLoc &DL) {
4087 assert(N->getOpcode() == ISD::SUB && "Node must be a SUB");
4088 SDValue Sub0 = N->getOperand(0);
4089 SDValue Sub1 = N->getOperand(1);
4090
4091 auto CheckAndFoldMulCase = [&](SDValue DivRem, SDValue MaybeY) -> SDValue {
4092 if ((DivRem.getOpcode() == ISD::SDIVREM ||
4093 DivRem.getOpcode() == ISD::UDIVREM) &&
4094 DivRem.getResNo() == 0 && DivRem.getOperand(0) == Sub0 &&
4095 DivRem.getOperand(1) == MaybeY) {
4096 return SDValue(DivRem.getNode(), 1);
4097 }
4098 return SDValue();
4099 };
4100
4101 if (Sub1.getOpcode() == ISD::MUL) {
4102 // (sub x, (mul divrem(x,y)[0], y))
4103 SDValue Mul0 = Sub1.getOperand(0);
4104 SDValue Mul1 = Sub1.getOperand(1);
4105
4106 if (SDValue Res = CheckAndFoldMulCase(Mul0, Mul1))
4107 return Res;
4108
4109 if (SDValue Res = CheckAndFoldMulCase(Mul1, Mul0))
4110 return Res;
4111
4112 } else if (Sub1.getOpcode() == ISD::SHL) {
4113 // Handle (sub x, (shl divrem(x,y)[0], C)) where y = 1 << C
4114 SDValue Shl0 = Sub1.getOperand(0);
4115 SDValue Shl1 = Sub1.getOperand(1);
4116 // Check if Shl0 is divrem(x, Y)[0]
4117 if ((Shl0.getOpcode() == ISD::SDIVREM ||
4118 Shl0.getOpcode() == ISD::UDIVREM) &&
4119 Shl0.getResNo() == 0 && Shl0.getOperand(0) == Sub0) {
4120
4121 SDValue Divisor = Shl0.getOperand(1);
4122
4123 ConstantSDNode *DivC = isConstOrConstSplat(Divisor);
4124 ConstantSDNode *ShC = isConstOrConstSplat(Shl1);
4125 if (!DivC || !ShC)
4126 return SDValue();
4127
4128 if (DivC->getAPIntValue().isPowerOf2() &&
4129 DivC->getAPIntValue().logBase2() == ShC->getAPIntValue())
4130 return SDValue(Shl0.getNode(), 1);
4131 }
4132 }
4133 return SDValue();
4134}
4135
4136 // Since it may not be valid to emit a fold to zero for vector initializers,
4137// check if we can before folding.
4138static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
4139 SelectionDAG &DAG, bool LegalOperations) {
4140 if (!VT.isVector())
4141 return DAG.getConstant(0, DL, VT);
4142 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
4143 return DAG.getConstant(0, DL, VT);
4144 return SDValue();
4145}
4146
4147SDValue DAGCombiner::visitSUB(SDNode *N) {
4148 SDValue N0 = N->getOperand(0);
4149 SDValue N1 = N->getOperand(1);
4150 EVT VT = N0.getValueType();
4151 unsigned BitWidth = VT.getScalarSizeInBits();
4152 SDLoc DL(N);
4153
4155 return V;
4156
4157 // fold (sub x, x) -> 0
4158 if (N0 == N1)
4159 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4160
4161 // fold (sub c1, c2) -> c3
4162 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
4163 return C;
4164
4165 // fold vector ops
4166 if (VT.isVector()) {
4167 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4168 return FoldedVOp;
4169
4170 // fold (sub x, 0) -> x, vector edition
4171 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4172 return N0;
4173 }
4174
4175 // (sub x, ([v]select (ult x, y), 0, y)) -> (umin x, (sub x, y))
4176 // (sub x, ([v]select (uge x, y), y, 0)) -> (umin x, (sub x, y))
4177 if (N1.hasOneUse() && hasUMin(VT)) {
4178 SDValue Y;
4179 auto MS0 = m_Specific(N0);
4180 auto MVY = m_Value(Y);
4181 auto MZ = m_Zero();
4182 auto MCC1 = m_SpecificCondCode(ISD::SETULT);
4183 auto MCC2 = m_SpecificCondCode(ISD::SETUGE);
4184
4185 if (sd_match(N1, m_SelectCCLike(MS0, MVY, MZ, m_Deferred(Y), MCC1)) ||
4186 sd_match(N1, m_SelectCCLike(MS0, MVY, m_Deferred(Y), MZ, MCC2)) ||
4187 sd_match(N1, m_VSelect(m_SetCC(MS0, MVY, MCC1), MZ, m_Deferred(Y))) ||
4188 sd_match(N1, m_VSelect(m_SetCC(MS0, MVY, MCC2), m_Deferred(Y), MZ)))
4189
4190 return DAG.getNode(ISD::UMIN, DL, VT, N0,
4191 DAG.getNode(ISD::SUB, DL, VT, N0, Y));
4192 }
4193
4194 if (SDValue NewSel = foldBinOpIntoSelect(N))
4195 return NewSel;
4196
4197 // fold (sub x, c) -> (add x, -c)
4198 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
4199 return DAG.getNode(ISD::ADD, DL, VT, N0,
4200 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4201
4202 if (isNullOrNullSplat(N0)) {
4203 // Right-shifting everything out but the sign bit followed by negation is
4204 // the same as flipping arithmetic/logical shift type without the negation:
4205 // -(X >>u 31) -> (X >>s 31)
4206 // -(X >>s 31) -> (X >>u 31)
4207 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
4208 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
4209 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
4210 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
4211 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
4212 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
4213 }
4214 }
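// Illustrative note (editorial, not part of the upstream source): for i32,
// (X >>u 31) is 0 or 1, so 0 - (X >>u 31) is 0 or -1 (all ones), which is
// exactly the arithmetic shift (X >>s 31); the negation is absorbed by
// flipping the shift kind.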
4215
4216 // 0 - X --> 0 if the sub is NUW.
4217 if (N->getFlags().hasNoUnsignedWrap())
4218 return N0;
4219
4220 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
4221 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
4222 // N1 must be 0 because negating the minimum signed value is undefined.
4223 if (N->getFlags().hasNoSignedWrap())
4224 return N0;
4225
4226 // 0 - X --> X if X is 0 or the minimum signed value.
4227 return N1;
4228 }
4229
4230 // Convert 0 - abs(x).
4231 if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
4232 !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
4233 if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
4234 return Result;
4235
4236 // Similar to the previous rule, but this time targeting an expanded abs.
4237 // (sub 0, (max X, (sub 0, X))) --> (min X, (sub 0, X))
4238 // as well as
4239 // (sub 0, (min X, (sub 0, X))) --> (max X, (sub 0, X))
4240 // Note that these two are applicable to both signed and unsigned min/max.
4241 SDValue X;
4242 SDValue S0;
4243 auto NegPat = m_AllOf(m_Neg(m_Deferred(X)), m_Value(S0));
4244 if (sd_match(N1, m_OneUse(m_AnyOf(m_SMax(m_Value(X), NegPat),
4245 m_UMax(m_Value(X), NegPat),
4246 m_SMin(m_Value(X), NegPat),
4247 m_UMin(m_Value(X), NegPat))))) {
4248 unsigned NewOpc = ISD::getInverseMinMaxOpcode(N1->getOpcode());
4249 if (hasOperation(NewOpc, VT))
4250 return DAG.getNode(NewOpc, DL, VT, X, S0);
4251 }
4252
4253 // Fold neg(splat(neg(x))) -> splat(x)
4254 if (VT.isVector()) {
4255 SDValue N1S = DAG.getSplatValue(N1, true);
4256 if (N1S && N1S.getOpcode() == ISD::SUB &&
4257 isNullConstant(N1S.getOperand(0)))
4258 return DAG.getSplat(VT, DL, N1S.getOperand(1));
4259 }
4260
4261 // sub 0, (and x, 1) --> SIGN_EXTEND_INREG x, i1
4262 if (N1.getOpcode() == ISD::AND && N1.hasOneUse() &&
4263 isOneOrOneSplat(N1->getOperand(1))) {
4264 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), 1);
4265 if (VT.isVector())
4266 ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT,
4267 VT.getVectorElementCount());
4268 if (TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
4269 TargetLowering::Legal) {
4270 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N1->getOperand(0),
4271 DAG.getValueType(ExtVT));
4272 }
4273 }
4274 }
4275
4276 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
4277 if (isAllOnesOrAllOnesSplat(N0))
4278 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4279
4280 // fold (A - (0-B)) -> A+B
4281 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
4282 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
4283
4284 // fold A-(A-B) -> B
4285 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
4286 return N1.getOperand(1);
4287
4288 // fold (A+B)-A -> B
4289 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
4290 return N0.getOperand(1);
4291
4292 // fold (A+B)-B -> A
4293 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
4294 return N0.getOperand(0);
4295
4296 // fold (A+C1)-C2 -> A+(C1-C2)
4297 if (N0.getOpcode() == ISD::ADD) {
4298 SDValue N01 = N0.getOperand(1);
4299 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
4300 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
4301 }
4302
4303 // fold C2-(A+C1) -> (C2-C1)-A
4304 if (N1.getOpcode() == ISD::ADD) {
4305 SDValue N11 = N1.getOperand(1);
4306 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
4307 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
4308 }
4309
4310 // fold (A-C1)-C2 -> A-(C1+C2)
4311 if (N0.getOpcode() == ISD::SUB) {
4312 SDValue N01 = N0.getOperand(1);
4313 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
4314 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
4315 }
4316
4317 // fold (c1-A)-c2 -> (c1-c2)-A
4318 if (N0.getOpcode() == ISD::SUB) {
4319 SDValue N00 = N0.getOperand(0);
4320 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
4321 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
4322 }
4323
4324 SDValue A, B, C;
4325
4326 // fold ((A+(B+C))-B) -> A+C
4327 if (sd_match(N0, m_Add(m_Value(A), m_Add(m_Specific(N1), m_Value(C)))))
4328 return DAG.getNode(ISD::ADD, DL, VT, A, C);
4329
4330 // fold ((A+(B-C))-B) -> A-C
4331 if (sd_match(N0, m_Add(m_Value(A), m_Sub(m_Specific(N1), m_Value(C)))))
4332 return DAG.getNode(ISD::SUB, DL, VT, A, C);
4333
4334 // fold ((A-(B-C))-C) -> A-B
4335 if (sd_match(N0, m_Sub(m_Value(A), m_Sub(m_Value(B), m_Specific(N1)))))
4336 return DAG.getNode(ISD::SUB, DL, VT, A, B);
4337
4338 // fold (A-(B-C)) -> A+(C-B)
4339 if (sd_match(N1, m_OneUse(m_Sub(m_Value(B), m_Value(C)))))
4340 return DAG.getNode(ISD::ADD, DL, VT, N0,
4341 DAG.getNode(ISD::SUB, DL, VT, C, B));
4342
4343 // A - (A & B) -> A & (~B)
4344 if (sd_match(N1, m_And(m_Specific(N0), m_Value(B))) &&
4345 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true)))
4346 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getNOT(DL, B, VT));
4347
4348 // fold (A - (-B * C)) -> (A + (B * C))
4349 if (sd_match(N1, m_OneUse(m_Mul(m_Neg(m_Value(B)), m_Value(C)))))
4350 return DAG.getNode(ISD::ADD, DL, VT, N0,
4351 DAG.getNode(ISD::MUL, DL, VT, B, C));
4352
4353 // If either operand of a sub is undef, the result is undef
4354 if (N0.isUndef())
4355 return N0;
4356 if (N1.isUndef())
4357 return N1;
4358
4359 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
4360 return V;
4361
4362 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
4363 return V;
4364
4365 // Try to match AVGCEIL fixedwidth pattern
4366 if (SDValue V = foldSubToAvg(N, DL))
4367 return V;
4368
4369 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, DL))
4370 return V;
4371
4372 if (SDValue V = foldSubToUSubSat(VT, N, DL))
4373 return V;
4374
4375 if (SDValue V = foldRemainderIdiom(N, DAG, DL))
4376 return V;
4377
4378 // (A - B) - 1 -> add (xor B, -1), A
4379 if (sd_match(N, m_Sub(m_OneUse(m_Sub(m_Value(A), m_Value(B))),
4380 m_One(/*AllowUndefs=*/true))))
4381 return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT));
4382
4383 // Look for:
4384 // sub y, (xor x, -1)
4385 // And if the target does not like this form then turn into:
4386 // add (add x, y), 1
4387 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
4388 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
4389 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
4390 }
4391
4392 // Hoist one-use addition by non-opaque constant:
4393 // (x + C) - y -> (x - y) + C
4394 if (!reassociationCanBreakAddressingModePattern(ISD::SUB, DL, N, N0, N1) &&
4395 N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
4396 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4397 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4398 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
4399 }
4400 // y - (x + C) -> (y - x) - C
4401 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
4402 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
4403 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
4404 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
4405 }
4406 // (x - C) - y -> (x - y) - C
4407 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
4408 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4409 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4410 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4411 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
4412 }
4413 // (C - x) - y -> C - (x + y)
4414 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4415 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
4416 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
4417 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
4418 }
4419
4420 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
4421 // rather than 'sub 0/1' (the sext should get folded).
4422 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
4423 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
4424 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
4425 TLI.getBooleanContents(VT) ==
4426 TargetLowering::ZeroOrNegativeOneBooleanContent) {
4427 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
4428 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
4429 }
4430
4431 // fold B = sra (A, size(A)-1); sub (xor (A, B), B) -> (abs A)
4432 if ((!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4433 sd_match(N1, m_Sra(m_Value(A), m_SpecificInt(BitWidth - 1))) &&
4434 sd_match(N0, m_Xor(m_Specific(A), m_Specific(N1))))
4435 return DAG.getNode(ISD::ABS, DL, VT, A);
4436
4437 // If the relocation model supports it, consider symbol offsets.
4438 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
4439 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
4440 // fold (sub Sym+c1, Sym+c2) -> c1-c2
4441 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
4442 if (GA->getGlobal() == GB->getGlobal())
4443 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
4444 DL, VT);
4445 }
4446
4447 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
4448 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
4449 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
4450 if (TN->getVT() == MVT::i1) {
4451 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
4452 DAG.getConstant(1, DL, VT));
4453 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
4454 }
4455 }
4456
4457 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
4458 if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
4459 const APInt &IntVal = N1.getConstantOperandAPInt(0);
4460 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
4461 }
4462
4463 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
4464 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
4465 APInt NewStep = -N1.getConstantOperandAPInt(0);
4466 return DAG.getNode(ISD::ADD, DL, VT, N0,
4467 DAG.getStepVector(DL, VT, NewStep));
4468 }
4469
4470 // Prefer an add for more folding potential and possibly better codegen:
4471 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
4472 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
4473 SDValue ShAmt = N1.getOperand(1);
4474 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
4475 if (ShAmtC && ShAmtC->getAPIntValue() == (BitWidth - 1)) {
4476 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
4477 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
4478 }
4479 }
4480
4481 // As with the previous fold, prefer add for more folding potential.
4482 // Subtracting SMIN/0 is the same as adding SMIN/0:
4483 // N0 - (X << BW-1) --> N0 + (X << BW-1)
4484 if (N1.getOpcode() == ISD::SHL) {
4485 ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
4486 if (ShlC && ShlC->getAPIntValue() == (BitWidth - 1))
4487 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
4488 }
4489
4490 // (sub (usubo_carry X, 0, Carry), Y) -> (usubo_carry X, Y, Carry)
4491 if (N0.getOpcode() == ISD::USUBO_CARRY && isNullConstant(N0.getOperand(1)) &&
4492 N0.getResNo() == 0 && N0.hasOneUse())
4493 return DAG.getNode(ISD::USUBO_CARRY, DL, N0->getVTList(),
4494 N0.getOperand(0), N1, N0.getOperand(2));
4495
4496 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT)) {
4497 // (sub Carry, X) -> (uaddo_carry (sub 0, X), 0, Carry)
4498 if (SDValue Carry = getAsCarry(TLI, N0)) {
4499 SDValue X = N1;
4500 SDValue Zero = DAG.getConstant(0, DL, VT);
4501 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
4502 return DAG.getNode(ISD::UADDO_CARRY, DL,
4503 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
4504 Carry);
4505 }
4506 }
4507
4508 // If there's no chance of borrowing from adjacent bits, then sub is xor:
4509 // sub C0, X --> xor X, C0
4510 if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
4511 if (!C0->isOpaque()) {
4512 const APInt &C0Val = C0->getAPIntValue();
4513 const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
4514 if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
4515 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4516 }
4517 }
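// Illustrative note (editorial, not part of the upstream source): the
// known-bits test guarantees no bit position can borrow. E.g. C0 = 0b1111 and
// X known to fit in 4 bits: 15 - 6 = 9 and 15 ^ 6 = 9, so the sub can be
// rewritten as an xor with the same constant.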
4518
4519 // smax(a,b) - smin(a,b) --> abds(a,b)
4520 if ((!LegalOperations || hasOperation(ISD::ABDS, VT)) &&
4521 sd_match(N0, m_SMaxLike(m_Value(A), m_Value(B))) &&
4522 sd_match(N1, m_SMinLike(m_Specific(A), m_Specific(B))))
4523 return DAG.getNode(ISD::ABDS, DL, VT, A, B);
4524
4525 // smin(a,b) - smax(a,b) --> neg(abds(a,b))
4526 if (hasOperation(ISD::ABDS, VT) &&
4527 sd_match(N0, m_SMinLike(m_Value(A), m_Value(B))) &&
4528 sd_match(N1, m_SMaxLike(m_Specific(A), m_Specific(B))))
4529 return DAG.getNegative(DAG.getNode(ISD::ABDS, DL, VT, A, B), DL, VT);
4530
4531 // umax(a,b) - umin(a,b) --> abdu(a,b)
4532 if ((!LegalOperations || hasOperation(ISD::ABDU, VT)) &&
4533 sd_match(N0, m_UMaxLike(m_Value(A), m_Value(B))) &&
4534 sd_match(N1, m_UMinLike(m_Specific(A), m_Specific(B))))
4535 return DAG.getNode(ISD::ABDU, DL, VT, A, B);
4536
4537 // umin(a,b) - umax(a,b) --> neg(abdu(a,b))
4538 if (hasOperation(ISD::ABDU, VT) &&
4539 sd_match(N0, m_UMinLike(m_Value(A), m_Value(B))) &&
4540 sd_match(N1, m_UMaxLike(m_Specific(A), m_Specific(B))))
4541 return DAG.getNegative(DAG.getNode(ISD::ABDU, DL, VT, A, B), DL, VT);
4542
4543 return SDValue();
4544}
4545
4546SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
4547 unsigned Opcode = N->getOpcode();
4548 SDValue N0 = N->getOperand(0);
4549 SDValue N1 = N->getOperand(1);
4550 EVT VT = N0.getValueType();
4551 bool IsSigned = Opcode == ISD::SSUBSAT;
4552 SDLoc DL(N);
4553
4554 // fold (sub_sat x, undef) -> 0
4555 if (N0.isUndef() || N1.isUndef())
4556 return DAG.getConstant(0, DL, VT);
4557
4558 // fold (sub_sat x, x) -> 0
4559 if (N0 == N1)
4560 return DAG.getConstant(0, DL, VT);
4561
4562 // fold (sub_sat c1, c2) -> c3
4563 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4564 return C;
4565
4566 // fold vector ops
4567 if (VT.isVector()) {
4568 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4569 return FoldedVOp;
4570
4571 // fold (sub_sat x, 0) -> x, vector edition
4572 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4573 return N0;
4574 }
4575
4576 // fold (sub_sat x, 0) -> x
4577 if (isNullConstant(N1))
4578 return N0;
4579
4580 // If it cannot overflow, transform into a sub.
4581 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4582 return DAG.getNode(ISD::SUB, DL, VT, N0, N1);
4583
4584 return SDValue();
4585}
4586
4587SDValue DAGCombiner::visitSUBC(SDNode *N) {
4588 SDValue N0 = N->getOperand(0);
4589 SDValue N1 = N->getOperand(1);
4590 EVT VT = N0.getValueType();
4591 SDLoc DL(N);
4592
4593 // If the flag result is dead, turn this into an SUB.
4594 if (!N->hasAnyUseOfValue(1))
4595 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4596 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4597
4598 // fold (subc x, x) -> 0 + no borrow
4599 if (N0 == N1)
4600 return CombineTo(N, DAG.getConstant(0, DL, VT),
4601 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4602
4603 // fold (subc x, 0) -> x + no borrow
4604 if (isNullConstant(N1))
4605 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4606
4607 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4608 if (isAllOnesConstant(N0))
4609 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4610 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4611
4612 return SDValue();
4613}
4614
4615SDValue DAGCombiner::visitSUBO(SDNode *N) {
4616 SDValue N0 = N->getOperand(0);
4617 SDValue N1 = N->getOperand(1);
4618 EVT VT = N0.getValueType();
4619 bool IsSigned = (ISD::SSUBO == N->getOpcode());
4620
4621 EVT CarryVT = N->getValueType(1);
4622 SDLoc DL(N);
4623
4624 // If the flag result is dead, turn this into an SUB.
4625 if (!N->hasAnyUseOfValue(1))
4626 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4627 DAG.getUNDEF(CarryVT));
4628
4629 // fold (subo x, x) -> 0 + no borrow
4630 if (N0 == N1)
4631 return CombineTo(N, DAG.getConstant(0, DL, VT),
4632 DAG.getConstant(0, DL, CarryVT));
4633
4634 // fold (subo x, c) -> (addo x, -c)
4635 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
4636 if (IsSigned && !N1C->isMinSignedValue())
4637 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
4638 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4639
4640 // fold (subo x, 0) -> x + no borrow
4641 if (isNullOrNullSplat(N1))
4642 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
4643
4644 // If it cannot overflow, transform into a sub.
4645 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4646 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4647 DAG.getConstant(0, DL, CarryVT));
4648
4649 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4650 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
4651 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4652 DAG.getConstant(0, DL, CarryVT));
4653
4654 return SDValue();
4655}
4656
4657SDValue DAGCombiner::visitSUBE(SDNode *N) {
4658 SDValue N0 = N->getOperand(0);
4659 SDValue N1 = N->getOperand(1);
4660 SDValue CarryIn = N->getOperand(2);
4661
4662 // fold (sube x, y, false) -> (subc x, y)
4663 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
4664 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
4665
4666 return SDValue();
4667}
4668
4669SDValue DAGCombiner::visitUSUBO_CARRY(SDNode *N) {
4670 SDValue N0 = N->getOperand(0);
4671 SDValue N1 = N->getOperand(1);
4672 SDValue CarryIn = N->getOperand(2);
4673
4674 // fold (usubo_carry x, y, false) -> (usubo x, y)
4675 if (isNullConstant(CarryIn)) {
4676 if (!LegalOperations ||
4677 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
4678 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
4679 }
4680
4681 return SDValue();
4682}
4683
4684SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
4685 SDValue N0 = N->getOperand(0);
4686 SDValue N1 = N->getOperand(1);
4687 SDValue CarryIn = N->getOperand(2);
4688
4689 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
4690 if (isNullConstant(CarryIn)) {
4691 if (!LegalOperations ||
4692 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
4693 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
4694 }
4695
4696 return SDValue();
4697}
4698
4699// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
4700// UMULFIXSAT here.
4701SDValue DAGCombiner::visitMULFIX(SDNode *N) {
4702 SDValue N0 = N->getOperand(0);
4703 SDValue N1 = N->getOperand(1);
4704 SDValue Scale = N->getOperand(2);
4705 EVT VT = N0.getValueType();
4706
4707 // fold (mulfix x, undef, scale) -> 0
4708 if (N0.isUndef() || N1.isUndef())
4709 return DAG.getConstant(0, SDLoc(N), VT);
4710
4711 // Canonicalize constant to RHS (vector doesn't have to splat)
4714 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
4715
4716 // fold (mulfix x, 0, scale) -> 0
4717 if (isNullConstant(N1))
4718 return DAG.getConstant(0, SDLoc(N), VT);
4719
4720 return SDValue();
4721}
4722
4723template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
4724 SDValue N0 = N->getOperand(0);
4725 SDValue N1 = N->getOperand(1);
4726 EVT VT = N0.getValueType();
4727 unsigned BitWidth = VT.getScalarSizeInBits();
4728 SDLoc DL(N);
4729 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
4730 MatchContextClass Matcher(DAG, TLI, N);
4731
4732 // fold (mul x, undef) -> 0
4733 if (N0.isUndef() || N1.isUndef())
4734 return DAG.getConstant(0, DL, VT);
4735
4736 // fold (mul c1, c2) -> c1*c2
4737 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
4738 return C;
4739
4740 // canonicalize constant to RHS (vector doesn't have to splat)
4743 return Matcher.getNode(ISD::MUL, DL, VT, N1, N0);
4744
4745 bool N1IsConst = false;
4746 bool N1IsOpaqueConst = false;
4747 APInt ConstValue1;
4748
4749 // fold vector ops
4750 if (VT.isVector()) {
4751 // TODO: Change this to use SimplifyVBinOp when it supports VP op.
4752 if (!UseVP)
4753 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4754 return FoldedVOp;
4755
4756 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
4757 assert((!N1IsConst || ConstValue1.getBitWidth() == BitWidth) &&
4758 "Splat APInt should be element width");
4759 } else {
4760 N1IsConst = isa<ConstantSDNode>(N1);
4761 if (N1IsConst) {
4762 ConstValue1 = N1->getAsAPIntVal();
4763 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
4764 }
4765 }
4766
4767 // fold (mul x, 0) -> 0
4768 if (N1IsConst && ConstValue1.isZero())
4769 return N1;
4770
4771 // fold (mul x, 1) -> x
4772 if (N1IsConst && ConstValue1.isOne())
4773 return N0;
4774
4775 if (!UseVP)
4776 if (SDValue NewSel = foldBinOpIntoSelect(N))
4777 return NewSel;
4778
4779 // fold (mul x, -1) -> 0-x
4780 if (N1IsConst && ConstValue1.isAllOnes())
4781 return Matcher.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4782
4783 // fold (mul x, (1 << c)) -> x << c
4784 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4785 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
4786 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4787 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4788 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4789 SDNodeFlags Flags;
4790 Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap());
4791 // TODO: Preserve setNoSignedWrap if LogBase2 isn't BitWidth - 1.
4792 return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc, Flags);
4793 }
4794 }
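  // For example, (mul nuw x, 16) becomes (shl nuw x, 4): shifting left by
  // log2(c) wraps (unsigned) exactly when the multiply does, so the NUW flag
  // can be carried over.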
4795
4796 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
4797 if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
4798 unsigned Log2Val = (-ConstValue1).logBase2();
4799
4800 // FIXME: If the input is something that is easily negated (e.g. a
4801 // single-use add), we should put the negate there.
4802 return Matcher.getNode(
4803 ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
4804 Matcher.getNode(ISD::SHL, DL, VT, N0,
4805 DAG.getShiftAmountConstant(Log2Val, VT, DL)));
4806 }
4807
4808 // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
4809 // hi result is in use in case we hit this mid-legalization.
4810 if (!UseVP) {
4811 for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
4812 if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
4813 SDVTList LoHiVT = DAG.getVTList(VT, VT);
4814 // TODO: Can we match commutable operands with getNodeIfExists?
4815 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
4816 if (LoHi->hasAnyUseOfValue(1))
4817 return SDValue(LoHi, 0);
4818 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
4819 if (LoHi->hasAnyUseOfValue(1))
4820 return SDValue(LoHi, 0);
4821 }
4822 }
4823 }
4824
4825 // Try to transform:
4826 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
4827 // mul x, (2^N + 1) --> add (shl x, N), x
4828 // mul x, (2^N - 1) --> sub (shl x, N), x
4829 // Examples: x * 33 --> (x << 5) + x
4830 // x * 15 --> (x << 4) - x
4831 // x * -33 --> -((x << 5) + x)
4832 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
4833 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
4834 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
4835 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
4836 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
4837 // x * 0xf800 --> (x << 16) - (x << 11)
4838 // x * -0x8800 --> -((x << 15) + (x << 11))
4839 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
4840 if (!UseVP && N1IsConst &&
4841 TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
4842 // TODO: We could handle more general decomposition of any constant by
4843 // having the target set a limit on number of ops and making a
4844 // callback to determine that sequence (similar to sqrt expansion).
4845 unsigned MathOp = ISD::DELETED_NODE;
4846 APInt MulC = ConstValue1.abs();
4847 // The constant `2` should be treated as (2^0 + 1).
4848 unsigned TZeros = MulC == 2 ? 0 : MulC.countr_zero();
4849 MulC.lshrInPlace(TZeros);
4850 if ((MulC - 1).isPowerOf2())
4851 MathOp = ISD::ADD;
4852 else if ((MulC + 1).isPowerOf2())
4853 MathOp = ISD::SUB;
4854
4855 if (MathOp != ISD::DELETED_NODE) {
4856 unsigned ShAmt =
4857 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4858 ShAmt += TZeros;
4859 assert(ShAmt < BitWidth &&
4860 "multiply-by-constant generated out of bounds shift");
4861 SDValue Shl =
4862 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
4863 SDValue R =
4864 TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
4865 DAG.getNode(ISD::SHL, DL, VT, N0,
4866 DAG.getConstant(TZeros, DL, VT)))
4867 : DAG.getNode(MathOp, DL, VT, Shl, N0);
4868 if (ConstValue1.isNegative())
4869 R = DAG.getNegative(R, DL, VT);
4870 return R;
4871 }
4872 }
4873
4874 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
4875 if (sd_context_match(N0, Matcher, m_Opc(ISD::SHL))) {
4876 SDValue N01 = N0.getOperand(1);
4877 if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
4878 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
4879 }
4880
4881 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4882 // use.
4883 {
4884 SDValue Sh, Y;
4885
4886 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
4887 if (sd_context_match(N0, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4889 Sh = N0; Y = N1;
4890 } else if (sd_context_match(N1, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4892 Sh = N1; Y = N0;
4893 }
4894
4895 if (Sh.getNode()) {
4896 SDValue Mul = Matcher.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
4897 return Matcher.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
4898 }
4899 }
4900
4901 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4902 if (sd_context_match(N0, Matcher, m_Opc(ISD::ADD)) &&
4906 return Matcher.getNode(
4907 ISD::ADD, DL, VT,
4908 Matcher.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
4909 Matcher.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
4910
4911 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4912 ConstantSDNode *NC1 = isConstOrConstSplat(N1);
4913 if (!UseVP && N0.getOpcode() == ISD::VSCALE && NC1) {
4914 const APInt &C0 = N0.getConstantOperandAPInt(0);
4915 const APInt &C1 = NC1->getAPIntValue();
4916 return DAG.getVScale(DL, VT, C0 * C1);
4917 }
4918
4919 // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4920 APInt MulVal;
4921 if (!UseVP && N0.getOpcode() == ISD::STEP_VECTOR &&
4922 ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4923 const APInt &C0 = N0.getConstantOperandAPInt(0);
4924 APInt NewStep = C0 * MulVal;
4925 return DAG.getStepVector(DL, VT, NewStep);
4926 }
4927
4928 // Fold Y = sra (X, size(X)-1); mul (or (Y, 1), X) -> (abs X)
4929 SDValue X;
4930 if (!UseVP && (!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4932 N, Matcher,
4934 m_Deferred(X)))) {
4935 return Matcher.getNode(ISD::ABS, DL, VT, X);
4936 }
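  // This works because or(sra(X, BW-1), 1) evaluates to +1 for non-negative X
  // and to -1 for negative X, so the multiply reproduces abs(X) (INT_MIN stays
  // INT_MIN, matching ISD::ABS semantics).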
4937
4938   // Fold, per vector element:
4939   //   (mul x, 0/undef) -> 0 and (mul x, 1) -> x,
4940   //   which together reduce the whole multiply to and(x, mask).
4941 // We can replace vectors with '0' and '1' factors with a clearing mask.
4942 if (VT.isFixedLengthVector()) {
4943 unsigned NumElts = VT.getVectorNumElements();
4944 SmallBitVector ClearMask;
4945 ClearMask.reserve(NumElts);
4946 auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4947 if (!V || V->isZero()) {
4948 ClearMask.push_back(true);
4949 return true;
4950 }
4951 ClearMask.push_back(false);
4952 return V->isOne();
4953 };
4954 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4955 ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4956 assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4957 EVT LegalSVT = N1.getOperand(0).getValueType();
4958 SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4959 SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4960       SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
4961       for (unsigned I = 0; I != NumElts; ++I)
4962 if (ClearMask[I])
4963 Mask[I] = Zero;
4964 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4965 }
4966 }
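  // For example, (mul x, <1, 0, undef, 1>) becomes (and x, <-1, 0, 0, -1>):
  // lanes multiplied by 1 keep x, lanes multiplied by 0 or undef are cleared.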
4967
4968 // reassociate mul
4969 // TODO: Change reassociateOps to support vp ops.
4970 if (!UseVP)
4971 if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
4972 return RMUL;
4973
4974 // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
4975 // TODO: Change reassociateReduction to support vp ops.
4976 if (!UseVP)
4977 if (SDValue SD =
4978 reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
4979 return SD;
4980
4981 // Simplify the operands using demanded-bits information.
4982   if (SimplifyDemandedBits(SDValue(N, 0)))
4983     return SDValue(N, 0);
4984
4985 return SDValue();
4986}
4987
4988/// Return true if divmod libcall is available.
4989static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4990 const TargetLowering &TLI) {
4991 RTLIB::Libcall LC;
4992 EVT NodeType = Node->getValueType(0);
4993 if (!NodeType.isSimple())
4994 return false;
4995 switch (NodeType.getSimpleVT().SimpleTy) {
4996 default: return false; // No libcall for vector types.
4997 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4998 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4999 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
5000 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
5001 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
5002 }
5003
5004 return TLI.getLibcallImpl(LC) != RTLIB::Unsupported;
5005}
5006
5007/// Issue divrem if both quotient and remainder are needed.
5008SDValue DAGCombiner::useDivRem(SDNode *Node) {
5009 if (Node->use_empty())
5010 return SDValue(); // This is a dead node, leave it alone.
5011
5012 unsigned Opcode = Node->getOpcode();
5013 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
5014 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
5015
5016 // DivMod lib calls can still work on non-legal types if using lib-calls.
5017 EVT VT = Node->getValueType(0);
5018 if (VT.isVector() || !VT.isInteger())
5019 return SDValue();
5020
5021 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
5022 return SDValue();
5023
5024 // If DIVREM is going to get expanded into a libcall,
5025 // but there is no libcall available, then don't combine.
5026 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
5027 !isDivRemLibcallAvailable(Node, isSigned, TLI))
5028 return SDValue();
5029
5030 // If div is legal, it's better to do the normal expansion
5031 unsigned OtherOpcode = 0;
5032 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
5033 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
5034 if (TLI.isOperationLegalOrCustom(Opcode, VT))
5035 return SDValue();
5036 } else {
5037 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
5038 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
5039 return SDValue();
5040 }
5041
5042 SDValue Op0 = Node->getOperand(0);
5043 SDValue Op1 = Node->getOperand(1);
5044 SDValue combined;
5045 for (SDNode *User : Op0->users()) {
5046 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
5047 User->use_empty())
5048 continue;
5049 // Convert the other matching node(s), too;
5050 // otherwise, the DIVREM may get target-legalized into something
5051 // target-specific that we won't be able to recognize.
5052 unsigned UserOpc = User->getOpcode();
5053 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
5054 User->getOperand(0) == Op0 &&
5055 User->getOperand(1) == Op1) {
5056 if (!combined) {
5057 if (UserOpc == OtherOpcode) {
5058 SDVTList VTs = DAG.getVTList(VT, VT);
5059 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
5060 } else if (UserOpc == DivRemOpc) {
5061 combined = SDValue(User, 0);
5062 } else {
5063 assert(UserOpc == Opcode);
5064 continue;
5065 }
5066 }
5067 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
5068 CombineTo(User, combined);
5069 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
5070 CombineTo(User, combined.getValue(1));
5071 }
5072 }
5073 return combined;
5074}
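// For example, if the DAG contains both (sdiv X, Y) and (srem X, Y), both are
// rewired to the two results of a single (sdivrem X, Y): result 0 feeds the
// quotient users and result 1 the remainder users.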
5075
5076 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
5077   SDValue N0 = N->getOperand(0);
5078 SDValue N1 = N->getOperand(1);
5079 EVT VT = N->getValueType(0);
5080 SDLoc DL(N);
5081
5082 unsigned Opc = N->getOpcode();
5083 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
5084
5085 // X / undef -> undef
5086 // X % undef -> undef
5087 // X / 0 -> undef
5088 // X % 0 -> undef
5089 // NOTE: This includes vectors where any divisor element is zero/undef.
5090 if (DAG.isUndef(Opc, {N0, N1}))
5091 return DAG.getUNDEF(VT);
5092
5093 // undef / X -> 0
5094 // undef % X -> 0
5095 if (N0.isUndef())
5096 return DAG.getConstant(0, DL, VT);
5097
5098 // 0 / X -> 0
5099 // 0 % X -> 0
5100   ConstantSDNode *N0C = isConstOrConstSplat(N0);
5101   if (N0C && N0C->isZero())
5102 return N0;
5103
5104 // X / X -> 1
5105 // X % X -> 0
5106 if (N0 == N1)
5107 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
5108
5109 // X / 1 -> X
5110 // X % 1 -> 0
5111 // If this is a boolean op (single-bit element type), we can't have
5112 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
5113 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
5114 // it's a 1.
5115 if (isOneOrOneSplat(N1) || (VT.getScalarType() == MVT::i1))
5116 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
5117
5118 return SDValue();
5119}
5120
5121SDValue DAGCombiner::visitSDIV(SDNode *N) {
5122 SDValue N0 = N->getOperand(0);
5123 SDValue N1 = N->getOperand(1);
5124 EVT VT = N->getValueType(0);
5125 EVT CCVT = getSetCCResultType(VT);
5126 SDLoc DL(N);
5127
5128 // fold (sdiv c1, c2) -> c1/c2
5129 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
5130 return C;
5131
5132 // fold vector ops
5133 if (VT.isVector())
5134 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5135 return FoldedVOp;
5136
5137 // fold (sdiv X, -1) -> 0-X
5138 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5139 if (N1C && N1C->isAllOnes())
5140 return DAG.getNegative(N0, DL, VT);
5141
5142 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
5143 if (N1C && N1C->isMinSignedValue())
5144 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
5145 DAG.getConstant(1, DL, VT),
5146 DAG.getConstant(0, DL, VT));
5147
5148 if (SDValue V = simplifyDivRem(N, DAG))
5149 return V;
5150
5151 if (SDValue NewSel = foldBinOpIntoSelect(N))
5152 return NewSel;
5153
5154 // If we know the sign bits of both operands are zero, strength reduce to a
5155 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
5156 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
5157 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
5158
5159 if (SDValue V = visitSDIVLike(N0, N1, N)) {
5160 // If the corresponding remainder node exists, update its users with
5161     // (Dividend - (Quotient * Divisor)).
5162 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
5163 { N0, N1 })) {
5164 // If the sdiv has the exact flag we shouldn't propagate it to the
5165 // remainder node.
5166 if (!N->getFlags().hasExact()) {
5167 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
5168 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5169 AddToWorklist(Mul.getNode());
5170 AddToWorklist(Sub.getNode());
5171 CombineTo(RemNode, Sub);
5172 }
5173 }
5174 return V;
5175 }
5176
5177 // sdiv, srem -> sdivrem
5178 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
5179 // true. Otherwise, we break the simplification logic in visitREM().
5180 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5181 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
5182 if (SDValue DivRem = useDivRem(N))
5183 return DivRem;
5184
5185 return SDValue();
5186}
5187
5188static bool isDivisorPowerOfTwo(SDValue Divisor) {
5189 // Helper for determining whether a value is a power-2 constant scalar or a
5190 // vector of such elements.
5191 auto IsPowerOfTwo = [](ConstantSDNode *C) {
5192 if (C->isZero() || C->isOpaque())
5193 return false;
5194 if (C->getAPIntValue().isPowerOf2())
5195 return true;
5196 if (C->getAPIntValue().isNegatedPowerOf2())
5197 return true;
5198 return false;
5199 };
5200
5201 return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo, /*AllowUndefs=*/false,
5202 /*AllowTruncation=*/true);
5203}
5204
5205SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
5206 SDLoc DL(N);
5207 EVT VT = N->getValueType(0);
5208 EVT CCVT = getSetCCResultType(VT);
5209 unsigned BitWidth = VT.getScalarSizeInBits();
5210
5211 // fold (sdiv X, pow2) -> simple ops after legalize
5212 // FIXME: We check for the exact bit here because the generic lowering gives
5213 // better results in that case. The target-specific lowering should learn how
5214 // to handle exact sdivs efficiently.
5215 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
5216 // Target-specific implementation of sdiv x, pow2.
5217 if (SDValue Res = BuildSDIVPow2(N))
5218 return Res;
5219
5220 // Create constants that are functions of the shift amount value.
5221 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
5222 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
5223 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
5224 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
5225 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
5226 if (!isConstantOrConstantVector(Inexact))
5227 return SDValue();
5228
5229 // Splat the sign bit into the register
5230 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
5231 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
5232 AddToWorklist(Sign.getNode());
5233
5234 // Add (N0 < 0) ? abs2 - 1 : 0;
5235 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
5236 AddToWorklist(Srl.getNode());
5237 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
5238 AddToWorklist(Add.getNode());
5239 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
5240 AddToWorklist(Sra.getNode());
5241
5242 // Special case: (sdiv X, 1) -> X
5243 // Special Case: (sdiv X, -1) -> 0-X
5244 SDValue One = DAG.getConstant(1, DL, VT);
5245     SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
5246     SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
5247 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
5248 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
5249 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
5250
5251 // If dividing by a positive value, we're done. Otherwise, the result must
5252 // be negated.
5253 SDValue Zero = DAG.getConstant(0, DL, VT);
5254 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
5255
5256 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
5257 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
5258 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
5259 return Res;
5260 }
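  // Worked example (i32, divisor 8): Sign = sra(X, 31) is 0 or -1,
  // Srl = srl(Sign, 29) is 0 or 7, so the ADD adds the bias 7 only for
  // negative X, and sra(X + bias, 3) then rounds the quotient toward zero.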
5261
5262 // If integer divide is expensive and we satisfy the requirements, emit an
5263 // alternate sequence. Targets may check function attributes for size/speed
5264 // trade-offs.
5265 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5266 if (isConstantOrConstantVector(N1, /*NoOpaques=*/false,
5267 /*AllowTruncation=*/true) &&
5268 !TLI.isIntDivCheap(N->getValueType(0), Attr))
5269 if (SDValue Op = BuildSDIV(N))
5270 return Op;
5271
5272 return SDValue();
5273}
5274
5275SDValue DAGCombiner::visitUDIV(SDNode *N) {
5276 SDValue N0 = N->getOperand(0);
5277 SDValue N1 = N->getOperand(1);
5278 EVT VT = N->getValueType(0);
5279 EVT CCVT = getSetCCResultType(VT);
5280 SDLoc DL(N);
5281
5282 // fold (udiv c1, c2) -> c1/c2
5283 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
5284 return C;
5285
5286 // fold vector ops
5287 if (VT.isVector())
5288 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5289 return FoldedVOp;
5290
5291 // fold (udiv X, -1) -> select(X == -1, 1, 0)
5292 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5293 if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
5294 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
5295 DAG.getConstant(1, DL, VT),
5296 DAG.getConstant(0, DL, VT));
5297 }
5298
5299 if (SDValue V = simplifyDivRem(N, DAG))
5300 return V;
5301
5302 if (SDValue NewSel = foldBinOpIntoSelect(N))
5303 return NewSel;
5304
5305 if (SDValue V = visitUDIVLike(N0, N1, N)) {
5306 // If the corresponding remainder node exists, update its users with
5307     // (Dividend - (Quotient * Divisor)).
5308 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
5309 { N0, N1 })) {
5310 // If the udiv has the exact flag we shouldn't propagate it to the
5311 // remainder node.
5312 if (!N->getFlags().hasExact()) {
5313 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
5314 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5315 AddToWorklist(Mul.getNode());
5316 AddToWorklist(Sub.getNode());
5317 CombineTo(RemNode, Sub);
5318 }
5319 }
5320 return V;
5321 }
5322
5323   // udiv, urem -> udivrem
5324 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
5325 // true. Otherwise, we break the simplification logic in visitREM().
5326 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5327 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
5328 if (SDValue DivRem = useDivRem(N))
5329 return DivRem;
5330
5331 // Simplify the operands using demanded-bits information.
5332 // We don't have demanded bits support for UDIV so this just enables constant
5333 // folding based on known bits.
5334   if (SimplifyDemandedBits(SDValue(N, 0)))
5335     return SDValue(N, 0);
5336
5337 return SDValue();
5338}
5339
5340SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
5341 SDLoc DL(N);
5342 EVT VT = N->getValueType(0);
5343
5344 // fold (udiv x, (1 << c)) -> x >>u c
5345 if (isConstantOrConstantVector(N1, /*NoOpaques=*/true,
5346 /*AllowTruncation=*/true)) {
5347 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5348 AddToWorklist(LogBase2.getNode());
5349
5350 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5351 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
5352 AddToWorklist(Trunc.getNode());
5353 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5354 }
5355 }
5356
5357 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
5358 if (N1.getOpcode() == ISD::SHL) {
5359 SDValue N10 = N1.getOperand(0);
5360 if (isConstantOrConstantVector(N10, /*NoOpaques=*/true,
5361 /*AllowTruncation=*/true)) {
5362 if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
5363 AddToWorklist(LogBase2.getNode());
5364
5365 EVT ADDVT = N1.getOperand(1).getValueType();
5366 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
5367 AddToWorklist(Trunc.getNode());
5368 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
5369 AddToWorklist(Add.getNode());
5370 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
5371 }
5372 }
5373 }
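  // For example, (udiv x, (shl 4, y)) becomes (srl x, (add y, 2)), since
  // dividing by (4 << y) is the same as shifting right by log2(4) + y.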
5374
5375 // fold (udiv x, c) -> alternate
5376 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5377 if (isConstantOrConstantVector(N1, /*NoOpaques=*/false,
5378 /*AllowTruncation=*/true) &&
5379 !TLI.isIntDivCheap(N->getValueType(0), Attr))
5380 if (SDValue Op = BuildUDIV(N))
5381 return Op;
5382
5383 return SDValue();
5384}
5385
5386SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
5387 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
5388 !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
5389 // Target-specific implementation of srem x, pow2.
5390 if (SDValue Res = BuildSREMPow2(N))
5391 return Res;
5392 }
5393 return SDValue();
5394}
5395
5396// handles ISD::SREM and ISD::UREM
5397SDValue DAGCombiner::visitREM(SDNode *N) {
5398 unsigned Opcode = N->getOpcode();
5399 SDValue N0 = N->getOperand(0);
5400 SDValue N1 = N->getOperand(1);
5401 EVT VT = N->getValueType(0);
5402 EVT CCVT = getSetCCResultType(VT);
5403
5404 bool isSigned = (Opcode == ISD::SREM);
5405 SDLoc DL(N);
5406
5407 // fold (rem c1, c2) -> c1%c2
5408 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5409 return C;
5410
5411 // fold (urem X, -1) -> select(FX == -1, 0, FX)
5412 // Freeze the numerator to avoid a miscompile with an undefined value.
5413 if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false) &&
5414 CCVT.isVector() == VT.isVector()) {
5415 SDValue F0 = DAG.getFreeze(N0);
5416 SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
5417 return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
5418 }
5419
5420 if (SDValue V = simplifyDivRem(N, DAG))
5421 return V;
5422
5423 if (SDValue NewSel = foldBinOpIntoSelect(N))
5424 return NewSel;
5425
5426 if (isSigned) {
5427 // If we know the sign bits of both operands are zero, strength reduce to a
5428 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
5429 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
5430 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
5431 } else {
5432 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
5433 // fold (urem x, pow2) -> (and x, pow2-1)
5434 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5435 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5436 AddToWorklist(Add.getNode());
5437 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5438 }
5439 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
5440 // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
5441 // TODO: We should sink the following into isKnownToBePowerOfTwo
5442 // using a OrZero parameter analogous to our handling in ValueTracking.
5443 if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) &&
5445 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5446 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5447 AddToWorklist(Add.getNode());
5448 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5449 }
5450 }
5451
5452 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5453
5454 // If X/C can be simplified by the division-by-constant logic, lower
5455 // X%C to the equivalent of X-X/C*C.
5456 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
5457 // speculative DIV must not cause a DIVREM conversion. We guard against this
5458 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
5459 // combine will not return a DIVREM. Regardless, checking cheapness here
5460 // makes sense since the simplification results in fatter code.
5461 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
5462 if (isSigned) {
5463 // check if we can build faster implementation for srem
5464 if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
5465 return OptimizedRem;
5466 }
5467
5468 SDValue OptimizedDiv =
5469 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
5470 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
5471 // If the equivalent Div node also exists, update its users.
5472 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
5473 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
5474 { N0, N1 }))
5475 CombineTo(DivNode, OptimizedDiv);
5476 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
5477 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5478 AddToWorklist(OptimizedDiv.getNode());
5479 AddToWorklist(Mul.getNode());
5480 return Sub;
5481 }
5482 }
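  // For example, (urem x, 10) becomes x - (x /u 10) * 10, where the division
  // itself can be expanded by the BuildUDIV magic-number sequence; this is
  // typically cheaper than a hardware remainder when isIntDivCheap() is false.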
5483
5484   // sdiv, srem -> sdivrem / udiv, urem -> udivrem
5485 if (SDValue DivRem = useDivRem(N))
5486 return DivRem.getValue(1);
5487
5488 // fold urem(urem(A, BCst), Op1Cst) -> urem(A, Op1Cst)
5489 // iff urem(BCst, Op1Cst) == 0
5490 SDValue A;
5491 APInt Op1Cst, BCst;
5492 if (sd_match(N, m_URem(m_URem(m_Value(A), m_ConstInt(BCst)),
5493 m_ConstInt(Op1Cst))) &&
5494 BCst.urem(Op1Cst).isZero()) {
5495 return DAG.getNode(ISD::UREM, DL, VT, A, DAG.getConstant(Op1Cst, DL, VT));
5496 }
5497
5498 // fold srem(srem(A, BCst), Op1Cst) -> srem(A, Op1Cst)
5499 // iff srem(BCst, Op1Cst) == 0 && Op1Cst != 1
5500 if (sd_match(N, m_SRem(m_SRem(m_Value(A), m_ConstInt(BCst)),
5501 m_ConstInt(Op1Cst))) &&
5502 BCst.srem(Op1Cst).isZero() && !Op1Cst.isAllOnes()) {
5503 return DAG.getNode(ISD::SREM, DL, VT, A, DAG.getConstant(Op1Cst, DL, VT));
5504 }
5505
5506 return SDValue();
5507}
5508
5509SDValue DAGCombiner::visitMULHS(SDNode *N) {
5510 SDValue N0 = N->getOperand(0);
5511 SDValue N1 = N->getOperand(1);
5512 EVT VT = N->getValueType(0);
5513 SDLoc DL(N);
5514
5515 // fold (mulhs c1, c2)
5516 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
5517 return C;
5518
5519 // canonicalize constant to RHS.
5522 return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
5523
5524 if (VT.isVector()) {
5525 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5526 return FoldedVOp;
5527
5528 // fold (mulhs x, 0) -> 0
5529     // Do not return N1, because it may contain undef elements.
5531 return DAG.getConstant(0, DL, VT);
5532 }
5533
5534 // fold (mulhs x, 0) -> 0
5535 if (isNullConstant(N1))
5536 return N1;
5537
5538 // fold (mulhs x, 1) -> (sra x, size(x)-1)
5539 if (isOneConstant(N1))
5540 return DAG.getNode(
5541 ISD::SRA, DL, VT, N0,
5542         DAG.getShiftAmountConstant(N0.getScalarValueSizeInBits() - 1, VT, DL));
5543
5544 // fold (mulhs x, undef) -> 0
5545 if (N0.isUndef() || N1.isUndef())
5546 return DAG.getConstant(0, DL, VT);
5547
5548 // If the type twice as wide is legal, transform the mulhs to a wider multiply
5549 // plus a shift.
5550 if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
5551 !VT.isVector()) {
5552 MVT Simple = VT.getSimpleVT();
5553 unsigned SimpleSize = Simple.getSizeInBits();
5554 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5555 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5556 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5557 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5558 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5559 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5560 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5561 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5562 }
5563 }
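  // For example, an i16 MULHS on a target with a legal i32 MUL: sign-extend
  // both operands to i32, multiply, shift the product right by 16, and
  // truncate back to i16.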
5564
5565 return SDValue();
5566}
5567
5568SDValue DAGCombiner::visitMULHU(SDNode *N) {
5569 SDValue N0 = N->getOperand(0);
5570 SDValue N1 = N->getOperand(1);
5571 EVT VT = N->getValueType(0);
5572 SDLoc DL(N);
5573
5574 // fold (mulhu c1, c2)
5575 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
5576 return C;
5577
5578 // canonicalize constant to RHS.
5581 return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
5582
5583 if (VT.isVector()) {
5584 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5585 return FoldedVOp;
5586
5587 // fold (mulhu x, 0) -> 0
5588     // Do not return N1, because it may contain undef elements.
5590 return DAG.getConstant(0, DL, VT);
5591 }
5592
5593 // fold (mulhu x, 0) -> 0
5594 if (isNullConstant(N1))
5595 return N1;
5596
5597 // fold (mulhu x, 1) -> 0
5598 if (isOneConstant(N1))
5599 return DAG.getConstant(0, DL, VT);
5600
5601 // fold (mulhu x, undef) -> 0
5602 if (N0.isUndef() || N1.isUndef())
5603 return DAG.getConstant(0, DL, VT);
5604
5605 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
5606 if (isConstantOrConstantVector(N1, /*NoOpaques=*/true,
5607 /*AllowTruncation=*/true) &&
5608 hasOperation(ISD::SRL, VT)) {
5609 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5610 unsigned NumEltBits = VT.getScalarSizeInBits();
5611 SDValue SRLAmt = DAG.getNode(
5612 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
5613 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5614 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
5615 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5616 }
5617 }
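  // For example, on i32, (mulhu x, 1<<20) is (x * 2^20) >> 32, i.e.
  // (srl x, 12), which the SUB of (bitwidth - log2) above computes.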
5618
5619 // If the type twice as wide is legal, transform the mulhu to a wider multiply
5620 // plus a shift.
5621 if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
5622 !VT.isVector()) {
5623 MVT Simple = VT.getSimpleVT();
5624 unsigned SimpleSize = Simple.getSizeInBits();
5625 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5626 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5627 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5628 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5629 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5630 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5631 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5632 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5633 }
5634 }
5635
5636 // Simplify the operands using demanded-bits information.
5637 // We don't have demanded bits support for MULHU so this just enables constant
5638 // folding based on known bits.
5639   if (SimplifyDemandedBits(SDValue(N, 0)))
5640     return SDValue(N, 0);
5641
5642 return SDValue();
5643}
5644
5645SDValue DAGCombiner::visitAVG(SDNode *N) {
5646 unsigned Opcode = N->getOpcode();
5647 SDValue N0 = N->getOperand(0);
5648 SDValue N1 = N->getOperand(1);
5649 EVT VT = N->getValueType(0);
5650 SDLoc DL(N);
5651 bool IsSigned = Opcode == ISD::AVGCEILS || Opcode == ISD::AVGFLOORS;
5652
5653 // fold (avg c1, c2)
5654 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5655 return C;
5656
5657 // canonicalize constant to RHS.
5660 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5661
5662 if (VT.isVector())
5663 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5664 return FoldedVOp;
5665
5666 // fold (avg x, undef) -> x
5667 if (N0.isUndef())
5668 return N1;
5669 if (N1.isUndef())
5670 return N0;
5671
5672 // fold (avg x, x) --> x
5673 if (N0 == N1 && Level >= AfterLegalizeTypes)
5674 return N0;
5675
5676 // fold (avgfloor x, 0) -> x >> 1
5677 SDValue X, Y;
5679 return DAG.getNode(ISD::SRA, DL, VT, X,
5680 DAG.getShiftAmountConstant(1, VT, DL));
5682 return DAG.getNode(ISD::SRL, DL, VT, X,
5683 DAG.getShiftAmountConstant(1, VT, DL));
5684
5685 // fold avgu(zext(x), zext(y)) -> zext(avgu(x, y))
5686 // fold avgs(sext(x), sext(y)) -> sext(avgs(x, y))
5687 if (!IsSigned &&
5688 sd_match(N, m_BinOp(Opcode, m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) &&
5689 X.getValueType() == Y.getValueType() &&
5690 hasOperation(Opcode, X.getValueType())) {
5691 SDValue AvgU = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5692 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, AvgU);
5693 }
5694 if (IsSigned &&
5695 sd_match(N, m_BinOp(Opcode, m_SExt(m_Value(X)), m_SExt(m_Value(Y)))) &&
5696 X.getValueType() == Y.getValueType() &&
5697 hasOperation(Opcode, X.getValueType())) {
5698 SDValue AvgS = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5699 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, AvgS);
5700 }
5701
5702 // Fold avgflooru(x,y) -> avgceilu(x,y-1) iff y != 0
5703 // Fold avgflooru(x,y) -> avgceilu(x-1,y) iff x != 0
5704 // Check if avgflooru isn't legal/custom but avgceilu is.
5705 if (Opcode == ISD::AVGFLOORU && !hasOperation(ISD::AVGFLOORU, VT) &&
5706 (!LegalOperations || hasOperation(ISD::AVGCEILU, VT))) {
5707 if (DAG.isKnownNeverZero(N1))
5708 return DAG.getNode(
5709 ISD::AVGCEILU, DL, VT, N0,
5710 DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getAllOnesConstant(DL, VT)));
5711 if (DAG.isKnownNeverZero(N0))
5712 return DAG.getNode(
5713 ISD::AVGCEILU, DL, VT, N1,
5714 DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getAllOnesConstant(DL, VT)));
5715 }
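  // This holds because avgceilu(a, b) computes (a + b + 1) >> 1 without
  // overflow, so avgceilu(x, y - 1) equals avgflooru(x, y) as long as y (or x,
  // for the second form) is known non-zero and the decrement cannot wrap.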
5716
5717 // Fold avgfloor((add nw x,y), 1) -> avgceil(x,y)
5718 // Fold avgfloor((add nw x,1), y) -> avgceil(x,y)
5719 if ((Opcode == ISD::AVGFLOORU && hasOperation(ISD::AVGCEILU, VT)) ||
5720 (Opcode == ISD::AVGFLOORS && hasOperation(ISD::AVGCEILS, VT))) {
5721 SDValue Add;
5722 if (sd_match(N,
5723 m_c_BinOp(Opcode,
5725 m_One())) ||
5726 sd_match(N, m_c_BinOp(Opcode,
5728 m_Value(Y)))) {
5729
5730 if (IsSigned && Add->getFlags().hasNoSignedWrap())
5731 return DAG.getNode(ISD::AVGCEILS, DL, VT, X, Y);
5732
5733 if (!IsSigned && Add->getFlags().hasNoUnsignedWrap())
5734 return DAG.getNode(ISD::AVGCEILU, DL, VT, X, Y);
5735 }
5736 }
5737
5738 // Fold avgfloors(x,y) -> avgflooru(x,y) if both x and y are non-negative
5739 if (Opcode == ISD::AVGFLOORS && hasOperation(ISD::AVGFLOORU, VT)) {
5740 if (DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5741 return DAG.getNode(ISD::AVGFLOORU, DL, VT, N0, N1);
5742 }
5743
5744 return SDValue();
5745}
5746
5747SDValue DAGCombiner::visitABD(SDNode *N) {
5748 unsigned Opcode = N->getOpcode();
5749 SDValue N0 = N->getOperand(0);
5750 SDValue N1 = N->getOperand(1);
5751 EVT VT = N->getValueType(0);
5752 SDLoc DL(N);
5753
5754 // fold (abd c1, c2)
5755 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5756 return C;
5757
5758 // canonicalize constant to RHS.
5761 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5762
5763 if (VT.isVector())
5764 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5765 return FoldedVOp;
5766
5767 // fold (abd x, undef) -> 0
5768 if (N0.isUndef() || N1.isUndef())
5769 return DAG.getConstant(0, DL, VT);
5770
5771 // fold (abd x, x) -> 0
5772 if (N0 == N1)
5773 return DAG.getConstant(0, DL, VT);
5774
5775 SDValue X, Y;
5776
5777 // fold (abds x, 0) -> abs x
5779 (!LegalOperations || hasOperation(ISD::ABS, VT)))
5780 return DAG.getNode(ISD::ABS, DL, VT, X);
5781
5782 // fold (abdu x, 0) -> x
5784 return X;
5785
5786 // fold (abds x, y) -> (abdu x, y) iff both args are known positive
5787 if (Opcode == ISD::ABDS && hasOperation(ISD::ABDU, VT) &&
5788 DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5789 return DAG.getNode(ISD::ABDU, DL, VT, N1, N0);
5790
5791 // fold (abd? (?ext x), (?ext y)) -> (zext (abd? x, y))
5794 EVT SmallVT = X.getScalarValueSizeInBits() > Y.getScalarValueSizeInBits()
5795 ? X.getValueType()
5796 : Y.getValueType();
5797 if (!LegalOperations || hasOperation(Opcode, SmallVT)) {
5798 SDValue ExtedX = DAG.getExtOrTrunc(X, SDLoc(X), SmallVT, N0->getOpcode());
5799 SDValue ExtedY = DAG.getExtOrTrunc(Y, SDLoc(Y), SmallVT, N0->getOpcode());
5800 SDValue SmallABD = DAG.getNode(Opcode, DL, SmallVT, {ExtedX, ExtedY});
5801 SDValue ZExted = DAG.getZExtOrTrunc(SmallABD, DL, VT);
5802 return ZExted;
5803 }
5804 }
5805
5806 return SDValue();
5807}
5808
5809/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
5810 /// give the opcodes for the two computations that are being performed. Return
5811 /// the simplified value if a simplification was made.
5812SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
5813 unsigned HiOp) {
5814 // If the high half is not needed, just compute the low half.
5815 bool HiExists = N->hasAnyUseOfValue(1);
5816 if (!HiExists && (!LegalOperations ||
5817 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
5818 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5819 return CombineTo(N, Res, Res);
5820 }
5821
5822 // If the low half is not needed, just compute the high half.
5823 bool LoExists = N->hasAnyUseOfValue(0);
5824 if (!LoExists && (!LegalOperations ||
5825 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
5826 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5827 return CombineTo(N, Res, Res);
5828 }
5829
5830 // If both halves are used, return as it is.
5831 if (LoExists && HiExists)
5832 return SDValue();
5833
5834 // If the two computed results can be simplified separately, separate them.
5835 if (LoExists) {
5836 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5837 AddToWorklist(Lo.getNode());
5838 SDValue LoOpt = combine(Lo.getNode());
5839 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
5840 (!LegalOperations ||
5841 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
5842 return CombineTo(N, LoOpt, LoOpt);
5843 }
5844
5845 if (HiExists) {
5846 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5847 AddToWorklist(Hi.getNode());
5848 SDValue HiOpt = combine(Hi.getNode());
5849 if (HiOpt.getNode() && HiOpt != Hi &&
5850 (!LegalOperations ||
5851 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
5852 return CombineTo(N, HiOpt, HiOpt);
5853 }
5854
5855 return SDValue();
5856}
5857
5858SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
5859 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
5860 return Res;
5861
5862 SDValue N0 = N->getOperand(0);
5863 SDValue N1 = N->getOperand(1);
5864 EVT VT = N->getValueType(0);
5865 SDLoc DL(N);
5866
5867 // Constant fold.
5868   if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5869     return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
5870
5871 // canonicalize constant to RHS (vector doesn't have to splat)
5874 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
5875
5876   // If the type twice as wide is legal, transform the mul_lohi to a wider
5877   // multiply plus a shift.
5878 if (VT.isSimple() && !VT.isVector()) {
5879 MVT Simple = VT.getSimpleVT();
5880 unsigned SimpleSize = Simple.getSizeInBits();
5881 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5882 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5883 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5884 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5885 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5886       // Compute the high part (result 1).
5887 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5888 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5889 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5890       // Compute the low part (result 0).
5891 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5892 return CombineTo(N, Lo, Hi);
5893 }
5894 }
5895
5896 return SDValue();
5897}
5898
5899SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
5900 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
5901 return Res;
5902
5903 SDValue N0 = N->getOperand(0);
5904 SDValue N1 = N->getOperand(1);
5905 EVT VT = N->getValueType(0);
5906 SDLoc DL(N);
5907
5908 // Constant fold.
5909   if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5910     return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
5911
5912 // canonicalize constant to RHS (vector doesn't have to splat)
5915 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
5916
5917 // (umul_lohi N0, 0) -> (0, 0)
5918 if (isNullConstant(N1)) {
5919 SDValue Zero = DAG.getConstant(0, DL, VT);
5920 return CombineTo(N, Zero, Zero);
5921 }
5922
5923 // (umul_lohi N0, 1) -> (N0, 0)
5924 if (isOneConstant(N1)) {
5925 SDValue Zero = DAG.getConstant(0, DL, VT);
5926 return CombineTo(N, N0, Zero);
5927 }
5928
5929   // If the type twice as wide is legal, transform the mul_lohi to a wider
5930   // multiply plus a shift.
5931 if (VT.isSimple() && !VT.isVector()) {
5932 MVT Simple = VT.getSimpleVT();
5933 unsigned SimpleSize = Simple.getSizeInBits();
5934 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5935 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5936 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5937 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5938 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5939 // Compute the high part as N1.
5940 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5941 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5942 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5943 // Compute the low part as N0.
5944 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5945 return CombineTo(N, Lo, Hi);
5946 }
5947 }
5948
5949 return SDValue();
5950}
5951
5952SDValue DAGCombiner::visitMULO(SDNode *N) {
5953 SDValue N0 = N->getOperand(0);
5954 SDValue N1 = N->getOperand(1);
5955 EVT VT = N0.getValueType();
5956 bool IsSigned = (ISD::SMULO == N->getOpcode());
5957
5958 EVT CarryVT = N->getValueType(1);
5959 SDLoc DL(N);
5960
5961 ConstantSDNode *N0C = isConstOrConstSplat(N0);
5962 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5963
5964 // fold operation with constant operands.
5965 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
5966 // multiple results.
5967 if (N0C && N1C) {
5968 bool Overflow;
5969 APInt Result =
5970 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
5971 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
5972 return CombineTo(N, DAG.getConstant(Result, DL, VT),
5973 DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
5974 }
5975
5976 // canonicalize constant to RHS.
5979 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
5980
5981 // fold (mulo x, 0) -> 0 + no carry out
5982 if (isNullOrNullSplat(N1))
5983 return CombineTo(N, DAG.getConstant(0, DL, VT),
5984 DAG.getConstant(0, DL, CarryVT));
5985
5986 // (mulo x, 2) -> (addo x, x)
5987 // FIXME: This needs a freeze.
5988 if (N1C && N1C->getAPIntValue() == 2 &&
5989 (!IsSigned || VT.getScalarSizeInBits() > 2))
5990 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
5991 N->getVTList(), N0, N0);
5992
5993 // A 1 bit SMULO overflows if both inputs are 1.
5994 if (IsSigned && VT.getScalarSizeInBits() == 1) {
5995 SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
5996 SDValue Cmp = DAG.getSetCC(DL, CarryVT, And,
5997 DAG.getConstant(0, DL, VT), ISD::SETNE);
5998 return CombineTo(N, And, Cmp);
5999 }
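  // In i1 signed arithmetic the only values are 0 and -1, and -1 * -1 = +1 is
  // not representable, so the multiply overflows exactly when both inputs are
  // set; the AND/SETNE pair above tests precisely that.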
6000
6001 // If it cannot overflow, transform into a mul.
6002 if (DAG.willNotOverflowMul(IsSigned, N0, N1))
6003 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
6004 DAG.getConstant(0, DL, CarryVT));
6005 return SDValue();
6006}
6007
6008// Function to calculate whether the Min/Max pair of SDNodes (potentially
6009// swapped around) make a signed saturate pattern, clamping to between a signed
6010 // saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW-1.
6011// Returns the node being clamped and the bitwidth of the clamp in BW. Should
6012// work with both SMIN/SMAX nodes and setcc/select combo. The operands are the
6013// same as SimplifySelectCC. N0<N1 ? N2 : N3.
6014 static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
6015                                   SDValue N3, ISD::CondCode CC, unsigned &BW,
6016 bool &Unsigned, SelectionDAG &DAG) {
6017 auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
6018 ISD::CondCode CC) {
6019 // The compare and select operand should be the same or the select operands
6020 // should be truncated versions of the comparison.
6021 if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
6022 return 0;
6023 // The constants need to be the same or a truncated version of each other.
6026 if (!N1C || !N3C)
6027 return 0;
6028 const APInt &C1 = N1C->getAPIntValue().trunc(N1.getScalarValueSizeInBits());
6029 const APInt &C2 = N3C->getAPIntValue().trunc(N3.getScalarValueSizeInBits());
6030 if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
6031 return 0;
6032 return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
6033 };
6034
6035 // Check the initial value is a SMIN/SMAX equivalent.
6036 unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
6037 if (!Opcode0)
6038 return SDValue();
6039
6040   // We may need only one range check if the fptosi can never produce
6041   // the upper value.
6042 if (N0.getOpcode() == ISD::FP_TO_SINT && Opcode0 == ISD::SMAX) {
6043 if (isNullOrNullSplat(N3)) {
6044 EVT IntVT = N0.getValueType().getScalarType();
6045 EVT FPVT = N0.getOperand(0).getValueType().getScalarType();
6046 if (FPVT.isSimple()) {
6047 Type *InputTy = FPVT.getTypeForEVT(*DAG.getContext());
6048 const fltSemantics &Semantics = InputTy->getFltSemantics();
6049 uint32_t MinBitWidth =
6050 APFloatBase::semanticsIntSizeInBits(Semantics, /*isSigned*/ true);
6051 if (IntVT.getSizeInBits() >= MinBitWidth) {
6052 Unsigned = true;
6053 BW = PowerOf2Ceil(MinBitWidth);
6054 return N0;
6055 }
6056 }
6057 }
6058 }
6059
6060 SDValue N00, N01, N02, N03;
6061 ISD::CondCode N0CC;
6062 switch (N0.getOpcode()) {
6063 case ISD::SMIN:
6064 case ISD::SMAX:
6065 N00 = N02 = N0.getOperand(0);
6066 N01 = N03 = N0.getOperand(1);
6067 N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
6068 break;
6069 case ISD::SELECT_CC:
6070 N00 = N0.getOperand(0);
6071 N01 = N0.getOperand(1);
6072 N02 = N0.getOperand(2);
6073 N03 = N0.getOperand(3);
6074 N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
6075 break;
6076 case ISD::SELECT:
6077 case ISD::VSELECT:
6078 if (N0.getOperand(0).getOpcode() != ISD::SETCC)
6079 return SDValue();
6080 N00 = N0.getOperand(0).getOperand(0);
6081 N01 = N0.getOperand(0).getOperand(1);
6082 N02 = N0.getOperand(1);
6083 N03 = N0.getOperand(2);
6084 N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
6085 break;
6086 default:
6087 return SDValue();
6088 }
6089
6090 unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
6091 if (!Opcode1 || Opcode0 == Opcode1)
6092 return SDValue();
6093
6094 ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
6095 ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
6096 if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
6097 return SDValue();
6098
6099 const APInt &MinC = MinCOp->getAPIntValue();
6100 const APInt &MaxC = MaxCOp->getAPIntValue();
6101 APInt MinCPlus1 = MinC + 1;
6102 if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
6103 BW = MinCPlus1.exactLogBase2() + 1;
6104 Unsigned = false;
6105 return N02;
6106 }
6107
6108 if (MaxC == 0 && MinC != 0 && MinCPlus1.isPowerOf2()) {
6109 BW = MinCPlus1.exactLogBase2();
6110 Unsigned = true;
6111 return N02;
6112 }
6113
6114 return SDValue();
6115}
6116
6117 static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
6118                                            SDValue N3, ISD::CondCode CC,
6119 SelectionDAG &DAG) {
6120 unsigned BW;
6121 bool Unsigned;
6122 SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned, DAG);
6123 if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
6124 return SDValue();
6125 EVT FPVT = Fp.getOperand(0).getValueType();
6126 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
6127 if (FPVT.isVector())
6128 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
6129 FPVT.getVectorElementCount());
6130 unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
6131 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
6132 return SDValue();
6133 SDLoc DL(Fp);
6134 SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
6135 DAG.getValueType(NewVT.getScalarType()));
6136 return DAG.getExtOrTrunc(!Unsigned, Sat, DL, N2->getValueType(0));
6137}
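// For example, smin(smax(fptosi(f), -128), 127) producing i32 is recognized
// with BW = 8 and turned into a sign-extended (fp_to_sint_sat f, i8), provided
// the target's shouldConvertFpToSat() hook reports the conversion as
// profitable.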
6138
6139 static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
6140                                          SDValue N3, ISD::CondCode CC,
6141 SelectionDAG &DAG) {
6142 // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
6143 // select/vselect/select_cc. The two operands pairs for the select (N2/N3) may
6144 // be truncated versions of the setcc (N0/N1).
6145 if ((N0 != N2 &&
6146 (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
6147 N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT)
6148 return SDValue();
6151 if (!N1C || !N3C)
6152 return SDValue();
6153 const APInt &C1 = N1C->getAPIntValue();
6154 const APInt &C3 = N3C->getAPIntValue();
6155 if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
6156 C1 != C3.zext(C1.getBitWidth()))
6157 return SDValue();
6158
6159 unsigned BW = (C1 + 1).exactLogBase2();
6160 EVT FPVT = N0.getOperand(0).getValueType();
6161 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
6162 if (FPVT.isVector())
6163 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
6164 FPVT.getVectorElementCount());
6166 FPVT, NewVT))
6167 return SDValue();
6168
6169 SDValue Sat =
6170 DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
6171 DAG.getValueType(NewVT.getScalarType()));
6172 return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
6173}
6174
6175SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
6176 SDValue N0 = N->getOperand(0);
6177 SDValue N1 = N->getOperand(1);
6178 EVT VT = N0.getValueType();
6179 unsigned Opcode = N->getOpcode();
6180 SDLoc DL(N);
6181
6182 // fold operation with constant operands.
6183 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
6184 return C;
6185
6186 // If the operands are the same, this is a no-op.
6187 if (N0 == N1)
6188 return N0;
6189
6190 // Fold operation with vscale operands.
6191 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
6192 uint64_t C0 = N0->getConstantOperandVal(0);
6193 uint64_t C1 = N1->getConstantOperandVal(0);
6194 if (Opcode == ISD::UMAX)
6195 return C0 > C1 ? N0 : N1;
6196 else if (Opcode == ISD::UMIN)
6197 return C0 > C1 ? N1 : N0;
6198 }
6199
6200 // canonicalize constant to RHS
6203 return DAG.getNode(Opcode, DL, VT, N1, N0);
6204
6205 // fold vector ops
6206 if (VT.isVector())
6207 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
6208 return FoldedVOp;
6209
6210 // reassociate minmax
6211 if (SDValue RMINMAX = reassociateOps(Opcode, DL, N0, N1, N->getFlags()))
6212 return RMINMAX;
6213
6214   // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
6215 // Only do this if:
6216 // 1. The current op isn't legal and the flipped is.
6217 // 2. The saturation pattern is broken by canonicalization in InstCombine.
6218 bool IsOpIllegal = !TLI.isOperationLegal(Opcode, VT);
6219 bool IsSatBroken = Opcode == ISD::UMIN && N0.getOpcode() == ISD::SMAX;
6220 if ((IsSatBroken || IsOpIllegal) && (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
6221 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
6222 unsigned AltOpcode;
6223 switch (Opcode) {
6224 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
6225 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
6226 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
6227 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
6228 default: llvm_unreachable("Unknown MINMAX opcode");
6229 }
6230 if ((IsSatBroken && IsOpIllegal) || TLI.isOperationLegal(AltOpcode, VT))
6231 return DAG.getNode(AltOpcode, DL, VT, N0, N1);
6232 }
6233
6234 if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
6235     if (SDValue S = PerformMinMaxFpToSatCombine(
6236             N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
6237 return S;
6238 if (Opcode == ISD::UMIN)
6239 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
6240 return S;
6241
6242 // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
6243 auto ReductionOpcode = [](unsigned Opcode) {
6244 switch (Opcode) {
6245 case ISD::SMIN:
6246 return ISD::VECREDUCE_SMIN;
6247 case ISD::SMAX:
6248 return ISD::VECREDUCE_SMAX;
6249 case ISD::UMIN:
6250 return ISD::VECREDUCE_UMIN;
6251 case ISD::UMAX:
6252 return ISD::VECREDUCE_UMAX;
6253 default:
6254 llvm_unreachable("Unexpected opcode");
6255 }
6256 };
6257 if (SDValue SD = reassociateReduction(ReductionOpcode(Opcode), Opcode,
6258 SDLoc(N), VT, N0, N1))
6259 return SD;
6260
6261 // Simplify the operands using demanded-bits information.
6262   if (SimplifyDemandedBits(SDValue(N, 0)))
6263     return SDValue(N, 0);
6264
6265 return SDValue();
6266}
6267
6268/// If this is a bitwise logic instruction and both operands have the same
6269/// opcode, try to sink the other opcode after the logic instruction.
6270SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
6271 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
6272 EVT VT = N0.getValueType();
6273 unsigned LogicOpcode = N->getOpcode();
6274 unsigned HandOpcode = N0.getOpcode();
6275 assert(ISD::isBitwiseLogicOp(LogicOpcode) && "Expected logic opcode");
6276 assert(HandOpcode == N1.getOpcode() && "Bad input!");
6277
6278 // Bail early if none of these transforms apply.
6279 if (N0.getNumOperands() == 0)
6280 return SDValue();
6281
6282 // FIXME: We should check number of uses of the operands to not increase
6283 // the instruction count for all transforms.
6284
6285 // Handle size-changing casts (or sign_extend_inreg).
6286 SDValue X = N0.getOperand(0);
6287 SDValue Y = N1.getOperand(0);
6288 EVT XVT = X.getValueType();
6289 SDLoc DL(N);
6290 if (ISD::isExtOpcode(HandOpcode) || ISD::isExtVecInRegOpcode(HandOpcode) ||
6291 (HandOpcode == ISD::SIGN_EXTEND_INREG &&
6292 N0.getOperand(1) == N1.getOperand(1))) {
6293 // If both operands have other uses, this transform would create extra
6294 // instructions without eliminating anything.
6295 if (!N0.hasOneUse() && !N1.hasOneUse())
6296 return SDValue();
6297 // We need matching integer source types.
6298 if (XVT != Y.getValueType())
6299 return SDValue();
6300 // Don't create an illegal op during or after legalization. Don't ever
6301 // create an unsupported vector op.
6302 if ((VT.isVector() || LegalOperations) &&
6303 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
6304 return SDValue();
6305 // Avoid infinite looping with PromoteIntBinOp.
6306 // TODO: Should we apply desirable/legal constraints to all opcodes?
6307 if ((HandOpcode == ISD::ANY_EXTEND ||
6308 HandOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
6309 LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
6310 return SDValue();
6311 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
6312 SDNodeFlags LogicFlags;
6313 LogicFlags.setDisjoint(N->getFlags().hasDisjoint() &&
6314 ISD::isExtOpcode(HandOpcode));
6315 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y, LogicFlags);
6316 if (HandOpcode == ISD::SIGN_EXTEND_INREG)
6317 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
6318 return DAG.getNode(HandOpcode, DL, VT, Logic);
6319 }
6320
6321 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
6322 if (HandOpcode == ISD::TRUNCATE) {
6323 // If both operands have other uses, this transform would create extra
6324 // instructions without eliminating anything.
6325 if (!N0.hasOneUse() && !N1.hasOneUse())
6326 return SDValue();
6327 // We need matching source types.
6328 if (XVT != Y.getValueType())
6329 return SDValue();
6330 // Don't create an illegal op during or after legalization.
6331 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
6332 return SDValue();
6333 // Be extra careful sinking truncate. If it's free, there's no benefit in
6334 // widening a binop. Also, don't create a logic op on an illegal type.
6335 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
6336 return SDValue();
6337 if (!TLI.isTypeLegal(XVT))
6338 return SDValue();
6339 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6340 return DAG.getNode(HandOpcode, DL, VT, Logic);
6341 }
6342
6343 // For binops SHL/SRL/SRA/AND:
6344 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
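// e.g. (or (shl a, c), (shl b, c)) --> (shl (or a, b), c); with equal shift
// amounts the same bit positions move, so the bitwise op distributes through
// the shift (and likewise through an AND with a shared mask).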
6345 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
6346 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
6347 N0.getOperand(1) == N1.getOperand(1)) {
6348 // If either operand has other uses, this transform is not an improvement.
6349 if (!N0.hasOneUse() || !N1.hasOneUse())
6350 return SDValue();
6351 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6352 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
6353 }
6354
6355 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
6356 if (HandOpcode == ISD::BSWAP) {
6357 // If either operand has other uses, this transform is not an improvement.
6358 if (!N0.hasOneUse() || !N1.hasOneUse())
6359 return SDValue();
6360 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6361 return DAG.getNode(HandOpcode, DL, VT, Logic);
6362 }
6363
6364 // For funnel shifts FSHL/FSHR:
6365 // logic_op (OP x, x1, s), (OP y, y1, s) -->
6366 // --> OP (logic_op x, y), (logic_op x1, y1), s
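// e.g. (xor (fshl a, b, s), (fshl c, d, s)) --> (fshl (xor a, c), (xor b, d), s),
// since both funnel shifts select the same bit positions out of their
// concatenated inputs.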
6367 if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) &&
6368 N0.getOperand(2) == N1.getOperand(2)) {
6369 if (!N0.hasOneUse() || !N1.hasOneUse())
6370 return SDValue();
6371 SDValue X1 = N0.getOperand(1);
6372 SDValue Y1 = N1.getOperand(1);
6373 SDValue S = N0.getOperand(2);
6374 SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y);
6375 SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1);
6376 return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S);
6377 }
6378
6379 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
6380 // Only perform this optimization up until type legalization, before
6381 // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
6382 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
6383 // we don't want to undo this promotion.
6384 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
6385 // on scalars.
6386 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
6387 Level <= AfterLegalizeTypes) {
6388 // Input types must be integer and the same.
6389 if (XVT.isInteger() && XVT == Y.getValueType() &&
6390 !(VT.isVector() && TLI.isTypeLegal(VT) &&
6391 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
6392 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
6393 return DAG.getNode(HandOpcode, DL, VT, Logic);
6394 }
6395 }
6396
6397 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
6398 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
6399 // If both shuffles use the same mask, and both shuffle within a single
6400 // vector, then it is worthwhile to move the swizzle after the operation.
6401 // The type-legalizer generates this pattern when loading illegal
6402 // vector types from memory. In many cases this allows additional shuffle
6403 // optimizations.
6404 // There are other cases where moving the shuffle after the xor/and/or
6405 // is profitable even if shuffles don't perform a swizzle.
6406 // If both shuffles use the same mask, and both shuffles have the same first
6407 // or second operand, then it might still be profitable to move the shuffle
6408 // after the xor/and/or operation.
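// e.g. (and (shuffle A, C, M), (shuffle B, C, M)) --> (shuffle (and A, B), C, M):
// lanes taken from the first operands combine A and B element-wise, and lanes
// taken from C give C & C == C. For XOR those lanes would become zero, which is
// why an all-zeros operand is substituted below.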
6409 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
6410 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
6411 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
6412 assert(X.getValueType() == Y.getValueType() &&
6413 "Inputs to shuffles are not the same type");
6414
6415 // Check that both shuffles use the same mask. The masks are known to be of
6416 // the same length because the result vector type is the same.
6417 // Check also that shuffles have only one use to avoid introducing extra
6418 // instructions.
6419 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
6420 !SVN0->getMask().equals(SVN1->getMask()))
6421 return SDValue();
6422
6423 // Don't try to fold this node if it requires introducing a
6424 // build vector of all zeros that might be illegal at this stage.
6425 SDValue ShOp = N0.getOperand(1);
6426 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
6427 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6428
6429 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
6430 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
6431 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
6432 N0.getOperand(0), N1.getOperand(0));
6433 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
6434 }
6435
6436 // Don't try to fold this node if it requires introducing a
6437 // build vector of all zeros that might be illegal at this stage.
6438 ShOp = N0.getOperand(0);
6439 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
6440 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6441
6442 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
6443 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
6444 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
6445 N1.getOperand(1));
6446 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
6447 }
6448 }
6449
6450 return SDValue();
6451}
6452
6453/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
6454SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
6455 const SDLoc &DL) {
6456 SDValue LL, LR, RL, RR, N0CC, N1CC;
6457 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
6458 !isSetCCEquivalent(N1, RL, RR, N1CC))
6459 return SDValue();
6460
6461 assert(N0.getValueType() == N1.getValueType() &&
6462 "Unexpected operand types for bitwise logic op");
6463 assert(LL.getValueType() == LR.getValueType() &&
6464 RL.getValueType() == RR.getValueType() &&
6465 "Unexpected operand types for setcc");
6466
6467 // If we're here post-legalization or the logic op type is not i1, the logic
6468 // op type must match a setcc result type. Also, all folds require new
6469 // operations on the left and right operands, so those types must match.
6470 EVT VT = N0.getValueType();
6471 EVT OpVT = LL.getValueType();
6472 if (LegalOperations || VT.getScalarType() != MVT::i1)
6473 if (VT != getSetCCResultType(OpVT))
6474 return SDValue();
6475 if (OpVT != RL.getValueType())
6476 return SDValue();
6477
6478 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
6479 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
6480 bool IsInteger = OpVT.isInteger();
6481 if (LR == RR && CC0 == CC1 && IsInteger) {
6482 bool IsZero = isNullOrNullSplat(LR);
6483 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
6484
6485 // All bits clear?
6486 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
6487 // All sign bits clear?
6488 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
6489 // Any bits set?
6490 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
6491 // Any sign bits set?
6492 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
6493
6494 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
6495 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
6496 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
6497 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
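// e.g. for i32 X and Y, (X == 0 && Y == 0) holds exactly when (X | Y) == 0,
// since OR is zero only if every bit of both inputs is zero.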
6498 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
6499 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
6500 AddToWorklist(Or.getNode());
6501 return DAG.getSetCC(DL, VT, Or, LR, CC1);
6502 }
6503
6504 // All bits set?
6505 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
6506 // All sign bits set?
6507 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
6508 // Any bits clear?
6509 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
6510 // Any sign bits clear?
6511 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
6512
6513 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
6514 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
6515 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
6516 // (or (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
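// e.g. (X == -1 && Y == -1) holds exactly when (X & Y) == -1, since AND is
// all-ones only if every bit of both inputs is set.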
6517 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
6518 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
6519 AddToWorklist(And.getNode());
6520 return DAG.getSetCC(DL, VT, And, LR, CC1);
6521 }
6522 }
6523
6524 // TODO: What is the 'or' equivalent of this fold?
6525 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
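// e.g. for i8 X: the conjunction excludes exactly the values where X + 1 wraps
// to 1 (X == 0) or to 0 (X == -1), so it is equivalent to (X + 1) u>= 2.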
6526 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
6527 IsInteger && CC0 == ISD::SETNE &&
6528 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
6529 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
6530 SDValue One = DAG.getConstant(1, DL, OpVT);
6531 SDValue Two = DAG.getConstant(2, DL, OpVT);
6532 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
6533 AddToWorklist(Add.getNode());
6534 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
6535 }
6536
6537 // Try more general transforms if the predicates match and the only user of
6538 // the compares is the 'and' or 'or'.
6539 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
6540 N0.hasOneUse() && N1.hasOneUse()) {
6541 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
6542 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
6543 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
6544 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
6545 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
6546 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
6547 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6548 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
6549 }
6550
6551 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
6552 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
6553 // Match a shared variable operand and 2 non-opaque constant operands.
6554 auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) {
6555 // The difference of the constants must be a single bit.
6556 const APInt &CMax =
6557 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
6558 const APInt &CMin =
6559 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
6560 return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
6561 };
6562 if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) {
6563 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
6564 // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
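// e.g. (X == 8 || X == 12): CMax - CMin = 4 is a power of 2, so this becomes
// ((X - 8) & ~4) == 0, which holds exactly for X in {8, 12}.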
6565 SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
6566 SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
6567 SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
6568 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
6569 SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
6570 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
6571 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6572 return DAG.getSetCC(DL, VT, And, Zero, CC0);
6573 }
6574 }
6575 }
6576
6577 // Canonicalize equivalent operands to LL == RL.
6578 if (LL == RR && LR == RL) {
6579 CC1 = ISD::getSetCCSwappedOperands(CC1);
6580 std::swap(RL, RR);
6581 }
6582
6583 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6584 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6585 if (LL == RL && LR == RR) {
6586 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
6587 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
6588 if (NewCC != ISD::SETCC_INVALID &&
6589 (!LegalOperations ||
6590 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
6591 TLI.isOperationLegal(ISD::SETCC, OpVT))))
6592 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
6593 }
6594
6595 return SDValue();
6596}
6597
6598static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2,
6599 SelectionDAG &DAG) {
6600 return DAG.isKnownNeverSNaN(Operand2) && DAG.isKnownNeverSNaN(Operand1);
6601}
6602
6603static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
6604 SelectionDAG &DAG) {
6605 return DAG.isKnownNeverNaN(Operand2) && DAG.isKnownNeverNaN(Operand1);
6606}
6607
6608/// Returns an appropriate FP min/max opcode for clamping operations.
6609static unsigned getMinMaxOpcodeForClamp(bool IsMin, SDValue Operand1,
6610 SDValue Operand2, SelectionDAG &DAG,
6611 const TargetLowering &TLI) {
6612 EVT VT = Operand1.getValueType();
6613 unsigned IEEEOp = IsMin ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
6614 if (TLI.isOperationLegalOrCustom(IEEEOp, VT) &&
6615 arebothOperandsNotNan(Operand1, Operand2, DAG))
6616 return IEEEOp;
6617 unsigned PreferredOp = IsMin ? ISD::FMINNUM : ISD::FMAXNUM;
6618 if (TLI.isOperationLegalOrCustom(PreferredOp, VT))
6619 return PreferredOp;
6620 return ISD::DELETED_NODE;
6621}
6622
6623// FIXME: use FMINIMUMNUM if possible, such as for RISC-V.
6625 SDValue Operand1, SDValue Operand2, ISD::CondCode CC, unsigned OrAndOpcode,
6626 SelectionDAG &DAG, bool isFMAXNUMFMINNUM_IEEE, bool isFMAXNUMFMINNUM) {
6627 // The optimization cannot be applied for all the predicates because
6628 // of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
6629 // NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
6630 // applied at all if one of the operands is a signaling NaN.
6631
6632 // It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
6633 // are non NaN values.
6634 if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
6635 ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND))) {
6636 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6637 isFMAXNUMFMINNUM_IEEE
6638            ? ISD::FMINNUM_IEEE
6639            : ISD::DELETED_NODE;
6640 }
6641
6642 if (((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::OR)) ||
6643 ((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::AND))) {
6644 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6645 isFMAXNUMFMINNUM_IEEE
6646            ? ISD::FMAXNUM_IEEE
6647            : ISD::DELETED_NODE;
6648 }
6649
6650 // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
6651 // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
6652 // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
6653 // that there are not any sNaNs, then the optimization is not valid
6654 // for FMINNUM_IEEE/FMAXNUM_IEEE. In the presence of sNaNs, we apply
6655 // the optimization using FMINNUM/FMAXNUM for the following cases. If
6656 // we can prove that we do not have any sNaNs, then we can do the
6657 // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
6658 // cases.
6659 if (((CC == ISD::SETOLT || CC == ISD::SETOLE) && (OrAndOpcode == ISD::OR)) ||
6660 ((CC == ISD::SETUGT || CC == ISD::SETUGE) && (OrAndOpcode == ISD::AND))) {
6661 return isFMAXNUMFMINNUM ? ISD::FMINNUM
6662 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6663 isFMAXNUMFMINNUM_IEEE
6664                  ? ISD::FMINNUM_IEEE
6665                  : ISD::DELETED_NODE;
6666 }
6667
6668 if (((CC == ISD::SETOGT || CC == ISD::SETOGE) && (OrAndOpcode == ISD::OR)) ||
6669 ((CC == ISD::SETULT || CC == ISD::SETULE) && (OrAndOpcode == ISD::AND))) {
6670 return isFMAXNUMFMINNUM ? ISD::FMAXNUM
6671 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6672 isFMAXNUMFMINNUM_IEEE
6673                  ? ISD::FMAXNUM_IEEE
6674                  : ISD::DELETED_NODE;
6675 }
6676
6677 return ISD::DELETED_NODE;
6678}
6679
6680 static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
6681 using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
6682 assert(
6683 (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
6684 "Invalid Op to combine SETCC with");
6685
6686 // TODO: Search past casts/truncates.
6687 SDValue LHS = LogicOp->getOperand(0);
6688 SDValue RHS = LogicOp->getOperand(1);
6689 if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC ||
6690 !LHS->hasOneUse() || !RHS->hasOneUse())
6691 return SDValue();
6692
6693 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6694 AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
6695 LogicOp, LHS.getNode(), RHS.getNode());
6696
6697 SDValue LHS0 = LHS->getOperand(0);
6698 SDValue RHS0 = RHS->getOperand(0);
6699 SDValue LHS1 = LHS->getOperand(1);
6700 SDValue RHS1 = RHS->getOperand(1);
6701 // TODO: We don't actually need a splat here, for vectors we just need the
6702 // invariants to hold for each element.
6703 auto *LHS1C = isConstOrConstSplat(LHS1);
6704 auto *RHS1C = isConstOrConstSplat(RHS1);
6705 ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6706 ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
6707 EVT VT = LogicOp->getValueType(0);
6708 EVT OpVT = LHS0.getValueType();
6709 SDLoc DL(LogicOp);
6710
6711 // Check if the operands of an and/or operation are comparisons and if they
6712 // compare against the same value. Replace the and/or-cmp-cmp sequence with
6713 // min/max cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp
6714 // sequence will be replaced with min-cmp sequence:
6715 // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
6716 // and and-cmp-cmp will be replaced with max-cmp sequence:
6717 // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
6718 // The optimization does not work for `==` or `!=`.
6719 // The two comparisons should have either the same predicate or the
6720 // predicate of one of the comparisons is the opposite of the other one.
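// e.g. (a < C) | (b < C) is equivalent to (min(a, b) < C), and
// (a < C) & (b < C) is equivalent to (max(a, b) < C).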
6721 bool isFMAXNUMFMINNUM_IEEE = TLI.isOperationLegal(ISD::FMAXNUM_IEEE, OpVT) &&
6722                                TLI.isOperationLegal(ISD::FMINNUM_IEEE, OpVT);
6723 bool isFMAXNUMFMINNUM = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, OpVT) &&
6724                           TLI.isOperationLegalOrCustom(ISD::FMINNUM, OpVT);
6725 if (((OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
6726 TLI.isOperationLegal(ISD::SMAX, OpVT) &&
6727 TLI.isOperationLegal(ISD::UMIN, OpVT) &&
6728 TLI.isOperationLegal(ISD::SMIN, OpVT)) ||
6729 (OpVT.isFloatingPoint() &&
6730 (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM))) &&
6732 CCL != ISD::SETFALSE && CCL != ISD::SETO && CCL != ISD::SETUO &&
6733 CCL != ISD::SETTRUE &&
6734 (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR))) {
6735
6736 SDValue CommonValue, Operand1, Operand2;
6737 ISD::CondCode CC = ISD::SETCC_INVALID;
6738 if (CCL == CCR) {
6739 if (LHS0 == RHS0) {
6740 CommonValue = LHS0;
6741 Operand1 = LHS1;
6742 Operand2 = RHS1;
6743 CC = ISD::getSetCCSwappedOperands(CCL);
6744 } else if (LHS1 == RHS1) {
6745 CommonValue = LHS1;
6746 Operand1 = LHS0;
6747 Operand2 = RHS0;
6748 CC = CCL;
6749 }
6750 } else {
6751 assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
6752 if (LHS0 == RHS1) {
6753 CommonValue = LHS0;
6754 Operand1 = LHS1;
6755 Operand2 = RHS0;
6756 CC = CCR;
6757 } else if (RHS0 == LHS1) {
6758 CommonValue = LHS1;
6759 Operand1 = LHS0;
6760 Operand2 = RHS1;
6761 CC = CCL;
6762 }
6763 }
6764
6765 // Don't do this transform for sign bit tests. Let foldLogicOfSetCCs
6766 // handle it using OR/AND.
6767 if (CC == ISD::SETLT && isNullOrNullSplat(CommonValue))
6768 CC = ISD::SETCC_INVALID;
6769 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CommonValue))
6770 CC = ISD::SETCC_INVALID;
6771
6772 if (CC != ISD::SETCC_INVALID) {
6773 unsigned NewOpcode = ISD::DELETED_NODE;
6774 bool IsSigned = isSignedIntSetCC(CC);
6775 if (OpVT.isInteger()) {
6776 bool IsLess = (CC == ISD::SETLE || CC == ISD::SETULE ||
6777 CC == ISD::SETLT || CC == ISD::SETULT);
6778 bool IsOr = (LogicOp->getOpcode() == ISD::OR);
6779 if (IsLess == IsOr)
6780 NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
6781 else
6782 NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
6783 } else if (OpVT.isFloatingPoint())
6785 Operand1, Operand2, CC, LogicOp->getOpcode(), DAG,
6786 isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
6787
6788 if (NewOpcode != ISD::DELETED_NODE) {
6789 SDValue MinMaxValue =
6790 DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
6791 return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
6792 }
6793 }
6794 }
6795
6796 if (LHS0 == LHS1 && RHS0 == RHS1 && CCL == CCR &&
6797 LHS0.getValueType() == RHS0.getValueType() &&
6798 ((LogicOp->getOpcode() == ISD::AND && CCL == ISD::SETO) ||
6799 (LogicOp->getOpcode() == ISD::OR && CCL == ISD::SETUO)))
6800 return DAG.getSetCC(DL, VT, LHS0, RHS0, CCL);
6801
6802 if (TargetPreference == AndOrSETCCFoldKind::None)
6803 return SDValue();
6804
6805 if (CCL == CCR &&
6806 CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
6807 LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger()) {
6808 const APInt &APLhs = LHS1C->getAPIntValue();
6809 const APInt &APRhs = RHS1C->getAPIntValue();
6810
6811 // Preference is to use ISD::ABS or we already have an ISD::ABS (in which
6812 // case this is just a compare).
6813 if (APLhs == (-APRhs) &&
6814 ((TargetPreference & AndOrSETCCFoldKind::ABS) ||
6815 DAG.doesNodeExist(ISD::ABS, DAG.getVTList(OpVT), {LHS0}))) {
6816 const APInt &C = APLhs.isNegative() ? APRhs : APLhs;
6817 // (icmp eq A, C) | (icmp eq A, -C)
6818 // -> (icmp eq Abs(A), C)
6819 // (icmp ne A, C) & (icmp ne A, -C)
6820 // -> (icmp ne Abs(A), C)
6821 SDValue AbsOp = DAG.getNode(ISD::ABS, DL, OpVT, LHS0);
6822 return DAG.getNode(ISD::SETCC, DL, VT, AbsOp,
6823 DAG.getConstant(C, DL, OpVT), LHS.getOperand(2));
6824 } else if (TargetPreference &
6825            (AndOrSETCCFoldKind::AddAnd | AndOrSETCCFoldKind::NotAnd)) {
6826
6827 // AndOrSETCCFoldKind::AddAnd:
6828 // A == C0 | A == C1
6829 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6830 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) == 0
6831 // A != C0 & A != C1
6832 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6833 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) != 0
6834
6835 // AndOrSETCCFoldKind::NotAnd:
6836 // A == C0 | A == C1
6837 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6838 // -> ~A & smin(C0, C1) == 0
6839 // A != C0 & A != C1
6840 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6841 // -> ~A & smin(C0, C1) != 0
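// e.g. AddAnd with (A == 5 | A == 7): smax - smin = 2 is a power of 2, so the
// disjunction becomes ((A - 5) & ~2) == 0, which holds exactly for A in {5, 7}.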
6842
6843 const APInt &MaxC = APIntOps::smax(APRhs, APLhs);
6844 const APInt &MinC = APIntOps::smin(APRhs, APLhs);
6845 APInt Dif = MaxC - MinC;
6846 if (!Dif.isZero() && Dif.isPowerOf2()) {
6847 if (MaxC.isAllOnes() &&
6848 (TargetPreference & AndOrSETCCFoldKind::NotAnd)) {
6849 SDValue NotOp = DAG.getNOT(DL, LHS0, OpVT);
6850 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, NotOp,
6851 DAG.getConstant(MinC, DL, OpVT));
6852 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6853 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6854 } else if (TargetPreference & AndOrSETCCFoldKind::AddAnd) {
6855
6856 SDValue AddOp = DAG.getNode(ISD::ADD, DL, OpVT, LHS0,
6857 DAG.getConstant(-MinC, DL, OpVT));
6858 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
6859 DAG.getConstant(~Dif, DL, OpVT));
6860 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6861 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6862 }
6863 }
6864 }
6865 }
6866
6867 return SDValue();
6868}
6869
6870// Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`.
6871// We canonicalize to the `select` form in the middle end, but the `and` form
6872// gets better codegen and all tested targets (arm, x86, riscv)
6874 const SDLoc &DL, SelectionDAG &DAG) {
6875 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6876 if (!isNullConstant(F))
6877 return SDValue();
6878
6879 EVT CondVT = Cond.getValueType();
6880 if (TLI.getBooleanContents(CondVT) !=
6881     TargetLowering::ZeroOrOneBooleanContent)
6882 return SDValue();
6883
6884 if (T.getOpcode() != ISD::AND)
6885 return SDValue();
6886
6887 if (!isOneConstant(T.getOperand(1)))
6888 return SDValue();
6889
6890 EVT OpVT = T.getValueType();
6891
6892 SDValue CondMask =
6893 OpVT == CondVT ? Cond : DAG.getBoolExtOrTrunc(Cond, DL, OpVT, CondVT);
6894 return DAG.getNode(ISD::AND, DL, OpVT, CondMask, T.getOperand(0));
6895}
6896
6897/// This contains all DAGCombine rules which reduce two values combined by
6898/// an And operation to a single value. This makes them reusable in the context
6899/// of visitSELECT(). Rules involving constants are not included as
6900/// visitSELECT() already handles those cases.
6901SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
6902 EVT VT = N1.getValueType();
6903 SDLoc DL(N);
6904
6905 // fold (and x, undef) -> 0
6906 if (N0.isUndef() || N1.isUndef())
6907 return DAG.getConstant(0, DL, VT);
6908
6909 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
6910 return V;
6911
6912 // Canonicalize:
6913 // and(x, add) -> and(add, x)
6914 if (N1.getOpcode() == ISD::ADD)
6915 std::swap(N0, N1);
6916
6917 // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
6918 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
6919 VT.isScalarInteger() && VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
6920 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
6921 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
6922 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
6923 // immediate for an add, but it is legal if its top c2 bits are set,
6924 // transform the ADD so the immediate doesn't need to be materialized
6925 // in a register.
6926 APInt ADDC = ADDI->getAPIntValue();
6927 APInt SRLC = SRLI->getAPIntValue();
6928 if (ADDC.getSignificantBits() <= 64 && SRLC.ult(VT.getSizeInBits()) &&
6929 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6930 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
6931                                    SRLC.getZExtValue());
6932 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
6933 ADDC |= Mask;
6934 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6935 SDLoc DL0(N0);
6936 SDValue NewAdd =
6937 DAG.getNode(ISD::ADD, DL0, VT,
6938 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
6939 CombineTo(N0.getNode(), NewAdd);
6940 // Return N so it doesn't get rechecked!
6941 return SDValue(N, 0);
6942 }
6943 }
6944 }
6945 }
6946 }
6947 }
6948
6949 return SDValue();
6950}
6951
6952bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
6953 EVT LoadResultTy, EVT &ExtVT) {
6954 if (!AndC->getAPIntValue().isMask())
6955 return false;
6956
6957 unsigned ActiveBits = AndC->getAPIntValue().countr_one();
6958
6959 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6960 EVT LoadedVT = LoadN->getMemoryVT();
6961
6962 if (ExtVT == LoadedVT &&
6963 (!LegalOperations ||
6964 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
6965 // ZEXTLOAD will match without needing to change the size of the value being
6966 // loaded.
6967 return true;
6968 }
6969
6970 // Do not change the width of volatile or atomic loads.
6971 if (!LoadN->isSimple())
6972 return false;
6973
6974 // Do not generate loads of non-round integer types since these can
6975 // be expensive (and would be wrong if the type is not byte sized).
6976 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
6977 return false;
6978
6979 if (LegalOperations &&
6980 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
6981 return false;
6982
6983 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT, /*ByteOffset=*/0))
6984 return false;
6985
6986 return true;
6987}
6988
6989bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
6990 ISD::LoadExtType ExtType, EVT &MemVT,
6991 unsigned ShAmt) {
6992 if (!LDST)
6993 return false;
6994
6995 // Only allow byte offsets.
6996 if (ShAmt % 8)
6997 return false;
6998 const unsigned ByteShAmt = ShAmt / 8;
6999
7000 // Do not generate loads of non-round integer types since these can
7001 // be expensive (and would be wrong if the type is not byte sized).
7002 if (!MemVT.isRound())
7003 return false;
7004
7005 // Don't change the width of volatile or atomic loads.
7006 if (!LDST->isSimple())
7007 return false;
7008
7009 EVT LdStMemVT = LDST->getMemoryVT();
7010
7011 // Bail out when changing the scalable property, since we can't be sure that
7012 // we're actually narrowing here.
7013 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
7014 return false;
7015
7016 // Verify that we are actually reducing a load width here.
7017 if (LdStMemVT.bitsLT(MemVT))
7018 return false;
7019
7020 // Ensure that this isn't going to produce an unsupported memory access.
7021 if (ShAmt) {
7022 const Align LDSTAlign = LDST->getAlign();
7023 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
7024 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
7025 LDST->getAddressSpace(), NarrowAlign,
7026 LDST->getMemOperand()->getFlags()))
7027 return false;
7028 }
7029
7030 // It's not possible to generate a constant of extended or untyped type.
7031 EVT PtrType = LDST->getBasePtr().getValueType();
7032 if (PtrType == MVT::Untyped || PtrType.isExtended())
7033 return false;
7034
7035 if (isa<LoadSDNode>(LDST)) {
7036 LoadSDNode *Load = cast<LoadSDNode>(LDST);
7037 // Don't transform one with multiple uses, this would require adding a new
7038 // load.
7039 if (!SDValue(Load, 0).hasOneUse())
7040 return false;
7041
7042 if (LegalOperations &&
7043 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
7044 return false;
7045
7046 // For the transform to be legal, the load must produce only two values
7047 // (the value loaded and the chain). Don't transform a pre-increment
7048 // load, for example, which produces an extra value. Otherwise the
7049 // transformation is not equivalent, and the downstream logic to replace
7050 // uses gets things wrong.
7051 if (Load->getNumValues() > 2)
7052 return false;
7053
7054 // If the load that we're shrinking is an extload and we're not just
7055 // discarding the extension we can't simply shrink the load. Bail.
7056 // TODO: It would be possible to merge the extensions in some cases.
7057 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
7058 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
7059 return false;
7060
7061 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT, ByteShAmt))
7062 return false;
7063 } else {
7064 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
7065 StoreSDNode *Store = cast<StoreSDNode>(LDST);
7066 // Can't write outside the original store
7067 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
7068 return false;
7069
7070 if (LegalOperations &&
7071 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
7072 return false;
7073 }
7074 return true;
7075}
7076
7077bool DAGCombiner::SearchForAndLoads(SDNode *N,
7078 SmallVectorImpl<LoadSDNode*> &Loads,
7079 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
7080 ConstantSDNode *Mask,
7081 SDNode *&NodeToMask) {
7082 // Recursively search for the operands, looking for loads which can be
7083 // narrowed.
7084 for (SDValue Op : N->op_values()) {
7085 if (Op.getValueType().isVector())
7086 return false;
7087
7088 // Some constants may need fixing up later if they are too large.
7089 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
7090 assert(ISD::isBitwiseLogicOp(N->getOpcode()) &&
7091 "Expected bitwise logic operation");
7092 if (!C->getAPIntValue().isSubsetOf(Mask->getAPIntValue()))
7093 NodesWithConsts.insert(N);
7094 continue;
7095 }
7096
7097 if (!Op.hasOneUse())
7098 return false;
7099
7100 switch(Op.getOpcode()) {
7101 case ISD::LOAD: {
7102 auto *Load = cast<LoadSDNode>(Op);
7103 EVT ExtVT;
7104 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
7105 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
7106
7107 // ZEXTLOAD is already small enough.
7108 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
7109 ExtVT.bitsGE(Load->getMemoryVT()))
7110 continue;
7111
7112 // Use LE to convert equal sized loads to zext.
7113 if (ExtVT.bitsLE(Load->getMemoryVT()))
7114 Loads.push_back(Load);
7115
7116 continue;
7117 }
7118 return false;
7119 }
7120 case ISD::ZERO_EXTEND:
7121 case ISD::AssertZext: {
7122 unsigned ActiveBits = Mask->getAPIntValue().countr_one();
7123 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
7124 EVT VT = Op.getOpcode() == ISD::AssertZext ?
7125 cast<VTSDNode>(Op.getOperand(1))->getVT() :
7126 Op.getOperand(0).getValueType();
7127
7128 // We can accept extending nodes if the mask is wider than or equal in
7129 // width to the original type.
7130 if (ExtVT.bitsGE(VT))
7131 continue;
7132 break;
7133 }
7134 case ISD::OR:
7135 case ISD::XOR:
7136 case ISD::AND:
7137 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
7138 NodeToMask))
7139 return false;
7140 continue;
7141 }
7142
7143 // Allow one node which will be masked along with any loads found.
7144 if (NodeToMask)
7145 return false;
7146
7147 // Also ensure that the node to be masked only produces one data result.
7148 NodeToMask = Op.getNode();
7149 if (NodeToMask->getNumValues() > 1) {
7150 bool HasValue = false;
7151 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
7152 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
7153 if (VT != MVT::Glue && VT != MVT::Other) {
7154 if (HasValue) {
7155 NodeToMask = nullptr;
7156 return false;
7157 }
7158 HasValue = true;
7159 }
7160 }
7161 assert(HasValue && "Node to be masked has no data result?");
7162 }
7163 }
7164 return true;
7165}
7166
7167bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
7168 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
7169 if (!Mask)
7170 return false;
7171
7172 if (!Mask->getAPIntValue().isMask())
7173 return false;
7174
7175 // No need to do anything if the and directly uses a load.
7176 if (isa<LoadSDNode>(N->getOperand(0)))
7177 return false;
7178
7179 SmallVector<LoadSDNode *, 8> Loads;
7180 SmallPtrSet<SDNode*, 2> NodesWithConsts;
7181 SDNode *FixupNode = nullptr;
7182 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
7183 if (Loads.empty())
7184 return false;
7185
7186 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
7187 SDValue MaskOp = N->getOperand(1);
7188
7189 // If it exists, fixup the single node we allow in the tree that needs
7190 // masking.
7191 if (FixupNode) {
7192 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
7193 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
7194 FixupNode->getValueType(0),
7195 SDValue(FixupNode, 0), MaskOp);
7196 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
7197 if (And.getOpcode() == ISD::AND)
7198 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
7199 }
7200
7201 // Narrow any constants that need it.
7202 for (auto *LogicN : NodesWithConsts) {
7203 SDValue Op0 = LogicN->getOperand(0);
7204 SDValue Op1 = LogicN->getOperand(1);
7205
7206 // We only need to fix AND if both inputs are constants. And we only need
7207 // to fix one of the constants.
7208 if (LogicN->getOpcode() == ISD::AND &&
7209     (!isa<ConstantSDNode>(Op0) || !isa<ConstantSDNode>(Op1)))
7210 continue;
7211
7212 if (isa<ConstantSDNode>(Op0) && LogicN->getOpcode() != ISD::AND)
7213 Op0 =
7214 DAG.getNode(ISD::AND, SDLoc(Op0), Op0.getValueType(), Op0, MaskOp);
7215
7216 if (isa<ConstantSDNode>(Op1))
7217 Op1 =
7218 DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOp);
7219
7220 if (isa<ConstantSDNode>(Op0) && !isa<ConstantSDNode>(Op1))
7221 std::swap(Op0, Op1);
7222
7223 DAG.UpdateNodeOperands(LogicN, Op0, Op1);
7224 }
7225
7226 // Create narrow loads.
7227 for (auto *Load : Loads) {
7228 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
7229 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
7230 SDValue(Load, 0), MaskOp);
7231 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
7232 if (And.getOpcode() == ISD::AND)
7233 And = SDValue(
7234 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
7235 SDValue NewLoad = reduceLoadWidth(And.getNode());
7236 assert(NewLoad &&
7237 "Shouldn't be masking the load if it can't be narrowed");
7238 CombineTo(Load, NewLoad, NewLoad.getValue(1));
7239 }
7240 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
7241 return true;
7242 }
7243 return false;
7244}
7245
7246// Unfold
7247// x & (-1 'logical shift' y)
7248// To
7249// (x 'opposite logical shift' y) 'logical shift' y
7250// if it is better for performance.
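// e.g. x & (-1 << y) clears the low y bits of x; shifting right logically by y
// and then left by y computes the same value without materializing the mask.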
7251SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
7252 assert(N->getOpcode() == ISD::AND);
7253
7254 SDValue N0 = N->getOperand(0);
7255 SDValue N1 = N->getOperand(1);
7256
7257 // Do we actually prefer shifts over mask?
7259 return SDValue();
7260
7261 // Try to match (-1 '[outer] logical shift' y)
7262 unsigned OuterShift;
7263 unsigned InnerShift; // The opposite direction to the OuterShift.
7264 SDValue Y; // Shift amount.
7265 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
7266 if (!M.hasOneUse())
7267 return false;
7268 OuterShift = M->getOpcode();
7269 if (OuterShift == ISD::SHL)
7270 InnerShift = ISD::SRL;
7271 else if (OuterShift == ISD::SRL)
7272 InnerShift = ISD::SHL;
7273 else
7274 return false;
7275 if (!isAllOnesConstant(M->getOperand(0)))
7276 return false;
7277 Y = M->getOperand(1);
7278 return true;
7279 };
7280
7281 SDValue X;
7282 if (matchMask(N1))
7283 X = N0;
7284 else if (matchMask(N0))
7285 X = N1;
7286 else
7287 return SDValue();
7288
7289 SDLoc DL(N);
7290 EVT VT = N->getValueType(0);
7291
7292 // tmp = x 'opposite logical shift' y
7293 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
7294 // ret = tmp 'logical shift' y
7295 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
7296
7297 return T1;
7298}
7299
7300/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
7301/// For a target with a bit test, this is expected to become test + set and save
7302/// at least 1 instruction.
7303 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
7304 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
7305
7306 // Look through an optional extension.
7307 SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
7308 if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse())
7309 And0 = And0.getOperand(0);
7310 if (!isOneConstant(And1) || !And0.hasOneUse())
7311 return SDValue();
7312
7313 SDValue Src = And0;
7314
7315 // Attempt to find a 'not' op.
7316 // TODO: Should we favor test+set even without the 'not' op?
7317 bool FoundNot = false;
7318 if (isBitwiseNot(Src)) {
7319 FoundNot = true;
7320 Src = Src.getOperand(0);
7321
7322 // Look through an optional truncation. The source operand may not be the
7323 // same type as the original 'and', but that is ok because we are masking
7324 // off everything but the low bit.
7325 if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse())
7326 Src = Src.getOperand(0);
7327 }
7328
7329 // Match a shift-right by constant.
7330 if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse())
7331 return SDValue();
7332
7333 // This is probably not worthwhile without a supported type.
7334 EVT SrcVT = Src.getValueType();
7335 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7336 if (!TLI.isTypeLegal(SrcVT))
7337 return SDValue();
7338
7339 // We might have looked through casts that make this transform invalid.
7340 unsigned BitWidth = SrcVT.getScalarSizeInBits();
7341 SDValue ShiftAmt = Src.getOperand(1);
7342 auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt);
7343 if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(BitWidth))
7344 return SDValue();
7345
7346 // Set source to shift source.
7347 Src = Src.getOperand(0);
7348
7349 // Try again to find a 'not' op.
7350 // TODO: Should we favor test+set even with two 'not' ops?
7351 if (!FoundNot) {
7352 if (!isBitwiseNot(Src))
7353 return SDValue();
7354 Src = Src.getOperand(0);
7355 }
7356
7357 if (!TLI.hasBitTest(Src, ShiftAmt))
7358 return SDValue();
7359
7360 // Turn this into a bit-test pattern using mask op + setcc:
7361 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
7362 // and (srl (not X), C)), 1 --> (and X, 1<<C) == 0
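// e.g. with C = 3 the result is 1 exactly when bit 3 of X is clear, i.e. when
// (X & 8) == 0.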
7363 SDLoc DL(And);
7364 SDValue X = DAG.getZExtOrTrunc(Src, DL, SrcVT);
7365 EVT CCVT =
7366 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
7367 SDValue Mask = DAG.getConstant(
7368 APInt::getOneBitSet(BitWidth, ShiftAmtC->getZExtValue()), DL, SrcVT);
7369 SDValue NewAnd = DAG.getNode(ISD::AND, DL, SrcVT, X, Mask);
7370 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
7371 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
7372 return DAG.getZExtOrTrunc(Setcc, DL, And->getValueType(0));
7373}
7374
7375/// For targets that support usubsat, match a bit-hack form of that operation
7376/// that ends in 'and' and convert it.
7377 static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL) {
7378 EVT VT = N->getValueType(0);
7379 unsigned BitWidth = VT.getScalarSizeInBits();
7380 APInt SignMask = APInt::getSignMask(BitWidth);
7381
7382 // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
7383 // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
7384 // xor/add with SMIN (signmask) are logically equivalent.
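// e.g. i8 X = 200: X s>> 7 is all-ones and X ^ 128 == 72, so the AND yields
// 72 == usubsat(200, 128); for X = 50 the arithmetic shift is 0 and the result
// is 0 == usubsat(50, 128).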
7385 SDValue X;
7386 if (!sd_match(N, m_And(m_OneUse(m_Xor(m_Value(X), m_SpecificInt(SignMask))),
7387                        m_OneUse(m_Sra(m_Deferred(X),
7388                                       m_SpecificInt(BitWidth - 1))))) &&
7389     !sd_match(N, m_And(m_OneUse(m_Add(m_Value(X), m_SpecificInt(SignMask))),
7390                        m_OneUse(m_Sra(m_Deferred(X),
7391                                       m_SpecificInt(BitWidth - 1))))))
7392 return SDValue();
7393
7394 return DAG.getNode(ISD::USUBSAT, DL, VT, X,
7395 DAG.getConstant(SignMask, DL, VT));
7396}
7397
7398/// Given a bitwise logic operation N with a matching bitwise logic operand,
7399/// fold a pattern where 2 of the source operands are identically shifted
7400/// values. For example:
7401/// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
7402 static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
7403                                  SelectionDAG &DAG) {
7404 unsigned LogicOpcode = N->getOpcode();
7405 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
7406 "Expected bitwise logic operation");
7407
7408 if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
7409 return SDValue();
7410
7411 // Match another bitwise logic op and a shift.
7412 unsigned ShiftOpcode = ShiftOp.getOpcode();
7413 if (LogicOp.getOpcode() != LogicOpcode ||
7414 !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
7415 ShiftOpcode == ISD::SRA))
7416 return SDValue();
7417
7418 // Match another shift op inside the first logic operand. Handle both commuted
7419 // possibilities.
7420 // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
7421 // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
7422 SDValue X1 = ShiftOp.getOperand(0);
7423 SDValue Y = ShiftOp.getOperand(1);
7424 SDValue X0, Z;
7425 if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
7426 LogicOp.getOperand(0).getOperand(1) == Y) {
7427 X0 = LogicOp.getOperand(0).getOperand(0);
7428 Z = LogicOp.getOperand(1);
7429 } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
7430 LogicOp.getOperand(1).getOperand(1) == Y) {
7431 X0 = LogicOp.getOperand(1).getOperand(0);
7432 Z = LogicOp.getOperand(0);
7433 } else {
7434 return SDValue();
7435 }
7436
7437 EVT VT = N->getValueType(0);
7438 SDLoc DL(N);
7439 SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
7440 SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
7441 return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
7442}
7443
7444/// Given a tree of logic operations with shape like
7445/// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y)))
7446/// try to match and fold shift operations with the same shift amount.
7447/// For example:
7448/// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
7449/// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
7451 SDValue RightHand, SelectionDAG &DAG) {
7452 unsigned LogicOpcode = N->getOpcode();
7453 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
7454 "Expected bitwise logic operation");
7455 if (LeftHand.getOpcode() != LogicOpcode ||
7456 RightHand.getOpcode() != LogicOpcode)
7457 return SDValue();
7458 if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
7459 return SDValue();
7460
7461 // Try to match one of following patterns:
7462 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
7463 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
7464 // Note that foldLogicOfShifts will handle commuted versions of the left hand
7465 // itself.
7466 SDValue CombinedShifts, W;
7467 SDValue R0 = RightHand.getOperand(0);
7468 SDValue R1 = RightHand.getOperand(1);
7469 if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG)))
7470 W = R1;
7471 else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG)))
7472 W = R0;
7473 else
7474 return SDValue();
7475
7476 EVT VT = N->getValueType(0);
7477 SDLoc DL(N);
7478 return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
7479}
7480
7481/// Fold "masked merge" expressions like `(m & x) | (~m & y)` and its DeMorgan
7482/// variant `(~m | x) & (m | y)` into the equivalent `((x ^ y) & m) ^ y)`
7483/// pattern. This is typically a better representation for targets without a
7484/// fused "and-not" operation.
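/// Bit-wise, where m is 1 the result is (x ^ y) ^ y == x and where m is 0 it is
/// 0 ^ y == y, matching the select-by-mask semantics of the original form.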
7486 const TargetLowering &TLI, const SDLoc &DL) {
7487 // Note that masked-merge variants using XOR or ADD expressions are
7488 // normalized to OR by InstCombine so we only check for OR or AND.
7489 assert((Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::AND) &&
7490 "Must be called with ISD::OR or ISD::AND node");
7491
7492 // If the target supports and-not, don't fold this.
7493 if (TLI.hasAndNot(SDValue(Node, 0)))
7494 return SDValue();
7495
7496 SDValue M, X, Y;
7497
7498 if (sd_match(Node,
7499              m_Or(m_OneUse(m_And(m_OneUse(m_Not(m_Value(M))), m_Value(Y))),
7500                   m_OneUse(m_And(m_Deferred(M), m_Value(X))))) ||
7501     sd_match(Node,
7502              m_And(m_OneUse(m_Or(m_OneUse(m_Not(m_Value(M))), m_Value(X))),
7503                    m_OneUse(m_Or(m_Deferred(M), m_Value(Y)))))) {
7504 EVT VT = M.getValueType();
7505 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, X, Y);
7506 SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor, M);
7507 return DAG.getNode(ISD::XOR, DL, VT, And, Y);
7508 }
7509 return SDValue();
7510}
7511
7512SDValue DAGCombiner::visitAND(SDNode *N) {
7513 SDValue N0 = N->getOperand(0);
7514 SDValue N1 = N->getOperand(1);
7515 EVT VT = N1.getValueType();
7516 SDLoc DL(N);
7517
7518 // x & x --> x
7519 if (N0 == N1)
7520 return N0;
7521
7522 // fold (and c1, c2) -> c1&c2
7523 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N0, N1}))
7524 return C;
7525
7526 // canonicalize constant to RHS
7527 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7528     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7529 return DAG.getNode(ISD::AND, DL, VT, N1, N0);
7530
7531 if (areBitwiseNotOfEachother(N0, N1))
7532 return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), DL, VT);
7533
7534 // fold vector ops
7535 if (VT.isVector()) {
7536 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
7537 return FoldedVOp;
7538
7539 // fold (and x, 0) -> 0, vector edition
7540 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
7541 // do not return N1, because undef node may exist in N1
7542 return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()), DL,
7543                        N1.getValueType());
7544
7545 // fold (and x, -1) -> x, vector edition
7546 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
7547 return N0;
7548
7549 // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
7550 bool Frozen = N0.getOpcode() == ISD::FREEZE;
7551 auto *MLoad = dyn_cast<MaskedLoadSDNode>(Frozen ? N0.getOperand(0) : N0);
7552 ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
7553 if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat) {
7554 EVT MemVT = MLoad->getMemoryVT();
7555 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) {
7556 // For this AND to be a zero extension of the masked load the elements
7557 // of the BuildVec must mask the bottom bits of the extended element
7558 // type
7559 if (Splat->getAPIntValue().isMask(MemVT.getScalarSizeInBits())) {
7560 SDValue NewLoad = DAG.getMaskedLoad(
7561 VT, DL, MLoad->getChain(), MLoad->getBasePtr(),
7562 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(), MemVT,
7563 MLoad->getMemOperand(), MLoad->getAddressingMode(), ISD::ZEXTLOAD,
7564 MLoad->isExpandingLoad());
7565 CombineTo(N, Frozen ? N0 : NewLoad);
7566 CombineTo(MLoad, NewLoad, NewLoad.getValue(1));
7567 return SDValue(N, 0);
7568 }
7569 }
7570 }
7571 }
7572
7573 // fold (and x, -1) -> x
7574 if (isAllOnesConstant(N1))
7575 return N0;
7576
7577 // if (and x, c) is known to be zero, return 0
7578 unsigned BitWidth = VT.getScalarSizeInBits();
7579 ConstantSDNode *N1C = isConstOrConstSplat(N1);
7580 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
7581 return DAG.getConstant(0, DL, VT);
7582
7583 if (SDValue R = foldAndOrOfSETCC(N, DAG))
7584 return R;
7585
7586 if (SDValue NewSel = foldBinOpIntoSelect(N))
7587 return NewSel;
7588
7589 // reassociate and
7590 if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
7591 return RAND;
7592
7593 // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
7594 if (SDValue SD =
7595 reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, DL, VT, N0, N1))
7596 return SD;
7597
7598 // fold (and (or x, C), D) -> D if (C & D) == D
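// e.g. (and (or x, 0xF0), 0x30) --> 0x30: the OR forces every bit of 0x30 to
// one, because 0x30 is a subset of 0xF0.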
7599 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7600 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
7601 };
7602 if (N0.getOpcode() == ISD::OR &&
7603 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
7604 return N1;
7605
7606 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
7607 SDValue N0Op0 = N0.getOperand(0);
7608 EVT SrcVT = N0Op0.getValueType();
7609 unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
7610 APInt Mask = ~N1C->getAPIntValue();
7611 Mask = Mask.trunc(SrcBitWidth);
7612
7613 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
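// e.g. (and (any_extend i8 V to i32), 0xFF) --> (zero_extend V): the mask keeps
// only the low 8 bits, where any_extend and zero_extend agree.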
7614 if (DAG.MaskedValueIsZero(N0Op0, Mask))
7615 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0Op0);
7616
7617 // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
7618 if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) &&
7619 TLI.isTruncateFree(VT, SrcVT) && TLI.isZExtFree(SrcVT, VT) &&
7620 TLI.isTypeDesirableForOp(ISD::AND, SrcVT) &&
7621 TLI.isNarrowingProfitable(N, VT, SrcVT))
7622 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
7623 DAG.getNode(ISD::AND, DL, SrcVT, N0Op0,
7624 DAG.getZExtOrTrunc(N1, DL, SrcVT)));
7625 }
7626
7627 // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
7628 if (ISD::isExtOpcode(N0.getOpcode())) {
7629 unsigned ExtOpc = N0.getOpcode();
7630 SDValue N0Op0 = N0.getOperand(0);
7631 if (N0Op0.getOpcode() == ISD::AND &&
7632 (ExtOpc != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0Op0, VT)) &&
7633 N0->hasOneUse() && N0Op0->hasOneUse()) {
7634 if (SDValue NewExt = DAG.FoldConstantArithmetic(ExtOpc, DL, VT,
7635 {N0Op0.getOperand(1)})) {
7636 if (SDValue NewMask =
7637 DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N1, NewExt})) {
7638 return DAG.getNode(ISD::AND, DL, VT,
7639 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(0)),
7640 NewMask);
7641 }
7642 }
7643 }
7644 }
7645
7646 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
7647 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
7648 // already be zero by virtue of the width of the base type of the load.
7649 //
7650 // the 'X' node here can either be nothing or an extract_vector_elt to catch
7651 // more cases.
7652 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7654 N0.getOperand(0).getOpcode() == ISD::LOAD &&
7655 N0.getOperand(0).getResNo() == 0) ||
7656 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
7657 auto *Load =
7658 cast<LoadSDNode>((N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0));
7659
7660 // Get the constant (if applicable) the zero'th operand is being ANDed with.
7661 // This can be a pure constant or a vector splat, in which case we treat the
7662 // vector as a scalar and use the splat value.
7663 APInt Constant = APInt::getZero(1);
7664 if (const ConstantSDNode *C = isConstOrConstSplat(
7665 N1, /*AllowUndefs=*/false, /*AllowTruncation=*/true)) {
7666 Constant = C->getAPIntValue();
7667 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
7668 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
7669 APInt SplatValue, SplatUndef;
7670 unsigned SplatBitSize;
7671 bool HasAnyUndefs;
7672 // Endianness should not matter here. Code below makes sure that we only
7673 // use the result if the SplatBitSize is a multiple of the vector element
7674 // size. And after that we AND all element sized parts of the splat
7675 // together. So the end result should be the same regardless of in which
7676 // order we do those operations.
7677 const bool IsBigEndian = false;
7678 bool IsSplat =
7679 Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
7680 HasAnyUndefs, EltBitWidth, IsBigEndian);
7681
7682 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
7683 // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
7684 if (IsSplat && (SplatBitSize % EltBitWidth) == 0) {
7685 // Undef bits can contribute to a possible optimisation if set, so
7686 // set them.
7687 SplatValue |= SplatUndef;
7688
7689 // The splat value may be something like "0x00FFFFFF", which means 0 for
7690 // the first vector value and FF for the rest, repeating. We need a mask
7691 // that will apply equally to all members of the vector, so AND all the
7692 // lanes of the constant together.
7693 Constant = APInt::getAllOnes(EltBitWidth);
7694 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
7695 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
7696 }
7697 }
7698
7699 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
7700 // actually legal and isn't going to get expanded, else this is a false
7701 // optimisation.
7702 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
7703 Load->getValueType(0),
7704 Load->getMemoryVT());
7705
7706 // Resize the constant to the same size as the original memory access before
7707 // extension. If it is still the AllOnesValue then this AND is completely
7708 // unneeded.
7709 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
7710
7711 bool B;
7712 switch (Load->getExtensionType()) {
7713 default: B = false; break;
7714 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
7715 case ISD::ZEXTLOAD:
7716 case ISD::NON_EXTLOAD: B = true; break;
7717 }
7718
7719 if (B && Constant.isAllOnes()) {
7720 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
7721 // preserve semantics once we get rid of the AND.
7722 SDValue NewLoad(Load, 0);
7723
7724 // Fold the AND away. NewLoad may get replaced immediately.
7725 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
7726
7727 if (Load->getExtensionType() == ISD::EXTLOAD) {
7728 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
7729 Load->getValueType(0), SDLoc(Load),
7730 Load->getChain(), Load->getBasePtr(),
7731 Load->getOffset(), Load->getMemoryVT(),
7732 Load->getMemOperand());
7733 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
7734 if (Load->getNumValues() == 3) {
7735 // PRE/POST_INC loads have 3 values.
7736 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
7737 NewLoad.getValue(2) };
7738 CombineTo(Load, To, 3, true);
7739 } else {
7740 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
7741 }
7742 }
7743
7744 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7745 }
7746 }
7747
7748 // Try to convert a constant mask AND into a shuffle clear mask.
7749 if (VT.isVector())
7750 if (SDValue Shuffle = XformToShuffleWithZero(N))
7751 return Shuffle;
7752
7753 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7754 return Combined;
7755
7756 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C &&
7757     ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
7758 SDValue Ext = N0.getOperand(0);
7759 EVT ExtVT = Ext->getValueType(0);
7760 SDValue Extendee = Ext->getOperand(0);
7761
7762 unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits();
7763 if (N1C->getAPIntValue().isMask(ScalarWidth) &&
7764 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, ExtVT))) {
7765 // (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
7766 // => (extract_subvector (iN_zeroext v))
7767 SDValue ZeroExtExtendee =
7768 DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Extendee);
7769
7770 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ZeroExtExtendee,
7771 N0.getOperand(1));
7772 }
7773 }
7774
7775 // fold (and (masked_gather x)) -> (zext_masked_gather x)
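// The AND is only redundant here when its mask is a splat of the low mask for
// the gather's memory element type: those are exactly the bits that a
// zero-extending gather already guarantees to be zero, so the gather's
// extension type can be switched to ZEXTLOAD and the AND folded away.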
7776 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
7777 EVT MemVT = GN0->getMemoryVT();
7778 EVT ScalarVT = MemVT.getScalarType();
7779
7780 if (SDValue(GN0, 0).hasOneUse() &&
7781 isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
7782 TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
7783 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
7784 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
7785
7786 SDValue ZExtLoad = DAG.getMaskedGather(
7787 DAG.getVTList(VT, MVT::Other), MemVT, DL, Ops, GN0->getMemOperand(),
7788 GN0->getIndexType(), ISD::ZEXTLOAD);
7789
7790 CombineTo(N, ZExtLoad);
7791 AddToWorklist(ZExtLoad.getNode());
7792 // Avoid recheck of N.
7793 return SDValue(N, 0);
7794 }
7795 }
7796
7797 // fold (and (load x), 255) -> (zextload x, i8)
7798 // fold (and (extload x, i16), 255) -> (zextload x, i8)
7799 if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector())
7800 if (SDValue Res = reduceLoadWidth(N))
7801 return Res;
7802
7803 if (LegalTypes) {
7804 // Attempt to propagate the AND back up to the leaves which, if they're
7805 // loads, can be combined to narrow loads and the AND node can be removed.
7806 // Perform after legalization so that extend nodes will already be
7807 // combined into the loads.
7808 if (BackwardsPropagateMask(N))
7809 return SDValue(N, 0);
7810 }
7811
7812 if (SDValue Combined = visitANDLike(N0, N1, N))
7813 return Combined;
7814
7815 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
7816 if (N0.getOpcode() == N1.getOpcode())
7817 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7818 return V;
7819
7820 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7821 return R;
7822 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
7823 return R;
7824
7825 // Fold (and X, (bswap (not Y))) -> (and X, (not (bswap Y)))
7826 // Fold (and X, (bitreverse (not Y))) -> (and X, (not (bitreverse Y)))
7827 SDValue X, Y, Z, NotY;
7828 for (unsigned Opc : {ISD::BSWAP, ISD::BITREVERSE})
7829 if (sd_match(N,
7830 m_And(m_Value(X), m_OneUse(m_UnaryOp(Opc, m_Value(NotY))))) &&
7831 sd_match(NotY, m_Not(m_Value(Y))) &&
7832 (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
7833 return DAG.getNode(ISD::AND, DL, VT, X,
7834 DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y), VT));
7835
7836 // Fold (and X, (rot (not Y), Z)) -> (and X, (not (rot Y, Z)))
7837 for (unsigned Opc : {ISD::ROTL, ISD::ROTR})
7838 if (sd_match(N, m_And(m_Value(X),
7839 m_OneUse(m_BinOp(Opc, m_Value(NotY), m_Value(Z))))) &&
7840 sd_match(NotY, m_Not(m_Value(Y))) &&
7841 (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
7842 return DAG.getNode(ISD::AND, DL, VT, X,
7843 DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y, Z), VT));
7844
7845 // Fold (and X, (add (not Y), Z)) -> (and X, (not (sub Y, Z)))
7846 // Fold (and X, (sub (not Y), Z)) -> (and X, (not (add Y, Z)))
7847 if (TLI.hasAndNot(SDValue(N, 0)))
7848 if (SDValue Folded = foldBitwiseOpWithNeg(N, DL, VT))
7849 return Folded;
7850
7851 // Fold (and (srl X, C), 1) -> (srl X, BW-1) for signbit extraction
7852 // If we are shifting down an extended sign bit, see if we can simplify
7853 // this to shifting the MSB directly to expose further simplifications.
7854 // This pattern often appears after sext_inreg legalization.
7855 APInt Amt;
7856 if (sd_match(N, m_And(m_Srl(m_Value(X), m_ConstInt(Amt)), m_One())) &&
7857 Amt.ult(BitWidth - 1) && Amt.uge(BitWidth - DAG.ComputeNumSignBits(X)))
7858 return DAG.getNode(ISD::SRL, DL, VT, X,
7859 DAG.getShiftAmountConstant(BitWidth - 1, VT, DL));
7860
7861 // Masking the negated extension of a boolean is just the zero-extended
7862 // boolean:
7863 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
7864 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
7865 //
7866 // Note: the SimplifyDemandedBits fold below can make an information-losing
7867 // transform, and then we have no way to find this better fold.
7868 if (sd_match(N, m_And(m_Sub(m_Zero(), m_Value(X)), m_One()))) {
7869 if (X.getOpcode() == ISD::ZERO_EXTEND &&
7870 X.getOperand(0).getScalarValueSizeInBits() == 1)
7871 return X;
7872 if (X.getOpcode() == ISD::SIGN_EXTEND &&
7873 X.getOperand(0).getScalarValueSizeInBits() == 1)
7874 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, X.getOperand(0));
7875 }
7876
7877 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
7878 // fold (and (sra)) -> (and (srl)) when possible.
7879 if (SimplifyDemandedBits(SDValue(N, 0)))
7880 return SDValue(N, 0);
7881
7882 // fold (zext_inreg (extload x)) -> (zextload x)
7883 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
7884 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
7885 (ISD::isEXTLoad(N0.getNode()) ||
7886 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
7887 auto *LN0 = cast<LoadSDNode>(N0);
7888 EVT MemVT = LN0->getMemoryVT();
7889 // If we zero all the possible extended bits, then we can turn this into
7890 // a zextload if we are running before legalize or the operation is legal.
7891 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
7892 unsigned MemBitSize = MemVT.getScalarSizeInBits();
7893 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
7894 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
7895 ((!LegalOperations && LN0->isSimple()) ||
7896 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
7897 SDValue ExtLoad =
7898 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
7899 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
7900 AddToWorklist(N);
7901 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7902 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7903 }
7904 }
7905
7906 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
7907 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
7908 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
7909 N0.getOperand(1), false))
7910 return BSwap;
7911 }
7912
7913 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
7914 return Shifts;
7915
7916 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
7917 return V;
7918
7919 // Recognize the following pattern:
7920 //
7921 // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
7922 //
7923 // where bitmask is a mask that clears the upper bits of AndVT. The
7924 // number of bits in bitmask must be a power of two.
7925 auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
7926 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
7927 return false;
7928
7929 auto *C = dyn_cast<ConstantSDNode>(RHS);
7930 if (!C)
7931 return false;
7932
7933 if (!C->getAPIntValue().isMask(
7934 LHS.getOperand(0).getValueType().getFixedSizeInBits()))
7935 return false;
7936
7937 return true;
7938 };
7939
7940 // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
7941 if (IsAndZeroExtMask(N0, N1))
7942 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
7943
7944 if (hasOperation(ISD::USUBSAT, VT))
7945 if (SDValue V = foldAndToUsubsat(N, DAG, DL))
7946 return V;
7947
7948 // Postpone until legalization completed to avoid interference with bswap
7949 // folding
7950 if (LegalOperations || VT.isVector())
7951 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
7952 return R;
7953
7954 if (VT.isScalarInteger() && VT != MVT::i1)
7955 if (SDValue R = foldMaskedMerge(N, DAG, TLI, DL))
7956 return R;
7957
7958 return SDValue();
7959}
7960
7961/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
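/// For example, for i32 the low-halfword pattern
///   ((a >> 8) & 0xff) | ((a & 0xff) << 8)
/// becomes (srl (bswap a), 16), leaving the swapped halfword in bits [15:0].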
7962SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
7963 bool DemandHighBits) {
7964 if (!LegalOperations)
7965 return SDValue();
7966
7967 EVT VT = N->getValueType(0);
7968 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
7969 return SDValue();
7970 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7971 return SDValue();
7972
7973 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
7974 bool LookPassAnd0 = false;
7975 bool LookPassAnd1 = false;
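// Canonicalize the operands: after these swaps any AND of a SHL ends up in N0
// and any AND of a SRL ends up in N1, so the mask checks below only need to
// handle one ordering.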
7976 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
7977 std::swap(N0, N1);
7978 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
7979 std::swap(N0, N1);
7980 if (N0.getOpcode() == ISD::AND) {
7981 if (!N0->hasOneUse())
7982 return SDValue();
7983 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7984 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
7985 // This is needed for X86.
7986 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
7987 N01C->getZExtValue() != 0xFFFF))
7988 return SDValue();
7989 N0 = N0.getOperand(0);
7990 LookPassAnd0 = true;
7991 }
7992
7993 if (N1.getOpcode() == ISD::AND) {
7994 if (!N1->hasOneUse())
7995 return SDValue();
7996 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7997 if (!N11C || N11C->getZExtValue() != 0xFF)
7998 return SDValue();
7999 N1 = N1.getOperand(0);
8000 LookPassAnd1 = true;
8001 }
8002
8003 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
8004 std::swap(N0, N1);
8005 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
8006 return SDValue();
8007 if (!N0->hasOneUse() || !N1->hasOneUse())
8008 return SDValue();
8009
8010 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8011 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
8012 if (!N01C || !N11C)
8013 return SDValue();
8014 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
8015 return SDValue();
8016
8017 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
8018 SDValue N00 = N0->getOperand(0);
8019 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
8020 if (!N00->hasOneUse())
8021 return SDValue();
8022 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
8023 if (!N001C || N001C->getZExtValue() != 0xFF)
8024 return SDValue();
8025 N00 = N00.getOperand(0);
8026 LookPassAnd0 = true;
8027 }
8028
8029 SDValue N10 = N1->getOperand(0);
8030 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
8031 if (!N10->hasOneUse())
8032 return SDValue();
8033 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
8034 // Also allow 0xFFFF since the bits will be shifted out. This is needed
8035 // for X86.
8036 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
8037 N101C->getZExtValue() != 0xFFFF))
8038 return SDValue();
8039 N10 = N10.getOperand(0);
8040 LookPassAnd1 = true;
8041 }
8042
8043 if (N00 != N10)
8044 return SDValue();
8045
8046 // Make sure everything beyond the low halfword gets set to zero since the SRL
8047 // 16 will clear the top bits.
8048 unsigned OpSizeInBits = VT.getSizeInBits();
8049 if (OpSizeInBits > 16) {
8050 // If the left-shift isn't masked out then the only way this is a bswap is
8051 // if all bits beyond the low 8 are 0. In that case the entire pattern
8052 // reduces to a left shift anyway: leave it for other parts of the combiner.
8053 if (DemandHighBits && !LookPassAnd0)
8054 return SDValue();
8055
8056 // However, if the right shift isn't masked out then it might be because
8057 // it's not needed. See if we can spot that too. If the high bits aren't
8058 // demanded, we only need bits 23:16 to be zero. Otherwise, we need all
8059 // upper bits to be zero.
8060 if (!LookPassAnd1) {
8061 unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
8062 if (!DAG.MaskedValueIsZero(N10,
8063 APInt::getBitsSet(OpSizeInBits, 16, HighBit)))
8064 return SDValue();
8065 }
8066 }
8067
8068 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
8069 if (OpSizeInBits > 16) {
8070 SDLoc DL(N);
8071 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
8072 DAG.getShiftAmountConstant(OpSizeInBits - 16, VT, DL));
8073 }
8074 return Res;
8075}
8076
8077/// Return true if the specified node is an element that makes up a 32-bit
8078/// packed halfword byteswap.
8079/// ((x & 0x000000ff) << 8) |
8080/// ((x & 0x0000ff00) >> 8) |
8081/// ((x & 0x00ff0000) << 8) |
8082/// ((x & 0xff000000) >> 8)
8083 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
8084 if (!N->hasOneUse())
8085 return false;
8086
8087 unsigned Opc = N.getOpcode();
8088 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
8089 return false;
8090
8091 SDValue N0 = N.getOperand(0);
8092 unsigned Opc0 = N0.getOpcode();
8093 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
8094 return false;
8095
8096 ConstantSDNode *N1C = nullptr;
8097 // SHL or SRL: look upstream for AND mask operand
8098 if (Opc == ISD::AND)
8099 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
8100 else if (Opc0 == ISD::AND)
8101 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8102 if (!N1C)
8103 return false;
8104
8105 unsigned MaskByteOffset;
8106 switch (N1C->getZExtValue()) {
8107 default:
8108 return false;
8109 case 0xFF: MaskByteOffset = 0; break;
8110 case 0xFF00: MaskByteOffset = 1; break;
8111 case 0xFFFF:
8112 // In case demanded bits didn't clear the bits that will be shifted out.
8113 // This is needed for X86.
8114 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
8115 MaskByteOffset = 1;
8116 break;
8117 }
8118 return false;
8119 case 0xFF0000: MaskByteOffset = 2; break;
8120 case 0xFF000000: MaskByteOffset = 3; break;
8121 }
8122
8123 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
8124 if (Opc == ISD::AND) {
8125 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
8126 // (x >> 8) & 0xff
8127 // (x >> 8) & 0xff0000
8128 if (Opc0 != ISD::SRL)
8129 return false;
8130 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8131 if (!C || C->getZExtValue() != 8)
8132 return false;
8133 } else {
8134 // (x << 8) & 0xff00
8135 // (x << 8) & 0xff000000
8136 if (Opc0 != ISD::SHL)
8137 return false;
8138 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
8139 if (!C || C->getZExtValue() != 8)
8140 return false;
8141 }
8142 } else if (Opc == ISD::SHL) {
8143 // (x & 0xff) << 8
8144 // (x & 0xff0000) << 8
8145 if (MaskByteOffset != 0 && MaskByteOffset != 2)
8146 return false;
8147 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
8148 if (!C || C->getZExtValue() != 8)
8149 return false;
8150 } else { // Opc == ISD::SRL
8151 // (x & 0xff00) >> 8
8152 // (x & 0xff000000) >> 8
8153 if (MaskByteOffset != 1 && MaskByteOffset != 3)
8154 return false;
8155 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
8156 if (!C || C->getZExtValue() != 8)
8157 return false;
8158 }
8159
8160 if (Parts[MaskByteOffset])
8161 return false;
8162
8163 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
8164 return true;
8165}
8166
8167// Match 2 elements of a packed halfword bswap.
8168 static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
8169 if (N.getOpcode() == ISD::OR)
8170 return isBSwapHWordElement(N.getOperand(0), Parts) &&
8171 isBSwapHWordElement(N.getOperand(1), Parts);
8172
8173 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
8174 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
8175 if (!C || C->getAPIntValue() != 16)
8176 return false;
8177 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
8178 return true;
8179 }
8180
8181 return false;
8182}
8183
8184// Match this pattern:
8185// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
8186// And rewrite this to:
8187// (rotr (bswap A), 16)
8188 static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
8189 SelectionDAG &DAG, SDNode *N, SDValue N0,
8190 SDValue N1, EVT VT) {
8191 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
8192 "MatchBSwapHWordOrAndAnd: expecting i32");
8193 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
8194 return SDValue();
8195 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
8196 return SDValue();
8197 // TODO: this is too restrictive; lifting this restriction requires more tests
8198 if (!N0->hasOneUse() || !N1->hasOneUse())
8199 return SDValue();
8200 ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
8201 ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
8202 if (!Mask0 || !Mask1)
8203 return SDValue();
8204 if (Mask0->getAPIntValue() != 0xff00ff00 ||
8205 Mask1->getAPIntValue() != 0x00ff00ff)
8206 return SDValue();
8207 SDValue Shift0 = N0.getOperand(0);
8208 SDValue Shift1 = N1.getOperand(0);
8209 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
8210 return SDValue();
8211 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
8212 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
8213 if (!ShiftAmt0 || !ShiftAmt1)
8214 return SDValue();
8215 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
8216 return SDValue();
8217 if (Shift0.getOperand(0) != Shift1.getOperand(0))
8218 return SDValue();
8219
8220 SDLoc DL(N);
8221 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
8222 SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
8223 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
8224}
8225
8226/// Match a 32-bit packed halfword bswap. That is
8227/// ((x & 0x000000ff) << 8) |
8228/// ((x & 0x0000ff00) >> 8) |
8229/// ((x & 0x00ff0000) << 8) |
8230/// ((x & 0xff000000) >> 8)
8231/// => (rotl (bswap x), 16)
8232SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
8233 if (!LegalOperations)
8234 return SDValue();
8235
8236 EVT VT = N->getValueType(0);
8237 if (VT != MVT::i32)
8238 return SDValue();
8239 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
8240 return SDValue();
8241
8242 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT))
8243 return BSwap;
8244
8245 // Try again with commuted operands.
8246 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT))
8247 return BSwap;
8248
8249
8250 // Look for either
8251 // (or (bswaphpair), (bswaphpair))
8252 // (or (or (bswaphpair), (and)), (and))
8253 // (or (or (and), (bswaphpair)), (and))
8254 SDNode *Parts[4] = {};
8255
8256 if (isBSwapHWordPair(N0, Parts)) {
8257 // (or (or (and), (and)), (or (and), (and)))
8258 if (!isBSwapHWordPair(N1, Parts))
8259 return SDValue();
8260 } else if (N0.getOpcode() == ISD::OR) {
8261 // (or (or (or (and), (and)), (and)), (and))
8262 if (!isBSwapHWordElement(N1, Parts))
8263 return SDValue();
8264 SDValue N00 = N0.getOperand(0);
8265 SDValue N01 = N0.getOperand(1);
8266 if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
8267 !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
8268 return SDValue();
8269 } else {
8270 return SDValue();
8271 }
8272
8273 // Make sure the parts are all coming from the same node.
8274 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
8275 return SDValue();
8276
8277 SDLoc DL(N);
8278 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
8279 SDValue(Parts[0], 0));
8280
8281 // Result of the bswap should be rotated by 16. If it's not legal, then
8282 // do (x << 16) | (x >> 16).
8283 SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
8284 if (hasOperation(ISD::ROTL, VT))
8285 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
8286 if (hasOperation(ISD::ROTR, VT))
8287 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
8288 return DAG.getNode(ISD::OR, DL, VT,
8289 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
8290 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
8291}
8292
8293/// This contains all DAGCombine rules which reduce two values combined by
8294/// an Or operation to a single value \see visitANDLike().
8295SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, const SDLoc &DL) {
8296 EVT VT = N1.getValueType();
8297
8298 // fold (or x, undef) -> -1
8299 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
8300 return DAG.getAllOnesConstant(DL, VT);
8301
8302 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
8303 return V;
8304
8305 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
8306 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
8307 // Don't increase # computations.
8308 (N0->hasOneUse() || N1->hasOneUse())) {
8309 // We can only do this xform if we know that bits from X that are set in C2
8310 // but not in C1 are already zero. Likewise for Y.
8311 if (const ConstantSDNode *N0O1C =
8312 getAsNonOpaqueConstant(N0.getOperand(1)))
8313 if (const ConstantSDNode *N1O1C =
8314 getAsNonOpaqueConstant(N1.getOperand(1))) {
8315 // We can only do this xform if we know that bits from X that are set in
8316 // C2 but not in C1 are already zero. Likewise for Y.
8317 const APInt &LHSMask = N0O1C->getAPIntValue();
8318 const APInt &RHSMask = N1O1C->getAPIntValue();
8319
8320 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
8321 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
8322 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
8323 N0.getOperand(0), N1.getOperand(0));
8324 return DAG.getNode(ISD::AND, DL, VT, X,
8325 DAG.getConstant(LHSMask | RHSMask, DL, VT));
8326 }
8327 }
8328 }
8329 }
8330
8331 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
8332 if (N0.getOpcode() == ISD::AND &&
8333 N1.getOpcode() == ISD::AND &&
8334 N0.getOperand(0) == N1.getOperand(0) &&
8335 // Don't increase # computations.
8336 (N0->hasOneUse() || N1->hasOneUse())) {
8337 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
8338 N0.getOperand(1), N1.getOperand(1));
8339 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
8340 }
8341
8342 return SDValue();
8343}
8344
8345/// OR combines for which the commuted variant will be tried as well.
8346 static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
8347 SDNode *N) {
8348 EVT VT = N0.getValueType();
8349 unsigned BW = VT.getScalarSizeInBits();
8350 SDLoc DL(N);
8351
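// Look through a zero-extend or truncate so the AND/NOT patterns below can be
// matched at their original width; getZExtOrTrunc resizes the operands back
// when a fold actually fires.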
8352 auto peekThroughResize = [](SDValue V) {
8353 if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE)
8354 return V->getOperand(0);
8355 return V;
8356 };
8357
8358 SDValue N0Resized = peekThroughResize(N0);
8359 if (N0Resized.getOpcode() == ISD::AND) {
8360 SDValue N1Resized = peekThroughResize(N1);
8361 SDValue N00 = N0Resized.getOperand(0);
8362 SDValue N01 = N0Resized.getOperand(1);
8363
8364 // fold or (and x, y), x --> x
8365 if (N00 == N1Resized || N01 == N1Resized)
8366 return N1;
8367
8368 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
8369 // TODO: Set AllowUndefs = true.
8370 if (SDValue NotOperand = getBitwiseNotOperand(N01, N00,
8371 /* AllowUndefs */ false)) {
8372 if (peekThroughResize(NotOperand) == N1Resized)
8373 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N00, DL, VT),
8374 N1);
8375 }
8376
8377 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
8378 if (SDValue NotOperand = getBitwiseNotOperand(N00, N01,
8379 /* AllowUndefs */ false)) {
8380 if (peekThroughResize(NotOperand) == N1Resized)
8381 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N01, DL, VT),
8382 N1);
8383 }
8384 }
8385
8386 SDValue X, Y;
8387
8388 // fold or (xor X, N1), N1 --> or X, N1
8389 if (sd_match(N0, m_Xor(m_Value(X), m_Specific(N1))))
8390 return DAG.getNode(ISD::OR, DL, VT, X, N1);
8391
8392 // fold or (xor x, y), (x and/or y) --> or x, y
8393 if (sd_match(N0, m_Xor(m_Value(X), m_Value(Y))) &&
8394 (sd_match(N1, m_And(m_Specific(X), m_Specific(Y))) ||
8395 sd_match(N1, m_Or(m_Specific(X), m_Specific(Y)))))
8396 return DAG.getNode(ISD::OR, DL, VT, X, Y);
8397
8398 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
8399 return R;
8400
8401 auto peekThroughZext = [](SDValue V) {
8402 if (V->getOpcode() == ISD::ZERO_EXTEND)
8403 return V->getOperand(0);
8404 return V;
8405 };
8406
8407 // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
8408 if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
8409 N0.getOperand(0) == N1.getOperand(0) &&
8410 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
8411 return N0;
8412
8413 // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
8414 if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
8415 N0.getOperand(1) == N1.getOperand(0) &&
8416 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
8417 return N0;
8418
8419 // Attempt to match a legalized build_pair-esque pattern:
8420 // or(shl(aext(Hi),BW/2),zext(Lo))
8421 SDValue Lo, Hi;
8422 if (sd_match(N0,
8423 m_OneUse(m_Shl(m_AnyExt(m_Value(Hi)), m_SpecificInt(BW / 2)))) &&
8424 sd_match(N1, m_ZExt(m_Value(Lo))) &&
8425 Lo.getScalarValueSizeInBits() == (BW / 2) &&
8426 Lo.getValueType() == Hi.getValueType()) {
8427 // Fold build_pair(not(Lo),not(Hi)) -> not(build_pair(Lo,Hi)).
8428 SDValue NotLo, NotHi;
8429 if (sd_match(Lo, m_OneUse(m_Not(m_Value(NotLo)))) &&
8430 sd_match(Hi, m_OneUse(m_Not(m_Value(NotHi))))) {
8431 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotLo);
8432 Hi = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NotHi);
8433 Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
8434 DAG.getShiftAmountConstant(BW / 2, VT, DL));
8435 return DAG.getNOT(DL, DAG.getNode(ISD::OR, DL, VT, Lo, Hi), VT);
8436 }
8437 }
8438
8439 return SDValue();
8440}
8441
8442SDValue DAGCombiner::visitOR(SDNode *N) {
8443 SDValue N0 = N->getOperand(0);
8444 SDValue N1 = N->getOperand(1);
8445 EVT VT = N1.getValueType();
8446 SDLoc DL(N);
8447
8448 // x | x --> x
8449 if (N0 == N1)
8450 return N0;
8451
8452 // fold (or c1, c2) -> c1|c2
8453 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, DL, VT, {N0, N1}))
8454 return C;
8455
8456 // canonicalize constant to RHS
8457 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
8458 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
8459 return DAG.getNode(ISD::OR, DL, VT, N1, N0);
8460
8461 // fold vector ops
8462 if (VT.isVector()) {
8463 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
8464 return FoldedVOp;
8465
8466 // fold (or x, 0) -> x, vector edition
8467 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
8468 return N0;
8469
8470 // fold (or x, -1) -> -1, vector edition
8471 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
8472 // do not return N1, because undef node may exist in N1
8473 return DAG.getAllOnesConstant(DL, N1.getValueType());
8474
8475 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
8476 // Do this only if the resulting type / shuffle is legal.
8477 auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0);
8478 auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1);
8479 if (SV0 && SV1 && TLI.isTypeLegal(VT)) {
8480 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
8481 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
8482 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
8483 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
8484 // Ensure both shuffles have a zero input.
8485 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
8486 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
8487 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
8488 bool CanFold = true;
8489 int NumElts = VT.getVectorNumElements();
8490 SmallVector<int, 4> Mask(NumElts, -1);
8491
8492 for (int i = 0; i != NumElts; ++i) {
8493 int M0 = SV0->getMaskElt(i);
8494 int M1 = SV1->getMaskElt(i);
8495
8496 // Determine if either index is pointing to a zero vector.
8497 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
8498 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
8499
8500 // If one element is zero and the other side is undef, keep undef.
8501 // This also handles the case that both are undef.
8502 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0))
8503 continue;
8504
8505 // Make sure only one of the elements is zero.
8506 if (M0Zero == M1Zero) {
8507 CanFold = false;
8508 break;
8509 }
8510
8511 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
8512
8513 // We have a zero and non-zero element. If the non-zero came from
8514 // SV0 make the index a LHS index. If it came from SV1, make it
8515 // a RHS index. We need to mod by NumElts because we don't care
8516 // which operand it came from in the original shuffles.
8517 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
8518 }
8519
8520 if (CanFold) {
8521 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
8522 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
8523 SDValue LegalShuffle =
8524 TLI.buildLegalVectorShuffle(VT, DL, NewLHS, NewRHS, Mask, DAG);
8525 if (LegalShuffle)
8526 return LegalShuffle;
8527 }
8528 }
8529 }
8530 }
8531
8532 // fold (or x, 0) -> x
8533 if (isNullConstant(N1))
8534 return N0;
8535
8536 // fold (or x, -1) -> -1
8537 if (isAllOnesConstant(N1))
8538 return N1;
8539
8540 if (SDValue NewSel = foldBinOpIntoSelect(N))
8541 return NewSel;
8542
8543 // fold (or x, c) -> c iff (x & ~c) == 0
8544 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
8545 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
8546 return N1;
8547
8548 if (SDValue R = foldAndOrOfSETCC(N, DAG))
8549 return R;
8550
8551 if (SDValue Combined = visitORLike(N0, N1, DL))
8552 return Combined;
8553
8554 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
8555 return Combined;
8556
8557 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
8558 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
8559 return BSwap;
8560 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
8561 return BSwap;
8562
8563 // reassociate or
8564 if (SDValue ROR = reassociateOps(ISD::OR, DL, N0, N1, N->getFlags()))
8565 return ROR;
8566
8567 // Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y))
8568 if (SDValue SD =
8569 reassociateReduction(ISD::VECREDUCE_OR, ISD::OR, DL, VT, N0, N1))
8570 return SD;
8571
8572 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
8573 // iff (c1 & c2) != 0 or c1/c2 are undef.
8574 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
8575 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
8576 };
8577 if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
8578 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
8579 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
8580 {N1, N0.getOperand(1)})) {
8581 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
8582 AddToWorklist(IOR.getNode());
8583 return DAG.getNode(ISD::AND, DL, VT, COR, IOR);
8584 }
8585 }
8586
8587 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
8588 return Combined;
8589 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
8590 return Combined;
8591
8592 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
8593 if (N0.getOpcode() == N1.getOpcode())
8594 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
8595 return V;
8596
8597 // See if this is some rotate idiom.
8598 if (SDValue Rot = MatchRotate(N0, N1, DL, /*FromAdd=*/false))
8599 return Rot;
8600
8601 if (SDValue Load = MatchLoadCombine(N))
8602 return Load;
8603
8604 // Simplify the operands using demanded-bits information.
8605 if (SimplifyDemandedBits(SDValue(N, 0)))
8606 return SDValue(N, 0);
8607
8608 // If OR can be rewritten into ADD, try combines based on ADD.
8609 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
8610 DAG.isADDLike(SDValue(N, 0)))
8611 if (SDValue Combined = visitADDLike(N))
8612 return Combined;
8613
8614 // Postpone until legalization completed to avoid interference with bswap
8615 // folding
8616 if (LegalOperations || VT.isVector())
8617 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
8618 return R;
8619
8620 if (VT.isScalarInteger() && VT != MVT::i1)
8621 if (SDValue R = foldMaskedMerge(N, DAG, TLI, DL))
8622 return R;
8623
8624 return SDValue();
8625}
8626
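/// If \p Op is (and X, C) with C a constant or constant build-vector, return X
/// and place C in \p Mask; otherwise return \p Op unchanged.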
8627 static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op,
8628 SDValue &Mask) {
8629 if (Op.getOpcode() == ISD::AND &&
8630 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
8631 Mask = Op.getOperand(1);
8632 return Op.getOperand(0);
8633 }
8634 return Op;
8635}
8636
8637/// Match "(X shl/srl V1) & V2" where V2 may not be present.
8638static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift,
8639 SDValue &Mask) {
8640 Op = stripConstantMask(DAG, Op, Mask);
8641 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
8642 Shift = Op;
8643 return true;
8644 }
8645 return false;
8646}
8647
8648/// Helper function for visitOR to extract the needed side of a rotate idiom
8649/// from a shl/srl/mul/udiv. This is meant to handle cases where
8650/// InstCombine merged some outside op with one of the shifts from
8651/// the rotate pattern.
8652/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
8653/// Otherwise, returns an expansion of \p ExtractFrom based on the following
8654/// patterns:
8655///
8656/// (or (add v v) (shrl v bitwidth-1)):
8657/// expands (add v v) -> (shl v 1)
8658///
8659/// (or (mul v c0) (shrl (mul v c1) c2)):
8660/// expands (mul v c0) -> (shl (mul v c1) c3)
8661///
8662/// (or (udiv v c0) (shl (udiv v c1) c2)):
8663/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
8664///
8665/// (or (shl v c0) (shrl (shl v c1) c2)):
8666/// expands (shl v c0) -> (shl (shl v c1) c3)
8667///
8668/// (or (shrl v c0) (shl (shrl v c1) c2)):
8669/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
8670///
8671/// Such that in all cases, c3+c2==bitwidth(op v c1).
8672 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
8673 SDValue ExtractFrom, SDValue &Mask,
8674 const SDLoc &DL) {
8675 assert(OppShift && ExtractFrom && "Empty SDValue");
8676 if (OppShift.getOpcode() != ISD::SHL && OppShift.getOpcode() != ISD::SRL)
8677 return SDValue();
8678
8679 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
8680
8681 // Value and Type of the shift.
8682 SDValue OppShiftLHS = OppShift.getOperand(0);
8683 EVT ShiftedVT = OppShiftLHS.getValueType();
8684
8685 // Amount of the existing shift.
8686 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
8687
8688 // (add v v) -> (shl v 1)
8689 // TODO: Should this be a general DAG canonicalization?
8690 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
8691 ExtractFrom.getOpcode() == ISD::ADD &&
8692 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
8693 ExtractFrom.getOperand(0) == OppShiftLHS &&
8694 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
8695 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
8696 DAG.getShiftAmountConstant(1, ShiftedVT, DL));
8697
8698 // Preconditions:
8699 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
8700 //
8701 // Find opcode of the needed shift to be extracted from (op0 v c0).
8702 unsigned Opcode = ISD::DELETED_NODE;
8703 bool IsMulOrDiv = false;
8704 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
8705 // opcode or its arithmetic (mul or udiv) variant.
8706 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
8707 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
8708 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
8709 return false;
8710 Opcode = NeededShift;
8711 return true;
8712 };
8713 // op0 must be either the needed shift opcode or the mul/udiv equivalent
8714 // that the needed shift can be extracted from.
8715 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
8716 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
8717 return SDValue();
8718
8719 // op0 must be the same opcode on both sides, have the same LHS argument,
8720 // and produce the same value type.
8721 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
8722 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
8723 ShiftedVT != ExtractFrom.getValueType())
8724 return SDValue();
8725
8726 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
8727 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
8728 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
8729 ConstantSDNode *ExtractFromCst =
8730 isConstOrConstSplat(ExtractFrom.getOperand(1));
8731 // TODO: We should be able to handle non-uniform constant vectors for these values
8732 // Check that we have constant values.
8733 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
8734 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
8735 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
8736 return SDValue();
8737
8738 // Compute the shift amount we need to extract to complete the rotate.
8739 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
8740 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
8741 return SDValue();
8742 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
8743 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
8744 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
8745 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
8746 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
8747
8748 // Now try extract the needed shift from the ExtractFrom op and see if the
8749 // result matches up with the existing shift's LHS op.
8750 if (IsMulOrDiv) {
8751 // Op to extract from is a mul or udiv by a constant.
8752 // Check:
8753 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
8754 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
8755 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
8756 NeededShiftAmt.getZExtValue());
8757 APInt ResultAmt;
8758 APInt Rem;
8759 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
8760 if (Rem != 0 || ResultAmt != OppLHSAmt)
8761 return SDValue();
8762 } else {
8763 // Op to extract from is a shift by a constant.
8764 // Check:
8765 // c2 - (bitwidth(op0 v c0) - c1) == c0
8766 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
8767 ExtractFromAmt.getBitWidth()))
8768 return SDValue();
8769 }
8770
8771 // Return the expanded shift op that should allow a rotate to be formed.
8772 EVT ShiftVT = OppShift.getOperand(1).getValueType();
8773 EVT ResVT = ExtractFrom.getValueType();
8774 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
8775 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
8776}
8777
8778// Return true if we can prove that, whenever Neg and Pos are both in the
8779// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
8780// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
8781//
8782// (or (shift1 X, Neg), (shift2 X, Pos))
8783//
8784// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
8785// in direction shift1 by Neg. The range [0, EltSize) means that we only need
8786// to consider shift amounts with defined behavior.
8787//
8788// The IsRotate flag should be set when the LHS of both shifts is the same.
8789// Otherwise if matching a general funnel shift, it should be clear.
8790static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
8791 SelectionDAG &DAG, bool IsRotate, bool FromAdd) {
8792 const auto &TLI = DAG.getTargetLoweringInfo();
8793 // If EltSize is a power of 2 then:
8794 //
8795 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
8796 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
8797 //
8798 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
8799 // for the stronger condition:
8800 //
8801 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
8802 //
8803 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
8804 // we can just replace Neg with Neg' for the rest of the function.
8805 //
8806 // In other cases we check for the even stronger condition:
8807 //
8808 // Neg == EltSize - Pos [B]
8809 //
8810 // for all Neg and Pos. Note that the (or ...) then invokes undefined
8811 // behavior if Pos == 0 (and consequently Neg == EltSize).
8812 //
8813 // We could actually use [A] whenever EltSize is a power of 2, but the
8814 // only extra cases that it would match are those uninteresting ones
8815 // where Neg and Pos are never in range at the same time. E.g. for
8816 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
8817 // as well as (sub 32, Pos), but:
8818 //
8819 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
8820 //
8821 // always invokes undefined behavior for 32-bit X.
8822 //
8823 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
8824 // This allows us to peek through any operations that only affect Mask's
8825 // un-demanded bits.
8826 //
8827 // NOTE: We can only do this when matching operations which won't modify the
8828 // least Log2(EltSize) significant bits and not a general funnel shift.
8829 unsigned MaskLoBits = 0;
8830 if (IsRotate && !FromAdd && isPowerOf2_64(EltSize)) {
8831 unsigned Bits = Log2_64(EltSize);
8832 unsigned NegBits = Neg.getScalarValueSizeInBits();
8833 if (NegBits >= Bits) {
8834 APInt DemandedBits = APInt::getLowBitsSet(NegBits, Bits);
8835 if (SDValue Inner =
8836 TLI.SimplifyMultipleUseDemandedBits(Neg, DemandedBits, DAG)) {
8837 Neg = Inner;
8838 MaskLoBits = Bits;
8839 }
8840 }
8841 }
8842
8843 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
8844 if (Neg.getOpcode() != ISD::SUB)
8845 return false;
8846 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
8847 if (!NegC)
8848 return false;
8849 SDValue NegOp1 = Neg.getOperand(1);
8850
8851 // On the RHS of [A], if Pos is the result of operation on Pos' that won't
8852 // affect Mask's demanded bits, just replace Pos with Pos'. These operations
8853 // are redundant for the purpose of the equality.
8854 if (MaskLoBits) {
8855 unsigned PosBits = Pos.getScalarValueSizeInBits();
8856 if (PosBits >= MaskLoBits) {
8857 APInt DemandedBits = APInt::getLowBitsSet(PosBits, MaskLoBits);
8858 if (SDValue Inner =
8859 TLI.SimplifyMultipleUseDemandedBits(Pos, DemandedBits, DAG)) {
8860 Pos = Inner;
8861 }
8862 }
8863 }
8864
8865 // The condition we need is now:
8866 //
8867 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
8868 //
8869 // If NegOp1 == Pos then we need:
8870 //
8871 // EltSize & Mask == NegC & Mask
8872 //
8873 // (because "x & Mask" is a truncation and distributes through subtraction).
8874 //
8875 // We also need to account for a potential truncation of NegOp1 if the amount
8876 // has already been legalized to a shift amount type.
8877 APInt Width;
8878 if ((Pos == NegOp1) ||
8879 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
8880 Width = NegC->getAPIntValue();
8881
8882 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
8883 // Then the condition we want to prove becomes:
8884 //
8885 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
8886 //
8887 // which, again because "x & Mask" is a truncation, becomes:
8888 //
8889 // NegC & Mask == (EltSize - PosC) & Mask
8890 // EltSize & Mask == (NegC + PosC) & Mask
8891 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
8892 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
8893 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
8894 else
8895 return false;
8896 } else
8897 return false;
8898
8899 // Now we just need to check that EltSize & Mask == Width & Mask.
8900 if (MaskLoBits)
8901 // EltSize & Mask is 0 since Mask is EltSize - 1.
8902 return Width.getLoBits(MaskLoBits) == 0;
8903 return Width == EltSize;
8904}
8905
8906// A subroutine of MatchRotate used once we have found an OR of two opposite
8907// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
8908// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
8909// former being preferred if supported. InnerPos and InnerNeg are Pos and
8910// Neg with outer conversions stripped away.
8911SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
8912 SDValue Neg, SDValue InnerPos,
8913 SDValue InnerNeg, bool FromAdd,
8914 bool HasPos, unsigned PosOpcode,
8915 unsigned NegOpcode, const SDLoc &DL) {
8916 // fold (or/add (shl x, (*ext y)),
8917 // (srl x, (*ext (sub 32, y)))) ->
8918 // (rotl x, y) or (rotr x, (sub 32, y))
8919 //
8920 // fold (or/add (shl x, (*ext (sub 32, y))),
8921 // (srl x, (*ext y))) ->
8922 // (rotr x, y) or (rotl x, (sub 32, y))
8923 EVT VT = Shifted.getValueType();
8924 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
8925 /*IsRotate*/ true, FromAdd))
8926 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
8927 HasPos ? Pos : Neg);
8928
8929 return SDValue();
8930}
8931
8932// A subroutine of MatchRotate used once we have found an OR of two opposite
8933// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
8934// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
8935// former being preferred if supported. InnerPos and InnerNeg are Pos and
8936// Neg with outer conversions stripped away.
8937// TODO: Merge with MatchRotatePosNeg.
8938SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
8939 SDValue Neg, SDValue InnerPos,
8940 SDValue InnerNeg, bool FromAdd,
8941 bool HasPos, unsigned PosOpcode,
8942 unsigned NegOpcode, const SDLoc &DL) {
8943 EVT VT = N0.getValueType();
8944 unsigned EltBits = VT.getScalarSizeInBits();
8945
8946 // fold (or/add (shl x0, (*ext y)),
8947 // (srl x1, (*ext (sub 32, y)))) ->
8948 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
8949 //
8950 // fold (or/add (shl x0, (*ext (sub 32, y))),
8951 // (srl x1, (*ext y))) ->
8952 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
8953 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1,
8954 FromAdd))
8955 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
8956 HasPos ? Pos : Neg);
8957
8958 // Matching the shift+xor cases, we can't easily use the xor'd shift amount
8959 // so for now just use the PosOpcode case if its legal.
8960 // TODO: When can we use the NegOpcode case?
8961 if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
8962 SDValue X;
8963 // fold (or/add (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
8964 // -> (fshl x0, x1, y)
8965 if (sd_match(N1, m_Srl(m_Value(X), m_One())) &&
8966 sd_match(InnerNeg,
8967 m_Xor(m_Specific(InnerPos), m_SpecificInt(EltBits - 1))) &&
8968 TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
8969 return DAG.getNode(ISD::FSHL, DL, VT, N0, X, Pos);
8970 }
8971
8972 // fold (or/add (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
8973 // -> (fshr x0, x1, y)
8974 if (sd_match(N0, m_Shl(m_Value(X), m_One())) &&
8975 sd_match(InnerPos,
8976 m_Xor(m_Specific(InnerNeg), m_SpecificInt(EltBits - 1))) &&
8977 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
8978 return DAG.getNode(ISD::FSHR, DL, VT, X, N1, Neg);
8979 }
8980
8981 // fold (or/add (shl (add x0, x0), (xor y, 31)), (srl x1, y))
8982 // -> (fshr x0, x1, y)
8983 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
8984 if (sd_match(N0, m_Add(m_Value(X), m_Deferred(X))) &&
8985 sd_match(InnerPos,
8986 m_Xor(m_Specific(InnerNeg), m_SpecificInt(EltBits - 1))) &&
8987 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
8988 return DAG.getNode(ISD::FSHR, DL, VT, X, N1, Neg);
8989 }
8990 }
8991
8992 return SDValue();
8993}
8994
8995// MatchRotate - Handle an 'or' or 'add' of two operands. If this is one of the
8996// many idioms for rotate, and if the target supports rotation instructions,
8997// generate a rot[lr]. This also matches funnel shift patterns, similar to
8998// rotation but with different shifted sources.
8999SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL,
9000 bool FromAdd) {
9001 EVT VT = LHS.getValueType();
9002
9003 // The target must have at least one rotate/funnel flavor.
9004 // We still try to match rotate by constant pre-legalization.
9005 // TODO: Support pre-legalization funnel-shift by constant.
9006 bool HasROTL = hasOperation(ISD::ROTL, VT);
9007 bool HasROTR = hasOperation(ISD::ROTR, VT);
9008 bool HasFSHL = hasOperation(ISD::FSHL, VT);
9009 bool HasFSHR = hasOperation(ISD::FSHR, VT);
9010
9011 // If the type is going to be promoted and the target has enabled custom
9012 // lowering for rotate, allow matching rotate by non-constants. Only allow
9013 // this for scalar types.
9014 if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
9015 TargetLowering::TypePromoteInteger) {
9016 HasROTL |= TLI.getOperationAction(ISD::ROTL, VT) == TargetLowering::Custom;
9017 HasROTR |= TLI.getOperationAction(ISD::ROTR, VT) == TargetLowering::Custom;
9018 }
9019
9020 if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
9021 return SDValue();
9022
9023 // Check for truncated rotate.
9024 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
9025 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
9026 assert(LHS.getValueType() == RHS.getValueType());
9027 if (SDValue Rot =
9028 MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL, FromAdd))
9029 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
9030 }
9031
9032 // Match "(X shl/srl V1) & V2" where V2 may not be present.
9033 SDValue LHSShift; // The shift.
9034 SDValue LHSMask; // AND value if any.
9035 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
9036
9037 SDValue RHSShift; // The shift.
9038 SDValue RHSMask; // AND value if any.
9039 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
9040
9041 // If neither side matched a rotate half, bail
9042 if (!LHSShift && !RHSShift)
9043 return SDValue();
9044
9045 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
9046 // side of the rotate, so try to handle that here. In all cases we need to
9047 // pass the matched shift from the opposite side to compute the opcode and
9048 // needed shift amount to extract. We still want to do this if both sides
9049 // matched a rotate half because one half may be a potential overshift that
9050 // can be broken down (ie if InstCombine merged two shl or srl ops into a
9051 // single one).
9052
9053 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
9054 if (LHSShift)
9055 if (SDValue NewRHSShift =
9056 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
9057 RHSShift = NewRHSShift;
9058 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
9059 if (RHSShift)
9060 if (SDValue NewLHSShift =
9061 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
9062 LHSShift = NewLHSShift;
9063
9064 // If a side is still missing, nothing else we can do.
9065 if (!RHSShift || !LHSShift)
9066 return SDValue();
9067
9068 // At this point we've matched or extracted a shift op on each side.
9069
9070 if (LHSShift.getOpcode() == RHSShift.getOpcode())
9071 return SDValue(); // Shifts must disagree.
9072
9073 // Canonicalize shl to left side in a shl/srl pair.
9074 if (RHSShift.getOpcode() == ISD::SHL) {
9075 std::swap(LHS, RHS);
9076 std::swap(LHSShift, RHSShift);
9077 std::swap(LHSMask, RHSMask);
9078 }
9079
9080 // Something has gone wrong - we've lost the shl/srl pair - bail.
9081 if (LHSShift.getOpcode() != ISD::SHL || RHSShift.getOpcode() != ISD::SRL)
9082 return SDValue();
9083
9084 unsigned EltSizeInBits = VT.getScalarSizeInBits();
9085 SDValue LHSShiftArg = LHSShift.getOperand(0);
9086 SDValue LHSShiftAmt = LHSShift.getOperand(1);
9087 SDValue RHSShiftArg = RHSShift.getOperand(0);
9088 SDValue RHSShiftAmt = RHSShift.getOperand(1);
9089
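// The two constant shift amounts must sum to the element width so that the
// shl/srl pair covers every bit of the source exactly once.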
9090 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
9091 ConstantSDNode *RHS) {
9092 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
9093 };
9094
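// Reapply any AND mask that was stripped from a shift operand, widened with
// the bits contributed by the opposite shift, to the rotate/funnel result.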
9095 auto ApplyMasks = [&](SDValue Res) {
9096 // If there is an AND of either shifted operand, apply it to the result.
9097 if (LHSMask.getNode() || RHSMask.getNode()) {
9098 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
9099 SDValue Mask = AllOnes;
9100
9101 if (LHSMask.getNode()) {
9102 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
9103 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
9104 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
9105 }
9106 if (RHSMask.getNode()) {
9107 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
9108 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
9109 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
9110 }
9111
9112 Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
9113 }
9114
9115 return Res;
9116 };
9117
9118 // TODO: Support pre-legalization funnel-shift by constant.
9119 bool IsRotate = LHSShiftArg == RHSShiftArg;
9120 if (!IsRotate && !(HasFSHL || HasFSHR)) {
9121 if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
9122 ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
9123 // Look for a disguised rotate by constant.
9124 // The common shifted operand X may be hidden inside another 'or'.
9125 SDValue X, Y;
9126 auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
9127 if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
9128 return false;
9129 if (CommonOp == Or.getOperand(0)) {
9130 X = CommonOp;
9131 Y = Or.getOperand(1);
9132 return true;
9133 }
9134 if (CommonOp == Or.getOperand(1)) {
9135 X = CommonOp;
9136 Y = Or.getOperand(0);
9137 return true;
9138 }
9139 return false;
9140 };
9141
9142 SDValue Res;
9143 if (matchOr(LHSShiftArg, RHSShiftArg)) {
9144 // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
9145 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
9146 SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
9147 Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
9148 } else if (matchOr(RHSShiftArg, LHSShiftArg)) {
9149 // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
9150 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
9151 SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
9152 Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
9153 } else {
9154 return SDValue();
9155 }
9156
9157 return ApplyMasks(Res);
9158 }
9159
9160 return SDValue(); // Requires funnel shift support.
9161 }
9162
9163 // fold (or/add (shl x, C1), (srl x, C2)) -> (rotl x, C1)
9164 // fold (or/add (shl x, C1), (srl x, C2)) -> (rotr x, C2)
9165 // fold (or/add (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
9166 // fold (or/add (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
9167 // iff C1+C2 == EltSizeInBits
9168 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
9169 SDValue Res;
9170 if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
9171 bool UseROTL = !LegalOperations || HasROTL;
9172 Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
9173 UseROTL ? LHSShiftAmt : RHSShiftAmt);
9174 } else {
9175 bool UseFSHL = !LegalOperations || HasFSHL;
9176 Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
9177 RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
9178 }
9179
9180 return ApplyMasks(Res);
9181 }
9182
9183 // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
9184 // shift.
9185 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
9186 return SDValue();
9187
9188 // If there is a mask here, and we have a variable shift, we can't be sure
9189 // that we're masking out the right stuff.
9190 if (LHSMask.getNode() || RHSMask.getNode())
9191 return SDValue();
9192
9193 // If the shift amount is sign/zext/any-extended just peel it off.
9194 SDValue LExtOp0 = LHSShiftAmt;
9195 SDValue RExtOp0 = RHSShiftAmt;
9196 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
9197 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
9198 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
9199 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
9200 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
9201 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
9202 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
9203 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
9204 LExtOp0 = LHSShiftAmt.getOperand(0);
9205 RExtOp0 = RHSShiftAmt.getOperand(0);
9206 }
9207
9208 if (IsRotate && (HasROTL || HasROTR)) {
9209 if (SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
9210 LExtOp0, RExtOp0, FromAdd, HasROTL,
9211 ISD::ROTL, ISD::ROTR, DL))
9212 return TryL;
9213
9214 if (SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
9215 RExtOp0, LExtOp0, FromAdd, HasROTR,
9216 ISD::ROTR, ISD::ROTL, DL))
9217 return TryR;
9218 }
9219
9220 if (SDValue TryL = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt,
9221 RHSShiftAmt, LExtOp0, RExtOp0, FromAdd,
9222 HasFSHL, ISD::FSHL, ISD::FSHR, DL))
9223 return TryL;
9224
9225 if (SDValue TryR = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt,
9226 LHSShiftAmt, RExtOp0, LExtOp0, FromAdd,
9227 HasFSHR, ISD::FSHR, ISD::FSHL, DL))
9228 return TryR;
9229
9230 return SDValue();
9231}
9232
9233/// Recursively traverses the expression calculating the origin of the requested
9234/// byte of the given value. Returns std::nullopt if the provider can't be
9235/// calculated.
9236///
9237/// For all the values except the root of the expression, we verify that the
9238/// value has exactly one use and if not then return std::nullopt. This way if
9239/// the origin of the byte is returned it's guaranteed that the values which
9240/// contribute to the byte are not used outside of this expression.
9241
9242/// However, there is a special case when dealing with vector loads -- we allow
9243/// more than one use if the load is a vector type. Since the values that
9244/// contribute to the byte ultimately come from the ExtractVectorElements of the
9245/// Load, we don't care if the Load has uses other than ExtractVectorElements,
9246/// because those operations are independent from the pattern to be combined.
9247/// For vector loads, we simply care that the ByteProviders are adjacent
9248/// positions of the same vector, and their index matches the byte that is being
9249/// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex
9250/// is the index used in an ExtractVectorElement, and \p StartingIndex is the
9251/// byte position we are trying to provide for the LoadCombine. If these do
9252/// not match, then we can not combine the vector loads. \p Index uses the
9253/// byte position we are trying to provide for and is matched against the
9254/// shl and load size. The \p Index algorithm ensures the requested byte is
9255/// provided for by the pattern, and the pattern does not over provide bytes.
9256///
9257///
9258/// The supported LoadCombine pattern for vector loads is as follows
9259/// or
9260/// / \
9261/// or shl
9262/// / \ |
9263/// or shl zext
9264/// / \ | |
9265/// shl zext zext EVE*
9266/// | | | |
9267/// zext EVE* EVE* LOAD
9268/// | | |
9269/// EVE* LOAD LOAD
9270/// |
9271/// LOAD
9272///
9273/// *ExtractVectorElement
9275
9276static std::optional<SDByteProvider>
9277calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
9278 std::optional<uint64_t> VectorIndex,
9279 unsigned StartingIndex = 0) {
9280
9281 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
9282 if (Depth == 10)
9283 return std::nullopt;
9284
9285 // Only allow multiple uses if the instruction is a vector load (in which
9286 // case we will use the load for every ExtractVectorElement)
9287 if (Depth && !Op.hasOneUse() &&
9288 (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector()))
9289 return std::nullopt;
9290
9291 // Fail to combine if we have encountered anything but a LOAD after handling
9292 // an ExtractVectorElement.
9293 if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
9294 return std::nullopt;
9295
9296 unsigned BitWidth = Op.getScalarValueSizeInBits();
9297 if (BitWidth % 8 != 0)
9298 return std::nullopt;
9299 unsigned ByteWidth = BitWidth / 8;
9300 assert(Index < ByteWidth && "invalid index requested");
9301 (void) ByteWidth;
9302
9303 switch (Op.getOpcode()) {
9304 case ISD::OR: {
9305 auto LHS =
9306 calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex);
9307 if (!LHS)
9308 return std::nullopt;
9309 auto RHS =
9310 calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex);
9311 if (!RHS)
9312 return std::nullopt;
9313
9314 if (LHS->isConstantZero())
9315 return RHS;
9316 if (RHS->isConstantZero())
9317 return LHS;
9318 return std::nullopt;
9319 }
9320 case ISD::SHL: {
9321 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
9322 if (!ShiftOp)
9323 return std::nullopt;
9324
9325 uint64_t BitShift = ShiftOp->getZExtValue();
9326
9327 if (BitShift % 8 != 0)
9328 return std::nullopt;
9329 uint64_t ByteShift = BitShift / 8;
9330
9331 // If we are shifting by an amount greater than the index we are trying to
9332 // provide, then do not provide anything. Otherwise, subtract the shift
9333 // amount (in bytes) from the index.
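// For illustration: with a 16-bit shift (ByteShift == 2), a request for byte 2
// is redirected to byte 0 of the shifted operand, while requests for bytes 0
// and 1 are known to be zero.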
9334 return Index < ByteShift
9335 ? std::optional<SDByteProvider>(SDByteProvider::getConstantZero())
9336 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
9337 Depth + 1, VectorIndex, Index);
9338 }
9339 case ISD::ANY_EXTEND:
9340 case ISD::SIGN_EXTEND:
9341 case ISD::ZERO_EXTEND: {
9342 SDValue NarrowOp = Op->getOperand(0);
9343 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
9344 if (NarrowBitWidth % 8 != 0)
9345 return std::nullopt;
9346 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
9347
9348 if (Index >= NarrowByteWidth)
9349 return Op.getOpcode() == ISD::ZERO_EXTEND
9350 ? std::optional<SDByteProvider>(
9351 SDByteProvider::getConstantZero())
9352 : std::nullopt;
9353 return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
9354 StartingIndex);
9355 }
9356 case ISD::BSWAP:
9357 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
9358 Depth + 1, VectorIndex, StartingIndex);
9359 case ISD::EXTRACT_VECTOR_ELT: {
9360 auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
9361 if (!OffsetOp)
9362 return std::nullopt;
9363
9364 VectorIndex = OffsetOp->getZExtValue();
9365
9366 SDValue NarrowOp = Op->getOperand(0);
9367 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
9368 if (NarrowBitWidth % 8 != 0)
9369 return std::nullopt;
9370 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
9371 // EXTRACT_VECTOR_ELT can extend the element type to the width of the return
9372 // type, leaving the high bits undefined.
9373 if (Index >= NarrowByteWidth)
9374 return std::nullopt;
9375
9376 // Check to see if the position of the element in the vector corresponds
9377 // with the byte we are trying to provide for. In the case of a vector of
9378 // i8, this simply means the VectorIndex == StartingIndex. For non i8 cases,
9379 // the element will provide a range of bytes. For example, if we have a
9380 // vector of i16s, each element provides two bytes (V[1] provides byte 2 and
9381 // 3).
9382 if (*VectorIndex * NarrowByteWidth > StartingIndex)
9383 return std::nullopt;
9384 if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex)
9385 return std::nullopt;
9386
9387 return calculateByteProvider(Op->getOperand(0), Index, Depth + 1,
9388 VectorIndex, StartingIndex);
9389 }
9390 case ISD::LOAD: {
9391 auto L = cast<LoadSDNode>(Op.getNode());
9392 if (!L->isSimple() || L->isIndexed())
9393 return std::nullopt;
9394
9395 unsigned NarrowBitWidth = L->getMemoryVT().getScalarSizeInBits();
9396 if (NarrowBitWidth % 8 != 0)
9397 return std::nullopt;
9398 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
9399
9400 // If the width of the load does not reach the byte we are trying to provide
9401 // for and it is not a ZEXTLOAD, then the load does not provide for the byte
9402 // in question.
9403 if (Index >= NarrowByteWidth)
9404 return L->getExtensionType() == ISD::ZEXTLOAD
9405 ? std::optional<SDByteProvider>(
9406 SDByteProvider::getConstantZero())
9407 : std::nullopt;
9408
9409 unsigned BPVectorIndex = VectorIndex.value_or(0U);
9410 return SDByteProvider::getSrc(L, Index, BPVectorIndex);
9411 }
9412 }
9413
9414 return std::nullopt;
9415}
9416
9417static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
9418 return i;
9419}
9420
9421static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
9422 return BW - i - 1;
9423}
9424
9425 // Check if the byte offsets we are looking at match either a big or a
9426 // little endian value load. Return true for big endian, false for little
9427 // endian, and std::nullopt if the match failed.
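// For illustration: relative offsets {0, 1, 2, 3} match the little endian
// layout and yield false, {3, 2, 1, 0} match big endian and yield true, and
// a mixed order such as {0, 2, 1, 3} yields std::nullopt.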
9428static std::optional<bool> isBigEndian(ArrayRef<int64_t> ByteOffsets,
9429 int64_t FirstOffset) {
9430 // The endian can be decided only when it is 2 bytes at least.
9431 unsigned Width = ByteOffsets.size();
9432 if (Width < 2)
9433 return std::nullopt;
9434
9435 bool BigEndian = true, LittleEndian = true;
9436 for (unsigned i = 0; i < Width; i++) {
9437 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
9438 LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
9439 BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
9440 if (!BigEndian && !LittleEndian)
9441 return std::nullopt;
9442 }
9443
9444 assert((BigEndian != LittleEndian) && "It should be either big endian or "
9445 "little endian");
9446 return BigEndian;
9447}
9448
9449// Look through one layer of truncate or extend.
9450 static SDValue stripTruncAndExt(SDValue Value) {
9451 switch (Value.getOpcode()) {
9452 case ISD::TRUNCATE:
9453 case ISD::ZERO_EXTEND:
9454 case ISD::SIGN_EXTEND:
9455 case ISD::ANY_EXTEND:
9456 return Value.getOperand(0);
9457 }
9458 return SDValue();
9459}
9460
9461/// Match a pattern where a wide type scalar value is stored by several narrow
9462 /// stores. Fold it into a single store or a BSWAP and a store if the target
9463 /// supports it.
9464///
9465/// Assuming little endian target:
9466/// i8 *p = ...
9467/// i32 val = ...
9468/// p[0] = (val >> 0) & 0xFF;
9469/// p[1] = (val >> 8) & 0xFF;
9470/// p[2] = (val >> 16) & 0xFF;
9471/// p[3] = (val >> 24) & 0xFF;
9472/// =>
9473/// *((i32)p) = val;
9474///
9475/// i8 *p = ...
9476/// i32 val = ...
9477/// p[0] = (val >> 24) & 0xFF;
9478/// p[1] = (val >> 16) & 0xFF;
9479/// p[2] = (val >> 8) & 0xFF;
9480/// p[3] = (val >> 0) & 0xFF;
9481/// =>
9482/// *((i32)p) = BSWAP(val);
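///
/// A half-wide variant is also recognized. As a rough sketch (little endian
/// target, i16 pieces of an i32 value, p16 is just a placeholder name):
/// p16[0] = (val >> 16) & 0xFFFF;
/// p16[1] = (val >> 0) & 0xFFFF;
/// =>
/// *((i32)p16) = ROTR(val, 16);
/// since swapping the two halves only needs a rotate rather than a full bswap.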
9483SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
9484 // The matching looks for "store (trunc x)" patterns that appear early but are
9485 // likely to be replaced by truncating store nodes during combining.
9486 // TODO: If there is evidence that running this later would help, this
9487 // limitation could be removed. Legality checks may need to be added
9488 // for the created store and optional bswap/rotate.
9489 if (LegalOperations || OptLevel == CodeGenOptLevel::None)
9490 return SDValue();
9491
9492 // We only handle merging simple stores of 1-4 bytes.
9493 // TODO: Allow unordered atomics when wider type is legal (see D66309)
9494 EVT MemVT = N->getMemoryVT();
9495 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
9496 !N->isSimple() || N->isIndexed())
9497 return SDValue();
9498
9499 // Collect all of the stores in the chain, up to the maximum store width (i64).
9500 SDValue Chain = N->getChain();
9501 SmallVector<StoreSDNode *, 8> Stores = {N};
9502 unsigned NarrowNumBits = MemVT.getScalarSizeInBits();
9503 unsigned MaxWideNumBits = 64;
9504 unsigned MaxStores = MaxWideNumBits / NarrowNumBits;
9505 while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
9506 // All stores must be the same size to ensure that we are writing all of the
9507 // bytes in the wide value.
9508 // This store should have exactly one use as a chain operand for another
9509 // store in the merging set. If there are other chain uses, then the
9510 // transform may not be safe because order of loads/stores outside of this
9511 // set may not be preserved.
9512 // TODO: We could allow multiple sizes by tracking each stored byte.
9513 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
9514 Store->isIndexed() || !Store->hasOneUse())
9515 return SDValue();
9516 Stores.push_back(Store);
9517 Chain = Store->getChain();
9518 if (MaxStores < Stores.size())
9519 return SDValue();
9520 }
9521 // There is no reason to continue if we do not have at least a pair of stores.
9522 if (Stores.size() < 2)
9523 return SDValue();
9524
9525 // Handle simple types only.
9526 LLVMContext &Context = *DAG.getContext();
9527 unsigned NumStores = Stores.size();
9528 unsigned WideNumBits = NumStores * NarrowNumBits;
9529 if (WideNumBits != 16 && WideNumBits != 32 && WideNumBits != 64)
9530 return SDValue();
9531
9532 // Check if all bytes of the source value that we are looking at are stored
9533 // to the same base address. Collect offsets from Base address into OffsetMap.
9534 SDValue SourceValue;
9535 SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
9536 int64_t FirstOffset = INT64_MAX;
9537 StoreSDNode *FirstStore = nullptr;
9538 std::optional<BaseIndexOffset> Base;
9539 for (auto *Store : Stores) {
9540 // All the stores store different parts of the CombinedValue. A truncate is
9541 // required to get the partial value.
9542 SDValue Trunc = Store->getValue();
9543 if (Trunc.getOpcode() != ISD::TRUNCATE)
9544 return SDValue();
9545 // Other than the first/last part, a shift operation is required to get the
9546 // offset.
9547 int64_t Offset = 0;
9548 SDValue WideVal = Trunc.getOperand(0);
9549 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
9550 isa<ConstantSDNode>(WideVal.getOperand(1))) {
9551 // The shift amount must be a constant multiple of the narrow type.
9552 // It is translated to the offset address in the wide source value "y".
9553 //
9554 // x = srl y, ShiftAmtC
9555 // i8 z = trunc x
9556 // store z, ...
9557 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
9558 if (ShiftAmtC % NarrowNumBits != 0)
9559 return SDValue();
9560
9561 // Make sure we aren't reading bits that are shifted in.
9562 if (ShiftAmtC > WideVal.getScalarValueSizeInBits() - NarrowNumBits)
9563 return SDValue();
9564
9565 Offset = ShiftAmtC / NarrowNumBits;
9566 WideVal = WideVal.getOperand(0);
9567 }
9568
9569 // Stores must share the same source value with different offsets.
9570 if (!SourceValue)
9571 SourceValue = WideVal;
9572 else if (SourceValue != WideVal) {
9573 // Truncate and extends can be stripped to see if the values are related.
9574 if (stripTruncAndExt(SourceValue) != WideVal &&
9575 stripTruncAndExt(WideVal) != SourceValue)
9576 return SDValue();
9577
9578 if (WideVal.getScalarValueSizeInBits() >
9579 SourceValue.getScalarValueSizeInBits())
9580 SourceValue = WideVal;
9581
9582 // Give up if the source value type is smaller than the store size.
9583 if (SourceValue.getScalarValueSizeInBits() < WideNumBits)
9584 return SDValue();
9585 }
9586
9587 // Stores must share the same base address.
9588 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
9589 int64_t ByteOffsetFromBase = 0;
9590 if (!Base)
9591 Base = Ptr;
9592 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9593 return SDValue();
9594
9595 // Remember the first store.
9596 if (ByteOffsetFromBase < FirstOffset) {
9597 FirstStore = Store;
9598 FirstOffset = ByteOffsetFromBase;
9599 }
9600 // Map the offset in the store and the offset in the combined value, and
9601 // early return if it has been set before.
9602 if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
9603 return SDValue();
9604 OffsetMap[Offset] = ByteOffsetFromBase;
9605 }
9606
9607 EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
9608
9609 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9610 assert(FirstStore && "First store must be set");
9611
9612 // Check that a store of the wide type is both allowed and fast on the target
9613 const DataLayout &Layout = DAG.getDataLayout();
9614 unsigned Fast = 0;
9615 bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
9616 *FirstStore->getMemOperand(), &Fast);
9617 if (!Allowed || !Fast)
9618 return SDValue();
9619
9620 // Check if the pieces of the value are going to the expected places in memory
9621 // to merge the stores.
9622 auto checkOffsets = [&](bool MatchLittleEndian) {
9623 if (MatchLittleEndian) {
9624 for (unsigned i = 0; i != NumStores; ++i)
9625 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
9626 return false;
9627 } else { // MatchBigEndian by reversing loop counter.
9628 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
9629 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
9630 return false;
9631 }
9632 return true;
9633 };
9634
9635 // Check if the offsets line up for the native data layout of this target.
9636 bool NeedBswap = false;
9637 bool NeedRotate = false;
9638 if (!checkOffsets(Layout.isLittleEndian())) {
9639 // Special-case: check if byte offsets line up for the opposite endian.
9640 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
9641 NeedBswap = true;
9642 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
9643 NeedRotate = true;
9644 else
9645 return SDValue();
9646 }
9647
9648 SDLoc DL(N);
9649 if (WideVT != SourceValue.getValueType()) {
9650 assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
9651 "Unexpected store value to merge");
9652 SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
9653 }
9654
9655 // Before legalize we can introduce illegal bswaps/rotates which will be later
9656 // converted to an explicit bswap sequence. This way we end up with a single
9657 // store and byte shuffling instead of several stores and byte shuffling.
9658 if (NeedBswap) {
9659 SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
9660 } else if (NeedRotate) {
9661 assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
9662 SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
9663 SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
9664 }
9665
9666 SDValue NewStore =
9667 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
9668 FirstStore->getPointerInfo(), FirstStore->getAlign());
9669
9670 // Rely on other DAG combine rules to remove the other individual stores.
9671 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
9672 return NewStore;
9673}
9674
9675/// Match a pattern where a wide type scalar value is loaded by several narrow
9676/// loads and combined by shifts and ors. Fold it into a single load or a load
9677 /// and a BSWAP if the target supports it.
9678///
9679/// Assuming little endian target:
9680/// i8 *a = ...
9681/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
9682/// =>
9683/// i32 val = *((i32)a)
9684///
9685/// i8 *a = ...
9686/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
9687/// =>
9688/// i32 val = BSWAP(*((i32)a))
9689///
9690/// TODO: This rule matches complex patterns with OR node roots and doesn't
9691/// interact well with the worklist mechanism. When a part of the pattern is
9692/// updated (e.g. one of the loads) its direct users are put into the worklist,
9693/// but the root node of the pattern which triggers the load combine is not
9694/// necessarily a direct user of the changed node. For example, once the address
9695 /// of the t28 load is reassociated, the load combine won't be triggered:
9696/// t25: i32 = add t4, Constant:i32<2>
9697/// t26: i64 = sign_extend t25
9698/// t27: i64 = add t2, t26
9699/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
9700/// t29: i32 = zero_extend t28
9701/// t32: i32 = shl t29, Constant:i8<8>
9702/// t33: i32 = or t23, t32
9703/// As a possible fix visitLoad can check if the load can be a part of a load
9704/// combine pattern and add corresponding OR roots to the worklist.
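///
/// A zero-extended variant is also handled. As a rough sketch on a little
/// endian target, when the upper bytes of the result are known to be zero:
/// i8 *a = ...
/// i32 val = a[0] | (a[1] << 8)
/// =>
/// i32 val = (i32) *((i16)a)
/// i.e. a single zero-extending i16 load.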
9705SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
9706 assert(N->getOpcode() == ISD::OR &&
9707 "Can only match load combining against OR nodes");
9708
9709 // Handles simple types only
9710 EVT VT = N->getValueType(0);
9711 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
9712 return SDValue();
9713 unsigned ByteWidth = VT.getSizeInBits() / 8;
9714
9715 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
9716 auto MemoryByteOffset = [&](SDByteProvider P) {
9717 assert(P.hasSrc() && "Must be a memory byte provider");
9718 auto *Load = cast<LoadSDNode>(P.Src.value());
9719
9720 unsigned LoadBitWidth = Load->getMemoryVT().getScalarSizeInBits();
9721
9722 assert(LoadBitWidth % 8 == 0 &&
9723 "can only analyze providers for individual bytes, not bits");
9724 unsigned LoadByteWidth = LoadBitWidth / 8;
9725 return IsBigEndianTarget ? bigEndianByteAt(LoadByteWidth, P.DestOffset)
9726 : littleEndianByteAt(LoadByteWidth, P.DestOffset);
9727 };
9728
9729 std::optional<BaseIndexOffset> Base;
9730 SDValue Chain;
9731
9732 SmallPtrSet<LoadSDNode *, 8> Loads;
9733 std::optional<SDByteProvider> FirstByteProvider;
9734 int64_t FirstOffset = INT64_MAX;
9735
9736 // Check if all the bytes of the OR we are looking at are loaded from the same
9737 // base address. Collect byte offsets from the Base address in ByteOffsets.
9738 SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
9739 unsigned ZeroExtendedBytes = 0;
9740 for (int i = ByteWidth - 1; i >= 0; --i) {
9741 auto P =
9742 calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ std::nullopt,
9743 /*StartingIndex*/ i);
9744 if (!P)
9745 return SDValue();
9746
9747 if (P->isConstantZero()) {
9748 // It's OK for the N most significant bytes to be 0, we can just
9749 // zero-extend the load.
9750 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
9751 return SDValue();
9752 continue;
9753 }
9754 assert(P->hasSrc() && "provenance should either be memory or zero");
9755 auto *L = cast<LoadSDNode>(P->Src.value());
9756
9757 // All loads must share the same chain
9758 SDValue LChain = L->getChain();
9759 if (!Chain)
9760 Chain = LChain;
9761 else if (Chain != LChain)
9762 return SDValue();
9763
9764 // Loads must share the same base address
9765 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
9766 int64_t ByteOffsetFromBase = 0;
9767
9768 // For vector loads, the expected load combine pattern will have an
9769 // ExtractElement for each index in the vector. While each of these
9770 // ExtractElements will be accessing the same base address as determined
9771 // by the load instruction, the actual bytes they interact with will differ
9772 // due to different ExtractElement indices. To accurately determine the
9773 // byte position of an ExtractElement, we offset the base load ptr with
9774 // the index multiplied by the byte size of each element in the vector.
9775 if (L->getMemoryVT().isVector()) {
9776 unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
9777 if (LoadWidthInBit % 8 != 0)
9778 return SDValue();
9779 unsigned ByteOffsetFromVector = P->SrcOffset * LoadWidthInBit / 8;
9780 Ptr.addToOffset(ByteOffsetFromVector);
9781 }
9782
9783 if (!Base)
9784 Base = Ptr;
9785
9786 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9787 return SDValue();
9788
9789 // Calculate the offset of the current byte from the base address
9790 ByteOffsetFromBase += MemoryByteOffset(*P);
9791 ByteOffsets[i] = ByteOffsetFromBase;
9792
9793 // Remember the first byte load
9794 if (ByteOffsetFromBase < FirstOffset) {
9795 FirstByteProvider = P;
9796 FirstOffset = ByteOffsetFromBase;
9797 }
9798
9799 Loads.insert(L);
9800 }
9801
9802 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
9803 "memory, so there must be at least one load which produces the value");
9804 assert(Base && "Base address of the accessed memory location must be set");
9805 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9806
9807 bool NeedsZext = ZeroExtendedBytes > 0;
9808
9809 EVT MemVT =
9810 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
9811
9812 if (!MemVT.isSimple())
9813 return SDValue();
9814
9815 // Before legalize we can introduce too wide illegal loads which will be later
9816 // split into legal sized loads. This enables us to combine i64 load by i8
9817 // patterns to a couple of i32 loads on 32 bit targets.
9818 if (LegalOperations &&
9819 !TLI.isLoadExtLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, VT,
9820 MemVT))
9821 return SDValue();
9822
9823 // Check if the bytes of the OR we are looking at match with either big or
9824 // little endian value load
9825 std::optional<bool> IsBigEndian = isBigEndian(
9826 ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
9827 if (!IsBigEndian)
9828 return SDValue();
9829
9830 assert(FirstByteProvider && "must be set");
9831
9832 // Ensure that the first byte is loaded from zero offset of the first load.
9833 // So the combined value can be loaded from the first load address.
9834 if (MemoryByteOffset(*FirstByteProvider) != 0)
9835 return SDValue();
9836 auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());
9837
9838 // The node we are looking at matches with the pattern, check if we can
9839 // replace it with a single (possibly zero-extended) load and bswap + shift if
9840 // needed.
9841
9842 // If the load needs byte swap check if the target supports it
9843 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
9844
9845 // Before legalize we can introduce illegal bswaps which will be later
9846 // converted to an explicit bswap sequence. This way we end up with a single
9847 // load and byte shuffling instead of several loads and byte shuffling.
9848 // We do not introduce illegal bswaps when zero-extending as this tends to
9849 // introduce too many arithmetic instructions.
9850 if (NeedsBswap && (LegalOperations || NeedsZext) &&
9851 !TLI.isOperationLegal(ISD::BSWAP, VT))
9852 return SDValue();
9853
9854 // If we need to bswap and zero extend, we have to insert a shift. Check that
9855 // it is legal.
9856 if (NeedsBswap && NeedsZext && LegalOperations &&
9857 !TLI.isOperationLegal(ISD::SHL, VT))
9858 return SDValue();
9859
9860 // Check that a load of the wide type is both allowed and fast on the target
9861 unsigned Fast = 0;
9862 bool Allowed =
9863 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
9864 *FirstLoad->getMemOperand(), &Fast);
9865 if (!Allowed || !Fast)
9866 return SDValue();
9867
9868 SDValue NewLoad =
9869 DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
9870 Chain, FirstLoad->getBasePtr(),
9871 FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
9872
9873 // Transfer chain users from old loads to the new load.
9874 for (LoadSDNode *L : Loads)
9875 DAG.makeEquivalentMemoryOrdering(L, NewLoad);
9876
9877 if (!NeedsBswap)
9878 return NewLoad;
9879
9880 SDValue ShiftedLoad =
9881 NeedsZext ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
9882 DAG.getShiftAmountConstant(ZeroExtendedBytes * 8,
9883 VT, SDLoc(N)))
9884 : NewLoad;
9885 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
9886}
9887
9888// If the target has andn, bsl, or a similar bit-select instruction,
9889// we want to unfold masked merge, with canonical pattern of:
9890// | A | |B|
9891// ((x ^ y) & m) ^ y
9892// | D |
9893// Into:
9894// (x & m) | (y & ~m)
9895// If y is a constant, m is not a 'not', and the 'andn' does not work with
9896// immediates, we unfold into a different pattern:
9897// ~(~x & m) & (m | y)
9898// If x is a constant, m is a 'not', and the 'andn' does not work with
9899// immediates, we unfold into a different pattern:
9900// (x | ~m) & ~(~m & ~y)
9901// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
9902// the very least that breaks andnpd / andnps patterns, and because those
9903// patterns are simplified in IR and shouldn't be created in the DAG
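// A quick sanity check of the equivalence with small values: for x = 0b1100,
// y = 0b1010, m = 0b0110,
// ((x ^ y) & m) ^ y == (0b0110 & 0b0110) ^ 0b1010 == 0b1100
// (x & m) | (y & ~m) == 0b0100 | 0b1000 == 0b1100
// i.e. bits of x are selected where m is set and bits of y elsewhere.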
9904SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
9905 assert(N->getOpcode() == ISD::XOR);
9906
9907 // Don't touch 'not' (i.e. where y = -1).
9908 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
9909 return SDValue();
9910
9911 EVT VT = N->getValueType(0);
9912
9913 // There are 3 commutable operators in the pattern,
9914 // so we have to deal with 8 possible variants of the basic pattern.
9915 SDValue X, Y, M;
9916 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
9917 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
9918 return false;
9919 SDValue Xor = And.getOperand(XorIdx);
9920 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
9921 return false;
9922 SDValue Xor0 = Xor.getOperand(0);
9923 SDValue Xor1 = Xor.getOperand(1);
9924 // Don't touch 'not' (i.e. where y = -1).
9925 if (isAllOnesOrAllOnesSplat(Xor1))
9926 return false;
9927 if (Other == Xor0)
9928 std::swap(Xor0, Xor1);
9929 if (Other != Xor1)
9930 return false;
9931 X = Xor0;
9932 Y = Xor1;
9933 M = And.getOperand(XorIdx ? 0 : 1);
9934 return true;
9935 };
9936
9937 SDValue N0 = N->getOperand(0);
9938 SDValue N1 = N->getOperand(1);
9939 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
9940 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
9941 return SDValue();
9942
9943 // Don't do anything if the mask is constant. This should not be reachable.
9944 // InstCombine should have already unfolded this pattern, and DAGCombiner
9945 // probably shouldn't produce it, too.
9946 if (isa<ConstantSDNode>(M.getNode()))
9947 return SDValue();
9948
9949 // We can transform if the target has AndNot
9950 if (!TLI.hasAndNot(M))
9951 return SDValue();
9952
9953 SDLoc DL(N);
9954
9955 // If Y is a constant, check that 'andn' works with immediates. Unless M is
9956 // a bitwise not that would already allow ANDN to be used.
9957 if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
9958 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
9959 // If not, we need to do a bit more work to make sure andn is still used.
9960 SDValue NotX = DAG.getNOT(DL, X, VT);
9961 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
9962 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
9963 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
9964 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
9965 }
9966
9967 // If X is a constant and M is a bitwise not, check that 'andn' works with
9968 // immediates.
9969 if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
9970 assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
9971 // If not, we need to do a bit more work to make sure andn is still used.
9972 SDValue NotM = M.getOperand(0);
9973 SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
9974 SDValue NotY = DAG.getNOT(DL, Y, VT);
9975 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
9976 SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
9977 return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
9978 }
9979
9980 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
9981 SDValue NotM = DAG.getNOT(DL, M, VT);
9982 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
9983
9984 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
9985}
9986
9987SDValue DAGCombiner::visitXOR(SDNode *N) {
9988 SDValue N0 = N->getOperand(0);
9989 SDValue N1 = N->getOperand(1);
9990 EVT VT = N0.getValueType();
9991 SDLoc DL(N);
9992
9993 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
9994 if (N0.isUndef() && N1.isUndef())
9995 return DAG.getConstant(0, DL, VT);
9996
9997 // fold (xor x, undef) -> undef
9998 if (N0.isUndef())
9999 return N0;
10000 if (N1.isUndef())
10001 return N1;
10002
10003 // fold (xor c1, c2) -> c1^c2
10004 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
10005 return C;
10006
10007 // canonicalize constant to RHS
10008 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
10009 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
10010 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
10011
10012 // fold vector ops
10013 if (VT.isVector()) {
10014 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10015 return FoldedVOp;
10016
10017 // fold (xor x, 0) -> x, vector edition
10018 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
10019 return N0;
10020 }
10021
10022 // fold (xor x, 0) -> x
10023 if (isNullConstant(N1))
10024 return N0;
10025
10026 if (SDValue NewSel = foldBinOpIntoSelect(N))
10027 return NewSel;
10028
10029 // reassociate xor
10030 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
10031 return RXOR;
10032
10033 // Fold xor(vecreduce(x), vecreduce(y)) -> vecreduce(xor(x, y))
10034 if (SDValue SD =
10035 reassociateReduction(ISD::VECREDUCE_XOR, ISD::XOR, DL, VT, N0, N1))
10036 return SD;
10037
10038 // fold (a^b) -> (a|b) iff a and b share no bits.
10039 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
10040 DAG.haveNoCommonBitsSet(N0, N1))
10041 return DAG.getNode(ISD::OR, DL, VT, N0, N1, SDNodeFlags::Disjoint);
10042
10043 // look for 'add-like' folds:
10044 // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
10045 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
10046 isMinSignedConstant(N1))
10047 if (SDValue Combined = visitADDLike(N))
10048 return Combined;
10049
10050 // fold not (setcc x, y, cc) -> setcc x y !cc
10051 // Avoid breaking: and (not(setcc x, y, cc), z) -> andn for vec
10052 unsigned N0Opcode = N0.getOpcode();
10053 SDValue LHS, RHS, CC;
10054 if (TLI.isConstTrueVal(N1) &&
10055 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true) &&
10056 !(VT.isVector() && TLI.hasAndNot(SDValue(N, 0)) && N->hasOneUse() &&
10057 N->use_begin()->getUser()->getOpcode() == ISD::AND)) {
10058 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
10059 LHS.getValueType());
10060 if (!LegalOperations ||
10061 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
10062 switch (N0Opcode) {
10063 default:
10064 llvm_unreachable("Unhandled SetCC Equivalent!");
10065 case ISD::SETCC:
10066 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
10067 case ISD::SELECT_CC:
10068 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
10069 N0.getOperand(3), NotCC);
10070 case ISD::STRICT_FSETCC:
10071 case ISD::STRICT_FSETCCS: {
10072 if (N0.hasOneUse()) {
10073 // FIXME Can we handle multiple uses? Could we token factor the chain
10074 // results from the new/old setcc?
10075 SDValue SetCC =
10076 DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
10077 N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
10078 CombineTo(N, SetCC);
10079 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
10080 recursivelyDeleteUnusedNodes(N0.getNode());
10081 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10082 }
10083 break;
10084 }
10085 }
10086 }
10087 }
10088
10089 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
10090 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
10091 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
10092 SDValue V = N0.getOperand(0);
10093 SDLoc DL0(N0);
10094 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
10095 DAG.getConstant(1, DL0, V.getValueType()));
10096 AddToWorklist(V.getNode());
10097 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
10098 }
10099
10100 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
10101 // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are setcc
10102 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
10103 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
10104 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
10105 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
10106 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
10107 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
10108 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
10109 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
10110 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
10111 }
10112 }
10113 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
10114 // fold (not (and x, y)) -> (or (not x), (not y)) iff x or y are constants
10115 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
10116 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
10117 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
10118 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
10119 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
10120 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
10121 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
10122 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
10123 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
10124 }
10125 }
10126
10127 // fold (not (sub Y, X)) -> (add X, ~Y) if Y is a constant
10128 if (N0.getOpcode() == ISD::SUB && isAllOnesConstant(N1)) {
10129 SDValue Y = N0.getOperand(0);
10130 SDValue X = N0.getOperand(1);
10131
10132 if (auto *YConst = dyn_cast<ConstantSDNode>(Y)) {
10133 APInt NotYValue = ~YConst->getAPIntValue();
10134 SDValue NotY = DAG.getConstant(NotYValue, DL, VT);
10135 return DAG.getNode(ISD::ADD, DL, VT, X, NotY, N->getFlags());
10136 }
10137 }
10138
10139 // fold (not (add X, -1)) -> (neg X)
10140 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && isAllOnesConstant(N1) &&
10141 isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
10142 return DAG.getNegative(N0.getOperand(0), DL, VT);
10143 }
10144
10145 // fold (xor (and x, y), y) -> (and (not x), y)
10146 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
10147 SDValue X = N0.getOperand(0);
10148 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
10149 AddToWorklist(NotX.getNode());
10150 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
10151 }
10152
10153 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
10154 if (!LegalOperations || hasOperation(ISD::ABS, VT)) {
10155 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
10156 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
10157 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
10158 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
10159 SDValue S0 = S.getOperand(0);
10160 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
10161 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
10162 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
10163 return DAG.getNode(ISD::ABS, DL, VT, S0);
10164 }
10165 }
10166
10167 // fold (xor x, x) -> 0
10168 if (N0 == N1)
10169 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
10170
10171 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
10172 // Here is a concrete example of this equivalence:
10173 // i16 x == 14
10174 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
10175 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
10176 //
10177 // =>
10178 //
10179 // i16 ~1 == 0b1111111111111110
10180 // i16 rol(~1, 14) == 0b1011111111111111
10181 //
10182 // Some additional tips to help conceptualize this transform:
10183 // - Try to see the operation as placing a single zero in a value of all ones.
10184 // - There exists no value for x which would allow the result to contain zero.
10185 // - Values of x larger than the bitwidth are undefined and do not require a
10186 // consistent result.
10187 // - Pushing the zero left requires shifting one bits in from the right.
10188 // A rotate left of ~1 is a nice way of achieving the desired result.
10189 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
10190 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
10191 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getSignedConstant(~1, DL, VT),
10192 N0.getOperand(1));
10193 }
10194
10195 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
10196 if (N0Opcode == N1.getOpcode())
10197 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
10198 return V;
10199
10200 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
10201 return R;
10202 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
10203 return R;
10204 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
10205 return R;
10206
10207 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
10208 if (SDValue MM = unfoldMaskedMerge(N))
10209 return MM;
10210
10211 // Simplify the expression using non-local knowledge.
10212 if (SimplifyDemandedBits(SDValue(N, 0)))
10213 return SDValue(N, 0);
10214
10215 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
10216 return Combined;
10217
10218 // fold (xor (smin(x, C), C)) -> select (x < C), xor(x, C), 0
10219 // fold (xor (smax(x, C), C)) -> select (x > C), xor(x, C), 0
10220 // fold (xor (umin(x, C), C)) -> select (x < C), xor(x, C), 0
10221 // fold (xor (umax(x, C), C)) -> select (x > C), xor(x, C), 0
10222 SDValue Op0;
10223 if (sd_match(N0, m_OneUse(m_AnyOf(m_SMin(m_Value(Op0), m_Specific(N1)),
10224 m_SMax(m_Value(Op0), m_Specific(N1)),
10225 m_UMin(m_Value(Op0), m_Specific(N1)),
10226 m_UMax(m_Value(Op0), m_Specific(N1)))))) {
10227
10228 if (isa<ConstantSDNode>(N1) ||
10230 // For vectors, only optimize when the constant is zero or all-ones to
10231 // avoid generating more instructions
10232 if (VT.isVector()) {
10233 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10234 if (!N1C || (!N1C->isZero() && !N1C->isAllOnes()))
10235 return SDValue();
10236 }
10237
10238 // Avoid the fold if the minmax operation is legal and select is expensive
10239 if (TLI.isOperationLegal(N0.getOpcode(), VT) &&
10241 return SDValue();
10242
10243 EVT CCVT = getSetCCResultType(VT);
10244 ISD::CondCode CC;
10245 switch (N0.getOpcode()) {
10246 case ISD::SMIN:
10247 CC = ISD::SETLT;
10248 break;
10249 case ISD::SMAX:
10250 CC = ISD::SETGT;
10251 break;
10252 case ISD::UMIN:
10253 CC = ISD::SETULT;
10254 break;
10255 case ISD::UMAX:
10256 CC = ISD::SETUGT;
10257 break;
10258 }
10259 SDValue FN1 = DAG.getFreeze(N1);
10260 SDValue Cmp = DAG.getSetCC(DL, CCVT, Op0, FN1, CC);
10261 SDValue XorXC = DAG.getNode(ISD::XOR, DL, VT, Op0, FN1);
10262 SDValue Zero = DAG.getConstant(0, DL, VT);
10263 return DAG.getSelect(DL, VT, Cmp, XorXC, Zero);
10264 }
10265 }
10266
10267 return SDValue();
10268}
10269
10270/// If we have a shift-by-constant of a bitwise logic op that itself has a
10271/// shift-by-constant operand with identical opcode, we may be able to convert
10272/// that into 2 independent shifts followed by the logic op. This is a
10273/// throughput improvement.
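/// For example (illustrative): shl (and (shl X, 3), Y), 2
/// -> and (shl X, 5), (shl Y, 2)
/// so the two shifts no longer form a serial dependency through the logic op.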
10274 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
10275 // Match a one-use bitwise logic op.
10276 SDValue LogicOp = Shift->getOperand(0);
10277 if (!LogicOp.hasOneUse())
10278 return SDValue();
10279
10280 unsigned LogicOpcode = LogicOp.getOpcode();
10281 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
10282 LogicOpcode != ISD::XOR)
10283 return SDValue();
10284
10285 // Find a matching one-use shift by constant.
10286 unsigned ShiftOpcode = Shift->getOpcode();
10287 SDValue C1 = Shift->getOperand(1);
10288 ConstantSDNode *C1Node = isConstOrConstSplat(C1);
10289 assert(C1Node && "Expected a shift with constant operand");
10290 const APInt &C1Val = C1Node->getAPIntValue();
10291 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
10292 const APInt *&ShiftAmtVal) {
10293 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
10294 return false;
10295
10296 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
10297 if (!ShiftCNode)
10298 return false;
10299
10300 // Capture the shifted operand and shift amount value.
10301 ShiftOp = V.getOperand(0);
10302 ShiftAmtVal = &ShiftCNode->getAPIntValue();
10303
10304 // Shift amount types do not have to match their operand type, so check that
10305 // the constants are the same width.
10306 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
10307 return false;
10308
10309 // The fold is not valid if the sum of the shift values doesn't fit in the
10310 // given shift amount type.
10311 bool Overflow = false;
10312 APInt NewShiftAmt = C1Val.uadd_ov(*ShiftAmtVal, Overflow);
10313 if (Overflow)
10314 return false;
10315
10316 // The fold is not valid if the sum of the shift values exceeds bitwidth.
10317 if (NewShiftAmt.uge(V.getScalarValueSizeInBits()))
10318 return false;
10319
10320 return true;
10321 };
10322
10323 // Logic ops are commutative, so check each operand for a match.
10324 SDValue X, Y;
10325 const APInt *C0Val;
10326 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
10327 Y = LogicOp.getOperand(1);
10328 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
10329 Y = LogicOp.getOperand(0);
10330 else
10331 return SDValue();
10332
10333 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
10334 SDLoc DL(Shift);
10335 EVT VT = Shift->getValueType(0);
10336 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
10337 SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
10338 SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
10339 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
10340 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2,
10341 LogicOp->getFlags());
10342}
10343
10344/// Handle transforms common to the three shifts, when the shift amount is a
10345/// constant.
10346/// We are looking for: (shift being one of shl/sra/srl)
10347/// shift (binop X, C0), C1
10348/// And want to transform into:
10349/// binop (shift X, C1), (shift C0, C1)
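/// For example (illustrative): shl (or X, 0xF0), 8 -> or (shl X, 8), 0xF000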
10350SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
10351 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
10352
10353 // Do not turn a 'not' into a regular xor.
10354 if (isBitwiseNot(N->getOperand(0)))
10355 return SDValue();
10356
10357 // The inner binop must be one-use, since we want to replace it.
10358 SDValue LHS = N->getOperand(0);
10359 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
10360 return SDValue();
10361
10362 // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)).
10363 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
10364 return R;
10365
10366 // We want to pull some binops through shifts, so that we have (and (shift))
10367 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
10368 // thing happens with address calculations, so it's important to canonicalize
10369 // it.
10370 switch (LHS.getOpcode()) {
10371 default:
10372 return SDValue();
10373 case ISD::OR:
10374 case ISD::XOR:
10375 case ISD::AND:
10376 break;
10377 case ISD::ADD:
10378 if (N->getOpcode() != ISD::SHL)
10379 return SDValue(); // only shl(add) not sr[al](add).
10380 break;
10381 }
10382
10383 // FIXME: disable this unless the input to the binop is a shift by a constant
10384 // or is copy/select. Enable this in other cases when we figure out it's
10385 // exactly profitable.
10386 SDValue BinOpLHSVal = LHS.getOperand(0);
10387 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
10388 BinOpLHSVal.getOpcode() == ISD::SRA ||
10389 BinOpLHSVal.getOpcode() == ISD::SRL) &&
10390 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
10391 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
10392 BinOpLHSVal.getOpcode() == ISD::SELECT;
10393
10394 if (!IsShiftByConstant && !IsCopyOrSelect)
10395 return SDValue();
10396
10397 if (IsCopyOrSelect && N->hasOneUse())
10398 return SDValue();
10399
10400 // Attempt to fold the constants, shifting the binop RHS by the shift amount.
10401 SDLoc DL(N);
10402 EVT VT = N->getValueType(0);
10403 if (SDValue NewRHS = DAG.FoldConstantArithmetic(
10404 N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
10405 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
10406 N->getOperand(1));
10407 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
10408 }
10409
10410 return SDValue();
10411}
10412
10413SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
10414 assert(N->getOpcode() == ISD::TRUNCATE);
10415 assert(N->getOperand(0).getOpcode() == ISD::AND);
10416
10417 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
10418 EVT TruncVT = N->getValueType(0);
10419 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
10420 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
10421 SDValue N01 = N->getOperand(0).getOperand(1);
10422 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
10423 SDLoc DL(N);
10424 SDValue N00 = N->getOperand(0).getOperand(0);
10425 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
10426 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
10427 AddToWorklist(Trunc00.getNode());
10428 AddToWorklist(Trunc01.getNode());
10429 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
10430 }
10431 }
10432
10433 return SDValue();
10434}
10435
10436SDValue DAGCombiner::visitRotate(SDNode *N) {
10437 SDLoc dl(N);
10438 SDValue N0 = N->getOperand(0);
10439 SDValue N1 = N->getOperand(1);
10440 EVT VT = N->getValueType(0);
10441 unsigned Bitsize = VT.getScalarSizeInBits();
10442
10443 // fold (rot x, 0) -> x
10444 if (isNullOrNullSplat(N1))
10445 return N0;
10446
10447 // fold (rot x, c) -> x iff (c % BitSize) == 0
10448 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
10449 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
10450 if (DAG.MaskedValueIsZero(N1, ModuloMask))
10451 return N0;
10452 }
10453
10454 // fold (rot x, c) -> (rot x, c % BitSize)
10455 bool OutOfRange = false;
10456 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
10457 OutOfRange |= C->getAPIntValue().uge(Bitsize);
10458 return true;
10459 };
10460 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
10461 EVT AmtVT = N1.getValueType();
10462 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
10463 if (SDValue Amt =
10464 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
10465 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
10466 }
10467
10468 // rot i16 X, 8 --> bswap X
10469 auto *RotAmtC = isConstOrConstSplat(N1);
10470 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
10471 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
10472 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
10473
10474 // Simplify the operands using demanded-bits information.
10475 if (SimplifyDemandedBits(SDValue(N, 0)))
10476 return SDValue(N, 0);
10477
10478 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
10479 if (N1.getOpcode() == ISD::TRUNCATE &&
10480 N1.getOperand(0).getOpcode() == ISD::AND) {
10481 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10482 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
10483 }
10484
10485 unsigned NextOp = N0.getOpcode();
10486
10487 // fold (rot* (rot* x, c2), c1)
10488 // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
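// For illustration, on i8: rotl (rotr x, 3), 10
// -> rotl x, ((10 % 8) - (3 % 8) + 8) % 8 == rotl x, 7
// which matches rotating right by 3 and then left by 10.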
10489 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
10490 bool C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
10491 bool C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
10492 if (C1 && C2 && N1.getValueType() == N0.getOperand(1).getValueType()) {
10493 EVT ShiftVT = N1.getValueType();
10494 bool SameSide = (N->getOpcode() == NextOp);
10495 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
10496 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
10497 SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
10498 {N1, BitsizeC});
10499 SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
10500 {N0.getOperand(1), BitsizeC});
10501 if (Norm1 && Norm2)
10502 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
10503 CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
10504 CombinedShift = DAG.FoldConstantArithmetic(ISD::ADD, dl, ShiftVT,
10505 {CombinedShift, BitsizeC});
10506 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
10507 ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
10508 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
10509 CombinedShiftNorm);
10510 }
10511 }
10512 }
10513 return SDValue();
10514}
10515
10516SDValue DAGCombiner::visitSHL(SDNode *N) {
10517 SDValue N0 = N->getOperand(0);
10518 SDValue N1 = N->getOperand(1);
10519 if (SDValue V = DAG.simplifyShift(N0, N1))
10520 return V;
10521
10522 SDLoc DL(N);
10523 EVT VT = N0.getValueType();
10524 EVT ShiftVT = N1.getValueType();
10525 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10526
10527 // fold (shl c1, c2) -> c1<<c2
10528 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N0, N1}))
10529 return C;
10530
10531 // fold vector ops
10532 if (VT.isVector()) {
10533 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10534 return FoldedVOp;
10535
10536 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
10537 // If setcc produces all-one true value then:
10538 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
10539 if (N1CV && N1CV->isConstant()) {
10540 if (N0.getOpcode() == ISD::AND) {
10541 SDValue N00 = N0->getOperand(0);
10542 SDValue N01 = N0->getOperand(1);
10543 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
10544
10545 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
10546 TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
10547 TargetLowering::ZeroOrNegativeOneBooleanContent) {
10548 if (SDValue C =
10549 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N01, N1}))
10550 return DAG.getNode(ISD::AND, DL, VT, N00, C);
10551 }
10552 }
10553 }
10554 }
10555
10556 if (SDValue NewSel = foldBinOpIntoSelect(N))
10557 return NewSel;
10558
10559 // if (shl x, c) is known to be zero, return 0
10560 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
10561 return DAG.getConstant(0, DL, VT);
10562
10563 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
10564 if (N1.getOpcode() == ISD::TRUNCATE &&
10565 N1.getOperand(0).getOpcode() == ISD::AND) {
10566 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10567 return DAG.getNode(ISD::SHL, DL, VT, N0, NewOp1);
10568 }
10569
10570 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
10571 if (N0.getOpcode() == ISD::SHL) {
10572 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
10573 ConstantSDNode *RHS) {
10574 APInt c1 = LHS->getAPIntValue();
10575 APInt c2 = RHS->getAPIntValue();
10576 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10577 return (c1 + c2).uge(OpSizeInBits);
10578 };
10579 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
10580 return DAG.getConstant(0, DL, VT);
10581
10582 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
10583 ConstantSDNode *RHS) {
10584 APInt c1 = LHS->getAPIntValue();
10585 APInt c2 = RHS->getAPIntValue();
10586 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10587 return (c1 + c2).ult(OpSizeInBits);
10588 };
10589 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
10590 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
10591 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
10592 }
10593 }
10594
10595 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
10596 // For this to be valid, the second form must not preserve any of the bits
10597 // that are shifted out by the inner shift in the first form. This means
10598 // the outer shift size must be >= the number of bits added by the ext.
10599 // As a corollary, we don't care what kind of ext it is.
10600 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
10601 N0.getOpcode() == ISD::ANY_EXTEND ||
10602 N0.getOpcode() == ISD::SIGN_EXTEND) &&
10603 N0.getOperand(0).getOpcode() == ISD::SHL) {
10604 SDValue N0Op0 = N0.getOperand(0);
10605 SDValue InnerShiftAmt = N0Op0.getOperand(1);
10606 EVT InnerVT = N0Op0.getValueType();
10607 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
10608
10609 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
10610 ConstantSDNode *RHS) {
10611 APInt c1 = LHS->getAPIntValue();
10612 APInt c2 = RHS->getAPIntValue();
10613 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10614 return c2.uge(OpSizeInBits - InnerBitwidth) &&
10615 (c1 + c2).uge(OpSizeInBits);
10616 };
10617 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
10618 /*AllowUndefs*/ false,
10619 /*AllowTypeMismatch*/ true))
10620 return DAG.getConstant(0, DL, VT);
10621
10622 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
10623 ConstantSDNode *RHS) {
10624 APInt c1 = LHS->getAPIntValue();
10625 APInt c2 = RHS->getAPIntValue();
10626 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10627 return c2.uge(OpSizeInBits - InnerBitwidth) &&
10628 (c1 + c2).ult(OpSizeInBits);
10629 };
10630 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
10631 /*AllowUndefs*/ false,
10632 /*AllowTypeMismatch*/ true)) {
10633 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
10634 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
10635 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
10636 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
10637 }
10638 }
10639
10640 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
10641 // Only fold this if the inner zext has no other uses to avoid increasing
10642 // the total number of instructions.
10643 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
10644 N0.getOperand(0).getOpcode() == ISD::SRL) {
10645 SDValue N0Op0 = N0.getOperand(0);
10646 SDValue InnerShiftAmt = N0Op0.getOperand(1);
10647
10648 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10649 APInt c1 = LHS->getAPIntValue();
10650 APInt c2 = RHS->getAPIntValue();
10651 zeroExtendToMatch(c1, c2);
10652 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
10653 };
10654 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
10655 /*AllowUndefs*/ false,
10656 /*AllowTypeMismatch*/ true)) {
10657 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
10658 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
10659 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
10660 AddToWorklist(NewSHL.getNode());
10661 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
10662 }
10663 }
10664
10665 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
10666 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
10667 ConstantSDNode *RHS) {
10668 const APInt &LHSC = LHS->getAPIntValue();
10669 const APInt &RHSC = RHS->getAPIntValue();
10670 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
10671 LHSC.getZExtValue() <= RHSC.getZExtValue();
10672 };
10673
10674 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
10675 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
10676 if (N0->getFlags().hasExact()) {
10677 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10678 /*AllowUndefs*/ false,
10679 /*AllowTypeMismatch*/ true)) {
10680 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10681 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10682 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10683 }
10684 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10685 /*AllowUndefs*/ false,
10686 /*AllowTypeMismatch*/ true)) {
10687 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10688 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10689 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
10690 }
10691 }
10692
10693 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
10694 // (and (srl x, (sub c1, c2)), MASK)
10695 // Only fold this if the inner shift has no other uses -- if it does,
10696 // folding this will increase the total number of instructions.
10697 if (N0.getOpcode() == ISD::SRL &&
10698 (N0.getOperand(1) == N1 || N0.hasOneUse()) &&
10699 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
10700 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10701 /*AllowUndefs*/ false,
10702 /*AllowTypeMismatch*/ true)) {
10703 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10704 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10705 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10706 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
10707 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
10708 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10709 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10710 }
10711 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10712 /*AllowUndefs*/ false,
10713 /*AllowTypeMismatch*/ true)) {
10714 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10715 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10716 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10717 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
10718 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10719 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10720 }
10721 }
10722 }
10723
10724 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
10725 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
10726 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
10727 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
10728 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
10729 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
10730 }
10731
10732 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
10733 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
10734 // Variant of version done on multiply, except mul by a power of 2 is turned
10735 // into a shift.
10736 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
10737 TLI.isDesirableToCommuteWithShift(N, Level)) {
10738 SDValue N01 = N0.getOperand(1);
10739 if (SDValue Shl1 =
10740 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
10741 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
10742 AddToWorklist(Shl0.getNode());
10743 SDNodeFlags Flags;
10744 // Preserve the disjoint flag for Or.
10745 if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint())
10746 Flags.setDisjoint(true);
10747 return DAG.getNode(N0.getOpcode(), DL, VT, Shl0, Shl1, Flags);
10748 }
10749 }
10750
10751 // fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
10752 // TODO: Add zext/add_nuw variant with suitable test coverage
10753 // TODO: Should we limit this with isLegalAddImmediate?
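// e.g. (shl (sext (add_nsw x, 7)), 2) -> (add (shl (sext x), 2), 28); nsw
// lets the sign extension be distributed over the add first.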
10754 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
10755 N0.getOperand(0).getOpcode() == ISD::ADD &&
10756 N0.getOperand(0)->getFlags().hasNoSignedWrap() &&
10757 TLI.isDesirableToCommuteWithShift(N, Level)) {
10758 SDValue Add = N0.getOperand(0);
10759 SDLoc DL(N0);
10760 if (SDValue ExtC = DAG.FoldConstantArithmetic(N0.getOpcode(), DL, VT,
10761 {Add.getOperand(1)})) {
10762 if (SDValue ShlC =
10763 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {ExtC, N1})) {
10764 SDValue ExtX = DAG.getNode(N0.getOpcode(), DL, VT, Add.getOperand(0));
10765 SDValue ShlX = DAG.getNode(ISD::SHL, DL, VT, ExtX, N1);
10766 return DAG.getNode(ISD::ADD, DL, VT, ShlX, ShlC);
10767 }
10768 }
10769 }
10770
10771 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
10772 if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
10773 SDValue N01 = N0.getOperand(1);
10774 if (SDValue Shl =
10775 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1}))
10776 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), Shl);
10777 }
10778
10779 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10780 if (N1C && !N1C->isOpaque())
10781 if (SDValue NewSHL = visitShiftByConstant(N))
10782 return NewSHL;
10783
10784 // fold (shl X, cttz(Y)) -> (mul (Y & -Y), X) if cttz is unsupported on the
10785 // target.
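// For non-zero Y, (Y & -Y) isolates the lowest set bit, i.e. it equals
// (1 << cttz(Y)), so the multiply reproduces the shift without a CTTZ node.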
10786 if (((N1.getOpcode() == ISD::CTTZ &&
10787 VT.getScalarSizeInBits() <= ShiftVT.getScalarSizeInBits()) ||
10789 N1.hasOneUse() && !TLI.isOperationLegalOrCustom(ISD::CTTZ, ShiftVT) &&
10791 SDValue Y = N1.getOperand(0);
10792 SDLoc DL(N);
10793 SDValue NegY = DAG.getNegative(Y, DL, ShiftVT);
10794 SDValue And =
10795 DAG.getZExtOrTrunc(DAG.getNode(ISD::AND, DL, ShiftVT, Y, NegY), DL, VT);
10796 return DAG.getNode(ISD::MUL, DL, VT, And, N0);
10797 }
10798
10799 if (SimplifyDemandedBits(SDValue(N, 0)))
10800 return SDValue(N, 0);
10801
10802 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
10803 if (N0.getOpcode() == ISD::VSCALE && N1C) {
10804 const APInt &C0 = N0.getConstantOperandAPInt(0);
10805 const APInt &C1 = N1C->getAPIntValue();
10806 return DAG.getVScale(DL, VT, C0 << C1);
10807 }
10808
10809 SDValue X;
10810 APInt VS0;
10811
10812 // fold (shl (X * vscale(VS0)), C1) -> (X * vscale(VS0 << C1))
10813 if (N1C && sd_match(N0, m_Mul(m_Value(X), m_VScale(m_ConstInt(VS0))))) {
10814 SDNodeFlags Flags;
10815 Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap() &&
10816 N0->getFlags().hasNoUnsignedWrap());
10817
10818 SDValue VScale = DAG.getVScale(DL, VT, VS0 << N1C->getAPIntValue());
10819 return DAG.getNode(ISD::MUL, DL, VT, X, VScale, Flags);
10820 }
10821
10822 // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
10823 APInt ShlVal;
10824 if (N0.getOpcode() == ISD::STEP_VECTOR &&
10825 ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
10826 const APInt &C0 = N0.getConstantOperandAPInt(0);
10827 if (ShlVal.ult(C0.getBitWidth())) {
10828 APInt NewStep = C0 << ShlVal;
10829 return DAG.getStepVector(DL, VT, NewStep);
10830 }
10831 }
10832
10833 return SDValue();
10834}
10835
10836// Transform a right shift of a multiply into a multiply-high.
10837// Examples:
10838 // (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
10839 // (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
10840 static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
10841 const TargetLowering &TLI) {
10842 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
10843 "SRL or SRA node is required here!");
10844
10845 // Check the shift amount. Proceed with the transformation if the shift
10846 // amount is constant.
10847 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
10848 if (!ShiftAmtSrc)
10849 return SDValue();
10850
10851 // The operation feeding into the shift must be a multiply.
10852 SDValue ShiftOperand = N->getOperand(0);
10853 if (ShiftOperand.getOpcode() != ISD::MUL)
10854 return SDValue();
10855
10856 // Both operands must be equivalent extend nodes.
10857 SDValue LeftOp = ShiftOperand.getOperand(0);
10858 SDValue RightOp = ShiftOperand.getOperand(1);
10859
10860 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
10861 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
10862
10863 if (!IsSignExt && !IsZeroExt)
10864 return SDValue();
10865
10866 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
10867 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
10868
10869 // return true if U may use the lower bits of its operands
10870 auto UserOfLowerBits = [NarrowVTSize](SDNode *U) {
10871 if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) {
10872 return true;
10873 }
10874 ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1));
10875 if (!UShiftAmtSrc) {
10876 return true;
10877 }
10878 unsigned UShiftAmt = UShiftAmtSrc->getZExtValue();
10879 return UShiftAmt < NarrowVTSize;
10880 };
10881
10882 // If the lower part of the MUL is also used and MUL_LOHI is supported
10883 // do not introduce the MULH in favor of MUL_LOHI
10884 unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10885 if (!ShiftOperand.hasOneUse() &&
10886 TLI.isOperationLegalOrCustom(MulLoHiOp, NarrowVT) &&
10887 llvm::any_of(ShiftOperand->users(), UserOfLowerBits)) {
10888 return SDValue();
10889 }
10890
10891 SDValue MulhRightOp;
10892 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
10893 unsigned ActiveBits = IsSignExt
10894 ? Constant->getAPIntValue().getSignificantBits()
10895 : Constant->getAPIntValue().getActiveBits();
10896 if (ActiveBits > NarrowVTSize)
10897 return SDValue();
10898 MulhRightOp = DAG.getConstant(
10899 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
10900 NarrowVT);
10901 } else {
10902 if (LeftOp.getOpcode() != RightOp.getOpcode())
10903 return SDValue();
10904 // Check that the two extend nodes are the same type.
10905 if (NarrowVT != RightOp.getOperand(0).getValueType())
10906 return SDValue();
10907 MulhRightOp = RightOp.getOperand(0);
10908 }
10909
10910 EVT WideVT = LeftOp.getValueType();
10911 // Proceed with the transformation if the wide types match.
10912 assert((WideVT == RightOp.getValueType()) &&
10913 "Cannot have a multiply node with two different operand types.");
10914
10915 // Proceed with the transformation if the wide type is twice as large
10916 // as the narrow type.
10917 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
10918 return SDValue();
10919
10920 // Check the shift amount with the narrow type size.
10921 // Proceed with the transformation if the shift amount is the width
10922 // of the narrow type.
10923 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
10924 if (ShiftAmt != NarrowVTSize)
10925 return SDValue();
10926
10927 // If the operation feeding into the MUL is a sign extend (sext),
10928 // we use mulhs. Otherwise, zero extends (zext) use mulhu.
10929 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
10930
10931 // Combine to mulh if mulh is legal/custom for the narrow type on the target,
10932 // or, if it is a vector type, we can transform to an acceptable type and
10933 // rely on legalization to split/combine the result.
10934 EVT TransformVT = NarrowVT;
10935 if (NarrowVT.isVector()) {
10936 TransformVT = TLI.getLegalTypeToTransformTo(*DAG.getContext(), NarrowVT);
10937 if (TransformVT.getScalarType() != NarrowVT.getScalarType())
10938 return SDValue();
10939 }
10940 if (!TLI.isOperationLegalOrCustom(MulhOpcode, TransformVT))
10941 return SDValue();
10942
10943 SDValue Result =
10944 DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
10945 bool IsSigned = N->getOpcode() == ISD::SRA;
10946 return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
10947}
10948
10949// fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
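// e.g. (bswap (xor (bswap x), y)) -> (xor x, (bswap y)); the reordering is an
// involution and distributes over bitwise logic ops.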
10950 // This helper function accepts SDNodes with opcode ISD::BSWAP or ISD::BITREVERSE.
10951 static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG) {
10952 unsigned Opcode = N->getOpcode();
10953 if (Opcode != ISD::BSWAP && Opcode != ISD::BITREVERSE)
10954 return SDValue();
10955
10956 SDValue N0 = N->getOperand(0);
10957 EVT VT = N->getValueType(0);
10958 SDLoc DL(N);
10959 SDValue X, Y;
10960
10961 // If both operands are bswap/bitreverse, ignore the multiuse restriction.
10963 m_UnaryOp(Opcode, m_Value(Y))))))
10964 return DAG.getNode(N0.getOpcode(), DL, VT, X, Y);
10965
10966 // Otherwise need to ensure logic_op and bswap/bitreverse(x) have one use.
10968 m_OneUse(m_UnaryOp(Opcode, m_Value(X))), m_Value(Y))))) {
10969 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, Y);
10970 return DAG.getNode(N0.getOpcode(), DL, VT, X, NewBitReorder);
10971 }
10972
10973 return SDValue();
10974}
10975
10976SDValue DAGCombiner::visitSRA(SDNode *N) {
10977 SDValue N0 = N->getOperand(0);
10978 SDValue N1 = N->getOperand(1);
10979 if (SDValue V = DAG.simplifyShift(N0, N1))
10980 return V;
10981
10982 SDLoc DL(N);
10983 EVT VT = N0.getValueType();
10984 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10985
10986 // fold (sra c1, c2) -> c1 >>s c2
10987 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, DL, VT, {N0, N1}))
10988 return C;
10989
10990 // Arithmetic shifting an all-sign-bit value is a no-op.
10991 // fold (sra 0, x) -> 0
10992 // fold (sra -1, x) -> -1
10993 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
10994 return N0;
10995
10996 // fold vector ops
10997 if (VT.isVector())
10998 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10999 return FoldedVOp;
11000
11001 if (SDValue NewSel = foldBinOpIntoSelect(N))
11002 return NewSel;
11003
11004 ConstantSDNode *N1C = isConstOrConstSplat(N1);
11005
11006 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
11007 // clamp (add c1, c2) to max shift.
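// e.g. (sra (sra x, 3), 2) -> (sra x, 5); sums of OpSizeInBits or more clamp
// to OpSizeInBits-1, since sra saturates at the sign bit.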
11008 if (N0.getOpcode() == ISD::SRA) {
11009 EVT ShiftVT = N1.getValueType();
11010 EVT ShiftSVT = ShiftVT.getScalarType();
11011 SmallVector<SDValue, 16> ShiftValues;
11012
11013 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
11014 APInt c1 = LHS->getAPIntValue();
11015 APInt c2 = RHS->getAPIntValue();
11016 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
11017 APInt Sum = c1 + c2;
11018 unsigned ShiftSum =
11019 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
11020 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
11021 return true;
11022 };
11023 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
11024 SDValue ShiftValue;
11025 if (N1.getOpcode() == ISD::BUILD_VECTOR)
11026 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
11027 else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
11028 assert(ShiftValues.size() == 1 &&
11029 "Expected matchBinaryPredicate to return one element for "
11030 "SPLAT_VECTORs");
11031 ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
11032 } else
11033 ShiftValue = ShiftValues[0];
11034 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
11035 }
11036 }
11037
11038 // fold (sra (xor (sra x, c1), -1), c2) -> (xor (sra x, c3), -1)
11039 // This allows merging two arithmetic shifts even when there's a NOT in
11040 // between.
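// This works because sra commutes with NOT: ~(v >>s c) == (~v) >>s c, so the
// NOT can be hoisted out and c3 = min(c1 + c2, bitwidth - 1).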
11041 SDValue X;
11042 APInt C1;
11043 if (N1C && sd_match(N0, m_OneUse(m_Not(
11044 m_OneUse(m_Sra(m_Value(X), m_ConstInt(C1))))))) {
11045 APInt C2 = N1C->getAPIntValue();
11046 zeroExtendToMatch(C1, C2, 1 /* Overflow Bit */);
11047 APInt Sum = C1 + C2;
11048 unsigned ShiftSum = Sum.getLimitedValue(OpSizeInBits - 1);
11049 SDValue NewShift = DAG.getNode(
11050 ISD::SRA, DL, VT, X, DAG.getShiftAmountConstant(ShiftSum, VT, DL));
11051 return DAG.getNOT(DL, NewShift, VT);
11052 }
11053
11054 // fold (sra (shl X, m), (sub result_size, n))
11055 // -> (sign_extend (trunc (srl X, (sub (sub result_size, n), m)))) for
11056 // result_size - n != m.
11057 // If truncate is free for the target, the sext(trunc) form is likely to
11058 // result in better code.
11059 if (N0.getOpcode() == ISD::SHL && N1C) {
11060 // Get the two constants of the shifts, CN0 = m, CN = n.
11061 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
11062 if (N01C) {
11063 LLVMContext &Ctx = *DAG.getContext();
11064 // Determine what the truncate's result bitsize and type would be.
11065 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
11066
11067 if (VT.isVector())
11068 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
11069
11070 // Determine the residual right-shift amount.
11071 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
11072
11073 // If the shift is not a no-op (in which case this should be just a sign
11074 // extend already), the truncated to type is legal, sign_extend is legal
11075 // on that type, and the truncate to that type is both legal and free,
11076 // perform the transform.
11077 if ((ShiftAmt > 0) &&
11080 TLI.isTruncateFree(VT, TruncVT)) {
11081 SDValue Amt = DAG.getShiftAmountConstant(ShiftAmt, VT, DL);
11082 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
11083 N0.getOperand(0), Amt);
11084 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
11085 Shift);
11086 return DAG.getNode(ISD::SIGN_EXTEND, DL,
11087 N->getValueType(0), Trunc);
11088 }
11089 }
11090 }
11091
11092 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
11093 // sra (add (shl X, N1C), AddC), N1C -->
11094 // sext (add (trunc X to (width - N1C)), AddC')
11095 // sra (sub AddC, (shl X, N1C)), N1C -->
11096 // sext (sub AddC1',(trunc X to (width - N1C)))
11097 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
11098 N0.hasOneUse()) {
11099 bool IsAdd = N0.getOpcode() == ISD::ADD;
11100 SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
11101 if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
11102 Shl.hasOneUse()) {
11103 // TODO: AddC does not need to be a splat.
11104 if (ConstantSDNode *AddC =
11105 isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
11106 // Determine what the truncate's type would be and ask the target if
11107 // that is a free operation.
11108 LLVMContext &Ctx = *DAG.getContext();
11109 unsigned ShiftAmt = N1C->getZExtValue();
11110 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
11111 if (VT.isVector())
11112 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
11113
11114 // TODO: The simple type check probably belongs in the default hook
11115 // implementation and/or target-specific overrides (because
11116 // non-simple types likely require masking when legalized), but
11117 // that restriction may conflict with other transforms.
11118 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
11119 TLI.isTruncateFree(VT, TruncVT)) {
11120 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
11121 SDValue ShiftC =
11122 DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
11123 TruncVT.getScalarSizeInBits()),
11124 DL, TruncVT);
11125 SDValue Add;
11126 if (IsAdd)
11127 Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
11128 else
11129 Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
11130 return DAG.getSExtOrTrunc(Add, DL, VT);
11131 }
11132 }
11133 }
11134 }
11135
11136 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
11137 if (N1.getOpcode() == ISD::TRUNCATE &&
11138 N1.getOperand(0).getOpcode() == ISD::AND) {
11139 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
11140 return DAG.getNode(ISD::SRA, DL, VT, N0, NewOp1);
11141 }
11142
11143 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
11144 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
11145 // if c1 is equal to the number of bits the trunc removes
11146 // TODO - support non-uniform vector shift amounts.
11147 if (N0.getOpcode() == ISD::TRUNCATE &&
11148 (N0.getOperand(0).getOpcode() == ISD::SRL ||
11149 N0.getOperand(0).getOpcode() == ISD::SRA) &&
11150 N0.getOperand(0).hasOneUse() &&
11151 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
11152 SDValue N0Op0 = N0.getOperand(0);
11153 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
11154 EVT LargeVT = N0Op0.getValueType();
11155 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
11156 if (LargeShift->getAPIntValue() == TruncBits) {
11157 EVT LargeShiftVT = getShiftAmountTy(LargeVT);
11158 SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT);
11159 Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt,
11160 DAG.getConstant(TruncBits, DL, LargeShiftVT));
11161 SDValue SRA =
11162 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
11163 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
11164 }
11165 }
11166 }
11167
11168 // Simplify, based on bits shifted out of the LHS.
11169 if (SimplifyDemandedBits(SDValue(N, 0)))
11170 return SDValue(N, 0);
11171
11172 // If the sign bit is known to be zero, switch this to a SRL.
11173 if (DAG.SignBitIsZero(N0))
11174 return DAG.getNode(ISD::SRL, DL, VT, N0, N1);
11175
11176 if (N1C && !N1C->isOpaque())
11177 if (SDValue NewSRA = visitShiftByConstant(N))
11178 return NewSRA;
11179
11180 // Try to transform this shift into a multiply-high if
11181 // it matches the appropriate pattern detected in combineShiftToMULH.
11182 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
11183 return MULH;
11184
11185 // Attempt to convert a sra of a load into a narrower sign-extending load.
11186 if (SDValue NarrowLoad = reduceLoadWidth(N))
11187 return NarrowLoad;
11188
11189 if (SDValue AVG = foldShiftToAvg(N, DL))
11190 return AVG;
11191
11192 return SDValue();
11193}
11194
11195SDValue DAGCombiner::visitSRL(SDNode *N) {
11196 SDValue N0 = N->getOperand(0);
11197 SDValue N1 = N->getOperand(1);
11198 if (SDValue V = DAG.simplifyShift(N0, N1))
11199 return V;
11200
11201 SDLoc DL(N);
11202 EVT VT = N0.getValueType();
11203 EVT ShiftVT = N1.getValueType();
11204 unsigned OpSizeInBits = VT.getScalarSizeInBits();
11205
11206 // fold (srl c1, c2) -> c1 >>u c2
11207 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, DL, VT, {N0, N1}))
11208 return C;
11209
11210 // fold vector ops
11211 if (VT.isVector())
11212 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
11213 return FoldedVOp;
11214
11215 if (SDValue NewSel = foldBinOpIntoSelect(N))
11216 return NewSel;
11217
11218 // if (srl x, c) is known to be zero, return 0
11219 ConstantSDNode *N1C = isConstOrConstSplat(N1);
11220 if (N1C &&
11221 DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
11222 return DAG.getConstant(0, DL, VT);
11223
11224 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
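// e.g. (srl (srl x, 3), 2) -> (srl x, 5); if c1 + c2 reaches the bit width,
// every bit has been shifted out and the result is 0.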
11225 if (N0.getOpcode() == ISD::SRL) {
11226 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
11227 ConstantSDNode *RHS) {
11228 APInt c1 = LHS->getAPIntValue();
11229 APInt c2 = RHS->getAPIntValue();
11230 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
11231 return (c1 + c2).uge(OpSizeInBits);
11232 };
11233 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
11234 return DAG.getConstant(0, DL, VT);
11235
11236 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
11237 ConstantSDNode *RHS) {
11238 APInt c1 = LHS->getAPIntValue();
11239 APInt c2 = RHS->getAPIntValue();
11240 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
11241 return (c1 + c2).ult(OpSizeInBits);
11242 };
11243 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
11244 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
11245 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
11246 }
11247 }
11248
11249 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
11250 N0.getOperand(0).getOpcode() == ISD::SRL) {
11251 SDValue InnerShift = N0.getOperand(0);
11252 // TODO - support non-uniform vector shift amounts.
11253 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
11254 uint64_t c1 = N001C->getZExtValue();
11255 uint64_t c2 = N1C->getZExtValue();
11256 EVT InnerShiftVT = InnerShift.getValueType();
11257 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
11258 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
11259 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
11260 // This is only valid if the OpSizeInBits + c1 = size of inner shift.
11261 if (c1 + OpSizeInBits == InnerShiftSize) {
11262 if (c1 + c2 >= InnerShiftSize)
11263 return DAG.getConstant(0, DL, VT);
11264 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
11265 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
11266 InnerShift.getOperand(0), NewShiftAmt);
11267 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
11268 }
11269 // In the more general case, we can clear the high bits after the shift:
11270 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
11271 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
11272 c1 + c2 < InnerShiftSize) {
11273 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
11274 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
11275 InnerShift.getOperand(0), NewShiftAmt);
11276 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
11277 OpSizeInBits - c2),
11278 DL, InnerShiftVT);
11279 SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
11280 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
11281 }
11282 }
11283 }
11284
11285 if (N0.getOpcode() == ISD::SHL) {
11286 // fold (srl (shl nuw x, c), c) -> x
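// nuw guarantees the shl discards no set bits, so shifting back down by the
// same amount restores x exactly.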
11287 if (N0.getOperand(1) == N1 && N0->getFlags().hasNoUnsignedWrap())
11288 return N0.getOperand(0);
11289
11290 // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2)), MASK) or
11291 // (and (srl x, (sub c2, c1)), MASK)
11292 if ((N0.getOperand(1) == N1 || N0->hasOneUse()) &&
11293 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
11294 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
11295 ConstantSDNode *RHS) {
11296 const APInt &LHSC = LHS->getAPIntValue();
11297 const APInt &RHSC = RHS->getAPIntValue();
11298 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
11299 LHSC.getZExtValue() <= RHSC.getZExtValue();
11300 };
11301 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
11302 /*AllowUndefs*/ false,
11303 /*AllowTypeMismatch*/ true)) {
11304 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
11305 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
11306 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
11307 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
11308 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
11309 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
11310 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
11311 }
11312 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
11313 /*AllowUndefs*/ false,
11314 /*AllowTypeMismatch*/ true)) {
11315 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
11316 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
11317 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
11318 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
11319 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
11320 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
11321 }
11322 }
11323 }
11324
11325 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
11326 // TODO - support non-uniform vector shift amounts.
11327 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
11328 // Shifting in all undef bits?
11329 EVT SmallVT = N0.getOperand(0).getValueType();
11330 unsigned BitSize = SmallVT.getScalarSizeInBits();
11331 if (N1C->getAPIntValue().uge(BitSize))
11332 return DAG.getUNDEF(VT);
11333
11334 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
11335 uint64_t ShiftAmt = N1C->getZExtValue();
11336 SDLoc DL0(N0);
11337 SDValue SmallShift =
11338 DAG.getNode(ISD::SRL, DL0, SmallVT, N0.getOperand(0),
11339 DAG.getShiftAmountConstant(ShiftAmt, SmallVT, DL0));
11340 AddToWorklist(SmallShift.getNode());
11341 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
11342 return DAG.getNode(ISD::AND, DL, VT,
11343 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
11344 DAG.getConstant(Mask, DL, VT));
11345 }
11346 }
11347
11348 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
11349 // bit, which is unmodified by sra.
11350 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
11351 if (N0.getOpcode() == ISD::SRA)
11352 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1);
11353 }
11354
11355 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit), and x has a power
11356 // of two bitwidth. The "5" represents (log2 (bitwidth x)).
11357 if (N1C && N0.getOpcode() == ISD::CTLZ &&
11358 isPowerOf2_32(OpSizeInBits) &&
11359 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
11360 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
11361
11362 // If any of the input bits are KnownOne, then the input couldn't be all
11363 // zeros, thus the result of the srl will always be zero.
11364 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
11365
11366 // If all of the bits input to the ctlz node are known to be zero, then
11367 // the result of the ctlz is "32" and the result of the shift is one.
11368 APInt UnknownBits = ~Known.Zero;
11369 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
11370
11371 // Otherwise, check to see if there is exactly one bit input to the ctlz.
11372 if (UnknownBits.isPowerOf2()) {
11373 // Okay, we know that only the single bit specified by UnknownBits
11374 // could be set on input to the CTLZ node. If this bit is set, the SRL
11375 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
11376 // to an SRL/XOR pair, which is likely to simplify more.
11377 unsigned ShAmt = UnknownBits.countr_zero();
11378 SDValue Op = N0.getOperand(0);
11379
11380 if (ShAmt) {
11381 SDLoc DL(N0);
11382 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
11383 DAG.getShiftAmountConstant(ShAmt, VT, DL));
11384 AddToWorklist(Op.getNode());
11385 }
11386 return DAG.getNode(ISD::XOR, DL, VT, Op, DAG.getConstant(1, DL, VT));
11387 }
11388 }
11389
11390 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
11391 if (N1.getOpcode() == ISD::TRUNCATE &&
11392 N1.getOperand(0).getOpcode() == ISD::AND) {
11393 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
11394 return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
11395 }
11396
11397 // fold (srl (logic_op x, (shl (zext y), c1)), c1)
11398 // -> (logic_op (srl x, c1), (zext y))
11399 // c1 <= leadingzeros(zext(y))
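// srl distributes over the bitwise op, and (srl (shl (zext y), c1), c1) gives
// back (zext y) as long as c1 does not exceed the zeros introduced by zext,
// i.e. the shl cannot push any set bits out the top.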
11400 SDValue X, ZExtY;
11401 if (N1C && sd_match(N0, m_OneUse(m_BitwiseLogic(
11402 m_Value(X),
11405 m_Specific(N1))))))) {
11406 unsigned NumLeadingZeros = ZExtY.getScalarValueSizeInBits() -
11408 if (N1C->getZExtValue() <= NumLeadingZeros)
11409 return DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
11410 DAG.getNode(ISD::SRL, SDLoc(N0), VT, X, N1), ZExtY);
11411 }
11412
11413 // fold operands of srl based on knowledge that the low bits are not
11414 // demanded.
11415 if (SimplifyDemandedBits(SDValue(N, 0)))
11416 return SDValue(N, 0);
11417
11418 if (N1C && !N1C->isOpaque())
11419 if (SDValue NewSRL = visitShiftByConstant(N))
11420 return NewSRL;
11421
11422 // Attempt to convert a srl of a load into a narrower zero-extending load.
11423 if (SDValue NarrowLoad = reduceLoadWidth(N))
11424 return NarrowLoad;
11425
11426 // Here is a common situation. We want to optimize:
11427 //
11428 // %a = ...
11429 // %b = and i32 %a, 2
11430 // %c = srl i32 %b, 1
11431 // brcond i32 %c ...
11432 //
11433 // into
11434 //
11435 // %a = ...
11436 // %b = and %a, 2
11437 // %c = setcc eq %b, 0
11438 // brcond %c ...
11439 //
11440 // However, after the source operand of SRL is optimized into AND, the SRL
11441 // itself may not be optimized further. Look for it and add the BRCOND into
11442 // the worklist.
11443 //
11444 // This also tends to happen for binary operations when SimplifyDemandedBits
11445 // is involved.
11446 //
11447 // FIXME: This is unnecessary if we process the DAG in topological order,
11448 // which we plan to do. This workaround can be removed once the DAG is
11449 // processed in topological order.
11450 if (N->hasOneUse()) {
11451 SDNode *User = *N->user_begin();
11452
11453 // Look past the truncate.
11454 if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse())
11455 User = *User->user_begin();
11456
11457 if (User->getOpcode() == ISD::BRCOND || User->getOpcode() == ISD::AND ||
11458 User->getOpcode() == ISD::OR || User->getOpcode() == ISD::XOR)
11459 AddToWorklist(User);
11460 }
11461
11462 // Try to transform this shift into a multiply-high if
11463 // it matches the appropriate pattern detected in combineShiftToMULH.
11464 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
11465 return MULH;
11466
11467 if (SDValue AVG = foldShiftToAvg(N, DL))
11468 return AVG;
11469
11470 return SDValue();
11471}
11472
11473SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
11474 EVT VT = N->getValueType(0);
11475 SDValue N0 = N->getOperand(0);
11476 SDValue N1 = N->getOperand(1);
11477 SDValue N2 = N->getOperand(2);
11478 bool IsFSHL = N->getOpcode() == ISD::FSHL;
11479 unsigned BitWidth = VT.getScalarSizeInBits();
11480 SDLoc DL(N);
11481
11482 // fold (fshl/fshr C0, C1, C2) -> C3
11483 if (SDValue C =
11484 DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1, N2}))
11485 return C;
11486
11487 // fold (fshl N0, N1, 0) -> N0
11488 // fold (fshr N0, N1, 0) -> N1
11489 if (isPowerOf2_32(BitWidth))
11490 if (DAG.MaskedValueIsZero(
11491 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
11492 return IsFSHL ? N0 : N1;
11493
11494 auto IsUndefOrZero = [](SDValue V) {
11495 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
11496 };
11497
11498 // TODO - support non-uniform vector shift amounts.
11499 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
11500 EVT ShAmtTy = N2.getValueType();
11501
11502 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
11503 if (Cst->getAPIntValue().uge(BitWidth)) {
11504 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
11505 return DAG.getNode(N->getOpcode(), DL, VT, N0, N1,
11506 DAG.getConstant(RotAmt, DL, ShAmtTy));
11507 }
11508
11509 unsigned ShAmt = Cst->getZExtValue();
11510 if (ShAmt == 0)
11511 return IsFSHL ? N0 : N1;
11512
11513 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
11514 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
11515 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
11516 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
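// e.g. for i8: (fshl 0, y, 3) == (srl y, 5) and (fshl x, 0, 3) == (shl x, 3),
// since the concatenated zero half only contributes zero bits.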
11517 if (IsUndefOrZero(N0))
11518 return DAG.getNode(
11519 ISD::SRL, DL, VT, N1,
11520 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt, DL, ShAmtTy));
11521 if (IsUndefOrZero(N1))
11522 return DAG.getNode(
11523 ISD::SHL, DL, VT, N0,
11524 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt, DL, ShAmtTy));
11525
11526 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
11527 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
11528 // TODO - bigendian support once we have test coverage.
11529 // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
11530 // TODO - permit LHS EXTLOAD if extensions are shifted out.
11531 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
11532 !DAG.getDataLayout().isBigEndian()) {
11533 auto *LHS = dyn_cast<LoadSDNode>(N0);
11534 auto *RHS = dyn_cast<LoadSDNode>(N1);
11535 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
11536 LHS->getAddressSpace() == RHS->getAddressSpace() &&
11537 (LHS->hasNUsesOfValue(1, 0) || RHS->hasNUsesOfValue(1, 0)) &&
11539 if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
11540 SDLoc DL(RHS);
11541 uint64_t PtrOff =
11542 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
11543 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
11544 unsigned Fast = 0;
11545 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
11546 RHS->getAddressSpace(), NewAlign,
11547 RHS->getMemOperand()->getFlags(), &Fast) &&
11548 Fast) {
11549 SDValue NewPtr = DAG.getMemBasePlusOffset(
11550 RHS->getBasePtr(), TypeSize::getFixed(PtrOff), DL);
11551 AddToWorklist(NewPtr.getNode());
11552 SDValue Load = DAG.getLoad(
11553 VT, DL, RHS->getChain(), NewPtr,
11554 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
11555 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
11556 DAG.makeEquivalentMemoryOrdering(LHS, Load.getValue(1));
11557 DAG.makeEquivalentMemoryOrdering(RHS, Load.getValue(1));
11558 return Load;
11559 }
11560 }
11561 }
11562 }
11563 }
11564
11565 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
11566 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
11567 // iff we know the shift amount is in range.
11568 // TODO: when is it worth doing SUB(BW, N2) as well?
11569 if (isPowerOf2_32(BitWidth)) {
11570 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
11571 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
11572 return DAG.getNode(ISD::SRL, DL, VT, N1, N2);
11573 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
11574 return DAG.getNode(ISD::SHL, DL, VT, N0, N2);
11575 }
11576
11577 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
11578 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
11579 // TODO: Investigate flipping this rotate if only one is legal.
11580 // If funnel shift is legal as well we might be better off avoiding
11581 // non-constant (BW - N2).
11582 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
11583 if (N0 == N1 && hasOperation(RotOpc, VT))
11584 return DAG.getNode(RotOpc, DL, VT, N0, N2);
11585
11586 // Simplify, based on bits shifted out of N0/N1.
11587 if (SimplifyDemandedBits(SDValue(N, 0)))
11588 return SDValue(N, 0);
11589
11590 return SDValue();
11591}
11592
11593SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
11594 SDValue N0 = N->getOperand(0);
11595 SDValue N1 = N->getOperand(1);
11596 if (SDValue V = DAG.simplifyShift(N0, N1))
11597 return V;
11598
11599 SDLoc DL(N);
11600 EVT VT = N0.getValueType();
11601
11602 // fold (*shlsat c1, c2) -> c1<<c2
11603 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
11604 return C;
11605
11606 ConstantSDNode *N1C = isConstOrConstSplat(N1);
11607
11608 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
11609 // fold (sshlsat x, c) -> (shl x, c)
11610 if (N->getOpcode() == ISD::SSHLSAT && N1C &&
11611 N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
11612 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
11613
11614 // fold (ushlsat x, c) -> (shl x, c)
11615 if (N->getOpcode() == ISD::USHLSAT && N1C &&
11616 N1C->getAPIntValue().ule(
11617 DAG.computeKnownBits(N0).countMinLeadingZeros()))
11618 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
11619 }
11620
11621 return SDValue();
11622}
11623
11624 // Given an ABS node, detect the following patterns:
11625// (ABS (SUB (EXTEND a), (EXTEND b))).
11626// (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))).
11627// Generates UABD/SABD instruction.
11628SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
11629 EVT SrcVT = N->getValueType(0);
11630
11631 if (N->getOpcode() == ISD::TRUNCATE)
11632 N = N->getOperand(0).getNode();
11633
11634 EVT VT = N->getValueType(0);
11635 SDValue Op0, Op1;
11636
11637 if (!sd_match(N, m_Abs(m_Sub(m_Value(Op0), m_Value(Op1)))))
11638 return SDValue();
11639
11640 SDValue AbsOp0 = N->getOperand(0);
11641 unsigned Opc0 = Op0.getOpcode();
11642
11643 // Check if the operands of the sub are (zero|sign)-extended, otherwise
11644 // fallback to ValueTracking.
11645 if (Opc0 != Op1.getOpcode() ||
11646 (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
11647 Opc0 != ISD::SIGN_EXTEND_INREG)) {
11648 // fold (abs (sub nsw x, y)) -> abds(x, y)
11649 // Don't fold this for unsupported types as we lose the NSW handling.
11650 if (hasOperation(ISD::ABDS, VT) && TLI.preferABDSToABSWithNSW(VT) &&
11651 (AbsOp0->getFlags().hasNoSignedWrap() ||
11652 DAG.willNotOverflowSub(/*IsSigned=*/true, Op0, Op1))) {
11653 SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
11654 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11655 }
11656 // fold (abs (sub x, y)) -> abdu(x, y)
11657 if (hasOperation(ISD::ABDU, VT) && DAG.SignBitIsZero(Op0) &&
11658 DAG.SignBitIsZero(Op1)) {
11659 SDValue ABD = DAG.getNode(ISD::ABDU, DL, VT, Op0, Op1);
11660 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11661 }
11662 return SDValue();
11663 }
11664
11665 EVT VT0, VT1;
11666 if (Opc0 == ISD::SIGN_EXTEND_INREG) {
11667 VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT();
11668 VT1 = cast<VTSDNode>(Op1.getOperand(1))->getVT();
11669 } else {
11670 VT0 = Op0.getOperand(0).getValueType();
11671 VT1 = Op1.getOperand(0).getValueType();
11672 }
11673 unsigned ABDOpcode = (Opc0 == ISD::ZERO_EXTEND) ? ISD::ABDU : ISD::ABDS;
11674
11675 // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
11676 // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
11677 EVT MaxVT = VT0.bitsGT(VT1) ? VT0 : VT1;
11678 if ((VT0 == MaxVT || Op0->hasOneUse()) &&
11679 (VT1 == MaxVT || Op1->hasOneUse()) &&
11680 (!LegalTypes || hasOperation(ABDOpcode, MaxVT))) {
11681 SDValue ABD = DAG.getNode(ABDOpcode, DL, MaxVT,
11682 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op0),
11683 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op1));
11684 ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD);
11685 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11686 }
11687
11688 // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
11689 // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
11690 if (!LegalOperations || hasOperation(ABDOpcode, VT)) {
11691 SDValue ABD = DAG.getNode(ABDOpcode, DL, VT, Op0, Op1);
11692 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11693 }
11694
11695 return SDValue();
11696}
11697
11698SDValue DAGCombiner::visitABS(SDNode *N) {
11699 SDValue N0 = N->getOperand(0);
11700 EVT VT = N->getValueType(0);
11701 SDLoc DL(N);
11702
11703 // fold (abs c1) -> c2
11704 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, DL, VT, {N0}))
11705 return C;
11706 // fold (abs (abs x)) -> (abs x)
11707 if (N0.getOpcode() == ISD::ABS)
11708 return N0;
11709 // fold (abs x) -> x iff not-negative
11710 if (DAG.SignBitIsZero(N0))
11711 return N0;
11712
11713 if (SDValue ABD = foldABSToABD(N, DL))
11714 return ABD;
11715
11716 // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
11717 // iff zero_extend/truncate are free.
11718 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
11719 EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT();
11720 if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) &&
11721 TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) &&
11722 hasOperation(ISD::ABS, ExtVT)) {
11723 return DAG.getNode(
11724 ISD::ZERO_EXTEND, DL, VT,
11725 DAG.getNode(ISD::ABS, DL, ExtVT,
11726 DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N0.getOperand(0))));
11727 }
11728 }
11729
11730 return SDValue();
11731}
11732
11733SDValue DAGCombiner::visitBSWAP(SDNode *N) {
11734 SDValue N0 = N->getOperand(0);
11735 EVT VT = N->getValueType(0);
11736 SDLoc DL(N);
11737
11738 // fold (bswap c1) -> c2
11739 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BSWAP, DL, VT, {N0}))
11740 return C;
11741 // fold (bswap (bswap x)) -> x
11742 if (N0.getOpcode() == ISD::BSWAP)
11743 return N0.getOperand(0);
11744
11745 // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
11746 // isn't supported, it will be expanded to bswap followed by a manual reversal
11747 // of bits in each byte. By placing bswaps before bitreverse, we can remove
11748 // the two bswaps if the bitreverse gets expanded.
11749 if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
11750 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11751 return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
11752 }
11753
11754 // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
11755 // iff c >= bw/2 (i.e. lower half is known zero)
11756 unsigned BW = VT.getScalarSizeInBits();
11757 if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
11758 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11759 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
11760 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11761 ShAmt->getZExtValue() >= (BW / 2) &&
11762 (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
11763 TLI.isTruncateFree(VT, HalfVT) &&
11764 (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
11765 SDValue Res = N0.getOperand(0);
11766 if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
11767 Res = DAG.getNode(ISD::SHL, DL, VT, Res,
11768 DAG.getShiftAmountConstant(NewShAmt, VT, DL));
11769 Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
11770 Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
11771 return DAG.getZExtOrTrunc(Res, DL, VT);
11772 }
11773 }
11774
11775 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
11776 // inverse-shift-of-bswap:
11777 // bswap (X u<< C) --> (bswap X) u>> C
11778 // bswap (X u>> C) --> (bswap X) u<< C
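// e.g. for i32: bswap (x << 8) == (bswap x) >> 8; shifting by whole bytes and
// then byte-swapping is the same as byte-swapping and shifting the other way.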
11779 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
11780 N0.hasOneUse()) {
11781 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11782 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11783 ShAmt->getZExtValue() % 8 == 0) {
11784 SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11785 unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
11786 return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
11787 }
11788 }
11789
11790 if (SDValue V = foldBitOrderCrossLogicOp(N, DAG))
11791 return V;
11792
11793 return SDValue();
11794}
11795
11796SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
11797 SDValue N0 = N->getOperand(0);
11798 EVT VT = N->getValueType(0);
11799 SDLoc DL(N);
11800
11801 // fold (bitreverse c1) -> c2
11802 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0}))
11803 return C;
11804
11805 // fold (bitreverse (bitreverse x)) -> x
11806 if (N0.getOpcode() == ISD::BITREVERSE)
11807 return N0.getOperand(0);
11808
11809 SDValue X, Y;
11810
11811 // fold (bitreverse (lshr (bitreverse x), y)) -> (shl x, y)
11812 if ((!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
11814 return DAG.getNode(ISD::SHL, DL, VT, X, Y);
11815
11816 // fold (bitreverse (shl (bitreverse x), y)) -> (lshr x, y)
11817 if ((!LegalOperations || TLI.isOperationLegal(ISD::SRL, VT)) &&
11819 return DAG.getNode(ISD::SRL, DL, VT, X, Y);
11820
11821 return SDValue();
11822}
11823
11824SDValue DAGCombiner::visitCTLZ(SDNode *N) {
11825 SDValue N0 = N->getOperand(0);
11826 EVT VT = N->getValueType(0);
11827 SDLoc DL(N);
11828
11829 // fold (ctlz c1) -> c2
11830 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTLZ, DL, VT, {N0}))
11831 return C;
11832
11833 // If the value is known never to be zero, switch to the undef version.
11834 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT))
11835 if (DAG.isKnownNeverZero(N0))
11836 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, N0);
11837
11838 return SDValue();
11839}
11840
11841SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
11842 SDValue N0 = N->getOperand(0);
11843 EVT VT = N->getValueType(0);
11844 SDLoc DL(N);
11845
11846 // fold (ctlz_zero_undef c1) -> c2
11847 if (SDValue C =
11848 DAG.FoldConstantArithmetic(ISD::CTLZ_ZERO_UNDEF, DL, VT, {N0}))
11849 return C;
11850 return SDValue();
11851}
11852
11853SDValue DAGCombiner::visitCTTZ(SDNode *N) {
11854 SDValue N0 = N->getOperand(0);
11855 EVT VT = N->getValueType(0);
11856 SDLoc DL(N);
11857
11858 // fold (cttz c1) -> c2
11859 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTTZ, DL, VT, {N0}))
11860 return C;
11861
11862 // If the value is known never to be zero, switch to the undef version.
11863 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT))
11864 if (DAG.isKnownNeverZero(N0))
11865 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, DL, VT, N0);
11866
11867 return SDValue();
11868}
11869
11870SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
11871 SDValue N0 = N->getOperand(0);
11872 EVT VT = N->getValueType(0);
11873 SDLoc DL(N);
11874
11875 // fold (cttz_zero_undef c1) -> c2
11876 if (SDValue C =
11877 DAG.FoldConstantArithmetic(ISD::CTTZ_ZERO_UNDEF, DL, VT, {N0}))
11878 return C;
11879 return SDValue();
11880}
11881
11882SDValue DAGCombiner::visitCTPOP(SDNode *N) {
11883 SDValue N0 = N->getOperand(0);
11884 EVT VT = N->getValueType(0);
11885 unsigned NumBits = VT.getScalarSizeInBits();
11886 SDLoc DL(N);
11887
11888 // fold (ctpop c1) -> c2
11889 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTPOP, DL, VT, {N0}))
11890 return C;
11891
11892 // If the source is being shifted, but doesn't affect any active bits,
11893 // then we can call CTPOP on the shift source directly.
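// e.g. ctpop (srl x, 3) == ctpop x when the low 3 bits of x are known zero,
// since only zero bits are discarded by the shift.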
11894 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SHL) {
11895 if (ConstantSDNode *AmtC = isConstOrConstSplat(N0.getOperand(1))) {
11896 const APInt &Amt = AmtC->getAPIntValue();
11897 if (Amt.ult(NumBits)) {
11898 KnownBits KnownSrc = DAG.computeKnownBits(N0.getOperand(0));
11899 if ((N0.getOpcode() == ISD::SRL &&
11900 Amt.ule(KnownSrc.countMinTrailingZeros())) ||
11901 (N0.getOpcode() == ISD::SHL &&
11902 Amt.ule(KnownSrc.countMinLeadingZeros()))) {
11903 return DAG.getNode(ISD::CTPOP, DL, VT, N0.getOperand(0));
11904 }
11905 }
11906 }
11907 }
11908
11909 // If the upper bits are known to be zero, then see if it's profitable to
11910 // only count the lower bits.
11911 if (VT.isScalarInteger() && NumBits > 8 && (NumBits & 1) == 0) {
11912 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), NumBits / 2);
11913 if (hasOperation(ISD::CTPOP, HalfVT) &&
11914 TLI.isTypeDesirableForOp(ISD::CTPOP, HalfVT) &&
11915 TLI.isTruncateFree(N0, HalfVT) && TLI.isZExtFree(HalfVT, VT)) {
11916 APInt UpperBits = APInt::getHighBitsSet(NumBits, NumBits / 2);
11917 if (DAG.MaskedValueIsZero(N0, UpperBits)) {
11918 SDValue PopCnt = DAG.getNode(ISD::CTPOP, DL, HalfVT,
11919 DAG.getZExtOrTrunc(N0, DL, HalfVT));
11920 return DAG.getZExtOrTrunc(PopCnt, DL, VT);
11921 }
11922 }
11923 }
11924
11925 return SDValue();
11926}
11927
11928 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
11929 SDValue RHS, const SDNodeFlags Flags,
11930 const TargetLowering &TLI) {
11931 EVT VT = LHS.getValueType();
11932 if (!VT.isFloatingPoint())
11933 return false;
11934
11935 return Flags.hasNoSignedZeros() &&
11937 (Flags.hasNoNaNs() ||
11938 (DAG.isKnownNeverNaN(RHS) && DAG.isKnownNeverNaN(LHS)));
11939}
11940
11941 static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
11942 SDValue RHS, SDValue True, SDValue False,
11943 ISD::CondCode CC,
11944 const TargetLowering &TLI,
11945 SelectionDAG &DAG) {
11946 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
11947 switch (CC) {
11948 case ISD::SETOLT:
11949 case ISD::SETOLE:
11950 case ISD::SETLT:
11951 case ISD::SETLE:
11952 case ISD::SETULT:
11953 case ISD::SETULE: {
11954 // Since it's known never to be NaN to get here already, either fminnum or
11955 // fminnum_ieee is OK. Try the IEEE version first, since fminnum is
11956 // expanded in terms of it.
11957 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
11958 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11959 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11960
11961 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
11962 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11963 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11964 return SDValue();
11965 }
11966 case ISD::SETOGT:
11967 case ISD::SETOGE:
11968 case ISD::SETGT:
11969 case ISD::SETGE:
11970 case ISD::SETUGT:
11971 case ISD::SETUGE: {
11972 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
11973 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11974 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11975
11976 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
11977 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11978 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11979 return SDValue();
11980 }
11981 default:
11982 return SDValue();
11983 }
11984}
11985
11986 // Convert (sr[al] (add n[su]w x, y), 1) -> (avgfloor[su] x, y)
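// avgfloor computes (x + y) >> 1 without an intermediate wrap, so the add
// must carry the matching nuw/nsw flag for the pre-shift sum to be exact.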
11987SDValue DAGCombiner::foldShiftToAvg(SDNode *N, const SDLoc &DL) {
11988 const unsigned Opcode = N->getOpcode();
11989 if (Opcode != ISD::SRA && Opcode != ISD::SRL)
11990 return SDValue();
11991
11992 EVT VT = N->getValueType(0);
11993 bool IsUnsigned = Opcode == ISD::SRL;
11994
11995 // Captured values.
11996 SDValue A, B, Add;
11997
11998 // Match floor average as it is common to both floor/ceil avgs.
11999 if (sd_match(N, m_BinOp(Opcode,
12001 m_One()))) {
12002 // Decide whether signed or unsigned.
12003 unsigned FloorISD = IsUnsigned ? ISD::AVGFLOORU : ISD::AVGFLOORS;
12004 if (!hasOperation(FloorISD, VT))
12005 return SDValue();
12006
12007 // Can't optimize adds that may wrap.
12008 if ((IsUnsigned && !Add->getFlags().hasNoUnsignedWrap()) ||
12009 (!IsUnsigned && !Add->getFlags().hasNoSignedWrap()))
12010 return SDValue();
12011
12012 return DAG.getNode(FloorISD, DL, N->getValueType(0), {A, B});
12013 }
12014
12015 return SDValue();
12016}
12017
12018SDValue DAGCombiner::foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT) {
12019 unsigned Opc = N->getOpcode();
12020 SDValue X, Y, Z;
12021 if (sd_match(
12023 return DAG.getNode(Opc, DL, VT, X,
12024 DAG.getNOT(DL, DAG.getNode(ISD::SUB, DL, VT, Y, Z), VT));
12025
12027 m_Value(Z)))))
12028 return DAG.getNode(Opc, DL, VT, X,
12029 DAG.getNOT(DL, DAG.getNode(ISD::ADD, DL, VT, Y, Z), VT));
12030
12031 return SDValue();
12032}
12033
12034/// Generate Min/Max node
12035SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
12036 SDValue RHS, SDValue True,
12037 SDValue False, ISD::CondCode CC) {
12038 if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
12039 return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);
12040
12041 // If we can't directly match this, try to see if we can pull an fneg out of
12042 // the select.
12044 True, DAG, LegalOperations, ForCodeSize);
12045 if (!NegTrue)
12046 return SDValue();
12047
12048 HandleSDNode NegTrueHandle(NegTrue);
12049
12050 // Try to unfold an fneg from the select if we are comparing the negated
12051 // constant.
12052 //
12053 // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K))
12054 //
12055 // TODO: Handle fabs
12056 if (LHS == NegTrue) {
12057 // If we can't directly match this, try to see if we can pull an fneg out of
12058 // the select.
12060 RHS, DAG, LegalOperations, ForCodeSize);
12061 if (NegRHS) {
12062 HandleSDNode NegRHSHandle(NegRHS);
12063 if (NegRHS == False) {
12064 SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
12065 False, CC, TLI, DAG);
12066 if (Combined)
12067 return DAG.getNode(ISD::FNEG, DL, VT, Combined);
12068 }
12069 }
12070 }
12071
12072 return SDValue();
12073}
12074
12075/// If a (v)select has a condition value that is a sign-bit test, try to smear
12076/// the condition operand sign-bit across the value width and use it as a mask.
12077 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL,
12078 SelectionDAG &DAG) {
12079 SDValue Cond = N->getOperand(0);
12080 SDValue C1 = N->getOperand(1);
12081 SDValue C2 = N->getOperand(2);
12083 return SDValue();
12084
12085 EVT VT = N->getValueType(0);
12086 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
12087 VT != Cond.getOperand(0).getValueType())
12088 return SDValue();
12089
12090 // The inverted-condition + commuted-select variants of these patterns are
12091 // canonicalized to these forms in IR.
12092 SDValue X = Cond.getOperand(0);
12093 SDValue CondC = Cond.getOperand(1);
12094 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
12095 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
12096 isAllOnesOrAllOnesSplat(C2)) {
12097 // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
12098 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
12099 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
12100 return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
12101 }
12102 if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
12103 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
12104 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
12105 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
12106 return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
12107 }
12108 return SDValue();
12109}
12110
12111 static bool shouldConvertSelectOfConstantsToMath(SDValue Cond, EVT VT,
12112 const TargetLowering &TLI) {
12113 if (!TLI.convertSelectOfConstantsToMath(VT))
12114 return false;
12115
12116 if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
12117 return true;
12119 return true;
12120
12121 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
12122 if (CC == ISD::SETLT && isNullOrNullSplat(Cond.getOperand(1)))
12123 return true;
12124 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1)))
12125 return true;
12126
12127 return false;
12128}
12129
12130SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
12131 SDValue Cond = N->getOperand(0);
12132 SDValue N1 = N->getOperand(1);
12133 SDValue N2 = N->getOperand(2);
12134 EVT VT = N->getValueType(0);
12135 EVT CondVT = Cond.getValueType();
12136 SDLoc DL(N);
12137
12138 if (!VT.isInteger())
12139 return SDValue();
12140
12141 auto *C1 = dyn_cast<ConstantSDNode>(N1);
12142 auto *C2 = dyn_cast<ConstantSDNode>(N2);
12143 if (!C1 || !C2)
12144 return SDValue();
12145
12146 if (CondVT != MVT::i1 || LegalOperations) {
12147 // fold (select Cond, 0, 1) -> (xor Cond, 1)
12148 // We can't do this reliably if integer based booleans have different contents
12149 // to floating point based booleans. This is because we can't tell whether we
12150 // have an integer-based boolean or a floating-point-based boolean unless we
12151 // can find the SETCC that produced it and inspect its operands. This is
12152 // fairly easy if C is the SETCC node, but it can potentially be
12153 // undiscoverable (or not reasonably discoverable). For example, it could be
12154 // in another basic block or it could require searching a complicated
12155 // expression.
12156 if (CondVT.isInteger() &&
12157 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
12158 TargetLowering::ZeroOrOneBooleanContent &&
12159 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
12160 TargetLowering::ZeroOrOneBooleanContent &&
12161 C1->isZero() && C2->isOne()) {
12162 SDValue NotCond =
12163 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
12164 if (VT.bitsEq(CondVT))
12165 return NotCond;
12166 return DAG.getZExtOrTrunc(NotCond, DL, VT);
12167 }
12168
12169 return SDValue();
12170 }
12171
12172 // Only do this before legalization to avoid conflicting with target-specific
12173 // transforms in the other direction (create a select from a zext/sext). There
12174 // is also a target-independent combine here in DAGCombiner in the other
12175 // direction for (select Cond, -1, 0) when the condition is not i1.
12176 assert(CondVT == MVT::i1 && !LegalOperations);
12177
12178 // select Cond, 1, 0 --> zext (Cond)
12179 if (C1->isOne() && C2->isZero())
12180 return DAG.getZExtOrTrunc(Cond, DL, VT);
12181
12182 // select Cond, -1, 0 --> sext (Cond)
12183 if (C1->isAllOnes() && C2->isZero())
12184 return DAG.getSExtOrTrunc(Cond, DL, VT);
12185
12186 // select Cond, 0, 1 --> zext (!Cond)
12187 if (C1->isZero() && C2->isOne()) {
12188 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
12189 NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT);
12190 return NotCond;
12191 }
12192
12193 // select Cond, 0, -1 --> sext (!Cond)
12194 if (C1->isZero() && C2->isAllOnes()) {
12195 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
12196 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
12197 return NotCond;
12198 }
12199
12200 // Use a target hook because some targets may prefer to transform in the
12201 // other direction.
12202 if (!shouldConvertSelectOfConstantsToMath(Cond, VT, TLI))
12203 return SDValue();
12204
12205 // For any constants that differ by 1, we can transform the select into
12206 // an extend and add.
12207 const APInt &C1Val = C1->getAPIntValue();
12208 const APInt &C2Val = C2->getAPIntValue();
12209
12210 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
12211 if (C1Val - 1 == C2Val) {
12212 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
12213 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
12214 }
12215
12216 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
12217 if (C1Val + 1 == C2Val) {
12218 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
12219 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
12220 }
12221
12222 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
12223 if (C1Val.isPowerOf2() && C2Val.isZero()) {
12224 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
12225 SDValue ShAmtC =
12226 DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
12227 return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
12228 }
12229
12230 // select Cond, -1, C --> or (sext Cond), C
12231 if (C1->isAllOnes()) {
12232 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
12233 return DAG.getNode(ISD::OR, DL, VT, Cond, N2);
12234 }
12235
12236 // select Cond, C, -1 --> or (sext (not Cond)), C
12237 if (C2->isAllOnes()) {
12238 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
12239 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
12240 return DAG.getNode(ISD::OR, DL, VT, NotCond, N1);
12241 }
12242
 12243 if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
 12244 return V;
12245
12246 return SDValue();
12247}
12248
12249template <class MatchContextClass>
 12250static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL,
 12251 SelectionDAG &DAG) {
12252 assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT ||
12253 N->getOpcode() == ISD::VP_SELECT) &&
12254 "Expected a (v)(vp.)select");
12255 SDValue Cond = N->getOperand(0);
12256 SDValue T = N->getOperand(1), F = N->getOperand(2);
12257 EVT VT = N->getValueType(0);
12258 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12259 MatchContextClass matcher(DAG, TLI, N);
12260
12261 if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
12262 return SDValue();
12263
12264 // select Cond, Cond, F --> or Cond, freeze(F)
12265 // select Cond, 1, F --> or Cond, freeze(F)
12266 if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
12267 return matcher.getNode(ISD::OR, DL, VT, Cond, DAG.getFreeze(F));
12268
12269 // select Cond, T, Cond --> and Cond, freeze(T)
12270 // select Cond, T, 0 --> and Cond, freeze(T)
12271 if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
12272 return matcher.getNode(ISD::AND, DL, VT, Cond, DAG.getFreeze(T));
12273
12274 // select Cond, T, 1 --> or (not Cond), freeze(T)
12275 if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
12276 SDValue NotCond =
12277 matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
12278 return matcher.getNode(ISD::OR, DL, VT, NotCond, DAG.getFreeze(T));
12279 }
12280
12281 // select Cond, 0, F --> and (not Cond), freeze(F)
12282 if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
12283 SDValue NotCond =
12284 matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
12285 return matcher.getNode(ISD::AND, DL, VT, NotCond, DAG.getFreeze(F));
12286 }
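// Note that the freeze calls above are needed for poison safety: the original
// select only propagates poison from the arm it actually chooses, whereas the
// replacement logic op observes both operands unconditionally, so the arm
// that might never have been selected is frozen first.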
12287
12288 return SDValue();
12289}
12290
12292 SDValue N0 = N->getOperand(0);
12293 SDValue N1 = N->getOperand(1);
12294 SDValue N2 = N->getOperand(2);
12295 EVT VT = N->getValueType(0);
12296 unsigned EltSizeInBits = VT.getScalarSizeInBits();
12297
12298 SDValue Cond0, Cond1;
12299 ISD::CondCode CC;
12300 if (!sd_match(N0, m_OneUse(m_SetCC(m_Value(Cond0), m_Value(Cond1),
12301 m_CondCode(CC)))) ||
12302 VT != Cond0.getValueType())
12303 return SDValue();
12304
12305 // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
12306 // compare is inverted from that pattern ("Cond0 s> -1").
12307 if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
12308 ; // This is the pattern we are looking for.
12309 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
12310 std::swap(N1, N2);
12311 else
12312 return SDValue();
12313
12314 // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & freeze(N1)
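// For example, with i32 elements this is (Cond0 s>> 31) & freeze(N1): the
// arithmetic shift smears the sign bit into an all-ones or all-zeros lane
// mask, so the AND keeps N1 exactly when Cond0 is negative.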
12315 if (isNullOrNullSplat(N2)) {
12316 SDLoc DL(N);
12317 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
12318 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
12319 return DAG.getNode(ISD::AND, DL, VT, Sra, DAG.getFreeze(N1));
12320 }
12321
12322 // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | freeze(N2)
12323 if (isAllOnesOrAllOnesSplat(N1)) {
12324 SDLoc DL(N);
12325 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
12326 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
12327 return DAG.getNode(ISD::OR, DL, VT, Sra, DAG.getFreeze(N2));
12328 }
12329
12330 // If we have to invert the sign bit mask, only do that transform if the
12331 // target has a bitwise 'and not' instruction (the invert is free).
 12332 // (Cond0 s< 0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & freeze(N2)
12333 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12334 if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
12335 SDLoc DL(N);
12336 SDValue ShiftAmt = DAG.getShiftAmountConstant(EltSizeInBits - 1, VT, DL);
12337 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
12338 SDValue Not = DAG.getNOT(DL, Sra, VT);
12339 return DAG.getNode(ISD::AND, DL, VT, Not, DAG.getFreeze(N2));
12340 }
12341
12342 // TODO: There's another pattern in this family, but it may require
12343 // implementing hasOrNot() to check for profitability:
12344 // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | freeze(N2)
12345
12346 return SDValue();
12347}
12348
12349// Match SELECTs with absolute difference patterns.
12350// (select (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
12351// (select (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
12352// (select (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
12353// (select (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
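// For example, with unsigned i32 operands, (select (setugt a, b), (sub a, b),
// (sub b, a)) computes |a - b| on both paths, which is exactly (abdu a, b).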
12354SDValue DAGCombiner::foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
12355 SDValue False, ISD::CondCode CC,
12356 const SDLoc &DL) {
12357 bool IsSigned = isSignedIntSetCC(CC);
12358 unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
12359 EVT VT = LHS.getValueType();
12360
12361 if (LegalOperations && !hasOperation(ABDOpc, VT))
12362 return SDValue();
12363
12364 switch (CC) {
12365 case ISD::SETGT:
12366 case ISD::SETGE:
12367 case ISD::SETUGT:
12368 case ISD::SETUGE:
 12369 if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
 12370 sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))))
 12371 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12372 if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
12373 sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
12374 hasOperation(ABDOpc, VT))
12375 return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
12376 break;
12377 case ISD::SETLT:
12378 case ISD::SETLE:
12379 case ISD::SETULT:
12380 case ISD::SETULE:
 12381 if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
 12382 sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))))
 12383 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12384 if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
12385 sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
12386 hasOperation(ABDOpc, VT))
12387 return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
12388 break;
12389 default:
12390 break;
12391 }
12392
12393 return SDValue();
12394}
12395
12396// ([v]select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
12397// ([v]select (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
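// For example, with i8 elements and C = 10 (so ~C = 245): for x = 20 the add
// wraps to 9 = x - C - 1 and umin(9, x) picks it, matching the 'ugt' arm;
// for x = 5 the add wraps to 250 and umin(250, x) returns x, matching the
// other arm.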
12398SDValue DAGCombiner::foldSelectToUMin(SDValue LHS, SDValue RHS, SDValue True,
12399 SDValue False, ISD::CondCode CC,
12400 const SDLoc &DL) {
12401 APInt C;
12402 EVT VT = True.getValueType();
12403 if (sd_match(RHS, m_ConstInt(C)) && hasUMin(VT)) {
12404 if (CC == ISD::SETUGT && LHS == False &&
12405 sd_match(True, m_Add(m_Specific(False), m_SpecificInt(~C)))) {
12406 SDValue AddC = DAG.getConstant(~C, DL, VT);
12407 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, False, AddC);
12408 return DAG.getNode(ISD::UMIN, DL, VT, Add, False);
12409 }
12410 if (CC == ISD::SETULT && LHS == True &&
12411 sd_match(False, m_Add(m_Specific(True), m_SpecificInt(-C)))) {
12412 SDValue AddC = DAG.getConstant(-C, DL, VT);
12413 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, True, AddC);
12414 return DAG.getNode(ISD::UMIN, DL, VT, True, Add);
12415 }
12416 }
12417 return SDValue();
12418}
12419
12420SDValue DAGCombiner::visitSELECT(SDNode *N) {
12421 SDValue N0 = N->getOperand(0);
12422 SDValue N1 = N->getOperand(1);
12423 SDValue N2 = N->getOperand(2);
12424 EVT VT = N->getValueType(0);
12425 EVT VT0 = N0.getValueType();
12426 SDLoc DL(N);
12427 SDNodeFlags Flags = N->getFlags();
12428
12429 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12430 return V;
12431
 12432 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DL, DAG))
 12433 return V;
12434
12435 // select (not Cond), N1, N2 -> select Cond, N2, N1
12436 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
12437 return DAG.getSelect(DL, VT, F, N2, N1, Flags);
12438
12439 if (SDValue V = foldSelectOfConstants(N))
12440 return V;
12441
12442 // If we can fold this based on the true/false value, do so.
12443 if (SimplifySelectOps(N, N1, N2))
12444 return SDValue(N, 0); // Don't revisit N.
12445
12446 if (VT0 == MVT::i1) {
12447 // The code in this block deals with the following 2 equivalences:
12448 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
12449 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
12450 // The target can specify its preferred form with the
12451 // shouldNormalizeToSelectSequence() callback. However we always transform
 12452 // to the right anyway if we find the inner select already exists in the DAG
12453 // and we always transform to the left side if we know that we can further
12454 // optimize the combination of the conditions.
12455 bool normalizeToSequence =
 12456 TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
 12457 // select (and Cond0, Cond1), X, Y
12458 // -> select Cond0, (select Cond1, X, Y), Y
12459 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
12460 SDValue Cond0 = N0->getOperand(0);
12461 SDValue Cond1 = N0->getOperand(1);
12462 SDValue InnerSelect =
12463 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
12464 if (normalizeToSequence || !InnerSelect.use_empty())
12465 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
12466 InnerSelect, N2, Flags);
12467 // Cleanup on failure.
12468 if (InnerSelect.use_empty())
12469 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
12470 }
12471 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
12472 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
12473 SDValue Cond0 = N0->getOperand(0);
12474 SDValue Cond1 = N0->getOperand(1);
12475 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
12476 Cond1, N1, N2, Flags);
12477 if (normalizeToSequence || !InnerSelect.use_empty())
12478 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
12479 InnerSelect, Flags);
12480 // Cleanup on failure.
12481 if (InnerSelect.use_empty())
12482 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
12483 }
12484
12485 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
12486 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
12487 SDValue N1_0 = N1->getOperand(0);
12488 SDValue N1_1 = N1->getOperand(1);
12489 SDValue N1_2 = N1->getOperand(2);
12490 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
12491 // Create the actual and node if we can generate good code for it.
12492 if (!normalizeToSequence) {
12493 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
12494 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
12495 N2, Flags);
12496 }
12497 // Otherwise see if we can optimize the "and" to a better pattern.
12498 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
12499 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
12500 N2, Flags);
12501 }
12502 }
12503 }
12504 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
12505 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
12506 SDValue N2_0 = N2->getOperand(0);
12507 SDValue N2_1 = N2->getOperand(1);
12508 SDValue N2_2 = N2->getOperand(2);
12509 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
12510 // Create the actual or node if we can generate good code for it.
12511 if (!normalizeToSequence) {
12512 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
12513 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
12514 N2_2, Flags);
12515 }
12516 // Otherwise see if we can optimize to a better pattern.
12517 if (SDValue Combined = visitORLike(N0, N2_0, DL))
12518 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
12519 N2_2, Flags);
12520 }
12521 }
12522
12523 // select usubo(x, y).overflow, (sub y, x), (usubo x, y) -> abdu(x, y)
12524 if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
12525 N2.getNode() == N0.getNode() && N2.getResNo() == 0 &&
12526 N1.getOpcode() == ISD::SUB && N2.getOperand(0) == N1.getOperand(1) &&
12527 N2.getOperand(1) == N1.getOperand(0) &&
12528 (!LegalOperations || TLI.isOperationLegal(ISD::ABDU, VT)))
12529 return DAG.getNode(ISD::ABDU, DL, VT, N0.getOperand(0), N0.getOperand(1));
12530
12531 // select usubo(x, y).overflow, (usubo x, y), (sub y, x) -> neg (abdu x, y)
12532 if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
12533 N1.getNode() == N0.getNode() && N1.getResNo() == 0 &&
12534 N2.getOpcode() == ISD::SUB && N2.getOperand(0) == N1.getOperand(1) &&
12535 N2.getOperand(1) == N1.getOperand(0) &&
12536 (!LegalOperations || TLI.isOperationLegal(ISD::ABDU, VT)))
12537 return DAG.getNegative(
12538 DAG.getNode(ISD::ABDU, DL, VT, N0.getOperand(0), N0.getOperand(1)),
12539 DL, VT);
12540 }
12541
12542 // Fold selects based on a setcc into other things, such as min/max/abs.
12543 if (N0.getOpcode() == ISD::SETCC) {
 12544 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
 12545 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
 12546
12547 // select (fcmp lt x, y), x, y -> fminnum x, y
12548 // select (fcmp gt x, y), x, y -> fmaxnum x, y
12549 //
12550 // This is OK if we don't care what happens if either operand is a NaN.
12551 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, Flags, TLI))
12552 if (SDValue FMinMax =
12553 combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
12554 return FMinMax;
12555
12556 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
12557 // This is conservatively limited to pre-legal-operations to give targets
12558 // a chance to reverse the transform if they want to do that. Also, it is
12559 // unlikely that the pattern would be formed late, so it's probably not
12560 // worth going through the other checks.
12561 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
12562 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
12563 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
12564 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
12565 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
12566 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
12567 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
12568 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
12569 //
12570 // The IR equivalent of this transform would have this form:
12571 // %a = add %x, C
12572 // %c = icmp ugt %x, ~C
12573 // %r = select %c, -1, %a
12574 // =>
12575 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
12576 // %u0 = extractvalue %u, 0
12577 // %u1 = extractvalue %u, 1
12578 // %r = select %u1, -1, %u0
12579 SDVTList VTs = DAG.getVTList(VT, VT0);
12580 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
12581 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
12582 }
12583 }
12584
12585 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
 12586 (!LegalOperations &&
 12587 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
12588 // Any flags available in a select/setcc fold will be on the setcc as they
12589 // migrated from fcmp
12590 return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2,
12591 N0.getOperand(2), N0->getFlags());
12592 }
12593
12594 if (SDValue ABD = foldSelectToABD(Cond0, Cond1, N1, N2, CC, DL))
12595 return ABD;
12596
12597 if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
12598 return NewSel;
12599
12600 // (select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
12601 // (select (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
12602 if (SDValue UMin = foldSelectToUMin(Cond0, Cond1, N1, N2, CC, DL))
12603 return UMin;
12604 }
12605
12606 if (!VT.isVector())
12607 if (SDValue BinOp = foldSelectOfBinops(N))
12608 return BinOp;
12609
12610 if (SDValue R = combineSelectAsExtAnd(N0, N1, N2, DL, DAG))
12611 return R;
12612
12613 return SDValue();
12614}
12615
12616// This function assumes all the vselect's arguments are CONCAT_VECTOR
12617// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
 12618static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
 12619 SDLoc DL(N);
12620 SDValue Cond = N->getOperand(0);
12621 SDValue LHS = N->getOperand(1);
12622 SDValue RHS = N->getOperand(2);
12623 EVT VT = N->getValueType(0);
12624 int NumElems = VT.getVectorNumElements();
12625 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
12626 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
12627 Cond.getOpcode() == ISD::BUILD_VECTOR);
12628
12629 // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
12630 // binary ones here.
12631 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
12632 return SDValue();
12633
12634 // We're sure we have an even number of elements due to the
12635 // concat_vectors we have as arguments to vselect.
 12636 // Skip BV elements until we find one that's not an UNDEF.
 12637 // After we find a non-UNDEF element, keep looping until we get to half the
12638 // length of the BV and see if all the non-undef nodes are the same.
12639 ConstantSDNode *BottomHalf = nullptr;
12640 for (int i = 0; i < NumElems / 2; ++i) {
12641 if (Cond->getOperand(i)->isUndef())
12642 continue;
12643
12644 if (BottomHalf == nullptr)
12645 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
12646 else if (Cond->getOperand(i).getNode() != BottomHalf)
12647 return SDValue();
12648 }
12649
12650 // Do the same for the second half of the BuildVector
12651 ConstantSDNode *TopHalf = nullptr;
12652 for (int i = NumElems / 2; i < NumElems; ++i) {
12653 if (Cond->getOperand(i)->isUndef())
12654 continue;
12655
12656 if (TopHalf == nullptr)
12657 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
12658 else if (Cond->getOperand(i).getNode() != TopHalf)
12659 return SDValue();
12660 }
12661
12662 assert(TopHalf && BottomHalf &&
12663 "One half of the selector was all UNDEFs and the other was all the "
12664 "same value. This should have been addressed before this function.");
 12665 return DAG.getNode(
 12666 ISD::CONCAT_VECTORS, DL, VT,
 12667 BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
12668 TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
12669}
12670
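// Try to fold a uniform (splat) component of a gather/scatter-style index
// into the scalar base pointer. For example, an index of (add (splat S), V)
// can become base = BasePtr + S with index V, reusing the existing nodes.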
12671bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
12672 SelectionDAG &DAG, const SDLoc &DL) {
12673
12674 // Only perform the transformation when existing operands can be reused.
12675 if (IndexIsScaled)
12676 return false;
12677
12678 if (!isNullConstant(BasePtr) && !Index.hasOneUse())
12679 return false;
12680
12681 EVT VT = BasePtr.getValueType();
12682
12683 if (SDValue SplatVal = DAG.getSplatValue(Index);
12684 SplatVal && !isNullConstant(SplatVal) &&
12685 SplatVal.getValueType() == VT) {
12686 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12687 Index = DAG.getSplat(Index.getValueType(), DL, DAG.getConstant(0, DL, VT));
12688 return true;
12689 }
12690
12691 if (Index.getOpcode() != ISD::ADD)
12692 return false;
12693
12694 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0));
12695 SplatVal && SplatVal.getValueType() == VT) {
12696 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12697 Index = Index.getOperand(1);
12698 return true;
12699 }
12700 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1));
12701 SplatVal && SplatVal.getValueType() == VT) {
12702 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
12703 Index = Index.getOperand(0);
12704 return true;
12705 }
12706 return false;
12707}
12708
12709// Fold sext/zext of index into index type.
12710bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
12711 SelectionDAG &DAG) {
12712 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12713
12714 // It's always safe to look through zero extends.
12715 if (Index.getOpcode() == ISD::ZERO_EXTEND) {
12716 if (TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
12717 IndexType = ISD::UNSIGNED_SCALED;
12718 Index = Index.getOperand(0);
12719 return true;
12720 }
12721 if (ISD::isIndexTypeSigned(IndexType)) {
12722 IndexType = ISD::UNSIGNED_SCALED;
12723 return true;
12724 }
12725 }
12726
12727 // It's only safe to look through sign extends when Index is signed.
12728 if (Index.getOpcode() == ISD::SIGN_EXTEND &&
12729 ISD::isIndexTypeSigned(IndexType) &&
12730 TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
12731 Index = Index.getOperand(0);
12732 return true;
12733 }
12734
12735 return false;
12736}
12737
12738SDValue DAGCombiner::visitVPSCATTER(SDNode *N) {
12739 VPScatterSDNode *MSC = cast<VPScatterSDNode>(N);
12740 SDValue Mask = MSC->getMask();
12741 SDValue Chain = MSC->getChain();
12742 SDValue Index = MSC->getIndex();
12743 SDValue Scale = MSC->getScale();
12744 SDValue StoreVal = MSC->getValue();
12745 SDValue BasePtr = MSC->getBasePtr();
12746 SDValue VL = MSC->getVectorLength();
12747 ISD::MemIndexType IndexType = MSC->getIndexType();
12748 SDLoc DL(N);
12749
12750 // Zap scatters with a zero mask.
 12751 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
 12752 return Chain;
12753
12754 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
12755 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
12756 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12757 DL, Ops, MSC->getMemOperand(), IndexType);
12758 }
12759
12760 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
12761 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
12762 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12763 DL, Ops, MSC->getMemOperand(), IndexType);
12764 }
12765
12766 return SDValue();
12767}
12768
12769SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
12770 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
12771 SDValue Mask = MSC->getMask();
12772 SDValue Chain = MSC->getChain();
12773 SDValue Index = MSC->getIndex();
12774 SDValue Scale = MSC->getScale();
12775 SDValue StoreVal = MSC->getValue();
12776 SDValue BasePtr = MSC->getBasePtr();
12777 ISD::MemIndexType IndexType = MSC->getIndexType();
12778 SDLoc DL(N);
12779
12780 // Zap scatters with a zero mask.
 12781 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
 12782 return Chain;
12783
12784 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
12785 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
12786 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12787 DL, Ops, MSC->getMemOperand(), IndexType,
12788 MSC->isTruncatingStore());
12789 }
12790
12791 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
12792 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
12793 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
12794 DL, Ops, MSC->getMemOperand(), IndexType,
12795 MSC->isTruncatingStore());
12796 }
12797
12798 return SDValue();
12799}
12800
12801SDValue DAGCombiner::visitMSTORE(SDNode *N) {
12802 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
12803 SDValue Mask = MST->getMask();
12804 SDValue Chain = MST->getChain();
12805 SDValue Value = MST->getValue();
12806 SDValue Ptr = MST->getBasePtr();
12807
12808 // Zap masked stores with a zero mask.
 12809 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
 12810 return Chain;
12811
12812 // Remove a masked store if base pointers and masks are equal.
12813 if (MaskedStoreSDNode *MST1 = dyn_cast<MaskedStoreSDNode>(Chain)) {
12814 if (MST->isUnindexed() && MST->isSimple() && MST1->isUnindexed() &&
12815 MST1->isSimple() && MST1->getBasePtr() == Ptr &&
12816 !MST->getBasePtr().isUndef() &&
12817 ((Mask == MST1->getMask() && MST->getMemoryVT().getStoreSize() ==
12818 MST1->getMemoryVT().getStoreSize()) ||
12820 TypeSize::isKnownLE(MST1->getMemoryVT().getStoreSize(),
12821 MST->getMemoryVT().getStoreSize())) {
12822 CombineTo(MST1, MST1->getChain());
12823 if (N->getOpcode() != ISD::DELETED_NODE)
12824 AddToWorklist(N);
12825 return SDValue(N, 0);
12826 }
12827 }
12828
 12829 // If this is a masked store with an all ones mask, we can use an unmasked store.
12830 // FIXME: Can we do this for indexed, compressing, or truncating stores?
12831 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
12832 !MST->isCompressingStore() && !MST->isTruncatingStore())
12833 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
12834 MST->getBasePtr(), MST->getPointerInfo(),
12835 MST->getBaseAlign(), MST->getMemOperand()->getFlags(),
12836 MST->getAAInfo());
12837
12838 // Try transforming N to an indexed store.
12839 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12840 return SDValue(N, 0);
12841
12842 if (MST->isTruncatingStore() && MST->isUnindexed() &&
12843 Value.getValueType().isInteger() &&
 12844 (!isa<ConstantSDNode>(Value) ||
 12845 !cast<ConstantSDNode>(Value)->isOpaque())) {
12846 APInt TruncDemandedBits =
 12847 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
 12848 MST->getMemoryVT().getScalarSizeInBits());
 12849
12850 // See if we can simplify the operation with
12851 // SimplifyDemandedBits, which only works if the value has a single use.
12852 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
12853 // Re-visit the store if anything changed and the store hasn't been merged
 12854 // with another node (N is deleted). SimplifyDemandedBits will add Value's
12855 // node back to the worklist if necessary, but we also need to re-visit
12856 // the Store node itself.
12857 if (N->getOpcode() != ISD::DELETED_NODE)
12858 AddToWorklist(N);
12859 return SDValue(N, 0);
12860 }
12861 }
12862
12863 // If this is a TRUNC followed by a masked store, fold this into a masked
12864 // truncating store. We can do this even if this is already a masked
12865 // truncstore.
 12866 // TODO: Try combining to a masked compress store if possible.
12867 if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
12868 MST->isUnindexed() && !MST->isCompressingStore() &&
12869 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
12870 MST->getMemoryVT(), LegalOperations)) {
12871 auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
12872 Value.getOperand(0).getValueType());
12873 return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
12874 MST->getOffset(), Mask, MST->getMemoryVT(),
12875 MST->getMemOperand(), MST->getAddressingMode(),
12876 /*IsTruncating=*/true);
12877 }
12878
12879 return SDValue();
12880}
12881
12882SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) {
12883 auto *SST = cast<VPStridedStoreSDNode>(N);
12884 EVT EltVT = SST->getValue().getValueType().getVectorElementType();
12885 // Combine strided stores with unit-stride to a regular VP store.
12886 if (auto *CStride = dyn_cast<ConstantSDNode>(SST->getStride());
12887 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12888 return DAG.getStoreVP(SST->getChain(), SDLoc(N), SST->getValue(),
12889 SST->getBasePtr(), SST->getOffset(), SST->getMask(),
12890 SST->getVectorLength(), SST->getMemoryVT(),
12891 SST->getMemOperand(), SST->getAddressingMode(),
12892 SST->isTruncatingStore(), SST->isCompressingStore());
12893 }
12894 return SDValue();
12895}
12896
12897SDValue DAGCombiner::visitVECTOR_COMPRESS(SDNode *N) {
12898 SDLoc DL(N);
12899 SDValue Vec = N->getOperand(0);
12900 SDValue Mask = N->getOperand(1);
12901 SDValue Passthru = N->getOperand(2);
12902 EVT VecVT = Vec.getValueType();
12903
12904 bool HasPassthru = !Passthru.isUndef();
12905
12906 APInt SplatVal;
12907 if (ISD::isConstantSplatVector(Mask.getNode(), SplatVal))
12908 return TLI.isConstTrueVal(Mask) ? Vec : Passthru;
12909
12910 if (Vec.isUndef() || Mask.isUndef())
12911 return Passthru;
12912
12913 // No need for potentially expensive compress if the mask is constant.
 12914 if (ISD::isBuildVectorOfConstantSDNodes(Mask.getNode())) {
 12915 SmallVector<SDValue, 16> Ops;
 12916 EVT ScalarVT = VecVT.getVectorElementType();
12917 unsigned NumSelected = 0;
12918 unsigned NumElmts = VecVT.getVectorNumElements();
12919 for (unsigned I = 0; I < NumElmts; ++I) {
12920 SDValue MaskI = Mask.getOperand(I);
12921 // We treat undef mask entries as "false".
12922 if (MaskI.isUndef())
12923 continue;
12924
12925 if (TLI.isConstTrueVal(MaskI)) {
12926 SDValue VecI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Vec,
12927 DAG.getVectorIdxConstant(I, DL));
12928 Ops.push_back(VecI);
12929 NumSelected++;
12930 }
12931 }
12932 for (unsigned Rest = NumSelected; Rest < NumElmts; ++Rest) {
12933 SDValue Val =
12934 HasPassthru
12935 ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Passthru,
12936 DAG.getVectorIdxConstant(Rest, DL))
12937 : DAG.getUNDEF(ScalarVT);
12938 Ops.push_back(Val);
12939 }
12940 return DAG.getBuildVector(VecVT, DL, Ops);
12941 }
12942
12943 return SDValue();
12944}
12945
12946SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
12947 VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
12948 SDValue Mask = MGT->getMask();
12949 SDValue Chain = MGT->getChain();
12950 SDValue Index = MGT->getIndex();
12951 SDValue Scale = MGT->getScale();
12952 SDValue BasePtr = MGT->getBasePtr();
12953 SDValue VL = MGT->getVectorLength();
12954 ISD::MemIndexType IndexType = MGT->getIndexType();
12955 SDLoc DL(N);
12956
12957 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12958 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12959 return DAG.getGatherVP(
12960 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12961 Ops, MGT->getMemOperand(), IndexType);
12962 }
12963
12964 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12965 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12966 return DAG.getGatherVP(
12967 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12968 Ops, MGT->getMemOperand(), IndexType);
12969 }
12970
12971 return SDValue();
12972}
12973
12974SDValue DAGCombiner::visitMGATHER(SDNode *N) {
12975 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
12976 SDValue Mask = MGT->getMask();
12977 SDValue Chain = MGT->getChain();
12978 SDValue Index = MGT->getIndex();
12979 SDValue Scale = MGT->getScale();
12980 SDValue PassThru = MGT->getPassThru();
12981 SDValue BasePtr = MGT->getBasePtr();
12982 ISD::MemIndexType IndexType = MGT->getIndexType();
12983 SDLoc DL(N);
12984
12985 // Zap gathers with a zero mask.
 12986 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
 12987 return CombineTo(N, PassThru, MGT->getChain());
12988
12989 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12990 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12991 return DAG.getMaskedGather(
12992 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12993 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12994 }
12995
12996 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12997 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12998 return DAG.getMaskedGather(
12999 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
13000 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
13001 }
13002
13003 return SDValue();
13004}
13005
13006SDValue DAGCombiner::visitMLOAD(SDNode *N) {
13007 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
13008 SDValue Mask = MLD->getMask();
13009
13010 // Zap masked loads with a zero mask.
 13011 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
 13012 return CombineTo(N, MLD->getPassThru(), MLD->getChain());
13013
 13014 // If this is a masked load with an all ones mask, we can use an unmasked load.
13015 // FIXME: Can we do this for indexed, expanding, or extending loads?
13016 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
13017 !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
13018 SDValue NewLd = DAG.getLoad(
13019 N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
13020 MLD->getPointerInfo(), MLD->getBaseAlign(),
13021 MLD->getMemOperand()->getFlags(), MLD->getAAInfo(), MLD->getRanges());
13022 return CombineTo(N, NewLd, NewLd.getValue(1));
13023 }
13024
13025 // Try transforming N to an indexed load.
13026 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
13027 return SDValue(N, 0);
13028
13029 return SDValue();
13030}
13031
13032SDValue DAGCombiner::visitMHISTOGRAM(SDNode *N) {
13033 MaskedHistogramSDNode *HG = cast<MaskedHistogramSDNode>(N);
13034 SDValue Chain = HG->getChain();
13035 SDValue Inc = HG->getInc();
13036 SDValue Mask = HG->getMask();
13037 SDValue BasePtr = HG->getBasePtr();
13038 SDValue Index = HG->getIndex();
13039 SDLoc DL(HG);
13040
13041 EVT MemVT = HG->getMemoryVT();
13042 EVT DataVT = Index.getValueType();
13043 MachineMemOperand *MMO = HG->getMemOperand();
13044 ISD::MemIndexType IndexType = HG->getIndexType();
13045
 13046 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
 13047 return Chain;
13048
13049 if (refineUniformBase(BasePtr, Index, HG->isIndexScaled(), DAG, DL) ||
13050 refineIndexType(Index, IndexType, DataVT, DAG)) {
13051 SDValue Ops[] = {Chain, Inc, Mask, BasePtr, Index,
13052 HG->getScale(), HG->getIntID()};
13053 return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, Ops,
13054 MMO, IndexType);
13055 }
13056
13057 return SDValue();
13058}
13059
13060SDValue DAGCombiner::visitPARTIAL_REDUCE_MLA(SDNode *N) {
13061 if (SDValue Res = foldPartialReduceMLAMulOp(N))
13062 return Res;
13063 if (SDValue Res = foldPartialReduceAdd(N))
13064 return Res;
13065 return SDValue();
13066}
13067
13068// partial_reduce_*mla(acc, mul(*ext(a), *ext(b)), splat(1))
13069// -> partial_reduce_*mla(acc, a, b)
13070//
13071// partial_reduce_*mla(acc, mul(*ext(x), splat(C)), splat(1))
13072// -> partial_reduce_*mla(acc, x, splat(C))
13073//
13074// partial_reduce_*mla(acc, sel(p, mul(*ext(a), *ext(b)), splat(0)), splat(1))
13075// -> partial_reduce_*mla(acc, sel(p, a, splat(0)), b)
13076//
13077// partial_reduce_*mla(acc, sel(p, mul(*ext(a), splat(C)), splat(0)), splat(1))
13078// -> partial_reduce_*mla(acc, sel(p, a, splat(0)), splat(C))
13079SDValue DAGCombiner::foldPartialReduceMLAMulOp(SDNode *N) {
13080 SDLoc DL(N);
13081 auto *Context = DAG.getContext();
13082 SDValue Acc = N->getOperand(0);
13083 SDValue Op1 = N->getOperand(1);
13084 SDValue Op2 = N->getOperand(2);
13085 unsigned Opc = Op1->getOpcode();
13086
13087 // Handle predication by moving the SELECT into the operand of the MUL.
13088 SDValue Pred;
13089 if (Opc == ISD::VSELECT && (isZeroOrZeroSplat(Op1->getOperand(2)) ||
13090 isZeroOrZeroSplatFP(Op1->getOperand(2)))) {
13091 Pred = Op1->getOperand(0);
13092 Op1 = Op1->getOperand(1);
13093 Opc = Op1->getOpcode();
13094 }
13095
13096 if (Opc != ISD::MUL && Opc != ISD::FMUL && Opc != ISD::SHL)
13097 return SDValue();
13098
13099 SDValue LHS = Op1->getOperand(0);
13100 SDValue RHS = Op1->getOperand(1);
13101
13102 // Try to treat (shl %a, %c) as (mul %a, (1 << %c)) for constant %c.
13103 if (Opc == ISD::SHL) {
13104 APInt C;
13105 if (!ISD::isConstantSplatVector(RHS.getNode(), C))
13106 return SDValue();
13107
13108 RHS =
13109 DAG.getSplatVector(RHS.getValueType(), DL,
13110 DAG.getConstant(APInt(C.getBitWidth(), 1).shl(C), DL,
13111 RHS.getValueType().getScalarType()));
13112 Opc = ISD::MUL;
13113 }
13114
13115 if (!(Opc == ISD::MUL && llvm::isOneOrOneSplat(Op2)) &&
13117 return SDValue();
13118
13119 auto IsIntOrFPExtOpcode = [](unsigned int Opcode) {
13120 return (ISD::isExtOpcode(Opcode) || Opcode == ISD::FP_EXTEND);
13121 };
13122
13123 unsigned LHSOpcode = LHS->getOpcode();
13124 if (!IsIntOrFPExtOpcode(LHSOpcode))
13125 return SDValue();
13126
13127 SDValue LHSExtOp = LHS->getOperand(0);
13128 EVT LHSExtOpVT = LHSExtOp.getValueType();
13129
13130 // When Pred is non-zero, set Op = select(Pred, Op, splat(0)) and freeze
13131 // OtherOp to keep the same semantics when moving the selects into the MUL
13132 // operands.
13133 auto ApplyPredicate = [&](SDValue &Op, SDValue &OtherOp) {
13134 if (Pred) {
13135 EVT OpVT = Op.getValueType();
13136 SDValue Zero = OpVT.isFloatingPoint() ? DAG.getConstantFP(0.0, DL, OpVT)
13137 : DAG.getConstant(0, DL, OpVT);
13138 Op = DAG.getSelect(DL, OpVT, Pred, Op, Zero);
13139 OtherOp = DAG.getFreeze(OtherOp);
13140 }
13141 };
13142
13143 // partial_reduce_*mla(acc, mul(ext(x), splat(C)), splat(1))
13144 // -> partial_reduce_*mla(acc, x, C)
13145 APInt C;
13146 if (ISD::isConstantSplatVector(RHS.getNode(), C)) {
13147 // TODO: Make use of partial_reduce_sumla here
13148 APInt CTrunc = C.trunc(LHSExtOpVT.getScalarSizeInBits());
13149 unsigned LHSBits = LHS.getValueType().getScalarSizeInBits();
13150 if ((LHSOpcode != ISD::ZERO_EXTEND || CTrunc.zext(LHSBits) != C) &&
13151 (LHSOpcode != ISD::SIGN_EXTEND || CTrunc.sext(LHSBits) != C))
13152 return SDValue();
13153
 13154 unsigned NewOpcode = LHSOpcode == ISD::SIGN_EXTEND
 13155 ? ISD::PARTIAL_REDUCE_SMLA
 13156 : ISD::PARTIAL_REDUCE_UMLA;
 13157
13158 // Only perform these combines if the target supports folding
13159 // the extends into the operation.
13161 NewOpcode, TLI.getTypeToTransformTo(*Context, N->getValueType(0)),
13162 TLI.getTypeToTransformTo(*Context, LHSExtOpVT)))
13163 return SDValue();
13164
13165 SDValue C = DAG.getConstant(CTrunc, DL, LHSExtOpVT);
13166 ApplyPredicate(C, LHSExtOp);
13167 return DAG.getNode(NewOpcode, DL, N->getValueType(0), Acc, LHSExtOp, C);
13168 }
13169
13170 unsigned RHSOpcode = RHS->getOpcode();
13171 if (!IsIntOrFPExtOpcode(RHSOpcode))
13172 return SDValue();
13173
13174 SDValue RHSExtOp = RHS->getOperand(0);
13175 if (LHSExtOpVT != RHSExtOp.getValueType())
13176 return SDValue();
13177
13178 unsigned NewOpc;
13179 if (LHSOpcode == ISD::SIGN_EXTEND && RHSOpcode == ISD::SIGN_EXTEND)
13180 NewOpc = ISD::PARTIAL_REDUCE_SMLA;
13181 else if (LHSOpcode == ISD::ZERO_EXTEND && RHSOpcode == ISD::ZERO_EXTEND)
13182 NewOpc = ISD::PARTIAL_REDUCE_UMLA;
 13183 else if (LHSOpcode == ISD::SIGN_EXTEND && RHSOpcode == ISD::ZERO_EXTEND)
 13184 NewOpc = ISD::PARTIAL_REDUCE_SUMLA;
 13185 else if (LHSOpcode == ISD::ZERO_EXTEND && RHSOpcode == ISD::SIGN_EXTEND) {
 13186 NewOpc = ISD::PARTIAL_REDUCE_SUMLA;
 13187 std::swap(LHSExtOp, RHSExtOp);
13188 } else if (LHSOpcode == ISD::FP_EXTEND && RHSOpcode == ISD::FP_EXTEND) {
13189 NewOpc = ISD::PARTIAL_REDUCE_FMLA;
13190 } else
13191 return SDValue();
13192 // For a 2-stage extend the signedness of both of the extends must match
13193 // If the mul has the same type, there is no outer extend, and thus we
13194 // can simply use the inner extends to pick the result node.
13195 // TODO: extend to handle nonneg zext as sext
13196 EVT AccElemVT = Acc.getValueType().getVectorElementType();
13197 if (Op1.getValueType().getVectorElementType() != AccElemVT &&
13198 NewOpc != N->getOpcode())
13199 return SDValue();
13200
13201 // Only perform these combines if the target supports folding
13202 // the extends into the operation.
13204 NewOpc, TLI.getTypeToTransformTo(*Context, N->getValueType(0)),
13205 TLI.getTypeToTransformTo(*Context, LHSExtOpVT)))
13206 return SDValue();
13207
13208 ApplyPredicate(RHSExtOp, LHSExtOp);
13209 return DAG.getNode(NewOpc, DL, N->getValueType(0), Acc, LHSExtOp, RHSExtOp);
13210}
13211
13212// partial.reduce.*mla(acc, *ext(op), splat(1))
13213// -> partial.reduce.*mla(acc, op, splat(trunc(1)))
13214// partial.reduce.sumla(acc, sext(op), splat(1))
13215// -> partial.reduce.smla(acc, op, splat(trunc(1)))
13216//
13217// partial.reduce.*mla(acc, sel(p, *ext(op), splat(0)), splat(1))
13218// -> partial.reduce.*mla(acc, sel(p, op, splat(0)), splat(trunc(1)))
13219SDValue DAGCombiner::foldPartialReduceAdd(SDNode *N) {
13220 SDLoc DL(N);
13221 SDValue Acc = N->getOperand(0);
13222 SDValue Op1 = N->getOperand(1);
13223 SDValue Op2 = N->getOperand(2);
13224
13226 return SDValue();
13227
13228 SDValue Pred;
13229 unsigned Op1Opcode = Op1.getOpcode();
13230 if (Op1Opcode == ISD::VSELECT && (isZeroOrZeroSplat(Op1->getOperand(2)) ||
13231 isZeroOrZeroSplatFP(Op1->getOperand(2)))) {
13232 Pred = Op1->getOperand(0);
13233 Op1 = Op1->getOperand(1);
13234 Op1Opcode = Op1->getOpcode();
13235 }
13236
13237 if (!ISD::isExtOpcode(Op1Opcode) && Op1Opcode != ISD::FP_EXTEND)
13238 return SDValue();
13239
13240 bool Op1IsSigned =
13241 Op1Opcode == ISD::SIGN_EXTEND || Op1Opcode == ISD::FP_EXTEND;
13242 bool NodeIsSigned = N->getOpcode() != ISD::PARTIAL_REDUCE_UMLA;
13243 EVT AccElemVT = Acc.getValueType().getVectorElementType();
13244 if (Op1IsSigned != NodeIsSigned &&
13245 Op1.getValueType().getVectorElementType() != AccElemVT)
13246 return SDValue();
13247
 13248 unsigned NewOpcode = N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA
 13249 ? ISD::PARTIAL_REDUCE_FMLA
 13250 : Op1IsSigned ? ISD::PARTIAL_REDUCE_SMLA
 13251 : ISD::PARTIAL_REDUCE_UMLA;
 13252
13253 SDValue UnextOp1 = Op1.getOperand(0);
13254 EVT UnextOp1VT = UnextOp1.getValueType();
13255 auto *Context = DAG.getContext();
13257 NewOpcode, TLI.getTypeToTransformTo(*Context, N->getValueType(0)),
13258 TLI.getTypeToTransformTo(*Context, UnextOp1VT)))
13259 return SDValue();
13260
13261 SDValue Constant = N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA
13262 ? DAG.getConstantFP(1, DL, UnextOp1VT)
13263 : DAG.getConstant(1, DL, UnextOp1VT);
13264
13265 if (Pred) {
13266 SDValue Zero = N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA
13267 ? DAG.getConstantFP(0, DL, UnextOp1VT)
13268 : DAG.getConstant(0, DL, UnextOp1VT);
13269 Constant = DAG.getSelect(DL, UnextOp1VT, Pred, Constant, Zero);
13270 }
13271 return DAG.getNode(NewOpcode, DL, N->getValueType(0), Acc, UnextOp1,
13272 Constant);
13273}
13274
13275SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) {
13276 auto *SLD = cast<VPStridedLoadSDNode>(N);
13277 EVT EltVT = SLD->getValueType(0).getVectorElementType();
13278 // Combine strided loads with unit-stride to a regular VP load.
13279 if (auto *CStride = dyn_cast<ConstantSDNode>(SLD->getStride());
13280 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
13281 SDValue NewLd = DAG.getLoadVP(
13282 SLD->getAddressingMode(), SLD->getExtensionType(), SLD->getValueType(0),
13283 SDLoc(N), SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(),
13284 SLD->getMask(), SLD->getVectorLength(), SLD->getMemoryVT(),
13285 SLD->getMemOperand(), SLD->isExpandingLoad());
13286 return CombineTo(N, NewLd, NewLd.getValue(1));
13287 }
13288 return SDValue();
13289}
13290
13291/// A vector select of 2 constant vectors can be simplified to math/logic to
13292/// avoid a variable select instruction and possibly avoid constant loads.
13293SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
13294 SDValue Cond = N->getOperand(0);
13295 SDValue N1 = N->getOperand(1);
13296 SDValue N2 = N->getOperand(2);
13297 EVT VT = N->getValueType(0);
13298 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
 13299 !TLI.convertSelectOfConstantsToMath(VT) ||
 13300 !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
 13301 !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
 13302 return SDValue();
13303
13304 // Check if we can use the condition value to increment/decrement a single
13305 // constant value. This simplifies a select to an add and removes a constant
13306 // load/materialization from the general case.
13307 bool AllAddOne = true;
13308 bool AllSubOne = true;
13309 unsigned Elts = VT.getVectorNumElements();
13310 for (unsigned i = 0; i != Elts; ++i) {
13311 SDValue N1Elt = N1.getOperand(i);
13312 SDValue N2Elt = N2.getOperand(i);
13313 if (N1Elt.isUndef())
13314 continue;
13315 // N2 should not contain undef values since it will be reused in the fold.
13316 if (N2Elt.isUndef() || N1Elt.getValueType() != N2Elt.getValueType()) {
13317 AllAddOne = false;
13318 AllSubOne = false;
13319 break;
13320 }
13321
13322 const APInt &C1 = N1Elt->getAsAPIntVal();
13323 const APInt &C2 = N2Elt->getAsAPIntVal();
13324 if (C1 != C2 + 1)
13325 AllAddOne = false;
13326 if (C1 != C2 - 1)
13327 AllSubOne = false;
13328 }
13329
13330 // Further simplifications for the extra-special cases where the constants are
13331 // all 0 or all -1 should be implemented as folds of these patterns.
13332 SDLoc DL(N);
13333 if (AllAddOne || AllSubOne) {
13334 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
13335 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
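// For example, Cond = <true, false>, N1 = <4, 9>, N2 = <3, 8> is the C+1
// case: add (zext Cond), <3, 8> gives <1 + 3, 0 + 8> = <4, 8> as required.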
13336 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
13337 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
13338 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
13339 }
13340
13341 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
13342 APInt Pow2C;
13343 if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
13344 isNullOrNullSplat(N2)) {
13345 SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
13346 SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
13347 return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
13348 }
13349
 13350 if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
 13351 return V;
13352
13353 // The general case for select-of-constants:
13354 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
13355 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
13356 // leave that to a machine-specific pass.
13357 return SDValue();
13358}
13359
13360SDValue DAGCombiner::visitVP_SELECT(SDNode *N) {
13361 SDValue N0 = N->getOperand(0);
13362 SDValue N1 = N->getOperand(1);
13363 SDValue N2 = N->getOperand(2);
13364 SDLoc DL(N);
13365
13366 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
13367 return V;
13368
 13369 if (SDValue V = foldBoolSelectToLogic<VPMatchContext>(N, DL, DAG))
 13370 return V;
13371
13372 return SDValue();
13373}
13374
 13375static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal,
 13376 SDValue FVal,
13377 const TargetLowering &TLI,
13378 SelectionDAG &DAG,
13379 const SDLoc &DL) {
13380 EVT VT = TVal.getValueType();
13381 if (!TLI.isTypeLegal(VT))
13382 return SDValue();
13383
13384 EVT CondVT = Cond.getValueType();
13385 assert(CondVT.isVector() && "Vector select expects a vector selector!");
13386
13387 bool IsTAllZero = ISD::isConstantSplatVectorAllZeros(TVal.getNode());
13388 bool IsTAllOne = ISD::isConstantSplatVectorAllOnes(TVal.getNode());
13389 bool IsFAllZero = ISD::isConstantSplatVectorAllZeros(FVal.getNode());
13390 bool IsFAllOne = ISD::isConstantSplatVectorAllOnes(FVal.getNode());
13391
 13392 // Bail out unless one arm is an all-zeros or all-ones splat, i.e. a
 13392 // vselect(cond, 0/-1, X) or vselect(cond, X, 0/-1) pattern.
13393 if (!IsTAllZero && !IsTAllOne && !IsFAllZero && !IsFAllOne)
13394 return SDValue();
13395
13396 // select Cond, 0, 0 → 0
13397 if (IsTAllZero && IsFAllZero) {
13398 return VT.isFloatingPoint() ? DAG.getConstantFP(0.0, DL, VT)
13399 : DAG.getConstant(0, DL, VT);
13400 }
13401
13402 // check select(setgt lhs, -1), 1, -1 --> or (sra lhs, bitwidth - 1), 1
13403 APInt TValAPInt;
13404 if (Cond.getOpcode() == ISD::SETCC &&
13405 Cond.getOperand(2) == DAG.getCondCode(ISD::SETGT) &&
13406 Cond.getOperand(0).getValueType() == VT && VT.isSimple() &&
13407 ISD::isConstantSplatVector(TVal.getNode(), TValAPInt) &&
13408 TValAPInt.isOne() &&
13409 ISD::isConstantSplatVectorAllOnes(Cond.getOperand(1).getNode()) &&
13411 return SDValue();
13412 }
13413
13414 // To use the condition operand as a bitwise mask, it must have elements that
 13415 // are the same size as the select elements, i.e., the condition operand must
13416 // have already been promoted from the IR select condition type <N x i1>.
13417 // Don't check if the types themselves are equal because that excludes
13418 // vector floating-point selects.
13419 if (CondVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
13420 return SDValue();
13421
13422 // Cond value must be 'sign splat' to be converted to a logical op.
13423 if (DAG.ComputeNumSignBits(Cond) != CondVT.getScalarSizeInBits())
13424 return SDValue();
13425
13426 // Try inverting Cond and swapping T/F if it gives all-ones/all-zeros form
13427 if (!IsTAllOne && !IsFAllZero && Cond.hasOneUse() &&
13428 Cond.getOpcode() == ISD::SETCC &&
13429 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT) ==
13430 CondVT) {
13431 if (IsTAllZero || IsFAllOne) {
13432 SDValue CC = Cond.getOperand(2);
 13433 ISD::CondCode InverseCC = ISD::getSetCCInverse(
 13434 cast<CondCodeSDNode>(CC)->get(), Cond.getOperand(0).getValueType());
13435 Cond = DAG.getSetCC(DL, CondVT, Cond.getOperand(0), Cond.getOperand(1),
13436 InverseCC);
13437 std::swap(TVal, FVal);
13438 std::swap(IsTAllOne, IsFAllOne);
13439 std::swap(IsTAllZero, IsFAllZero);
13440 }
13441 }
13442
 13443 assert(DAG.ComputeNumSignBits(Cond) == CondVT.getScalarSizeInBits() &&
 13444 "Select condition no longer all-sign bits");
13445
13446 // select Cond, -1, 0 → bitcast Cond
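// Cond is known to be a per-lane all-ones/all-zeros mask at this point, which
// is exactly the value a select of -1 and 0 would produce, so a bitcast is
// enough.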
13447 if (IsTAllOne && IsFAllZero)
13448 return DAG.getBitcast(VT, Cond);
13449
13450 // select Cond, -1, x → or Cond, x
13451 if (IsTAllOne) {
13452 SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal));
13453 SDValue Or = DAG.getNode(ISD::OR, DL, CondVT, Cond, X);
13454 return DAG.getBitcast(VT, Or);
13455 }
13456
13457 // select Cond, x, 0 → and Cond, x
13458 if (IsFAllZero) {
13459 SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(TVal));
13460 SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, X);
13461 return DAG.getBitcast(VT, And);
13462 }
13463
13464 // select Cond, 0, x -> and not(Cond), x
13465 if (IsTAllZero &&
13467 SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal));
13468 SDValue And =
13469 DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT), X);
13470 return DAG.getBitcast(VT, And);
13471 }
13472
13473 return SDValue();
13474}
13475
13476SDValue DAGCombiner::visitVSELECT(SDNode *N) {
13477 SDValue N0 = N->getOperand(0);
13478 SDValue N1 = N->getOperand(1);
13479 SDValue N2 = N->getOperand(2);
13480 EVT VT = N->getValueType(0);
13481 SDLoc DL(N);
13482
13483 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
13484 return V;
13485
 13486 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DL, DAG))
 13487 return V;
13488
13489 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
13490 if (!TLI.isTargetCanonicalSelect(N))
13491 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
13492 return DAG.getSelect(DL, VT, F, N2, N1);
13493
13494 // select (sext m), (add X, C), X --> (add X, (and C, (sext m))))
13495 if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N2 && N1->hasOneUse() &&
 13498 TLI.getBooleanContents(N0.getValueType()) ==
 13499 TargetLowering::ZeroOrNegativeOneBooleanContent) {
13500 return DAG.getNode(
13501 ISD::ADD, DL, N1.getValueType(), N2,
13502 DAG.getNode(ISD::AND, DL, N0.getValueType(), N1.getOperand(1), N0));
13503 }
13504
13505 // Canonicalize integer abs.
13506 // vselect (setg[te] X, 0), X, -X ->
13507 // vselect (setgt X, -1), X, -X ->
13508 // vselect (setl[te] X, 0), -X, X ->
13509 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
13510 if (N0.getOpcode() == ISD::SETCC) {
13511 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
 13512 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
 13513 bool isAbs = false;
13514 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
13515
13516 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
13517 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
 13518 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
 13519 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
 13520 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
 13521 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
 13522 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
 13523
13524 if (isAbs) {
 13525 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
 13526 return DAG.getNode(ISD::ABS, DL, VT, LHS);
13527
13528 SDValue Shift = DAG.getNode(
13529 ISD::SRA, DL, VT, LHS,
13530 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, DL));
13531 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
13532 AddToWorklist(Shift.getNode());
13533 AddToWorklist(Add.getNode());
13534 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
13535 }
13536
13537 // vselect x, y (fcmp lt x, y) -> fminnum x, y
13538 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
13539 //
13540 // This is OK if we don't care about what happens if either operand is a
13541 // NaN.
13542 //
13543 if (N0.hasOneUse() &&
13544 isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, N->getFlags(), TLI)) {
13545 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
13546 return FMinMax;
13547 }
13548
13549 if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
13550 return S;
13551 if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
13552 return S;
13553
13554 // If this select has a condition (setcc) with narrower operands than the
13555 // select, try to widen the compare to match the select width.
13556 // TODO: This should be extended to handle any constant.
13557 // TODO: This could be extended to handle non-loading patterns, but that
13558 // requires thorough testing to avoid regressions.
13559 if (isNullOrNullSplat(RHS)) {
13560 EVT NarrowVT = LHS.getValueType();
13562 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
13563 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
13564 unsigned WideWidth = WideVT.getScalarSizeInBits();
13565 bool IsSigned = isSignedIntSetCC(CC);
13566 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13567 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
13568 SetCCWidth != 1 && SetCCWidth < WideWidth &&
13569 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
13570 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
13571 // Both compare operands can be widened for free. The LHS can use an
13572 // extended load, and the RHS is a constant:
13573 // vselect (ext (setcc load(X), C)), N1, N2 -->
13574 // vselect (setcc extload(X), C'), N1, N2
13575 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13576 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
13577 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
13578 EVT WideSetCCVT = getSetCCResultType(WideVT);
13579 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
13580 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
13581 }
13582 }
13583
13584 if (SDValue ABD = foldSelectToABD(LHS, RHS, N1, N2, CC, DL))
13585 return ABD;
13586
13587 // Match VSELECTs into add with unsigned saturation.
13588 if (hasOperation(ISD::UADDSAT, VT)) {
13589 // Check if one of the arms of the VSELECT is vector with all bits set.
13590 // If it's on the left side invert the predicate to simplify logic below.
13591 SDValue Other;
13592 ISD::CondCode SatCC = CC;
 13593 if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
 13594 Other = N2;
13595 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
13596 } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
13597 Other = N1;
13598 }
13599
13600 if (Other && Other.getOpcode() == ISD::ADD) {
13601 SDValue CondLHS = LHS, CondRHS = RHS;
13602 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
13603
13604 // Canonicalize condition operands.
13605 if (SatCC == ISD::SETUGE) {
13606 std::swap(CondLHS, CondRHS);
13607 SatCC = ISD::SETULE;
13608 }
13609
13610 // We can test against either of the addition operands.
13611 // x <= x+y ? x+y : ~0 --> uaddsat x, y
13612 // x+y >= x ? x+y : ~0 --> uaddsat x, y
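// For example, with i8 lanes, x = 250 and y = 10: x+y wraps to 4, the
// comparison x <= x+y fails, and the select returns ~0 (255), which is the
// saturated result uaddsat(250, 10).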
13613 if (SatCC == ISD::SETULE && Other == CondRHS &&
13614 (OpLHS == CondLHS || OpRHS == CondLHS))
13615 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
13616
13617 if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
13618 (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
13619 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
13620 CondLHS == OpLHS) {
13621 // If the RHS is a constant we have to reverse the const
13622 // canonicalization.
13623 // x >= ~C ? x+C : ~0 --> uaddsat x, C
13624 auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
13625 return Cond->getAPIntValue() == ~Op->getAPIntValue();
13626 };
13627 if (SatCC == ISD::SETULE &&
13628 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
13629 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
13630 }
13631 }
13632 }
13633
13634 // Match VSELECTs into sub with unsigned saturation.
13635 if (hasOperation(ISD::USUBSAT, VT)) {
13636 // Check if one of the arms of the VSELECT is a zero vector. If it's on
13637 // the left side invert the predicate to simplify logic below.
13638 SDValue Other;
13639 ISD::CondCode SatCC = CC;
 13640 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
 13641 Other = N2;
 13642 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
 13643 } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
 13644 Other = N1;
13645 }
13646
13647 // zext(x) >= y ? trunc(zext(x) - y) : 0
13648 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
13649 // zext(x) > y ? trunc(zext(x) - y) : 0
13650 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
13651 if (Other && Other.getOpcode() == ISD::TRUNCATE &&
13652 Other.getOperand(0).getOpcode() == ISD::SUB &&
13653 (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) {
13654 SDValue OpLHS = Other.getOperand(0).getOperand(0);
13655 SDValue OpRHS = Other.getOperand(0).getOperand(1);
13656 if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND)
13657 if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS,
13658 DAG, DL))
13659 return R;
13660 }
13661
13662 if (Other && Other.getNumOperands() == 2) {
13663 SDValue CondRHS = RHS;
13664 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
13665
13666 if (OpLHS == LHS) {
13667 // Look for a general sub with unsigned saturation first.
13668 // x >= y ? x-y : 0 --> usubsat x, y
13669 // x > y ? x-y : 0 --> usubsat x, y
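// For example, with i8 lanes and y = 10: x = 5 fails the compare and yields
// 0, while x = 20 yields 20 - 10 = 10; both agree with usubsat(x, 10).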
13670 if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
13671 Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
13672 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
13673
13674 if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
13675 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
13676 if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
13677 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
13678 // If the RHS is a constant we have to reverse the const
13679 // canonicalization.
13680 // x > C-1 ? x+-C : 0 --> usubsat x, C
13681 auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
13682 return (!Op && !Cond) ||
13683 (Op && Cond &&
13684 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
13685 };
13686 if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
13687 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
13688 /*AllowUndefs*/ true)) {
13689 OpRHS = DAG.getNegative(OpRHS, DL, VT);
13690 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
13691 }
13692
13693 // Another special case: If C was a sign bit, the sub has been
13694 // canonicalized into a xor.
13695 // FIXME: Would it be better to use computeKnownBits to
13696 // determine whether it's safe to decanonicalize the xor?
13697 // x s< 0 ? x^C : 0 --> usubsat x, C
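// For example (illustrative, i8): x s< 0 ? x ^ 0x80 : 0 equals
// x >= 128 ? x - 128 : 0 in unsigned arithmetic, which is usubsat x, 128.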
13698 APInt SplatValue;
13699 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
13700 ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
13701 ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) &&
13702 SplatValue.isSignMask()) {
13703 // Note that we have to rebuild the RHS constant here to
13704 // ensure we don't rely on particular values of undef lanes.
13705 OpRHS = DAG.getConstant(SplatValue, DL, VT);
13706 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
13707 }
13708 }
13709 }
13710 }
13711 }
13712 }
13713
13714 // (vselect (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
13715 // (vselect (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
13716 if (SDValue UMin = foldSelectToUMin(LHS, RHS, N1, N2, CC, DL))
13717 return UMin;
13718 }
13719
13720 if (SimplifySelectOps(N, N1, N2))
13721 return SDValue(N, 0); // Don't revisit N.
13722
13723 // Fold (vselect all_ones, N1, N2) -> N1
13724 if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
13725 return N1;
13726 // Fold (vselect all_zeros, N1, N2) -> N2
13727 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
13728 return N2;
13729
13730 // The ConvertSelectToConcatVector function assumes both of the above
13731 // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
13732 // and addressed.
13733 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
13734 N2.getOpcode() == ISD::CONCAT_VECTORS &&
13735 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
13736 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
13737 return CV;
13738 }
13739
13740 if (SDValue V = foldVSelectOfConstants(N))
13741 return V;
13742
13743 if (hasOperation(ISD::SRA, VT))
13744 if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
13745 return V;
13746
13747 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
13748 return SDValue(N, 0);
13749
13750 if (SDValue V = combineVSelectWithAllOnesOrZeros(N0, N1, N2, TLI, DAG, DL))
13751 return V;
13752
13753 return SDValue();
13754}
13755
13756SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
13757 SDValue N0 = N->getOperand(0);
13758 SDValue N1 = N->getOperand(1);
13759 SDValue N2 = N->getOperand(2);
13760 SDValue N3 = N->getOperand(3);
13761 SDValue N4 = N->getOperand(4);
13762 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
13763 SDLoc DL(N);
13764
13765 // fold select_cc lhs, rhs, x, x, cc -> x
13766 if (N2 == N3)
13767 return N2;
13768
13769 // select_cc bool, 0, x, y, seteq -> select bool, y, x
13770 if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 &&
13771 isNullConstant(N1))
13772 return DAG.getSelect(DL, N2.getValueType(), N0, N3, N2);
13773
13774 // Determine if the condition we're dealing with is constant
13775 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
13776 CC, DL, false)) {
13777 AddToWorklist(SCC.getNode());
13778
13779 // cond always true -> true val
13780 // cond always false -> false val
13781 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode()))
13782 return SCCC->isZero() ? N3 : N2;
13783
13784 // When the condition is UNDEF, just return the first operand. This is
13785 // coherent with DAG creation; no setcc node is created in this case.
13786 if (SCC->isUndef())
13787 return N2;
13788
13789 // Fold to a simpler select_cc
13790 if (SCC.getOpcode() == ISD::SETCC) {
13791 return DAG.getNode(ISD::SELECT_CC, DL, N2.getValueType(),
13792 SCC.getOperand(0), SCC.getOperand(1), N2, N3,
13793 SCC.getOperand(2), SCC->getFlags());
13794 }
13795 }
13796
13797 // If we can fold this based on the true/false value, do so.
13798 if (SimplifySelectOps(N, N2, N3))
13799 return SDValue(N, 0); // Don't revisit N.
13800
13801 // fold select_cc into other things, such as min/max/abs
13802 return SimplifySelectCC(DL, N0, N1, N2, N3, CC);
13803}
13804
13805SDValue DAGCombiner::visitSETCC(SDNode *N) {
13806 // setcc is very commonly used as an argument to brcond. This pattern
13807 // also lends itself to numerous combines and, as a result, it is desirable
13808 // to keep the argument to a brcond as a setcc as much as possible.
13809 bool PreferSetCC =
13810 N->hasOneUse() && N->user_begin()->getOpcode() == ISD::BRCOND;
13811
13812 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
13813 EVT VT = N->getValueType(0);
13814 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
13815 SDLoc DL(N);
13816
13817 if (SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, DL, !PreferSetCC)) {
13818 // If we prefer to have a setcc, and we don't, we'll try our best to
13819 // recreate one using rebuildSetCC.
13820 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
13821 SDValue NewSetCC = rebuildSetCC(Combined);
13822
13823 // We don't have anything interesting to combine to.
13824 if (NewSetCC.getNode() == N)
13825 return SDValue();
13826
13827 if (NewSetCC)
13828 return NewSetCC;
13829 }
13830 return Combined;
13831 }
13832
13833 // Optimize
13834 // 1) (icmp eq/ne (and X, C0), (shift X, C1))
13835 // or
13836 // 2) (icmp eq/ne X, (rotate X, C1))
13837 // If C0 is a mask or shifted mask and the shift amt (C1) isolates the
13838 // remaining bits (i.e. something like `(x64 & UINT32_MAX) == (x64 >> 32)`)
13839 // Then:
13840 // If C1 is a power of 2, then the rotate and shift+and versions are
13841 // equivalent, so we can interchange them depending on target preference.
13842 // Otherwise, if we have the shift+and version we can interchange srl/shl,
13843 // which in turn affects the constant C0. We can use this to get better
13844 // constants again determined by target preference.
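// For example (illustrative): (x64 & 0xffffffff) == (x64 >> 32) and
// x64 == rotl(x64, 32) both test whether the two 32-bit halves of x64 are
// equal, so the cheaper form can be chosen per target.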
13845 if (Cond == ISD::SETNE || Cond == ISD::SETEQ) {
13846 auto IsAndWithShift = [](SDValue A, SDValue B) {
13847 return A.getOpcode() == ISD::AND &&
13848 (B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) &&
13849 A.getOperand(0) == B.getOperand(0);
13850 };
13851 auto IsRotateWithOp = [](SDValue A, SDValue B) {
13852 return (B.getOpcode() == ISD::ROTL || B.getOpcode() == ISD::ROTR) &&
13853 B.getOperand(0) == A;
13854 };
13855 SDValue AndOrOp = SDValue(), ShiftOrRotate = SDValue();
13856 bool IsRotate = false;
13857
13858 // Find either shift+and or rotate pattern.
13859 if (IsAndWithShift(N0, N1)) {
13860 AndOrOp = N0;
13861 ShiftOrRotate = N1;
13862 } else if (IsAndWithShift(N1, N0)) {
13863 AndOrOp = N1;
13864 ShiftOrRotate = N0;
13865 } else if (IsRotateWithOp(N0, N1)) {
13866 IsRotate = true;
13867 AndOrOp = N0;
13868 ShiftOrRotate = N1;
13869 } else if (IsRotateWithOp(N1, N0)) {
13870 IsRotate = true;
13871 AndOrOp = N1;
13872 ShiftOrRotate = N0;
13873 }
13874
13875 if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() &&
13876 (IsRotate || AndOrOp.hasOneUse())) {
13877 EVT OpVT = N0.getValueType();
13878 // Get the constant shift/rotate amount, and possibly the mask (if it's
13879 // the shift+and variant).
13880 auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
13881 ConstantSDNode *CNode = isConstOrConstSplat(Op, /*AllowUndefs*/ false,
13882 /*AllowTrunc*/ false);
13883 if (CNode == nullptr)
13884 return std::nullopt;
13885 return CNode->getAPIntValue();
13886 };
13887 std::optional<APInt> AndCMask =
13888 IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.getOperand(1));
13889 std::optional<APInt> ShiftCAmt =
13890 GetAPIntValue(ShiftOrRotate.getOperand(1));
13891 unsigned NumBits = OpVT.getScalarSizeInBits();
13892
13893 // We found constants.
13894 if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) {
13895 unsigned ShiftOpc = ShiftOrRotate.getOpcode();
13896 // Check that the constants meet the constraints.
13897 bool CanTransform = IsRotate;
13898 if (!CanTransform) {
13899 // Check that the mask and shift complement each other.
13900 CanTransform = *ShiftCAmt == (~*AndCMask).popcount();
13901 // Check that we are comparing all bits
13902 CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
13903 // Check that the and mask is correct for the shift
13904 CanTransform &=
13905 ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask();
13906 }
13907
13908 // See if target prefers another shift/rotate opcode.
13909 unsigned NewShiftOpc = TLI.preferedOpcodeForCmpEqPiecesOfOperand(
13910 OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
13911 // Transform is valid and we have a new preference.
13912 if (CanTransform && NewShiftOpc != ShiftOpc) {
13913 SDValue NewShiftOrRotate =
13914 DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0),
13915 ShiftOrRotate.getOperand(1));
13916 SDValue NewAndOrOp = SDValue();
13917
13918 if (NewShiftOpc == ISD::SHL || NewShiftOpc == ISD::SRL) {
13919 APInt NewMask =
13920 NewShiftOpc == ISD::SHL
13921 ? APInt::getHighBitsSet(NumBits,
13922 NumBits - ShiftCAmt->getZExtValue())
13923 : APInt::getLowBitsSet(NumBits,
13924 NumBits - ShiftCAmt->getZExtValue());
13925 NewAndOrOp =
13926 DAG.getNode(ISD::AND, DL, OpVT, ShiftOrRotate.getOperand(0),
13927 DAG.getConstant(NewMask, DL, OpVT));
13928 } else {
13929 NewAndOrOp = ShiftOrRotate.getOperand(0);
13930 }
13931
13932 return DAG.getSetCC(DL, VT, NewAndOrOp, NewShiftOrRotate, Cond);
13933 }
13934 }
13935 }
13936 }
13937 return SDValue();
13938}
13939
13940SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
13941 SDValue LHS = N->getOperand(0);
13942 SDValue RHS = N->getOperand(1);
13943 SDValue Carry = N->getOperand(2);
13944 SDValue Cond = N->getOperand(3);
13945
13946 // If Carry is false, fold to a regular SETCC.
13947 if (isNullConstant(Carry))
13948 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
13949
13950 return SDValue();
13951}
13952
13953 /// Check that N satisfies:
13954 /// N is used once.
13955 /// N is a load.
13956 /// The load is compatible with ExtOpcode: if the load has an explicit
13957 /// zero/sign extension, ExtOpcode must be the same extension; otherwise
13958 /// (a non-extending or any-extending load) it is always compatible.
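/// For example, a SEXTLOAD operand is compatible with SIGN_EXTEND but not
/// with ZERO_EXTEND.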
13960static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
13961 if (!N.hasOneUse())
13962 return false;
13963
13964 if (!isa<LoadSDNode>(N))
13965 return false;
13966
13967 LoadSDNode *Load = cast<LoadSDNode>(N);
13968 ISD::LoadExtType LoadExt = Load->getExtensionType();
13969 if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
13970 return true;
13971
13972 // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
13973 // extension.
13974 if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
13975 (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
13976 return false;
13977
13978 return true;
13979}
13980
13981/// Fold
13982/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
13983/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
13984/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
13985/// This function is called by the DAGCombiner when visiting sext/zext/aext
13986/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
13987 static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
13988 SelectionDAG &DAG, const SDLoc &DL,
13989 CombineLevel Level) {
13990 unsigned Opcode = N->getOpcode();
13991 SDValue N0 = N->getOperand(0);
13992 EVT VT = N->getValueType(0);
13993 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
13994 Opcode == ISD::ANY_EXTEND) &&
13995 "Expected EXTEND dag node in input!");
13996
13997 SDValue Cond, Op1, Op2;
13998 if (!sd_match(N0, m_OneUse(m_Select(m_Value(Cond), m_Value(Op1),
13999 m_Value(Op2)))))
14000 return SDValue();
14001
14002 if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
14003 return SDValue();
14004
14005 auto ExtLoadOpcode = ISD::EXTLOAD;
14006 if (Opcode == ISD::SIGN_EXTEND)
14007 ExtLoadOpcode = ISD::SEXTLOAD;
14008 else if (Opcode == ISD::ZERO_EXTEND)
14009 ExtLoadOpcode = ISD::ZEXTLOAD;
14010
14011 // An illegal VSELECT may fail instruction selection if it appears after
14012 // legalization (DAG Combine2), so conservatively check the OperationAction.
14013 LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
14014 LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
14015 if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
14016 !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) ||
14017 (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes &&
14018 TLI.getOperationAction(ISD::VSELECT, VT) != TargetLowering::Legal))
14019 return SDValue();
14020
14021 SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
14022 SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
14023 return DAG.getSelect(DL, VT, Cond, Ext1, Ext2);
14024}
14025
14026/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
14027/// a build_vector of constants.
14028/// This function is called by the DAGCombiner when visiting sext/zext/aext
14029/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
14030/// Vector extends are not folded if operations are legal; this is to
14031/// avoid introducing illegal build_vector dag nodes.
14032 static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL,
14033 const TargetLowering &TLI,
14034 SelectionDAG &DAG, bool LegalTypes) {
14035 unsigned Opcode = N->getOpcode();
14036 SDValue N0 = N->getOperand(0);
14037 EVT VT = N->getValueType(0);
14038
14039 assert((ISD::isExtOpcode(Opcode) || ISD::isExtVecInRegOpcode(Opcode)) &&
14040 "Expected EXTEND dag node in input!");
14041
14042 // fold (sext c1) -> c1
14043 // fold (zext c1) -> c1
14044 // fold (aext c1) -> c1
14045 if (isa<ConstantSDNode>(N0))
14046 return DAG.getNode(Opcode, DL, VT, N0);
14047
14048 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
14049 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
14050 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
14051 if (N0->getOpcode() == ISD::SELECT) {
14052 SDValue Op1 = N0->getOperand(1);
14053 SDValue Op2 = N0->getOperand(2);
14054 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
14055 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
14056 // For any_extend, choose sign extension of the constants to allow a
14057 // possible further transform to sign_extend_inreg, i.e.
14058 //
14059 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
14060 // t2: i64 = any_extend t1
14061 // -->
14062 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
14063 // -->
14064 // t4: i64 = sign_extend_inreg t3
14065 unsigned FoldOpc = Opcode;
14066 if (FoldOpc == ISD::ANY_EXTEND)
14067 FoldOpc = ISD::SIGN_EXTEND;
14068 return DAG.getSelect(DL, VT, N0->getOperand(0),
14069 DAG.getNode(FoldOpc, DL, VT, Op1),
14070 DAG.getNode(FoldOpc, DL, VT, Op2));
14071 }
14072 }
14073
14074 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
14075 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
14076 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
14077 EVT SVT = VT.getScalarType();
14078 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
14079 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
14080 return SDValue();
14081
14082 // We can fold this node into a build_vector.
14083 unsigned VTBits = SVT.getSizeInBits();
14084 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
14085 SmallVector<SDValue, 8> Elts;
14086 unsigned NumElts = VT.getVectorNumElements();
14087
14088 for (unsigned i = 0; i != NumElts; ++i) {
14089 SDValue Op = N0.getOperand(i);
14090 if (Op.isUndef()) {
14091 if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG)
14092 Elts.push_back(DAG.getUNDEF(SVT));
14093 else
14094 Elts.push_back(DAG.getConstant(0, DL, SVT));
14095 continue;
14096 }
14097
14098 SDLoc DL(Op);
14099 // Get the constant value and if needed trunc it to the size of the type.
14100 // Nodes like build_vector might have constants wider than the scalar type.
14101 APInt C = Op->getAsAPIntVal().zextOrTrunc(EVTBits);
14102 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
14103 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
14104 else
14105 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
14106 }
14107
14108 return DAG.getBuildVector(VT, DL, Elts);
14109}
14110
14111// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
14112// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
14113 // transformation. Returns true if the extensions are possible and the
14114 // above-mentioned transformation is profitable.
14115 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
14116 unsigned ExtOpc,
14117 SmallVectorImpl<SDNode *> &ExtendNodes,
14118 const TargetLowering &TLI) {
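// For example (illustrative): if (load x) is also used by
// (setcc (load x), C), that setcc can later be rewritten to compare the
// extended load against (ext C), so the extension is still worthwhile.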
14119 bool HasCopyToRegUses = false;
14120 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
14121 for (SDUse &Use : N0->uses()) {
14122 SDNode *User = Use.getUser();
14123 if (User == N)
14124 continue;
14125 if (Use.getResNo() != N0.getResNo())
14126 continue;
14127 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
14128 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
14129 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
14130 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
14131 // Sign bits will be lost after a zext.
14132 return false;
14133 bool Add = false;
14134 for (unsigned i = 0; i != 2; ++i) {
14135 SDValue UseOp = User->getOperand(i);
14136 if (UseOp == N0)
14137 continue;
14138 if (!isa<ConstantSDNode>(UseOp))
14139 return false;
14140 Add = true;
14141 }
14142 if (Add)
14143 ExtendNodes.push_back(User);
14144 continue;
14145 }
14146 // If truncates aren't free and there are users we can't
14147 // extend, it isn't worthwhile.
14148 if (!isTruncFree)
14149 return false;
14150 // Remember if this value is live-out.
14151 if (User->getOpcode() == ISD::CopyToReg)
14152 HasCopyToRegUses = true;
14153 }
14154
14155 if (HasCopyToRegUses) {
14156 bool BothLiveOut = false;
14157 for (SDUse &Use : N->uses()) {
14158 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
14159 BothLiveOut = true;
14160 break;
14161 }
14162 }
14163 if (BothLiveOut)
14164 // Both unextended and extended values are live out. There had better be
14165 // a good reason for the transformation.
14166 return !ExtendNodes.empty();
14167 }
14168 return true;
14169}
14170
14171void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
14172 SDValue OrigLoad, SDValue ExtLoad,
14173 ISD::NodeType ExtType) {
14174 // Extend SetCC uses if necessary.
14175 SDLoc DL(ExtLoad);
14176 for (SDNode *SetCC : SetCCs) {
14177 SmallVector<SDValue, 4> Ops;
14178
14179 for (unsigned j = 0; j != 2; ++j) {
14180 SDValue SOp = SetCC->getOperand(j);
14181 if (SOp == OrigLoad)
14182 Ops.push_back(ExtLoad);
14183 else
14184 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
14185 }
14186
14187 Ops.push_back(SetCC->getOperand(2));
14188 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
14189 }
14190}
14191
14192// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
14193SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
14194 SDValue N0 = N->getOperand(0);
14195 EVT DstVT = N->getValueType(0);
14196 EVT SrcVT = N0.getValueType();
14197
14198 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
14199 N->getOpcode() == ISD::ZERO_EXTEND) &&
14200 "Unexpected node type (not an extend)!");
14201
14202 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
14203 // For example, on a target with legal v4i32, but illegal v8i32, turn:
14204 // (v8i32 (sext (v8i16 (load x))))
14205 // into:
14206 // (v8i32 (concat_vectors (v4i32 (sextload x)),
14207 // (v4i32 (sextload (x + 16)))))
14208 // Where uses of the original load, i.e.:
14209 // (v8i16 (load x))
14210 // are replaced with:
14211 // (v8i16 (truncate
14212 // (v8i32 (concat_vectors (v4i32 (sextload x)),
14213 // (v4i32 (sextload (x + 16)))))))
14214 //
14215 // This combine is only applicable to illegal, but splittable, vectors.
14216 // All legal types, and illegal non-vector types, are handled elsewhere.
14217 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
14218 //
14219 if (N0->getOpcode() != ISD::LOAD)
14220 return SDValue();
14221
14222 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14223
14224 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
14225 !N0.hasOneUse() || !LN0->isSimple() ||
14226 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
14227 !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
14228 return SDValue();
14229
14230 SmallVector<SDNode *, 4> SetCCs;
14231 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
14231 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
14232 return SDValue();
14233
14234 ISD::LoadExtType ExtType =
14235 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
14236
14237 // Try to split the vector types to get down to legal types.
14238 EVT SplitSrcVT = SrcVT;
14239 EVT SplitDstVT = DstVT;
14240 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
14241 SplitSrcVT.getVectorNumElements() > 1) {
14242 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
14243 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
14244 }
14245
14246 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
14247 return SDValue();
14248
14249 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
14250
14251 SDLoc DL(N);
14252 const unsigned NumSplits =
14253 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
14254 const unsigned Stride = SplitSrcVT.getStoreSize();
14255 SmallVector<SDValue, 4> Loads;
14256 SmallVector<SDValue, 4> Chains;
14257
14258 SDValue BasePtr = LN0->getBasePtr();
14259 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
14260 const unsigned Offset = Idx * Stride;
14261
14262 SDValue SplitLoad =
14263 DAG.getExtLoad(ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(),
14264 BasePtr, LN0->getPointerInfo().getWithOffset(Offset),
14265 SplitSrcVT, LN0->getBaseAlign(),
14266 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14267
14268 BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(Stride), DL);
14269
14270 Loads.push_back(SplitLoad.getValue(0));
14271 Chains.push_back(SplitLoad.getValue(1));
14272 }
14273
14274 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
14275 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
14276
14277 // Simplify TF.
14278 AddToWorklist(NewChain.getNode());
14279
14280 CombineTo(N, NewValue);
14281
14282 // Replace uses of the original load (before extension)
14283 // with a truncate of the concatenated sextloaded vectors.
14284 SDValue Trunc =
14285 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
14286 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
14287 CombineTo(N0.getNode(), Trunc, NewChain);
14288 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14289}
14290
14291// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
14292// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
14293SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
14294 assert(N->getOpcode() == ISD::ZERO_EXTEND);
14295 EVT VT = N->getValueType(0);
14296 EVT OrigVT = N->getOperand(0).getValueType();
14297 if (TLI.isZExtFree(OrigVT, VT))
14298 return SDValue();
14299
14300 // and/or/xor
14301 SDValue N0 = N->getOperand(0);
14302 if (!ISD::isBitwiseLogicOp(N0.getOpcode()) ||
14303 N0.getOperand(1).getOpcode() != ISD::Constant ||
14304 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
14305 return SDValue();
14306
14307 // shl/shr
14308 SDValue N1 = N0->getOperand(0);
14309 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
14310 N1.getOperand(1).getOpcode() != ISD::Constant ||
14311 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
14312 return SDValue();
14313
14314 // load
14315 if (!isa<LoadSDNode>(N1.getOperand(0)))
14316 return SDValue();
14317 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
14318 EVT MemVT = Load->getMemoryVT();
14319 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
14320 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
14321 return SDValue();
14322
14323
14324 // If the shift op is SHL, the logic op must be AND, otherwise the result
14325 // will be wrong.
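// Illustrative reasoning: in the wide type a SHL no longer discards bits
// shifted past the original width; an AND with the zero-extended mask clears
// those high bits again, but OR/XOR would leave them set.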
14326 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
14327 return SDValue();
14328
14329 if (!N0.hasOneUse() || !N1.hasOneUse())
14330 return SDValue();
14331
14332 SmallVector<SDNode *, 4> SetCCs;
14333 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
14334 ISD::ZERO_EXTEND, SetCCs, TLI))
14335 return SDValue();
14336
14337 // Actually do the transformation.
14338 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
14339 Load->getChain(), Load->getBasePtr(),
14340 Load->getMemoryVT(), Load->getMemOperand());
14341
14342 SDLoc DL1(N1);
14343 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
14344 N1.getOperand(1));
14345
14346 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
14347 SDLoc DL0(N0);
14348 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
14349 DAG.getConstant(Mask, DL0, VT));
14350
14351 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
14352 CombineTo(N, And);
14353 if (SDValue(Load, 0).hasOneUse()) {
14354 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
14355 } else {
14356 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
14357 Load->getValueType(0), ExtLoad);
14358 CombineTo(Load, Trunc, ExtLoad.getValue(1));
14359 }
14360
14361 // N0 is dead at this point.
14362 recursivelyDeleteUnusedNodes(N0.getNode());
14363
14364 return SDValue(N,0); // Return N so it doesn't get rechecked!
14365}
14366
14367/// If we're narrowing or widening the result of a vector select and the final
14368/// size is the same size as a setcc (compare) feeding the select, then try to
14369/// apply the cast operation to the select's operands because matching vector
14370/// sizes for a select condition and other operands should be more efficient.
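/// For example (illustrative), truncating (vselect (setcc X, Y), A, B) from
/// v4i64 to v4i32 can become (vselect (setcc X, Y), (trunc A), (trunc B)), so
/// the select operands match the size of its v4i32 setcc condition.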
14371SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
14372 unsigned CastOpcode = Cast->getOpcode();
14373 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
14374 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
14375 CastOpcode == ISD::FP_ROUND) &&
14376 "Unexpected opcode for vector select narrowing/widening");
14377
14378 // We only do this transform before legal ops because the pattern may be
14379 // obfuscated by target-specific operations after legalization. Do not create
14380 // an illegal select op, however, because that may be difficult to lower.
14381 EVT VT = Cast->getValueType(0);
14382 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
14383 return SDValue();
14384
14385 SDValue VSel = Cast->getOperand(0);
14386 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
14387 VSel.getOperand(0).getOpcode() != ISD::SETCC)
14388 return SDValue();
14389
14390 // Does the setcc have the same vector size as the casted select?
14391 SDValue SetCC = VSel.getOperand(0);
14392 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
14393 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
14394 return SDValue();
14395
14396 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
14397 SDValue A = VSel.getOperand(1);
14398 SDValue B = VSel.getOperand(2);
14399 SDValue CastA, CastB;
14400 SDLoc DL(Cast);
14401 if (CastOpcode == ISD::FP_ROUND) {
14402 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
14403 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
14404 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
14405 } else {
14406 CastA = DAG.getNode(CastOpcode, DL, VT, A);
14407 CastB = DAG.getNode(CastOpcode, DL, VT, B);
14408 }
14409 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
14410}
14411
14412// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
14413// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
14414 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
14415 const TargetLowering &TLI, EVT VT,
14416 bool LegalOperations, SDNode *N,
14417 SDValue N0, ISD::LoadExtType ExtLoadType) {
14418 SDNode *N0Node = N0.getNode();
14419 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
14420 : ISD::isZEXTLoad(N0Node);
14421 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
14422 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
14423 return SDValue();
14424
14425 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14426 EVT MemVT = LN0->getMemoryVT();
14427 if ((LegalOperations || !LN0->isSimple() ||
14428 VT.isVector()) &&
14429 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
14430 return SDValue();
14431
14432 SDValue ExtLoad =
14433 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
14434 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
14435 Combiner.CombineTo(N, ExtLoad);
14436 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14437 if (LN0->use_empty())
14438 Combiner.recursivelyDeleteUnusedNodes(LN0);
14439 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14440}
14441
14442// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
14443// Only generate vector extloads when 1) they're legal, and 2) they are
14444// deemed desirable by the target. NonNegZExt can be set to true if a zero
14445// extend has the nonneg flag to allow use of sextload if profitable.
14446 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
14447 const TargetLowering &TLI, EVT VT,
14448 bool LegalOperations, SDNode *N, SDValue N0,
14449 ISD::LoadExtType ExtLoadType,
14450 ISD::NodeType ExtOpc,
14451 bool NonNegZExt = false) {
14452 if (!ISD::isNON_EXTLoad(N0.getNode()) || !ISD::isUNINDEXEDLoad(N0.getNode()))
14453 return {};
14454
14455 // If this is zext nneg, see if it would make sense to treat it as a sext.
14456 if (NonNegZExt) {
14457 assert(ExtLoadType == ISD::ZEXTLOAD && ExtOpc == ISD::ZERO_EXTEND &&
14458 "Unexpected load type or opcode");
14459 for (SDNode *User : N0->users()) {
14460 if (User->getOpcode() == ISD::SETCC) {
14461 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
14462 if (ISD::isSignedIntSetCC(CC)) {
14463 ExtLoadType = ISD::SEXTLOAD;
14464 ExtOpc = ISD::SIGN_EXTEND;
14465 break;
14466 }
14467 }
14468 }
14469 }
14470
14471 // TODO: isFixedLengthVector() should be removed, with any negative effects
14472 // on code generation then being the responsibility of that target's
14473 // implementation of isVectorLoadExtDesirable().
14474 if ((LegalOperations || VT.isFixedLengthVector() ||
14475 !cast<LoadSDNode>(N0)->isSimple()) &&
14476 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))
14477 return {};
14478
14479 bool DoXform = true;
14480 SmallVector<SDNode *, 4> SetCCs;
14481 if (!N0.hasOneUse())
14482 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
14483 if (VT.isVector())
14484 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
14485 if (!DoXform)
14486 return {};
14487
14488 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14489 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
14490 LN0->getBasePtr(), N0.getValueType(),
14491 LN0->getMemOperand());
14492 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
14493 // If the load value is used only by N, replace it via CombineTo N.
14494 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
14495 Combiner.CombineTo(N, ExtLoad);
14496 if (NoReplaceTrunc) {
14497 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14498 Combiner.recursivelyDeleteUnusedNodes(LN0);
14499 } else {
14500 SDValue Trunc =
14501 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
14502 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
14503 }
14504 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14505}
14506
14507static SDValue
14508 tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT,
14509 bool LegalOperations, SDNode *N, SDValue N0,
14510 ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) {
14511 if (!N0.hasOneUse())
14512 return SDValue();
14513
14514 auto *Ld = dyn_cast<MaskedLoadSDNode>(N0);
14515 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
14516 return SDValue();
14517
14518 if ((LegalOperations || !cast<MaskedLoadSDNode>(N0)->isSimple()) &&
14519 !TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
14520 return SDValue();
14521
14522 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
14523 return SDValue();
14524
14525 SDLoc dl(Ld);
14526 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
14527 SDValue NewLoad = DAG.getMaskedLoad(
14528 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
14529 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
14530 ExtLoadType, Ld->isExpandingLoad());
14531 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
14532 return NewLoad;
14533}
14534
14535// fold ([s|z]ext (atomic_load)) -> ([s|z]ext (truncate ([s|z]ext atomic_load)))
14536 static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG,
14537 const TargetLowering &TLI, EVT VT,
14538 SDValue N0,
14539 ISD::LoadExtType ExtLoadType) {
14540 auto *ALoad = dyn_cast<AtomicSDNode>(N0);
14541 if (!ALoad || ALoad->getOpcode() != ISD::ATOMIC_LOAD)
14542 return {};
14543 EVT MemoryVT = ALoad->getMemoryVT();
14544 if (!TLI.isAtomicLoadExtLegal(ExtLoadType, VT, MemoryVT))
14545 return {};
14546 // Can't fold into ALoad if it is already extending differently.
14547 ISD::LoadExtType ALoadExtTy = ALoad->getExtensionType();
14548 if ((ALoadExtTy == ISD::ZEXTLOAD && ExtLoadType == ISD::SEXTLOAD) ||
14549 (ALoadExtTy == ISD::SEXTLOAD && ExtLoadType == ISD::ZEXTLOAD))
14550 return {};
14551
14552 EVT OrigVT = ALoad->getValueType(0);
14553 assert(OrigVT.getSizeInBits() < VT.getSizeInBits() && "VT should be wider.");
14554 auto *NewALoad = cast<AtomicSDNode>(DAG.getAtomicLoad(
14555 ExtLoadType, SDLoc(ALoad), MemoryVT, VT, ALoad->getChain(),
14556 ALoad->getBasePtr(), ALoad->getMemOperand()));
14557 DAG.ReplaceAllUsesOfValueWith(
14558 SDValue(ALoad, 0),
14559 DAG.getNode(ISD::TRUNCATE, SDLoc(ALoad), OrigVT, SDValue(NewALoad, 0)));
14560 // Update the chain uses.
14561 DAG.ReplaceAllUsesOfValueWith(SDValue(ALoad, 1), SDValue(NewALoad, 1));
14562 return SDValue(NewALoad, 0);
14563}
14564
14565 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
14566 bool LegalOperations) {
14567 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
14568 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
14569
14570 SDValue SetCC = N->getOperand(0);
14571 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
14572 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
14573 return SDValue();
14574
14575 SDValue X = SetCC.getOperand(0);
14576 SDValue Ones = SetCC.getOperand(1);
14577 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
14578 EVT VT = N->getValueType(0);
14579 EVT XVT = X.getValueType();
14580 // setge X, C is canonicalized to setgt, so we do not need to match that
14581 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
14582 // not require the 'not' op.
14583 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
14584 // Invert and smear/shift the sign bit:
14585 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
14586 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
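// For example (illustrative, i32): sext(setgt X, -1) is -1 when X is
// non-negative and 0 otherwise, which is exactly sra(not X, 31).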
14587 SDLoc DL(N);
14588 unsigned ShCt = VT.getSizeInBits() - 1;
14589 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14590 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
14591 SDValue NotX = DAG.getNOT(DL, X, VT);
14592 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
14593 auto ShiftOpcode =
14594 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
14595 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
14596 }
14597 }
14598 return SDValue();
14599}
14600
14601SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
14602 SDValue N0 = N->getOperand(0);
14603 if (N0.getOpcode() != ISD::SETCC)
14604 return SDValue();
14605
14606 SDValue N00 = N0.getOperand(0);
14607 SDValue N01 = N0.getOperand(1);
14608 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
14609 EVT VT = N->getValueType(0);
14610 EVT N00VT = N00.getValueType();
14611 SDLoc DL(N);
14612
14613 // Propagate fast-math-flags.
14614 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14615
14616 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
14617 // the same size as the compared operands. Try to optimize sext(setcc())
14618 // if this is the case.
14619 if (VT.isVector() && !LegalOperations &&
14620 TLI.getBooleanContents(N00VT) ==
14621 TargetLowering::ZeroOrNegativeOneBooleanContent) {
14622 EVT SVT = getSetCCResultType(N00VT);
14623
14624 // If we already have the desired type, don't change it.
14625 if (SVT != N0.getValueType()) {
14626 // We know that the # elements of the results is the same as the
14627 // # elements of the compare (and the # elements of the compare result
14628 // for that matter). Check to see that they are the same size. If so,
14629 // we know that the element size of the sext'd result matches the
14630 // element size of the compare operands.
14631 if (VT.getSizeInBits() == SVT.getSizeInBits())
14632 return DAG.getSetCC(DL, VT, N00, N01, CC);
14633
14634 // If the desired elements are smaller or larger than the source
14635 // elements, we can use a matching integer vector type and then
14636 // truncate/sign extend.
14637 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
14638 if (SVT == MatchingVecType) {
14639 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
14640 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
14641 }
14642 }
14643
14644 // Try to eliminate the sext of a setcc by zexting the compare operands.
14645 if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
14646 !TLI.isOperationLegalOrCustom(ISD::SETCC, SVT)) {
14647 bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
14648 unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
14649 unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
14650
14651 // We have an unsupported narrow vector compare op that would be legal
14652 // if extended to the destination type. See if the compare operands
14653 // can be freely extended to the destination type.
14654 auto IsFreeToExtend = [&](SDValue V) {
14655 if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
14656 return true;
14657 // Match a simple, non-extended load that can be converted to a
14658 // legal {z/s}ext-load.
14659 // TODO: Allow widening of an existing {z/s}ext-load?
14660 if (!(ISD::isNON_EXTLoad(V.getNode()) &&
14661 ISD::isUNINDEXEDLoad(V.getNode()) &&
14662 cast<LoadSDNode>(V)->isSimple() &&
14663 TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
14664 return false;
14665
14666 // Non-chain users of this value must either be the setcc in this
14667 // sequence or extends that can be folded into the new {z/s}ext-load.
14668 for (SDUse &Use : V->uses()) {
14669 // Skip uses of the chain and the setcc.
14670 SDNode *User = Use.getUser();
14671 if (Use.getResNo() != 0 || User == N0.getNode())
14672 continue;
14673 // Extra users must have exactly the same cast we are about to create.
14674 // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
14675 // is enhanced similarly.
14676 if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
14677 return false;
14678 }
14679 return true;
14680 };
14681
14682 if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
14683 SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
14684 SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
14685 return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
14686 }
14687 }
14688 }
14689
14690 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
14691 // Here, T can be 1 or -1, depending on the type of the setcc and
14692 // getBooleanContents().
14693 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
14694
14695 // To determine the "true" side of the select, we need to know the high bit
14696 // of the value returned by the setcc if it evaluates to true.
14697 // If the type of the setcc is i1, then the true case of the select is just
14698 // sext(i1 1), that is, -1.
14699 // If the type of the setcc is larger (say, i8) then the value of the high
14700 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
14701 // of the appropriate width.
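// For example (illustrative): with ZeroOrNegativeOneBooleanContent on the
// wider setcc type the "true" value is all-ones, while with
// ZeroOrOneBooleanContent it is 1.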
14702 SDValue ExtTrueVal = (SetCCWidth == 1)
14703 ? DAG.getAllOnesConstant(DL, VT)
14704 : DAG.getBoolConstant(true, DL, VT, N00VT);
14705 SDValue Zero = DAG.getConstant(0, DL, VT);
14706 if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
14707 return SCC;
14708
14709 if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) {
14710 EVT SetCCVT = getSetCCResultType(N00VT);
14711 // Don't do this transform for i1 because there's a select transform
14712 // that would reverse it.
14713 // TODO: We should not do this transform at all without a target hook
14714 // because a sext is likely cheaper than a select?
14715 if (SetCCVT.getScalarSizeInBits() != 1 &&
14716 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
14717 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
14718 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
14719 }
14720 }
14721
14722 return SDValue();
14723}
14724
14725SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
14726 SDValue N0 = N->getOperand(0);
14727 EVT VT = N->getValueType(0);
14728 SDLoc DL(N);
14729
14730 if (VT.isVector())
14731 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
14732 return FoldedVOp;
14733
14734 // sext(undef) = 0 because the top bit will all be the same.
14735 if (N0.isUndef())
14736 return DAG.getConstant(0, DL, VT);
14737
14738 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14739 return Res;
14740
14741 // fold (sext (sext x)) -> (sext x)
14742 // fold (sext (aext x)) -> (sext x)
14743 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
14744 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
14745
14746 // fold (sext (aext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
14747 // fold (sext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
14748 if (N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
14749 N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG)
14750 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
14751 N0.getOperand(0));
14752
14753 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
14754 SDValue N00 = N0.getOperand(0);
14755 EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
14756 if (N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(N00, ExtVT)) {
14757 // fold (sext (sext_inreg x)) -> (sext (trunc x))
14758 if ((!LegalTypes || TLI.isTypeLegal(ExtVT))) {
14759 SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00);
14760 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
14761 }
14762
14763 // If the trunc wasn't legal, try to fold to (sext_inreg (anyext x))
14764 if (!LegalTypes || TLI.isTypeLegal(VT)) {
14765 SDValue ExtSrc = DAG.getAnyExtOrTrunc(N00, DL, VT);
14766 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, ExtSrc,
14767 N0->getOperand(1));
14768 }
14769 }
14770 }
14771
14772 if (N0.getOpcode() == ISD::TRUNCATE) {
14773 // fold (sext (truncate (load x))) -> (sext (smaller load x))
14774 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
14775 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
14776 SDNode *oye = N0.getOperand(0).getNode();
14777 if (NarrowLoad.getNode() != N0.getNode()) {
14778 CombineTo(N0.getNode(), NarrowLoad);
14779 // CombineTo deleted the truncate, if needed, but not what's under it.
14780 AddToWorklist(oye);
14781 }
14782 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14783 }
14784
14785 // See if the value being truncated is already sign extended. If so, just
14786 // eliminate the trunc/sext pair.
14787 SDValue Op = N0.getOperand(0);
14788 unsigned OpBits = Op.getScalarValueSizeInBits();
14789 unsigned MidBits = N0.getScalarValueSizeInBits();
14790 unsigned DestBits = VT.getScalarSizeInBits();
14791
14792 if (N0->getFlags().hasNoSignedWrap() ||
14793 DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
14794 if (OpBits == DestBits) {
14795 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
14796 // bits, it is already ready.
14797 return Op;
14798 }
14799
14800 if (OpBits < DestBits) {
14801 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
14802 // bits, just sext from i32.
14803 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
14804 }
14805
14806 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
14807 // bits, just truncate to i32.
14808 SDNodeFlags Flags;
14809 Flags.setNoSignedWrap(true);
14810 Flags.setNoUnsignedWrap(N0->getFlags().hasNoUnsignedWrap());
14811 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
14812 }
14813
14814 // fold (sext (truncate x)) -> (sextinreg x).
14815 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
14816 N0.getValueType())) {
14817 if (OpBits < DestBits)
14818 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
14819 else if (OpBits > DestBits)
14820 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
14821 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
14822 DAG.getValueType(N0.getValueType()));
14823 }
14824 }
14825
14826 // Try to simplify (sext (load x)).
14827 if (SDValue foldedExt =
14828 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
14829 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
14830 return foldedExt;
14831
14832 if (SDValue foldedExt =
14833 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
14834 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
14835 return foldedExt;
14836
14837 // fold (sext (load x)) to multiple smaller sextloads.
14838 // Only on illegal but splittable vectors.
14839 if (SDValue ExtLoad = CombineExtLoad(N))
14840 return ExtLoad;
14841
14842 // Try to simplify (sext (sextload x)).
14843 if (SDValue foldedExt = tryToFoldExtOfExtload(
14844 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
14845 return foldedExt;
14846
14847 // Try to simplify (sext (atomic_load x)).
14848 if (SDValue foldedExt =
14849 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::SEXTLOAD))
14850 return foldedExt;
14851
14852 // fold (sext (and/or/xor (load x), cst)) ->
14853 // (and/or/xor (sextload x), (sext cst))
14854 if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
14855 isa<LoadSDNode>(N0.getOperand(0)) &&
14856 N0.getOperand(1).getOpcode() == ISD::Constant &&
14857 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
14858 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
14859 EVT MemVT = LN00->getMemoryVT();
14860 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
14861 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
14862 SmallVector<SDNode *, 4> SetCCs;
14863 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
14864 ISD::SIGN_EXTEND, SetCCs, TLI);
14865 if (DoXform) {
14866 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
14867 LN00->getChain(), LN00->getBasePtr(),
14868 LN00->getMemoryVT(),
14869 LN00->getMemOperand());
14870 APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
14871 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
14872 ExtLoad, DAG.getConstant(Mask, DL, VT));
14873 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
14874 bool NoReplaceTruncAnd = !N0.hasOneUse();
14875 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
14876 CombineTo(N, And);
14877 // If N0 has multiple uses, change other uses as well.
14878 if (NoReplaceTruncAnd) {
14879 SDValue TruncAnd =
14880 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), And);
14881 CombineTo(N0.getNode(), TruncAnd);
14882 }
14883 if (NoReplaceTrunc) {
14884 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
14885 } else {
14886 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
14887 LN00->getValueType(0), ExtLoad);
14888 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
14889 }
14890 return SDValue(N,0); // Return N so it doesn't get rechecked!
14891 }
14892 }
14893 }
14894
14895 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
14896 return V;
14897
14898 if (SDValue V = foldSextSetcc(N))
14899 return V;
14900
14901 // fold (sext x) -> (zext x) if the sign bit is known zero.
14902 if (!TLI.isSExtCheaperThanZExt(N0.getValueType(), VT) &&
14903 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
14904 DAG.SignBitIsZero(N0))
14905 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0, SDNodeFlags::NonNeg);
14906
14907 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14908 return NewVSel;
14909
14910 // Eliminate this sign extend by doing a negation in the destination type:
14911 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
14912 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
14913 isNullOrNullSplat(N0.getOperand(0)) &&
14914 N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
14915 TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
14916 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
14917 return DAG.getNegative(Zext, DL, VT);
14918 }
14919 // Eliminate this sign extend by doing a decrement in the destination type:
14920 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
14921 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
14922 isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
14923 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
14924 TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
14925 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
14926 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
14927 }
14928
14929 // fold sext (not i1 X) -> add (zext i1 X), -1
14930 // TODO: This could be extended to handle bool vectors.
14931 if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
14932 (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
14933 TLI.isOperationLegal(ISD::ADD, VT)))) {
14934 // If we can eliminate the 'not', the sext form should be better
14935 if (SDValue NewXor = visitXOR(N0.getNode())) {
14936 // Returning N0 is a form of in-visit replacement that may have
14937 // invalidated N0.
14938 if (NewXor.getNode() == N0.getNode()) {
14939 // Return SDValue here as the xor should have already been replaced in
14940 // this sext.
14941 return SDValue();
14942 }
14943
14944 // Return a new sext with the new xor.
14945 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
14946 }
14947
14948 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
14949 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
14950 }
14951
14952 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
14953 return Res;
14954
14955 return SDValue();
14956}
14957
14958/// Given an extending node with a pop-count operand, if the target does not
14959/// support a pop-count in the narrow source type but does support it in the
14960/// destination type, widen the pop-count to the destination type.
14961static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG, const SDLoc &DL) {
14962 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
14963 Extend->getOpcode() == ISD::ANY_EXTEND) &&
14964 "Expected extend op");
14965
14966 SDValue CtPop = Extend->getOperand(0);
14967 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
14968 return SDValue();
14969
14970 EVT VT = Extend->getValueType(0);
14971 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14972 if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
14973 !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
14974 return SDValue();
14975
14976 // zext (ctpop X) --> ctpop (zext X)
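// For example (illustrative): (i64 zext (i32 ctpop X)) becomes
// (i64 ctpop (i64 zext X)) when CTPOP is supported for i64 but not for i32.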
14977 SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
14978 return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
14979}
14980
14981// If we have (zext (abs X)) where X is a type that will be promoted by type
14982// legalization, convert to (abs (sext X)). But don't extend past a legal type.
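// For example (illustrative): (i64 zext (abs i16 X)) becomes
// (i64 zext (i32 abs (i32 sext X))) on a target that promotes i16 to i32.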
14983static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) {
14984 assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend.");
14985
14986 EVT VT = Extend->getValueType(0);
14987 if (VT.isVector())
14988 return SDValue();
14989
14990 SDValue Abs = Extend->getOperand(0);
14991 if (Abs.getOpcode() != ISD::ABS || !Abs.hasOneUse())
14992 return SDValue();
14993
14994 EVT AbsVT = Abs.getValueType();
14995 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14996 if (TLI.getTypeAction(*DAG.getContext(), AbsVT) !=
14997 TargetLowering::TypePromoteInteger)
14998 return SDValue();
14999
15000 EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), AbsVT);
15001
15002 SDValue SExt =
15003 DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Abs), LegalVT, Abs.getOperand(0));
15004 SDValue NewAbs = DAG.getNode(ISD::ABS, SDLoc(Abs), LegalVT, SExt);
15005 return DAG.getZExtOrTrunc(NewAbs, SDLoc(Extend), VT);
15006}
15007
15008SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
15009 SDValue N0 = N->getOperand(0);
15010 EVT VT = N->getValueType(0);
15011 SDLoc DL(N);
15012
15013 if (VT.isVector())
15014 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
15015 return FoldedVOp;
15016
15017 // zext(undef) = 0
15018 if (N0.isUndef())
15019 return DAG.getConstant(0, DL, VT);
15020
15021 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
15022 return Res;
15023
15024 // fold (zext (zext x)) -> (zext x)
15025 // fold (zext (aext x)) -> (zext x)
15026 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
15027 SDNodeFlags Flags;
15028 if (N0.getOpcode() == ISD::ZERO_EXTEND)
15029 Flags.setNonNeg(N0->getFlags().hasNonNeg());
15030 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0), Flags);
15031 }
15032
15033 // fold (zext (aext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
15034 // fold (zext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
15035 if (N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
15036 N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG)
15037 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, N0.getOperand(0));
15038
15039 // fold (zext (truncate x)) -> (zext x) or
15040 // (zext (truncate x)) -> (truncate x)
15041 // This is valid when the truncated bits of x are already zero.
15042 SDValue Op;
15043 KnownBits Known;
15044 if (isTruncateOf(DAG, N0, Op, Known)) {
15045 APInt TruncatedBits =
15046 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
15047 APInt(Op.getScalarValueSizeInBits(), 0) :
15048 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
15049 N0.getScalarValueSizeInBits(),
15050 std::min(Op.getScalarValueSizeInBits(),
15051 VT.getScalarSizeInBits()));
15052 if (TruncatedBits.isSubsetOf(Known.Zero)) {
15053 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
15054 DAG.salvageDebugInfo(*N0.getNode());
15055
15056 return ZExtOrTrunc;
15057 }
15058 }
15059
15060 // fold (zext (truncate x)) -> (and x, mask)
15061 if (N0.getOpcode() == ISD::TRUNCATE) {
15062 // fold (zext (truncate (load x))) -> (zext (smaller load x))
15063 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
15064 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
15065 SDNode *oye = N0.getOperand(0).getNode();
15066 if (NarrowLoad.getNode() != N0.getNode()) {
15067 CombineTo(N0.getNode(), NarrowLoad);
15068 // CombineTo deleted the truncate, if needed, but not what's under it.
15069 AddToWorklist(oye);
15070 }
15071 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15072 }
15073
15074 EVT SrcVT = N0.getOperand(0).getValueType();
15075 EVT MinVT = N0.getValueType();
15076
15077 if (N->getFlags().hasNonNeg()) {
15078 SDValue Op = N0.getOperand(0);
15079 unsigned OpBits = SrcVT.getScalarSizeInBits();
15080 unsigned MidBits = MinVT.getScalarSizeInBits();
15081 unsigned DestBits = VT.getScalarSizeInBits();
15082
15083 if (N0->getFlags().hasNoSignedWrap() ||
15084 DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
15085 if (OpBits == DestBits) {
15086 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
15087 // bits, it is already ready.
15088 return Op;
15089 }
15090
15091 if (OpBits < DestBits) {
15092 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
15093 // bits, just sext from i32.
15094 // FIXME: This can probably be ZERO_EXTEND nneg?
15095 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
15096 }
15097
15098 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
15099 // bits, just truncate to i32.
15100 SDNodeFlags Flags;
15101 Flags.setNoSignedWrap(true);
15102 Flags.setNoUnsignedWrap(true);
15103 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
15104 }
15105 }
15106
15107 // Try to mask before the extension to avoid having to generate a larger mask,
15108 // possibly over several sub-vectors.
15109 if (SrcVT.bitsLT(VT) && VT.isVector()) {
15110 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
15112 SDValue Op = N0.getOperand(0);
15113 Op = DAG.getZeroExtendInReg(Op, DL, MinVT);
15114 AddToWorklist(Op.getNode());
15115 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
15116 // Transfer the debug info; the new node is equivalent to N0.
15117 DAG.transferDbgValues(N0, ZExtOrTrunc);
15118 return ZExtOrTrunc;
15119 }
15120 }
15121
15122 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
15123 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
15124 AddToWorklist(Op.getNode());
15125 SDValue And = DAG.getZeroExtendInReg(Op, DL, MinVT);
15126 // We may safely transfer the debug info describing the truncate node over
15127 // to the equivalent and operation.
15128 DAG.transferDbgValues(N0, And);
15129 return And;
15130 }
15131 }
15132
15133 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
15134 // if either of the casts is not free.
15135 if (N0.getOpcode() == ISD::AND &&
15136 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
15137 N0.getOperand(1).getOpcode() == ISD::Constant &&
15138 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType()) ||
15139 !TLI.isZExtFree(N0.getValueType(), VT))) {
15140 SDValue X = N0.getOperand(0).getOperand(0);
15141 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
15142 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
15143 return DAG.getNode(ISD::AND, DL, VT,
15144 X, DAG.getConstant(Mask, DL, VT));
15145 }
15146
15147 // Try to simplify (zext (load x)).
15148 if (SDValue foldedExt = tryToFoldExtOfLoad(
15149 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD,
15150 ISD::ZERO_EXTEND, N->getFlags().hasNonNeg()))
15151 return foldedExt;
15152
15153 if (SDValue foldedExt =
15154 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
15155 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
15156 return foldedExt;
15157
15158 // fold (zext (load x)) to multiple smaller zextloads.
15159 // Only on illegal but splittable vectors.
15160 if (SDValue ExtLoad = CombineExtLoad(N))
15161 return ExtLoad;
15162
15163 // Try to simplify (zext (atomic_load x)).
15164 if (SDValue foldedExt =
15165 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::ZEXTLOAD))
15166 return foldedExt;
15167
15168 // fold (zext (and/or/xor (load x), cst)) ->
15169 // (and/or/xor (zextload x), (zext cst))
15170 // Unless (and (load x) cst) will match as a zextload already and has
15171 // additional users, or the zext is already free.
15172 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && !TLI.isZExtFree(N0, VT) &&
15173 isa<LoadSDNode>(N0.getOperand(0)) &&
15174 N0.getOperand(1).getOpcode() == ISD::Constant &&
15175 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
15176 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
15177 EVT MemVT = LN00->getMemoryVT();
15178 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
15179 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
15180 bool DoXform = true;
15181 SmallVector<SDNode *, 4> SetCCs;
15182 if (!N0.hasOneUse()) {
15183 if (N0.getOpcode() == ISD::AND) {
15184 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
15185 EVT LoadResultTy = AndC->getValueType(0);
15186 EVT ExtVT;
15187 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
15188 DoXform = false;
15189 }
15190 }
15191 if (DoXform)
15192 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
15193 ISD::ZERO_EXTEND, SetCCs, TLI);
15194 if (DoXform) {
15195 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
15196 LN00->getChain(), LN00->getBasePtr(),
15197 LN00->getMemoryVT(),
15198 LN00->getMemOperand());
15199 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
15200 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
15201 ExtLoad, DAG.getConstant(Mask, DL, VT));
15202 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
15203 bool NoReplaceTruncAnd = !N0.hasOneUse();
15204 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
15205 CombineTo(N, And);
15206 // If N0 has multiple uses, change other uses as well.
15207 if (NoReplaceTruncAnd) {
15208 SDValue TruncAnd =
15209 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
15210 CombineTo(N0.getNode(), TruncAnd);
15211 }
15212 if (NoReplaceTrunc) {
15213 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
15214 } else {
15215 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
15216 LN00->getValueType(0), ExtLoad);
15217 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
15218 }
15219 return SDValue(N,0); // Return N so it doesn't get rechecked!
15220 }
15221 }
15222 }
15223
15224 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
15225 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
15226 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
15227 return ZExtLoad;
15228
15229 // Try to simplify (zext (zextload x)).
15230 if (SDValue foldedExt = tryToFoldExtOfExtload(
15231 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
15232 return foldedExt;
15233
15234 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
15235 return V;
15236
15237 if (N0.getOpcode() == ISD::SETCC) {
15238 // Propagate fast-math-flags.
15239 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
15240
15241 // Only do this before legalize for now.
15242 if (!LegalOperations && VT.isVector() &&
15243 N0.getValueType().getVectorElementType() == MVT::i1) {
15244 EVT N00VT = N0.getOperand(0).getValueType();
15245 if (getSetCCResultType(N00VT) == N0.getValueType())
15246 return SDValue();
15247
15248 // We know that the # elements of the results is the same as the #
15249 // elements of the compare (and the # elements of the compare result for
15250 // that matter). Check to see that they are the same size. If so, we know
15251 // that the element size of the sext'd result matches the element size of
15252 // the compare operands.
15253 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
15254 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
15255 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
15256 N0.getOperand(1), N0.getOperand(2));
15257 return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
15258 }
15259
15260 // If the desired elements are smaller or larger than the source
15261 // elements we can use a matching integer vector type and then
15262 // truncate/any extend followed by zext_in_reg.
15263 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
15264 SDValue VsetCC =
15265 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
15266 N0.getOperand(1), N0.getOperand(2));
15267 return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
15268 N0.getValueType());
15269 }
15270
15271 // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
15272 EVT N0VT = N0.getValueType();
15273 EVT N00VT = N0.getOperand(0).getValueType();
15274 if (SDValue SCC = SimplifySelectCC(
15275 DL, N0.getOperand(0), N0.getOperand(1),
15276 DAG.getBoolConstant(true, DL, N0VT, N00VT),
15277 DAG.getBoolConstant(false, DL, N0VT, N00VT),
15278 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
15279 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
15280 }
15281
15282 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
15283 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
15284 !TLI.isZExtFree(N0, VT)) {
15285 SDValue ShVal = N0.getOperand(0);
15286 SDValue ShAmt = N0.getOperand(1);
15287 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt)) {
15288 if (ShVal.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) {
15289 if (N0.getOpcode() == ISD::SHL) {
15290 // If the original shl may be shifting out bits, do not perform this
15291 // transformation.
15292 unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
15293 ShVal.getOperand(0).getValueSizeInBits();
15294 if (ShAmtC->getAPIntValue().ugt(KnownZeroBits)) {
15295 // If the shift is too large, then see if we can deduce that the
15296 // shift is safe anyway.
15297
15298 // Check if the bits being shifted out are known to be zero.
15299 KnownBits KnownShVal = DAG.computeKnownBits(ShVal);
15300 if (ShAmtC->getAPIntValue().ugt(KnownShVal.countMinLeadingZeros()))
15301 return SDValue();
15302 }
15303 }
15304
15305 // Ensure that the shift amount is wide enough for the shifted value.
15306 if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
15307 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
15308
15309 return DAG.getNode(N0.getOpcode(), DL, VT,
15310 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ShVal), ShAmt);
15311 }
15312 }
15313 }
15314
15315 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15316 return NewVSel;
15317
15318 if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
15319 return NewCtPop;
15320
15321 if (SDValue V = widenAbs(N, DAG))
15322 return V;
15323
15324 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
15325 return Res;
15326
15327 // CSE zext nneg with sext if the zext is not free.
15328 if (N->getFlags().hasNonNeg() && !TLI.isZExtFree(N0.getValueType(), VT)) {
15329 SDNode *CSENode = DAG.getNodeIfExists(ISD::SIGN_EXTEND, N->getVTList(), N0);
15330 if (CSENode)
15331 return SDValue(CSENode, 0);
15332 }
15333
15334 return SDValue();
15335}
15336
15337SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
15338 SDValue N0 = N->getOperand(0);
15339 EVT VT = N->getValueType(0);
15340 SDLoc DL(N);
15341
15342 // aext(undef) = undef
15343 if (N0.isUndef())
15344 return DAG.getUNDEF(VT);
15345
15346 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
15347 return Res;
15348
15349 // fold (aext (aext x)) -> (aext x)
15350 // fold (aext (zext x)) -> (zext x)
15351 // fold (aext (sext x)) -> (sext x)
15352 if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::ZERO_EXTEND ||
15353 N0.getOpcode() == ISD::SIGN_EXTEND) {
15354 SDNodeFlags Flags;
15355 if (N0.getOpcode() == ISD::ZERO_EXTEND)
15356 Flags.setNonNeg(N0->getFlags().hasNonNeg());
15357 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
15358 }
15359
15360 // fold (aext (aext_extend_vector_inreg x)) -> (aext_extend_vector_inreg x)
15361 // fold (aext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
15362 // fold (aext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
15363 if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
15364 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
15365 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
15366 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
15367
15368 // fold (aext (truncate (load x))) -> (aext (smaller load x))
15369 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
15370 if (N0.getOpcode() == ISD::TRUNCATE) {
15371 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
15372 SDNode *oye = N0.getOperand(0).getNode();
15373 if (NarrowLoad.getNode() != N0.getNode()) {
15374 CombineTo(N0.getNode(), NarrowLoad);
15375 // CombineTo deleted the truncate, if needed, but not what's under it.
15376 AddToWorklist(oye);
15377 }
15378 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15379 }
15380 }
15381
15382 // fold (aext (truncate x))
15383 if (N0.getOpcode() == ISD::TRUNCATE)
15384 return DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
15385
15386 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
15387 // if the trunc is not free.
15388 if (N0.getOpcode() == ISD::AND &&
15389 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
15390 N0.getOperand(1).getOpcode() == ISD::Constant &&
15391 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType())) {
15392 SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
15393 SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
15394 assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
15395 return DAG.getNode(ISD::AND, DL, VT, X, Y);
15396 }
15397
15398 // fold (aext (load x)) -> (aext (truncate (extload x)))
15399 // None of the supported targets knows how to perform load and any_ext
15400 // on vectors in one instruction, so attempt to fold to zext instead.
15401 if (VT.isVector()) {
15402 // Try to simplify (zext (load x)).
15403 if (SDValue foldedExt =
15404 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
15405 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
15406 return foldedExt;
15407 } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
15408 ISD::isUNINDEXEDLoad(N0.getNode()) &&
15409 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
15410 bool DoXform = true;
15411 SmallVector<SDNode *, 4> SetCCs;
15412 if (!N0.hasOneUse())
15413 DoXform =
15414 ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
15415 if (DoXform) {
15416 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15417 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, LN0->getChain(),
15418 LN0->getBasePtr(), N0.getValueType(),
15419 LN0->getMemOperand());
15420 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
15421 // If the load value is used only by N, replace it via CombineTo N.
15422 bool NoReplaceTrunc = N0.hasOneUse();
15423 CombineTo(N, ExtLoad);
15424 if (NoReplaceTrunc) {
15425 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
15426 recursivelyDeleteUnusedNodes(LN0);
15427 } else {
15428 SDValue Trunc =
15429 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
15430 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
15431 }
15432 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15433 }
15434 }
15435
15436 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
15437 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
15438 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
15439 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
15440 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
15441 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15442 ISD::LoadExtType ExtType = LN0->getExtensionType();
15443 EVT MemVT = LN0->getMemoryVT();
15444 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
15445 SDValue ExtLoad =
15446 DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), LN0->getBasePtr(),
15447 MemVT, LN0->getMemOperand());
15448 CombineTo(N, ExtLoad);
15449 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
15450 recursivelyDeleteUnusedNodes(LN0);
15451 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15452 }
15453 }
15454
15455 if (N0.getOpcode() == ISD::SETCC) {
15456 // Propagate fast-math-flags.
15457 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
15458
15459 // For vectors:
15460 // aext(setcc) -> vsetcc
15461 // aext(setcc) -> truncate(vsetcc)
15462 // aext(setcc) -> aext(vsetcc)
15463 // Only do this before legalize for now.
15464 if (VT.isVector() && !LegalOperations) {
15465 EVT N00VT = N0.getOperand(0).getValueType();
15466 if (getSetCCResultType(N00VT) == N0.getValueType())
15467 return SDValue();
15468
15469 // We know that the # elements of the results is the same as the
15470 // # elements of the compare (and the # elements of the compare result
15471 // for that matter). Check to see that they are the same size. If so,
15472 // we know that the element size of the sext'd result matches the
15473 // element size of the compare operands.
15474 if (VT.getSizeInBits() == N00VT.getSizeInBits())
15475 return DAG.getSetCC(DL, VT, N0.getOperand(0), N0.getOperand(1),
15476 cast<CondCodeSDNode>(N0.getOperand(2))->get());
15477
15478 // If the desired elements are smaller or larger than the source
15479 // elements we can use a matching integer vector type and then
15480 // truncate/any extend
15481 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
15482 SDValue VsetCC = DAG.getSetCC(
15483 DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1),
15484 cast<CondCodeSDNode>(N0.getOperand(2))->get());
15485 return DAG.getAnyExtOrTrunc(VsetCC, DL, VT);
15486 }
15487
15488 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
15489 if (SDValue SCC = SimplifySelectCC(
15490 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
15491 DAG.getConstant(0, DL, VT),
15492 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
15493 return SCC;
15494 }
15495
15496 if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
15497 return NewCtPop;
15498
15499 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
15500 return Res;
15501
15502 return SDValue();
15503}
15504
15505SDValue DAGCombiner::visitAssertExt(SDNode *N) {
15506 unsigned Opcode = N->getOpcode();
15507 SDValue N0 = N->getOperand(0);
15508 SDValue N1 = N->getOperand(1);
15509 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
15510
15511 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
15512 if (N0.getOpcode() == Opcode &&
15513 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
15514 return N0;
15515
15516 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
15517 N0.getOperand(0).getOpcode() == Opcode) {
15518 // We have an assert, truncate, assert sandwich. Make one stronger assert
15519 // by asserting on the smallest asserted type to the larger source type.
15520 // This eliminates the later assert:
15521 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
15522 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
15523 SDLoc DL(N);
15524 SDValue BigA = N0.getOperand(0);
15525 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
15526 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
15527 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
15528 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
15529 BigA.getOperand(0), MinAssertVTVal);
15530 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
15531 }
15532
15533 // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
15534 // than X. Just move the AssertZext in front of the truncate and drop the
15535 // AssertSExt.
15536 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
15537 N0.getOperand(0).getOpcode() == ISD::AssertSext &&
15538 Opcode == ISD::AssertZext) {
15539 SDValue BigA = N0.getOperand(0);
15540 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
15541 if (AssertVT.bitsLT(BigA_AssertVT)) {
15542 SDLoc DL(N);
15543 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
15544 BigA.getOperand(0), N1);
15545 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
15546 }
15547 }
15548
15549 if (Opcode == ISD::AssertZext && N0.getOpcode() == ISD::AND &&
15550 isa<ConstantSDNode>(N0.getOperand(1))) {
15551 const APInt &Mask = N0.getConstantOperandAPInt(1);
15552
15553 // If we have (AssertZext (and (AssertSext X, iX), M), iY) and Y is smaller
15554 // than X, and the And doesn't change the lower iX bits, we can move the
15555 // AssertZext in front of the And and drop the AssertSext.
15556 if (N0.getOperand(0).getOpcode() == ISD::AssertSext && N0.hasOneUse()) {
15557 SDValue BigA = N0.getOperand(0);
15558 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
15559 if (AssertVT.bitsLT(BigA_AssertVT) &&
15560 Mask.countr_one() >= BigA_AssertVT.getScalarSizeInBits()) {
15561 SDLoc DL(N);
15562 SDValue NewAssert =
15563 DAG.getNode(Opcode, DL, N->getValueType(0), BigA.getOperand(0), N1);
15564 return DAG.getNode(ISD::AND, DL, N->getValueType(0), NewAssert,
15565 N0.getOperand(1));
15566 }
15567 }
15568
15569 // Remove AssertZext entirely if the mask guarantees the assertion cannot
15570 // fail.
15571 // TODO: Use KB countMinLeadingZeros to handle non-constant masks?
15572 if (Mask.isIntN(AssertVT.getScalarSizeInBits()))
15573 return N0;
15574 }
15575
15576 return SDValue();
15577}
15578
15579SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
15580 SDLoc DL(N);
15581
15582 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
15583 SDValue N0 = N->getOperand(0);
15584
15585 // Fold (assertalign (assertalign x, AL0), AL1) ->
15586 // (assertalign x, max(AL0, AL1))
15587 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
15588 return DAG.getAssertAlign(DL, N0.getOperand(0),
15589 std::max(AL, AAN->getAlign()));
15590
15591 // In rare cases, there are trivial arithmetic ops in source operands. Sink
15592 // this assert down to source operands so that those arithmetic ops could be
15593 // exposed to the DAG combining.
15594 switch (N0.getOpcode()) {
15595 default:
15596 break;
15597 case ISD::ADD:
15598 case ISD::PTRADD:
15599 case ISD::SUB: {
15600 unsigned AlignShift = Log2(AL);
15601 SDValue LHS = N0.getOperand(0);
15602 SDValue RHS = N0.getOperand(1);
15603 unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
15604 unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
15605 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
15606 if (LHSAlignShift < AlignShift)
15607 LHS = DAG.getAssertAlign(DL, LHS, AL);
15608 if (RHSAlignShift < AlignShift)
15609 RHS = DAG.getAssertAlign(DL, RHS, AL);
15610 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
15611 }
15612 break;
15613 }
15614 }
15615
15616 return SDValue();
15617}
15618
15619/// If the result of a load is shifted/masked/truncated to an effectively
15620/// narrower type, try to transform the load to a narrower type and/or
15621/// use an extending load.
15622SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
15623 unsigned Opc = N->getOpcode();
15624
15625 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
15626 SDValue N0 = N->getOperand(0);
15627 EVT VT = N->getValueType(0);
15628 EVT ExtVT = VT;
15629
15630 // This transformation isn't valid for vector loads.
15631 if (VT.isVector())
15632 return SDValue();
15633
15634 // The ShAmt variable is used to indicate that we've consumed a right
15635 // shift. I.e. we want to narrow the width of the load by skipping to load the
15636 // ShAmt least significant bits.
15637 unsigned ShAmt = 0;
15638 // A special case is when the least significant bits from the load are masked
15639 // away, but using an AND rather than a right shift. ShiftedOffset is used
15640 // to indicate how many bits the narrowed load should be left-shifted to get
15641 // the result.
15642 unsigned ShiftedOffset = 0;
15643 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
15644 // extended to VT.
15645 if (Opc == ISD::SIGN_EXTEND_INREG) {
15646 ExtType = ISD::SEXTLOAD;
15647 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
15648 } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
15649 // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
15650 // value, or it may be shifting a higher subword, half or byte into the
15651 // lowest bits.
15652
15653 // Only handle shift with constant shift amount, and the shiftee must be a
15654 // load.
15655 auto *LN = dyn_cast<LoadSDNode>(N0);
15656 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
15657 if (!N1C || !LN)
15658 return SDValue();
15659 // If the shift amount is larger than the memory type then we're not
15660 // accessing any of the loaded bytes.
15661 ShAmt = N1C->getZExtValue();
15662 uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
15663 if (MemoryWidth <= ShAmt)
15664 return SDValue();
15665 // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
15666 ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
15667 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
15668 // If original load is a SEXTLOAD then we can't simply replace it by a
15669 // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD
15670 // followed by a ZEXT, but that is not handled at the moment). Similarly if
15671 // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
15672 if ((LN->getExtensionType() == ISD::SEXTLOAD ||
15673 LN->getExtensionType() == ISD::ZEXTLOAD) &&
15674 LN->getExtensionType() != ExtType)
15675 return SDValue();
15676 } else if (Opc == ISD::AND) {
15677 // An AND with a constant mask is the same as a truncate + zero-extend.
15678 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
15679 if (!AndC)
15680 return SDValue();
15681
15682 const APInt &Mask = AndC->getAPIntValue();
15683 unsigned ActiveBits = 0;
15684 if (Mask.isMask()) {
15685 ActiveBits = Mask.countr_one();
15686 } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
15687 ShiftedOffset = ShAmt;
15688 } else {
15689 return SDValue();
15690 }
15691
15692 ExtType = ISD::ZEXTLOAD;
15693 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
15694 }
15695
15696 // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
15697 // a right shift. Here we redo some of those checks, to possibly adjust the
15698 // ExtVT even further based on "a masking AND". We could also end up here for
15699 // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
15700 // need to be done here as well.
15701 if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
15702 SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
15703 // Bail out when the SRL has more than one use. This is done for historical
15704 // (undocumented) reasons. Maybe the intent was to guard the AND-masking
15705 // check below? And maybe it could be non-profitable to do the transform in
15706 // case the SRL has multiple uses and we get here with Opc!=ISD::SRL?
15707 // FIXME: Can't we just skip this check for the Opc==ISD::SRL case?
15708 if (!SRL.hasOneUse())
15709 return SDValue();
15710
15711 // Only handle shift with constant shift amount, and the shiftee must be a
15712 // load.
15713 auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
15714 auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
15715 if (!SRL1C || !LN)
15716 return SDValue();
15717
15718 // If the shift amount is larger than the input type then we're not
15719 // accessing any of the loaded bytes. If the load was a zextload/extload
15720 // then the result of the shift+trunc is zero/undef (handled elsewhere).
15721 ShAmt = SRL1C->getZExtValue();
15722 uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
15723 if (ShAmt >= MemoryWidth)
15724 return SDValue();
15725
15726 // Because a SRL must be assumed to *need* to zero-extend the high bits
15727 // (as opposed to anyext the high bits), we can't combine the zextload
15728 // lowering of SRL and an sextload.
15729 if (LN->getExtensionType() == ISD::SEXTLOAD)
15730 return SDValue();
15731
15732 // Avoid reading outside the memory accessed by the original load (could
15733 // happen if we only adjust the load base pointer by ShAmt). Instead we
15734 // try to narrow the load even further. The typical scenario here is:
15735 // (i64 (truncate (i96 (srl (load x), 64)))) ->
15736 // (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
15737 if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
15738 // Don't replace sextload by zextload.
15739 if (ExtType == ISD::SEXTLOAD)
15740 return SDValue();
15741 // Narrow the load.
15742 ExtType = ISD::ZEXTLOAD;
15743 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
15744 }
15745
15746 // If the SRL is only used by a masking AND, we may be able to adjust
15747 // the ExtVT to make the AND redundant.
15748 SDNode *Mask = *(SRL->user_begin());
15749 if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
15750 isa<ConstantSDNode>(Mask->getOperand(1))) {
15751 unsigned Offset, ActiveBits;
15752 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
15753 if (ShiftMask.isMask()) {
15754 EVT MaskedVT =
15755 EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countr_one());
15756 // If the mask is smaller, recompute the type.
15757 if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
15758 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
15759 ExtVT = MaskedVT;
15760 } else if (ExtType == ISD::ZEXTLOAD &&
15761 ShiftMask.isShiftedMask(Offset, ActiveBits) &&
15762 (Offset + ShAmt) < VT.getScalarSizeInBits()) {
15763 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
15764 // If the mask is shifted we can use a narrower load and a shl to insert
15765 // the trailing zeros.
15766 if (((Offset + ActiveBits) <= ExtVT.getScalarSizeInBits()) &&
15767 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT)) {
15768 ExtVT = MaskedVT;
15769 ShAmt = Offset + ShAmt;
15770 ShiftedOffset = Offset;
15771 }
15772 }
15773 }
15774
15775 N0 = SRL.getOperand(0);
15776 }
15777
15778 // If the load is shifted left (and the result isn't shifted back right), we
15779 // can fold a truncate through the shift. The typical scenario is that N
15780 // points at a TRUNCATE here so the attempted fold is:
15781 // (truncate (shl (load x), c))) -> (shl (narrow load x), c)
15782 // ShLeftAmt will indicate how much a narrowed load should be shifted left.
15783 unsigned ShLeftAmt = 0;
15784 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
15785 ExtVT == VT && TLI.isNarrowingProfitable(N, N0.getValueType(), VT)) {
15786 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
15787 ShLeftAmt = N01->getZExtValue();
15788 N0 = N0.getOperand(0);
15789 }
15790 }
15791
15792 // If we haven't found a load, we can't narrow it.
15793 if (!isa<LoadSDNode>(N0))
15794 return SDValue();
15795
15796 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15797 // Reducing the width of a volatile load is illegal. For atomics, we may be
15798 // able to reduce the width provided we never widen again. (see D66309)
15799 if (!LN0->isSimple() ||
15800 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
15801 return SDValue();
15802
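// On big-endian targets the narrowed value occupies the opposite end of the
// original loaded value, so the bit offset must be mirrored within the load's
// store size before it is turned into a pointer adjustment.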
15803 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
15804 unsigned LVTStoreBits =
15805 LN0->getMemoryVT().getStoreSizeInBits().getFixedValue();
15806 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedValue();
15807 return LVTStoreBits - EVTStoreBits - ShAmt;
15808 };
15809
15810 // We need to adjust the pointer to the load by ShAmt bits in order to load
15811 // the correct bytes.
15812 unsigned PtrAdjustmentInBits =
15813 DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
15814
15815 uint64_t PtrOff = PtrAdjustmentInBits / 8;
15816 SDLoc DL(LN0);
15817 // The original load itself didn't wrap, so an offset within it doesn't.
15818 SDValue NewPtr =
15819 DAG.getMemBasePlusOffset(LN0->getBasePtr(), TypeSize::getFixed(PtrOff),
15820 DL, SDNodeFlags::NoUnsignedWrap);
15821 AddToWorklist(NewPtr.getNode());
15822
15823 SDValue Load;
15824 if (ExtType == ISD::NON_EXTLOAD) {
15825 const MDNode *OldRanges = LN0->getRanges();
15826 const MDNode *NewRanges = nullptr;
15827 // If LSBs are loaded and the truncated ConstantRange for the OldRanges
15828 // metadata is not the full-set for the new width then create a NewRanges
15829 // metadata for the truncated load
15830 if (ShAmt == 0 && OldRanges) {
15831 ConstantRange CR = getConstantRangeFromMetadata(*OldRanges);
15832 unsigned BitSize = VT.getScalarSizeInBits();
15833
15834 // It is possible for an 8-bit extending load with 8-bit range
15835 // metadata to be narrowed to an 8-bit load. This guard is necessary to
15836 // ensure that truncation is strictly smaller.
15837 if (CR.getBitWidth() > BitSize) {
15838 ConstantRange TruncatedCR = CR.truncate(BitSize);
15839 if (!TruncatedCR.isFullSet()) {
15840 Metadata *Bounds[2] = {
15841 ConstantAsMetadata::get(
15842 ConstantInt::get(*DAG.getContext(), TruncatedCR.getLower())),
15843 ConstantAsMetadata::get(
15844 ConstantInt::get(*DAG.getContext(), TruncatedCR.getUpper()))};
15845 NewRanges = MDNode::get(*DAG.getContext(), Bounds);
15846 }
15847 } else if (CR.getBitWidth() == BitSize)
15848 NewRanges = OldRanges;
15849 }
15850 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
15851 LN0->getPointerInfo().getWithOffset(PtrOff),
15852 LN0->getBaseAlign(), LN0->getMemOperand()->getFlags(),
15853 LN0->getAAInfo(), NewRanges);
15854 } else
15855 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
15856 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
15857 LN0->getBaseAlign(), LN0->getMemOperand()->getFlags(),
15858 LN0->getAAInfo());
15859
15860 // Replace the old load's chain with the new load's chain.
15861 WorklistRemover DeadNodes(*this);
15862 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15863
15864 // Shift the result left, if we've swallowed a left shift.
15865 SDValue Result = Load;
15866 if (ShLeftAmt != 0) {
15867 // If the shift amount is as large as the result size (but, presumably,
15868 // no larger than the source) then the useful bits of the result are
15869 // zero; we can't simply return the shortened shift, because the result
15870 // of that operation is undefined.
15871 if (ShLeftAmt >= VT.getScalarSizeInBits())
15872 Result = DAG.getConstant(0, DL, VT);
15873 else
15874 Result = DAG.getNode(ISD::SHL, DL, VT, Result,
15875 DAG.getShiftAmountConstant(ShLeftAmt, VT, DL));
15876 }
15877
15878 if (ShiftedOffset != 0) {
15879 // We're using a shifted mask, so the load now has an offset. This means
15880 // that data has been loaded into lower bytes than it would have been
15881 // before, so we need to shl the loaded data into the correct position in the
15882 // register.
15883 SDValue ShiftC = DAG.getConstant(ShiftedOffset, DL, VT);
15884 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
15885 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
15886 }
15887
15888 // Return the new loaded value.
15889 return Result;
15890}
15891
15892SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
15893 SDValue N0 = N->getOperand(0);
15894 SDValue N1 = N->getOperand(1);
15895 EVT VT = N->getValueType(0);
15896 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
15897 unsigned VTBits = VT.getScalarSizeInBits();
15898 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
15899 SDLoc DL(N);
15900
15901 // sext_in_reg(undef) = 0 because the top bits will all be the same.
15902 if (N0.isUndef())
15903 return DAG.getConstant(0, DL, VT);
15904
15905 // fold (sext_in_reg c1) -> c1
15906 if (SDValue C =
15907 DAG.FoldConstantArithmetic(ISD::SIGN_EXTEND_INREG, DL, VT, {N0, N1}))
15908 return C;
15909
15910 // If the input is already sign extended, just drop the extension.
15911 if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
15912 return N0;
15913
15914 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
15915 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
15916 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
15917 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N0.getOperand(0), N1);
15918
15919 // fold (sext_in_reg (sext x)) -> (sext x)
15920 // fold (sext_in_reg (aext x)) -> (sext x)
15921 // if x is small enough or if we know that x has more than 1 sign bit and the
15922 // sign_extend_inreg is extending from one of them.
15923 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
15924 SDValue N00 = N0.getOperand(0);
15925 unsigned N00Bits = N00.getScalarValueSizeInBits();
15926 if ((N00Bits <= ExtVTBits ||
15927 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
15928 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
15929 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N00);
15930 }
15931
15932 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
15933 // if x is small enough or if we know that x has more than 1 sign bit and the
15934 // sign_extend_inreg is extending from one of them.
15935 if (ISD::isExtVecInRegOpcode(N0.getOpcode())) {
15936 SDValue N00 = N0.getOperand(0);
15937 unsigned N00Bits = N00.getScalarValueSizeInBits();
15938 bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
15939 if ((N00Bits == ExtVTBits ||
15940 (!IsZext && (N00Bits < ExtVTBits ||
15941 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
15942 (!LegalOperations ||
15943 TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
15944 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, N00);
15945 }
15946
15947 // fold (sext_in_reg (zext x)) -> (sext x)
15948 // iff we are extending the source sign bit.
15949 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
15950 SDValue N00 = N0.getOperand(0);
15951 if (N00.getScalarValueSizeInBits() == ExtVTBits &&
15952 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
15953 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N00);
15954 }
15955
15956 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
15957 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
15958 return DAG.getZeroExtendInReg(N0, DL, ExtVT);
15959
15960 // fold operands of sext_in_reg based on knowledge that the top bits are not
15961 // demanded.
15962 if (SimplifyDemandedBits(SDValue(N, 0)))
15963 return SDValue(N, 0);
15964
15965 // fold (sext_in_reg (load x)) -> (smaller sextload x)
15966 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
15967 if (SDValue NarrowLoad = reduceLoadWidth(N))
15968 return NarrowLoad;
15969
15970 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
15971 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
15972 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
15973 if (N0.getOpcode() == ISD::SRL) {
15974 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
15975 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
15976 // We can turn this into an SRA iff the input to the SRL is already sign
15977 // extended enough.
15978 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
15979 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
15980 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
15981 N0.getOperand(1));
15982 }
15983 }
15984
15985 // fold (sext_inreg (extload x)) -> (sextload x)
15986 // If sextload is not supported by target, we can only do the combine when
15987 // load has one use. Doing otherwise can block folding the extload with other
15988 // extends that the target does support.
15989 if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
15990 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
15991 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
15992 N0.hasOneUse()) ||
15993 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
15994 auto *LN0 = cast<LoadSDNode>(N0);
15995 SDValue ExtLoad =
15996 DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
15997 LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
15998 CombineTo(N, ExtLoad);
15999 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
16000 AddToWorklist(ExtLoad.getNode());
16001 return SDValue(N, 0); // Return N so it doesn't get rechecked!
16002 }
16003
16004 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
16005 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
16006 N0.hasOneUse() && ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
16007 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
16008 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
16009 auto *LN0 = cast<LoadSDNode>(N0);
16010 SDValue ExtLoad =
16011 DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
16012 LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
16013 CombineTo(N, ExtLoad);
16014 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
16015 return SDValue(N, 0); // Return N so it doesn't get rechecked!
16016 }
16017
16018 // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
16019 // ignore it if the masked load is already sign extended
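// Also look through a single-use FREEZE so a frozen masked load can still be
// converted; in that case N is replaced with the existing freeze node.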
16020 bool Frozen = N0.getOpcode() == ISD::FREEZE && N0.hasOneUse();
16021 if (auto *Ld = dyn_cast<MaskedLoadSDNode>(Frozen ? N0.getOperand(0) : N0)) {
16022 if (ExtVT == Ld->getMemoryVT() && Ld->hasNUsesOfValue(1, 0) &&
16023 Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
16024 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
16025 SDValue ExtMaskedLoad = DAG.getMaskedLoad(
16026 VT, DL, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
16027 Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
16028 Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
16029 CombineTo(N, Frozen ? N0 : ExtMaskedLoad);
16030 CombineTo(Ld, ExtMaskedLoad, ExtMaskedLoad.getValue(1));
16031 return SDValue(N, 0); // Return N so it doesn't get rechecked!
16032 }
16033 }
16034
16035 // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
16036 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
16037 if (SDValue(GN0, 0).hasOneUse() && ExtVT == GN0->getMemoryVT() &&
16038 TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
16039 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
16040 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
16041
16042 SDValue ExtLoad = DAG.getMaskedGather(
16043 DAG.getVTList(VT, MVT::Other), ExtVT, DL, Ops, GN0->getMemOperand(),
16044 GN0->getIndexType(), ISD::SEXTLOAD);
16045
16046 CombineTo(N, ExtLoad);
16047 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
16048 AddToWorklist(ExtLoad.getNode());
16049 return SDValue(N, 0); // Return N so it doesn't get rechecked!
16050 }
16051 }
16052
16053 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
16054 if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
16055 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
16056 N0.getOperand(1), false))
16057 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, BSwap, N1);
16058 }
16059
16060 // Fold (iM_signext_inreg
16061 // (extract_subvector (zext|anyext|sext iN_v to _) _)
16062 // from iN)
16063 // -> (extract_subvector (signext iN_v to iM))
16064 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() &&
16065 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
16066 SDValue InnerExt = N0.getOperand(0);
16067 EVT InnerExtVT = InnerExt->getValueType(0);
16068 SDValue Extendee = InnerExt->getOperand(0);
16069
16070 if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() &&
16071 (!LegalOperations ||
16072 TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) {
16073 SDValue SignExtExtendee =
16074 DAG.getNode(ISD::SIGN_EXTEND, DL, InnerExtVT, Extendee);
16075 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SignExtExtendee,
16076 N0.getOperand(1));
16077 }
16078 }
16079
16080 return SDValue();
16081}
16082
16083 static SDValue foldExtendVectorInregToExtendOfSubvector(
16084 SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG,
16085 bool LegalOperations) {
16086 unsigned InregOpcode = N->getOpcode();
16087 unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode);
16088
16089 SDValue Src = N->getOperand(0);
16090 EVT VT = N->getValueType(0);
16091 EVT SrcVT = EVT::getVectorVT(*DAG.getContext(),
16092 Src.getValueType().getVectorElementType(),
16093 VT.getVectorElementCount());
16094
16095 assert(ISD::isExtVecInRegOpcode(InregOpcode) &&
16096 "Expected EXTEND_VECTOR_INREG dag node in input!");
16097
16098 // Profitability check: our operand must be a one-use CONCAT_VECTORS.
16099 // FIXME: one-use check may be overly restrictive
16100 if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS)
16101 return SDValue();
16102
16103 // Profitability check: we must be extending exactly one of its operands.
16104 // FIXME: this is probably overly restrictive.
16105 Src = Src.getOperand(0);
16106 if (Src.getValueType() != SrcVT)
16107 return SDValue();
16108
16109 if (LegalOperations && !TLI.isOperationLegal(Opcode, VT))
16110 return SDValue();
16111
16112 return DAG.getNode(Opcode, DL, VT, Src);
16113}
16114
16115SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
16116 SDValue N0 = N->getOperand(0);
16117 EVT VT = N->getValueType(0);
16118 SDLoc DL(N);
16119
16120 if (N0.isUndef()) {
16121 // aext_vector_inreg(undef) = undef because the top bits are undefined.
16122 // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
16123 return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
16124 ? DAG.getUNDEF(VT)
16125 : DAG.getConstant(0, DL, VT);
16126 }
16127
16128 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
16129 return Res;
16130
16131 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
16132 return SDValue(N, 0);
16133
16134 if (SDValue R = foldExtendVectorInregToExtendOfSubvector(N, DL, TLI, DAG,
16135 LegalOperations))
16136 return R;
16137
16138 return SDValue();
16139}
16140
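// Fold truncate_usat_u (fp_to_uint x) into fp_to_uint_sat x when the target
// prefers the saturating conversion for these types.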
16141SDValue DAGCombiner::visitTRUNCATE_USAT_U(SDNode *N) {
16142 EVT VT = N->getValueType(0);
16143 SDValue N0 = N->getOperand(0);
16144
16145 SDValue FPVal;
16146 if (sd_match(N0, m_FPToUI(m_Value(FPVal))) &&
16147 DAG.getTargetLoweringInfo().shouldConvertFpToSat(
16148 ISD::FP_TO_UINT_SAT, FPVal.getValueType(), VT))
16149 return DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), VT, FPVal,
16150 DAG.getValueType(VT.getScalarType()));
16151
16152 return SDValue();
16153}
16154
16155/// Detect patterns of truncation with unsigned saturation:
16156///
16157/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
16158/// Return the source value x to be truncated or SDValue() if the pattern was
16159/// not matched.
16160///
16162 unsigned NumDstBits = VT.getScalarSizeInBits();
16163 unsigned NumSrcBits = In.getScalarValueSizeInBits();
16164 // Saturation with truncation. We truncate from InVT to VT.
16165 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
16166
16167 SDValue Min;
16168 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
16169 if (sd_match(In, m_UMin(m_Value(Min), m_SpecificInt(UnsignedMax))))
16170 return Min;
16171
16172 return SDValue();
16173}
16174
16175/// Detect patterns of truncation with signed saturation:
16176/// (truncate (smin (smax (x, signed_min_of_dest_type),
16177/// signed_max_of_dest_type)) to dest_type)
16178/// or:
16179/// (truncate (smax (smin (x, signed_max_of_dest_type),
16180/// signed_min_of_dest_type)) to dest_type).
16181///
16182/// Return the source value to be truncated or SDValue() if the pattern was not
16183 /// matched.
16184 static SDValue detectSSatSPattern(SDValue In, EVT VT) {
16185 unsigned NumDstBits = VT.getScalarSizeInBits();
16186 unsigned NumSrcBits = In.getScalarValueSizeInBits();
16187 // Saturation with truncation. We truncate from InVT to VT.
16188 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
16189
16190 SDValue Val;
16191 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
16192 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
16193
16194 if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_SpecificInt(SignedMin)),
16195 m_SpecificInt(SignedMax))))
16196 return Val;
16197
16198 if (sd_match(In, m_SMax(m_SMin(m_Value(Val), m_SpecificInt(SignedMax)),
16199 m_SpecificInt(SignedMin))))
16200 return Val;
16201
16202 return SDValue();
16203}
16204
16205 /// Detect patterns of truncation with unsigned saturation:
16206 static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG,
16207 const SDLoc &DL) {
16208 unsigned NumDstBits = VT.getScalarSizeInBits();
16209 unsigned NumSrcBits = In.getScalarValueSizeInBits();
16210 // Saturation with truncation. We truncate from InVT to VT.
16211 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
16212
16213 SDValue Val;
16214 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
16215 // Min == 0, Max is unsigned max of destination type.
16216 if (sd_match(In, m_SMax(m_SMin(m_Value(Val), m_SpecificInt(UnsignedMax)),
16217 m_Zero())))
16218 return Val;
16219
16220 if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_Zero()),
16221 m_SpecificInt(UnsignedMax))))
16222 return Val;
16223
16224 if (sd_match(In, m_UMin(m_SMax(m_Value(Val), m_Zero()),
16225 m_SpecificInt(UnsignedMax))))
16226 return Val;
16227
16228 return SDValue();
16229}
16230
16231static SDValue foldToSaturated(SDNode *N, EVT &VT, SDValue &Src, EVT &SrcVT,
16232 SDLoc &DL, const TargetLowering &TLI,
16233 SelectionDAG &DAG) {
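// A saturating truncate is only formed if the target supports the node for
// the source type and considers it desirable for the destination type.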
16234 auto AllowedTruncateSat = [&](unsigned Opc, EVT SrcVT, EVT VT) -> bool {
16235 return (TLI.isOperationLegalOrCustom(Opc, SrcVT) &&
16236 TLI.isTypeDesirableForOp(Opc, VT));
16237 };
16238
16239 if (Src.getOpcode() == ISD::SMIN || Src.getOpcode() == ISD::SMAX) {
16240 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_S, SrcVT, VT))
16241 if (SDValue SSatVal = detectSSatSPattern(Src, VT))
16242 return DAG.getNode(ISD::TRUNCATE_SSAT_S, DL, VT, SSatVal);
16243 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_U, SrcVT, VT))
16244 if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
16245 return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, SSatVal);
16246 } else if (Src.getOpcode() == ISD::UMIN) {
16247 if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_U, SrcVT, VT))
16248 if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
16249 return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, SSatVal);
16250 if (AllowedTruncateSat(ISD::TRUNCATE_USAT_U, SrcVT, VT))
16251 if (SDValue USatVal = detectUSatUPattern(Src, VT))
16252 return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, VT, USatVal);
16253 }
16254
16255 return SDValue();
16256}
16257
16258SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
16259 SDValue N0 = N->getOperand(0);
16260 EVT VT = N->getValueType(0);
16261 EVT SrcVT = N0.getValueType();
16262 bool isLE = DAG.getDataLayout().isLittleEndian();
16263 SDLoc DL(N);
16264
16265 // trunc(undef) = undef
16266 if (N0.isUndef())
16267 return DAG.getUNDEF(VT);
16268
16269 // fold (truncate (truncate x)) -> (truncate x)
16270 if (N0.getOpcode() == ISD::TRUNCATE)
16271 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16272
16273 // fold saturated truncate
16274 if (SDValue SaturatedTR = foldToSaturated(N, VT, N0, SrcVT, DL, TLI, DAG))
16275 return SaturatedTR;
16276
16277 // fold (truncate c1) -> c1
16278 if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, DL, VT, {N0}))
16279 return C;
16280
16281 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
16282 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
16283 N0.getOpcode() == ISD::SIGN_EXTEND ||
16284 N0.getOpcode() == ISD::ANY_EXTEND) {
16285 // if the source is smaller than the dest, we still need an extend.
16286 if (N0.getOperand(0).getValueType().bitsLT(VT)) {
16287 SDNodeFlags Flags;
16288 if (N0.getOpcode() == ISD::ZERO_EXTEND)
16289 Flags.setNonNeg(N0->getFlags().hasNonNeg());
16290 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
16291 }
16292 // if the source is larger than the dest, then we just need the truncate.
16293 if (N0.getOperand(0).getValueType().bitsGT(VT))
16294 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16295 // if the source and dest are the same type, we can drop both the extend
16296 // and the truncate.
16297 return N0.getOperand(0);
16298 }
16299
16300 // Try to narrow a truncate-of-sext_in_reg to the destination type:
16301 // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
16302 if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
16303 N0.hasOneUse()) {
16304 SDValue X = N0.getOperand(0);
16305 SDValue ExtVal = N0.getOperand(1);
16306 EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
16307 if (ExtVT.bitsLT(VT) && TLI.preferSextInRegOfTruncate(VT, SrcVT, ExtVT)) {
16308 SDValue TrX = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
16309 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, TrX, ExtVal);
16310 }
16311 }
16312
16313 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
16314 if (N->hasOneUse() && (N->user_begin()->getOpcode() == ISD::ANY_EXTEND))
16315 return SDValue();
16316
16317 // Fold extract-and-trunc into a narrow extract. For example:
16318 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
16319 // i32 y = TRUNCATE(i64 x)
16320 // -- becomes --
16321 // v16i8 b = BITCAST (v2i64 val)
16322 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
16323 //
16324 // Note: We only run this optimization after type legalization (which often
16325 // creates this pattern) and before operation legalization after which
16326 // we need to be more careful about the vector instructions that we generate.
16327 if (LegalTypes && !LegalOperations && VT.isScalarInteger() && VT != MVT::i1 &&
16328 N0->hasOneUse()) {
16329 EVT TrTy = N->getValueType(0);
16330 SDValue Src = N0;
16331
16332 // Check for cases where we shift down an upper element before truncation.
16333 int EltOffset = 0;
16334 if (Src.getOpcode() == ISD::SRL && Src.getOperand(0)->hasOneUse()) {
16335 if (auto ShAmt = DAG.getValidShiftAmount(Src)) {
16336 if ((*ShAmt % TrTy.getSizeInBits()) == 0) {
16337 Src = Src.getOperand(0);
16338 EltOffset = *ShAmt / TrTy.getSizeInBits();
16339 }
16340 }
16341 }
16342
16343 if (Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16344 EVT VecTy = Src.getOperand(0).getValueType();
16345 EVT ExTy = Src.getValueType();
16346
16347 auto EltCnt = VecTy.getVectorElementCount();
16348 unsigned SizeRatio = ExTy.getSizeInBits() / TrTy.getSizeInBits();
16349 auto NewEltCnt = EltCnt * SizeRatio;
16350
16351 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
16352 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
16353
16354 SDValue EltNo = Src->getOperand(1);
16355 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
16356 int Elt = EltNo->getAsZExtVal();
16357 int Index = isLE ? (Elt * SizeRatio + EltOffset)
16358 : (Elt * SizeRatio + (SizeRatio - 1) - EltOffset);
16359 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
16360 DAG.getBitcast(NVT, Src.getOperand(0)),
16361 DAG.getVectorIdxConstant(Index, DL));
16362 }
16363 }
16364 }
16365
16366 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
16367 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse() &&
16368 TLI.isTruncateFree(SrcVT, VT)) {
16369 if (!LegalOperations ||
16370 (TLI.isOperationLegal(ISD::SELECT, SrcVT) &&
16371 TLI.isNarrowingProfitable(N0.getNode(), SrcVT, VT))) {
16372 SDLoc SL(N0);
16373 SDValue Cond = N0.getOperand(0);
16374 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
16375 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
16376 return DAG.getNode(ISD::SELECT, DL, VT, Cond, TruncOp0, TruncOp1);
16377 }
16378 }
16379
16380 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
16381 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
16382 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
16383 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
16384 SDValue Amt = N0.getOperand(1);
16385 KnownBits Known = DAG.computeKnownBits(Amt);
16386 unsigned Size = VT.getScalarSizeInBits();
16387 if (Known.countMaxActiveBits() <= Log2_32(Size)) {
16388 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
16389 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16390 if (AmtVT != Amt.getValueType()) {
16391 Amt = DAG.getZExtOrTrunc(Amt, DL, AmtVT);
16392 AddToWorklist(Amt.getNode());
16393 }
16394 return DAG.getNode(ISD::SHL, DL, VT, Trunc, Amt);
16395 }
16396 }
16397
16398 if (SDValue V = foldSubToUSubSat(VT, N0.getNode(), DL))
16399 return V;
16400
16401 if (SDValue ABD = foldABSToABD(N, DL))
16402 return ABD;
16403
16404 // Attempt to pre-truncate BUILD_VECTOR sources.
16405 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
16406 N0.hasOneUse() &&
16407 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
16408 // Avoid creating illegal types if running after type legalizer.
16409 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
16410 EVT SVT = VT.getScalarType();
16411 SmallVector<SDValue, 8> TruncOps;
16412 for (const SDValue &Op : N0->op_values()) {
16413 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
16414 TruncOps.push_back(TruncOp);
16415 }
16416 return DAG.getBuildVector(VT, DL, TruncOps);
16417 }
16418
16419 // trunc (splat_vector x) -> splat_vector (trunc x)
16420 if (N0.getOpcode() == ISD::SPLAT_VECTOR &&
16421 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType())) &&
16422 (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) {
16423 EVT SVT = VT.getScalarType();
16424 return DAG.getSplatVector(
16425 VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0)));
16426 }
16427
16428 // Fold a series of buildvector, bitcast, and truncate if possible.
16429 // For example fold
16430 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
16431 // (2xi32 (buildvector x, y)).
16432 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
16433 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
16434 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
16435 N0.getOperand(0).hasOneUse()) {
16436 SDValue BuildVect = N0.getOperand(0);
16437 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
16438 EVT TruncVecEltTy = VT.getVectorElementType();
16439
16440 // Check that the element types match.
16441 if (BuildVectEltTy == TruncVecEltTy) {
16442 // Now we only need to compute the offset of the truncated elements.
16443 unsigned BuildVecNumElts = BuildVect.getNumOperands();
16444 unsigned TruncVecNumElts = VT.getVectorNumElements();
16445 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
16446 unsigned FirstElt = isLE ? 0 : (TruncEltOffset - 1);
16447
16448 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
16449 "Invalid number of elements");
16450
16451 SmallVector<SDValue, 8> Opnds;
16452 for (unsigned i = FirstElt, e = BuildVecNumElts; i < e;
16453 i += TruncEltOffset)
16454 Opnds.push_back(BuildVect.getOperand(i));
16455
16456 return DAG.getBuildVector(VT, DL, Opnds);
16457 }
16458 }
16459
16460 // fold (truncate (load x)) -> (smaller load x)
16461 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
16462 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
16463 if (SDValue Reduced = reduceLoadWidth(N))
16464 return Reduced;
16465
16466 // Handle the case where the truncated result is at least as wide as the
16467 // loaded type.
16468 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
16469 auto *LN0 = cast<LoadSDNode>(N0);
16470 if (LN0->isSimple() && LN0->getMemoryVT().bitsLE(VT)) {
16471 SDValue NewLoad = DAG.getExtLoad(
16472 LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(),
16473 LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand());
16474 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
16475 return NewLoad;
16476 }
16477 }
16478 }
16479
16480 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
16481 // where ... are all 'undef'.
16482 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
16483 SmallVector<EVT, 8> VTs;
16484 SDValue V;
16485 unsigned Idx = 0;
16486 unsigned NumDefs = 0;
16487
16488 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
16489 SDValue X = N0.getOperand(i);
16490 if (!X.isUndef()) {
16491 V = X;
16492 Idx = i;
16493 NumDefs++;
16494 }
16495 // Stop if more than one member is non-undef.
16496 if (NumDefs > 1)
16497 break;
16498
16499 VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
16500 VT.getVectorElementType(),
16501 X.getValueType().getVectorElementCount()));
16502 }
16503
16504 if (NumDefs == 0)
16505 return DAG.getUNDEF(VT);
16506
16507 if (NumDefs == 1) {
16508 assert(V.getNode() && "The single defined operand is empty!");
16509 SmallVector<SDValue, 8> Opnds;
16510 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
16511 if (i != Idx) {
16512 Opnds.push_back(DAG.getUNDEF(VTs[i]));
16513 continue;
16514 }
16515 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
16516 AddToWorklist(NV.getNode());
16517 Opnds.push_back(NV);
16518 }
16519 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
16520 }
16521 }
16522
16523 // Fold truncate of a bitcast of a vector to an extract of the low vector
16524 // element.
16525 //
16526 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
16527 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
16528 SDValue VecSrc = N0.getOperand(0);
16529 EVT VecSrcVT = VecSrc.getValueType();
16530 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
16531 (!LegalOperations ||
16532 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
16533 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
16534 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VecSrc,
16535 DAG.getVectorIdxConstant(Idx, DL));
16536 }
16537 }
16538
16539 // Simplify the operands using demanded-bits information.
16540 if (SimplifyDemandedBits(SDValue(N, 0)))
16541 return SDValue(N, 0);
16542
16543 // fold (truncate (extract_subvector(ext x))) ->
16544 // (extract_subvector x)
16545 // TODO: This can be generalized to cover cases where the truncate and extract
16546 // do not fully cancel each other out.
16547 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
16548 SDValue N00 = N0.getOperand(0);
16549 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
16550 N00.getOpcode() == ISD::ZERO_EXTEND ||
16551 N00.getOpcode() == ISD::ANY_EXTEND) {
16552 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
16553 VT.getVectorElementType())
16554 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
16555 N00.getOperand(0), N0.getOperand(1));
16556 }
16557 }
16558
16559 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
16560 return NewVSel;
16561
16562 // Narrow a suitable binary operation with a non-opaque constant operand by
16563 // moving it ahead of the truncate. This is limited to pre-legalization
16564 // because targets may prefer a wider type during later combines and invert
16565 // this transform.
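// e.g. (i8 (truncate (add i32:x, 257))) -> (add (i8 (truncate x)), 1)
// (illustrative; the constant operand is truncated along with the value)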
16566 switch (N0.getOpcode()) {
16567 case ISD::ADD:
16568 case ISD::SUB:
16569 case ISD::MUL:
16570 case ISD::AND:
16571 case ISD::OR:
16572 case ISD::XOR:
16573 if (!LegalOperations && N0.hasOneUse() &&
16574 (N0.getOperand(0) == N0.getOperand(1) ||
16575 isConstantOrConstantVector(N0.getOperand(0), true) ||
16576 isConstantOrConstantVector(N0.getOperand(1), true))) {
16577 // TODO: We already restricted this to pre-legalization, but for vectors
16578 // we are extra cautious to not create an unsupported operation.
16579 // Target-specific changes are likely needed to avoid regressions here.
16580 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
16581 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16582 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
16583 SDNodeFlags Flags;
16584 // Propagate nuw for sub.
16585 if (N0->getOpcode() == ISD::SUB && N0->getFlags().hasNoUnsignedWrap() &&
16586 DAG.MaskedValueIsZero(
16587 N0->getOperand(0),
16588 APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
16589 VT.getScalarSizeInBits())))
16590 Flags.setNoUnsignedWrap(true);
16591 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR, Flags);
16592 }
16593 }
16594 break;
16595 case ISD::ADDE:
16596 case ISD::UADDO_CARRY:
16597 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
16598 // (trunc uaddo_carry(X, Y, Carry)) ->
16599 // (uaddo_carry trunc(X), trunc(Y), Carry)
16600 // When the adde's carry is not used.
16601 // We only do this for uaddo_carry before operation legalization.
16602 if (((!LegalOperations && N0.getOpcode() == ISD::UADDO_CARRY) ||
16603 TLI.isOperationLegal(N0.getOpcode(), VT)) &&
16604 N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
16605 SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
16606 SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
16607 SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
16608 return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2));
16609 }
16610 break;
16611 case ISD::USUBSAT:
16612 // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
16613 // enough to know that the upper bits are zero, we must ensure that we don't
16614 // introduce an extra truncate.
16615 if (!LegalOperations && N0.hasOneUse() &&
16616 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
16617 N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
16618 VT.getScalarSizeInBits() &&
16619 hasOperation(N0.getOpcode(), VT)) {
16620 return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
16621 DAG, DL);
16622 }
16623 break;
16624 case ISD::AVGCEILS:
16625 case ISD::AVGCEILU:
16626 // trunc (avgceilu (sext (x), sext (y))) -> avgceils(x, y)
16627 // trunc (avgceils (zext (x), zext (y))) -> avgceilu(x, y)
16628 if (N0.hasOneUse()) {
16629 SDValue Op0 = N0.getOperand(0);
16630 SDValue Op1 = N0.getOperand(1);
16631 if (N0.getOpcode() == ISD::AVGCEILU) {
16632 if (hasOperation(ISD::AVGCEILS, VT) &&
16633 Op0.getOpcode() == ISD::SIGN_EXTEND &&
16634 Op1.getOpcode() == ISD::SIGN_EXTEND &&
16635 Op0.getOperand(0).getValueType() == VT &&
16636 Op1.getOperand(0).getValueType() == VT)
16637 return DAG.getNode(ISD::AVGCEILS, DL, VT, Op0.getOperand(0),
16638 Op1.getOperand(0));
16639 } else {
16640 if (hasOperation(ISD::AVGCEILU, VT) &&
16641 Op0.getOpcode() == ISD::ZERO_EXTEND &&
16642 Op1.getOpcode() == ISD::ZERO_EXTEND &&
16643 Op0.getOperand(0).getValueType() == VT &&
16644 Op1.getOperand(0).getValueType() == VT)
16645 return DAG.getNode(ISD::AVGCEILU, DL, VT, Op0.getOperand(0),
16646 Op1.getOperand(0));
16647 }
16648 }
16649 [[fallthrough]];
16650 case ISD::AVGFLOORS:
16651 case ISD::AVGFLOORU:
16652 case ISD::ABDS:
16653 case ISD::ABDU:
16654 // (trunc (avg a, b)) -> (avg (trunc a), (trunc b))
16655 // (trunc (abdu/abds a, b)) -> (abdu/abds (trunc a), (trunc b))
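// e.g. (i8 (trunc (abdu (i32 (zext i8:a)), (i32 (zext i8:b))))) -> (abdu a, b)
// (illustrative; safe only when the discarded high bits carry no information:
//  known zero for the unsigned ops, enough sign bits for the signed ops)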
16656 if (!LegalOperations && N0.hasOneUse() &&
16657 TLI.isOperationLegal(N0.getOpcode(), VT)) {
16658 EVT TruncVT = VT;
16659 unsigned SrcBits = SrcVT.getScalarSizeInBits();
16660 unsigned TruncBits = TruncVT.getScalarSizeInBits();
16661
16662 SDValue A = N0.getOperand(0);
16663 SDValue B = N0.getOperand(1);
16664 bool CanFold = false;
16665
16666 if (N0.getOpcode() == ISD::AVGFLOORU || N0.getOpcode() == ISD::AVGCEILU ||
16667 N0.getOpcode() == ISD::ABDU) {
16668 APInt UpperBits = APInt::getBitsSetFrom(SrcBits, TruncBits);
16669 CanFold = DAG.MaskedValueIsZero(B, UpperBits) &&
16670 DAG.MaskedValueIsZero(A, UpperBits);
16671 } else {
16672 unsigned NeededBits = SrcBits - TruncBits;
16673 CanFold = DAG.ComputeNumSignBits(B) > NeededBits &&
16674 DAG.ComputeNumSignBits(A) > NeededBits;
16675 }
16676
16677 if (CanFold) {
16678 SDValue NewA = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, A);
16679 SDValue NewB = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, B);
16680 return DAG.getNode(N0.getOpcode(), DL, TruncVT, NewA, NewB);
16681 }
16682 }
16683 break;
16684 }
16685
16686 return SDValue();
16687}
16688
16689static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
16690 SDValue Elt = N->getOperand(i);
16691 if (Elt.getOpcode() != ISD::MERGE_VALUES)
16692 return Elt.getNode();
16693 return Elt.getOperand(Elt.getResNo()).getNode();
16694}
16695
16696/// build_pair (load, load) -> load
16697/// if load locations are consecutive.
16698SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
16699 assert(N->getOpcode() == ISD::BUILD_PAIR);
16700
16701 auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
16702 auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
16703
16704 // A BUILD_PAIR always has the least significant part in elt 0 and the
16705 // most significant part in elt 1. So when combining into one large load, we
16706 // need to consider the endianness.
16707 if (DAG.getDataLayout().isBigEndian())
16708 std::swap(LD1, LD2);
16709
16710 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
16711 !LD1->hasOneUse() || !LD2->hasOneUse() ||
16712 LD1->getAddressSpace() != LD2->getAddressSpace())
16713 return SDValue();
16714
16715 unsigned LD1Fast = 0;
16716 EVT LD1VT = LD1->getValueType(0);
16717 unsigned LD1Bytes = LD1VT.getStoreSize();
16718 if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
16719 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
16720 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
16721 *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
16722 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
16723 LD1->getPointerInfo(), LD1->getAlign());
16724
16725 return SDValue();
16726}
16727
16728static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
16729 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
16730 // and Lo parts; on big-endian machines it doesn't.
16731 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
16732}
16733
16734SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
16735 const TargetLowering &TLI) {
16736 // If this is not a bitcast to an FP type or if the target doesn't have
16737 // IEEE754-compliant FP logic, we're done.
16738 EVT VT = N->getValueType(0);
16739 SDValue N0 = N->getOperand(0);
16740 EVT SourceVT = N0.getValueType();
16741
16742 if (!VT.isFloatingPoint())
16743 return SDValue();
16744
16745 // TODO: Handle cases where the integer constant is a different scalar
16746 // bitwidth to the FP.
16747 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
16748 return SDValue();
16749
16750 unsigned FPOpcode;
16751 APInt SignMask;
16752 switch (N0.getOpcode()) {
16753 case ISD::AND:
16754 FPOpcode = ISD::FABS;
16755 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
16756 break;
16757 case ISD::XOR:
16758 FPOpcode = ISD::FNEG;
16759 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
16760 break;
16761 case ISD::OR:
16762 FPOpcode = ISD::FABS;
16763 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
16764 break;
16765 default:
16766 return SDValue();
16767 }
16768
16769 if (LegalOperations && !TLI.isOperationLegal(FPOpcode, VT))
16770 return SDValue();
16771
16772 // This needs to be the inverse of logic in foldSignChangeInBitcast.
16773 // FIXME: I don't think looking for bitcast intrinsically makes sense, but
16774 // removing this would require more changes.
16775 auto IsBitCastOrFree = [&TLI, FPOpcode](SDValue Op, EVT VT) {
16776 if (sd_match(Op, m_BitCast(m_SpecificVT(VT))))
16777 return true;
16778
16779 return FPOpcode == ISD::FABS ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
16780 };
16781
16782 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
16783 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
16784 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
16785 // fneg (fabs X)
16786 SDValue LogicOp0 = N0.getOperand(0);
16787 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
16788 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
16789 IsBitCastOrFree(LogicOp0, VT)) {
16790 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, LogicOp0);
16791 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, CastOp0);
16792 NumFPLogicOpsConv++;
16793 if (N0.getOpcode() == ISD::OR)
16794 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
16795 return FPOp;
16796 }
16797
16798 return SDValue();
16799}
16800
16801SDValue DAGCombiner::visitBITCAST(SDNode *N) {
16802 SDValue N0 = N->getOperand(0);
16803 EVT VT = N->getValueType(0);
16804
16805 if (N0.isUndef())
16806 return DAG.getUNDEF(VT);
16807
16808 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
16809 // Only do this before legalize types, unless both types are integer and the
16810 // scalar type is legal. Only do this before legalize ops, since the target
16811 // may be depending on the bitcast.
16812 // First check to see if this is all constant.
16813 // TODO: Support FP bitcasts after legalize types.
16814 if (VT.isVector() &&
16815 (!LegalTypes ||
16816 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
16817 TLI.isTypeLegal(VT.getVectorElementType()))) &&
16818 N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
16819 cast<BuildVectorSDNode>(N0)->isConstant())
16820 return DAG.FoldConstantBuildVector(cast<BuildVectorSDNode>(N0), SDLoc(N),
16821 VT.getVectorElementType());
16822
16823 // If the input is a constant, let getNode fold it.
16824 if (isIntOrFPConstant(N0)) {
16825 // If we can't allow illegal operations, we need to check that this is just
16826 // an fp -> int or int -> fp conversion and that the resulting operation will
16827 // be legal.
16828 if (!LegalOperations ||
16829 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
16830 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
16831 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
16832 TLI.isOperationLegal(ISD::Constant, VT))) {
16833 SDValue C = DAG.getBitcast(VT, N0);
16834 if (C.getNode() != N)
16835 return C;
16836 }
16837 }
16838
16839 // (conv (conv x, t1), t2) -> (conv x, t2)
16840 if (N0.getOpcode() == ISD::BITCAST)
16841 return DAG.getBitcast(VT, N0.getOperand(0));
16842
16843 // fold (conv (logicop (conv x), (c))) -> (logicop x, (conv c))
16844 // iff the current bitwise logicop type isn't legal
16845 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && VT.isInteger() &&
16846 !TLI.isTypeLegal(N0.getOperand(0).getValueType())) {
16847 auto IsFreeBitcast = [VT](SDValue V) {
16848 return (V.getOpcode() == ISD::BITCAST &&
16849 V.getOperand(0).getValueType() == VT) ||
16850 (ISD::isBuildVectorOfConstantSDNodes(V.getNode()) &&
16851 V->hasOneUse());
16852 };
16853 if (IsFreeBitcast(N0.getOperand(0)) && IsFreeBitcast(N0.getOperand(1)))
16854 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
16855 DAG.getBitcast(VT, N0.getOperand(0)),
16856 DAG.getBitcast(VT, N0.getOperand(1)));
16857 }
16858
16859 // fold (conv (load x)) -> (load (conv*)x)
16860 // fold (conv (freeze (load x))) -> (freeze (load (conv*)x))
16861 // If the resultant load doesn't need a higher alignment than the original!
16862 auto CastLoad = [this, &VT](SDValue N0, const SDLoc &DL) {
16863 if (!ISD::isNormalLoad(N0.getNode()) || !N0.hasOneUse())
16864 return SDValue();
16865
16866 // Do not remove the cast if the types differ in endian layout.
16867 if (TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) !=
16868 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()))
16869 return SDValue();
16870
16871 // If the load is volatile, we only want to change the load type if the
16872 // resulting load is legal. Otherwise we might increase the number of
16873 // memory accesses. We don't care if the original type was legal or not
16874 // as we assume software couldn't rely on the number of accesses of an
16875 // illegal type.
16876 auto *LN0 = cast<LoadSDNode>(N0);
16877 if ((LegalOperations || !LN0->isSimple()) &&
16878 !TLI.isOperationLegal(ISD::LOAD, VT))
16879 return SDValue();
16880
16881 if (!TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
16882 *LN0->getMemOperand()))
16883 return SDValue();
16884
16885 // If the range metadata type does not match the new memory
16886 // operation type, remove the range metadata.
16887 if (const MDNode *MD = LN0->getRanges()) {
16888 ConstantInt *Lower = mdconst::extract<ConstantInt>(MD->getOperand(0));
16889 if (Lower->getBitWidth() != VT.getScalarSizeInBits() || !VT.isInteger()) {
16890 LN0->getMemOperand()->clearRanges();
16891 }
16892 }
16893 SDValue Load = DAG.getLoad(VT, DL, LN0->getChain(), LN0->getBasePtr(),
16894 LN0->getMemOperand());
16895 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
16896 return Load;
16897 };
16898
16899 if (SDValue NewLd = CastLoad(N0, SDLoc(N)))
16900 return NewLd;
16901
16902 if (N0.getOpcode() == ISD::FREEZE && N0.hasOneUse())
16903 if (SDValue NewLd = CastLoad(N0.getOperand(0), SDLoc(N)))
16904 return DAG.getFreeze(NewLd);
16905
16906 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
16907 return V;
16908
16909 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
16910 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
16911 //
16912 // For ppc_fp128:
16913 // fold (bitcast (fneg x)) ->
16914 // flipbit = signbit
16915 // (xor (bitcast x) (build_pair flipbit, flipbit))
16916 //
16917 // fold (bitcast (fabs x)) ->
16918 // flipbit = (and (extract_element (bitcast x), 0), signbit)
16919 // (xor (bitcast x) (build_pair flipbit, flipbit))
16920 // This often reduces constant pool loads.
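// e.g. for f32 (illustrative):
//   (i32 (bitcast (fneg f32:x))) -> (xor (i32 (bitcast x)), 0x80000000)
//   (i32 (bitcast (fabs f32:x))) -> (and (i32 (bitcast x)), 0x7fffffff)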
16921 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
16922 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
16923 N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
16924 !N0.getValueType().isVector()) {
16925 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
16926 AddToWorklist(NewConv.getNode());
16927
16928 SDLoc DL(N);
16929 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
16930 assert(VT.getSizeInBits() == 128);
16931 SDValue SignBit = DAG.getConstant(
16932 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
16933 SDValue FlipBit;
16934 if (N0.getOpcode() == ISD::FNEG) {
16935 FlipBit = SignBit;
16936 AddToWorklist(FlipBit.getNode());
16937 } else {
16938 assert(N0.getOpcode() == ISD::FABS);
16939 SDValue Hi =
16940 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
16941 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
16942 SDLoc(NewConv)));
16943 AddToWorklist(Hi.getNode());
16944 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
16945 AddToWorklist(FlipBit.getNode());
16946 }
16947 SDValue FlipBits =
16948 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
16949 AddToWorklist(FlipBits.getNode());
16950 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
16951 }
16952 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
16953 if (N0.getOpcode() == ISD::FNEG)
16954 return DAG.getNode(ISD::XOR, DL, VT,
16955 NewConv, DAG.getConstant(SignBit, DL, VT));
16956 assert(N0.getOpcode() == ISD::FABS);
16957 return DAG.getNode(ISD::AND, DL, VT,
16958 NewConv, DAG.getConstant(~SignBit, DL, VT));
16959 }
16960
16961 // fold (bitconvert (fcopysign cst, x)) ->
16962 // (or (and (bitconvert x), sign), (and cst, (not sign)))
16963 // Note that we don't handle (copysign x, cst) because this can always be
16964 // folded to an fneg or fabs.
16965 //
16966 // For ppc_fp128:
16967 // fold (bitcast (fcopysign cst, x)) ->
16968 // flipbit = (and (extract_element
16969 // (xor (bitcast cst), (bitcast x)), 0),
16970 // signbit)
16971 // (xor (bitcast cst) (build_pair flipbit, flipbit))
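// e.g. for f32 (illustrative):
//   (i32 (bitcast (fcopysign f32:cst, f32:x)))
//     -> (or (and (i32 (bitcast x)), 0x80000000),
//            (and (i32 (bitcast cst)), 0x7fffffff))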
16972 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
16973 isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() &&
16974 !VT.isVector()) {
16975 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
16976 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
16977 if (isTypeLegal(IntXVT)) {
16978 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
16979 AddToWorklist(X.getNode());
16980
16981 // If X has a different width than the result/lhs, sext it or truncate it.
16982 unsigned VTWidth = VT.getSizeInBits();
16983 if (OrigXWidth < VTWidth) {
16984 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
16985 AddToWorklist(X.getNode());
16986 } else if (OrigXWidth > VTWidth) {
16987 // To get the sign bit in the right place, we have to shift it right
16988 // before truncating.
16989 SDLoc DL(X);
16990 X = DAG.getNode(ISD::SRL, DL,
16991 X.getValueType(), X,
16992 DAG.getConstant(OrigXWidth-VTWidth, DL,
16993 X.getValueType()));
16994 AddToWorklist(X.getNode());
16995 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
16996 AddToWorklist(X.getNode());
16997 }
16998
16999 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
17000 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
17001 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
17002 AddToWorklist(Cst.getNode());
17003 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
17004 AddToWorklist(X.getNode());
17005 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
17006 AddToWorklist(XorResult.getNode());
17007 SDValue XorResult64 = DAG.getNode(
17008 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
17009 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
17010 SDLoc(XorResult)));
17011 AddToWorklist(XorResult64.getNode());
17012 SDValue FlipBit =
17013 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
17014 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
17015 AddToWorklist(FlipBit.getNode());
17016 SDValue FlipBits =
17017 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
17018 AddToWorklist(FlipBits.getNode());
17019 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
17020 }
17021 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
17022 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
17023 X, DAG.getConstant(SignBit, SDLoc(X), VT));
17024 AddToWorklist(X.getNode());
17025
17026 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
17027 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
17028 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
17029 AddToWorklist(Cst.getNode());
17030
17031 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
17032 }
17033 }
17034
17035 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
17036 if (N0.getOpcode() == ISD::BUILD_PAIR)
17037 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
17038 return CombineLD;
17039
17040 // int_vt (bitcast (vec_vt (scalar_to_vector elt_vt:x)))
17041 // => int_vt (any_extend elt_vt:x)
17042 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && VT.isScalarInteger()) {
17043 SDValue SrcScalar = N0.getOperand(0);
17044 if (SrcScalar.getValueType().isScalarInteger())
17045 return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SrcScalar);
17046 }
17047
17048 // Remove double bitcasts from shuffles - this is often a legacy of
17049 // XformToShuffleWithZero being used to combine bitmaskings (of
17050 // float vectors bitcast to integer vectors) into shuffles.
17051 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
17052 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
17053 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
17054 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
17055 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
17056 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
17057
17058 // If operands are a bitcast, peek through if it casts the original VT.
17059 // If operands are a constant, just bitcast back to original VT.
17060 auto PeekThroughBitcast = [&](SDValue Op) {
17061 if (Op.getOpcode() == ISD::BITCAST &&
17062 Op.getOperand(0).getValueType() == VT)
17063 return SDValue(Op.getOperand(0));
17064 if (Op.isUndef() || isAnyConstantBuildVector(Op))
17065 return DAG.getBitcast(VT, Op);
17066 return SDValue();
17067 };
17068
17069 // FIXME: If either input vector is bitcast, try to convert the shuffle to
17070 // the result type of this bitcast. This would eliminate at least one
17071 // bitcast. See the transform in InstCombine.
17072 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
17073 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
17074 if (!(SV0 && SV1))
17075 return SDValue();
17076
17077 int MaskScale =
17078 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
17079 SmallVector<int, 8> NewMask;
17080 for (int M : SVN->getMask())
17081 for (int i = 0; i != MaskScale; ++i)
17082 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
17083
17084 SDValue LegalShuffle =
17085 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
17086 if (LegalShuffle)
17087 return LegalShuffle;
17088 }
17089
17090 return SDValue();
17091}
17092
17093SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
17094 EVT VT = N->getValueType(0);
17095 return CombineConsecutiveLoads(N, VT);
17096}
17097
17098SDValue DAGCombiner::visitFREEZE(SDNode *N) {
17099 SDValue N0 = N->getOperand(0);
17100
17101 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
17102 return N0;
17103
17104 // If we have frozen and unfrozen users of N0, update so everything uses N.
17105 if (!N0.isUndef() && !N0.hasOneUse()) {
17106 SDValue FrozenN0(N, 0);
17107 // Unfreeze all uses of N to avoid double deleting N from the CSE map.
17108 DAG.ReplaceAllUsesOfValueWith(FrozenN0, N0);
17109 DAG.ReplaceAllUsesOfValueWith(N0, FrozenN0);
17110 // ReplaceAllUsesOfValueWith will have also updated the use in N, thus
17111 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
17112 assert(N->getOperand(0) == FrozenN0 && "Expected cycle in DAG");
17113 DAG.UpdateNodeOperands(N, N0);
17114 return FrozenN0;
17115 }
17116
17117 // We currently avoid folding freeze over SRA/SRL, due to the problems seen
17118 // with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
17119 // example https://reviews.llvm.org/D136529#4120959.
17120 if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)
17121 return SDValue();
17122
17123 // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
17124 // Try to push freeze through instructions that propagate but don't produce
17125 // poison as far as possible. If an operand of the freeze satisfies three
17126 // conditions: 1) it has one use, 2) it does not produce poison, and 3) all
17127 // but one of its operands are guaranteed non-poison (or it is a BUILD_VECTOR
17128 // or similar), then push the freeze through to the operands that may be poison.
17129 // NOTE: we will strip poison-generating flags, so ignore them here.
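// e.g. (freeze (add nsw x, C)) -> (add (freeze x), C) with nsw dropped,
// assuming the constant C is already known not to be undef/poison
// (illustrative).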
17130 if (DAG.canCreateUndefOrPoison(N0, /*PoisonOnly*/ false,
17131 /*ConsiderFlags*/ false) ||
17132 N0->getNumValues() != 1 || !N0->hasOneUse())
17133 return SDValue();
17134
17135 // TODO: we should always allow multiple operands; however, this increases the
17136 // likelihood of infinite loops because the ReplaceAllUsesOfValueWith call
17137 // below can cause later nodes that share frozen operands to fold again and no
17138 // longer be able to confirm that other operands are not poison, due to the
17139 // recursion depth limit on isGuaranteedNotToBeUndefOrPoison.
17140 bool AllowMultipleMaybePoisonOperands =
17141 N0.getOpcode() == ISD::SELECT_CC || N0.getOpcode() == ISD::SETCC ||
17142 N0.getOpcode() == ISD::BUILD_VECTOR ||
17144 N0.getOpcode() == ISD::BUILD_PAIR ||
17147
17148 // Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
17149 // ones" or "constant" into something that depends on FrozenUndef. We can
17150 // instead pick undef values to keep those properties, while at the same time
17151 // folding away the freeze.
17152 // If we implement a more general solution for folding away freeze(undef) in
17153 // the future, then this special handling can be removed.
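// e.g. (freeze (build_vector 1, undef, 3, undef)) -> (build_vector 1, 0, 3, 0)
// (illustrative).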
17154 if (N0.getOpcode() == ISD::BUILD_VECTOR) {
17155 SDLoc DL(N0);
17156 EVT VT = N0.getValueType();
17157 if (ISD::isBuildVectorAllOnes(N0.getNode()))
17158 return DAG.getAllOnesConstant(DL, VT);
17159 if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
17160 SmallVector<SDValue, 16> NewVecC;
17161 for (const SDValue &Op : N0->op_values())
17162 NewVecC.push_back(
17163 Op.isUndef() ? DAG.getConstant(0, DL, Op.getValueType()) : Op);
17164 return DAG.getBuildVector(VT, DL, NewVecC);
17165 }
17166 }
17167
17168 SmallSet<SDValue, 8> MaybePoisonOperands;
17169 SmallVector<unsigned, 8> MaybePoisonOperandNumbers;
17170 for (auto [OpNo, Op] : enumerate(N0->ops())) {
17171 if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly=*/false))
17172 continue;
17173 bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
17174 bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op).second;
17175 if (IsNewMaybePoisonOperand)
17176 MaybePoisonOperandNumbers.push_back(OpNo);
17177 if (!HadMaybePoisonOperands)
17178 continue;
17179 if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
17180 // Multiple maybe-poison ops when not allowed - bail out.
17181 return SDValue();
17182 }
17183 }
17184 // NOTE: the whole op may not be guaranteed not to be undef or poison because
17185 // it could create undef or poison due to its poison-generating flags.
17186 // So not finding any maybe-poison operands is fine.
17187
17188 for (unsigned OpNo : MaybePoisonOperandNumbers) {
17189 // N0 can mutate during iteration, so make sure to refetch the maybe poison
17190 // operands via the operand numbers. The typical scenario is that we have
17191 // something like this
17192 // t262: i32 = freeze t181
17193 // t150: i32 = ctlz_zero_undef t262
17194 // t184: i32 = ctlz_zero_undef t181
17195 // t268: i32 = select_cc t181, Constant:i32<0>, t184, t186, setne:ch
17196 // When freezing the t181 operand we get t262 back, and then the
17197 // ReplaceAllUsesOfValueWith call will not only replace t181 by t262, but
17198 // also recursively replace t184 by t150.
17199 SDValue MaybePoisonOperand = N->getOperand(0).getOperand(OpNo);
17200 // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
17201 if (MaybePoisonOperand.isUndef())
17202 continue;
17203 // First, freeze each offending operand.
17204 SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand);
17205 // Then, change all other uses of unfrozen operand to use frozen operand.
17206 DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
17207 if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
17208 FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) {
17209 // But, that also updated the use in the freeze we just created, thus
17210 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
17211 DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(),
17212 MaybePoisonOperand);
17213 }
17214
17215 // This node has been merged with another.
17216 if (N->getOpcode() == ISD::DELETED_NODE)
17217 return SDValue(N, 0);
17218 }
17219
17220 assert(N->getOpcode() != ISD::DELETED_NODE && "Node was deleted!");
17221
17222 // The whole node may have been updated, so the value we were holding
17223 // may no longer be valid. Re-fetch the operand we're `freeze`ing.
17224 N0 = N->getOperand(0);
17225
17226 // Finally, recreate the node; its operands were updated to use
17227 // frozen operands, so we just need to use its "original" operands.
17228 SmallVector<SDValue> Ops(N0->ops());
17229 // TODO: ISD::UNDEF and ISD::POISON should get separate handling, but best
17230 // leave for a future patch.
17231 for (SDValue &Op : Ops) {
17232 if (Op.isUndef())
17233 Op = DAG.getFreeze(Op);
17234 }
17235
17236 SDLoc DL(N0);
17237
17238 // Special case handling for ShuffleVectorSDNode nodes.
17239 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N0))
17240 return DAG.getVectorShuffle(N0.getValueType(), DL, Ops[0], Ops[1],
17241 SVN->getMask());
17242
17243 // NOTE: this strips poison generating flags.
17244 // Folding freeze(op(x, ...)) -> op(freeze(x), ...) does not require nnan,
17245 // ninf, nsz, or fast.
17246 // However, contract, reassoc, afn, and arcp should be preserved,
17247 // as these fast-math flags do not introduce poison values.
17248 SDNodeFlags SrcFlags = N0->getFlags();
17249 SDNodeFlags SafeFlags;
17250 SafeFlags.setAllowContract(SrcFlags.hasAllowContract());
17251 SafeFlags.setAllowReassociation(SrcFlags.hasAllowReassociation());
17252 SafeFlags.setApproximateFuncs(SrcFlags.hasApproximateFuncs());
17253 SafeFlags.setAllowReciprocal(SrcFlags.hasAllowReciprocal());
17254 return DAG.getNode(N0.getOpcode(), DL, N0->getVTList(), Ops, SafeFlags);
17255}
17256
17257// Returns true if floating point contraction is allowed on the FMUL-SDValue
17258// `N`
17259 static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
17260 assert(N.getOpcode() == ISD::FMUL);
17261
17262 return Options.AllowFPOpFusion == FPOpFusion::Fast ||
17263 N->getFlags().hasAllowContract();
17264}
17265
17266/// Try to perform FMA combining on a given FADD node.
17267template <class MatchContextClass>
17268SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
17269 SDValue N0 = N->getOperand(0);
17270 SDValue N1 = N->getOperand(1);
17271 EVT VT = N->getValueType(0);
17272 SDLoc SL(N);
17273 MatchContextClass matcher(DAG, TLI, N);
17274 const TargetOptions &Options = DAG.getTarget().Options;
17275
17276 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
17277
17278 // Floating-point multiply-add with intermediate rounding.
17279 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
17280 // FIXME: Add VP_FMAD opcode.
17281 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
17282
17283 // Floating-point multiply-add without intermediate rounding.
17284 bool HasFMA =
17285 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT)) &&
17286 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT);
17287
17288 // No valid opcode, do not combine.
17289 if (!HasFMAD && !HasFMA)
17290 return SDValue();
17291
17292 bool AllowFusionGlobally =
17293 Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD;
17294 // If the addition is not contractable, do not combine.
17295 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
17296 return SDValue();
17297
17298 // Folding fadd (fmul x, y), (fmul x, y) -> fma x, y, (fmul x, y) is never
17299 // beneficial. It does not reduce latency. It increases register pressure. It
17300 // replaces an fadd with an fma which is a more complex instruction, so is
17301 // likely to have a larger encoding, use more functional units, etc.
17302 if (N0 == N1)
17303 return SDValue();
17304
17305 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
17306 return SDValue();
17307
17308 // Always prefer FMAD to FMA for precision.
17309 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
17310 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
17311
17312 auto isFusedOp = [&](SDValue N) {
17313 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
17314 };
17315
17316 // Is the node an FMUL and contractable either due to global flags or
17317 // SDNodeFlags.
17318 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
17319 if (!matcher.match(N, ISD::FMUL))
17320 return false;
17321 return AllowFusionGlobally || N->getFlags().hasAllowContract();
17322 };
17323 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
17324 // prefer to fold the multiply with fewer uses.
17325 if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
17326 if (N0->use_size() > N1->use_size())
17327 std::swap(N0, N1);
17328 }
17329
17330 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
17331 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
17332 return matcher.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
17333 N0.getOperand(1), N1);
17334 }
17335
17336 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
17337 // Note: Commutes FADD operands.
17338 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
17339 return matcher.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
17340 N1.getOperand(1), N0);
17341 }
17342
17343 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
17344 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
17345 // This also works with nested fma instructions:
17346 // fadd (fma A, B, (fma C, D, (fmul E, F))), G -->
17347 // fma A, B, (fma C, D, (fma E, F, G))
17348 // fadd G, (fma A, B, (fma C, D, (fmul E, F))) -->
17349 // fma A, B, (fma C, D, (fma E, F, G)).
17350 // This requires reassociation because it changes the order of operations.
17351 bool CanReassociate = N->getFlags().hasAllowReassociation();
17352 if (CanReassociate) {
17353 SDValue FMA, E;
17354 if (isFusedOp(N0) && N0.hasOneUse()) {
17355 FMA = N0;
17356 E = N1;
17357 } else if (isFusedOp(N1) && N1.hasOneUse()) {
17358 FMA = N1;
17359 E = N0;
17360 }
17361
17362 SDValue TmpFMA = FMA;
17363 while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) {
17364 SDValue FMul = TmpFMA->getOperand(2);
17365 if (matcher.match(FMul, ISD::FMUL) && FMul.hasOneUse()) {
17366 SDValue C = FMul.getOperand(0);
17367 SDValue D = FMul.getOperand(1);
17368 SDValue CDE = matcher.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
17369 DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
17370 // Replacing the inner FMul could cause the outer FMA to be simplified
17371 // away.
17372 return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue(N, 0) : FMA;
17373 }
17374
17375 TmpFMA = TmpFMA->getOperand(2);
17376 }
17377 }
17378
17379 // Look through FP_EXTEND nodes to do more combining.
17380
17381 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
17382 if (matcher.match(N0, ISD::FP_EXTEND)) {
17383 SDValue N00 = N0.getOperand(0);
17384 if (isContractableFMUL(N00) &&
17385 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17386 N00.getValueType())) {
17387 return matcher.getNode(
17388 PreferredFusedOpcode, SL, VT,
17389 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
17390 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), N1);
17391 }
17392 }
17393
17394 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
17395 // Note: Commutes FADD operands.
17396 if (matcher.match(N1, ISD::FP_EXTEND)) {
17397 SDValue N10 = N1.getOperand(0);
17398 if (isContractableFMUL(N10) &&
17399 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17400 N10.getValueType())) {
17401 return matcher.getNode(
17402 PreferredFusedOpcode, SL, VT,
17403 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
17404 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
17405 }
17406 }
17407
17408 // More folding opportunities when target permits.
17409 if (Aggressive) {
17410 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
17411 // -> (fma x, y, (fma (fpext u), (fpext v), z))
17412 auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
17413 SDValue Z) {
17414 return matcher.getNode(
17415 PreferredFusedOpcode, SL, VT, X, Y,
17416 matcher.getNode(PreferredFusedOpcode, SL, VT,
17417 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
17418 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
17419 };
17420 if (isFusedOp(N0)) {
17421 SDValue N02 = N0.getOperand(2);
17422 if (matcher.match(N02, ISD::FP_EXTEND)) {
17423 SDValue N020 = N02.getOperand(0);
17424 if (isContractableFMUL(N020) &&
17425 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17426 N020.getValueType())) {
17427 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
17428 N020.getOperand(0), N020.getOperand(1),
17429 N1);
17430 }
17431 }
17432 }
17433
17434 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
17435 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
17436 // FIXME: This turns two single-precision and one double-precision
17437 // operation into two double-precision operations, which might not be
17438 // interesting for all targets, especially GPUs.
17439 auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
17440 SDValue Z) {
17441 return matcher.getNode(
17442 PreferredFusedOpcode, SL, VT,
17443 matcher.getNode(ISD::FP_EXTEND, SL, VT, X),
17444 matcher.getNode(ISD::FP_EXTEND, SL, VT, Y),
17445 matcher.getNode(PreferredFusedOpcode, SL, VT,
17446 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
17447 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
17448 };
17449 if (N0.getOpcode() == ISD::FP_EXTEND) {
17450 SDValue N00 = N0.getOperand(0);
17451 if (isFusedOp(N00)) {
17452 SDValue N002 = N00.getOperand(2);
17453 if (isContractableFMUL(N002) &&
17454 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17455 N00.getValueType())) {
17456 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
17457 N002.getOperand(0), N002.getOperand(1),
17458 N1);
17459 }
17460 }
17461 }
17462
17463 // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
17464 // -> (fma y, z, (fma (fpext u), (fpext v), x))
17465 if (isFusedOp(N1)) {
17466 SDValue N12 = N1.getOperand(2);
17467 if (N12.getOpcode() == ISD::FP_EXTEND) {
17468 SDValue N120 = N12.getOperand(0);
17469 if (isContractableFMUL(N120) &&
17470 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17471 N120.getValueType())) {
17472 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
17473 N120.getOperand(0), N120.getOperand(1),
17474 N0);
17475 }
17476 }
17477 }
17478
17479 // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
17480 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
17481 // FIXME: This turns two single-precision and one double-precision
17482 // operation into two double-precision operations, which might not be
17483 // interesting for all targets, especially GPUs.
17484 if (N1.getOpcode() == ISD::FP_EXTEND) {
17485 SDValue N10 = N1.getOperand(0);
17486 if (isFusedOp(N10)) {
17487 SDValue N102 = N10.getOperand(2);
17488 if (isContractableFMUL(N102) &&
17489 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17490 N10.getValueType())) {
17491 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
17492 N102.getOperand(0), N102.getOperand(1),
17493 N0);
17494 }
17495 }
17496 }
17497 }
17498
17499 return SDValue();
17500}
17501
17502/// Try to perform FMA combining on a given FSUB node.
17503template <class MatchContextClass>
17504SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
17505 SDValue N0 = N->getOperand(0);
17506 SDValue N1 = N->getOperand(1);
17507 EVT VT = N->getValueType(0);
17508 SDLoc SL(N);
17509 MatchContextClass matcher(DAG, TLI, N);
17510 const TargetOptions &Options = DAG.getTarget().Options;
17511
17512 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
17513
17514 // Floating-point multiply-add with intermediate rounding.
17515 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
17516 // FIXME: Add VP_FMAD opcode.
17517 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
17518
17519 // Floating-point multiply-add without intermediate rounding.
17520 bool HasFMA =
17521 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT)) &&
17522 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT);
17523
17524 // No valid opcode, do not combine.
17525 if (!HasFMAD && !HasFMA)
17526 return SDValue();
17527
17528 const SDNodeFlags Flags = N->getFlags();
17529 bool AllowFusionGlobally =
17530 (Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD);
17531
17532 // If the subtraction is not contractable, do not combine.
17533 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
17534 return SDValue();
17535
17536 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
17537 return SDValue();
17538
17539 // Always prefer FMAD to FMA for precision.
17540 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
17541 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
17542 bool NoSignedZero = Flags.hasNoSignedZeros();
17543
17544 // Is the node an FMUL and contractable either due to global flags or
17545 // SDNodeFlags.
17546 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
17547 if (!matcher.match(N, ISD::FMUL))
17548 return false;
17549 return AllowFusionGlobally || N->getFlags().hasAllowContract();
17550 };
17551
17552 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
17553 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
17554 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
17555 return matcher.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
17556 XY.getOperand(1),
17557 matcher.getNode(ISD::FNEG, SL, VT, Z));
17558 }
17559 return SDValue();
17560 };
17561
17562 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
17563 // Note: Commutes FSUB operands.
17564 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
17565 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
17566 return matcher.getNode(
17567 PreferredFusedOpcode, SL, VT,
17568 matcher.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
17569 YZ.getOperand(1), X);
17570 }
17571 return SDValue();
17572 };
17573
17574 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
17575 // prefer to fold the multiply with fewer uses.
17576 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
17577 (N0->use_size() > N1->use_size())) {
17578 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
17579 if (SDValue V = tryToFoldXSubYZ(N0, N1))
17580 return V;
17581 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
17582 if (SDValue V = tryToFoldXYSubZ(N0, N1))
17583 return V;
17584 } else {
17585 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
17586 if (SDValue V = tryToFoldXYSubZ(N0, N1))
17587 return V;
17588 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
17589 if (SDValue V = tryToFoldXSubYZ(N0, N1))
17590 return V;
17591 }
17592
17593 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
17594 if (matcher.match(N0, ISD::FNEG) && isContractableFMUL(N0.getOperand(0)) &&
17595 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
17596 SDValue N00 = N0.getOperand(0).getOperand(0);
17597 SDValue N01 = N0.getOperand(0).getOperand(1);
17598 return matcher.getNode(PreferredFusedOpcode, SL, VT,
17599 matcher.getNode(ISD::FNEG, SL, VT, N00), N01,
17600 matcher.getNode(ISD::FNEG, SL, VT, N1));
17601 }
17602
17603 // Look through FP_EXTEND nodes to do more combining.
17604
17605 // fold (fsub (fpext (fmul x, y)), z)
17606 // -> (fma (fpext x), (fpext y), (fneg z))
17607 if (matcher.match(N0, ISD::FP_EXTEND)) {
17608 SDValue N00 = N0.getOperand(0);
17609 if (isContractableFMUL(N00) &&
17610 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17611 N00.getValueType())) {
17612 return matcher.getNode(
17613 PreferredFusedOpcode, SL, VT,
17614 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
17615 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
17616 matcher.getNode(ISD::FNEG, SL, VT, N1));
17617 }
17618 }
17619
17620 // fold (fsub x, (fpext (fmul y, z)))
17621 // -> (fma (fneg (fpext y)), (fpext z), x)
17622 // Note: Commutes FSUB operands.
17623 if (matcher.match(N1, ISD::FP_EXTEND)) {
17624 SDValue N10 = N1.getOperand(0);
17625 if (isContractableFMUL(N10) &&
17626 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17627 N10.getValueType())) {
17628 return matcher.getNode(
17629 PreferredFusedOpcode, SL, VT,
17630 matcher.getNode(
17631 ISD::FNEG, SL, VT,
17632 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
17633 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
17634 }
17635 }
17636
17637 // fold (fsub (fpext (fneg (fmul x, y))), z)
17638 // -> (fneg (fma (fpext x), (fpext y), z))
17639 // Note: This could be removed with appropriate canonicalization of the
17640 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
17641 // command line flag -fp-contract=fast and fast-math flag contract prevent
17642 // us from implementing the canonicalization in visitFSUB.
17643 if (matcher.match(N0, ISD::FP_EXTEND)) {
17644 SDValue N00 = N0.getOperand(0);
17645 if (matcher.match(N00, ISD::FNEG)) {
17646 SDValue N000 = N00.getOperand(0);
17647 if (isContractableFMUL(N000) &&
17648 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17649 N00.getValueType())) {
17650 return matcher.getNode(
17651 ISD::FNEG, SL, VT,
17652 matcher.getNode(
17653 PreferredFusedOpcode, SL, VT,
17654 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
17655 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
17656 N1));
17657 }
17658 }
17659 }
17660
17661 // fold (fsub (fneg (fpext (fmul x, y))), z)
17662 // -> (fneg (fma (fpext x), (fpext y), z))
17663 // Note: This could be removed with appropriate canonicalization of the
17664 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
17665 // command line flag -fp-contract=fast and fast-math flag contract prevent
17666 // us from implementing the canonicalization in visitFSUB.
17667 if (matcher.match(N0, ISD::FNEG)) {
17668 SDValue N00 = N0.getOperand(0);
17669 if (matcher.match(N00, ISD::FP_EXTEND)) {
17670 SDValue N000 = N00.getOperand(0);
17671 if (isContractableFMUL(N000) &&
17672 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17673 N000.getValueType())) {
17674 return matcher.getNode(
17675 ISD::FNEG, SL, VT,
17676 matcher.getNode(
17677 PreferredFusedOpcode, SL, VT,
17678 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
17679 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
17680 N1));
17681 }
17682 }
17683 }
17684
17685 auto isContractableAndReassociableFMUL = [&isContractableFMUL](SDValue N) {
17686 return isContractableFMUL(N) && N->getFlags().hasAllowReassociation();
17687 };
17688
17689 auto isFusedOp = [&](SDValue N) {
17690 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
17691 };
17692
17693 // More folding opportunities when target permits.
17694 if (Aggressive && N->getFlags().hasAllowReassociation()) {
17695 bool CanFuse = N->getFlags().hasAllowContract();
17696 // fold (fsub (fma x, y, (fmul u, v)), z)
17697 // -> (fma x, y, (fma u, v, (fneg z)))
17698 if (CanFuse && isFusedOp(N0) &&
17699 isContractableAndReassociableFMUL(N0.getOperand(2)) &&
17700 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
17701 return matcher.getNode(
17702 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
17703 matcher.getNode(PreferredFusedOpcode, SL, VT,
17704 N0.getOperand(2).getOperand(0),
17705 N0.getOperand(2).getOperand(1),
17706 matcher.getNode(ISD::FNEG, SL, VT, N1)));
17707 }
17708
17709 // fold (fsub x, (fma y, z, (fmul u, v)))
17710 // -> (fma (fneg y), z, (fma (fneg u), v, x))
17711 if (CanFuse && isFusedOp(N1) &&
17712 isContractableAndReassociableFMUL(N1.getOperand(2)) &&
17713 N1->hasOneUse() && NoSignedZero) {
17714 SDValue N20 = N1.getOperand(2).getOperand(0);
17715 SDValue N21 = N1.getOperand(2).getOperand(1);
17716 return matcher.getNode(
17717 PreferredFusedOpcode, SL, VT,
17718 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
17719 N1.getOperand(1),
17720 matcher.getNode(PreferredFusedOpcode, SL, VT,
17721 matcher.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
17722 }
17723
17724 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
17725 // -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
17726 if (isFusedOp(N0) && N0->hasOneUse()) {
17727 SDValue N02 = N0.getOperand(2);
17728 if (matcher.match(N02, ISD::FP_EXTEND)) {
17729 SDValue N020 = N02.getOperand(0);
17730 if (isContractableAndReassociableFMUL(N020) &&
17731 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17732 N020.getValueType())) {
17733 return matcher.getNode(
17734 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
17735 matcher.getNode(
17736 PreferredFusedOpcode, SL, VT,
17737 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
17738 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
17739 matcher.getNode(ISD::FNEG, SL, VT, N1)));
17740 }
17741 }
17742 }
17743
17744 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
17745 // -> (fma (fpext x), (fpext y),
17746 // (fma (fpext u), (fpext v), (fneg z)))
17747 // FIXME: This turns two single-precision and one double-precision
17748 // operation into two double-precision operations, which might not be
17749 // interesting for all targets, especially GPUs.
17750 if (matcher.match(N0, ISD::FP_EXTEND)) {
17751 SDValue N00 = N0.getOperand(0);
17752 if (isFusedOp(N00)) {
17753 SDValue N002 = N00.getOperand(2);
17754 if (isContractableAndReassociableFMUL(N002) &&
17755 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17756 N00.getValueType())) {
17757 return matcher.getNode(
17758 PreferredFusedOpcode, SL, VT,
17759 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
17760 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
17761 matcher.getNode(
17762 PreferredFusedOpcode, SL, VT,
17763 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
17764 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
17765 matcher.getNode(ISD::FNEG, SL, VT, N1)));
17766 }
17767 }
17768 }
17769
17770 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
17771 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
17772 if (isFusedOp(N1) && matcher.match(N1.getOperand(2), ISD::FP_EXTEND) &&
17773 N1->hasOneUse()) {
17774 SDValue N120 = N1.getOperand(2).getOperand(0);
17775 if (isContractableAndReassociableFMUL(N120) &&
17776 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17777 N120.getValueType())) {
17778 SDValue N1200 = N120.getOperand(0);
17779 SDValue N1201 = N120.getOperand(1);
17780 return matcher.getNode(
17781 PreferredFusedOpcode, SL, VT,
17782 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
17783 N1.getOperand(1),
17784 matcher.getNode(
17785 PreferredFusedOpcode, SL, VT,
17786 matcher.getNode(ISD::FNEG, SL, VT,
17787 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
17788 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
17789 }
17790 }
17791
17792 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
17793 // -> (fma (fneg (fpext y)), (fpext z),
17794 // (fma (fneg (fpext u)), (fpext v), x))
17795 // FIXME: This turns two single-precision and one double-precision
17796 // operation into two double-precision operations, which might not be
17797 // interesting for all targets, especially GPUs.
17798 if (matcher.match(N1, ISD::FP_EXTEND) && isFusedOp(N1.getOperand(0))) {
17799 SDValue CvtSrc = N1.getOperand(0);
17800 SDValue N100 = CvtSrc.getOperand(0);
17801 SDValue N101 = CvtSrc.getOperand(1);
17802 SDValue N102 = CvtSrc.getOperand(2);
17803 if (isContractableAndReassociableFMUL(N102) &&
17804 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
17805 CvtSrc.getValueType())) {
17806 SDValue N1020 = N102.getOperand(0);
17807 SDValue N1021 = N102.getOperand(1);
17808 return matcher.getNode(
17809 PreferredFusedOpcode, SL, VT,
17810 matcher.getNode(ISD::FNEG, SL, VT,
17811 matcher.getNode(ISD::FP_EXTEND, SL, VT, N100)),
17812 matcher.getNode(ISD::FP_EXTEND, SL, VT, N101),
17813 matcher.getNode(
17814 PreferredFusedOpcode, SL, VT,
17815 matcher.getNode(ISD::FNEG, SL, VT,
17816 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
17817 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
17818 }
17819 }
17820 }
17821
17822 return SDValue();
17823}
17824
17825/// Try to perform FMA combining on a given FMUL node based on the distributive
17826/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
17827/// subtraction instead of addition).
17828SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
17829 SDValue N0 = N->getOperand(0);
17830 SDValue N1 = N->getOperand(1);
17831 EVT VT = N->getValueType(0);
17832 SDLoc SL(N);
17833
17834 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
17835
17836 const TargetOptions &Options = DAG.getTarget().Options;
17837
17838 // The transforms below are incorrect when x == 0 and y == inf, because the
17839 // intermediate multiplication produces a nan.
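// e.g. (0.0 + 1.0) * inf == inf, but the fused form fma(0.0, inf, inf)
// evaluates 0.0 * inf to NaN, so the whole result becomes NaN (illustrative).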
17840 SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
17841 if (!FAdd->getFlags().hasNoInfs())
17842 return SDValue();
17843
17844 // Floating-point multiply-add without intermediate rounding.
17845 bool HasFMA =
17846 isContractableFMUL(Options, SDValue(N, 0)) &&
17847 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
17848 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT);
17849
17850 // Floating-point multiply-add with intermediate rounding. This can result
17851 // in a less precise result due to the changed rounding order.
17852 bool HasFMAD = LegalOperations && TLI.isFMADLegal(DAG, N);
17853
17854 // No valid opcode, do not combine.
17855 if (!HasFMAD && !HasFMA)
17856 return SDValue();
17857
17858 // Always prefer FMAD to FMA for precision.
17859 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
17860 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
17861
17862 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
17863 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
17864 auto FuseFADD = [&](SDValue X, SDValue Y) {
17865 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
17866 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
17867 if (C->isExactlyValue(+1.0))
17868 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
17869 Y);
17870 if (C->isExactlyValue(-1.0))
17871 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
17872 DAG.getNode(ISD::FNEG, SL, VT, Y));
17873 }
17874 }
17875 return SDValue();
17876 };
17877
17878 if (SDValue FMA = FuseFADD(N0, N1))
17879 return FMA;
17880 if (SDValue FMA = FuseFADD(N1, N0))
17881 return FMA;
17882
17883 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
17884 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
17885 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
17886 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
17887 auto FuseFSUB = [&](SDValue X, SDValue Y) {
17888 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
17889 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
17890 if (C0->isExactlyValue(+1.0))
17891 return DAG.getNode(PreferredFusedOpcode, SL, VT,
17892 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
17893 Y);
17894 if (C0->isExactlyValue(-1.0))
17895 return DAG.getNode(PreferredFusedOpcode, SL, VT,
17896 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
17897 DAG.getNode(ISD::FNEG, SL, VT, Y));
17898 }
17899 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
17900 if (C1->isExactlyValue(+1.0))
17901 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
17902 DAG.getNode(ISD::FNEG, SL, VT, Y));
17903 if (C1->isExactlyValue(-1.0))
17904 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
17905 Y);
17906 }
17907 }
17908 return SDValue();
17909 };
17910
17911 if (SDValue FMA = FuseFSUB(N0, N1))
17912 return FMA;
17913 if (SDValue FMA = FuseFSUB(N1, N0))
17914 return FMA;
17915
17916 return SDValue();
17917}
17918
17919SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
17920 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17921
17922 // FADD -> FMA combines:
17923 if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
17924 if (Fused.getOpcode() != ISD::DELETED_NODE)
17925 AddToWorklist(Fused.getNode());
17926 return Fused;
17927 }
17928 return SDValue();
17929}
17930
17931SDValue DAGCombiner::visitFADD(SDNode *N) {
17932 SDValue N0 = N->getOperand(0);
17933 SDValue N1 = N->getOperand(1);
17934 bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
17935 bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
17936 EVT VT = N->getValueType(0);
17937 SDLoc DL(N);
17938 SDNodeFlags Flags = N->getFlags();
17939 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17940
17941 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17942 return R;
17943
17944 // fold (fadd c1, c2) -> c1 + c2
17945 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
17946 return C;
17947
17948 // canonicalize constant to RHS
17949 if (N0CFP && !N1CFP)
17950 return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
17951
17952 // fold vector ops
17953 if (VT.isVector())
17954 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17955 return FoldedVOp;
17956
17957 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
17958 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
17959 if (N1C && N1C->isZero())
17960 if (N1C->isNegative() || Flags.hasNoSignedZeros() ||
17962 return N0;
17963
17964 if (SDValue NewSel = foldBinOpIntoSelect(N))
17965 return NewSel;
17966
17967 // fold (fadd A, (fneg B)) -> (fsub A, B)
17968 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
17969 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
17970 N1, DAG, LegalOperations, ForCodeSize))
17971 return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
17972
17973 // fold (fadd (fneg A), B) -> (fsub B, A)
17974 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
17975 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
17976 N0, DAG, LegalOperations, ForCodeSize))
17977 return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
17978
17979 auto isFMulNegTwo = [](SDValue FMul) {
17980 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
17981 return false;
17982 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
17983 return C && C->isExactlyValue(-2.0);
17984 };
17985
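// Both folds below rely on B * -2.0 == -(B + B): the multiply by the
// constant becomes an add of B with itself and the outer fadd becomes an
// fsub.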
17986 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
17987 if (isFMulNegTwo(N0)) {
17988 SDValue B = N0.getOperand(0);
17989 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
17990 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
17991 }
17992 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
17993 if (isFMulNegTwo(N1)) {
17994 SDValue B = N1.getOperand(0);
17995 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
17996 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
17997 }
17998
17999 // No FP constant should be created after legalization as the Instruction
18000 // Selection pass has a hard time dealing with FP constants.
18001 bool AllowNewConst = (Level < AfterLegalizeDAG);
18002
18003 // If nnan is enabled, fold lots of things.
18004 if (Flags.hasNoNaNs() && AllowNewConst) {
18005 // If allowed, fold (fadd (fneg x), x) -> 0.0
18006 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
18007 return DAG.getConstantFP(0.0, DL, VT);
18008
18009 // If allowed, fold (fadd x, (fneg x)) -> 0.0
18010 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
18011 return DAG.getConstantFP(0.0, DL, VT);
18012 }
18013
18014 // If reassoc and nsz, fold lots of things.
18015 // TODO: break out portions of the transformations below for which Unsafe is
18016 // considered and which do not require both nsz and reassoc
18017 if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros() &&
18018 AllowNewConst) {
18019 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
18020 if (N1CFP && N0.getOpcode() == ISD::FADD &&
18021 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
18022 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
18023 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
18024 }
18025
18026 // We can fold chains of FADD's of the same value into multiplications.
18027 // This transform is not safe in general because we are reducing the number
18028 // of rounding steps.
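// For example, ((x + x) + x) rounds after each fadd, while the replacement
// x * 3.0 rounds only once, which is why reassociation must be allowed.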
18029 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
18030 if (N0.getOpcode() == ISD::FMUL) {
18031 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
18032 bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
18033
18034 // (fadd (fmul x, c), x) -> (fmul x, c+1)
18035 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
18036 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
18037 DAG.getConstantFP(1.0, DL, VT));
18038 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
18039 }
18040
18041 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
18042 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
18043 N1.getOperand(0) == N1.getOperand(1) &&
18044 N0.getOperand(0) == N1.getOperand(0)) {
18045 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
18046 DAG.getConstantFP(2.0, DL, VT));
18047 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
18048 }
18049 }
18050
18051 if (N1.getOpcode() == ISD::FMUL) {
18052 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
18053 bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
18054
18055 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
18056 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
18057 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
18058 DAG.getConstantFP(1.0, DL, VT));
18059 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
18060 }
18061
18062 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
18063 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
18064 N0.getOperand(0) == N0.getOperand(1) &&
18065 N1.getOperand(0) == N0.getOperand(0)) {
18066 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
18067 DAG.getConstantFP(2.0, DL, VT));
18068 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
18069 }
18070 }
18071
18072 if (N0.getOpcode() == ISD::FADD) {
18073 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
18074 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
18075 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
18076 (N0.getOperand(0) == N1)) {
18077 return DAG.getNode(ISD::FMUL, DL, VT, N1,
18078 DAG.getConstantFP(3.0, DL, VT));
18079 }
18080 }
18081
18082 if (N1.getOpcode() == ISD::FADD) {
18083 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
18084 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
18085 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
18086 N1.getOperand(0) == N0) {
18087 return DAG.getNode(ISD::FMUL, DL, VT, N0,
18088 DAG.getConstantFP(3.0, DL, VT));
18089 }
18090 }
18091
18092 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
18093 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
18094 N0.getOperand(0) == N0.getOperand(1) &&
18095 N1.getOperand(0) == N1.getOperand(1) &&
18096 N0.getOperand(0) == N1.getOperand(0)) {
18097 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
18098 DAG.getConstantFP(4.0, DL, VT));
18099 }
18100 }
18101 } // reassoc && nsz && AllowNewConst
18102
18103 if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()) {
18104 // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
18105 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
18106 VT, N0, N1, Flags))
18107 return SD;
18108 }
18109
18110 // FADD -> FMA combines:
18111 if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
18112 if (Fused.getOpcode() != ISD::DELETED_NODE)
18113 AddToWorklist(Fused.getNode());
18114 return Fused;
18115 }
18116 return SDValue();
18117}
18118
18119SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
18120 SDValue Chain = N->getOperand(0);
18121 SDValue N0 = N->getOperand(1);
18122 SDValue N1 = N->getOperand(2);
18123 EVT VT = N->getValueType(0);
18124 EVT ChainVT = N->getValueType(1);
18125 SDLoc DL(N);
18126 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18127
18128 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
18129 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
18130 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
18131 N1, DAG, LegalOperations, ForCodeSize)) {
18132 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
18133 {Chain, N0, NegN1});
18134 }
18135
18136 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
18137 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
18138 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
18139 N0, DAG, LegalOperations, ForCodeSize)) {
18140 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
18141 {Chain, N1, NegN0});
18142 }
18143 return SDValue();
18144}
18145
18146SDValue DAGCombiner::visitFSUB(SDNode *N) {
18147 SDValue N0 = N->getOperand(0);
18148 SDValue N1 = N->getOperand(1);
18149 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
18150 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
18151 EVT VT = N->getValueType(0);
18152 SDLoc DL(N);
18153 const SDNodeFlags Flags = N->getFlags();
18154 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18155
18156 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
18157 return R;
18158
18159 // fold (fsub c1, c2) -> c1-c2
18160 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
18161 return C;
18162
18163 // fold vector ops
18164 if (VT.isVector())
18165 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
18166 return FoldedVOp;
18167
18168 if (SDValue NewSel = foldBinOpIntoSelect(N))
18169 return NewSel;
18170
18171 // (fsub A, 0) -> A
18172 if (N1CFP && N1CFP->isZero()) {
18173 if (!N1CFP->isNegative() || Flags.hasNoSignedZeros() ||
18174 DAG.canIgnoreSignBitOfZero(SDValue(N, 0))) {
18175 return N0;
18176 }
18177 }
18178
18179 if (N0 == N1) {
18180 // (fsub x, x) -> 0.0
18181 if (Flags.hasNoNaNs())
18182 return DAG.getConstantFP(0.0f, DL, VT);
18183 }
18184
18185 // (fsub -0.0, N1) -> -N1
18186 if (N0CFP && N0CFP->isZero()) {
18187 if (N0CFP->isNegative() || Flags.hasNoSignedZeros() ||
18188 DAG.canIgnoreSignBitOfZero(SDValue(N, 0))) {
18189 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
18190 // flushed to zero, unless all users treat denorms as zero (DAZ).
18191 // FIXME: This transform will change the sign of a NaN and the behavior
18192 // of a signaling NaN. It is only valid when a NoNaN flag is present.
18193 DenormalMode DenormMode = DAG.getDenormalMode(VT);
18194 if (DenormMode == DenormalMode::getIEEE()) {
18195 if (SDValue NegN1 =
18196 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
18197 return NegN1;
18198 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
18199 return DAG.getNode(ISD::FNEG, DL, VT, N1);
18200 }
18201 }
18202 }
18203
18204 if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros() &&
18205 N1.getOpcode() == ISD::FADD) {
18206 // X - (X + Y) -> -Y
18207 if (N0 == N1->getOperand(0))
18208 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
18209 // X - (Y + X) -> -Y
18210 if (N0 == N1->getOperand(1))
18211 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
18212 }
18213
18214 // fold (fsub A, (fneg B)) -> (fadd A, B)
18215 if (SDValue NegN1 =
18216 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
18217 return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
18218
18219 // FSUB -> FMA combines:
18220 if (SDValue Fused = visitFSUBForFMACombine<EmptyMatchContext>(N)) {
18221 AddToWorklist(Fused.getNode());
18222 return Fused;
18223 }
18224
18225 return SDValue();
18226}
18227
18228// Transform IEEE Floats:
18229// (fmul C, (uitofp Pow2))
18230// -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
18231// (fdiv C, (uitofp Pow2))
18232// -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
18233//
18234 // The rationale is that fmul/fdiv by a power of 2 just changes the exponent,
18235 // so there is no need for more than an add/sub.
18236//
18237// This is valid under the following circumstances:
18238// 1) We are dealing with IEEE floats
18239// 2) C is normal
18240// 3) The fmul/fdiv add/sub will not go outside of min/max exponent bounds.
18241 // TODO: Much of this could also be used for generating `ldexp` on targets
18242 // that prefer it.
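// Worked example for f32 (8 exponent bits, 23 mantissa bits): multiplying a
// normal constant C by 2^k leaves its mantissa bits unchanged and adds k to
// the biased exponent, i.e. adds (k << 23) to C's integer bit pattern.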
18243SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) {
18244 EVT VT = N->getValueType(0);
18246 return SDValue();
18247
18248 SDValue ConstOp, Pow2Op;
18249
18250 std::optional<int> Mantissa;
18251 auto GetConstAndPow2Ops = [&](unsigned ConstOpIdx) {
18252 if (ConstOpIdx == 1 && N->getOpcode() == ISD::FDIV)
18253 return false;
18254
18255 ConstOp = peekThroughBitcasts(N->getOperand(ConstOpIdx));
18256 Pow2Op = N->getOperand(1 - ConstOpIdx);
18257 if (Pow2Op.getOpcode() != ISD::UINT_TO_FP &&
18258 (Pow2Op.getOpcode() != ISD::SINT_TO_FP ||
18259 !DAG.computeKnownBits(Pow2Op).isNonNegative()))
18260 return false;
18261
18262 Pow2Op = Pow2Op.getOperand(0);
18263
18264 // `Log2(Pow2Op) < Pow2Op.getScalarSizeInBits()`.
18265 // TODO: We could use knownbits to make this bound more precise.
18266 int MaxExpChange = Pow2Op.getValueType().getScalarSizeInBits();
18267
18268 auto IsFPConstValid = [N, MaxExpChange, &Mantissa](ConstantFPSDNode *CFP) {
18269 if (CFP == nullptr)
18270 return false;
18271
18272 const APFloat &APF = CFP->getValueAPF();
18273
18274 // Make sure we have a normal constant.
18275 if (!APF.isNormal())
18276 return false;
18277
18278 // Make sure the float's exponent is within the bounds for which this
18279 // transform produces a bitwise-equal value.
18280 int CurExp = ilogb(APF);
18281 // FMul by pow2 will only increase exponent.
18282 int MinExp =
18283 N->getOpcode() == ISD::FMUL ? CurExp : (CurExp - MaxExpChange);
18284 // FDiv by pow2 will only decrease exponent.
18285 int MaxExp =
18286 N->getOpcode() == ISD::FDIV ? CurExp : (CurExp + MaxExpChange);
18287 if (MinExp <= APFloat::semanticsMinExponent(APF.getSemantics()) ||
18288 MaxExp >= APFloat::semanticsMaxExponent(APF.getSemantics()))
18289 return false;
18290
18291 // Finally make sure we actually know the mantissa for the float type.
18292 int ThisMantissa = APFloat::semanticsPrecision(APF.getSemantics()) - 1;
18293 if (!Mantissa)
18294 Mantissa = ThisMantissa;
18295
18296 return *Mantissa == ThisMantissa && ThisMantissa > 0;
18297 };
18298
18299 // TODO: We may be able to include undefs.
18300 return ISD::matchUnaryFpPredicate(ConstOp, IsFPConstValid);
18301 };
18302
18303 if (!GetConstAndPow2Ops(0) && !GetConstAndPow2Ops(1))
18304 return SDValue();
18305
18306 if (!TLI.optimizeFMulOrFDivAsShiftAddBitcast(N, ConstOp, Pow2Op))
18307 return SDValue();
18308
18309 // Get log2 after all other checks have taken place. This is because
18310 // BuildLogBase2 may create a new node.
18311 SDLoc DL(N);
18312 // Get Log2 type with same bitwidth as the float type (VT).
18313 EVT NewIntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits());
18314 if (VT.isVector())
18315 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewIntVT,
18316 VT.getVectorElementCount());
18317
18318 SDValue Log2 = BuildLogBase2(Pow2Op, DL, DAG.isKnownNeverZero(Pow2Op),
18319 /*InexpensiveOnly*/ true, NewIntVT);
18320 if (!Log2)
18321 return SDValue();
18322
18323 // Perform actual transform.
18324 SDValue MantissaShiftCnt =
18325 DAG.getShiftAmountConstant(*Mantissa, NewIntVT, DL);
18326 // TODO: Sometimes Log2 is of form `(X + C)`. `(X + C) << C1` should fold to
18327 // `(X << C1) + (C << C1)`, but that isn't always the case because of the
18328 // cast. We could implement that by handling the casts here.
18329 SDValue Shift = DAG.getNode(ISD::SHL, DL, NewIntVT, Log2, MantissaShiftCnt);
18330 SDValue ResAsInt =
18331 DAG.getNode(N->getOpcode() == ISD::FMUL ? ISD::ADD : ISD::SUB, DL,
18332 NewIntVT, DAG.getBitcast(NewIntVT, ConstOp), Shift);
18333 SDValue ResAsFP = DAG.getBitcast(VT, ResAsInt);
18334 return ResAsFP;
18335}
18336
18337SDValue DAGCombiner::visitFMUL(SDNode *N) {
18338 SDValue N0 = N->getOperand(0);
18339 SDValue N1 = N->getOperand(1);
18340 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
18341 EVT VT = N->getValueType(0);
18342 SDLoc DL(N);
18343 const SDNodeFlags Flags = N->getFlags();
18344 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18345
18346 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
18347 return R;
18348
18349 // fold (fmul c1, c2) -> c1*c2
18350 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
18351 return C;
18352
18353 // canonicalize constant to RHS
18354 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
18355 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
18356 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
18357
18358 // fold vector ops
18359 if (VT.isVector())
18360 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
18361 return FoldedVOp;
18362
18363 if (SDValue NewSel = foldBinOpIntoSelect(N))
18364 return NewSel;
18365
18366 if (Flags.hasAllowReassociation()) {
18367 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
18368 if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
18369 N0.getOpcode() == ISD::FMUL) {
18370 SDValue N00 = N0.getOperand(0);
18371 SDValue N01 = N0.getOperand(1);
18372 // Avoid an infinite loop by making sure that N00 is not a constant
18373 // (the inner multiply has not been constant folded yet).
18374 if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
18375 !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
18376 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
18377 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
18378 }
18379 }
18380
18381 // Match a special-case: we convert X * 2.0 into fadd.
18382 // fmul (fadd X, X), C -> fmul X, 2.0 * C
18383 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
18384 N0.getOperand(0) == N0.getOperand(1)) {
18385 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
18386 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
18387 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
18388 }
18389
18390 // Fold fmul(vecreduce(x), vecreduce(y)) -> vecreduce(fmul(x, y))
18391 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FMUL, ISD::FMUL, DL,
18392 VT, N0, N1, Flags))
18393 return SD;
18394 }
18395
18396 // fold (fmul X, 2.0) -> (fadd X, X)
18397 if (N1CFP && N1CFP->isExactlyValue(+2.0))
18398 return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
18399
18400 // fold (fmul X, -1.0) -> (fsub -0.0, X)
18401 if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
18402 if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
18403 return DAG.getNode(ISD::FSUB, DL, VT,
18404 DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
18405 }
18406 }
18407
18408 // -N0 * -N1 --> N0 * N1
18409 TargetLowering::NegatibleCost CostN0 =
18410 TargetLowering::NegatibleCost::Expensive;
18411 TargetLowering::NegatibleCost CostN1 =
18412 TargetLowering::NegatibleCost::Expensive;
18413 SDValue NegN0 =
18414 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
18415 if (NegN0) {
18416 HandleSDNode NegN0Handle(NegN0);
18417 SDValue NegN1 =
18418 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
18419 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
18420 CostN1 == TargetLowering::NegatibleCost::Cheaper))
18421 return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
18422 }
18423
18424 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
18425 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
18426 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
18427 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
18428 TLI.isOperationLegal(ISD::FABS, VT)) {
18429 SDValue Select = N0, X = N1;
18430 if (Select.getOpcode() != ISD::SELECT)
18431 std::swap(Select, X);
18432
18433 SDValue Cond = Select.getOperand(0);
18434 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
18435 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
18436
18437 if (TrueOpnd && FalseOpnd &&
18438 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
18439 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
18440 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
18441 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
18442 switch (CC) {
18443 default: break;
18444 case ISD::SETOLT:
18445 case ISD::SETULT:
18446 case ISD::SETOLE:
18447 case ISD::SETULE:
18448 case ISD::SETLT:
18449 case ISD::SETLE:
18450 std::swap(TrueOpnd, FalseOpnd);
18451 [[fallthrough]];
18452 case ISD::SETOGT:
18453 case ISD::SETUGT:
18454 case ISD::SETOGE:
18455 case ISD::SETUGE:
18456 case ISD::SETGT:
18457 case ISD::SETGE:
18458 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
18459 TLI.isOperationLegal(ISD::FNEG, VT))
18460 return DAG.getNode(ISD::FNEG, DL, VT,
18461 DAG.getNode(ISD::FABS, DL, VT, X));
18462 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
18463 return DAG.getNode(ISD::FABS, DL, VT, X);
18464
18465 break;
18466 }
18467 }
18468 }
18469
18470 // FMUL -> FMA combines:
18471 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
18472 AddToWorklist(Fused.getNode());
18473 return Fused;
18474 }
18475
18476 // Don't do `combineFMulOrFDivWithIntPow2` until after FMUL -> FMA has been
18477 // able to run.
18478 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
18479 return R;
18480
18481 return SDValue();
18482}
18483
18484template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
18485 SDValue N0 = N->getOperand(0);
18486 SDValue N1 = N->getOperand(1);
18487 SDValue N2 = N->getOperand(2);
18488 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
18489 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
18490 ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
18491 EVT VT = N->getValueType(0);
18492 SDLoc DL(N);
18493 // FMA nodes have flags that propagate to the created nodes.
18494 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18495 MatchContextClass matcher(DAG, TLI, N);
18496
18497 // Constant fold FMA.
18498 if (SDValue C =
18499 DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1, N2}))
18500 return C;
18501
18502 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
18503 TargetLowering::NegatibleCost CostN0 =
18504 TargetLowering::NegatibleCost::Expensive;
18505 TargetLowering::NegatibleCost CostN1 =
18506 TargetLowering::NegatibleCost::Expensive;
18507 SDValue NegN0 =
18508 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
18509 if (NegN0) {
18510 HandleSDNode NegN0Handle(NegN0);
18511 SDValue NegN1 =
18512 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
18513 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
18514 CostN1 == TargetLowering::NegatibleCost::Cheaper))
18515 return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
18516 }
18517
18518 if (N->getFlags().hasNoNaNs() && N->getFlags().hasNoInfs()) {
18519 if (N->getFlags().hasNoSignedZeros() ||
18520 (N2CFP && !N2CFP->isExactlyValue(-0.0))) {
18521 if (N0CFP && N0CFP->isZero())
18522 return N2;
18523 if (N1CFP && N1CFP->isZero())
18524 return N2;
18525 }
18526 }
18527
18528 // FIXME: Support splat of constant.
18529 if (N0CFP && N0CFP->isExactlyValue(1.0))
18530 return matcher.getNode(ISD::FADD, DL, VT, N1, N2);
18531 if (N1CFP && N1CFP->isExactlyValue(1.0))
18532 return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
18533
18534 // Canonicalize (fma c, x, y) -> (fma x, c, y)
18535 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
18536 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
18537 return matcher.getNode(ISD::FMA, DL, VT, N1, N0, N2);
18538
18539 bool CanReassociate = N->getFlags().hasAllowReassociation();
18540 if (CanReassociate) {
18541 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
18542 if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
18543 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
18544 DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
18545 return matcher.getNode(
18546 ISD::FMUL, DL, VT, N0,
18547 matcher.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
18548 }
18549
18550 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
18551 if (matcher.match(N0, ISD::FMUL) &&
18552 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
18553 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
18554 return matcher.getNode(
18555 ISD::FMA, DL, VT, N0.getOperand(0),
18556 matcher.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)), N2);
18557 }
18558 }
18559
18560 // (fma x, -1, y) -> (fadd (fneg x), y)
18561 // FIXME: Support splat of constant.
18562 if (N1CFP) {
18563 if (N1CFP->isExactlyValue(1.0))
18564 return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
18565
18566 if (N1CFP->isExactlyValue(-1.0) &&
18567 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
18568 SDValue RHSNeg = matcher.getNode(ISD::FNEG, DL, VT, N0);
18569 AddToWorklist(RHSNeg.getNode());
18570 return matcher.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
18571 }
18572
18573 // fma (fneg x), K, y -> fma x, -K, y
18574 if (matcher.match(N0, ISD::FNEG) &&
18575 (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
18576 (N1.hasOneUse() &&
18577 !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
18578 return matcher.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
18579 matcher.getNode(ISD::FNEG, DL, VT, N1), N2);
18580 }
18581 }
18582
18583 // FIXME: Support splat of constant.
18584 if (CanReassociate) {
18585 // (fma x, c, x) -> (fmul x, (c+1))
18586 if (N1CFP && N0 == N2) {
18587 return matcher.getNode(ISD::FMUL, DL, VT, N0,
18588 matcher.getNode(ISD::FADD, DL, VT, N1,
18589 DAG.getConstantFP(1.0, DL, VT)));
18590 }
18591
18592 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
18593 if (N1CFP && matcher.match(N2, ISD::FNEG) && N2.getOperand(0) == N0) {
18594 return matcher.getNode(ISD::FMUL, DL, VT, N0,
18595 matcher.getNode(ISD::FADD, DL, VT, N1,
18596 DAG.getConstantFP(-1.0, DL, VT)));
18597 }
18598 }
18599
18600 // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
18601 // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
18602 if (!TLI.isFNegFree(VT))
18603 if (SDValue Neg = TLI.getCheaperNegatedExpression(
18604 SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
18605 return matcher.getNode(ISD::FNEG, DL, VT, Neg);
18606 return SDValue();
18607}
18608
18609SDValue DAGCombiner::visitFMAD(SDNode *N) {
18610 SDValue N0 = N->getOperand(0);
18611 SDValue N1 = N->getOperand(1);
18612 SDValue N2 = N->getOperand(2);
18613 EVT VT = N->getValueType(0);
18614 SDLoc DL(N);
18615
18616 // Constant fold FMAD.
18617 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMAD, DL, VT, {N0, N1, N2}))
18618 return C;
18619
18620 return SDValue();
18621}
18622
18623SDValue DAGCombiner::visitFMULADD(SDNode *N) {
18624 SDValue N0 = N->getOperand(0);
18625 SDValue N1 = N->getOperand(1);
18626 SDValue N2 = N->getOperand(2);
18627 EVT VT = N->getValueType(0);
18628 SDLoc DL(N);
18629
18630 // Constant fold FMULADD.
18631 if (SDValue C =
18632 DAG.FoldConstantArithmetic(ISD::FMULADD, DL, VT, {N0, N1, N2}))
18633 return C;
18634
18635 return SDValue();
18636}
18637
18638// Combine multiple FDIVs with the same divisor into multiple FMULs by the
18639// reciprocal.
18640// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
18641// Notice that this is not always beneficial. One reason is different targets
18642// may have different costs for FDIV and FMUL, so sometimes the cost of two
18643 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
18644 // is that the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
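// The reciprocal itself costs one extra FDIV, so the target-specified
// minimum number of divisor uses below must be reached before the rewrite
// pays for itself.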
18645SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
18646 // TODO: Limit this transform based on optsize/minsize - it always creates at
18647 // least 1 extra instruction. But the perf win may be substantial enough
18648 // that only minsize should restrict this.
18649 const SDNodeFlags Flags = N->getFlags();
18650 if (LegalDAG || !Flags.hasAllowReciprocal())
18651 return SDValue();
18652
18653 // Skip if current node is a reciprocal/fneg-reciprocal.
18654 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
18655 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
18656 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
18657 return SDValue();
18658
18659 // Exit early if the target does not want this transform or if there can't
18660 // possibly be enough uses of the divisor to make the transform worthwhile.
18661 unsigned MinUses = TLI.combineRepeatedFPDivisors();
18662
18663 // For splat vectors, scale the number of uses by the splat factor. If we can
18664 // convert the division into a scalar op, that will likely be much faster.
18665 unsigned NumElts = 1;
18666 EVT VT = N->getValueType(0);
18667 if (VT.isVector() && DAG.isSplatValue(N1))
18668 NumElts = VT.getVectorMinNumElements();
18669
18670 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
18671 return SDValue();
18672
18673 // Find all FDIV users of the same divisor.
18674 // Use a set because duplicates may be present in the user list.
18675 SetVector<SDNode *> Users;
18676 for (auto *U : N1->users()) {
18677 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
18678 // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
18679 if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
18680 U->getOperand(0) == U->getOperand(1).getOperand(0) &&
18681 U->getFlags().hasAllowReassociation() &&
18682 U->getFlags().hasNoSignedZeros())
18683 continue;
18684
18685 // This division is eligible for optimization only if global unsafe math
18686 // is enabled or if this division allows reciprocal formation.
18687 if (U->getFlags().hasAllowReciprocal())
18688 Users.insert(U);
18689 }
18690 }
18691
18692 // Now that we have the actual number of divisor uses, make sure it meets
18693 // the minimum threshold specified by the target.
18694 if ((Users.size() * NumElts) < MinUses)
18695 return SDValue();
18696
18697 SDLoc DL(N);
18698 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
18699 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
18700
18701 // Dividend / Divisor -> Dividend * Reciprocal
18702 for (auto *U : Users) {
18703 SDValue Dividend = U->getOperand(0);
18704 if (Dividend != FPOne) {
18705 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
18706 Reciprocal, Flags);
18707 CombineTo(U, NewNode);
18708 } else if (U != Reciprocal.getNode()) {
18709 // In the absence of fast-math-flags, this user node is always the
18710 // same node as Reciprocal, but with FMF they may be different nodes.
18711 CombineTo(U, Reciprocal);
18712 }
18713 }
18714 return SDValue(N, 0); // N was replaced.
18715}
18716
18717SDValue DAGCombiner::visitFDIV(SDNode *N) {
18718 SDValue N0 = N->getOperand(0);
18719 SDValue N1 = N->getOperand(1);
18720 EVT VT = N->getValueType(0);
18721 SDLoc DL(N);
18722 SDNodeFlags Flags = N->getFlags();
18723 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18724
18725 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
18726 return R;
18727
18728 // fold (fdiv c1, c2) -> c1/c2
18729 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
18730 return C;
18731
18732 // fold vector ops
18733 if (VT.isVector())
18734 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
18735 return FoldedVOp;
18736
18737 if (SDValue NewSel = foldBinOpIntoSelect(N))
18738 return NewSel;
18739
18740 if (SDValue V = combineRepeatedFPDivisors(N))
18741 return V;
18742
18743 // fold (fdiv X, c2) -> (fmul X, 1/c2) if there is no loss in precision, or
18744 // the loss is acceptable with AllowReciprocal.
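// For example, 1.0/2.0 == 0.5 exactly (opOK), so X / 2.0 always becomes
// X * 0.5; 1.0/3.0 is inexact (opInexact), so X / 3.0 is only rewritten
// when an approximate reciprocal is allowed.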
18745 if (auto *N1CFP = isConstOrConstSplatFP(N1, true)) {
18746 // Compute the reciprocal 1.0 / c2.
18747 const APFloat &N1APF = N1CFP->getValueAPF();
18748 APFloat Recip = APFloat::getOne(N1APF.getSemantics());
18749 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
18750 // Only do the transform if the reciprocal is a legal fp immediate that
18751 // isn't too nasty (eg NaN, denormal, ...).
18752 if (((st == APFloat::opOK && !Recip.isDenormal()) ||
18753 (st == APFloat::opInexact && Flags.hasAllowReciprocal())) &&
18754 (!LegalOperations ||
18755 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
18756 // backend)... we should handle this gracefully after Legalize.
18757 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
18758 TLI.isOperationLegal(ISD::ConstantFP, VT) ||
18759 TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
18760 return DAG.getNode(ISD::FMUL, DL, VT, N0,
18761 DAG.getConstantFP(Recip, DL, VT));
18762 }
18763
18764 if (Flags.hasAllowReciprocal()) {
18765 // If this FDIV is part of a reciprocal square root, it may be folded
18766 // into a target-specific square root estimate instruction.
18767 bool N1AllowReciprocal = N1->getFlags().hasAllowReciprocal();
18768 if (N1.getOpcode() == ISD::FSQRT) {
18769 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0)))
18770 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
18771 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
18772 N1.getOperand(0).getOpcode() == ISD::FSQRT &&
18773 N1AllowReciprocal) {
18774 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
18775 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
18776 AddToWorklist(RV.getNode());
18777 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
18778 }
18779 } else if (N1.getOpcode() == ISD::FP_ROUND &&
18780 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
18781 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
18782 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
18783 AddToWorklist(RV.getNode());
18784 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
18785 }
18786 } else if (N1.getOpcode() == ISD::FMUL) {
18787 // Look through an FMUL. Even though this won't remove the FDIV directly,
18788 // it's still worthwhile to get rid of the FSQRT if possible.
18789 SDValue Sqrt, Y;
18790 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
18791 Sqrt = N1.getOperand(0);
18792 Y = N1.getOperand(1);
18793 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
18794 Sqrt = N1.getOperand(1);
18795 Y = N1.getOperand(0);
18796 }
18797 if (Sqrt.getNode()) {
18798 // If the other multiply operand is known positive, pull it into the
18799 // sqrt. That will eliminate the division if we convert to an estimate.
18800 if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
18801 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
18802 SDValue A;
18803 if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
18804 A = Y.getOperand(0);
18805 else if (Y == Sqrt.getOperand(0))
18806 A = Y;
18807 if (A) {
18808 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
18809 // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
18810 SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
18811 SDValue AAZ =
18812 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
18813 if (SDValue Rsqrt = buildRsqrtEstimate(AAZ))
18814 return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
18815
18816 // Estimate creation failed. Clean up speculatively created nodes.
18817 recursivelyDeleteUnusedNodes(AAZ.getNode());
18818 }
18819 }
18820
18821 // We found a FSQRT, so try to make this fold:
18822 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
18823 if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0))) {
18824 SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
18825 AddToWorklist(Div.getNode());
18826 return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
18827 }
18828 }
18829 }
18830
18831 // Fold into a reciprocal estimate and multiply instead of a real divide.
18832 if (Flags.hasNoInfs())
18833 if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
18834 return RV;
18835 }
18836
18837 // Fold X/Sqrt(X) -> Sqrt(X)
18838 if ((Flags.hasNoSignedZeros() || DAG.canIgnoreSignBitOfZero(SDValue(N, 0))) &&
18839 Flags.hasAllowReassociation())
18840 if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
18841 return N1;
18842
18843 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
18844 TargetLowering::NegatibleCost CostN0 =
18845 TargetLowering::NegatibleCost::Expensive;
18846 TargetLowering::NegatibleCost CostN1 =
18847 TargetLowering::NegatibleCost::Expensive;
18848 SDValue NegN0 =
18849 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
18850 if (NegN0) {
18851 HandleSDNode NegN0Handle(NegN0);
18852 SDValue NegN1 =
18853 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
18854 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
18855 CostN1 == TargetLowering::NegatibleCost::Cheaper))
18856 return DAG.getNode(ISD::FDIV, DL, VT, NegN0, NegN1);
18857 }
18858
18859 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
18860 return R;
18861
18862 return SDValue();
18863}
18864
18865SDValue DAGCombiner::visitFREM(SDNode *N) {
18866 SDValue N0 = N->getOperand(0);
18867 SDValue N1 = N->getOperand(1);
18868 EVT VT = N->getValueType(0);
18869 SDNodeFlags Flags = N->getFlags();
18870 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18871 SDLoc DL(N);
18872
18873 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
18874 return R;
18875
18876 // fold (frem c1, c2) -> fmod(c1,c2)
18877 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, DL, VT, {N0, N1}))
18878 return C;
18879
18880 if (SDValue NewSel = foldBinOpIntoSelect(N))
18881 return NewSel;
18882
18883 // Lower frem N0, N1 => N0 - trunc(N0 / N1) * N1, provided N1 is an integer
18884 // power of 2.
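// Worked example: frem(7.5, 2.0) = 7.5 - trunc(7.5 / 2.0) * 2.0
//                                = 7.5 - 3.0 * 2.0 = 1.5.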
18885 if (!TLI.isOperationLegal(ISD::FREM, VT) &&
18889 DAG.isKnownToBeAPowerOfTwoFP(N1)) {
18890 bool NeedsCopySign = !Flags.hasNoSignedZeros() &&
18891 !DAG.canIgnoreSignBitOfZero(SDValue(N, 0)) &&
18893 SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
18894 SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
18895 SDValue MLA;
18896 if (TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
18897 MLA = DAG.getNode(ISD::FMA, DL, VT, DAG.getNode(ISD::FNEG, DL, VT, Rnd),
18898 N1, N0);
18899 } else {
18900 SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
18901 MLA = DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
18902 }
18903 return NeedsCopySign ? DAG.getNode(ISD::FCOPYSIGN, DL, VT, MLA, N0) : MLA;
18904 }
18905
18906 return SDValue();
18907}
18908
18909SDValue DAGCombiner::visitFSQRT(SDNode *N) {
18910 SDNodeFlags Flags = N->getFlags();
18911
18912 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
18913 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
18914 if (!Flags.hasApproximateFuncs() || !Flags.hasNoInfs())
18915 return SDValue();
18916
18917 SDValue N0 = N->getOperand(0);
18918 if (TLI.isFsqrtCheap(N0, DAG))
18919 return SDValue();
18920
18921 // FSQRT nodes have flags that propagate to the created nodes.
18922 SelectionDAG::FlagInserter FlagInserter(DAG, Flags);
18923 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
18924 // transform the fdiv, we may produce a sub-optimal estimate sequence
18925 // because the reciprocal calculation may not have to filter out a
18926 // 0.0 input.
18927 return buildSqrtEstimate(N0);
18928}
18929
18930/// copysign(x, fp_extend(y)) -> copysign(x, y)
18931/// copysign(x, fp_round(y)) -> copysign(x, y)
18932/// Operands to the functions are the type of X and Y respectively.
18933static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
18934 // Always fold no-op FP casts.
18935 if (XTy == YTy)
18936 return true;
18937
18938 // Do not optimize out type conversion of f128 type yet.
18939 // For some targets like x86_64, configuration is changed to keep one f128
18940 // value in one SSE register, but instruction selection cannot handle
18941 // FCOPYSIGN on SSE registers yet.
18942 if (YTy == MVT::f128)
18943 return false;
18944
18945 // Avoid mismatched vector operand types, for better instruction selection.
18946 return !YTy.isVector();
18947}
18948
18949 static bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
18950 SDValue N1 = N->getOperand(1);
18951 if (N1.getOpcode() != ISD::FP_EXTEND &&
18952 N1.getOpcode() != ISD::FP_ROUND)
18953 return false;
18954 EVT N1VT = N1->getValueType(0);
18955 EVT N1Op0VT = N1->getOperand(0).getValueType();
18956 return CanCombineFCOPYSIGN_EXTEND_ROUND(N1VT, N1Op0VT);
18957}
18958
18959SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
18960 SDValue N0 = N->getOperand(0);
18961 SDValue N1 = N->getOperand(1);
18962 EVT VT = N->getValueType(0);
18963 SDLoc DL(N);
18964
18965 // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
18966 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, DL, VT, {N0, N1}))
18967 return C;
18968
18969 // copysign(x, fp_extend(y)) -> copysign(x, y)
18970 // copysign(x, fp_round(y)) -> copysign(x, y)
18971 if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
18972 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0, N1.getOperand(0));
18973
18974 if (SimplifyDemandedBits(SDValue(N, 0)))
18975 return SDValue(N, 0);
18976
18977 if (VT != N1.getValueType())
18978 return SDValue();
18979
18980 // If this is equivalent to a disjoint or, replace it with one. This can
18981 // happen if the sign operand is a sign mask (i.e., x << sign_bit_position).
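// If N1 is such a sign mask, its f32 bit pattern is either 0 or 0x80000000;
// with N0's sign bit known clear the two bit patterns never overlap, so the
// copysign reduces to a disjoint OR of the bitcasts.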
18982 if (DAG.SignBitIsZeroFP(N0) &&
18984 // TODO: Just directly match the shift pattern. computeKnownBits is heavy
18985 // for such a narrowly targeted case.
18986 EVT IntVT = VT.changeTypeToInteger();
18987 // TODO: It appears to be profitable in some situations to unconditionally
18988 // emit a fabs(n0) to perform this combine.
18989 SDValue CastSrc0 = DAG.getNode(ISD::BITCAST, DL, IntVT, N0);
18990 SDValue CastSrc1 = DAG.getNode(ISD::BITCAST, DL, IntVT, N1);
18991
18992 SDValue SignOr = DAG.getNode(ISD::OR, DL, IntVT, CastSrc0, CastSrc1,
18993 SDNodeFlags::Disjoint);
18994 return DAG.getNode(ISD::BITCAST, DL, VT, SignOr);
18995 }
18996
18997 return SDValue();
18998}
18999
19000SDValue DAGCombiner::visitFPOW(SDNode *N) {
19001 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
19002 if (!ExponentC)
19003 return SDValue();
19004 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19005
19006 // Try to convert x ** (1/3) into cube root.
19007 // TODO: Handle the various flavors of long double.
19008 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
19009 // Some range near 1/3 should be fine.
19010 EVT VT = N->getValueType(0);
19011 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
19012 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
19013 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
19014 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
19015 // pow(-val, 1/3) = nan; cbrt(-val) = -cbrt(val).
19016 // For regular numbers, rounding may cause the results to differ.
19017 // Therefore, we require { nsz ninf nnan afn } for this transform.
19018 // TODO: We could select out the special cases if we don't have nsz/ninf.
19019 SDNodeFlags Flags = N->getFlags();
19020 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
19021 !Flags.hasApproximateFuncs())
19022 return SDValue();
19023
19024 // Do not create a cbrt() libcall if the target does not have it, and do not
19025 // turn a pow that has lowering support into a cbrt() libcall.
19026 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
19027 (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
19028 DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
19029 return SDValue();
19030
19031 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
19032 }
19033
19034 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
19035 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
19036 // TODO: This could be extended (using a target hook) to handle smaller
19037 // power-of-2 fractional exponents.
19038 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
19039 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
19040 if (ExponentIs025 || ExponentIs075) {
19041 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
19042 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
19043 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
19044 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
19045 // For regular numbers, rounding may cause the results to differ.
19046 // Therefore, we require { nsz ninf afn } for this transform.
19047 // TODO: We could select out the special cases if we don't have nsz/ninf.
19048 SDNodeFlags Flags = N->getFlags();
19049
19050 // We only need no signed zeros for the 0.25 case.
19051 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
19052 !Flags.hasApproximateFuncs())
19053 return SDValue();
19054
19055 // Don't double the number of libcalls. We are trying to inline fast code.
19056 if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
19057 return SDValue();
19058
19059 // Assume that libcalls are the smallest code.
19060 // TODO: This restriction should probably be lifted for vectors.
19061 if (ForCodeSize)
19062 return SDValue();
19063
19064 // pow(X, 0.25) --> sqrt(sqrt(X))
19065 SDLoc DL(N);
19066 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
19067 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
19068 if (ExponentIs025)
19069 return SqrtSqrt;
19070 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
19071 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
19072 }
19073
19074 return SDValue();
19075}
19076
19077 static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
19078 const TargetLowering &TLI) {
19079 // We can fold the fpto[us]i -> [us]itofp pattern into a single ftrunc.
19080 // Additionally, if there are clamps ([us]min or [us]max) around
19081 // the fpto[us]i, we can fold those into fminnum/fmaxnum around the ftrunc.
19082 // If NoSignedZerosFPMath is enabled, this is a direct replacement.
19083 // Otherwise, for strict math, we must handle edge cases:
19084 // 1. For unsigned conversions, use FABS to handle negative cases. Take -0.0
19085 // as an example: it first becomes integer 0 and is converted back to +0.0.
19086 // FTRUNC on its own could produce -0.0.
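// For signed conversions the same issue arises and FABS cannot help:
// (float)(int)-0.3f is +0.0f while FTRUNC(-0.3f) is -0.0f, so that case is
// skipped below unless signed zeros can be ignored.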
19087
19088 // FIXME: We should be able to use node-level FMF here.
19089 EVT VT = N->getValueType(0);
19090 if (!TLI.isOperationLegal(ISD::FTRUNC, VT))
19091 return SDValue();
19092
19093 bool IsUnsigned = N->getOpcode() == ISD::UINT_TO_FP;
19094 bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP;
19095 assert(IsSigned || IsUnsigned);
19096
19097 bool IsSignedZeroSafe = DAG.getTarget().Options.NoSignedZerosFPMath ||
19099 // For signed conversions: The optimization changes signed zero behavior.
19100 if (IsSigned && !IsSignedZeroSafe)
19101 return SDValue();
19102 // For unsigned conversions, we need FABS to canonicalize -0.0 to +0.0
19103 // (unless outputting a signed zero is OK).
19104 if (IsUnsigned && !IsSignedZeroSafe && !TLI.isFAbsFree(VT))
19105 return SDValue();
19106
19107 // Collect potential clamp operations (outermost to innermost) and peel.
19108 struct ClampInfo {
19109 bool IsMin;
19110 SDValue Constant;
19111 };
19112 constexpr unsigned MaxClamps = 2;
19113 SmallVector<ClampInfo, MaxClamps> Clamps;
19114 unsigned MinOp = IsUnsigned ? ISD::UMIN : ISD::SMIN;
19115 unsigned MaxOp = IsUnsigned ? ISD::UMAX : ISD::SMAX;
19116 SDValue IntVal = N->getOperand(0);
19117 for (unsigned Level = 0; Level < MaxClamps; ++Level) {
19118 if (!IntVal.hasOneUse() ||
19119 (IntVal.getOpcode() != MinOp && IntVal.getOpcode() != MaxOp))
19120 break;
19121 SDValue RHS = IntVal.getOperand(1);
19122 APInt IntConst;
19123 if (auto *IntConstNode = dyn_cast<ConstantSDNode>(RHS))
19124 IntConst = IntConstNode->getAPIntValue();
19125 else if (!ISD::isConstantSplatVector(RHS.getNode(), IntConst))
19126 return SDValue();
19127 APFloat FPConst(VT.getFltSemantics());
19128 FPConst.convertFromAPInt(IntConst, IsSigned, APFloat::rmNearestTiesToEven);
19129 // Verify roundtrip exactness.
19130 APSInt RoundTrip(IntConst.getBitWidth(), IsUnsigned);
19131 bool IsExact;
19132 if (FPConst.convertToInteger(RoundTrip, APFloat::rmTowardZero, &IsExact) !=
19133 APFloat::opOK ||
19134 !IsExact || static_cast<const APInt &>(RoundTrip) != IntConst)
19135 return SDValue();
19136 bool IsMin = IntVal.getOpcode() == MinOp;
19137 Clamps.push_back({IsMin, DAG.getConstantFP(FPConst, DL, VT)});
19138 IntVal = IntVal.getOperand(0);
19139 }
19140
19141 // Check that the sequence ends with the correct kind of fpto[us]i.
19142 unsigned FPToIntOp = IsUnsigned ? ISD::FP_TO_UINT : ISD::FP_TO_SINT;
19143 if (IntVal.getOpcode() != FPToIntOp ||
19144 IntVal.getOperand(0).getValueType() != VT)
19145 return SDValue();
19146
19147 SDValue Result = IntVal.getOperand(0);
19148 if (IsUnsigned && !IsSignedZeroSafe && TLI.isFAbsFree(VT))
19149 Result = DAG.getNode(ISD::FABS, DL, VT, Result);
19150 Result = DAG.getNode(ISD::FTRUNC, DL, VT, Result);
19151 // Apply clamps, if any, in reverse order (innermost first).
19152 for (const ClampInfo &Clamp : reverse(Clamps)) {
19153 unsigned FPClampOp =
19154 getMinMaxOpcodeForClamp(Clamp.IsMin, Result, Clamp.Constant, DAG, TLI);
19155 if (FPClampOp == ISD::DELETED_NODE)
19156 return SDValue();
19157 Result = DAG.getNode(FPClampOp, DL, VT, Result, Clamp.Constant);
19158 }
19159 return Result;
19160}
19161
19162SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
19163 SDValue N0 = N->getOperand(0);
19164 EVT VT = N->getValueType(0);
19165 EVT OpVT = N0.getValueType();
19166 SDLoc DL(N);
19167
19168 // [us]itofp(undef) = 0, because the result value is bounded.
19169 if (N0.isUndef())
19170 return DAG.getConstantFP(0.0, DL, VT);
19171
19172 // fold (sint_to_fp c1) -> c1fp
19173 // ...but only if the target supports immediate floating-point values
19174 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
19175 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SINT_TO_FP, DL, VT, {N0}))
19176 return C;
19177
19178 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
19179 // but UINT_TO_FP is legal on this target, try to convert.
19180 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
19181 hasOperation(ISD::UINT_TO_FP, OpVT)) {
19182 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
19183 if (DAG.SignBitIsZero(N0))
19184 return DAG.getNode(ISD::UINT_TO_FP, DL, VT, N0);
19185 }
19186
19187 // The next optimizations are desirable only if SELECT_CC can be lowered.
19188 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
19189 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
19190 !VT.isVector() &&
19191 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
19192 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
19193 DAG.getConstantFP(0.0, DL, VT));
19194
19195 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
19196 // (select (setcc x, y, cc), 1.0, 0.0)
19197 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
19198 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
19199 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
19200 return DAG.getSelect(DL, VT, N0.getOperand(0),
19201 DAG.getConstantFP(1.0, DL, VT),
19202 DAG.getConstantFP(0.0, DL, VT));
19203
19204 if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
19205 return FTrunc;
19206
19207 // fold (sint_to_fp (trunc nsw x)) -> (sint_to_fp x)
19208 if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoSignedWrap() &&
19209 hasOperation(ISD::SINT_TO_FP,
19210 N0.getOperand(0).getValueType()))
19211 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, N0.getOperand(0));
19212
19213 return SDValue();
19214}
19215
19216SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
19217 SDValue N0 = N->getOperand(0);
19218 EVT VT = N->getValueType(0);
19219 EVT OpVT = N0.getValueType();
19220 SDLoc DL(N);
19221
19222 // [us]itofp(undef) = 0, because the result value is bounded.
19223 if (N0.isUndef())
19224 return DAG.getConstantFP(0.0, DL, VT);
19225
19226 // fold (uint_to_fp c1) -> c1fp
19227 // ...but only if the target supports immediate floating-point values
19228 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
19229 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UINT_TO_FP, DL, VT, {N0}))
19230 return C;
19231
19232 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
19233 // but SINT_TO_FP is legal on this target, try to convert.
19234 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
19235 hasOperation(ISD::SINT_TO_FP, OpVT)) {
19236 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
19237 if (DAG.SignBitIsZero(N0))
19238 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, N0);
19239 }
19240
19241 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
19242 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
19243 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
19244 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
19245 DAG.getConstantFP(0.0, DL, VT));
19246
19247 if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
19248 return FTrunc;
19249
19250 // fold (uint_to_fp (trunc nuw x)) -> (uint_to_fp x)
19251 if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoUnsignedWrap() &&
19252 hasOperation(ISD::UINT_TO_FP,
19253 N0.getOperand(0).getValueType()))
19254 return DAG.getNode(ISD::UINT_TO_FP, DL, VT, N0.getOperand(0));
19255
19256 return SDValue();
19257}
19258
19259 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
19260 static SDValue FoldIntToFPToInt(SDNode *N, const SDLoc &DL, SelectionDAG &DAG) {
19261 SDValue N0 = N->getOperand(0);
19262 EVT VT = N->getValueType(0);
19263
19264 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
19265 return SDValue();
19266
19267 SDValue Src = N0.getOperand(0);
19268 EVT SrcVT = Src.getValueType();
19269 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
19270 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
19271
19272 // We can safely assume the conversion won't overflow the output range,
19273 // because (for example) (uint8_t)18293.f is undefined behavior.
19274
19275 // Since we can assume the conversion won't overflow, our decision as to
19276 // whether the input will fit in the float should depend on the minimum
19277 // of the input range and output range.
19278
19279 // This means this is also safe for a signed input and unsigned output, since
19280 // a negative input would lead to undefined behavior.
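// For example, an unsigned i16 -> f32 -> i32 round trip: InputSize is 16,
// OutputSize is 32, and f32's 24-bit precision covers the 16-bit range, so
// the float conversions fold away into a zero extend of the integer.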
19281 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
19282 unsigned OutputSize = (int)VT.getScalarSizeInBits();
19283 unsigned ActualSize = std::min(InputSize, OutputSize);
19284 const fltSemantics &Sem = N0.getValueType().getFltSemantics();
19285
19286 // We can only fold away the float conversion if the input range can be
19287 // represented exactly in the float range.
19288 if (APFloat::semanticsPrecision(Sem) >= ActualSize) {
19289 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
19290 unsigned ExtOp =
19291 IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
19292 return DAG.getNode(ExtOp, DL, VT, Src);
19293 }
19294 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
19295 return DAG.getNode(ISD::TRUNCATE, DL, VT, Src);
19296 return DAG.getBitcast(VT, Src);
19297 }
19298 return SDValue();
19299}
19300
19301SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
19302 SDValue N0 = N->getOperand(0);
19303 EVT VT = N->getValueType(0);
19304 SDLoc DL(N);
19305
19306 // fold (fp_to_sint undef) -> undef
19307 if (N0.isUndef())
19308 return DAG.getUNDEF(VT);
19309
19310 // fold (fp_to_sint c1fp) -> c1
19311 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_TO_SINT, DL, VT, {N0}))
19312 return C;
19313
19314 return FoldIntToFPToInt(N, DL, DAG);
19315}
19316
19317SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
19318 SDValue N0 = N->getOperand(0);
19319 EVT VT = N->getValueType(0);
19320 SDLoc DL(N);
19321
19322 // fold (fp_to_uint undef) -> undef
19323 if (N0.isUndef())
19324 return DAG.getUNDEF(VT);
19325
19326 // fold (fp_to_uint c1fp) -> c1
19327 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_TO_UINT, DL, VT, {N0}))
19328 return C;
19329
19330 return FoldIntToFPToInt(N, DL, DAG);
19331}
19332
19333SDValue DAGCombiner::visitXROUND(SDNode *N) {
19334 SDValue N0 = N->getOperand(0);
19335 EVT VT = N->getValueType(0);
19336
19337 // fold (lrint|llrint undef) -> undef
19338 // fold (lround|llround undef) -> undef
19339 if (N0.isUndef())
19340 return DAG.getUNDEF(VT);
19341
19342 // fold (lrint|llrint c1fp) -> c1
19343 // fold (lround|llround c1fp) -> c1
19344 if (SDValue C =
19345 DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0}))
19346 return C;
19347
19348 return SDValue();
19349}
19350
19351SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
19352 SDValue N0 = N->getOperand(0);
19353 SDValue N1 = N->getOperand(1);
19354 EVT VT = N->getValueType(0);
19355 SDLoc DL(N);
19356
19357 // fold (fp_round c1fp) -> c1fp
19358 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_ROUND, DL, VT, {N0, N1}))
19359 return C;
19360
19361 // fold (fp_round (fp_extend x)) -> x
19362 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
19363 return N0.getOperand(0);
19364
19365 // fold (fp_round (fp_round x)) -> (fp_round x)
19366 if (N0.getOpcode() == ISD::FP_ROUND) {
19367 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
19368 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
19369
19370 // Avoid folding legal fp_rounds into non-legal ones.
19371 if (!hasOperation(ISD::FP_ROUND, VT))
19372 return SDValue();
19373
19374 // Skip this folding if it results in an fp_round from f80 to f16.
19375 //
19376 // f80 to f16 always generates an expensive (and as yet, unimplemented)
19377 // libcall to __truncxfhf2 instead of selecting native f16 conversion
19378 // instructions from f32 or f64. Moreover, the first (value-preserving)
19379 // fp_round from f80 to either f32 or f64 may become a NOP on platforms like
19380 // x86.
19381 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
19382 return SDValue();
19383
19384 // If the first fp_round isn't a value preserving truncation, it might
19385 // introduce a tie in the second fp_round, that wouldn't occur in the
19386 // single-step fp_round we want to fold to.
19387 // In other words, double rounding isn't the same as rounding.
19388 // Also, this is a value preserving truncation iff both fp_round's are.
19389 if ((N->getFlags().hasAllowContract() &&
19390 N0->getFlags().hasAllowContract()) ||
19391 N0IsTrunc)
19392 return DAG.getNode(
19393 ISD::FP_ROUND, DL, VT, N0.getOperand(0),
19394 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
19395 }
19396
19397 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
19398 // Note: From a legality perspective, this is a two step transform. First,
19399 // we duplicate the fp_round to the arguments of the copysign, then we
19400 // eliminate the fp_round on Y. The second step requires an additional
19401 // predicate to match the implementation above.
19402 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
19403 CanCombineFCOPYSIGN_EXTEND_ROUND(VT,
19404 N0.getValueType())) {
19405 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
19406 N0.getOperand(0), N1);
19407 AddToWorklist(Tmp.getNode());
19408 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, Tmp, N0.getOperand(1));
19409 }
19410
19411 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
19412 return NewVSel;
19413
19414 return SDValue();
19415}
19416
19417// Eliminate a floating-point widening of a narrowed value if the fast math
19418 // flags allow it.
19419 SDValue DAGCombiner::eliminateFPCastPair(SDNode *N) {
19420 SDValue N0 = N->getOperand(0);
19421 EVT VT = N->getValueType(0);
19422
19423 unsigned NarrowingOp;
19424 switch (N->getOpcode()) {
19425 case ISD::FP16_TO_FP:
19426 NarrowingOp = ISD::FP_TO_FP16;
19427 break;
19428 case ISD::BF16_TO_FP:
19429 NarrowingOp = ISD::FP_TO_BF16;
19430 break;
19431 case ISD::FP_EXTEND:
19432 NarrowingOp = ISD::FP_ROUND;
19433 break;
19434 default:
19435 llvm_unreachable("Expected widening FP cast");
19436 }
19437
19438 if (N0.getOpcode() == NarrowingOp && N0.getOperand(0).getValueType() == VT) {
19439 const SDNodeFlags NarrowFlags = N0->getFlags();
19440 const SDNodeFlags WidenFlags = N->getFlags();
19441 // Narrowing can introduce inf and change the encoding of a nan, so the
19442 // widen must have the nnan and ninf flags to indicate that we don't need to
19443 // care about that. We are also removing a rounding step, and that requires
19444 // both the narrow and widen to allow contraction.
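// For example, (f32)(f16)1.0e5f is +inf because 1.0e5 overflows f16, while
// the original f32 value is finite; nnan/ninf on the widening node assert
// such values do not occur, and allow-contract on both nodes permits
// removing the intermediate rounding step.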
19445 if (WidenFlags.hasNoNaNs() && WidenFlags.hasNoInfs() &&
19446 NarrowFlags.hasAllowContract() && WidenFlags.hasAllowContract()) {
19447 return N0.getOperand(0);
19448 }
19449 }
19450
19451 return SDValue();
19452}
19453
19454SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
19455 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19456 SDValue N0 = N->getOperand(0);
19457 EVT VT = N->getValueType(0);
19458 SDLoc DL(N);
19459
19460 if (VT.isVector())
19461 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
19462 return FoldedVOp;
19463
19464 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
19465 if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::FP_ROUND)
19466 return SDValue();
19467
19468 // fold (fp_extend c1fp) -> c1fp
19469 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FP_EXTEND, DL, VT, {N0}))
19470 return C;
19471
19472 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
19473 if (N0.getOpcode() == ISD::FP16_TO_FP &&
19474       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
19475     return DAG.getNode(ISD::FP16_TO_FP, DL, VT, N0.getOperand(0));
19476
19477 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
19478 // value of X.
19479 if (N0.getOpcode() == ISD::FP_ROUND && N0.getConstantOperandVal(1) == 1) {
19480 SDValue In = N0.getOperand(0);
19481 if (In.getValueType() == VT) return In;
19482 if (VT.bitsLT(In.getValueType()))
19483 return DAG.getNode(ISD::FP_ROUND, DL, VT, In, N0.getOperand(1));
19484 return DAG.getNode(ISD::FP_EXTEND, DL, VT, In);
19485 }
19486
19487 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
19488 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
19489       TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
19490     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
19491 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT,
19492 LN0->getChain(),
19493 LN0->getBasePtr(), N0.getValueType(),
19494 LN0->getMemOperand());
19495 CombineTo(N, ExtLoad);
19496 CombineTo(
19497 N0.getNode(),
19498 DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad,
19499 DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)),
19500 ExtLoad.getValue(1));
19501 return SDValue(N, 0); // Return N so it doesn't get rechecked!
19502 }
19503
19504 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
19505 return NewVSel;
19506
19507 if (SDValue CastEliminated = eliminateFPCastPair(N))
19508 return CastEliminated;
19509
19510 return SDValue();
19511}
19512
19513SDValue DAGCombiner::visitFCEIL(SDNode *N) {
19514 SDValue N0 = N->getOperand(0);
19515 EVT VT = N->getValueType(0);
19516
19517 // fold (fceil c1) -> fceil(c1)
19518 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCEIL, SDLoc(N), VT, {N0}))
19519 return C;
19520
19521 return SDValue();
19522}
19523
19524SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
19525 SDValue N0 = N->getOperand(0);
19526 EVT VT = N->getValueType(0);
19527
19528 // fold (ftrunc c1) -> ftrunc(c1)
19529 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FTRUNC, SDLoc(N), VT, {N0}))
19530 return C;
19531
19532 // fold ftrunc (known rounded int x) -> x
19533 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
19534 // likely to be generated to extract integer from a rounded floating value.
19535 switch (N0.getOpcode()) {
19536 default: break;
19537 case ISD::FRINT:
19538 case ISD::FTRUNC:
19539 case ISD::FNEARBYINT:
19540 case ISD::FROUNDEVEN:
19541 case ISD::FFLOOR:
19542 case ISD::FCEIL:
19543 return N0;
19544 }
19545
19546 return SDValue();
19547}
19548
19549SDValue DAGCombiner::visitFFREXP(SDNode *N) {
19550 SDValue N0 = N->getOperand(0);
19551
19552 // fold (ffrexp c1) -> ffrexp(c1)
19553   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
19554     return DAG.getNode(ISD::FFREXP, SDLoc(N), N->getVTList(), N0);
19555 return SDValue();
19556}
19557
19558SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
19559 SDValue N0 = N->getOperand(0);
19560 EVT VT = N->getValueType(0);
19561
19562 // fold (ffloor c1) -> ffloor(c1)
19563 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FFLOOR, SDLoc(N), VT, {N0}))
19564 return C;
19565
19566 return SDValue();
19567}
19568
19569SDValue DAGCombiner::visitFNEG(SDNode *N) {
19570 SDValue N0 = N->getOperand(0);
19571 EVT VT = N->getValueType(0);
19572 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19573
19574 // Constant fold FNEG.
19575 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FNEG, SDLoc(N), VT, {N0}))
19576 return C;
19577
19578 if (SDValue NegN0 =
19579 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
19580 return NegN0;
19581
19582 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
19583 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
19584 // know it was called from a context with a nsz flag if the input fsub does
19585 // not.
19586 if (N0.getOpcode() == ISD::FSUB && N->getFlags().hasNoSignedZeros() &&
19587 N0.hasOneUse()) {
19588 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
19589 N0.getOperand(0));
19590 }
19591
19592   if (SimplifyDemandedBits(SDValue(N, 0)))
19593     return SDValue(N, 0);
19594
19595 if (SDValue Cast = foldSignChangeInBitcast(N))
19596 return Cast;
19597
19598 return SDValue();
19599}
19600
19601SDValue DAGCombiner::visitFMinMax(SDNode *N) {
19602 SDValue N0 = N->getOperand(0);
19603 SDValue N1 = N->getOperand(1);
19604 EVT VT = N->getValueType(0);
19605 const SDNodeFlags Flags = N->getFlags();
19606 unsigned Opc = N->getOpcode();
19607 bool PropAllNaNsToQNaNs = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
19608   bool ReturnsOtherForAllNaNs =
19609       Opc == ISD::FMINIMUMNUM || Opc == ISD::FMAXIMUMNUM;
19610   bool IsMin =
19611       Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM || Opc == ISD::FMINIMUMNUM;
19612   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
19613
19614 // Constant fold.
19615 if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
19616 return C;
19617
19618 // Canonicalize to constant on RHS.
19619   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
19620       !DAG.isConstantFPBuildVectorOrConstantFP(N1))
19621     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
19622
19623 if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
19624 const APFloat &AF = N1CFP->getValueAPF();
19625
19626 // minnum(X, qnan) -> X
19627 // maxnum(X, qnan) -> X
19628 // minimum(X, nan) -> qnan
19629 // maximum(X, nan) -> qnan
19630 // minimumnum(X, nan) -> X
19631 // maximumnum(X, nan) -> X
19632 if (AF.isNaN()) {
19633 if (PropAllNaNsToQNaNs) {
19634 if (AF.isSignaling())
19635 return DAG.getConstantFP(AF.makeQuiet(), SDLoc(N), VT);
19636 return N->getOperand(1);
19637 } else if (ReturnsOtherForAllNaNs || !AF.isSignaling()) {
19638 return N->getOperand(0);
19639 }
19640 return SDValue();
19641 }
19642
19643 // In the following folds, inf can be replaced with the largest finite
19644 // float, if the ninf flag is set.
19645 if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
19646 // minimum(X, -inf) -> -inf if nnan
19647 // maximum(X, +inf) -> +inf if nnan
19648 // minimumnum(X, -inf) -> -inf
19649 // maximumnum(X, +inf) -> +inf
19650 if (IsMin == AF.isNegative() &&
19651 (ReturnsOtherForAllNaNs || Flags.hasNoNaNs()))
19652 return N->getOperand(1);
19653
19654 // minnum(X, +inf) -> X if nnan
19655 // maxnum(X, -inf) -> X if nnan
19656 // minimum(X, +inf) -> X (ignoring quieting of sNaNs)
19657 // maximum(X, -inf) -> X (ignoring quieting of sNaNs)
19658 // minimumnum(X, +inf) -> X if nnan
19659 // maximumnum(X, -inf) -> X if nnan
19660 if (IsMin != AF.isNegative() && (PropAllNaNsToQNaNs || Flags.hasNoNaNs()))
19661 return N->getOperand(0);
19662 }
19663 }
19664
19665 // There are no VECREDUCE variants of FMINIMUMNUM or FMAXIMUMNUM
19667 return SDValue();
19668
19669 if (SDValue SD = reassociateReduction(
19670 PropAllNaNsToQNaNs
19673 Opc, SDLoc(N), VT, N0, N1, Flags))
19674 return SD;
19675
19676 return SDValue();
19677}
19678
19679SDValue DAGCombiner::visitFABS(SDNode *N) {
19680 SDValue N0 = N->getOperand(0);
19681 EVT VT = N->getValueType(0);
19682 SDLoc DL(N);
19683
19684 // fold (fabs c1) -> fabs(c1)
19685 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FABS, DL, VT, {N0}))
19686 return C;
19687
19688   if (SimplifyDemandedBits(SDValue(N, 0)))
19689     return SDValue(N, 0);
19690
19691 if (SDValue Cast = foldSignChangeInBitcast(N))
19692 return Cast;
19693
19694 return SDValue();
19695}
19696
19697SDValue DAGCombiner::visitBRCOND(SDNode *N) {
19698 SDValue Chain = N->getOperand(0);
19699 SDValue N1 = N->getOperand(1);
19700 SDValue N2 = N->getOperand(2);
19701
19702 // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
19703 // nondeterministic jumps).
19704 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
19705 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
19706 N1->getOperand(0), N2, N->getFlags());
19707 }
19708
19709 // Variant of the previous fold where there is a SETCC in between:
19710 // BRCOND(SETCC(FREEZE(X), CONST, Cond))
19711 // =>
19712 // BRCOND(FREEZE(SETCC(X, CONST, Cond)))
19713 // =>
19714 // BRCOND(SETCC(X, CONST, Cond))
19715 // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
19716 // isn't equivalent to true or false.
19717 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
19718 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
19719 if (N1->getOpcode() == ISD::SETCC && N1.hasOneUse()) {
19720 SDValue S0 = N1->getOperand(0), S1 = N1->getOperand(1);
19721     ISD::CondCode Cond = cast<CondCodeSDNode>(N1->getOperand(2))->get();
19722     ConstantSDNode *S0C = dyn_cast<ConstantSDNode>(S0);
19723 ConstantSDNode *S1C = dyn_cast<ConstantSDNode>(S1);
19724 bool Updated = false;
19725
19726 // Is 'X Cond C' always true or false?
19727 auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
19728 bool False = (Cond == ISD::SETULT && C->isZero()) ||
19729 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
19730 (Cond == ISD::SETUGT && C->isAllOnes()) ||
19731 (Cond == ISD::SETGT && C->isMaxSignedValue());
19732 bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
19733 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
19734 (Cond == ISD::SETUGE && C->isZero()) ||
19735 (Cond == ISD::SETGE && C->isMinSignedValue());
19736 return True || False;
19737 };
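    // Illustrative example (editorial addition): for an i8 operand,
    // "X <u 0" is always false and "X <=u 255" is always true. In those cases
    // SETCC(FREEZE(X), C, Cond) is a constant no matter which value FREEZE(X)
    // picked, while SETCC(X, C, Cond) could be poison if X is poison, so the
    // FREEZE must be kept.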
19738
19739 if (S0->getOpcode() == ISD::FREEZE && S0.hasOneUse() && S1C) {
19740 if (!IsAlwaysTrueOrFalse(Cond, S1C)) {
19741 S0 = S0->getOperand(0);
19742 Updated = true;
19743 }
19744 }
19745 if (S1->getOpcode() == ISD::FREEZE && S1.hasOneUse() && S0C) {
19746 if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond), S0C)) {
19747 S1 = S1->getOperand(0);
19748 Updated = true;
19749 }
19750 }
19751
19752 if (Updated)
19753 return DAG.getNode(
19754 ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
19755 DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2,
19756 N->getFlags());
19757 }
19758
19759 // If N is a constant we could fold this into a fallthrough or unconditional
19760 // branch. However that doesn't happen very often in normal code, because
19761 // Instcombine/SimplifyCFG should have handled the available opportunities.
19762 // If we did this folding here, it would be necessary to update the
19763 // MachineBasicBlock CFG, which is awkward.
19764
19765 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
19766 // on the target, also copy fast math flags.
19767 if (N1.getOpcode() == ISD::SETCC &&
19768       TLI.isOperationLegalOrCustom(ISD::BR_CC,
19769                                    N1.getOperand(0).getValueType())) {
19770 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, Chain,
19771 N1.getOperand(2), N1.getOperand(0), N1.getOperand(1), N2,
19772 N1->getFlags());
19773 }
19774
19775 if (N1.hasOneUse()) {
19776 // rebuildSetCC calls visitXor which may change the Chain when there is a
19777 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
19778 HandleSDNode ChainHandle(Chain);
19779 if (SDValue NewN1 = rebuildSetCC(N1))
19780 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
19781 ChainHandle.getValue(), NewN1, N2, N->getFlags());
19782 }
19783
19784 return SDValue();
19785}
19786
19787SDValue DAGCombiner::rebuildSetCC(SDValue N) {
19788 if (N.getOpcode() == ISD::SRL ||
19789 (N.getOpcode() == ISD::TRUNCATE &&
19790 (N.getOperand(0).hasOneUse() &&
19791 N.getOperand(0).getOpcode() == ISD::SRL))) {
19792     // Look past the truncate.
19793 if (N.getOpcode() == ISD::TRUNCATE)
19794 N = N.getOperand(0);
19795
19796 // Match this pattern so that we can generate simpler code:
19797 //
19798 // %a = ...
19799 // %b = and i32 %a, 2
19800 // %c = srl i32 %b, 1
19801 // brcond i32 %c ...
19802 //
19803 // into
19804 //
19805 // %a = ...
19806 // %b = and i32 %a, 2
19807 // %c = setcc eq %b, 0
19808 // brcond %c ...
19809 //
19810 // This applies only when the AND constant value has one bit set and the
19811 // SRL constant is equal to the log2 of the AND constant. The back-end is
19812 // smart enough to convert the result into a TEST/JMP sequence.
19813 SDValue Op0 = N.getOperand(0);
19814 SDValue Op1 = N.getOperand(1);
19815
19816 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
19817 SDValue AndOp1 = Op0.getOperand(1);
19818
19819 if (AndOp1.getOpcode() == ISD::Constant) {
19820 const APInt &AndConst = AndOp1->getAsAPIntVal();
19821
19822 if (AndConst.isPowerOf2() &&
19823 Op1->getAsAPIntVal() == AndConst.logBase2()) {
19824 SDLoc DL(N);
19825 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
19826 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
19827 ISD::SETNE);
19828 }
19829 }
19830 }
19831 }
19832
19833 // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
19834 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
19835 if (N.getOpcode() == ISD::XOR) {
19836 // Because we may call this on a speculatively constructed
19837 // SimplifiedSetCC Node, we need to simplify this node first.
19838 // Ideally this should be folded into SimplifySetCC and not
19839 // here. For now, grab a handle to N so we don't lose it from
19840     // replacements internal to the visit.
19841 HandleSDNode XORHandle(N);
19842 while (N.getOpcode() == ISD::XOR) {
19843 SDValue Tmp = visitXOR(N.getNode());
19844 // No simplification done.
19845 if (!Tmp.getNode())
19846 break;
19847       // Returning N is a form of in-visit replacement that may have
19848       // invalidated N. Grab the value from the handle.
19849 if (Tmp.getNode() == N.getNode())
19850 N = XORHandle.getValue();
19851 else // Node simplified. Try simplifying again.
19852 N = Tmp;
19853 }
19854
19855 if (N.getOpcode() != ISD::XOR)
19856 return N;
19857
19858 SDValue Op0 = N->getOperand(0);
19859 SDValue Op1 = N->getOperand(1);
19860
19861 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
19862 bool Equal = false;
19863 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
19864 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
19865 Op0.getValueType() == MVT::i1) {
19866 N = Op0;
19867 Op0 = N->getOperand(0);
19868 Op1 = N->getOperand(1);
19869 Equal = true;
19870 }
19871
19872 EVT SetCCVT = N.getValueType();
19873 if (LegalTypes)
19874 SetCCVT = getSetCCResultType(SetCCVT);
19875 // Replace the uses of XOR with SETCC. Note, avoid this transformation if
19876 // it would introduce illegal operations post-legalization as this can
19877 // result in infinite looping between converting xor->setcc here, and
19878 // expanding setcc->xor in LegalizeSetCCCondCode if requested.
19879       ISD::CondCode CC = Equal ? ISD::SETEQ : ISD::SETNE;
19880       if (!LegalOperations || TLI.isCondCodeLegal(CC, Op0.getSimpleValueType()))
19881 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1, CC);
19882 }
19883 }
19884
19885 return SDValue();
19886}
19887
19888// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
19889//
19890SDValue DAGCombiner::visitBR_CC(SDNode *N) {
19891 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
19892 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
19893
19894 // If N is a constant we could fold this into a fallthrough or unconditional
19895 // branch. However that doesn't happen very often in normal code, because
19896 // Instcombine/SimplifyCFG should have handled the available opportunities.
19897 // If we did this folding here, it would be necessary to update the
19898 // MachineBasicBlock CFG, which is awkward.
19899
19900 // Use SimplifySetCC to simplify SETCC's.
19901   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
19902                                CondLHS, CondRHS, CC->get(), SDLoc(N),
19903 false);
19904 if (Simp.getNode()) AddToWorklist(Simp.getNode());
19905
19906 // fold to a simpler setcc
19907 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
19908 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
19909 N->getOperand(0), Simp.getOperand(2),
19910 Simp.getOperand(0), Simp.getOperand(1),
19911 N->getOperand(4));
19912
19913 return SDValue();
19914}
19915
19916static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
19917 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
19918 const TargetLowering &TLI) {
19919 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19920 if (LD->isIndexed())
19921 return false;
19922 EVT VT = LD->getMemoryVT();
19923 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
19924 return false;
19925 Ptr = LD->getBasePtr();
19926 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19927 if (ST->isIndexed())
19928 return false;
19929 EVT VT = ST->getMemoryVT();
19930 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
19931 return false;
19932 Ptr = ST->getBasePtr();
19933 IsLoad = false;
19934 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
19935 if (LD->isIndexed())
19936 return false;
19937 EVT VT = LD->getMemoryVT();
19938 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
19939 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
19940 return false;
19941 Ptr = LD->getBasePtr();
19942 IsMasked = true;
19943   } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
19944     if (ST->isIndexed())
19945 return false;
19946 EVT VT = ST->getMemoryVT();
19947 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
19948 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
19949 return false;
19950 Ptr = ST->getBasePtr();
19951 IsLoad = false;
19952 IsMasked = true;
19953 } else {
19954 return false;
19955 }
19956 return true;
19957}
19958
19959/// Try turning a load/store into a pre-indexed load/store when the base
19960/// pointer is an add or subtract and it has other uses besides the load/store.
19961/// After the transformation, the new indexed load/store has effectively folded
19962/// the add/subtract in and all of its other uses are redirected to the
19963/// new load/store.
19964bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
19965 if (Level < AfterLegalizeDAG)
19966 return false;
19967
19968 bool IsLoad = true;
19969 bool IsMasked = false;
19970 SDValue Ptr;
19971 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
19972 Ptr, TLI))
19973 return false;
19974
19975 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
19976 // out. There is no reason to make this a preinc/predec.
19977 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
19978 Ptr->hasOneUse())
19979 return false;
19980
19981 // Ask the target to do addressing mode selection.
19982   SDValue BasePtr;
19983   SDValue Offset;
19984   ISD::MemIndexedMode AM = ISD::UNINDEXED;
19985   if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
19986 return false;
19987
19988 // Backends without true r+i pre-indexed forms may need to pass a
19989 // constant base with a variable offset so that constant coercion
19990 // will work with the patterns in canonical form.
19991 bool Swapped = false;
19992 if (isa<ConstantSDNode>(BasePtr)) {
19993 std::swap(BasePtr, Offset);
19994 Swapped = true;
19995 }
19996
19997   // Don't create an indexed load / store with zero offset.
19998   if (isNullConstant(Offset))
19999     return false;
20000
20001 // Try turning it into a pre-indexed load / store except when:
20002 // 1) The new base ptr is a frame index.
20003 // 2) If N is a store and the new base ptr is either the same as or is a
20004 // predecessor of the value being stored.
20005 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
20006 // that would create a cycle.
20007 // 4) All uses are load / store ops that use it as old base ptr.
20008
20009 // Check #1. Preinc'ing a frame index would require copying the stack pointer
20010 // (plus the implicit offset) to a register to preinc anyway.
20011 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
20012 return false;
20013
20014 // Check #2.
20015 if (!IsLoad) {
20016 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
20017 : cast<StoreSDNode>(N)->getValue();
20018
20019 // Would require a copy.
20020 if (Val == BasePtr)
20021 return false;
20022
20023 // Would create a cycle.
20024 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
20025 return false;
20026 }
20027
20028 // Caches for hasPredecessorHelper.
20029 SmallPtrSet<const SDNode *, 32> Visited;
20030   SmallVector<const SDNode *, 16> Worklist;
20031   Worklist.push_back(N);
20032
20033 // If the offset is a constant, there may be other adds of constants that
20034 // can be folded with this one. We should do this to avoid having to keep
20035 // a copy of the original base pointer.
20036 SmallVector<SDNode *, 16> OtherUses;
20039 for (SDUse &Use : BasePtr->uses()) {
20040 // Skip the use that is Ptr and uses of other results from BasePtr's
20041 // node (important for nodes that return multiple results).
20042 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
20043 continue;
20044
20045 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist,
20046 MaxSteps))
20047 continue;
20048
20049 if (Use.getUser()->getOpcode() != ISD::ADD &&
20050 Use.getUser()->getOpcode() != ISD::SUB) {
20051 OtherUses.clear();
20052 break;
20053 }
20054
20055 SDValue Op1 = Use.getUser()->getOperand((Use.getOperandNo() + 1) & 1);
20056 if (!isa<ConstantSDNode>(Op1)) {
20057 OtherUses.clear();
20058 break;
20059 }
20060
20061 // FIXME: In some cases, we can be smarter about this.
20062 if (Op1.getValueType() != Offset.getValueType()) {
20063 OtherUses.clear();
20064 break;
20065 }
20066
20067 OtherUses.push_back(Use.getUser());
20068 }
20069
20070 if (Swapped)
20071 std::swap(BasePtr, Offset);
20072
20073 // Now check for #3 and #4.
20074 bool RealUse = false;
20075
20076 for (SDNode *User : Ptr->users()) {
20077 if (User == N)
20078 continue;
20079 if (SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))
20080 return false;
20081
20082 // If Ptr may be folded in addressing mode of other use, then it's
20083 // not profitable to do this transformation.
20084 if (!canFoldInAddressingMode(Ptr.getNode(), User, DAG, TLI))
20085 RealUse = true;
20086 }
20087
20088 if (!RealUse)
20089 return false;
20090
20091   SDValue Result;
20092   if (!IsMasked) {
20093 if (IsLoad)
20094 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
20095 else
20096 Result =
20097 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
20098 } else {
20099 if (IsLoad)
20100 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
20101 Offset, AM);
20102 else
20103 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
20104 Offset, AM);
20105 }
20106 ++PreIndexedNodes;
20107 ++NodesCombined;
20108 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
20109 Result.dump(&DAG); dbgs() << '\n');
20110 WorklistRemover DeadNodes(*this);
20111 if (IsLoad) {
20112 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
20113 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
20114 } else {
20115 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
20116 }
20117
20118 // Finally, since the node is now dead, remove it from the graph.
20119 deleteAndRecombine(N);
20120
20121 if (Swapped)
20122 std::swap(BasePtr, Offset);
20123
20124 // Replace other uses of BasePtr that can be updated to use Ptr
20125 for (SDNode *OtherUse : OtherUses) {
20126 unsigned OffsetIdx = 1;
20127 if (OtherUse->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
20128 OffsetIdx = 0;
20129 assert(OtherUse->getOperand(!OffsetIdx).getNode() == BasePtr.getNode() &&
20130 "Expected BasePtr operand");
20131
20132 // We need to replace ptr0 in the following expression:
20133 // x0 * offset0 + y0 * ptr0 = t0
20134 // knowing that
20135 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
20136 //
20137 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
20138 // indexed load/store and the expression that needs to be re-written.
20139 //
20140 // Therefore, we have:
20141 // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
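    // Worked example (editorial addition, not from the original source): if
    // the new indexed access computes t1 = ptr0 - 16 (PRE_DEC, so x1 = -1,
    // y1 = 1) and the other use computed t0 = ptr0 + 4 (x0 = 1, y0 = 1), then
    // t0 = (1*4 - (-1)*1*1*16) + (1*1)*t1 = 20 + t1, i.e. the old "add 4" is
    // rewritten as "add 20" on top of the pre-decremented pointer.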
20142
20143 auto *CN = cast<ConstantSDNode>(OtherUse->getOperand(OffsetIdx));
20144 const APInt &Offset0 = CN->getAPIntValue();
20145 const APInt &Offset1 = Offset->getAsAPIntVal();
20146 int X0 = (OtherUse->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
20147 int Y0 = (OtherUse->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
20148 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
20149 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
20150
20151 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
20152
20153 APInt CNV = Offset0;
20154 if (X0 < 0) CNV = -CNV;
20155 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
20156 else CNV = CNV - Offset1;
20157
20158 SDLoc DL(OtherUse);
20159
20160 // We can now generate the new expression.
20161 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
20162 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
20163
20164 SDValue NewUse =
20165 DAG.getNode(Opcode, DL, OtherUse->getValueType(0), NewOp1, NewOp2);
20166 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUse, 0), NewUse);
20167 deleteAndRecombine(OtherUse);
20168 }
20169
20170 // Replace the uses of Ptr with uses of the updated base value.
20171 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
20172 deleteAndRecombine(Ptr.getNode());
20173 AddToWorklist(Result.getNode());
20174
20175 return true;
20176}
20177
20178static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
20179 SDValue &BasePtr, SDValue &Offset,
20180                                    ISD::MemIndexedMode &AM,
20181                                    SelectionDAG &DAG,
20182 const TargetLowering &TLI) {
20183 if (PtrUse == N ||
20184 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
20185 return false;
20186
20187 if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
20188 return false;
20189
20190   // Don't create an indexed load / store with zero offset.
20191   if (isNullConstant(Offset))
20192     return false;
20193
20194 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
20195 return false;
20196
20199 for (SDNode *User : BasePtr->users()) {
20200 if (User == Ptr.getNode())
20201 continue;
20202
20203     // Don't combine if there's a later user which could perform the indexing instead.
20204 if (isa<MemSDNode>(User)) {
20205 bool IsLoad = true;
20206 bool IsMasked = false;
20207 SDValue OtherPtr;
20209 IsMasked, OtherPtr, TLI)) {
20211 Worklist.push_back(User);
20212 if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps))
20213 return false;
20214 }
20215 }
20216
20217 // If all the uses are load / store addresses, then don't do the
20218 // transformation.
20219 if (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SUB) {
20220 for (SDNode *UserUser : User->users())
20221 if (canFoldInAddressingMode(User, UserUser, DAG, TLI))
20222 return false;
20223 }
20224 }
20225 return true;
20226}
20227
20228 static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
20229                                          bool &IsMasked, SDValue &Ptr,
20230                                          SDValue &BasePtr, SDValue &Offset,
20231                                          ISD::MemIndexedMode &AM,
20232                                          SelectionDAG &DAG,
20233                                          const TargetLowering &TLI) {
20234   if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
20235                                 IsMasked, Ptr, TLI) ||
20236 Ptr->hasOneUse())
20237 return nullptr;
20238
20239 // Try turning it into a post-indexed load / store except when
20240 // 1) All uses are load / store ops that use it as base ptr (and
20241   //    it may be folded as an addressing mode).
20242 // 2) Op must be independent of N, i.e. Op is neither a predecessor
20243 // nor a successor of N. Otherwise, if Op is folded that would
20244 // create a cycle.
20246 for (SDUse &U : Ptr->uses()) {
20247 if (U.getResNo() != Ptr.getResNo())
20248 continue;
20249
20250 // Check for #1.
20251 SDNode *Op = U.getUser();
20252 if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
20253 continue;
20254
20255 // Check for #2.
20258 // Ptr is predecessor to both N and Op.
20259 Visited.insert(Ptr.getNode());
20260 Worklist.push_back(N);
20261 Worklist.push_back(Op);
20262 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) &&
20263 !SDNode::hasPredecessorHelper(Op, Visited, Worklist, MaxSteps))
20264 return Op;
20265 }
20266 return nullptr;
20267}
20268
20269 /// Try to combine a load/store with an add/sub of the base pointer node into a
20270 /// post-indexed load/store. The transformation effectively folds the add/subtract
20271 /// into the new indexed load/store, and all of its uses are redirected to the
20272 /// new load/store.
20273bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
20274 if (Level < AfterLegalizeDAG)
20275 return false;
20276
20277 bool IsLoad = true;
20278 bool IsMasked = false;
20279 SDValue Ptr;
20280   SDValue BasePtr;
20281   SDValue Offset;
20282   ISD::MemIndexedMode AM = ISD::UNINDEXED;
20283   SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
20284 Offset, AM, DAG, TLI);
20285 if (!Op)
20286 return false;
20287
20288   SDValue Result;
20289   if (!IsMasked)
20290 Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
20291 Offset, AM)
20292 : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
20293 BasePtr, Offset, AM);
20294 else
20295 Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
20296 BasePtr, Offset, AM)
20297 : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
20298 BasePtr, Offset, AM);
20299 ++PostIndexedNodes;
20300 ++NodesCombined;
20301 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
20302 Result.dump(&DAG); dbgs() << '\n');
20303 WorklistRemover DeadNodes(*this);
20304 if (IsLoad) {
20305 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
20306 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
20307 } else {
20308 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
20309 }
20310
20311 // Finally, since the node is now dead, remove it from the graph.
20312 deleteAndRecombine(N);
20313
20314 // Replace the uses of Use with uses of the updated base value.
20315   DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
20316                                 Result.getValue(IsLoad ? 1 : 0));
20317 deleteAndRecombine(Op);
20318 return true;
20319}
20320
20321/// Return the base-pointer arithmetic from an indexed \p LD.
20322SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
20323 ISD::MemIndexedMode AM = LD->getAddressingMode();
20324 assert(AM != ISD::UNINDEXED);
20325 SDValue BP = LD->getOperand(1);
20326 SDValue Inc = LD->getOperand(2);
20327
20328 // Some backends use TargetConstants for load offsets, but don't expect
20329 // TargetConstants in general ADD nodes. We can convert these constants into
20330 // regular Constants (if the constant is not opaque).
20331   assert((Inc.getOpcode() != ISD::TargetConstant ||
20332           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
20333 "Cannot split out indexing using opaque target constants");
20334 if (Inc.getOpcode() == ISD::TargetConstant) {
20335 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
20336 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
20337 ConstInc->getValueType(0));
20338 }
20339
20340 unsigned Opc =
20341 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
20342 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
20343}
20344
20345 static ElementCount numVectorEltsOrZero(EVT T) {
20346   return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
20347}
20348
20349bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
20350 EVT STType = Val.getValueType();
20351 EVT STMemType = ST->getMemoryVT();
20352 if (STType == STMemType)
20353 return true;
20354 if (isTypeLegal(STMemType))
20355 return false; // fail.
20356 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
20357 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
20358 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
20359 return true;
20360 }
20361 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
20362 STType.isInteger() && STMemType.isInteger()) {
20363 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
20364 return true;
20365 }
20366 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
20367 Val = DAG.getBitcast(STMemType, Val);
20368 return true;
20369 }
20370 return false; // fail.
20371}
20372
20373bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
20374 EVT LDMemType = LD->getMemoryVT();
20375 EVT LDType = LD->getValueType(0);
20376 assert(Val.getValueType() == LDMemType &&
20377 "Attempting to extend value of non-matching type");
20378 if (LDType == LDMemType)
20379 return true;
20380 if (LDMemType.isInteger() && LDType.isInteger()) {
20381 switch (LD->getExtensionType()) {
20382 case ISD::NON_EXTLOAD:
20383 Val = DAG.getBitcast(LDType, Val);
20384 return true;
20385 case ISD::EXTLOAD:
20386 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
20387 return true;
20388 case ISD::SEXTLOAD:
20389 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
20390 return true;
20391 case ISD::ZEXTLOAD:
20392 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
20393 return true;
20394 }
20395 }
20396 return false;
20397}
20398
20399StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD,
20400 int64_t &Offset) {
20401 SDValue Chain = LD->getOperand(0);
20402
20403 // Look through CALLSEQ_START.
20404 if (Chain.getOpcode() == ISD::CALLSEQ_START)
20405 Chain = Chain->getOperand(0);
20406
20407 StoreSDNode *ST = nullptr;
20408   SmallVector<SDValue, 8> Aliases;
20409   if (Chain.getOpcode() == ISD::TokenFactor) {
20410 // Look for unique store within the TokenFactor.
20411 for (SDValue Op : Chain->ops()) {
20412 StoreSDNode *Store = dyn_cast<StoreSDNode>(Op.getNode());
20413 if (!Store)
20414 continue;
20415 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
20416 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
20417 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
20418 continue;
20419 // Make sure the store is not aliased with any nodes in TokenFactor.
20420 GatherAllAliases(Store, Chain, Aliases);
20421 if (Aliases.empty() ||
20422 (Aliases.size() == 1 && Aliases.front().getNode() == Store))
20423 ST = Store;
20424 break;
20425 }
20426 } else {
20427 StoreSDNode *Store = dyn_cast<StoreSDNode>(Chain.getNode());
20428 if (Store) {
20429 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
20430 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
20431 if (BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
20432 ST = Store;
20433 }
20434 }
20435
20436 return ST;
20437}
20438
20439SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
20440 if (OptLevel == CodeGenOptLevel::None || !LD->isSimple())
20441 return SDValue();
20442 SDValue Chain = LD->getOperand(0);
20443 int64_t Offset;
20444
20445 StoreSDNode *ST = getUniqueStoreFeeding(LD, Offset);
20446 // TODO: Relax this restriction for unordered atomics (see D66309)
20447 if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
20448 return SDValue();
20449
20450 EVT LDType = LD->getValueType(0);
20451 EVT LDMemType = LD->getMemoryVT();
20452 EVT STMemType = ST->getMemoryVT();
20453 EVT STType = ST->getValue().getValueType();
20454
20455 // There are two cases to consider here:
20456 // 1. The store is fixed width and the load is scalable. In this case we
20457 // don't know at compile time if the store completely envelops the load
20458 // so we abandon the optimisation.
20459 // 2. The store is scalable and the load is fixed width. We could
20460 // potentially support a limited number of cases here, but there has been
20461 // no cost-benefit analysis to prove it's worth it.
20462 bool LdStScalable = LDMemType.isScalableVT();
20463 if (LdStScalable != STMemType.isScalableVT())
20464 return SDValue();
20465
20466 // If we are dealing with scalable vectors on a big endian platform the
20467 // calculation of offsets below becomes trickier, since we do not know at
20468 // compile time the absolute size of the vector. Until we've done more
20469 // analysis on big-endian platforms it seems better to bail out for now.
20470 if (LdStScalable && DAG.getDataLayout().isBigEndian())
20471 return SDValue();
20472
20473 // Normalize for Endianness. After this Offset=0 will denote that the least
20474 // significant bit in the loaded value maps to the least significant bit in
20475   // the stored value. With Offset=n (for n > 0) the loaded value starts at the
20476 // n:th least significant byte of the stored value.
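  // Illustrative example (editorial addition): for an i32 store and an i16
  // load whose address is 2 bytes past the store's base, OrigOffset is 2. On a
  // little-endian target that already means "start at the 2nd least
  // significant byte"; on a big-endian target those are the two least
  // significant bytes of the stored value, so Offset is normalized to
  // (32 - 16) / 8 - 2 == 0.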
20477 int64_t OrigOffset = Offset;
20478 if (DAG.getDataLayout().isBigEndian())
20479 Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedValue() -
20480 (int64_t)LDMemType.getStoreSizeInBits().getFixedValue()) /
20481 8 -
20482 Offset;
20483
20484   // Check that the stored value covers all bits that are loaded.
20485 bool STCoversLD;
20486
20487 TypeSize LdMemSize = LDMemType.getSizeInBits();
20488 TypeSize StMemSize = STMemType.getSizeInBits();
20489 if (LdStScalable)
20490 STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
20491 else
20492 STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedValue() <=
20493 StMemSize.getFixedValue());
20494
20495 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
20496 if (LD->isIndexed()) {
20497 // Cannot handle opaque target constants and we must respect the user's
20498 // request not to split indexes from loads.
20499 if (!canSplitIdx(LD))
20500 return SDValue();
20501 SDValue Idx = SplitIndexingFromLoad(LD);
20502 SDValue Ops[] = {Val, Idx, Chain};
20503 return CombineTo(LD, Ops, 3);
20504 }
20505 return CombineTo(LD, Val, Chain);
20506 };
20507
20508 if (!STCoversLD)
20509 return SDValue();
20510
20511 // Memory as copy space (potentially masked).
20512 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
20513 // Simple case: Direct non-truncating forwarding
20514 if (LDType.getSizeInBits() == LdMemSize)
20515 return ReplaceLd(LD, ST->getValue(), Chain);
20516 // Can we model the truncate and extension with an and mask?
20517 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
20518 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
20519 // Mask to size of LDMemType
20520 auto Mask =
20521           DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
20522                                                StMemSize.getFixedValue()),
20523 SDLoc(ST), STType);
20524 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
20525 return ReplaceLd(LD, Val, Chain);
20526 }
20527 }
20528
20529 // Handle some cases for big-endian that would be Offset 0 and handled for
20530 // little-endian.
20531 SDValue Val = ST->getValue();
20532 if (DAG.getDataLayout().isBigEndian() && Offset > 0 && OrigOffset == 0) {
20533 if (STType.isInteger() && !STType.isVector() && LDType.isInteger() &&
20534 !LDType.isVector() && isTypeLegal(STType) &&
20535 TLI.isOperationLegal(ISD::SRL, STType)) {
20536 Val = DAG.getNode(ISD::SRL, SDLoc(LD), STType, Val,
20537 DAG.getConstant(Offset * 8, SDLoc(LD), STType));
20538 Offset = 0;
20539 }
20540 }
20541
20542 // TODO: Deal with nonzero offset.
20543 if (LD->getBasePtr().isUndef() || Offset != 0)
20544 return SDValue();
20545   // Model necessary truncations / extensions.
20546 // Truncate Value To Stored Memory Size.
20547 do {
20548 if (!getTruncatedStoreValue(ST, Val))
20549 break;
20550 if (!isTypeLegal(LDMemType))
20551 break;
20552 if (STMemType != LDMemType) {
20553 // TODO: Support vectors? This requires extract_subvector/bitcast.
20554 if (!STMemType.isVector() && !LDMemType.isVector() &&
20555 STMemType.isInteger() && LDMemType.isInteger())
20556 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
20557 else
20558 break;
20559 }
20560 if (!extendLoadedValueToExtension(LD, Val))
20561 break;
20562 return ReplaceLd(LD, Val, Chain);
20563 } while (false);
20564
20565 // On failure, cleanup dead nodes we may have created.
20566 if (Val->use_empty())
20567 deleteAndRecombine(Val.getNode());
20568 return SDValue();
20569}
20570
20571SDValue DAGCombiner::visitLOAD(SDNode *N) {
20572 LoadSDNode *LD = cast<LoadSDNode>(N);
20573 SDValue Chain = LD->getChain();
20574 SDValue Ptr = LD->getBasePtr();
20575
20576 // If load is not volatile and there are no uses of the loaded value (and
20577 // the updated indexed value in case of indexed loads), change uses of the
20578 // chain value into uses of the chain input (i.e. delete the dead load).
20579 // TODO: Allow this for unordered atomics (see D66309)
20580 if (LD->isSimple()) {
20581 if (N->getValueType(1) == MVT::Other) {
20582 // Unindexed loads.
20583 if (!N->hasAnyUseOfValue(0)) {
20584 // It's not safe to use the two value CombineTo variant here. e.g.
20585 // v1, chain2 = load chain1, loc
20586 // v2, chain3 = load chain2, loc
20587 // v3 = add v2, c
20588 // Now we replace use of chain2 with chain1. This makes the second load
20589 // isomorphic to the one we are deleting, and thus makes this load live.
20590 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
20591 dbgs() << "\nWith chain: "; Chain.dump(&DAG);
20592 dbgs() << "\n");
20593 WorklistRemover DeadNodes(*this);
20594 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
20595 AddUsersToWorklist(Chain.getNode());
20596 if (N->use_empty())
20597 deleteAndRecombine(N);
20598
20599 return SDValue(N, 0); // Return N so it doesn't get rechecked!
20600 }
20601 } else {
20602 // Indexed loads.
20603 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
20604
20605 // If this load has an opaque TargetConstant offset, then we cannot split
20606 // the indexing into an add/sub directly (that TargetConstant may not be
20607 // valid for a different type of node, and we cannot convert an opaque
20608 // target constant into a regular constant).
20609 bool CanSplitIdx = canSplitIdx(LD);
20610
20611 if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
20612 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
20613 SDValue Index;
20614 if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
20615 Index = SplitIndexingFromLoad(LD);
20616 // Try to fold the base pointer arithmetic into subsequent loads and
20617 // stores.
20618 AddUsersToWorklist(N);
20619 } else
20620 Index = DAG.getUNDEF(N->getValueType(1));
20621 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
20622 dbgs() << "\nWith: "; Undef.dump(&DAG);
20623 dbgs() << " and 2 other values\n");
20624 WorklistRemover DeadNodes(*this);
20625 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
20626 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
20627 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
20628 deleteAndRecombine(N);
20629 return SDValue(N, 0); // Return N so it doesn't get rechecked!
20630 }
20631 }
20632 }
20633
20634 // If this load is directly stored, replace the load value with the stored
20635 // value.
20636 if (auto V = ForwardStoreValueToDirectLoad(LD))
20637 return V;
20638
20639 // Try to infer better alignment information than the load already has.
20640 if (OptLevel != CodeGenOptLevel::None && LD->isUnindexed() &&
20641 !LD->isAtomic()) {
20642 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
20643 if (*Alignment > LD->getAlign() &&
20644 isAligned(*Alignment, LD->getSrcValueOffset())) {
20645 SDValue NewLoad = DAG.getExtLoad(
20646 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
20647 LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
20648 LD->getMemOperand()->getFlags(), LD->getAAInfo());
20649 // NewLoad will always be N as we are only refining the alignment
20650 assert(NewLoad.getNode() == N);
20651 (void)NewLoad;
20652 }
20653 }
20654 }
20655
20656 if (LD->isUnindexed()) {
20657 // Walk up chain skipping non-aliasing memory nodes.
20658 SDValue BetterChain = FindBetterChain(LD, Chain);
20659
20660 // If there is a better chain.
20661 if (Chain != BetterChain) {
20662 SDValue ReplLoad;
20663
20664       // Replace the chain to avoid dependency.
20665 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
20666 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
20667 BetterChain, Ptr, LD->getMemOperand());
20668 } else {
20669 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
20670 LD->getValueType(0),
20671 BetterChain, Ptr, LD->getMemoryVT(),
20672 LD->getMemOperand());
20673 }
20674
20675 // Create token factor to keep old chain connected.
20676 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
20677 MVT::Other, Chain, ReplLoad.getValue(1));
20678
20679 // Replace uses with load result and token factor
20680 return CombineTo(N, ReplLoad.getValue(0), Token);
20681 }
20682 }
20683
20684 // Try transforming N to an indexed load.
20685 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
20686 return SDValue(N, 0);
20687
20688   // Try to slice up N into more direct loads if the slices are mapped to
20689 // different register banks or pairing can take place.
20690 if (SliceUpLoad(N))
20691 return SDValue(N, 0);
20692
20693 return SDValue();
20694}
20695
20696namespace {
20697
20698/// Helper structure used to slice a load in smaller loads.
20699/// Basically a slice is obtained from the following sequence:
20700/// Origin = load Ty1, Base
20701/// Shift = srl Ty1 Origin, CstTy Amount
20702/// Inst = trunc Shift to Ty2
20703///
20704/// Then, it will be rewritten into:
20705/// Slice = load SliceTy, Base + SliceOffset
20706/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
20707///
20708/// SliceTy is deduced from the number of bits that are actually used to
20709/// build Inst.
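/// Illustrative example (editorial addition, not from the original source):
///   Origin = load i64, Base
///   Shift  = srl i64 Origin, 32
///   Inst   = trunc Shift to i32
/// gives a slice that only needs the upper 4 bytes, i.e. SliceTy = i32 with
/// SliceOffset = 4 on a little-endian target (and 0 on a big-endian one).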
20710struct LoadedSlice {
20711 /// Helper structure used to compute the cost of a slice.
20712 struct Cost {
20713 /// Are we optimizing for code size.
20714 bool ForCodeSize = false;
20715
20716 /// Various cost.
20717 unsigned Loads = 0;
20718 unsigned Truncates = 0;
20719 unsigned CrossRegisterBanksCopies = 0;
20720 unsigned ZExts = 0;
20721 unsigned Shift = 0;
20722
20723 explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
20724
20725 /// Get the cost of one isolated slice.
20726 Cost(const LoadedSlice &LS, bool ForCodeSize)
20727 : ForCodeSize(ForCodeSize), Loads(1) {
20728 EVT TruncType = LS.Inst->getValueType(0);
20729 EVT LoadedType = LS.getLoadedType();
20730 if (TruncType != LoadedType &&
20731 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
20732 ZExts = 1;
20733 }
20734
20735 /// Account for slicing gain in the current cost.
20736     /// Slicing provides a few gains, like removing a shift or a
20737     /// truncate. This method grows the cost of the original
20738     /// load by the gain from this slice.
20739 void addSliceGain(const LoadedSlice &LS) {
20740 // Each slice saves a truncate.
20741 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
20742 if (!TLI.isTruncateFree(LS.Inst->getOperand(0), LS.Inst->getValueType(0)))
20743 ++Truncates;
20744 // If there is a shift amount, this slice gets rid of it.
20745 if (LS.Shift)
20746 ++Shift;
20747 // If this slice can merge a cross register bank copy, account for it.
20748 if (LS.canMergeExpensiveCrossRegisterBankCopy())
20749 ++CrossRegisterBanksCopies;
20750 }
20751
20752 Cost &operator+=(const Cost &RHS) {
20753 Loads += RHS.Loads;
20754 Truncates += RHS.Truncates;
20755 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
20756 ZExts += RHS.ZExts;
20757 Shift += RHS.Shift;
20758 return *this;
20759 }
20760
20761 bool operator==(const Cost &RHS) const {
20762 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
20763 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
20764 ZExts == RHS.ZExts && Shift == RHS.Shift;
20765 }
20766
20767 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
20768
20769 bool operator<(const Cost &RHS) const {
20770 // Assume cross register banks copies are as expensive as loads.
20771 // FIXME: Do we want some more target hooks?
20772 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
20773 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
20774 // Unless we are optimizing for code size, consider the
20775 // expensive operation first.
20776 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
20777 return ExpensiveOpsLHS < ExpensiveOpsRHS;
20778 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
20779 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
20780 }
20781
20782 bool operator>(const Cost &RHS) const { return RHS < *this; }
20783
20784 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
20785
20786 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
20787 };
20788
20789   // The last instruction that represents the slice. This should be a
20790 // truncate instruction.
20791 SDNode *Inst;
20792
20793 // The original load instruction.
20794 LoadSDNode *Origin;
20795
20796 // The right shift amount in bits from the original load.
20797 unsigned Shift;
20798
20799   // The DAG from which Origin came.
20800 // This is used to get some contextual information about legal types, etc.
20801 SelectionDAG *DAG;
20802
20803 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
20804 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
20805 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
20806
20807   /// Get the bits used in a chunk of bits \p BitWidth large.
20808   /// \return The result is \p BitWidth bits wide, with used bits set to 1 and
20809   /// unused bits set to 0.
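  /// Illustrative example (editorial addition): for an i64 load whose slice is
  /// "trunc (srl Origin, 32) to i32", the result is 64 bits wide with only the
  /// upper 32 bits set, i.e. 0xFFFFFFFF00000000.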
20810 APInt getUsedBits() const {
20811 // Reproduce the trunc(lshr) sequence:
20812 // - Start from the truncated value.
20813 // - Zero extend to the desired bit width.
20814 // - Shift left.
20815 assert(Origin && "No original load to compare against.");
20816 unsigned BitWidth = Origin->getValueSizeInBits(0);
20817 assert(Inst && "This slice is not bound to an instruction");
20818 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
20819 "Extracted slice is bigger than the whole type!");
20820 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
20821 UsedBits.setAllBits();
20822 UsedBits = UsedBits.zext(BitWidth);
20823 UsedBits <<= Shift;
20824 return UsedBits;
20825 }
20826
20827 /// Get the size of the slice to be loaded in bytes.
20828 unsigned getLoadedSize() const {
20829 unsigned SliceSize = getUsedBits().popcount();
20830 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
20831 return SliceSize / 8;
20832 }
20833
20834 /// Get the type that will be loaded for this slice.
20835 /// Note: This may not be the final type for the slice.
20836 EVT getLoadedType() const {
20837 assert(DAG && "Missing context");
20838 LLVMContext &Ctxt = *DAG->getContext();
20839 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
20840 }
20841
20842 /// Get the alignment of the load used for this slice.
20843 Align getAlign() const {
20844 Align Alignment = Origin->getAlign();
20845 uint64_t Offset = getOffsetFromBase();
20846 if (Offset != 0)
20847 Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
20848 return Alignment;
20849 }
20850
20851 /// Check if this slice can be rewritten with legal operations.
20852 bool isLegal() const {
20853 // An invalid slice is not legal.
20854 if (!Origin || !Inst || !DAG)
20855 return false;
20856
20857     // Offsets are for indexed loads only; we do not handle that.
20858 if (!Origin->getOffset().isUndef())
20859 return false;
20860
20861 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
20862
20863 // Check that the type is legal.
20864 EVT SliceType = getLoadedType();
20865 if (!TLI.isTypeLegal(SliceType))
20866 return false;
20867
20868 // Check that the load is legal for this type.
20869 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
20870 return false;
20871
20872 // Check that the offset can be computed.
20873 // 1. Check its type.
20874 EVT PtrType = Origin->getBasePtr().getValueType();
20875 if (PtrType == MVT::Untyped || PtrType.isExtended())
20876 return false;
20877
20878 // 2. Check that it fits in the immediate.
20879 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
20880 return false;
20881
20882 // 3. Check that the computation is legal.
20883 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
20884 return false;
20885
20886 // Check that the zext is legal if it needs one.
20887 EVT TruncateType = Inst->getValueType(0);
20888 if (TruncateType != SliceType &&
20889 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
20890 return false;
20891
20892 return true;
20893 }
20894
20895 /// Get the offset in bytes of this slice in the original chunk of
20896 /// bits.
20897 /// \pre DAG != nullptr.
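  /// Illustrative example (editorial addition): for an i64 origin with
  /// Shift == 32 and a 4-byte slice, the offset is 32 / 8 == 4 on a
  /// little-endian target and 8 - 4 - 4 == 0 on a big-endian one.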
20898 uint64_t getOffsetFromBase() const {
20899 assert(DAG && "Missing context.");
20900 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
20901 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
20902 uint64_t Offset = Shift / 8;
20903 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
20904 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
20905 "The size of the original loaded type is not a multiple of a"
20906 " byte.");
20907 // If Offset is bigger than TySizeInBytes, it means we are loading all
20908 // zeros. This should have been optimized before in the process.
20909 assert(TySizeInBytes > Offset &&
20910 "Invalid shift amount for given loaded size");
20911 if (IsBigEndian)
20912 Offset = TySizeInBytes - Offset - getLoadedSize();
20913 return Offset;
20914 }
20915
20916 /// Generate the sequence of instructions to load the slice
20917 /// represented by this object and redirect the uses of this slice to
20918 /// this new sequence of instructions.
20919 /// \pre this->Inst && this->Origin are valid Instructions and this
20920 /// object passed the legal check: LoadedSlice::isLegal returned true.
20921 /// \return The last instruction of the sequence used to load the slice.
20922 SDValue loadSlice() const {
20923 assert(Inst && Origin && "Unable to replace a non-existing slice.");
20924 const SDValue &OldBaseAddr = Origin->getBasePtr();
20925 SDValue BaseAddr = OldBaseAddr;
20926 // Get the offset in that chunk of bytes w.r.t. the endianness.
20927 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
20928 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
20929 if (Offset) {
20930 // BaseAddr = BaseAddr + Offset.
20931 EVT ArithType = BaseAddr.getValueType();
20932 SDLoc DL(Origin);
20933 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
20934 DAG->getConstant(Offset, DL, ArithType));
20935 }
20936
20937 // Create the type of the loaded slice according to its size.
20938 EVT SliceType = getLoadedType();
20939
20940 // Create the load for the slice.
20941 SDValue LastInst =
20942 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
20943                      Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
20944                      Origin->getMemOperand()->getFlags());
20945 // If the final type is not the same as the loaded type, this means that
20946 // we have to pad with zero. Create a zero extend for that.
20947 EVT FinalType = Inst->getValueType(0);
20948 if (SliceType != FinalType)
20949 LastInst =
20950 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
20951 return LastInst;
20952 }
20953
20954 /// Check if this slice can be merged with an expensive cross register
20955 /// bank copy. E.g.,
20956 /// i = load i32
20957 /// f = bitcast i32 i to float
20958 bool canMergeExpensiveCrossRegisterBankCopy() const {
20959 if (!Inst || !Inst->hasOneUse())
20960 return false;
20961 SDNode *User = *Inst->user_begin();
20962 if (User->getOpcode() != ISD::BITCAST)
20963 return false;
20964 assert(DAG && "Missing context");
20965 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
20966 EVT ResVT = User->getValueType(0);
20967 const TargetRegisterClass *ResRC =
20968 TLI.getRegClassFor(ResVT.getSimpleVT(), User->isDivergent());
20969 const TargetRegisterClass *ArgRC =
20970 TLI.getRegClassFor(User->getOperand(0).getValueType().getSimpleVT(),
20971 User->getOperand(0)->isDivergent());
20972 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
20973 return false;
20974
20975 // At this point, we know that we perform a cross-register-bank copy.
20976 // Check if it is expensive.
20977 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
20978 // Assume bitcasts are cheap, unless both register classes do not
20979 // explicitly share a common sub class.
20980 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
20981 return false;
20982
20983 // Check if it will be merged with the load.
20984 // 1. Check the alignment / fast memory access constraint.
20985 unsigned IsFast = 0;
20986 if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
20987 Origin->getAddressSpace(), getAlign(),
20988 Origin->getMemOperand()->getFlags(), &IsFast) ||
20989 !IsFast)
20990 return false;
20991
20992 // 2. Check that the load is a legal operation for that type.
20993 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
20994 return false;
20995
20996 // 3. Check that we do not have a zext in the way.
20997 if (Inst->getValueType(0) != getLoadedType())
20998 return false;
20999
21000 return true;
21001 }
21002};
21003
21004} // end anonymous namespace
21005
21006/// Check that all bits set in \p UsedBits form a dense region, i.e.,
21007/// \p UsedBits looks like 0..0 1..1 0..0.
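/// Illustrative example (editorial addition): 0b00111100 is dense, while
/// 0b00110100 is not.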
21008static bool areUsedBitsDense(const APInt &UsedBits) {
21009 // If all the bits are one, this is dense!
21010 if (UsedBits.isAllOnes())
21011 return true;
21012
21013 // Get rid of the unused bits on the right.
21014 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countr_zero());
21015 // Get rid of the unused bits on the left.
21016 if (NarrowedUsedBits.countl_zero())
21017 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
21018 // Check that the chunk of bits is completely used.
21019 return NarrowedUsedBits.isAllOnes();
21020}
21021
21022/// Check whether or not \p First and \p Second are next to each other
21023/// in memory. This means that there is no hole between the bits loaded
21024/// by \p First and the bits loaded by \p Second.
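/// For example, two slices of the same i32 load covering bytes [0,2) and
/// [2,4) are next to each other, whereas slices covering bytes [0,1) and
/// [2,3) are not, because byte 1 is left unused between them.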
21025static bool areSlicesNextToEachOther(const LoadedSlice &First,
21026 const LoadedSlice &Second) {
21027 assert(First.Origin == Second.Origin && First.Origin &&
21028 "Unable to match different memory origins.");
21029 APInt UsedBits = First.getUsedBits();
21030 assert((UsedBits & Second.getUsedBits()) == 0 &&
21031 "Slices are not supposed to overlap.");
21032 UsedBits |= Second.getUsedBits();
21033 return areUsedBitsDense(UsedBits);
21034}
21035
21036/// Adjust the \p GlobalLSCost according to the target
21037 /// pairing capabilities and the layout of the slices.
21038 /// \pre \p GlobalLSCost should account for at least as many loads as
21039 /// there are in the slices in \p LoadedSlices.
21040 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
21041 LoadedSlice::Cost &GlobalLSCost) {
21042 unsigned NumberOfSlices = LoadedSlices.size();
21043 // If there is less than 2 elements, no pairing is possible.
21044 if (NumberOfSlices < 2)
21045 return;
21046
21047 // Sort the slices so that elements that are likely to be next to each
21048 // other in memory are next to each other in the list.
21049 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
21050 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
21051 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
21052 });
21053 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
21054 // First (resp. Second) is the first (resp. second) potential candidate
21055 // to be placed in a paired load.
21056 const LoadedSlice *First = nullptr;
21057 const LoadedSlice *Second = nullptr;
21058 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
21059 // Set the beginning of the pair.
21060 First = Second) {
21061 Second = &LoadedSlices[CurrSlice];
21062
21063 // If First is NULL, it means we start a new pair.
21064 // Get to the next slice.
21065 if (!First)
21066 continue;
21067
21068 EVT LoadedType = First->getLoadedType();
21069
21070 // If the types of the slices are different, we cannot pair them.
21071 if (LoadedType != Second->getLoadedType())
21072 continue;
21073
21074 // Check if the target supplies paired loads for this type.
21075 Align RequiredAlignment;
21076 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
21077 // move to the next pair, this type is hopeless.
21078 Second = nullptr;
21079 continue;
21080 }
21081 // Check if we meet the alignment requirement.
21082 if (First->getAlign() < RequiredAlignment)
21083 continue;
21084
21085 // Check that both loads are next to each other in memory.
21086 if (!areSlicesNextToEachOther(*First, *Second))
21087 continue;
21088
21089 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
21090 --GlobalLSCost.Loads;
21091 // Move to the next pair.
21092 Second = nullptr;
21093 }
21094}
21095
21096/// Check the profitability of all involved LoadedSlice.
21097 /// Currently, it is considered profitable if there are exactly two
21098/// involved slices (1) which are (2) next to each other in memory, and
21099/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
21100///
21101/// Note: The order of the elements in \p LoadedSlices may be modified, but not
21102/// the elements themselves.
21103///
21104 /// FIXME: When the cost model is mature enough, we can relax
21105 /// constraints (1) and (2).
21106 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
21107 const APInt &UsedBits, bool ForCodeSize) {
21108 unsigned NumberOfSlices = LoadedSlices.size();
21109 if (StressLoadSlicing)
21110 return NumberOfSlices > 1;
21111
21112 // Check (1).
21113 if (NumberOfSlices != 2)
21114 return false;
21115
21116 // Check (2).
21117 if (!areUsedBitsDense(UsedBits))
21118 return false;
21119
21120 // Check (3).
21121 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
21122 // The original code has one big load.
21123 OrigCost.Loads = 1;
21124 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
21125 const LoadedSlice &LS = LoadedSlices[CurrSlice];
21126 // Accumulate the cost of all the slices.
21127 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
21128 GlobalSlicingCost += SliceCost;
21129
21130 // In the cost of the original configuration, account for the gain
21131 // obtained with the current slice.
21132 OrigCost.addSliceGain(LS);
21133 }
21134
21135 // If the target supports paired load, adjust the cost accordingly.
21136 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
21137 return OrigCost > GlobalSlicingCost;
21138}
21139
21140/// If the given load, \p LI, is used only by trunc or trunc(lshr)
21141/// operations, split it in the various pieces being extracted.
21142///
21143/// This sort of thing is introduced by SROA.
21144/// This slicing takes care not to insert overlapping loads.
21145/// \pre LI is a simple load (i.e., not an atomic or volatile load).
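///
/// A sketch of the kind of DAG this targets (assuming the narrow loads are
/// legal and the profitability checks below succeed):
///   t = load i64 [p]
///   a = trunc t to i32
///   b = trunc (srl t, 32) to i32
/// becomes two independent i32 loads, from [p] and from [p + 4] on a
/// little-endian target.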
21146bool DAGCombiner::SliceUpLoad(SDNode *N) {
21147 if (Level < AfterLegalizeDAG)
21148 return false;
21149
21150 LoadSDNode *LD = cast<LoadSDNode>(N);
21151 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
21152 !LD->getValueType(0).isInteger())
21153 return false;
21154
21155 // The algorithm to split up a load of a scalable vector into individual
21156 // elements currently requires knowing the length of the loaded type,
21157 // so will need adjusting to work on scalable vectors.
21158 if (LD->getValueType(0).isScalableVector())
21159 return false;
21160
21161 // Keep track of already used bits to detect overlapping values.
21162 // In that case, we will just abort the transformation.
21163 APInt UsedBits(LD->getValueSizeInBits(0), 0);
21164
21165 SmallVector<LoadedSlice, 4> LoadedSlices;
21166
21167 // Check if this load is used as several smaller chunks of bits.
21168 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
21169 // of computation for each trunc.
21170 for (SDUse &U : LD->uses()) {
21171 // Skip the uses of the chain.
21172 if (U.getResNo() != 0)
21173 continue;
21174
21175 SDNode *User = U.getUser();
21176 unsigned Shift = 0;
21177
21178 // Check if this is a trunc(lshr).
21179 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
21180 isa<ConstantSDNode>(User->getOperand(1))) {
21181 Shift = User->getConstantOperandVal(1);
21182 User = *User->user_begin();
21183 }
21184
21185 // At this point, User is a TRUNCATE iff we encountered trunc or
21186 // trunc(lshr).
21187 if (User->getOpcode() != ISD::TRUNCATE)
21188 return false;
21189
21190 // The width of the type must be a power of 2 and at least 8 bits.
21191 // Otherwise the load cannot be represented in LLVM IR.
21192 // Moreover, if we shifted by an amount that is not a multiple of 8 bits,
21193 // the slice would straddle byte boundaries. We do not support that.
21194 unsigned Width = User->getValueSizeInBits(0);
21195 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
21196 return false;
21197
21198 // Build the slice for this chain of computations.
21199 LoadedSlice LS(User, LD, Shift, &DAG);
21200 APInt CurrentUsedBits = LS.getUsedBits();
21201
21202 // Check if this slice overlaps with another.
21203 if ((CurrentUsedBits & UsedBits) != 0)
21204 return false;
21205 // Update the bits used globally.
21206 UsedBits |= CurrentUsedBits;
21207
21208 // Check if the new slice would be legal.
21209 if (!LS.isLegal())
21210 return false;
21211
21212 // Record the slice.
21213 LoadedSlices.push_back(LS);
21214 }
21215
21216 // Abort slicing if it does not seem to be profitable.
21217 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
21218 return false;
21219
21220 ++SlicedLoads;
21221
21222 // Rewrite each chain to use an independent load.
21223 // By construction, each chain can be represented by a unique load.
21224
21225 // Prepare the argument for the new token factor for all the slices.
21226 SmallVector<SDValue, 8> ArgChains;
21227 for (const LoadedSlice &LS : LoadedSlices) {
21228 SDValue SliceInst = LS.loadSlice();
21229 CombineTo(LS.Inst, SliceInst, true);
21230 if (SliceInst.getOpcode() != ISD::LOAD)
21231 SliceInst = SliceInst.getOperand(0);
21232 assert(SliceInst->getOpcode() == ISD::LOAD &&
21233 "It takes more than a zext to get to the loaded slice!!");
21234 ArgChains.push_back(SliceInst.getValue(1));
21235 }
21236
21237 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
21238 ArgChains);
21239 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
21240 AddToWorklist(Chain.getNode());
21241 return true;
21242}
21243
21244 /// Check to see if V is (and load (ptr), imm), where the load has
21245 /// specific bytes cleared out. If so, return the number of bytes being
21246 /// masked out and the byte shift amount.
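///
/// For instance, for an i32 value V = (and (load Ptr), 0xFFFF0000), the low
/// two bytes are cleared by the mask, so this returns {2, 0}: two bytes
/// masked out, starting at byte offset 0 (assuming the chain requirement
/// below is also met).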
21247 static std::pair<unsigned, unsigned>
21248 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
21249 std::pair<unsigned, unsigned> Result(0, 0);
21250
21251 // Check for the structure we're looking for.
21252 if (V->getOpcode() != ISD::AND ||
21253 !isa<ConstantSDNode>(V->getOperand(1)) ||
21254 !ISD::isNormalLoad(V->getOperand(0).getNode()))
21255 return Result;
21256
21257 // Check the chain and pointer.
21258 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
21259 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
21260
21261 // This only handles simple types.
21262 if (V.getValueType() != MVT::i16 &&
21263 V.getValueType() != MVT::i32 &&
21264 V.getValueType() != MVT::i64)
21265 return Result;
21266
21267 // Check the constant mask. Invert it so that the bits being masked out are
21268 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
21269 // follow the sign bit for uniformity.
21270 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
21271 unsigned NotMaskLZ = llvm::countl_zero(NotMask);
21272 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
21273 unsigned NotMaskTZ = llvm::countr_zero(NotMask);
21274 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
21275 if (NotMaskLZ == 64) return Result; // All zero mask.
21276
21277 // See if we have a continuous run of bits. If so, we have 0*1+0*
21278 if (llvm::countr_one(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
21279 return Result;
21280
21281 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
21282 if (V.getValueType() != MVT::i64 && NotMaskLZ)
21283 NotMaskLZ -= 64-V.getValueSizeInBits();
21284
21285 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
21286 switch (MaskedBytes) {
21287 case 1:
21288 case 2:
21289 case 4: break;
21290 default: return Result; // All one mask, or 5-byte mask.
21291 }
21292
21293 // Verify that the masked region starts at a byte offset that is a multiple
21294 // of the mask width, so that the access is aligned the same as the access width.
21295 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
21296
21297 // For narrowing to be valid, it must be the case that the load is the
21298 // memory operation immediately preceding the store.
21299 if (LD == Chain.getNode())
21300 ; // ok.
21301 else if (Chain->getOpcode() == ISD::TokenFactor &&
21302 SDValue(LD, 1).hasOneUse()) {
21303 // LD has only 1 chain use, so there are no indirect dependencies.
21304 if (!LD->isOperandOf(Chain.getNode()))
21305 return Result;
21306 } else
21307 return Result; // Fail.
21308
21309 Result.first = MaskedBytes;
21310 Result.second = NotMaskTZ/8;
21311 return Result;
21312}
21313
21314/// Check to see if IVal is something that provides a value as specified by
21315/// MaskInfo. If so, replace the specified store with a narrower store of
21316/// truncated IVal.
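///
/// A sketch of the rewrite (assuming the target allows the narrow access):
///   store (or (and (load p), 0xFFFFFF00), X), p
/// with X known to be zero outside its low byte becomes
///   store8 (trunc X), p
/// so only the low byte is rewritten and the wide load becomes dead.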
21317static SDValue
21318ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
21319 SDValue IVal, StoreSDNode *St,
21320 DAGCombiner *DC) {
21321 unsigned NumBytes = MaskInfo.first;
21322 unsigned ByteShift = MaskInfo.second;
21323 SelectionDAG &DAG = DC->getDAG();
21324
21325 // Check to see if IVal is all zeros in the part being masked in by the 'or'
21326 // that uses this. If not, this is not a replacement.
21327 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
21328 ByteShift*8, (ByteShift+NumBytes)*8);
21329 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
21330
21331 // Check that it is legal on the target to do this. It is legal if the new
21332 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
21333 // legalization. If the source type is legal, but the store type isn't, see
21334 // if we can use a truncating store.
21335 MVT VT = MVT::getIntegerVT(NumBytes * 8);
21336 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21337 bool UseTruncStore;
21338 if (DC->isTypeLegal(VT))
21339 UseTruncStore = false;
21340 else if (TLI.isTypeLegal(IVal.getValueType()) &&
21341 TLI.isTruncStoreLegal(IVal.getValueType(), VT))
21342 UseTruncStore = true;
21343 else
21344 return SDValue();
21345
21346 // Can't do this for indexed stores.
21347 if (St->isIndexed())
21348 return SDValue();
21349
21350 // Check that the target doesn't think this is a bad idea.
21351 if (St->getMemOperand() &&
21352 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
21353 *St->getMemOperand()))
21354 return SDValue();
21355
21356 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
21357 // shifted by ByteShift and truncated down to NumBytes.
21358 if (ByteShift) {
21359 SDLoc DL(IVal);
21360 IVal = DAG.getNode(
21361 ISD::SRL, DL, IVal.getValueType(), IVal,
21362 DAG.getShiftAmountConstant(ByteShift * 8, IVal.getValueType(), DL));
21363 }
21364
21365 // Figure out the offset for the store and the alignment of the access.
21366 unsigned StOffset;
21367 if (DAG.getDataLayout().isLittleEndian())
21368 StOffset = ByteShift;
21369 else
21370 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
21371
21372 SDValue Ptr = St->getBasePtr();
21373 if (StOffset) {
21374 SDLoc DL(IVal);
21375 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(StOffset), DL);
21376 }
21377
21378 ++OpsNarrowed;
21379 if (UseTruncStore)
21380 return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr,
21381 St->getPointerInfo().getWithOffset(StOffset), VT,
21382 St->getBaseAlign());
21383
21384 // Truncate down to the new size.
21385 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
21386
21387 return DAG.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
21388 St->getPointerInfo().getWithOffset(StOffset),
21389 St->getBaseAlign());
21390}
21391
21392/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
21393/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
21394/// narrowing the load and store if it would end up being a win for performance
21395/// or code size.
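///
/// For example, on a little-endian target where an i8 access is legal and
/// fast, the sequence
///   t = load i32 [p]; t2 = or t, 0x4400; store t2, [p]
/// can be narrowed to an i8 load / or / store at [p + 1].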
21396SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
21397 StoreSDNode *ST = cast<StoreSDNode>(N);
21398 if (!ST->isSimple())
21399 return SDValue();
21400
21401 SDValue Chain = ST->getChain();
21402 SDValue Value = ST->getValue();
21403 SDValue Ptr = ST->getBasePtr();
21404 EVT VT = Value.getValueType();
21405
21406 if (ST->isTruncatingStore() || VT.isVector())
21407 return SDValue();
21408
21409 unsigned Opc = Value.getOpcode();
21410
21411 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
21412 !Value.hasOneUse())
21413 return SDValue();
21414
21415 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
21416 // is a byte mask indicating a consecutive number of bytes, check to see if
21417 // Y is known to provide just those bytes. If so, we try to replace the
21418 // load + replace + store sequence with a single (narrower) store, which makes
21419 // the load dead.
21421 std::pair<unsigned, unsigned> MaskedLoad;
21422 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
21423 if (MaskedLoad.first)
21424 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
21425 Value.getOperand(1), ST,this))
21426 return NewST;
21427
21428 // Or is commutative, so try swapping X and Y.
21429 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
21430 if (MaskedLoad.first)
21431 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
21432 Value.getOperand(0), ST,this))
21433 return NewST;
21434 }
21435
21437 return SDValue();
21438
21439 if (Value.getOperand(1).getOpcode() != ISD::Constant)
21440 return SDValue();
21441
21442 SDValue N0 = Value.getOperand(0);
21443 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
21444 Chain == SDValue(N0.getNode(), 1)) {
21445 LoadSDNode *LD = cast<LoadSDNode>(N0);
21446 if (LD->getBasePtr() != Ptr ||
21447 LD->getPointerInfo().getAddrSpace() !=
21448 ST->getPointerInfo().getAddrSpace())
21449 return SDValue();
21450
21451 // Find the type NewVT to narrow the load / op / store to.
21452 SDValue N1 = Value.getOperand(1);
21453 unsigned BitWidth = N1.getValueSizeInBits();
21454 APInt Imm = N1->getAsAPIntVal();
21455 if (Opc == ISD::AND)
21456 Imm.flipAllBits();
21457 if (Imm == 0 || Imm.isAllOnes())
21458 return SDValue();
21459 // Find the least/most significant bits that need to be part of the narrowed
21460 // operation. We assume the target will need to address/access full bytes, so
21461 // we make sure to align the LSB and MSB at byte boundaries.
21462 unsigned BitsPerByteMask = 7u;
21463 unsigned LSB = Imm.countr_zero() & ~BitsPerByteMask;
21464 unsigned MSB = (Imm.getActiveBits() - 1) | BitsPerByteMask;
21465 unsigned NewBW = NextPowerOf2(MSB - LSB);
21466 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
21467 // The narrowing should be profitable, the load/store operation should be
21468 // legal (or custom) and the store size should be equal to the NewVT width.
21469 while (NewBW < BitWidth &&
21470 (NewVT.getStoreSizeInBits() != NewBW ||
21471 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
21473 !TLI.isNarrowingProfitable(N, VT, NewVT)))) {
21474 NewBW = NextPowerOf2(NewBW);
21475 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
21476 }
21477 if (NewBW >= BitWidth)
21478 return SDValue();
21479
21480 // If we get this far, NewVT/NewBW reflect a power-of-2 sized type that is
21481 // large enough to cover all bits that should be modified. This type might
21482 // however be larger than really needed (such as i32 while we actually only
21483 // need to modify one byte). Now we need to find out how to align the memory
21484 // accesses to satisfy preferred alignments as well as to avoid accessing
21485 // memory outside the store size of the original access.
21486
21487 unsigned VTStoreSize = VT.getStoreSizeInBits().getFixedValue();
21488
21489 // Let ShAmt denote the number of bits to skip, counted from the least
21490 // significant bits of Imm. And let PtrOff denote how much the pointer needs
21491 // to be offset (in bytes) for the new access.
21492 unsigned ShAmt = 0;
21493 uint64_t PtrOff = 0;
21494 for (; ShAmt + NewBW <= VTStoreSize; ShAmt += 8) {
21495 // Make sure the range [ShAmt, ShAmt+NewBW) covers both LSB and MSB.
21496 if (ShAmt > LSB)
21497 return SDValue();
21498 if (ShAmt + NewBW < MSB)
21499 continue;
21500
21501 // Calculate PtrOff.
21502 unsigned PtrAdjustmentInBits = DAG.getDataLayout().isBigEndian()
21503 ? VTStoreSize - NewBW - ShAmt
21504 : ShAmt;
21505 PtrOff = PtrAdjustmentInBits / 8;
21506
21507 // Now check if narrow access is allowed and fast, considering alignments.
21508 unsigned IsFast = 0;
21509 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
21510 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
21511 LD->getAddressSpace(), NewAlign,
21512 LD->getMemOperand()->getFlags(), &IsFast) &&
21513 IsFast)
21514 break;
21515 }
21516 // If the loop above did not find any accepted ShAmt, we need to exit here.
21517 if (ShAmt + NewBW > VTStoreSize)
21518 return SDValue();
21519
21520 APInt NewImm = Imm.lshr(ShAmt).trunc(NewBW);
21521 if (Opc == ISD::AND)
21522 NewImm.flipAllBits();
21523 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
21524 SDValue NewPtr =
21525 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(PtrOff), SDLoc(LD));
21526 SDValue NewLD =
21527 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
21528 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
21529 LD->getMemOperand()->getFlags(), LD->getAAInfo());
21530 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
21531 DAG.getConstant(NewImm, SDLoc(Value), NewVT));
21532 SDValue NewST =
21533 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
21534 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
21535
21536 AddToWorklist(NewPtr.getNode());
21537 AddToWorklist(NewLD.getNode());
21538 AddToWorklist(NewVal.getNode());
21539 WorklistRemover DeadNodes(*this);
21540 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
21541 ++OpsNarrowed;
21542 return NewST;
21543 }
21544
21545 return SDValue();
21546}
21547
21548/// For a given floating point load / store pair, if the load value isn't used
21549/// by any other operations, then consider transforming the pair to integer
21550/// load / store operations if the target deems the transformation profitable.
21551SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
21552 StoreSDNode *ST = cast<StoreSDNode>(N);
21553 SDValue Value = ST->getValue();
21554 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
21555 Value.hasOneUse()) {
21556 LoadSDNode *LD = cast<LoadSDNode>(Value);
21557 EVT VT = LD->getMemoryVT();
21558 if (!VT.isSimple() || !VT.isFloatingPoint() || VT != ST->getMemoryVT() ||
21559 LD->isNonTemporal() || ST->isNonTemporal() ||
21560 LD->getPointerInfo().getAddrSpace() != 0 ||
21561 ST->getPointerInfo().getAddrSpace() != 0)
21562 return SDValue();
21563
21564 TypeSize VTSize = VT.getSizeInBits();
21565
21566 // We don't know the size of scalable types at compile time so we cannot
21567 // create an integer of the equivalent size.
21568 if (VTSize.isScalable())
21569 return SDValue();
21570
21571 unsigned FastLD = 0, FastST = 0;
21572 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedValue());
21573 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
21574 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
21575 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
21576 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
21577 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
21578 *LD->getMemOperand(), &FastLD) ||
21579 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
21580 *ST->getMemOperand(), &FastST) ||
21581 !FastLD || !FastST)
21582 return SDValue();
21583
21584 SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(),
21585 LD->getBasePtr(), LD->getMemOperand());
21586
21587 SDValue NewST = DAG.getStore(ST->getChain(), SDLoc(N), NewLD,
21588 ST->getBasePtr(), ST->getMemOperand());
21589
21590 AddToWorklist(NewLD.getNode());
21591 AddToWorklist(NewST.getNode());
21592 WorklistRemover DeadNodes(*this);
21593 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
21594 ++LdStFP2Int;
21595 return NewST;
21596 }
21597
21598 return SDValue();
21599}
21600
21601// This is a helper function for visitMUL to check the profitability
21602// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
21603// MulNode is the original multiply, AddNode is (add x, c1),
21604// and ConstNode is c2.
21605//
21606// If the (add x, c1) has multiple uses, we could increase
21607// the number of adds if we make this transformation.
21608// It would only be worth doing this if we can remove a
21609// multiply in the process. Check for that here.
21610// To illustrate:
21611// (A + c1) * c3
21612// (A + c2) * c3
21613// We're checking for cases where we have common "c3 * A" expressions.
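//
// For instance, with c1 = 1, c2 = 2 and c3 = 5:
//   (A + 1) * 5 -> (A * 5) + 5
//   (A + 2) * 5 -> (A * 5) + 10
// After both rewrites the multiply (A * 5) is common and can be reused.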
21614bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
21615 SDValue ConstNode) {
21616 // If the add only has one use, and the target thinks the folding is
21617 // profitable or does not lead to worse code, this would be OK to do.
21618 if (AddNode->hasOneUse() &&
21619 TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
21620 return true;
21621
21622 // Walk all the users of the constant with which we're multiplying.
21623 for (SDNode *User : ConstNode->users()) {
21624 if (User == MulNode) // This use is the one we're on right now. Skip it.
21625 continue;
21626
21627 if (User->getOpcode() == ISD::MUL) { // We have another multiply use.
21628 SDNode *OtherOp;
21629 SDNode *MulVar = AddNode.getOperand(0).getNode();
21630
21631 // OtherOp is what we're multiplying against the constant.
21632 if (User->getOperand(0) == ConstNode)
21633 OtherOp = User->getOperand(1).getNode();
21634 else
21635 OtherOp = User->getOperand(0).getNode();
21636
21637 // Check to see if multiply is with the same operand of our "add".
21638 //
21639 // ConstNode = CONST
21640 // User = ConstNode * A <-- visiting User. OtherOp is A.
21641 // ...
21642 // AddNode = (A + c1) <-- MulVar is A.
21643 // = AddNode * ConstNode <-- current visiting instruction.
21644 //
21645 // If we make this transformation, we will have a common
21646 // multiply (ConstNode * A) that we can save.
21647 if (OtherOp == MulVar)
21648 return true;
21649
21650 // Now check to see if a future expansion will give us a common
21651 // multiply.
21652 //
21653 // ConstNode = CONST
21654 // AddNode = (A + c1)
21655 // ... = AddNode * ConstNode <-- current visiting instruction.
21656 // ...
21657 // OtherOp = (A + c2)
21658 // User = OtherOp * ConstNode <-- visiting User.
21659 //
21660 // If we make this transformation, we will have a common
21661 // multiply (CONST * A) after we also do the same transformation
21662 // to the User instruction (OtherOp * ConstNode).
21663 if (OtherOp->getOpcode() == ISD::ADD &&
21665 OtherOp->getOperand(0).getNode() == MulVar)
21666 return true;
21667 }
21668 }
21669
21670 // Didn't find a case where this would be profitable.
21671 return false;
21672}
21673
21674SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
21675 unsigned NumStores) {
21676 SmallVector<SDValue, 8> Chains;
21677 SmallPtrSet<const SDNode *, 8> Visited;
21678 SDLoc StoreDL(StoreNodes[0].MemNode);
21679
21680 for (unsigned i = 0; i < NumStores; ++i) {
21681 Visited.insert(StoreNodes[i].MemNode);
21682 }
21683
21684 // don't include nodes that are children or repeated nodes.
21685 for (unsigned i = 0; i < NumStores; ++i) {
21686 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
21687 Chains.push_back(StoreNodes[i].MemNode->getChain());
21688 }
21689
21690 assert(!Chains.empty() && "Chain should have generated a chain");
21691 return DAG.getTokenFactor(StoreDL, Chains);
21692}
21693
21694bool DAGCombiner::hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes) {
21695 const Value *UnderlyingObj = nullptr;
21696 for (const auto &MemOp : StoreNodes) {
21697 const MachineMemOperand *MMO = MemOp.MemNode->getMemOperand();
21698 // A pseudo value like a stack frame has its own frame index and size; we
21699 // should not use the first store's frame index for other frames.
21700 if (MMO->getPseudoValue())
21701 return false;
21702
21703 if (!MMO->getValue())
21704 return false;
21705
21706 const Value *Obj = getUnderlyingObject(MMO->getValue());
21707
21708 if (UnderlyingObj && UnderlyingObj != Obj)
21709 return false;
21710
21711 if (!UnderlyingObj)
21712 UnderlyingObj = Obj;
21713 }
21714
21715 return true;
21716}
21717
21718bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
21719 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
21720 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
21721 // Make sure we have something to merge.
21722 if (NumStores < 2)
21723 return false;
21724
21725 assert((!UseTrunc || !UseVector) &&
21726 "This optimization cannot emit a vector truncating store");
21727
21728 // The latest Node in the DAG.
21729 SDLoc DL(StoreNodes[0].MemNode);
21730
21731 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
21732 unsigned SizeInBits = NumStores * ElementSizeBits;
21733 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
21734
21735 std::optional<MachineMemOperand::Flags> Flags;
21736 AAMDNodes AAInfo;
21737 for (unsigned I = 0; I != NumStores; ++I) {
21738 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
21739 if (!Flags) {
21740 Flags = St->getMemOperand()->getFlags();
21741 AAInfo = St->getAAInfo();
21742 continue;
21743 }
21744 // Skip merging if there's an inconsistent flag.
21745 if (Flags != St->getMemOperand()->getFlags())
21746 return false;
21747 // Concatenate AA metadata.
21748 AAInfo = AAInfo.concat(St->getAAInfo());
21749 }
21750
21751 EVT StoreTy;
21752 if (UseVector) {
21753 unsigned Elts = NumStores * NumMemElts;
21754 // Get the type for the merged vector store.
21755 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
21756 } else
21757 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
21758
21759 SDValue StoredVal;
21760 if (UseVector) {
21761 if (IsConstantSrc) {
21762 SmallVector<SDValue, 8> BuildVector;
21763 for (unsigned I = 0; I != NumStores; ++I) {
21764 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
21765 SDValue Val = St->getValue();
21766 // If constant is of the wrong type, convert it now. This comes up
21767 // when one of our stores was truncating.
21768 if (MemVT != Val.getValueType()) {
21769 Val = peekThroughBitcasts(Val);
21770 // Deal with constants of wrong size.
21771 if (ElementSizeBits != Val.getValueSizeInBits()) {
21772 auto *C = dyn_cast<ConstantSDNode>(Val);
21773 if (!C)
21774 // Not clear how to truncate FP values.
21775 // TODO: Handle truncation of build_vector constants
21776 return false;
21777
21778 EVT IntMemVT =
21779 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
21780 Val = DAG.getConstant(C->getAPIntValue()
21781 .zextOrTrunc(Val.getValueSizeInBits())
21782 .zextOrTrunc(ElementSizeBits),
21783 SDLoc(C), IntMemVT);
21784 }
21785 // Make sure the correctly sized value has the correct (memory) type.
21786 Val = DAG.getBitcast(MemVT, Val);
21787 }
21788 BuildVector.push_back(Val);
21789 }
21790 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
21791 : ISD::BUILD_VECTOR,
21792 DL, StoreTy, BuildVector);
21793 } else {
21794 SmallVector<SDValue, 8> Ops;
21795 for (unsigned i = 0; i < NumStores; ++i) {
21796 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
21797 SDValue Val = peekThroughBitcasts(St->getValue());
21798 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
21799 // type MemVT. If the underlying value is not the correct
21800 // type, but it is an extraction of an appropriate vector we
21801 // can recast Val to be of the correct type. This may require
21802 // converting between EXTRACT_VECTOR_ELT and
21803 // EXTRACT_SUBVECTOR.
21804 if ((MemVT != Val.getValueType()) &&
21805 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
21806 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
21807 EVT MemVTScalarTy = MemVT.getScalarType();
21808 // We may need to add a bitcast here to get types to line up.
21809 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
21810 Val = DAG.getBitcast(MemVT, Val);
21811 } else if (MemVT.isVector() &&
21812 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
21813 Val = DAG.getNode(ISD::BUILD_VECTOR, DL, MemVT, Val);
21814 } else {
21815 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
21816 : ISD::EXTRACT_VECTOR_ELT;
21817 SDValue Vec = Val.getOperand(0);
21818 SDValue Idx = Val.getOperand(1);
21819 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
21820 }
21821 }
21822 Ops.push_back(Val);
21823 }
21824
21825 // Build the extracted vector elements back into a vector.
21826 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
21827 : ISD::BUILD_VECTOR,
21828 DL, StoreTy, Ops);
21829 }
21830 } else {
21831 // We should always use a vector store when merging extracted vector
21832 // elements, so this path implies a store of constants.
21833 assert(IsConstantSrc && "Merged vector elements should use vector store");
21834
21835 APInt StoreInt(SizeInBits, 0);
21836
21837 // Construct a single integer constant which is made of the smaller
21838 // constant inputs.
21839 bool IsLE = DAG.getDataLayout().isLittleEndian();
21840 for (unsigned i = 0; i < NumStores; ++i) {
21841 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
21842 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
21843
21844 SDValue Val = St->getValue();
21845 Val = peekThroughBitcasts(Val);
21846 StoreInt <<= ElementSizeBits;
21847 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
21848 StoreInt |= C->getAPIntValue()
21849 .zextOrTrunc(ElementSizeBits)
21850 .zextOrTrunc(SizeInBits);
21851 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
21852 StoreInt |= C->getValueAPF()
21853 .bitcastToAPInt()
21854 .zextOrTrunc(ElementSizeBits)
21855 .zextOrTrunc(SizeInBits);
21856 // If fp truncation is necessary give up for now.
21857 if (MemVT.getSizeInBits() != ElementSizeBits)
21858 return false;
21859 } else if (ISD::isBuildVectorOfConstantSDNodes(Val.getNode()) ||
21860 ISD::isBuildVectorOfConstantFPSDNodes(Val.getNode())) {
21861 // Not yet handled
21862 return false;
21863 } else {
21864 llvm_unreachable("Invalid constant element type");
21865 }
21866 }
21867
21868 // Create the new Load and Store operations.
21869 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
21870 }
21871
21872 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
21873 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
21874 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
21875
21876 // Make sure we use a truncating store if that is necessary for it to be legal.
21877 // When generating the new widened store, if the first store's pointer info
21878 // cannot be reused, discard the pointer info except for the address space,
21879 // because the widened store can no longer be represented by the original
21880 // pointer info, which describes the narrower memory object.
21881 SDValue NewStore;
21882 if (!UseTrunc) {
21883 NewStore = DAG.getStore(
21884 NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
21885 CanReusePtrInfo
21886 ? FirstInChain->getPointerInfo()
21887 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
21888 FirstInChain->getAlign(), *Flags, AAInfo);
21889 } else { // Must be realized as a trunc store
21890 EVT LegalizedStoredValTy =
21891 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
21892 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
21893 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
21894 SDValue ExtendedStoreVal =
21895 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
21896 LegalizedStoredValTy);
21897 NewStore = DAG.getTruncStore(
21898 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
21899 CanReusePtrInfo
21900 ? FirstInChain->getPointerInfo()
21901 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
21902 StoredVal.getValueType() /*TVT*/, FirstInChain->getAlign(), *Flags,
21903 AAInfo);
21904 }
21905
21906 // Replace all merged stores with the new store.
21907 for (unsigned i = 0; i < NumStores; ++i)
21908 CombineTo(StoreNodes[i].MemNode, NewStore);
21909
21910 AddToWorklist(NewChain.getNode());
21911 return true;
21912}
21913
21914SDNode *
21915DAGCombiner::getStoreMergeCandidates(StoreSDNode *St,
21916 SmallVectorImpl<MemOpLink> &StoreNodes) {
21917 // This holds the base pointer, index, and the offset in bytes from the base
21918 // pointer. We must have a base and an offset. Do not handle stores to undef
21919 // base pointers.
21920 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
21921 if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
21922 return nullptr;
21923
21924 SDValue Val = peekThroughBitcasts(St->getValue());
21925 StoreSource StoreSrc = getStoreSource(Val);
21926 assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
21927
21928 // Match on loadbaseptr if relevant.
21929 EVT MemVT = St->getMemoryVT();
21930 BaseIndexOffset LBasePtr;
21931 EVT LoadVT;
21932 if (StoreSrc == StoreSource::Load) {
21933 auto *Ld = cast<LoadSDNode>(Val);
21934 LBasePtr = BaseIndexOffset::match(Ld, DAG);
21935 LoadVT = Ld->getMemoryVT();
21936 // Load and store should be the same type.
21937 if (MemVT != LoadVT)
21938 return nullptr;
21939 // Loads must only have one use.
21940 if (!Ld->hasNUsesOfValue(1, 0))
21941 return nullptr;
21942 // The memory operands must not be volatile/indexed/atomic.
21943 // TODO: May be able to relax for unordered atomics (see D66309)
21944 if (!Ld->isSimple() || Ld->isIndexed())
21945 return nullptr;
21946 }
21947 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
21948 int64_t &Offset) -> bool {
21949 // The memory operands must not be volatile/indexed/atomic.
21950 // TODO: May be able to relax for unordered atomics (see D66309)
21951 if (!Other->isSimple() || Other->isIndexed())
21952 return false;
21953 // Don't mix temporal stores with non-temporal stores.
21954 if (St->isNonTemporal() != Other->isNonTemporal())
21955 return false;
21956 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*St, *Other))
21957 return false;
21958 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
21959 // Allow merging constants of different types as integers.
21960 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
21961 : Other->getMemoryVT() != MemVT;
21962 switch (StoreSrc) {
21963 case StoreSource::Load: {
21964 if (NoTypeMatch)
21965 return false;
21966 // The Load's Base Ptr must also match.
21967 auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
21968 if (!OtherLd)
21969 return false;
21970 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
21971 if (LoadVT != OtherLd->getMemoryVT())
21972 return false;
21973 // Loads must only have one use.
21974 if (!OtherLd->hasNUsesOfValue(1, 0))
21975 return false;
21976 // The memory operands must not be volatile/indexed/atomic.
21977 // TODO: May be able to relax for unordered atomics (see D66309)
21978 if (!OtherLd->isSimple() || OtherLd->isIndexed())
21979 return false;
21980 // Don't mix temporal loads with non-temporal loads.
21981 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
21982 return false;
21983 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*cast<LoadSDNode>(Val),
21984 *OtherLd))
21985 return false;
21986 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
21987 return false;
21988 break;
21989 }
21990 case StoreSource::Constant:
21991 if (NoTypeMatch)
21992 return false;
21993 if (getStoreSource(OtherBC) != StoreSource::Constant)
21994 return false;
21995 break;
21996 case StoreSource::Extract:
21997 // Do not merge truncated stores here.
21998 if (Other->isTruncatingStore())
21999 return false;
22000 if (!MemVT.bitsEq(OtherBC.getValueType()))
22001 return false;
22002 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
22003 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
22004 return false;
22005 break;
22006 default:
22007 llvm_unreachable("Unhandled store source for merging");
22008 }
22009 Ptr = BaseIndexOffset::match(Other, DAG);
22010 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
22011 };
22012
22013 // We are looking for a root node which is an ancestor to all mergeable
22014 // stores. We search up through a load, to our root and then down
22015 // through all children. For instance we will find Store{1,2,3} if
22016 // St is Store1, Store2, or Store3 where the root is not a load,
22017 // which is always true for non-volatile ops. TODO: Expand
22018 // the search to find all valid candidates through multiple layers of loads.
22019 //
22020 // Root
22021 // |-------|-------|
22022 // Load Load Store3
22023 // | |
22024 // Store1 Store2
22025 //
22026 // FIXME: We should be able to climb and
22027 // descend TokenFactors to find candidates as well.
22028
22029 SDNode *RootNode = St->getChain().getNode();
22030 // Bail out if we already analyzed this root node and found nothing.
22031 if (ChainsWithoutMergeableStores.contains(RootNode))
22032 return nullptr;
22033
22034 // Check if the pair of StoreNode and RootNode has already bailed out of the
22035 // dependence check more times than the limit allows.
22036 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
22037 SDNode *RootNode) -> bool {
22038 auto RootCount = StoreRootCountMap.find(StoreNode);
22039 return RootCount != StoreRootCountMap.end() &&
22040 RootCount->second.first == RootNode &&
22041 RootCount->second.second > StoreMergeDependenceLimit;
22042 };
22043
22044 auto TryToAddCandidate = [&](SDUse &Use) {
22045 // This must be a chain use.
22046 if (Use.getOperandNo() != 0)
22047 return;
22048 if (auto *OtherStore = dyn_cast<StoreSDNode>(Use.getUser())) {
22049 BaseIndexOffset Ptr;
22050 int64_t PtrDiff;
22051 if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
22052 !OverLimitInDependenceCheck(OtherStore, RootNode))
22053 StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
22054 }
22055 };
22056
22057 unsigned NumNodesExplored = 0;
22058 const unsigned MaxSearchNodes = 1024;
22059 if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
22060 RootNode = Ldn->getChain().getNode();
22061 // Bail out if we already analyzed this root node and found nothing.
22062 if (ChainsWithoutMergeableStores.contains(RootNode))
22063 return nullptr;
22064 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
22065 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
22066 SDNode *User = I->getUser();
22067 if (I->getOperandNo() == 0 && isa<LoadSDNode>(User)) { // walk down chain
22068 for (SDUse &U2 : User->uses())
22069 TryToAddCandidate(U2);
22070 }
22071 // Check stores that depend on the root (e.g. Store 3 in the chart above).
22072 if (I->getOperandNo() == 0 && isa<StoreSDNode>(User)) {
22073 TryToAddCandidate(*I);
22074 }
22075 }
22076 } else {
22077 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
22078 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
22079 TryToAddCandidate(*I);
22080 }
22081
22082 return RootNode;
22083}
22084
22085// We need to check that merging these stores does not cause a loop in the
22086// DAG. Any store candidate may depend on another candidate indirectly through
22087// its operands. Check in parallel by searching up from operands of candidates.
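// For example, if candidate store B stores a value computed from a load whose
// chain depends on candidate store A, then merging A and B would make the
// merged store a predecessor of itself, i.e. a cycle.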
22088bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
22089 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
22090 SDNode *RootNode) {
22091 // FIXME: We should be able to truncate a full search of
22092 // predecessors by doing a BFS and keeping tabs on the originating
22093 // stores from which worklist nodes come, in a similar way to
22094 // TokenFactor simplification.
22095
22096 SmallPtrSet<const SDNode *, 32> Visited;
22097 SmallVector<const SDNode *, 8> Worklist;
22098
22099 // RootNode is a predecessor to all candidates so we need not search
22100 // past it. Add RootNode (peeking through TokenFactors). Do not count
22101 // these towards size check.
22102
22103 Worklist.push_back(RootNode);
22104 while (!Worklist.empty()) {
22105 auto N = Worklist.pop_back_val();
22106 if (!Visited.insert(N).second)
22107 continue; // Already present in Visited.
22108 if (N->getOpcode() == ISD::TokenFactor) {
22109 for (SDValue Op : N->ops())
22110 Worklist.push_back(Op.getNode());
22111 }
22112 }
22113
22114 // Don't count pruning nodes towards max.
22115 unsigned int Max = 1024 + Visited.size();
22116 // Search Ops of store candidates.
22117 for (unsigned i = 0; i < NumStores; ++i) {
22118 SDNode *N = StoreNodes[i].MemNode;
22119 // Of the 4 Store Operands:
22120 // * Chain (Op 0) -> We have already considered these
22121 // in candidate selection, but only by following the
22122 // chain dependencies. We could still have a chain
22123 // dependency to a load, that has a non-chain dep to
22124 // another load, that depends on a store, etc. So it is
22125 // possible to have dependencies that consist of a mix
22126 // of chain and non-chain deps, and we need to include
22127 // chain operands in the analysis here.
22128 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
22129 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
22130 // but aren't necessarily from the same base node, so
22131 // cycles possible (e.g. via indexed store).
22132 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
22133 // non-indexed stores). Not constant on all targets (e.g. ARM)
22134 // and so can participate in a cycle.
22135 for (const SDValue &Op : N->op_values())
22136 Worklist.push_back(Op.getNode());
22137 }
22138 // Search through DAG. We can stop early if we find a store node.
22139 for (unsigned i = 0; i < NumStores; ++i)
22140 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
22141 Max)) {
22142 // If the search bails out, record the StoreNode and RootNode in the
22143 // StoreRootCountMap. If we have seen the pair many times over the limit,
22144 // we won't add the StoreNode into the StoreNodes set again.
22145 if (Visited.size() >= Max) {
22146 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
22147 if (RootCount.first == RootNode)
22148 RootCount.second++;
22149 else
22150 RootCount = {RootNode, 1};
22151 }
22152 return false;
22153 }
22154 return true;
22155}
22156
22157bool DAGCombiner::hasCallInLdStChain(StoreSDNode *St, LoadSDNode *Ld) {
22158 SmallPtrSet<const SDNode *, 32> Visited;
22159 SmallVector<std::pair<const SDNode *, bool>, 8> Worklist;
22160 Worklist.emplace_back(St->getChain().getNode(), false);
22161
22162 while (!Worklist.empty()) {
22163 auto [Node, FoundCall] = Worklist.pop_back_val();
22164 if (!Visited.insert(Node).second || Node->getNumOperands() == 0)
22165 continue;
22166
22167 switch (Node->getOpcode()) {
22168 case ISD::CALLSEQ_END:
22169 Worklist.emplace_back(Node->getOperand(0).getNode(), true);
22170 break;
22171 case ISD::TokenFactor:
22172 for (SDValue Op : Node->ops())
22173 Worklist.emplace_back(Op.getNode(), FoundCall);
22174 break;
22175 case ISD::LOAD:
22176 if (Node == Ld)
22177 return FoundCall;
22178 [[fallthrough]];
22179 default:
22180 assert(Node->getOperand(0).getValueType() == MVT::Other &&
22181 "Invalid chain type");
22182 Worklist.emplace_back(Node->getOperand(0).getNode(), FoundCall);
22183 break;
22184 }
22185 }
22186 return false;
22187}
22188
22189unsigned
22190DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
22191 int64_t ElementSizeBytes) const {
22192 while (true) {
22193 // Find a store past the width of the first store.
22194 size_t StartIdx = 0;
22195 while ((StartIdx + 1 < StoreNodes.size()) &&
22196 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
22197 StoreNodes[StartIdx + 1].OffsetFromBase)
22198 ++StartIdx;
22199
22200 // Bail if we don't have enough candidates to merge.
22201 if (StartIdx + 1 >= StoreNodes.size())
22202 return 0;
22203
22204 // Trim stores that overlapped with the first store.
22205 if (StartIdx)
22206 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
22207
22208 // Scan the memory operations on the chain and find the first
22209 // non-consecutive store memory address.
22210 unsigned NumConsecutiveStores = 1;
22211 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
22212 // Check that the addresses are consecutive starting from the second
22213 // element in the list of stores.
22214 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
22215 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
22216 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
22217 break;
22218 NumConsecutiveStores = i + 1;
22219 }
22220 if (NumConsecutiveStores > 1)
22221 return NumConsecutiveStores;
22222
22223 // There are no consecutive stores at the start of the list.
22224 // Remove the first store and try again.
22225 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
22226 }
22227}
22228
22229bool DAGCombiner::tryStoreMergeOfConstants(
22230 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
22231 EVT MemVT, SDNode *RootNode, bool AllowVectors) {
22232 LLVMContext &Context = *DAG.getContext();
22233 const DataLayout &DL = DAG.getDataLayout();
22234 int64_t ElementSizeBytes = MemVT.getStoreSize();
22235 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
22236 bool MadeChange = false;
22237
22238 // Store the constants into memory as one consecutive store.
22239 while (NumConsecutiveStores >= 2) {
22240 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
22241 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
22242 Align FirstStoreAlign = FirstInChain->getAlign();
22243 unsigned LastLegalType = 1;
22244 unsigned LastLegalVectorType = 1;
22245 bool LastIntegerTrunc = false;
22246 bool NonZero = false;
22247 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
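// Index of the first zero-valued store that follows a non-zero one; used
// below so that we never skip past it when no mergeable sequence is found.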
22248 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
22249 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
22250 SDValue StoredVal = ST->getValue();
22251 bool IsElementZero = false;
22252 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
22253 IsElementZero = C->isZero();
22254 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
22255 IsElementZero = C->getConstantFPValue()->isNullValue();
22256 else if (ISD::isBuildVectorAllZeros(StoredVal.getNode()))
22257 IsElementZero = true;
22258 if (IsElementZero) {
22259 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
22260 FirstZeroAfterNonZero = i;
22261 }
22262 NonZero |= !IsElementZero;
22263
22264 // Find a legal type for the constant store.
22265 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
22266 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
22267 unsigned IsFast = 0;
22268
22269 // Break early when size is too large to be legal.
22270 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
22271 break;
22272
22273 if (TLI.isTypeLegal(StoreTy) &&
22274 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
22275 DAG.getMachineFunction()) &&
22276 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22277 *FirstInChain->getMemOperand(), &IsFast) &&
22278 IsFast) {
22279 LastIntegerTrunc = false;
22280 LastLegalType = i + 1;
22281 // Or check whether a truncstore is legal.
22282 } else if (TLI.getTypeAction(Context, StoreTy) ==
22283 TargetLowering::TypePromoteInteger) {
22284 EVT LegalizedStoredValTy =
22285 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
22286 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
22287 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
22288 DAG.getMachineFunction()) &&
22289 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22290 *FirstInChain->getMemOperand(), &IsFast) &&
22291 IsFast) {
22292 LastIntegerTrunc = true;
22293 LastLegalType = i + 1;
22294 }
22295 }
22296
22297 // We only use vectors if the target allows it and the function is not
22298 // marked with the noimplicitfloat attribute.
22299 if (TLI.storeOfVectorConstantIsCheap(!NonZero, MemVT, i + 1, FirstStoreAS) &&
22300 AllowVectors) {
22301 // Find a legal type for the vector store.
22302 unsigned Elts = (i + 1) * NumMemElts;
22303 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
22304 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
22305 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
22306 TLI.allowsMemoryAccess(Context, DL, Ty,
22307 *FirstInChain->getMemOperand(), &IsFast) &&
22308 IsFast)
22309 LastLegalVectorType = i + 1;
22310 }
22311 }
22312
22313 bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
22314 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
22315 bool UseTrunc = LastIntegerTrunc && !UseVector;
22316
22317 // Check if we found a legal integer type that creates a meaningful
22318 // merge.
22319 if (NumElem < 2) {
22320 // We know that candidate stores are in order and of correct
22321 // shape. While there is no mergeable sequence from the
22322 // beginning one may start later in the sequence. The only
22323 // reason a merge of size N could have failed where another of
22324 // the same size would not have, is if the alignment has
22325 // improved or we've dropped a non-zero value. Drop as many
22326 // candidates as we can here.
22327 unsigned NumSkip = 1;
22328 while ((NumSkip < NumConsecutiveStores) &&
22329 (NumSkip < FirstZeroAfterNonZero) &&
22330 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
22331 NumSkip++;
22332
22333 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
22334 NumConsecutiveStores -= NumSkip;
22335 continue;
22336 }
22337
22338 // Check that we can merge these candidates without causing a cycle.
22339 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
22340 RootNode)) {
22341 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
22342 NumConsecutiveStores -= NumElem;
22343 continue;
22344 }
22345
22346 MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
22347 /*IsConstantSrc*/ true,
22348 UseVector, UseTrunc);
22349
22350 // Remove merged stores for next iteration.
22351 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
22352 NumConsecutiveStores -= NumElem;
22353 }
22354 return MadeChange;
22355}
22356
22357bool DAGCombiner::tryStoreMergeOfExtracts(
22358 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
22359 EVT MemVT, SDNode *RootNode) {
22360 LLVMContext &Context = *DAG.getContext();
22361 const DataLayout &DL = DAG.getDataLayout();
22362 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
22363 bool MadeChange = false;
22364
22365 // Loop on Consecutive Stores on success.
22366 while (NumConsecutiveStores >= 2) {
22367 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
22368 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
22369 Align FirstStoreAlign = FirstInChain->getAlign();
22370 unsigned NumStoresToMerge = 1;
22371 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
22372 // Find a legal type for the vector store.
22373 unsigned Elts = (i + 1) * NumMemElts;
22374 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
22375 unsigned IsFast = 0;
22376
22377 // Break early when size is too large to be legal.
22378 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
22379 break;
22380
22381 if (TLI.isTypeLegal(Ty) &&
22382 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
22383 TLI.allowsMemoryAccess(Context, DL, Ty,
22384 *FirstInChain->getMemOperand(), &IsFast) &&
22385 IsFast)
22386 NumStoresToMerge = i + 1;
22387 }
22388
22389 // Check if we found a legal integer type creating a meaningful
22390 // merge.
22391 if (NumStoresToMerge < 2) {
22392 // We know that candidate stores are in order and of correct
22393 // shape. While there is no mergeable sequence from the
22394 // beginning one may start later in the sequence. The only
22395 // reason a merge of size N could have failed where another of
22396 // the same size would not have, is if the alignment has
22397 // improved. Drop as many candidates as we can here.
22398 unsigned NumSkip = 1;
22399 while ((NumSkip < NumConsecutiveStores) &&
22400 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
22401 NumSkip++;
22402
22403 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
22404 NumConsecutiveStores -= NumSkip;
22405 continue;
22406 }
22407
22408 // Check that we can merge these candidates without causing a cycle.
22409 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
22410 RootNode)) {
22411 StoreNodes.erase(StoreNodes.begin(),
22412 StoreNodes.begin() + NumStoresToMerge);
22413 NumConsecutiveStores -= NumStoresToMerge;
22414 continue;
22415 }
22416
22417 MadeChange |= mergeStoresOfConstantsOrVecElts(
22418 StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
22419 /*UseVector*/ true, /*UseTrunc*/ false);
22420
22421 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
22422 NumConsecutiveStores -= NumStoresToMerge;
22423 }
22424 return MadeChange;
22425}
22426
22427bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
22428 unsigned NumConsecutiveStores, EVT MemVT,
22429 SDNode *RootNode, bool AllowVectors,
22430 bool IsNonTemporalStore,
22431 bool IsNonTemporalLoad) {
22432 LLVMContext &Context = *DAG.getContext();
22433 const DataLayout &DL = DAG.getDataLayout();
22434 int64_t ElementSizeBytes = MemVT.getStoreSize();
22435 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
22436 bool MadeChange = false;
22437
22438 // Look for load nodes which are used by the stored values.
22439 SmallVector<MemOpLink, 8> LoadNodes;
22440
22441 // Find acceptable loads. Loads need to have the same chain (token factor),
22442 // must not be zext, volatile, or indexed, and they must be consecutive.
22443 BaseIndexOffset LdBasePtr;
22444
22445 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
22446 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
22447 SDValue Val = peekThroughBitcasts(St->getValue());
22448 LoadSDNode *Ld = cast<LoadSDNode>(Val);
22449
22450 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
22451 // If this is not the first ptr that we check.
22452 int64_t LdOffset = 0;
22453 if (LdBasePtr.getBase().getNode()) {
22454 // The base ptr must be the same.
22455 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
22456 break;
22457 } else {
22458 // Check that all other base pointers are the same as this one.
22459 LdBasePtr = LdPtr;
22460 }
22461
22462 // We found a potential memory operand to merge.
22463 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
22464 }
22465
22466 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
22467 Align RequiredAlignment;
22468 bool NeedRotate = false;
22469 if (LoadNodes.size() == 2) {
22470 // If we have load/store pair instructions and we only have two values,
22471 // don't bother merging.
22472 if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
22473 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
22474 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
22475 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
22476 break;
22477 }
22478 // If the loads are reversed, see if we can rotate the halves into place.
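      // For example, with 4-byte elements, {store (load p+4) -> q,
      // store (load p) -> q+4} can be covered by a single 8-byte load from p
      // whose halves are swapped (a rotate by half the width) before storing.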
22479 int64_t Offset0 = LoadNodes[0].OffsetFromBase;
22480 int64_t Offset1 = LoadNodes[1].OffsetFromBase;
22481 EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
22482 if (Offset0 - Offset1 == ElementSizeBytes &&
22483 (hasOperation(ISD::ROTL, PairVT) ||
22484 hasOperation(ISD::ROTR, PairVT))) {
22485 std::swap(LoadNodes[0], LoadNodes[1]);
22486 NeedRotate = true;
22487 }
22488 }
22489 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
22490 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
22491 Align FirstStoreAlign = FirstInChain->getAlign();
22492 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
22493
22494 // Scan the memory operations on the chain and find the first
22495 // non-consecutive load memory address. These variables hold the index in
22496 // the store node array.
22497
22498 unsigned LastConsecutiveLoad = 1;
22499
22500 // This variable refers to the size and not index in the array.
22501 unsigned LastLegalVectorType = 1;
22502 unsigned LastLegalIntegerType = 1;
22503 bool isDereferenceable = true;
22504 bool DoIntegerTruncate = false;
22505 int64_t StartAddress = LoadNodes[0].OffsetFromBase;
22506 SDValue LoadChain = FirstLoad->getChain();
22507 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
22508 // All loads must share the same chain.
22509 if (LoadNodes[i].MemNode->getChain() != LoadChain)
22510 break;
22511
22512 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
22513 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
22514 break;
22515 LastConsecutiveLoad = i;
22516
22517 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
22518 isDereferenceable = false;
22519
22520 // Find a legal type for the vector store.
22521 unsigned Elts = (i + 1) * NumMemElts;
22522 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
22523
22524 // Break early when size is too large to be legal.
22525 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
22526 break;
22527
22528 unsigned IsFastSt = 0;
22529 unsigned IsFastLd = 0;
22530 // Don't try vector types if we need a rotate. We may still fail the
22531 // legality checks for the integer type, but we can't handle the rotate
22532 // case with vectors.
22533 // FIXME: We could use a shuffle in place of the rotate.
22534 if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
22535 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
22536 DAG.getMachineFunction()) &&
22537 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22538 *FirstInChain->getMemOperand(), &IsFastSt) &&
22539 IsFastSt &&
22540 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22541 *FirstLoad->getMemOperand(), &IsFastLd) &&
22542 IsFastLd) {
22543 LastLegalVectorType = i + 1;
22544 }
22545
22546 // Find a legal type for the integer store.
22547 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
22548 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
22549 if (TLI.isTypeLegal(StoreTy) &&
22550 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
22551 DAG.getMachineFunction()) &&
22552 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22553 *FirstInChain->getMemOperand(), &IsFastSt) &&
22554 IsFastSt &&
22555 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22556 *FirstLoad->getMemOperand(), &IsFastLd) &&
22557 IsFastLd) {
22558 LastLegalIntegerType = i + 1;
22559 DoIntegerTruncate = false;
22560 // Or check whether a truncstore and extload is legal.
22561 } else if (TLI.getTypeAction(Context, StoreTy) ==
22562                TargetLowering::TypePromoteInteger) {
22563       EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
22564 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
22565 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
22566 DAG.getMachineFunction()) &&
22567 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
22568 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
22569 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
22570 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22571 *FirstInChain->getMemOperand(), &IsFastSt) &&
22572 IsFastSt &&
22573 TLI.allowsMemoryAccess(Context, DL, StoreTy,
22574 *FirstLoad->getMemOperand(), &IsFastLd) &&
22575 IsFastLd) {
22576 LastLegalIntegerType = i + 1;
22577 DoIntegerTruncate = true;
22578 }
22579 }
22580 }
22581
22582 // Only use vector types if the vector type is larger than the integer
22583 // type. If they are the same, use integers.
22584 bool UseVectorTy =
22585 LastLegalVectorType > LastLegalIntegerType && AllowVectors;
22586 unsigned LastLegalType =
22587 std::max(LastLegalVectorType, LastLegalIntegerType);
22588
22589 // We add +1 here because the LastXXX variables refer to location while
22590 // the NumElem refers to array/index size.
22591 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
22592 NumElem = std::min(LastLegalType, NumElem);
22593 Align FirstLoadAlign = FirstLoad->getAlign();
22594
22595 if (NumElem < 2) {
22596 // We know that candidate stores are in order and of correct
22597 // shape. While there is no mergeable sequence from the
22598 // beginning one may start later in the sequence. The only
22599 // reason a merge of size N could have failed where another of
22600       // the same size would not have is if the alignment of either
22601       // the load or store has improved. Drop as many candidates as we
22602 // can here.
22603 unsigned NumSkip = 1;
22604 while ((NumSkip < LoadNodes.size()) &&
22605 (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
22606 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
22607 NumSkip++;
22608 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
22609 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
22610 NumConsecutiveStores -= NumSkip;
22611 continue;
22612 }
22613
22614 // Check that we can merge these candidates without causing a cycle.
22615 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
22616 RootNode)) {
22617 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
22618 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
22619 NumConsecutiveStores -= NumElem;
22620 continue;
22621 }
22622
22623 // Find if it is better to use vectors or integers to load and store
22624 // to memory.
22625 EVT JointMemOpVT;
22626 if (UseVectorTy) {
22627 // Find a legal type for the vector store.
22628 unsigned Elts = NumElem * NumMemElts;
22629 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
22630 } else {
22631 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
22632 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
22633 }
22634
22635 // Check if there is a call in the load/store chain.
22636 if (!TLI.shouldMergeStoreOfLoadsOverCall(MemVT, JointMemOpVT) &&
22637 hasCallInLdStChain(cast<StoreSDNode>(StoreNodes[0].MemNode),
22638 cast<LoadSDNode>(LoadNodes[0].MemNode))) {
22639 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
22640 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
22641 NumConsecutiveStores -= NumElem;
22642 continue;
22643 }
22644
22645 SDLoc LoadDL(LoadNodes[0].MemNode);
22646 SDLoc StoreDL(StoreNodes[0].MemNode);
22647
22648 // The merged loads are required to have the same incoming chain, so
22649 // using the first's chain is acceptable.
22650
22651 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
22652 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
22653 AddToWorklist(NewStoreChain.getNode());
22654
22655     MachineMemOperand::Flags LdMMOFlags =
22656         isDereferenceable ? MachineMemOperand::MODereferenceable
22657                           : MachineMemOperand::MONone;
22658     if (IsNonTemporalLoad)
22659       LdMMOFlags |= MachineMemOperand::MONonTemporal;
22660
22661 LdMMOFlags |= TLI.getTargetMMOFlags(*FirstLoad);
22662
22663     MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
22664                                               ? MachineMemOperand::MONonTemporal
22665                                               : MachineMemOperand::MONone;
22666
22667 StMMOFlags |= TLI.getTargetMMOFlags(*StoreNodes[0].MemNode);
22668
22669 SDValue NewLoad, NewStore;
22670 if (UseVectorTy || !DoIntegerTruncate) {
22671 NewLoad = DAG.getLoad(
22672 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
22673 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
22674 SDValue StoreOp = NewLoad;
22675 if (NeedRotate) {
22676 unsigned LoadWidth = ElementSizeBytes * 8 * 2;
22677 assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
22678 "Unexpected type for rotate-able load pair");
22679 SDValue RotAmt =
22680 DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
22681 // Target can convert to the identical ROTR if it does not have ROTL.
22682 StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
22683 }
22684 NewStore = DAG.getStore(
22685 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
22686 CanReusePtrInfo ? FirstInChain->getPointerInfo()
22687 : MachinePointerInfo(FirstStoreAS),
22688 FirstStoreAlign, StMMOFlags);
22689 } else { // This must be the truncstore/extload case
22690 EVT ExtendedTy =
22691 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
22692 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
22693 FirstLoad->getChain(), FirstLoad->getBasePtr(),
22694 FirstLoad->getPointerInfo(), JointMemOpVT,
22695 FirstLoadAlign, LdMMOFlags);
22696 NewStore = DAG.getTruncStore(
22697 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
22698 CanReusePtrInfo ? FirstInChain->getPointerInfo()
22699 : MachinePointerInfo(FirstStoreAS),
22700 JointMemOpVT, FirstInChain->getAlign(),
22701 FirstInChain->getMemOperand()->getFlags());
22702 }
22703
22704 // Transfer chain users from old loads to the new load.
22705 for (unsigned i = 0; i < NumElem; ++i) {
22706 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
22707       DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
22708                                     SDValue(NewLoad.getNode(), 1));
22709 }
22710
22711 // Replace all stores with the new store. Recursively remove corresponding
22712 // values if they are no longer used.
22713 for (unsigned i = 0; i < NumElem; ++i) {
22714 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
22715 CombineTo(StoreNodes[i].MemNode, NewStore);
22716 if (Val->use_empty())
22717 recursivelyDeleteUnusedNodes(Val.getNode());
22718 }
22719
22720 MadeChange = true;
22721 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
22722 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
22723 NumConsecutiveStores -= NumElem;
22724 }
22725 return MadeChange;
22726}
22727
22728bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
22729 if (OptLevel == CodeGenOptLevel::None || !EnableStoreMerging)
22730 return false;
22731
22732 // TODO: Extend this function to merge stores of scalable vectors.
22733 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
22734 // store since we know <vscale x 16 x i8> is exactly twice as large as
22735 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
22736 EVT MemVT = St->getMemoryVT();
22737 if (MemVT.isScalableVT())
22738 return false;
22739 if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
22740 return false;
22741
22742 // This function cannot currently deal with non-byte-sized memory sizes.
22743 int64_t ElementSizeBytes = MemVT.getStoreSize();
22744 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
22745 return false;
22746
22747 // Do not bother looking at stored values that are not constants, loads, or
22748 // extracted vector elements.
22749 SDValue StoredVal = peekThroughBitcasts(St->getValue());
22750 const StoreSource StoreSrc = getStoreSource(StoredVal);
22751 if (StoreSrc == StoreSource::Unknown)
22752 return false;
22753
22754 SmallVector<MemOpLink, 8> StoreNodes;
22755 // Find potential store merge candidates by searching through chain sub-DAG
22756 SDNode *RootNode = getStoreMergeCandidates(St, StoreNodes);
22757
22758 // Check if there is anything to merge.
22759 if (StoreNodes.size() < 2)
22760 return false;
22761
22762 // Sort the memory operands according to their distance from the
22763 // base pointer.
22764 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
22765 return LHS.OffsetFromBase < RHS.OffsetFromBase;
22766 });
22767
22768 bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
22769 Attribute::NoImplicitFloat);
22770 bool IsNonTemporalStore = St->isNonTemporal();
22771 bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
22772 cast<LoadSDNode>(StoredVal)->isNonTemporal();
22773
22774 // Store Merge attempts to merge the lowest stores. This generally
22775 // works out as if successful, as the remaining stores are checked
22776 // after the first collection of stores is merged. However, in the
22777 // case that a non-mergeable store is found first, e.g., {p[-2],
22778 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
22779 // mergeable cases. To prevent this, we prune such stores from the
22780 // front of StoreNodes here.
22781 bool MadeChange = false;
22782 while (StoreNodes.size() > 1) {
22783 unsigned NumConsecutiveStores =
22784 getConsecutiveStores(StoreNodes, ElementSizeBytes);
22785 // There are no more stores in the list to examine.
22786 if (NumConsecutiveStores == 0)
22787 return MadeChange;
22788
22789 // We have at least 2 consecutive stores. Try to merge them.
22790 assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
22791 switch (StoreSrc) {
22792 case StoreSource::Constant:
22793 MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
22794 MemVT, RootNode, AllowVectors);
22795 break;
22796
22797 case StoreSource::Extract:
22798 MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
22799 MemVT, RootNode);
22800 break;
22801
22802 case StoreSource::Load:
22803 MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
22804 MemVT, RootNode, AllowVectors,
22805 IsNonTemporalStore, IsNonTemporalLoad);
22806 break;
22807
22808 default:
22809 llvm_unreachable("Unhandled store source type");
22810 }
22811 }
22812
22813 // Remember if we failed to optimize, to save compile time.
22814 if (!MadeChange)
22815 ChainsWithoutMergeableStores.insert(RootNode);
22816
22817 return MadeChange;
22818}
22819
22820SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
22821 SDLoc SL(ST);
22822 SDValue ReplStore;
22823
22824 // Replace the chain to avoid dependency.
22825 if (ST->isTruncatingStore()) {
22826 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
22827 ST->getBasePtr(), ST->getMemoryVT(),
22828 ST->getMemOperand());
22829 } else {
22830 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
22831 ST->getMemOperand());
22832 }
22833
22834 // Create token to keep both nodes around.
22835 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
22836 MVT::Other, ST->getChain(), ReplStore);
22837
22838 // Make sure the new and old chains are cleaned up.
22839 AddToWorklist(Token.getNode());
22840
22841 // Don't add users to work list.
22842 return CombineTo(ST, Token, false);
22843}
22844
22845SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
22846 SDValue Value = ST->getValue();
22847 if (Value.getOpcode() == ISD::TargetConstantFP)
22848 return SDValue();
22849
22850 if (!ISD::isNormalStore(ST))
22851 return SDValue();
22852
22853 SDLoc DL(ST);
22854
22855 SDValue Chain = ST->getChain();
22856 SDValue Ptr = ST->getBasePtr();
22857
22858 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
22859
22860 // NOTE: If the original store is volatile, this transform must not increase
22861 // the number of stores. For example, on x86-32 an f64 can be stored in one
22862 // processor operation but an i64 (which is not legal) requires two. So the
22863 // transform should not be done in this case.
22864
22865 SDValue Tmp;
22866 switch (CFP->getSimpleValueType(0).SimpleTy) {
22867 default:
22868 llvm_unreachable("Unknown FP type");
22869 case MVT::f16: // We don't do this for these yet.
22870 case MVT::bf16:
22871 case MVT::f80:
22872 case MVT::f128:
22873 case MVT::ppcf128:
22874 return SDValue();
22875 case MVT::f32:
22876 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
22877 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
22878 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
22879 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
22880 MVT::i32);
22881 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
22882 }
22883
22884 return SDValue();
22885 case MVT::f64:
22886 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
22887 ST->isSimple()) ||
22888 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
22889 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
22890 getZExtValue(), SDLoc(CFP), MVT::i64);
22891 return DAG.getStore(Chain, DL, Tmp,
22892 Ptr, ST->getMemOperand());
22893 }
22894
22895 if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32) &&
22896 !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
22897 // Many FP stores are not made apparent until after legalize, e.g. for
22898 // argument passing. Since this is so common, custom legalize the
22899 // 64-bit integer store into two 32-bit stores.
22900 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
22901 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
22902 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
22903 if (DAG.getDataLayout().isBigEndian())
22904 std::swap(Lo, Hi);
22905
22906 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
22907 AAMDNodes AAInfo = ST->getAAInfo();
22908
22909 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
22910 ST->getBaseAlign(), MMOFlags, AAInfo);
22911 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(4), DL);
22912 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
22913 ST->getPointerInfo().getWithOffset(4),
22914 ST->getBaseAlign(), MMOFlags, AAInfo);
22915 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
22916 St0, St1);
22917 }
22918
22919 return SDValue();
22920 }
22921}
22922
22923// (store (insert_vector_elt (load p), x, i), p) -> (store x, p+offset)
22924//
22925// If a store of a load with an element inserted into it has no other
22926// uses in between the chain, then we can consider the vector store
22927// dead and replace it with just the single scalar element store.
22928SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
22929 SDLoc DL(ST);
22930 SDValue Value = ST->getValue();
22931 SDValue Ptr = ST->getBasePtr();
22932 SDValue Chain = ST->getChain();
22933 if (Value.getOpcode() != ISD::INSERT_VECTOR_ELT || !Value.hasOneUse())
22934 return SDValue();
22935
22936 SDValue Elt = Value.getOperand(1);
22937 SDValue Idx = Value.getOperand(2);
22938
22939 // If the element isn't byte sized or is implicitly truncated then we can't
22940 // compute an offset.
22941 EVT EltVT = Elt.getValueType();
22942 if (!EltVT.isByteSized() ||
22943 EltVT != Value.getOperand(0).getValueType().getVectorElementType())
22944 return SDValue();
22945
22946 auto *Ld = dyn_cast<LoadSDNode>(Value.getOperand(0));
22947 if (!Ld || Ld->getBasePtr() != Ptr ||
22948 ST->getMemoryVT() != Ld->getMemoryVT() || !ST->isSimple() ||
22949 !ISD::isNormalStore(ST) ||
22950 Ld->getAddressSpace() != ST->getAddressSpace() ||
22951       !Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1)))
22952     return SDValue();
22953
22954 unsigned IsFast;
22955 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
22956 Elt.getValueType(), ST->getAddressSpace(),
22957 ST->getAlign(), ST->getMemOperand()->getFlags(),
22958 &IsFast) ||
22959 !IsFast)
22960 return SDValue();
22961
22962 MachinePointerInfo PointerInfo(ST->getAddressSpace());
22963
22964 // If the offset is a known constant then try to recover the pointer
22965 // info
22966 SDValue NewPtr;
22967 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
22968 unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
22969 NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(COffset), DL);
22970 PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
22971 } else {
22972 // The original DAG loaded the entire vector from memory, so arithmetic
22973 // within it must be inbounds.
22974 NewPtr = TLI.getInboundsVectorElementPointer(DAG, Ptr, Value.getValueType(),
22975 Idx);
22976 }
22977
22978 return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(),
22979 ST->getMemOperand()->getFlags());
22980}
22981
22982SDValue DAGCombiner::visitATOMIC_STORE(SDNode *N) {
22983 AtomicSDNode *ST = cast<AtomicSDNode>(N);
22984 SDValue Val = ST->getVal();
22985 EVT VT = Val.getValueType();
22986 EVT MemVT = ST->getMemoryVT();
22987
22988 if (MemVT.bitsLT(VT)) { // Is truncating store
22989 APInt TruncDemandedBits = APInt::getLowBitsSet(VT.getScalarSizeInBits(),
22990 MemVT.getScalarSizeInBits());
22991 // See if we can simplify the operation with SimplifyDemandedBits, which
22992 // only works if the value has a single use.
22993 if (SimplifyDemandedBits(Val, TruncDemandedBits))
22994 return SDValue(N, 0);
22995 }
22996
22997 return SDValue();
22998}
22999
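// Fold a store of a vselect, where one operand is a load from the same
// address, into a masked store of the other operand:
//   (store (vselect Mask, X, (load Ptr)), Ptr) -> (masked_store X, Ptr, Mask)
// and the commuted form with the mask inverted. The lanes that would merely
// have written back the loaded value are left untouched in memory.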
23000 static SDValue foldToMaskedStore(StoreSDNode *Store, SelectionDAG &DAG,
23001                                  const SDLoc &Dl) {
23002 if (!Store->isSimple() || !ISD::isNormalStore(Store))
23003 return SDValue();
23004
23005 SDValue StoredVal = Store->getValue();
23006 SDValue StorePtr = Store->getBasePtr();
23007 SDValue StoreOffset = Store->getOffset();
23008 EVT VT = Store->getMemoryVT();
23009
23010 // Skip this combine for non-vector types and for <1 x ty> vectors, as they
23011 // will be scalarized later.
23012 if (!VT.isVector() || VT.isScalableVector() || VT.getVectorNumElements() == 1)
23013 return SDValue();
23014
23015 unsigned AddrSpace = Store->getAddressSpace();
23016 Align Alignment = Store->getAlign();
23017 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23018
23019 if (!TLI.isOperationLegalOrCustom(ISD::MSTORE, VT) ||
23020 !TLI.allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment))
23021 return SDValue();
23022
23023 SDValue Mask, OtherVec, LoadCh;
23024 unsigned LoadPos;
23025 if (sd_match(StoredVal,
23026 m_VSelect(m_Value(Mask), m_Value(OtherVec),
23027 m_Load(m_Value(LoadCh), m_Specific(StorePtr),
23028 m_Specific(StoreOffset))))) {
23029 LoadPos = 2;
23030 } else if (sd_match(StoredVal,
23031 m_VSelect(m_Value(Mask),
23032 m_Load(m_Value(LoadCh), m_Specific(StorePtr),
23033 m_Specific(StoreOffset)),
23034 m_Value(OtherVec)))) {
23035 LoadPos = 1;
23036 } else {
23037 return SDValue();
23038 }
23039
23040 auto *Load = cast<LoadSDNode>(StoredVal.getOperand(LoadPos));
23041 if (!Load->isSimple() || !ISD::isNormalLoad(Load) ||
23042 Load->getAddressSpace() != AddrSpace)
23043 return SDValue();
23044
23045 if (!Store->getChain().reachesChainWithoutSideEffects(LoadCh))
23046 return SDValue();
23047
23048 if (LoadPos == 1)
23049 Mask = DAG.getNOT(Dl, Mask, Mask.getValueType());
23050
23051 return DAG.getMaskedStore(Store->getChain(), Dl, OtherVec, StorePtr,
23052 StoreOffset, Mask, VT, Store->getMemOperand(),
23053 Store->getAddressingMode());
23054}
23055
23056SDValue DAGCombiner::visitSTORE(SDNode *N) {
23057 StoreSDNode *ST = cast<StoreSDNode>(N);
23058 SDValue Chain = ST->getChain();
23059 SDValue Value = ST->getValue();
23060 SDValue Ptr = ST->getBasePtr();
23061
23062 // If this is a store of a bit convert, store the input value if the
23063 // resultant store does not need a higher alignment than the original.
23064 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
23065 ST->isUnindexed()) {
23066 EVT SVT = Value.getOperand(0).getValueType();
23067 // If the store is volatile, we only want to change the store type if the
23068 // resulting store is legal. Otherwise we might increase the number of
23069 // memory accesses. We don't care if the original type was legal or not
23070 // as we assume software couldn't rely on the number of accesses of an
23071 // illegal type.
23072 // TODO: May be able to relax for unordered atomics (see D66309)
23073 if (((!LegalOperations && ST->isSimple()) ||
23074 TLI.isOperationLegal(ISD::STORE, SVT)) &&
23075 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
23076 DAG, *ST->getMemOperand())) {
23077 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
23078 ST->getMemOperand());
23079 }
23080 }
23081
23082 // Turn 'store undef, Ptr' -> nothing.
23083 if (Value.isUndef() && ST->isUnindexed() && !ST->isVolatile())
23084 return Chain;
23085
23086 // Try to infer better alignment information than the store already has.
23087 if (OptLevel != CodeGenOptLevel::None && ST->isUnindexed() &&
23088 !ST->isAtomic()) {
23089 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
23090 if (*Alignment > ST->getAlign() &&
23091 isAligned(*Alignment, ST->getSrcValueOffset())) {
23092 SDValue NewStore =
23093 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
23094 ST->getMemoryVT(), *Alignment,
23095 ST->getMemOperand()->getFlags(), ST->getAAInfo());
23096 // NewStore will always be N as we are only refining the alignment
23097 assert(NewStore.getNode() == N);
23098 (void)NewStore;
23099 }
23100 }
23101 }
23102
23103 // Try transforming a pair floating point load / store ops to integer
23104 // load / store ops.
23105 if (SDValue NewST = TransformFPLoadStorePair(N))
23106 return NewST;
23107
23108 // Try transforming several stores into STORE (BSWAP).
23109 if (SDValue Store = mergeTruncStores(ST))
23110 return Store;
23111
23112 if (ST->isUnindexed()) {
23113 // Walk up chain skipping non-aliasing memory nodes, on this store and any
23114 // adjacent stores.
23115 if (findBetterNeighborChains(ST)) {
23116 // replaceStoreChain uses CombineTo, which handled all of the worklist
23117 // manipulation. Return the original node to not do anything else.
23118 return SDValue(ST, 0);
23119 }
23120 Chain = ST->getChain();
23121 }
23122
23123 // FIXME: is there such a thing as a truncating indexed store?
23124 if (ST->isTruncatingStore() && ST->isUnindexed() &&
23125 Value.getValueType().isInteger() &&
23126       (!isa<ConstantSDNode>(Value) ||
23127        !cast<ConstantSDNode>(Value)->isOpaque())) {
23128 // Convert a truncating store of a extension into a standard store.
23129 if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
23130 Value.getOpcode() == ISD::SIGN_EXTEND ||
23131 Value.getOpcode() == ISD::ANY_EXTEND) &&
23132 Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
23133 TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
23134 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
23135 ST->getMemOperand());
23136
23137 APInt TruncDemandedBits =
23138 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
23139 ST->getMemoryVT().getScalarSizeInBits());
23140
23141 // See if we can simplify the operation with SimplifyDemandedBits, which
23142 // only works if the value has a single use.
23143 AddToWorklist(Value.getNode());
23144 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
23145 // Re-visit the store if anything changed and the store hasn't been merged
23146       // with another node (N is deleted). SimplifyDemandedBits will add Value's
23147 // node back to the worklist if necessary, but we also need to re-visit
23148 // the Store node itself.
23149 if (N->getOpcode() != ISD::DELETED_NODE)
23150 AddToWorklist(N);
23151 return SDValue(N, 0);
23152 }
23153
23154 // Otherwise, see if we can simplify the input to this truncstore with
23155 // knowledge that only the low bits are being used. For example:
23156 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
23157 if (SDValue Shorter =
23158 TLI.SimplifyMultipleUseDemandedBits(Value, TruncDemandedBits, DAG))
23159 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
23160 ST->getMemOperand());
23161
23162 // If we're storing a truncated constant, see if we can simplify it.
23163 // TODO: Move this to targetShrinkDemandedConstant?
23164 if (auto *Cst = dyn_cast<ConstantSDNode>(Value))
23165 if (!Cst->isOpaque()) {
23166 const APInt &CValue = Cst->getAPIntValue();
23167 APInt NewVal = CValue & TruncDemandedBits;
23168 if (NewVal != CValue) {
23169 SDValue Shorter =
23170 DAG.getConstant(NewVal, SDLoc(N), Value.getValueType());
23171 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr,
23172 ST->getMemoryVT(), ST->getMemOperand());
23173 }
23174 }
23175 }
23176
23177 // If this is a load followed by a store to the same location, then the store
23178 // is dead/noop. Peek through any truncates if canCombineTruncStore failed.
23179 // TODO: Add big-endian truncate support with test coverage.
23180 // TODO: Can relax for unordered atomics (see D66309)
23181 SDValue TruncVal = DAG.getDataLayout().isLittleEndian()
23182                          ? peekThroughTruncates(Value)
23183                          : Value;
23184 if (auto *Ld = dyn_cast<LoadSDNode>(TruncVal)) {
23185 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
23186 ST->isUnindexed() && ST->isSimple() &&
23187 Ld->getAddressSpace() == ST->getAddressSpace() &&
23188 // There can't be any side effects between the load and store, such as
23189 // a call or store.
23190         Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
23191       // The store is dead, remove it.
23192 return Chain;
23193 }
23194 }
23195
23196 // Try scalarizing vector stores of loads where we only change one element
23197 if (SDValue NewST = replaceStoreOfInsertLoad(ST))
23198 return NewST;
23199
23200 // TODO: Can relax for unordered atomics (see D66309)
23201 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
23202 if (ST->isUnindexed() && ST->isSimple() &&
23203 ST1->isUnindexed() && ST1->isSimple()) {
23204 if (OptLevel != CodeGenOptLevel::None && ST1->getBasePtr() == Ptr &&
23205 ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
23206 ST->getAddressSpace() == ST1->getAddressSpace()) {
23207 // If this is a store followed by a store with the same value to the
23208 // same location, then the store is dead/noop.
23209 return Chain;
23210 }
23211
23212 if (OptLevel != CodeGenOptLevel::None && ST1->hasOneUse() &&
23213 !ST1->getBasePtr().isUndef() &&
23214 ST->getAddressSpace() == ST1->getAddressSpace()) {
23215         // If, of the two stores, the smaller one has a scalable vector
23216         // type and the bigger one a fixed type, we cannot allow removal
23217         // of the scalable store because we do not know its final size
23218         // at compile time.
23219 if (ST->getMemoryVT().isScalableVector() ||
23220 ST1->getMemoryVT().isScalableVector()) {
23221 if (ST1->getBasePtr() == Ptr &&
23222 TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(),
23223 ST->getMemoryVT().getStoreSize())) {
23224 CombineTo(ST1, ST1->getChain());
23225 return SDValue(N, 0);
23226 }
23227 } else {
23228 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
23229 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
23230           // If the preceding store (ST1) writes to a subset of the current
23231           // store's location and no other node is chained to it, we can
23232           // effectively drop that preceding store. Do not remove stores to undef as they
23233 // may be used as data sinks.
23234 if (STBase.contains(DAG, ST->getMemoryVT().getFixedSizeInBits(),
23235 ChainBase,
23236 ST1->getMemoryVT().getFixedSizeInBits())) {
23237 CombineTo(ST1, ST1->getChain());
23238 return SDValue(N, 0);
23239 }
23240 }
23241 }
23242 }
23243 }
23244
23245 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
23246 // truncating store. We can do this even if this is already a truncstore.
23247 if ((Value.getOpcode() == ISD::FP_ROUND ||
23248 Value.getOpcode() == ISD::TRUNCATE) &&
23249 Value->hasOneUse() && ST->isUnindexed() &&
23250 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
23251 ST->getMemoryVT(), LegalOperations)) {
23252 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
23253 Ptr, ST->getMemoryVT(), ST->getMemOperand());
23254 }
23255
23256 // Always perform this optimization before types are legal. If the target
23257 // prefers, also try this after legalization to catch stores that were created
23258 // by intrinsics or other nodes.
23259 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
23260 while (true) {
23261 // There can be multiple store sequences on the same chain.
23262 // Keep trying to merge store sequences until we are unable to do so
23263 // or until we merge the last store on the chain.
23264 bool Changed = mergeConsecutiveStores(ST);
23265 if (!Changed) break;
23266 // Return N as merge only uses CombineTo and no worklist clean
23267 // up is necessary.
23268 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
23269 return SDValue(N, 0);
23270 }
23271 }
23272
23273 // Try transforming N to an indexed store.
23274 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
23275 return SDValue(N, 0);
23276
23277 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
23278 //
23279 // Make sure to do this only after attempting to merge stores in order to
23280 // avoid changing the types of some subset of stores due to visit order,
23281 // preventing their merging.
23282 if (isa<ConstantFPSDNode>(ST->getValue())) {
23283 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
23284 return NewSt;
23285 }
23286
23287 if (SDValue NewSt = splitMergedValStore(ST))
23288 return NewSt;
23289
23290 if (SDValue MaskedStore = foldToMaskedStore(ST, DAG, SDLoc(N)))
23291 return MaskedStore;
23292
23293 return ReduceLoadOpStoreWidth(N);
23294}
23295
23296SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
23297 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
23298 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(), 0, false);
23299
23300 // We walk up the chains to find stores.
23301 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
23302 while (!Chains.empty()) {
23303 SDValue Chain = Chains.pop_back_val();
23304 if (!Chain.hasOneUse())
23305 continue;
23306 switch (Chain.getOpcode()) {
23307 case ISD::TokenFactor:
23308 for (unsigned Nops = Chain.getNumOperands(); Nops;)
23309 Chains.push_back(Chain.getOperand(--Nops));
23310 break;
23311     case ISD::LIFETIME_START:
23312     case ISD::LIFETIME_END:
23313 // We can forward past any lifetime start/end that can be proven not to
23314 // alias the node.
23315 if (!mayAlias(Chain.getNode(), N))
23316 Chains.push_back(Chain.getOperand(0));
23317 break;
23318 case ISD::STORE: {
23319 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
23320 // TODO: Can relax for unordered atomics (see D66309)
23321 if (!ST->isSimple() || ST->isIndexed())
23322 continue;
23323 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
23324 // The bounds of a scalable store are not known until runtime, so this
23325 // store cannot be elided.
23326 if (StoreSize.isScalable())
23327 continue;
23328 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
23329 // If we store purely within object bounds just before its lifetime ends,
23330 // we can remove the store.
23331 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
23332 if (LifetimeEndBase.contains(
23333 DAG, MFI.getObjectSize(LifetimeEnd->getFrameIndex()) * 8,
23334 StoreBase, StoreSize.getFixedValue() * 8)) {
23335 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
23336 dbgs() << "\nwithin LIFETIME_END of : ";
23337 LifetimeEndBase.dump(); dbgs() << "\n");
23338 CombineTo(ST, ST->getChain());
23339 return SDValue(N, 0);
23340 }
23341 }
23342 }
23343 }
23344 return SDValue();
23345}
23346
23347/// For the instruction sequence of store below, F and I values
23348/// are bundled together as an i64 value before being stored into memory.
23349 /// Sometimes it is more efficient to generate separate stores for F and I,
23350/// which can remove the bitwise instructions or sink them to colder places.
23351///
23352/// (store (or (zext (bitcast F to i32) to i64),
23353/// (shl (zext I to i64), 32)), addr) -->
23354/// (store F, addr) and (store I, addr+4)
23355///
23356/// Similarly, splitting for other merged store can also be beneficial, like:
23357/// For pair of {i32, i32}, i64 store --> two i32 stores.
23358/// For pair of {i32, i16}, i64 store --> two i32 stores.
23359/// For pair of {i16, i16}, i32 store --> two i16 stores.
23360/// For pair of {i16, i8}, i32 store --> two i16 stores.
23361/// For pair of {i8, i8}, i16 store --> two i8 stores.
23362///
23363/// We allow each target to determine specifically which kind of splitting is
23364/// supported.
23365///
23366/// The store patterns are commonly seen from the simple code snippet below
23367/// if only std::make_pair(...) is sroa transformed before inlined into hoo.
23368/// void goo(const std::pair<int, float> &);
23369/// hoo() {
23370/// ...
23371/// goo(std::make_pair(tmp, ftmp));
23372/// ...
23373/// }
23374///
23375SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
23376 if (OptLevel == CodeGenOptLevel::None)
23377 return SDValue();
23378
23379 // Can't change the number of memory accesses for a volatile store or break
23380 // atomicity for an atomic one.
23381 if (!ST->isSimple())
23382 return SDValue();
23383
23384 SDValue Val = ST->getValue();
23385 SDLoc DL(ST);
23386
23387 // Match OR operand.
23388 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
23389 return SDValue();
23390
23391 // Match SHL operand and get Lower and Higher parts of Val.
23392 SDValue Op1 = Val.getOperand(0);
23393 SDValue Op2 = Val.getOperand(1);
23394 SDValue Lo, Hi;
23395 if (Op1.getOpcode() != ISD::SHL) {
23396 std::swap(Op1, Op2);
23397 if (Op1.getOpcode() != ISD::SHL)
23398 return SDValue();
23399 }
23400 Lo = Op2;
23401 Hi = Op1.getOperand(0);
23402 if (!Op1.hasOneUse())
23403 return SDValue();
23404
23405 // Match shift amount to HalfValBitSize.
23406 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
23407 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
23408 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
23409 return SDValue();
23410
23411 // Lo and Hi are zero-extended from int with size less equal than 32
23412 // to i64.
23413 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
23414 !Lo.getOperand(0).getValueType().isScalarInteger() ||
23415 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
23416 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
23417 !Hi.getOperand(0).getValueType().isScalarInteger() ||
23418 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
23419 return SDValue();
23420
23421 // Use the EVT of low and high parts before bitcast as the input
23422 // of target query.
23423 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
23424 ? Lo.getOperand(0).getValueType()
23425 : Lo.getValueType();
23426 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
23427 ? Hi.getOperand(0).getValueType()
23428 : Hi.getValueType();
23429 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
23430 return SDValue();
23431
23432 // Start to split store.
23433 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
23434 AAMDNodes AAInfo = ST->getAAInfo();
23435
23436 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
23437 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
23438 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
23439 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
23440
23441 SDValue Chain = ST->getChain();
23442 SDValue Ptr = ST->getBasePtr();
23443 // Lower value store.
23444 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
23445 ST->getBaseAlign(), MMOFlags, AAInfo);
23446 Ptr =
23447 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(HalfValBitSize / 8), DL);
23448 // Higher value store.
23449 SDValue St1 = DAG.getStore(
23450 St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
23451 ST->getBaseAlign(), MMOFlags, AAInfo);
23452 return St1;
23453}
23454
23455// Merge an insertion into an existing shuffle:
23456// (insert_vector_elt (vector_shuffle X, Y, Mask),
23457// .(extract_vector_elt X, N), InsIndex)
23458// --> (vector_shuffle X, Y, NewMask)
23459// and variations where shuffle operands may be CONCAT_VECTORS.
23460 static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef<int> Mask,
23461                                 SmallVectorImpl<int> &NewMask, SDValue Elt,
23462 unsigned InsIndex) {
23463 if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
23464       !isa<ConstantSDNode>(Elt.getOperand(1)))
23465     return false;
23466
23467 // Vec's operand 0 is using indices from 0 to N-1 and
23468 // operand 1 from N to 2N - 1, where N is the number of
23469 // elements in the vectors.
23470 SDValue InsertVal0 = Elt.getOperand(0);
23471 int ElementOffset = -1;
23472
23473 // We explore the inputs of the shuffle in order to see if we find the
23474 // source of the extract_vector_elt. If so, we can use it to modify the
23475 // shuffle rather than perform an insert_vector_elt.
23476   SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
23477   ArgWorkList.emplace_back(Mask.size(), Y);
23478 ArgWorkList.emplace_back(0, X);
23479
23480 while (!ArgWorkList.empty()) {
23481 int ArgOffset;
23482 SDValue ArgVal;
23483 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
23484
23485 if (ArgVal == InsertVal0) {
23486 ElementOffset = ArgOffset;
23487 break;
23488 }
23489
23490 // Peek through concat_vector.
23491 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
23492 int CurrentArgOffset =
23493 ArgOffset + ArgVal.getValueType().getVectorNumElements();
23494 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
23495 for (SDValue Op : reverse(ArgVal->ops())) {
23496 CurrentArgOffset -= Step;
23497 ArgWorkList.emplace_back(CurrentArgOffset, Op);
23498 }
23499
23500 // Make sure we went through all the elements and did not screw up index
23501 // computation.
23502 assert(CurrentArgOffset == ArgOffset);
23503 }
23504 }
23505
23506 // If we failed to find a match, see if we can replace an UNDEF shuffle
23507 // operand.
23508 if (ElementOffset == -1) {
23509 if (!Y.isUndef() || InsertVal0.getValueType() != Y.getValueType())
23510 return false;
23511 ElementOffset = Mask.size();
23512 Y = InsertVal0;
23513 }
23514
23515 NewMask.assign(Mask.begin(), Mask.end());
23516 NewMask[InsIndex] = ElementOffset + Elt.getConstantOperandVal(1);
23517 assert(NewMask[InsIndex] < (int)(2 * Mask.size()) && NewMask[InsIndex] >= 0 &&
23518 "NewMask[InsIndex] is out of bound");
23519 return true;
23520}
23521
23522// Merge an insertion into an existing shuffle:
23523// (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
23524// InsIndex)
23525// --> (vector_shuffle X, Y) and variations where shuffle operands may be
23526// CONCAT_VECTORS.
23527SDValue DAGCombiner::mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex) {
23528 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
23529          "Expected insert_vector_elt");
23530 SDValue InsertVal = N->getOperand(1);
23531 SDValue Vec = N->getOperand(0);
23532
23533 auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec);
23534 if (!SVN || !Vec.hasOneUse())
23535 return SDValue();
23536
23537 ArrayRef<int> Mask = SVN->getMask();
23538 SDValue X = Vec.getOperand(0);
23539 SDValue Y = Vec.getOperand(1);
23540
23541 SmallVector<int, 16> NewMask(Mask);
23542 if (mergeEltWithShuffle(X, Y, Mask, NewMask, InsertVal, InsIndex)) {
23543 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
23544 Vec.getValueType(), SDLoc(N), X, Y, NewMask, DAG);
23545 if (LegalShuffle)
23546 return LegalShuffle;
23547 }
23548
23549 return SDValue();
23550}
23551
23552// Convert a disguised subvector insertion into a shuffle:
23553// insert_vector_elt V, (bitcast X from vector type), IdxC -->
23554// bitcast(shuffle (bitcast V), (extended X), Mask)
23555// Note: We do not use an insert_subvector node because that requires a
23556// legal subvector type.
23557SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
23558 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
23559          "Expected insert_vector_elt");
23560 SDValue InsertVal = N->getOperand(1);
23561
23562 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
23563 !InsertVal.getOperand(0).getValueType().isVector())
23564 return SDValue();
23565
23566 SDValue SubVec = InsertVal.getOperand(0);
23567 SDValue DestVec = N->getOperand(0);
23568 EVT SubVecVT = SubVec.getValueType();
23569 EVT VT = DestVec.getValueType();
23570 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
23571 // Bail out if the inserted value is larger than the vector element, as
23572 // insert_vector_elt performs an implicit truncation in this case.
23573 if (InsertVal.getValueType() != VT.getVectorElementType())
23574 return SDValue();
23575   // If the source only has a single vector element, the cost of creating and
23576   // adding it to a vector is likely to exceed the cost of an insert_vector_elt.
23577 if (NumSrcElts == 1)
23578 return SDValue();
23579 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
23580 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
23581
23582 // Step 1: Create a shuffle mask that implements this insert operation. The
23583 // vector that we are inserting into will be operand 0 of the shuffle, so
23584 // those elements are just 'i'. The inserted subvector is in the first
23585 // positions of operand 1 of the shuffle. Example:
23586 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
23587 SmallVector<int, 16> Mask(NumMaskVals);
23588 for (unsigned i = 0; i != NumMaskVals; ++i) {
23589 if (i / NumSrcElts == InsIndex)
23590 Mask[i] = (i % NumSrcElts) + NumMaskVals;
23591 else
23592 Mask[i] = i;
23593 }
23594
23595 // Bail out if the target can not handle the shuffle we want to create.
23596 EVT SubVecEltVT = SubVecVT.getVectorElementType();
23597 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
23598 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
23599 return SDValue();
23600
23601 // Step 2: Create a wide vector from the inserted source vector by appending
23602 // undefined elements. This is the same size as our destination vector.
23603 SDLoc DL(N);
23604 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
23605 ConcatOps[0] = SubVec;
23606 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
23607
23608 // Step 3: Shuffle in the padded subvector.
23609 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
23610 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
23611 AddToWorklist(PaddedSubV.getNode());
23612 AddToWorklist(DestVecBC.getNode());
23613 AddToWorklist(Shuf.getNode());
23614 return DAG.getBitcast(VT, Shuf);
23615}
23616
23617// Combine insert(shuffle(load, <u,0,1,2>), load, 0) into a single load if
23618// possible and the new load will be quick. We use more loads but less shuffles
23619// and inserts.
23620SDValue DAGCombiner::combineInsertEltToLoad(SDNode *N, unsigned InsIndex) {
23621 EVT VT = N->getValueType(0);
23622
23623   // InsIndex is expected to be the first or last lane.
23624 if (!VT.isFixedLengthVector() ||
23625 (InsIndex != 0 && InsIndex != VT.getVectorNumElements() - 1))
23626 return SDValue();
23627
23628 // Look for a shuffle with the mask u,0,1,2,3,4,5,6 or 1,2,3,4,5,6,7,u
23629 // depending on the InsIndex.
23630 auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
23631 SDValue Scalar = N->getOperand(1);
23632 if (!Shuffle || !all_of(enumerate(Shuffle->getMask()), [&](auto P) {
23633 return InsIndex == P.index() || P.value() < 0 ||
23634 (InsIndex == 0 && P.value() == (int)P.index() - 1) ||
23635 (InsIndex == VT.getVectorNumElements() - 1 &&
23636 P.value() == (int)P.index() + 1);
23637 }))
23638 return SDValue();
23639
23640 // We optionally skip over an extend so long as both loads are extended in the
23641 // same way from the same type.
23642 unsigned Extend = 0;
23643 if (Scalar.getOpcode() == ISD::ZERO_EXTEND ||
23644 Scalar.getOpcode() == ISD::SIGN_EXTEND ||
23645 Scalar.getOpcode() == ISD::ANY_EXTEND) {
23646 Extend = Scalar.getOpcode();
23647 Scalar = Scalar.getOperand(0);
23648 }
23649
23650 auto *ScalarLoad = dyn_cast<LoadSDNode>(Scalar);
23651 if (!ScalarLoad)
23652 return SDValue();
23653
23654 SDValue Vec = Shuffle->getOperand(0);
23655 if (Extend) {
23656 if (Vec.getOpcode() != Extend)
23657 return SDValue();
23658 Vec = Vec.getOperand(0);
23659 }
23660 auto *VecLoad = dyn_cast<LoadSDNode>(Vec);
23661 if (!VecLoad || Vec.getValueType().getScalarType() != Scalar.getValueType())
23662 return SDValue();
23663
23664 int EltSize = ScalarLoad->getValueType(0).getScalarSizeInBits();
23665 if (EltSize == 0 || EltSize % 8 != 0 || !ScalarLoad->isSimple() ||
23666 !VecLoad->isSimple() || VecLoad->getExtensionType() != ISD::NON_EXTLOAD ||
23667 ScalarLoad->getExtensionType() != ISD::NON_EXTLOAD ||
23668 ScalarLoad->getAddressSpace() != VecLoad->getAddressSpace())
23669 return SDValue();
23670
23671   // Check that the offset between the pointers produces a single contiguous
23672   // load.
23673 if (InsIndex == 0) {
23674 if (!DAG.areNonVolatileConsecutiveLoads(ScalarLoad, VecLoad, EltSize / 8,
23675 -1))
23676 return SDValue();
23677 } else {
23678     if (!DAG.areNonVolatileConsecutiveLoads(
23679             VecLoad, ScalarLoad, VT.getVectorNumElements() * EltSize / 8, -1))
23680 return SDValue();
23681 }
23682
23683 // And that the new unaligned load will be fast.
23684 unsigned IsFast = 0;
23685 Align NewAlign = commonAlignment(VecLoad->getAlign(), EltSize / 8);
23686 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
23687 Vec.getValueType(), VecLoad->getAddressSpace(),
23688 NewAlign, VecLoad->getMemOperand()->getFlags(),
23689 &IsFast) ||
23690 !IsFast)
23691 return SDValue();
23692
23693 // Calculate the new Ptr and create the new load.
23694 SDLoc DL(N);
23695 SDValue Ptr = ScalarLoad->getBasePtr();
23696 if (InsIndex != 0)
23697 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), VecLoad->getBasePtr(),
23698 DAG.getConstant(EltSize / 8, DL, Ptr.getValueType()));
23699 MachinePointerInfo PtrInfo =
23700 InsIndex == 0 ? ScalarLoad->getPointerInfo()
23701 : VecLoad->getPointerInfo().getWithOffset(EltSize / 8);
23702
23703 SDValue Load = DAG.getLoad(VecLoad->getValueType(0), DL,
23704 ScalarLoad->getChain(), Ptr, PtrInfo, NewAlign);
23705 DAG.makeEquivalentMemoryOrdering(ScalarLoad, Load.getValue(1));
23706 DAG.makeEquivalentMemoryOrdering(VecLoad, Load.getValue(1));
23707 return Extend ? DAG.getNode(Extend, DL, VT, Load) : Load;
23708}
23709
23710SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
23711 SDValue InVec = N->getOperand(0);
23712 SDValue InVal = N->getOperand(1);
23713 SDValue EltNo = N->getOperand(2);
23714 SDLoc DL(N);
23715
23716 EVT VT = InVec.getValueType();
23717 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
23718
23719 // Insert into out-of-bounds element is undefined.
23720 if (IndexC && VT.isFixedLengthVector() &&
23721 IndexC->getZExtValue() >= VT.getVectorNumElements())
23722 return DAG.getUNDEF(VT);
23723
23724 // Remove redundant insertions:
23725 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
23726 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23727 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
23728 return InVec;
23729
23730 // Remove insert of UNDEF/POISON elements.
23731 if (InVal.isUndef()) {
23732 if (InVal.getOpcode() == ISD::POISON || InVec.getOpcode() == ISD::UNDEF)
23733 return InVec;
23734 return DAG.getFreeze(InVec);
23735 }
23736
23737 if (!IndexC) {
23738 // If this is variable insert to undef vector, it might be better to splat:
23739 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
23740 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
23741 return DAG.getSplat(VT, DL, InVal);
23742
23743 // Extend this type to be byte-addressable
23744 EVT OldVT = VT;
23745 EVT EltVT = VT.getVectorElementType();
23746 bool IsByteSized = EltVT.isByteSized();
23747 if (!IsByteSized) {
23748 EltVT =
23750 VT = VT.changeElementType(EltVT);
23751 }
23752
23753 // Check if this operation will be handled the default way for its type.
23754 auto IsTypeDefaultHandled = [this](EVT VT) {
23755 return TLI.getTypeAction(*DAG.getContext(), VT) ==
23758 };
23759
23760 // Check if this operation is illegal and will be handled the default way,
23761 // even after extending the type to be byte-addressable.
23762 if (IsTypeDefaultHandled(OldVT) && IsTypeDefaultHandled(VT)) {
23763 // For each dynamic insertelt, the default way will save the vector to
23764 // the stack, store at an offset, and load the modified vector. This can
23765 // dramatically increase code size if we have a chain of insertelts on a
23766 // large vector: requiring O(V*C) stores/loads where V = length of
23767 // vector and C is length of chain. If each insertelt is only fed into the
23768 // next, the vector is write-only across this chain, and we can just
23769 // save once before the chain and load after in O(V + C) operations.
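        // For example, three variable-index inserts into an <8 x i32> vector
        // would otherwise each spill and reload the whole vector; rewriting
        // the chain as one spill, three element stores, and one reload touches
        // the vector memory only twice.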
23770         SmallVector<SDNode *, 8> Seq = {N};
23771         unsigned NumDynamic = 1;
23772 while (true) {
23773 SDValue InVec = Seq.back()->getOperand(0);
23774 if (InVec.getOpcode() != ISD::INSERT_VECTOR_ELT)
23775 break;
23776 Seq.push_back(InVec.getNode());
23777 NumDynamic += !isa<ConstantSDNode>(InVec.getOperand(2));
23778 }
23779
23780 // It always and only makes sense to lower this sequence when we have more
23781 // than one dynamic insertelt, since we will not have more than V constant
23782 // insertelts, so we will be reducing the total number of stores+loads.
23783 if (NumDynamic > 1) {
23784 // In cases where the vector is illegal it will be broken down into
23785 // parts and stored in parts - we should use the alignment for the
23786 // smallest part.
23787 Align SmallestAlign = DAG.getReducedAlign(VT, /*UseABI=*/false);
23788         SDValue StackPtr =
23789             DAG.CreateStackTemporary(VT.getStoreSize(), SmallestAlign);
23790 auto &MF = DAG.getMachineFunction();
23791 int FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
23792 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
23793
23794 // Save the vector to the stack
23795 SDValue InVec = Seq.back()->getOperand(0);
23796 if (!IsByteSized)
23797 InVec = DAG.getNode(ISD::ANY_EXTEND, DL, VT, InVec);
23798 SDValue Store = DAG.getStore(DAG.getEntryNode(), DL, InVec, StackPtr,
23799 PtrInfo, SmallestAlign);
23800
23801 // Lower each dynamic insertelt to a store
23802 for (SDNode *N : reverse(Seq)) {
23803 SDValue Elmnt = N->getOperand(1);
23804 SDValue Index = N->getOperand(2);
23805
23806 // Check if we have to extend the element type
23807 if (!IsByteSized && Elmnt.getValueType().bitsLT(EltVT))
23808 Elmnt = DAG.getNode(ISD::ANY_EXTEND, DL, EltVT, Elmnt);
23809
23810 // Store the new element. This may be larger than the vector element
23811 // type, so use a truncating store.
23812 SDValue EltPtr =
23813 TLI.getVectorElementPointer(DAG, StackPtr, VT, Index);
23814 EVT EltVT = Elmnt.getValueType();
23815 Store = DAG.getTruncStore(
23816 Store, DL, Elmnt, EltPtr, MachinePointerInfo::getUnknownStack(MF),
23817 EltVT,
23818 commonAlignment(SmallestAlign, EltVT.getFixedSizeInBits() / 8));
23819 }
23820
23821 // Load the saved vector from the stack
23822 SDValue Load =
23823 DAG.getLoad(VT, DL, Store, StackPtr, PtrInfo, SmallestAlign);
23824 SDValue LoadV = Load.getValue(0);
23825 return IsByteSized ? LoadV : DAG.getAnyExtOrTrunc(LoadV, DL, OldVT);
23826 }
23827 }
23828
23829 return SDValue();
23830 }
23831
23832 if (VT.isScalableVector())
23833 return SDValue();
23834
23835 unsigned NumElts = VT.getVectorNumElements();
23836
23837 // We must know which element is being inserted for folds below here.
23838 unsigned Elt = IndexC->getZExtValue();
23839
23840 // Handle <1 x ???> vector insertion special cases.
23841 if (NumElts == 1) {
23842 // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
23843 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23844 InVal.getOperand(0).getValueType() == VT &&
23845 isNullConstant(InVal.getOperand(1)))
23846 return InVal.getOperand(0);
23847 }
23848
23849 // Canonicalize insert_vector_elt dag nodes.
23850 // Example:
23851 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
23852 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
23853 //
23854 // Do this only if the child insert_vector node has one use; also
23855 // do this only if indices are both constants and Idx1 < Idx0.
23856 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
23857 && isa<ConstantSDNode>(InVec.getOperand(2))) {
23858 unsigned OtherElt = InVec.getConstantOperandVal(2);
23859 if (Elt < OtherElt) {
23860 // Swap nodes.
23861 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
23862 InVec.getOperand(0), InVal, EltNo);
23863 AddToWorklist(NewOp.getNode());
23864 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
23865 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
23866 }
23867 }
23868
23869 if (SDValue Shuf = mergeInsertEltWithShuffle(N, Elt))
23870 return Shuf;
23871
23872 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
23873 return Shuf;
23874
23875 if (SDValue Shuf = combineInsertEltToLoad(N, Elt))
23876 return Shuf;
23877
23878 // Attempt to convert an insert_vector_elt chain into a legal build_vector.
23879 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
23880 // vXi1 vector - we don't need to recurse.
23881 if (NumElts == 1)
23882 return DAG.getBuildVector(VT, DL, {InVal});
23883
23884 // If we haven't already collected the element, insert into the op list.
23885 EVT MaxEltVT = InVal.getValueType();
23886 auto AddBuildVectorOp = [&](SmallVectorImpl<SDValue> &Ops, SDValue Elt,
23887 unsigned Idx) {
23888 if (!Ops[Idx]) {
23889 Ops[Idx] = Elt;
23890 if (VT.isInteger()) {
23891 EVT EltVT = Elt.getValueType();
23892 MaxEltVT = MaxEltVT.bitsGE(EltVT) ? MaxEltVT : EltVT;
23893 }
23894 }
23895 };
23896
23897 // Ensure all the operands are the same value type, fill any missing
23898 // operands with UNDEF and create the BUILD_VECTOR.
23899 auto CanonicalizeBuildVector = [&](SmallVectorImpl<SDValue> &Ops,
23900 bool FreezeUndef = false) {
23901 assert(Ops.size() == NumElts && "Unexpected vector size");
23902 SDValue UndefOp = FreezeUndef ? DAG.getFreeze(DAG.getUNDEF(MaxEltVT))
23903 : DAG.getUNDEF(MaxEltVT);
23904 for (SDValue &Op : Ops) {
23905 if (Op)
23906 Op = VT.isInteger() ? DAG.getAnyExtOrTrunc(Op, DL, MaxEltVT) : Op;
23907 else
23908 Op = UndefOp;
23909 }
23910 return DAG.getBuildVector(VT, DL, Ops);
23911 };
23912
23913    SmallVector<SDValue, 8> Ops(NumElts, SDValue());
23914    Ops[Elt] = InVal;
23915
23916    // Recurse up an INSERT_VECTOR_ELT chain to build a BUILD_VECTOR.
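    // For example (hypothetical v4i32 chain over an undef base vector):
    //   insert_vector_elt (insert_vector_elt undef, a, 0), b, 1
    //     --> build_vector a, b, undef, undef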
23917 for (SDValue CurVec = InVec; CurVec;) {
23918 // UNDEF - build new BUILD_VECTOR from already inserted operands.
23919 if (CurVec.isUndef())
23920 return CanonicalizeBuildVector(Ops);
23921
23922 // FREEZE(UNDEF) - build new BUILD_VECTOR from already inserted operands.
23923 if (ISD::isFreezeUndef(CurVec.getNode()) && CurVec.hasOneUse())
23924 return CanonicalizeBuildVector(Ops, /*FreezeUndef=*/true);
23925
23926 // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR.
23927 if (CurVec.getOpcode() == ISD::BUILD_VECTOR && CurVec.hasOneUse()) {
23928 for (unsigned I = 0; I != NumElts; ++I)
23929 AddBuildVectorOp(Ops, CurVec.getOperand(I), I);
23930 return CanonicalizeBuildVector(Ops);
23931 }
23932
23933 // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR.
23934 if (CurVec.getOpcode() == ISD::SCALAR_TO_VECTOR && CurVec.hasOneUse()) {
23935 AddBuildVectorOp(Ops, CurVec.getOperand(0), 0);
23936 return CanonicalizeBuildVector(Ops);
23937 }
23938
23939 // INSERT_VECTOR_ELT - insert operand and continue up the chain.
23940 if (CurVec.getOpcode() == ISD::INSERT_VECTOR_ELT && CurVec.hasOneUse())
23941 if (auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2)))
23942 if (CurIdx->getAPIntValue().ult(NumElts)) {
23943 unsigned Idx = CurIdx->getZExtValue();
23944 AddBuildVectorOp(Ops, CurVec.getOperand(1), Idx);
23945
23946 // Found entire BUILD_VECTOR.
23947 if (all_of(Ops, [](SDValue Op) { return !!Op; }))
23948 return CanonicalizeBuildVector(Ops);
23949
23950 CurVec = CurVec->getOperand(0);
23951 continue;
23952 }
23953
23954 // VECTOR_SHUFFLE - if all the operands match the shuffle's sources,
23955 // update the shuffle mask (and second operand if we started with unary
23956 // shuffle) and create a new legal shuffle.
23957 if (CurVec.getOpcode() == ISD::VECTOR_SHUFFLE && CurVec.hasOneUse()) {
23958 auto *SVN = cast<ShuffleVectorSDNode>(CurVec);
23959 SDValue LHS = SVN->getOperand(0);
23960 SDValue RHS = SVN->getOperand(1);
23961 SmallVector<int, 16> Mask(SVN->getMask());
23962 bool Merged = true;
23963 for (auto I : enumerate(Ops)) {
23964 SDValue &Op = I.value();
23965 if (Op) {
23966 SmallVector<int, 16> NewMask;
23967 if (!mergeEltWithShuffle(LHS, RHS, Mask, NewMask, Op, I.index())) {
23968 Merged = false;
23969 break;
23970 }
23971 Mask = std::move(NewMask);
23972 }
23973 }
23974 if (Merged)
23975 if (SDValue NewShuffle =
23976 TLI.buildLegalVectorShuffle(VT, DL, LHS, RHS, Mask, DAG))
23977 return NewShuffle;
23978 }
23979
23980 if (!LegalOperations) {
23981 bool IsNull = llvm::isNullConstant(InVal);
23982 // We can convert to AND/OR mask if all insertions are zero or -1
23983 // respectively.
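        // For example (hypothetical v4i32 X, inserting into elements 1 and 3):
        //   inserting 0  --> and X, (build_vector -1, 0, -1, 0)
        //   inserting -1 --> or  X, (build_vector  0, -1, 0, -1)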
23984 if ((IsNull || llvm::isAllOnesConstant(InVal)) &&
23985 all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
23986 count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
23987 SDValue Zero = DAG.getConstant(0, DL, MaxEltVT);
23988 SDValue AllOnes = DAG.getAllOnesConstant(DL, MaxEltVT);
23989        SmallVector<SDValue, 8> Mask(NumElts);
23990
23991 // Build the mask and return the corresponding DAG node.
23992 auto BuildMaskAndNode = [&](SDValue TrueVal, SDValue FalseVal,
23993 unsigned MaskOpcode) {
23994 for (unsigned I = 0; I != NumElts; ++I)
23995 Mask[I] = Ops[I] ? TrueVal : FalseVal;
23996 return DAG.getNode(MaskOpcode, DL, VT, CurVec,
23997 DAG.getBuildVector(VT, DL, Mask));
23998 };
23999
24000 // If all elements are zero, we can use AND with all ones.
24001 if (IsNull)
24002 return BuildMaskAndNode(Zero, AllOnes, ISD::AND);
24003
24004 // If all elements are -1, we can use OR with zero.
24005 return BuildMaskAndNode(AllOnes, Zero, ISD::OR);
24006 }
24007 }
24008
24009 // Failed to find a match in the chain - bail.
24010 break;
24011 }
24012
24013 // See if we can fill in the missing constant elements as zeros.
24014 // TODO: Should we do this for any constant?
24015 APInt DemandedZeroElts = APInt::getZero(NumElts);
24016 for (unsigned I = 0; I != NumElts; ++I)
24017 if (!Ops[I])
24018 DemandedZeroElts.setBit(I);
24019
24020 if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) {
24021 SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT)
24022 : DAG.getConstantFP(0, DL, MaxEltVT);
24023 for (unsigned I = 0; I != NumElts; ++I)
24024 if (!Ops[I])
24025 Ops[I] = Zero;
24026
24027 return CanonicalizeBuildVector(Ops);
24028 }
24029 }
24030
24031 return SDValue();
24032}
24033
24034/// Transform a vector binary operation into a scalar binary operation by moving
24035/// the math/logic after an extract element of a vector.
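// For example (with a constant-foldable operand):
//   (i32 extract_vector_elt (add v4i32:X, (build_vector 1, 2, 3, 4)), 1)
//     --> (i32 add (extract_vector_elt X, 1), 2)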
24036static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG,
24037                                       const SDLoc &DL, bool LegalTypes) {
24038 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24039 SDValue Vec = ExtElt->getOperand(0);
24040 SDValue Index = ExtElt->getOperand(1);
24041 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
24042 unsigned Opc = Vec.getOpcode();
24043 if (!IndexC || !Vec.hasOneUse() || (!TLI.isBinOp(Opc) && Opc != ISD::SETCC) ||
24044 Vec->getNumValues() != 1)
24045 return SDValue();
24046
24047 // Targets may want to avoid this to prevent an expensive register transfer.
24048 if (!TLI.shouldScalarizeBinop(Vec))
24049 return SDValue();
24050
24051 EVT ResVT = ExtElt->getValueType(0);
24052 if (Opc == ISD::SETCC &&
24053 (ResVT != Vec.getValueType().getVectorElementType() || LegalTypes))
24054 return SDValue();
24055
24056 // Extracting an element of a vector constant is constant-folded, so this
24057 // transform is just replacing a vector op with a scalar op while moving the
24058 // extract.
24059 SDValue Op0 = Vec.getOperand(0);
24060 SDValue Op1 = Vec.getOperand(1);
24061 APInt SplatVal;
24062 if (!isAnyConstantBuildVector(Op0, true) &&
24063 !ISD::isConstantSplatVector(Op0.getNode(), SplatVal) &&
24064 !isAnyConstantBuildVector(Op1, true) &&
24065 !ISD::isConstantSplatVector(Op1.getNode(), SplatVal))
24066 return SDValue();
24067
24068 // extractelt (op X, C), IndexC --> op (extractelt X, IndexC), C'
24069 // extractelt (op C, X), IndexC --> op C', (extractelt X, IndexC)
24070 if (Opc == ISD::SETCC) {
24071 EVT OpVT = Op0.getValueType().getVectorElementType();
24072 Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op0, Index);
24073 Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op1, Index);
24074 SDValue NewVal = DAG.getSetCC(
24075 DL, ResVT, Op0, Op1, cast<CondCodeSDNode>(Vec->getOperand(2))->get());
24076 // We may need to sign- or zero-extend the result to match the same
24077 // behaviour as the vector version of SETCC.
24078 unsigned VecBoolContents = TLI.getBooleanContents(Vec.getValueType());
24079 if (ResVT != MVT::i1 &&
24080 VecBoolContents != TargetLowering::UndefinedBooleanContent &&
24081 VecBoolContents != TLI.getBooleanContents(ResVT)) {
24082      if (VecBoolContents == TargetLowering::ZeroOrNegativeOneBooleanContent)
24083        NewVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ResVT, NewVal,
24084 DAG.getValueType(MVT::i1));
24085 else
24086 NewVal = DAG.getZeroExtendInReg(NewVal, DL, MVT::i1);
24087 }
24088 return NewVal;
24089 }
24090 Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op0, Index);
24091 Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op1, Index);
24092 return DAG.getNode(Opc, DL, ResVT, Op0, Op1);
24093}
24094
24095// Given an ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract,
24096// recursively analyse all of its users and try to model them as
24097// bit sequence extractions. If all of them agree on the new, narrower element
24098// type, and all of them can be modelled as ISD::EXTRACT_VECTOR_ELT's of that
24099// new element type, do so now.
24100// This is mainly useful to recover from legalization that scalarized
24101// the vector as wide elements, but tries to rebuild it with narrower elements.
24102//
24103// Some more nodes could be modelled if that helps cover interesting patterns.
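// Illustrative example (little-endian, hypothetical types): given
//   w = extract_vector_elt v2i64:V, 0
//   a = trunc w to i32
//   b = trunc (srl w, 32) to i32
// where a and b are only used by BUILD_VECTOR nodes, the extraction can be
// rebuilt on narrower elements as
//   a = extract_vector_elt (bitcast V to v4i32), 0
//   b = extract_vector_elt (bitcast V to v4i32), 1
// provided the narrower vector type and operations are legal (checked below).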
24104bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(
24105 SDNode *N) {
24106 // We perform this optimization post type-legalization because
24107 // the type-legalizer often scalarizes integer-promoted vectors.
24108  // Performing this optimization before may cause legalization cycles.
24109 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
24110 return false;
24111
24112 // TODO: Add support for big-endian.
24113 if (DAG.getDataLayout().isBigEndian())
24114 return false;
24115
24116 SDValue VecOp = N->getOperand(0);
24117 EVT VecVT = VecOp.getValueType();
24118 assert(!VecVT.isScalableVector() && "Only for fixed vectors.");
24119
24120 // We must start with a constant extraction index.
24121 auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1));
24122 if (!IndexC)
24123 return false;
24124
24125 assert(IndexC->getZExtValue() < VecVT.getVectorNumElements() &&
24126         "Original ISD::EXTRACT_VECTOR_ELT is undefined?");
24127
24128 // TODO: deal with the case of implicit anyext of the extraction.
24129 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
24130 EVT ScalarVT = N->getValueType(0);
24131 if (VecVT.getScalarType() != ScalarVT)
24132 return false;
24133
24134 // TODO: deal with the cases other than everything being integer-typed.
24135 if (!ScalarVT.isScalarInteger())
24136 return false;
24137
24138 struct Entry {
24139 SDNode *Producer;
24140
24141 // Which bits of VecOp does it contain?
24142 unsigned BitPos;
24143 int NumBits;
24144 // NOTE: the actual width of \p Producer may be wider than NumBits!
24145
24146 Entry(Entry &&) = default;
24147 Entry(SDNode *Producer_, unsigned BitPos_, int NumBits_)
24148 : Producer(Producer_), BitPos(BitPos_), NumBits(NumBits_) {}
24149
24150 Entry() = delete;
24151 Entry(const Entry &) = delete;
24152 Entry &operator=(const Entry &) = delete;
24153 Entry &operator=(Entry &&) = delete;
24154 };
24155 SmallVector<Entry, 32> Worklist;
24156  SmallVector<Entry, 32> Leafs;
24157
24158 // We start at the "root" ISD::EXTRACT_VECTOR_ELT.
24159 Worklist.emplace_back(N, /*BitPos=*/VecEltBitWidth * IndexC->getZExtValue(),
24160 /*NumBits=*/VecEltBitWidth);
24161
24162 while (!Worklist.empty()) {
24163 Entry E = Worklist.pop_back_val();
24164 // Does the node not even use any of the VecOp bits?
24165 if (!(E.NumBits > 0 && E.BitPos < VecVT.getSizeInBits() &&
24166 E.BitPos + E.NumBits <= VecVT.getSizeInBits()))
24167      return false; // Let the other combines clean this up first.
24168 // Did we fail to model any of the users of the Producer?
24169 bool ProducerIsLeaf = false;
24170 // Look at each user of this Producer.
24171 for (SDNode *User : E.Producer->users()) {
24172 switch (User->getOpcode()) {
24173 // TODO: support ISD::BITCAST
24174 // TODO: support ISD::ANY_EXTEND
24175 // TODO: support ISD::ZERO_EXTEND
24176 // TODO: support ISD::SIGN_EXTEND
24177 case ISD::TRUNCATE:
24178        // Truncation simply means we keep position, but extract fewer bits.
24179 Worklist.emplace_back(User, E.BitPos,
24180 /*NumBits=*/User->getValueSizeInBits(0));
24181 break;
24182 // TODO: support ISD::SRA
24183 // TODO: support ISD::SHL
24184 case ISD::SRL:
24185 // We should be shifting the Producer by a constant amount.
24186 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(User->getOperand(1));
24187 User->getOperand(0).getNode() == E.Producer && ShAmtC) {
24188 // Logical right-shift means that we start extraction later,
24189 // but stop it at the same position we did previously.
24190 unsigned ShAmt = ShAmtC->getZExtValue();
24191 Worklist.emplace_back(User, E.BitPos + ShAmt, E.NumBits - ShAmt);
24192 break;
24193 }
24194 [[fallthrough]];
24195 default:
24196        // We cannot model this user of the Producer.
24197        // Which means the current Producer will be an ISD::EXTRACT_VECTOR_ELT.
24198        ProducerIsLeaf = true;
24199        // Profitability check: all users that we cannot model
24200 // must be ISD::BUILD_VECTOR's.
24201 if (User->getOpcode() != ISD::BUILD_VECTOR)
24202 return false;
24203 break;
24204 }
24205 }
24206 if (ProducerIsLeaf)
24207 Leafs.emplace_back(std::move(E));
24208 }
24209
24210 unsigned NewVecEltBitWidth = Leafs.front().NumBits;
24211
24212  // If we are still at the same element granularity, give up.
24213 if (NewVecEltBitWidth == VecEltBitWidth)
24214 return false;
24215
24216 // The vector width must be a multiple of the new element width.
24217 if (VecVT.getSizeInBits() % NewVecEltBitWidth != 0)
24218 return false;
24219
24220 // All leafs must agree on the new element width.
24221  // All leafs must not expect any "padding" bits on top of that width.
24222  // All leafs must start extraction at a multiple of that width.
24223 if (!all_of(Leafs, [NewVecEltBitWidth](const Entry &E) {
24224 return (unsigned)E.NumBits == NewVecEltBitWidth &&
24225 E.Producer->getValueSizeInBits(0) == NewVecEltBitWidth &&
24226 E.BitPos % NewVecEltBitWidth == 0;
24227 }))
24228 return false;
24229
24230 EVT NewScalarVT = EVT::getIntegerVT(*DAG.getContext(), NewVecEltBitWidth);
24231 EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewScalarVT,
24232 VecVT.getSizeInBits() / NewVecEltBitWidth);
24233
24234 if (LegalTypes &&
24235 !(TLI.isTypeLegal(NewScalarVT) && TLI.isTypeLegal(NewVecVT)))
24236 return false;
24237
24238 if (LegalOperations &&
24239 !(TLI.isOperationLegalOrCustom(ISD::BITCAST, NewVecVT) &&
24240        TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, NewVecVT)))
24241    return false;
24242
24243 SDValue NewVecOp = DAG.getBitcast(NewVecVT, VecOp);
24244 for (const Entry &E : Leafs) {
24245 SDLoc DL(E.Producer);
24246 unsigned NewIndex = E.BitPos / NewVecEltBitWidth;
24247 assert(NewIndex < NewVecVT.getVectorNumElements() &&
24248 "Creating out-of-bounds ISD::EXTRACT_VECTOR_ELT?");
24249 SDValue V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, NewScalarVT, NewVecOp,
24250 DAG.getVectorIdxConstant(NewIndex, DL));
24251 CombineTo(E.Producer, V);
24252 }
24253
24254 return true;
24255}
24256
24257SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
24258 SDValue VecOp = N->getOperand(0);
24259 SDValue Index = N->getOperand(1);
24260 EVT ScalarVT = N->getValueType(0);
24261 EVT VecVT = VecOp.getValueType();
24262 if (VecOp.isUndef())
24263 return DAG.getUNDEF(ScalarVT);
24264
24265 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
24266 //
24267 // This only really matters if the index is non-constant since other combines
24268 // on the constant elements already work.
24269 SDLoc DL(N);
24270 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
24271 Index == VecOp.getOperand(2)) {
24272 SDValue Elt = VecOp.getOperand(1);
24273 AddUsersToWorklist(VecOp.getNode());
24274 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
24275 }
24276
24277  // (vextract (scalar_to_vector val), 0) -> val
24278 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
24279 // Only 0'th element of SCALAR_TO_VECTOR is defined.
24280 if (DAG.isKnownNeverZero(Index))
24281 return DAG.getUNDEF(ScalarVT);
24282
24283    // Check if the result type doesn't match the inserted element type.
24284    // The inserted element and extracted element may have mismatched bitwidths.
24285    // As a result, EXTRACT_VECTOR_ELT may extend or truncate the extracted value.
24286 SDValue InOp = VecOp.getOperand(0);
24287 if (InOp.getValueType() != ScalarVT) {
24288 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
24289 if (InOp.getValueType().bitsGT(ScalarVT))
24290 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
24291 return DAG.getNode(ISD::ANY_EXTEND, DL, ScalarVT, InOp);
24292 }
24293 return InOp;
24294 }
24295
24296 // extract_vector_elt of out-of-bounds element -> UNDEF
24297 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
24298 if (IndexC && VecVT.isFixedLengthVector() &&
24299 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
24300 return DAG.getUNDEF(ScalarVT);
24301
24302 // extract_vector_elt (build_vector x, y), 1 -> y
24303 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
24304 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
24305 TLI.isTypeLegal(VecVT)) {
24306 assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
24307 VecVT.isFixedLengthVector()) &&
24308 "BUILD_VECTOR used for scalable vectors");
24309 unsigned IndexVal =
24310 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
24311 SDValue Elt = VecOp.getOperand(IndexVal);
24312 EVT InEltVT = Elt.getValueType();
24313
24314 if (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT) ||
24315 isNullConstant(Elt)) {
24316 // Sometimes build_vector's scalar input types do not match result type.
24317 if (ScalarVT == InEltVT)
24318 return Elt;
24319
24320 // TODO: It may be useful to truncate if free if the build_vector
24321 // implicitly converts.
24322 }
24323 }
24324
24325 if (SDValue BO = scalarizeExtractedBinOp(N, DAG, DL, LegalTypes))
24326 return BO;
24327
24328 if (VecVT.isScalableVector())
24329 return SDValue();
24330
24331 // All the code from this point onwards assumes fixed width vectors, but it's
24332 // possible that some of the combinations could be made to work for scalable
24333 // vectors too.
24334 unsigned NumElts = VecVT.getVectorNumElements();
24335 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
24336
24337  // See if the extracted element is constant, in which case fold it if it's
24338 // a legal fp immediate.
24339 if (IndexC && ScalarVT.isFloatingPoint()) {
24340 APInt EltMask = APInt::getOneBitSet(NumElts, IndexC->getZExtValue());
24341 KnownBits KnownElt = DAG.computeKnownBits(VecOp, EltMask);
24342 if (KnownElt.isConstant()) {
24343 APFloat CstFP =
24344 APFloat(ScalarVT.getFltSemantics(), KnownElt.getConstant());
24345 if (TLI.isFPImmLegal(CstFP, ScalarVT))
24346 return DAG.getConstantFP(CstFP, DL, ScalarVT);
24347 }
24348 }
24349
24350 // TODO: These transforms should not require the 'hasOneUse' restriction, but
24351 // there are regressions on multiple targets without it. We can end up with a
24352 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
24353 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
24354 VecOp.hasOneUse()) {
24355    // The vector index of the LSBs of the source depends on the endianness.
24356 bool IsLE = DAG.getDataLayout().isLittleEndian();
24357 unsigned ExtractIndex = IndexC->getZExtValue();
24358 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
24359 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
24360 SDValue BCSrc = VecOp.getOperand(0);
24361 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
24362 return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT);
24363
24364 // TODO: Add support for SCALAR_TO_VECTOR implicit truncation.
24365 if (LegalTypes && BCSrc.getValueType().isInteger() &&
24366 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR &&
24367 BCSrc.getScalarValueSizeInBits() ==
24368            BCSrc.getOperand(0).getValueSizeInBits()) {
24369      // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
24370 // trunc i64 X to i32
24371 SDValue X = BCSrc.getOperand(0);
24372 EVT XVT = X.getValueType();
24373 assert(XVT.isScalarInteger() && ScalarVT.isScalarInteger() &&
24374 "Extract element and scalar to vector can't change element type "
24375 "from FP to integer.");
24376 unsigned XBitWidth = X.getValueSizeInBits();
24377 unsigned Scale = XBitWidth / VecEltBitWidth;
24378 BCTruncElt = IsLE ? 0 : Scale - 1;
24379
24380 // An extract element return value type can be wider than its vector
24381 // operand element type. In that case, the high bits are undefined, so
24382 // it's possible that we may need to extend rather than truncate.
24383 if (ExtractIndex < Scale && XBitWidth > VecEltBitWidth) {
24384 assert(XBitWidth % VecEltBitWidth == 0 &&
24385 "Scalar bitwidth must be a multiple of vector element bitwidth");
24386
24387 if (ExtractIndex != BCTruncElt) {
24388 unsigned ShiftIndex =
24389 IsLE ? ExtractIndex : (Scale - 1) - ExtractIndex;
24390 X = DAG.getNode(
24391 ISD::SRL, DL, XVT, X,
24392 DAG.getShiftAmountConstant(ShiftIndex * VecEltBitWidth, XVT, DL));
24393 }
24394
24395 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
24396 }
24397 }
24398 }
24399
24400 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
24401 // We only perform this optimization before the op legalization phase because
24402 // we may introduce new vector instructions which are not backed by TD
24403 // patterns. For example on AVX, extracting elements from a wide vector
24404 // without using extract_subvector. However, if we can find an underlying
24405 // scalar value, then we can always use that.
24406 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
24407 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
24408 // Find the new index to extract from.
24409 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
24410
24411 // Extracting an undef index is undef.
24412 if (OrigElt == -1)
24413 return DAG.getUNDEF(ScalarVT);
24414
24415 // Select the right vector half to extract from.
24416 SDValue SVInVec;
24417 if (OrigElt < (int)NumElts) {
24418 SVInVec = VecOp.getOperand(0);
24419 } else {
24420 SVInVec = VecOp.getOperand(1);
24421 OrigElt -= NumElts;
24422 }
24423
24424 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
24425 // TODO: Check if shuffle mask is legal?
24426 if (LegalOperations && TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VecVT) &&
24427 !VecOp.hasOneUse())
24428 return SDValue();
24429
24430 SDValue InOp = SVInVec.getOperand(OrigElt);
24431 if (InOp.getValueType() != ScalarVT) {
24432 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
24433 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
24434 }
24435
24436 return InOp;
24437 }
24438
24439 // FIXME: We should handle recursing on other vector shuffles and
24440 // scalar_to_vector here as well.
24441
24442 if (!LegalOperations ||
24443 // FIXME: Should really be just isOperationLegalOrCustom.
24444        TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
24445        TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
24446      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
24447 DAG.getVectorIdxConstant(OrigElt, DL));
24448 }
24449 }
24450
24451 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
24452 // simplify it based on the (valid) extraction indices.
24453 if (llvm::all_of(VecOp->users(), [&](SDNode *Use) {
24454 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
24455 Use->getOperand(0) == VecOp &&
24456 isa<ConstantSDNode>(Use->getOperand(1));
24457 })) {
24458 APInt DemandedElts = APInt::getZero(NumElts);
24459 for (SDNode *User : VecOp->users()) {
24460 auto *CstElt = cast<ConstantSDNode>(User->getOperand(1));
24461 if (CstElt->getAPIntValue().ult(NumElts))
24462 DemandedElts.setBit(CstElt->getZExtValue());
24463 }
24464 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
24465 // We simplified the vector operand of this extract element. If this
24466 // extract is not dead, visit it again so it is folded properly.
24467 if (N->getOpcode() != ISD::DELETED_NODE)
24468 AddToWorklist(N);
24469 return SDValue(N, 0);
24470 }
24471 APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
24472 if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
24473 // We simplified the vector operand of this extract element. If this
24474 // extract is not dead, visit it again so it is folded properly.
24475 if (N->getOpcode() != ISD::DELETED_NODE)
24476 AddToWorklist(N);
24477 return SDValue(N, 0);
24478 }
24479 }
24480
24481 if (refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(N))
24482 return SDValue(N, 0);
24483
24484 // Everything under here is trying to match an extract of a loaded value.
24485 // If the result of load has to be truncated, then it's not necessarily
24486 // profitable.
24487 bool BCNumEltsChanged = false;
24488 EVT ExtVT = VecVT.getVectorElementType();
24489 EVT LVT = ExtVT;
24490 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
24491 return SDValue();
24492
24493 if (VecOp.getOpcode() == ISD::BITCAST) {
24494 // Don't duplicate a load with other uses.
24495 if (!VecOp.hasOneUse())
24496 return SDValue();
24497
24498 EVT BCVT = VecOp.getOperand(0).getValueType();
24499 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
24500 return SDValue();
24501 if (NumElts != BCVT.getVectorNumElements())
24502 BCNumEltsChanged = true;
24503 VecOp = VecOp.getOperand(0);
24504 ExtVT = BCVT.getVectorElementType();
24505 }
24506
24507 // extract (vector load $addr), i --> load $addr + i * size
24508 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
24509 ISD::isNormalLoad(VecOp.getNode()) &&
24510 !Index->hasPredecessor(VecOp.getNode())) {
24511 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
24512 if (VecLoad && VecLoad->isSimple()) {
24513 if (SDValue Scalarized = TLI.scalarizeExtractedVectorLoad(
24514 ScalarVT, SDLoc(N), VecVT, Index, VecLoad, DAG)) {
24515 ++OpsNarrowed;
24516 return Scalarized;
24517 }
24518 }
24519 }
24520
24521 // Perform only after legalization to ensure build_vector / vector_shuffle
24522 // optimizations have already been done.
24523 if (!LegalOperations || !IndexC)
24524 return SDValue();
24525
24526 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
24527 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
24528 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
24529 int Elt = IndexC->getZExtValue();
24530 LoadSDNode *LN0 = nullptr;
24531 if (ISD::isNormalLoad(VecOp.getNode())) {
24532 LN0 = cast<LoadSDNode>(VecOp);
24533 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
24534 VecOp.getOperand(0).getValueType() == ExtVT &&
24535 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
24536 // Don't duplicate a load with other uses.
24537 if (!VecOp.hasOneUse())
24538 return SDValue();
24539
24540 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
24541 }
24542 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
24543 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
24544 // =>
24545 // (load $addr+1*size)
24546
24547 // Don't duplicate a load with other uses.
24548 if (!VecOp.hasOneUse())
24549 return SDValue();
24550
24551 // If the bit convert changed the number of elements, it is unsafe
24552 // to examine the mask.
24553 if (BCNumEltsChanged)
24554 return SDValue();
24555
24556    // Select the input vector, guarding against an out-of-range extract index.
24557 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
24558 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
24559
24560 if (VecOp.getOpcode() == ISD::BITCAST) {
24561 // Don't duplicate a load with other uses.
24562 if (!VecOp.hasOneUse())
24563 return SDValue();
24564
24565 VecOp = VecOp.getOperand(0);
24566 }
24567 if (ISD::isNormalLoad(VecOp.getNode())) {
24568 LN0 = cast<LoadSDNode>(VecOp);
24569 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
24570 Index = DAG.getConstant(Elt, DL, Index.getValueType());
24571 }
24572 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
24573 VecVT.getVectorElementType() == ScalarVT &&
24574 (!LegalTypes ||
24575 TLI.isTypeLegal(
24576                  VecOp.getOperand(0).getValueType().getVectorElementType()))) {
24577    // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
24578 // -> extract_vector_elt a, 0
24579 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
24580 // -> extract_vector_elt a, 1
24581 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
24582 // -> extract_vector_elt b, 0
24583 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
24584 // -> extract_vector_elt b, 1
24585 EVT ConcatVT = VecOp.getOperand(0).getValueType();
24586 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
24587 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL,
24588 Index.getValueType());
24589
24590 SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
24591    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
24592                              ConcatVT.getVectorElementType(),
24593 ConcatOp, NewIdx);
24594 return DAG.getNode(ISD::BITCAST, DL, ScalarVT, Elt);
24595 }
24596
24597 // Make sure we found a non-volatile load and the extractelement is
24598 // the only use.
24599 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
24600 return SDValue();
24601
24602 // If Idx was -1 above, Elt is going to be -1, so just return undef.
24603 if (Elt == -1)
24604 return DAG.getUNDEF(LVT);
24605
24606 if (SDValue Scalarized =
24607 TLI.scalarizeExtractedVectorLoad(LVT, DL, VecVT, Index, LN0, DAG)) {
24608 ++OpsNarrowed;
24609 return Scalarized;
24610 }
24611
24612 return SDValue();
24613}
24614
24615// Simplify (build_vec (ext )) to (bitcast (build_vec ))
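// For example (little-endian, hypothetical types):
//   (v2i32 build_vector (i32 zero_extend i16:a), (i32 zero_extend i16:b))
//     --> (v2i32 bitcast (v4i16 build_vector a, 0, b, 0))
// With any_extend inputs, the filler elements are undef instead of zero.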
24616SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
24617 // We perform this optimization post type-legalization because
24618 // the type-legalizer often scalarizes integer-promoted vectors.
24619 // Performing this optimization before may create bit-casts which
24620 // will be type-legalized to complex code sequences.
24621 // We perform this optimization only before the operation legalizer because we
24622 // may introduce illegal operations.
24623 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
24624 return SDValue();
24625
24626 unsigned NumInScalars = N->getNumOperands();
24627 SDLoc DL(N);
24628 EVT VT = N->getValueType(0);
24629
24630 // Check to see if this is a BUILD_VECTOR of a bunch of values
24631 // which come from any_extend or zero_extend nodes. If so, we can create
24632 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
24633 // optimizations. We do not handle sign-extend because we can't fill the sign
24634 // using shuffles.
24635 EVT SourceType = MVT::Other;
24636 bool AllAnyExt = true;
24637
24638 for (unsigned i = 0; i != NumInScalars; ++i) {
24639 SDValue In = N->getOperand(i);
24640 // Ignore undef inputs.
24641 if (In.isUndef()) continue;
24642
24643 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
24644 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
24645
24646 // Abort if the element is not an extension.
24647 if (!ZeroExt && !AnyExt) {
24648 SourceType = MVT::Other;
24649 break;
24650 }
24651
24652 // The input is a ZeroExt or AnyExt. Check the original type.
24653 EVT InTy = In.getOperand(0).getValueType();
24654
24655 // Check that all of the widened source types are the same.
24656 if (SourceType == MVT::Other)
24657 // First time.
24658 SourceType = InTy;
24659 else if (InTy != SourceType) {
24660      // Multiple incoming types. Abort.
24661 SourceType = MVT::Other;
24662 break;
24663 }
24664
24665 // Check if all of the extends are ANY_EXTENDs.
24666 AllAnyExt &= AnyExt;
24667 }
24668
24669 // In order to have valid types, all of the inputs must be extended from the
24670 // same source type and all of the inputs must be any or zero extend.
24671 // Scalar sizes must be a power of two.
24672 EVT OutScalarTy = VT.getScalarType();
24673 bool ValidTypes =
24674 SourceType != MVT::Other &&
24675      llvm::has_single_bit<uint32_t>(OutScalarTy.getSizeInBits()) &&
24676      llvm::has_single_bit<uint32_t>(SourceType.getSizeInBits());
24677
24678 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
24679 // turn into a single shuffle instruction.
24680 if (!ValidTypes)
24681 return SDValue();
24682
24683 // If we already have a splat buildvector, then don't fold it if it means
24684 // introducing zeros.
24685 if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
24686 return SDValue();
24687
24688 bool isLE = DAG.getDataLayout().isLittleEndian();
24689 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
24690 assert(ElemRatio > 1 && "Invalid element size ratio");
24691 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
24692 DAG.getConstant(0, DL, SourceType);
24693
24694 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
24695 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
24696
24697 // Populate the new build_vector
24698 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
24699 SDValue Cast = N->getOperand(i);
24700 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
24701 Cast.getOpcode() == ISD::ZERO_EXTEND ||
24702 Cast.isUndef()) && "Invalid cast opcode");
24703 SDValue In;
24704 if (Cast.isUndef())
24705 In = DAG.getUNDEF(SourceType);
24706 else
24707 In = Cast->getOperand(0);
24708 unsigned Index = isLE ? (i * ElemRatio) :
24709 (i * ElemRatio + (ElemRatio - 1));
24710
24711 assert(Index < Ops.size() && "Invalid index");
24712 Ops[Index] = In;
24713 }
24714
24715 // The type of the new BUILD_VECTOR node.
24716 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
24717 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
24718 "Invalid vector size");
24719 // Check if the new vector type is legal.
24720 if (!isTypeLegal(VecVT) ||
24721 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
24723 return SDValue();
24724
24725 // Make the new BUILD_VECTOR.
24726 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
24727
24728 // The new BUILD_VECTOR node has the potential to be further optimized.
24729 AddToWorklist(BV.getNode());
24730 // Bitcast to the desired type.
24731 return DAG.getBitcast(VT, BV);
24732}
24733
24734// Simplify (build_vec (trunc $1)
24735// (trunc (srl $1 half-width))
24736// (trunc (srl $1 (2 * half-width))))
24737// to (bitcast $1)
24738SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
24739 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
24740
24741 EVT VT = N->getValueType(0);
24742
24743 // Don't run this before LegalizeTypes if VT is legal.
24744 // Targets may have other preferences.
24745 if (Level < AfterLegalizeTypes && TLI.isTypeLegal(VT))
24746 return SDValue();
24747
24748 // Only for little endian
24749 if (!DAG.getDataLayout().isLittleEndian())
24750 return SDValue();
24751
24752 EVT OutScalarTy = VT.getScalarType();
24753 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
24754
24755 // Only for power of two types to be sure that bitcast works well
24756 if (!isPowerOf2_64(ScalarTypeBitsize))
24757 return SDValue();
24758
24759 unsigned NumInScalars = N->getNumOperands();
24760
24761 // Look through bitcasts
24762 auto PeekThroughBitcast = [](SDValue Op) {
24763 if (Op.getOpcode() == ISD::BITCAST)
24764 return Op.getOperand(0);
24765 return Op;
24766 };
24767
24768 // The source value where all the parts are extracted.
24769 SDValue Src;
24770 for (unsigned i = 0; i != NumInScalars; ++i) {
24771 SDValue In = PeekThroughBitcast(N->getOperand(i));
24772 // Ignore undef inputs.
24773 if (In.isUndef()) continue;
24774
24775 if (In.getOpcode() != ISD::TRUNCATE)
24776 return SDValue();
24777
24778 In = PeekThroughBitcast(In.getOperand(0));
24779
24780 if (In.getOpcode() != ISD::SRL) {
24781 // For now only build_vec without shuffling, handle shifts here in the
24782 // future.
24783 if (i != 0)
24784 return SDValue();
24785
24786 Src = In;
24787 } else {
24788 // In is SRL
24789 SDValue part = PeekThroughBitcast(In.getOperand(0));
24790
24791 if (!Src) {
24792 Src = part;
24793 } else if (Src != part) {
24794 // Vector parts do not stem from the same variable
24795 return SDValue();
24796 }
24797
24798 SDValue ShiftAmtVal = In.getOperand(1);
24799 if (!isa<ConstantSDNode>(ShiftAmtVal))
24800 return SDValue();
24801
24802 uint64_t ShiftAmt = In.getConstantOperandVal(1);
24803
24804 // The extracted value is not extracted at the right position
24805 if (ShiftAmt != i * ScalarTypeBitsize)
24806 return SDValue();
24807 }
24808 }
24809
24810 // Only cast if the size is the same
24811 if (!Src || Src.getValueType().getSizeInBits() != VT.getSizeInBits())
24812 return SDValue();
24813
24814 return DAG.getBitcast(VT, Src);
24815}
24816
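// Helper for reduceBuildVecToShuffle (below): try to express the elements of N
// that VectorMask assigns to inputs LeftIdx and LeftIdx+1 as a single
// VECTOR_SHUFFLE of VecIn1/VecIn2. The inputs are concatenated, split, or
// padded with undefs as needed so that the shuffle can be formed on the result
// type (or on a wider type that is then trimmed via EXTRACT_SUBVECTOR).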
24817SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
24818 ArrayRef<int> VectorMask,
24819 SDValue VecIn1, SDValue VecIn2,
24820 unsigned LeftIdx, bool DidSplitVec) {
24821 EVT VT = N->getValueType(0);
24822 EVT InVT1 = VecIn1.getValueType();
24823 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
24824
24825 unsigned NumElems = VT.getVectorNumElements();
24826 unsigned ShuffleNumElems = NumElems;
24827
24828 // If we artificially split a vector in two already, then the offsets in the
24829 // operands will all be based off of VecIn1, even those in VecIn2.
24830 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
24831
24832 uint64_t VTSize = VT.getFixedSizeInBits();
24833 uint64_t InVT1Size = InVT1.getFixedSizeInBits();
24834 uint64_t InVT2Size = InVT2.getFixedSizeInBits();
24835
24836 assert(InVT2Size <= InVT1Size &&
24837 "Inputs must be sorted to be in non-increasing vector size order.");
24838
24839 // We can't generate a shuffle node with mismatched input and output types.
24840 // Try to make the types match the type of the output.
24841 if (InVT1 != VT || InVT2 != VT) {
24842 if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
24843 // If the output vector length is a multiple of both input lengths,
24844 // we can concatenate them and pad the rest with undefs.
24845 unsigned NumConcats = VTSize / InVT1Size;
24846 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
24847 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
24848 ConcatOps[0] = VecIn1;
24849 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
24850 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
24851 VecIn2 = SDValue();
24852 } else if (InVT1Size == VTSize * 2) {
24853 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
24854 return SDValue();
24855
24856 if (!VecIn2.getNode()) {
24857 // If we only have one input vector, and it's twice the size of the
24858 // output, split it in two.
24859 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
24860 DAG.getVectorIdxConstant(NumElems, DL));
24861 VecIn1 = DAG.getExtractSubvector(DL, VT, VecIn1, 0);
24862 // Since we now have shorter input vectors, adjust the offset of the
24863 // second vector's start.
24864 Vec2Offset = NumElems;
24865 } else {
24866 assert(InVT2Size <= InVT1Size &&
24867 "Second input is not going to be larger than the first one.");
24868
24869 // VecIn1 is wider than the output, and we have another, possibly
24870 // smaller input. Pad the smaller input with undefs, shuffle at the
24871 // input vector width, and extract the output.
24872 // The shuffle type is different than VT, so check legality again.
24873 if (LegalOperations &&
24874            !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
24875          return SDValue();
24876
24877 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
24878 // lower it back into a BUILD_VECTOR. So if the inserted type is
24879 // illegal, don't even try.
24880 if (InVT1 != InVT2) {
24881 if (!TLI.isTypeLegal(InVT2))
24882 return SDValue();
24883 VecIn2 = DAG.getInsertSubvector(DL, DAG.getUNDEF(InVT1), VecIn2, 0);
24884 }
24885 ShuffleNumElems = NumElems * 2;
24886 }
24887 } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
24888 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
24889 ConcatOps[0] = VecIn2;
24890 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
24891 } else if (InVT1Size / VTSize > 1 && InVT1Size % VTSize == 0) {
24892 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems) ||
24893 !TLI.isTypeLegal(InVT1) || !TLI.isTypeLegal(InVT2))
24894 return SDValue();
24895      // If the dest vector has fewer than two elements, then using a shuffle and
24896      // extract from larger regs will cost even more.
24897 if (VT.getVectorNumElements() <= 2 || !VecIn2.getNode())
24898 return SDValue();
24899 assert(InVT2Size <= InVT1Size &&
24900 "Second input is not going to be larger than the first one.");
24901
24902 // VecIn1 is wider than the output, and we have another, possibly
24903 // smaller input. Pad the smaller input with undefs, shuffle at the
24904 // input vector width, and extract the output.
24905 // The shuffle type is different than VT, so check legality again.
24906 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
24907 return SDValue();
24908
24909 if (InVT1 != InVT2) {
24910 VecIn2 = DAG.getInsertSubvector(DL, DAG.getUNDEF(InVT1), VecIn2, 0);
24911 }
24912 ShuffleNumElems = InVT1Size / VTSize * NumElems;
24913 } else {
24914 // TODO: Support cases where the length mismatch isn't exactly by a
24915 // factor of 2.
24916 // TODO: Move this check upwards, so that if we have bad type
24917 // mismatches, we don't create any DAG nodes.
24918 return SDValue();
24919 }
24920 }
24921
24922 // Initialize mask to undef.
24923 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
24924
24925 // Only need to run up to the number of elements actually used, not the
24926 // total number of elements in the shuffle - if we are shuffling a wider
24927 // vector, the high lanes should be set to undef.
24928 for (unsigned i = 0; i != NumElems; ++i) {
24929 if (VectorMask[i] <= 0)
24930 continue;
24931
24932 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
24933 if (VectorMask[i] == (int)LeftIdx) {
24934 Mask[i] = ExtIndex;
24935 } else if (VectorMask[i] == (int)LeftIdx + 1) {
24936 Mask[i] = Vec2Offset + ExtIndex;
24937 }
24938 }
24939
24940  // The type of the input vectors may have changed above.
24941 InVT1 = VecIn1.getValueType();
24942
24943 // If we already have a VecIn2, it should have the same type as VecIn1.
24944 // If we don't, get an undef/zero vector of the appropriate type.
24945 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
24946 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
24947
24948 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
24949 if (ShuffleNumElems > NumElems)
24950 Shuffle = DAG.getExtractSubvector(DL, VT, Shuffle, 0);
24951
24952 return Shuffle;
24953}
24954
24955static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
24956  assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
24957
24958 // First, determine where the build vector is not undef.
24959 // TODO: We could extend this to handle zero elements as well as undefs.
24960 int NumBVOps = BV->getNumOperands();
24961 int ZextElt = -1;
24962 for (int i = 0; i != NumBVOps; ++i) {
24963 SDValue Op = BV->getOperand(i);
24964 if (Op.isUndef())
24965 continue;
24966 if (ZextElt == -1)
24967 ZextElt = i;
24968 else
24969 return SDValue();
24970 }
24971 // Bail out if there's no non-undef element.
24972 if (ZextElt == -1)
24973 return SDValue();
24974
24975 // The build vector contains some number of undef elements and exactly
24976 // one other element. That other element must be a zero-extended scalar
24977 // extracted from a vector at a constant index to turn this into a shuffle.
24978 // Also, require that the build vector does not implicitly truncate/extend
24979 // its elements.
24980 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
24981 EVT VT = BV->getValueType(0);
24982 SDValue Zext = BV->getOperand(ZextElt);
24983 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
24984      Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
24985      !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
24986      Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
24987    return SDValue();
24988
24989 // The zero-extend must be a multiple of the source size, and we must be
24990 // building a vector of the same size as the source of the extract element.
24991 SDValue Extract = Zext.getOperand(0);
24992 unsigned DestSize = Zext.getValueSizeInBits();
24993 unsigned SrcSize = Extract.getValueSizeInBits();
24994 if (DestSize % SrcSize != 0 ||
24995 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
24996 return SDValue();
24997
24998 // Create a shuffle mask that will combine the extracted element with zeros
24999 // and undefs.
25000 int ZextRatio = DestSize / SrcSize;
25001 int NumMaskElts = NumBVOps * ZextRatio;
25002 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
25003 for (int i = 0; i != NumMaskElts; ++i) {
25004 if (i / ZextRatio == ZextElt) {
25005 // The low bits of the (potentially translated) extracted element map to
25006 // the source vector. The high bits map to zero. We will use a zero vector
25007 // as the 2nd source operand of the shuffle, so use the 1st element of
25008 // that vector (mask value is number-of-elements) for the high bits.
25009 int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
25010 ShufMask[i] = (i % ZextRatio == Low) ? Extract.getConstantOperandVal(1)
25011 : NumMaskElts;
25012 }
25013
25014 // Undef elements of the build vector remain undef because we initialize
25015 // the shuffle mask with -1.
25016 }
25017
25018 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
25019 // bitcast (shuffle V, ZeroVec, VectorMask)
25020 SDLoc DL(BV);
25021 EVT VecVT = Extract.getOperand(0).getValueType();
25022 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
25023 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25024 SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
25025 ZeroVec, ShufMask, DAG);
25026 if (!Shuf)
25027 return SDValue();
25028 return DAG.getBitcast(VT, Shuf);
25029}
25030
25031// FIXME: promote to STLExtras.
25032template <typename R, typename T>
25033static auto getFirstIndexOf(R &&Range, const T &Val) {
25034 auto I = find(Range, Val);
25035 if (I == Range.end())
25036 return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
25037 return std::distance(Range.begin(), I);
25038}
25039
25040// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
25041// operations. If the types of the vectors we're extracting from allow it,
25042// turn this into a vector_shuffle node.
25043SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
25044 SDLoc DL(N);
25045 EVT VT = N->getValueType(0);
25046
25047 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
25048 if (!isTypeLegal(VT))
25049 return SDValue();
25050
25051  if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
25052    return V;
25053
25054 // May only combine to shuffle after legalize if shuffle is legal.
25055 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
25056 return SDValue();
25057
25058 bool UsesZeroVector = false;
25059 unsigned NumElems = N->getNumOperands();
25060
25061 // Record, for each element of the newly built vector, which input vector
25062 // that element comes from. -1 stands for undef, 0 for the zero vector,
25063 // and positive values for the input vectors.
25064 // VectorMask maps each element to its vector number, and VecIn maps vector
25065 // numbers to their initial SDValues.
25066
25067 SmallVector<int, 8> VectorMask(NumElems, -1);
25068  SmallVector<SDValue, 8> VecIn;
25069  VecIn.push_back(SDValue());
25070
25071 // If we have a single extract_element with a constant index, track the index
25072 // value.
25073 unsigned OneConstExtractIndex = ~0u;
25074
25075  // Count the number of extract_vector_elt sources (i.e. neither constant nor undef).
25076 unsigned NumExtracts = 0;
25077
25078 for (unsigned i = 0; i != NumElems; ++i) {
25079 SDValue Op = N->getOperand(i);
25080
25081 if (Op.isUndef())
25082 continue;
25083
25084 // See if we can use a blend with a zero vector.
25085 // TODO: Should we generalize this to a blend with an arbitrary constant
25086 // vector?
25087    if (isNullConstant(Op) || isNullFPConstant(Op)) {
25088      UsesZeroVector = true;
25089 VectorMask[i] = 0;
25090 continue;
25091 }
25092
25093 // Not an undef or zero. If the input is something other than an
25094 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
25095 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
25096 return SDValue();
25097
25098 SDValue ExtractedFromVec = Op.getOperand(0);
25099 if (ExtractedFromVec.getValueType().isScalableVector())
25100 return SDValue();
25101 auto *ExtractIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
25102 if (!ExtractIdx)
25103 return SDValue();
25104
25105 if (ExtractIdx->getAsAPIntVal().uge(
25106 ExtractedFromVec.getValueType().getVectorNumElements()))
25107 return SDValue();
25108
25109 // All inputs must have the same element type as the output.
25110 if (VT.getVectorElementType() !=
25111 ExtractedFromVec.getValueType().getVectorElementType())
25112 return SDValue();
25113
25114 OneConstExtractIndex = ExtractIdx->getZExtValue();
25115 ++NumExtracts;
25116
25117 // Have we seen this input vector before?
25118 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
25119 // a map back from SDValues to numbers isn't worth it.
25120 int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
25121 if (Idx == -1) { // A new source vector?
25122 Idx = VecIn.size();
25123 VecIn.push_back(ExtractedFromVec);
25124 }
25125
25126 VectorMask[i] = Idx;
25127 }
25128
25129 // If we didn't find at least one input vector, bail out.
25130 if (VecIn.size() < 2)
25131 return SDValue();
25132
25133  // If all the operands of the BUILD_VECTOR extract from the same
25134 // vector, then split the vector efficiently based on the maximum
25135 // vector access index and adjust the VectorMask and
25136 // VecIn accordingly.
25137 bool DidSplitVec = false;
25138 if (VecIn.size() == 2) {
25139 // If we only found a single constant indexed extract_vector_elt feeding the
25140 // build_vector, do not produce a more complicated shuffle if the extract is
25141 // cheap with other constant/undef elements. Skip broadcast patterns with
25142 // multiple uses in the build_vector.
25143
25144 // TODO: This should be more aggressive about skipping the shuffle
25145 // formation, particularly if VecIn[1].hasOneUse(), and regardless of the
25146 // index.
25147 if (NumExtracts == 1 &&
25150 TLI.isExtractVecEltCheap(VT, OneConstExtractIndex))
25151 return SDValue();
25152
25153 unsigned MaxIndex = 0;
25154 unsigned NearestPow2 = 0;
25155 SDValue Vec = VecIn.back();
25156 EVT InVT = Vec.getValueType();
25157 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
25158
25159 for (unsigned i = 0; i < NumElems; i++) {
25160 if (VectorMask[i] <= 0)
25161 continue;
25162 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
25163 IndexVec[i] = Index;
25164 MaxIndex = std::max(MaxIndex, Index);
25165 }
25166
25167 NearestPow2 = PowerOf2Ceil(MaxIndex);
25168 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
25169 NumElems * 2 < NearestPow2) {
25170 unsigned SplitSize = NearestPow2 / 2;
25171 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
25172 InVT.getVectorElementType(), SplitSize);
25173 if (TLI.isTypeLegal(SplitVT) &&
25174 SplitSize + SplitVT.getVectorNumElements() <=
25175 InVT.getVectorNumElements()) {
25176 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
25177 DAG.getVectorIdxConstant(SplitSize, DL));
25178 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
25179 DAG.getVectorIdxConstant(0, DL));
25180 VecIn.pop_back();
25181 VecIn.push_back(VecIn1);
25182 VecIn.push_back(VecIn2);
25183 DidSplitVec = true;
25184
25185 for (unsigned i = 0; i < NumElems; i++) {
25186 if (VectorMask[i] <= 0)
25187 continue;
25188 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
25189 }
25190 }
25191 }
25192 }
25193
25194 // Sort input vectors by decreasing vector element count,
25195 // while preserving the relative order of equally-sized vectors.
25196  // Note that we keep the first "implicit" zero vector as-is.
25197 SmallVector<SDValue, 8> SortedVecIn(VecIn);
25198 llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
25199 [](const SDValue &a, const SDValue &b) {
25200 return a.getValueType().getVectorNumElements() >
25201 b.getValueType().getVectorNumElements();
25202 });
25203
25204 // We now also need to rebuild the VectorMask, because it referenced element
25205 // order in VecIn, and we just sorted them.
25206 for (int &SourceVectorIndex : VectorMask) {
25207 if (SourceVectorIndex <= 0)
25208 continue;
25209 unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
25210 assert(Idx > 0 && Idx < SortedVecIn.size() &&
25211 VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
25212 SourceVectorIndex = Idx;
25213 }
25214
25215 VecIn = std::move(SortedVecIn);
25216
25217 // TODO: Should this fire if some of the input vectors has illegal type (like
25218 // it does now), or should we let legalization run its course first?
25219
25220 // Shuffle phase:
25221 // Take pairs of vectors, and shuffle them so that the result has elements
25222 // from these vectors in the correct places.
25223 // For example, given:
25224 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
25225 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
25226 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
25227 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
25228 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
25229 // We will generate:
25230 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
25231 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
25232 SmallVector<SDValue, 4> Shuffles;
25233 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
25234 unsigned LeftIdx = 2 * In + 1;
25235 SDValue VecLeft = VecIn[LeftIdx];
25236 SDValue VecRight =
25237 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
25238
25239 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
25240 VecRight, LeftIdx, DidSplitVec))
25241 Shuffles.push_back(Shuffle);
25242 else
25243 return SDValue();
25244 }
25245
25246 // If we need the zero vector as an "ingredient" in the blend tree, add it
25247 // to the list of shuffles.
25248 if (UsesZeroVector)
25249 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
25250 : DAG.getConstantFP(0.0, DL, VT));
25251
25252 // If we only have one shuffle, we're done.
25253 if (Shuffles.size() == 1)
25254 return Shuffles[0];
25255
25256 // Update the vector mask to point to the post-shuffle vectors.
25257 for (int &Vec : VectorMask)
25258 if (Vec == 0)
25259 Vec = Shuffles.size() - 1;
25260 else
25261 Vec = (Vec - 1) / 2;
25262
25263 // More than one shuffle. Generate a binary tree of blends, e.g. if from
25264 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
25265 // generate:
25266 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
25267 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
25268 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
25269 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
25270 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
25271 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
25272 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
25273
25274 // Make sure the initial size of the shuffle list is even.
25275 if (Shuffles.size() % 2)
25276 Shuffles.push_back(DAG.getUNDEF(VT));
25277
25278 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
25279 if (CurSize % 2) {
25280 Shuffles[CurSize] = DAG.getUNDEF(VT);
25281 CurSize++;
25282 }
25283 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
25284 int Left = 2 * In;
25285 int Right = 2 * In + 1;
25286 SmallVector<int, 8> Mask(NumElems, -1);
25287 SDValue L = Shuffles[Left];
25288 ArrayRef<int> LMask;
25289 bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE &&
25290 L.use_empty() && L.getOperand(1).isUndef() &&
25291 L.getOperand(0).getValueType() == L.getValueType();
25292 if (IsLeftShuffle) {
25293 LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
25294 L = L.getOperand(0);
25295 }
25296 SDValue R = Shuffles[Right];
25297 ArrayRef<int> RMask;
25298 bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE &&
25299 R.use_empty() && R.getOperand(1).isUndef() &&
25300 R.getOperand(0).getValueType() == R.getValueType();
25301 if (IsRightShuffle) {
25302 RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
25303 R = R.getOperand(0);
25304 }
25305 for (unsigned I = 0; I != NumElems; ++I) {
25306 if (VectorMask[I] == Left) {
25307 Mask[I] = I;
25308 if (IsLeftShuffle)
25309 Mask[I] = LMask[I];
25310 VectorMask[I] = In;
25311 } else if (VectorMask[I] == Right) {
25312 Mask[I] = I + NumElems;
25313 if (IsRightShuffle)
25314 Mask[I] = RMask[I] + NumElems;
25315 VectorMask[I] = In;
25316 }
25317 }
25318
25319 Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask);
25320 }
25321 }
25322 return Shuffles[0];
25323}
25324
25325// Try to turn a build vector of zero extends of extract vector elts into a
25326 // vector zero extend and possibly an extract subvector.
25327// TODO: Support sign extend?
25328// TODO: Allow undef elements?
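// For illustration (hypothetical types and node names), assuming every element
// reads adjacent lanes of one source vector starting at lane 4:
//   t1: i32 = zero_extend (extract_vector_elt (v8i16 t0), 4)
//   t2: i32 = zero_extend (extract_vector_elt t0, 5)
//   t3: i32 = zero_extend (extract_vector_elt t0, 6)
//   t4: i32 = zero_extend (extract_vector_elt t0, 7)
//   v4i32 = BUILD_VECTOR t1, t2, t3, t4
// becomes
//   v4i32 = zero_extend (v4i16 extract_subvector t0, 4)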
25329SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
25330 if (LegalOperations)
25331 return SDValue();
25332
25333 EVT VT = N->getValueType(0);
25334
25335 bool FoundZeroExtend = false;
25336 SDValue Op0 = N->getOperand(0);
25337 auto checkElem = [&](SDValue Op) -> int64_t {
25338 unsigned Opc = Op.getOpcode();
25339 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
25340 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
25341 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
25342 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
25343 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
25344 return C->getZExtValue();
25345 return -1;
25346 };
25347
25348 // Make sure the first element matches
25349 // (zext (extract_vector_elt X, C))
25350 // Offset must be a constant multiple of the
25351 // known-minimum vector length of the result type.
25352 int64_t Offset = checkElem(Op0);
25353 if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
25354 return SDValue();
25355
25356 unsigned NumElems = N->getNumOperands();
25357 SDValue In = Op0.getOperand(0).getOperand(0);
25358 EVT InSVT = In.getValueType().getScalarType();
25359 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
25360
25361 // Don't create an illegal input type after type legalization.
25362 if (LegalTypes && !TLI.isTypeLegal(InVT))
25363 return SDValue();
25364
25365 // Ensure all the elements come from the same vector and are adjacent.
25366 for (unsigned i = 1; i != NumElems; ++i) {
25367 if ((Offset + i) != checkElem(N->getOperand(i)))
25368 return SDValue();
25369 }
25370
25371 SDLoc DL(N);
25372 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
25373 Op0.getOperand(0).getOperand(1));
25374 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
25375 VT, In);
25376}
25377
25378// If this is a very simple BUILD_VECTOR with first element being a ZERO_EXTEND,
25379 // and all other elements being constant zeros, granularize the BUILD_VECTOR's
25380 // element width, absorbing the ZERO_EXTEND, turning it into a constant zero op.
25381 // This pattern can appear during legalization.
25382//
25383// NOTE: This can be generalized to allow more than a single
25384 // non-constant-zero op, UNDEFs, and to be KnownBits-based.
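// A minimal sketch of the idea (hypothetical types, little-endian, assuming
// i32 and v4i32 are legal on the target):
//   (v2i64 BUILD_VECTOR (i64 zero_extend (i32 X)), (i64 0))
// becomes
//   (v2i64 bitcast (v4i32 BUILD_VECTOR (i32 trunc (i64 zero_extend X)), 0, 0, 0))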
25385SDValue DAGCombiner::convertBuildVecZextToBuildVecWithZeros(SDNode *N) {
25386 // Don't run this after legalization. Targets may have other preferences.
25387 if (Level >= AfterLegalizeDAG)
25388 return SDValue();
25389
25390 // FIXME: support big-endian.
25391 if (DAG.getDataLayout().isBigEndian())
25392 return SDValue();
25393
25394 EVT VT = N->getValueType(0);
25395 EVT OpVT = N->getOperand(0).getValueType();
25396 assert(!VT.isScalableVector() && "Encountered scalable BUILD_VECTOR?");
25397
25398 EVT OpIntVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
25399
25400 if (!TLI.isTypeLegal(OpIntVT) ||
25401 (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::BITCAST, OpIntVT)))
25402 return SDValue();
25403
25404 unsigned EltBitwidth = VT.getScalarSizeInBits();
25405 // NOTE: the actual width of operands may be wider than that!
25406
25407 // Analyze all operands of this BUILD_VECTOR. What is the largest number of
25408 // active bits they all have? We'll want to truncate them all to that width.
25409 unsigned ActiveBits = 0;
25410 APInt KnownZeroOps(VT.getVectorNumElements(), 0);
25411 for (auto I : enumerate(N->ops())) {
25412 SDValue Op = I.value();
25413 // FIXME: support UNDEF elements?
25414 if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
25415 unsigned OpActiveBits =
25416 Cst->getAPIntValue().trunc(EltBitwidth).getActiveBits();
25417 if (OpActiveBits == 0) {
25418 KnownZeroOps.setBit(I.index());
25419 continue;
25420 }
25421 // Profitability check: don't allow non-zero constant operands.
25422 return SDValue();
25423 }
25424 // Profitability check: there must only be a single non-zero operand,
25425 // and it must be the first operand of the BUILD_VECTOR.
25426 if (I.index() != 0)
25427 return SDValue();
25428 // The operand must be a zero-extension itself.
25429 // FIXME: this could be generalized to known leading zeros check.
25430 if (Op.getOpcode() != ISD::ZERO_EXTEND)
25431 return SDValue();
25432 unsigned CurrActiveBits =
25433 Op.getOperand(0).getValueSizeInBits().getFixedValue();
25434 assert(!ActiveBits && "Already encountered non-constant-zero operand?");
25435 ActiveBits = CurrActiveBits;
25436 // We want to at least halve the element size.
25437 if (2 * ActiveBits > EltBitwidth)
25438 return SDValue();
25439 }
25440
25441 // This BUILD_VECTOR must have at least one non-constant-zero operand.
25442 if (ActiveBits == 0)
25443 return SDValue();
25444
25445 // We have EltBitwidth bits, the *minimal* chunk size is ActiveBits,
25446 // into how many chunks can we split our element width?
25447 EVT NewScalarIntVT, NewIntVT;
25448 std::optional<unsigned> Factor;
25449 // We can split the element into at least two chunks, but not into more
25450 // than |_ EltBitwidth / ActiveBits _| chunks. Find the largest split factor
25451 // for which the element width is a multiple of it,
25452 // and the resulting types/operations on that chunk width are legal.
25453 assert(2 * ActiveBits <= EltBitwidth &&
25454 "We know that half or less bits of the element are active.");
25455 for (unsigned Scale = EltBitwidth / ActiveBits; Scale >= 2; --Scale) {
25456 if (EltBitwidth % Scale != 0)
25457 continue;
25458 unsigned ChunkBitwidth = EltBitwidth / Scale;
25459 assert(ChunkBitwidth >= ActiveBits && "As per starting point.");
25460 NewScalarIntVT = EVT::getIntegerVT(*DAG.getContext(), ChunkBitwidth);
25461 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewScalarIntVT,
25462 Scale * N->getNumOperands());
25463 if (!TLI.isTypeLegal(NewScalarIntVT) || !TLI.isTypeLegal(NewIntVT) ||
25464 (LegalOperations &&
25465 !(TLI.isOperationLegalOrCustom(ISD::TRUNCATE, NewScalarIntVT) &&
25466 TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, NewIntVT))))
25467 continue;
25468 Factor = Scale;
25469 break;
25470 }
25471 if (!Factor)
25472 return SDValue();
25473
25474 SDLoc DL(N);
25475 SDValue ZeroOp = DAG.getConstant(0, DL, NewScalarIntVT);
25476
25477 // Recreate the BUILD_VECTOR, with elements now being Factor times smaller.
25478 SmallVector<SDValue, 16> NewOps;
25479 NewOps.reserve(NewIntVT.getVectorNumElements());
25480 for (auto I : enumerate(N->ops())) {
25481 SDValue Op = I.value();
25482 assert(!Op.isUndef() && "FIXME: after allowing UNDEF's, handle them here.");
25483 unsigned SrcOpIdx = I.index();
25484 if (KnownZeroOps[SrcOpIdx]) {
25485 NewOps.append(*Factor, ZeroOp);
25486 continue;
25487 }
25488 Op = DAG.getBitcast(OpIntVT, Op);
25489 Op = DAG.getNode(ISD::TRUNCATE, DL, NewScalarIntVT, Op);
25490 NewOps.emplace_back(Op);
25491 NewOps.append(*Factor - 1, ZeroOp);
25492 }
25493 assert(NewOps.size() == NewIntVT.getVectorNumElements());
25494 SDValue NewBV = DAG.getBuildVector(NewIntVT, DL, NewOps);
25495 NewBV = DAG.getBitcast(VT, NewBV);
25496 return NewBV;
25497}
25498
25499SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
25500 EVT VT = N->getValueType(0);
25501
25502 // A vector built entirely of undefs is undef.
25503 if (ISD::allOperandsUndef(N))
25504 return DAG.getUNDEF(VT);
25505
25506 // If this is a splat of a bitcast from another vector, change to a
25507 // concat_vector.
25508 // For example:
25509 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
25510 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
25511 //
25512 // If X is a build_vector itself, the concat can become a larger build_vector.
25513 // TODO: Maybe this is useful for non-splat too?
25514 if (!LegalOperations) {
25515 SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue();
25516 // Only change build_vector to a concat_vector if the splat value type is
25517 // same as the vector element type.
25518 if (Splat && Splat.getValueType() == VT.getVectorElementType()) {
25519 Splat = peekThroughBitcasts(Splat);
25520 EVT SrcVT = Splat.getValueType();
25521 if (SrcVT.isVector()) {
25522 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
25523 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
25524 SrcVT.getVectorElementType(), NumElts);
25525 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
25526 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
25527 SDValue Concat =
25528 DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), NewVT, Ops);
25529 return DAG.getBitcast(VT, Concat);
25530 }
25531 }
25532 }
25533 }
25534
25535 // Check if we can express BUILD VECTOR via subvector extract.
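// e.g. (illustrative, with X a v4i32 value):
//   (v2i32 BUILD_VECTOR (extract_vector_elt X, 2), (extract_vector_elt X, 3))
//     --> (v2i32 extract_subvector X, 2)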
25536 if (!LegalTypes && (N->getNumOperands() > 1)) {
25537 SDValue Op0 = N->getOperand(0);
25538 auto checkElem = [&](SDValue Op) -> uint64_t {
25539 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
25540 (Op0.getOperand(0) == Op.getOperand(0)))
25541 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
25542 return CNode->getZExtValue();
25543 return -1;
25544 };
25545
25546 int Offset = checkElem(Op0);
25547 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
25548 if (Offset + i != checkElem(N->getOperand(i))) {
25549 Offset = -1;
25550 break;
25551 }
25552 }
25553
25554 if ((Offset == 0) &&
25555 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
25556 return Op0.getOperand(0);
25557 if ((Offset != -1) &&
25558 ((Offset % N->getValueType(0).getVectorNumElements()) ==
25559 0)) // IDX must be multiple of output size.
25560 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
25561 Op0.getOperand(0), Op0.getOperand(1));
25562 }
25563
25564 if (SDValue V = convertBuildVecZextToZext(N))
25565 return V;
25566
25567 if (SDValue V = convertBuildVecZextToBuildVecWithZeros(N))
25568 return V;
25569
25570 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
25571 return V;
25572
25573 if (SDValue V = reduceBuildVecTruncToBitCast(N))
25574 return V;
25575
25576 if (SDValue V = reduceBuildVecToShuffle(N))
25577 return V;
25578
25579 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
25580 // Do this late as some of the above may replace the splat.
25581 if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
25582 if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
25583 assert(!V.isUndef() && "Splat of undef should have been handled earlier");
25584 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
25585 }
25586
25587 return SDValue();
25588}
25589
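// Fold CONCAT_VECTORS of bitcast scalars (or undef) into one BUILD_VECTOR of
// the scalars. Illustrative example (hypothetical, assumes v1i64 is not a
// legal type on the target):
//   concat_vectors (v1i64 bitcast (f64 X)), (v1i64 bitcast (f64 Y))
//     --> v2i64 bitcast (v2f64 BUILD_VECTOR X, Y)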
25590 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
25591 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25592 EVT OpVT = N->getOperand(0).getValueType();
25593
25594 // If the operands are legal vectors, leave them alone.
25595 if (TLI.isTypeLegal(OpVT) || OpVT.isScalableVector())
25596 return SDValue();
25597
25598 SDLoc DL(N);
25599 EVT VT = N->getValueType(0);
25600 SmallVector<SDValue, 8> Ops;
25601 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
25602
25603 // Keep track of what we encounter.
25604 EVT AnyFPVT;
25605
25606 for (const SDValue &Op : N->ops()) {
25607 if (ISD::BITCAST == Op.getOpcode() &&
25608 !Op.getOperand(0).getValueType().isVector())
25609 Ops.push_back(Op.getOperand(0));
25610 else if (Op.isUndef())
25611 Ops.push_back(DAG.getNode(Op.getOpcode(), DL, SVT));
25612 else
25613 return SDValue();
25614
25615 // Note whether we encounter an integer or floating point scalar.
25616 // If it's neither, bail out, it could be something weird like x86mmx.
25617 EVT LastOpVT = Ops.back().getValueType();
25618 if (LastOpVT.isFloatingPoint())
25619 AnyFPVT = LastOpVT;
25620 else if (!LastOpVT.isInteger())
25621 return SDValue();
25622 }
25623
25624 // If any of the operands is a floating point scalar bitcast to a vector,
25625 // use floating point types throughout, and bitcast everything.
25626 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
25627 if (AnyFPVT != EVT()) {
25628 SVT = AnyFPVT;
25629 for (SDValue &Op : Ops) {
25630 if (Op.getValueType() == SVT)
25631 continue;
25632 if (Op.isUndef())
25633 Op = DAG.getNode(Op.getOpcode(), DL, SVT);
25634 else
25635 Op = DAG.getBitcast(SVT, Op);
25636 }
25637 }
25638
25639 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
25640 VT.getSizeInBits() / SVT.getSizeInBits());
25641 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
25642}
25643
25644// Attempt to merge nested concat_vectors/undefs.
25645// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
25646// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
25647 static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
25648 SelectionDAG &DAG) {
25649 EVT VT = N->getValueType(0);
25650
25651 // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
25652 EVT SubVT;
25653 SDValue FirstConcat;
25654 for (const SDValue &Op : N->ops()) {
25655 if (Op.isUndef())
25656 continue;
25657 if (Op.getOpcode() != ISD::CONCAT_VECTORS)
25658 return SDValue();
25659 if (!FirstConcat) {
25660 SubVT = Op.getOperand(0).getValueType();
25661 if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
25662 return SDValue();
25663 FirstConcat = Op;
25664 continue;
25665 }
25666 if (SubVT != Op.getOperand(0).getValueType())
25667 return SDValue();
25668 }
25669 assert(FirstConcat && "Concat of all-undefs found");
25670
25671 SmallVector<SDValue> ConcatOps;
25672 for (const SDValue &Op : N->ops()) {
25673 if (Op.isUndef()) {
25674 ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
25675 continue;
25676 }
25677 ConcatOps.append(Op->op_begin(), Op->op_end());
25678 }
25679 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
25680}
25681
25682// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
25683// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
25684// most two distinct vectors the same size as the result, attempt to turn this
25685// into a legal shuffle.
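// Illustrative example (hypothetical operands A and B, both v4i32):
//   concat_vectors (v2i32 extract_subvector A, 2), (v2i32 extract_subvector B, 0)
//     --> v4i32 vector_shuffle<2,3,4,5> A, B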
25686 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
25687 EVT VT = N->getValueType(0);
25688 EVT OpVT = N->getOperand(0).getValueType();
25689
25690 // We currently can't generate an appropriate shuffle for a scalable vector.
25691 if (VT.isScalableVector())
25692 return SDValue();
25693
25694 int NumElts = VT.getVectorNumElements();
25695 int NumOpElts = OpVT.getVectorNumElements();
25696
25697 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
25698 SmallVector<int, 8> Mask;
25699
25700 for (SDValue Op : N->ops()) {
25701 Op = peekThroughBitcasts(Op);
25702
25703 // UNDEF nodes convert to UNDEF shuffle mask values.
25704 if (Op.isUndef()) {
25705 Mask.append((unsigned)NumOpElts, -1);
25706 continue;
25707 }
25708
25709 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
25710 return SDValue();
25711
25712 // What vector are we extracting the subvector from and at what index?
25713 SDValue ExtVec = Op.getOperand(0);
25714 int ExtIdx = Op.getConstantOperandVal(1);
25715
25716 // We want the EVT of the original extraction to correctly scale the
25717 // extraction index.
25718 EVT ExtVT = ExtVec.getValueType();
25719 ExtVec = peekThroughBitcasts(ExtVec);
25720
25721 // UNDEF nodes convert to UNDEF shuffle mask values.
25722 if (ExtVec.isUndef()) {
25723 Mask.append((unsigned)NumOpElts, -1);
25724 continue;
25725 }
25726
25727 // Ensure that we are extracting a subvector from a vector the same
25728 // size as the result.
25729 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
25730 return SDValue();
25731
25732 // Scale the subvector index to account for any bitcast.
25733 int NumExtElts = ExtVT.getVectorNumElements();
25734 if (0 == (NumExtElts % NumElts))
25735 ExtIdx /= (NumExtElts / NumElts);
25736 else if (0 == (NumElts % NumExtElts))
25737 ExtIdx *= (NumElts / NumExtElts);
25738 else
25739 return SDValue();
25740
25741 // At most we can reference 2 inputs in the final shuffle.
25742 if (SV0.isUndef() || SV0 == ExtVec) {
25743 SV0 = ExtVec;
25744 for (int i = 0; i != NumOpElts; ++i)
25745 Mask.push_back(i + ExtIdx);
25746 } else if (SV1.isUndef() || SV1 == ExtVec) {
25747 SV1 = ExtVec;
25748 for (int i = 0; i != NumOpElts; ++i)
25749 Mask.push_back(i + ExtIdx + NumElts);
25750 } else {
25751 return SDValue();
25752 }
25753 }
25754
25755 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25756 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
25757 DAG.getBitcast(VT, SV1), Mask, DAG);
25758}
25759
25760 static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
25761 unsigned CastOpcode = N->getOperand(0).getOpcode();
25762 switch (CastOpcode) {
25763 case ISD::SINT_TO_FP:
25764 case ISD::UINT_TO_FP:
25765 case ISD::FP_TO_SINT:
25766 case ISD::FP_TO_UINT:
25767 // TODO: Allow more opcodes?
25768 // case ISD::BITCAST:
25769 // case ISD::TRUNCATE:
25770 // case ISD::ZERO_EXTEND:
25771 // case ISD::SIGN_EXTEND:
25772 // case ISD::FP_EXTEND:
25773 break;
25774 default:
25775 return SDValue();
25776 }
25777
25778 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
25779 if (!SrcVT.isVector())
25780 return SDValue();
25781
25782 // All operands of the concat must be the same kind of cast from the same
25783 // source type.
25784 SmallVector<SDValue, 4> SrcOps;
25785 for (SDValue Op : N->ops()) {
25786 if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
25787 Op.getOperand(0).getValueType() != SrcVT)
25788 return SDValue();
25789 SrcOps.push_back(Op.getOperand(0));
25790 }
25791
25792 // The wider cast must be supported by the target. This is unusual because
25793 // the operation support type parameter depends on the opcode. In addition,
25794 // check the other type in the cast to make sure this is really legal.
25795 EVT VT = N->getValueType(0);
25796 EVT SrcEltVT = SrcVT.getVectorElementType();
25797 ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
25798 EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
25799 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25800 switch (CastOpcode) {
25801 case ISD::SINT_TO_FP:
25802 case ISD::UINT_TO_FP:
25803 if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
25804 !TLI.isTypeLegal(VT))
25805 return SDValue();
25806 break;
25807 case ISD::FP_TO_SINT:
25808 case ISD::FP_TO_UINT:
25809 if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
25810 !TLI.isTypeLegal(ConcatSrcVT))
25811 return SDValue();
25812 break;
25813 default:
25814 llvm_unreachable("Unexpected cast opcode");
25815 }
25816
25817 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
25818 SDLoc DL(N);
25819 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
25820 return DAG.getNode(CastOpcode, DL, VT, NewConcat);
25821}
25822
25823// See if this is a simple CONCAT_VECTORS with no UNDEF operands, and if one of
25824// the operands is a SHUFFLE_VECTOR, and all other operands are also operands
25825// to that SHUFFLE_VECTOR, create wider SHUFFLE_VECTOR.
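// Illustrative example (hypothetical v4i32 operand X):
//   concat_vectors (v4i32 vector_shuffle<1,0,3,2> X, undef), (v4i32 X)
//     --> vector_shuffle<1,0,3,2,0,1,2,3> (concat_vectors X, undef), undef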
25826 static SDValue combineConcatVectorOfShuffleAndItsOperands(
25827 SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
25828 bool LegalOperations) {
25829 EVT VT = N->getValueType(0);
25830 EVT OpVT = N->getOperand(0).getValueType();
25831 if (VT.isScalableVector())
25832 return SDValue();
25833
25834 // For now, only allow simple 2-operand concatenations.
25835 if (N->getNumOperands() != 2)
25836 return SDValue();
25837
25838 // Don't create illegal types/shuffles when not allowed to.
25839 if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
25840 (LegalOperations &&
25841 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)))
25842 return SDValue();
25843
25844 // Analyze all of the operands of the CONCAT_VECTORS. Out of all of them,
25845 // we want to find one that is: (1) a SHUFFLE_VECTOR (2) only used by us,
25846 // and (3) all operands of CONCAT_VECTORS must be either that SHUFFLE_VECTOR,
25847 // or one of the operands of that SHUFFLE_VECTOR (but not UNDEF!).
25848 // (4) and for now, the SHUFFLE_VECTOR must be unary.
25849 ShuffleVectorSDNode *SVN = nullptr;
25850 for (SDValue Op : N->ops()) {
25851 if (auto *CurSVN = dyn_cast<ShuffleVectorSDNode>(Op);
25852 CurSVN && CurSVN->getOperand(1).isUndef() && N->isOnlyUserOf(CurSVN) &&
25853 all_of(N->ops(), [CurSVN](SDValue Op) {
25854 // FIXME: can we allow UNDEF operands?
25855 return !Op.isUndef() &&
25856 (Op.getNode() == CurSVN || is_contained(CurSVN->ops(), Op));
25857 })) {
25858 SVN = CurSVN;
25859 break;
25860 }
25861 }
25862 if (!SVN)
25863 return SDValue();
25864
25865 // We are going to pad the shuffle operands, so any index that was picking
25866 // from the second operand must be adjusted.
25867 SmallVector<int, 16> AdjustedMask(SVN->getMask());
25868 assert(SVN->getOperand(1).isUndef() && "Expected unary shuffle!");
25869
25870 // Identity masks for the operands of the (padded) shuffle.
25871 SmallVector<int, 32> IdentityMask(2 * OpVT.getVectorNumElements());
25872 MutableArrayRef<int> FirstShufOpIdentityMask =
25873 MutableArrayRef<int>(IdentityMask)
25874 .take_front(OpVT.getVectorNumElements());
25875 MutableArrayRef<int> SecondShufOpIdentityMask =
25876 MutableArrayRef<int>(IdentityMask).take_back(OpVT.getVectorNumElements());
25877 std::iota(FirstShufOpIdentityMask.begin(), FirstShufOpIdentityMask.end(), 0);
25878 std::iota(SecondShufOpIdentityMask.begin(), SecondShufOpIdentityMask.end(),
25879 VT.getVectorNumElements());
25880
25881 // New combined shuffle mask.
25882 SmallVector<int, 32> Mask;
25883 Mask.reserve(VT.getVectorNumElements());
25884 for (SDValue Op : N->ops()) {
25885 assert(!Op.isUndef() && "Not expecting to concatenate UNDEF.");
25886 if (Op.getNode() == SVN) {
25887 append_range(Mask, AdjustedMask);
25888 continue;
25889 }
25890 if (Op == SVN->getOperand(0)) {
25891 append_range(Mask, FirstShufOpIdentityMask);
25892 continue;
25893 }
25894 if (Op == SVN->getOperand(1)) {
25895 append_range(Mask, SecondShufOpIdentityMask);
25896 continue;
25897 }
25898 llvm_unreachable("Unexpected operand!");
25899 }
25900
25901 // Don't create illegal shuffle masks.
25902 if (!TLI.isShuffleMaskLegal(Mask, VT))
25903 return SDValue();
25904
25905 // Pad the shuffle operands with UNDEF.
25906 SDLoc dl(N);
25907 std::array<SDValue, 2> ShufOps;
25908 for (auto I : zip(SVN->ops(), ShufOps)) {
25909 SDValue ShufOp = std::get<0>(I);
25910 SDValue &NewShufOp = std::get<1>(I);
25911 if (ShufOp.isUndef())
25912 NewShufOp = DAG.getUNDEF(VT);
25913 else {
25914 SmallVector<SDValue, 2> ShufOpParts(N->getNumOperands(),
25915 DAG.getUNDEF(OpVT));
25916 ShufOpParts[0] = ShufOp;
25917 NewShufOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, ShufOpParts);
25918 }
25919 }
25920 // Finally, create the new wide shuffle.
25921 return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask);
25922}
25923
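// Fold a concatenation of identical splats into one wider splat, e.g.
// (illustrative types):
//   concat_vectors (v2i64 splat_vector X), (v2i64 splat_vector X)
//     --> v4i64 splat_vector X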
25924 static SDValue combineConcatVectorOfSplats(SDNode *N, SelectionDAG &DAG,
25925 const TargetLowering &TLI,
25926 bool LegalTypes,
25927 bool LegalOperations) {
25928 EVT VT = N->getValueType(0);
25929
25930 // Post-legalization we can only create wider SPLAT_VECTOR operations if both
25931 // the type and operation is legal. The Hexagon target has custom
25932 // legalization for SPLAT_VECTOR that splits the operation into two parts and
25933 // concatenates them. Therefore, custom lowering must also be rejected in
25934 // order to avoid an infinite loop.
25935 if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
25936 (LegalOperations && !TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT)))
25937 return SDValue();
25938
25939 SDValue Op0 = N->getOperand(0);
25940 if (!llvm::all_equal(N->op_values()) || Op0.getOpcode() != ISD::SPLAT_VECTOR)
25941 return SDValue();
25942
25943 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, Op0.getOperand(0));
25944}
25945
25946SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
25947 // If we only have one input vector, we don't need to do any concatenation.
25948 if (N->getNumOperands() == 1)
25949 return N->getOperand(0);
25950
25951 // Check if all of the operands are undefs.
25952 EVT VT = N->getValueType(0);
25954 return DAG.getUNDEF(VT);
25955
25956 // Optimize concat_vectors where all but the first of the vectors are undef.
25957 if (all_of(drop_begin(N->ops()),
25958 [](const SDValue &Op) { return Op.isUndef(); })) {
25959 SDValue In = N->getOperand(0);
25960 assert(In.getValueType().isVector() && "Must concat vectors");
25961
25962 // If the input is a concat_vectors, just make a larger concat by padding
25963 // with smaller undefs.
25964 //
25965 // Legalizing in AArch64TargetLowering::LowerCONCAT_VECTORS() and combining
25966 // here could cause an infinite loop. That legalizing happens when LegalDAG
25967 // is true and input of AArch64TargetLowering::LowerCONCAT_VECTORS() is
25968 // scalable.
25969 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse() &&
25970 !(LegalDAG && In.getValueType().isScalableVector())) {
25971 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
25972 SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
25973 Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
25974 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
25975 }
25976
25977 SDValue Scalar = peekThroughOneUseBitcasts(In);
25978
25979 // concat_vectors(scalar_to_vector(scalar), undef) ->
25980 // scalar_to_vector(scalar)
25981 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
25982 Scalar.hasOneUse()) {
25983 EVT SVT = Scalar.getValueType().getVectorElementType();
25984 if (SVT == Scalar.getOperand(0).getValueType())
25985 Scalar = Scalar.getOperand(0);
25986 }
25987
25988 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
25989 if (!Scalar.getValueType().isVector() && In.hasOneUse()) {
25990 // If the bitcast type isn't legal, it might be a trunc of a legal type;
25991 // look through the trunc so we can still do the transform:
25992 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
25993 if (Scalar->getOpcode() == ISD::TRUNCATE &&
25994 !TLI.isTypeLegal(Scalar.getValueType()) &&
25995 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
25996 Scalar = Scalar->getOperand(0);
25997
25998 EVT SclTy = Scalar.getValueType();
25999
26000 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
26001 return SDValue();
26002
26003 // Bail out if the vector size is not a multiple of the scalar size.
26004 if (VT.getSizeInBits() % SclTy.getSizeInBits())
26005 return SDValue();
26006
26007 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
26008 if (VNTNumElms < 2)
26009 return SDValue();
26010
26011 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
26012 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
26013 return SDValue();
26014
26015 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
26016 return DAG.getBitcast(VT, Res);
26017 }
26018 }
26019
26020 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
26021 // We have already tested above for an UNDEF only concatenation.
26022 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
26023 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
26024 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
26025 return Op.isUndef() || ISD::BUILD_VECTOR == Op.getOpcode();
26026 };
26027 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
26028 SmallVector<SDValue, 8> Opnds;
26029 EVT SVT = VT.getScalarType();
26030
26031 EVT MinVT = SVT;
26032 if (!SVT.isFloatingPoint()) {
26033 // If the BUILD_VECTORs are built from integers, they may have different
26034 // operand types. Get the smallest type and truncate all operands to it.
26035 bool FoundMinVT = false;
26036 for (const SDValue &Op : N->ops())
26037 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
26038 EVT OpSVT = Op.getOperand(0).getValueType();
26039 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
26040 FoundMinVT = true;
26041 }
26042 assert(FoundMinVT && "Concat vector type mismatch");
26043 }
26044
26045 for (const SDValue &Op : N->ops()) {
26046 EVT OpVT = Op.getValueType();
26047 unsigned NumElts = OpVT.getVectorNumElements();
26048
26049 if (Op.isUndef())
26050 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
26051
26052 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
26053 if (SVT.isFloatingPoint()) {
26054 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
26055 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
26056 } else {
26057 for (unsigned i = 0; i != NumElts; ++i)
26058 Opnds.push_back(
26059 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
26060 }
26061 }
26062 }
26063
26064 assert(VT.getVectorNumElements() == Opnds.size() &&
26065 "Concat vector type mismatch");
26066 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
26067 }
26068
26069 if (SDValue V =
26070 combineConcatVectorOfSplats(N, DAG, TLI, LegalTypes, LegalOperations))
26071 return V;
26072
26073 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
26074 // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
26075 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
26076 return V;
26077
26078 if (Level <= AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
26079 // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
26080 if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
26081 return V;
26082
26083 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
26084 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
26085 return V;
26086 }
26087
26088 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
26089 return V;
26090
26091 if (SDValue V = combineConcatVectorOfShuffleAndItsOperands(
26092 N, DAG, TLI, LegalTypes, LegalOperations))
26093 return V;
26094
26095 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
26096 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
26097 // operands and look for CONCAT operations that place the incoming vectors
26098 // at the exact same location.
26099 //
26100 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
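// e.g. (illustrative, with X a v8i32 value):
//   concat_vectors (v4i32 extract_subvector X, 0), (v4i32 extract_subvector X, 4)
//     --> X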
26101 SDValue SingleSource = SDValue();
26102 unsigned PartNumElem =
26103 N->getOperand(0).getValueType().getVectorMinNumElements();
26104
26105 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
26106 SDValue Op = N->getOperand(i);
26107
26108 if (Op.isUndef())
26109 continue;
26110
26111 // Check if this is the identity extract:
26112 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
26113 return SDValue();
26114
26115 // Find the single incoming vector for the extract_subvector.
26116 if (SingleSource.getNode()) {
26117 if (Op.getOperand(0) != SingleSource)
26118 return SDValue();
26119 } else {
26120 SingleSource = Op.getOperand(0);
26121
26122 // Check the source type is the same as the type of the result.
26123 // If not, this concat may extend the vector, so we can not
26124 // optimize it away.
26125 if (SingleSource.getValueType() != N->getValueType(0))
26126 return SDValue();
26127 }
26128
26129 // Check that we are reading from the identity index.
26130 unsigned IdentityIndex = i * PartNumElem;
26131 if (Op.getConstantOperandAPInt(1) != IdentityIndex)
26132 return SDValue();
26133 }
26134
26135 if (SingleSource.getNode())
26136 return SingleSource;
26137
26138 return SDValue();
26139}
26140
26141SDValue DAGCombiner::visitVECTOR_INTERLEAVE(SDNode *N) {
26142 // Check to see if all operands are identical.
26143 if (!llvm::all_equal(N->op_values()))
26144 return SDValue();
26145
26146 // Check to see if the identical operand is a splat.
26147 if (!DAG.isSplatValue(N->getOperand(0)))
26148 return SDValue();
26149
26150 // interleave splat(X), splat(X).... --> splat(X), splat(X)....
26151 SmallVector<SDValue, 8> Ops;
26152 Ops.append(N->op_values().begin(), N->op_values().end());
26153 return CombineTo(N, &Ops);
26154}
26155
26156// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
26157// if the subvector can be sourced for free.
26158static SDValue getSubVectorSrc(SDValue V, unsigned Index, EVT SubVT) {
26159 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
26160 V.getOperand(1).getValueType() == SubVT &&
26161 V.getConstantOperandAPInt(2) == Index) {
26162 return V.getOperand(1);
26163 }
26164 if (V.getOpcode() == ISD::CONCAT_VECTORS &&
26165 V.getOperand(0).getValueType() == SubVT &&
26166 (Index % SubVT.getVectorMinNumElements()) == 0) {
26167 uint64_t SubIdx = Index / SubVT.getVectorMinNumElements();
26168 return V.getOperand(SubIdx);
26169 }
26170 return SDValue();
26171}
26172
26173 static SDValue narrowInsertExtractVectorBinOp(EVT SubVT, SDValue BinOp,
26174 unsigned Index, const SDLoc &DL,
26175 SelectionDAG &DAG,
26176 bool LegalOperations) {
26177 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26178 unsigned BinOpcode = BinOp.getOpcode();
26179 if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
26180 return SDValue();
26181
26182 EVT VecVT = BinOp.getValueType();
26183 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
26184 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
26185 return SDValue();
26186 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
26187 return SDValue();
26188
26189 SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
26190 SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
26191
26192 // TODO: We could handle the case where only 1 operand is being inserted by
26193 // creating an extract of the other operand, but that requires checking
26194 // number of uses and/or costs.
26195 if (!Sub0 || !Sub1)
26196 return SDValue();
26197
26198 // We are inserting both operands of the wide binop only to extract back
26199 // to the narrow vector size. Eliminate all of the insert/extract:
26200 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
26201 return DAG.getNode(BinOpcode, DL, SubVT, Sub0, Sub1, BinOp->getFlags());
26202}
26203
26204/// If we are extracting a subvector produced by a wide binary operator try
26205/// to use a narrow binary operator and/or avoid concatenation and extraction.
26206static SDValue narrowExtractedVectorBinOp(EVT VT, SDValue Src, unsigned Index,
26207 const SDLoc &DL, SelectionDAG &DAG,
26208 bool LegalOperations) {
26209 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
26210 // some of these bailouts with other transforms.
26211
26212 if (SDValue V = narrowInsertExtractVectorBinOp(VT, Src, Index, DL, DAG,
26213 LegalOperations))
26214 return V;
26215
26216 // We are looking for an optionally bitcasted wide vector binary operator
26217 // feeding an extract subvector.
26218 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26219 SDValue BinOp = peekThroughBitcasts(Src);
26220 unsigned BOpcode = BinOp.getOpcode();
26221 if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
26222 return SDValue();
26223
26224 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
26225 // reduced to the unary fneg when it is visited, and we probably want to deal
26226 // with fneg in a target-specific way.
26227 if (BOpcode == ISD::FSUB) {
26228 auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
26229 if (C && C->getValueAPF().isNegZero())
26230 return SDValue();
26231 }
26232
26233 // The binop must be a vector type, so we can extract some fraction of it.
26234 EVT WideBVT = BinOp.getValueType();
26235 // The optimisations below currently assume we are dealing with fixed length
26236 // vectors. It is possible to add support for scalable vectors, but at the
26237 // moment we've done no analysis to prove whether they are profitable or not.
26238 if (!WideBVT.isFixedLengthVector())
26239 return SDValue();
26240
26241 assert((Index % VT.getVectorNumElements()) == 0 &&
26242 "Extract index is not a multiple of the vector length.");
26243
26244 // Bail out if this is not a proper multiple width extraction.
26245 unsigned WideWidth = WideBVT.getSizeInBits();
26246 unsigned NarrowWidth = VT.getSizeInBits();
26247 if (WideWidth % NarrowWidth != 0)
26248 return SDValue();
26249
26250 // Bail out if we are extracting a fraction of a single operation. This can
26251 // occur because we potentially looked through a bitcast of the binop.
26252 unsigned NarrowingRatio = WideWidth / NarrowWidth;
26253 unsigned WideNumElts = WideBVT.getVectorNumElements();
26254 if (WideNumElts % NarrowingRatio != 0)
26255 return SDValue();
26256
26257 // Bail out if the target does not support a narrower version of the binop.
26258 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
26259 WideNumElts / NarrowingRatio);
26260 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT,
26261 LegalOperations))
26262 return SDValue();
26263
26264 // If extraction is cheap, we don't need to look at the binop operands
26265 // for concat ops. The narrow binop alone makes this transform profitable.
26266 // We can't just reuse the original extract index operand because we may have
26267 // bitcasted.
26268 unsigned ConcatOpNum = Index / VT.getVectorNumElements();
26269 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
26270 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
26271 BinOp.hasOneUse() && Src->hasOneUse()) {
26272 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
26273 SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
26274 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
26275 BinOp.getOperand(0), NewExtIndex);
26276 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
26277 BinOp.getOperand(1), NewExtIndex);
26278 SDValue NarrowBinOp =
26279 DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
26280 return DAG.getBitcast(VT, NarrowBinOp);
26281 }
26282
26283 // Only handle the case where we are doubling and then halving. A larger ratio
26284 // may require more than two narrow binops to replace the wide binop.
26285 if (NarrowingRatio != 2)
26286 return SDValue();
26287
26288 // TODO: The motivating case for this transform is an x86 AVX1 target. That
26289 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
26290 // flavors, but no other 256-bit integer support. This could be extended to
26291 // handle any binop, but that may require fixing/adding other folds to avoid
26292 // codegen regressions.
26293 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
26294 return SDValue();
26295
26296 // We need at least one concatenation operation of a binop operand to make
26297 // this transform worthwhile. The concat must double the input vector sizes.
26298 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
26299 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
26300 return V.getOperand(ConcatOpNum);
26301 return SDValue();
26302 };
26303 SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
26304 SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
26305
26306 if (SubVecL || SubVecR) {
26307 // If a binop operand was not the result of a concat, we must extract a
26308 // half-sized operand for our new narrow binop:
26309 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
26310 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
26311 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
26312 SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
26313 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
26314 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
26315 BinOp.getOperand(0), IndexC);
26316
26317 SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
26318 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
26319 BinOp.getOperand(1), IndexC);
26320
26321 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
26322 return DAG.getBitcast(VT, NarrowBinOp);
26323 }
26324
26325 return SDValue();
26326}
26327
26328/// If we are extracting a subvector from a wide vector load, convert to a
26329/// narrow load to eliminate the extraction:
26330/// (extract_subvector (load wide vector)) --> (load narrow vector)
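/// For example (illustrative, assuming the target allows reducing the load
/// width here):
///   (v4i32 extract_subvector (v8i32 load %ptr), 4)
///     --> (v4i32 load %ptr + 16 bytes)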
26331static SDValue narrowExtractedVectorLoad(EVT VT, SDValue Src, unsigned Index,
26332 const SDLoc &DL, SelectionDAG &DAG) {
26333 // TODO: Add support for big-endian. The offset calculation must be adjusted.
26334 if (DAG.getDataLayout().isBigEndian())
26335 return SDValue();
26336
26337 auto *Ld = dyn_cast<LoadSDNode>(Src);
26338 if (!Ld || !ISD::isNormalLoad(Ld) || !Ld->isSimple())
26339 return SDValue();
26340
26341 // We can only create byte sized loads.
26342 if (!VT.isByteSized())
26343 return SDValue();
26344
26345 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26347 return SDValue();
26348
26349 unsigned NumElts = VT.getVectorMinNumElements();
26350 // A fixed length vector being extracted from a scalable vector
26351 // may not be any *smaller* than the scalable one.
26352 if (Index == 0 && NumElts >= Ld->getValueType(0).getVectorMinNumElements())
26353 return SDValue();
26354
26355 // The definition of EXTRACT_SUBVECTOR states that the index must be a
26356 // multiple of the minimum number of elements in the result type.
26357 assert(Index % NumElts == 0 && "The extract subvector index is not a "
26358 "multiple of the result's element count");
26359
26360 // It's fine to use TypeSize here as we know the offset will not be negative.
26361 TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
26362 std::optional<unsigned> ByteOffset;
26363 if (Offset.isFixed())
26364 ByteOffset = Offset.getFixedValue();
26365
26366 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT, ByteOffset))
26367 return SDValue();
26368
26369 // The narrow load will be offset from the base address of the old load if
26370 // we are extracting from something besides index 0 (little-endian).
26371 // TODO: Use "BaseIndexOffset" to make this more effective.
26372 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
26373
26374 MachineFunction &MF = DAG.getMachineFunction();
26375 MachineMemOperand *MMO;
26376 if (Offset.isScalable()) {
26377 MachinePointerInfo MPI =
26378 MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
26379 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, VT.getStoreSize());
26380 } else
26381 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedValue(),
26382 VT.getStoreSize());
26383
26384 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
26385 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
26386 return NewLd;
26387}
26388
26389/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
26390/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
26391/// EXTRACT_SUBVECTOR(Op?, ?),
26392/// Mask'))
26393/// iff it is legal and profitable to do so. Notably, the trimmed mask
26394/// (containing only the elements that are extracted)
26395/// must reference at most two subvectors.
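/// For example (illustrative, with A and B both v8i32):
///   v4i32 extract_subvector (v8i32 shuffle A, B, <0,8,1,9,2,10,3,11>), 0
///     --> v4i32 shuffle (extract_subvector A, 0), (extract_subvector B, 0),
///                       <0,4,1,5>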
26396 static SDValue foldExtractSubvectorFromShuffleVector(EVT NarrowVT, SDValue Src,
26397 unsigned Index,
26398 const SDLoc &DL,
26399 SelectionDAG &DAG,
26400 bool LegalOperations) {
26401 // Only deal with non-scalable vectors.
26402 EVT WideVT = Src.getValueType();
26403 if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
26404 return SDValue();
26405
26406 // The operand must be a shufflevector.
26407 auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(Src);
26408 if (!WideShuffleVector)
26409 return SDValue();
26410
26411 // The old shuffle needs to go away.
26412 if (!WideShuffleVector->hasOneUse())
26413 return SDValue();
26414
26415 // And the narrow shufflevector that we'll form must be legal.
26416 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26417 if (LegalOperations &&
26418 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, NarrowVT))
26419 return SDValue();
26420
26421 int NumEltsExtracted = NarrowVT.getVectorNumElements();
26422 assert((Index % NumEltsExtracted) == 0 &&
26423 "Extract index is not a multiple of the output vector length.");
26424
26425 int WideNumElts = WideVT.getVectorNumElements();
26426
26427 SmallVector<int, 16> NewMask;
26428 NewMask.reserve(NumEltsExtracted);
26429 SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
26430 DemandedSubvectors;
26431
26432 // Try to decode the wide mask into narrow mask from at most two subvectors.
26433 for (int M : WideShuffleVector->getMask().slice(Index, NumEltsExtracted)) {
26434 assert((M >= -1) && (M < (2 * WideNumElts)) &&
26435 "Out-of-bounds shuffle mask?");
26436
26437 if (M < 0) {
26438 // Does not depend on operands, does not require adjustment.
26439 NewMask.emplace_back(M);
26440 continue;
26441 }
26442
26443 // From which operand of the shuffle does this shuffle mask element pick?
26444 int WideShufOpIdx = M / WideNumElts;
26445 // Which element of that operand is picked?
26446 int OpEltIdx = M % WideNumElts;
26447
26448 assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
26449 "Shuffle mask vector decomposition failure.");
26450
26451 // And which NumEltsExtracted-sized subvector of that operand is that?
26452 int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
26453 // And which element within that subvector of that operand is that?
26454 int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
26455
26456 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
26457 "Shuffle mask subvector decomposition failure.");
26458
26459 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
26460 WideShufOpIdx * WideNumElts) == M &&
26461 "Shuffle mask full decomposition failure.");
26462
26463 SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
26464
26465 if (Op.isUndef()) {
26466 // Picking from an undef operand. Let's adjust mask instead.
26467 NewMask.emplace_back(-1);
26468 continue;
26469 }
26470
26471 const std::pair<SDValue, int> DemandedSubvector =
26472 std::make_pair(Op, OpSubvecIdx);
26473
26474 if (DemandedSubvectors.insert(DemandedSubvector)) {
26475 if (DemandedSubvectors.size() > 2)
26476 return SDValue(); // We can't handle more than two subvectors.
26477 // How many elements into the WideVT does this subvector start?
26478 int Index = NumEltsExtracted * OpSubvecIdx;
26479 // Bail out if the extraction isn't going to be cheap.
26480 if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
26481 return SDValue();
26482 }
26483
26484 // Ok, but from which operand of the new shuffle will this element pick?
26485 int NewOpIdx =
26486 getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
26487 assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
26488
26489 int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
26490 NewMask.emplace_back(AdjM);
26491 }
26492 assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
26493 assert(DemandedSubvectors.size() <= 2 &&
26494 "Should have ended up demanding at most two subvectors.");
26495
26496 // Did we discover that the shuffle does not actually depend on operands?
26497 if (DemandedSubvectors.empty())
26498 return DAG.getUNDEF(NarrowVT);
26499
26500 // Profitability check: only deal with extractions from the first subvector
26501 // unless the mask becomes an identity mask.
26502 if (!ShuffleVectorInst::isIdentityMask(NewMask, NewMask.size()) ||
26503 any_of(NewMask, [](int M) { return M < 0; }))
26504 for (auto &DemandedSubvector : DemandedSubvectors)
26505 if (DemandedSubvector.second != 0)
26506 return SDValue();
26507
26508 // We still perform the exact same EXTRACT_SUBVECTOR, just on different
26509 // operand[s]/index[es], so there is no point in checking for its legality.
26510
26511 // Do not turn a legal shuffle into an illegal one.
26512 if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
26513 !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
26514 return SDValue();
26515
26516 SmallVector<SDValue, 2> NewOps;
26517 for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
26518 &DemandedSubvector : DemandedSubvectors) {
26519 // How many elements into the WideVT does this subvector start?
26520 int Index = NumEltsExtracted * DemandedSubvector.second;
26521 SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
26522 NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
26523 DemandedSubvector.first, IndexC));
26524 }
26525 assert((NewOps.size() == 1 || NewOps.size() == 2) &&
26526 "Should end up with either one or two ops");
26527
26528 // If we ended up with only one operand, pad with an undef.
26529 if (NewOps.size() == 1)
26530 NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
26531
26532 return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
26533}
26534
26535SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
26536 EVT NVT = N->getValueType(0);
26537 SDValue V = N->getOperand(0);
26538 uint64_t ExtIdx = N->getConstantOperandVal(1);
26539 SDLoc DL(N);
26540
26541 // Extract from UNDEF is UNDEF.
26542 if (V.isUndef())
26543 return DAG.getUNDEF(NVT);
26544
26545 if (SDValue NarrowLoad = narrowExtractedVectorLoad(NVT, V, ExtIdx, DL, DAG))
26546 return NarrowLoad;
26547
26548 // Combine an extract of an extract into a single extract_subvector.
26549 // ext (ext X, C), 0 --> ext X, C
26550 if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
26551 // The index has to be a multiple of the new result type's known minimum
26552 // vector length.
26553 if (V.getConstantOperandVal(1) % NVT.getVectorMinNumElements() == 0 &&
26554 TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
26555 V.getConstantOperandVal(1)) &&
26557 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, V.getOperand(0),
26558 V.getOperand(1));
26559 }
26560 }
26561
26562 // ty1 extract_vector(ty2 splat(V))) -> ty1 splat(V)
26563 if (V.getOpcode() == ISD::SPLAT_VECTOR)
26564 if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
26565 if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
26566 return DAG.getSplatVector(NVT, DL, V.getOperand(0));
26567
26568 // extract_subvector(insert_subvector(x,y,c1),c2)
26569 // --> extract_subvector(y,c2-c1)
26570 // iff we're just extracting from the inserted subvector.
26571 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
26572 SDValue InsSub = V.getOperand(1);
26573 EVT InsSubVT = InsSub.getValueType();
26574 unsigned NumInsElts = InsSubVT.getVectorMinNumElements();
26575 unsigned InsIdx = V.getConstantOperandVal(2);
26576 unsigned NumSubElts = NVT.getVectorMinNumElements();
26577 if (InsIdx <= ExtIdx && (ExtIdx + NumSubElts) <= (InsIdx + NumInsElts) &&
26578 TLI.isExtractSubvectorCheap(NVT, InsSubVT, ExtIdx - InsIdx) &&
26579 InsSubVT.isFixedLengthVector() && NVT.isFixedLengthVector() &&
26580 V.getValueType().isFixedLengthVector())
26581 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, InsSub,
26582 DAG.getVectorIdxConstant(ExtIdx - InsIdx, DL));
26583 }
26584
26585 // Try to move vector bitcast after extract_subv by scaling extraction index:
26586 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
26587 if (V.getOpcode() == ISD::BITCAST &&
26588 V.getOperand(0).getValueType().isVector() &&
26589 (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
26590 SDValue SrcOp = V.getOperand(0);
26591 EVT SrcVT = SrcOp.getValueType();
26592 unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
26593 unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
26594 if ((SrcNumElts % DestNumElts) == 0) {
26595 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
26596 ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
26597 EVT NewExtVT =
26598 EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), NewExtEC);
26599 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
26600 SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
26601 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
26602 V.getOperand(0), NewIndex);
26603 return DAG.getBitcast(NVT, NewExtract);
26604 }
26605 }
26606 if ((DestNumElts % SrcNumElts) == 0) {
26607 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
26608 if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
26609 ElementCount NewExtEC =
26610 NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
26611 EVT ScalarVT = SrcVT.getScalarType();
26612 if ((ExtIdx % DestSrcRatio) == 0) {
26613 unsigned IndexValScaled = ExtIdx / DestSrcRatio;
26614 EVT NewExtVT =
26615 EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
26616 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
26617 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
26618 SDValue NewExtract =
26619 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
26620 V.getOperand(0), NewIndex);
26621 return DAG.getBitcast(NVT, NewExtract);
26622 }
26623 if (NewExtEC.isScalar() &&
26624 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
26625 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
26626 SDValue NewExtract =
26627 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
26628 V.getOperand(0), NewIndex);
26629 return DAG.getBitcast(NVT, NewExtract);
26630 }
26631 }
26632 }
26633 }
26634 }
26635
26636 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
26637 unsigned ExtNumElts = NVT.getVectorMinNumElements();
26638 EVT ConcatSrcVT = V.getOperand(0).getValueType();
26639 assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
26640 "Concat and extract subvector do not change element type");
26641
26642 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
26643 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
26644
26645 // If the concatenated source types match this extract, it's a direct
26646 // simplification:
26647 // extract_subvec (concat V1, V2, ...), i --> Vi
26648 if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
26649 return V.getOperand(ConcatOpIdx);
26650
26651 // If the concatenated source vectors are a multiple length of this extract,
26652 // then extract a fraction of one of those source vectors directly from a
26653 // concat operand. Example:
26654 // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
26655 // v2i8 extract_subvec v8i8 Y, 6
26656 if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
26657 ConcatSrcNumElts % ExtNumElts == 0) {
26658 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
26659 assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
26660 "Trying to extract from >1 concat operand?");
26661 assert(NewExtIdx % ExtNumElts == 0 &&
26662 "Extract index is not a multiple of the input vector length.");
26663 SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
26664 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
26665 V.getOperand(ConcatOpIdx), NewIndexC);
26666 }
26667 }
26668
26669 if (SDValue Shuffle = foldExtractSubvectorFromShuffleVector(
26670 NVT, V, ExtIdx, DL, DAG, LegalOperations))
26671 return Shuffle;
26672
26673 if (SDValue NarrowBOp =
26674 narrowExtractedVectorBinOp(NVT, V, ExtIdx, DL, DAG, LegalOperations))
26675 return NarrowBOp;
26676
26678
26679 // If the input is a build vector, try to make a smaller build vector.
26680 if (V.getOpcode() == ISD::BUILD_VECTOR) {
26681 EVT InVT = V.getValueType();
26682 unsigned ExtractSize = NVT.getSizeInBits();
26683 unsigned EltSize = InVT.getScalarSizeInBits();
26684 // Only do this if we won't split any elements.
26685 if (ExtractSize % EltSize == 0) {
26686 unsigned NumElems = ExtractSize / EltSize;
26687 EVT EltVT = InVT.getVectorElementType();
26688 EVT ExtractVT =
26689 NumElems == 1 ? EltVT
26690 : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
26691 if ((Level < AfterLegalizeDAG ||
26692 (NumElems == 1 ||
26693 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
26694 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
26695 unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
26696
26697 if (NumElems == 1) {
26698 SDValue Src = V->getOperand(IdxVal);
26699 if (EltVT != Src.getValueType())
26700 Src = DAG.getNode(ISD::TRUNCATE, DL, EltVT, Src);
26701 return DAG.getBitcast(NVT, Src);
26702 }
26703
26704 // Extract the pieces from the original build_vector.
26705 SDValue BuildVec =
26706 DAG.getBuildVector(ExtractVT, DL, V->ops().slice(IdxVal, NumElems));
26707 return DAG.getBitcast(NVT, BuildVec);
26708 }
26709 }
26710 }
26711
26712 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
26713 // Handle only simple case where vector being inserted and vector
26714 // being extracted are of same size.
26715 EVT SmallVT = V.getOperand(1).getValueType();
26716 if (NVT.bitsEq(SmallVT)) {
26717 // Combine:
26718 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
26719 // Into:
26720 // indices are equal or bit offsets are equal => V1
26721 // otherwise => (extract_subvec V1, ExtIdx)
26722 uint64_t InsIdx = V.getConstantOperandVal(2);
26723 if (InsIdx * SmallVT.getScalarSizeInBits() ==
26724 ExtIdx * NVT.getScalarSizeInBits()) {
26725 if (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))
26726 return DAG.getBitcast(NVT, V.getOperand(1));
26727 } else {
26728 return DAG.getNode(
26730 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
26731 N->getOperand(1));
26732 }
26733 }
26734 }
26735
26736 // If only EXTRACT_SUBVECTOR nodes use the source vector we can
26737 // simplify it based on the (valid) extractions.
26738 if (!V.getValueType().isScalableVector() &&
26739 llvm::all_of(V->users(), [&](SDNode *Use) {
26740 return Use->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
26741 Use->getOperand(0) == V;
26742 })) {
26743 unsigned NumElts = V.getValueType().getVectorNumElements();
26744 APInt DemandedElts = APInt::getZero(NumElts);
26745 for (SDNode *User : V->users()) {
26746 unsigned ExtIdx = User->getConstantOperandVal(1);
26747 unsigned NumSubElts = User->getValueType(0).getVectorNumElements();
26748 DemandedElts.setBits(ExtIdx, ExtIdx + NumSubElts);
26749 }
26750 if (SimplifyDemandedVectorElts(V, DemandedElts, /*AssumeSingleUse=*/true)) {
26751 // We simplified the vector operand of this extract subvector. If this
26752 // extract is not dead, visit it again so it is folded properly.
26753 if (N->getOpcode() != ISD::DELETED_NODE)
26754 AddToWorklist(N);
26755 return SDValue(N, 0);
26756 }
26757 } else {
26758 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
26759 return SDValue(N, 0);
26760 }
26761
26762 return SDValue();
26763}
26764
26765/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
26766/// followed by concatenation. Narrow vector ops may have better performance
26767/// than wide ops, and this can unlock further narrowing of other vector ops.
26768/// Targets can invert this transform later if it is not profitable.
26769static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
26770 SelectionDAG &DAG) {
26771 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
26772 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
26773 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
26774 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
26775 return SDValue();
26776
26777 // Split the wide shuffle mask into halves. Any mask element that is accessing
26778 // operand 1 is offset down to account for narrowing of the vectors.
26779 ArrayRef<int> Mask = Shuf->getMask();
26780 EVT VT = Shuf->getValueType(0);
26781 unsigned NumElts = VT.getVectorNumElements();
26782 unsigned HalfNumElts = NumElts / 2;
26783 SmallVector<int, 16> Mask0(HalfNumElts, -1);
26784 SmallVector<int, 16> Mask1(HalfNumElts, -1);
26785 for (unsigned i = 0; i != NumElts; ++i) {
26786 if (Mask[i] == -1)
26787 continue;
26788 // If we reference the upper (undef) subvector then the element is undef.
26789 if ((Mask[i] % NumElts) >= HalfNumElts)
26790 continue;
26791 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
26792 if (i < HalfNumElts)
26793 Mask0[i] = M;
26794 else
26795 Mask1[i - HalfNumElts] = M;
26796 }
26797
26798 // Ask the target if this is a valid transform.
26799 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26800 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
26801 HalfNumElts);
26802 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
26803 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
26804 return SDValue();
26805
26806 // shuffle (concat X, undef), (concat Y, undef), Mask -->
26807 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
26808 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
26809 SDLoc DL(Shuf);
26810 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
26811 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
26812 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
26813}
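// Worked example (illustrative only; v8i32/v4i32 types are hypothetical):
// for Mask = <0,8,1,9,2,10,3,11> with X, Y of type v4i32,
//   shuffle (concat X, undef), (concat Y, undef), <0,8,1,9,2,10,3,11>
//     --> concat (shuffle X, Y, <0,4,1,5>), (shuffle X, Y, <2,6,3,7>)
// i.e. Mask0 = <0,4,1,5> and Mask1 = <2,6,3,7> once operand-1 indices are
// offset down by HalfNumElts; the result is then rebuilt exactly as above via
// DAG.getVectorShuffle on the halves and a final ISD::CONCAT_VECTORS node.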
26814
26815// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
26816 // or turn a shuffle of a single concat into a simpler shuffle then a concat.
26817static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
26818 EVT VT = N->getValueType(0);
26819 unsigned NumElts = VT.getVectorNumElements();
26820
26821 SDValue N0 = N->getOperand(0);
26822 SDValue N1 = N->getOperand(1);
26823 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
26824 ArrayRef<int> Mask = SVN->getMask();
26825
26826 SmallVector<SDValue, 4> Ops;
26827 EVT ConcatVT = N0.getOperand(0).getValueType();
26828 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
26829 unsigned NumConcats = NumElts / NumElemsPerConcat;
26830
26831 auto IsUndefMaskElt = [](int i) { return i == -1; };
26832
26833 // Special case: shuffle(concat(A,B)) can be more efficiently represented
26834 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
26835 // half vector elements.
26836 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
26837 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
26838 IsUndefMaskElt)) {
26839 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
26840 N0.getOperand(1),
26841 Mask.slice(0, NumElemsPerConcat));
26842 N1 = DAG.getUNDEF(ConcatVT);
26843 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
26844 }
26845
26846 // Look at every vector that's inserted. We're looking for exact
26847 // subvector-sized copies from a concatenated vector
26848 for (unsigned I = 0; I != NumConcats; ++I) {
26849 unsigned Begin = I * NumElemsPerConcat;
26850 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
26851
26852 // Make sure we're dealing with a copy.
26853 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
26854 Ops.push_back(DAG.getUNDEF(ConcatVT));
26855 continue;
26856 }
26857
26858 int OpIdx = -1;
26859 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
26860 if (IsUndefMaskElt(SubMask[i]))
26861 continue;
26862 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
26863 return SDValue();
26864 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
26865 if (0 <= OpIdx && EltOpIdx != OpIdx)
26866 return SDValue();
26867 OpIdx = EltOpIdx;
26868 }
26869 assert(0 <= OpIdx && "Unknown concat_vectors op");
26870
26871 if (OpIdx < (int)N0.getNumOperands())
26872 Ops.push_back(N0.getOperand(OpIdx));
26873 else
26874 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
26875 }
26876
26877 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
26878}
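// Worked example (illustrative only; types are hypothetical): for a v8i32
// shuffle of N0 = concat(A,B,C,D) and N1 = concat(E,F,G,H) (each v2i32) with
// Mask = <2,3,0,1,14,15,8,9>, every 2-element chunk copies one whole
// subvector, so the node is rebuilt as
//   concat_vectors B, A, H, E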
26879
26880// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
26881// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
26882//
26883// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
26884// a simplification in some sense, but it isn't appropriate in general: some
26885// BUILD_VECTORs are substantially cheaper than others. The general case
26886// of a BUILD_VECTOR requires inserting each element individually (or
26887// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
26888// all constants is a single constant pool load. A BUILD_VECTOR where each
26889// element is identical is a splat. A BUILD_VECTOR where most of the operands
26890// are undef lowers to a small number of element insertions.
26891//
26892// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
26893// We don't fold shuffles where one side is a non-zero constant, and we don't
26894// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
26895// non-constant operands. This seems to work out reasonably well in practice.
26896static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
26897 SelectionDAG &DAG,
26898 const TargetLowering &TLI) {
26899 EVT VT = SVN->getValueType(0);
26900 unsigned NumElts = VT.getVectorNumElements();
26901 SDValue N0 = SVN->getOperand(0);
26902 SDValue N1 = SVN->getOperand(1);
26903
26904 if (!N0->hasOneUse())
26905 return SDValue();
26906
26907 // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
26908 // discussed above.
26909 if (!N1.isUndef()) {
26910 if (!N1->hasOneUse())
26911 return SDValue();
26912
26913 bool N0AnyConst = isAnyConstantBuildVector(N0);
26914 bool N1AnyConst = isAnyConstantBuildVector(N1);
26915 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
26916 return SDValue();
26917 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
26918 return SDValue();
26919 }
26920
26921 // If both inputs are splats of the same value then we can safely merge this
26922 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
26923 bool IsSplat = false;
26924 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
26925 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
26926 if (BV0 && BV1)
26927 if (SDValue Splat0 = BV0->getSplatValue())
26928 IsSplat = (Splat0 == BV1->getSplatValue());
26929
26930 SmallVector<SDValue, 16> Ops;
26931 SmallSet<SDValue, 16> DuplicateOps;
26932 for (int M : SVN->getMask()) {
26933 SDValue Op = DAG.getUNDEF(VT.getScalarType());
26934 if (M >= 0) {
26935 int Idx = M < (int)NumElts ? M : M - NumElts;
26936 SDValue &S = (M < (int)NumElts ? N0 : N1);
26937 if (S.getOpcode() == ISD::BUILD_VECTOR) {
26938 Op = S.getOperand(Idx);
26939 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
26940 SDValue Op0 = S.getOperand(0);
26941 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
26942 } else {
26943 // Operand can't be combined - bail out.
26944 return SDValue();
26945 }
26946 }
26947
26948 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
26949 // generating a splat; semantically, this is fine, but it's likely to
26950 // generate low-quality code if the target can't reconstruct an appropriate
26951 // shuffle.
26952 if (!Op.isUndef() && !isIntOrFPConstant(Op))
26953 if (!IsSplat && !DuplicateOps.insert(Op).second)
26954 return SDValue();
26955
26956 Ops.push_back(Op);
26957 }
26958
26959 // BUILD_VECTOR requires all inputs to be of the same type, find the
26960 // maximum type and extend them all.
26961 EVT SVT = VT.getScalarType();
26962 if (SVT.isInteger())
26963 for (SDValue &Op : Ops)
26964 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
26965 if (SVT != VT.getScalarType())
26966 for (SDValue &Op : Ops)
26967 Op = Op.isUndef() ? DAG.getUNDEF(SVT)
26968 : (TLI.isZExtFree(Op.getValueType(), SVT)
26969 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
26970 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
26971 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
26972}
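// Worked example (illustrative only; values are hypothetical): shuffling two
// v4i32 BUILD_VECTORs (a,b,c,d) and (e,f,g,h) with Mask = <0,4,1,5> picks one
// scalar per lane, so the result becomes
//   BUILD_VECTOR a, e, b, f
// subject to the constant/duplicate-operand heuristics above.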
26973
26974// Match shuffles that can be converted to *_vector_extend_in_reg.
26975// This is often generated during legalization.
26976// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)),
26977// and returns the EVT to which the extension should be performed.
26978// NOTE: this assumes that the src is the first operand of the shuffle.
26979static std::optional<EVT> canCombineShuffleToExtendVectorInreg(
26980 unsigned Opcode, EVT VT, std::function<bool(unsigned)> Match,
26981 SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
26982 bool LegalOperations) {
26983 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
26984
26985 // TODO Add support for big-endian when we have a test case.
26986 if (!VT.isInteger() || IsBigEndian)
26987 return std::nullopt;
26988
26989 unsigned NumElts = VT.getVectorNumElements();
26990 unsigned EltSizeInBits = VT.getScalarSizeInBits();
26991
26992 // Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
26993 // power-of-2 extensions as they are the most likely.
26994 // FIXME: should try the Scale == NumElts case too.
26995 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
26996 // The vector width must be a multiple of Scale.
26997 if (NumElts % Scale != 0)
26998 continue;
26999
27000 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
27001 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
27002
27003 if ((LegalTypes && !TLI.isTypeLegal(OutVT)) ||
27004 (LegalOperations && !TLI.isOperationLegalOrCustom(Opcode, OutVT)))
27005 continue;
27006
27007 if (Match(Scale))
27008 return OutVT;
27009 }
27010
27011 return std::nullopt;
27012}
27013
27014// Match shuffles that can be converted to any_vector_extend_in_reg.
27015// This is often generated during legalization.
27016// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
27017static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN,
27018 SelectionDAG &DAG,
27019 const TargetLowering &TLI,
27020 bool LegalOperations) {
27021 EVT VT = SVN->getValueType(0);
27022 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
27023
27024 // TODO Add support for big-endian when we have a test case.
27025 if (!VT.isInteger() || IsBigEndian)
27026 return SDValue();
27027
27028 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
27029 auto isAnyExtend = [NumElts = VT.getVectorNumElements(),
27030 Mask = SVN->getMask()](unsigned Scale) {
27031 for (unsigned i = 0; i != NumElts; ++i) {
27032 if (Mask[i] < 0)
27033 continue;
27034 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
27035 continue;
27036 return false;
27037 }
27038 return true;
27039 };
27040
27041 unsigned Opcode = ISD::ANY_EXTEND_VECTOR_INREG;
27042 SDValue N0 = SVN->getOperand(0);
27043 // Never create an illegal type. Only create unsupported operations if we
27044 // are pre-legalization.
27045 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
27046 Opcode, VT, isAnyExtend, DAG, TLI, /*LegalTypes=*/true, LegalOperations);
27047 if (!OutVT)
27048 return SDValue();
27049 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT, N0));
27050}
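// Worked example (illustrative only; types are hypothetical): for a v4i32
// shuffle of Src with mask <0,-1,1,-1>, Scale = 2 matches and OutVT = v2i64,
// so the result is
//   (v4i32 bitcast (v2i64 any_extend_vector_inreg Src))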
27051
27052// Match shuffles that can be converted to zero_extend_vector_inreg.
27053// This is often generated during legalization.
27054// e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src))
27055static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN,
27056 SelectionDAG &DAG,
27057 const TargetLowering &TLI,
27058 bool LegalOperations) {
27059 bool LegalTypes = true;
27060 EVT VT = SVN->getValueType(0);
27061 assert(!VT.isScalableVector() && "Encountered scalable shuffle?");
27062 unsigned NumElts = VT.getVectorNumElements();
27063 unsigned EltSizeInBits = VT.getScalarSizeInBits();
27064
27065 // TODO: add support for big-endian when we have a test case.
27066 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
27067 if (!VT.isInteger() || IsBigEndian)
27068 return SDValue();
27069
27070 SmallVector<int, 16> Mask(SVN->getMask());
27071 auto ForEachDecomposedIndice = [NumElts, &Mask](auto Fn) {
27072 for (int &Indice : Mask) {
27073 if (Indice < 0)
27074 continue;
27075 int OpIdx = (unsigned)Indice < NumElts ? 0 : 1;
27076 int OpEltIdx = (unsigned)Indice < NumElts ? Indice : Indice - NumElts;
27077 Fn(Indice, OpIdx, OpEltIdx);
27078 }
27079 };
27080
27081 // Which elements of which operand does this shuffle demand?
27082 std::array<APInt, 2> OpsDemandedElts;
27083 for (APInt &OpDemandedElts : OpsDemandedElts)
27084 OpDemandedElts = APInt::getZero(NumElts);
27085 ForEachDecomposedIndice(
27086 [&OpsDemandedElts](int &Indice, int OpIdx, int OpEltIdx) {
27087 OpsDemandedElts[OpIdx].setBit(OpEltIdx);
27088 });
27089
27090 // Element-wise(!), which of these demanded elements are known to be zero?
27091 std::array<APInt, 2> OpsKnownZeroElts;
27092 for (auto I : zip(SVN->ops(), OpsDemandedElts, OpsKnownZeroElts))
27093 std::get<2>(I) =
27094 DAG.computeVectorKnownZeroElements(std::get<0>(I), std::get<1>(I));
27095
27096 // Manifest zeroable element knowledge in the shuffle mask.
27097 // NOTE: we don't have 'zeroable' sentinel value in generic DAG,
27098 // this is a local invention, but it won't leak into DAG.
27099 // FIXME: should we not manifest them, but just check when matching?
27100 bool HadZeroableElts = false;
27101 ForEachDecomposedIndice([&OpsKnownZeroElts, &HadZeroableElts](
27102 int &Indice, int OpIdx, int OpEltIdx) {
27103 if (OpsKnownZeroElts[OpIdx][OpEltIdx]) {
27104 Indice = -2; // Zeroable element.
27105 HadZeroableElts = true;
27106 }
27107 });
27108
27109 // Don't proceed unless we've refined at least one zeroable mask index.
27110 // If we didn't, then we are still trying to match the same shuffle mask
27111 // we previously tried to match as ISD::ANY_EXTEND_VECTOR_INREG,
27112 // and evidently failed. Proceeding will lead to endless combine loops.
27113 if (!HadZeroableElts)
27114 return SDValue();
27115
27116 // The shuffle may be more fine-grained than we want. Widen elements first.
27117 // FIXME: should we do this before manifesting zeroable shuffle mask indices?
27118 SmallVector<int, 16> ScaledMask;
27119 getShuffleMaskWithWidestElts(Mask, ScaledMask);
27120 assert(Mask.size() >= ScaledMask.size() &&
27121 Mask.size() % ScaledMask.size() == 0 && "Unexpected mask widening.");
27122 int Prescale = Mask.size() / ScaledMask.size();
27123
27124 NumElts = ScaledMask.size();
27125 EltSizeInBits *= Prescale;
27126
27127 EVT PrescaledVT = EVT::getVectorVT(
27128 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits),
27129 NumElts);
27130
27131 if (LegalTypes && !TLI.isTypeLegal(PrescaledVT) && TLI.isTypeLegal(VT))
27132 return SDValue();
27133
27134 // For example,
27135 // shuffle<0,z,1,-1> == (v2i64 zero_extend_vector_inreg(v4i32))
27136 // But not shuffle<z,z,1,-1> and not shuffle<0,z,z,-1> ! (for same types)
27137 auto isZeroExtend = [NumElts, &ScaledMask](unsigned Scale) {
27138 assert(Scale >= 2 && Scale <= NumElts && NumElts % Scale == 0 &&
27139 "Unexpected mask scaling factor.");
27140 ArrayRef<int> Mask = ScaledMask;
27141 for (unsigned SrcElt = 0, NumSrcElts = NumElts / Scale;
27142 SrcElt != NumSrcElts; ++SrcElt) {
27143 // Analyze the shuffle mask in Scale-sized chunks.
27144 ArrayRef<int> MaskChunk = Mask.take_front(Scale);
27145 assert(MaskChunk.size() == Scale && "Unexpected mask size.");
27146 Mask = Mask.drop_front(MaskChunk.size());
27147 // The first index in this chunk must be SrcElt, but not zero!
27148 // FIXME: undef should be fine, but that results in a more-defined result.
27149 if (int FirstIndice = MaskChunk[0]; (unsigned)FirstIndice != SrcElt)
27150 return false;
27151 // The rest of the indices in this chunk must be zeros.
27152 // FIXME: undef should be fine, but that results in a more-defined result.
27153 if (!all_of(MaskChunk.drop_front(1),
27154 [](int Indice) { return Indice == -2; }))
27155 return false;
27156 }
27157 assert(Mask.empty() && "Did not process the whole mask?");
27158 return true;
27159 };
27160
27161 unsigned Opcode = ISD::ZERO_EXTEND_VECTOR_INREG;
27162 for (bool Commuted : {false, true}) {
27163 SDValue Op = SVN->getOperand(!Commuted ? 0 : 1);
27164 if (Commuted)
27165 ShuffleVectorSDNode::commuteMask(ScaledMask);
27166 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
27167 Opcode, PrescaledVT, isZeroExtend, DAG, TLI, LegalTypes,
27168 LegalOperations);
27169 if (OutVT)
27170 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT,
27171 DAG.getBitcast(PrescaledVT, Op)));
27172 }
27173 return SDValue();
27174}
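// Worked example (illustrative only; values are hypothetical): for a v4i32
// shuffle X, Zero, <0,4,1,4> where operand 1 is known zero in the demanded
// elements, the mask is refined to <0,-2,1,-2>, which matches the Scale = 2
// pattern, so the node becomes
//   (v4i32 bitcast (v2i64 zero_extend_vector_inreg X))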
27175
27176// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
27177// each source element of a large type into the lowest elements of a smaller
27178// destination type. This is often generated during legalization.
27179// If the source node itself was a '*_extend_vector_inreg' node then we should
27180// then be able to remove it.
27181static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
27182 SelectionDAG &DAG) {
27183 EVT VT = SVN->getValueType(0);
27184 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
27185
27186 // TODO Add support for big-endian when we have a test case.
27187 if (!VT.isInteger() || IsBigEndian)
27188 return SDValue();
27189
27190 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
27191
27192 unsigned Opcode = N0.getOpcode();
27193 if (!ISD::isExtVecInRegOpcode(Opcode))
27194 return SDValue();
27195
27196 SDValue N00 = N0.getOperand(0);
27197 ArrayRef<int> Mask = SVN->getMask();
27198 unsigned NumElts = VT.getVectorNumElements();
27199 unsigned EltSizeInBits = VT.getScalarSizeInBits();
27200 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
27201 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
27202
27203 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
27204 return SDValue();
27205 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
27206
27207 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
27208 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
27209 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
27210 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
27211 for (unsigned i = 0; i != NumElts; ++i) {
27212 if (Mask[i] < 0)
27213 continue;
27214 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
27215 continue;
27216 return false;
27217 }
27218 return true;
27219 };
27220
27221 // At the moment we just handle the case where we've truncated back to the
27222 // same size as before the extension.
27223 // TODO: handle more extension/truncation cases as cases arise.
27224 if (EltSizeInBits != ExtSrcSizeInBits)
27225 return SDValue();
27226 if (VT.getSizeInBits() != N00.getValueSizeInBits())
27227 return SDValue();
27228
27229 // We can remove *extend_vector_inreg only if the truncation happens at
27230 // the same scale as the extension.
27231 if (isTruncate(ExtScale))
27232 return DAG.getBitcast(VT, N00);
27233
27234 return SDValue();
27235}
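// Worked example (illustrative only; types are hypothetical): with
// N0 = (v8i16 bitcast (v4i32 zero_extend_vector_inreg (v8i16 X))), the v8i16
// shuffle <0,2,4,6,u,u,u,u> truncates at the same scale as the extension
// (ExtScale = 2), so the whole expression folds to
//   (v8i16 bitcast X) == X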
27236
27237// Combine shuffles of splat-shuffles of the form:
27238// shuffle (shuffle V, undef, splat-mask), undef, M
27239// If splat-mask contains undef elements, we need to be careful about
27240// introducing undef's in the folded mask which are not the result of composing
27241// the masks of the shuffles.
27242static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
27243 SelectionDAG &DAG) {
27244 EVT VT = Shuf->getValueType(0);
27245 unsigned NumElts = VT.getVectorNumElements();
27246
27247 if (!Shuf->getOperand(1).isUndef())
27248 return SDValue();
27249
27250 // See if this unary non-splat shuffle actually *is* a splat shuffle,
27251 // in disguise, with all demanded elements being identical.
27252 // FIXME: this can be done per-operand.
27253 if (!Shuf->isSplat()) {
27254 APInt DemandedElts(NumElts, 0);
27255 for (int Idx : Shuf->getMask()) {
27256 if (Idx < 0)
27257 continue; // Ignore sentinel indices.
27258 assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle indice?");
27259 DemandedElts.setBit(Idx);
27260 }
27261 assert(DemandedElts.popcount() > 1 && "Is a splat shuffle already?");
27262 APInt UndefElts;
27263 if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) {
27264 // Even if all demanded elements are splat, some of them could be undef.
27265 // Which lowest demanded element is *not* known-undef?
27266 std::optional<unsigned> MinNonUndefIdx;
27267 for (int Idx : Shuf->getMask()) {
27268 if (Idx < 0 || UndefElts[Idx])
27269 continue; // Ignore sentinel indices, and undef elements.
27270 MinNonUndefIdx = std::min<unsigned>(Idx, MinNonUndefIdx.value_or(~0U));
27271 }
27272 if (!MinNonUndefIdx)
27273 return DAG.getUNDEF(VT); // All undef - result is undef.
27274 assert(*MinNonUndefIdx < NumElts && "Expected valid element index.");
27275 SmallVector<int, 8> SplatMask(Shuf->getMask());
27276 for (int &Idx : SplatMask) {
27277 if (Idx < 0)
27278 continue; // Passthrough sentinel indices.
27279 // Otherwise, just pick the lowest demanded non-undef element.
27280 // Or sentinel undef, if we know we'd pick a known-undef element.
27281 Idx = UndefElts[Idx] ? -1 : *MinNonUndefIdx;
27282 }
27283 assert(SplatMask != Shuf->getMask() && "Expected mask to change!");
27284 return DAG.getVectorShuffle(VT, SDLoc(Shuf), Shuf->getOperand(0),
27285 Shuf->getOperand(1), SplatMask);
27286 }
27287 }
27288
27289 // If the inner operand is a known splat with no undefs, just return that directly.
27290 // TODO: Create DemandedElts mask from Shuf's mask.
27291 // TODO: Allow undef elements and merge with the shuffle code below.
27292 if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false))
27293 return Shuf->getOperand(0);
27294
27295 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
27296 if (!Splat || !Splat->isSplat())
27297 return SDValue();
27298
27299 ArrayRef<int> ShufMask = Shuf->getMask();
27300 ArrayRef<int> SplatMask = Splat->getMask();
27301 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
27302
27303 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
27304 // every undef mask element in the splat-shuffle has a corresponding undef
27305 // element in the user-shuffle's mask or if the composition of mask elements
27306 // would result in undef.
27307 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
27308 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
27309 // In this case it is not legal to simplify to the splat-shuffle because we
27310 // may be exposing to the users of the shuffle an undef element at index 1
27311 // which was not there before the combine.
27312 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
27313 // In this case the composition of masks yields SplatMask, so it's ok to
27314 // simplify to the splat-shuffle.
27315 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
27316 // In this case the composed mask includes all undef elements of SplatMask
27317 // and in addition sets element zero to undef. It is safe to simplify to
27318 // the splat-shuffle.
27319 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
27320 ArrayRef<int> SplatMask) {
27321 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
27322 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
27323 SplatMask[UserMask[i]] != -1)
27324 return false;
27325 return true;
27326 };
27327 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
27328 return Shuf->getOperand(0);
27329
27330 // Create a new shuffle with a mask that is composed of the two shuffles'
27331 // masks.
27332 SmallVector<int, 32> NewMask;
27333 for (int Idx : ShufMask)
27334 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
27335
27336 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
27337 Splat->getOperand(0), Splat->getOperand(1),
27338 NewMask);
27339}
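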
27340
27341// Combine shuffles of bitcasts into a shuffle of the bitcast type, providing
27342// the mask can be treated as a larger type.
27343static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN,
27344 SelectionDAG &DAG,
27345 const TargetLowering &TLI,
27346 bool LegalOperations) {
27347 SDValue Op0 = SVN->getOperand(0);
27348 SDValue Op1 = SVN->getOperand(1);
27349 EVT VT = SVN->getValueType(0);
27350 if (Op0.getOpcode() != ISD::BITCAST)
27351 return SDValue();
27352 EVT InVT = Op0.getOperand(0).getValueType();
27353 if (!InVT.isVector() ||
27354 (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST ||
27355 Op1.getOperand(0).getValueType() != InVT)))
27356 return SDValue();
27357 if (isAnyConstantBuildVector(Op0.getOperand(0)) &&
27358 (Op1.isUndef() || isAnyConstantBuildVector(Op1.getOperand(0))))
27359 return SDValue();
27360
27361 int VTLanes = VT.getVectorNumElements();
27362 int InLanes = InVT.getVectorNumElements();
27363 if (VTLanes <= InLanes || VTLanes % InLanes != 0 ||
27364 (LegalOperations &&
27365 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, InVT)))
27366 return SDValue();
27367 int Factor = VTLanes / InLanes;
27368
27369 // Check that each group of lanes in the mask is either undef or makes a valid
27370 // mask for the wider lane type.
27371 ArrayRef<int> Mask = SVN->getMask();
27372 SmallVector<int> NewMask;
27373 if (!widenShuffleMaskElts(Factor, Mask, NewMask))
27374 return SDValue();
27375
27376 if (!TLI.isShuffleMaskLegal(NewMask, InVT))
27377 return SDValue();
27378
27379 // Create the new shuffle with the new mask and bitcast it back to the
27380 // original type.
27381 SDLoc DL(SVN);
27382 Op0 = Op0.getOperand(0);
27383 Op1 = Op1.isUndef() ? DAG.getUNDEF(InVT) : Op1.getOperand(0);
27384 SDValue NewShuf = DAG.getVectorShuffle(InVT, DL, Op0, Op1, NewMask);
27385 return DAG.getBitcast(VT, NewShuf);
27386}
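// Worked example (illustrative only; types are hypothetical): a v8i16 shuffle
// <0,1,8,9,2,3,10,11> of (bitcast (v4i32 X)) and (bitcast (v4i32 Y)) moves
// whole 32-bit lanes, so with Factor = 2 the mask widens to <0,4,1,5> and the
// fold produces
//   (v8i16 bitcast (shuffle X, Y, <0,4,1,5>))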
27387
27388/// Combine shuffle of shuffle of the form:
27389/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
27390static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
27391 SelectionDAG &DAG) {
27392 if (!OuterShuf->getOperand(1).isUndef())
27393 return SDValue();
27394 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
27395 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
27396 return SDValue();
27397
27398 ArrayRef<int> OuterMask = OuterShuf->getMask();
27399 ArrayRef<int> InnerMask = InnerShuf->getMask();
27400 unsigned NumElts = OuterMask.size();
27401 assert(NumElts == InnerMask.size() && "Mask length mismatch");
27402 SmallVector<int, 32> CombinedMask(NumElts, -1);
27403 int SplatIndex = -1;
27404 for (unsigned i = 0; i != NumElts; ++i) {
27405 // Undef lanes remain undef.
27406 int OuterMaskElt = OuterMask[i];
27407 if (OuterMaskElt == -1)
27408 continue;
27409
27410 // Peek through the shuffle masks to get the underlying source element.
27411 int InnerMaskElt = InnerMask[OuterMaskElt];
27412 if (InnerMaskElt == -1)
27413 continue;
27414
27415 // Initialize the splatted element.
27416 if (SplatIndex == -1)
27417 SplatIndex = InnerMaskElt;
27418
27419 // Non-matching index - this is not a splat.
27420 if (SplatIndex != InnerMaskElt)
27421 return SDValue();
27422
27423 CombinedMask[i] = InnerMaskElt;
27424 }
27425 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
27426 getSplatIndex(CombinedMask) != -1) &&
27427 "Expected a splat mask");
27428
27429 // TODO: The transform may be a win even if the mask is not legal.
27430 EVT VT = OuterShuf->getValueType(0);
27431 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
27432 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
27433 return SDValue();
27434
27435 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
27436 InnerShuf->getOperand(1), CombinedMask);
27437}
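// Worked example (illustrative only; masks are hypothetical): with
// InnerMask = <u,u,3,u> and OuterMask = <2,2,u,2>, every defined outer lane
// reads inner lane 2, which in turn reads source element 3, so the combined
// mask is the splat <3,3,u,3> and the pair folds to a single splat shuffle of
// the inner shuffle's operands.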
27438
27439/// If the shuffle mask is taking exactly one element from the first vector
27440/// operand and passing through all other elements from the second vector
27441/// operand, return the index of the mask element that is choosing an element
27442/// from the first operand. Otherwise, return -1.
27443static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
27444 int MaskSize = Mask.size();
27445 int EltFromOp0 = -1;
27446 // TODO: This does not match if there are undef elements in the shuffle mask.
27447 // Should we ignore undefs in the shuffle mask instead? The trade-off is
27448 // removing an instruction (a shuffle), but losing the knowledge that some
27449 // vector lanes are not needed.
27450 for (int i = 0; i != MaskSize; ++i) {
27451 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
27452 // We're looking for a shuffle of exactly one element from operand 0.
27453 if (EltFromOp0 != -1)
27454 return -1;
27455 EltFromOp0 = i;
27456 } else if (Mask[i] != i + MaskSize) {
27457 // Nothing from operand 1 can change lanes.
27458 return -1;
27459 }
27460 }
27461 return EltFromOp0;
27462}
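// Worked example (illustrative only): for Mask = <4,5,0,7> with MaskSize = 4,
// lanes 0, 1 and 3 pass operand 1 through unchanged and lane 2 takes element
// 0 of operand 0, so the function returns 2. A mask such as <4,0,1,7> returns
// -1 because two lanes come from operand 0.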
27463
27464/// If a shuffle inserts exactly one element from a source vector operand into
27465/// another vector operand and we can access the specified element as a scalar,
27466/// then we can eliminate the shuffle.
27467SDValue DAGCombiner::replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf) {
27468 // First, check if we are taking one element of a vector and shuffling that
27469 // element into another vector.
27470 ArrayRef<int> Mask = Shuf->getMask();
27471 SmallVector<int, 16> CommutedMask(Mask);
27472 SDValue Op0 = Shuf->getOperand(0);
27473 SDValue Op1 = Shuf->getOperand(1);
27474 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
27475 if (ShufOp0Index == -1) {
27476 // Commute mask and check again.
27477 ShuffleVectorSDNode::commuteMask(CommutedMask);
27478 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
27479 if (ShufOp0Index == -1)
27480 return SDValue();
27481 // Commute operands to match the commuted shuffle mask.
27482 std::swap(Op0, Op1);
27483 Mask = CommutedMask;
27484 }
27485
27486 // The shuffle inserts exactly one element from operand 0 into operand 1.
27487 // Now see if we can access that element as a scalar via a real insert element
27488 // instruction.
27489 // TODO: We can try harder to locate the element as a scalar. Examples: it
27490 // could be an operand of BUILD_VECTOR, or a constant.
27491 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
27492 "Shuffle mask value must be from operand 0");
27493
27494 SDValue Elt;
27495 if (sd_match(Op0, m_InsertElt(m_Value(), m_Value(Elt),
27496 m_SpecificInt(Mask[ShufOp0Index])))) {
27497 // There's an existing insertelement with constant insertion index, so we
27498 // don't need to check the legality/profitability of a replacement operation
27499 // that differs at most in the constant value. The target should be able to
27500 // lower any of those in a similar way. If not, legalization will expand
27501 // this to a scalar-to-vector plus shuffle.
27502 //
27503 // Note that the shuffle may move the scalar from the position that the
27504 // insert element used. Therefore, our new insert element occurs at the
27505 // shuffle's mask index value, not the insert's index value.
27506 //
27507 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
27508 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
27509 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
27510 Op1, Elt, NewInsIndex);
27511 }
27512
27513 if (!hasOperation(ISD::INSERT_VECTOR_ELT, Op0.getValueType()))
27514 return SDValue();
27515
27517 Mask[ShufOp0Index] == 0) {
27518 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
27519 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
27520 Op1, Elt, NewInsIndex);
27521 }
27522
27523 return SDValue();
27524}
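// Worked example (illustrative only; values are hypothetical): with
// Mask = <4,0,6,7>, lane 1 takes element 0 of operand 0, so if operand 0 is
// (insertelt V1, X, 0) the shuffle becomes
//   shuffle (insertelt V1, X, 0), V2, <4,0,6,7> --> insertelt V2, X, 1
// i.e. X is re-inserted at the shuffle's lane index, not at the original
// insert's index.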
27525
27526/// If we have a unary shuffle of a shuffle, see if it can be folded away
27527/// completely. This has the potential to lose undef knowledge because the first
27528/// shuffle may not have an undef mask element where the second one does. So
27529/// only call this after doing simplifications based on demanded elements.
27530SDValue DAGCombiner::simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
27531 // shuf (shuf0 X, Y, Mask0), undef, Mask
27532 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
27533 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
27534 return SDValue();
27535
27536 ArrayRef<int> Mask = Shuf->getMask();
27537 ArrayRef<int> Mask0 = Shuf0->getMask();
27538 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
27539 // Ignore undef elements.
27540 if (Mask[i] == -1)
27541 continue;
27542 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
27543
27544 // Is the element of the shuffle operand chosen by this shuffle the same as
27545 // the element chosen by the shuffle operand itself?
27546 if (Mask0[Mask[i]] != Mask0[i])
27547 return SDValue();
27548 }
27549 // Every element of this shuffle is identical to the result of the previous
27550 // shuffle, so we can replace this value.
27551 return Shuf->getOperand(0);
27552}
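// Worked example (illustrative only; masks are hypothetical): for
//   shuf (shuf0 X, Y, <0,0,2,2>), undef, <1,0,3,2>
// every outer lane i satisfies Mask0[Mask[i]] == Mask0[i], so the outer
// shuffle is redundant and the expression folds to the inner shuffle.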
27553
27554SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
27555 EVT VT = N->getValueType(0);
27556 unsigned NumElts = VT.getVectorNumElements();
27557
27558 SDValue N0 = N->getOperand(0);
27559 SDValue N1 = N->getOperand(1);
27560
27561 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
27562
27563 // Canonicalize shuffle undef, undef -> undef
27564 if (N0.isUndef() && N1.isUndef())
27565 return DAG.getUNDEF(VT);
27566
27567 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
27568
27569 // Canonicalize shuffle v, v -> v, undef
27570 if (N0 == N1)
27571 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
27572 createUnaryMask(SVN->getMask(), NumElts));
27573
27574 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
27575 if (N0.isUndef())
27576 return DAG.getCommutedVectorShuffle(*SVN);
27577
27578 // Remove references to rhs if it is undef
27579 if (N1.isUndef()) {
27580 bool Changed = false;
27581 SmallVector<int, 8> NewMask;
27582 for (unsigned i = 0; i != NumElts; ++i) {
27583 int Idx = SVN->getMaskElt(i);
27584 if (Idx >= (int)NumElts) {
27585 Idx = -1;
27586 Changed = true;
27587 }
27588 NewMask.push_back(Idx);
27589 }
27590 if (Changed)
27591 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
27592 }
27593
27594 if (SDValue InsElt = replaceShuffleOfInsert(SVN))
27595 return InsElt;
27596
27597 // A shuffle of a single vector that is a splatted value can always be folded.
27598 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
27599 return V;
27600
27601 if (SDValue V = formSplatFromShuffles(SVN, DAG))
27602 return V;
27603
27604 // If it is a splat, check if the argument vector is another splat or a
27605 // build_vector.
27606 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
27607 int SplatIndex = SVN->getSplatIndex();
27608 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
27609 TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
27610 // splat (vector_bo L, R), Index -->
27611 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
27612 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
27613 SDLoc DL(N);
27614 EVT EltVT = VT.getScalarType();
27615 SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
27616 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
27617 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
27618 SDValue NewBO =
27619 DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
27620 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
27621 SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
27622 return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
27623 }
27624
27625 // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
27626 // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
27627 if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) &&
27628 N0.hasOneUse()) {
27629 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0)
27630 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0));
27631
27632 if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT)
27633 if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
27634 if (Idx->getAPIntValue() == SplatIndex)
27635 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
27636
27637 // Look through a bitcast if LE and splatting lane 0, through to a
27638 // scalar_to_vector or a build_vector.
27639 if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() &&
27640 SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() &&
27641 (N0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR ||
27642 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR)) {
27643 EVT N00VT = N0.getOperand(0).getValueType();
27644 if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() &&
27645 VT.isInteger() && N00VT.isInteger()) {
27646 EVT InVT =
27647 TLI.getTypeToTransformTo(*DAG.getContext(), VT.getScalarType());
27648 SDValue Op = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0),
27649 SDLoc(N), InVT);
27650 return DAG.getSplatBuildVector(VT, SDLoc(N), Op);
27651 }
27652 }
27653 }
27654
27655 // If this is a bit convert that changes the element type of the vector but
27656 // not the number of vector elements, look through it. Be careful not to
27657 // look through conversions that change things like v4f32 to v2f64.
27658 SDNode *V = N0.getNode();
27659 if (V->getOpcode() == ISD::BITCAST) {
27660 SDValue ConvInput = V->getOperand(0);
27661 if (ConvInput.getValueType().isVector() &&
27662 ConvInput.getValueType().getVectorNumElements() == NumElts)
27663 V = ConvInput.getNode();
27664 }
27665
27666 if (V->getOpcode() == ISD::BUILD_VECTOR) {
27667 assert(V->getNumOperands() == NumElts &&
27668 "BUILD_VECTOR has wrong number of operands");
27669 SDValue Base;
27670 bool AllSame = true;
27671 for (unsigned i = 0; i != NumElts; ++i) {
27672 if (!V->getOperand(i).isUndef()) {
27673 Base = V->getOperand(i);
27674 break;
27675 }
27676 }
27677 // Splat of <u, u, u, u>, return <u, u, u, u>
27678 if (!Base.getNode())
27679 return N0;
27680 for (unsigned i = 0; i != NumElts; ++i) {
27681 if (V->getOperand(i) != Base) {
27682 AllSame = false;
27683 break;
27684 }
27685 }
27686 // Splat of <x, x, x, x>, return <x, x, x, x>
27687 if (AllSame)
27688 return N0;
27689
27690 // Canonicalize any other splat as a build_vector, but avoid defining any
27691 // undefined elements in the mask.
27692 SDValue Splatted = V->getOperand(SplatIndex);
27693 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
27694 EVT EltVT = Splatted.getValueType();
27695
27696 for (unsigned i = 0; i != NumElts; ++i) {
27697 if (SVN->getMaskElt(i) < 0)
27698 Ops[i] = DAG.getUNDEF(EltVT);
27699 }
27700
27701 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
27702
27703 // We may have jumped through bitcasts, so the type of the
27704 // BUILD_VECTOR may not match the type of the shuffle.
27705 if (V->getValueType(0) != VT)
27706 NewBV = DAG.getBitcast(VT, NewBV);
27707 return NewBV;
27708 }
27709 }
27710
27711 // Simplify source operands based on shuffle mask.
27712 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
27713 return SDValue(N, 0);
27714
27715 // This is intentionally placed after demanded elements simplification because
27716 // it could eliminate knowledge of undef elements created by this shuffle.
27717 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
27718 return ShufOp;
27719
27720 // Match shuffles that can be converted to any_vector_extend_in_reg.
27721 if (SDValue V =
27722 combineShuffleToAnyExtendVectorInreg(SVN, DAG, TLI, LegalOperations))
27723 return V;
27724
27725 // Combine "truncate_vector_in_reg" style shuffles.
27726 if (SDValue V = combineTruncationShuffle(SVN, DAG))
27727 return V;
27728
27729 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
27730 Level < AfterLegalizeVectorOps &&
27731 (N1.isUndef() ||
27732 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
27733 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
27734 if (SDValue V = partitionShuffleOfConcats(N, DAG))
27735 return V;
27736 }
27737
27738 // A shuffle of a concat of the same narrow vector can be reduced to use
27739 // only low-half elements of a concat with undef:
27740 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
27741 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
27742 N0.getNumOperands() == 2 &&
27743 N0.getOperand(0) == N0.getOperand(1)) {
27744 int HalfNumElts = (int)NumElts / 2;
27745 SmallVector<int, 8> NewMask;
27746 for (unsigned i = 0; i != NumElts; ++i) {
27747 int Idx = SVN->getMaskElt(i);
27748 if (Idx >= HalfNumElts) {
27749 assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
27750 Idx -= HalfNumElts;
27751 }
27752 NewMask.push_back(Idx);
27753 }
27754 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
27755 SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
27756 SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
27757 N0.getOperand(0), UndefVec);
27758 return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
27759 }
27760 }
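// Worked example (illustrative only; types are hypothetical): for a v4i32
// shuffle of (concat X, X) (X being v2i32) with Mask = <0,3,1,2>, indices >= 2
// are remapped into the low half, giving
//   shuf (concat X, X), undef, <0,3,1,2>
//     --> shuf (concat X, undef), undef, <0,1,1,0>
// when the target reports the new mask as legal.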
27761
27762 // See if we can replace a shuffle with an insert_subvector.
27763 // e.g. v2i32 into v8i32:
27764 // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
27765 // --> insert_subvector(lhs,rhs1,4).
27766 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
27767 TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
27768 auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
27769 // Ensure RHS subvectors are legal.
27770 assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
27771 EVT SubVT = RHS.getOperand(0).getValueType();
27772 int NumSubVecs = RHS.getNumOperands();
27773 int NumSubElts = SubVT.getVectorNumElements();
27774 assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
27775 if (!TLI.isTypeLegal(SubVT))
27776 return SDValue();
27777
27778 // Don't bother if we have a unary shuffle (matches undef + LHS elts).
27779 if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
27780 return SDValue();
27781
27782 // Search [NumSubElts] spans for RHS sequence.
27783 // TODO: Can we avoid nested loops to increase performance?
27784 SmallVector<int> InsertionMask(NumElts);
27785 for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
27786 for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
27787 // Reset mask to identity.
27788 std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
27789
27790 // Add subvector insertion.
27791 std::iota(InsertionMask.begin() + SubIdx,
27792 InsertionMask.begin() + SubIdx + NumSubElts,
27793 NumElts + (SubVec * NumSubElts));
27794
27795 // See if the shuffle mask matches the reference insertion mask.
27796 bool MatchingShuffle = true;
27797 for (int i = 0; i != (int)NumElts; ++i) {
27798 int ExpectIdx = InsertionMask[i];
27799 int ActualIdx = Mask[i];
27800 if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
27801 MatchingShuffle = false;
27802 break;
27803 }
27804 }
27805
27806 if (MatchingShuffle)
27807 return DAG.getInsertSubvector(SDLoc(N), LHS, RHS.getOperand(SubVec),
27808 SubIdx);
27809 }
27810 }
27811 return SDValue();
27812 };
27813 ArrayRef<int> Mask = SVN->getMask();
27814 if (N1.getOpcode() == ISD::CONCAT_VECTORS)
27815 if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
27816 return InsertN1;
27817 if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
27818 SmallVector<int> CommuteMask(Mask);
27819 ShuffleVectorSDNode::commuteMask(CommuteMask);
27820 if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
27821 return InsertN0;
27822 }
27823 }
27824
27825 // If we're not performing a select/blend shuffle, see if we can convert the
27826 // shuffle into an AND node, where all the out-of-lane elements are known zero.
27827 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
27828 bool IsInLaneMask = true;
27829 ArrayRef<int> Mask = SVN->getMask();
27830 SmallVector<int, 16> ClearMask(NumElts, -1);
27831 APInt DemandedLHS = APInt::getZero(NumElts);
27832 APInt DemandedRHS = APInt::getZero(NumElts);
27833 for (int I = 0; I != (int)NumElts; ++I) {
27834 int M = Mask[I];
27835 if (M < 0)
27836 continue;
27837 ClearMask[I] = M == I ? I : (I + NumElts);
27838 IsInLaneMask &= (M == I) || (M == (int)(I + NumElts));
27839 if (M != I) {
27840 APInt &Demanded = M < (int)NumElts ? DemandedLHS : DemandedRHS;
27841 Demanded.setBit(M % NumElts);
27842 }
27843 }
27844 // TODO: Should we try to mask with N1 as well?
27845 if (!IsInLaneMask && (!DemandedLHS.isZero() || !DemandedRHS.isZero()) &&
27846 (DemandedLHS.isZero() || DAG.MaskedVectorIsZero(N0, DemandedLHS)) &&
27847 (DemandedRHS.isZero() || DAG.MaskedVectorIsZero(N1, DemandedRHS))) {
27848 SDLoc DL(N);
27849 EVT IntVT = VT.changeVectorElementTypeToInteger();
27850 EVT IntSVT = VT.getVectorElementType().changeTypeToInteger();
27851 // Transform the type to a legal type so that the buildvector constant
27852 // elements are not illegal. Make sure that the result is larger than the
27853 // original type, in case the value is split into two (e.g. i64->i32).
27854 if (!TLI.isTypeLegal(IntSVT) && LegalTypes)
27855 IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT);
27856 if (IntSVT.getSizeInBits() >= IntVT.getScalarSizeInBits()) {
27857 SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT);
27858 SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT);
27859 SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT));
27860 for (int I = 0; I != (int)NumElts; ++I)
27861 if (0 <= Mask[I])
27862 AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;
27863
27864 // See if a clear mask is legal instead of going via
27865 // XformToShuffleWithZero which loses UNDEF mask elements.
27866 if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
27867 return DAG.getBitcast(
27868 VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0),
27869 DAG.getConstant(0, DL, IntVT), ClearMask));
27870
27871 if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
27872 return DAG.getBitcast(
27873 VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0),
27874 DAG.getBuildVector(IntVT, DL, AndMask)));
27875 }
27876 }
27877 }
27878
27879 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
27880 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
27881 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
27882 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
27883 return Res;
27884
27885 // If this shuffle only has a single input that is a bitcasted shuffle,
27886 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
27887 // back to their original types.
27888 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
27889 N1.isUndef() && Level < AfterLegalizeVectorOps &&
27890 TLI.isTypeLegal(VT)) {
27891
27892 SDValue BC0 = peekThroughOneUseBitcasts(N0);
27893 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
27894 EVT SVT = VT.getScalarType();
27895 EVT InnerVT = BC0->getValueType(0);
27896 EVT InnerSVT = InnerVT.getScalarType();
27897
27898 // Determine which shuffle works with the smaller scalar type.
27899 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
27900 EVT ScaleSVT = ScaleVT.getScalarType();
27901
27902 if (TLI.isTypeLegal(ScaleVT) &&
27903 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
27904 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
27905 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
27906 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
27907
27908 // Scale the shuffle masks to the smaller scalar type.
27909 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
27910 SmallVector<int, 8> InnerMask;
27911 SmallVector<int, 8> OuterMask;
27912 narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
27913 narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
27914
27915 // Merge the shuffle masks.
27916 SmallVector<int, 8> NewMask;
27917 for (int M : OuterMask)
27918 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
27919
27920 // Test for shuffle mask legality over both commutations.
27921 SDValue SV0 = BC0->getOperand(0);
27922 SDValue SV1 = BC0->getOperand(1);
27923 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
27924 if (!LegalMask) {
27925 std::swap(SV0, SV1);
27926 ShuffleVectorSDNode::commuteMask(NewMask);
27927 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
27928 }
27929
27930 if (LegalMask) {
27931 SV0 = DAG.getBitcast(ScaleVT, SV0);
27932 SV1 = DAG.getBitcast(ScaleVT, SV1);
27933 return DAG.getBitcast(
27934 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
27935 }
27936 }
27937 }
27938 }
27939
27940 // Match shuffles of bitcasts, so long as the mask can be treated as the
27941 // larger type.
27942 if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations))
27943 return V;
27944
27945 // Compute the combined shuffle mask for a shuffle with SV0 as the first
27946 // operand, and SV1 as the second operand.
27947 // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
27948 // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
27949 auto MergeInnerShuffle =
27950 [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
27951 ShuffleVectorSDNode *OtherSVN, SDValue N1,
27952 const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
27953 SmallVectorImpl<int> &Mask) -> bool {
27954 // Don't try to fold splats; they're likely to simplify somehow, or they
27955 // might be free.
27956 if (OtherSVN->isSplat())
27957 return false;
27958
27959 SV0 = SV1 = SDValue();
27960 Mask.clear();
27961
27962 for (unsigned i = 0; i != NumElts; ++i) {
27963 int Idx = SVN->getMaskElt(i);
27964 if (Idx < 0) {
27965 // Propagate Undef.
27966 Mask.push_back(Idx);
27967 continue;
27968 }
27969
27970 if (Commute)
27971 Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
27972
27973 SDValue CurrentVec;
27974 if (Idx < (int)NumElts) {
27975 // This shuffle index refers to the inner shuffle N0. Lookup the inner
27976 // shuffle mask to identify which vector is actually referenced.
27977 Idx = OtherSVN->getMaskElt(Idx);
27978 if (Idx < 0) {
27979 // Propagate Undef.
27980 Mask.push_back(Idx);
27981 continue;
27982 }
27983 CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
27984 : OtherSVN->getOperand(1);
27985 } else {
27986 // This shuffle index references an element within N1.
27987 CurrentVec = N1;
27988 }
27989
27990 // Simple case where 'CurrentVec' is UNDEF.
27991 if (CurrentVec.isUndef()) {
27992 Mask.push_back(-1);
27993 continue;
27994 }
27995
27996 // Canonicalize the shuffle index. We don't know yet if CurrentVec
27997 // will be the first or second operand of the combined shuffle.
27998 Idx = Idx % NumElts;
27999 if (!SV0.getNode() || SV0 == CurrentVec) {
28000 // Ok. CurrentVec is the left hand side.
28001 // Update the mask accordingly.
28002 SV0 = CurrentVec;
28003 Mask.push_back(Idx);
28004 continue;
28005 }
28006 if (!SV1.getNode() || SV1 == CurrentVec) {
28007 // Ok. CurrentVec is the right hand side.
28008 // Update the mask accordingly.
28009 SV1 = CurrentVec;
28010 Mask.push_back(Idx + NumElts);
28011 continue;
28012 }
28013
28014 // Last chance - see if the vector is another shuffle and if it
28015 // uses one of the existing candidate shuffle ops.
28016 if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
28017 int InnerIdx = CurrentSVN->getMaskElt(Idx);
28018 if (InnerIdx < 0) {
28019 Mask.push_back(-1);
28020 continue;
28021 }
28022 SDValue InnerVec = (InnerIdx < (int)NumElts)
28023 ? CurrentSVN->getOperand(0)
28024 : CurrentSVN->getOperand(1);
28025 if (InnerVec.isUndef()) {
28026 Mask.push_back(-1);
28027 continue;
28028 }
28029 InnerIdx %= NumElts;
28030 if (InnerVec == SV0) {
28031 Mask.push_back(InnerIdx);
28032 continue;
28033 }
28034 if (InnerVec == SV1) {
28035 Mask.push_back(InnerIdx + NumElts);
28036 continue;
28037 }
28038 }
28039
28040 // Bail out if we cannot convert the shuffle pair into a single shuffle.
28041 return false;
28042 }
28043
28044 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
28045 return true;
28046
28047 // Avoid introducing shuffles with illegal mask.
28048 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
28049 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
28050 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
28051 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
28052 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
28053 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
28054 if (TLI.isShuffleMaskLegal(Mask, VT))
28055 return true;
28056
28057 std::swap(SV0, SV1);
28058 ShuffleVectorSDNode::commuteMask(Mask);
28059 return TLI.isShuffleMaskLegal(Mask, VT);
28060 };
28061
28062 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
28063 // Canonicalize shuffles according to rules:
28064 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
28065 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
28066 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
28067 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
28068 N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
28069 // The incoming shuffle must be of the same type as the result of the
28070 // current shuffle.
28071 assert(N1->getOperand(0).getValueType() == VT &&
28072 "Shuffle types don't match");
28073
28074 SDValue SV0 = N1->getOperand(0);
28075 SDValue SV1 = N1->getOperand(1);
28076 bool HasSameOp0 = N0 == SV0;
28077 bool IsSV1Undef = SV1.isUndef();
28078 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
28079 // Commute the operands of this shuffle so merging below will trigger.
28080 return DAG.getCommutedVectorShuffle(*SVN);
28081 }
28082
28083 // Canonicalize splat shuffles to the RHS to improve merging below.
28084 // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
28085 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
28086 N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
28087 cast<ShuffleVectorSDNode>(N0)->isSplat() &&
28088 !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
28089 return DAG.getCommutedVectorShuffle(*SVN);
28090 }
28091
28092 // Try to fold according to rules:
28093 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
28094 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
28095 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
28096 // Don't try to fold shuffles with illegal type.
28097 // Only fold if this shuffle is the only user of the other shuffle.
28098 // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
28099 for (int i = 0; i != 2; ++i) {
28100 if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
28101 N->isOnlyUserOf(N->getOperand(i).getNode())) {
28102 // The incoming shuffle must be of the same type as the result of the
28103 // current shuffle.
28104 auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
28105 assert(OtherSV->getOperand(0).getValueType() == VT &&
28106 "Shuffle types don't match");
28107
28108 SDValue SV0, SV1;
28109 SmallVector<int, 4> Mask;
28110 if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
28111 SV0, SV1, Mask)) {
28112 // Check if all indices in Mask are Undef. In case, propagate Undef.
28113 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
28114 return DAG.getUNDEF(VT);
28115
28116 return DAG.getVectorShuffle(VT, SDLoc(N),
28117 SV0 ? SV0 : DAG.getUNDEF(VT),
28118 SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
28119 }
28120 }
28121 }
28122
28123 // Merge shuffles through binops if we are able to merge them with at least
28124 // one other shuffle.
28125 // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
28126 // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
28127 unsigned SrcOpcode = N0.getOpcode();
28128 if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
28129 (N1.isUndef() ||
28130 (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
28131 // Get binop source ops, or just pass on the undef.
28132 SDValue Op00 = N0.getOperand(0);
28133 SDValue Op01 = N0.getOperand(1);
28134 SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
28135 SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
28136 // TODO: We might be able to relax the VT check but we don't currently
28137 // have any isBinOp() that has different result/ops VTs so play safe until
28138 // we have test coverage.
28139 if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
28140 Op01.getValueType() == VT && Op11.getValueType() == VT &&
28141 (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
28142 Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
28143 Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
28144 Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
28145 auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
28146 SmallVectorImpl<int> &Mask, bool LeftOp,
28147 bool Commute) {
28148 SDValue InnerN = Commute ? N1 : N0;
28149 SDValue Op0 = LeftOp ? Op00 : Op01;
28150 SDValue Op1 = LeftOp ? Op10 : Op11;
28151 if (Commute)
28152 std::swap(Op0, Op1);
28153 // Only accept the merged shuffle if we don't introduce undef elements,
28154 // or the inner shuffle already contained undef elements.
28155 auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
28156 return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
28157 MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
28158 Mask) &&
28159 (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
28160 llvm::none_of(Mask, [](int M) { return M < 0; }));
28161 };
28162
28163 // Ensure we don't increase the number of shuffles - we must merge a
28164 // shuffle from at least one of the LHS and RHS ops.
28165 bool MergedLeft = false;
28166 SDValue LeftSV0, LeftSV1;
28167 SmallVector<int, 4> LeftMask;
28168 if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
28169 CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
28170 MergedLeft = true;
28171 } else {
28172 LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
28173 LeftSV0 = Op00, LeftSV1 = Op10;
28174 }
28175
28176 bool MergedRight = false;
28177 SDValue RightSV0, RightSV1;
28178 SmallVector<int, 4> RightMask;
28179 if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
28180 CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
28181 MergedRight = true;
28182 } else {
28183 RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
28184 RightSV0 = Op01, RightSV1 = Op11;
28185 }
28186
28187 if (MergedLeft || MergedRight) {
28188 SDLoc DL(N);
28189 SDValue LHS = DAG.getVectorShuffle(
28190 VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
28191 LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
28192 SDValue RHS = DAG.getVectorShuffle(
28193 VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
28194 RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
28195 return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
28196 }
28197 }
28198 }
28199 }
28200
28201 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
28202 return V;
28203
28204 // Match shuffles that can be converted to ISD::ZERO_EXTEND_VECTOR_INREG.
28205 // Perform this really late, because it could eliminate knowledge
28206 // of undef elements created by this shuffle.
28207 if (Level < AfterLegalizeTypes)
28208 if (SDValue V = combineShuffleToZeroExtendVectorInReg(SVN, DAG, TLI,
28209 LegalOperations))
28210 return V;
28211
28212 return SDValue();
28213}
28214
28215SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
28216 EVT VT = N->getValueType(0);
28217 if (!VT.isFixedLengthVector())
28218 return SDValue();
28219
28220 // Try to convert a scalar binop with an extracted vector element to a vector
28221 // binop. This is intended to reduce potentially expensive register moves.
28222 // TODO: Check if both operands are extracted.
28223 // TODO: How to prefer scalar/vector ops with multiple uses of the extract?
28224 // TODO: Generalize this, so it can be called from visitINSERT_VECTOR_ELT().
28225 SDValue Scalar = N->getOperand(0);
28226 unsigned Opcode = Scalar.getOpcode();
28227 EVT VecEltVT = VT.getScalarType();
28228 if (Scalar.hasOneUse() && Scalar->getNumValues() == 1 &&
28229 TLI.isBinOp(Opcode) && Scalar.getValueType() == VecEltVT &&
28230 Scalar.getOperand(0).getValueType() == VecEltVT &&
28231 Scalar.getOperand(1).getValueType() == VecEltVT &&
28232 Scalar->isOnlyUserOf(Scalar.getOperand(0).getNode()) &&
28233 Scalar->isOnlyUserOf(Scalar.getOperand(1).getNode()) &&
28234 DAG.isSafeToSpeculativelyExecute(Opcode) && hasOperation(Opcode, VT)) {
28235 // Match an extract element and get a shuffle mask equivalent.
28236 SmallVector<int, 8> ShufMask(VT.getVectorNumElements(), -1);
28237
28238 for (int i : {0, 1}) {
28239 // s2v (bo (extelt V, Idx), C) --> shuffle (bo V, C'), {Idx, -1, -1...}
28240 // s2v (bo C, (extelt V, Idx)) --> shuffle (bo C', V), {Idx, -1, -1...}
28241 SDValue EE = Scalar.getOperand(i);
28242 auto *C = dyn_cast<ConstantSDNode>(Scalar.getOperand(i ? 0 : 1));
28243 if (C && EE.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
28244 EE.getOperand(0).getValueType() == VT &&
28245 isa<ConstantSDNode>(EE.getOperand(1))) {
28246 // Mask = {ExtractIndex, undef, undef....}
28247 ShufMask[0] = EE.getConstantOperandVal(1);
28248 // Make sure the shuffle is legal if we are crossing lanes.
28249 if (TLI.isShuffleMaskLegal(ShufMask, VT)) {
28250 SDLoc DL(N);
28251 SDValue V[] = {EE.getOperand(0),
28252 DAG.getConstant(C->getAPIntValue(), DL, VT)};
28253 SDValue VecBO = DAG.getNode(Opcode, DL, VT, V[i], V[1 - i]);
28254 return DAG.getVectorShuffle(VT, DL, VecBO, DAG.getUNDEF(VT),
28255 ShufMask);
28256 }
28257 }
28258 }
28259 }
28260
28261 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
28262 // with a VECTOR_SHUFFLE and possible truncate.
28263 if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
28264 !Scalar.getOperand(0).getValueType().isFixedLengthVector())
28265 return SDValue();
28266
28267 // If we have an implicit truncate, truncate here if it is legal.
28268 if (VecEltVT != Scalar.getValueType() &&
28269 Scalar.getValueType().isScalarInteger() && isTypeLegal(VecEltVT)) {
28270 SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(Scalar), VecEltVT, Scalar);
28271 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
28272 }
28273
28274 auto *ExtIndexC = dyn_cast<ConstantSDNode>(Scalar.getOperand(1));
28275 if (!ExtIndexC)
28276 return SDValue();
28277
28278 SDValue SrcVec = Scalar.getOperand(0);
28279 EVT SrcVT = SrcVec.getValueType();
28280 unsigned SrcNumElts = SrcVT.getVectorNumElements();
28281 unsigned VTNumElts = VT.getVectorNumElements();
28282 if (VecEltVT == SrcVT.getScalarType() && VTNumElts <= SrcNumElts) {
28283 // Create a shuffle equivalent for scalar-to-vector: {ExtIndex, -1, -1, ...}
28284 SmallVector<int, 8> Mask(SrcNumElts, -1);
28285 Mask[0] = ExtIndexC->getZExtValue();
28286 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
28287 SrcVT, SDLoc(N), SrcVec, DAG.getUNDEF(SrcVT), Mask, DAG);
28288 if (!LegalShuffle)
28289 return SDValue();
28290
28291 // If the initial vector is the same size, the shuffle is the result.
28292 if (VT == SrcVT)
28293 return LegalShuffle;
28294
28295 // If not, shorten the shuffled vector.
28296 if (VTNumElts != SrcNumElts) {
28297 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
28298 EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
28299 SrcVT.getVectorElementType(), VTNumElts);
28300 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, LegalShuffle,
28301 ZeroIdx);
28302 }
28303 }
28304
28305 return SDValue();
28306}
28307
28308SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
28309 EVT VT = N->getValueType(0);
28310 SDValue N0 = N->getOperand(0);
28311 SDValue N1 = N->getOperand(1);
28312 SDValue N2 = N->getOperand(2);
28313 uint64_t InsIdx = N->getConstantOperandVal(2);
28314
28315 // Remove insert of UNDEF/POISON.
28316 if (N1.isUndef()) {
28317 if (N1.getOpcode() == ISD::POISON || N0.getOpcode() == ISD::UNDEF)
28318 return N0;
28319 return DAG.getFreeze(N0);
28320 }
28321
28322 // If this is an insert of an extracted vector into an undef/poison vector, we
28323 // can just use the input to the extract if the types match, and can simplify
28324 // in some cases even if they don't.
28325 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
28326 N1.getOperand(1) == N2) {
28327 EVT N1VT = N1.getValueType();
28328 EVT SrcVT = N1.getOperand(0).getValueType();
28329 if (SrcVT == VT) {
28330 // Need to ensure that the result isn't more poisonous if we skip both the
28331 // extract and the insert.
28332 if (N0.getOpcode() == ISD::POISON)
28333 return N1.getOperand(0);
28334 if (VT.isFixedLengthVector() && N1VT.isFixedLengthVector()) {
28335 unsigned SubVecNumElts = N1VT.getVectorNumElements();
28336 APInt EltMask = APInt::getBitsSet(VT.getVectorNumElements(), InsIdx,
28337 InsIdx + SubVecNumElts);
28338 if (DAG.isGuaranteedNotToBePoison(N1.getOperand(0), ~EltMask))
28339 return N1.getOperand(0);
28340 } else if (DAG.isGuaranteedNotToBePoison(N1.getOperand(0)))
28341 return N1.getOperand(0);
28342 }
28343 // TODO: To remove the zero check, need to adjust the offset to
28344 // a multiple of the new src type.
28345 if (isNullConstant(N2)) {
28346 if (VT.knownBitsGE(SrcVT) &&
28347 !(VT.isFixedLengthVector() && SrcVT.isScalableVector()))
28348 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
28349 VT, N0, N1.getOperand(0), N2);
28350 else if (VT.knownBitsLE(SrcVT) &&
28351 !(VT.isScalableVector() && SrcVT.isFixedLengthVector()))
28352 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
28353 VT, N1.getOperand(0), N2);
28354 }
28355 }
28356
28357 // Handle case where we've ended up inserting back into the source vector
28358 // we extracted the subvector from.
28359 // insert_subvector(N0, extract_subvector(N0, N2), N2) --> N0
28360 if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && N1.getOperand(0) == N0 &&
28361 N1.getOperand(1) == N2)
28362 return N0;
28363
28364 // Simplify scalar inserts into an undef vector:
28365 // insert_subvector undef, (splat X), N2 -> splat X
28366 if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
28367 if (DAG.isConstantValueOfAnyType(N1.getOperand(0)) || N1.hasOneUse())
28368 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
28369
28370 // insert_subvector (splat X), (splat X), N2 -> splat X
28371 if (N0.getOpcode() == ISD::SPLAT_VECTOR && N0.getOpcode() == N1.getOpcode() &&
28372 N0.getOperand(0) == N1.getOperand(0))
28373 return N0;
28374
28375 // If we are inserting a bitcast value into an undef, with the same
28376 // number of elements, just use the bitcast input of the extract.
28377 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
28378 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
28379 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
28380 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
28381 N1.getOperand(0).getOperand(1) == N2 &&
28382 N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
28383 VT.getVectorElementCount() &&
28384 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
28385 VT.getSizeInBits()) {
28386 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
28387 }
28388
28389 // If both N1 and N2 are bitcast values on which insert_subvector
28390 // would make sense, pull the bitcast through.
28391 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
28392 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
28393 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
28394 SDValue CN0 = N0.getOperand(0);
28395 SDValue CN1 = N1.getOperand(0);
28396 EVT CN0VT = CN0.getValueType();
28397 EVT CN1VT = CN1.getValueType();
28398 if (CN0VT.isVector() && CN1VT.isVector() &&
28399 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
28400 CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
28401 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
28402 CN0.getValueType(), CN0, CN1, N2);
28403 return DAG.getBitcast(VT, NewINSERT);
28404 }
28405 }
28406
28407 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
28408 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
28409 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
28410 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
28411 N0.getOperand(1).getValueType() == N1.getValueType() &&
28412 N0.getOperand(2) == N2)
28413 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
28414 N1, N2);
28415
28416 // Eliminate an intermediate insert into an undef vector:
28417 // insert_subvector undef, (insert_subvector undef, X, 0), 0 -->
28418 // insert_subvector undef, X, 0
28419 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
28420 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)) &&
28421 isNullConstant(N2))
28422 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
28423 N1.getOperand(1), N2);
28424
28425 // Push subvector bitcasts to the output, adjusting the index as we go.
28426 // insert_subvector(bitcast(v), bitcast(s), c1)
28427 // -> bitcast(insert_subvector(v, s, c2))
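// Illustrative example (hypothetical types): inserting a v2i64 bitcast of a
// v4i32 subvector into a v4i64 bitcast of a v8i32 vector at index 1 becomes
// bitcast(insert_subvector(v8i32 v, v4i32 s, 2)); the index scales by
// EltSizeInBits / N1SrcSVT.getSizeInBits() as computed below.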
28428 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
28429 N1.getOpcode() == ISD::BITCAST) {
28430 SDValue N0Src = peekThroughBitcasts(N0);
28431 SDValue N1Src = peekThroughBitcasts(N1);
28432 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
28433 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
28434 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
28435 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
28436 EVT NewVT;
28437 SDLoc DL(N);
28438 SDValue NewIdx;
28439 LLVMContext &Ctx = *DAG.getContext();
28440 ElementCount NumElts = VT.getVectorElementCount();
28441 unsigned EltSizeInBits = VT.getScalarSizeInBits();
28442 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
28443 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
28444 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
28445 NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
28446 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
28447 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
28448 if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
28449 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
28450 NumElts.divideCoefficientBy(Scale));
28451 NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
28452 }
28453 }
28454 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
28455 SDValue Res = DAG.getBitcast(NewVT, N0Src);
28456 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
28457 return DAG.getBitcast(VT, Res);
28458 }
28459 }
28460 }
28461
28462 // Canonicalize insert_subvector dag nodes.
28463 // Example:
28464 // (insert_subvector (insert_subvector A, Idx0), Idx1)
28465 // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
28466 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
28467 N1.getValueType() == N0.getOperand(1).getValueType()) {
28468 unsigned OtherIdx = N0.getConstantOperandVal(2);
28469 if (InsIdx < OtherIdx) {
28470 // Swap nodes.
28471 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
28472 N0.getOperand(0), N1, N2);
28473 AddToWorklist(NewOp.getNode());
28474 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
28475 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
28476 }
28477 }
28478
28479 // If the input vector is a concatenation, and the insert replaces
28480 // one of the pieces, we can optimize into a single concat_vectors.
28481 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
28482 N0.getOperand(0).getValueType() == N1.getValueType() &&
28485 unsigned Factor = N1.getValueType().getVectorMinNumElements();
28486 SmallVector<SDValue, 8> Ops(N0->ops());
28487 Ops[InsIdx / Factor] = N1;
28488 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
28489 }
28490
28491 // Simplify source operands based on insertion.
28492 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
28493 return SDValue(N, 0);
28494
28495 return SDValue();
28496}
28497
28498SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
28499 SDValue N0 = N->getOperand(0);
28500
28501 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
28502 if (N0->getOpcode() == ISD::FP16_TO_FP)
28503 return N0->getOperand(0);
28504
28505 return SDValue();
28506}
28507
28508SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
28509 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
28510 auto Op = N->getOpcode();
28512 "opcode should be FP16_TO_FP or BF16_TO_FP.");
28513 SDValue N0 = N->getOperand(0);
28514
28515 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) or
28516 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
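// Rationale: FP16_TO_FP/BF16_TO_FP only read the low 16 bits of the integer
// operand, so masking the input with 0xffff is redundant unless the target
// explicitly prefers to keep the zero-extension (shouldKeepZExtForFP16Conv).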
28517 if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
28518 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
28519 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
28520 return DAG.getNode(Op, SDLoc(N), N->getValueType(0), N0.getOperand(0));
28521 }
28522 }
28523
28524 if (SDValue CastEliminated = eliminateFPCastPair(N))
28525 return CastEliminated;
28526
28527 // Sometimes constants manage to survive very late in the pipeline, e.g.,
28528 // because they are wrapped inside the <1 x f16> type. Try one last time to
28529 // get rid of them.
28530 SDValue Folded = DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N),
28531 N->getValueType(0), {N0});
28532 return Folded;
28533}
28534
28535SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
28536 SDValue N0 = N->getOperand(0);
28537
28538 // fold (fp_to_bf16 (bf16_to_fp op)) -> op
28539 if (N0->getOpcode() == ISD::BF16_TO_FP)
28540 return N0->getOperand(0);
28541
28542 return SDValue();
28543}
28544
28545SDValue DAGCombiner::visitBF16_TO_FP(SDNode *N) {
28546 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
28547 return visitFP16_TO_FP(N);
28548}
28549
28550SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
28551 SDValue N0 = N->getOperand(0);
28552 EVT VT = N0.getValueType();
28553 unsigned Opcode = N->getOpcode();
28554
28555 // VECREDUCE over 1-element vector is just an extract.
28556 if (VT.getVectorElementCount().isScalar()) {
28557 SDLoc dl(N);
28558 SDValue Res =
28559 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
28560 DAG.getVectorIdxConstant(0, dl));
28561 if (Res.getValueType() != N->getValueType(0))
28562 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
28563 return Res;
28564 }
28565
28566 // On a boolean vector an and/or reduction is the same as a umin/umax
28567 // reduction. Convert them if the latter is legal while the former isn't.
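// Rationale: when every lane holds one of only two values (zero or the "true"
// pattern), the lane-wise AND reduction equals the unsigned minimum of the
// lanes and the lane-wise OR reduction equals the unsigned maximum, so the two
// forms are interchangeable and we can pick whichever the target supports.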
28568 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
28569 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
28570 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
28571 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
28572 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
28573 DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
28574 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
28575 }
28576
28577 // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
28578 // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
28579 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
28580 TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
28581 SDValue Vec = N0.getOperand(0);
28582 SDValue Subvec = N0.getOperand(1);
28583 if ((Opcode == ISD::VECREDUCE_OR &&
28584 (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) ||
28585 (Opcode == ISD::VECREDUCE_AND &&
28586 (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec))))
28587 return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
28588 }
28589
28590 // vecreduce_or(sext(x)) -> sext(vecreduce_or(x))
28591 // Same for zext and anyext, and for and/or/xor reductions.
28592 if ((Opcode == ISD::VECREDUCE_OR || Opcode == ISD::VECREDUCE_AND ||
28593 Opcode == ISD::VECREDUCE_XOR) &&
28594 (N0.getOpcode() == ISD::SIGN_EXTEND ||
28595 N0.getOpcode() == ISD::ZERO_EXTEND ||
28596 N0.getOpcode() == ISD::ANY_EXTEND) &&
28597 TLI.isOperationLegalOrCustom(Opcode, N0.getOperand(0).getValueType())) {
28598 SDValue Red = DAG.getNode(Opcode, SDLoc(N),
28599 N0.getOperand(0).getValueType().getVectorElementType(),
28600 N0.getOperand(0));
28601 return DAG.getNode(N0.getOpcode(), SDLoc(N), N->getValueType(0), Red);
28602 }
28603 return SDValue();
28604}
28605
28606SDValue DAGCombiner::visitVP_FSUB(SDNode *N) {
28607 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
28608
28609 // FSUB -> FMA combines:
28610 if (SDValue Fused = visitFSUBForFMACombine<VPMatchContext>(N)) {
28611 AddToWorklist(Fused.getNode());
28612 return Fused;
28613 }
28614 return SDValue();
28615}
28616
28617SDValue DAGCombiner::visitVPOp(SDNode *N) {
28618
28619 if (N->getOpcode() == ISD::VP_GATHER)
28620 if (SDValue SD = visitVPGATHER(N))
28621 return SD;
28622
28623 if (N->getOpcode() == ISD::VP_SCATTER)
28624 if (SDValue SD = visitVPSCATTER(N))
28625 return SD;
28626
28627 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD)
28628 if (SDValue SD = visitVP_STRIDED_LOAD(N))
28629 return SD;
28630
28631 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE)
28632 if (SDValue SD = visitVP_STRIDED_STORE(N))
28633 return SD;
28634
28635 // VP operations in which all vector elements are disabled - either by
28636 // determining that the mask is all false or that the EVL is 0 - can be
28637 // eliminated.
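// Illustrative sketch: a VP_ADD whose mask is a constant all-false splat, or
// whose EVL operand is the constant 0, produces no active lanes, so it is
// folded below to UNDEF (binary ops), to its chain (memory writes), to
// chain + undef (loads), or to its start value (reductions).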
28638 bool AreAllEltsDisabled = false;
28639 if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
28640 AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
28641 if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
28642 AreAllEltsDisabled |=
28643 ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
28644
28645 // This is the only generic VP combine we support for now.
28646 if (!AreAllEltsDisabled) {
28647 switch (N->getOpcode()) {
28648 case ISD::VP_FADD:
28649 return visitVP_FADD(N);
28650 case ISD::VP_FSUB:
28651 return visitVP_FSUB(N);
28652 case ISD::VP_FMA:
28653 return visitFMA<VPMatchContext>(N);
28654 case ISD::VP_SELECT:
28655 return visitVP_SELECT(N);
28656 case ISD::VP_MUL:
28657 return visitMUL<VPMatchContext>(N);
28658 case ISD::VP_SUB:
28659 return foldSubCtlzNot<VPMatchContext>(N, DAG);
28660 default:
28661 break;
28662 }
28663 return SDValue();
28664 }
28665
28666 // Binary operations can be replaced by UNDEF.
28667 if (ISD::isVPBinaryOp(N->getOpcode()))
28668 return DAG.getUNDEF(N->getValueType(0));
28669
28670 // VP Memory operations can be replaced by either the chain (stores) or the
28671 // chain + undef (loads).
28672 if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
28673 if (MemSD->writeMem())
28674 return MemSD->getChain();
28675 return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
28676 }
28677
28678 // Reduction operations return the start operand when no elements are active.
28679 if (ISD::isVPReduction(N->getOpcode()))
28680 return N->getOperand(0);
28681
28682 return SDValue();
28683}
28684
28685SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) {
28686 SDValue Chain = N->getOperand(0);
28687 SDValue Ptr = N->getOperand(1);
28688 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
28689
28690 // Check if the memory where the FP state is written is used only in a single
28691 // load operation.
28692 LoadSDNode *LdNode = nullptr;
28693 for (auto *U : Ptr->users()) {
28694 if (U == N)
28695 continue;
28696 if (auto *Ld = dyn_cast<LoadSDNode>(U)) {
28697 if (LdNode && LdNode != Ld)
28698 return SDValue();
28699 LdNode = Ld;
28700 continue;
28701 }
28702 return SDValue();
28703 }
28704 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
28705 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
28706 !LdNode->getChain().reachesChainWithoutSideEffects(SDValue(N, 0)))
28707 return SDValue();
28708
28709 // Check if the loaded value is used only in a store operation.
28710 StoreSDNode *StNode = nullptr;
28711 for (SDUse &U : LdNode->uses()) {
28712 if (U.getResNo() == 0) {
28713 if (auto *St = dyn_cast<StoreSDNode>(U.getUser())) {
28714 if (StNode)
28715 return SDValue();
28716 StNode = St;
28717 } else {
28718 return SDValue();
28719 }
28720 }
28721 }
28722 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
28723 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
28724 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
28725 return SDValue();
28726
28727 // Create a new GET_FPENV_MEM node, which uses the store address to write the
28728 // FP environment.
28729 SDValue Res = DAG.getGetFPEnv(Chain, SDLoc(N), StNode->getBasePtr(), MemVT,
28730 StNode->getMemOperand());
28731 CombineTo(StNode, Res, false);
28732 return Res;
28733}
28734
28735SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) {
28736 SDValue Chain = N->getOperand(0);
28737 SDValue Ptr = N->getOperand(1);
28738 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
28739
28740 // Check if the address of the FP state is also used only in a store operation.
28741 StoreSDNode *StNode = nullptr;
28742 for (auto *U : Ptr->users()) {
28743 if (U == N)
28744 continue;
28745 if (auto *St = dyn_cast<StoreSDNode>(U)) {
28746 if (StNode && StNode != St)
28747 return SDValue();
28748 StNode = St;
28749 continue;
28750 }
28751 return SDValue();
28752 }
28753 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
28754 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
28755 !Chain.reachesChainWithoutSideEffects(SDValue(StNode, 0)))
28756 return SDValue();
28757
28758 // Check if the stored value is loaded from some location and the loaded
28759 // value is used only in the store operation.
28760 SDValue StValue = StNode->getValue();
28761 auto *LdNode = dyn_cast<LoadSDNode>(StValue);
28762 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
28763 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
28764 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
28765 return SDValue();
28766
28767 // Create a new SET_FPENV_MEM node, which uses the load address to read the
28768 // FP environment.
28769 SDValue Res =
28770 DAG.getSetFPEnv(LdNode->getChain(), SDLoc(N), LdNode->getBasePtr(), MemVT,
28771 LdNode->getMemOperand());
28772 return Res;
28773}
28774
28775 /// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
28776/// with the destination vector and a zero vector.
28777/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
28778/// vector_shuffle V, Zero, <0, 4, 2, 4>
28779SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
28780 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
28781
28782 EVT VT = N->getValueType(0);
28783 SDValue LHS = N->getOperand(0);
28784 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
28785 SDLoc DL(N);
28786
28787 // Make sure we're not running after operation legalization where it
28788 // may have custom lowered the vector shuffles.
28789 if (LegalOperations)
28790 return SDValue();
28791
28792 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
28793 return SDValue();
28794
28795 EVT RVT = RHS.getValueType();
28796 unsigned NumElts = RHS.getNumOperands();
28797
28798 // Attempt to create a valid clear mask, splitting the mask into
28799 // sub elements and checking to see if each is
28800 // all zeros or all ones - suitable for shuffle masking.
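// Illustrative example (hypothetical constant): for a v4i32 AND with a
// <0xFFFF0000, ...> operand, no 32-bit split works, but at Split == 2 each
// 16-bit sub-element is either all-ones (keep the LHS lane) or all-zeros
// (take the corresponding lane of the zero vector), so a v8i16 clear mask can
// be built.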
28801 auto BuildClearMask = [&](int Split) {
28802 int NumSubElts = NumElts * Split;
28803 int NumSubBits = RVT.getScalarSizeInBits() / Split;
28804
28805 SmallVector<int, 8> Indices;
28806 for (int i = 0; i != NumSubElts; ++i) {
28807 int EltIdx = i / Split;
28808 int SubIdx = i % Split;
28809 SDValue Elt = RHS.getOperand(EltIdx);
28810 // X & undef --> 0 (not undef). So this lane must be converted to choose
28811 // from the zero constant vector (same as if the element had all 0-bits).
28812 if (Elt.isUndef()) {
28813 Indices.push_back(i + NumSubElts);
28814 continue;
28815 }
28816
28817 std::optional<APInt> Bits = Elt->bitcastToAPInt();
28818 if (!Bits)
28819 return SDValue();
28820
28821 // Extract the sub element from the constant bit mask.
28822 if (DAG.getDataLayout().isBigEndian())
28823 *Bits =
28824 Bits->extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
28825 else
28826 *Bits = Bits->extractBits(NumSubBits, SubIdx * NumSubBits);
28827
28828 if (Bits->isAllOnes())
28829 Indices.push_back(i);
28830 else if (*Bits == 0)
28831 Indices.push_back(i + NumSubElts);
28832 else
28833 return SDValue();
28834 }
28835
28836 // Let's see if the target supports this vector_shuffle.
28837 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
28838 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
28839 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
28840 return SDValue();
28841
28842 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
28843 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
28844 DAG.getBitcast(ClearVT, LHS),
28845 Zero, Indices));
28846 };
28847
28848 // Determine maximum split level (byte level masking).
28849 int MaxSplit = 1;
28850 if (RVT.getScalarSizeInBits() % 8 == 0)
28851 MaxSplit = RVT.getScalarSizeInBits() / 8;
28852
28853 for (int Split = 1; Split <= MaxSplit; ++Split)
28854 if (RVT.getScalarSizeInBits() % Split == 0)
28855 if (SDValue S = BuildClearMask(Split))
28856 return S;
28857
28858 return SDValue();
28859}
28860
28861/// If a vector binop is performed on splat values, it may be profitable to
28862/// extract, scalarize, and insert/splat.
28863 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
28864 const SDLoc &DL, bool LegalTypes) {
28865 SDValue N0 = N->getOperand(0);
28866 SDValue N1 = N->getOperand(1);
28867 unsigned Opcode = N->getOpcode();
28868 EVT VT = N->getValueType(0);
28869 EVT EltVT = VT.getVectorElementType();
28870 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
28871
28872 // TODO: Remove/replace the extract cost check? If the elements are available
28873 // as scalars, then there may be no extract cost. Should we ask if
28874 // inserting a scalar back into a vector is cheap instead?
28875 int Index0, Index1;
28876 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
28877 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
28878 // Extracting an element from a splat_vector should be free.
28879 // TODO: use DAG.isSplatValue instead?
28880 bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
28881 N1.getOpcode() == ISD::SPLAT_VECTOR;
28882 if (!Src0 || !Src1 || Index0 != Index1 ||
28883 Src0.getValueType().getVectorElementType() != EltVT ||
28884 Src1.getValueType().getVectorElementType() != EltVT ||
28885 !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
28886 // If before type legalization, allow scalar types that will eventually be
28887 // made legal.
28888 !TLI.isOperationLegalOrCustom(
28889 Opcode, LegalTypes
28890 ? EltVT
28891 : TLI.getTypeToTransformTo(*DAG.getContext(), EltVT)))
28892 return SDValue();
28893
28894 // FIXME: Type legalization can't handle illegal MULHS/MULHU.
28895 if ((Opcode == ISD::MULHS || Opcode == ISD::MULHU) && !TLI.isTypeLegal(EltVT))
28896 return SDValue();
28897
28898 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode()) {
28899 // All but one element should have an undef input, which will fold to a
28900 // constant or undef. Avoid splatting which would over-define potentially
28901 // undefined elements.
28902
28903 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
28904 // build_vec ..undef, (bo X, Y), undef...
28905 SmallVector<SDValue, 16> EltsX, EltsY, EltsResult;
28906 DAG.ExtractVectorElements(Src0, EltsX);
28907 DAG.ExtractVectorElements(Src1, EltsY);
28908
28909 for (auto [X, Y] : zip(EltsX, EltsY))
28910 EltsResult.push_back(DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags()));
28911 return DAG.getBuildVector(VT, DL, EltsResult);
28912 }
28913
28914 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
28915 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
28916 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
28917 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
28918
28919 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
28920 return DAG.getSplat(VT, DL, ScalarBO);
28921}
28922
28923/// Visit a vector cast operation, like FP_EXTEND.
28924SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
28925 EVT VT = N->getValueType(0);
28926 assert(VT.isVector() && "SimplifyVCastOp only works on vectors!");
28927 EVT EltVT = VT.getVectorElementType();
28928 unsigned Opcode = N->getOpcode();
28929
28930 SDValue N0 = N->getOperand(0);
28931 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
28932
28933 // TODO: promoting the operation might also be good here?
28934 int Index0;
28935 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
28936 if (Src0 &&
28937 (N0.getOpcode() == ISD::SPLAT_VECTOR ||
28938 TLI.isExtractVecEltCheap(VT, Index0)) &&
28939 TLI.isOperationLegalOrCustom(Opcode, EltVT) &&
28940 TLI.preferScalarizeSplat(N)) {
28941 EVT SrcVT = N0.getValueType();
28942 EVT SrcEltVT = SrcVT.getVectorElementType();
28943 if (!LegalTypes || TLI.isTypeLegal(SrcEltVT)) {
28944 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
28945 SDValue Elt =
28946 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC);
28947 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags());
28948 if (VT.isScalableVector())
28949 return DAG.getSplatVector(VT, DL, ScalarBO);
28950 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
28951 return DAG.getBuildVector(VT, DL, Ops);
28952 }
28953 }
28954
28955 return SDValue();
28956}
28957
28958/// Visit a binary vector operation, like ADD.
28959SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
28960 EVT VT = N->getValueType(0);
28961 assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
28962
28963 SDValue LHS = N->getOperand(0);
28964 SDValue RHS = N->getOperand(1);
28965 unsigned Opcode = N->getOpcode();
28966 SDNodeFlags Flags = N->getFlags();
28967
28968 // Move unary shuffles with identical masks after a vector binop:
28969 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
28970 // --> shuffle (VBinOp A, B), Undef, Mask
28971 // This does not require type legality checks because we are creating the
28972 // same types of operations that are in the original sequence. We do have to
28973 // restrict ops like integer div that have immediate UB (eg, div-by-zero)
28974 // though. This code is adapted from the identical transform in instcombine.
28975 if (DAG.isSafeToSpeculativelyExecute(Opcode)) {
28976 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
28977 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
28978 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
28979 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
28980 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
28981 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
28982 RHS.getOperand(0), Flags);
28983 SDValue UndefV = LHS.getOperand(1);
28984 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
28985 }
28986
28987 // Try to sink a splat shuffle after a binop with a uniform constant.
28988 // This is limited to cases where neither the shuffle nor the constant have
28989 // undefined elements because that could be poison-unsafe or inhibit
28990 // demanded elements analysis. It is further limited to not change a splat
28991 // of an inserted scalar because that may be optimized better by
28992 // load-folding or other target-specific behaviors.
28993 if (isConstOrConstSplat(RHS) && Shuf0 && all_equal(Shuf0->getMask()) &&
28994 Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
28995 Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
28996 // binop (splat X), (splat C) --> splat (binop X, C)
28997 SDValue X = Shuf0->getOperand(0);
28998 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
28999 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
29000 Shuf0->getMask());
29001 }
29002 if (isConstOrConstSplat(LHS) && Shuf1 && all_equal(Shuf1->getMask()) &&
29003 Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
29004 Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
29005 // binop (splat C), (splat X) --> splat (binop C, X)
29006 SDValue X = Shuf1->getOperand(0);
29007 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
29008 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
29009 Shuf1->getMask());
29010 }
29011 }
29012
29013 // The following pattern is likely to emerge with vector reduction ops. Moving
29014 // the binary operation ahead of insertion may allow using a narrower vector
29015 // instruction that has better performance than the wide version of the op:
29016 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
29017 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
29018 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
29019 LHS.getOperand(2) == RHS.getOperand(2) &&
29020 (LHS.hasOneUse() || RHS.hasOneUse())) {
29021 SDValue X = LHS.getOperand(1);
29022 SDValue Y = RHS.getOperand(1);
29023 SDValue Z = LHS.getOperand(2);
29024 EVT NarrowVT = X.getValueType();
29025 if (NarrowVT == Y.getValueType() &&
29026 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
29027 LegalOperations)) {
29028 // (binop undef, undef) may not return undef, so compute that result.
29029 SDValue VecC =
29030 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
29031 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
29032 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
29033 }
29034 }
29035
29036 // Make sure all but the first op are undef or constant.
29037 auto ConcatWithConstantOrUndef = [](SDValue Concat) {
29038 return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
29039 all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
29040 return Op.isUndef() ||
29041 ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
29042 });
29043 };
29044
29045 // The following pattern is likely to emerge with vector reduction ops. Moving
29046 // the binary operation ahead of the concat may allow using a narrower vector
29047 // instruction that has better performance than the wide version of the op:
29048 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
29049 // concat (VBinOp X, Y), VecC
29050 if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
29051 (LHS.hasOneUse() || RHS.hasOneUse())) {
29052 EVT NarrowVT = LHS.getOperand(0).getValueType();
29053 if (NarrowVT == RHS.getOperand(0).getValueType() &&
29054 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
29055 unsigned NumOperands = LHS.getNumOperands();
29056 SmallVector<SDValue, 4> ConcatOps;
29057 for (unsigned i = 0; i != NumOperands; ++i) {
29058 // This constant folds for operands 1 and up.
29059 ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
29060 RHS.getOperand(i)));
29061 }
29062
29063 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
29064 }
29065 }
29066
29067 if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL, LegalTypes))
29068 return V;
29069
29070 return SDValue();
29071}
29072
29073SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
29074 SDValue N2) {
29075 assert(N0.getOpcode() == ISD::SETCC &&
29076 "First argument must be a SetCC node!");
29077
29078 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
29079 cast<CondCodeSDNode>(N0.getOperand(2))->get());
29080
29081 // If we got a simplified select_cc node back from SimplifySelectCC, then
29082 // break it down into a new SETCC node, and a new SELECT node, and then return
29083 // the SELECT node, since we were called with a SELECT node.
29084 if (SCC.getNode()) {
29085 // Check to see if we got a select_cc back (to turn into setcc/select).
29086 // Otherwise, just return whatever node we got back, like fabs.
29087 if (SCC.getOpcode() == ISD::SELECT_CC) {
29088 const SDNodeFlags Flags = N0->getFlags();
29089 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
29090 N0.getValueType(),
29091 SCC.getOperand(0), SCC.getOperand(1),
29092 SCC.getOperand(4), Flags);
29093 AddToWorklist(SETCC.getNode());
29094 return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
29095 SCC.getOperand(2), SCC.getOperand(3), Flags);
29096 }
29097
29098 return SCC;
29099 }
29100 return SDValue();
29101}
29102
29103/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
29104/// being selected between, see if we can simplify the select. Callers of this
29105/// should assume that TheSelect is deleted if this returns true. As such, they
29106/// should return the appropriate thing (e.g. the node) back to the top-level of
29107/// the DAG combiner loop to avoid it being looked at.
29108bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
29109 SDValue RHS) {
29110 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
29111 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
29112 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
29113 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
29114 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
29115 SDValue Sqrt = RHS;
29116 ISD::CondCode CC;
29117 SDValue CmpLHS;
29118 const ConstantFPSDNode *Zero = nullptr;
29119
29120 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
29121 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
29122 CmpLHS = TheSelect->getOperand(0);
29123 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
29124 } else {
29125 // SELECT or VSELECT
29126 SDValue Cmp = TheSelect->getOperand(0);
29127 if (Cmp.getOpcode() == ISD::SETCC) {
29128 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
29129 CmpLHS = Cmp.getOperand(0);
29130 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
29131 }
29132 }
29133 if (Zero && Zero->isZero() &&
29134 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
29135 CC == ISD::SETULT || CC == ISD::SETLT)) {
29136 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
29137 CombineTo(TheSelect, Sqrt);
29138 return true;
29139 }
29140 }
29141 }
29142 // Cannot simplify select with vector condition
29143 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
29144
29145 // If this is a select from two identical things, try to pull the operation
29146 // through the select.
29147 if (LHS.getOpcode() != RHS.getOpcode() ||
29148 !LHS.hasOneUse() || !RHS.hasOneUse())
29149 return false;
29150
29151 // If this is a load and the token chain is identical, replace the select
29152 // of two loads with a load through a select of the address to load from.
29153 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
29154 // constants have been dropped into the constant pool.
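// In effect: (select Cond, (load p), (load q)) --> (load (select Cond, p, q)),
// subject to the safety checks below (simple, non-indexed loads, identical
// chains, matching memory VT and address space, and no cycle through the
// select condition).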
29155 if (LHS.getOpcode() == ISD::LOAD) {
29156 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
29157 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
29158
29159 // Token chains must be identical.
29160 if (LHS.getOperand(0) != RHS.getOperand(0) ||
29161 // Do not let this transformation reduce the number of volatile loads.
29162 // Be conservative for atomics for the moment
29163 // TODO: This does appear to be legal for unordered atomics (see D66309)
29164 !LLD->isSimple() || !RLD->isSimple() ||
29165 // FIXME: If either is a pre/post inc/dec load,
29166 // we'd need to split out the address adjustment.
29167 LLD->isIndexed() || RLD->isIndexed() ||
29168 // If this is an EXTLOAD, the VT's must match.
29169 LLD->getMemoryVT() != RLD->getMemoryVT() ||
29170 // If this is an EXTLOAD, the kind of extension must match.
29171 (LLD->getExtensionType() != RLD->getExtensionType() &&
29172 // The only exception is if one of the extensions is anyext.
29173 LLD->getExtensionType() != ISD::EXTLOAD &&
29174 RLD->getExtensionType() != ISD::EXTLOAD) ||
29175 // FIXME: this discards src value information. This is
29176 // over-conservative. It would be beneficial to be able to remember
29177 // both potential memory locations. Since we are discarding
29178 // src value info, don't do the transformation if the memory
29179 // locations are not in the same address space.
29180 LLD->getPointerInfo().getAddrSpace() !=
29181 RLD->getPointerInfo().getAddrSpace() ||
29182 // We can't produce a CMOV of a TargetFrameIndex since we won't
29183 // generate the address generation required.
29184 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
29185 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
29186 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
29187 LLD->getBasePtr().getValueType()))
29188 return false;
29189
29190 // The loads must not depend on one another.
29191 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
29192 return false;
29193
29194 // Check that the select condition doesn't reach either load. If so,
29195 // folding this will induce a cycle into the DAG. If not, this is safe to
29196 // xform, so create a select of the addresses.
29197
29198 SmallPtrSet<const SDNode *, 32> Visited;
29199 SmallVector<const SDNode *, 16> Worklist;
29200
29201 // Always fail if LLD and RLD are not independent. TheSelect is a
29202 // predecessor to all Nodes in question so we need not search past it.
29203
29204 Visited.insert(TheSelect);
29205 Worklist.push_back(LLD);
29206 Worklist.push_back(RLD);
29207
29208 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
29209 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
29210 return false;
29211
29212 SDValue Addr;
29213 if (TheSelect->getOpcode() == ISD::SELECT) {
29214 // We cannot do this optimization if any pair of {RLD, LLD} is a
29215 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
29216 // Loads, we only need to check if CondNode is a successor to one of the
29217 // loads. We can further avoid this if there's no use of their chain
29218 // value.
29219 SDNode *CondNode = TheSelect->getOperand(0).getNode();
29220 Worklist.push_back(CondNode);
29221
29222 if ((LLD->hasAnyUseOfValue(1) &&
29223 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
29224 (RLD->hasAnyUseOfValue(1) &&
29225 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
29226 return false;
29227
29228 Addr = DAG.getSelect(SDLoc(TheSelect),
29229 LLD->getBasePtr().getValueType(),
29230 TheSelect->getOperand(0), LLD->getBasePtr(),
29231 RLD->getBasePtr());
29232 } else { // Otherwise SELECT_CC
29233 // We cannot do this optimization if any pair of {RLD, LLD} is a
29234 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
29235 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
29236 // one of the loads. We can further avoid this if there's no use of their
29237 // chain value.
29238
29239 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
29240 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
29241 Worklist.push_back(CondLHS);
29242 Worklist.push_back(CondRHS);
29243
29244 if ((LLD->hasAnyUseOfValue(1) &&
29245 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
29246 (RLD->hasAnyUseOfValue(1) &&
29247 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
29248 return false;
29249
29250 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
29251 LLD->getBasePtr().getValueType(),
29252 TheSelect->getOperand(0),
29253 TheSelect->getOperand(1),
29254 LLD->getBasePtr(), RLD->getBasePtr(),
29255 TheSelect->getOperand(4));
29256 }
29257
29258 SDValue Load;
29259 // It is safe to replace the two loads if they have different alignments,
29260 // but the new load must use the minimum (most restrictive) alignment of the
29261 // inputs.
29262 Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
29263 unsigned AddrSpace = LLD->getAddressSpace();
29264 assert(AddrSpace == RLD->getAddressSpace());
29265
29266 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
29267 if (!RLD->isInvariant())
29268 MMOFlags &= ~MachineMemOperand::MOInvariant;
29269 if (!RLD->isDereferenceable())
29270 MMOFlags &= ~MachineMemOperand::MODereferenceable;
29271 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
29272 // FIXME: Discards pointer and AA info.
29273 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
29274 LLD->getChain(), Addr, MachinePointerInfo(AddrSpace),
29275 Alignment, MMOFlags);
29276 } else {
29277 // FIXME: Discards pointer and AA info.
29278 Load = DAG.getExtLoad(
29279 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
29280 : LLD->getExtensionType(),
29281 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
29282 MachinePointerInfo(AddrSpace), LLD->getMemoryVT(), Alignment,
29283 MMOFlags);
29284 }
29285
29286 // Users of the select now use the result of the load.
29287 CombineTo(TheSelect, Load);
29288
29289 // Users of the old loads now use the new load's chain. We know the
29290 // old-load value is dead now.
29291 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
29292 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
29293 return true;
29294 }
29295
29296 return false;
29297}
29298
29299/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
29300/// bitwise 'and'.
29301SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
29302 SDValue N1, SDValue N2, SDValue N3,
29303 ISD::CondCode CC) {
29304 // If this is a select where the false operand is zero and the compare is a
29305 // check of the sign bit, see if we can perform the "gzip trick":
29306 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
29307 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
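// Rationale: (sra X, size(X)-1) is all-ones exactly when X is negative and
// zero otherwise, so ANDing it with A yields A for X < 0 and 0 for X >= 0;
// the setgt form needs the inverted mask, hence the hasAndNot check below.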
29308 EVT XType = N0.getValueType();
29309 EVT AType = N2.getValueType();
29310 if (!isNullConstant(N3) || !XType.bitsGE(AType))
29311 return SDValue();
29312
29313 // If the comparison is testing for a positive value, we have to invert
29314 // the sign bit mask, so only do that transform if the target has a bitwise
29315 // 'and not' instruction (the invert is free).
29316 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
29317 // (X > -1) ? A : 0
29318 // (X > 0) ? X : 0 <-- This is canonical signed max.
29319 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
29320 return SDValue();
29321 } else if (CC == ISD::SETLT) {
29322 // (X < 0) ? A : 0
29323 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
29324 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
29325 return SDValue();
29326 } else {
29327 return SDValue();
29328 }
29329
29330 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
29331 // constant.
29332 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
29333 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
29334 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
29335 if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
29336 SDValue ShiftAmt = DAG.getShiftAmountConstant(ShCt, XType, DL);
29337 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
29338 AddToWorklist(Shift.getNode());
29339
29340 if (XType.bitsGT(AType)) {
29341 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
29342 AddToWorklist(Shift.getNode());
29343 }
29344
29345 if (CC == ISD::SETGT)
29346 Shift = DAG.getNOT(DL, Shift, AType);
29347
29348 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
29349 }
29350 }
29351
29352 unsigned ShCt = XType.getSizeInBits() - 1;
29353 if (TLI.shouldAvoidTransformToShift(XType, ShCt))
29354 return SDValue();
29355
29356 SDValue ShiftAmt = DAG.getShiftAmountConstant(ShCt, XType, DL);
29357 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
29358 AddToWorklist(Shift.getNode());
29359
29360 if (XType.bitsGT(AType)) {
29361 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
29362 AddToWorklist(Shift.getNode());
29363 }
29364
29365 if (CC == ISD::SETGT)
29366 Shift = DAG.getNOT(DL, Shift, AType);
29367
29368 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
29369}
29370
29371// Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
29372SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
29373 SDValue N0 = N->getOperand(0);
29374 SDValue N1 = N->getOperand(1);
29375 SDValue N2 = N->getOperand(2);
29376 SDLoc DL(N);
29377
29378 unsigned BinOpc = N1.getOpcode();
29379 if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc) ||
29380 (N1.getResNo() != N2.getResNo()))
29381 return SDValue();
29382
29383 // The use checks are intentionally on SDNode because we may be dealing
29384 // with opcodes that produce more than one SDValue.
29385 // TODO: Do we really need to check N0 (the condition operand of the select)?
29386 // But removing that clause could cause an infinite loop...
29387 if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
29388 return SDValue();
29389
29390 // Binops may include opcodes that return multiple values, so all values
29391 // must be created/propagated from the newly created binops below.
29392 SDVTList OpVTs = N1->getVTList();
29393
29394 // Fold select(cond, binop(x, y), binop(z, y))
29395 // --> binop(select(cond, x, z), y)
29396 if (N1.getOperand(1) == N2.getOperand(1)) {
29397 SDValue N10 = N1.getOperand(0);
29398 SDValue N20 = N2.getOperand(0);
29399 SDValue NewSel = DAG.getSelect(DL, N10.getValueType(), N0, N10, N20);
29400 SDNodeFlags Flags = N1->getFlags() & N2->getFlags();
29401 SDValue NewBinOp =
29402 DAG.getNode(BinOpc, DL, OpVTs, {NewSel, N1.getOperand(1)}, Flags);
29403 return SDValue(NewBinOp.getNode(), N1.getResNo());
29404 }
29405
29406 // Fold select(cond, binop(x, y), binop(x, z))
29407 // --> binop(x, select(cond, y, z))
29408 if (N1.getOperand(0) == N2.getOperand(0)) {
29409 SDValue N11 = N1.getOperand(1);
29410 SDValue N21 = N2.getOperand(1);
29411 // Second op VT might be different (e.g. shift amount type)
29412 if (N11.getValueType() == N21.getValueType()) {
29413 SDValue NewSel = DAG.getSelect(DL, N11.getValueType(), N0, N11, N21);
29414 SDNodeFlags Flags = N1->getFlags() & N2->getFlags();
29415 SDValue NewBinOp =
29416 DAG.getNode(BinOpc, DL, OpVTs, {N1.getOperand(0), NewSel}, Flags);
29417 return SDValue(NewBinOp.getNode(), N1.getResNo());
29418 }
29419 }
29420
29421 // TODO: Handle isCommutativeBinOp patterns as well?
29422 return SDValue();
29423}
29424
29425// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
29426SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
29427 SDValue N0 = N->getOperand(0);
29428 EVT VT = N->getValueType(0);
29429 bool IsFabs = N->getOpcode() == ISD::FABS;
29430 bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
29431
29432 if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
29433 return SDValue();
29434
29435 SDValue Int = N0.getOperand(0);
29436 EVT IntVT = Int.getValueType();
29437
29438 // The operand to cast should be integer.
29439 if (!IntVT.isInteger() || IntVT.isVector())
29440 return SDValue();
29441
29442 // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
29443 // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
29444 APInt SignMask;
29445 if (N0.getValueType().isVector()) {
29446 // For vector, create a sign mask (0x80...) or its inverse (for fabs,
29447 // 0x7f...) per element and splat it.
29448 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
29449 if (IsFabs)
29450 SignMask = ~SignMask;
29451 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
29452 } else {
29453 // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
29454 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
29455 if (IsFabs)
29456 SignMask = ~SignMask;
29457 }
29458 SDLoc DL(N0);
29459 Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
29460 DAG.getConstant(SignMask, DL, IntVT));
29461 AddToWorklist(Int.getNode());
29462 return DAG.getBitcast(VT, Int);
29463}
29464
29465 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
29466/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
29467/// in it. This may be a win when the constant is not otherwise available
29468/// because it replaces two constant pool loads with one.
29469SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
29470 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
29471 ISD::CondCode CC) {
29472 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
29473 return SDValue();
29474
29475 // If we are before legalize types, we want the other legalization to happen
29476 // first (for example, to avoid messing with soft float).
29477 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
29478 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
29479 EVT VT = N2.getValueType();
29480 if (!TV || !FV || !TLI.isTypeLegal(VT))
29481 return SDValue();
29482
29483 // If a constant can be materialized without loads, this does not make sense.
29484 if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
29485 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
29486 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
29487 return SDValue();
29488
29489 // If both constants have multiple uses, then we won't need to do an extra
29490 // load. The values are likely around in registers for other users.
29491 if (!TV->hasOneUse() && !FV->hasOneUse())
29492 return SDValue();
29493
29494 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
29495 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
29496 Type *FPTy = Elts[0]->getType();
29497 const DataLayout &TD = DAG.getDataLayout();
29498
29499 // Create a ConstantArray of the two constants.
29500 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
29501 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
29502 TD.getPrefTypeAlign(FPTy));
29503 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
29504
29505 // Get offsets to the 0 and 1 elements of the array, so we can select between
29506 // them.
29507 SDValue Zero = DAG.getIntPtrConstant(0, DL);
29508 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
29509 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
29510 SDValue Cond =
29511 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
29512 AddToWorklist(Cond.getNode());
29513 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
29514 AddToWorklist(CstOffset.getNode());
29515 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
29516 AddToWorklist(CPIdx.getNode());
29517 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
29518 MachinePointerInfo::getConstantPool(
29519 DAG.getMachineFunction()), Alignment);
29520}
29521
29522/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
29523/// where 'cond' is the comparison specified by CC.
29524SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
29525 SDValue N2, SDValue N3, ISD::CondCode CC,
29526 bool NotExtCompare) {
29527 // (x ? y : y) -> y.
29528 if (N2 == N3) return N2;
29529
29530 EVT CmpOpVT = N0.getValueType();
29531 EVT CmpResVT = getSetCCResultType(CmpOpVT);
29532 EVT VT = N2.getValueType();
29533 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
29534 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
29535 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
29536
29537 // Determine if the condition we're dealing with is constant.
29538 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
29539 AddToWorklist(SCC.getNode());
29540 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
29541 // fold select_cc true, x, y -> x
29542 // fold select_cc false, x, y -> y
29543 return !(SCCC->isZero()) ? N2 : N3;
29544 }
29545 }
29546
29547 if (SDValue V =
29548 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
29549 return V;
29550
29551 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
29552 return V;
29553
29554 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
29555 // where y has a single bit set.
29556 // In other words, we can turn the SELECT_CC into an AND
29557 // when the condition can be materialized as an all-ones register. Any
29558 // single bit-test can be materialized as an all-ones register with
29559 // shift-left and shift-right-arith.
29560 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
29561 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
29562 SDValue AndLHS = N0->getOperand(0);
29563 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
29564 if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
29565 // Shift the tested bit over the sign bit.
29566 const APInt &AndMask = ConstAndRHS->getAPIntValue();
29567 if (TLI.shouldFoldSelectWithSingleBitTest(VT, AndMask)) {
29568 unsigned ShCt = AndMask.getBitWidth() - 1;
29569 SDValue ShlAmt = DAG.getShiftAmountConstant(AndMask.countl_zero(), VT,
29570 SDLoc(AndLHS));
29571 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
29572
29573 // Now arithmetic right shift it all the way over, so the result is
29574 // either all-ones, or zero.
29575 SDValue ShrAmt = DAG.getShiftAmountConstant(ShCt, VT, SDLoc(Shl));
29576 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
29577
29578 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
29579 }
29580 }
29581 }
29582
29583 // fold select C, 16, 0 -> shl C, 4
29584 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
29585 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
29586
29587 if ((Fold || Swap) &&
29588 TLI.getBooleanContents(CmpOpVT) ==
29589 TargetLowering::ZeroOrOneBooleanContent &&
29590 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT)) &&
29592
29593 if (Swap) {
29594 CC = ISD::getSetCCInverse(CC, CmpOpVT);
29595 std::swap(N2C, N3C);
29596 }
29597
29598 // If the caller doesn't want us to simplify this into a zext of a compare,
29599 // don't do it.
29600 if (NotExtCompare && N2C->isOne())
29601 return SDValue();
29602
29603 SDValue Temp, SCC;
29604 // zext (setcc n0, n1)
29605 if (LegalTypes) {
29606 SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
29607 Temp = DAG.getZExtOrTrunc(SCC, SDLoc(N2), VT);
29608 } else {
29609 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
29610 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
29611 }
29612
29613 AddToWorklist(SCC.getNode());
29614 AddToWorklist(Temp.getNode());
29615
29616 if (N2C->isOne())
29617 return Temp;
29618
29619 unsigned ShCt = N2C->getAPIntValue().logBase2();
29620 if (TLI.shouldAvoidTransformToShift(VT, ShCt))
29621 return SDValue();
29622
29623 // shl setcc result by log2 n2c
29624 return DAG.getNode(
29625 ISD::SHL, DL, N2.getValueType(), Temp,
29626 DAG.getShiftAmountConstant(ShCt, N2.getValueType(), SDLoc(Temp)));
29627 }
29628
29629 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
29630 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
29631 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
29632 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
29633 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
29634 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
29635 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
29636 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
29637 if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
29638 SDValue ValueOnZero = N2;
29639 SDValue Count = N3;
29640 // If the condition is NE instead of EQ, swap the operands.
29641 if (CC == ISD::SETNE)
29642 std::swap(ValueOnZero, Count);
29643 // Check if the value on zero is a constant equal to the bits in the type.
29644 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
29645 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
29646 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
29647 // legal, combine to just cttz.
29648 if ((Count.getOpcode() == ISD::CTTZ ||
29649 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
29650 N0 == Count.getOperand(0) &&
29651 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
29652 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
29653 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
29654 // legal, combine to just ctlz.
29655 if ((Count.getOpcode() == ISD::CTLZ ||
29656 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
29657 N0 == Count.getOperand(0) &&
29658 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
29659 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
29660 }
29661 }
29662 }
29663
29664 // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
29665 // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
29666 if (!NotExtCompare && N1C && N2C && N3C &&
29667 N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
29668 ((N1C->isAllOnes() && CC == ISD::SETGT) ||
29669 (N1C->isZero() && CC == ISD::SETLT)) &&
29670 !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
29671 SDValue ASHR =
29672 DAG.getNode(ISD::SRA, DL, CmpOpVT, N0,
29673 DAG.getShiftAmountConstant(
29674 CmpOpVT.getScalarSizeInBits() - 1, CmpOpVT, DL));
29675 return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASHR, DL, VT),
29676 DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
29677 }
29678
29679 // Fold sign pattern select_cc setgt X, -1, 1, -1 -> or (ashr X, BW-1), 1
29680 if (CC == ISD::SETGT && N1C && N2C && N3C && N1C->isAllOnes() &&
29681 N2C->isOne() && N3C->isAllOnes() &&
29682 !TLI.shouldAvoidTransformToShift(CmpOpVT,
29683 CmpOpVT.getScalarSizeInBits() - 1)) {
29684 SDValue ASHR =
29685 DAG.getNode(ISD::SRA, DL, CmpOpVT, N0,
29686 DAG.getShiftAmountConstant(
29687 CmpOpVT.getScalarSizeInBits() - 1, CmpOpVT, DL));
29688 return DAG.getNode(ISD::OR, DL, VT, DAG.getSExtOrTrunc(ASHR, DL, VT),
29689 DAG.getConstant(1, DL, VT));
29690 }
29691
29692 if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
29693 return S;
29694 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
29695 return S;
29696 if (SDValue ABD = foldSelectToABD(N0, N1, N2, N3, CC, DL))
29697 return ABD;
29698
29699 return SDValue();
29700}
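
// [Illustrative sketch added for exposition; not part of DAGCombiner.cpp.]
// The "fold select C, 16, 0 -> shl C, 4" case above works because the setcc
// result is known to be 0 or 1, so the select is just a zext of the compare
// shifted left by log2 of the power-of-two constant (hypothetical helper):
static unsigned selectPow2OrZero(unsigned X, unsigned Y) {
  unsigned Cmp = (X == Y) ? 1u : 0u; // zext (setcc X, Y, seteq)
  return Cmp << 4;                   // same value as (X == Y) ? 16 : 0
}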
29701
29702static SDValue matchMergedBFX(SDValue Root, SelectionDAG &DAG,
29703 const TargetLowering &TLI) {
29704 // Match a pattern such as:
29705 // (X | (X >> C0) | (X >> C1) | ...) & Mask
29706 // This extracts contiguous parts of X and ORs them together before comparing.
29707 // We can optimize this so that we directly check (X & SomeMask) instead,
29708 // eliminating the shifts.
29709
29710 EVT VT = Root.getValueType();
29711
29712 // TODO: Support vectors?
29713 if (!VT.isScalarInteger() || Root.getOpcode() != ISD::AND)
29714 return SDValue();
29715
29716 SDValue N0 = Root.getOperand(0);
29717 SDValue N1 = Root.getOperand(1);
29718
29719 if (N0.getOpcode() != ISD::OR || !isa<ConstantSDNode>(N1))
29720 return SDValue();
29721
29722 APInt RootMask = cast<ConstantSDNode>(N1)->getAsAPIntVal();
29723
29724 SDValue Src;
29725 const auto IsSrc = [&](SDValue V) {
29726 if (!Src) {
29727 Src = V;
29728 return true;
29729 }
29730
29731 return Src == V;
29732 };
29733
29734 SmallVector<SDValue> Worklist = {N0};
29735 APInt PartsMask(VT.getSizeInBits(), 0);
29736 while (!Worklist.empty()) {
29737 SDValue V = Worklist.pop_back_val();
29738 if (!V.hasOneUse() && (Src && Src != V))
29739 return SDValue();
29740
29741 if (V.getOpcode() == ISD::OR) {
29742 Worklist.push_back(V.getOperand(0));
29743 Worklist.push_back(V.getOperand(1));
29744 continue;
29745 }
29746
29747 if (V.getOpcode() == ISD::SRL) {
29748 SDValue ShiftSrc = V.getOperand(0);
29749 SDValue ShiftAmt = V.getOperand(1);
29750
29751 if (!IsSrc(ShiftSrc) || !isa<ConstantSDNode>(ShiftAmt))
29752 return SDValue();
29753
29754 auto ShiftAmtVal = cast<ConstantSDNode>(ShiftAmt)->getAsZExtVal();
29755 if (ShiftAmtVal > RootMask.getBitWidth())
29756 return SDValue();
29757
29758 PartsMask |= (RootMask << ShiftAmtVal);
29759 continue;
29760 }
29761
29762 if (IsSrc(V)) {
29763 PartsMask |= RootMask;
29764 continue;
29765 }
29766
29767 return SDValue();
29768 }
29769
29770 if (!Src)
29771 return SDValue();
29772
29773 SDLoc DL(Root);
29774 return DAG.getNode(ISD::AND, DL, VT,
29775 {Src, DAG.getConstant(PartsMask, DL, VT)});
29776}
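
// [Illustrative sketch added for exposition; not part of DAGCombiner.cpp.]
// matchMergedBFX above is only used when the AND feeds an equality compare
// against zero: OR-ing shifted copies of X and masking tests the same bits as
// masking X with the union of the shifted masks, so the shifts can be dropped.
static bool anyLowByteSetInThreeFields(unsigned X) {
  // Original shape:  ((X | (X >> 8) | (X >> 16)) & 0xFF) != 0
  // Rewritten shape: (X & (0xFF | (0xFF << 8) | (0xFF << 16))) != 0
  return (X & 0x00FFFFFFu) != 0;
}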
29777
29778/// This is a stub for TargetLowering::SimplifySetCC.
29779SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
29780 ISD::CondCode Cond, const SDLoc &DL,
29781 bool foldBooleans) {
29782 TargetLowering::DAGCombinerInfo
29783 DagCombineInfo(DAG, Level, false, this);
29784 if (SDValue C =
29785 TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL))
29786 return C;
29787
29788 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
29789 isNullConstant(N1)) {
29790
29791 if (SDValue Res = matchMergedBFX(N0, DAG, TLI))
29792 return DAG.getSetCC(DL, VT, Res, N1, Cond);
29793 }
29794
29795 return SDValue();
29796}
29797
29798/// Given an ISD::SDIV node expressing a divide by constant, return
29799/// a DAG expression to select that will generate the same value by multiplying
29800/// by a magic number.
29801/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
29802SDValue DAGCombiner::BuildSDIV(SDNode *N) {
29803 // when optimising for minimum size, we don't want to expand a div to a mul
29804 // and a shift.
29805 if (DAG.getMachineFunction().getFunction().hasMinSize())
29806 return SDValue();
29807
29808 SmallVector<SDNode *, 8> Built;
29809 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, LegalTypes, Built)) {
29810 for (SDNode *N : Built)
29811 AddToWorklist(N);
29812 return S;
29813 }
29814
29815 return SDValue();
29816}
29817
29818/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
29819/// DAG expression that will generate the same value by right shifting.
29820SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
29821 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
29822 if (!C)
29823 return SDValue();
29824
29825 // Avoid division by zero.
29826 if (C->isZero())
29827 return SDValue();
29828
29829 SmallVector<SDNode *, 8> Built;
29830 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
29831 for (SDNode *N : Built)
29832 AddToWorklist(N);
29833 return S;
29834 }
29835
29836 return SDValue();
29837}
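
// [Illustrative sketch added for exposition; not part of DAGCombiner.cpp.]
// A typical target expansion for a signed divide by a power of two, as produced
// by TLI.BuildSDIVPow2: bias negative dividends by (divisor - 1) so the
// arithmetic shift rounds toward zero (hypothetical helper, divisor 8):
static int sdivByEight(int X) {
  int Bias = (X >> 31) & 7; // 7 when X is negative, 0 otherwise
  return (X + Bias) >> 3;   // equals X / 8 under C's truncating division
}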
29838
29839/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
29840/// expression that will generate the same value by multiplying by a magic
29841/// number.
29842/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
29843SDValue DAGCombiner::BuildUDIV(SDNode *N) {
29844 // when optimising for minimum size, we don't want to expand a div to a mul
29845 // and a shift.
29846 if (DAG.getMachineFunction().getFunction().hasMinSize())
29847 return SDValue();
29848
29849 SmallVector<SDNode *, 8> Built;
29850 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, LegalTypes, Built)) {
29851 for (SDNode *N : Built)
29852 AddToWorklist(N);
29853 return S;
29854 }
29855
29856 return SDValue();
29857}
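
// [Illustrative sketch added for exposition; not part of DAGCombiner.cpp.]
// The "magic number" rewrite named above: an unsigned divide by a constant
// becomes a widening multiply by a precomputed reciprocal plus a shift.
// Example for N / 5 on 32 bits (0xCCCCCCCD == ceil(2^34 / 5)):
static unsigned udivByFive(unsigned N) {
  unsigned long long Prod = (unsigned long long)N * 0xCCCCCCCDull;
  return (unsigned)(Prod >> 34); // equals N / 5 for every 32-bit N
}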
29858
29859/// Given an ISD::SREM node expressing a remainder by constant power of 2,
29860/// return a DAG expression that will generate the same value.
29861SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
29862 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
29863 if (!C)
29864 return SDValue();
29865
29866 // Avoid division by zero.
29867 if (C->isZero())
29868 return SDValue();
29869
29870 SmallVector<SDNode *, 8> Built;
29871 if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) {
29872 for (SDNode *N : Built)
29873 AddToWorklist(N);
29874 return S;
29875 }
29876
29877 return SDValue();
29878}
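
// [Illustrative sketch added for exposition; not part of DAGCombiner.cpp.]
// The matching remainder form for a power-of-two divisor: round the biased
// dividend down to a multiple of 8 and subtract (hypothetical helper):
static int sremByEight(int X) {
  int Bias = (X >> 31) & 7;     // 7 when X is negative, 0 otherwise
  return X - ((X + Bias) & ~7); // equals X % 8 under C's truncating semantics
}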
29879
29880// This is basically just a port of takeLog2 from InstCombineMulDivRem.cpp
29881//
29882// Returns the node that represents `Log2(Op)`. This may create a new node. If
29883 // we are unable to compute `Log2(Op)`, it returns `SDValue()`.
29884//
29885// All nodes will be created at `DL` and the output will be of type `VT`.
29886//
29887// This will only return `Log2(Op)` if we can prove `Op` is non-zero. Set
29888 // `AssumeNonZero` if this function should simply assume (rather than prove)
29889 // that `Op` is non-zero.
29890static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
29891 SDValue Op, unsigned Depth,
29892 bool AssumeNonZero) {
29893 assert(VT.isInteger() && "Only integer types are supported!");
29894
29895 auto PeekThroughCastsAndTrunc = [](SDValue V) {
29896 while (true) {
29897 switch (V.getOpcode()) {
29898 case ISD::TRUNCATE:
29899 case ISD::ZERO_EXTEND:
29900 V = V.getOperand(0);
29901 break;
29902 default:
29903 return V;
29904 }
29905 }
29906 };
29907
29908 if (VT.isScalableVector())
29909 return SDValue();
29910
29911 Op = PeekThroughCastsAndTrunc(Op);
29912
29913 // Helper for determining whether a value is a power-2 constant scalar or a
29914 // vector of such elements.
29915 SmallVector<APInt> Pow2Constants;
29916 auto IsPowerOfTwo = [&Pow2Constants](ConstantSDNode *C) {
29917 if (C->isZero() || C->isOpaque())
29918 return false;
29919 // TODO: We may also be able to support negative powers of 2 here.
29920 if (C->getAPIntValue().isPowerOf2()) {
29921 Pow2Constants.emplace_back(C->getAPIntValue());
29922 return true;
29923 }
29924 return false;
29925 };
29926
29927 if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo, /*AllowUndefs=*/false,
29928 /*AllowTruncation=*/true)) {
29929 if (!VT.isVector())
29930 return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
29931 // We need to create a build vector
29932 if (Op.getOpcode() == ISD::SPLAT_VECTOR)
29933 return DAG.getSplat(VT, DL,
29934 DAG.getConstant(Pow2Constants.back().logBase2(), DL,
29935 VT.getScalarType()));
29936 SmallVector<SDValue> Log2Ops;
29937 for (const APInt &Pow2 : Pow2Constants)
29938 Log2Ops.emplace_back(
29939 DAG.getConstant(Pow2.logBase2(), DL, VT.getScalarType()));
29940 return DAG.getBuildVector(VT, DL, Log2Ops);
29941 }
29942
29943 if (Depth >= DAG.MaxRecursionDepth)
29944 return SDValue();
29945
29946 auto CastToVT = [&](EVT NewVT, SDValue ToCast) {
29947 // Peek through zero extend. We can't peek through truncates since this
29948 // function is called on a shift amount. We must ensure that all of the bits
29949 // above the original shift amount are zeroed by this function.
29950 while (ToCast.getOpcode() == ISD::ZERO_EXTEND)
29951 ToCast = ToCast.getOperand(0);
29952 EVT CurVT = ToCast.getValueType();
29953 if (NewVT == CurVT)
29954 return ToCast;
29955
29956 if (NewVT.getSizeInBits() == CurVT.getSizeInBits())
29957 return DAG.getBitcast(NewVT, ToCast);
29958
29959 return DAG.getZExtOrTrunc(ToCast, DL, NewVT);
29960 };
29961
29962 // log2(X << Y) -> log2(X) + Y
29963 if (Op.getOpcode() == ISD::SHL) {
29964 // 1 << Y and X nuw/nsw << Y are all non-zero.
29965 if (AssumeNonZero || Op->getFlags().hasNoUnsignedWrap() ||
29966 Op->getFlags().hasNoSignedWrap() || isOneConstant(Op.getOperand(0)))
29967 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0),
29968 Depth + 1, AssumeNonZero))
29969 return DAG.getNode(ISD::ADD, DL, VT, LogX,
29970 CastToVT(VT, Op.getOperand(1)));
29971 }
29972
29973 // c ? X : Y -> c ? Log2(X) : Log2(Y)
29974 SDValue Cond, TVal, FVal;
29975 if (sd_match(Op, m_OneUse(m_Select(m_Value(Cond), m_Value(TVal),
29976 m_Value(FVal))))) {
29977 if (SDValue LogX =
29978 takeInexpensiveLog2(DAG, DL, VT, TVal, Depth + 1, AssumeNonZero))
29979 if (SDValue LogY =
29980 takeInexpensiveLog2(DAG, DL, VT, FVal, Depth + 1, AssumeNonZero))
29981 return DAG.getSelect(DL, VT, Cond, LogX, LogY);
29982 }
29983
29984 // log2(umin(X, Y)) -> umin(log2(X), log2(Y))
29985 // log2(umax(X, Y)) -> umax(log2(X), log2(Y))
29986 if ((Op.getOpcode() == ISD::UMIN || Op.getOpcode() == ISD::UMAX) &&
29987 Op.hasOneUse()) {
29989 // Pass AssumeNonZero as false here. Otherwise we can hit a case where
29990 // log2(umax(X, Y)) != umax(log2(X), log2(Y)) (because of overflow).
29990 if (SDValue LogX =
29991 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0), Depth + 1,
29992 /*AssumeNonZero*/ false))
29993 if (SDValue LogY =
29994 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1), Depth + 1,
29995 /*AssumeNonZero*/ false))
29996 return DAG.getNode(Op.getOpcode(), DL, VT, LogX, LogY);
29997 }
29998
29999 return SDValue();
30000}
30001
30002/// Determines the LogBase2 value for a non-null input value using the
30003/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
30004SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL,
30005 bool KnownNonZero, bool InexpensiveOnly,
30006 std::optional<EVT> OutVT) {
30007 EVT VT = OutVT ? *OutVT : V.getValueType();
30008 SDValue InexpensiveLogBase2 =
30009 takeInexpensiveLog2(DAG, DL, VT, V, /*Depth*/ 0, KnownNonZero);
30010 if (InexpensiveLogBase2 || InexpensiveOnly || !DAG.isKnownToBeAPowerOfTwo(V))
30011 return InexpensiveLogBase2;
30012
30013 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
30014 SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
30015 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
30016 return LogBase2;
30017}
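
// [Illustrative sketch added for exposition; not part of DAGCombiner.cpp.]
// The fallback above computes LogBase2(V) = (EltBits - 1) - ctlz(V) for a value
// known to be a power of two; std::countl_zero stands in for ISD::CTLZ here.
#include <bit>
static unsigned logBase2OfPow2(unsigned V) {
  // Precondition: V is a non-zero power of two, e.g. logBase2OfPow2(64) == 6.
  return 31u - (unsigned)std::countl_zero(V);
}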
30018
30019/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
30020/// For the reciprocal, we need to find the zero of the function:
30021/// F(X) = 1/X - A [which has a zero at X = 1/A]
30022/// =>
30023/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
30024/// does not require additional intermediate precision]
30025/// For the last iteration, put numerator N into it to gain more precision:
30026/// Result = N X_i + X_i (N - N A X_i)
30027SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
30028 SDNodeFlags Flags) {
30029 if (LegalDAG)
30030 return SDValue();
30031
30032 // TODO: Handle extended types?
30033 EVT VT = Op.getValueType();
30034 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
30035 VT.getScalarType() != MVT::f64)
30036 return SDValue();
30037
30038 // If estimates are explicitly disabled for this function, we're done.
30039 MachineFunction &MF = DAG.getMachineFunction();
30040 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
30041 if (Enabled == TLI.ReciprocalEstimate::Disabled)
30042 return SDValue();
30043
30044 // Estimates may be explicitly enabled for this type with a custom number of
30045 // refinement steps.
30046 int Iterations = TLI.getDivRefinementSteps(VT, MF);
30047 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
30048 AddToWorklist(Est.getNode());
30049
30050 SDLoc DL(Op);
30051 if (Iterations) {
30052 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
30053
30054 // Newton iterations: Est = Est + Est (N - Arg * Est)
30055 // If this is the last iteration, also multiply by the numerator.
30056 for (int i = 0; i < Iterations; ++i) {
30057 SDValue MulEst = Est;
30058
30059 if (i == Iterations - 1) {
30060 MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
30061 AddToWorklist(MulEst.getNode());
30062 }
30063
30064 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
30065 AddToWorklist(NewEst.getNode());
30066
30067 NewEst = DAG.getNode(ISD::FSUB, DL, VT,
30068 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
30069 AddToWorklist(NewEst.getNode());
30070
30071 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
30072 AddToWorklist(NewEst.getNode());
30073
30074 Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
30075 AddToWorklist(Est.getNode());
30076 }
30077 } else {
30078 // If no iterations are available, multiply with N.
30079 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
30080 AddToWorklist(Est.getNode());
30081 }
30082
30083 return Est;
30084 }
30085
30086 return SDValue();
30087}
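
// [Illustrative sketch added for exposition; not part of DAGCombiner.cpp.]
// One refinement step of the reciprocal iteration documented above,
// X_{i+1} = X_i + X_i * (1 - A * X_i), written for a scalar float. Est0 stands
// for the target's initial estimate; the real code also folds the numerator N
// into the final step.
static float refineRecip(float A, float Est0, int Iterations) {
  float Est = Est0;
  for (int I = 0; I < Iterations; ++I)
    Est = Est + Est * (1.0f - A * Est); // the error roughly squares each step
  return Est;                           // approximates 1 / A
}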
30088
30089/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
30090/// For the reciprocal sqrt, we need to find the zero of the function:
30091/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
30092/// =>
30093/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
30094/// As a result, we precompute A/2 prior to the iteration loop.
30095SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
30096 unsigned Iterations, bool Reciprocal) {
30097 EVT VT = Arg.getValueType();
30098 SDLoc DL(Arg);
30099 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
30100
30101 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
30102 // this entire sequence requires only one FP constant.
30103 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg);
30104 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg);
30105
30106 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
30107 for (unsigned i = 0; i < Iterations; ++i) {
30108 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
30109 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst);
30110 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst);
30111 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
30112 }
30113
30114 // If non-reciprocal square root is requested, multiply the result by Arg.
30115 if (!Reciprocal)
30116 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg);
30117
30118 return Est;
30119}
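
// [Illustrative sketch added for exposition; not part of DAGCombiner.cpp.]
// The one-constant form above in scalar C++: only 1.5 is materialized, since
// 0.5 * Arg is rebuilt as (1.5 * Arg - Arg).
static float refineRsqrtOneConst(float Arg, float Est, unsigned Iterations) {
  float HalfArg = 1.5f * Arg - Arg;           // == 0.5f * Arg
  for (unsigned I = 0; I < Iterations; ++I)
    Est = Est * (1.5f - HalfArg * Est * Est); // Newton step toward 1/sqrt(Arg)
  return Est;
}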
30120
30121/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
30122/// For the reciprocal sqrt, we need to find the zero of the function:
30123/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
30124/// =>
30125/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
30126SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
30127 unsigned Iterations, bool Reciprocal) {
30128 EVT VT = Arg.getValueType();
30129 SDLoc DL(Arg);
30130 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
30131 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
30132
30133 // This routine must enter the loop below to work correctly
30134 // when (Reciprocal == false).
30135 assert(Iterations > 0);
30136
30137 // Newton iterations for reciprocal square root:
30138 // E = (E * -0.5) * ((A * E) * E + -3.0)
30139 for (unsigned i = 0; i < Iterations; ++i) {
30140 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est);
30141 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est);
30142 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree);
30143
30144 // When calculating a square root at the last iteration build:
30145 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
30146 // (notice a common subexpression)
30147 SDValue LHS;
30148 if (Reciprocal || (i + 1) < Iterations) {
30149 // RSQRT: LHS = (E * -0.5)
30150 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf);
30151 } else {
30152 // SQRT: LHS = (A * E) * -0.5
30153 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf);
30154 }
30155
30156 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS);
30157 }
30158
30159 return Est;
30160}
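
// [Illustrative sketch added for exposition; not part of DAGCombiner.cpp.]
// The two-constant form is the same Newton step rearranged:
//   (E * -0.5) * (A * E * E + -3.0) == E * (1.5 - 0.5 * A * E * E),
// trading the 0.5 * Arg precomputation for the -0.5 and -3.0 constants.
// Reciprocal (rsqrt) path only; the sqrt path reuses A * E in the last step.
static float refineRsqrtTwoConst(float A, float Est, unsigned Iterations) {
  for (unsigned I = 0; I < Iterations; ++I) {
    float AEE = A * Est * Est;
    Est = (Est * -0.5f) * (AEE + -3.0f);
  }
  return Est;
}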
30161
30162/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
30163/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
30164/// Op can be zero.
30165SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, bool Reciprocal) {
30166 if (LegalDAG)
30167 return SDValue();
30168
30169 // TODO: Handle extended types?
30170 EVT VT = Op.getValueType();
30171 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
30172 VT.getScalarType() != MVT::f64)
30173 return SDValue();
30174
30175 // If estimates are explicitly disabled for this function, we're done.
30176 MachineFunction &MF = DAG.getMachineFunction();
30177 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
30178 if (Enabled == TLI.ReciprocalEstimate::Disabled)
30179 return SDValue();
30180
30181 // Estimates may be explicitly enabled for this type with a custom number of
30182 // refinement steps.
30183 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
30184
30185 bool UseOneConstNR = false;
30186 if (SDValue Est =
30187 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
30188 Reciprocal)) {
30189 AddToWorklist(Est.getNode());
30190
30191 if (Iterations > 0)
30192 Est = UseOneConstNR
30193 ? buildSqrtNROneConst(Op, Est, Iterations, Reciprocal)
30194 : buildSqrtNRTwoConst(Op, Est, Iterations, Reciprocal);
30195 if (!Reciprocal) {
30196 SDLoc DL(Op);
30197 // Try the target specific test first.
30198 SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
30199
30200 // The estimate is now completely wrong if the input was exactly 0.0 or
30201 // possibly a denormal. Force the answer to 0.0 or value provided by
30202 // target for those cases.
30203 Est = DAG.getSelect(DL, VT, Test,
30204 TLI.getSqrtResultForDenormInput(Op, DAG), Est);
30205 }
30206 return Est;
30207 }
30208
30209 return SDValue();
30210}
30211
30212SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op) {
30213 return buildSqrtEstimateImpl(Op, true);
30214}
30215
30216SDValue DAGCombiner::buildSqrtEstimate(SDValue Op) {
30217 return buildSqrtEstimateImpl(Op, false);
30218}
30219
30220/// Return true if there is any possibility that the two addresses overlap.
30221bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
30222
30223 struct MemUseCharacteristics {
30224 bool IsVolatile;
30225 bool IsAtomic;
30226 SDValue BasePtr;
30227 int64_t Offset;
30228 LocationSize NumBytes;
30229 MachineMemOperand *MMO;
30230 };
30231
30232 auto getCharacteristics = [this](SDNode *N) -> MemUseCharacteristics {
30233 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
30234 int64_t Offset = 0;
30235 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
30236 Offset = (LSN->getAddressingMode() == ISD::PRE_INC) ? C->getSExtValue()
30237 : (LSN->getAddressingMode() == ISD::PRE_DEC)
30238 ? -1 * C->getSExtValue()
30239 : 0;
30240 TypeSize Size = LSN->getMemoryVT().getStoreSize();
30241 return {LSN->isVolatile(), LSN->isAtomic(),
30242 LSN->getBasePtr(), Offset /*base offset*/,
30243 LocationSize::precise(Size), LSN->getMemOperand()};
30244 }
30245 if (const auto *LN = cast<LifetimeSDNode>(N)) {
30246 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
30247 return {false /*isVolatile*/,
30248 /*isAtomic*/ false,
30249 LN->getOperand(1),
30250 0,
30251 LocationSize::precise(MFI.getObjectSize(LN->getFrameIndex())),
30252 (MachineMemOperand *)nullptr};
30253 }
30254 // Default.
30255 return {false /*isvolatile*/,
30256 /*isAtomic*/ false,
30257 SDValue(),
30258 (int64_t)0 /*offset*/,
30259 LocationSize::beforeOrAfterPointer() /*size*/,
30260 (MachineMemOperand *)nullptr};
30261 };
30262
30263 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
30264 MUC1 = getCharacteristics(Op1);
30265
30266 // If they are to the same address, then they must be aliases.
30267 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
30268 MUC0.Offset == MUC1.Offset)
30269 return true;
30270
30271 // If they are both volatile then they cannot be reordered.
30272 if (MUC0.IsVolatile && MUC1.IsVolatile)
30273 return true;
30274
30275 // Be conservative about atomics for the moment
30276 // TODO: This is way overconservative for unordered atomics (see D66309)
30277 if (MUC0.IsAtomic && MUC1.IsAtomic)
30278 return true;
30279
30280 if (MUC0.MMO && MUC1.MMO) {
30281 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
30282 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
30283 return false;
30284 }
30285
30286 // If NumBytes is scalable and offset is not 0, conservatively return may
30287 // alias
30288 if ((MUC0.NumBytes.hasValue() && MUC0.NumBytes.isScalable() &&
30289 MUC0.Offset != 0) ||
30290 (MUC1.NumBytes.hasValue() && MUC1.NumBytes.isScalable() &&
30291 MUC1.Offset != 0))
30292 return true;
30293 // Try to prove that there is aliasing, or that there is no aliasing. Either
30294 // way, we can return now. If nothing can be proved, proceed with more tests.
30295 bool IsAlias;
30296 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
30297 DAG, IsAlias))
30298 return IsAlias;
30299
30300 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
30301 // either is not known.
30302 if (!MUC0.MMO || !MUC1.MMO)
30303 return true;
30304
30305 // If one operation reads from invariant memory, and the other may store, they
30306 // cannot alias. These should really be checking the equivalent of mayWrite,
30307 // but it only matters for memory nodes other than load/store.
30308 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
30309 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
30310 return false;
30311
30312 // If we know required SrcValue1 and SrcValue2 have relatively large
30313 // alignment compared to the size and offset of the access, we may be able
30314 // to prove they do not alias. This check is conservative for now to catch
30315 // cases created by splitting vector types, it only works when the offsets are
30316 // multiples of the size of the data.
30317 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
30318 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
30319 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
30320 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
30321 LocationSize Size0 = MUC0.NumBytes;
30322 LocationSize Size1 = MUC1.NumBytes;
30323
30324 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
30325 Size0.hasValue() && Size1.hasValue() && !Size0.isScalable() &&
30326 !Size1.isScalable() && Size0 == Size1 &&
30327 OrigAlignment0 > Size0.getValue().getKnownMinValue() &&
30328 SrcValOffset0 % Size0.getValue().getKnownMinValue() == 0 &&
30329 SrcValOffset1 % Size1.getValue().getKnownMinValue() == 0) {
30330 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
30331 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
30332
30333 // There is no overlap between these relatively aligned accesses of
30334 // similar size. Return no alias.
30335 if ((OffAlign0 + static_cast<int64_t>(
30336 Size0.getValue().getKnownMinValue())) <= OffAlign1 ||
30337 (OffAlign1 + static_cast<int64_t>(
30338 Size1.getValue().getKnownMinValue())) <= OffAlign0)
30339 return false;
30340 }
30341
30342 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
30343 ? CombinerGlobalAA
30344 : DAG.getSubtarget().useAA();
30345#ifndef NDEBUG
30346 if (CombinerAAOnlyFunc.getNumOccurrences() &&
30347 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
30348 UseAA = false;
30349#endif
30350
30351 if (UseAA && BatchAA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
30352 Size0.hasValue() && Size1.hasValue() &&
30353 // Can't represent a scalable size + fixed offset in LocationSize
30354 (!Size0.isScalable() || SrcValOffset0 == 0) &&
30355 (!Size1.isScalable() || SrcValOffset1 == 0)) {
30356 // Use alias analysis information.
30357 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
30358 int64_t Overlap0 =
30359 Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
30360 int64_t Overlap1 =
30361 Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
30362 LocationSize Loc0 =
30363 Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0);
30364 LocationSize Loc1 =
30365 Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1);
30366 if (BatchAA->isNoAlias(
30367 MemoryLocation(MUC0.MMO->getValue(), Loc0,
30368 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
30369 MemoryLocation(MUC1.MMO->getValue(), Loc1,
30370 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
30371 return false;
30372 }
30373
30374 // Otherwise we have to assume they alias.
30375 return true;
30376}
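
// [Illustrative sketch added for exposition; not part of DAGCombiner.cpp.]
// Scalar model of the alignment-based disjointness test used above: for two
// same-sized accesses whose offsets are multiples of that size and whose common
// base alignment exceeds the size, comparing offsets modulo the alignment is
// enough to prove the accesses cannot overlap (hypothetical helper).
static bool provablyDisjoint(long long Off0, long long Off1, long long Size,
                             long long BaseAlign) {
  long long A0 = Off0 % BaseAlign, A1 = Off1 % BaseAlign;
  return (A0 + Size) <= A1 || (A1 + Size) <= A0;
}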
30377
30378/// Walk up chain skipping non-aliasing memory nodes,
30379/// looking for aliasing nodes and adding them to the Aliases vector.
30380void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
30381 SmallVectorImpl<SDValue> &Aliases) {
30382 SmallVector<SDValue, 8> Chains; // List of chains to visit.
30383 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
30384
30385 // Get alias information for node.
30386 // TODO: relax aliasing for unordered atomics (see D66309)
30387 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
30388
30389 // Starting off.
30390 Chains.push_back(OriginalChain);
30391 unsigned Depth = 0;
30392
30393 // Attempt to improve chain by a single step
30394 auto ImproveChain = [&](SDValue &C) -> bool {
30395 switch (C.getOpcode()) {
30396 case ISD::EntryToken:
30397 // No need to mark EntryToken.
30398 C = SDValue();
30399 return true;
30400 case ISD::LOAD:
30401 case ISD::STORE: {
30402 // Get alias information for C.
30403 // TODO: Relax aliasing for unordered atomics (see D66309)
30404 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
30405 cast<LSBaseSDNode>(C.getNode())->isSimple();
30406 if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
30407 // Look further up the chain.
30408 C = C.getOperand(0);
30409 return true;
30410 }
30411 // Alias, so stop here.
30412 return false;
30413 }
30414
30415 case ISD::CopyFromReg:
30416 // Always forward past CopyFromReg.
30417 C = C.getOperand(0);
30418 return true;
30419
30420 case ISD::LIFETIME_START:
30421 case ISD::LIFETIME_END: {
30422 // We can forward past any lifetime start/end that can be proven not to
30423 // alias the memory access.
30424 if (!mayAlias(N, C.getNode())) {
30425 // Look further up the chain.
30426 C = C.getOperand(0);
30427 return true;
30428 }
30429 return false;
30430 }
30431 default:
30432 return false;
30433 }
30434 };
30435
30436 // Look at each chain and determine if it is an alias. If so, add it to the
30437 // aliases list. If not, then continue up the chain looking for the next
30438 // candidate.
30439 while (!Chains.empty()) {
30440 SDValue Chain = Chains.pop_back_val();
30441
30442 // Don't bother if we've seen Chain before.
30443 if (!Visited.insert(Chain.getNode()).second)
30444 continue;
30445
30446 // For TokenFactor nodes, look at each operand and only continue up the
30447 // chain until we reach the depth limit.
30448 //
30449 // FIXME: The depth check could be made to return the last non-aliasing
30450 // chain we found before we hit a tokenfactor rather than the original
30451 // chain.
30452 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
30453 Aliases.clear();
30454 Aliases.push_back(OriginalChain);
30455 return;
30456 }
30457
30458 if (Chain.getOpcode() == ISD::TokenFactor) {
30459 // We have to check each of the operands of the token factor for "small"
30460 // token factors, so we queue them up. Adding the operands to the queue
30461 // (stack) in reverse order maintains the original order and increases the
30462 // likelihood that getNode will find a matching token factor (CSE).
30463 if (Chain.getNumOperands() > 16) {
30464 Aliases.push_back(Chain);
30465 continue;
30466 }
30467 for (unsigned n = Chain.getNumOperands(); n;)
30468 Chains.push_back(Chain.getOperand(--n));
30469 ++Depth;
30470 continue;
30471 }
30472 // Everything else
30473 if (ImproveChain(Chain)) {
30474 // Updated Chain Found, Consider new chain if one exists.
30475 if (Chain.getNode())
30476 Chains.push_back(Chain);
30477 ++Depth;
30478 continue;
30479 }
30480 // No Improved Chain Possible, treat as Alias.
30481 Aliases.push_back(Chain);
30482 }
30483}
30484
30485/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
30486/// (aliasing node.)
30487SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
30488 if (OptLevel == CodeGenOptLevel::None)
30489 return OldChain;
30490
30491 // Ops for replacing token factor.
30492 SmallVector<SDValue, 8> Aliases;
30493
30494 // Accumulate all the aliases to this node.
30495 GatherAllAliases(N, OldChain, Aliases);
30496
30497 // If no operands then chain to entry token.
30498 if (Aliases.empty())
30499 return DAG.getEntryNode();
30500
30501 // If a single operand then chain to it. We don't need to revisit it.
30502 if (Aliases.size() == 1)
30503 return Aliases[0];
30504
30505 // Construct a custom tailored token factor.
30506 return DAG.getTokenFactor(SDLoc(N), Aliases);
30507}
30508
30509// This function tries to collect a bunch of potentially interesting
30510// nodes to improve the chains of, all at once. This might seem
30511// redundant, as this function gets called when visiting every store
30512// node, so why not let the work be done on each store as it's visited?
30513//
30514// I believe this is mainly important because mergeConsecutiveStores
30515// is unable to deal with merging stores of different sizes, so unless
30516// we improve the chains of all the potential candidates up-front
30517// before running mergeConsecutiveStores, it might only see some of
30518// the nodes that will eventually be candidates, and then not be able
30519// to go from a partially-merged state to the desired final
30520// fully-merged state.
30521
30522bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
30523 SmallVector<StoreSDNode *, 8> ChainedStores;
30524 StoreSDNode *STChain = St;
30525 // Intervals records which offsets from BaseIndex have been covered. In
30526 // the common case, every store writes to the address immediately before the
30527 // previous one and is thus merged with the previous interval at insertion time.
30528
30529 using IMap = llvm::IntervalMap<int64_t, std::monostate, 8,
30530 IntervalMapHalfOpenInfo<int64_t>>;
30531 IMap::Allocator A;
30532 IMap Intervals(A);
30533
30534 // This holds the base pointer, index, and the offset in bytes from the base
30535 // pointer.
30536 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
30537
30538 // We must have a base and an offset.
30539 if (!BasePtr.getBase().getNode())
30540 return false;
30541
30542 // Do not handle stores to undef base pointers.
30543 if (BasePtr.getBase().isUndef())
30544 return false;
30545
30546 // Do not handle stores to opaque types
30547 if (St->getMemoryVT().isZeroSized())
30548 return false;
30549
30550 // BaseIndexOffset assumes that offsets are fixed-size, which
30551 // is not valid for scalable vectors where the offsets are
30552 // scaled by `vscale`, so bail out early.
30553 if (St->getMemoryVT().isScalableVT())
30554 return false;
30555
30556 // Add ST's interval.
30557 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8,
30558 std::monostate{});
30559
30560 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
30561 if (Chain->getMemoryVT().isScalableVector())
30562 return false;
30563
30564 // If the chain has more than one use, then we can't reorder the mem ops.
30565 if (!SDValue(Chain, 0)->hasOneUse())
30566 break;
30567 // TODO: Relax for unordered atomics (see D66309)
30568 if (!Chain->isSimple() || Chain->isIndexed())
30569 break;
30570
30571 // Find the base pointer and offset for this memory node.
30572 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
30573 // Check that the base pointer is the same as the original one.
30574 int64_t Offset;
30575 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
30576 break;
30577 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
30578 // Make sure we don't overlap with other intervals by checking the ones to
30579 // the left or right before inserting.
30580 auto I = Intervals.find(Offset);
30581 // If there's a next interval, we should end before it.
30582 if (I != Intervals.end() && I.start() < (Offset + Length))
30583 break;
30584 // If there's a previous interval, we should start after it.
30585 if (I != Intervals.begin() && (--I).stop() <= Offset)
30586 break;
30587 Intervals.insert(Offset, Offset + Length, std::monostate{});
30588
30589 ChainedStores.push_back(Chain);
30590 STChain = Chain;
30591 }
30592
30593 // If we didn't find a chained store, exit.
30594 if (ChainedStores.empty())
30595 return false;
30596
30597 // Improve all chained stores (St and ChainedStores members) starting from
30598 // where the store chain ended and return single TokenFactor.
30599 SDValue NewChain = STChain->getChain();
30600 SmallVector<SDValue, 8> TFOps;
30601 for (unsigned I = ChainedStores.size(); I;) {
30602 StoreSDNode *S = ChainedStores[--I];
30603 SDValue BetterChain = FindBetterChain(S, NewChain);
30604 S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
30605 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
30606 TFOps.push_back(SDValue(S, 0));
30607 ChainedStores[I] = S;
30608 }
30609
30610 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
30611 SDValue BetterChain = FindBetterChain(St, NewChain);
30612 SDValue NewST;
30613 if (St->isTruncatingStore())
30614 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
30615 St->getBasePtr(), St->getMemoryVT(),
30616 St->getMemOperand());
30617 else
30618 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
30619 St->getBasePtr(), St->getMemOperand());
30620
30621 TFOps.push_back(NewST);
30622
30623 // If we improved every element of TFOps, then we've lost the dependence on
30624 // NewChain to successors of St and we need to add it back to TFOps. Do so at
30625 // the beginning to keep relative order consistent with FindBetterChains.
30626 auto hasImprovedChain = [&](SDValue ST) -> bool {
30627 return ST->getOperand(0) != NewChain;
30628 };
30629 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
30630 if (AddNewChain)
30631 TFOps.insert(TFOps.begin(), NewChain);
30632
30633 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
30634 CombineTo(St, TF);
30635
30636 // Add TF and its operands to the worklist.
30637 AddToWorklist(TF.getNode());
30638 for (const SDValue &Op : TF->ops())
30639 AddToWorklist(Op.getNode());
30640 AddToWorklist(STChain);
30641 return true;
30642}
30643
30644bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
30645 if (OptLevel == CodeGenOptLevel::None)
30646 return false;
30647
30648 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
30649
30650 // We must have a base and an offset.
30651 if (!BasePtr.getBase().getNode())
30652 return false;
30653
30654 // Do not handle stores to undef base pointers.
30655 if (BasePtr.getBase().isUndef())
30656 return false;
30657
30658 // Directly improve a chain of disjoint stores starting at St.
30659 if (parallelizeChainedStores(St))
30660 return true;
30661
30662 // Improve St's chain.
30663 SDValue BetterChain = FindBetterChain(St, St->getChain());
30664 if (St->getChain() != BetterChain) {
30665 replaceStoreChain(St, BetterChain);
30666 return true;
30667 }
30668 return false;
30669}
30670
30671/// This is the entry point for the file.
30672void SelectionDAG::Combine(CombineLevel Level, BatchAAResults *BatchAA,
30673 CodeGenOptLevel OptLevel) {
30674 /// This is the main entry point to this class.
30675 DAGCombiner(*this, BatchAA, OptLevel).Run(Level);
30676}
return SDValue()
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static cl::opt< bool > UseAA("aarch64-use-aa", cl::init(true), cl::desc("Enable the use of AA during codegen."))
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
constexpr LLT S1
AMDGPU Register Bank Select
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques=false)
static cl::opt< bool > EnableShrinkLoadReplaceStoreWithStore("combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable load/<replace bytes>/store with " "a narrower store"))
static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, unsigned ExtOpc, SmallVectorImpl< SDNode * > &ExtendNodes, const TargetLowering &TLI)
static cl::opt< unsigned > TokenFactorInlineLimit("combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048), cl::desc("Limit the number of operands to inline for Token Factors"))
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc, bool NonNegZExt=false)
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG)
static SDNode * getBuildPairElt(SDNode *N, unsigned i)
static SDValue foldExtractSubvectorFromShuffleVector(EVT NarrowVT, SDValue Src, unsigned Index, const SDLoc &DL, SelectionDAG &DAG, bool LegalOperations)
Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)), try to produce VECTOR_SHUFFLE(EXTRACT_SUBVEC...
static SDValue foldToMaskedStore(StoreSDNode *Store, SelectionDAG &DAG, const SDLoc &Dl)
static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalTypes)
Try to fold a sext/zext/aext dag node into a ConstantSDNode or a build_vector of constants.
static unsigned getMinMaxOpcodeForClamp(bool IsMin, SDValue Operand1, SDValue Operand2, SelectionDAG &DAG, const TargetLowering &TLI)
Returns an appropriate FP min/max opcode for clamping operations.
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, SDValue ExtractFrom, SDValue &Mask, const SDLoc &DL)
Helper function for visitOR to extract the needed side of a rotate idiom from a shl/srl/mul/udiv.
static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec, bool &IsLoad, bool &IsMasked, SDValue &Ptr, const TargetLowering &TLI)
bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled, SelectionDAG &DAG, const SDLoc &DL)
static SDValue narrowExtractedVectorLoad(EVT VT, SDValue Src, unsigned Index, const SDLoc &DL, SelectionDAG &DAG)
If we are extracting a subvector from a wide vector load, convert to a narrow load to eliminate the e...
static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG, const SDLoc &DL, bool LegalTypes)
Transform a vector binary operation into a scalar binary operation by moving the math/logic after an ...
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI)
Return true if divmod libcall is available.
static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG)
static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1, SelectionDAG &DAG, const SDLoc &DL)
Given the operands of an add/sub operation, see if the 2nd operand is a masked 0/1 whose source opera...
static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef< int > Mask, SmallVectorImpl< int > &NewMask, SDValue Elt, unsigned InsIndex)
static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf)
If we have a unary shuffle of a shuffle, see if it can be folded away completely.
static bool canSplitIdx(LoadSDNode *LD)
static SDValue ShrinkLoadReplaceStoreWithStore(const std::pair< unsigned, unsigned > &MaskInfo, SDValue IVal, StoreSDNode *St, DAGCombiner *DC)
Check to see if IVal is something that provides a value as specified by MaskInfo.
static cl::opt< bool > StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, cl::desc("Bypass the profitability model of load slicing"), cl::init(false))
Hidden option to stress test load slicing, i.e., when this option is enabled, load slicing bypasses m...
static cl::opt< bool > UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), cl::desc("Enable DAG combiner's use of TBAA"))
static void adjustCostForPairing(SmallVectorImpl< LoadedSlice > &LoadedSlices, LoadedSlice::Cost &GlobalLSCost)
Adjust the GlobalLSCost according to the target paring capabilities and the layout of the slices.
static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI, SDValue N0, SDValue N1, SDNode *N)
static cl::opt< bool > DisableCombines("combiner-disabled", cl::Hidden, cl::init(false), cl::desc("Disable the DAG combiner"))
static SDValue foldExtendVectorInregToExtendOfSubvector(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalOperations)
static SDValue narrowExtractedVectorBinOp(EVT VT, SDValue Src, unsigned Index, const SDLoc &DL, SelectionDAG &DAG, bool LegalOperations)
If we are extracting a subvector produced by a wide binary operator try to use a narrow binary operat...
static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode)
Check if N satisfies: N is used once.
static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG, const SDLoc &DL)
Given an extending node with a pop-count operand, if the target does not support a pop-count in the n...
static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand, SDValue RightHand, SelectionDAG &DAG)
Given a tree of logic operations with shape like (LOGIC (LOGIC (X, Y), LOGIC (Z, Y))) try to match an...
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG)
static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned Depth, bool AssumeNonZero)
static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F, const SDLoc &DL, SelectionDAG &DAG)
static bool areUsedBitsDense(const APInt &UsedBits)
Check that all bits set in UsedBits form a dense region, i.e., UsedBits looks like 0....
static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG, const TargetLowering &TLI, const SDLoc &DL)
Fold "masked merge" expressions like (m & x) | (~m & y) and its DeMorgan variant (~m | x) & (m | y) i...
static SDValue getInputChainForNode(SDNode *N)
Given a node, return its input chain if it has one, otherwise return a null sd operand.
static ElementCount numVectorEltsOrZero(EVT T)
static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG, bool ShouldCommuteOperands)
This inverts a canonicalization in IR that replaces a variable select arm with an identity constant.
static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG)
static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
For targets that support usubsat, match a bit-hack form of that operation that ends in 'and' and conv...
static cl::opt< bool > CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Enable DAG combiner's use of IR alias analysis"))
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy)
static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf, SelectionDAG &DAG)
Combine shuffle of shuffle of the form: shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X...
static bool isDivisorPowerOfTwo(SDValue Divisor)
static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift, SDValue &Mask)
Match "(X shl/srl V1) & V2" where V2 may not be present.
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG)
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS, const SDNodeFlags Flags, const TargetLowering &TLI)
static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static std::optional< EVT > canCombineShuffleToExtendVectorInreg(unsigned Opcode, EVT VT, std::function< bool(unsigned)> Match, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold a 'not' shifted sign-bit with add/sub with constant operand into a shift and add with a d...
static SDValue stripTruncAndExt(SDValue Value)
static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, SDValue X, SDValue Carry0, SDValue Carry1, SDNode *N)
If we are facing some sort of diamond carry propagation pattern try to break it up to generate someth...
static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles followed by concatenatio...
static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
static auto getFirstIndexOf(R &&Range, const T &Val)
static SDValue getSubVectorSrc(SDValue V, unsigned Index, EVT SubVT)
static std::pair< unsigned, unsigned > CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain)
Check to see if V is (and load (ptr), imm), where the load is having specific bytes cleared out.
static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef< int > Mask)
If the shuffle mask is taking exactly one element from the first vector operand and passing through a...
static unsigned getMinMaxOpcodeForCompareFold(SDValue Operand1, SDValue Operand2, ISD::CondCode CC, unsigned OrAndOpcode, SelectionDAG &DAG, bool isFMAXNUMFMINNUM_IEEE, bool isFMAXNUMFMINNUM)
static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT, const TargetLowering &TLI)
static cl::opt< bool > EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable merging multiple stores " "into a wider store"))
static bool isContractableFMUL(const TargetOptions &Options, SDValue N)
static cl::opt< bool > MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), cl::desc("DAG combiner may split indexing from loads"))
static bool areSlicesNextToEachOther(const LoadedSlice &First, const LoadedSlice &Second)
Check whether or not First and Second are next to each other in memory.
static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op, SDValue &Mask)
static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static bool isBSwapHWordPair(SDValue N, MutableArrayRef< SDNode * > Parts)
static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI)
static bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy)
copysign(x, fp_extend(y)) -> copysign(x, y) copysign(x, fp_round(y)) -> copysign(x,...
static cl::opt< bool > ReduceLoadOpStoreWidthForceNarrowingProfitable("combiner-reduce-load-op-store-width-force-narrowing-profitable", cl::Hidden, cl::init(false), cl::desc("DAG combiner force override the narrowing profitable check when " "reducing the width of load/op/store sequences"))
static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
static SDValue foldToSaturated(SDNode *N, EVT &VT, SDValue &Src, EVT &SrcVT, SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG)
static SDValue FoldIntToFPToInt(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue foldSubCtlzNot(SDNode *N, SelectionDAG &DAG)
static SDNode * getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad, bool &IsMasked, SDValue &Ptr, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG, const TargetLowering &TLI, bool Force)
Flips a boolean if it is cheaper to compute.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, KnownBits &Known)
static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc)
static SDValue combineConcatVectorOfShuffleAndItsOperands(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT, SelectionDAG &DAG)
static SDValue foldRemainderIdiom(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const TargetLowering &TLI, SelectionDAG &DAG)
static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG)
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG)
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset=0)
static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI)
static ConstantSDNode * getAsNonOpaqueConstant(SDValue N)
If N is a ConstantSDNode with isOpaque() == false return it casted to a ConstantSDNode pointer else n...
static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static SDValue detectUSatUPattern(SDValue In, EVT VT)
Detect patterns of truncation with unsigned saturation:
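The unsigned-saturation idiom amounts to clamping against the destination type's maximum before truncating. A scalar C++ analogue for an assumed i32 -> i8 truncation (illustrative sketch, not the DAG matcher itself):
  #include <algorithm>
  #include <cassert>
  #include <cstdint>
  // Scalar analogue of (truncate (umin x, 255)) for an i32 -> i8 truncation.
  uint8_t truncUSat(uint32_t X) {
    return static_cast<uint8_t>(std::min<uint32_t>(X, 255u));
  }
  int main() {
    assert(truncUSat(7) == 7);
    assert(truncUSat(1000) == 255);
    return 0;
  }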
static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue combineConcatVectorOfSplats(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N)
OR combines for which the commuted variant will be tried as well.
static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG, const SDLoc &DL)
Detect patterns of truncation with unsigned saturation:
static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static cl::opt< bool > EnableReduceLoadOpStoreWidth("combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable reducing the width of load/op/store " "sequence"))
static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal, SDValue FVal, const TargetLowering &TLI, SelectionDAG &DAG, const SDLoc &DL)
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, SelectionDAG &DAG, bool IsRotate, bool FromAdd)
static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, bool LegalOperations)
static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG)
static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG)
Try to replace shift/logic that tests if a bit is clear with mask + setcc.
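The rewrite relies on the identity that testing a bit after a right shift is the same as masking that bit in place. A small self-contained check of that identity (illustrative values, not the combiner code):
  #include <cassert>
  #include <cstdint>
  int main() {
    for (uint32_t X : {0u, 1u, 0x10u, 0xFFFFFFFFu}) {
      unsigned C = 4;
      bool ShiftForm = ((X >> C) & 1u) == 0; // shift + and + compare
      bool MaskForm = (X & (1u << C)) == 0;  // mask + setcc
      assert(ShiftForm == MaskForm);
    }
    return 0;
  }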
static bool areBitwiseNotOfEachother(SDValue Op0, SDValue Op1)
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG)
static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, bool LegalTypes)
If a vector binop is performed on splat values, it may be profitable to extract, scalarize,...
static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG)
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue combineConcatVectorOfConcatVectors(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI, SelectionDAG &DAG, SDNode *N, SDValue N0, SDValue N1, EVT VT)
static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG, const SDLoc &DL, CombineLevel Level)
Fold (sext (select c, load x, load y)) -> (select c, sextload x, sextload y) (zext (select c,...
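In scalar terms the extension is hoisted into both arms of the select, which is what allows each load to become an extending load. A hypothetical scalar illustration of the equivalence (plain values stand in for the loads):
  #include <cassert>
  #include <cstdint>
  int main() {
    int8_t A = -5, B = 7;
    for (bool C : {false, true}) {
      int32_t Ext1 = static_cast<int32_t>(C ? A : B); // sext (select c, x, y)
      int32_t Ext2 = C ? static_cast<int32_t>(A)
                       : static_cast<int32_t>(B);     // select c, sext x, sext y
      assert(Ext1 == Ext2);
    }
    return 0;
  }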
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V, bool ForceCarryReconstruction=false)
static SDValue matchMergedBFX(SDValue Root, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
If a (v)select has a condition value that is a sign-bit test, try to smear the condition operand sign...
static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG)
static SDValue detectSSatSPattern(SDValue In, EVT VT)
Detect patterns of truncation with signed saturation: (truncate (smin (smax (x, signed_min_of_dest_ty...
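The signed-saturation idiom clamps to the destination type's signed range before truncating. A scalar C++ analogue for an assumed i32 -> i8 truncation (illustrative only; std::clamp requires C++17):
  #include <algorithm>
  #include <cassert>
  #include <cstdint>
  // Scalar analogue of (truncate (smin (smax x, -128), 127)) for i32 -> i8.
  int8_t truncSSat(int32_t X) {
    return static_cast<int8_t>(std::clamp(X, -128, 127));
  }
  int main() {
    assert(truncSSat(-1000) == -128);
    assert(truncSSat(42) == 42);
    assert(truncSSat(1000) == 127);
    return 0;
  }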
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG)
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations)
static cl::opt< unsigned > StoreMergeDependenceLimit("combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10), cl::desc("Limit the number of times for the same StoreNode and RootNode " "to bail out in store merging dependence check"))
static SDValue eliminateFPCastPair(SDNode *N)
static cl::opt< std::string > CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, cl::desc("Only use DAG-combiner alias analysis in this" " function"))
static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp, SelectionDAG &DAG)
Given a bitwise logic operation N with a matching bitwise logic operand, fold a pattern where 2 of th...
ByteProvider< SDNode * > SDByteProvider
Recursively traverses the expression calculating the origin of the requested byte of the given value.
static bool isSlicingProfitable(SmallVectorImpl< LoadedSlice > &LoadedSlices, const APInt &UsedBits, bool ForCodeSize)
Check the profitability of all involved LoadedSlice.
static SDValue narrowInsertExtractVectorBinOp(EVT SubVT, SDValue BinOp, unsigned Index, const SDLoc &DL, SelectionDAG &DAG, bool LegalOperations)
static bool isBSwapHWordElement(SDValue N, MutableArrayRef< SDNode * > Parts)
Return true if the specified node is an element that makes up a 32-bit packed halfword byteswap.
static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, unsigned &BW, bool &Unsigned, SelectionDAG &DAG)
static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static std::optional< SDByteProvider > calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, std::optional< uint64_t > VectorIndex, unsigned StartingIndex=0)
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
This file defines the DenseMap class.
static MaybeAlign getAlign(Value *Ptr)
This file implements a coalescing interval map for small objects.
This file provides utility analysis objects describing memory locations.
This file contains the declarations for metadata subclasses.
Contains matchers for matching SelectionDAG nodes and values.
This file contains some templates that are useful if you are working with the STL at all.
static cl::opt< unsigned > MaxSteps("has-predecessor-max-steps", cl::Hidden, cl::init(8192), cl::desc("DAG combiner limit number of steps when searching DAG " "for predecessor nodes"))
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
This file describes how to lower LLVM code to machine code.
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static LLVM_ABI ExponentType semanticsMinExponent(const fltSemantics &)
Definition APFloat.cpp:298
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static LLVM_ABI ExponentType semanticsMaxExponent(const fltSemantics &)
Definition APFloat.cpp:294
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:290
static LLVM_ABI bool isIEEELikeFP(const fltSemantics &)
Definition APFloat.cpp:331
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
static LLVM_ABI unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition APFloat.cpp:304
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
Definition APFloat.h:1102
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition APFloat.h:1190
bool isNegative() const
Definition APFloat.h:1431
bool isNormal() const
Definition APFloat.h:1435
bool isDenormal() const
Definition APFloat.h:1432
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition APFloat.h:1414
const fltSemantics & getSemantics() const
Definition APFloat.h:1439
APFloat makeQuiet() const
Assuming this is an IEEE-754 NaN value, quiet its signaling bit.
Definition APFloat.h:1298
bool isNaN() const
Definition APFloat.h:1429
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1329
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition APFloat.h:1070
bool isSignaling() const
Definition APFloat.h:1433
APInt bitcastToAPInt() const
Definition APFloat.h:1335
bool isLargest() const
Definition APFloat.h:1447
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1314
bool isInfinity() const
Definition APFloat.h:1428
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt umul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1971
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1758
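A minimal usage sketch of udivrem (assumes the llvm/ADT/APInt.h header is available and the program links against LLVM's Support library):
  #include "llvm/ADT/APInt.h"
  #include <cassert>
  int main() {
    llvm::APInt A(32, 100), B(32, 7);
    llvm::APInt Q(32, 0), R(32, 0);
    llvm::APInt::udivrem(A, B, Q, R); // Q = 14, R = 2
    assert(Q == 14 && R == 2);
    return 0;
  }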
LLVM_ABI APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition APInt.cpp:644
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:450
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1671
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1386
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1513
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1331
APInt abs() const
Get the absolute value.
Definition APInt.h:1796
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1183
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
Definition APInt.h:467
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1666
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1489
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1112
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1250
int32_t exactLogBase2() const
Definition APInt.h:1784
LLVM_ABI APInt uadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1935
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1640
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1599
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
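A short usage sketch of getSplat under the same assumptions about LLVM headers and linking as above:
  #include "llvm/ADT/APInt.h"
  #include <cassert>
  int main() {
    llvm::APInt Byte(8, 0xAB);
    llvm::APInt Splat = llvm::APInt::getSplat(32, Byte); // 0xABABABAB
    assert(Splat.getZExtValue() == 0xABABABABu);
    return 0;
  }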
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1532
unsigned countLeadingZeros() const
Definition APInt.h:1607
void flipAllBits()
Toggle every bit to its opposite value.
Definition APInt.h:1453
unsigned logBase2() const
Definition APInt.h:1762
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:511
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:476
bool getBoolValue() const
Convert APInt to a boolean value.
Definition APInt.h:472
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition APInt.cpp:1736
LLVM_ABI APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1960
bool isMask(unsigned numBits) const
Definition APInt.h:489
bool isMaxSignedValue() const
Determine if this is the largest signed value.
Definition APInt.h:406
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition APInt.h:1151
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
Definition APInt.h:1368
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1258
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:390
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1563
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:859
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:852
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1657
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1222
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition ArrayRef.h:195
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
static LLVM_ABI BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
static LLVM_ABI bool computeAliasing(const SDNode *Op0, const LocationSize NumBytes0, const SDNode *Op1, const LocationSize NumBytes1, const SelectionDAG &DAG, bool &IsAlias)
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
LLVM_ABI bool isConstant() const
Represents known origin of an individual byte in combine pattern.
static ByteProvider getConstantZero()
static ByteProvider getSrc(std::optional< SDNode * > Val, int64_t ByteOffset, int64_t VectorOffset)
Combiner implementation.
Definition Combiner.h:34
ISD::CondCode get() const
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static ConstantAsMetadata * get(Constant *C)
Definition Metadata.h:536
const APFloat & getValueAPF() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
bool isNegative() const
Return true if the value is negative.
bool isZero() const
Return true if the value is positive or negative zero.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI bool isFullSet() const
Return true if this set contains all of the elements possible for this data-type.
LLVM_ABI ConstantRange truncate(uint32_t BitWidth, unsigned NoWrapKind=0) const
Return a new range in the specified integer type, which must be strictly smaller than the current typ...
const APInt & getUpper() const
Return the upper value for this range.
uint32_t getBitWidth() const
Get the bit width of this ConstantRange.
const ConstantInt * getConstantIntValue() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition Constant.h:43
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:214
bool isBigEndian() const
Definition DataLayout.h:215
LLVM_ABI TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
static bool shouldExecute(CounterInfo &Counter)
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
iterator end()
Definition DenseMap.h:81
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
constexpr bool isScalar() const
Exactly one element.
Definition TypeSize.h:320
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:730
const_iterator find(KeyT x) const
find - Return an iterator pointing to the first interval ending at or after x, or end().
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
static LocationSize precise(uint64_t Value)
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
bool isScalable() const
TypeSize getValue() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1569
Machine Value Type.
SimpleValueType SimpleTy
static auto all_valuetypes()
SimpleValueType Iteration.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
A description of a memory reference used in the backend.
const PseudoSourceValue * getPseudoValue() const
void clearRanges()
Unset the tracked range metadata.
Flags
Flags values. These may be or'd together.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MONonTemporal
The memory access is non-temporal.
Flags getFlags() const
Return the raw flags of the source value,.
const Value * getValue() const
Return the base address of the memory access.
const SDValue & getPassThru() const
ISD::LoadExtType getExtensionType() const
const SDValue & getBasePtr() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getInc() const
const SDValue & getScale() const
const SDValue & getMask() const
const SDValue & getIntID() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
ISD::MemIndexType getIndexType() const
This class is used to represent an MLOAD node.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
const SDValue & getMask() const
const SDValue & getPassThru() const
const SDValue & getOffset() const
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This class is used to represent an MSTORE node.
bool isCompressingStore() const
Returns true if the op does a compression to the vector before storing.
const SDValue & getOffset() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getBaseAlign() const
Returns alignment and volatility of the memory access.
const MDNode * getRanges() const
Returns the Ranges that describes the dereference.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
bool isNonTemporal() const
bool isInvariant() const
bool isDereferenceable() const
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
MutableArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition ArrayRef.h:419
iterator end() const
Definition ArrayRef.h:343
iterator begin() const
Definition ArrayRef.h:342
MutableArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition ArrayRef.h:412
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
LLVM_ABI void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
TypeSize getValueSizeInBits(unsigned ResNo) const
Returns MVT::getSizeInBits(getValueType(ResNo)).
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
LLVM_ABI bool isOperandOf(const SDNode *N) const
Return true if this node is an operand of N.
const APInt & getConstantOperandAPInt(unsigned Num) const
Helper method returns the APInt of a ConstantSDNode operand.
std::optional< APInt > bitcastToAPInt() const
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
LLVM_ABI bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
LLVM_ABI bool reachesChainWithoutSideEffects(SDValue Dest, unsigned Depth=2) const
Return true if this operand (which must be a chain) reaches the specified operand without crossing an...
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isAnyAdd() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
LLVM_ABI SDValue getSplatSourceVector(SDValue V, int &SplatIndex)
If V is a splatted value, return the source vector and its splat index.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
LLVM_ABI SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
bool isKnownNeverSNaN(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
LLVM_ABI std::optional< bool > isBoolConstant(SDValue N) const
Check if a value N is a constant using the target's BooleanContent for its type.
const TargetSubtargetInfo & getSubtarget() const
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
LLVM_ABI SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getAtomicLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO)
LLVM_ABI SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags, bool AllowCommute=false)
Get the specified node if it's already available, or else return NULL.
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI void Combine(CombineLevel Level, BatchAAResults *BatchAA, CodeGenOptLevel OptLevel)
This iterates over the nodes in the SelectionDAG, folding certain types of nodes together,...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
static LLVM_ABI unsigned getHasPredecessorMaxSteps()
LLVM_ABI bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI bool cannotBeOrderedNegativeFP(SDValue Op) const
Test whether the given float value is known to be positive.
LLVM_ABI SDValue getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
LLVM_ABI SDValue getAssertAlign(const SDLoc &DL, SDValue V, Align A)
Return an AssertAlignSDNode.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI bool canIgnoreSignBitOfZero(const SDUse &Use) const
Check if a use of a float value is insensitive to signed zeros.
LLVM_ABI bool SignBitIsZeroFP(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero, for a floating-point value.
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
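In scalar terms, bitwise NOT is exactly XOR with an all-ones value, which is what this helper builds. A tiny illustrative check:
  #include <cassert>
  #include <cstdint>
  int main() {
    for (uint32_t X : {0u, 1u, 0xDEADBEEFu})
      assert(~X == (X ^ 0xFFFFFFFFu)); // NOT x == XOR x, -1
    return 0;
  }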
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
bool isGuaranteedNotToBePoison(SDValue Op, unsigned Depth=0) const
Return true if this function can prove that Op is never poison.
LLVM_ABI SDValue getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
LLVM_ABI APInt computeVectorKnownZeroElements(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
For each demanded element of a vector, see if it is known to be zero.
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
LLVM_ABI SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI void DeleteNode(SDNode *N)
Remove the specified node from the system.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI std::optional< unsigned > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
LLVM_ABI SDValue simplifySelect(SDValue Cond, SDValue TVal, SDValue FVal)
Try to simplify a select/vselect into 1 of its operands or a constant.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
LLVM_ABI bool isConstantFPBuildVectorOrConstantFP(SDValue N) const
Test whether the given value is a constant FP or similar node.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
LLVM_ABI bool LegalizeOp(SDNode *N, SmallSetVector< SDNode *, 16 > &UpdatedNodes)
Transforms a SelectionDAG node and any operands to it into a node that is compatible with the target ...
LLVM_ABI bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
LLVM_ABI bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
LLVM_ABI SDValue getMaskedHistogram(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
LLVM_ABI SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
bool willNotOverflowMul(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the mul of 2 nodes can never overflow.
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
LLVM_ABI MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
bool isConstantValueOfAnyType(SDValue N) const
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
LLVM_ABI bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
LLVM_ABI SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getSetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
iterator_range< allnodes_iterator > allnodes()
LLVM_ABI SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, const MDNode *Ranges=nullptr, bool IsExpanding=false)
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
LLVM_ABI SDValue FoldConstantBuildVector(BuildVectorSDNode *BV, const SDLoc &DL, EVT DstEltVT)
Fold BUILD_VECTOR of constants/undefs to the destination type BUILD_VECTOR of constants/undefs elemen...
LLVM_ABI SDValue getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
const TargetLibraryInfo & getLibInfo() const
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI bool MaskedVectorIsZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Return true if 'Op' is known to be zero in DemandedElts.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
LLVM_ABI bool canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, bool PoisonOnly=false, bool ConsiderFlags=true, unsigned Depth=0) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
LLVM_ABI OverflowKind computeOverflowForUnsignedAdd(SDValue N0, SDValue N1) const
Determine if the result of the unsigned addition of 2 nodes can overflow.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
bool isSafeToSpeculativelyExecuteNode(const SDNode *N) const
Check if the provided node is safe to speculatively execute given its current arguments.
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVM_ABI bool isKnownToBeAPowerOfTwoFP(SDValue Val, unsigned Depth=0) const
Test if the given fp value is known to be an integer power-of-2, either positive or negative.
LLVMContext * getContext() const
LLVM_ABI SDValue simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y, SDNodeFlags Flags)
Try to simplify a floating-point binary operation into 1 of its operands or a constant.
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
LLVM_ABI bool isUndef(unsigned Opcode, ArrayRef< SDValue > Ops)
Return true if the result of this operation is always undefined.
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
LLVM_ABI SDValue getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
DenormalMode getDenormalMode(EVT VT) const
Return the current function's default denormal handling kind for the given floating point type.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
static unsigned getOpcode_EXTEND(unsigned Opcode)
Convert *_EXTEND_VECTOR_INREG to *_EXTEND opcode.
LLVM_ABI bool isADDLike(SDValue Op, bool NoWrap=false) const
Return true if the specified operand is an ISD::OR or ISD::XOR node that can be treated as an ISD::AD...
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
LLVM_ABI SDValue simplifyShift(SDValue X, SDValue Y)
Try to simplify a shift into 1 of its operands or a constant.
LLVM_ABI void transferDbgValues(SDValue From, SDValue To, unsigned OffsetInBits=0, unsigned SizeInBits=0, bool InvalidateDbg=true)
Transfer debug values from one node to another, while optionally generating fragment expressions for ...
LLVM_ABI SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
LLVM_ABI SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:100
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
value_type pop_back_val()
Definition SetVector.h:279
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
void push_back(bool Val)
void reserve(unsigned N)
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
bool empty() const
Definition SmallSet.h:168
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
iterator insert(iterator I, T &&Elt)
void resize(size_type N)
void push_back(const T &Elt)
pointer data()
Return a pointer to the vector's buffer, even if empty().
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
bool has(LibFunc F) const
Tests whether a library function is available.
virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT) const
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, EVT ExtVT) const
virtual bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
virtual bool hasAndNot(SDValue X) const
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool enableAggressiveFMAFusion(EVT VT) const
Return true if target always benefits from combining into FMA for a given value type.
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
SDValue promoteTargetBoolean(SelectionDAG &DAG, SDValue Bool, EVT ValVT) const
Promote the given target boolean to a target boolean of the given type.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, std::optional< unsigned > ByteOffset=std::nullopt) const
Return true if it is profitable to reduce a load to a smaller type.
virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT, bool LegalOnly) const
virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const
Use bitwise logic to make pairs of compares more efficient.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a square root of the given type based on the function's at...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, LLT DestTy, LLT SrcTy) const
Return true if an fpext operation input to an Opcode operation is free (for instance,...
virtual bool hasBitTest(SDValue X, SDValue Y) const
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
EVT getLegalTypeToTransformTo(LLVMContext &Context, EVT VT) const
Perform getTypeToTransformTo repeatedly until a legal type is obtained.
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
Return true if the specified store with truncation is legal on this target.
virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x) On arch...
virtual bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
virtual bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst, SDValue IntPow2) const
virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, EVT VT) const
Returns true if we should normalize select(N0&N1, X, Y) => select(N0, select(N1, X,...
virtual bool preferScalarizeSplat(SDNode *N) const
bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const
Return true if it is profitable to convert a select of FP constants into a constant pool load whose a...
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
virtual bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
bool isPartialReduceMLALegalOrCustom(unsigned Opc, EVT AccVT, EVT InputVT) const
Return true if a PARTIAL_REDUCE_U/SMLA node with the specified types is legal or custom for this targ...
virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const
Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
int getDivRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a division of the given type based on the function's attributes.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N) const
Return true if it is profitable to fold a pair of shifts into a mask.
int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a division of the given type based on the function's attri...
virtual bool preferIncOfAddToSubOfNot(EVT VT) const
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool canTransformPtrArithOutOfBounds(const Function &F, EVT PtrVT) const
True if the target allows transformations of in-bounds pointer arithmetic that cause out-of-bounds in...
virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const
virtual bool isFNegFree(EVT VT) const
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
virtual bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X, SDValue Y) const
Return true if pulling a binary operation into a select with an identity constant is profitable.
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
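As a usage sketch only (the helper name canUseABS and the choice of ISD::ABS are assumptions made for illustration, not code from this file), a combine typically gates node creation on this kind of legality hook:

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/TargetLowering.h"

using namespace llvm;

// Hypothetical guard: only form an ISD::ABS node when we are still before
// operation legalization, or when the target can select or custom-lower it.
static bool canUseABS(const TargetLowering &TLI, EVT VT, bool LegalOperations) {
  return !LegalOperations || TLI.isOperationLegalOrCustom(ISD::ABS, VT);
}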
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right...
virtual bool mergeStoresAfterLegalization(EVT MemVT) const
Allow store merging for the specified type after legalization in addition to before legalization.
virtual bool shouldMergeStoreOfLoadsOverCall(EVT, EVT) const
Returns true if it's profitable to allow merging store of loads when there are functions calls betwee...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getGatherAllAliasesMaxDepth() const
virtual bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem, unsigned AddrSpace) const
Return true if it is expected to be cheaper to do a store of vector constant with the given size and ...
virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal or custom on this target.
bool isAtomicLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified atomic load with extension is legal on this target.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const
There are two ways to clear extreme bits (either low or high): Mask: x & (-1 << y) (the instcombine c...
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const
Returns if it's reasonable to merge stores to MemVT size.
virtual bool preferABDSToABSWithNSW(EVT VT) const
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
AndOrSETCCFoldKind
Enum of different potentially desirable ways to fold (and/or (setcc ...), (setcc ....
virtual bool shouldScalarizeBinop(SDValue VecOp) const
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Get the libcall impl routine name for the specified libcall.
virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: (store (y (conv x)), y*)) -> (store x,...
bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked store is legal on this target.
virtual bool isVectorClearMaskLegal(ArrayRef< int >, EVT) const
Similar to isShuffleMaskLegal.
bool hasTargetDAGCombine(ISD::NodeType NT) const
If true, the target has custom DAG combine transformations that it can perform for the specified node...
virtual bool shouldSplatInsEltVarIndex(EVT) const
Return true if inserting a scalar into a variable element of an undef vector is more efficiently hand...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a square root of the given type based on the function's attribut...
virtual unsigned preferedOpcodeForCmpEqPiecesOfOperand(EVT VT, unsigned ShiftOpc, bool MayTransformRotate, const APInt &ShiftOrRotateAmt, const std::optional< APInt > &AndMask) const
virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const
Returns true if MI can be combined with another instruction to form TargetOpcode::G_FMAD.
virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const
virtual bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
virtual bool generateFMAsInMachineCombiner(EVT VT, CodeGenOptLevel OptLevel) const
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool hasPairedLoad(EVT, Align &) const
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
virtual bool convertSelectOfConstantsToMath(EVT VT) const
Return true if a select of constants (select Cond, C1, C2) should be transformed into simple math ops...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const
Hooks for building estimates in place of slower divisions and square roots.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
SDValue getCheaperOrNeutralNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, const NegatibleCost CostThreshold=NegatibleCost::Neutral, unsigned Depth=0) const
virtual bool isTargetCanonicalSelect(SDNode *N) const
Return true if the given select/vselect should be considered canonical and not be transformed.
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
virtual bool IsDesirableToPromoteOp(SDValue, EVT &) const
This method queries the target whether it is beneficial for the dag combiner to promote the specified node.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const
Return a reciprocal estimate value for the input operand.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
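A minimal sketch of driving SimplifyDemandedBits through a TargetLoweringOpt, assuming a caller that knows only the low 8 bits of Op are demanded; the helper name simplifyLowByte and the flag names are made up for illustration and do not appear in this file:

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Support/KnownBits.h"

using namespace llvm;

// Ask the target to simplify Op given that only its low 8 bits are demanded.
// Returns true if the DAG was changed (the replacement is recorded in TLO).
static bool simplifyLowByte(const TargetLowering &TLI, SDValue Op,
                            SelectionDAG &DAG, bool LegalTypes, bool LegalOps) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOps);
  KnownBits Known;
  APInt DemandedBits = APInt::getLowBitsSet(Op.getScalarValueSizeInBits(), 8);
  return TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
}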
TargetLowering(const TargetLowering &)=delete
bool isConstFalseVal(SDValue N) const
Return true if N is a constant or constant vector equal to the false value from getBooleanContents().
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool getPostIndexedAddressParts(SDNode *, SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isConstTrueVal(SDValue N) const
Return true if N is a constant or constant vector equal to the true value from getBooleanContents().
virtual bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to move this shift by a constant amount through its operand,...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual AndOrSETCCFoldKind isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const
virtual bool getPreIndexedAddressParts(SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index, const SDNodeFlags PtrArithFlags=SDNodeFlags()) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getInboundsVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
SDValue scalarizeExtractedVectorLoad(EVT ResultVT, const SDLoc &DL, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad, SelectionDAG &DAG) const
Replace an extraction of a load with a narrowed load.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
virtual bool isDesirableToTransformToIntegerOp(unsigned, EVT) const
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
TargetOptions Options
unsigned NoSignedZerosFPMath
NoSignedZerosFPMath - This flag is enabled when the -enable-no-signed-zeros-fp-math is specified on t...
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:106
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:233
const SDValue & getScale() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getVectorLength() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
iterator_range< user_iterator > users()
Definition Value.h:426
int getNumOccurrences() const
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:180
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:230
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
Changed
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition APInt.h:2249
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition APInt.h:2254
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition APInt.h:2259
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition APInt.h:2264
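A standalone sketch (assuming nothing beyond the APInt helpers indexed here) showing how the signed and unsigned orderings disagree on the same bit pattern:

#include <cassert>
#include "llvm/ADT/APInt.h"

using namespace llvm;

int main() {
  // 0x80 is -128 when read as a signed i8, but 128 when read as unsigned.
  APInt A(8, 0x80), B(8, 0x01);
  assert(APIntOps::smin(A, B) == A);   // -128 < 1 in the signed ordering
  assert(APIntOps::umax(A, B) == A);   // 128 > 1 in the unsigned ordering
  return 0;
}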
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Entry
Definition COFF.h:862
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical AND between different comparisons of identical values: ((X op1 Y) & (X...
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:809
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:256
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:782
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:506
@ PTRADD
PTRADD represents pointer arithmetic semantics, for targets that opt in using shouldPreservePtrArith(...
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ POISON
POISON - A poison node.
Definition ISDOpcodes.h:231
@ PARTIAL_REDUCE_SMLA
PARTIAL_REDUCE_[U|S]MLA(Accumulator, Input1, Input2) The partial reduction nodes sign or zero extend ...
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:595
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:773
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition ISDOpcodes.h:389
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:289
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition ISDOpcodes.h:517
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:395
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:843
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:513
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:870
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:579
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:412
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:746
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:900
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:275
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition ISDOpcodes.h:993
@ FMULADD
FMULADD - Performs a * b + c, with, or without, intermediate rounding.
Definition ISDOpcodes.h:523
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:983
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ PARTIAL_REDUCE_UMLA
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:834
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:714
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:664
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:781
@ PARTIAL_REDUCE_FMLA
@ TRUNCATE_SSAT_U
Definition ISDOpcodes.h:863
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition ISDOpcodes.h:817
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:347
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor to...
Definition ISDOpcodes.h:630
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition ISDOpcodes.h:690
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:536
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:786
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:242
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:671
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition ISDOpcodes.h:69
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:225
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:343
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:703
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:764
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:644
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:609
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
Definition ISDOpcodes.h:48
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:571
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:840
@ TargetConstantFP
Definition ISDOpcodes.h:175
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:801
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:381
@ SMULO
Same for multiplication.
Definition ISDOpcodes.h:351
@ TargetFrameIndex
Definition ISDOpcodes.h:182
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:889
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:878
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:726
@ LIFETIME_START
This corresponds to the llvm.lifetime.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:968
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:795
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:323
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
@ HANDLENODE
HANDLENODE node - Used as a handle for various purposes.
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:916
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:174
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:738
@ GET_FPENV_MEM
Gets the current floating-point environment.
@ CARRY_FALSE
CARRY_FALSE - This node is used when folding other nodes, like ADDC/SUBC, which indicate the carry re...
Definition ISDOpcodes.h:280
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:709
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:299
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:422
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:560
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:949
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
Definition ISDOpcodes.h:698
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:911
@ EXPERIMENTAL_VECTOR_HISTOGRAM
Experimental vector histogram intrinsic Operands: Input Chain, Inc, Mask, Base, Index,...
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:935
@ VECREDUCE_FMINIMUM
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:846
@ BRCOND
BRCOND - Conditional branch.
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:529
@ PARTIAL_REDUCE_SUMLA
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
@ SET_FPENV_MEM
Sets the current floating point environment.
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition ISDOpcodes.h:861
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:721
@ TRUNCATE_USAT_U
Definition ISDOpcodes.h:865
@ SADDO_CARRY
Carry-using overflow-aware nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:333
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:551
bool isIndexTypeSigned(MemIndexType IndexType)
bool isExtVecInRegOpcode(unsigned Opcode)
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool matchUnaryFpPredicate(SDValue Op, std::function< bool(ConstantFPSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantFPSDNode predicate.
bool isFPEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with floati...
bool isExtOpcode(unsigned Opcode)
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI bool isVPBinaryOp(unsigned Opcode)
Whether this is a vector-predicated binary operation opcode.
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is a bitwise logic opcode.
LLVM_ABI std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
LLVM_ABI std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
LLVM_ABI bool allOperandsUndef(const SDNode *N)
Return true if the node has at least one operand and all operands of the specified node are ISD::UNDE...
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
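For illustration, a small standalone sketch of the CondCode algebra above; MVT::i32 is an arbitrary choice, and the program exists only to show the inverse and operand-swap relationships (it assumes the declarations come from the usual CodeGen headers):

#include <cassert>
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"

using namespace llvm;

int main() {
  EVT VT = MVT::i32;
  // !(X < Y) is (X >= Y) for integer comparisons.
  assert(ISD::getSetCCInverse(ISD::SETLT, VT) == ISD::SETGE);
  // (Y < X) is the same predicate as (X > Y).
  assert(ISD::getSetCCSwappedOperands(ISD::SETLT) == ISD::SETGT);
  return 0;
}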
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
LLVM_ABI NodeType getInverseMinMaxOpcode(unsigned MinMaxOpc)
Given a MinMaxOpc of ISD::(U|S)MIN or ISD::(U|S)MAX, returns ISD::(U|S)MAX and ISD::(U|S)MIN,...
LLVM_ABI bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
LLVM_ABI bool isVPReduction(unsigned Opcode)
Whether this is a vector-predicated reduction opcode.
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTruncation=false)
Hook for matching ConstantSDNode predicate.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
LLVM_ABI bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
LLVM_ABI CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical OR between different comparisons of identical values: ((X op1 Y) | (X ...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
CastInst_match< OpTy, FPToUIInst > m_FPToUI(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::SRem > m_SRem(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinOpPred_match< LHS, RHS, is_bitwiselogic_op > m_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
@ Undef
Value of the register doesn't matter.
Opcode_match m_Opc(unsigned Opcode)
auto m_SelectCCLike(const LTy &L, const RTy &R, const TTy &T, const FTy &F, const CCTy &CC)
BinaryOpc_match< LHS, RHS > m_Srl(const LHS &L, const RHS &R)
auto m_SpecificVT(EVT RefVT, const Pattern &P)
Match a specific ValueType.
BinaryOpc_match< LHS, RHS > m_Sra(const LHS &L, const RHS &R)
auto m_UMinLike(const LHS &L, const RHS &R)
auto m_SelectLike(const T0_P &Cond, const T1_P &T, const T2_P &F)
auto m_UMaxLike(const LHS &L, const RHS &R)
UnaryOpc_match< Opnd > m_Abs(const Opnd &Op)
Or< Preds... > m_AnyOf(const Preds &...preds)
And< Preds... > m_AllOf(const Preds &...preds)
TernaryOpc_match< T0_P, T1_P, T2_P > m_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
ReassociatableOpc_match< PatternTs... > m_ReassociatableAdd(const PatternTs &...Patterns)
UnaryOpc_match< Opnd > m_AnyExt(const Opnd &Op)
auto m_SMaxLike(const LHS &L, const RHS &R)
UnaryOpc_match< Opnd > m_Ctlz(const Opnd &Op)
TernaryOpc_match< T0_P, T1_P, T2_P > m_VSelect(const T0_P &Cond, const T1_P &T, const T2_P &F)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
UnaryOpc_match< Opnd > m_UnaryOp(unsigned Opc, const Opnd &Op)
auto m_SMinLike(const LHS &L, const RHS &R)
CondCode_match m_SpecificCondCode(ISD::CondCode CC)
Match a conditional code SDNode with a specific ISD::CondCode.
ReassociatableOpc_match< PatternTs... > m_ReassociatableAnd(const PatternTs &...Patterns)
NUses_match< 1, Value_match > m_OneUse()
CondCode_match m_CondCode()
Match any conditional code SDNode.
Not(const Pred &P) -> Not< Pred >
TernaryOpc_match< T0_P, T1_P, T2_P, true, false > m_c_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
bool sd_context_match(SDValue N, const MatchContext &Ctx, Pattern &&P)
ConstantInt_match m_ConstInt()
Match any integer constants or splat of an integer constant.
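As a usage illustration (not a transform performed by this file, and the helper name isOneUseEqSetCC is an assumption), the SDPatternMatch helpers indexed above compose into declarative structural checks inside a combine:

#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"

using namespace llvm;
using namespace llvm::SDPatternMatch;

// Returns true if N is a single-use integer-equality SETCC node.
// Purely structural: nothing is rewritten and no operands are captured.
static bool isOneUseEqSetCC(SDNode *N, const SelectionDAG &DAG) {
  return sd_match(N, &DAG,
                  m_AllOf(m_OneUse(),
                          m_SetCC(m_Value(), m_Value(),
                                  m_SpecificCondCode(ISD::SETEQ))));
}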
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:667
constexpr double e
@ User
could "use" a pointer
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:344
@ Offset
Definition DWP.cpp:532
@ Length
Definition DWP.cpp:532
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:829
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:362
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
void stable_sort(R &&Range)
Definition STLExtras.h:2106
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1763
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
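A standalone sketch of the range-based STLExtras wrappers listed in this index (the values are arbitrary; the point is the ranges-instead-of-iterators style used throughout the combiner):

#include <cassert>
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

using namespace llvm;

int main() {
  SmallVector<int, 4> Elts = {2, 4, 6, 8};
  assert(all_of(Elts, [](int V) { return V % 2 == 0; }));   // every element even
  assert(none_of(Elts, [](int V) { return V < 0; }));       // no negative element
  assert(count_if(Elts, [](int V) { return V > 4; }) == 2); // 6 and 8
  assert(is_contained(Elts, 6));
  return 0;
}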
InstructionCost Cost
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition Utils.cpp:1613
LLVM_ABI SDValue getBitwiseNotOperand(SDValue V, SDValue Mask, bool AllowUndefs)
If V is a bitwise not, returns the inverted operand.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2530
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
LLVM_ABI llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
bool operator!=(uint64_t V1, const APInt &V2)
Definition APInt.h:2114
bool operator>=(int64_t V1, const APSInt &V2)
Definition APSInt.h:361
LLVM_ATTRIBUTE_ALWAYS_INLINE DynamicAPInt & operator+=(DynamicAPInt &A, int64_t B)
LLVM_ABI bool isOneOrOneSplatFP(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant floating-point value, or a splatted vector of a constant float...
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2184
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
int ilogb(const APFloat &Arg)
Returns the exponent of the internal representation of the APFloat.
Definition APFloat.h:1516
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1595
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
LLVM_ABI bool isMinSignedConstant(SDValue V)
Returns true if V is a constant min signed integer value.
LLVM_ABI ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
LLVM_ABI ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
unsigned M1(unsigned Val)
Definition VE.h:377
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition Utils.cpp:1551
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:236
bool operator>(int64_t V1, const APSInt &V2)
Definition APSInt.h:363
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1634
detail::ValueMatchesPoly< M > HasValue(M Matcher)
Definition Error.h:221
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI SDValue peekThroughTruncates(SDValue V)
Return the non-truncated source operand of V if it exists.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
LLVM_ABI SDValue peekThroughOneUseBitcasts(SDValue V)
Return the non-bitcasted and one-use source operand of V if it exists.
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
Definition ModRef.h:68
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
CombineLevel
Definition DAGCombine.h:15
@ AfterLegalizeDAG
Definition DAGCombine.h:19
@ AfterLegalizeVectorOps
Definition DAGCombine.h:18
@ BeforeLegalizeTypes
Definition DAGCombine.h:16
@ AfterLegalizeTypes
Definition DAGCombine.h:17
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
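A standalone sketch of the mask widening/narrowing pair above, using a made-up v4 mask purely for illustration:

#include <cassert>
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"

using namespace llvm;

int main() {
  // Widen the v4 mask <0,1,6,7> by a factor of 2 into the v2 mask <0,3>:
  // narrow lanes 0,1 form wide lane 0 and narrow lanes 6,7 form wide lane 3.
  SmallVector<int, 2> Wide;
  bool Ok = widenShuffleMaskElts(2, {0, 1, 6, 7}, Wide);
  assert(Ok && Wide.size() == 2 && Wide[0] == 0 && Wide[1] == 3);

  // Narrowing a mask always succeeds and reverses the transformation.
  SmallVector<int, 4> Narrow;
  narrowShuffleMaskElts(2, Wide, Narrow);
  assert(Narrow.size() == 4 && Narrow[2] == 6);
  return 0;
}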
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ FMul
Product of floats.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
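A hypothetical helper (the name isPow2Constant is an assumption; no such function exists in this file) showing how the scalar-or-splat constant queries above are typically combined:

#include "llvm/CodeGen/SelectionDAGNodes.h"

using namespace llvm;

// Returns true if Op is a scalar constant, or a splat of a constant,
// whose value is a power of two.
static bool isPow2Constant(SDValue Op) {
  if (ConstantSDNode *C = isConstOrConstSplat(Op, /*AllowUndefs=*/false))
    return C->getAPIntValue().isPowerOf2();
  return false;
}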
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2009
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isZeroOrZeroSplat(SDValue N, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
LLVM_ABI void getShuffleMaskWithWidestElts(ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to get the shuffle mask with wi...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1945
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
Definition STLExtras.h:2156
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
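A standalone sketch of the alignment arithmetic above; the 8-byte base and 4-byte offset are arbitrary example values:

#include <cassert>
#include "llvm/Support/Alignment.h"

using namespace llvm;

int main() {
  // An 8-byte-aligned base plus an offset of 4 is only 4-byte aligned.
  Align Base(8);
  assert(commonAlignment(Base, 4) == Align(4));
  assert(isAligned(Align(4), 12) && !isAligned(Base, 12));
  assert(Log2(Base) == 3);
  return 0;
}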
LLVM_ABI bool isZeroOrZeroSplatFP(SDValue N, bool AllowUndefs=false)
Return true if the value is a constant (+/-)0.0 floating-point value or a splatted vector thereof (wi...
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool operator<=(int64_t V1, const APSInt &V2)
Definition APSInt.h:360
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
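A standalone sketch of the power-of-two helpers indexed here, using an arbitrary 6-byte size as the example input:

#include <cassert>
#include <cstdint>
#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;

int main() {
  // Round a size up to the next power of two and take its log2 -- the same
  // arithmetic the width-narrowing combines in this file rely on.
  uint64_t Bytes = 6;
  uint64_t Rounded = PowerOf2Ceil(Bytes);   // 8
  assert(isPowerOf2_64(Rounded));
  assert(Log2_64(Rounded) == 3);
  assert(countr_zero(Rounded) == 3);
  return 0;
}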
LLVM_ABI int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
LLVM_ABI AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
static constexpr DenormalMode getIEEE()
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
bool knownBitsLE(EVT VT) const
Return true if we know at compile time this has fewer than or the same bits as VT.
Definition ValueTypes.h:279
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition ValueTypes.h:121
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
Definition ValueTypes.h:300
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:243
EVT changeElementType(EVT EltVT) const
Return a VT for a type whose attributes match ourselves with the exception of the element type that i...
Definition ValueTypes.h:113
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
bool isPow2VectorType() const
Returns true if the vector's number of elements is a power of 2.
Definition ValueTypes.h:470
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition ValueTypes.h:412
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isScalableVT() const
Return true if the type is a scalable type.
Definition ValueTypes.h:187
bool isFixedLengthVector() const
Definition ValueTypes.h:181
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition ValueTypes.h:419
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
bool bitsGE(EVT VT) const
Return true if this has no fewer bits than VT.
Definition ValueTypes.h:292
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition ValueTypes.h:256
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition ValueTypes.h:248
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
bool knownBitsGE(EVT VT) const
Return true if we know at compile time this has more than or the same bits as VT.
Definition ValueTypes.h:268
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition ValueTypes.h:142
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool isZeroSized() const
Test if the given EVT has zero size; this will fail if called on a scalable type.
Definition ValueTypes.h:132
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:308
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
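Several of the EVT queries above usually appear together when a replacement type is built; a small, hedged sketch (the context and types are chosen for illustration, not taken from this file):

// Hedged sketch of common EVT manipulations.
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
#include <cassert>
using namespace llvm;

int main() {
  LLVMContext Ctx;
  EVT VecVT = EVT::getVectorVT(Ctx, MVT::f32, 4);          // v4f32
  EVT IntVecVT = VecVT.changeVectorElementTypeToInteger(); // v4i32
  assert(IntVecVT.isInteger() && IntVecVT.isVector());
  assert(VecVT.bitsEq(IntVecVT));                          // same total bit width
  EVT OddVT = EVT::getIntegerVT(Ctx, 24);                  // extended (non-simple) i24
  assert(OddVT.getRoundIntegerType(Ctx) == EVT(MVT::i32)); // rounded up to a pow-2 width
  return 0;
}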
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:108
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:242
bool isConstant() const
Returns true if we know the value of all bits.
Definition KnownBits.h:54
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition KnownBits.h:296
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:248
bool isAllOnes() const
Returns true if value is all one bits.
Definition KnownBits.h:83
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition KnownBits.h:60
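The KnownBits queries above follow directly from the Zero/One masks; a hedged sketch of the relationships (the bit width and known bits are illustrative):

// Hedged sketch of KnownBits queries.
#include "llvm/Support/KnownBits.h"
#include <cassert>
using namespace llvm;

int main() {
  KnownBits Known(8);            // nothing known yet about an 8-bit value
  Known.Zero.setHighBits(4);     // top four bits are now known to be zero
  assert(Known.countMinLeadingZeros() == 4);
  assert(Known.isNonNegative());           // sign bit known zero
  assert(Known.countMaxActiveBits() == 4); // value fits in the low four bits
  assert(!Known.isConstant());             // the low bits are still unknown
  return 0;
}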
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
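MachinePointerInfo values are typically derived once and then rebased by an offset; a hedged sketch, assuming a MachineFunction and frame index are already available (the helper name is illustrative):

// Hedged sketch: rebasing a fixed-stack MachinePointerInfo by a byte offset,
// as is commonly done when a wide access is split. MF and FI are assumed inputs.
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
using namespace llvm;

static MachinePointerInfo stackSlotAt(MachineFunction &MF, int FI, int64_t ByteOff) {
  MachinePointerInfo Base = MachinePointerInfo::getFixedStack(MF, FI);
  return Base.getWithOffset(ByteOff); // same underlying object, offset adjusted
}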
These are IR-level optimization flags that may be propagated to SDNodes.
void setAllowContract(bool b)
bool hasNoUnsignedWrap() const
void setAllowReassociation(bool b)
void setAllowReciprocal(bool b)
bool hasAllowContract() const
bool hasApproximateFuncs() const
void setApproximateFuncs(bool b)
bool hasNoSignedWrap() const
bool hasAllowReciprocal() const
bool hasAllowReassociation() const
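The SDNodeFlags setters and getters above gate FP contraction and reassociation on a per-node basis; a minimal hedged sketch (the gating condition and helper names are illustrative):

// Hedged sketch of SDNodeFlags usage.
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static bool mayFuseMulAdd(SDNodeFlags Flags) {
  // Contraction (e.g. forming an FMA) can be allowed per node via this flag.
  return Flags.hasAllowContract();
}

static SDNodeFlags makeRelaxedFPFlags() {
  SDNodeFlags Flags;
  Flags.setAllowContract(true);
  Flags.setAllowReassociation(true);
  Flags.setApproximateFuncs(true);
  return Flags;
}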
Clients of various APIs that cause global effects on the DAG can optionally implement this interface.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI bool recursivelyDeleteUnusedNodes(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
LLVM_ABI void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...