//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/NVPTXAddrSpace.h"
#include "llvm/Support/Regex.h"
#include <cstdint>
#include <cstring>
#include <numeric>

using namespace llvm;

static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
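//
// For example (illustrative IR; the exact signature comes from the intrinsic
// tables), an old declaration such as
//   declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)
// is renamed to "llvm.x86.sse41.ptestc.old" and redeclared taking <2 x i64>.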
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old; replace it with the new version.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
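//
// For example (illustrative IR), the old form
//   declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32)
// is redeclared with an i8 immediate as its last argument.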
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declaration of fp compare intrinsics that changed their return
// type from a scalar to a vXi1 mask.
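//
// For example (illustrative IR), an old declaration returning a scalar, e.g.
//   declare i8 @llvm.x86.avx512.mask.cmp.ps.256(...)
// is redeclared to return an <8 x i1> mask instead.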
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // Check if the return type is a vector.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declaration of multiply and add bytes intrinsics whose input
// arguments' types have changed from vectors of i32 to vectors of i8.
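//
// For example (illustrative IR), the old form
//   declare <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <4 x i32>,
//                                                   <4 x i32>)
// is redeclared so that the two multiplicand operands are <16 x i8>.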
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID,
                                       Function *&NewFn) {
  // Check if the input argument types are already vectors of i8.
  Type *Arg1Type = F->getFunctionType()->getParamType(1);
  Type *Arg2Type = F->getFunctionType()->getParamType(2);
  if (Arg1Type->isVectorTy() &&
      cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(8) &&
      Arg2Type->isVectorTy() &&
      cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(8))
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declaration of multiply and add words intrinsics whose input
// arguments' types have changed from vectors of i32 to vectors of i16.
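//
// For example (illustrative IR), the old form
//   declare <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32>, <4 x i32>,
//                                                   <4 x i32>)
// is redeclared so that the two multiplicand operands are <8 x i16>.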
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID,
                                       Function *&NewFn) {
  // Check if the input argument types are already vectors of i16.
  Type *Arg1Type = F->getFunctionType()->getParamType(1);
  Type *Arg2Type = F->getFunctionType()->getParamType(2);
  if (Arg1Type->isVectorTy() &&
      cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(16) &&
      Arg2Type->isVectorTy() &&
      cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(16))
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
                                    Function *&NewFn) {
  if (F->getReturnType()->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsic matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
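  //
  // By this point the common "llvm.x86." prefix has already been consumed by
  // the callers, so e.g. a declaration of @llvm.x86.avx.storeu.ps.256 reaches
  // this function as "avx.storeu.ps.256" and is matched below as "avx." +
  // "storeu.*" (illustrative; see upgradeX86IntrinsicFunction).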
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") || // Added in 3.7
            Name == "cvt.ps2.pd.256" || // Added in 3.9
            Name == "cvtdq2.pd.256" || // Added in 3.9
            Name == "cvtdq2.ps.256" || // Added in 7.0
            Name.starts_with("movnt.") || // Added in 3.2
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 3.5
            Name.starts_with("vbroadcastf128") || // Added in 4.0
            Name.starts_with("vextractf128.") || // Added in 3.7
            Name.starts_with("vinsertf128.") || // Added in 3.7
            Name.starts_with("vperm2f128.") || // Added in 6.0
            Name.starts_with("vpermil.")); // Added in 3.1

  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" || // Added in 5.0
            Name.starts_with("pabs.") || // Added in 6.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pblendd.") || // Added in 3.7
            Name == "pblendw" || // Added in 3.7
            Name.starts_with("pbroadcast") || // Added in 3.8
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name.starts_with("pmax") || // Added in 3.9
            Name.starts_with("pmin") || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.9
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmul.dq" || // Added in 7.0
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("vbroadcast") || // Added in 3.8
            Name == "vbroadcasti128" || // Added in 3.7
            Name == "vextracti128" || // Added in 3.7
            Name == "vinserti128" || // Added in 3.7
            Name == "vperm2i128"); // Added in 6.0

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      // 'avx512.mask.*'
      return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("and.") || // Added in 3.9
              Name.starts_with("andn.") || // Added in 3.9
              Name.starts_with("broadcast.s") || // Added in 3.9
              Name.starts_with("broadcastf32x4.") || // Added in 6.0
              Name.starts_with("broadcastf32x8.") || // Added in 6.0
              Name.starts_with("broadcastf64x2.") || // Added in 6.0
              Name.starts_with("broadcastf64x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x8.") || // Added in 6.0
              Name.starts_with("broadcasti64x2.") || // Added in 6.0
              Name.starts_with("broadcasti64x4.") || // Added in 6.0
              Name.starts_with("cmp.b") || // Added in 5.0
              Name.starts_with("cmp.d") || // Added in 5.0
              Name.starts_with("cmp.q") || // Added in 5.0
              Name.starts_with("cmp.w") || // Added in 5.0
              Name.starts_with("compress.b") || // Added in 9.0
              Name.starts_with("compress.d") || // Added in 9.0
              Name.starts_with("compress.p") || // Added in 9.0
              Name.starts_with("compress.q") || // Added in 9.0
              Name.starts_with("compress.store.") || // Added in 7.0
              Name.starts_with("compress.w") || // Added in 9.0
              Name.starts_with("conflict.") || // Added in 9.0
              Name.starts_with("cvtdq2pd.") || // Added in 4.0
              Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
              Name == "cvtpd2dq.256" || // Added in 7.0
              Name == "cvtpd2ps.256" || // Added in 7.0
              Name == "cvtps2pd.128" || // Added in 7.0
              Name == "cvtps2pd.256" || // Added in 7.0
              Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtqq2ps.256" || // Added in 9.0
              Name == "cvtqq2ps.512" || // Added in 9.0
              Name == "cvttpd2dq.256" || // Added in 7.0
              Name == "cvttps2dq.128" || // Added in 7.0
              Name == "cvttps2dq.256" || // Added in 7.0
              Name.starts_with("cvtudq2pd.") || // Added in 4.0
              Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
              Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtuqq2ps.256" || // Added in 9.0
              Name == "cvtuqq2ps.512" || // Added in 9.0
              Name.starts_with("dbpsadbw.") || // Added in 7.0
              Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("expand.b") || // Added in 9.0
              Name.starts_with("expand.d") || // Added in 9.0
              Name.starts_with("expand.load.") || // Added in 7.0
              Name.starts_with("expand.p") || // Added in 9.0
              Name.starts_with("expand.q") || // Added in 9.0
              Name.starts_with("expand.w") || // Added in 9.0
              Name.starts_with("fpclass.p") || // Added in 7.0
              Name.starts_with("insert") || // Added in 4.0
              Name.starts_with("load.") || // Added in 3.9
              Name.starts_with("loadu.") || // Added in 3.9
              Name.starts_with("lzcnt.") || // Added in 5.0
              Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("movddup") || // Added in 3.9
              Name.starts_with("move.s") || // Added in 4.0
              Name.starts_with("movshdup") || // Added in 3.9
              Name.starts_with("movsldup") || // Added in 3.9
              Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("or.") || // Added in 3.9
              Name.starts_with("pabs.") || // Added in 6.0
              Name.starts_with("packssdw.") || // Added in 5.0
              Name.starts_with("packsswb.") || // Added in 5.0
              Name.starts_with("packusdw.") || // Added in 5.0
              Name.starts_with("packuswb.") || // Added in 5.0
              Name.starts_with("padd.") || // Added in 4.0
              Name.starts_with("padds.") || // Added in 8.0
              Name.starts_with("paddus.") || // Added in 8.0
              Name.starts_with("palignr.") || // Added in 3.9
              Name.starts_with("pand.") || // Added in 3.9
              Name.starts_with("pandn.") || // Added in 3.9
              Name.starts_with("pavg") || // Added in 6.0
              Name.starts_with("pbroadcast") || // Added in 6.0
              Name.starts_with("pcmpeq.") || // Added in 3.9
              Name.starts_with("pcmpgt.") || // Added in 3.9
              Name.starts_with("perm.df.") || // Added in 3.9
              Name.starts_with("perm.di.") || // Added in 3.9
              Name.starts_with("permvar.") || // Added in 7.0
              Name.starts_with("pmaddubs.w.") || // Added in 7.0
              Name.starts_with("pmaddw.d.") || // Added in 7.0
              Name.starts_with("pmax") || // Added in 4.0
              Name.starts_with("pmin") || // Added in 4.0
              Name == "pmov.qd.256" || // Added in 9.0
              Name == "pmov.qd.512" || // Added in 9.0
              Name == "pmov.wb.256" || // Added in 9.0
              Name == "pmov.wb.512" || // Added in 9.0
              Name.starts_with("pmovsx") || // Added in 4.0
              Name.starts_with("pmovzx") || // Added in 4.0
              Name.starts_with("pmul.dq.") || // Added in 4.0
              Name.starts_with("pmul.hr.sw.") || // Added in 7.0
              Name.starts_with("pmulh.w.") || // Added in 7.0
              Name.starts_with("pmulhu.w.") || // Added in 7.0
              Name.starts_with("pmull.") || // Added in 4.0
              Name.starts_with("pmultishift.qb.") || // Added in 8.0
              Name.starts_with("pmulu.dq.") || // Added in 4.0
              Name.starts_with("por.") || // Added in 3.9
              Name.starts_with("prol.") || // Added in 8.0
              Name.starts_with("prolv.") || // Added in 8.0
              Name.starts_with("pror.") || // Added in 8.0
              Name.starts_with("prorv.") || // Added in 8.0
              Name.starts_with("pshuf.b.") || // Added in 4.0
              Name.starts_with("pshuf.d.") || // Added in 3.9
              Name.starts_with("pshufh.w.") || // Added in 3.9
              Name.starts_with("pshufl.w.") || // Added in 3.9
              Name.starts_with("psll.d") || // Added in 4.0
              Name.starts_with("psll.q") || // Added in 4.0
              Name.starts_with("psll.w") || // Added in 4.0
              Name.starts_with("pslli") || // Added in 4.0
              Name.starts_with("psllv") || // Added in 4.0
              Name.starts_with("psra.d") || // Added in 4.0
              Name.starts_with("psra.q") || // Added in 4.0
              Name.starts_with("psra.w") || // Added in 4.0
              Name.starts_with("psrai") || // Added in 4.0
              Name.starts_with("psrav") || // Added in 4.0
              Name.starts_with("psrl.d") || // Added in 4.0
              Name.starts_with("psrl.q") || // Added in 4.0
              Name.starts_with("psrl.w") || // Added in 4.0
              Name.starts_with("psrli") || // Added in 4.0
              Name.starts_with("psrlv") || // Added in 4.0
              Name.starts_with("psub.") || // Added in 4.0
              Name.starts_with("psubs.") || // Added in 8.0
              Name.starts_with("psubus.") || // Added in 8.0
              Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("punpckh") || // Added in 3.9
              Name.starts_with("punpckl") || // Added in 3.9
              Name.starts_with("pxor.") || // Added in 3.9
              Name.starts_with("shuf.f") || // Added in 6.0
              Name.starts_with("shuf.i") || // Added in 6.0
              Name.starts_with("shuf.p") || // Added in 4.0
              Name.starts_with("sqrt.p") || // Added in 7.0
              Name.starts_with("store.b.") || // Added in 3.9
              Name.starts_with("store.d.") || // Added in 3.9
              Name.starts_with("store.p") || // Added in 3.9
              Name.starts_with("store.q.") || // Added in 3.9
              Name.starts_with("store.w.") || // Added in 3.9
              Name == "store.ss" || // Added in 7.0
              Name.starts_with("storeu.") || // Added in 3.9
              Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("ucmp.") || // Added in 5.0
              Name.starts_with("unpckh.") || // Added in 3.9
              Name.starts_with("unpckl.") || // Added in 3.9
              Name.starts_with("valign.") || // Added in 4.0
              Name == "vcvtph2ps.128" || // Added in 11.0
              Name == "vcvtph2ps.256" || // Added in 11.0
              Name.starts_with("vextract") || // Added in 4.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfnmadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermi2var.") || // Added in 7.0
              Name.starts_with("vpermil.p") || // Added in 3.9
              Name.starts_with("vpermilvar.") || // Added in 4.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshld.") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrd.") || // Added in 7.0
              Name.starts_with("vpshrdv.") || // Added in 8.0
              Name.starts_with("vpshufbitqmb.") || // Added in 8.0
              Name.starts_with("xor.")); // Added in 3.9

    if (Name.consume_front("mask3."))
      // 'avx512.mask3.*'
      return (Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfmsub.") || // Added in 7.0
              Name.starts_with("vfmsubadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.")); // Added in 7.0

    if (Name.consume_front("maskz."))
      // 'avx512.maskz.*'
      return (Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrdv.")); // Added in 8.0

    // 'avx512.*'
    return (Name == "movntdqa" || // Added in 5.0
            Name == "pmul.dq.512" || // Added in 7.0
            Name == "pmulu.dq.512" || // Added in 7.0
            Name.starts_with("broadcastm") || // Added in 6.0
            Name.starts_with("cmp.p") || // Added in 12.0
            Name.starts_with("cvtb2mask.") || // Added in 7.0
            Name.starts_with("cvtd2mask.") || // Added in 7.0
            Name.starts_with("cvtmask2") || // Added in 5.0
            Name.starts_with("cvtq2mask.") || // Added in 7.0
            Name == "cvtusi2sd" || // Added in 7.0
            Name.starts_with("cvtw2mask.") || // Added in 7.0
            Name == "kand.w" || // Added in 7.0
            Name == "kandn.w" || // Added in 7.0
            Name == "knot.w" || // Added in 7.0
            Name == "kor.w" || // Added in 7.0
            Name == "kortestc.w" || // Added in 7.0
            Name == "kortestz.w" || // Added in 7.0
            Name.starts_with("kunpck") || // Added in 6.0
            Name == "kxnor.w" || // Added in 7.0
            Name == "kxor.w" || // Added in 7.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("pbroadcast") || // Added in 3.9
            Name.starts_with("prol") || // Added in 8.0
            Name.starts_with("pror") || // Added in 8.0
            Name.starts_with("psll.dq") || // Added in 3.9
            Name.starts_with("psrl.dq") || // Added in 3.9
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("ptestm") || // Added in 6.0
            Name.starts_with("ptestnm") || // Added in 6.0
            Name.starts_with("storent.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 7.0
            Name.starts_with("vpshld.") || // Added in 8.0
            Name.starts_with("vpshrd.")); // Added in 8.0
  }

  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") || // Added in 7.0
            Name.starts_with("vfmsub.") || // Added in 7.0
            Name.starts_with("vfmsubadd.") || // Added in 7.0
            Name.starts_with("vfnmadd.") || // Added in 7.0
            Name.starts_with("vfnmsub.")); // Added in 7.0

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s"); // Added in 7.0

  if (Name.consume_front("sse."))
    return (Name == "add.ss" || // Added in 4.0
            Name == "cvtsi2ss" || // Added in 7.0
            Name == "cvtsi642ss" || // Added in 7.0
            Name == "div.ss" || // Added in 4.0
            Name == "mul.ss" || // Added in 4.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.ss" || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.ss"); // Added in 4.0

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" || // Added in 4.0
            Name == "cvtdq2pd" || // Added in 3.9
            Name == "cvtdq2ps" || // Added in 7.0
            Name == "cvtps2pd" || // Added in 3.9
            Name == "cvtsi2sd" || // Added in 7.0
            Name == "cvtsi642sd" || // Added in 7.0
            Name == "cvtss2sd" || // Added in 7.0
            Name == "div.sd" || // Added in 4.0
            Name == "mul.sd" || // Added in 4.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name == "pmaxs.w" || // Added in 3.9
            Name == "pmaxu.b" || // Added in 3.9
            Name == "pmins.w" || // Added in 3.9
            Name == "pminu.b" || // Added in 3.9
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("pshuf") || // Added in 3.9
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.sd" || // Added in 7.0
            Name == "storel.dq" || // Added in 3.9
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.sd"); // Added in 4.0

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") || // Added in 3.7
            Name == "movntdqa" || // Added in 5.0
            Name == "pblendw" || // Added in 3.7
            Name == "pmaxsb" || // Added in 3.9
            Name == "pmaxsd" || // Added in 3.9
            Name == "pmaxud" || // Added in 3.9
            Name == "pmaxuw" || // Added in 3.9
            Name == "pminsb" || // Added in 3.9
            Name == "pminsd" || // Added in 3.9
            Name == "pminud" || // Added in 3.9
            Name == "pminuw" || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.8
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmuldq"); // Added in 7.0

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8"; // Added in 3.4

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt."); // Added in 3.9

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" || // Added in 6.0
            Name == "pabs.d.128" || // Added in 6.0
            Name == "pabs.w.128"); // Added in 6.0

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" || // Added in 3.8
            Name == "vpcmov.256" || // Added in 5.0
            Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
            Name.starts_with("vprot")); // Added in 8.0

  return (Name == "addcarry.u32" || // Added in 8.0
          Name == "addcarry.u64" || // Added in 8.0
          Name == "addcarryx.u32" || // Added in 8.0
          Name == "addcarryx.u64" || // Added in 8.0
          Name == "subborrow.u32" || // Added in 8.0
          Name == "subborrow.u64" || // Added in 8.0
          Name.starts_with("vcvtph2ps.")); // Added in 11.0
}

static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.consume_front("x86."))
    return false;

  if (shouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::x86_rdtscp);
    return true;
  }

  Intrinsic::ID ID;

  // SSE4.1 ptest functions may have an old signature.
  if (Name.consume_front("sse41.ptest")) { // Added in 3.2
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("c", Intrinsic::x86_sse41_ptestc)
             .Case("z", Intrinsic::x86_sse41_ptestz)
             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradePTESTIntrinsic(F, ID, NewFn);

    return false;
  }

  // Several blend and other instructions with masks used the wrong number of
  // bits.

  // Added in 3.6
  ID = StringSwitch<Intrinsic::ID>(Name)
           .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
           .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
           .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
           .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
           .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
           .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
           .Default(Intrinsic::not_intrinsic);
  if (ID != Intrinsic::not_intrinsic)
    return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask.cmp.")) {
      // Added in 7.0
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
               .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
               .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
               .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
               .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
               .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MaskedFPCompare(F, ID, NewFn);
    } else if (Name.starts_with("vpdpbusd.") ||
               Name.starts_with("vpdpbusds.")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
               .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
               .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
               .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
               .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
               .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddBytes(F, ID, NewFn);
    } else if (Name.starts_with("vpdpwssd.") ||
               Name.starts_with("vpdpwssds.")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
               .Case("vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
               .Case("vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
               .Case("vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
               .Case("vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
               .Case("vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddWords(F, ID, NewFn);
    }
    return false; // No other 'x86.avx512.*'.
  }

  if (Name.consume_front("avx2.")) {
    if (Name.consume_front("vpdpb")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
               .Case("ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
               .Case("ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
               .Case("ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
               .Case("sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
               .Case("sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
               .Case("suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
               .Case("suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
               .Case("uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
               .Case("uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
               .Case("uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
               .Case("uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddBytes(F, ID, NewFn);
    } else if (Name.consume_front("vpdpw")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
               .Case("sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
               .Case("suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
               .Case("suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
               .Case("usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
               .Case("usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
               .Case("usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
               .Case("usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
               .Case("uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
               .Case("uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
               .Case("uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
               .Case("uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddWords(F, ID, NewFn);
    }
    return false; // No other 'x86.avx2.*'
  }

  if (Name.consume_front("avx10.")) {
    if (Name.consume_front("vpdpb")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
               .Case("ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
               .Case("sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
               .Case("suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
               .Case("uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
               .Case("uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddBytes(F, ID, NewFn);
    } else if (Name.consume_front("vpdpw")) {
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
               .Case("suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
               .Case("usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
               .Case("usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
               .Case("uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
               .Case("uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddWords(F, ID, NewFn);
    }
    return false; // No other 'x86.avx10.*'
  }

  if (Name.consume_front("avx512bf16.")) {
    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("cvtne2ps2bf16.128",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
             .Case("cvtne2ps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
             .Case("cvtne2ps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
             .Case("mask.cvtneps2bf16.128",
                   Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
             .Case("cvtneps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
             .Case("cvtneps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16Intrinsic(F, ID, NewFn);

    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
             .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
             .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
    return false; // No other 'x86.avx512bf16.*'.
  }

  if (Name.consume_front("xop.")) {
    ID = Intrinsic::not_intrinsic;
    if (Name.starts_with("vpermil2")) { // Added in 3.9
      // Upgrade any XOP PERMIL2 index operand still using a float/double
      // vector.
      auto Idx = F->getFunctionType()->getParamType(2);
      if (Idx->isFPOrFPVectorTy()) {
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        if (EltSize == 64 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          ID = Intrinsic::x86_xop_vpermil2ps_256;
      }
    } else if (F->arg_size() == 2)
      // frcz.ss/sd may need to have an argument dropped. Added in 3.2
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
               .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
               .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
      return true;
    }
    return false; // No other 'x86.xop.*'
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}

// Upgrade ARM (IsArm) or AArch64 (!IsArm) intrinsic functions. Returns true
// iff an upgrade was registered. IsArm: 'arm.*', !IsArm: 'aarch64.*'.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
                                                 StringRef Name,
                                                 Function *&NewFn) {
  if (Name.starts_with("rbit")) {
    // '(arm|aarch64).rbit'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
    return true;
  }

  if (Name == "thread.pointer") {
    // '(arm|aarch64).thread.pointer'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
    return true;
  }

  bool Neon = Name.consume_front("neon.");
  if (Neon) {
    // '(arm|aarch64).neon.*'.
    // Changed in 12.0: bfdot accepts v4bf16 and v8bf16 instead of v8i8 and
    // v16i8 respectively.
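    //
    // For example (illustrative IR), the old polymorphic declaration
    //   declare <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v8i8(<2 x float>,
    //                                                           <8 x i8>,
    //                                                           <8 x i8>)
    // is redeclared to take two <4 x bfloat> operands instead.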
    if (Name.consume_front("bfdot.")) {
      // '(arm|aarch64).neon.bfdot.*'.
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases({"v2f32.v8i8", "v4f32.v16i8"},
                     IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
                           : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
        assert((OperandWidth == 64 || OperandWidth == 128) &&
               "Unexpected operand width");
        LLVMContext &Ctx = F->getParent()->getContext();
        std::array<Type *, 2> Tys{
            {F->getReturnType(),
             FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }
      return false; // No other '(arm|aarch64).neon.bfdot.*'.
    }

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
    // anymore and accept v8bf16 instead of v16i8.
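    //
    // For example (illustrative IR), the old suffixed declaration
    //   @llvm.aarch64.neon.bfmmla.v4f32.v16i8(<4 x float>, <16 x i8>,
    //                                         <16 x i8>)
    // becomes plain @llvm.aarch64.neon.bfmmla taking <8 x bfloat> operands.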
    if (Name.consume_front("bfm")) {
      // '(arm|aarch64).neon.bfm*'.
      if (Name.consume_back(".v4f32.v16i8")) {
        // '(arm|aarch64).neon.bfm*.v4f32.v16i8'.
        Intrinsic::ID ID =
            StringSwitch<Intrinsic::ID>(Name)
                .Case("mla",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
                .Case("lalb",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
                .Case("lalt",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
                .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
      }
      return false; // No other '(arm|aarch64).neon.bfm*'.
    }
    // Continue on to AArch64 Neon or Arm Neon.
  }
  // Continue on to Arm or AArch64.

  if (IsArm) {
    // 'arm.*'.
    if (Neon) {
      // 'arm.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("vclz.", Intrinsic::ctlz)
                             .StartsWith("vcnt.", Intrinsic::ctpop)
                             .StartsWith("vqadds.", Intrinsic::sadd_sat)
                             .StartsWith("vqaddu.", Intrinsic::uadd_sat)
                             .StartsWith("vqsubs.", Intrinsic::ssub_sat)
                             .StartsWith("vqsubu.", Intrinsic::usub_sat)
                             .StartsWith("vrinta.", Intrinsic::round)
                             .StartsWith("vrintn.", Intrinsic::roundeven)
                             .StartsWith("vrintm.", Intrinsic::floor)
                             .StartsWith("vrintp.", Intrinsic::ceil)
                             .StartsWith("vrintx.", Intrinsic::rint)
                             .StartsWith("vrintz.", Intrinsic::trunc)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.consume_front("vst")) {
        // 'arm.neon.vst*'.
        static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
        SmallVector<StringRef, 2> Groups;
        if (vstRegex.match(Name, &Groups)) {
          static const Intrinsic::ID StoreInts[] = {
              Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
              Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};

          static const Intrinsic::ID StoreLaneInts[] = {
              Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
              Intrinsic::arm_neon_vst4lane};

          auto fArgs = F->getFunctionType()->params();
          Type *Tys[] = {fArgs[0], fArgs[1]};
          if (Groups[1].size() == 1)
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreInts[fArgs.size() - 3], Tys);
          else
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
          return true;
        }
        return false; // No other 'arm.neon.vst*'.
      }

      return false; // No other 'arm.neon.*'.
    }

    if (Name.consume_front("mve.")) {
      // 'arm.mve.*'.
      if (Name == "vctp64") {
        if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
          // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
          // the function and deal with it below in UpgradeIntrinsicCall.
          rename(F);
          return true;
        }
        return false; // Not 'arm.mve.vctp64'.
      }

      if (Name.starts_with("vrintn.v")) {
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
        return true;
      }

      // These too are changed to accept a v2i1 instead of the old v4i1.
      if (Name.consume_back(".v4i1")) {
        // 'arm.mve.*.v4i1'.
        if (Name.consume_back(".predicated.v2i64.v4i32"))
          // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
          return Name == "mull.int" || Name == "vqdmull";

        if (Name.consume_back(".v2i64")) {
          // 'arm.mve.*.v2i64.v4i1'
          bool IsGather = Name.consume_front("vldr.gather.");
          if (IsGather || Name.consume_front("vstr.scatter.")) {
            if (Name.consume_front("base.")) {
              // Optional 'wb.' prefix.
              Name.consume_front("wb.");
              // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
              // predicated.v2i64.v2i64.v4i1'.
              return Name == "predicated.v2i64";
            }

            if (Name.consume_front("offset.predicated."))
              return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                     Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

            // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
            return false;
          }

          return false; // No other 'arm.mve.*.v2i64.v4i1'.
        }
        return false; // No other 'arm.mve.*.v4i1'.
      }
      return false; // No other 'arm.mve.*'.
    }

    if (Name.consume_front("cde.vcx")) {
      // 'arm.cde.vcx*'.
      if (Name.consume_back(".predicated.v2i64.v4i1"))
        // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
        return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
               Name == "3q" || Name == "3qa";

      return false; // No other 'arm.cde.vcx*'.
    }
  } else {
    // 'aarch64.*'.
    if (Neon) {
      // 'aarch64.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("frintn", Intrinsic::roundeven)
                             .StartsWith("rbit", Intrinsic::bitreverse)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.starts_with("addp")) {
        // 'aarch64.neon.addp*'.
        if (F->arg_size() != 2)
          return false; // Invalid IR.
        VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
        if (Ty && Ty->getElementType()->isFloatingPointTy()) {
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
          return true;
        }
      }

      // Changed in 20.0: bfcvt/bfcvtn/bfcvtn2 have been replaced with fptrunc.
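      //
      // For example (illustrative IR), a call such as
      //   %r = call bfloat @llvm.aarch64.neon.bfcvt(float %x)
      // is now emitted as
      //   %r = fptrunc float %x to bfloat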
      if (Name.starts_with("bfcvt")) {
        NewFn = nullptr;
        return true;
      }

      return false; // No other 'aarch64.neon.*'.
    }
    if (Name.consume_front("sve.")) {
      // 'aarch64.sve.*'.
      if (Name.consume_front("bf")) {
        if (Name.consume_back(".lane")) {
          // 'aarch64.sve.bf*.lane'.
          Intrinsic::ID ID =
              StringSwitch<Intrinsic::ID>(Name)
                  .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
                  .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                  .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
                  .Default(Intrinsic::not_intrinsic);
          if (ID != Intrinsic::not_intrinsic) {
            NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
            return true;
          }
          return false; // No other 'aarch64.sve.bf*.lane'.
        }
        return false; // No other 'aarch64.sve.bf*'.
      }

      // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
      if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
        NewFn = nullptr;
        return true;
      }

      if (Name.consume_front("addqv")) {
        // 'aarch64.sve.addqv'.
        if (!F->getReturnType()->isFPOrFPVectorTy())
          return false;

        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getReturnType(), Args[1]};
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
        return true;
      }

      if (Name.consume_front("ld")) {
        // 'aarch64.sve.ld*'.
        static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
        if (LdRegex.match(Name)) {
          Type *ScalarTy =
              cast<VectorType>(F->getReturnType())->getElementType();
          ElementCount EC =
              cast<VectorType>(F->arg_begin()->getType())->getElementCount();
          Type *Ty = VectorType::get(ScalarTy, EC);
          static const Intrinsic::ID LoadIDs[] = {
              Intrinsic::aarch64_sve_ld2_sret,
              Intrinsic::aarch64_sve_ld3_sret,
              Intrinsic::aarch64_sve_ld4_sret,
          };
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                    LoadIDs[Name[0] - '2'], Ty);
          return true;
        }
        return false; // No other 'aarch64.sve.ld*'.
      }

      if (Name.consume_front("tuple.")) {
        // 'aarch64.sve.tuple.*'.
        if (Name.starts_with("get")) {
          // 'aarch64.sve.tuple.get*'.
          Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_extract, Tys);
          return true;
        }

        if (Name.starts_with("set")) {
          // 'aarch64.sve.tuple.set*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {Args[0], Args[2], Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }

        static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
        if (CreateTupleRegex.match(Name)) {
          // 'aarch64.sve.tuple.create*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {F->getReturnType(), Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }
        return false; // No other 'aarch64.sve.tuple.*'.
      }

      if (Name.starts_with("rev.nxv")) {
        // 'aarch64.sve.rev.<Ty>'
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::vector_reverse, F->getReturnType());
        return true;
      }

      return false; // No other 'aarch64.sve.*'.
    }
  }
  return false; // No other 'arm.*', 'aarch64.*'.
}

static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F,
                                                        StringRef Name) {
  if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("im2col.3d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
            .Case("im2col.4d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
            .Case("im2col.5d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
            .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
            .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
            .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
            .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
            .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
            .Default(Intrinsic::not_intrinsic);

    if (ID == Intrinsic::not_intrinsic)
      return ID;

    // These intrinsics may need an upgrade for two reasons:
    // (1) When the address-space of the first argument is shared[AS=3]
    //     (and we upgrade it to use the shared_cluster address-space[AS=7]).
    if (F->getArg(0)->getType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED)
      return ID;

    // (2) When there are only two boolean flag arguments at the end:
    //
    // The last three parameters of the older version of these
    // intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
    //
    // The newer version reads as:
    // arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
    //
    // So, when the type of the [N-3]rd argument is "not i1", then
    // it is the older version and we need to upgrade.
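    //
    // For example (illustrative IR), an older call whose trailing arguments
    // are "..., i64 %ch, i1 %mc_flag, i1 %ch_flag" is rewritten to the newer
    // form ending in "..., i64 %ch, i1 %mc_flag, i1 %ch_flag, i32 %cta_group",
    // with a default cta_group flag appended (sketch only; the call itself is
    // rewritten in UpgradeIntrinsicCall).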
    size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
    Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
    if (!ArgType->isIntegerTy(1))
      return ID;
  }

  return Intrinsic::not_intrinsic;
}

static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F,
                                                              StringRef Name) {
  if (Name.consume_front("mapa.shared.cluster"))
    if (F->getReturnType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED)
      return Intrinsic::nvvm_mapa_shared_cluster;

  if (Name.consume_front("cp.async.bulk.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("global.to.shared.cluster",
                  Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
            .Case("shared.cta.to.cluster",
                  Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
            .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic)
      if (F->getArg(0)->getType()->getPointerAddressSpace() ==
          NVPTXAS::ADDRESS_SPACE_SHARED)
        return ID;
  }

  return Intrinsic::not_intrinsic;
}

static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
  if (Name.consume_front("fma.rn."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
        .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
        .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
        .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
        .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
        .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmax."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmin."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("neg."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_neg_bf16)
        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  return Intrinsic::not_intrinsic;
}

static bool consumeNVVMPtrAddrSpace(StringRef &Name) {
  return Name.consume_front("local") || Name.consume_front("shared") ||
         Name.consume_front("global") || Name.consume_front("constant") ||
         Name.consume_front("param");
}

static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
                                      bool CanUpgradeDebugIntrinsicsToRecords) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  StringRef Name = F->getName();

  // Quickly eliminate it, if it's not a candidate.
  if (!Name.consume_front("llvm.") || Name.empty())
    return false;

  switch (Name[0]) {
  default: break;
  case 'a': {
    bool IsArm = Name.consume_front("arm.");
    if (IsArm || Name.consume_front("aarch64.")) {
      if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
        return true;
      break;
    }

    if (Name.consume_front("amdgcn.")) {
      if (Name == "alignbit") {
        // Target-specific intrinsic became redundant.
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::fshr, {F->getReturnType()});
        return true;
      }

      if (Name.consume_front("atomic.")) {
        if (Name.starts_with("inc") || Name.starts_with("dec") ||
            Name.starts_with("cond.sub") || Name.starts_with("csub")) {
          // These were replaced with atomicrmw uinc_wrap, udec_wrap, usub_cond
          // and usub_sat so there's no new declaration.
          NewFn = nullptr;
          return true;
        }
        break; // No other 'amdgcn.atomic.*'
      }

      // Legacy wmma iu intrinsics without the optional clamp operand.
      if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8 &&
          F->arg_size() == 7) {
        NewFn = nullptr;
        return true;
      }
      if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8 &&
          F->arg_size() == 8) {
        NewFn = nullptr;
        return true;
      }

      if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
          Name.consume_front("flat.atomic.")) {
        if (Name.starts_with("fadd") ||
            // FIXME: We should also remove fmin.num and fmax.num intrinsics.
            (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
            (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
          // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
          // declaration.
          NewFn = nullptr;
          return true;
        }
      }

      if (Name.starts_with("ldexp.")) {
        // Target-specific intrinsic became redundant.
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::ldexp,
            {F->getReturnType(), F->getArg(1)->getType()});
        return true;
      }
      break; // No other 'amdgcn.*'
    }

    break;
  }
  case 'c': {
    if (F->arg_size() == 1) {
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("ctlz.", Intrinsic::ctlz)
                             .StartsWith("cttz.", Intrinsic::cttz)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }
    }

    if (F->arg_size() == 2 && Name == "coro.end") {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::coro_end);
      return true;
    }

    break;
  }
  case 'd':
    if (Name.consume_front("dbg.")) {
      // Mark debug intrinsics for upgrade to new debug format.
      if (CanUpgradeDebugIntrinsicsToRecords) {
        if (Name == "addr" || Name == "value" || Name == "assign" ||
            Name == "declare" || Name == "label") {
          // There's no function to replace these with.
          NewFn = nullptr;
          // But we do want these to get upgraded.
          return true;
        }
      }
      // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
      // converted to DbgVariableRecords later.
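      //
      // For example (illustrative IR), a legacy
      //   call void @llvm.dbg.addr(metadata ptr %p, metadata !1, metadata !2)
      // is renamed and redeclared as @llvm.dbg.value; the call itself is
      // rewritten later when the call sites are upgraded.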
      if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  Intrinsic::dbg_value);
        return true;
      }
      break; // No other 'dbg.*'.
    }
    break;
  case 'e':
    if (Name.consume_front("experimental.vector.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              // Skip over extract.last.active, otherwise it will be 'upgraded'
              // to a regular vector extract which is a different operation.
              .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
              .StartsWith("extract.", Intrinsic::vector_extract)
              .StartsWith("insert.", Intrinsic::vector_insert)
              .StartsWith("reverse.", Intrinsic::vector_reverse)
              .StartsWith("interleave2.", Intrinsic::vector_interleave2)
              .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
              .StartsWith("partial.reduce.add",
                          Intrinsic::vector_partial_reduce_add)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        const auto *FT = F->getFunctionType();
        SmallVector<Type *, 2> Tys;
        if (ID == Intrinsic::vector_extract ||
            ID == Intrinsic::vector_interleave2)
          // Extracting overloads the return type.
          Tys.push_back(FT->getReturnType());
        if (ID != Intrinsic::vector_interleave2)
          Tys.push_back(FT->getParamType(0));
        if (ID == Intrinsic::vector_insert ||
            ID == Intrinsic::vector_partial_reduce_add)
          // Inserting overloads the inserted type.
          Tys.push_back(FT->getParamType(1));
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }

      if (Name.consume_front("reduce.")) {
        SmallVector<StringRef, 2> Groups;
        static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
        if (R.match(Name, &Groups))
          ID = StringSwitch<Intrinsic::ID>(Groups[1])
                   .Case("add", Intrinsic::vector_reduce_add)
                   .Case("mul", Intrinsic::vector_reduce_mul)
                   .Case("and", Intrinsic::vector_reduce_and)
                   .Case("or", Intrinsic::vector_reduce_or)
                   .Case("xor", Intrinsic::vector_reduce_xor)
                   .Case("smax", Intrinsic::vector_reduce_smax)
                   .Case("smin", Intrinsic::vector_reduce_smin)
                   .Case("umax", Intrinsic::vector_reduce_umax)
                   .Case("umin", Intrinsic::vector_reduce_umin)
                   .Case("fmax", Intrinsic::vector_reduce_fmax)
                   .Case("fmin", Intrinsic::vector_reduce_fmin)
                   .Default(Intrinsic::not_intrinsic);

        bool V2 = false;
        if (ID == Intrinsic::not_intrinsic) {
          static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
          Groups.clear();
          V2 = true;
          if (R2.match(Name, &Groups))
            ID = StringSwitch<Intrinsic::ID>(Groups[1])
                     .Case("fadd", Intrinsic::vector_reduce_fadd)
                     .Case("fmul", Intrinsic::vector_reduce_fmul)
                     .Default(Intrinsic::not_intrinsic);
        }
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          auto Args = F->getFunctionType()->params();
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                    {Args[V2 ? 1 : 0]});
          return true;
        }
        break; // No other 'experimental.vector.reduce.*'.
      }

      if (Name.consume_front("splice"))
        return true;
      break; // No other 'experimental.vector.*'.
    }
    if (Name.consume_front("experimental.stepvector.")) {
      Intrinsic::ID ID = Intrinsic::stepvector;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), ID, F->getFunctionType()->getReturnType());
      return true;
    }
    break; // No other 'e*'.
  case 'f':
    if (Name.starts_with("flt.rounds")) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::get_rounding);
      return true;
    }
    break;
  case 'i':
    if (Name.starts_with("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group.
      auto Args = F->getFunctionType()->params();
      Type *ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }
    break;
  case 'l':
    if ((Name.starts_with("lifetime.start") ||
         Name.starts_with("lifetime.end")) &&
        F->arg_size() == 2) {
      Intrinsic::ID IID = Name.starts_with("lifetime.start")
                              ? Intrinsic::lifetime_start
                              : Intrinsic::lifetime_end;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                                F->getArg(0)->getType());
      return true;
    }
    break;
  case 'm': {
    // Upgrade the memory intrinsics (memcpy/memmove/memset) from taking an
    // explicit alignment parameter to embedding the alignment as an attribute
    // on their pointer args.
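    //
    // For example (illustrative IR), the old five-argument form
    //   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %n,
    //                                        i32 4, i1 false)
    // becomes the four-argument form carrying the alignment as an attribute:
    //   call void @llvm.memcpy.p0.p0.i64(ptr align 4 %d, ptr align 4 %s,
    //                                    i64 %n, i1 false)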
    if (unsigned ID = StringSwitch<unsigned>(Name)
                          .StartsWith("memcpy.", Intrinsic::memcpy)
                          .StartsWith("memmove.", Intrinsic::memmove)
                          .Default(0)) {
      if (F->arg_size() == 5) {
        rename(F);
        // Get the types of dest, src, and len.
        ArrayRef<Type *> ParamTypes =
            F->getFunctionType()->params().slice(0, 3);
        NewFn =
            Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
        return true;
      }
    }
    if (Name.starts_with("memset.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest and len.
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
          FT->getParamType(0), // Dest
          FT->getParamType(2)  // len
      };
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::memset, ParamTypes);
      return true;
    }

    unsigned MaskedID =
        StringSwitch<unsigned>(Name)
            .StartsWith("masked.load", Intrinsic::masked_load)
            .StartsWith("masked.gather", Intrinsic::masked_gather)
            .StartsWith("masked.store", Intrinsic::masked_store)
            .StartsWith("masked.scatter", Intrinsic::masked_scatter)
            .Default(0);
    if (MaskedID && F->arg_size() == 4) {
      rename(F);
      if (MaskedID == Intrinsic::masked_load ||
          MaskedID == Intrinsic::masked_gather) {
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), MaskedID,
            {F->getReturnType(), F->getArg(0)->getType()});
        return true;
      }
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), MaskedID,
          {F->getArg(0)->getType(), F->getArg(1)->getType()});
      return true;
    }
    break;
  }
1541 case 'n': {
1542 if (Name.consume_front("nvvm.")) {
1543 // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1544 if (F->arg_size() == 1) {
1545 Intrinsic::ID IID =
1547 .Cases({"brev32", "brev64"}, Intrinsic::bitreverse)
1548 .Case("clz.i", Intrinsic::ctlz)
1549 .Case("popc.i", Intrinsic::ctpop)
1551 if (IID != Intrinsic::not_intrinsic) {
1552 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1553 {F->getReturnType()});
1554 return true;
1555 }
1556 }
1557
1558 // Check for nvvm intrinsics that need a return type adjustment.
1559 if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
1561 if (IID != Intrinsic::not_intrinsic) {
1562 NewFn = nullptr;
1563 return true;
1564 }
1565 }
1566
1567 // Upgrade Distributed Shared Memory Intrinsics
1569 if (IID != Intrinsic::not_intrinsic) {
1570 rename(F);
1571 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1572 return true;
1573 }
1574
1575 // Upgrade TMA copy G2S Intrinsics
1577 if (IID != Intrinsic::not_intrinsic) {
1578 rename(F);
1579 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1580 return true;
1581 }
1582
1583 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1584 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
1585 //
1586 // TODO: We could add lohi.i2d.
1587 bool Expand = false;
1588 if (Name.consume_front("abs."))
1589 // nvvm.abs.{i,ii}
1590 Expand =
1591 Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
1592 else if (Name.consume_front("fabs."))
1593 // nvvm.fabs.{f,ftz.f,d}
1594 Expand = Name == "f" || Name == "ftz.f" || Name == "d";
1595 else if (Name.consume_front("ex2.approx."))
1596 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
1597 Expand =
1598 Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";
1599 else if (Name.consume_front("max.") || Name.consume_front("min."))
1600 // nvvm.{min,max}.{i,ii,ui,ull}
1601 Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
1602 Name == "ui" || Name == "ull";
1603 else if (Name.consume_front("atomic.load."))
1604 // nvvm.atomic.load.add.{f32,f64}.p
1605 // nvvm.atomic.load.{inc,dec}.32.p
1606 Expand = StringSwitch<bool>(Name)
1607 .StartsWith("add.f32.p", true)
1608 .StartsWith("add.f64.p", true)
1609 .StartsWith("inc.32.p", true)
1610 .StartsWith("dec.32.p", true)
1611 .Default(false);
1612 else if (Name.consume_front("bitcast."))
1613 // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
1614 Expand =
1615 Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
1616 else if (Name.consume_front("rotate."))
1617 // nvvm.rotate.{b32,b64,right.b64}
1618 Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
1619 else if (Name.consume_front("ptr.gen.to."))
1620 // nvvm.ptr.gen.to.{local,shared,global,constant,param}
1621 Expand = consumeNVVMPtrAddrSpace(Name);
1622 else if (Name.consume_front("ptr."))
1623 // nvvm.ptr.{local,shared,global,constant,param}.to.gen
1624 Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(".to.gen");
1625 else if (Name.consume_front("ldg.global."))
1626 // nvvm.ldg.global.{i,p,f}
1627 Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
1628 Name.starts_with("p."));
1629 else
1630 Expand = StringSwitch<bool>(Name)
1631 .Case("barrier0", true)
1632 .Case("barrier.n", true)
1633 .Case("barrier.sync.cnt", true)
1634 .Case("barrier.sync", true)
1635 .Case("barrier", true)
1636 .Case("bar.sync", true)
1637 .Case("barrier0.popc", true)
1638 .Case("barrier0.and", true)
1639 .Case("barrier0.or", true)
1640 .Case("clz.ll", true)
1641 .Case("popc.ll", true)
1642 .Case("h2f", true)
1643 .Case("swap.lo.hi.b64", true)
1644 .Case("tanh.approx.f32", true)
1645 .Default(false);
1646
1647 if (Expand) {
1648 NewFn = nullptr;
1649 return true;
1650 }
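      // For instance, nvvm.rotate.b32(x, n) is expanded later into the
      // generic funnel shift fshl(x, x, n), and nvvm.swap.lo.hi.b64 into
      // fshl(x, x, 32); see the expansions in UpgradeIntrinsicCall.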
1651 break; // No other 'nvvm.*'.
1652 }
1653 break;
1654 }
1655 case 'o':
1656 if (Name.starts_with("objectsize.")) {
1657 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1658 if (F->arg_size() == 2 || F->arg_size() == 3) {
1659 rename(F);
1660 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1661 Intrinsic::objectsize, Tys);
1662 return true;
1663 }
1664 }
1665 break;
1666
1667 case 'p':
1668 if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
1669      rename(F);
1670      NewFn = Intrinsic::getOrInsertDeclaration(
1671 F->getParent(), Intrinsic::ptr_annotation,
1672 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1673 return true;
1674 }
1675 break;
1676
1677 case 'r': {
1678    if (Name.consume_front("riscv.")) {
1679      Intrinsic::ID ID;
1680      ID = StringSwitch<Intrinsic::ID>(Name)
1681 .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1682 .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1683 .Case("aes32esi", Intrinsic::riscv_aes32esi)
1684               .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
1685               .Default(Intrinsic::not_intrinsic);
1686      if (ID != Intrinsic::not_intrinsic) {
1687 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1688 rename(F);
1689 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1690 return true;
1691 }
1692 break; // No other applicable upgrades.
1693 }
1694
1695      ID = StringSwitch<Intrinsic::ID>(Name)
1696 .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1697               .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1698               .Default(Intrinsic::not_intrinsic);
1699      if (ID != Intrinsic::not_intrinsic) {
1700 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1701 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1702 rename(F);
1703 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1704 return true;
1705 }
1706 break; // No other applicable upgrades.
1707 }
1708
1709      ID = StringSwitch<Intrinsic::ID>(Name)
1710 .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1711 .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1712 .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1713 .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1714 .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1715               .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1716               .Default(Intrinsic::not_intrinsic);
1717      if (ID != Intrinsic::not_intrinsic) {
1718 if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1719 rename(F);
1720 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1721 return true;
1722 }
1723 break; // No other applicable upgrades.
1724 }
1725 break; // No other 'riscv.*' intrinsics
1726 }
1727 } break;
1728
1729 case 's':
1730 if (Name == "stackprotectorcheck") {
1731 NewFn = nullptr;
1732 return true;
1733 }
1734 break;
1735
1736 case 't':
1737    if (Name == "thread.pointer") {
1738      NewFn = Intrinsic::getOrInsertDeclaration(
1739 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
1740 return true;
1741 }
1742 break;
1743
1744 case 'v': {
1745 if (Name == "var.annotation" && F->arg_size() == 4) {
1746      rename(F);
1747      NewFn = Intrinsic::getOrInsertDeclaration(
1748 F->getParent(), Intrinsic::var_annotation,
1749 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1750 return true;
1751 }
1752 if (Name.consume_front("vector.splice")) {
1753 if (Name.starts_with(".left") || Name.starts_with(".right"))
1754 break;
1755 return true;
1756 }
1757 break;
1758 }
1759
1760 case 'w':
1761    if (Name.consume_front("wasm.")) {
1762      Intrinsic::ID ID =
1763          StringSwitch<Intrinsic::ID>(Name)
1764 .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1765 .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1766              .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1767              .Default(Intrinsic::not_intrinsic);
1768      if (ID != Intrinsic::not_intrinsic) {
1769 rename(F);
1770 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1771 F->getReturnType());
1772 return true;
1773 }
1774
1775      if (Name.consume_front("dot.i8x16.i7x16.")) {
1776        Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
1777 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1778 .Case("add.signed",
1779                    Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1780                .Default(Intrinsic::not_intrinsic);
1781        if (ID != Intrinsic::not_intrinsic) {
1782 rename(F);
1783 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1784 return true;
1785 }
1786 break; // No other 'wasm.dot.i8x16.i7x16.*'.
1787 }
1788 break; // No other 'wasm.*'.
1789 }
1790 break;
1791
1792 case 'x':
1793 if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1794 return true;
1795 }
1796
1797 auto *ST = dyn_cast<StructType>(F->getReturnType());
1798 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1799 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1800 // Replace return type with literal non-packed struct. Only do this for
1801 // intrinsics declared to return a struct, not for intrinsics with
1802 // overloaded return type, in which case the exact struct type will be
1803    // mangled into the name.
1804    SmallVector<Intrinsic::IITDescriptor> Desc;
1805    Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
1806 if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1807 auto *FT = F->getFunctionType();
1808 auto *NewST = StructType::get(ST->getContext(), ST->elements());
1809 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1810 std::string Name = F->getName().str();
1811 rename(F);
1812 NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1813 Name, F->getParent());
1814
1815 // The new function may also need remangling.
1816 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1817 NewFn = *Result;
1818 return true;
1819 }
1820 }
1821
1822  // Remangle our intrinsic since we upgrade the mangling
1823  auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
1824 if (Result != std::nullopt) {
1825 NewFn = *Result;
1826 return true;
1827 }
1828
1829 // This may not belong here. This function is effectively being overloaded
1830 // to both detect an intrinsic which needs upgrading, and to provide the
1831 // upgraded form of the intrinsic. We should perhaps have two separate
1832 // functions for this.
1833 return false;
1834}
1835
1836bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
1837 bool CanUpgradeDebugIntrinsicsToRecords) {
1838 NewFn = nullptr;
1839 bool Upgraded =
1840 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1841
1842 // Upgrade intrinsic attributes. This does not change the function.
1843 if (NewFn)
1844 F = NewFn;
1845 if (Intrinsic::ID id = F->getIntrinsicID()) {
1846 // Only do this if the intrinsic signature is valid.
1847 SmallVector<Type *> OverloadTys;
1848 if (Intrinsic::getIntrinsicSignature(id, F->getFunctionType(), OverloadTys))
1849 F->setAttributes(
1850 Intrinsic::getAttributes(F->getContext(), id, F->getFunctionType()));
1851 }
1852 return Upgraded;
1853}
1854
1855GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
1856 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1857 GV->getName() == "llvm.global_dtors")) ||
1858 !GV->hasInitializer())
1859    return nullptr;
1860  ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1861 if (!ATy)
1862    return nullptr;
1863  StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1864 if (!STy || STy->getNumElements() != 2)
1865 return nullptr;
1866
1867 LLVMContext &C = GV->getContext();
1868 IRBuilder<> IRB(C);
1869 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1870 IRB.getPtrTy());
1871 Constant *Init = GV->getInitializer();
1872 unsigned N = Init->getNumOperands();
1873 std::vector<Constant *> NewCtors(N);
1874 for (unsigned i = 0; i != N; ++i) {
1875 auto Ctor = cast<Constant>(Init->getOperand(i));
1876 NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1877                                      Ctor->getAggregateElement(1),
1878                                      Constant::getNullValue(IRB.getPtrTy()));
1879 }
1880 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1881
1882 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1883 NewInit, GV->getName());
1884}
1885
1886// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1887// to byte shuffles.
1888static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1889 unsigned Shift) {
1890 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1891 unsigned NumElts = ResultTy->getNumElements() * 8;
1892
1893 // Bitcast from a 64-bit element type to a byte element type.
1894 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1895 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1896
1897 // We'll be shuffling in zeroes.
1898 Value *Res = Constant::getNullValue(VecTy);
1899
1900 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1901 // we'll just return the zero vector.
1902 if (Shift < 16) {
1903 int Idxs[64];
1904 // 256/512-bit version is split into 2/4 16-byte lanes.
1905 for (unsigned l = 0; l != NumElts; l += 16)
1906 for (unsigned i = 0; i != 16; ++i) {
1907 unsigned Idx = NumElts + i - Shift;
1908 if (Idx < NumElts)
1909 Idx -= NumElts - 16; // end of lane, switch operand.
1910 Idxs[l + i] = Idx + l;
1911 }
1912
1913 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1914 }
1915
1916 // Bitcast back to a 64-bit element type.
1917 return Builder.CreateBitCast(Res, ResultTy, "cast");
1918}
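// Worked example: for a v2i64 operand (16 bytes) and Shift == 4, the indices
// above are {12,13,14,15,16,...,27}; lanes 12-15 read the zero vector and
// lanes 16-27 read bytes 0-11 of Op, i.e. Op shifted left by four bytes with
// zeroes shifted in, which is exactly the PSLLDQ behavior.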
1919
1920// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1921// to byte shuffles.
1922static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1923 unsigned Shift) {
1924 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1925 unsigned NumElts = ResultTy->getNumElements() * 8;
1926
1927 // Bitcast from a 64-bit element type to a byte element type.
1928 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1929 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1930
1931 // We'll be shuffling in zeroes.
1932 Value *Res = Constant::getNullValue(VecTy);
1933
1934 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1935 // we'll just return the zero vector.
1936 if (Shift < 16) {
1937 int Idxs[64];
1938 // 256/512-bit version is split into 2/4 16-byte lanes.
1939 for (unsigned l = 0; l != NumElts; l += 16)
1940 for (unsigned i = 0; i != 16; ++i) {
1941 unsigned Idx = i + Shift;
1942 if (Idx >= 16)
1943 Idx += NumElts - 16; // end of lane, switch operand.
1944 Idxs[l + i] = Idx + l;
1945 }
1946
1947 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
1948 }
1949
1950 // Bitcast back to a 64-bit element type.
1951 return Builder.CreateBitCast(Res, ResultTy, "cast");
1952}
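// Worked example: for a v2i64 operand and Shift == 4, the indices above are
// {4,...,19}; lanes 4-15 read bytes 4-15 of Op and lanes 16-19 read the zero
// vector, i.e. Op shifted right by four bytes with zeroes shifted in.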
1953
1954static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1955 unsigned NumElts) {
1956  assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1957  llvm::VectorType *MaskTy = llvm::FixedVectorType::get(
1958 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1959 Mask = Builder.CreateBitCast(Mask, MaskTy);
1960
1961  // If we have fewer than 8 elements (1, 2 or 4), the incoming mask was an
1962  // i8 and we need to extract down to the right number of elements.
1963 if (NumElts <= 4) {
1964 int Indices[4];
1965 for (unsigned i = 0; i != NumElts; ++i)
1966 Indices[i] = i;
1967 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
1968 "extract");
1969 }
1970
1971 return Mask;
1972}
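// For example, an i8 mask paired with a 4-element vector is bitcast to v8i1
// and then shuffled down to v4i1 with indices {0,1,2,3}, so only the low
// four mask bits take part in the subsequent select.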
1973
1974static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1975 Value *Op1) {
1976 // If the mask is all ones just emit the first operation.
1977 if (const auto *C = dyn_cast<Constant>(Mask))
1978 if (C->isAllOnesValue())
1979 return Op0;
1980
1981 Mask = getX86MaskVec(Builder, Mask,
1982 cast<FixedVectorType>(Op0->getType())->getNumElements());
1983 return Builder.CreateSelect(Mask, Op0, Op1);
1984}
1985
1986static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1987 Value *Op1) {
1988 // If the mask is all ones just emit the first operation.
1989 if (const auto *C = dyn_cast<Constant>(Mask))
1990 if (C->isAllOnesValue())
1991 return Op0;
1992
1993 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1994 Mask->getType()->getIntegerBitWidth());
1995 Mask = Builder.CreateBitCast(Mask, MaskTy);
1996 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1997 return Builder.CreateSelect(Mask, Op0, Op1);
1998}
1999
2000// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
2001// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
2002// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
2003static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
2004 Value *Op1, Value *Shift,
2005 Value *Passthru, Value *Mask,
2006 bool IsVALIGN) {
2007 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
2008
2009 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2010 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
2011 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
2012 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
2013
2014 // Mask the immediate for VALIGN.
2015 if (IsVALIGN)
2016 ShiftVal &= (NumElts - 1);
2017
2018 // If palignr is shifting the pair of vectors more than the size of two
2019 // lanes, emit zero.
2020  if (ShiftVal >= 32)
2021    return llvm::Constant::getNullValue(Op0->getType());
2022
2023 // If palignr is shifting the pair of input vectors more than one lane,
2024 // but less than two lanes, convert to shifting in zeroes.
2025 if (ShiftVal > 16) {
2026 ShiftVal -= 16;
2027    Op1 = Op0;
2028    Op0 = llvm::Constant::getNullValue(Op0->getType());
2029 }
2030
2031 int Indices[64];
2032 // 256-bit palignr operates on 128-bit lanes so we need to handle that
2033 for (unsigned l = 0; l < NumElts; l += 16) {
2034 for (unsigned i = 0; i != 16; ++i) {
2035 unsigned Idx = ShiftVal + i;
2036 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
2037 Idx += NumElts - 16; // End of lane, switch operand.
2038 Indices[l + i] = Idx + l;
2039 }
2040 }
2041
2042 Value *Align = Builder.CreateShuffleVector(
2043 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
2044
2045 return emitX86Select(Builder, Mask, Align, Passthru);
2046}
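// Worked example: a 128-bit palignr with ShiftVal == 4 uses indices {4..19};
// lanes 4-15 take the tail of Op1 and lanes 16-19 wrap into Op0, yielding
// bytes 4-19 of the Op0:Op1 concatenation, matching the PALIGNR semantics.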
2047
2048static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
2049 bool ZeroMask, bool IndexForm) {
2050 Type *Ty = CI.getType();
2051 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
2052 unsigned EltWidth = Ty->getScalarSizeInBits();
2053 bool IsFloat = Ty->isFPOrFPVectorTy();
2054 Intrinsic::ID IID;
2055 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2056 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2057 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2058 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2059 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2060 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2061 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2062 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2063 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2064 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2065 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2066 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2067 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2068 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2069 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2070 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2071 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2072 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2073 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2074 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2075 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2076 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2077 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2078 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2079 else if (VecWidth == 128 && EltWidth == 16)
2080 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2081 else if (VecWidth == 256 && EltWidth == 16)
2082 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2083 else if (VecWidth == 512 && EltWidth == 16)
2084 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2085 else if (VecWidth == 128 && EltWidth == 8)
2086 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2087 else if (VecWidth == 256 && EltWidth == 8)
2088 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2089 else if (VecWidth == 512 && EltWidth == 8)
2090 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2091 else
2092 llvm_unreachable("Unexpected intrinsic");
2093
2094 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
2095 CI.getArgOperand(2) };
2096
2097 // If this isn't index form we need to swap operand 0 and 1.
2098 if (!IndexForm)
2099 std::swap(Args[0], Args[1]);
2100
2101 Value *V = Builder.CreateIntrinsic(IID, Args);
2102 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
2103 : Builder.CreateBitCast(CI.getArgOperand(1),
2104 Ty);
2105 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
2106}
2107
2108static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
2109 Intrinsic::ID IID) {
2110 Type *Ty = CI.getType();
2111 Value *Op0 = CI.getOperand(0);
2112 Value *Op1 = CI.getOperand(1);
2113 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2114
2115 if (CI.arg_size() == 4) { // For masked intrinsics.
2116 Value *VecSrc = CI.getOperand(2);
2117 Value *Mask = CI.getOperand(3);
2118 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2119 }
2120 return Res;
2121}
2122
2123static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
2124 bool IsRotateRight) {
2125 Type *Ty = CI.getType();
2126 Value *Src = CI.getArgOperand(0);
2127 Value *Amt = CI.getArgOperand(1);
2128
2129 // Amount may be scalar immediate, in which case create a splat vector.
2130  // Funnel shift amounts are treated as modulo, and the types are all
2131  // power-of-2, so we only care about the lowest log2 bits anyway.
2132 if (Amt->getType() != Ty) {
2133 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2134 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2135 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2136 }
2137
2138 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2139 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2140
2141 if (CI.arg_size() == 4) { // For masked intrinsics.
2142 Value *VecSrc = CI.getOperand(2);
2143 Value *Mask = CI.getOperand(3);
2144 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2145 }
2146 return Res;
2147}
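// A rotate is a funnel shift with both inputs equal: rol(x, n) == fshl(x, x, n)
// and ror(x, n) == fshr(x, x, n), which is why the splatted amount feeds a
// generic funnel-shift intrinsic here.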
2148
2149static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
2150 bool IsSigned) {
2151 Type *Ty = CI.getType();
2152 Value *LHS = CI.getArgOperand(0);
2153 Value *RHS = CI.getArgOperand(1);
2154
2155 CmpInst::Predicate Pred;
2156 switch (Imm) {
2157 case 0x0:
2158 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
2159 break;
2160 case 0x1:
2161 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
2162 break;
2163 case 0x2:
2164 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
2165 break;
2166 case 0x3:
2167 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
2168 break;
2169 case 0x4:
2170 Pred = ICmpInst::ICMP_EQ;
2171 break;
2172 case 0x5:
2173 Pred = ICmpInst::ICMP_NE;
2174 break;
2175 case 0x6:
2176 return Constant::getNullValue(Ty); // FALSE
2177 case 0x7:
2178 return Constant::getAllOnesValue(Ty); // TRUE
2179 default:
2180 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
2181 }
2182
2183 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
2184 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2185 return Ext;
2186}
2187
2188static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
2189 bool IsShiftRight, bool ZeroMask) {
2190 Type *Ty = CI.getType();
2191 Value *Op0 = CI.getArgOperand(0);
2192 Value *Op1 = CI.getArgOperand(1);
2193 Value *Amt = CI.getArgOperand(2);
2194
2195 if (IsShiftRight)
2196 std::swap(Op0, Op1);
2197
2198 // Amount may be scalar immediate, in which case create a splat vector.
2199  // Funnel shift amounts are treated as modulo, and the types are all
2200  // power-of-2, so we only care about the lowest log2 bits anyway.
2201 if (Amt->getType() != Ty) {
2202 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2203 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2204 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2205 }
2206
2207 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2208 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2209
2210 unsigned NumArgs = CI.arg_size();
2211 if (NumArgs >= 4) { // For masked intrinsics.
2212 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
2213 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
2214 CI.getArgOperand(0);
2215 Value *Mask = CI.getOperand(NumArgs - 1);
2216 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2217 }
2218 return Res;
2219}
2220
2221static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
2222 Value *Mask, bool Aligned) {
2223 const Align Alignment =
2224 Aligned
2225 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2226 : Align(1);
2227
2228 // If the mask is all ones just emit a regular store.
2229 if (const auto *C = dyn_cast<Constant>(Mask))
2230 if (C->isAllOnesValue())
2231 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
2232
2233 // Convert the mask from an integer type to a vector of i1.
2234 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
2235 Mask = getX86MaskVec(Builder, Mask, NumElts);
2236 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
2237}
2238
2239static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
2240 Value *Passthru, Value *Mask, bool Aligned) {
2241 Type *ValTy = Passthru->getType();
2242 const Align Alignment =
2243 Aligned
2244          ? Align(
2245                ValTy->getPrimitiveSizeInBits().getFixedValue() /
2246 8)
2247 : Align(1);
2248
2249  // If the mask is all ones just emit a regular load.
2250 if (const auto *C = dyn_cast<Constant>(Mask))
2251 if (C->isAllOnesValue())
2252 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2253
2254 // Convert the mask from an integer type to a vector of i1.
2255 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
2256 Mask = getX86MaskVec(Builder, Mask, NumElts);
2257 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2258}
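// In both helpers the "aligned" flavors assume the vector's natural
// alignment (e.g. Align(64) for a 512-bit vector), while the unaligned
// flavors use Align(1).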
2259
2260static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2261 Type *Ty = CI.getType();
2262 Value *Op0 = CI.getArgOperand(0);
2263 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2264 {Op0, Builder.getInt1(false)});
2265 if (CI.arg_size() == 3)
2266 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
2267 return Res;
2268}
2269
2270static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
2271 Type *Ty = CI.getType();
2272
2273 // Arguments have a vXi32 type so cast to vXi64.
2274 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
2275 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
2276
2277 if (IsSigned) {
2278 // Shift left then arithmetic shift right.
2279 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2280 LHS = Builder.CreateShl(LHS, ShiftAmt);
2281 LHS = Builder.CreateAShr(LHS, ShiftAmt);
2282 RHS = Builder.CreateShl(RHS, ShiftAmt);
2283 RHS = Builder.CreateAShr(RHS, ShiftAmt);
2284 } else {
2285 // Clear the upper bits.
2286 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2287 LHS = Builder.CreateAnd(LHS, Mask);
2288 RHS = Builder.CreateAnd(RHS, Mask);
2289 }
2290
2291 Value *Res = Builder.CreateMul(LHS, RHS);
2292
2293 if (CI.arg_size() == 4)
2294 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
2295
2296 return Res;
2297}
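// The shl/ashr pair sign-extends the low 32 bits of each i64 lane
// ((x << 32) >> 32), while the 0xffffffff mask zero-extends them; the plain
// 64-bit multiply of the extended lanes then reproduces PMULDQ/PMULUDQ.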
2298
2299// Apply a mask to a vector of i1s and ensure the result is at least 8 bits wide.
2300static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
2301 Value *Mask) {
2302 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
2303 if (Mask) {
2304 const auto *C = dyn_cast<Constant>(Mask);
2305 if (!C || !C->isAllOnesValue())
2306 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
2307 }
2308
2309 if (NumElts < 8) {
2310 int Indices[8];
2311 for (unsigned i = 0; i != NumElts; ++i)
2312 Indices[i] = i;
2313 for (unsigned i = NumElts; i != 8; ++i)
2314 Indices[i] = NumElts + i % NumElts;
2315    Vec = Builder.CreateShuffleVector(Vec,
2316                                      Constant::getNullValue(Vec->getType()),
2317 Indices);
2318 }
2319 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2320}
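// Worked example: a v4i1 compare result is widened with indices
// {0,1,2,3,4,5,6,7}, where lanes 4-7 read the null vector, giving a v8i1
// that bitcasts cleanly to the i8 the old intrinsics returned.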
2321
2322static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
2323 unsigned CC, bool Signed) {
2324 Value *Op0 = CI.getArgOperand(0);
2325 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2326
2327 Value *Cmp;
2328  if (CC == 3) {
2329    Cmp = Constant::getNullValue(
2330 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2331  } else if (CC == 7) {
2332    Cmp = Constant::getAllOnesValue(
2333 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2334  } else {
2335    ICmpInst::Predicate Pred;
2336 switch (CC) {
2337 default: llvm_unreachable("Unknown condition code");
2338 case 0: Pred = ICmpInst::ICMP_EQ; break;
2339 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2340 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2341 case 4: Pred = ICmpInst::ICMP_NE; break;
2342 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2343 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2344 }
2345 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
2346 }
2347
2348 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
2349
2350 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
2351}
2352
2353// Replace a masked intrinsic with an older unmasked intrinsic.
2354static Value *upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
2355 Intrinsic::ID IID) {
2356 Value *Rep =
2357 Builder.CreateIntrinsic(IID, {CI.getArgOperand(0), CI.getArgOperand(1)});
2358 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
2359}
2360
2361static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
2362 Value* A = CI.getArgOperand(0);
2363 Value* B = CI.getArgOperand(1);
2364 Value* Src = CI.getArgOperand(2);
2365 Value* Mask = CI.getArgOperand(3);
2366
2367 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
2368 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2369 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
2370 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
2371 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2372 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
2373}
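// The low mask bit selects between lane 0 of B and lane 0 of Src; the chosen
// scalar is inserted into lane 0 of A while A's upper lanes pass through.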
2374
2375static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
2376 Value* Op = CI.getArgOperand(0);
2377 Type* ReturnOp = CI.getType();
2378 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
2379 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
2380 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2381}
2382
2383// Replace intrinsic with unmasked version and a select.
2384static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
2385 CallBase &CI, Value *&Rep) {
2386 Name = Name.substr(12); // Remove avx512.mask.
2387
2388 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2389 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2390 Intrinsic::ID IID;
2391 if (Name.starts_with("max.p")) {
2392 if (VecWidth == 128 && EltWidth == 32)
2393 IID = Intrinsic::x86_sse_max_ps;
2394 else if (VecWidth == 128 && EltWidth == 64)
2395 IID = Intrinsic::x86_sse2_max_pd;
2396 else if (VecWidth == 256 && EltWidth == 32)
2397 IID = Intrinsic::x86_avx_max_ps_256;
2398 else if (VecWidth == 256 && EltWidth == 64)
2399 IID = Intrinsic::x86_avx_max_pd_256;
2400 else
2401 llvm_unreachable("Unexpected intrinsic");
2402 } else if (Name.starts_with("min.p")) {
2403 if (VecWidth == 128 && EltWidth == 32)
2404 IID = Intrinsic::x86_sse_min_ps;
2405 else if (VecWidth == 128 && EltWidth == 64)
2406 IID = Intrinsic::x86_sse2_min_pd;
2407 else if (VecWidth == 256 && EltWidth == 32)
2408 IID = Intrinsic::x86_avx_min_ps_256;
2409 else if (VecWidth == 256 && EltWidth == 64)
2410 IID = Intrinsic::x86_avx_min_pd_256;
2411 else
2412 llvm_unreachable("Unexpected intrinsic");
2413 } else if (Name.starts_with("pshuf.b.")) {
2414 if (VecWidth == 128)
2415 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2416 else if (VecWidth == 256)
2417 IID = Intrinsic::x86_avx2_pshuf_b;
2418 else if (VecWidth == 512)
2419 IID = Intrinsic::x86_avx512_pshuf_b_512;
2420 else
2421 llvm_unreachable("Unexpected intrinsic");
2422 } else if (Name.starts_with("pmul.hr.sw.")) {
2423 if (VecWidth == 128)
2424 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2425 else if (VecWidth == 256)
2426 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2427 else if (VecWidth == 512)
2428 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2429 else
2430 llvm_unreachable("Unexpected intrinsic");
2431 } else if (Name.starts_with("pmulh.w.")) {
2432 if (VecWidth == 128)
2433 IID = Intrinsic::x86_sse2_pmulh_w;
2434 else if (VecWidth == 256)
2435 IID = Intrinsic::x86_avx2_pmulh_w;
2436 else if (VecWidth == 512)
2437 IID = Intrinsic::x86_avx512_pmulh_w_512;
2438 else
2439 llvm_unreachable("Unexpected intrinsic");
2440 } else if (Name.starts_with("pmulhu.w.")) {
2441 if (VecWidth == 128)
2442 IID = Intrinsic::x86_sse2_pmulhu_w;
2443 else if (VecWidth == 256)
2444 IID = Intrinsic::x86_avx2_pmulhu_w;
2445 else if (VecWidth == 512)
2446 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2447 else
2448 llvm_unreachable("Unexpected intrinsic");
2449 } else if (Name.starts_with("pmaddw.d.")) {
2450 if (VecWidth == 128)
2451 IID = Intrinsic::x86_sse2_pmadd_wd;
2452 else if (VecWidth == 256)
2453 IID = Intrinsic::x86_avx2_pmadd_wd;
2454 else if (VecWidth == 512)
2455 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2456 else
2457 llvm_unreachable("Unexpected intrinsic");
2458 } else if (Name.starts_with("pmaddubs.w.")) {
2459 if (VecWidth == 128)
2460 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2461 else if (VecWidth == 256)
2462 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2463 else if (VecWidth == 512)
2464 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2465 else
2466 llvm_unreachable("Unexpected intrinsic");
2467 } else if (Name.starts_with("packsswb.")) {
2468 if (VecWidth == 128)
2469 IID = Intrinsic::x86_sse2_packsswb_128;
2470 else if (VecWidth == 256)
2471 IID = Intrinsic::x86_avx2_packsswb;
2472 else if (VecWidth == 512)
2473 IID = Intrinsic::x86_avx512_packsswb_512;
2474 else
2475 llvm_unreachable("Unexpected intrinsic");
2476 } else if (Name.starts_with("packssdw.")) {
2477 if (VecWidth == 128)
2478 IID = Intrinsic::x86_sse2_packssdw_128;
2479 else if (VecWidth == 256)
2480 IID = Intrinsic::x86_avx2_packssdw;
2481 else if (VecWidth == 512)
2482 IID = Intrinsic::x86_avx512_packssdw_512;
2483 else
2484 llvm_unreachable("Unexpected intrinsic");
2485 } else if (Name.starts_with("packuswb.")) {
2486 if (VecWidth == 128)
2487 IID = Intrinsic::x86_sse2_packuswb_128;
2488 else if (VecWidth == 256)
2489 IID = Intrinsic::x86_avx2_packuswb;
2490 else if (VecWidth == 512)
2491 IID = Intrinsic::x86_avx512_packuswb_512;
2492 else
2493 llvm_unreachable("Unexpected intrinsic");
2494 } else if (Name.starts_with("packusdw.")) {
2495 if (VecWidth == 128)
2496 IID = Intrinsic::x86_sse41_packusdw;
2497 else if (VecWidth == 256)
2498 IID = Intrinsic::x86_avx2_packusdw;
2499 else if (VecWidth == 512)
2500 IID = Intrinsic::x86_avx512_packusdw_512;
2501 else
2502 llvm_unreachable("Unexpected intrinsic");
2503 } else if (Name.starts_with("vpermilvar.")) {
2504 if (VecWidth == 128 && EltWidth == 32)
2505 IID = Intrinsic::x86_avx_vpermilvar_ps;
2506 else if (VecWidth == 128 && EltWidth == 64)
2507 IID = Intrinsic::x86_avx_vpermilvar_pd;
2508 else if (VecWidth == 256 && EltWidth == 32)
2509 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2510 else if (VecWidth == 256 && EltWidth == 64)
2511 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2512 else if (VecWidth == 512 && EltWidth == 32)
2513 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2514 else if (VecWidth == 512 && EltWidth == 64)
2515 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2516 else
2517 llvm_unreachable("Unexpected intrinsic");
2518 } else if (Name == "cvtpd2dq.256") {
2519 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2520 } else if (Name == "cvtpd2ps.256") {
2521 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2522 } else if (Name == "cvttpd2dq.256") {
2523 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2524 } else if (Name == "cvttps2dq.128") {
2525 IID = Intrinsic::x86_sse2_cvttps2dq;
2526 } else if (Name == "cvttps2dq.256") {
2527 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2528 } else if (Name.starts_with("permvar.")) {
2529 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2530 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2531 IID = Intrinsic::x86_avx2_permps;
2532 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2533 IID = Intrinsic::x86_avx2_permd;
2534 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2535 IID = Intrinsic::x86_avx512_permvar_df_256;
2536 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2537 IID = Intrinsic::x86_avx512_permvar_di_256;
2538 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2539 IID = Intrinsic::x86_avx512_permvar_sf_512;
2540 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2541 IID = Intrinsic::x86_avx512_permvar_si_512;
2542 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2543 IID = Intrinsic::x86_avx512_permvar_df_512;
2544 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2545 IID = Intrinsic::x86_avx512_permvar_di_512;
2546 else if (VecWidth == 128 && EltWidth == 16)
2547 IID = Intrinsic::x86_avx512_permvar_hi_128;
2548 else if (VecWidth == 256 && EltWidth == 16)
2549 IID = Intrinsic::x86_avx512_permvar_hi_256;
2550 else if (VecWidth == 512 && EltWidth == 16)
2551 IID = Intrinsic::x86_avx512_permvar_hi_512;
2552 else if (VecWidth == 128 && EltWidth == 8)
2553 IID = Intrinsic::x86_avx512_permvar_qi_128;
2554 else if (VecWidth == 256 && EltWidth == 8)
2555 IID = Intrinsic::x86_avx512_permvar_qi_256;
2556 else if (VecWidth == 512 && EltWidth == 8)
2557 IID = Intrinsic::x86_avx512_permvar_qi_512;
2558 else
2559 llvm_unreachable("Unexpected intrinsic");
2560 } else if (Name.starts_with("dbpsadbw.")) {
2561 if (VecWidth == 128)
2562 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2563 else if (VecWidth == 256)
2564 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2565 else if (VecWidth == 512)
2566 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2567 else
2568 llvm_unreachable("Unexpected intrinsic");
2569 } else if (Name.starts_with("pmultishift.qb.")) {
2570 if (VecWidth == 128)
2571 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2572 else if (VecWidth == 256)
2573 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2574 else if (VecWidth == 512)
2575 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2576 else
2577 llvm_unreachable("Unexpected intrinsic");
2578 } else if (Name.starts_with("conflict.")) {
2579 if (Name[9] == 'd' && VecWidth == 128)
2580 IID = Intrinsic::x86_avx512_conflict_d_128;
2581 else if (Name[9] == 'd' && VecWidth == 256)
2582 IID = Intrinsic::x86_avx512_conflict_d_256;
2583 else if (Name[9] == 'd' && VecWidth == 512)
2584 IID = Intrinsic::x86_avx512_conflict_d_512;
2585 else if (Name[9] == 'q' && VecWidth == 128)
2586 IID = Intrinsic::x86_avx512_conflict_q_128;
2587 else if (Name[9] == 'q' && VecWidth == 256)
2588 IID = Intrinsic::x86_avx512_conflict_q_256;
2589 else if (Name[9] == 'q' && VecWidth == 512)
2590 IID = Intrinsic::x86_avx512_conflict_q_512;
2591 else
2592 llvm_unreachable("Unexpected intrinsic");
2593 } else if (Name.starts_with("pavg.")) {
2594 if (Name[5] == 'b' && VecWidth == 128)
2595 IID = Intrinsic::x86_sse2_pavg_b;
2596 else if (Name[5] == 'b' && VecWidth == 256)
2597 IID = Intrinsic::x86_avx2_pavg_b;
2598 else if (Name[5] == 'b' && VecWidth == 512)
2599 IID = Intrinsic::x86_avx512_pavg_b_512;
2600 else if (Name[5] == 'w' && VecWidth == 128)
2601 IID = Intrinsic::x86_sse2_pavg_w;
2602 else if (Name[5] == 'w' && VecWidth == 256)
2603 IID = Intrinsic::x86_avx2_pavg_w;
2604 else if (Name[5] == 'w' && VecWidth == 512)
2605 IID = Intrinsic::x86_avx512_pavg_w_512;
2606 else
2607 llvm_unreachable("Unexpected intrinsic");
2608 } else
2609 return false;
2610
2611 SmallVector<Value *, 4> Args(CI.args());
2612 Args.pop_back();
2613 Args.pop_back();
2614 Rep = Builder.CreateIntrinsic(IID, Args);
2615 unsigned NumArgs = CI.arg_size();
2616 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2617 CI.getArgOperand(NumArgs - 2));
2618 return true;
2619}
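// For example, avx512.mask.max.ps.256(a, b, passthru, mask) becomes
// select(mask, llvm.x86.avx.max.ps.256(a, b), passthru): the two trailing
// mask operands are popped off and re-expressed via emitX86Select.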
2620
2621/// Upgrade the comment in a call to inline asm that represents an ObjC
2622/// retain/release marker.
2623void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2624 size_t Pos;
2625 if (AsmStr->find("mov\tfp") == 0 &&
2626 AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
2627 (Pos = AsmStr->find("# marker")) != std::string::npos) {
2628 AsmStr->replace(Pos, 1, ";");
2629 }
2630}
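// Illustratively, an asm string such as
//   "mov\tfp, fp\t\t# marker for objc_retainAutoreleaseReturnValue"
// has the '#' that introduces the marker replaced with ';'.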
2631
2632static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
2633 Function *F, IRBuilder<> &Builder) {
2634 Value *Rep = nullptr;
2635
2636 if (Name == "abs.i" || Name == "abs.ll") {
2637 Value *Arg = CI->getArgOperand(0);
2638 Value *Neg = Builder.CreateNeg(Arg, "neg");
2639 Value *Cmp = Builder.CreateICmpSGE(
2640 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
2641 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
2642 } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
2643 Type *Ty = (Name == "abs.bf16")
2644 ? Builder.getBFloatTy()
2645 : FixedVectorType::get(Builder.getBFloatTy(), 2);
2646 Value *Arg = Builder.CreateBitCast(CI->getArgOperand(0), Ty);
2647 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2648 Rep = Builder.CreateBitCast(Abs, CI->getType());
2649 } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
2650 Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2651 : Intrinsic::nvvm_fabs;
2652 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2653 } else if (Name.consume_front("ex2.approx.")) {
2654 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
2655 Intrinsic::ID IID = Name.starts_with("ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2656 : Intrinsic::nvvm_ex2_approx;
2657 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2658 } else if (Name.starts_with("atomic.load.add.f32.p") ||
2659 Name.starts_with("atomic.load.add.f64.p")) {
2660 Value *Ptr = CI->getArgOperand(0);
2661 Value *Val = CI->getArgOperand(1);
2662    Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
2663                                  AtomicOrdering::SequentiallyConsistent);
2664 } else if (Name.starts_with("atomic.load.inc.32.p") ||
2665 Name.starts_with("atomic.load.dec.32.p")) {
2666 Value *Ptr = CI->getArgOperand(0);
2667 Value *Val = CI->getArgOperand(1);
2668    auto Op = Name.starts_with("atomic.load.inc") ? AtomicRMWInst::UIncWrap
2669                                                  : AtomicRMWInst::UDecWrap;
2670    Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
2671                                  AtomicOrdering::SequentiallyConsistent);
2672 } else if (Name.consume_front("max.") &&
2673 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2674 Name == "ui" || Name == "ull")) {
2675 Value *Arg0 = CI->getArgOperand(0);
2676 Value *Arg1 = CI->getArgOperand(1);
2677 Value *Cmp = Name.starts_with("u")
2678 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
2679 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
2680 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
2681 } else if (Name.consume_front("min.") &&
2682 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2683 Name == "ui" || Name == "ull")) {
2684 Value *Arg0 = CI->getArgOperand(0);
2685 Value *Arg1 = CI->getArgOperand(1);
2686 Value *Cmp = Name.starts_with("u")
2687 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
2688 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
2689 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
2690 } else if (Name == "clz.ll") {
2691 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2692 Value *Arg = CI->getArgOperand(0);
2693 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
2694 {Arg, Builder.getFalse()},
2695 /*FMFSource=*/nullptr, "ctlz");
2696 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2697 } else if (Name == "popc.ll") {
2698 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2699 // i64.
2700 Value *Arg = CI->getArgOperand(0);
2701 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
2702 Arg, /*FMFSource=*/nullptr, "ctpop");
2703 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
2704 } else if (Name == "h2f") {
2705 Rep = Builder.CreateIntrinsic(Intrinsic::convert_from_fp16,
2706 {Builder.getFloatTy()}, CI->getArgOperand(0),
2707 /*FMFSource=*/nullptr, "h2f");
2708 } else if (Name.consume_front("bitcast.") &&
2709 (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2710 Name == "d2ll")) {
2711 Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
2712 } else if (Name == "rotate.b32") {
2713 Value *Arg = CI->getOperand(0);
2714 Value *ShiftAmt = CI->getOperand(1);
2715 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2716 {Arg, Arg, ShiftAmt});
2717 } else if (Name == "rotate.b64") {
2718 Type *Int64Ty = Builder.getInt64Ty();
2719 Value *Arg = CI->getOperand(0);
2720 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2721 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2722 {Arg, Arg, ZExtShiftAmt});
2723 } else if (Name == "rotate.right.b64") {
2724 Type *Int64Ty = Builder.getInt64Ty();
2725 Value *Arg = CI->getOperand(0);
2726 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2727 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2728 {Arg, Arg, ZExtShiftAmt});
2729 } else if (Name == "swap.lo.hi.b64") {
2730 Type *Int64Ty = Builder.getInt64Ty();
2731 Value *Arg = CI->getOperand(0);
2732 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2733 {Arg, Arg, Builder.getInt64(32)});
2734 } else if ((Name.consume_front("ptr.gen.to.") &&
2735 consumeNVVMPtrAddrSpace(Name)) ||
2736 (Name.consume_front("ptr.") && consumeNVVMPtrAddrSpace(Name) &&
2737 Name.starts_with(".to.gen"))) {
2738 Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
2739 } else if (Name.consume_front("ldg.global")) {
2740 Value *Ptr = CI->getArgOperand(0);
2741 Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
2742 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
2743 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2744 Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
2745 MDNode *MD = MDNode::get(Builder.getContext(), {});
2746 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2747 return LD;
2748 } else if (Name == "tanh.approx.f32") {
2749 // nvvm.tanh.approx.f32 -> afn llvm.tanh.f32
2750 FastMathFlags FMF;
2751 FMF.setApproxFunc();
2752 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),
2753 FMF);
2754 } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
2755 Value *Arg =
2756 Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
2757 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2758 {}, {Arg});
2759 } else if (Name == "barrier") {
2760 Rep = Builder.CreateIntrinsic(
2761 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2762 {CI->getArgOperand(0), CI->getArgOperand(1)});
2763 } else if (Name == "barrier.sync") {
2764 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2765 {CI->getArgOperand(0)});
2766 } else if (Name == "barrier.sync.cnt") {
2767 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2768 {CI->getArgOperand(0), CI->getArgOperand(1)});
2769 } else if (Name == "barrier0.popc" || Name == "barrier0.and" ||
2770 Name == "barrier0.or") {
2771 Value *C = CI->getArgOperand(0);
2772 C = Builder.CreateICmpNE(C, Builder.getInt32(0));
2773
2774    Intrinsic::ID IID =
2775        StringSwitch<Intrinsic::ID>(Name)
2776 .Case("barrier0.popc",
2777 Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
2778 .Case("barrier0.and",
2779 Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
2780 .Case("barrier0.or",
2781 Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
2782 Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0), C});
2783 Rep = Builder.CreateZExt(Bar, CI->getType());
2784  } else {
2785    Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
2786 if (IID != Intrinsic::not_intrinsic &&
2787 !F->getReturnType()->getScalarType()->isBFloatTy()) {
2788 rename(F);
2789    Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
2790    SmallVector<Value *, 2> Args;
2791 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2792 Value *Arg = CI->getArgOperand(I);
2793 Type *OldType = Arg->getType();
2794 Type *NewType = NewFn->getArg(I)->getType();
2795 Args.push_back(
2796 (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2797 ? Builder.CreateBitCast(Arg, NewType)
2798 : Arg);
2799 }
2800 Rep = Builder.CreateCall(NewFn, Args);
2801 if (F->getReturnType()->isIntegerTy())
2802 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
2803 }
2804 }
2805
2806 return Rep;
2807}
2808
2809static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2810 IRBuilder<> &Builder) {
2811 LLVMContext &C = F->getContext();
2812 Value *Rep = nullptr;
2813
2814  if (Name.starts_with("sse4a.movnt.")) {
2815    SmallVector<Metadata *, 1> Elts;
2816 Elts.push_back(
2817 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2818 MDNode *Node = MDNode::get(C, Elts);
2819
2820 Value *Arg0 = CI->getArgOperand(0);
2821 Value *Arg1 = CI->getArgOperand(1);
2822
2823 // Nontemporal (unaligned) store of the 0'th element of the float/double
2824 // vector.
2825 Value *Extract =
2826 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2827
2828 StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
2829 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2830 } else if (Name.starts_with("avx.movnt.") ||
2831             Name.starts_with("avx512.storent.")) {
2832    SmallVector<Metadata *, 1> Elts;
2833 Elts.push_back(
2834 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2835 MDNode *Node = MDNode::get(C, Elts);
2836
2837 Value *Arg0 = CI->getArgOperand(0);
2838 Value *Arg1 = CI->getArgOperand(1);
2839
2840 StoreInst *SI = Builder.CreateAlignedStore(
2841 Arg1, Arg0,
2843 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2844 } else if (Name == "sse2.storel.dq") {
2845 Value *Arg0 = CI->getArgOperand(0);
2846 Value *Arg1 = CI->getArgOperand(1);
2847
2848 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2849 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2850 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2851 Builder.CreateAlignedStore(Elt, Arg0, Align(1));
2852 } else if (Name.starts_with("sse.storeu.") ||
2853 Name.starts_with("sse2.storeu.") ||
2854 Name.starts_with("avx.storeu.")) {
2855 Value *Arg0 = CI->getArgOperand(0);
2856 Value *Arg1 = CI->getArgOperand(1);
2857 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2858 } else if (Name == "avx512.mask.store.ss") {
2859 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2860 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2861 Mask, false);
2862 } else if (Name.starts_with("avx512.mask.store")) {
2863 // "avx512.mask.storeu." or "avx512.mask.store."
2864 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2865 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2866 CI->getArgOperand(2), Aligned);
2867 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2868 // Upgrade packed integer vector compare intrinsics to compare instructions.
2869    // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2870 bool CmpEq = Name[9] == 'e';
2871 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2872 CI->getArgOperand(0), CI->getArgOperand(1));
2873 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2874 } else if (Name.starts_with("avx512.broadcastm")) {
2875 Type *ExtTy = Type::getInt32Ty(C);
2876 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2877 ExtTy = Type::getInt64Ty(C);
2878 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2879 ExtTy->getPrimitiveSizeInBits();
2880 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2881 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2882 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2883 Value *Vec = CI->getArgOperand(0);
2884 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2885 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
2886 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2887 } else if (Name.starts_with("avx.sqrt.p") ||
2888 Name.starts_with("sse2.sqrt.p") ||
2889 Name.starts_with("sse.sqrt.p")) {
2890 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2891 {CI->getArgOperand(0)});
2892 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2893 if (CI->arg_size() == 4 &&
2894 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2895 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2896 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2897 : Intrinsic::x86_avx512_sqrt_pd_512;
2898
2899 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2900 Rep = Builder.CreateIntrinsic(IID, Args);
2901 } else {
2902 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2903 {CI->getArgOperand(0)});
2904 }
2905 Rep =
2906 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2907 } else if (Name.starts_with("avx512.ptestm") ||
2908 Name.starts_with("avx512.ptestnm")) {
2909 Value *Op0 = CI->getArgOperand(0);
2910 Value *Op1 = CI->getArgOperand(1);
2911 Value *Mask = CI->getArgOperand(2);
2912 Rep = Builder.CreateAnd(Op0, Op1);
2913    llvm::Type *Ty = Op0->getType();
2914    Value *Zero = llvm::Constant::getNullValue(Ty);
2915    ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2916                                   ? ICmpInst::ICMP_NE
2917                                   : ICmpInst::ICMP_EQ;
2918    Rep = Builder.CreateICmp(Pred, Rep, Zero);
2919 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2920 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2921 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2922 ->getNumElements();
2923 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2924 Rep =
2925 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2926 } else if (Name.starts_with("avx512.kunpck")) {
2927 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2928 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2929 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2930 int Indices[64];
2931 for (unsigned i = 0; i != NumElts; ++i)
2932 Indices[i] = i;
2933
2934 // First extract half of each vector. This gives better codegen than
2935 // doing it in a single shuffle.
2936 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2937 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2938 // Concat the vectors.
2939 // NOTE: Operands have to be swapped to match intrinsic definition.
2940 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2941 Rep = Builder.CreateBitCast(Rep, CI->getType());
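    // e.g. for the 16-bit kunpck the low 8 bits of each operand are kept and
    // concatenated with the second argument's bits landing in the low half,
    // hence the swapped shuffle operands noted above.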
2942 } else if (Name == "avx512.kand.w") {
2943 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2944 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2945 Rep = Builder.CreateAnd(LHS, RHS);
2946 Rep = Builder.CreateBitCast(Rep, CI->getType());
2947 } else if (Name == "avx512.kandn.w") {
2948 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2949 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2950 LHS = Builder.CreateNot(LHS);
2951 Rep = Builder.CreateAnd(LHS, RHS);
2952 Rep = Builder.CreateBitCast(Rep, CI->getType());
2953 } else if (Name == "avx512.kor.w") {
2954 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2955 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2956 Rep = Builder.CreateOr(LHS, RHS);
2957 Rep = Builder.CreateBitCast(Rep, CI->getType());
2958 } else if (Name == "avx512.kxor.w") {
2959 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2960 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2961 Rep = Builder.CreateXor(LHS, RHS);
2962 Rep = Builder.CreateBitCast(Rep, CI->getType());
2963 } else if (Name == "avx512.kxnor.w") {
2964 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2965 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2966 LHS = Builder.CreateNot(LHS);
2967 Rep = Builder.CreateXor(LHS, RHS);
2968 Rep = Builder.CreateBitCast(Rep, CI->getType());
2969 } else if (Name == "avx512.knot.w") {
2970 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2971 Rep = Builder.CreateNot(Rep);
2972 Rep = Builder.CreateBitCast(Rep, CI->getType());
2973 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
2974 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2975 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2976 Rep = Builder.CreateOr(LHS, RHS);
2977 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2978 Value *C;
2979 if (Name[14] == 'c')
2980 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2981 else
2982 C = ConstantInt::getNullValue(Builder.getInt16Ty());
2983 Rep = Builder.CreateICmpEQ(Rep, C);
2984 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2985 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2986 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2987 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2988 Name == "sse.div.ss" || Name == "sse2.div.sd") {
2989 Type *I32Ty = Type::getInt32Ty(C);
2990 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2991 ConstantInt::get(I32Ty, 0));
2992 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2993 ConstantInt::get(I32Ty, 0));
2994 Value *EltOp;
2995 if (Name.contains(".add."))
2996 EltOp = Builder.CreateFAdd(Elt0, Elt1);
2997 else if (Name.contains(".sub."))
2998 EltOp = Builder.CreateFSub(Elt0, Elt1);
2999 else if (Name.contains(".mul."))
3000 EltOp = Builder.CreateFMul(Elt0, Elt1);
3001 else
3002 EltOp = Builder.CreateFDiv(Elt0, Elt1);
3003 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
3004 ConstantInt::get(I32Ty, 0));
3005 } else if (Name.starts_with("avx512.mask.pcmp")) {
3006 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
3007 bool CmpEq = Name[16] == 'e';
3008 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
3009 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
3010 Type *OpTy = CI->getArgOperand(0)->getType();
3011 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3012 Intrinsic::ID IID;
3013 switch (VecWidth) {
3014 default:
3015 llvm_unreachable("Unexpected intrinsic");
3016 case 128:
3017 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3018 break;
3019 case 256:
3020 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3021 break;
3022 case 512:
3023 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3024 break;
3025 }
3026
3027 Rep =
3028 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3029 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3030 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
3031 Type *OpTy = CI->getArgOperand(0)->getType();
3032 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3033 unsigned EltWidth = OpTy->getScalarSizeInBits();
3034 Intrinsic::ID IID;
3035 if (VecWidth == 128 && EltWidth == 32)
3036 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3037 else if (VecWidth == 256 && EltWidth == 32)
3038 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3039 else if (VecWidth == 512 && EltWidth == 32)
3040 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3041 else if (VecWidth == 128 && EltWidth == 64)
3042 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3043 else if (VecWidth == 256 && EltWidth == 64)
3044 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3045 else if (VecWidth == 512 && EltWidth == 64)
3046 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3047 else
3048 llvm_unreachable("Unexpected intrinsic");
3049
3050 Rep =
3051 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3052 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3053 } else if (Name.starts_with("avx512.cmp.p")) {
3054 SmallVector<Value *, 4> Args(CI->args());
3055 Type *OpTy = Args[0]->getType();
3056 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3057 unsigned EltWidth = OpTy->getScalarSizeInBits();
3058 Intrinsic::ID IID;
3059 if (VecWidth == 128 && EltWidth == 32)
3060 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3061 else if (VecWidth == 256 && EltWidth == 32)
3062 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3063 else if (VecWidth == 512 && EltWidth == 32)
3064 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3065 else if (VecWidth == 128 && EltWidth == 64)
3066 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3067 else if (VecWidth == 256 && EltWidth == 64)
3068 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3069 else if (VecWidth == 512 && EltWidth == 64)
3070 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3071 else
3072 llvm_unreachable("Unexpected intrinsic");
3073
3074    Value *Mask = Constant::getAllOnesValue(CI->getType());
3075 if (VecWidth == 512)
3076 std::swap(Mask, Args.back());
3077 Args.push_back(Mask);
3078
3079 Rep = Builder.CreateIntrinsic(IID, Args);
3080 } else if (Name.starts_with("avx512.mask.cmp.")) {
3081 // Integer compare intrinsics.
3082 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3083 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
3084 } else if (Name.starts_with("avx512.mask.ucmp.")) {
3085 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3086 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
3087 } else if (Name.starts_with("avx512.cvtb2mask.") ||
3088 Name.starts_with("avx512.cvtw2mask.") ||
3089 Name.starts_with("avx512.cvtd2mask.") ||
3090 Name.starts_with("avx512.cvtq2mask.")) {
3091 Value *Op = CI->getArgOperand(0);
3092 Value *Zero = llvm::Constant::getNullValue(Op->getType());
3093 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
3094 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
3095 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
3096 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
3097 Name.starts_with("avx512.mask.pabs")) {
3098 Rep = upgradeAbs(Builder, *CI);
3099 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
3100 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
3101 Name.starts_with("avx512.mask.pmaxs")) {
3102 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
3103 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
3104 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
3105 Name.starts_with("avx512.mask.pmaxu")) {
3106 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
3107 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
3108 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
3109 Name.starts_with("avx512.mask.pmins")) {
3110 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
3111 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
3112 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
3113 Name.starts_with("avx512.mask.pminu")) {
3114 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
3115 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
3116 Name == "avx512.pmulu.dq.512" ||
3117 Name.starts_with("avx512.mask.pmulu.dq.")) {
3118 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
3119 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
3120 Name == "avx512.pmul.dq.512" ||
3121 Name.starts_with("avx512.mask.pmul.dq.")) {
3122 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
3123 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
3124 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
3125 Rep =
3126 Builder.CreateSIToFP(CI->getArgOperand(1),
3127 cast<VectorType>(CI->getType())->getElementType());
3128 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3129 } else if (Name == "avx512.cvtusi2sd") {
3130 Rep =
3131 Builder.CreateUIToFP(CI->getArgOperand(1),
3132 cast<VectorType>(CI->getType())->getElementType());
3133 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3134 } else if (Name == "sse2.cvtss2sd") {
3135 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
3136 Rep = Builder.CreateFPExt(
3137 Rep, cast<VectorType>(CI->getType())->getElementType());
3138 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3139 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
3140 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
3141 Name.starts_with("avx512.mask.cvtdq2pd.") ||
3142 Name.starts_with("avx512.mask.cvtudq2pd.") ||
3143 Name.starts_with("avx512.mask.cvtdq2ps.") ||
3144 Name.starts_with("avx512.mask.cvtudq2ps.") ||
3145 Name.starts_with("avx512.mask.cvtqq2pd.") ||
3146 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
3147 Name == "avx512.mask.cvtqq2ps.256" ||
3148 Name == "avx512.mask.cvtqq2ps.512" ||
3149 Name == "avx512.mask.cvtuqq2ps.256" ||
3150 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
3151 Name == "avx.cvt.ps2.pd.256" ||
3152 Name == "avx512.mask.cvtps2pd.128" ||
3153 Name == "avx512.mask.cvtps2pd.256") {
3154 auto *DstTy = cast<FixedVectorType>(CI->getType());
3155 Rep = CI->getArgOperand(0);
3156 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3157
3158 unsigned NumDstElts = DstTy->getNumElements();
3159 if (NumDstElts < SrcTy->getNumElements()) {
3160 assert(NumDstElts == 2 && "Unexpected vector size");
3161 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
3162 }
3163
3164 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3165 bool IsUnsigned = Name.contains("cvtu");
3166 if (IsPS2PD)
3167 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
3168 else if (CI->arg_size() == 4 &&
3169 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
3170 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
3171 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3172 : Intrinsic::x86_avx512_sitofp_round;
3173 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3174 {Rep, CI->getArgOperand(3)});
3175 } else {
3176 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
3177 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
3178 }
3179
3180 if (CI->arg_size() >= 3)
3181 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3182 CI->getArgOperand(1));
3183 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
3184 Name.starts_with("vcvtph2ps.")) {
3185 auto *DstTy = cast<FixedVectorType>(CI->getType());
3186 Rep = CI->getArgOperand(0);
3187 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3188 unsigned NumDstElts = DstTy->getNumElements();
3189 if (NumDstElts != SrcTy->getNumElements()) {
3190 assert(NumDstElts == 4 && "Unexpected vector size");
3191 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
3192 }
3193 Rep = Builder.CreateBitCast(
3194 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
3195 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
3196 if (CI->arg_size() >= 3)
3197 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3198 CI->getArgOperand(1));
3199 } else if (Name.starts_with("avx512.mask.load")) {
3200 // "avx512.mask.loadu." or "avx512.mask.load."
3201 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
3202 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3203 CI->getArgOperand(2), Aligned);
3204 } else if (Name.starts_with("avx512.mask.expand.load.")) {
3205 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3206 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3207 ResultTy->getNumElements());
3208
3209 Rep = Builder.CreateIntrinsic(
3210 Intrinsic::masked_expandload, ResultTy,
3211 {CI->getOperand(0), MaskVec, CI->getOperand(1)});
3212 } else if (Name.starts_with("avx512.mask.compress.store.")) {
3213 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
3214 Value *MaskVec =
3215 getX86MaskVec(Builder, CI->getArgOperand(2),
3216 cast<FixedVectorType>(ResultTy)->getNumElements());
3217
3218 Rep = Builder.CreateIntrinsic(
3219 Intrinsic::masked_compressstore, ResultTy,
3220 {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
3221 } else if (Name.starts_with("avx512.mask.compress.") ||
3222 Name.starts_with("avx512.mask.expand.")) {
3223 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3224
3225 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3226 ResultTy->getNumElements());
3227
3228 bool IsCompress = Name[12] == 'c';
3229 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3230 : Intrinsic::x86_avx512_mask_expand;
3231 Rep = Builder.CreateIntrinsic(
3232 IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
3233 } else if (Name.starts_with("xop.vpcom")) {
3234 bool IsSigned;
3235 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
3236 Name.ends_with("uq"))
3237 IsSigned = false;
3238 else if (Name.ends_with("b") || Name.ends_with("w") ||
3239 Name.ends_with("d") || Name.ends_with("q"))
3240 IsSigned = true;
3241 else
3242 llvm_unreachable("Unknown suffix");
3243
3244 unsigned Imm;
3245 if (CI->arg_size() == 3) {
3246 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3247 } else {
3248 Name = Name.substr(9); // strip off "xop.vpcom"
3249 if (Name.starts_with("lt"))
3250 Imm = 0;
3251 else if (Name.starts_with("le"))
3252 Imm = 1;
3253 else if (Name.starts_with("gt"))
3254 Imm = 2;
3255 else if (Name.starts_with("ge"))
3256 Imm = 3;
3257 else if (Name.starts_with("eq"))
3258 Imm = 4;
3259 else if (Name.starts_with("ne"))
3260 Imm = 5;
3261 else if (Name.starts_with("false"))
3262 Imm = 6;
3263 else if (Name.starts_with("true"))
3264 Imm = 7;
3265 else
3266 llvm_unreachable("Unknown condition");
3267 }
3268
3269 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
3270 } else if (Name.starts_with("xop.vpcmov")) {
3271 Value *Sel = CI->getArgOperand(2);
3272 Value *NotSel = Builder.CreateNot(Sel);
3273 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
3274 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
3275 Rep = Builder.CreateOr(Sel0, Sel1);
3276 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
3277 Name.starts_with("avx512.mask.prol")) {
3278 Rep = upgradeX86Rotate(Builder, *CI, false);
3279 } else if (Name.starts_with("avx512.pror") ||
3280 Name.starts_with("avx512.mask.pror")) {
3281 Rep = upgradeX86Rotate(Builder, *CI, true);
3282 } else if (Name.starts_with("avx512.vpshld.") ||
3283 Name.starts_with("avx512.mask.vpshld") ||
3284 Name.starts_with("avx512.maskz.vpshld")) {
3285 bool ZeroMask = Name[11] == 'z';
3286 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
3287 } else if (Name.starts_with("avx512.vpshrd.") ||
3288 Name.starts_with("avx512.mask.vpshrd") ||
3289 Name.starts_with("avx512.maskz.vpshrd")) {
3290 bool ZeroMask = Name[11] == 'z';
3291 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
3292 } else if (Name == "sse42.crc32.64.8") {
3293 Value *Trunc0 =
3294 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
3295 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3296 {Trunc0, CI->getArgOperand(1)});
3297 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
3298 } else if (Name.starts_with("avx.vbroadcast.s") ||
3299 Name.starts_with("avx512.vbroadcast.s")) {
3300 // Replace broadcasts with a series of insertelements.
3301 auto *VecTy = cast<FixedVectorType>(CI->getType());
3302 Type *EltTy = VecTy->getElementType();
3303 unsigned EltNum = VecTy->getNumElements();
3304 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
3305 Type *I32Ty = Type::getInt32Ty(C);
3306 Rep = PoisonValue::get(VecTy);
3307 for (unsigned I = 0; I < EltNum; ++I)
3308 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
3309 } else if (Name.starts_with("sse41.pmovsx") ||
3310 Name.starts_with("sse41.pmovzx") ||
3311 Name.starts_with("avx2.pmovsx") ||
3312 Name.starts_with("avx2.pmovzx") ||
3313 Name.starts_with("avx512.mask.pmovsx") ||
3314 Name.starts_with("avx512.mask.pmovzx")) {
3315 auto *DstTy = cast<FixedVectorType>(CI->getType());
3316 unsigned NumDstElts = DstTy->getNumElements();
3317
3318 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3319 SmallVector<int, 8> ShuffleMask(NumDstElts);
3320 for (unsigned i = 0; i != NumDstElts; ++i)
3321 ShuffleMask[i] = i;
3322
3323 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
3324
3325 bool DoSext = Name.contains("pmovsx");
3326 Rep =
3327 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3328 // If there are 3 arguments, it's a masked intrinsic so we need a select.
3329 if (CI->arg_size() == 3)
3330 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3331 CI->getArgOperand(1));
3332 } else if (Name == "avx512.mask.pmov.qd.256" ||
3333 Name == "avx512.mask.pmov.qd.512" ||
3334 Name == "avx512.mask.pmov.wb.256" ||
3335 Name == "avx512.mask.pmov.wb.512") {
3336 Type *Ty = CI->getArgOperand(1)->getType();
3337 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
3338 Rep =
3339 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3340 } else if (Name.starts_with("avx.vbroadcastf128") ||
3341 Name == "avx2.vbroadcasti128") {
3342 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3343 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
3344 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3345 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
3346 Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
3347 if (NumSrcElts == 2)
3348 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
3349 else
3350 Rep = Builder.CreateShuffleVector(Load,
3351 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3352 } else if (Name.starts_with("avx512.mask.shuf.i") ||
3353 Name.starts_with("avx512.mask.shuf.f")) {
3354 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3355 Type *VT = CI->getType();
3356 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3357 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3358 unsigned ControlBitsMask = NumLanes - 1;
3359 unsigned NumControlBits = NumLanes / 2;
3360 SmallVector<int, 8> ShuffleMask;
3361
3362 for (unsigned l = 0; l != NumLanes; ++l) {
3363 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3364 // Lanes in the second half of the destination come from the other source.
3365 if (l >= NumLanes / 2)
3366 LaneMask += NumLanes;
3367 for (unsigned i = 0; i != NumElementsInLane; ++i)
3368 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3369 }
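// Example: for a 512-bit shuf.i32x4 with Imm = 0x44 (0b01000100), the lane
// selectors are 0 and 1 from the first source and 0 and 1 from the second,
// so ShuffleMask selects elements 0-7 of Op0 and elements 16-23 of Op1.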
3370 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3371 CI->getArgOperand(1), ShuffleMask);
3372 Rep =
3373 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3374 } else if (Name.starts_with("avx512.mask.broadcastf") ||
3375 Name.starts_with("avx512.mask.broadcasti")) {
3376 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3377 ->getNumElements();
3378 unsigned NumDstElts =
3379 cast<FixedVectorType>(CI->getType())->getNumElements();
3380
3381 SmallVector<int, 8> ShuffleMask(NumDstElts);
3382 for (unsigned i = 0; i != NumDstElts; ++i)
3383 ShuffleMask[i] = i % NumSrcElts;
3384
3385 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3386 CI->getArgOperand(0), ShuffleMask);
3387 Rep =
3388 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3389 } else if (Name.starts_with("avx2.pbroadcast") ||
3390 Name.starts_with("avx2.vbroadcast") ||
3391 Name.starts_with("avx512.pbroadcast") ||
3392 Name.starts_with("avx512.mask.broadcast.s")) {
3393 // Replace vp?broadcasts with a vector shuffle.
3394 Value *Op = CI->getArgOperand(0);
3395 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3396 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3397 SmallVector<int, 8> M;
3398 ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
3399 Rep = Builder.CreateShuffleVector(Op, M);
3400
3401 if (CI->arg_size() == 3)
3402 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3403 CI->getArgOperand(1));
3404 } else if (Name.starts_with("sse2.padds.") ||
3405 Name.starts_with("avx2.padds.") ||
3406 Name.starts_with("avx512.padds.") ||
3407 Name.starts_with("avx512.mask.padds.")) {
3408 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3409 } else if (Name.starts_with("sse2.psubs.") ||
3410 Name.starts_with("avx2.psubs.") ||
3411 Name.starts_with("avx512.psubs.") ||
3412 Name.starts_with("avx512.mask.psubs.")) {
3413 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3414 } else if (Name.starts_with("sse2.paddus.") ||
3415 Name.starts_with("avx2.paddus.") ||
3416 Name.starts_with("avx512.mask.paddus.")) {
3417 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3418 } else if (Name.starts_with("sse2.psubus.") ||
3419 Name.starts_with("avx2.psubus.") ||
3420 Name.starts_with("avx512.mask.psubus.")) {
3421 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3422 } else if (Name.starts_with("avx512.mask.palignr.")) {
3423 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3424 CI->getArgOperand(1), CI->getArgOperand(2),
3425 CI->getArgOperand(3), CI->getArgOperand(4),
3426 false);
3427 } else if (Name.starts_with("avx512.mask.valign.")) {
3428 Rep = upgradeX86ALIGNIntrinsics(
3429 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3430 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3431 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3432 // 128/256-bit shift left specified in bits.
3433 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3434 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3435 Shift / 8); // Shift is in bits.
3436 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3437 // 128/256-bit shift right specified in bits.
3438 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3439 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3440 Shift / 8); // Shift is in bits.
3441 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3442 Name == "avx512.psll.dq.512") {
3443 // 128/256/512-bit shift left specified in bytes.
3444 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3445 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3446 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3447 Name == "avx512.psrl.dq.512") {
3448 // 128/256/512-bit shift right specified in bytes.
3449 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3450 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3451 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3452 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3453 Name.starts_with("avx2.pblendd.")) {
3454 Value *Op0 = CI->getArgOperand(0);
3455 Value *Op1 = CI->getArgOperand(1);
3456 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3457 auto *VecTy = cast<FixedVectorType>(CI->getType());
3458 unsigned NumElts = VecTy->getNumElements();
3459
3460 SmallVector<int, 16> Idxs(NumElts);
3461 for (unsigned i = 0; i != NumElts; ++i)
3462 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
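// Example: sse41.pblendw with Imm = 0xF0 keeps elements 0-3 from Op0 and
// takes elements 4-7 from Op1, i.e. Idxs = <0, 1, 2, 3, 12, 13, 14, 15>.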
3463
3464 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3465 } else if (Name.starts_with("avx.vinsertf128.") ||
3466 Name == "avx2.vinserti128" ||
3467 Name.starts_with("avx512.mask.insert")) {
3468 Value *Op0 = CI->getArgOperand(0);
3469 Value *Op1 = CI->getArgOperand(1);
3470 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3471 unsigned DstNumElts =
3472 cast<FixedVectorType>(CI->getType())->getNumElements();
3473 unsigned SrcNumElts =
3474 cast<FixedVectorType>(Op1->getType())->getNumElements();
3475 unsigned Scale = DstNumElts / SrcNumElts;
3476
3477 // Mask off the high bits of the immediate value; hardware ignores those.
3478 Imm = Imm % Scale;
3479
3480 // Extend the second operand into a vector the size of the destination.
3481 SmallVector<int, 8> Idxs(DstNumElts);
3482 for (unsigned i = 0; i != SrcNumElts; ++i)
3483 Idxs[i] = i;
3484 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3485 Idxs[i] = SrcNumElts;
3486 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3487
3488 // Insert the second operand into the first operand.
3489
3490 // Note that there is no guarantee that instruction lowering will actually
3491 // produce a vinsertf128 instruction for the created shuffles. In
3492 // particular, the 0 immediate case involves no lane changes, so it can
3493 // be handled as a blend.
3494
3495 // Example of shuffle mask for 32-bit elements:
3496 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3497 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
3498
3499 // First, fill with the identity mask.
3500 for (unsigned i = 0; i != DstNumElts; ++i)
3501 Idxs[i] = i;
3502 // Then replace the elements where we need to insert.
3503 for (unsigned i = 0; i != SrcNumElts; ++i)
3504 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3505 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3506
3507 // If the intrinsic has a mask operand, handle that.
3508 if (CI->arg_size() == 5)
3509 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3510 CI->getArgOperand(3));
3511 } else if (Name.starts_with("avx.vextractf128.") ||
3512 Name == "avx2.vextracti128" ||
3513 Name.starts_with("avx512.mask.vextract")) {
3514 Value *Op0 = CI->getArgOperand(0);
3515 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3516 unsigned DstNumElts =
3517 cast<FixedVectorType>(CI->getType())->getNumElements();
3518 unsigned SrcNumElts =
3519 cast<FixedVectorType>(Op0->getType())->getNumElements();
3520 unsigned Scale = SrcNumElts / DstNumElts;
3521
3522 // Mask off the high bits of the immediate value; hardware ignores those.
3523 Imm = Imm % Scale;
3524
3525 // Get indexes for the subvector of the input vector.
3526 SmallVector<int, 8> Idxs(DstNumElts);
3527 for (unsigned i = 0; i != DstNumElts; ++i) {
3528 Idxs[i] = i + (Imm * DstNumElts);
3529 }
3530 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3531
3532 // If the intrinsic has a mask operand, handle that.
3533 if (CI->arg_size() == 4)
3534 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3535 CI->getArgOperand(2));
3536 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3537 Name.starts_with("avx512.mask.perm.di.")) {
3538 Value *Op0 = CI->getArgOperand(0);
3539 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3540 auto *VecTy = cast<FixedVectorType>(CI->getType());
3541 unsigned NumElts = VecTy->getNumElements();
3542
3543 SmallVector<int, 8> Idxs(NumElts);
3544 for (unsigned i = 0; i != NumElts; ++i)
3545 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
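// Example: Imm = 0x1B (0b00011011) reverses each 4-element group, giving
// Idxs = <3, 2, 1, 0> (and <7, 6, 5, 4> for the second group of a
// 512-bit vector).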
3546
3547 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3548
3549 if (CI->arg_size() == 4)
3550 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3551 CI->getArgOperand(2));
3552 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3553 // The immediate permute control byte looks like this:
3554 // [1:0] - select 128 bits from sources for low half of destination
3555 // [2] - ignore
3556 // [3] - zero low half of destination
3557 // [5:4] - select 128 bits from sources for high half of destination
3558 // [6] - ignore
3559 // [7] - zero high half of destination
3560
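// Example: avx2.vperm2i128 on <4 x i64> with Imm = 0x21 takes the high lane
// of operand 0 and the low lane of operand 1, with no zeroing, i.e.
//   %r = shufflevector <4 x i64> %a, <4 x i64> %b,
//                      <4 x i32> <i32 2, i32 3, i32 4, i32 5>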
3561 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3562
3563 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3564 unsigned HalfSize = NumElts / 2;
3565 SmallVector<int, 8> ShuffleMask(NumElts);
3566
3567 // Determine which operand(s) are actually in use for this instruction.
3568 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3569 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3570
3571 // If needed, replace operands based on zero mask.
3572 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3573 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3574
3575 // Permute low half of result.
3576 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3577 for (unsigned i = 0; i < HalfSize; ++i)
3578 ShuffleMask[i] = StartIndex + i;
3579
3580 // Permute high half of result.
3581 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3582 for (unsigned i = 0; i < HalfSize; ++i)
3583 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3584
3585 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3586
3587 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3588 Name.starts_with("avx512.mask.vpermil.p") ||
3589 Name.starts_with("avx512.mask.pshuf.d.")) {
3590 Value *Op0 = CI->getArgOperand(0);
3591 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3592 auto *VecTy = cast<FixedVectorType>(CI->getType());
3593 unsigned NumElts = VecTy->getNumElements();
3594 // Calculate the size of each index in the immediate.
3595 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3596 unsigned IdxMask = ((1 << IdxSize) - 1);
3597
3598 SmallVector<int, 8> Idxs(NumElts);
3599 // Look up the bits for this element, wrapping around the immediate every
3600 // 8 bits. Elements are grouped into sets of 2 or 4 elements, so we need
3601 // to offset by the first index of each group.
3602 for (unsigned i = 0; i != NumElts; ++i)
3603 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
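// Example: sse2.pshuf.d with Imm = 0x1B (0b00011011) on <4 x i32> produces
// Idxs = <3, 2, 1, 0>, a full element reversal.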
3604
3605 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3606
3607 if (CI->arg_size() == 4)
3608 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3609 CI->getArgOperand(2));
3610 } else if (Name == "sse2.pshufl.w" ||
3611 Name.starts_with("avx512.mask.pshufl.w.")) {
3612 Value *Op0 = CI->getArgOperand(0);
3613 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3614 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3615
3616 SmallVector<int, 16> Idxs(NumElts);
3617 for (unsigned l = 0; l != NumElts; l += 8) {
3618 for (unsigned i = 0; i != 4; ++i)
3619 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3620 for (unsigned i = 4; i != 8; ++i)
3621 Idxs[i + l] = i + l;
3622 }
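// Example: sse2.pshufl.w with Imm = 0x1B reverses the low four words of each
// 128-bit lane, giving Idxs = <3, 2, 1, 0, 4, 5, 6, 7>; pshufh.w below
// applies the same pattern to the high four words.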
3623
3624 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3625
3626 if (CI->arg_size() == 4)
3627 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3628 CI->getArgOperand(2));
3629 } else if (Name == "sse2.pshufh.w" ||
3630 Name.starts_with("avx512.mask.pshufh.w.")) {
3631 Value *Op0 = CI->getArgOperand(0);
3632 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3633 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3634
3635 SmallVector<int, 16> Idxs(NumElts);
3636 for (unsigned l = 0; l != NumElts; l += 8) {
3637 for (unsigned i = 0; i != 4; ++i)
3638 Idxs[i + l] = i + l;
3639 for (unsigned i = 0; i != 4; ++i)
3640 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3641 }
3642
3643 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3644
3645 if (CI->arg_size() == 4)
3646 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3647 CI->getArgOperand(2));
3648 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3649 Value *Op0 = CI->getArgOperand(0);
3650 Value *Op1 = CI->getArgOperand(1);
3651 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3652 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3653
3654 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3655 unsigned HalfLaneElts = NumLaneElts / 2;
3656
3657 SmallVector<int, 16> Idxs(NumElts);
3658 for (unsigned i = 0; i != NumElts; ++i) {
3659 // Base index is the starting element of the lane.
3660 Idxs[i] = i - (i % NumLaneElts);
3661 // If we are halfway through the lane, switch to the other source.
3662 if ((i % NumLaneElts) >= HalfLaneElts)
3663 Idxs[i] += NumElts;
3664 // Now select the specific element by adding HalfLaneElts bits from
3665 // the immediate, wrapping around the immediate every 8 bits.
3666 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3667 }
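// Example: a 128-bit shufps-style shuffle with Imm = 0x4E (0b01001110)
// produces Idxs = <2, 3, 4, 5>: elements 2-3 of Op0 followed by
// elements 0-1 of Op1.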
3668
3669 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3670
3671 Rep =
3672 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3673 } else if (Name.starts_with("avx512.mask.movddup") ||
3674 Name.starts_with("avx512.mask.movshdup") ||
3675 Name.starts_with("avx512.mask.movsldup")) {
3676 Value *Op0 = CI->getArgOperand(0);
3677 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3678 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3679
3680 unsigned Offset = 0;
3681 if (Name.starts_with("avx512.mask.movshdup."))
3682 Offset = 1;
3683
3684 SmallVector<int, 16> Idxs(NumElts);
3685 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3686 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3687 Idxs[i + l + 0] = i + l + Offset;
3688 Idxs[i + l + 1] = i + l + Offset;
3689 }
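// Example: on <4 x float>, movshdup (Offset = 1) gives Idxs = <1, 1, 3, 3>
// and movsldup (Offset = 0) gives Idxs = <0, 0, 2, 2>.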
3690
3691 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3692
3693 Rep =
3694 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3695 } else if (Name.starts_with("avx512.mask.punpckl") ||
3696 Name.starts_with("avx512.mask.unpckl.")) {
3697 Value *Op0 = CI->getArgOperand(0);
3698 Value *Op1 = CI->getArgOperand(1);
3699 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3700 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3701
3702 SmallVector<int, 64> Idxs(NumElts);
3703 for (int l = 0; l != NumElts; l += NumLaneElts)
3704 for (int i = 0; i != NumLaneElts; ++i)
3705 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
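// Example: a <4 x float> unpcklps gives Idxs = <0, 4, 1, 5>; the punpckh
// variant below offsets by NumLaneElts / 2, giving <2, 6, 3, 7>.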
3706
3707 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3708
3709 Rep =
3710 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3711 } else if (Name.starts_with("avx512.mask.punpckh") ||
3712 Name.starts_with("avx512.mask.unpckh.")) {
3713 Value *Op0 = CI->getArgOperand(0);
3714 Value *Op1 = CI->getArgOperand(1);
3715 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3716 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3717
3718 SmallVector<int, 64> Idxs(NumElts);
3719 for (int l = 0; l != NumElts; l += NumLaneElts)
3720 for (int i = 0; i != NumLaneElts; ++i)
3721 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3722
3723 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3724
3725 Rep =
3726 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3727 } else if (Name.starts_with("avx512.mask.and.") ||
3728 Name.starts_with("avx512.mask.pand.")) {
3729 VectorType *FTy = cast<VectorType>(CI->getType());
3730 VectorType *ITy = VectorType::getInteger(FTy);
3731 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3732 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3733 Rep = Builder.CreateBitCast(Rep, FTy);
3734 Rep =
3735 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3736 } else if (Name.starts_with("avx512.mask.andn.") ||
3737 Name.starts_with("avx512.mask.pandn.")) {
3738 VectorType *FTy = cast<VectorType>(CI->getType());
3739 VectorType *ITy = VectorType::getInteger(FTy);
3740 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3741 Rep = Builder.CreateAnd(Rep,
3742 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3743 Rep = Builder.CreateBitCast(Rep, FTy);
3744 Rep =
3745 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3746 } else if (Name.starts_with("avx512.mask.or.") ||
3747 Name.starts_with("avx512.mask.por.")) {
3748 VectorType *FTy = cast<VectorType>(CI->getType());
3749 VectorType *ITy = VectorType::getInteger(FTy);
3750 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3751 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3752 Rep = Builder.CreateBitCast(Rep, FTy);
3753 Rep =
3754 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3755 } else if (Name.starts_with("avx512.mask.xor.") ||
3756 Name.starts_with("avx512.mask.pxor.")) {
3757 VectorType *FTy = cast<VectorType>(CI->getType());
3758 VectorType *ITy = VectorType::getInteger(FTy);
3759 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3760 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3761 Rep = Builder.CreateBitCast(Rep, FTy);
3762 Rep =
3763 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3764 } else if (Name.starts_with("avx512.mask.padd.")) {
3765 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3766 Rep =
3767 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3768 } else if (Name.starts_with("avx512.mask.psub.")) {
3769 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3770 Rep =
3771 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3772 } else if (Name.starts_with("avx512.mask.pmull.")) {
3773 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3774 Rep =
3775 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3776 } else if (Name.starts_with("avx512.mask.add.p")) {
3777 if (Name.ends_with(".512")) {
3778 Intrinsic::ID IID;
3779 if (Name[17] == 's')
3780 IID = Intrinsic::x86_avx512_add_ps_512;
3781 else
3782 IID = Intrinsic::x86_avx512_add_pd_512;
3783
3784 Rep = Builder.CreateIntrinsic(
3785 IID,
3786 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3787 } else {
3788 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3789 }
3790 Rep =
3791 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3792 } else if (Name.starts_with("avx512.mask.div.p")) {
3793 if (Name.ends_with(".512")) {
3794 Intrinsic::ID IID;
3795 if (Name[17] == 's')
3796 IID = Intrinsic::x86_avx512_div_ps_512;
3797 else
3798 IID = Intrinsic::x86_avx512_div_pd_512;
3799
3800 Rep = Builder.CreateIntrinsic(
3801 IID,
3802 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3803 } else {
3804 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3805 }
3806 Rep =
3807 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3808 } else if (Name.starts_with("avx512.mask.mul.p")) {
3809 if (Name.ends_with(".512")) {
3810 Intrinsic::ID IID;
3811 if (Name[17] == 's')
3812 IID = Intrinsic::x86_avx512_mul_ps_512;
3813 else
3814 IID = Intrinsic::x86_avx512_mul_pd_512;
3815
3816 Rep = Builder.CreateIntrinsic(
3817 IID,
3818 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3819 } else {
3820 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3821 }
3822 Rep =
3823 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3824 } else if (Name.starts_with("avx512.mask.sub.p")) {
3825 if (Name.ends_with(".512")) {
3826 Intrinsic::ID IID;
3827 if (Name[17] == 's')
3828 IID = Intrinsic::x86_avx512_sub_ps_512;
3829 else
3830 IID = Intrinsic::x86_avx512_sub_pd_512;
3831
3832 Rep = Builder.CreateIntrinsic(
3833 IID,
3834 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3835 } else {
3836 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3837 }
3838 Rep =
3839 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3840 } else if ((Name.starts_with("avx512.mask.max.p") ||
3841 Name.starts_with("avx512.mask.min.p")) &&
3842 Name.drop_front(18) == ".512") {
3843 bool IsDouble = Name[17] == 'd';
3844 bool IsMin = Name[13] == 'i';
3845 static const Intrinsic::ID MinMaxTbl[2][2] = {
3846 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3847 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3848 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3849
3850 Rep = Builder.CreateIntrinsic(
3851 IID,
3852 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3853 Rep =
3854 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3855 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3856 Rep =
3857 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3858 {CI->getArgOperand(0), Builder.getInt1(false)});
3859 Rep =
3860 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3861 } else if (Name.starts_with("avx512.mask.psll")) {
3862 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3863 bool IsVariable = Name[16] == 'v';
3864 char Size = Name[16] == '.' ? Name[17]
3865 : Name[17] == '.' ? Name[18]
3866 : Name[18] == '.' ? Name[19]
3867 : Name[20];
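// Examples: "avx512.mask.psll.d.128" has Name[16] == '.', so Size is
// Name[17] == 'd'; "avx512.mask.psll.di.512" has Name[18] == 'i'
// (immediate form); "avx512.mask.psllv8.si" has Name[16] == 'v'
// (variable form) with Size taken from Name[19].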
3868
3869 Intrinsic::ID IID;
3870 if (IsVariable && Name[17] != '.') {
3871 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3872 IID = Intrinsic::x86_avx2_psllv_q;
3873 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3874 IID = Intrinsic::x86_avx2_psllv_q_256;
3875 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3876 IID = Intrinsic::x86_avx2_psllv_d;
3877 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3878 IID = Intrinsic::x86_avx2_psllv_d_256;
3879 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3880 IID = Intrinsic::x86_avx512_psllv_w_128;
3881 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3882 IID = Intrinsic::x86_avx512_psllv_w_256;
3883 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3884 IID = Intrinsic::x86_avx512_psllv_w_512;
3885 else
3886 llvm_unreachable("Unexpected size");
3887 } else if (Name.ends_with(".128")) {
3888 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3889 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3890 : Intrinsic::x86_sse2_psll_d;
3891 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3892 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3893 : Intrinsic::x86_sse2_psll_q;
3894 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3895 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3896 : Intrinsic::x86_sse2_psll_w;
3897 else
3898 llvm_unreachable("Unexpected size");
3899 } else if (Name.ends_with(".256")) {
3900 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3901 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3902 : Intrinsic::x86_avx2_psll_d;
3903 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3904 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3905 : Intrinsic::x86_avx2_psll_q;
3906 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3907 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3908 : Intrinsic::x86_avx2_psll_w;
3909 else
3910 llvm_unreachable("Unexpected size");
3911 } else {
3912 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3913 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3914 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3915 : Intrinsic::x86_avx512_psll_d_512;
3916 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3917 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3918 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3919 : Intrinsic::x86_avx512_psll_q_512;
3920 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3921 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3922 : Intrinsic::x86_avx512_psll_w_512;
3923 else
3924 llvm_unreachable("Unexpected size");
3925 }
3926
3927 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3928 } else if (Name.starts_with("avx512.mask.psrl")) {
3929 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3930 bool IsVariable = Name[16] == 'v';
3931 char Size = Name[16] == '.' ? Name[17]
3932 : Name[17] == '.' ? Name[18]
3933 : Name[18] == '.' ? Name[19]
3934 : Name[20];
3935
3936 Intrinsic::ID IID;
3937 if (IsVariable && Name[17] != '.') {
3938 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3939 IID = Intrinsic::x86_avx2_psrlv_q;
3940 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3941 IID = Intrinsic::x86_avx2_psrlv_q_256;
3942 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3943 IID = Intrinsic::x86_avx2_psrlv_d;
3944 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3945 IID = Intrinsic::x86_avx2_psrlv_d_256;
3946 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3947 IID = Intrinsic::x86_avx512_psrlv_w_128;
3948 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3949 IID = Intrinsic::x86_avx512_psrlv_w_256;
3950 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3951 IID = Intrinsic::x86_avx512_psrlv_w_512;
3952 else
3953 llvm_unreachable("Unexpected size");
3954 } else if (Name.ends_with(".128")) {
3955 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3956 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3957 : Intrinsic::x86_sse2_psrl_d;
3958 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3959 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3960 : Intrinsic::x86_sse2_psrl_q;
3961 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3962 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3963 : Intrinsic::x86_sse2_psrl_w;
3964 else
3965 llvm_unreachable("Unexpected size");
3966 } else if (Name.ends_with(".256")) {
3967 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3968 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3969 : Intrinsic::x86_avx2_psrl_d;
3970 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3971 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3972 : Intrinsic::x86_avx2_psrl_q;
3973 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3974 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3975 : Intrinsic::x86_avx2_psrl_w;
3976 else
3977 llvm_unreachable("Unexpected size");
3978 } else {
3979 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrlv.d.512
3980 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
3981 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
3982 : Intrinsic::x86_avx512_psrl_d_512;
3983 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrlv.q.512
3984 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
3985 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3986 : Intrinsic::x86_avx512_psrl_q_512;
3987 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
3988 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3989 : Intrinsic::x86_avx512_psrl_w_512;
3990 else
3991 llvm_unreachable("Unexpected size");
3992 }
3993
3994 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3995 } else if (Name.starts_with("avx512.mask.psra")) {
3996 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3997 bool IsVariable = Name[16] == 'v';
3998 char Size = Name[16] == '.' ? Name[17]
3999 : Name[17] == '.' ? Name[18]
4000 : Name[18] == '.' ? Name[19]
4001 : Name[20];
4002
4003 Intrinsic::ID IID;
4004 if (IsVariable && Name[17] != '.') {
4005 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
4006 IID = Intrinsic::x86_avx2_psrav_d;
4007 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
4008 IID = Intrinsic::x86_avx2_psrav_d_256;
4009 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
4010 IID = Intrinsic::x86_avx512_psrav_w_128;
4011 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
4012 IID = Intrinsic::x86_avx512_psrav_w_256;
4013 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
4014 IID = Intrinsic::x86_avx512_psrav_w_512;
4015 else
4016 llvm_unreachable("Unexpected size");
4017 } else if (Name.ends_with(".128")) {
4018 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
4019 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4020 : Intrinsic::x86_sse2_psra_d;
4021 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
4022 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4023 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4024 : Intrinsic::x86_avx512_psra_q_128;
4025 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
4026 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4027 : Intrinsic::x86_sse2_psra_w;
4028 else
4029 llvm_unreachable("Unexpected size");
4030 } else if (Name.ends_with(".256")) {
4031 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
4032 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4033 : Intrinsic::x86_avx2_psra_d;
4034 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
4035 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4036 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4037 : Intrinsic::x86_avx512_psra_q_256;
4038 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
4039 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4040 : Intrinsic::x86_avx2_psra_w;
4041 else
4042 llvm_unreachable("Unexpected size");
4043 } else {
4044 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
4045 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4046 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4047 : Intrinsic::x86_avx512_psra_d_512;
4048 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
4049 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4050 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4051 : Intrinsic::x86_avx512_psra_q_512;
4052 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
4053 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4054 : Intrinsic::x86_avx512_psra_w_512;
4055 else
4056 llvm_unreachable("Unexpected size");
4057 }
4058
4059 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4060 } else if (Name.starts_with("avx512.mask.move.s")) {
4061 Rep = upgradeMaskedMove(Builder, *CI);
4062 } else if (Name.starts_with("avx512.cvtmask2")) {
4063 Rep = upgradeMaskToInt(Builder, *CI);
4064 } else if (Name.ends_with(".movntdqa")) {
4065 MDNode *Node = MDNode::get(
4066 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
4067
4068 LoadInst *LI = Builder.CreateAlignedLoad(
4069 CI->getType(), CI->getArgOperand(0),
4070 Align(CI->getType()->getPrimitiveSizeInBits() / 8));
4071 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
4072 Rep = LI;
4073 } else if (Name.starts_with("fma.vfmadd.") ||
4074 Name.starts_with("fma.vfmsub.") ||
4075 Name.starts_with("fma.vfnmadd.") ||
4076 Name.starts_with("fma.vfnmsub.")) {
4077 bool NegMul = Name[6] == 'n';
4078 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
4079 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
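// Example: for "fma.vfnmadd.ss", Name[6] == 'n' (negated multiply),
// Name[8] == 'a' (accumulator not negated) and Name[12] == 's' (scalar).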
4080
4081 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4082 CI->getArgOperand(2)};
4083
4084 if (IsScalar) {
4085 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4086 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4087 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4088 }
4089
4090 if (NegMul && !IsScalar)
4091 Ops[0] = Builder.CreateFNeg(Ops[0]);
4092 if (NegMul && IsScalar)
4093 Ops[1] = Builder.CreateFNeg(Ops[1]);
4094 if (NegAcc)
4095 Ops[2] = Builder.CreateFNeg(Ops[2]);
4096
4097 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4098
4099 if (IsScalar)
4100 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
4101 } else if (Name.starts_with("fma4.vfmadd.s")) {
4102 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4103 CI->getArgOperand(2)};
4104
4105 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4106 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4107 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4108
4109 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4110
4111 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
4112 Rep, (uint64_t)0);
4113 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
4114 Name.starts_with("avx512.maskz.vfmadd.s") ||
4115 Name.starts_with("avx512.mask3.vfmadd.s") ||
4116 Name.starts_with("avx512.mask3.vfmsub.s") ||
4117 Name.starts_with("avx512.mask3.vfnmsub.s")) {
4118 bool IsMask3 = Name[11] == '3';
4119 bool IsMaskZ = Name[11] == 'z';
4120 // Drop the "avx512.mask." prefix to simplify the suffix checks below.
4121 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4122 bool NegMul = Name[2] == 'n';
4123 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4124
4125 Value *A = CI->getArgOperand(0);
4126 Value *B = CI->getArgOperand(1);
4127 Value *C = CI->getArgOperand(2);
4128
4129 if (NegMul && (IsMask3 || IsMaskZ))
4130 A = Builder.CreateFNeg(A);
4131 if (NegMul && !(IsMask3 || IsMaskZ))
4132 B = Builder.CreateFNeg(B);
4133 if (NegAcc)
4134 C = Builder.CreateFNeg(C);
4135
4136 A = Builder.CreateExtractElement(A, (uint64_t)0);
4137 B = Builder.CreateExtractElement(B, (uint64_t)0);
4138 C = Builder.CreateExtractElement(C, (uint64_t)0);
4139
4140 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4141 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
4142 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
4143
4144 Intrinsic::ID IID;
4145 if (Name.back() == 'd')
4146 IID = Intrinsic::x86_avx512_vfmadd_f64;
4147 else
4148 IID = Intrinsic::x86_avx512_vfmadd_f32;
4149 Rep = Builder.CreateIntrinsic(IID, Ops);
4150 } else {
4151 Rep = Builder.CreateFMA(A, B, C);
4152 }
4153
4154 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
4155 : IsMask3 ? C
4156 : A;
4157
4158 // For Mask3 with NegAcc, we need to create a new extractelement that
4159 // avoids the negation above.
4160 if (NegAcc && IsMask3)
4161 PassThru =
4162 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
4163
4164 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
4165 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
4166 (uint64_t)0);
4167 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
4168 Name.starts_with("avx512.mask.vfnmadd.p") ||
4169 Name.starts_with("avx512.mask.vfnmsub.p") ||
4170 Name.starts_with("avx512.mask3.vfmadd.p") ||
4171 Name.starts_with("avx512.mask3.vfmsub.p") ||
4172 Name.starts_with("avx512.mask3.vfnmsub.p") ||
4173 Name.starts_with("avx512.maskz.vfmadd.p")) {
4174 bool IsMask3 = Name[11] == '3';
4175 bool IsMaskZ = Name[11] == 'z';
4176 // Drop the "avx512.mask." prefix to simplify the suffix checks below.
4177 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4178 bool NegMul = Name[2] == 'n';
4179 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4180
4181 Value *A = CI->getArgOperand(0);
4182 Value *B = CI->getArgOperand(1);
4183 Value *C = CI->getArgOperand(2);
4184
4185 if (NegMul && (IsMask3 || IsMaskZ))
4186 A = Builder.CreateFNeg(A);
4187 if (NegMul && !(IsMask3 || IsMaskZ))
4188 B = Builder.CreateFNeg(B);
4189 if (NegAcc)
4190 C = Builder.CreateFNeg(C);
4191
4192 if (CI->arg_size() == 5 &&
4193 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4194 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
4195 Intrinsic::ID IID;
4196 // Check the character before ".512" in the name.
4197 if (Name[Name.size() - 5] == 's')
4198 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4199 else
4200 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4201
4202 Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
4203 } else {
4204 Rep = Builder.CreateFMA(A, B, C);
4205 }
4206
4207 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4208 : IsMask3 ? CI->getArgOperand(2)
4209 : CI->getArgOperand(0);
4210
4211 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4212 } else if (Name.starts_with("fma.vfmsubadd.p")) {
4213 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4214 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4215 Intrinsic::ID IID;
4216 if (VecWidth == 128 && EltWidth == 32)
4217 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4218 else if (VecWidth == 256 && EltWidth == 32)
4219 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4220 else if (VecWidth == 128 && EltWidth == 64)
4221 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4222 else if (VecWidth == 256 && EltWidth == 64)
4223 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4224 else
4225 llvm_unreachable("Unexpected intrinsic");
4226
4227 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4228 CI->getArgOperand(2)};
4229 Ops[2] = Builder.CreateFNeg(Ops[2]);
4230 Rep = Builder.CreateIntrinsic(IID, Ops);
4231 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
4232 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
4233 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
4234 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
4235 bool IsMask3 = Name[11] == '3';
4236 bool IsMaskZ = Name[11] == 'z';
4237 // Drop the "avx512.mask." prefix to simplify the suffix checks below.
4238 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4239 bool IsSubAdd = Name[3] == 's';
4240 if (CI->arg_size() == 5) {
4241 Intrinsic::ID IID;
4242 // Check the character before ".512" in the name.
4243 if (Name[Name.size() - 5] == 's')
4244 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4245 else
4246 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4247
4248 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4249 CI->getArgOperand(2), CI->getArgOperand(4)};
4250 if (IsSubAdd)
4251 Ops[2] = Builder.CreateFNeg(Ops[2]);
4252
4253 Rep = Builder.CreateIntrinsic(IID, Ops);
4254 } else {
4255 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4256
4257 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4258 CI->getArgOperand(2)};
4259
4260 Function *FMA = Intrinsic::getOrInsertDeclaration(
4261 CI->getModule(), Intrinsic::fma, Ops[0]->getType());
4262 Value *Odd = Builder.CreateCall(FMA, Ops);
4263 Ops[2] = Builder.CreateFNeg(Ops[2]);
4264 Value *Even = Builder.CreateCall(FMA, Ops);
4265
4266 if (IsSubAdd)
4267 std::swap(Even, Odd);
4268
4269 SmallVector<int, 32> Idxs(NumElts);
4270 for (int i = 0; i != NumElts; ++i)
4271 Idxs[i] = i + (i % 2) * NumElts;
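// Example: for 4 elements, Idxs = <0, 5, 2, 7>, so even result lanes come
// from the first shuffle operand (Even) and odd lanes from the second (Odd).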
4272
4273 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4274 }
4275
4276 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4277 : IsMask3 ? CI->getArgOperand(2)
4278 : CI->getArgOperand(0);
4279
4280 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4281 } else if (Name.starts_with("avx512.mask.pternlog.") ||
4282 Name.starts_with("avx512.maskz.pternlog.")) {
4283 bool ZeroMask = Name[11] == 'z';
4284 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4285 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4286 Intrinsic::ID IID;
4287 if (VecWidth == 128 && EltWidth == 32)
4288 IID = Intrinsic::x86_avx512_pternlog_d_128;
4289 else if (VecWidth == 256 && EltWidth == 32)
4290 IID = Intrinsic::x86_avx512_pternlog_d_256;
4291 else if (VecWidth == 512 && EltWidth == 32)
4292 IID = Intrinsic::x86_avx512_pternlog_d_512;
4293 else if (VecWidth == 128 && EltWidth == 64)
4294 IID = Intrinsic::x86_avx512_pternlog_q_128;
4295 else if (VecWidth == 256 && EltWidth == 64)
4296 IID = Intrinsic::x86_avx512_pternlog_q_256;
4297 else if (VecWidth == 512 && EltWidth == 64)
4298 IID = Intrinsic::x86_avx512_pternlog_q_512;
4299 else
4300 llvm_unreachable("Unexpected intrinsic");
4301
4302 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4303 CI->getArgOperand(2), CI->getArgOperand(3)};
4304 Rep = Builder.CreateIntrinsic(IID, Args);
4305 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4306 : CI->getArgOperand(0);
4307 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
4308 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
4309 Name.starts_with("avx512.maskz.vpmadd52")) {
4310 bool ZeroMask = Name[11] == 'z';
4311 bool High = Name[20] == 'h' || Name[21] == 'h';
4312 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4313 Intrinsic::ID IID;
4314 if (VecWidth == 128 && !High)
4315 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4316 else if (VecWidth == 256 && !High)
4317 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4318 else if (VecWidth == 512 && !High)
4319 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4320 else if (VecWidth == 128 && High)
4321 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4322 else if (VecWidth == 256 && High)
4323 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4324 else if (VecWidth == 512 && High)
4325 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4326 else
4327 llvm_unreachable("Unexpected intrinsic");
4328
4329 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4330 CI->getArgOperand(2)};
4331 Rep = Builder.CreateIntrinsic(IID, Args);
4332 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4333 : CI->getArgOperand(0);
4334 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4335 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
4336 Name.starts_with("avx512.mask.vpermt2var.") ||
4337 Name.starts_with("avx512.maskz.vpermt2var.")) {
4338 bool ZeroMask = Name[11] == 'z';
4339 bool IndexForm = Name[17] == 'i';
4340 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4341 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
4342 Name.starts_with("avx512.maskz.vpdpbusd.") ||
4343 Name.starts_with("avx512.mask.vpdpbusds.") ||
4344 Name.starts_with("avx512.maskz.vpdpbusds.")) {
4345 bool ZeroMask = Name[11] == 'z';
4346 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4347 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4348 Intrinsic::ID IID;
4349 if (VecWidth == 128 && !IsSaturating)
4350 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4351 else if (VecWidth == 256 && !IsSaturating)
4352 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4353 else if (VecWidth == 512 && !IsSaturating)
4354 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4355 else if (VecWidth == 128 && IsSaturating)
4356 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4357 else if (VecWidth == 256 && IsSaturating)
4358 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4359 else if (VecWidth == 512 && IsSaturating)
4360 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4361 else
4362 llvm_unreachable("Unexpected intrinsic");
4363
4364 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4365 CI->getArgOperand(2)};
4366
4367 // Input argument types were incorrectly set to vectors of i32 before, but
4368 // they should be vectors of i8. Insert a bitcast when encountering the old
4369 // types.
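// e.g. a 128-bit operand that arrives as <4 x i32> is bitcast to <16 x i8>
// before the call; the bit pattern is unchanged (illustrative).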
4370 if (Args[1]->getType()->isVectorTy() &&
4371 cast<VectorType>(Args[1]->getType())
4372 ->getElementType()
4373 ->isIntegerTy(32) &&
4374 Args[2]->getType()->isVectorTy() &&
4375 cast<VectorType>(Args[2]->getType())
4376 ->getElementType()
4377 ->isIntegerTy(32)) {
4378 Type *NewArgType = nullptr;
4379 if (VecWidth == 128)
4380 NewArgType = VectorType::get(Builder.getInt8Ty(), 16, false);
4381 else if (VecWidth == 256)
4382 NewArgType = VectorType::get(Builder.getInt8Ty(), 32, false);
4383 else if (VecWidth == 512)
4384 NewArgType = VectorType::get(Builder.getInt8Ty(), 64, false);
4385 else
4386 llvm_unreachable("Unexpected vector bit width");
4387
4388 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4389 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4390 }
4391
4392 Rep = Builder.CreateIntrinsic(IID, Args);
4393 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4394 : CI->getArgOperand(0);
4395 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4396 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
4397 Name.starts_with("avx512.maskz.vpdpwssd.") ||
4398 Name.starts_with("avx512.mask.vpdpwssds.") ||
4399 Name.starts_with("avx512.maskz.vpdpwssds.")) {
4400 bool ZeroMask = Name[11] == 'z';
4401 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4402 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4403 Intrinsic::ID IID;
4404 if (VecWidth == 128 && !IsSaturating)
4405 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4406 else if (VecWidth == 256 && !IsSaturating)
4407 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4408 else if (VecWidth == 512 && !IsSaturating)
4409 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4410 else if (VecWidth == 128 && IsSaturating)
4411 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4412 else if (VecWidth == 256 && IsSaturating)
4413 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4414 else if (VecWidth == 512 && IsSaturating)
4415 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4416 else
4417 llvm_unreachable("Unexpected intrinsic");
4418
4419 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4420 CI->getArgOperand(2)};
4421
4422 // Input argument types were incorrectly set to vectors of i32 before, but
4423 // they should be vectors of i16. Insert a bitcast when encountering the old
4424 // types.
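// e.g. <4 x i32> -> <8 x i16> for the 128-bit variant (illustrative).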
4425 if (Args[1]->getType()->isVectorTy() &&
4426 cast<VectorType>(Args[1]->getType())
4427 ->getElementType()
4428 ->isIntegerTy(32) &&
4429 Args[2]->getType()->isVectorTy() &&
4430 cast<VectorType>(Args[2]->getType())
4431 ->getElementType()
4432 ->isIntegerTy(32)) {
4433 Type *NewArgType = nullptr;
4434 if (VecWidth == 128)
4435 NewArgType = VectorType::get(Builder.getInt16Ty(), 8, false);
4436 else if (VecWidth == 256)
4437 NewArgType = VectorType::get(Builder.getInt16Ty(), 16, false);
4438 else if (VecWidth == 512)
4439 NewArgType = VectorType::get(Builder.getInt16Ty(), 32, false);
4440 else
4441 llvm_unreachable("Unexpected vector bit width");
4442
4443 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4444 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4445 }
4446
4447 Rep = Builder.CreateIntrinsic(IID, Args);
4448 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4449 : CI->getArgOperand(0);
4450 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4451 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4452 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4453 Name == "subborrow.u32" || Name == "subborrow.u64") {
4454 Intrinsic::ID IID;
4455 if (Name[0] == 'a' && Name.back() == '2')
4456 IID = Intrinsic::x86_addcarry_32;
4457 else if (Name[0] == 'a' && Name.back() == '4')
4458 IID = Intrinsic::x86_addcarry_64;
4459 else if (Name[0] == 's' && Name.back() == '2')
4460 IID = Intrinsic::x86_subborrow_32;
4461 else if (Name[0] == 's' && Name.back() == '4')
4462 IID = Intrinsic::x86_subborrow_64;
4463 else
4464 llvm_unreachable("Unexpected intrinsic");
4465
4466 // Make a call with 3 operands.
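// Illustrative shapes: the old intrinsic returned only the carry flag and
// wrote the result through its pointer operand,
//   %cf = call i8 @llvm.x86.addcarry.u32(i8 %c, i32 %x, i32 %y, ptr %p)
// whereas the new one returns a {flag, result} pair that is unpacked below.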
4467 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4468 CI->getArgOperand(2)};
4469 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4470
4471 // Extract the second result and store it.
4472 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4473 Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
4474 // Replace the original call result with the first result of the new call.
4475 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4476
4477 CI->replaceAllUsesWith(CF);
4478 Rep = nullptr;
4479 } else if (Name.starts_with("avx512.mask.") &&
4480 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4481 // Rep will be updated by the call in the condition.
4482 }
4483
4484 return Rep;
4485}
4486
4487 static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI,
4488 Function *F, IRBuilder<> &Builder) {
4489 if (Name.starts_with("neon.bfcvt")) {
4490 if (Name.starts_with("neon.bfcvtn2")) {
4491 SmallVector<int, 32> LoMask(4);
4492 std::iota(LoMask.begin(), LoMask.end(), 0);
4493 SmallVector<int, 32> ConcatMask(8);
4494 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4495 Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4496 Value *Trunc =
4497 Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
4498 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4499 } else if (Name.starts_with("neon.bfcvtn")) {
4500 SmallVector<int, 32> ConcatMask(8);
4501 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4502 Type *V4BF16 =
4503 FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
4504 Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4506 return Builder.CreateShuffleVector(
4507 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
4508 } else {
4509 return Builder.CreateFPTrunc(CI->getOperand(0),
4510 Type::getBFloatTy(F->getContext()));
4511 }
4512 } else if (Name.starts_with("sve.fcvt")) {
4513 Intrinsic::ID NewID =
4515 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4516 .Case("sve.fcvtnt.bf16f32",
4517 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4518 .Default(Intrinsic::not_intrinsic);
4519 if (NewID == Intrinsic::not_intrinsic)
4520 llvm_unreachable("Unhandled Intrinsic!");
4521
4522 SmallVector<Value *, 3> Args(CI->args());
4523
4524 // The original intrinsics incorrectly used a predicate based on the
4525 // smallest element type rather than the largest.
4526 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4527 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
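// e.g. a <vscale x 8 x i1> predicate is round-tripped through svbool to the
// <vscale x 4 x i1> type the new intrinsic expects; the register contents
// are unchanged (illustrative).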
4528
4529 if (Args[1]->getType() != BadPredTy)
4530 llvm_unreachable("Unexpected predicate type!");
4531
4532 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4533 BadPredTy, Args[1]);
4534 Args[1] = Builder.CreateIntrinsic(
4535 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4536
4537 return Builder.CreateIntrinsic(NewID, Args, /*FMFSource=*/nullptr,
4538 CI->getName());
4539 }
4540
4541 llvm_unreachable("Unhandled Intrinsic!");
4542}
4543
4544 static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
4545 IRBuilder<> &Builder) {
4546 if (Name == "mve.vctp64.old") {
4547 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
4548 // correct type.
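// Illustrative shape: the new vctp64 yields <2 x i1>, which is converted to
// i32 predicate form and back to recover the legacy <4 x i1> result type.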
4549 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4550 CI->getArgOperand(0),
4551 /*FMFSource=*/nullptr, CI->getName());
4552 Value *C1 = Builder.CreateIntrinsic(
4553 Intrinsic::arm_mve_pred_v2i,
4554 {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
4555 return Builder.CreateIntrinsic(
4556 Intrinsic::arm_mve_pred_i2v,
4557 {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
4558 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4559 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4560 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4561 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4562 Name ==
4563 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4564 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4565 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4566 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4567 Name ==
4568 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4569 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4570 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
4571 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
4572 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
4573 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
4574 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
4575 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
4576 std::vector<Type *> Tys;
4577 unsigned ID = CI->getIntrinsicID();
4578 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
4579 switch (ID) {
4580 case Intrinsic::arm_mve_mull_int_predicated:
4581 case Intrinsic::arm_mve_vqdmull_predicated:
4582 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4583 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
4584 break;
4585 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4586 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4587 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4588 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
4589 V2I1Ty};
4590 break;
4591 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4592 Tys = {CI->getType(), CI->getOperand(0)->getType(),
4593 CI->getOperand(1)->getType(), V2I1Ty};
4594 break;
4595 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4596 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
4597 CI->getOperand(2)->getType(), V2I1Ty};
4598 break;
4599 case Intrinsic::arm_cde_vcx1q_predicated:
4600 case Intrinsic::arm_cde_vcx1qa_predicated:
4601 case Intrinsic::arm_cde_vcx2q_predicated:
4602 case Intrinsic::arm_cde_vcx2qa_predicated:
4603 case Intrinsic::arm_cde_vcx3q_predicated:
4604 case Intrinsic::arm_cde_vcx3qa_predicated:
4605 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
4606 break;
4607 default:
4608 llvm_unreachable("Unhandled Intrinsic!");
4609 }
4610
4611 std::vector<Value *> Ops;
4612 for (Value *Op : CI->args()) {
4613 Type *Ty = Op->getType();
4614 if (Ty->getScalarSizeInBits() == 1) {
4615 Value *C1 = Builder.CreateIntrinsic(
4616 Intrinsic::arm_mve_pred_v2i,
4617 {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
4618 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4619 }
4620 Ops.push_back(Op);
4621 }
4622
4623 return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
4624 CI->getName());
4625 }
4626 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4627}
4628
4629// These are expected to have the arguments:
4630// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4631//
4632// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4633//
4634 static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
4635 Function *F, IRBuilder<> &Builder) {
4636 // Legacy WMMA iu intrinsics missed the optional clamp operand. Append clamp=0
4637 // for compatibility.
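// e.g. an old 7-argument wmma iu8 call simply gains a trailing "i1 false"
// clamp operand (illustrative).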
4638 auto UpgradeLegacyWMMAIUIntrinsicCall =
4639 [](Function *F, CallBase *CI, IRBuilder<> &Builder,
4640 ArrayRef<Type *> OverloadTys) -> Value * {
4641 // Prepare arguments, append clamp=0 for compatibility
4642 SmallVector<Value *, 10> Args(CI->args().begin(), CI->args().end());
4643 Args.push_back(Builder.getFalse());
4644
4645 // Insert the declaration for the right overload types
4646 Function *NewDecl = Intrinsic::getOrInsertDeclaration(
4647 F->getParent(), F->getIntrinsicID(), OverloadTys);
4648
4649 // Copy operand bundles if any
4650 SmallVector<OperandBundleDef, 1> Bundles;
4651 CI->getOperandBundlesAsDefs(Bundles);
4652
4653 // Create the new call and copy calling properties
4654 auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4655 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4656 NewCall->setCallingConv(CI->getCallingConv());
4657 NewCall->setAttributes(CI->getAttributes());
4658 NewCall->setDebugLoc(CI->getDebugLoc());
4659 NewCall->copyMetadata(*CI);
4660 return NewCall;
4661 };
4662
4663 if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
4664 assert(CI->arg_size() == 7 && "Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
4665 "intrinsic should have 7 arguments");
4666 Type *T1 = CI->getArgOperand(4)->getType();
4667 Type *T2 = CI->getArgOperand(1)->getType();
4668 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2});
4669 }
4670 if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
4671 assert(CI->arg_size() == 8 && "Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
4672 "intrinsic should have 8 arguments");
4673 Type *T1 = CI->getArgOperand(4)->getType();
4674 Type *T2 = CI->getArgOperand(1)->getType();
4675 Type *T3 = CI->getArgOperand(3)->getType();
4676 Type *T4 = CI->getArgOperand(5)->getType();
4677 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2, T3, T4});
4678 }
4679
4680 AtomicRMWInst::BinOp RMWOp =
4682 .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4683 .StartsWith("ds.fmin", AtomicRMWInst::FMin)
4684 .StartsWith("ds.fmax", AtomicRMWInst::FMax)
4685 .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4686 .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
4687 .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
4688 .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
4689 .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
4690 .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
4691 .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
4692 .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax)
4693 .StartsWith("atomic.cond.sub", AtomicRMWInst::USubCond)
4694 .StartsWith("atomic.csub", AtomicRMWInst::USubSat);
4695
4696 unsigned NumOperands = CI->getNumOperands();
4697 if (NumOperands < 3) // Malformed bitcode.
4698 return nullptr;
4699
4700 Value *Ptr = CI->getArgOperand(0);
4701 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4702 if (!PtrTy) // Malformed.
4703 return nullptr;
4704
4705 Value *Val = CI->getArgOperand(1);
4706 if (Val->getType() != CI->getType()) // Malformed.
4707 return nullptr;
4708
4709 ConstantInt *OrderArg = nullptr;
4710 bool IsVolatile = false;
4711
4712 // These should have 5 arguments (plus the callee). A separate version of the
4713 // ds_fadd intrinsic was defined for bf16 which was missing arguments.
4714 if (NumOperands > 3)
4715 OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4716
4717 // Ignore scope argument at 3
4718
4719 if (NumOperands > 5) {
4720 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4721 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4722 }
4723
4724 AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
4725 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
4726 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4727 if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
4728 Order = AtomicOrdering::SequentiallyConsistent;
4729
4730 LLVMContext &Ctx = F->getContext();
4731
4732 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4733 Type *RetTy = CI->getType();
4734 if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4735 if (VT->getElementType()->isIntegerTy(16)) {
4736 VectorType *AsBF16 =
4737 VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4738 Val = Builder.CreateBitCast(Val, AsBF16);
4739 }
4740 }
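// e.g. a <2 x i16> payload is reinterpreted as <2 x bfloat> so the atomicrmw
// operates on the FP type; the result is cast back on return (illustrative).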
4741
4742 // The scope argument never really worked correctly. Use agent as the most
4743 // conservative option which should still always produce the instruction.
4744 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4745 AtomicRMWInst *RMW =
4746 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4747
4748 unsigned AddrSpace = PtrTy->getAddressSpace();
4749 if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
4750 MDNode *EmptyMD = MDNode::get(F->getContext(), {});
4751 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
4752 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
4753 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
4754 }
4755
4756 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
4757 MDBuilder MDB(F->getContext());
4758 MDNode *RangeNotPrivate =
4759 MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS),
4760 APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1));
4761 RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4762 }
4763
4764 if (IsVolatile)
4765 RMW->setVolatile(true);
4766
4767 return Builder.CreateBitCast(RMW, RetTy);
4768}
4769
4770/// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
4771/// plain MDNode, as it's the verifier's job to check these are the correct
4772/// types later.
4773static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
4774 if (Op < CI->arg_size()) {
4775 if (MetadataAsValue *MAV =
4776 dyn_cast<MetadataAsValue>(CI->getArgOperand(Op))) {
4777 Metadata *MD = MAV->getMetadata();
4778 return dyn_cast_if_present<MDNode>(MD);
4779 }
4780 }
4781 return nullptr;
4782}
4783
4784 /// Helper to unwrap a MetadataAsValue operand into its underlying Metadata, such as the Value field.
4785static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
4786 if (Op < CI->arg_size())
4787 if (auto *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op)))
4788 return MAV->getMetadata();
4789 return nullptr;
4790}
4791
4792 static MDNode *getDebugLocSafe(const Instruction *I) {
4793 // The MDNode attached to this instruction might not be the correct type,
4794 // as the verifier has not yet been run. Fetch it as a bare MDNode.
4795 return I->getDebugLoc().getAsMDNode();
4796}
4797
4798/// Convert debug intrinsic calls to non-instruction debug records.
4799/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4800/// \p CI - The debug intrinsic call.
4801 static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
4802 DbgRecord *DR = nullptr;
4803 if (Name == "label") {
4804 DR = DbgLabelRecord::createUnresolvedDbgLabelRecord(unwrapMAVOp(CI, 0),
4805 CI->getDebugLoc());
4806 } else if (Name == "assign") {
4807 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4808 DbgVariableRecord::LocationType::Assign, unwrapMAVMetadataOp(CI, 0),
4809 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), unwrapMAVOp(CI, 3),
4810 unwrapMAVMetadataOp(CI, 4),
4811 /*The address is a Value ref, it will be stored as a Metadata */
4812 unwrapMAVOp(CI, 5), getDebugLocSafe(CI));
4813 } else if (Name == "declare") {
4814 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4815 DbgVariableRecord::LocationType::Declare, unwrapMAVMetadataOp(CI, 0),
4816 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), nullptr, nullptr, nullptr,
4817 getDebugLocSafe(CI));
4818 } else if (Name == "addr") {
4819 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
4820 MDNode *ExprNode = unwrapMAVOp(CI, 2);
4821 // Don't try to add something to the expression if it's not an expression.
4822 // Instead, allow the verifier to fail later.
4823 if (DIExpression *Expr = dyn_cast<DIExpression>(ExprNode)) {
4824 ExprNode = DIExpression::append(Expr, dwarf::DW_OP_deref);
4825 }
4826 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4827 DbgVariableRecord::LocationType::Value, unwrapMAVMetadataOp(CI, 0),
4828 unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr,
4829 getDebugLocSafe(CI));
4830 } else if (Name == "value") {
4831 // An old version of dbg.value had an extra offset argument.
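// e.g. llvm.dbg.value(metadata %v, i64 0, metadata !var, metadata !expr):
// the variable/expression operands sit one slot later than in the modern
// three-operand form (illustrative).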
4832 unsigned VarOp = 1;
4833 unsigned ExprOp = 2;
4834 if (CI->arg_size() == 4) {
4835 auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1));
4836 // Nonzero offset dbg.values get dropped without a replacement.
4837 if (!Offset || !Offset->isZeroValue())
4838 return;
4839 VarOp = 2;
4840 ExprOp = 3;
4841 }
4842 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4843 DbgVariableRecord::LocationType::Value, unwrapMAVMetadataOp(CI, 0),
4844 unwrapMAVOp(CI, VarOp), unwrapMAVOp(CI, ExprOp), nullptr, nullptr,
4845 nullptr, getDebugLocSafe(CI));
4846 }
4847 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4848 CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
4849}
4850
4851 static Value *upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder) {
4852 auto *Offset = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4853 if (!Offset)
4854 reportFatalUsageError("Invalid llvm.vector.splice offset argument");
4855 int64_t OffsetVal = Offset->getSExtValue();
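// Illustrative mapping: a negative offset such as -2 becomes
// llvm.vector.splice.right with offset 2; non-negative offsets map to
// llvm.vector.splice.left unchanged.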
4856 return Builder.CreateIntrinsic(OffsetVal >= 0
4857 ? Intrinsic::vector_splice_left
4858 : Intrinsic::vector_splice_right,
4859 CI->getType(),
4860 {CI->getArgOperand(0), CI->getArgOperand(1),
4861 Builder.getInt32(std::abs(OffsetVal))});
4862}
4863
4864/// Upgrade a call to an old intrinsic. All argument and return casting must be
4865/// provided to seamlessly integrate with existing context.
4866 void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
4867 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
4868 // checks the callee's function type matches. It's likely we need to handle
4869 // type changes here.
4870 Function *F = dyn_cast<Function>(CI->getCalledOperand());
4871 if (!F)
4872 return;
4873
4874 LLVMContext &C = CI->getContext();
4875 IRBuilder<> Builder(C);
4876 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
4877
4878 if (!NewFn) {
4879 // Get the Function's name.
4880 StringRef Name = F->getName();
4881
4882 assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
4883 Name = Name.substr(5);
4884
4885 bool IsX86 = Name.consume_front("x86.");
4886 bool IsNVVM = Name.consume_front("nvvm.");
4887 bool IsAArch64 = Name.consume_front("aarch64.");
4888 bool IsARM = Name.consume_front("arm.");
4889 bool IsAMDGCN = Name.consume_front("amdgcn.");
4890 bool IsDbg = Name.consume_front("dbg.");
4891 bool IsOldSplice =
4892 (Name.consume_front("experimental.vector.splice") ||
4893 Name.consume_front("vector.splice")) &&
4894 !(Name.starts_with(".left") || Name.starts_with(".right"));
4895 Value *Rep = nullptr;
4896
4897 if (!IsX86 && Name == "stackprotectorcheck") {
4898 Rep = nullptr;
4899 } else if (IsNVVM) {
4900 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
4901 } else if (IsX86) {
4902 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
4903 } else if (IsAArch64) {
4904 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
4905 } else if (IsARM) {
4906 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4907 } else if (IsAMDGCN) {
4908 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4909 } else if (IsDbg) {
4910 upgradeDbgIntrinsicToDbgRecord(Name, CI);
4911 } else if (IsOldSplice) {
4912 Rep = upgradeVectorSplice(CI, Builder);
4913 } else {
4914 llvm_unreachable("Unknown function for CallBase upgrade.");
4915 }
4916
4917 if (Rep)
4918 CI->replaceAllUsesWith(Rep);
4919 CI->eraseFromParent();
4920 return;
4921 }
4922
4923 const auto &DefaultCase = [&]() -> void {
4924 if (F == NewFn)
4925 return;
4926
4927 if (CI->getFunctionType() == NewFn->getFunctionType()) {
4928 // Handle generic mangling change.
4929 assert(
4930 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4931 "Unknown function for CallBase upgrade and isn't just a name change");
4932 CI->setCalledFunction(NewFn);
4933 return;
4934 }
4935
4936 // This must be an upgrade from a named to a literal struct.
4937 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
4938 assert(OldST != NewFn->getReturnType() &&
4939 "Return type must have changed");
4940 assert(OldST->getNumElements() ==
4941 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4942 "Must have same number of elements");
4943
4944 SmallVector<Value *> Args(CI->args());
4945 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
4946 NewCI->setAttributes(CI->getAttributes());
4947 Value *Res = PoisonValue::get(OldST);
4948 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4949 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4950 Res = Builder.CreateInsertValue(Res, Elem, Idx);
4951 }
4952 CI->replaceAllUsesWith(Res);
4953 CI->eraseFromParent();
4954 return;
4955 }
4956
4957 // We're probably about to produce something invalid. Let the verifier catch
4958 // it instead of dying here.
4959 CI->setCalledOperand(
4960 ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType()));
4961 return;
4962 };
4963 CallInst *NewCall = nullptr;
4964 switch (NewFn->getIntrinsicID()) {
4965 default: {
4966 DefaultCase();
4967 return;
4968 }
4969 case Intrinsic::arm_neon_vst1:
4970 case Intrinsic::arm_neon_vst2:
4971 case Intrinsic::arm_neon_vst3:
4972 case Intrinsic::arm_neon_vst4:
4973 case Intrinsic::arm_neon_vst2lane:
4974 case Intrinsic::arm_neon_vst3lane:
4975 case Intrinsic::arm_neon_vst4lane: {
4976 SmallVector<Value *, 4> Args(CI->args());
4977 NewCall = Builder.CreateCall(NewFn, Args);
4978 break;
4979 }
4980 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4981 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4982 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
4983 LLVMContext &Ctx = F->getParent()->getContext();
4984 SmallVector<Value *, 4> Args(CI->args());
4985 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
4986 cast<ConstantInt>(Args[3])->getZExtValue());
4987 NewCall = Builder.CreateCall(NewFn, Args);
4988 break;
4989 }
4990 case Intrinsic::aarch64_sve_ld3_sret:
4991 case Intrinsic::aarch64_sve_ld4_sret:
4992 case Intrinsic::aarch64_sve_ld2_sret: {
4993 StringRef Name = F->getName();
4994 Name = Name.substr(5);
4995 unsigned N = StringSwitch<unsigned>(Name)
4996 .StartsWith("aarch64.sve.ld2", 2)
4997 .StartsWith("aarch64.sve.ld3", 3)
4998 .StartsWith("aarch64.sve.ld4", 4)
4999 .Default(0);
5000 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5001 unsigned MinElts = RetTy->getMinNumElements() / N;
5002 SmallVector<Value *, 2> Args(CI->args());
5003 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
5004 Value *Ret = llvm::PoisonValue::get(RetTy);
5005 for (unsigned I = 0; I < N; I++) {
5006 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
5007 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
5008 }
5009 NewCall = dyn_cast<CallInst>(Ret);
5010 break;
5011 }
5012
5013 case Intrinsic::coro_end: {
5014 SmallVector<Value *, 3> Args(CI->args());
5015 Args.push_back(ConstantTokenNone::get(CI->getContext()));
5016 NewCall = Builder.CreateCall(NewFn, Args);
5017 break;
5018 }
5019
5020 case Intrinsic::vector_extract: {
5021 StringRef Name = F->getName();
5022 Name = Name.substr(5); // Strip llvm
5023 if (!Name.starts_with("aarch64.sve.tuple.get")) {
5024 DefaultCase();
5025 return;
5026 }
5027 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5028 unsigned MinElts = RetTy->getMinNumElements();
5029 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5030 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
5031 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
5032 break;
5033 }
5034
5035 case Intrinsic::vector_insert: {
5036 StringRef Name = F->getName();
5037 Name = Name.substr(5);
5038 if (!Name.starts_with("aarch64.sve.tuple")) {
5039 DefaultCase();
5040 return;
5041 }
5042 if (Name.starts_with("aarch64.sve.tuple.set")) {
5043 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5044 auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
5045 Value *NewIdx =
5046 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
5047 NewCall = Builder.CreateCall(
5048 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
5049 break;
5050 }
5051 if (Name.starts_with("aarch64.sve.tuple.create")) {
5052 unsigned N = StringSwitch<unsigned>(Name)
5053 .StartsWith("aarch64.sve.tuple.create2", 2)
5054 .StartsWith("aarch64.sve.tuple.create3", 3)
5055 .StartsWith("aarch64.sve.tuple.create4", 4)
5056 .Default(0);
5057 assert(N > 1 && "Create is expected to be between 2-4");
5058 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5059 Value *Ret = llvm::PoisonValue::get(RetTy);
5060 unsigned MinElts = RetTy->getMinNumElements() / N;
5061 for (unsigned I = 0; I < N; I++) {
5062 Value *V = CI->getArgOperand(I);
5063 Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
5064 }
5065 NewCall = dyn_cast<CallInst>(Ret);
5066 }
5067 break;
5068 }
5069
5070 case Intrinsic::arm_neon_bfdot:
5071 case Intrinsic::arm_neon_bfmmla:
5072 case Intrinsic::arm_neon_bfmlalb:
5073 case Intrinsic::arm_neon_bfmlalt:
5074 case Intrinsic::aarch64_neon_bfdot:
5075 case Intrinsic::aarch64_neon_bfmmla:
5076 case Intrinsic::aarch64_neon_bfmlalb:
5077 case Intrinsic::aarch64_neon_bfmlalt: {
5078 SmallVector<Value *, 3> Args;
5079 assert(CI->arg_size() == 3 &&
5080 "Mismatch between function args and call args");
5081 size_t OperandWidth =
5082 CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
5083 assert((OperandWidth == 64 || OperandWidth == 128) &&
5084 "Unexpected operand width");
5085 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
5086 auto Iter = CI->args().begin();
5087 Args.push_back(*Iter++);
5088 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5089 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5090 NewCall = Builder.CreateCall(NewFn, Args);
5091 break;
5092 }
5093
5094 case Intrinsic::bitreverse:
5095 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5096 break;
5097
5098 case Intrinsic::ctlz:
5099 case Intrinsic::cttz: {
5100 if (CI->arg_size() != 1) {
5101 DefaultCase();
5102 return;
5103 }
5104
5105 NewCall =
5106 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
5107 break;
5108 }
5109
5110 case Intrinsic::objectsize: {
5111 Value *NullIsUnknownSize =
5112 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
5113 Value *Dynamic =
5114 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
5115 NewCall = Builder.CreateCall(
5116 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
5117 break;
5118 }
5119
5120 case Intrinsic::ctpop:
5121 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5122 break;
5123
5124 case Intrinsic::convert_from_fp16:
5125 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5126 break;
5127
5128 case Intrinsic::dbg_value: {
5129 StringRef Name = F->getName();
5130 Name = Name.substr(5); // Strip llvm.
5131 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
5132 if (Name.starts_with("dbg.addr")) {
5133 DIExpression *Expr = cast<DIExpression>(
5134 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
5135 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
5136 NewCall =
5137 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
5138 MetadataAsValue::get(C, Expr)});
5139 break;
5140 }
5141
5142 // Upgrade from the old version that had an extra offset argument.
5143 assert(CI->arg_size() == 4);
5144 // Drop nonzero offsets instead of attempting to upgrade them.
5145 if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
5146 if (Offset->isZeroValue()) {
5147 NewCall = Builder.CreateCall(
5148 NewFn,
5149 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
5150 break;
5151 }
5152 CI->eraseFromParent();
5153 return;
5154 }
5155
5156 case Intrinsic::ptr_annotation:
5157 // Upgrade from versions that lacked the annotation attribute argument.
5158 if (CI->arg_size() != 4) {
5159 DefaultCase();
5160 return;
5161 }
5162
5163 // Create a new call with an added null annotation attribute argument.
5164 NewCall = Builder.CreateCall(
5165 NewFn,
5166 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5167 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5168 NewCall->takeName(CI);
5169 CI->replaceAllUsesWith(NewCall);
5170 CI->eraseFromParent();
5171 return;
5172
5173 case Intrinsic::var_annotation:
5174 // Upgrade from versions that lacked the annotation attribute argument.
5175 if (CI->arg_size() != 4) {
5176 DefaultCase();
5177 return;
5178 }
5179 // Create a new call with an added null annotation attribute argument.
5180 NewCall = Builder.CreateCall(
5181 NewFn,
5182 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5183 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5184 NewCall->takeName(CI);
5185 CI->replaceAllUsesWith(NewCall);
5186 CI->eraseFromParent();
5187 return;
5188
5189 case Intrinsic::riscv_aes32dsi:
5190 case Intrinsic::riscv_aes32dsmi:
5191 case Intrinsic::riscv_aes32esi:
5192 case Intrinsic::riscv_aes32esmi:
5193 case Intrinsic::riscv_sm4ks:
5194 case Intrinsic::riscv_sm4ed: {
5195 // The last argument to these intrinsics used to be i8 and changed to i32.
5196 // The type overload for sm4ks and sm4ed was removed.
5197 Value *Arg2 = CI->getArgOperand(2);
5198 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
5199 return;
5200
5201 Value *Arg0 = CI->getArgOperand(0);
5202 Value *Arg1 = CI->getArgOperand(1);
5203 if (CI->getType()->isIntegerTy(64)) {
5204 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5205 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5206 }
5207
5208 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
5209 cast<ConstantInt>(Arg2)->getZExtValue());
5210
5211 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5212 Value *Res = NewCall;
5213 if (Res->getType() != CI->getType())
5214 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5215 NewCall->takeName(CI);
5216 CI->replaceAllUsesWith(Res);
5217 CI->eraseFromParent();
5218 return;
5219 }
5220 case Intrinsic::nvvm_mapa_shared_cluster: {
5221 // Create a new call with the correct address space.
5222 NewCall =
5223 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)});
5224 Value *Res = NewCall;
5225 Res = Builder.CreateAddrSpaceCast(
5226 Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED));
5227 NewCall->takeName(CI);
5228 CI->replaceAllUsesWith(Res);
5229 CI->eraseFromParent();
5230 return;
5231 }
5232 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5233 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5234 // Create a new call with the correct address space.
5235 SmallVector<Value *, 4> Args(CI->args());
5236 Args[0] = Builder.CreateAddrSpaceCast(
5237 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5238
5239 NewCall = Builder.CreateCall(NewFn, Args);
5240 NewCall->takeName(CI);
5241 CI->replaceAllUsesWith(NewCall);
5242 CI->eraseFromParent();
5243 return;
5244 }
5245 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5246 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5247 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5248 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5249 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5250 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5251 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5252 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5253 SmallVector<Value *, 16> Args(CI->args());
5254
5255 // Create AddrSpaceCast to shared_cluster if needed.
5256 // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
5257 unsigned AS = CI->getArgOperand(0)->getType()->getPointerAddressSpace();
5258 if (AS == NVPTXAS::ADDRESS_SPACE_SHARED)
5259 Args[0] = Builder.CreateAddrSpaceCast(
5260 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5261
5262 // Attach the flag argument for cta_group, with a
5263 // default value of 0. This handles case (2) in
5264 // shouldUpgradeNVPTXTMAG2SIntrinsics().
5265 size_t NumArgs = CI->arg_size();
5266 Value *FlagArg = CI->getArgOperand(NumArgs - 3);
5267 if (!FlagArg->getType()->isIntegerTy(1))
5268 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5269
5270 NewCall = Builder.CreateCall(NewFn, Args);
5271 NewCall->takeName(CI);
5272 CI->replaceAllUsesWith(NewCall);
5273 CI->eraseFromParent();
5274 return;
5275 }
5276 case Intrinsic::riscv_sha256sig0:
5277 case Intrinsic::riscv_sha256sig1:
5278 case Intrinsic::riscv_sha256sum0:
5279 case Intrinsic::riscv_sha256sum1:
5280 case Intrinsic::riscv_sm3p0:
5281 case Intrinsic::riscv_sm3p1: {
5282 // These intrinsics' i32/i64 type overload was removed; they now operate only
5283 // on i32. Truncate i64 arguments and sign-extend the result when upgrading.
5284 if (!CI->getType()->isIntegerTy(64))
5285 return;
5286
5287 Value *Arg =
5288 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
5289
5290 NewCall = Builder.CreateCall(NewFn, Arg);
5291 Value *Res =
5292 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5293 NewCall->takeName(CI);
5294 CI->replaceAllUsesWith(Res);
5295 CI->eraseFromParent();
5296 return;
5297 }
5298
5299 case Intrinsic::x86_xop_vfrcz_ss:
5300 case Intrinsic::x86_xop_vfrcz_sd:
5301 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
5302 break;
5303
5304 case Intrinsic::x86_xop_vpermil2pd:
5305 case Intrinsic::x86_xop_vpermil2ps:
5306 case Intrinsic::x86_xop_vpermil2pd_256:
5307 case Intrinsic::x86_xop_vpermil2ps_256: {
5308 SmallVector<Value *, 4> Args(CI->args());
5309 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
5310 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
5311 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5312 NewCall = Builder.CreateCall(NewFn, Args);
5313 break;
5314 }
5315
5316 case Intrinsic::x86_sse41_ptestc:
5317 case Intrinsic::x86_sse41_ptestz:
5318 case Intrinsic::x86_sse41_ptestnzc: {
5319 // The arguments for these intrinsics used to be v4f32, and changed
5320 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
5321 // So, the only thing required is a bitcast for both arguments.
5322 // First, check the arguments have the old type.
5323 Value *Arg0 = CI->getArgOperand(0);
5324 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
5325 return;
5326
5327 // Old intrinsic, add bitcasts
5328 Value *Arg1 = CI->getArgOperand(1);
5329
5330 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
5331
5332 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
5333 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
5334
5335 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5336 break;
5337 }
5338
5339 case Intrinsic::x86_rdtscp: {
5340 // This used to take 1 argument. If we have no arguments, it is already
5341 // upgraded.
5342 if (CI->getNumOperands() == 0)
5343 return;
5344
5345 NewCall = Builder.CreateCall(NewFn);
5346 // Extract the second result and store it.
5347 Value *Data = Builder.CreateExtractValue(NewCall, 1);
5348 Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
5349 // Replace the original call result with the first result of the new call.
5350 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5351
5352 NewCall->takeName(CI);
5353 CI->replaceAllUsesWith(TSC);
5354 CI->eraseFromParent();
5355 return;
5356 }
5357
5358 case Intrinsic::x86_sse41_insertps:
5359 case Intrinsic::x86_sse41_dppd:
5360 case Intrinsic::x86_sse41_dpps:
5361 case Intrinsic::x86_sse41_mpsadbw:
5362 case Intrinsic::x86_avx_dp_ps_256:
5363 case Intrinsic::x86_avx2_mpsadbw: {
5364 // Need to truncate the last argument from i32 to i8 -- this argument models
5365 // an inherently 8-bit immediate operand to these x86 instructions.
5366 SmallVector<Value *, 4> Args(CI->args());
5367
5368 // Replace the last argument with a trunc.
5369 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
5370 NewCall = Builder.CreateCall(NewFn, Args);
5371 break;
5372 }
5373
5374 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5375 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5376 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5377 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5378 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5379 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5380 SmallVector<Value *, 4> Args(CI->args());
5381 unsigned NumElts =
5382 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
5383 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
5384
5385 NewCall = Builder.CreateCall(NewFn, Args);
5386 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
5387
5388 NewCall->takeName(CI);
5389 CI->replaceAllUsesWith(Res);
5390 CI->eraseFromParent();
5391 return;
5392 }
5393
5394 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5395 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5396 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5397 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5398 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5399 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5400 SmallVector<Value *, 4> Args(CI->args());
5401 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
5402 if (NewFn->getIntrinsicID() ==
5403 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5404 Args[1] = Builder.CreateBitCast(
5405 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5406
5407 NewCall = Builder.CreateCall(NewFn, Args);
5408 Value *Res = Builder.CreateBitCast(
5409 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
5410
5411 NewCall->takeName(CI);
5412 CI->replaceAllUsesWith(Res);
5413 CI->eraseFromParent();
5414 return;
5415 }
5416 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5417 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5418 case Intrinsic::x86_avx512bf16_dpbf16ps_512: {
5419 SmallVector<Value *, 4> Args(CI->args());
5420 unsigned NumElts =
5421 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
5422 Args[1] = Builder.CreateBitCast(
5423 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5424 Args[2] = Builder.CreateBitCast(
5425 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5426
5427 NewCall = Builder.CreateCall(NewFn, Args);
5428 break;
5429 }
5430
5431 case Intrinsic::thread_pointer: {
5432 NewCall = Builder.CreateCall(NewFn, {});
5433 break;
5434 }
5435
5436 case Intrinsic::memcpy:
5437 case Intrinsic::memmove:
5438 case Intrinsic::memset: {
5439 // We have to make sure that the call signature is what we're expecting.
5440 // We only want to change the old signatures by removing the alignment arg:
5441 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|64], i32, i1)
5442 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|64], i1)
5443 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
5444 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
5445 // Note: i8*'s in the above can be any pointer type
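// Illustrative upgrade (opaque-pointer spelling):
//   call void @llvm.memcpy.p0.p0.i64(ptr %d, ptr %s, i64 %n, i32 16, i1 false)
// -> call void @llvm.memcpy.p0.p0.i64(ptr align 16 %d, ptr align 16 %s,
//                                     i64 %n, i1 false)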
5446 if (CI->arg_size() != 5) {
5447 DefaultCase();
5448 return;
5449 }
5450 // Remove alignment argument (3), and add alignment attributes to the
5451 // dest/src pointers.
5452 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
5453 CI->getArgOperand(2), CI->getArgOperand(4)};
5454 NewCall = Builder.CreateCall(NewFn, Args);
5455 AttributeList OldAttrs = CI->getAttributes();
5456 AttributeList NewAttrs = AttributeList::get(
5457 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5458 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5459 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5460 NewCall->setAttributes(NewAttrs);
5461 auto *MemCI = cast<MemIntrinsic>(NewCall);
5462 // All mem intrinsics support dest alignment.
5463 const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
5464 MemCI->setDestAlignment(Align->getMaybeAlignValue());
5465 // Memcpy/Memmove also support source alignment.
5466 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
5467 MTI->setSourceAlignment(Align->getMaybeAlignValue());
5468 break;
5469 }
5470
5471 case Intrinsic::masked_load:
5472 case Intrinsic::masked_gather:
5473 case Intrinsic::masked_store:
5474 case Intrinsic::masked_scatter: {
5475 if (CI->arg_size() != 4) {
5476 DefaultCase();
5477 return;
5478 }
5479
5480 auto GetMaybeAlign = [](Value *Op) {
5481 if (auto *CI = dyn_cast<ConstantInt>(Op)) {
5482 uint64_t Val = CI->getZExtValue();
5483 if (Val == 0)
5484 return MaybeAlign();
5485 if (isPowerOf2_64(Val))
5486 return MaybeAlign(Val);
5487 }
5488 reportFatalUsageError("Invalid alignment argument");
5489 };
5490 auto GetAlign = [&](Value *Op) {
5491 MaybeAlign Align = GetMaybeAlign(Op);
5492 if (Align)
5493 return *Align;
5494 reportFatalUsageError("Invalid zero alignment argument");
5495 };
5496
5497 const DataLayout &DL = CI->getDataLayout();
5498 switch (NewFn->getIntrinsicID()) {
5499 case Intrinsic::masked_load:
5500 NewCall = Builder.CreateMaskedLoad(
5501 CI->getType(), CI->getArgOperand(0), GetAlign(CI->getArgOperand(1)),
5502 CI->getArgOperand(2), CI->getArgOperand(3));
5503 break;
5504 case Intrinsic::masked_gather:
5505 NewCall = Builder.CreateMaskedGather(
5506 CI->getType(), CI->getArgOperand(0),
5507 DL.getValueOrABITypeAlignment(GetMaybeAlign(CI->getArgOperand(1)),
5508 CI->getType()->getScalarType()),
5509 CI->getArgOperand(2), CI->getArgOperand(3));
5510 break;
5511 case Intrinsic::masked_store:
5512 NewCall = Builder.CreateMaskedStore(
5513 CI->getArgOperand(0), CI->getArgOperand(1),
5514 GetAlign(CI->getArgOperand(2)), CI->getArgOperand(3));
5515 break;
5516 case Intrinsic::masked_scatter:
5517 NewCall = Builder.CreateMaskedScatter(
5518 CI->getArgOperand(0), CI->getArgOperand(1),
5519 DL.getValueOrABITypeAlignment(
5520 GetMaybeAlign(CI->getArgOperand(2)),
5521 CI->getArgOperand(0)->getType()->getScalarType()),
5522 CI->getArgOperand(3));
5523 break;
5524 default:
5525 llvm_unreachable("Unexpected intrinsic ID");
5526 }
5527 // Previous metadata is still valid.
5528 NewCall->copyMetadata(*CI);
5529 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5530 break;
5531 }
5532
5533 case Intrinsic::lifetime_start:
5534 case Intrinsic::lifetime_end: {
5535 if (CI->arg_size() != 2) {
5536 DefaultCase();
5537 return;
5538 }
5539
5540 Value *Ptr = CI->getArgOperand(1);
5541 // Try to strip pointer casts, such that the lifetime works on an alloca.
5542 Ptr = Ptr->stripPointerCasts();
5543 if (isa<AllocaInst>(Ptr)) {
5544 // Don't use NewFn, as we might have looked through an addrspacecast.
5545 if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
5546 NewCall = Builder.CreateLifetimeStart(Ptr);
5547 else
5548 NewCall = Builder.CreateLifetimeEnd(Ptr);
5549 break;
5550 }
5551
5552 // Otherwise remove the lifetime marker.
5553 CI->eraseFromParent();
5554 return;
5555 }
5556
5557 case Intrinsic::x86_avx512_vpdpbusd_128:
5558 case Intrinsic::x86_avx512_vpdpbusd_256:
5559 case Intrinsic::x86_avx512_vpdpbusd_512:
5560 case Intrinsic::x86_avx512_vpdpbusds_128:
5561 case Intrinsic::x86_avx512_vpdpbusds_256:
5562 case Intrinsic::x86_avx512_vpdpbusds_512:
5563 case Intrinsic::x86_avx2_vpdpbssd_128:
5564 case Intrinsic::x86_avx2_vpdpbssd_256:
5565 case Intrinsic::x86_avx10_vpdpbssd_512:
5566 case Intrinsic::x86_avx2_vpdpbssds_128:
5567 case Intrinsic::x86_avx2_vpdpbssds_256:
5568 case Intrinsic::x86_avx10_vpdpbssds_512:
5569 case Intrinsic::x86_avx2_vpdpbsud_128:
5570 case Intrinsic::x86_avx2_vpdpbsud_256:
5571 case Intrinsic::x86_avx10_vpdpbsud_512:
5572 case Intrinsic::x86_avx2_vpdpbsuds_128:
5573 case Intrinsic::x86_avx2_vpdpbsuds_256:
5574 case Intrinsic::x86_avx10_vpdpbsuds_512:
5575 case Intrinsic::x86_avx2_vpdpbuud_128:
5576 case Intrinsic::x86_avx2_vpdpbuud_256:
5577 case Intrinsic::x86_avx10_vpdpbuud_512:
5578 case Intrinsic::x86_avx2_vpdpbuuds_128:
5579 case Intrinsic::x86_avx2_vpdpbuuds_256:
5580 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5581 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 8;
5582 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5583 CI->getArgOperand(2)};
5584 Type *NewArgType = VectorType::get(Builder.getInt8Ty(), NumElts, false);
5585 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5586 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5587
5588 NewCall = Builder.CreateCall(NewFn, Args);
5589 break;
5590 }
5591 case Intrinsic::x86_avx512_vpdpwssd_128:
5592 case Intrinsic::x86_avx512_vpdpwssd_256:
5593 case Intrinsic::x86_avx512_vpdpwssd_512:
5594 case Intrinsic::x86_avx512_vpdpwssds_128:
5595 case Intrinsic::x86_avx512_vpdpwssds_256:
5596 case Intrinsic::x86_avx512_vpdpwssds_512:
5597 case Intrinsic::x86_avx2_vpdpwsud_128:
5598 case Intrinsic::x86_avx2_vpdpwsud_256:
5599 case Intrinsic::x86_avx10_vpdpwsud_512:
5600 case Intrinsic::x86_avx2_vpdpwsuds_128:
5601 case Intrinsic::x86_avx2_vpdpwsuds_256:
5602 case Intrinsic::x86_avx10_vpdpwsuds_512:
5603 case Intrinsic::x86_avx2_vpdpwusd_128:
5604 case Intrinsic::x86_avx2_vpdpwusd_256:
5605 case Intrinsic::x86_avx10_vpdpwusd_512:
5606 case Intrinsic::x86_avx2_vpdpwusds_128:
5607 case Intrinsic::x86_avx2_vpdpwusds_256:
5608 case Intrinsic::x86_avx10_vpdpwusds_512:
5609 case Intrinsic::x86_avx2_vpdpwuud_128:
5610 case Intrinsic::x86_avx2_vpdpwuud_256:
5611 case Intrinsic::x86_avx10_vpdpwuud_512:
5612 case Intrinsic::x86_avx2_vpdpwuuds_128:
5613 case Intrinsic::x86_avx2_vpdpwuuds_256:
5614 case Intrinsic::x86_avx10_vpdpwuuds_512: {
5615 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 16;
5616 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5617 CI->getArgOperand(2)};
5618 Type *NewArgType = VectorType::get(Builder.getInt16Ty(), NumElts, false);
5619 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5620 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5621
5622 NewCall = Builder.CreateCall(NewFn, Args);
5623 break;
5624 }
}
5625 assert(NewCall && "Should have either set this variable or returned through "
5626 "the default case");
5627 NewCall->takeName(CI);
5628 CI->replaceAllUsesWith(NewCall);
5629 CI->eraseFromParent();
5630}
5631
5633 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5634
5635 // Check if this function should be upgraded and get the replacement function
5636 // if there is one.
5637 Function *NewFn;
5638 if (UpgradeIntrinsicFunction(F, NewFn)) {
5639 // Replace all users of the old function with the new function or new
5640 // instructions. This is not a range loop because the call is deleted.
5641 for (User *U : make_early_inc_range(F->users()))
5642 if (CallBase *CB = dyn_cast<CallBase>(U))
5643 UpgradeIntrinsicCall(CB, NewFn);
5644
5645 // Remove old function, no longer used, from the module.
5646 if (F != NewFn)
5647 F->eraseFromParent();
5648 }
5649}
5650
5651 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
5652 const unsigned NumOperands = MD.getNumOperands();
5653 if (NumOperands == 0)
5654 return &MD; // Invalid, punt to a verifier error.
5655
5656 // Check if the tag uses struct-path aware TBAA format.
5657 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
5658 return &MD;
5659
5660 auto &Context = MD.getContext();
5661 if (NumOperands == 3) {
5662 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
5663 MDNode *ScalarType = MDNode::get(Context, Elts);
5664 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
5665 Metadata *Elts2[] = {ScalarType, ScalarType,
5666 ConstantAsMetadata::get(
5667 Constant::getNullValue(Type::getInt64Ty(Context))),
5668 MD.getOperand(2)};
5669 return MDNode::get(Context, Elts2);
5670 }
5671 // Create a MDNode <MD, MD, offset 0>
5672 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
5673 Type::getInt64Ty(Context)))};
5674 return MDNode::get(Context, Elts);
5675}
5676
5677 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
5678 Instruction *&Temp) {
5679 if (Opc != Instruction::BitCast)
5680 return nullptr;
5681
5682 Temp = nullptr;
5683 Type *SrcTy = V->getType();
5684 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5685 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5686 LLVMContext &Context = V->getContext();
5687
5688 // We have no information about target data layout, so we assume that
5689 // the maximum pointer size is 64bit.
5690 Type *MidTy = Type::getInt64Ty(Context);
5691 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
5692
5693 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
5694 }
5695
5696 return nullptr;
5697}
5698
5699 Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
5700 if (Opc != Instruction::BitCast)
5701 return nullptr;
5702
5703 Type *SrcTy = C->getType();
5704 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5705 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5706 LLVMContext &Context = C->getContext();
5707
5708 // We have no information about target data layout, so we assume that
5709 // the maximum pointer size is 64bit.
5710 Type *MidTy = Type::getInt64Ty(Context);
5711
5712 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
5713 DestTy);
5714 }
5715
5716 return nullptr;
5717}
5718
5719/// Check the debug info version number, if it is out-dated, drop the debug
5720/// info. Return true if module is modified.
5721 bool llvm::UpgradeDebugInfo(Module &M) {
5722 if (DisableAutoUpgradeDebugInfo)
5723 return false;
5724
5725 llvm::TimeTraceScope timeScope("Upgrade debug info");
5726 // We need to get metadata before the module is verified (i.e., getModuleFlag
5727 // makes assumptions that we haven't verified yet). Carefully extract the flag
5728 // from the metadata.
5729 unsigned Version = 0;
5730 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5731 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
5732 if (Flag->getNumOperands() < 3)
5733 return false;
5734 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5735 return K->getString() == "Debug Info Version";
5736 return false;
5737 });
5738 if (OpIt != ModFlags->op_end()) {
5739 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5740 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
5741 Version = CI->getZExtValue();
5742 }
5743 }
5744
5745 if (Version == DEBUG_METADATA_VERSION) {
5746 bool BrokenDebugInfo = false;
5747 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
5748 report_fatal_error("Broken module found, compilation aborted!");
5749 if (!BrokenDebugInfo)
5750 // Everything is ok.
5751 return false;
5752 else {
5753 // Diagnose malformed debug info.
5754 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
5755 M.getContext().diagnose(Diag);
5756 }
5757 }
5758 bool Modified = StripDebugInfo(M);
5759 if (Modified && Version != DEBUG_METADATA_VERSION) {
5760 // Diagnose a version mismatch.
5761 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
5762 M.getContext().diagnose(DiagVersion);
5763 }
5764 return Modified;
5765}
5766
5767static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
5768 GlobalValue *GV, const Metadata *V) {
5769 Function *F = cast<Function>(GV);
5770
5771 constexpr StringLiteral DefaultValue = "1";
5772 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5773 unsigned Length = 0;
5774
5775 if (F->hasFnAttribute(Attr)) {
5776 // We expect the existing attribute to have the form "x[,y[,z]]". Here we
5777 // parse these elements placing them into Vect3
5778 StringRef S = F->getFnAttribute(Attr).getValueAsString();
5779 for (; Length < 3 && !S.empty(); Length++) {
5780 auto [Part, Rest] = S.split(',');
5781 Vect3[Length] = Part.trim();
5782 S = Rest;
5783 }
5784 }
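// e.g. an existing "nvvm.maxntid"="16,8" merged with a metadata value of 4
// for dim 'z' yields "16,8,4" (illustrative).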
5785
5786 const unsigned Dim = DimC - 'x';
5787 assert(Dim < 3 && "Unexpected dim char");
5788
5789 const uint64_t VInt = mdconst::extract<ConstantInt>(V)->getZExtValue();
5790
5791 // Local variable required for the StringRef in Vect3 to point to.
5792 const std::string VStr = llvm::utostr(VInt);
5793 Vect3[Dim] = VStr;
5794 Length = std::max(Length, Dim + 1);
5795
5796 const std::string NewAttr = llvm::join(ArrayRef(Vect3, Length), ",");
5797 F->addFnAttr(Attr, NewAttr);
5798}
5799
5800static inline bool isXYZ(StringRef S) {
5801 return S == "x" || S == "y" || S == "z";
5802}
5803
5804 static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
5805 const Metadata *V) {
5806 if (K == "kernel") {
5807 if (!mdconst::extract<ConstantInt>(V)->isZero())
5808 cast<Function>(GV)->setCallingConv(CallingConv::PTX_Kernel);
5809 return true;
5810 }
5811 if (K == "align") {
5812 // V is a bitfield specifying two 16-bit values. The alignment value is
5813 // specified in the low 16 bits; the index is specified in the high bits. For the
5814 // index, 0 indicates the return value while higher values correspond to
5815 // each parameter (idx = param + 1).
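// e.g. V = 0x00020010 encodes index 2 (parameter 1) with a stack alignment
// of 16 (illustrative).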
5816 const uint64_t AlignIdxValuePair =
5817 mdconst::extract<ConstantInt>(V)->getZExtValue();
5818 const unsigned Idx = (AlignIdxValuePair >> 16);
5819 const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
5820 cast<Function>(GV)->addAttributeAtIndex(
5821 Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
5822 return true;
5823 }
5824 if (K == "maxclusterrank" || K == "cluster_max_blocks") {
5825 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5826 cast<Function>(GV)->addFnAttr("nvvm.maxclusterrank", llvm::utostr(CV));
5827 return true;
5828 }
5829 if (K == "minctasm") {
5830 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5831 cast<Function>(GV)->addFnAttr("nvvm.minctasm", llvm::utostr(CV));
5832 return true;
5833 }
5834 if (K == "maxnreg") {
5835 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5836 cast<Function>(GV)->addFnAttr("nvvm.maxnreg", llvm::utostr(CV));
5837 return true;
5838 }
5839 if (K.consume_front("maxntid") && isXYZ(K)) {
5840 upgradeNVVMFnVectorAttr("nvvm.maxntid", K[0], GV, V);
5841 return true;
5842 }
5843 if (K.consume_front("reqntid") && isXYZ(K)) {
5844 upgradeNVVMFnVectorAttr("nvvm.reqntid", K[0], GV, V);
5845 return true;
5846 }
5847 if (K.consume_front("cluster_dim_") && isXYZ(K)) {
5848 upgradeNVVMFnVectorAttr("nvvm.cluster_dim", K[0], GV, V);
5849 return true;
5850 }
5851 if (K == "grid_constant") {
5852 const auto Attr = Attribute::get(GV->getContext(), "nvvm.grid_constant");
5853 for (const auto &Op : cast<MDNode>(V)->operands()) {
5854 // For some reason, the index is 1-based in the metadata. Good thing we're
5855 // able to auto-upgrade it!
5856 const auto Index = mdconst::extract<ConstantInt>(Op)->getZExtValue() - 1;
5857 cast<Function>(GV)->addParamAttr(Index, Attr);
5858 }
5859 return true;
5860 }
5861
5862 return false;
5863}
5864
5866 NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
5867 if (!NamedMD)
5868 return;
5869
5870 SmallVector<MDNode *, 8> NewNodes;
5871 SmallPtrSet<const MDNode *, 8> SeenNodes;
5872 for (MDNode *MD : NamedMD->operands()) {
5873 if (!SeenNodes.insert(MD).second)
5874 continue;
5875
5876 auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
5877 if (!GV)
5878 continue;
5879
5880 assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
5881
5882 SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
5883 // Each nvvm.annotations metadata entry will be of the following form:
5884 // !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
5885 // start index = 1, to skip the global variable key
5886 // increment = 2, to skip the value for each property-value pairs
5887 for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
5888 MDString *K = cast<MDString>(MD->getOperand(j));
5889 const MDOperand &V = MD->getOperand(j + 1);
5890 bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V);
5891 if (!Upgraded)
5892 NewOperands.append({K, V});
5893 }
5894
5895 if (NewOperands.size() > 1)
5896 NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
5897 }
5898
5899 NamedMD->clearOperands();
5900 for (MDNode *N : NewNodes)
5901 NamedMD->addOperand(N);
5902}
5903
5904/// This checks for objc retain release marker which should be upgraded. It
5905/// returns true if module is modified.
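/// e.g. a marker string of the form "A#B" stored in the named metadata is
/// rewritten to "A;B" and moved into a module flag (illustrative).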
5906 static bool upgradeRetainReleaseMarker(Module &M) {
5907 bool Changed = false;
5908 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
5909 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
5910 if (ModRetainReleaseMarker) {
5911 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
5912 if (Op) {
5913 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
5914 if (ID) {
5915 SmallVector<StringRef, 4> ValueComp;
5916 ID->getString().split(ValueComp, "#");
5917 if (ValueComp.size() == 2) {
5918 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
5919 ID = MDString::get(M.getContext(), NewValue);
5920 }
5921 M.addModuleFlag(Module::Error, MarkerKey, ID);
5922 M.eraseNamedMetadata(ModRetainReleaseMarker);
5923 Changed = true;
5924 }
5925 }
5926 }
5927 return Changed;
5928}
5929
5930void llvm::UpgradeARCRuntime(Module &M) {
5931  // This lambda converts normal function calls to ARC runtime functions to
5932 // intrinsic calls.
5933 auto UpgradeToIntrinsic = [&](const char *OldFunc,
5934 llvm::Intrinsic::ID IntrinsicFunc) {
5935 Function *Fn = M.getFunction(OldFunc);
5936
5937 if (!Fn)
5938 return;
5939
5940 Function *NewFn =
5941 llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);
5942
5943 for (User *U : make_early_inc_range(Fn->users())) {
5944      CallInst *CI = dyn_cast<CallInst>(U);
5945      if (!CI || CI->getCalledFunction() != Fn)
5946 continue;
5947
5948 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
5949 FunctionType *NewFuncTy = NewFn->getFunctionType();
5950      SmallVector<Value *, 4> Args;
5951
5952 // Don't upgrade the intrinsic if it's not valid to bitcast the return
5953 // value to the return type of the old function.
5954 if (NewFuncTy->getReturnType() != CI->getType() &&
5955 !CastInst::castIsValid(Instruction::BitCast, CI,
5956 NewFuncTy->getReturnType()))
5957 continue;
5958
5959 bool InvalidCast = false;
5960
5961 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
5962 Value *Arg = CI->getArgOperand(I);
5963
5964 // Bitcast argument to the parameter type of the new function if it's
5965 // not a variadic argument.
5966 if (I < NewFuncTy->getNumParams()) {
5967 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
5968 // to the parameter type of the new function.
5969 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
5970 NewFuncTy->getParamType(I))) {
5971 InvalidCast = true;
5972 break;
5973 }
5974 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
5975 }
5976 Args.push_back(Arg);
5977 }
5978
5979 if (InvalidCast)
5980 continue;
5981
5982 // Create a call instruction that calls the new function.
5983 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
5984 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5985 NewCall->takeName(CI);
5986
5987 // Bitcast the return value back to the type of the old call.
5988 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
5989
5990 if (!CI->use_empty())
5991 CI->replaceAllUsesWith(NewRetVal);
5992 CI->eraseFromParent();
5993 }
5994
5995 if (Fn->use_empty())
5996 Fn->eraseFromParent();
5997 };
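  // Illustrative sketch (hypothetical IR): given the mapping below, a plain
  // runtime call
  //   %v = call ptr @objc_retain(ptr %x)
  // is rewritten by UpgradeToIntrinsic into
  //   %v = call ptr @llvm.objc.retain(ptr %x)
  // with bitcasts inserted wherever argument or return types disagree.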
5998
5999 // Unconditionally convert a call to "clang.arc.use" to a call to
6000 // "llvm.objc.clang.arc.use".
6001 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
6002
6003 // Upgrade the retain release marker. If there is no need to upgrade
6004 // the marker, that means either the module is already new enough to contain
6005  // new intrinsics or it is not ARC. There is no need to upgrade runtime calls.
6006  if (!upgradeRetainReleaseMarker(M))
6007    return;
6008
6009 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
6010 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
6011 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
6012 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
6013 {"objc_autoreleaseReturnValue",
6014 llvm::Intrinsic::objc_autoreleaseReturnValue},
6015 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
6016 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
6017 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
6018 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
6019 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
6020 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
6021 {"objc_release", llvm::Intrinsic::objc_release},
6022 {"objc_retain", llvm::Intrinsic::objc_retain},
6023 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
6024 {"objc_retainAutoreleaseReturnValue",
6025 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
6026 {"objc_retainAutoreleasedReturnValue",
6027 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
6028 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
6029 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
6030 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
6031 {"objc_unsafeClaimAutoreleasedReturnValue",
6032 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
6033 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
6034 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
6035 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
6036 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
6037 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
6038 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
6039 {"objc_arc_annotation_topdown_bbstart",
6040 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
6041 {"objc_arc_annotation_topdown_bbend",
6042 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
6043 {"objc_arc_annotation_bottomup_bbstart",
6044 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
6045 {"objc_arc_annotation_bottomup_bbend",
6046 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
6047
6048 for (auto &I : RuntimeFuncs)
6049 UpgradeToIntrinsic(I.first, I.second);
6050}
6051
6053 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6054 if (!ModFlags)
6055 return false;
6056
6057 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
6058 bool HasSwiftVersionFlag = false;
6059 uint8_t SwiftMajorVersion, SwiftMinorVersion;
6060 uint32_t SwiftABIVersion;
6061 auto Int8Ty = Type::getInt8Ty(M.getContext());
6062 auto Int32Ty = Type::getInt32Ty(M.getContext());
6063
6064 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6065 MDNode *Op = ModFlags->getOperand(I);
6066 if (Op->getNumOperands() != 3)
6067 continue;
6068 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6069 if (!ID)
6070 continue;
6071 auto SetBehavior = [&](Module::ModFlagBehavior B) {
6072 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
6073 Type::getInt32Ty(M.getContext()), B)),
6074 MDString::get(M.getContext(), ID->getString()),
6075 Op->getOperand(2)};
6076 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6077 Changed = true;
6078 };
6079
6080 if (ID->getString() == "Objective-C Image Info Version")
6081 HasObjCFlag = true;
6082 if (ID->getString() == "Objective-C Class Properties")
6083 HasClassProperties = true;
6084 // Upgrade PIC from Error/Max to Min.
6085 if (ID->getString() == "PIC Level") {
6086 if (auto *Behavior =
6087              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
6088        uint64_t V = Behavior->getLimitedValue();
6089 if (V == Module::Error || V == Module::Max)
6090 SetBehavior(Module::Min);
6091 }
6092 }
6093 // Upgrade "PIE Level" from Error to Max.
6094 if (ID->getString() == "PIE Level")
6095 if (auto *Behavior =
6096              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
6097        if (Behavior->getLimitedValue() == Module::Error)
6098 SetBehavior(Module::Max);
6099
6100 // Upgrade branch protection and return address signing module flags. The
6101    // module flag behavior for these flags was Error and is now Min.
6102 if (ID->getString() == "branch-target-enforcement" ||
6103 ID->getString().starts_with("sign-return-address")) {
6104 if (auto *Behavior =
6105              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
6106        if (Behavior->getLimitedValue() == Module::Error) {
6107 Type *Int32Ty = Type::getInt32Ty(M.getContext());
6108 Metadata *Ops[3] = {
6109 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
6110 Op->getOperand(1), Op->getOperand(2)};
6111 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6112 Changed = true;
6113 }
6114 }
6115 }
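    // For example (hypothetical flag): !{i32 1, !"branch-target-enforcement",
    // i32 1} carries behavior Error (1) and is rewritten in place with
    // behavior Min (8), leaving the key and value untouched.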
6116
6117    // Upgrade Objective-C Image Info Section. Remove the whitespace in the
6118    // section name so that llvm-lto will not complain about mismatching
6119    // module flags that are functionally the same.
6120 if (ID->getString() == "Objective-C Image Info Section") {
6121 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
6122 SmallVector<StringRef, 4> ValueComp;
6123 Value->getString().split(ValueComp, " ");
6124 if (ValueComp.size() != 1) {
6125 std::string NewValue;
6126 for (auto &S : ValueComp)
6127 NewValue += S.str();
6128 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
6129 MDString::get(M.getContext(), NewValue)};
6130 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6131 Changed = true;
6132 }
6133 }
6134 }
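    // e.g. (illustrative): the section value
    //   "__DATA, __objc_imageinfo, regular, no_dead_strip"
    // becomes "__DATA,__objc_imageinfo,regular,no_dead_strip".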
6135
6136    // IRUpgrader turns an i32-typed "Objective-C Garbage Collection" flag into
6137    // an i8 value. If the higher bits are set, it adds new module flags for Swift info.
6138 if (ID->getString() == "Objective-C Garbage Collection") {
6139 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
6140 if (Md) {
6141 assert(Md->getValue() && "Expected non-empty metadata");
6142 auto Type = Md->getValue()->getType();
6143 if (Type == Int8Ty)
6144 continue;
6145 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
6146 if ((Val & 0xff) != Val) {
6147 HasSwiftVersionFlag = true;
6148 SwiftABIVersion = (Val & 0xff00) >> 8;
6149 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6150 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6151 }
6152 Metadata *Ops[3] = {
6153            ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Error)),
6154            Op->getOperand(1),
6155            ConstantAsMetadata::get(ConstantInt::get(Int8Ty, Val & 0xff))};
6156 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6157 Changed = true;
6158 }
6159 }
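    // Worked example (hypothetical value): Val = 0x05040300 yields
    // SwiftMajorVersion = 5, SwiftMinorVersion = 4, SwiftABIVersion = 3, and
    // the flag itself is narrowed to the i8 GC value 0x00.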
6160
6161 if (ID->getString() == "amdgpu_code_object_version") {
6162 Metadata *Ops[3] = {
6163 Op->getOperand(0),
6164 MDString::get(M.getContext(), "amdhsa_code_object_version"),
6165 Op->getOperand(2)};
6166 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6167 Changed = true;
6168 }
6169 }
6170
6171 // "Objective-C Class Properties" is recently added for Objective-C. We
6172 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
6173 // flag of value 0, so we can correclty downgrade this flag when trying to
6174 // link an ObjC bitcode without this module flag with an ObjC bitcode with
6175 // this module flag.
6176 if (HasObjCFlag && !HasClassProperties) {
6177 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
6178 (uint32_t)0);
6179 Changed = true;
6180 }
6181
6182 if (HasSwiftVersionFlag) {
6183 M.addModuleFlag(Module::Error, "Swift ABI Version",
6184 SwiftABIVersion);
6185 M.addModuleFlag(Module::Error, "Swift Major Version",
6186 ConstantInt::get(Int8Ty, SwiftMajorVersion));
6187 M.addModuleFlag(Module::Error, "Swift Minor Version",
6188 ConstantInt::get(Int8Ty, SwiftMinorVersion));
6189 Changed = true;
6190 }
6191
6192 return Changed;
6193}
6194
6195void llvm::UpgradeSectionAttributes(Module &M) {
6196  auto TrimSpaces = [](StringRef Section) -> std::string {
6197 SmallVector<StringRef, 5> Components;
6198 Section.split(Components, ',');
6199
6200 SmallString<32> Buffer;
6201 raw_svector_ostream OS(Buffer);
6202
6203 for (auto Component : Components)
6204 OS << ',' << Component.trim();
6205
6206 return std::string(OS.str().substr(1));
6207 };
6208
6209 for (auto &GV : M.globals()) {
6210 if (!GV.hasSection())
6211 continue;
6212
6213 StringRef Section = GV.getSection();
6214
6215 if (!Section.starts_with("__DATA, __objc_catlist"))
6216 continue;
6217
6218 // __DATA, __objc_catlist, regular, no_dead_strip
6219 // __DATA,__objc_catlist,regular,no_dead_strip
6220 GV.setSection(TrimSpaces(Section));
6221 }
6222}
6223
6224namespace {
6225// Prior to LLVM 10.0, the strictfp attribute could be used on individual
6226// callsites within a function that did not also have the strictfp attribute.
6227// Since 10.0, if strict FP semantics are needed within a function, the
6228// function must have the strictfp attribute and all calls within the function
6229// must also have the strictfp attribute. This latter restriction is
6230// necessary to prevent unwanted libcall simplification when a function is
6231// being cloned (such as for inlining).
6232//
6233// The "dangling" strictfp attribute usage was only used to prevent constant
6234// folding and other libcall simplification. The nobuiltin attribute on the
6235// callsite has the same effect.
6236struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
6237 StrictFPUpgradeVisitor() = default;
6238
6239 void visitCallBase(CallBase &Call) {
6240 if (!Call.isStrictFP())
6241 return;
6242    if (isa<ConstrainedFPIntrinsic>(&Call))
6243      return;
6244 // If we get here, the caller doesn't have the strictfp attribute
6245 // but this callsite does. Replace the strictfp attribute with nobuiltin.
6246 Call.removeFnAttr(Attribute::StrictFP);
6247 Call.addFnAttr(Attribute::NoBuiltin);
6248 }
6249};
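// Illustrative (hypothetical IR): inside a caller that lacks the strictfp
// attribute, a callsite
//   %r = call double @sin(double %x) strictfp
// is rewritten by the visitor above to
//   %r = call double @sin(double %x) nobuiltin
// preserving only the blocking of libcall simplification.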
6250
6251/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
6252struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6253 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6254 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
6255
6256 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6257 if (!RMW.isFloatingPointOperation())
6258 return;
6259
6260 MDNode *Empty = MDNode::get(RMW.getContext(), {});
6261 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
6262 RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
6263 RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
6264 }
6265};
6266} // namespace
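// Illustrative (hypothetical IR): under "amdgpu-unsafe-fp-atomics"="true", a
// floating-point operation such as
//   %old = atomicrmw fadd ptr %p, double %v seq_cst
// is annotated with the three empty !amdgpu.* metadata nodes set by the
// visitor above, and the function attribute is then removed.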
6267
6268void llvm::UpgradeFunctionAttributes(Function &F) {
6269  // If a function definition doesn't have the strictfp attribute,
6270 // convert any callsite strictfp attributes to nobuiltin.
6271 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
6272 StrictFPUpgradeVisitor SFPV;
6273 SFPV.visit(F);
6274 }
6275
6276  // Remove all incompatible attributes from the function.
6277 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6278 F.getReturnType(), F.getAttributes().getRetAttrs()));
6279 for (auto &Arg : F.args())
6280 Arg.removeAttrs(
6281 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6282
6283 // Older versions of LLVM treated an "implicit-section-name" attribute
6284 // similarly to directly setting the section on a Function.
6285 if (Attribute A = F.getFnAttribute("implicit-section-name");
6286 A.isValid() && A.isStringAttribute()) {
6287 F.setSection(A.getValueAsString());
6288 F.removeFnAttr("implicit-section-name");
6289 }
6290
6291 if (!F.empty()) {
6292 // For some reason this is called twice, and the first time is before any
6293 // instructions are loaded into the body.
6294
6295 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
6296 A.isValid()) {
6297
6298 if (A.getValueAsBool()) {
6299 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6300 Visitor.visit(F);
6301 }
6302
6303 // We will leave behind dead attribute uses on external declarations, but
6304 // clang never added these to declarations anyway.
6305 F.removeFnAttr("amdgpu-unsafe-fp-atomics");
6306 }
6307 }
6308}
6309
6310// Check if the function attribute is not present and set it.
6311static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName,
6312                                    StringRef Value) {
6313 if (!F.hasFnAttribute(FnAttrName))
6314 F.addFnAttr(FnAttrName, Value);
6315}
6316
6317// Check if the function attribute is not present and set it if needed.
6318// If the attribute is "false", remove it.
6319// If the attribute is "true", reset it to a valueless attribute.
6320static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName) {
6321 if (!F.hasFnAttribute(FnAttrName)) {
6322 if (Set)
6323 F.addFnAttr(FnAttrName);
6324 } else {
6325 auto A = F.getFnAttribute(FnAttrName);
6326 if ("false" == A.getValueAsString())
6327 F.removeFnAttr(FnAttrName);
6328 else if ("true" == A.getValueAsString()) {
6329 F.removeFnAttr(FnAttrName);
6330 F.addFnAttr(FnAttrName);
6331 }
6332 }
6333}
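// For example (hypothetical attribute values): with Set == true, a function
// carrying "branch-target-enforcement"="false" loses the attribute entirely,
// while "branch-target-enforcement"="true" is replaced by the valueless form.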
6334
6335void llvm::copyModuleAttrToFunctions(Module &M) {
6336  Triple T(M.getTargetTriple());
6337 if (!T.isThumb() && !T.isARM() && !T.isAArch64())
6338 return;
6339
6340 uint64_t BTEValue = 0;
6341 uint64_t BPPLRValue = 0;
6342 uint64_t GCSValue = 0;
6343 uint64_t SRAValue = 0;
6344 uint64_t SRAALLValue = 0;
6345 uint64_t SRABKeyValue = 0;
6346
6347 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6348 if (ModFlags) {
6349 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6350 MDNode *Op = ModFlags->getOperand(I);
6351 if (Op->getNumOperands() != 3)
6352 continue;
6353
6354 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6355 auto *CI = mdconst::dyn_extract<ConstantInt>(Op->getOperand(2));
6356 if (!ID || !CI)
6357 continue;
6358
6359 StringRef IDStr = ID->getString();
6360 uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue
6361 : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
6362 : IDStr == "guarded-control-stack" ? &GCSValue
6363 : IDStr == "sign-return-address" ? &SRAValue
6364 : IDStr == "sign-return-address-all" ? &SRAALLValue
6365 : IDStr == "sign-return-address-with-bkey"
6366 ? &SRABKeyValue
6367 : nullptr;
6368 if (!ValPtr)
6369 continue;
6370
6371 *ValPtr = CI->getZExtValue();
6372 if (*ValPtr == 2)
6373 return;
6374 }
6375 }
6376
6377 bool BTE = BTEValue == 1;
6378 bool BPPLR = BPPLRValue == 1;
6379 bool GCS = GCSValue == 1;
6380 bool SRA = SRAValue == 1;
6381
6382 StringRef SignTypeValue = "non-leaf";
6383 if (SRA && SRAALLValue == 1)
6384 SignTypeValue = "all";
6385
6386 StringRef SignKeyValue = "a_key";
6387 if (SRA && SRABKeyValue == 1)
6388 SignKeyValue = "b_key";
6389
6390 for (Function &F : M.getFunctionList()) {
6391 if (F.isDeclaration())
6392 continue;
6393
6394 if (SRA) {
6395 setFunctionAttrIfNotSet(F, "sign-return-address", SignTypeValue);
6396 setFunctionAttrIfNotSet(F, "sign-return-address-key", SignKeyValue);
6397 } else {
6398 if (auto A = F.getFnAttribute("sign-return-address");
6399 A.isValid() && "none" == A.getValueAsString()) {
6400 F.removeFnAttr("sign-return-address");
6401 F.removeFnAttr("sign-return-address-key");
6402 }
6403 }
6404 ConvertFunctionAttr(F, BTE, "branch-target-enforcement");
6405 ConvertFunctionAttr(F, BPPLR, "branch-protection-pauth-lr");
6406 ConvertFunctionAttr(F, GCS, "guarded-control-stack");
6407 }
6408
6409 if (BTE)
6410 M.setModuleFlag(llvm::Module::Min, "branch-target-enforcement", 2);
6411 if (BPPLR)
6412 M.setModuleFlag(llvm::Module::Min, "branch-protection-pauth-lr", 2);
6413 if (GCS)
6414 M.setModuleFlag(llvm::Module::Min, "guarded-control-stack", 2);
6415 if (SRA) {
6416 M.setModuleFlag(llvm::Module::Min, "sign-return-address", 2);
6417 if (SRAALLValue == 1)
6418 M.setModuleFlag(llvm::Module::Min, "sign-return-address-all", 2);
6419 if (SRABKeyValue == 1)
6420 M.setModuleFlag(llvm::Module::Min, "sign-return-address-with-bkey", 2);
6421 }
6422}
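// Illustrative module (hypothetical flags): !{i32 1, !"sign-return-address",
// i32 1} together with !{i32 1, !"sign-return-address-all", i32 1} stamps
// every function definition with "sign-return-address"="all" and
// "sign-return-address-key"="a_key", then re-emits the module flags with
// value 2 and Min behavior.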
6423
6424static bool isOldLoopArgument(Metadata *MD) {
6425 auto *T = dyn_cast_or_null<MDTuple>(MD);
6426 if (!T)
6427 return false;
6428 if (T->getNumOperands() < 1)
6429 return false;
6430 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
6431 if (!S)
6432 return false;
6433 return S->getString().starts_with("llvm.vectorizer.");
6434}
6435
6436static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
6437  StringRef OldPrefix = "llvm.vectorizer.";
6438 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
6439
6440 if (OldTag == "llvm.vectorizer.unroll")
6441 return MDString::get(C, "llvm.loop.interleave.count");
6442
6443 return MDString::get(
6444 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
6445 .str());
6446}
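// e.g. "llvm.vectorizer.width" becomes "llvm.loop.vectorize.width", while the
// special case "llvm.vectorizer.unroll" maps to "llvm.loop.interleave.count".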
6447
6448static Metadata *upgradeLoopArgument(Metadata *MD) {
6449  auto *T = dyn_cast_or_null<MDTuple>(MD);
6450 if (!T)
6451 return MD;
6452 if (T->getNumOperands() < 1)
6453 return MD;
6454 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
6455 if (!OldTag)
6456 return MD;
6457 if (!OldTag->getString().starts_with("llvm.vectorizer."))
6458 return MD;
6459
6460 // This has an old tag. Upgrade it.
6461  SmallVector<Metadata *, 4> Ops;
6462  Ops.reserve(T->getNumOperands());
6463 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
6464 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
6465 Ops.push_back(T->getOperand(I));
6466
6467 return MDTuple::get(T->getContext(), Ops);
6468}
6469
6470MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
6471  auto *T = dyn_cast<MDTuple>(&N);
6472 if (!T)
6473 return &N;
6474
6475 if (none_of(T->operands(), isOldLoopArgument))
6476 return &N;
6477
6478  SmallVector<Metadata *, 4> Ops;
6479  Ops.reserve(T->getNumOperands());
6480 for (Metadata *MD : T->operands())
6481 Ops.push_back(upgradeLoopArgument(MD));
6482
6483 return MDTuple::get(T->getContext(), Ops);
6484}
6485
6486std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
6487  Triple T(TT);
6488  // The only data layout upgrade needed for pre-GCN, SPIR, or SPIRV is
6489  // setting the address space of globals to 1. This does not apply to SPIRV Logical.
6490 if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
6491 !DL.contains("-G") && !DL.starts_with("G")) {
6492 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
6493 }
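  // e.g. (illustrative): an empty SPIR datalayout becomes "G1", and
  // "e-i64:64" becomes "e-i64:64-G1".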
6494
6495 if (T.isLoongArch64() || T.isRISCV64()) {
6496 // Make i32 a native type for 64-bit LoongArch and RISC-V.
6497 auto I = DL.find("-n64-");
6498 if (I != StringRef::npos)
6499 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
6500 return DL.str();
6501 }
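  // e.g. (illustrative): "e-m:e-i64:64-n64-S128" becomes
  // "e-m:e-i64:64-n32:64-S128".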
6502
6503 // AMDGPU data layout upgrades.
6504 std::string Res = DL.str();
6505 if (T.isAMDGPU()) {
6506 // Define address spaces for constants.
6507 if (!DL.contains("-G") && !DL.starts_with("G"))
6508 Res.append(Res.empty() ? "G1" : "-G1");
6509
6510 // AMDGCN data layout upgrades.
6511 if (T.isAMDGCN()) {
6512
6513 // Add missing non-integral declarations.
6514 // This goes before adding new address spaces to prevent incoherent string
6515 // values.
6516 if (!DL.contains("-ni") && !DL.starts_with("ni"))
6517 Res.append("-ni:7:8:9");
6518 // Update ni:7 to ni:7:8:9.
6519 if (DL.ends_with("ni:7"))
6520 Res.append(":8:9");
6521 if (DL.ends_with("ni:7:8"))
6522 Res.append(":9");
6523
6524 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
6525      // resources). An empty data layout has already been upgraded to G1 by now.
6526 if (!DL.contains("-p7") && !DL.starts_with("p7"))
6527 Res.append("-p7:160:256:256:32");
6528 if (!DL.contains("-p8") && !DL.starts_with("p8"))
6529 Res.append("-p8:128:128:128:48");
6530 constexpr StringRef OldP8("-p8:128:128-");
6531 if (DL.contains(OldP8))
6532 Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
6533 if (!DL.contains("-p9") && !DL.starts_with("p9"))
6534 Res.append("-p9:192:256:256:32");
6535 }
6536
6537 // Upgrade the ELF mangling mode.
6538 if (!DL.contains("m:e"))
6539 Res = Res.empty() ? "m:e" : "m:e-" + Res;
6540
6541 return Res;
6542 }
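  // Illustrative: a legacy amdgcn layout ending in "ni:7" is extended to
  // "ni:7:8:9", and any missing p7/p8/p9 entries are appended, e.g.
  //   "...-ni:7" -> "...-ni:7:8:9-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32"
  // (the "..." stands for an arbitrary hypothetical prefix).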
6543
6544 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
6545 // If the datalayout matches the expected format, add pointer size address
6546 // spaces to the datalayout.
6547 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
6548 if (!DL.contains(AddrSpaces)) {
6549      SmallVector<StringRef, 4> Groups;
6550      Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6551 if (R.match(Res, &Groups))
6552 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
6553 }
6554 };
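  // e.g. (illustrative): "e-m:e-p:32:32-f80:128-n8:16:32" matches the regex
  // above and becomes
  // "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f80:128-n8:16:32".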
6555
6556 // AArch64 data layout upgrades.
6557 if (T.isAArch64()) {
6558 // Add "-Fn32"
6559 if (!DL.empty() && !DL.contains("-Fn32"))
6560 Res.append("-Fn32");
6561 AddPtr32Ptr64AddrSpaces();
6562 return Res;
6563 }
6564
6565 if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
6566 T.isWasm()) {
6567 // Mips64 with o32 ABI did not add "-i128:128".
6568 // Add "-i128:128"
6569 std::string I64 = "-i64:64";
6570 std::string I128 = "-i128:128";
6571 if (!StringRef(Res).contains(I128)) {
6572 size_t Pos = Res.find(I64);
6573 if (Pos != size_t(-1))
6574 Res.insert(Pos + I64.size(), I128);
6575 }
6576 }
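  // e.g. (illustrative): a wasm layout "e-m:e-i64:64-n32:64-S128" gains
  // "-i128:128" right after "-i64:64", yielding
  // "e-m:e-i64:64-i128:128-n32:64-S128".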
6577
6578 if (T.isPPC() && T.isOSAIX() && !DL.contains("f64:32:64") && !DL.empty()) {
6579 size_t Pos = Res.find("-S128");
6580 if (Pos == StringRef::npos)
6581 Pos = Res.size();
6582 Res.insert(Pos, "-f64:32:64");
6583 }
6584
6585 if (!T.isX86())
6586 return Res;
6587
6588 AddPtr32Ptr64AddrSpaces();
6589
6590 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
6591 // for i128 operations prior to this being reflected in the data layout, and
6592  // clang mostly produced LLVM IR that already aligned i128 to 16-byte
6593 // boundaries, so although this is a breaking change, the upgrade is expected
6594 // to fix more IR than it breaks.
6595 // Intel MCU is an exception and uses 4-byte-alignment.
6596 if (!T.isOSIAMCU()) {
6597 std::string I128 = "-i128:128";
6598 if (StringRef Ref = Res; !Ref.contains(I128)) {
6599    SmallVector<StringRef, 4> Groups;
6600    Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6601 if (R.match(Res, &Groups))
6602 Res = (Groups[1] + I128 + Groups[3]).str();
6603 }
6604 }
6605
6606 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
6607 // Raising the alignment is safe because Clang did not produce f80 values in
6608 // the MSVC environment before this upgrade was added.
6609 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
6610 StringRef Ref = Res;
6611 auto I = Ref.find("-f80:32-");
6612 if (I != StringRef::npos)
6613 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
6614 }
6615
6616 return Res;
6617}
6618
6619void llvm::UpgradeAttributes(AttrBuilder &B) {
6620 StringRef FramePointer;
6621 Attribute A = B.getAttribute("no-frame-pointer-elim");
6622 if (A.isValid()) {
6623 // The value can be "true" or "false".
6624 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
6625 B.removeAttribute("no-frame-pointer-elim");
6626 }
6627 if (B.contains("no-frame-pointer-elim-non-leaf")) {
6628 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
6629 if (FramePointer != "all")
6630 FramePointer = "non-leaf";
6631 B.removeAttribute("no-frame-pointer-elim-non-leaf");
6632 }
6633 if (!FramePointer.empty())
6634 B.addAttribute("frame-pointer", FramePointer);
6635
6636 A = B.getAttribute("null-pointer-is-valid");
6637 if (A.isValid()) {
6638 // The value can be "true" or "false".
6639 bool NullPointerIsValid = A.getValueAsString() == "true";
6640 B.removeAttribute("null-pointer-is-valid");
6641 if (NullPointerIsValid)
6642 B.addAttribute(Attribute::NullPointerIsValid);
6643 }
6644}
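// Illustrative upgrades (hypothetical attribute sets):
//   { "no-frame-pointer-elim"="true" }      -> { "frame-pointer"="all" }
//   { "no-frame-pointer-elim-non-leaf" }    -> { "frame-pointer"="non-leaf" }
//   { "null-pointer-is-valid"="true" }      -> { null_pointer_is_valid }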
6645
6646void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
6647 // clang.arc.attachedcall bundles are now required to have an operand.
6648 // If they don't, it's okay to drop them entirely: when there is an operand,
6649 // the "attachedcall" is meaningful and required, but without an operand,
6650 // it's just a marker NOP. Dropping it merely prevents an optimization.
6651 erase_if(Bundles, [&](OperandBundleDef &OBD) {
6652 return OBD.getTag() == "clang.arc.attachedcall" &&
6653 OBD.inputs().empty();
6654 });
6655}