//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Regex.h"
#include <cstring>
#include <numeric>
49
50using namespace llvm;
51
52static cl::opt<bool>
53 DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
54 cl::desc("Disable autoupgrade of debug info"));
55
56static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
57
58// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
59// changed their type from v4f32 to v2i64.
61 Function *&NewFn) {
62 // Check whether this is an old version of the function, which received
63 // v4f32 arguments.
64 Type *Arg0Type = F->getFunctionType()->getParamType(0);
65 if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
66 return false;
67
68 // Yes, it's old, replace it with new version.
69 rename(F);
70 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
71 return true;
72}
73
74// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
75// arguments have changed their type from i32 to i8.
77 Function *&NewFn) {
78 // Check that the last argument is an i32.
79 Type *LastArgType = F->getFunctionType()->getParamType(
80 F->getFunctionType()->getNumParams() - 1);
81 if (!LastArgType->isIntegerTy(32))
82 return false;
83
84 // Move this function aside and map down.
85 rename(F);
86 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
87 return true;
88}
89
90// Upgrade the declaration of fp compare intrinsics that change return type
91// from scalar to vXi1 mask.
93 Function *&NewFn) {
94 // Check if the return type is a vector.
95 if (F->getReturnType()->isVectorTy())
96 return false;
97
98 rename(F);
99 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
100 return true;
101}
102
104 Function *&NewFn) {
105 if (F->getReturnType()->getScalarType()->isBFloatTy())
106 return false;
107
108 rename(F);
109 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
110 return true;
111}
112
114 Function *&NewFn) {
115 if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
116 return false;
117
118 rename(F);
119 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
120 return true;
121}
122
124 // All of the intrinsics matches below should be marked with which llvm
125 // version started autoupgrading them. At some point in the future we would
126 // like to use this information to remove upgrade code for some older
127 // intrinsics. It is currently undecided how we will determine that future
128 // point.
129 if (Name.consume_front("avx."))
130 return (Name.starts_with("blend.p") || // Added in 3.7
131 Name == "cvt.ps2.pd.256" || // Added in 3.9
132 Name == "cvtdq2.pd.256" || // Added in 3.9
133 Name == "cvtdq2.ps.256" || // Added in 7.0
134 Name.starts_with("movnt.") || // Added in 3.2
135 Name.starts_with("sqrt.p") || // Added in 7.0
136 Name.starts_with("storeu.") || // Added in 3.9
137 Name.starts_with("vbroadcast.s") || // Added in 3.5
138 Name.starts_with("vbroadcastf128") || // Added in 4.0
139 Name.starts_with("vextractf128.") || // Added in 3.7
140 Name.starts_with("vinsertf128.") || // Added in 3.7
141 Name.starts_with("vperm2f128.") || // Added in 6.0
142 Name.starts_with("vpermil.")); // Added in 3.1
143
144 if (Name.consume_front("avx2."))
145 return (Name == "movntdqa" || // Added in 5.0
146 Name.starts_with("pabs.") || // Added in 6.0
147 Name.starts_with("padds.") || // Added in 8.0
148 Name.starts_with("paddus.") || // Added in 8.0
149 Name.starts_with("pblendd.") || // Added in 3.7
150 Name == "pblendw" || // Added in 3.7
151 Name.starts_with("pbroadcast") || // Added in 3.8
152 Name.starts_with("pcmpeq.") || // Added in 3.1
153 Name.starts_with("pcmpgt.") || // Added in 3.1
154 Name.starts_with("pmax") || // Added in 3.9
155 Name.starts_with("pmin") || // Added in 3.9
156 Name.starts_with("pmovsx") || // Added in 3.9
157 Name.starts_with("pmovzx") || // Added in 3.9
158 Name == "pmul.dq" || // Added in 7.0
159 Name == "pmulu.dq" || // Added in 7.0
160 Name.starts_with("psll.dq") || // Added in 3.7
161 Name.starts_with("psrl.dq") || // Added in 3.7
162 Name.starts_with("psubs.") || // Added in 8.0
163 Name.starts_with("psubus.") || // Added in 8.0
164 Name.starts_with("vbroadcast") || // Added in 3.8
165 Name == "vbroadcasti128" || // Added in 3.7
166 Name == "vextracti128" || // Added in 3.7
167 Name == "vinserti128" || // Added in 3.7
168 Name == "vperm2i128"); // Added in 6.0
169
170 if (Name.consume_front("avx512.")) {
171 if (Name.consume_front("mask."))
172 // 'avx512.mask.*'
173 return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
174 Name.starts_with("and.") || // Added in 3.9
175 Name.starts_with("andn.") || // Added in 3.9
176 Name.starts_with("broadcast.s") || // Added in 3.9
177 Name.starts_with("broadcastf32x4.") || // Added in 6.0
178 Name.starts_with("broadcastf32x8.") || // Added in 6.0
179 Name.starts_with("broadcastf64x2.") || // Added in 6.0
180 Name.starts_with("broadcastf64x4.") || // Added in 6.0
181 Name.starts_with("broadcasti32x4.") || // Added in 6.0
182 Name.starts_with("broadcasti32x8.") || // Added in 6.0
183 Name.starts_with("broadcasti64x2.") || // Added in 6.0
184 Name.starts_with("broadcasti64x4.") || // Added in 6.0
185 Name.starts_with("cmp.b") || // Added in 5.0
186 Name.starts_with("cmp.d") || // Added in 5.0
187 Name.starts_with("cmp.q") || // Added in 5.0
188 Name.starts_with("cmp.w") || // Added in 5.0
189 Name.starts_with("compress.b") || // Added in 9.0
190 Name.starts_with("compress.d") || // Added in 9.0
191 Name.starts_with("compress.p") || // Added in 9.0
192 Name.starts_with("compress.q") || // Added in 9.0
193 Name.starts_with("compress.store.") || // Added in 7.0
194 Name.starts_with("compress.w") || // Added in 9.0
195 Name.starts_with("conflict.") || // Added in 9.0
196 Name.starts_with("cvtdq2pd.") || // Added in 4.0
197 Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
198 Name == "cvtpd2dq.256" || // Added in 7.0
199 Name == "cvtpd2ps.256" || // Added in 7.0
200 Name == "cvtps2pd.128" || // Added in 7.0
201 Name == "cvtps2pd.256" || // Added in 7.0
202 Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
203 Name == "cvtqq2ps.256" || // Added in 9.0
204 Name == "cvtqq2ps.512" || // Added in 9.0
205 Name == "cvttpd2dq.256" || // Added in 7.0
206 Name == "cvttps2dq.128" || // Added in 7.0
207 Name == "cvttps2dq.256" || // Added in 7.0
208 Name.starts_with("cvtudq2pd.") || // Added in 4.0
209 Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
210 Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
211 Name == "cvtuqq2ps.256" || // Added in 9.0
212 Name == "cvtuqq2ps.512" || // Added in 9.0
213 Name.starts_with("dbpsadbw.") || // Added in 7.0
214 Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
215 Name.starts_with("expand.b") || // Added in 9.0
216 Name.starts_with("expand.d") || // Added in 9.0
217 Name.starts_with("expand.load.") || // Added in 7.0
218 Name.starts_with("expand.p") || // Added in 9.0
219 Name.starts_with("expand.q") || // Added in 9.0
220 Name.starts_with("expand.w") || // Added in 9.0
221 Name.starts_with("fpclass.p") || // Added in 7.0
222 Name.starts_with("insert") || // Added in 4.0
223 Name.starts_with("load.") || // Added in 3.9
224 Name.starts_with("loadu.") || // Added in 3.9
225 Name.starts_with("lzcnt.") || // Added in 5.0
226 Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
227 Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
228 Name.starts_with("movddup") || // Added in 3.9
229 Name.starts_with("move.s") || // Added in 4.0
230 Name.starts_with("movshdup") || // Added in 3.9
231 Name.starts_with("movsldup") || // Added in 3.9
232 Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
233 Name.starts_with("or.") || // Added in 3.9
234 Name.starts_with("pabs.") || // Added in 6.0
235 Name.starts_with("packssdw.") || // Added in 5.0
236 Name.starts_with("packsswb.") || // Added in 5.0
237 Name.starts_with("packusdw.") || // Added in 5.0
238 Name.starts_with("packuswb.") || // Added in 5.0
239 Name.starts_with("padd.") || // Added in 4.0
240 Name.starts_with("padds.") || // Added in 8.0
241 Name.starts_with("paddus.") || // Added in 8.0
242 Name.starts_with("palignr.") || // Added in 3.9
243 Name.starts_with("pand.") || // Added in 3.9
244 Name.starts_with("pandn.") || // Added in 3.9
245 Name.starts_with("pavg") || // Added in 6.0
246 Name.starts_with("pbroadcast") || // Added in 6.0
247 Name.starts_with("pcmpeq.") || // Added in 3.9
248 Name.starts_with("pcmpgt.") || // Added in 3.9
249 Name.starts_with("perm.df.") || // Added in 3.9
250 Name.starts_with("perm.di.") || // Added in 3.9
251 Name.starts_with("permvar.") || // Added in 7.0
252 Name.starts_with("pmaddubs.w.") || // Added in 7.0
253 Name.starts_with("pmaddw.d.") || // Added in 7.0
254 Name.starts_with("pmax") || // Added in 4.0
255 Name.starts_with("pmin") || // Added in 4.0
256 Name == "pmov.qd.256" || // Added in 9.0
257 Name == "pmov.qd.512" || // Added in 9.0
258 Name == "pmov.wb.256" || // Added in 9.0
259 Name == "pmov.wb.512" || // Added in 9.0
260 Name.starts_with("pmovsx") || // Added in 4.0
261 Name.starts_with("pmovzx") || // Added in 4.0
262 Name.starts_with("pmul.dq.") || // Added in 4.0
263 Name.starts_with("pmul.hr.sw.") || // Added in 7.0
264 Name.starts_with("pmulh.w.") || // Added in 7.0
265 Name.starts_with("pmulhu.w.") || // Added in 7.0
266 Name.starts_with("pmull.") || // Added in 4.0
267 Name.starts_with("pmultishift.qb.") || // Added in 8.0
268 Name.starts_with("pmulu.dq.") || // Added in 4.0
269 Name.starts_with("por.") || // Added in 3.9
270 Name.starts_with("prol.") || // Added in 8.0
271 Name.starts_with("prolv.") || // Added in 8.0
272 Name.starts_with("pror.") || // Added in 8.0
273 Name.starts_with("prorv.") || // Added in 8.0
274 Name.starts_with("pshuf.b.") || // Added in 4.0
275 Name.starts_with("pshuf.d.") || // Added in 3.9
276 Name.starts_with("pshufh.w.") || // Added in 3.9
277 Name.starts_with("pshufl.w.") || // Added in 3.9
278 Name.starts_with("psll.d") || // Added in 4.0
279 Name.starts_with("psll.q") || // Added in 4.0
280 Name.starts_with("psll.w") || // Added in 4.0
281 Name.starts_with("pslli") || // Added in 4.0
282 Name.starts_with("psllv") || // Added in 4.0
283 Name.starts_with("psra.d") || // Added in 4.0
284 Name.starts_with("psra.q") || // Added in 4.0
285 Name.starts_with("psra.w") || // Added in 4.0
286 Name.starts_with("psrai") || // Added in 4.0
287 Name.starts_with("psrav") || // Added in 4.0
288 Name.starts_with("psrl.d") || // Added in 4.0
289 Name.starts_with("psrl.q") || // Added in 4.0
290 Name.starts_with("psrl.w") || // Added in 4.0
291 Name.starts_with("psrli") || // Added in 4.0
292 Name.starts_with("psrlv") || // Added in 4.0
293 Name.starts_with("psub.") || // Added in 4.0
294 Name.starts_with("psubs.") || // Added in 8.0
295 Name.starts_with("psubus.") || // Added in 8.0
296 Name.starts_with("pternlog.") || // Added in 7.0
297 Name.starts_with("punpckh") || // Added in 3.9
298 Name.starts_with("punpckl") || // Added in 3.9
299 Name.starts_with("pxor.") || // Added in 3.9
300 Name.starts_with("shuf.f") || // Added in 6.0
301 Name.starts_with("shuf.i") || // Added in 6.0
302 Name.starts_with("shuf.p") || // Added in 4.0
303 Name.starts_with("sqrt.p") || // Added in 7.0
304 Name.starts_with("store.b.") || // Added in 3.9
305 Name.starts_with("store.d.") || // Added in 3.9
306 Name.starts_with("store.p") || // Added in 3.9
307 Name.starts_with("store.q.") || // Added in 3.9
308 Name.starts_with("store.w.") || // Added in 3.9
309 Name == "store.ss" || // Added in 7.0
310 Name.starts_with("storeu.") || // Added in 3.9
311 Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
312 Name.starts_with("ucmp.") || // Added in 5.0
313 Name.starts_with("unpckh.") || // Added in 3.9
314 Name.starts_with("unpckl.") || // Added in 3.9
315 Name.starts_with("valign.") || // Added in 4.0
316 Name == "vcvtph2ps.128" || // Added in 11.0
317 Name == "vcvtph2ps.256" || // Added in 11.0
318 Name.starts_with("vextract") || // Added in 4.0
319 Name.starts_with("vfmadd.") || // Added in 7.0
320 Name.starts_with("vfmaddsub.") || // Added in 7.0
321 Name.starts_with("vfnmadd.") || // Added in 7.0
322 Name.starts_with("vfnmsub.") || // Added in 7.0
323 Name.starts_with("vpdpbusd.") || // Added in 7.0
324 Name.starts_with("vpdpbusds.") || // Added in 7.0
325 Name.starts_with("vpdpwssd.") || // Added in 7.0
326 Name.starts_with("vpdpwssds.") || // Added in 7.0
327 Name.starts_with("vpermi2var.") || // Added in 7.0
328 Name.starts_with("vpermil.p") || // Added in 3.9
329 Name.starts_with("vpermilvar.") || // Added in 4.0
330 Name.starts_with("vpermt2var.") || // Added in 7.0
331 Name.starts_with("vpmadd52") || // Added in 7.0
332 Name.starts_with("vpshld.") || // Added in 7.0
333 Name.starts_with("vpshldv.") || // Added in 8.0
334 Name.starts_with("vpshrd.") || // Added in 7.0
335 Name.starts_with("vpshrdv.") || // Added in 8.0
336 Name.starts_with("vpshufbitqmb.") || // Added in 8.0
337 Name.starts_with("xor.")); // Added in 3.9
338
339 if (Name.consume_front("mask3."))
340 // 'avx512.mask3.*'
341 return (Name.starts_with("vfmadd.") || // Added in 7.0
342 Name.starts_with("vfmaddsub.") || // Added in 7.0
343 Name.starts_with("vfmsub.") || // Added in 7.0
344 Name.starts_with("vfmsubadd.") || // Added in 7.0
345 Name.starts_with("vfnmsub.")); // Added in 7.0
346
347 if (Name.consume_front("maskz."))
348 // 'avx512.maskz.*'
349 return (Name.starts_with("pternlog.") || // Added in 7.0
350 Name.starts_with("vfmadd.") || // Added in 7.0
351 Name.starts_with("vfmaddsub.") || // Added in 7.0
352 Name.starts_with("vpdpbusd.") || // Added in 7.0
353 Name.starts_with("vpdpbusds.") || // Added in 7.0
354 Name.starts_with("vpdpwssd.") || // Added in 7.0
355 Name.starts_with("vpdpwssds.") || // Added in 7.0
356 Name.starts_with("vpermt2var.") || // Added in 7.0
357 Name.starts_with("vpmadd52") || // Added in 7.0
358 Name.starts_with("vpshldv.") || // Added in 8.0
359 Name.starts_with("vpshrdv.")); // Added in 8.0
360
361 // 'avx512.*'
362 return (Name == "movntdqa" || // Added in 5.0
363 Name == "pmul.dq.512" || // Added in 7.0
364 Name == "pmulu.dq.512" || // Added in 7.0
365 Name.starts_with("broadcastm") || // Added in 6.0
366 Name.starts_with("cmp.p") || // Added in 12.0
367 Name.starts_with("cvtb2mask.") || // Added in 7.0
368 Name.starts_with("cvtd2mask.") || // Added in 7.0
369 Name.starts_with("cvtmask2") || // Added in 5.0
370 Name.starts_with("cvtq2mask.") || // Added in 7.0
371 Name == "cvtusi2sd" || // Added in 7.0
372 Name.starts_with("cvtw2mask.") || // Added in 7.0
373 Name == "kand.w" || // Added in 7.0
374 Name == "kandn.w" || // Added in 7.0
375 Name == "knot.w" || // Added in 7.0
376 Name == "kor.w" || // Added in 7.0
377 Name == "kortestc.w" || // Added in 7.0
378 Name == "kortestz.w" || // Added in 7.0
379 Name.starts_with("kunpck") || // added in 6.0
380 Name == "kxnor.w" || // Added in 7.0
381 Name == "kxor.w" || // Added in 7.0
382 Name.starts_with("padds.") || // Added in 8.0
383 Name.starts_with("pbroadcast") || // Added in 3.9
384 Name.starts_with("prol") || // Added in 8.0
385 Name.starts_with("pror") || // Added in 8.0
386 Name.starts_with("psll.dq") || // Added in 3.9
387 Name.starts_with("psrl.dq") || // Added in 3.9
388 Name.starts_with("psubs.") || // Added in 8.0
389 Name.starts_with("ptestm") || // Added in 6.0
390 Name.starts_with("ptestnm") || // Added in 6.0
391 Name.starts_with("storent.") || // Added in 3.9
392 Name.starts_with("vbroadcast.s") || // Added in 7.0
393 Name.starts_with("vpshld.") || // Added in 8.0
394 Name.starts_with("vpshrd.")); // Added in 8.0
395 }
396
397 if (Name.consume_front("fma."))
398 return (Name.starts_with("vfmadd.") || // Added in 7.0
399 Name.starts_with("vfmsub.") || // Added in 7.0
400 Name.starts_with("vfmsubadd.") || // Added in 7.0
401 Name.starts_with("vfnmadd.") || // Added in 7.0
402 Name.starts_with("vfnmsub.")); // Added in 7.0
403
404 if (Name.consume_front("fma4."))
405 return Name.starts_with("vfmadd.s"); // Added in 7.0
406
407 if (Name.consume_front("sse."))
408 return (Name == "add.ss" || // Added in 4.0
409 Name == "cvtsi2ss" || // Added in 7.0
410 Name == "cvtsi642ss" || // Added in 7.0
411 Name == "div.ss" || // Added in 4.0
412 Name == "mul.ss" || // Added in 4.0
413 Name.starts_with("sqrt.p") || // Added in 7.0
414 Name == "sqrt.ss" || // Added in 7.0
415 Name.starts_with("storeu.") || // Added in 3.9
416 Name == "sub.ss"); // Added in 4.0
417
418 if (Name.consume_front("sse2."))
419 return (Name == "add.sd" || // Added in 4.0
420 Name == "cvtdq2pd" || // Added in 3.9
421 Name == "cvtdq2ps" || // Added in 7.0
422 Name == "cvtps2pd" || // Added in 3.9
423 Name == "cvtsi2sd" || // Added in 7.0
424 Name == "cvtsi642sd" || // Added in 7.0
425 Name == "cvtss2sd" || // Added in 7.0
426 Name == "div.sd" || // Added in 4.0
427 Name == "mul.sd" || // Added in 4.0
428 Name.starts_with("padds.") || // Added in 8.0
429 Name.starts_with("paddus.") || // Added in 8.0
430 Name.starts_with("pcmpeq.") || // Added in 3.1
431 Name.starts_with("pcmpgt.") || // Added in 3.1
432 Name == "pmaxs.w" || // Added in 3.9
433 Name == "pmaxu.b" || // Added in 3.9
434 Name == "pmins.w" || // Added in 3.9
435 Name == "pminu.b" || // Added in 3.9
436 Name == "pmulu.dq" || // Added in 7.0
437 Name.starts_with("pshuf") || // Added in 3.9
438 Name.starts_with("psll.dq") || // Added in 3.7
439 Name.starts_with("psrl.dq") || // Added in 3.7
440 Name.starts_with("psubs.") || // Added in 8.0
441 Name.starts_with("psubus.") || // Added in 8.0
442 Name.starts_with("sqrt.p") || // Added in 7.0
443 Name == "sqrt.sd" || // Added in 7.0
444 Name == "storel.dq" || // Added in 3.9
445 Name.starts_with("storeu.") || // Added in 3.9
446 Name == "sub.sd"); // Added in 4.0
447
448 if (Name.consume_front("sse41."))
449 return (Name.starts_with("blendp") || // Added in 3.7
450 Name == "movntdqa" || // Added in 5.0
451 Name == "pblendw" || // Added in 3.7
452 Name == "pmaxsb" || // Added in 3.9
453 Name == "pmaxsd" || // Added in 3.9
454 Name == "pmaxud" || // Added in 3.9
455 Name == "pmaxuw" || // Added in 3.9
456 Name == "pminsb" || // Added in 3.9
457 Name == "pminsd" || // Added in 3.9
458 Name == "pminud" || // Added in 3.9
459 Name == "pminuw" || // Added in 3.9
460 Name.starts_with("pmovsx") || // Added in 3.8
461 Name.starts_with("pmovzx") || // Added in 3.9
462 Name == "pmuldq"); // Added in 7.0
463
464 if (Name.consume_front("sse42."))
465 return Name == "crc32.64.8"; // Added in 3.4
466
467 if (Name.consume_front("sse4a."))
468 return Name.starts_with("movnt."); // Added in 3.9
469
470 if (Name.consume_front("ssse3."))
471 return (Name == "pabs.b.128" || // Added in 6.0
472 Name == "pabs.d.128" || // Added in 6.0
473 Name == "pabs.w.128"); // Added in 6.0
474
475 if (Name.consume_front("xop."))
476 return (Name == "vpcmov" || // Added in 3.8
477 Name == "vpcmov.256" || // Added in 5.0
478 Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
479 Name.starts_with("vprot")); // Added in 8.0
480
481 return (Name == "addcarry.u32" || // Added in 8.0
482 Name == "addcarry.u64" || // Added in 8.0
483 Name == "addcarryx.u32" || // Added in 8.0
484 Name == "addcarryx.u64" || // Added in 8.0
485 Name == "subborrow.u32" || // Added in 8.0
486 Name == "subborrow.u64" || // Added in 8.0
487 Name.starts_with("vcvtph2ps.")); // Added in 11.0
488}
489
491 Function *&NewFn) {
492 // Only handle intrinsics that start with "x86.".
493 if (!Name.consume_front("x86."))
494 return false;
495
497 NewFn = nullptr;
498 return true;
499 }
500
501 if (Name == "rdtscp") { // Added in 8.0
502 // If this intrinsic has 0 operands, it's the new version.
503 if (F->getFunctionType()->getNumParams() == 0)
504 return false;
505
506 rename(F);
507 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
508 Intrinsic::x86_rdtscp);
509 return true;
510 }
511
513
514 // SSE4.1 ptest functions may have an old signature.
515 if (Name.consume_front("sse41.ptest")) { // Added in 3.2
517 .Case("c", Intrinsic::x86_sse41_ptestc)
518 .Case("z", Intrinsic::x86_sse41_ptestz)
519 .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
522 return upgradePTESTIntrinsic(F, ID, NewFn);
523
524 return false;
525 }
526
527 // Several blend and other instructions with masks used the wrong number of
528 // bits.
529
530 // Added in 3.6
532 .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
533 .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
534 .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
535 .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
536 .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
537 .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
540 return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);
541
542 if (Name.consume_front("avx512.mask.cmp.")) {
543 // Added in 7.0
545 .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
546 .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
547 .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
548 .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
549 .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
550 .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
553 return upgradeX86MaskedFPCompare(F, ID, NewFn);
554 return false; // No other 'x86.avx523.mask.cmp.*'.
555 }
556
557 if (Name.consume_front("avx512bf16.")) {
558 // Added in 9.0
560 .Case("cvtne2ps2bf16.128",
561 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
562 .Case("cvtne2ps2bf16.256",
563 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
564 .Case("cvtne2ps2bf16.512",
565 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
566 .Case("mask.cvtneps2bf16.128",
567 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
568 .Case("cvtneps2bf16.256",
569 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
570 .Case("cvtneps2bf16.512",
571 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
574 return upgradeX86BF16Intrinsic(F, ID, NewFn);
575
576 // Added in 9.0
578 .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
579 .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
580 .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
583 return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
584 return false; // No other 'x86.avx512bf16.*'.
585 }
586
587 if (Name.consume_front("xop.")) {
589 if (Name.starts_with("vpermil2")) { // Added in 3.9
590 // Upgrade any XOP PERMIL2 index operand still using a float/double
591 // vector.
592 auto Idx = F->getFunctionType()->getParamType(2);
593 if (Idx->isFPOrFPVectorTy()) {
594 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
595 unsigned EltSize = Idx->getScalarSizeInBits();
596 if (EltSize == 64 && IdxSize == 128)
597 ID = Intrinsic::x86_xop_vpermil2pd;
598 else if (EltSize == 32 && IdxSize == 128)
599 ID = Intrinsic::x86_xop_vpermil2ps;
600 else if (EltSize == 64 && IdxSize == 256)
601 ID = Intrinsic::x86_xop_vpermil2pd_256;
602 else
603 ID = Intrinsic::x86_xop_vpermil2ps_256;
604 }
605 } else if (F->arg_size() == 2)
606 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
608 .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
609 .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
611
613 rename(F);
614 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
615 return true;
616 }
617 return false; // No other 'x86.xop.*'
618 }
619
620 if (Name == "seh.recoverfp") {
621 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
622 Intrinsic::eh_recoverfp);
623 return true;
624 }
625
626 return false;
627}
628
629// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so.
630// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
633 Function *&NewFn) {
634 if (Name.starts_with("rbit")) {
635 // '(arm|aarch64).rbit'.
637 F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
638 return true;
639 }
640
641 if (Name == "thread.pointer") {
642 // '(arm|aarch64).thread.pointer'.
643 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
644 Intrinsic::thread_pointer);
645 return true;
646 }
647
648 bool Neon = Name.consume_front("neon.");
649 if (Neon) {
650 // '(arm|aarch64).neon.*'.
651 // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and
652 // v16i8 respectively.
653 if (Name.consume_front("bfdot.")) {
654 // (arm|aarch64).neon.bfdot.*'.
657 .Cases("v2f32.v8i8", "v4f32.v16i8",
658 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
659 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
662 size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
663 assert((OperandWidth == 64 || OperandWidth == 128) &&
664 "Unexpected operand width");
665 LLVMContext &Ctx = F->getParent()->getContext();
666 std::array<Type *, 2> Tys{
667 {F->getReturnType(),
668 FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
669 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
670 return true;
671 }
672 return false; // No other '(arm|aarch64).neon.bfdot.*'.
673 }
674
675 // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
676 // anymore and accept v8bf16 instead of v16i8.
677 if (Name.consume_front("bfm")) {
678 // (arm|aarch64).neon.bfm*'.
679 if (Name.consume_back(".v4f32.v16i8")) {
680 // (arm|aarch64).neon.bfm*.v4f32.v16i8'.
683 .Case("mla",
684 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
685 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
686 .Case("lalb",
687 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
688 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
689 .Case("lalt",
690 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
691 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
694 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
695 return true;
696 }
697 return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
698 }
699 return false; // No other '(arm|aarch64).neon.bfm*.
700 }
701 // Continue on to Aarch64 Neon or Arm Neon.
702 }
703 // Continue on to Arm or Aarch64.
704
705 if (IsArm) {
706 // 'arm.*'.
707 if (Neon) {
708 // 'arm.neon.*'.
710 .StartsWith("vclz.", Intrinsic::ctlz)
711 .StartsWith("vcnt.", Intrinsic::ctpop)
712 .StartsWith("vqadds.", Intrinsic::sadd_sat)
713 .StartsWith("vqaddu.", Intrinsic::uadd_sat)
714 .StartsWith("vqsubs.", Intrinsic::ssub_sat)
715 .StartsWith("vqsubu.", Intrinsic::usub_sat)
718 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
719 F->arg_begin()->getType());
720 return true;
721 }
722
723 if (Name.consume_front("vst")) {
724 // 'arm.neon.vst*'.
725 static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
727 if (vstRegex.match(Name, &Groups)) {
728 static const Intrinsic::ID StoreInts[] = {
729 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
730 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
731
732 static const Intrinsic::ID StoreLaneInts[] = {
733 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
734 Intrinsic::arm_neon_vst4lane};
735
736 auto fArgs = F->getFunctionType()->params();
737 Type *Tys[] = {fArgs[0], fArgs[1]};
738 if (Groups[1].size() == 1)
740 F->getParent(), StoreInts[fArgs.size() - 3], Tys);
741 else
743 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
744 return true;
745 }
746 return false; // No other 'arm.neon.vst*'.
747 }
748
749 return false; // No other 'arm.neon.*'.
750 }
751
752 if (Name.consume_front("mve.")) {
753 // 'arm.mve.*'.
754 if (Name == "vctp64") {
755 if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
756 // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
757 // the function and deal with it below in UpgradeIntrinsicCall.
758 rename(F);
759 return true;
760 }
761 return false; // Not 'arm.mve.vctp64'.
762 }
763
764 // These too are changed to accept a v2i1 instead of the old v4i1.
765 if (Name.consume_back(".v4i1")) {
766 // 'arm.mve.*.v4i1'.
767 if (Name.consume_back(".predicated.v2i64.v4i32"))
768 // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
769 return Name == "mull.int" || Name == "vqdmull";
770
771 if (Name.consume_back(".v2i64")) {
772 // 'arm.mve.*.v2i64.v4i1'
773 bool IsGather = Name.consume_front("vldr.gather.");
774 if (IsGather || Name.consume_front("vstr.scatter.")) {
775 if (Name.consume_front("base.")) {
776 // Optional 'wb.' prefix.
777 Name.consume_front("wb.");
778 // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
779 // predicated.v2i64.v2i64.v4i1'.
780 return Name == "predicated.v2i64";
781 }
782
783 if (Name.consume_front("offset.predicated."))
784 return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
785 Name == (IsGather ? "v2i64.p0" : "p0.v2i64");
786
787 // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
788 return false;
789 }
790
791 return false; // No other 'arm.mve.*.v2i64.v4i1'.
792 }
793 return false; // No other 'arm.mve.*.v4i1'.
794 }
795 return false; // No other 'arm.mve.*'.
796 }
797
798 if (Name.consume_front("cde.vcx")) {
799 // 'arm.cde.vcx*'.
800 if (Name.consume_back(".predicated.v2i64.v4i1"))
801 // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
802 return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
803 Name == "3q" || Name == "3qa";
804
805 return false; // No other 'arm.cde.vcx*'.
806 }
807 } else {
808 // 'aarch64.*'.
809 if (Neon) {
810 // 'aarch64.neon.*'.
812 .StartsWith("frintn", Intrinsic::roundeven)
813 .StartsWith("rbit", Intrinsic::bitreverse)
816 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
817 F->arg_begin()->getType());
818 return true;
819 }
820
821 if (Name.starts_with("addp")) {
822 // 'aarch64.neon.addp*'.
823 if (F->arg_size() != 2)
824 return false; // Invalid IR.
825 VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
826 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
828 F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
829 return true;
830 }
831 }
832
833 // Changed in 20.0: bfcvt/bfcvtn/bcvtn2 have been replaced with fptrunc.
834 if (Name.starts_with("bfcvt")) {
835 NewFn = nullptr;
836 return true;
837 }
838
839 return false; // No other 'aarch64.neon.*'.
840 }
841 if (Name.consume_front("sve.")) {
842 // 'aarch64.sve.*'.
843 if (Name.consume_front("bf")) {
844 if (Name.consume_back(".lane")) {
845 // 'aarch64.sve.bf*.lane'.
848 .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
849 .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
850 .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
853 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
854 return true;
855 }
856 return false; // No other 'aarch64.sve.bf*.lane'.
857 }
858 return false; // No other 'aarch64.sve.bf*'.
859 }
860
861 // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
862 if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
863 NewFn = nullptr;
864 return true;
865 }
866
867 if (Name.consume_front("addqv")) {
868 // 'aarch64.sve.addqv'.
869 if (!F->getReturnType()->isFPOrFPVectorTy())
870 return false;
871
872 auto Args = F->getFunctionType()->params();
873 Type *Tys[] = {F->getReturnType(), Args[1]};
875 F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
876 return true;
877 }
878
879 if (Name.consume_front("ld")) {
880 // 'aarch64.sve.ld*'.
881 static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
882 if (LdRegex.match(Name)) {
883 Type *ScalarTy =
884 cast<VectorType>(F->getReturnType())->getElementType();
885 ElementCount EC =
886 cast<VectorType>(F->arg_begin()->getType())->getElementCount();
887 Type *Ty = VectorType::get(ScalarTy, EC);
888 static const Intrinsic::ID LoadIDs[] = {
889 Intrinsic::aarch64_sve_ld2_sret,
890 Intrinsic::aarch64_sve_ld3_sret,
891 Intrinsic::aarch64_sve_ld4_sret,
892 };
893 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
894 LoadIDs[Name[0] - '2'], Ty);
895 return true;
896 }
897 return false; // No other 'aarch64.sve.ld*'.
898 }
899
900 if (Name.consume_front("tuple.")) {
901 // 'aarch64.sve.tuple.*'.
902 if (Name.starts_with("get")) {
903 // 'aarch64.sve.tuple.get*'.
904 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
906 F->getParent(), Intrinsic::vector_extract, Tys);
907 return true;
908 }
909
910 if (Name.starts_with("set")) {
911 // 'aarch64.sve.tuple.set*'.
912 auto Args = F->getFunctionType()->params();
913 Type *Tys[] = {Args[0], Args[2], Args[1]};
915 F->getParent(), Intrinsic::vector_insert, Tys);
916 return true;
917 }
918
919 static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
920 if (CreateTupleRegex.match(Name)) {
921 // 'aarch64.sve.tuple.create*'.
922 auto Args = F->getFunctionType()->params();
923 Type *Tys[] = {F->getReturnType(), Args[1]};
925 F->getParent(), Intrinsic::vector_insert, Tys);
926 return true;
927 }
928 return false; // No other 'aarch64.sve.tuple.*'.
929 }
930 return false; // No other 'aarch64.sve.*'.
931 }
932 }
933 return false; // No other 'arm.*', 'aarch64.*'.
934}
935
937 if (Name.consume_front("abs."))
939 .Case("bf16", Intrinsic::nvvm_abs_bf16)
940 .Case("bf16x2", Intrinsic::nvvm_abs_bf16x2)
942
943 if (Name.consume_front("fma.rn."))
945 .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
946 .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
947 .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
948 .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
949 .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
950 .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
951 .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
952 .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
953 .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
954 .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
955 .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
956 .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
958
959 if (Name.consume_front("fmax."))
961 .Case("bf16", Intrinsic::nvvm_fmax_bf16)
962 .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
963 .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
964 .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
965 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
966 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
967 .Case("ftz.nan.xorsign.abs.bf16",
968 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
969 .Case("ftz.nan.xorsign.abs.bf16x2",
970 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
971 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
972 .Case("ftz.xorsign.abs.bf16x2",
973 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
974 .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
975 .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
976 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
977 .Case("nan.xorsign.abs.bf16x2",
978 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
979 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
980 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
982
983 if (Name.consume_front("fmin."))
985 .Case("bf16", Intrinsic::nvvm_fmin_bf16)
986 .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
987 .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
988 .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
989 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
990 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
991 .Case("ftz.nan.xorsign.abs.bf16",
992 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
993 .Case("ftz.nan.xorsign.abs.bf16x2",
994 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
995 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
996 .Case("ftz.xorsign.abs.bf16x2",
997 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
998 .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
999 .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
1000 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
1001 .Case("nan.xorsign.abs.bf16x2",
1002 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
1003 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
1004 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
1006
1007 if (Name.consume_front("neg."))
1009 .Case("bf16", Intrinsic::nvvm_neg_bf16)
1010 .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
1012
1014}
1015
1017 bool CanUpgradeDebugIntrinsicsToRecords) {
1018 assert(F && "Illegal to upgrade a non-existent Function.");
1019
1020 StringRef Name = F->getName();
1021
1022 // Quickly eliminate it, if it's not a candidate.
1023 if (!Name.consume_front("llvm.") || Name.empty())
1024 return false;
1025
1026 switch (Name[0]) {
1027 default: break;
1028 case 'a': {
1029 bool IsArm = Name.consume_front("arm.");
1030 if (IsArm || Name.consume_front("aarch64.")) {
1031 if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
1032 return true;
1033 break;
1034 }
1035
1036 if (Name.consume_front("amdgcn.")) {
1037 if (Name == "alignbit") {
1038 // Target specific intrinsic became redundant
1040 F->getParent(), Intrinsic::fshr, {F->getReturnType()});
1041 return true;
1042 }
1043
1044 if (Name.consume_front("atomic.")) {
1045 if (Name.starts_with("inc") || Name.starts_with("dec")) {
1046 // These were replaced with atomicrmw uinc_wrap and udec_wrap, so
1047 // there's no new declaration.
1048 NewFn = nullptr;
1049 return true;
1050 }
1051 break; // No other 'amdgcn.atomic.*'
1052 }
1053
1054 if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
1055 Name.consume_front("flat.atomic.")) {
1056 if (Name.starts_with("fadd") ||
1057 // FIXME: We should also remove fmin.num and fmax.num intrinsics.
1058 (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
1059 (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
1060 // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
1061 // declaration.
1062 NewFn = nullptr;
1063 return true;
1064 }
1065 }
1066
1067 if (Name.starts_with("ldexp.")) {
1068 // Target specific intrinsic became redundant
1070 F->getParent(), Intrinsic::ldexp,
1071 {F->getReturnType(), F->getArg(1)->getType()});
1072 return true;
1073 }
1074 break; // No other 'amdgcn.*'
1075 }
1076
1077 break;
1078 }
1079 case 'c': {
1080 if (F->arg_size() == 1) {
1082 .StartsWith("ctlz.", Intrinsic::ctlz)
1083 .StartsWith("cttz.", Intrinsic::cttz)
1086 rename(F);
1087 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1088 F->arg_begin()->getType());
1089 return true;
1090 }
1091 }
1092
1093 if (F->arg_size() == 2 && Name == "coro.end") {
1094 rename(F);
1095 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1096 Intrinsic::coro_end);
1097 return true;
1098 }
1099
1100 break;
1101 }
1102 case 'd':
1103 if (Name.consume_front("dbg.")) {
1104 // Mark debug intrinsics for upgrade to new debug format.
1105 if (CanUpgradeDebugIntrinsicsToRecords &&
1106 F->getParent()->IsNewDbgInfoFormat) {
1107 if (Name == "addr" || Name == "value" || Name == "assign" ||
1108 Name == "declare" || Name == "label") {
1109 // There's no function to replace these with.
1110 NewFn = nullptr;
1111 // But we do want these to get upgraded.
1112 return true;
1113 }
1114 }
1115 // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
1116 // converted to DbgVariableRecords later.
1117 if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
1118 rename(F);
1119 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1120 Intrinsic::dbg_value);
1121 return true;
1122 }
1123 break; // No other 'dbg.*'.
1124 }
1125 break;
1126 case 'e':
1127 if (Name.consume_front("experimental.vector.")) {
1130 // Skip over extract.last.active, otherwise it will be 'upgraded'
1131 // to a regular vector extract which is a different operation.
1132 .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
1133 .StartsWith("extract.", Intrinsic::vector_extract)
1134 .StartsWith("insert.", Intrinsic::vector_insert)
1135 .StartsWith("splice.", Intrinsic::vector_splice)
1136 .StartsWith("reverse.", Intrinsic::vector_reverse)
1137 .StartsWith("interleave2.", Intrinsic::vector_interleave2)
1138 .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
1141 const auto *FT = F->getFunctionType();
1143 if (ID == Intrinsic::vector_extract ||
1144 ID == Intrinsic::vector_interleave2)
1145 // Extracting overloads the return type.
1146 Tys.push_back(FT->getReturnType());
1147 if (ID != Intrinsic::vector_interleave2)
1148 Tys.push_back(FT->getParamType(0));
1149 if (ID == Intrinsic::vector_insert)
1150 // Inserting overloads the inserted type.
1151 Tys.push_back(FT->getParamType(1));
1152 rename(F);
1153 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
1154 return true;
1155 }
1156
1157 if (Name.consume_front("reduce.")) {
1159 static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
1160 if (R.match(Name, &Groups))
1162 .Case("add", Intrinsic::vector_reduce_add)
1163 .Case("mul", Intrinsic::vector_reduce_mul)
1164 .Case("and", Intrinsic::vector_reduce_and)
1165 .Case("or", Intrinsic::vector_reduce_or)
1166 .Case("xor", Intrinsic::vector_reduce_xor)
1167 .Case("smax", Intrinsic::vector_reduce_smax)
1168 .Case("smin", Intrinsic::vector_reduce_smin)
1169 .Case("umax", Intrinsic::vector_reduce_umax)
1170 .Case("umin", Intrinsic::vector_reduce_umin)
1171 .Case("fmax", Intrinsic::vector_reduce_fmax)
1172 .Case("fmin", Intrinsic::vector_reduce_fmin)
1174
1175 bool V2 = false;
1177 static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
1178 Groups.clear();
1179 V2 = true;
1180 if (R2.match(Name, &Groups))
1182 .Case("fadd", Intrinsic::vector_reduce_fadd)
1183 .Case("fmul", Intrinsic::vector_reduce_fmul)
1185 }
1187 rename(F);
1188 auto Args = F->getFunctionType()->params();
1189 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1190 {Args[V2 ? 1 : 0]});
1191 return true;
1192 }
1193 break; // No other 'expermental.vector.reduce.*'.
1194 }
1195 break; // No other 'experimental.vector.*'.
1196 }
1197 if (Name.consume_front("experimental.stepvector.")) {
1198 Intrinsic::ID ID = Intrinsic::stepvector;
1199 rename(F);
1201 F->getParent(), ID, F->getFunctionType()->getReturnType());
1202 return true;
1203 }
1204 break; // No other 'e*'.
1205 case 'f':
1206 if (Name.starts_with("flt.rounds")) {
1207 rename(F);
1208 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1209 Intrinsic::get_rounding);
1210 return true;
1211 }
1212 break;
1213 case 'i':
1214 if (Name.starts_with("invariant.group.barrier")) {
1215 // Rename invariant.group.barrier to launder.invariant.group
1216 auto Args = F->getFunctionType()->params();
1217 Type* ObjectPtr[1] = {Args[0]};
1218 rename(F);
1220 F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
1221 return true;
1222 }
1223 break;
1224 case 'm': {
1225 // Updating the memory intrinsics (memcpy/memmove/memset) that have an
1226 // alignment parameter to embedding the alignment as an attribute of
1227 // the pointer args.
1228 if (unsigned ID = StringSwitch<unsigned>(Name)
1229 .StartsWith("memcpy.", Intrinsic::memcpy)
1230 .StartsWith("memmove.", Intrinsic::memmove)
1231 .Default(0)) {
1232 if (F->arg_size() == 5) {
1233 rename(F);
1234 // Get the types of dest, src, and len
1235 ArrayRef<Type *> ParamTypes =
1236 F->getFunctionType()->params().slice(0, 3);
1237 NewFn =
1238 Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
1239 return true;
1240 }
1241 }
1242 if (Name.starts_with("memset.") && F->arg_size() == 5) {
1243 rename(F);
1244 // Get the types of dest, and len
1245 const auto *FT = F->getFunctionType();
1246 Type *ParamTypes[2] = {
1247 FT->getParamType(0), // Dest
1248 FT->getParamType(2) // len
1249 };
1250 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1251 Intrinsic::memset, ParamTypes);
1252 return true;
1253 }
1254 break;
1255 }
1256 case 'n': {
1257 if (Name.consume_front("nvvm.")) {
1258 // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1259 if (F->arg_size() == 1) {
1260 Intrinsic::ID IID =
1262 .Cases("brev32", "brev64", Intrinsic::bitreverse)
1263 .Case("clz.i", Intrinsic::ctlz)
1264 .Case("popc.i", Intrinsic::ctpop)
1266 if (IID != Intrinsic::not_intrinsic) {
1267 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1268 {F->getReturnType()});
1269 return true;
1270 }
1271 }
1272
1273 // Check for nvvm intrinsics that need a return type adjustment.
1274 if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
1276 if (IID != Intrinsic::not_intrinsic) {
1277 NewFn = nullptr;
1278 return true;
1279 }
1280 }
1281
1282 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1283 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
1284 //
1285 // TODO: We could add lohi.i2d.
1286 bool Expand = false;
1287 if (Name.consume_front("abs."))
1288 // nvvm.abs.{i,ii}
1289 Expand = Name == "i" || Name == "ll";
1290 else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f")
1291 Expand = true;
1292 else if (Name.consume_front("max.") || Name.consume_front("min."))
1293 // nvvm.{min,max}.{i,ii,ui,ull}
1294 Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
1295 Name == "ui" || Name == "ull";
1296 else if (Name.consume_front("atomic.load.add."))
1297 // nvvm.atomic.load.add.{f32.p,f64.p}
1298 Expand = Name.starts_with("f32.p") || Name.starts_with("f64.p");
1299 else if (Name.consume_front("bitcast."))
1300 // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
1301 Expand =
1302 Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
1303 else if (Name.consume_front("rotate."))
1304 // nvvm.rotate.{b32,b64,right.b64}
1305 Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
1306 else if (Name.consume_front("ptr.gen.to."))
1307 // nvvm.ptr.gen.to.{local,shared,global,constant}
1308 Expand = Name.starts_with("local") || Name.starts_with("shared") ||
1309 Name.starts_with("global") || Name.starts_with("constant");
1310 else if (Name.consume_front("ptr."))
1311 // nvvm.ptr.{local,shared,global,constant}.to.gen
1312 Expand =
1313 (Name.consume_front("local") || Name.consume_front("shared") ||
1314 Name.consume_front("global") || Name.consume_front("constant")) &&
1315 Name.starts_with(".to.gen");
1316 else if (Name.consume_front("ldg.global."))
1317 // nvvm.ldg.global.{i,p,f}
1318 Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
1319 Name.starts_with("p."));
1320 else
1321 Expand = false;
1322
1323 if (Expand) {
1324 NewFn = nullptr;
1325 return true;
1326 }
1327 break; // No other 'nvvm.*'.
1328 }
1329 break;
1330 }
1331 case 'o':
1332 // We only need to change the name to match the mangling including the
1333 // address space.
1334 if (Name.starts_with("objectsize.")) {
1335 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1336 if (F->arg_size() == 2 || F->arg_size() == 3 ||
1337 F->getName() !=
1338 Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
1339 rename(F);
1340 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1341 Intrinsic::objectsize, Tys);
1342 return true;
1343 }
1344 }
1345 break;
1346
1347 case 'p':
1348 if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
1349 rename(F);
1351 F->getParent(), Intrinsic::ptr_annotation,
1352 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1353 return true;
1354 }
1355 break;
1356
1357 case 'r': {
1358 if (Name.consume_front("riscv.")) {
1361 .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1362 .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1363 .Case("aes32esi", Intrinsic::riscv_aes32esi)
1364 .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
1367 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1368 rename(F);
1369 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1370 return true;
1371 }
1372 break; // No other applicable upgrades.
1373 }
1374
1376 .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1377 .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1380 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1381 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1382 rename(F);
1383 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1384 return true;
1385 }
1386 break; // No other applicable upgrades.
1387 }
1388
1390 .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1391 .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1392 .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1393 .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1394 .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1395 .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1398 if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1399 rename(F);
1400 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1401 return true;
1402 }
1403 break; // No other applicable upgrades.
1404 }
1405 break; // No other 'riscv.*' intrinsics
1406 }
1407 } break;
1408
1409 case 's':
1410 if (Name == "stackprotectorcheck") {
1411 NewFn = nullptr;
1412 return true;
1413 }
1414 break;
1415
1416 case 'v': {
1417 if (Name == "var.annotation" && F->arg_size() == 4) {
1418 rename(F);
1420 F->getParent(), Intrinsic::var_annotation,
1421 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1422 return true;
1423 }
1424 break;
1425 }
1426
1427 case 'w':
1428 if (Name.consume_front("wasm.")) {
1431 .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1432 .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1433 .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1436 rename(F);
1437 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1438 F->getReturnType());
1439 return true;
1440 }
1441
1442 if (Name.consume_front("dot.i8x16.i7x16.")) {
1444 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1445 .Case("add.signed",
1446 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1449 rename(F);
1450 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1451 return true;
1452 }
1453 break; // No other 'wasm.dot.i8x16.i7x16.*'.
1454 }
1455 break; // No other 'wasm.*'.
1456 }
1457 break;
1458
1459 case 'x':
1460 if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1461 return true;
1462 }
1463
1464 auto *ST = dyn_cast<StructType>(F->getReturnType());
1465 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1466 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1467 // Replace return type with literal non-packed struct. Only do this for
1468 // intrinsics declared to return a struct, not for intrinsics with
1469 // overloaded return type, in which case the exact struct type will be
1470 // mangled into the name.
1473 if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1474 auto *FT = F->getFunctionType();
1475 auto *NewST = StructType::get(ST->getContext(), ST->elements());
1476 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1477 std::string Name = F->getName().str();
1478 rename(F);
1479 NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1480 Name, F->getParent());
1481
1482 // The new function may also need remangling.
1483 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1484 NewFn = *Result;
1485 return true;
1486 }
1487 }
1488
1489 // Remangle our intrinsic since we upgrade the mangling
1491 if (Result != std::nullopt) {
1492 NewFn = *Result;
1493 return true;
1494 }
1495
1496 // This may not belong here. This function is effectively being overloaded
1497 // to both detect an intrinsic which needs upgrading, and to provide the
1498 // upgraded form of the intrinsic. We should perhaps have two separate
1499 // functions for this.
1500 return false;
1501}
1502
1504 bool CanUpgradeDebugIntrinsicsToRecords) {
1505 NewFn = nullptr;
1506 bool Upgraded =
1507 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1508 assert(F != NewFn && "Intrinsic function upgraded to the same function");
1509
1510 // Upgrade intrinsic attributes. This does not change the function.
1511 if (NewFn)
1512 F = NewFn;
1513 if (Intrinsic::ID id = F->getIntrinsicID())
1514 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
1515 return Upgraded;
1516}
1517
1519 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1520 GV->getName() == "llvm.global_dtors")) ||
1521 !GV->hasInitializer())
1522 return nullptr;
1523 ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1524 if (!ATy)
1525 return nullptr;
1526 StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1527 if (!STy || STy->getNumElements() != 2)
1528 return nullptr;
1529
1530 LLVMContext &C = GV->getContext();
1531 IRBuilder<> IRB(C);
1532 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1533 IRB.getPtrTy());
1534 Constant *Init = GV->getInitializer();
1535 unsigned N = Init->getNumOperands();
1536 std::vector<Constant *> NewCtors(N);
1537 for (unsigned i = 0; i != N; ++i) {
1538 auto Ctor = cast<Constant>(Init->getOperand(i));
1539 NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1540 Ctor->getAggregateElement(1),
1542 }
1543 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1544
1545 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1546 NewInit, GV->getName());
1547}
1548
1549// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1550// to byte shuffles.
1552 unsigned Shift) {
1553 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1554 unsigned NumElts = ResultTy->getNumElements() * 8;
1555
1556 // Bitcast from a 64-bit element type to a byte element type.
1557 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1558 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1559
1560 // We'll be shuffling in zeroes.
1561 Value *Res = Constant::getNullValue(VecTy);
1562
1563 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1564 // we'll just return the zero vector.
1565 if (Shift < 16) {
1566 int Idxs[64];
1567 // 256/512-bit version is split into 2/4 16-byte lanes.
1568 for (unsigned l = 0; l != NumElts; l += 16)
1569 for (unsigned i = 0; i != 16; ++i) {
1570 unsigned Idx = NumElts + i - Shift;
1571 if (Idx < NumElts)
1572 Idx -= NumElts - 16; // end of lane, switch operand.
1573 Idxs[l + i] = Idx + l;
1574 }
1575
1576 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1577 }
1578
1579 // Bitcast back to a 64-bit element type.
1580 return Builder.CreateBitCast(Res, ResultTy, "cast");
1581}
1582
1583// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1584// to byte shuffles.
1586 unsigned Shift) {
1587 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1588 unsigned NumElts = ResultTy->getNumElements() * 8;
1589
1590 // Bitcast from a 64-bit element type to a byte element type.
1591 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1592 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1593
1594 // We'll be shuffling in zeroes.
1595 Value *Res = Constant::getNullValue(VecTy);
1596
1597 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1598 // we'll just return the zero vector.
1599 if (Shift < 16) {
1600 int Idxs[64];
1601 // 256/512-bit version is split into 2/4 16-byte lanes.
1602 for (unsigned l = 0; l != NumElts; l += 16)
1603 for (unsigned i = 0; i != 16; ++i) {
1604 unsigned Idx = i + Shift;
1605 if (Idx >= 16)
1606 Idx += NumElts - 16; // end of lane, switch operand.
1607 Idxs[l + i] = Idx + l;
1608 }
1609
1610 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
1611 }
1612
1613 // Bitcast back to a 64-bit element type.
1614 return Builder.CreateBitCast(Res, ResultTy, "cast");
1615}
1616
// Convert an integer AVX-512 mask operand (i8/i16/i32/i64) into a vector of
// NumElts x i1 suitable for use as a select/compare mask. The integer mask is
// bitcast to <W x i1> (W = bit width of the mask) and, when fewer than 8
// elements are needed, narrowed with a shuffle.
1617 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1618                             unsigned NumElts) {
1619   assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
// NOTE(review): the line declaring MaskTy (original line 1620, the head of the
// FixedVectorType::get(...) call) appears to be missing from this extraction;
// restore it from upstream before building.
1621       Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1622   Mask = Builder.CreateBitCast(Mask, MaskTy);
1623
1624   // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
1625   // i8 and we need to extract down to the right number of elements.
1626   if (NumElts <= 4) {
1627     int Indices[4];
1628     for (unsigned i = 0; i != NumElts; ++i)
1629       Indices[i] = i;
1630     Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
1631                                        "extract");
1632   }
1633
1634   return Mask;
1635 }
1636
1637static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1638 Value *Op1) {
1639 // If the mask is all ones just emit the first operation.
1640 if (const auto *C = dyn_cast<Constant>(Mask))
1641 if (C->isAllOnesValue())
1642 return Op0;
1643
1644 Mask = getX86MaskVec(Builder, Mask,
1645 cast<FixedVectorType>(Op0->getType())->getNumElements());
1646 return Builder.CreateSelect(Mask, Op0, Op1);
1647}
1648
1649static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1650 Value *Op1) {
1651 // If the mask is all ones just emit the first operation.
1652 if (const auto *C = dyn_cast<Constant>(Mask))
1653 if (C->isAllOnesValue())
1654 return Op0;
1655
1656 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1657 Mask->getType()->getIntegerBitWidth());
1658 Mask = Builder.CreateBitCast(Mask, MaskTy);
1659 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1660 return Builder.CreateSelect(Mask, Op0, Op1);
1661}
1662
1663// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1664// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
1665// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
1667 Value *Op1, Value *Shift,
1668 Value *Passthru, Value *Mask,
1669 bool IsVALIGN) {
1670 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1671
1672 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1673 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1674 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1675 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1676
1677 // Mask the immediate for VALIGN.
1678 if (IsVALIGN)
1679 ShiftVal &= (NumElts - 1);
1680
1681 // If palignr is shifting the pair of vectors more than the size of two
1682 // lanes, emit zero.
1683 if (ShiftVal >= 32)
1685
1686 // If palignr is shifting the pair of input vectors more than one lane,
1687 // but less than two lanes, convert to shifting in zeroes.
1688 if (ShiftVal > 16) {
1689 ShiftVal -= 16;
1690 Op1 = Op0;
1692 }
1693
1694 int Indices[64];
1695 // 256-bit palignr operates on 128-bit lanes so we need to handle that
1696 for (unsigned l = 0; l < NumElts; l += 16) {
1697 for (unsigned i = 0; i != 16; ++i) {
1698 unsigned Idx = ShiftVal + i;
1699 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1700 Idx += NumElts - 16; // End of lane, switch operand.
1701 Indices[l + i] = Idx + l;
1702 }
1703 }
1704
1705 Value *Align = Builder.CreateShuffleVector(
1706 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
1707
1708 return emitX86Select(Builder, Mask, Align, Passthru);
1709}
1710
1712 bool ZeroMask, bool IndexForm) {
1713 Type *Ty = CI.getType();
1714 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1715 unsigned EltWidth = Ty->getScalarSizeInBits();
1716 bool IsFloat = Ty->isFPOrFPVectorTy();
1717 Intrinsic::ID IID;
1718 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1719 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1720 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1721 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1722 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1723 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1724 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1725 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1726 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1727 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1728 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1729 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1730 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1731 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1732 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1733 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1734 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1735 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1736 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1737 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1738 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1739 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1740 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1741 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1742 else if (VecWidth == 128 && EltWidth == 16)
1743 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1744 else if (VecWidth == 256 && EltWidth == 16)
1745 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1746 else if (VecWidth == 512 && EltWidth == 16)
1747 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1748 else if (VecWidth == 128 && EltWidth == 8)
1749 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1750 else if (VecWidth == 256 && EltWidth == 8)
1751 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1752 else if (VecWidth == 512 && EltWidth == 8)
1753 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1754 else
1755 llvm_unreachable("Unexpected intrinsic");
1756
1757 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
1758 CI.getArgOperand(2) };
1759
1760 // If this isn't index form we need to swap operand 0 and 1.
1761 if (!IndexForm)
1762 std::swap(Args[0], Args[1]);
1763
1764 Value *V = Builder.CreateIntrinsic(IID, {}, Args);
1765 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1766 : Builder.CreateBitCast(CI.getArgOperand(1),
1767 Ty);
1768 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1769}
1770
1772 Intrinsic::ID IID) {
1773 Type *Ty = CI.getType();
1774 Value *Op0 = CI.getOperand(0);
1775 Value *Op1 = CI.getOperand(1);
1776 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
1777
1778 if (CI.arg_size() == 4) { // For masked intrinsics.
1779 Value *VecSrc = CI.getOperand(2);
1780 Value *Mask = CI.getOperand(3);
1781 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1782 }
1783 return Res;
1784}
1785
// Upgrade an x86 rotate intrinsic to the generic funnel-shift intrinsic:
// fshl/fshr with both value operands equal to the source is a rotate.
// NOTE(review): the first line of this signature is not visible in this chunk.
                                          bool IsRotateRight) {
  Type *Ty = CI.getType();
  Value *Src = CI.getArgOperand(0);
  Value *Amt = CI.getArgOperand(1);

  // Amount may be scalar immediate, in which case create a splat vector.
  // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
  // we only care about the lowest log2 bits anyway.
  if (Amt->getType() != Ty) {
    unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
    Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
    Amt = Builder.CreateVectorSplat(NumElts, Amt);
  }

  // A rotate is a funnel shift with both inputs equal to the source value.
  Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
  Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});

  if (CI.arg_size() == 4) { // For masked intrinsics.
    Value *VecSrc = CI.getOperand(2); // presumably the passthru value
    Value *Mask = CI.getOperand(3);   // integer mask operand
    Res = emitX86Select(Builder, Mask, Res, VecSrc);
  }
  return Res;
}
1811
1812static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
1813 bool IsSigned) {
1814 Type *Ty = CI.getType();
1815 Value *LHS = CI.getArgOperand(0);
1816 Value *RHS = CI.getArgOperand(1);
1817
1818 CmpInst::Predicate Pred;
1819 switch (Imm) {
1820 case 0x0:
1821 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1822 break;
1823 case 0x1:
1824 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1825 break;
1826 case 0x2:
1827 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1828 break;
1829 case 0x3:
1830 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1831 break;
1832 case 0x4:
1833 Pred = ICmpInst::ICMP_EQ;
1834 break;
1835 case 0x5:
1836 Pred = ICmpInst::ICMP_NE;
1837 break;
1838 case 0x6:
1839 return Constant::getNullValue(Ty); // FALSE
1840 case 0x7:
1841 return Constant::getAllOnesValue(Ty); // TRUE
1842 default:
1843 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1844 }
1845
1846 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1847 Value *Ext = Builder.CreateSExt(Cmp, Ty);
1848 return Ext;
1849}
1850
// Upgrade an x86 double-shift (vshld/vshrd style) intrinsic to llvm.fshl/fshr,
// handling scalar shift amounts and the optional mask operand.
// NOTE(review): the first line of this signature is not visible in this chunk.
                                         bool IsShiftRight, bool ZeroMask) {
  Type *Ty = CI.getType();
  Value *Op0 = CI.getArgOperand(0);
  Value *Op1 = CI.getArgOperand(1);
  Value *Amt = CI.getArgOperand(2);

  // fshr takes its concatenated operands in the opposite order, so swap.
  if (IsShiftRight)
    std::swap(Op0, Op1);

  // Amount may be scalar immediate, in which case create a splat vector.
  // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
  // we only care about the lowest log2 bits anyway.
  if (Amt->getType() != Ty) {
    unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
    Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
    Amt = Builder.CreateVectorSplat(NumElts, Amt);
  }

  Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
  Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});

  unsigned NumArgs = CI.arg_size();
  if (NumArgs >= 4) { // For masked intrinsics.
    // 5 args: explicit passthru at arg 3.  4 args: zero-masked variants use a
    // zero vector as passthru, the others reuse operand 0.
    Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
                    ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
                    CI.getArgOperand(0);
    Value *Mask = CI.getOperand(NumArgs - 1); // mask is always the last arg
    Res = emitX86Select(Builder, Mask, Res, VecSrc);
  }
  return Res;
}
1883
// Upgrade an x86 masked-store intrinsic to a generic llvm.masked.store, or to
// a plain store when the mask is a known all-ones constant.
// NOTE(review): the first line of this signature is not visible in this chunk.
                                 Value *Mask, bool Aligned) {
  // Aligned variants use the vector's natural (byte) alignment; the unaligned
  // variants use alignment 1.
  const Align Alignment =
      Aligned
          ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
          : Align(1);

  // If the mask is all ones just emit a regular store.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedStore(Data, Ptr, Alignment);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
}
1901
// Upgrade an x86 masked-load intrinsic to a generic llvm.masked.load, or to a
// plain load when the mask is a known all-ones constant.
// NOTE(review): the first line of this signature (and part of the alignment
// expression below) is not visible in this chunk.
                                Value *Passthru, Value *Mask, bool Aligned) {
  Type *ValTy = Passthru->getType();
  const Align Alignment =
      Aligned
          ? Align(
                8)
          : Align(1);

  // If the mask is all ones just emit a regular load.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
}
1922
1923static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
1924 Type *Ty = CI.getType();
1925 Value *Op0 = CI.getArgOperand(0);
1926 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
1927 {Op0, Builder.getInt1(false)});
1928 if (CI.arg_size() == 3)
1929 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
1930 return Res;
1931}
1932
1933static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
1934 Type *Ty = CI.getType();
1935
1936 // Arguments have a vXi32 type so cast to vXi64.
1937 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1938 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1939
1940 if (IsSigned) {
1941 // Shift left then arithmetic shift right.
1942 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1943 LHS = Builder.CreateShl(LHS, ShiftAmt);
1944 LHS = Builder.CreateAShr(LHS, ShiftAmt);
1945 RHS = Builder.CreateShl(RHS, ShiftAmt);
1946 RHS = Builder.CreateAShr(RHS, ShiftAmt);
1947 } else {
1948 // Clear the upper bits.
1949 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1950 LHS = Builder.CreateAnd(LHS, Mask);
1951 RHS = Builder.CreateAnd(RHS, Mask);
1952 }
1953
1954 Value *Res = Builder.CreateMul(LHS, RHS);
1955
1956 if (CI.arg_size() == 4)
1957 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1958
1959 return Res;
1960}
1961
// Applying mask on vector of i1's and make sure result is at least 8 bits wide.
// Returns the result bitcast to an iN integer mask (N = max(NumElts, 8)).
// NOTE(review): the first line of this signature (and the second shuffle
// operand below) is not visible in this chunk.
                                    Value *Mask) {
  unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
  if (Mask) {
    // Skip the AND when the mask is a known all-ones constant.
    const auto *C = dyn_cast<Constant>(Mask);
    if (!C || !C->isAllOnesValue())
      Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
  }

  if (NumElts < 8) {
    // Widen to 8 elements so the result can be bitcast to an i8 mask; padding
    // indices select from the shuffle's second operand.
    int Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = NumElts + i % NumElts;
    Vec = Builder.CreateShuffleVector(Vec,
                                      Indices);
  }
  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
}
1984
// Upgrade an avx512 masked integer compare to an icmp whose i1 result is
// combined with the mask operand.  CC selects the predicate; CC 3 and 7 are
// handled specially below.
// NOTE(review): the first line of this signature and the constant-result
// constructor lines for CC 3/7 are not visible in this chunk.
                                     unsigned CC, bool Signed) {
  Value *Op0 = CI.getArgOperand(0);
  unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();

  Value *Cmp;
  if (CC == 3) {
    // Presumably an all-false constant result -- constructor line missing.
        FixedVectorType::get(Builder.getInt1Ty(), NumElts));
  } else if (CC == 7) {
    // Presumably an all-true constant result -- constructor line missing.
        FixedVectorType::get(Builder.getInt1Ty(), NumElts));
  } else {
    // Remaining codes map to icmp predicates, signed or unsigned.
    switch (CC) {
    default: llvm_unreachable("Unknown condition code");
    case 0: Pred = ICmpInst::ICMP_EQ; break;
    case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
    case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
    case 4: Pred = ICmpInst::ICMP_NE; break;
    case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
    case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
    }
    Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
  }

  // The mask is always the last call operand.
  Value *Mask = CI.getArgOperand(CI.arg_size() - 1);

  return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
}
2015
// Replace a masked intrinsic with an older unmasked intrinsic.
// Arg 3 is the mask and arg 2 the passthru for the final select.
// NOTE(review): the first line of this signature is not visible in this chunk.
                                    Intrinsic::ID IID) {
  Value *Rep = Builder.CreateIntrinsic(
      IID, {}, {CI.getArgOperand(0), CI.getArgOperand(1)});
  return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
}
2023
  // Upgrade a masked scalar move: element 0 of the result comes from either B
  // or Src, chosen by bit 0 of the mask; all other elements come from A.
  // NOTE(review): the signature line is not visible in this chunk.
  Value* A = CI.getArgOperand(0);
  Value* B = CI.getArgOperand(1);
  Value* Src = CI.getArgOperand(2);
  Value* Mask = CI.getArgOperand(3);

  // Only bit 0 of the mask participates in the selection.
  Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
  Value* Cmp = Builder.CreateIsNotNull(AndNode);
  Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
  Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
  Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
  return Builder.CreateInsertElement(A, Select, (uint64_t)0);
}
2037
  // Upgrade a vpmovm2 intrinsic: expand an integer mask to a vector of i1 and
  // sign-extend it to the full-width result vector (-1/0 per lane).
  // NOTE(review): the signature line is not visible in this chunk.
  Value* Op = CI.getArgOperand(0);
  Type* ReturnOp = CI.getType();
  unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
  Value *Mask = getX86MaskVec(Builder, Op, NumElts);
  return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
}
2045
// Replace intrinsic with unmasked version and a select.
// Maps an "avx512.mask.*" name (by operation, vector width, and element width)
// to the corresponding unmasked intrinsic ID, calls it without the trailing
// passthru/mask operands, then emits a select.  Returns false (leaving Rep
// untouched) when the name is not one of the handled operations.
// NOTE(review): the first line of this signature is not visible in this chunk.
                                      CallBase &CI, Value *&Rep) {
  Name = Name.substr(12); // Remove avx512.mask.

  unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
  unsigned EltWidth = CI.getType()->getScalarSizeInBits();
  Intrinsic::ID IID;
  if (Name.starts_with("max.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_max_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_max_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_max_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_max_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("min.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_min_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_min_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_min_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_min_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pshuf.b.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pshuf_b_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pshuf_b;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pshuf_b_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmul.hr.sw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmul_hr_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmulh.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulh_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulh_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulh_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmulhu.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulhu_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulhu_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulhu_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmaddw.d.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmadd_wd;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_wd;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddw_d_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmaddubs.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_ub_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddubs_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("packsswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packsswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packsswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packsswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("packssdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packssdw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packssdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packssdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("packuswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packuswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packuswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packuswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("packusdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse41_packusdw;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packusdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packusdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("vpermilvar.")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name == "cvtpd2dq.256") {
    IID = Intrinsic::x86_avx_cvt_pd2dq_256;
  } else if (Name == "cvtpd2ps.256") {
    IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
  } else if (Name == "cvttpd2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
  } else if (Name == "cvttps2dq.128") {
    IID = Intrinsic::x86_sse2_cvttps2dq;
  } else if (Name == "cvttps2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
  } else if (Name.starts_with("permvar.")) {
    // Float vs integer element type selects between ps/pd and d/q variants.
    bool IsFloat = CI.getType()->isFPOrFPVectorTy();
    if (VecWidth == 256 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx2_permps;
    else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx2_permd;
    else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_256;
    else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_256;
    else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_sf_512;
    else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_si_512;
    else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_512;
    else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_512;
    else if (VecWidth == 128 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_128;
    else if (VecWidth == 256 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_256;
    else if (VecWidth == 512 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_512;
    else if (VecWidth == 128 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_128;
    else if (VecWidth == 256 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_256;
    else if (VecWidth == 512 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("dbpsadbw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_dbpsadbw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_dbpsadbw_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_dbpsadbw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmultishift.qb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_pmultishift_qb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_pmultishift_qb_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmultishift_qb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("conflict.")) {
    // Name[9] distinguishes the 'd' (dword) and 'q' (qword) variants.
    if (Name[9] == 'd' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_d_128;
    else if (Name[9] == 'd' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_d_256;
    else if (Name[9] == 'd' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_d_512;
    else if (Name[9] == 'q' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_q_128;
    else if (Name[9] == 'q' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_q_256;
    else if (Name[9] == 'q' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_q_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pavg.")) {
    // Name[5] distinguishes the 'b' (byte) and 'w' (word) variants.
    if (Name[5] == 'b' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_b_512;
    else if (Name[5] == 'w' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else
    return false;

  // Drop the trailing passthru and mask operands for the unmasked call, then
  // re-apply them via a select.
  SmallVector<Value *, 4> Args(CI.args());
  Args.pop_back();
  Args.pop_back();
  Rep = Builder.CreateIntrinsic(IID, {}, Args);
  unsigned NumArgs = CI.arg_size();
  Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
                      CI.getArgOperand(NumArgs - 2));
  return true;
}
2283
2284/// Upgrade comment in call to inline asm that represents an objc retain release
2285/// marker.
2286void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2287 size_t Pos;
2288 if (AsmStr->find("mov\tfp") == 0 &&
2289 AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
2290 (Pos = AsmStr->find("# marker")) != std::string::npos) {
2291 AsmStr->replace(Pos, 1, ";");
2292 }
2293}
2294
// Upgrade a deprecated llvm.nvvm.* intrinsic call to current IR, returning the
// replacement value or nullptr when no upgrade applies.
// NOTE(review): the first line of this signature is not visible in this chunk.
                                        Function *F, IRBuilder<> &Builder) {
  Value *Rep = nullptr;

  if (Name == "abs.i" || Name == "abs.ll") {
    // abs(x) = x >= 0 ? x : -x, expressed as neg + icmp + select.
    Value *Arg = CI->getArgOperand(0);
    Value *Neg = Builder.CreateNeg(Arg, "neg");
    Value *Cmp = Builder.CreateICmpSGE(
        Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
    Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
  } else if (Name.starts_with("atomic.load.add.f32.p") ||
             Name.starts_with("atomic.load.add.f64.p")) {
    Value *Ptr = CI->getArgOperand(0);
    Value *Val = CI->getArgOperand(1);
    Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
                                  AtomicOrdering::SequentiallyConsistent);
  } else if (Name.consume_front("max.") &&
             (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
              Name == "ui" || Name == "ull")) {
    // Unsigned variants start with 'u' after the "max." prefix is consumed.
    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);
    Value *Cmp = Name.starts_with("u")
                     ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
                     : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
    Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
  } else if (Name.consume_front("min.") &&
             (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
              Name == "ui" || Name == "ull")) {
    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);
    Value *Cmp = Name.starts_with("u")
                     ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
                     : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
    Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
  } else if (Name == "clz.ll") {
    // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
    Value *Arg = CI->getArgOperand(0);
    Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
                                          {Arg, Builder.getFalse()},
                                          /*FMFSource=*/nullptr, "ctlz");
    Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
  } else if (Name == "popc.ll") {
    // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
    // i64.
    Value *Arg = CI->getArgOperand(0);
    Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
                                          Arg, /*FMFSource=*/nullptr, "ctpop");
    Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
  } else if (Name == "h2f") {
    Rep = Builder.CreateIntrinsic(Intrinsic::convert_from_fp16,
                                  {Builder.getFloatTy()}, CI->getArgOperand(0),
                                  /*FMFSource=*/nullptr, "h2f");
  } else if (Name.consume_front("bitcast.") &&
             (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
              Name == "d2ll")) {
    Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
  } else if (Name == "rotate.b32") {
    // Rotates become funnel shifts with both value operands equal.
    Value *Arg = CI->getOperand(0);
    Value *ShiftAmt = CI->getOperand(1);
    Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
                                  {Arg, Arg, ShiftAmt});
  } else if (Name == "rotate.b64") {
    Type *Int64Ty = Builder.getInt64Ty();
    Value *Arg = CI->getOperand(0);
    Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
                                  {Arg, Arg, ZExtShiftAmt});
  } else if (Name == "rotate.right.b64") {
    Type *Int64Ty = Builder.getInt64Ty();
    Value *Arg = CI->getOperand(0);
    Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
                                  {Arg, Arg, ZExtShiftAmt});
  } else if ((Name.consume_front("ptr.gen.to.") &&
              (Name.starts_with("local") || Name.starts_with("shared") ||
               Name.starts_with("global") || Name.starts_with("constant"))) ||
             (Name.consume_front("ptr.") &&
              (Name.consume_front("local") || Name.consume_front("shared") ||
               Name.consume_front("global") ||
               Name.consume_front("constant")) &&
              Name.starts_with(".to.gen"))) {
    Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
  } else if (Name.consume_front("ldg.global")) {
    Value *Ptr = CI->getArgOperand(0);
    Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
    // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
    Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
    Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
    MDNode *MD = MDNode::get(Builder.getContext(), {});
    LD->setMetadata(LLVMContext::MD_invariant_load, MD);
    return LD;
  } else {
    // NOTE(review): the lines declaring IID (the name->intrinsic lookup) and
    // Args are not visible in this chunk.
    if (IID != Intrinsic::not_intrinsic &&
        !F->getReturnType()->getScalarType()->isBFloatTy()) {
      rename(F);
      Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
      for (size_t I = 0; I < NewFn->arg_size(); ++I) {
        Value *Arg = CI->getArgOperand(I);
        Type *OldType = Arg->getType();
        Type *NewType = NewFn->getArg(I)->getType();
        // Integer args feeding a bfloat parameter are bitcast to match.
        Args.push_back(
            (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
                ? Builder.CreateBitCast(Arg, NewType)
                : Arg);
      }
      Rep = Builder.CreateCall(NewFn, Args);
      if (F->getReturnType()->isIntegerTy())
        Rep = Builder.CreateBitCast(Rep, F->getReturnType());
    }
  }

  return Rep;
}
2410
2412 IRBuilder<> &Builder) {
2413 LLVMContext &C = F->getContext();
2414 Value *Rep = nullptr;
2415
2416 if (Name.starts_with("sse4a.movnt.")) {
2418 Elts.push_back(
2419 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2420 MDNode *Node = MDNode::get(C, Elts);
2421
2422 Value *Arg0 = CI->getArgOperand(0);
2423 Value *Arg1 = CI->getArgOperand(1);
2424
2425 // Nontemporal (unaligned) store of the 0'th element of the float/double
2426 // vector.
2427 Value *Extract =
2428 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2429
2430 StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
2431 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2432 } else if (Name.starts_with("avx.movnt.") ||
2433 Name.starts_with("avx512.storent.")) {
2435 Elts.push_back(
2436 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2437 MDNode *Node = MDNode::get(C, Elts);
2438
2439 Value *Arg0 = CI->getArgOperand(0);
2440 Value *Arg1 = CI->getArgOperand(1);
2441
2442 StoreInst *SI = Builder.CreateAlignedStore(
2443 Arg1, Arg0,
2445 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2446 } else if (Name == "sse2.storel.dq") {
2447 Value *Arg0 = CI->getArgOperand(0);
2448 Value *Arg1 = CI->getArgOperand(1);
2449
2450 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2451 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2452 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2453 Builder.CreateAlignedStore(Elt, Arg0, Align(1));
2454 } else if (Name.starts_with("sse.storeu.") ||
2455 Name.starts_with("sse2.storeu.") ||
2456 Name.starts_with("avx.storeu.")) {
2457 Value *Arg0 = CI->getArgOperand(0);
2458 Value *Arg1 = CI->getArgOperand(1);
2459 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2460 } else if (Name == "avx512.mask.store.ss") {
2461 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2462 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2463 Mask, false);
2464 } else if (Name.starts_with("avx512.mask.store")) {
2465 // "avx512.mask.storeu." or "avx512.mask.store."
2466 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2467 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2468 CI->getArgOperand(2), Aligned);
2469 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2470 // Upgrade packed integer vector compare intrinsics to compare instructions.
2471 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2472 bool CmpEq = Name[9] == 'e';
2473 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2474 CI->getArgOperand(0), CI->getArgOperand(1));
2475 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2476 } else if (Name.starts_with("avx512.broadcastm")) {
2477 Type *ExtTy = Type::getInt32Ty(C);
2478 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2479 ExtTy = Type::getInt64Ty(C);
2480 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2481 ExtTy->getPrimitiveSizeInBits();
2482 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2483 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2484 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2485 Value *Vec = CI->getArgOperand(0);
2486 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2487 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
2488 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2489 } else if (Name.starts_with("avx.sqrt.p") ||
2490 Name.starts_with("sse2.sqrt.p") ||
2491 Name.starts_with("sse.sqrt.p")) {
2492 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2493 {CI->getArgOperand(0)});
2494 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2495 if (CI->arg_size() == 4 &&
2496 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2497 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2498 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2499 : Intrinsic::x86_avx512_sqrt_pd_512;
2500
2501 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2502 Rep = Builder.CreateIntrinsic(IID, {}, Args);
2503 } else {
2504 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2505 {CI->getArgOperand(0)});
2506 }
2507 Rep =
2508 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2509 } else if (Name.starts_with("avx512.ptestm") ||
2510 Name.starts_with("avx512.ptestnm")) {
2511 Value *Op0 = CI->getArgOperand(0);
2512 Value *Op1 = CI->getArgOperand(1);
2513 Value *Mask = CI->getArgOperand(2);
2514 Rep = Builder.CreateAnd(Op0, Op1);
2515 llvm::Type *Ty = Op0->getType();
2517 ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2518 ? ICmpInst::ICMP_NE
2519 : ICmpInst::ICMP_EQ;
2520 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2521 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2522 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2523 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2524 ->getNumElements();
2525 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2526 Rep =
2527 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2528 } else if (Name.starts_with("avx512.kunpck")) {
2529 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2530 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2531 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2532 int Indices[64];
2533 for (unsigned i = 0; i != NumElts; ++i)
2534 Indices[i] = i;
2535
2536 // First extract half of each vector. This gives better codegen than
2537 // doing it in a single shuffle.
2538 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2539 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2540 // Concat the vectors.
2541 // NOTE: Operands have to be swapped to match intrinsic definition.
2542 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2543 Rep = Builder.CreateBitCast(Rep, CI->getType());
2544 } else if (Name == "avx512.kand.w") {
2545 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2546 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2547 Rep = Builder.CreateAnd(LHS, RHS);
2548 Rep = Builder.CreateBitCast(Rep, CI->getType());
2549 } else if (Name == "avx512.kandn.w") {
2550 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2551 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2552 LHS = Builder.CreateNot(LHS);
2553 Rep = Builder.CreateAnd(LHS, RHS);
2554 Rep = Builder.CreateBitCast(Rep, CI->getType());
2555 } else if (Name == "avx512.kor.w") {
2556 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2557 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2558 Rep = Builder.CreateOr(LHS, RHS);
2559 Rep = Builder.CreateBitCast(Rep, CI->getType());
2560 } else if (Name == "avx512.kxor.w") {
2561 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2562 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2563 Rep = Builder.CreateXor(LHS, RHS);
2564 Rep = Builder.CreateBitCast(Rep, CI->getType());
2565 } else if (Name == "avx512.kxnor.w") {
2566 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2567 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2568 LHS = Builder.CreateNot(LHS);
2569 Rep = Builder.CreateXor(LHS, RHS);
2570 Rep = Builder.CreateBitCast(Rep, CI->getType());
2571 } else if (Name == "avx512.knot.w") {
2572 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2573 Rep = Builder.CreateNot(Rep);
2574 Rep = Builder.CreateBitCast(Rep, CI->getType());
2575 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
2576 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2577 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2578 Rep = Builder.CreateOr(LHS, RHS);
2579 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2580 Value *C;
2581 if (Name[14] == 'c')
2582 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2583 else
2584 C = ConstantInt::getNullValue(Builder.getInt16Ty());
2585 Rep = Builder.CreateICmpEQ(Rep, C);
2586 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2587 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2588 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2589 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2590 Name == "sse.div.ss" || Name == "sse2.div.sd") {
2591 Type *I32Ty = Type::getInt32Ty(C);
2592 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2593 ConstantInt::get(I32Ty, 0));
2594 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2595 ConstantInt::get(I32Ty, 0));
2596 Value *EltOp;
2597 if (Name.contains(".add."))
2598 EltOp = Builder.CreateFAdd(Elt0, Elt1);
2599 else if (Name.contains(".sub."))
2600 EltOp = Builder.CreateFSub(Elt0, Elt1);
2601 else if (Name.contains(".mul."))
2602 EltOp = Builder.CreateFMul(Elt0, Elt1);
2603 else
2604 EltOp = Builder.CreateFDiv(Elt0, Elt1);
2605 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2606 ConstantInt::get(I32Ty, 0));
2607 } else if (Name.starts_with("avx512.mask.pcmp")) {
2608 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2609 bool CmpEq = Name[16] == 'e';
2610 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2611 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
2612 Type *OpTy = CI->getArgOperand(0)->getType();
2613 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2614 Intrinsic::ID IID;
2615 switch (VecWidth) {
2616 default:
2617 llvm_unreachable("Unexpected intrinsic");
2618 case 128:
2619 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
2620 break;
2621 case 256:
2622 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
2623 break;
2624 case 512:
2625 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
2626 break;
2627 }
2628
2629 Rep = Builder.CreateIntrinsic(IID, {},
2630 {CI->getOperand(0), CI->getArgOperand(1)});
2631 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2632 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
2633 Type *OpTy = CI->getArgOperand(0)->getType();
2634 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2635 unsigned EltWidth = OpTy->getScalarSizeInBits();
2636 Intrinsic::ID IID;
2637 if (VecWidth == 128 && EltWidth == 32)
2638 IID = Intrinsic::x86_avx512_fpclass_ps_128;
2639 else if (VecWidth == 256 && EltWidth == 32)
2640 IID = Intrinsic::x86_avx512_fpclass_ps_256;
2641 else if (VecWidth == 512 && EltWidth == 32)
2642 IID = Intrinsic::x86_avx512_fpclass_ps_512;
2643 else if (VecWidth == 128 && EltWidth == 64)
2644 IID = Intrinsic::x86_avx512_fpclass_pd_128;
2645 else if (VecWidth == 256 && EltWidth == 64)
2646 IID = Intrinsic::x86_avx512_fpclass_pd_256;
2647 else if (VecWidth == 512 && EltWidth == 64)
2648 IID = Intrinsic::x86_avx512_fpclass_pd_512;
2649 else
2650 llvm_unreachable("Unexpected intrinsic");
2651
2652 Rep = Builder.CreateIntrinsic(IID, {},
2653 {CI->getOperand(0), CI->getArgOperand(1)});
2654 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2655 } else if (Name.starts_with("avx512.cmp.p")) {
2656 SmallVector<Value *, 4> Args(CI->args());
2657 Type *OpTy = Args[0]->getType();
2658 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2659 unsigned EltWidth = OpTy->getScalarSizeInBits();
2660 Intrinsic::ID IID;
2661 if (VecWidth == 128 && EltWidth == 32)
2662 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2663 else if (VecWidth == 256 && EltWidth == 32)
2664 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2665 else if (VecWidth == 512 && EltWidth == 32)
2666 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2667 else if (VecWidth == 128 && EltWidth == 64)
2668 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2669 else if (VecWidth == 256 && EltWidth == 64)
2670 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2671 else if (VecWidth == 512 && EltWidth == 64)
2672 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2673 else
2674 llvm_unreachable("Unexpected intrinsic");
2675
2677 if (VecWidth == 512)
2678 std::swap(Mask, Args.back());
2679 Args.push_back(Mask);
2680
2681 Rep = Builder.CreateIntrinsic(IID, {}, Args);
2682 } else if (Name.starts_with("avx512.mask.cmp.")) {
2683 // Integer compare intrinsics.
2684 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2685 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2686 } else if (Name.starts_with("avx512.mask.ucmp.")) {
2687 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2688 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2689 } else if (Name.starts_with("avx512.cvtb2mask.") ||
2690 Name.starts_with("avx512.cvtw2mask.") ||
2691 Name.starts_with("avx512.cvtd2mask.") ||
2692 Name.starts_with("avx512.cvtq2mask.")) {
2693 Value *Op = CI->getArgOperand(0);
2694 Value *Zero = llvm::Constant::getNullValue(Op->getType());
2695 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2696 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2697 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
2698 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
2699 Name.starts_with("avx512.mask.pabs")) {
2700 Rep = upgradeAbs(Builder, *CI);
2701 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
2702 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
2703 Name.starts_with("avx512.mask.pmaxs")) {
2704 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
2705 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
2706 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
2707 Name.starts_with("avx512.mask.pmaxu")) {
2708 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
2709 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
2710 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
2711 Name.starts_with("avx512.mask.pmins")) {
2712 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
2713 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
2714 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
2715 Name.starts_with("avx512.mask.pminu")) {
2716 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
2717 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
2718 Name == "avx512.pmulu.dq.512" ||
2719 Name.starts_with("avx512.mask.pmulu.dq.")) {
2720 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
2721 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
2722 Name == "avx512.pmul.dq.512" ||
2723 Name.starts_with("avx512.mask.pmul.dq.")) {
2724 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
2725 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
2726 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
2727 Rep =
2728 Builder.CreateSIToFP(CI->getArgOperand(1),
2729 cast<VectorType>(CI->getType())->getElementType());
2730 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2731 } else if (Name == "avx512.cvtusi2sd") {
2732 Rep =
2733 Builder.CreateUIToFP(CI->getArgOperand(1),
2734 cast<VectorType>(CI->getType())->getElementType());
2735 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2736 } else if (Name == "sse2.cvtss2sd") {
2737 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2738 Rep = Builder.CreateFPExt(
2739 Rep, cast<VectorType>(CI->getType())->getElementType());
2740 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2741 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
2742 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
2743 Name.starts_with("avx512.mask.cvtdq2pd.") ||
2744 Name.starts_with("avx512.mask.cvtudq2pd.") ||
2745 Name.starts_with("avx512.mask.cvtdq2ps.") ||
2746 Name.starts_with("avx512.mask.cvtudq2ps.") ||
2747 Name.starts_with("avx512.mask.cvtqq2pd.") ||
2748 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
2749 Name == "avx512.mask.cvtqq2ps.256" ||
2750 Name == "avx512.mask.cvtqq2ps.512" ||
2751 Name == "avx512.mask.cvtuqq2ps.256" ||
2752 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
2753 Name == "avx.cvt.ps2.pd.256" ||
2754 Name == "avx512.mask.cvtps2pd.128" ||
2755 Name == "avx512.mask.cvtps2pd.256") {
2756 auto *DstTy = cast<FixedVectorType>(CI->getType());
2757 Rep = CI->getArgOperand(0);
2758 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2759
2760 unsigned NumDstElts = DstTy->getNumElements();
2761 if (NumDstElts < SrcTy->getNumElements()) {
2762 assert(NumDstElts == 2 && "Unexpected vector size");
2763 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2764 }
2765
2766 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2767 bool IsUnsigned = Name.contains("cvtu");
2768 if (IsPS2PD)
2769 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2770 else if (CI->arg_size() == 4 &&
2771 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2772 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2773 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2774 : Intrinsic::x86_avx512_sitofp_round;
2775 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
2776 {Rep, CI->getArgOperand(3)});
2777 } else {
2778 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2779 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2780 }
2781
2782 if (CI->arg_size() >= 3)
2783 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2784 CI->getArgOperand(1));
2785 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
2786 Name.starts_with("vcvtph2ps.")) {
2787 auto *DstTy = cast<FixedVectorType>(CI->getType());
2788 Rep = CI->getArgOperand(0);
2789 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2790 unsigned NumDstElts = DstTy->getNumElements();
2791 if (NumDstElts != SrcTy->getNumElements()) {
2792 assert(NumDstElts == 4 && "Unexpected vector size");
2793 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
2794 }
2795 Rep = Builder.CreateBitCast(
2796 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
2797 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
2798 if (CI->arg_size() >= 3)
2799 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2800 CI->getArgOperand(1));
2801 } else if (Name.starts_with("avx512.mask.load")) {
2802 // "avx512.mask.loadu." or "avx512.mask.load."
2803 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
2804 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2805 CI->getArgOperand(2), Aligned);
2806 } else if (Name.starts_with("avx512.mask.expand.load.")) {
2807 auto *ResultTy = cast<FixedVectorType>(CI->getType());
2808 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2809 ResultTy->getNumElements());
2810
2811 Rep = Builder.CreateIntrinsic(
2812 Intrinsic::masked_expandload, ResultTy,
2813 {CI->getOperand(0), MaskVec, CI->getOperand(1)});
2814 } else if (Name.starts_with("avx512.mask.compress.store.")) {
2815 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
2816 Value *MaskVec =
2817 getX86MaskVec(Builder, CI->getArgOperand(2),
2818 cast<FixedVectorType>(ResultTy)->getNumElements());
2819
2820 Rep = Builder.CreateIntrinsic(
2821 Intrinsic::masked_compressstore, ResultTy,
2822 {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
2823 } else if (Name.starts_with("avx512.mask.compress.") ||
2824 Name.starts_with("avx512.mask.expand.")) {
2825 auto *ResultTy = cast<FixedVectorType>(CI->getType());
2826
2827 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2828 ResultTy->getNumElements());
2829
2830 bool IsCompress = Name[12] == 'c';
2831 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2832 : Intrinsic::x86_avx512_mask_expand;
2833 Rep = Builder.CreateIntrinsic(
2834 IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
2835 } else if (Name.starts_with("xop.vpcom")) {
2836 bool IsSigned;
2837 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
2838 Name.ends_with("uq"))
2839 IsSigned = false;
2840 else if (Name.ends_with("b") || Name.ends_with("w") ||
2841 Name.ends_with("d") || Name.ends_with("q"))
2842 IsSigned = true;
2843 else
2844 llvm_unreachable("Unknown suffix");
2845
2846 unsigned Imm;
2847 if (CI->arg_size() == 3) {
2848 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2849 } else {
2850 Name = Name.substr(9); // strip off "xop.vpcom"
2851 if (Name.starts_with("lt"))
2852 Imm = 0;
2853 else if (Name.starts_with("le"))
2854 Imm = 1;
2855 else if (Name.starts_with("gt"))
2856 Imm = 2;
2857 else if (Name.starts_with("ge"))
2858 Imm = 3;
2859 else if (Name.starts_with("eq"))
2860 Imm = 4;
2861 else if (Name.starts_with("ne"))
2862 Imm = 5;
2863 else if (Name.starts_with("false"))
2864 Imm = 6;
2865 else if (Name.starts_with("true"))
2866 Imm = 7;
2867 else
2868 llvm_unreachable("Unknown condition");
2869 }
2870
2871 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2872 } else if (Name.starts_with("xop.vpcmov")) {
2873 Value *Sel = CI->getArgOperand(2);
2874 Value *NotSel = Builder.CreateNot(Sel);
2875 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2876 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2877 Rep = Builder.CreateOr(Sel0, Sel1);
2878 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
2879 Name.starts_with("avx512.mask.prol")) {
2880 Rep = upgradeX86Rotate(Builder, *CI, false);
2881 } else if (Name.starts_with("avx512.pror") ||
2882 Name.starts_with("avx512.mask.pror")) {
2883 Rep = upgradeX86Rotate(Builder, *CI, true);
2884 } else if (Name.starts_with("avx512.vpshld.") ||
2885 Name.starts_with("avx512.mask.vpshld") ||
2886 Name.starts_with("avx512.maskz.vpshld")) {
2887 bool ZeroMask = Name[11] == 'z';
2888 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2889 } else if (Name.starts_with("avx512.vpshrd.") ||
2890 Name.starts_with("avx512.mask.vpshrd") ||
2891 Name.starts_with("avx512.maskz.vpshrd")) {
2892 bool ZeroMask = Name[11] == 'z';
2893 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2894 } else if (Name == "sse42.crc32.64.8") {
2895 Value *Trunc0 =
2896 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2897 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8, {},
2898 {Trunc0, CI->getArgOperand(1)});
2899 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2900 } else if (Name.starts_with("avx.vbroadcast.s") ||
2901 Name.starts_with("avx512.vbroadcast.s")) {
2902 // Replace broadcasts with a series of insertelements.
2903 auto *VecTy = cast<FixedVectorType>(CI->getType());
2904 Type *EltTy = VecTy->getElementType();
2905 unsigned EltNum = VecTy->getNumElements();
2906 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
2907 Type *I32Ty = Type::getInt32Ty(C);
2908 Rep = PoisonValue::get(VecTy);
2909 for (unsigned I = 0; I < EltNum; ++I)
2910 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
2911 } else if (Name.starts_with("sse41.pmovsx") ||
2912 Name.starts_with("sse41.pmovzx") ||
2913 Name.starts_with("avx2.pmovsx") ||
2914 Name.starts_with("avx2.pmovzx") ||
2915 Name.starts_with("avx512.mask.pmovsx") ||
2916 Name.starts_with("avx512.mask.pmovzx")) {
2917 auto *DstTy = cast<FixedVectorType>(CI->getType());
2918 unsigned NumDstElts = DstTy->getNumElements();
2919
2920 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2921 SmallVector<int, 8> ShuffleMask(NumDstElts);
2922 for (unsigned i = 0; i != NumDstElts; ++i)
2923 ShuffleMask[i] = i;
2924
2925 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
2926
2927 bool DoSext = Name.contains("pmovsx");
2928 Rep =
2929 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
2930 // If there are 3 arguments, it's a masked intrinsic so we need a select.
2931 if (CI->arg_size() == 3)
2932 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2933 CI->getArgOperand(1));
2934 } else if (Name == "avx512.mask.pmov.qd.256" ||
2935 Name == "avx512.mask.pmov.qd.512" ||
2936 Name == "avx512.mask.pmov.wb.256" ||
2937 Name == "avx512.mask.pmov.wb.512") {
2938 Type *Ty = CI->getArgOperand(1)->getType();
2939 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2940 Rep =
2941 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2942 } else if (Name.starts_with("avx.vbroadcastf128") ||
2943 Name == "avx2.vbroadcasti128") {
2944 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2945 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
2946 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2947 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
2948 Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
2949 if (NumSrcElts == 2)
2950 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
2951 else
2952 Rep = Builder.CreateShuffleVector(Load,
2953 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
2954 } else if (Name.starts_with("avx512.mask.shuf.i") ||
2955 Name.starts_with("avx512.mask.shuf.f")) {
2956 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2957 Type *VT = CI->getType();
2958 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2959 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2960 unsigned ControlBitsMask = NumLanes - 1;
2961 unsigned NumControlBits = NumLanes / 2;
2962 SmallVector<int, 8> ShuffleMask(0);
2963
2964 for (unsigned l = 0; l != NumLanes; ++l) {
2965 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2966 // We actually need the other source.
2967 if (l >= NumLanes / 2)
2968 LaneMask += NumLanes;
2969 for (unsigned i = 0; i != NumElementsInLane; ++i)
2970 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2971 }
2972 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2973 CI->getArgOperand(1), ShuffleMask);
2974 Rep =
2975 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
2976 } else if (Name.starts_with("avx512.mask.broadcastf") ||
2977 Name.starts_with("avx512.mask.broadcasti")) {
2978 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
2979 ->getNumElements();
2980 unsigned NumDstElts =
2981 cast<FixedVectorType>(CI->getType())->getNumElements();
2982
2983 SmallVector<int, 8> ShuffleMask(NumDstElts);
2984 for (unsigned i = 0; i != NumDstElts; ++i)
2985 ShuffleMask[i] = i % NumSrcElts;
2986
2987 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2988 CI->getArgOperand(0), ShuffleMask);
2989 Rep =
2990 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2991 } else if (Name.starts_with("avx2.pbroadcast") ||
2992 Name.starts_with("avx2.vbroadcast") ||
2993 Name.starts_with("avx512.pbroadcast") ||
2994 Name.starts_with("avx512.mask.broadcast.s")) {
2995 // Replace vp?broadcasts with a vector shuffle.
2996 Value *Op = CI->getArgOperand(0);
2997 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
2998 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3001 Rep = Builder.CreateShuffleVector(Op, M);
3002
3003 if (CI->arg_size() == 3)
3004 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3005 CI->getArgOperand(1));
3006 } else if (Name.starts_with("sse2.padds.") ||
3007 Name.starts_with("avx2.padds.") ||
3008 Name.starts_with("avx512.padds.") ||
3009 Name.starts_with("avx512.mask.padds.")) {
3010 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3011 } else if (Name.starts_with("sse2.psubs.") ||
3012 Name.starts_with("avx2.psubs.") ||
3013 Name.starts_with("avx512.psubs.") ||
3014 Name.starts_with("avx512.mask.psubs.")) {
3015 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3016 } else if (Name.starts_with("sse2.paddus.") ||
3017 Name.starts_with("avx2.paddus.") ||
3018 Name.starts_with("avx512.mask.paddus.")) {
3019 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3020 } else if (Name.starts_with("sse2.psubus.") ||
3021 Name.starts_with("avx2.psubus.") ||
3022 Name.starts_with("avx512.mask.psubus.")) {
3023 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3024 } else if (Name.starts_with("avx512.mask.palignr.")) {
3025 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3026 CI->getArgOperand(1), CI->getArgOperand(2),
3027 CI->getArgOperand(3), CI->getArgOperand(4),
3028 false);
3029 } else if (Name.starts_with("avx512.mask.valign.")) {
3031 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3032 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3033 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3034 // 128/256-bit shift left specified in bits.
3035 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3036 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3037 Shift / 8); // Shift is in bits.
3038 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3039 // 128/256-bit shift right specified in bits.
3040 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3041 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3042 Shift / 8); // Shift is in bits.
3043 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3044 Name == "avx512.psll.dq.512") {
3045 // 128/256/512-bit shift left specified in bytes.
3046 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3047 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3048 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3049 Name == "avx512.psrl.dq.512") {
3050 // 128/256/512-bit shift right specified in bytes.
3051 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3052 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3053 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3054 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3055 Name.starts_with("avx2.pblendd.")) {
3056 Value *Op0 = CI->getArgOperand(0);
3057 Value *Op1 = CI->getArgOperand(1);
3058 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3059 auto *VecTy = cast<FixedVectorType>(CI->getType());
3060 unsigned NumElts = VecTy->getNumElements();
3061
3062 SmallVector<int, 16> Idxs(NumElts);
3063 for (unsigned i = 0; i != NumElts; ++i)
3064 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3065
3066 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3067 } else if (Name.starts_with("avx.vinsertf128.") ||
3068 Name == "avx2.vinserti128" ||
3069 Name.starts_with("avx512.mask.insert")) {
3070 Value *Op0 = CI->getArgOperand(0);
3071 Value *Op1 = CI->getArgOperand(1);
3072 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3073 unsigned DstNumElts =
3074 cast<FixedVectorType>(CI->getType())->getNumElements();
3075 unsigned SrcNumElts =
3076 cast<FixedVectorType>(Op1->getType())->getNumElements();
3077 unsigned Scale = DstNumElts / SrcNumElts;
3078
3079 // Mask off the high bits of the immediate value; hardware ignores those.
3080 Imm = Imm % Scale;
3081
3082 // Extend the second operand into a vector the size of the destination.
3083 SmallVector<int, 8> Idxs(DstNumElts);
3084 for (unsigned i = 0; i != SrcNumElts; ++i)
3085 Idxs[i] = i;
3086 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3087 Idxs[i] = SrcNumElts;
3088 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3089
3090 // Insert the second operand into the first operand.
3091
3092 // Note that there is no guarantee that instruction lowering will actually
3093 // produce a vinsertf128 instruction for the created shuffles. In
3094 // particular, the 0 immediate case involves no lane changes, so it can
3095 // be handled as a blend.
3096
3097 // Example of shuffle mask for 32-bit elements:
3098 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3099 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
3100
3101 // First fill with identify mask.
3102 for (unsigned i = 0; i != DstNumElts; ++i)
3103 Idxs[i] = i;
3104 // Then replace the elements where we need to insert.
3105 for (unsigned i = 0; i != SrcNumElts; ++i)
3106 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3107 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3108
3109 // If the intrinsic has a mask operand, handle that.
3110 if (CI->arg_size() == 5)
3111 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3112 CI->getArgOperand(3));
3113 } else if (Name.starts_with("avx.vextractf128.") ||
3114 Name == "avx2.vextracti128" ||
3115 Name.starts_with("avx512.mask.vextract")) {
3116 Value *Op0 = CI->getArgOperand(0);
3117 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3118 unsigned DstNumElts =
3119 cast<FixedVectorType>(CI->getType())->getNumElements();
3120 unsigned SrcNumElts =
3121 cast<FixedVectorType>(Op0->getType())->getNumElements();
3122 unsigned Scale = SrcNumElts / DstNumElts;
3123
3124 // Mask off the high bits of the immediate value; hardware ignores those.
3125 Imm = Imm % Scale;
3126
3127 // Get indexes for the subvector of the input vector.
3128 SmallVector<int, 8> Idxs(DstNumElts);
3129 for (unsigned i = 0; i != DstNumElts; ++i) {
3130 Idxs[i] = i + (Imm * DstNumElts);
3131 }
3132 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3133
3134 // If the intrinsic has a mask operand, handle that.
3135 if (CI->arg_size() == 4)
3136 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3137 CI->getArgOperand(2));
3138 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3139 Name.starts_with("avx512.mask.perm.di.")) {
3140 Value *Op0 = CI->getArgOperand(0);
3141 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3142 auto *VecTy = cast<FixedVectorType>(CI->getType());
3143 unsigned NumElts = VecTy->getNumElements();
3144
3145 SmallVector<int, 8> Idxs(NumElts);
3146 for (unsigned i = 0; i != NumElts; ++i)
3147 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3148
3149 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3150
3151 if (CI->arg_size() == 4)
3152 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3153 CI->getArgOperand(2));
3154 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3155 // The immediate permute control byte looks like this:
3156 // [1:0] - select 128 bits from sources for low half of destination
3157 // [2] - ignore
3158 // [3] - zero low half of destination
3159 // [5:4] - select 128 bits from sources for high half of destination
3160 // [6] - ignore
3161 // [7] - zero high half of destination
3162
3163 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3164
3165 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3166 unsigned HalfSize = NumElts / 2;
3167 SmallVector<int, 8> ShuffleMask(NumElts);
3168
3169 // Determine which operand(s) are actually in use for this instruction.
3170 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3171 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3172
3173 // If needed, replace operands based on zero mask.
3174 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3175 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3176
3177 // Permute low half of result.
3178 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3179 for (unsigned i = 0; i < HalfSize; ++i)
3180 ShuffleMask[i] = StartIndex + i;
3181
3182 // Permute high half of result.
3183 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3184 for (unsigned i = 0; i < HalfSize; ++i)
3185 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3186
3187 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3188
3189 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3190 Name.starts_with("avx512.mask.vpermil.p") ||
3191 Name.starts_with("avx512.mask.pshuf.d.")) {
3192 Value *Op0 = CI->getArgOperand(0);
3193 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3194 auto *VecTy = cast<FixedVectorType>(CI->getType());
3195 unsigned NumElts = VecTy->getNumElements();
3196 // Calculate the size of each index in the immediate.
3197 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3198 unsigned IdxMask = ((1 << IdxSize) - 1);
3199
3200 SmallVector<int, 8> Idxs(NumElts);
3201 // Lookup the bits for this element, wrapping around the immediate every
3202 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
3203 // to offset by the first index of each group.
3204 for (unsigned i = 0; i != NumElts; ++i)
3205 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3206
3207 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3208
3209 if (CI->arg_size() == 4)
3210 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3211 CI->getArgOperand(2));
3212 } else if (Name == "sse2.pshufl.w" ||
3213 Name.starts_with("avx512.mask.pshufl.w.")) {
3214 Value *Op0 = CI->getArgOperand(0);
3215 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3216 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3217
3218 SmallVector<int, 16> Idxs(NumElts);
3219 for (unsigned l = 0; l != NumElts; l += 8) {
3220 for (unsigned i = 0; i != 4; ++i)
3221 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3222 for (unsigned i = 4; i != 8; ++i)
3223 Idxs[i + l] = i + l;
3224 }
3225
3226 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3227
3228 if (CI->arg_size() == 4)
3229 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3230 CI->getArgOperand(2));
3231 } else if (Name == "sse2.pshufh.w" ||
3232 Name.starts_with("avx512.mask.pshufh.w.")) {
3233 Value *Op0 = CI->getArgOperand(0);
3234 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3235 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3236
3237 SmallVector<int, 16> Idxs(NumElts);
3238 for (unsigned l = 0; l != NumElts; l += 8) {
3239 for (unsigned i = 0; i != 4; ++i)
3240 Idxs[i + l] = i + l;
3241 for (unsigned i = 0; i != 4; ++i)
3242 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3243 }
3244
3245 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3246
3247 if (CI->arg_size() == 4)
3248 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3249 CI->getArgOperand(2));
3250 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3251 Value *Op0 = CI->getArgOperand(0);
3252 Value *Op1 = CI->getArgOperand(1);
3253 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3254 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3255
3256 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3257 unsigned HalfLaneElts = NumLaneElts / 2;
3258
3259 SmallVector<int, 16> Idxs(NumElts);
3260 for (unsigned i = 0; i != NumElts; ++i) {
3261 // Base index is the starting element of the lane.
3262 Idxs[i] = i - (i % NumLaneElts);
3263 // If we are half way through the lane switch to the other source.
3264 if ((i % NumLaneElts) >= HalfLaneElts)
3265 Idxs[i] += NumElts;
3266 // Now select the specific element. By adding HalfLaneElts bits from
3267 // the immediate. Wrapping around the immediate every 8-bits.
3268 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3269 }
3270
3271 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3272
3273 Rep =
3274 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3275 } else if (Name.starts_with("avx512.mask.movddup") ||
3276 Name.starts_with("avx512.mask.movshdup") ||
3277 Name.starts_with("avx512.mask.movsldup")) {
3278 Value *Op0 = CI->getArgOperand(0);
3279 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3280 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3281
3282 unsigned Offset = 0;
3283 if (Name.starts_with("avx512.mask.movshdup."))
3284 Offset = 1;
3285
3286 SmallVector<int, 16> Idxs(NumElts);
3287 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3288 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3289 Idxs[i + l + 0] = i + l + Offset;
3290 Idxs[i + l + 1] = i + l + Offset;
3291 }
3292
3293 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3294
3295 Rep =
3296 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3297 } else if (Name.starts_with("avx512.mask.punpckl") ||
3298 Name.starts_with("avx512.mask.unpckl.")) {
3299 Value *Op0 = CI->getArgOperand(0);
3300 Value *Op1 = CI->getArgOperand(1);
3301 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3302 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3303
3304 SmallVector<int, 64> Idxs(NumElts);
3305 for (int l = 0; l != NumElts; l += NumLaneElts)
3306 for (int i = 0; i != NumLaneElts; ++i)
3307 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3308
3309 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3310
3311 Rep =
3312 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3313 } else if (Name.starts_with("avx512.mask.punpckh") ||
3314 Name.starts_with("avx512.mask.unpckh.")) {
3315 Value *Op0 = CI->getArgOperand(0);
3316 Value *Op1 = CI->getArgOperand(1);
3317 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3318 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3319
3320 SmallVector<int, 64> Idxs(NumElts);
3321 for (int l = 0; l != NumElts; l += NumLaneElts)
3322 for (int i = 0; i != NumLaneElts; ++i)
3323 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3324
3325 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3326
3327 Rep =
3328 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3329 } else if (Name.starts_with("avx512.mask.and.") ||
3330 Name.starts_with("avx512.mask.pand.")) {
3331 VectorType *FTy = cast<VectorType>(CI->getType());
3332 VectorType *ITy = VectorType::getInteger(FTy);
3333 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3334 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3335 Rep = Builder.CreateBitCast(Rep, FTy);
3336 Rep =
3337 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3338 } else if (Name.starts_with("avx512.mask.andn.") ||
3339 Name.starts_with("avx512.mask.pandn.")) {
3340 VectorType *FTy = cast<VectorType>(CI->getType());
3341 VectorType *ITy = VectorType::getInteger(FTy);
3342 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3343 Rep = Builder.CreateAnd(Rep,
3344 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3345 Rep = Builder.CreateBitCast(Rep, FTy);
3346 Rep =
3347 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3348 } else if (Name.starts_with("avx512.mask.or.") ||
3349 Name.starts_with("avx512.mask.por.")) {
3350 VectorType *FTy = cast<VectorType>(CI->getType());
3351 VectorType *ITy = VectorType::getInteger(FTy);
3352 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3353 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3354 Rep = Builder.CreateBitCast(Rep, FTy);
3355 Rep =
3356 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3357 } else if (Name.starts_with("avx512.mask.xor.") ||
3358 Name.starts_with("avx512.mask.pxor.")) {
3359 VectorType *FTy = cast<VectorType>(CI->getType());
3360 VectorType *ITy = VectorType::getInteger(FTy);
3361 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3362 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3363 Rep = Builder.CreateBitCast(Rep, FTy);
3364 Rep =
3365 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3366 } else if (Name.starts_with("avx512.mask.padd.")) {
3367 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3368 Rep =
3369 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3370 } else if (Name.starts_with("avx512.mask.psub.")) {
3371 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3372 Rep =
3373 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3374 } else if (Name.starts_with("avx512.mask.pmull.")) {
3375 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3376 Rep =
3377 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3378 } else if (Name.starts_with("avx512.mask.add.p")) {
3379 if (Name.ends_with(".512")) {
3380 Intrinsic::ID IID;
3381 if (Name[17] == 's')
3382 IID = Intrinsic::x86_avx512_add_ps_512;
3383 else
3384 IID = Intrinsic::x86_avx512_add_pd_512;
3385
3386 Rep = Builder.CreateIntrinsic(
3387 IID, {},
3388 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3389 } else {
3390 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3391 }
3392 Rep =
3393 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3394 } else if (Name.starts_with("avx512.mask.div.p")) {
3395 if (Name.ends_with(".512")) {
3396 Intrinsic::ID IID;
3397 if (Name[17] == 's')
3398 IID = Intrinsic::x86_avx512_div_ps_512;
3399 else
3400 IID = Intrinsic::x86_avx512_div_pd_512;
3401
3402 Rep = Builder.CreateIntrinsic(
3403 IID, {},
3404 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3405 } else {
3406 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3407 }
3408 Rep =
3409 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3410 } else if (Name.starts_with("avx512.mask.mul.p")) {
3411 if (Name.ends_with(".512")) {
3412 Intrinsic::ID IID;
3413 if (Name[17] == 's')
3414 IID = Intrinsic::x86_avx512_mul_ps_512;
3415 else
3416 IID = Intrinsic::x86_avx512_mul_pd_512;
3417
3418 Rep = Builder.CreateIntrinsic(
3419 IID, {},
3420 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3421 } else {
3422 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3423 }
3424 Rep =
3425 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3426 } else if (Name.starts_with("avx512.mask.sub.p")) {
3427 if (Name.ends_with(".512")) {
3428 Intrinsic::ID IID;
3429 if (Name[17] == 's')
3430 IID = Intrinsic::x86_avx512_sub_ps_512;
3431 else
3432 IID = Intrinsic::x86_avx512_sub_pd_512;
3433
3434 Rep = Builder.CreateIntrinsic(
3435 IID, {},
3436 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3437 } else {
3438 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3439 }
3440 Rep =
3441 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3442 } else if ((Name.starts_with("avx512.mask.max.p") ||
3443 Name.starts_with("avx512.mask.min.p")) &&
3444 Name.drop_front(18) == ".512") {
3445 bool IsDouble = Name[17] == 'd';
3446 bool IsMin = Name[13] == 'i';
3447 static const Intrinsic::ID MinMaxTbl[2][2] = {
3448 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3449 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3450 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3451
3452 Rep = Builder.CreateIntrinsic(
3453 IID, {},
3454 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3455 Rep =
3456 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3457 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3458 Rep =
3459 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3460 {CI->getArgOperand(0), Builder.getInt1(false)});
3461 Rep =
3462 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3463 } else if (Name.starts_with("avx512.mask.psll")) {
3464 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3465 bool IsVariable = Name[16] == 'v';
3466 char Size = Name[16] == '.' ? Name[17]
3467 : Name[17] == '.' ? Name[18]
3468 : Name[18] == '.' ? Name[19]
3469 : Name[20];
3470
3471 Intrinsic::ID IID;
3472 if (IsVariable && Name[17] != '.') {
3473 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3474 IID = Intrinsic::x86_avx2_psllv_q;
3475 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3476 IID = Intrinsic::x86_avx2_psllv_q_256;
3477 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3478 IID = Intrinsic::x86_avx2_psllv_d;
3479 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3480 IID = Intrinsic::x86_avx2_psllv_d_256;
3481 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3482 IID = Intrinsic::x86_avx512_psllv_w_128;
3483 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3484 IID = Intrinsic::x86_avx512_psllv_w_256;
3485 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3486 IID = Intrinsic::x86_avx512_psllv_w_512;
3487 else
3488 llvm_unreachable("Unexpected size");
3489 } else if (Name.ends_with(".128")) {
3490 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3491 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3492 : Intrinsic::x86_sse2_psll_d;
3493 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3494 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3495 : Intrinsic::x86_sse2_psll_q;
3496 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3497 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3498 : Intrinsic::x86_sse2_psll_w;
3499 else
3500 llvm_unreachable("Unexpected size");
3501 } else if (Name.ends_with(".256")) {
3502 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3503 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3504 : Intrinsic::x86_avx2_psll_d;
3505 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3506 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3507 : Intrinsic::x86_avx2_psll_q;
3508 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3509 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3510 : Intrinsic::x86_avx2_psll_w;
3511 else
3512 llvm_unreachable("Unexpected size");
3513 } else {
3514 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3515 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3516 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3517 : Intrinsic::x86_avx512_psll_d_512;
3518 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3519 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3520 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3521 : Intrinsic::x86_avx512_psll_q_512;
3522 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3523 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3524 : Intrinsic::x86_avx512_psll_w_512;
3525 else
3526 llvm_unreachable("Unexpected size");
3527 }
3528
3529 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3530 } else if (Name.starts_with("avx512.mask.psrl")) {
3531 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3532 bool IsVariable = Name[16] == 'v';
3533 char Size = Name[16] == '.' ? Name[17]
3534 : Name[17] == '.' ? Name[18]
3535 : Name[18] == '.' ? Name[19]
3536 : Name[20];
3537
3538 Intrinsic::ID IID;
3539 if (IsVariable && Name[17] != '.') {
3540 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3541 IID = Intrinsic::x86_avx2_psrlv_q;
3542 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3543 IID = Intrinsic::x86_avx2_psrlv_q_256;
3544 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3545 IID = Intrinsic::x86_avx2_psrlv_d;
3546 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3547 IID = Intrinsic::x86_avx2_psrlv_d_256;
3548 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3549 IID = Intrinsic::x86_avx512_psrlv_w_128;
3550 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3551 IID = Intrinsic::x86_avx512_psrlv_w_256;
3552 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3553 IID = Intrinsic::x86_avx512_psrlv_w_512;
3554 else
3555 llvm_unreachable("Unexpected size");
3556 } else if (Name.ends_with(".128")) {
3557 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3558 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3559 : Intrinsic::x86_sse2_psrl_d;
3560 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3561 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3562 : Intrinsic::x86_sse2_psrl_q;
3563 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3564 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3565 : Intrinsic::x86_sse2_psrl_w;
3566 else
3567 llvm_unreachable("Unexpected size");
3568 } else if (Name.ends_with(".256")) {
3569 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3570 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3571 : Intrinsic::x86_avx2_psrl_d;
3572 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3573 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3574 : Intrinsic::x86_avx2_psrl_q;
3575 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3576 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3577 : Intrinsic::x86_avx2_psrl_w;
3578 else
3579 llvm_unreachable("Unexpected size");
3580 } else {
3581 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3582 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
3583 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
3584 : Intrinsic::x86_avx512_psrl_d_512;
3585 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3586 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
3587 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3588 : Intrinsic::x86_avx512_psrl_q_512;
3589 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
3590 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3591 : Intrinsic::x86_avx512_psrl_w_512;
3592 else
3593 llvm_unreachable("Unexpected size");
3594 }
3595
3596 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3597 } else if (Name.starts_with("avx512.mask.psra")) {
3598 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3599 bool IsVariable = Name[16] == 'v';
3600 char Size = Name[16] == '.' ? Name[17]
3601 : Name[17] == '.' ? Name[18]
3602 : Name[18] == '.' ? Name[19]
3603 : Name[20];
3604
3605 Intrinsic::ID IID;
3606 if (IsVariable && Name[17] != '.') {
3607 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3608 IID = Intrinsic::x86_avx2_psrav_d;
3609 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3610 IID = Intrinsic::x86_avx2_psrav_d_256;
3611 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3612 IID = Intrinsic::x86_avx512_psrav_w_128;
3613 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3614 IID = Intrinsic::x86_avx512_psrav_w_256;
3615 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3616 IID = Intrinsic::x86_avx512_psrav_w_512;
3617 else
3618 llvm_unreachable("Unexpected size");
3619 } else if (Name.ends_with(".128")) {
3620 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3621 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3622 : Intrinsic::x86_sse2_psra_d;
3623 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3624 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
3625 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
3626 : Intrinsic::x86_avx512_psra_q_128;
3627 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3628 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3629 : Intrinsic::x86_sse2_psra_w;
3630 else
3631 llvm_unreachable("Unexpected size");
3632 } else if (Name.ends_with(".256")) {
3633 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3634 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3635 : Intrinsic::x86_avx2_psra_d;
3636 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3637 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
3638 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
3639 : Intrinsic::x86_avx512_psra_q_256;
3640 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3641 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3642 : Intrinsic::x86_avx2_psra_w;
3643 else
3644 llvm_unreachable("Unexpected size");
3645 } else {
3646 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3647 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
3648 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
3649 : Intrinsic::x86_avx512_psra_d_512;
3650 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3651 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
3652 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
3653 : Intrinsic::x86_avx512_psra_q_512;
3654 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3655 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3656 : Intrinsic::x86_avx512_psra_w_512;
3657 else
3658 llvm_unreachable("Unexpected size");
3659 }
3660
3661 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3662 } else if (Name.starts_with("avx512.mask.move.s")) {
3663 Rep = upgradeMaskedMove(Builder, *CI);
3664 } else if (Name.starts_with("avx512.cvtmask2")) {
3665 Rep = upgradeMaskToInt(Builder, *CI);
3666 } else if (Name.ends_with(".movntdqa")) {
3668 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3669
3670 LoadInst *LI = Builder.CreateAlignedLoad(
3671 CI->getType(), CI->getArgOperand(0),
3673 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
3674 Rep = LI;
3675 } else if (Name.starts_with("fma.vfmadd.") ||
3676 Name.starts_with("fma.vfmsub.") ||
3677 Name.starts_with("fma.vfnmadd.") ||
3678 Name.starts_with("fma.vfnmsub.")) {
3679 bool NegMul = Name[6] == 'n';
3680 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3681 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3682
3683 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3684 CI->getArgOperand(2)};
3685
3686 if (IsScalar) {
3687 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3688 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3689 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3690 }
3691
3692 if (NegMul && !IsScalar)
3693 Ops[0] = Builder.CreateFNeg(Ops[0]);
3694 if (NegMul && IsScalar)
3695 Ops[1] = Builder.CreateFNeg(Ops[1]);
3696 if (NegAcc)
3697 Ops[2] = Builder.CreateFNeg(Ops[2]);
3698
3699 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
3700
3701 if (IsScalar)
3702 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3703 } else if (Name.starts_with("fma4.vfmadd.s")) {
3704 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3705 CI->getArgOperand(2)};
3706
3707 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3708 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3709 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3710
3711 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
3712
3714 Rep, (uint64_t)0);
3715 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
3716 Name.starts_with("avx512.maskz.vfmadd.s") ||
3717 Name.starts_with("avx512.mask3.vfmadd.s") ||
3718 Name.starts_with("avx512.mask3.vfmsub.s") ||
3719 Name.starts_with("avx512.mask3.vfnmsub.s")) {
3720 bool IsMask3 = Name[11] == '3';
3721 bool IsMaskZ = Name[11] == 'z';
3722 // Drop the "avx512.mask." to make it easier.
3723 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3724 bool NegMul = Name[2] == 'n';
3725 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3726
3727 Value *A = CI->getArgOperand(0);
3728 Value *B = CI->getArgOperand(1);
3729 Value *C = CI->getArgOperand(2);
3730
3731 if (NegMul && (IsMask3 || IsMaskZ))
3732 A = Builder.CreateFNeg(A);
3733 if (NegMul && !(IsMask3 || IsMaskZ))
3734 B = Builder.CreateFNeg(B);
3735 if (NegAcc)
3736 C = Builder.CreateFNeg(C);
3737
3738 A = Builder.CreateExtractElement(A, (uint64_t)0);
3739 B = Builder.CreateExtractElement(B, (uint64_t)0);
3740 C = Builder.CreateExtractElement(C, (uint64_t)0);
3741
3742 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3743 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3744 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
3745
3746 Intrinsic::ID IID;
3747 if (Name.back() == 'd')
3748 IID = Intrinsic::x86_avx512_vfmadd_f64;
3749 else
3750 IID = Intrinsic::x86_avx512_vfmadd_f32;
3751 Rep = Builder.CreateIntrinsic(IID, {}, Ops);
3752 } else {
3753 Rep = Builder.CreateIntrinsic(Intrinsic::fma, A->getType(), {A, B, C});
3754 }
3755
3756 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
3757 : IsMask3 ? C
3758 : A;
3759
3760 // For Mask3 with NegAcc, we need to create a new extractelement that
3761 // avoids the negation above.
3762 if (NegAcc && IsMask3)
3763 PassThru =
3764 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
3765
3766 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
3767 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
3768 (uint64_t)0);
3769 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
3770 Name.starts_with("avx512.mask.vfnmadd.p") ||
3771 Name.starts_with("avx512.mask.vfnmsub.p") ||
3772 Name.starts_with("avx512.mask3.vfmadd.p") ||
3773 Name.starts_with("avx512.mask3.vfmsub.p") ||
3774 Name.starts_with("avx512.mask3.vfnmsub.p") ||
3775 Name.starts_with("avx512.maskz.vfmadd.p")) {
3776 bool IsMask3 = Name[11] == '3';
3777 bool IsMaskZ = Name[11] == 'z';
3778 // Drop the "avx512.mask." to make it easier.
3779 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3780 bool NegMul = Name[2] == 'n';
3781 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3782
3783 Value *A = CI->getArgOperand(0);
3784 Value *B = CI->getArgOperand(1);
3785 Value *C = CI->getArgOperand(2);
3786
3787 if (NegMul && (IsMask3 || IsMaskZ))
3788 A = Builder.CreateFNeg(A);
3789 if (NegMul && !(IsMask3 || IsMaskZ))
3790 B = Builder.CreateFNeg(B);
3791 if (NegAcc)
3792 C = Builder.CreateFNeg(C);
3793
3794 if (CI->arg_size() == 5 &&
3795 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3796 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3797 Intrinsic::ID IID;
3798 // Check the character before ".512" in string.
3799 if (Name[Name.size() - 5] == 's')
3800 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3801 else
3802 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3803
3804 Rep = Builder.CreateIntrinsic(IID, {}, {A, B, C, CI->getArgOperand(4)});
3805 } else {
3806 Rep = Builder.CreateIntrinsic(Intrinsic::fma, A->getType(), {A, B, C});
3807 }
3808
3809 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
3810 : IsMask3 ? CI->getArgOperand(2)
3811 : CI->getArgOperand(0);
3812
3813 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3814 } else if (Name.starts_with("fma.vfmsubadd.p")) {
3815 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3816 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3817 Intrinsic::ID IID;
3818 if (VecWidth == 128 && EltWidth == 32)
3819 IID = Intrinsic::x86_fma_vfmaddsub_ps;
3820 else if (VecWidth == 256 && EltWidth == 32)
3821 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3822 else if (VecWidth == 128 && EltWidth == 64)
3823 IID = Intrinsic::x86_fma_vfmaddsub_pd;
3824 else if (VecWidth == 256 && EltWidth == 64)
3825 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3826 else
3827 llvm_unreachable("Unexpected intrinsic");
3828
3829 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3830 CI->getArgOperand(2)};
3831 Ops[2] = Builder.CreateFNeg(Ops[2]);
3832 Rep = Builder.CreateIntrinsic(IID, {}, Ops);
3833 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
3834 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
3835 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
3836 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
3837 bool IsMask3 = Name[11] == '3';
3838 bool IsMaskZ = Name[11] == 'z';
3839 // Drop the "avx512.mask." to make it easier.
3840 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3841 bool IsSubAdd = Name[3] == 's';
3842 if (CI->arg_size() == 5) {
3843 Intrinsic::ID IID;
3844 // Check the character before ".512" in string.
3845 if (Name[Name.size() - 5] == 's')
3846 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3847 else
3848 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3849
3850 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3851 CI->getArgOperand(2), CI->getArgOperand(4)};
3852 if (IsSubAdd)
3853 Ops[2] = Builder.CreateFNeg(Ops[2]);
3854
3855 Rep = Builder.CreateIntrinsic(IID, {}, Ops);
3856 } else {
3857 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3858
3859 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3860 CI->getArgOperand(2)};
3861
3863 CI->getModule(), Intrinsic::fma, Ops[0]->getType());
3864 Value *Odd = Builder.CreateCall(FMA, Ops);
3865 Ops[2] = Builder.CreateFNeg(Ops[2]);
3866 Value *Even = Builder.CreateCall(FMA, Ops);
3867
3868 if (IsSubAdd)
3869 std::swap(Even, Odd);
3870
3871 SmallVector<int, 32> Idxs(NumElts);
3872 for (int i = 0; i != NumElts; ++i)
3873 Idxs[i] = i + (i % 2) * NumElts;
3874
3875 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3876 }
3877
3878 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
3879 : IsMask3 ? CI->getArgOperand(2)
3880 : CI->getArgOperand(0);
3881
3882 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3883 } else if (Name.starts_with("avx512.mask.pternlog.") ||
3884 Name.starts_with("avx512.maskz.pternlog.")) {
3885 bool ZeroMask = Name[11] == 'z';
3886 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3887 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3888 Intrinsic::ID IID;
3889 if (VecWidth == 128 && EltWidth == 32)
3890 IID = Intrinsic::x86_avx512_pternlog_d_128;
3891 else if (VecWidth == 256 && EltWidth == 32)
3892 IID = Intrinsic::x86_avx512_pternlog_d_256;
3893 else if (VecWidth == 512 && EltWidth == 32)
3894 IID = Intrinsic::x86_avx512_pternlog_d_512;
3895 else if (VecWidth == 128 && EltWidth == 64)
3896 IID = Intrinsic::x86_avx512_pternlog_q_128;
3897 else if (VecWidth == 256 && EltWidth == 64)
3898 IID = Intrinsic::x86_avx512_pternlog_q_256;
3899 else if (VecWidth == 512 && EltWidth == 64)
3900 IID = Intrinsic::x86_avx512_pternlog_q_512;
3901 else
3902 llvm_unreachable("Unexpected intrinsic");
3903
3904 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3905 CI->getArgOperand(2), CI->getArgOperand(3)};
3906 Rep = Builder.CreateIntrinsic(IID, {}, Args);
3907 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3908 : CI->getArgOperand(0);
3909 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3910 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
3911 Name.starts_with("avx512.maskz.vpmadd52")) {
3912 bool ZeroMask = Name[11] == 'z';
3913 bool High = Name[20] == 'h' || Name[21] == 'h';
3914 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3915 Intrinsic::ID IID;
3916 if (VecWidth == 128 && !High)
3917 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3918 else if (VecWidth == 256 && !High)
3919 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3920 else if (VecWidth == 512 && !High)
3921 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3922 else if (VecWidth == 128 && High)
3923 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3924 else if (VecWidth == 256 && High)
3925 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3926 else if (VecWidth == 512 && High)
3927 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3928 else
3929 llvm_unreachable("Unexpected intrinsic");
3930
3931 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3932 CI->getArgOperand(2)};
3933 Rep = Builder.CreateIntrinsic(IID, {}, Args);
3934 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3935 : CI->getArgOperand(0);
3936 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3937 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
3938 Name.starts_with("avx512.mask.vpermt2var.") ||
3939 Name.starts_with("avx512.maskz.vpermt2var.")) {
3940 bool ZeroMask = Name[11] == 'z';
3941 bool IndexForm = Name[17] == 'i';
3942 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3943 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
3944 Name.starts_with("avx512.maskz.vpdpbusd.") ||
3945 Name.starts_with("avx512.mask.vpdpbusds.") ||
3946 Name.starts_with("avx512.maskz.vpdpbusds.")) {
3947 bool ZeroMask = Name[11] == 'z';
3948 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3949 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3950 Intrinsic::ID IID;
3951 if (VecWidth == 128 && !IsSaturating)
3952 IID = Intrinsic::x86_avx512_vpdpbusd_128;
3953 else if (VecWidth == 256 && !IsSaturating)
3954 IID = Intrinsic::x86_avx512_vpdpbusd_256;
3955 else if (VecWidth == 512 && !IsSaturating)
3956 IID = Intrinsic::x86_avx512_vpdpbusd_512;
3957 else if (VecWidth == 128 && IsSaturating)
3958 IID = Intrinsic::x86_avx512_vpdpbusds_128;
3959 else if (VecWidth == 256 && IsSaturating)
3960 IID = Intrinsic::x86_avx512_vpdpbusds_256;
3961 else if (VecWidth == 512 && IsSaturating)
3962 IID = Intrinsic::x86_avx512_vpdpbusds_512;
3963 else
3964 llvm_unreachable("Unexpected intrinsic");
3965
3966 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3967 CI->getArgOperand(2)};
3968 Rep = Builder.CreateIntrinsic(IID, {}, Args);
3969 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3970 : CI->getArgOperand(0);
3971 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3972 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
3973 Name.starts_with("avx512.maskz.vpdpwssd.") ||
3974 Name.starts_with("avx512.mask.vpdpwssds.") ||
3975 Name.starts_with("avx512.maskz.vpdpwssds.")) {
3976 bool ZeroMask = Name[11] == 'z';
3977 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3978 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3979 Intrinsic::ID IID;
3980 if (VecWidth == 128 && !IsSaturating)
3981 IID = Intrinsic::x86_avx512_vpdpwssd_128;
3982 else if (VecWidth == 256 && !IsSaturating)
3983 IID = Intrinsic::x86_avx512_vpdpwssd_256;
3984 else if (VecWidth == 512 && !IsSaturating)
3985 IID = Intrinsic::x86_avx512_vpdpwssd_512;
3986 else if (VecWidth == 128 && IsSaturating)
3987 IID = Intrinsic::x86_avx512_vpdpwssds_128;
3988 else if (VecWidth == 256 && IsSaturating)
3989 IID = Intrinsic::x86_avx512_vpdpwssds_256;
3990 else if (VecWidth == 512 && IsSaturating)
3991 IID = Intrinsic::x86_avx512_vpdpwssds_512;
3992 else
3993 llvm_unreachable("Unexpected intrinsic");
3994
3995 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3996 CI->getArgOperand(2)};
3997 Rep = Builder.CreateIntrinsic(IID, {}, Args);
3998 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3999 : CI->getArgOperand(0);
4000 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4001 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4002 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4003 Name == "subborrow.u32" || Name == "subborrow.u64") {
4004 Intrinsic::ID IID;
4005 if (Name[0] == 'a' && Name.back() == '2')
4006 IID = Intrinsic::x86_addcarry_32;
4007 else if (Name[0] == 'a' && Name.back() == '4')
4008 IID = Intrinsic::x86_addcarry_64;
4009 else if (Name[0] == 's' && Name.back() == '2')
4010 IID = Intrinsic::x86_subborrow_32;
4011 else if (Name[0] == 's' && Name.back() == '4')
4012 IID = Intrinsic::x86_subborrow_64;
4013 else
4014 llvm_unreachable("Unexpected intrinsic");
4015
4016 // Make a call with 3 operands.
4017 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4018 CI->getArgOperand(2)};
4019 Value *NewCall = Builder.CreateIntrinsic(IID, {}, Args);
4020
4021 // Extract the second result and store it.
4022 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4023 Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
4024 // Replace the original call result with the first result of the new call.
4025 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4026
4027 CI->replaceAllUsesWith(CF);
4028 Rep = nullptr;
4029 } else if (Name.starts_with("avx512.mask.") &&
4030 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4031 // Rep will be updated by the call in the condition.
4032 }
4033
4034 return Rep;
4035}
4036
4038 Function *F, IRBuilder<> &Builder) {
4039 if (Name.starts_with("neon.bfcvt")) {
4040 if (Name.starts_with("neon.bfcvtn2")) {
4041 SmallVector<int, 32> LoMask(4);
4042 std::iota(LoMask.begin(), LoMask.end(), 0);
4043 SmallVector<int, 32> ConcatMask(8);
4044 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4045 Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4046 Value *Trunc =
4047 Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
4048 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4049 } else if (Name.starts_with("neon.bfcvtn")) {
4050 SmallVector<int, 32> ConcatMask(8);
4051 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4052 Type *V4BF16 =
4053 FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
4054 Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4055 dbgs() << "Trunc: " << *Trunc << "\n";
4056 return Builder.CreateShuffleVector(
4057 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
4058 } else {
4059 return Builder.CreateFPTrunc(CI->getOperand(0),
4060 Type::getBFloatTy(F->getContext()));
4061 }
4062 } else if (Name.starts_with("sve.fcvt")) {
4063 Intrinsic::ID NewID =
4065 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4066 .Case("sve.fcvtnt.bf16f32",
4067 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4069 if (NewID == Intrinsic::not_intrinsic)
4070 llvm_unreachable("Unhandled Intrinsic!");
4071
4072 SmallVector<Value *, 3> Args(CI->args());
4073
4074 // The original intrinsics incorrectly used a predicate based on the
4075 // smallest element type rather than the largest.
4076 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4077 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4078
4079 if (Args[1]->getType() != BadPredTy)
4080 llvm_unreachable("Unexpected predicate type!");
4081
4082 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4083 BadPredTy, Args[1]);
4084 Args[1] = Builder.CreateIntrinsic(
4085 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4086
4087 return Builder.CreateIntrinsic(NewID, {}, Args, /*FMFSource=*/nullptr,
4088 CI->getName());
4089 }
4090
4091 llvm_unreachable("Unhandled Intrinsic!");
4092}
4093
4095 IRBuilder<> &Builder) {
4096 if (Name == "mve.vctp64.old") {
4097 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
4098 // correct type.
4099 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4100 CI->getArgOperand(0),
4101 /*FMFSource=*/nullptr, CI->getName());
4102 Value *C1 = Builder.CreateIntrinsic(
4103 Intrinsic::arm_mve_pred_v2i,
4104 {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
4105 return Builder.CreateIntrinsic(
4106 Intrinsic::arm_mve_pred_i2v,
4107 {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
4108 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4109 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4110 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4111 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4112 Name ==
4113 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4114 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4115 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4116 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4117 Name ==
4118 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4119 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4120 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
4121 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
4122 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
4123 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
4124 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
4125 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
4126 std::vector<Type *> Tys;
4127 unsigned ID = CI->getIntrinsicID();
4128 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
4129 switch (ID) {
4130 case Intrinsic::arm_mve_mull_int_predicated:
4131 case Intrinsic::arm_mve_vqdmull_predicated:
4132 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4133 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
4134 break;
4135 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4136 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4137 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4138 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
4139 V2I1Ty};
4140 break;
4141 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4142 Tys = {CI->getType(), CI->getOperand(0)->getType(),
4143 CI->getOperand(1)->getType(), V2I1Ty};
4144 break;
4145 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4146 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
4147 CI->getOperand(2)->getType(), V2I1Ty};
4148 break;
4149 case Intrinsic::arm_cde_vcx1q_predicated:
4150 case Intrinsic::arm_cde_vcx1qa_predicated:
4151 case Intrinsic::arm_cde_vcx2q_predicated:
4152 case Intrinsic::arm_cde_vcx2qa_predicated:
4153 case Intrinsic::arm_cde_vcx3q_predicated:
4154 case Intrinsic::arm_cde_vcx3qa_predicated:
4155 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
4156 break;
4157 default:
4158 llvm_unreachable("Unhandled Intrinsic!");
4159 }
4160
4161 std::vector<Value *> Ops;
4162 for (Value *Op : CI->args()) {
4163 Type *Ty = Op->getType();
4164 if (Ty->getScalarSizeInBits() == 1) {
4165 Value *C1 = Builder.CreateIntrinsic(
4166 Intrinsic::arm_mve_pred_v2i,
4167 {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
4168 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4169 }
4170 Ops.push_back(Op);
4171 }
4172
4173 return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
4174 CI->getName());
4175 }
4176 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4177}
4178
4179// These are expected to have the arguments:
4180// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4181//
4182// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4183//
4185 Function *F, IRBuilder<> &Builder) {
4186 AtomicRMWInst::BinOp RMWOp =
4188 .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4189 .StartsWith("ds.fmin", AtomicRMWInst::FMin)
4190 .StartsWith("ds.fmax", AtomicRMWInst::FMax)
4191 .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4192 .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
4193 .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
4194 .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
4195 .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
4196 .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
4197 .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
4198 .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax);
4199
4200 unsigned NumOperands = CI->getNumOperands();
4201 if (NumOperands < 3) // Malformed bitcode.
4202 return nullptr;
4203
4204 Value *Ptr = CI->getArgOperand(0);
4205 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4206 if (!PtrTy) // Malformed.
4207 return nullptr;
4208
4209 Value *Val = CI->getArgOperand(1);
4210 if (Val->getType() != CI->getType()) // Malformed.
4211 return nullptr;
4212
4213 ConstantInt *OrderArg = nullptr;
4214 bool IsVolatile = false;
4215
4216 // These should have 5 arguments (plus the callee). A separate version of the
4217 // ds_fadd intrinsic was defined for bf16 which was missing arguments.
4218 if (NumOperands > 3)
4219 OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4220
4221 // Ignore scope argument at 3
4222
4223 if (NumOperands > 5) {
4224 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4225 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4226 }
4227
4228 AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
4229 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
4230 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4231 if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
4232 Order = AtomicOrdering::SequentiallyConsistent;
4233
4234 LLVMContext &Ctx = F->getContext();
4235
4236 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4237 Type *RetTy = CI->getType();
4238 if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4239 if (VT->getElementType()->isIntegerTy(16)) {
4240 VectorType *AsBF16 =
4241 VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4242 Val = Builder.CreateBitCast(Val, AsBF16);
4243 }
4244 }
4245
4246 // The scope argument never really worked correctly. Use agent as the most
4247 // conservative option which should still always produce the instruction.
4248 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4249 AtomicRMWInst *RMW =
4250 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4251
4252 unsigned AddrSpace = PtrTy->getAddressSpace();
4253 if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
4254 MDNode *EmptyMD = MDNode::get(F->getContext(), {});
4255 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
4256 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
4257 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
4258 }
4259
4260 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
4261 MDBuilder MDB(F->getContext());
4262 MDNode *RangeNotPrivate =
4265 RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4266 }
4267
4268 if (IsVolatile)
4269 RMW->setVolatile(true);
4270
4271 return Builder.CreateBitCast(RMW, RetTy);
4272}
4273
4274/// Helper to unwrap intrinsic call MetadataAsValue operands.
4275template <typename MDType>
4276static MDType *unwrapMAVOp(CallBase *CI, unsigned Op) {
4277 if (MetadataAsValue *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op)))
4278 return dyn_cast<MDType>(MAV->getMetadata());
4279 return nullptr;
4280}
4281
4282/// Convert debug intrinsic calls to non-instruction debug records.
4283/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4284/// \p CI - The debug intrinsic call.
4286 DbgRecord *DR = nullptr;
4287 if (Name == "label") {
4288 DR = new DbgLabelRecord(unwrapMAVOp<DILabel>(CI, 0), CI->getDebugLoc());
4289 } else if (Name == "assign") {
4290 DR = new DbgVariableRecord(
4291 unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, 1),
4292 unwrapMAVOp<DIExpression>(CI, 2), unwrapMAVOp<DIAssignID>(CI, 3),
4293 unwrapMAVOp<Metadata>(CI, 4), unwrapMAVOp<DIExpression>(CI, 5),
4294 CI->getDebugLoc());
4295 } else if (Name == "declare") {
4296 DR = new DbgVariableRecord(
4297 unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, 1),
4298 unwrapMAVOp<DIExpression>(CI, 2), CI->getDebugLoc(),
4299 DbgVariableRecord::LocationType::Declare);
4300 } else if (Name == "addr") {
4301 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
4302 DIExpression *Expr = unwrapMAVOp<DIExpression>(CI, 2);
4303 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
4304 DR = new DbgVariableRecord(unwrapMAVOp<Metadata>(CI, 0),
4305 unwrapMAVOp<DILocalVariable>(CI, 1), Expr,
4306 CI->getDebugLoc());
4307 } else if (Name == "value") {
4308 // An old version of dbg.value had an extra offset argument.
4309 unsigned VarOp = 1;
4310 unsigned ExprOp = 2;
4311 if (CI->arg_size() == 4) {
4312 auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1));
4313 // Nonzero offset dbg.values get dropped without a replacement.
4314 if (!Offset || !Offset->isZeroValue())
4315 return;
4316 VarOp = 2;
4317 ExprOp = 3;
4318 }
4319 DR = new DbgVariableRecord(
4320 unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, VarOp),
4321 unwrapMAVOp<DIExpression>(CI, ExprOp), CI->getDebugLoc());
4322 }
4323 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4324 CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
4325}
4326
4327/// Upgrade a call to an old intrinsic. All argument and return casting must be
4328/// provided to seamlessly integrate with existing context.
4330 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
4331 // checks the callee's function type matches. It's likely we need to handle
4332 // type changes here.
4333 Function *F = dyn_cast<Function>(CI->getCalledOperand());
4334 if (!F)
4335 return;
4336
4337 LLVMContext &C = CI->getContext();
4338 IRBuilder<> Builder(C);
4339 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
4340
4341 if (!NewFn) {
4342 bool FallthroughToDefaultUpgrade = false;
4343 // Get the Function's name.
4344 StringRef Name = F->getName();
4345
4346 assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
4347 Name = Name.substr(5);
4348
4349 bool IsX86 = Name.consume_front("x86.");
4350 bool IsNVVM = Name.consume_front("nvvm.");
4351 bool IsAArch64 = Name.consume_front("aarch64.");
4352 bool IsARM = Name.consume_front("arm.");
4353 bool IsAMDGCN = Name.consume_front("amdgcn.");
4354 bool IsDbg = Name.consume_front("dbg.");
4355 Value *Rep = nullptr;
4356
4357 if (!IsX86 && Name == "stackprotectorcheck") {
4358 Rep = nullptr;
4359 } else if (IsNVVM) {
4360 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
4361 } else if (IsX86) {
4362 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
4363 } else if (IsAArch64) {
4364 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
4365 } else if (IsARM) {
4366 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4367 } else if (IsAMDGCN) {
4368 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4369 } else if (IsDbg) {
4370 // We might have decided we don't want the new format after all between
4371 // first requesting the upgrade and now; skip the conversion if that is
4372 // the case, and check here to see if the intrinsic needs to be upgraded
4373 // normally.
4374 if (!CI->getModule()->IsNewDbgInfoFormat) {
4375 bool NeedsUpgrade =
4376 upgradeIntrinsicFunction1(CI->getCalledFunction(), NewFn, false);
4377 if (!NeedsUpgrade)
4378 return;
4379 FallthroughToDefaultUpgrade = true;
4380 } else {
4382 }
4383 } else {
4384 llvm_unreachable("Unknown function for CallBase upgrade.");
4385 }
4386
4387 if (!FallthroughToDefaultUpgrade) {
4388 if (Rep)
4389 CI->replaceAllUsesWith(Rep);
4390 CI->eraseFromParent();
4391 return;
4392 }
4393 }
4394
4395 const auto &DefaultCase = [&]() -> void {
4396 if (CI->getFunctionType() == NewFn->getFunctionType()) {
4397 // Handle generic mangling change.
4398 assert(
4399 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4400 "Unknown function for CallBase upgrade and isn't just a name change");
4401 CI->setCalledFunction(NewFn);
4402 return;
4403 }
4404
4405 // This must be an upgrade from a named to a literal struct.
4406 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
4407 assert(OldST != NewFn->getReturnType() &&
4408 "Return type must have changed");
4409 assert(OldST->getNumElements() ==
4410 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4411 "Must have same number of elements");
4412
4413 SmallVector<Value *> Args(CI->args());
4414 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
4415 NewCI->setAttributes(CI->getAttributes());
4416 Value *Res = PoisonValue::get(OldST);
4417 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4418 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4419 Res = Builder.CreateInsertValue(Res, Elem, Idx);
4420 }
4421 CI->replaceAllUsesWith(Res);
4422 CI->eraseFromParent();
4423 return;
4424 }
4425
4426 // We're probably about to produce something invalid. Let the verifier catch
4427 // it instead of dying here.
4428 CI->setCalledOperand(
4430 return;
4431 };
4432 CallInst *NewCall = nullptr;
4433 switch (NewFn->getIntrinsicID()) {
4434 default: {
4435 DefaultCase();
4436 return;
4437 }
4438 case Intrinsic::arm_neon_vst1:
4439 case Intrinsic::arm_neon_vst2:
4440 case Intrinsic::arm_neon_vst3:
4441 case Intrinsic::arm_neon_vst4:
4442 case Intrinsic::arm_neon_vst2lane:
4443 case Intrinsic::arm_neon_vst3lane:
4444 case Intrinsic::arm_neon_vst4lane: {
4445 SmallVector<Value *, 4> Args(CI->args());
4446 NewCall = Builder.CreateCall(NewFn, Args);
4447 break;
4448 }
4449 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4450 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4451 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
4452 LLVMContext &Ctx = F->getParent()->getContext();
4453 SmallVector<Value *, 4> Args(CI->args());
4454 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
4455 cast<ConstantInt>(Args[3])->getZExtValue());
4456 NewCall = Builder.CreateCall(NewFn, Args);
4457 break;
4458 }
4459 case Intrinsic::aarch64_sve_ld3_sret:
4460 case Intrinsic::aarch64_sve_ld4_sret:
4461 case Intrinsic::aarch64_sve_ld2_sret: {
4462 StringRef Name = F->getName();
4463 Name = Name.substr(5);
4464 unsigned N = StringSwitch<unsigned>(Name)
4465 .StartsWith("aarch64.sve.ld2", 2)
4466 .StartsWith("aarch64.sve.ld3", 3)
4467 .StartsWith("aarch64.sve.ld4", 4)
4468 .Default(0);
4469 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4470 unsigned MinElts = RetTy->getMinNumElements() / N;
4471 SmallVector<Value *, 2> Args(CI->args());
4472 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
4474 for (unsigned I = 0; I < N; I++) {
4475 Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4476 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
4477 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
4478 }
4479 NewCall = dyn_cast<CallInst>(Ret);
4480 break;
4481 }
4482
4483 case Intrinsic::coro_end: {
4484 SmallVector<Value *, 3> Args(CI->args());
4485 Args.push_back(ConstantTokenNone::get(CI->getContext()));
4486 NewCall = Builder.CreateCall(NewFn, Args);
4487 break;
4488 }
4489
4490 case Intrinsic::vector_extract: {
4491 StringRef Name = F->getName();
4492 Name = Name.substr(5); // Strip llvm
4493 if (!Name.starts_with("aarch64.sve.tuple.get")) {
4494 DefaultCase();
4495 return;
4496 }
4497 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4498 unsigned MinElts = RetTy->getMinNumElements();
4499 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4500 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4501 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
4502 break;
4503 }
4504
4505 case Intrinsic::vector_insert: {
4506 StringRef Name = F->getName();
4507 Name = Name.substr(5);
4508 if (!Name.starts_with("aarch64.sve.tuple")) {
4509 DefaultCase();
4510 return;
4511 }
4512 if (Name.starts_with("aarch64.sve.tuple.set")) {
4513 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4514 auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
4515 Value *NewIdx =
4516 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
4517 NewCall = Builder.CreateCall(
4518 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
4519 break;
4520 }
4521 if (Name.starts_with("aarch64.sve.tuple.create")) {
4522 unsigned N = StringSwitch<unsigned>(Name)
4523 .StartsWith("aarch64.sve.tuple.create2", 2)
4524 .StartsWith("aarch64.sve.tuple.create3", 3)
4525 .StartsWith("aarch64.sve.tuple.create4", 4)
4526 .Default(0);
4527 assert(N > 1 && "Create is expected to be between 2-4");
4528 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4530 unsigned MinElts = RetTy->getMinNumElements() / N;
4531 for (unsigned I = 0; I < N; I++) {
4532 Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4533 Value *V = CI->getArgOperand(I);
4534 Ret = Builder.CreateInsertVector(RetTy, Ret, V, Idx);
4535 }
4536 NewCall = dyn_cast<CallInst>(Ret);
4537 }
4538 break;
4539 }
4540
4541 case Intrinsic::arm_neon_bfdot:
4542 case Intrinsic::arm_neon_bfmmla:
4543 case Intrinsic::arm_neon_bfmlalb:
4544 case Intrinsic::arm_neon_bfmlalt:
4545 case Intrinsic::aarch64_neon_bfdot:
4546 case Intrinsic::aarch64_neon_bfmmla:
4547 case Intrinsic::aarch64_neon_bfmlalb:
4548 case Intrinsic::aarch64_neon_bfmlalt: {
4550 assert(CI->arg_size() == 3 &&
4551 "Mismatch between function args and call args");
4552 size_t OperandWidth =
4554 assert((OperandWidth == 64 || OperandWidth == 128) &&
4555 "Unexpected operand width");
4556 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
4557 auto Iter = CI->args().begin();
4558 Args.push_back(*Iter++);
4559 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4560 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4561 NewCall = Builder.CreateCall(NewFn, Args);
4562 break;
4563 }
4564
4565 case Intrinsic::bitreverse:
4566 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4567 break;
4568
4569 case Intrinsic::ctlz:
4570 case Intrinsic::cttz:
4571 assert(CI->arg_size() == 1 &&
4572 "Mismatch between function args and call args");
4573 NewCall =
4574 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
4575 break;
4576
4577 case Intrinsic::objectsize: {
4578 Value *NullIsUnknownSize =
4579 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
4580 Value *Dynamic =
4581 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
4582 NewCall = Builder.CreateCall(
4583 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
4584 break;
4585 }
4586
4587 case Intrinsic::ctpop:
4588 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4589 break;
4590
4591 case Intrinsic::convert_from_fp16:
4592 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4593 break;
4594
4595 case Intrinsic::dbg_value: {
4596 StringRef Name = F->getName();
4597 Name = Name.substr(5); // Strip llvm.
4598 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
4599 if (Name.starts_with("dbg.addr")) {
4600 DIExpression *Expr = cast<DIExpression>(
4601 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
4602 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
4603 NewCall =
4604 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4605 MetadataAsValue::get(C, Expr)});
4606 break;
4607 }
4608
4609 // Upgrade from the old version that had an extra offset argument.
4610 assert(CI->arg_size() == 4);
4611 // Drop nonzero offsets instead of attempting to upgrade them.
4612 if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
4613 if (Offset->isZeroValue()) {
4614 NewCall = Builder.CreateCall(
4615 NewFn,
4616 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
4617 break;
4618 }
4619 CI->eraseFromParent();
4620 return;
4621 }
4622
4623 case Intrinsic::ptr_annotation:
4624 // Upgrade from versions that lacked the annotation attribute argument.
4625 if (CI->arg_size() != 4) {
4626 DefaultCase();
4627 return;
4628 }
4629
4630 // Create a new call with an added null annotation attribute argument.
4631 NewCall =
4632 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4633 CI->getArgOperand(2), CI->getArgOperand(3),
4634 Constant::getNullValue(Builder.getPtrTy())});
4635 NewCall->takeName(CI);
4636 CI->replaceAllUsesWith(NewCall);
4637 CI->eraseFromParent();
4638 return;
4639
4640 case Intrinsic::var_annotation:
4641 // Upgrade from versions that lacked the annotation attribute argument.
4642 if (CI->arg_size() != 4) {
4643 DefaultCase();
4644 return;
4645 }
4646 // Create a new call with an added null annotation attribute argument.
4647 NewCall =
4648 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4649 CI->getArgOperand(2), CI->getArgOperand(3),
4650 Constant::getNullValue(Builder.getPtrTy())});
4651 NewCall->takeName(CI);
4652 CI->replaceAllUsesWith(NewCall);
4653 CI->eraseFromParent();
4654 return;
4655
4656 case Intrinsic::riscv_aes32dsi:
4657 case Intrinsic::riscv_aes32dsmi:
4658 case Intrinsic::riscv_aes32esi:
4659 case Intrinsic::riscv_aes32esmi:
4660 case Intrinsic::riscv_sm4ks:
4661 case Intrinsic::riscv_sm4ed: {
4662 // The last argument to these intrinsics used to be i8 and changed to i32.
4663 // The type overload for sm4ks and sm4ed was removed.
4664 Value *Arg2 = CI->getArgOperand(2);
4665 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
4666 return;
4667
4668 Value *Arg0 = CI->getArgOperand(0);
4669 Value *Arg1 = CI->getArgOperand(1);
4670 if (CI->getType()->isIntegerTy(64)) {
4671 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
4672 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
4673 }
4674
4675 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
4676 cast<ConstantInt>(Arg2)->getZExtValue());
4677
4678 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
4679 Value *Res = NewCall;
4680 if (Res->getType() != CI->getType())
4681 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4682 NewCall->takeName(CI);
4683 CI->replaceAllUsesWith(Res);
4684 CI->eraseFromParent();
4685 return;
4686 }
4687 case Intrinsic::riscv_sha256sig0:
4688 case Intrinsic::riscv_sha256sig1:
4689 case Intrinsic::riscv_sha256sum0:
4690 case Intrinsic::riscv_sha256sum1:
4691 case Intrinsic::riscv_sm3p0:
4692 case Intrinsic::riscv_sm3p1: {
4693 // The last argument to these intrinsics used to be i8 and changed to i32.
4694 // The type overload for sm4ks and sm4ed was removed.
4695 if (!CI->getType()->isIntegerTy(64))
4696 return;
4697
4698 Value *Arg =
4699 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
4700
4701 NewCall = Builder.CreateCall(NewFn, Arg);
4702 Value *Res =
4703 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4704 NewCall->takeName(CI);
4705 CI->replaceAllUsesWith(Res);
4706 CI->eraseFromParent();
4707 return;
4708 }
4709
4710 case Intrinsic::x86_xop_vfrcz_ss:
4711 case Intrinsic::x86_xop_vfrcz_sd:
4712 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
4713 break;
4714
4715 case Intrinsic::x86_xop_vpermil2pd:
4716 case Intrinsic::x86_xop_vpermil2ps:
4717 case Intrinsic::x86_xop_vpermil2pd_256:
4718 case Intrinsic::x86_xop_vpermil2ps_256: {
4719 SmallVector<Value *, 4> Args(CI->args());
4720 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
4721 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
4722 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
4723 NewCall = Builder.CreateCall(NewFn, Args);
4724 break;
4725 }
4726
4727 case Intrinsic::x86_sse41_ptestc:
4728 case Intrinsic::x86_sse41_ptestz:
4729 case Intrinsic::x86_sse41_ptestnzc: {
4730 // The arguments for these intrinsics used to be v4f32, and changed
4731 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
4732 // So, the only thing required is a bitcast for both arguments.
4733 // First, check the arguments have the old type.
4734 Value *Arg0 = CI->getArgOperand(0);
4735 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
4736 return;
4737
4738 // Old intrinsic, add bitcasts
4739 Value *Arg1 = CI->getArgOperand(1);
4740
4741 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
4742
4743 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
4744 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
4745
4746 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
4747 break;
4748 }
4749
4750 case Intrinsic::x86_rdtscp: {
4751 // This used to take 1 arguments. If we have no arguments, it is already
4752 // upgraded.
4753 if (CI->getNumOperands() == 0)
4754 return;
4755
4756 NewCall = Builder.CreateCall(NewFn);
4757 // Extract the second result and store it.
4758 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4759 Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
4760 // Replace the original call result with the first result of the new call.
4761 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
4762
4763 NewCall->takeName(CI);
4764 CI->replaceAllUsesWith(TSC);
4765 CI->eraseFromParent();
4766 return;
4767 }
4768
4769 case Intrinsic::x86_sse41_insertps:
4770 case Intrinsic::x86_sse41_dppd:
4771 case Intrinsic::x86_sse41_dpps:
4772 case Intrinsic::x86_sse41_mpsadbw:
4773 case Intrinsic::x86_avx_dp_ps_256:
4774 case Intrinsic::x86_avx2_mpsadbw: {
4775 // Need to truncate the last argument from i32 to i8 -- this argument models
4776 // an inherently 8-bit immediate operand to these x86 instructions.
4777 SmallVector<Value *, 4> Args(CI->args());
4778
4779 // Replace the last argument with a trunc.
4780 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
4781 NewCall = Builder.CreateCall(NewFn, Args);
4782 break;
4783 }
4784
4785 case Intrinsic::x86_avx512_mask_cmp_pd_128:
4786 case Intrinsic::x86_avx512_mask_cmp_pd_256:
4787 case Intrinsic::x86_avx512_mask_cmp_pd_512:
4788 case Intrinsic::x86_avx512_mask_cmp_ps_128:
4789 case Intrinsic::x86_avx512_mask_cmp_ps_256:
4790 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
4791 SmallVector<Value *, 4> Args(CI->args());
4792 unsigned NumElts =
4793 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
4794 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
4795
4796 NewCall = Builder.CreateCall(NewFn, Args);
4797 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
4798
4799 NewCall->takeName(CI);
4800 CI->replaceAllUsesWith(Res);
4801 CI->eraseFromParent();
4802 return;
4803 }
4804
4805 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
4806 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
4807 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
4808 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
4809 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
4810 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
4811 SmallVector<Value *, 4> Args(CI->args());
4812 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4813 if (NewFn->getIntrinsicID() ==
4814 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
4815 Args[1] = Builder.CreateBitCast(
4816 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4817
4818 NewCall = Builder.CreateCall(NewFn, Args);
4819 Value *Res = Builder.CreateBitCast(
4820 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
4821
4822 NewCall->takeName(CI);
4823 CI->replaceAllUsesWith(Res);
4824 CI->eraseFromParent();
4825 return;
4826 }
4827 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
4828 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
4829 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
4830 SmallVector<Value *, 4> Args(CI->args());
4831 unsigned NumElts =
4832 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
4833 Args[1] = Builder.CreateBitCast(
4834 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4835 Args[2] = Builder.CreateBitCast(
4836 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4837
4838 NewCall = Builder.CreateCall(NewFn, Args);
4839 break;
4840 }
4841
4842 case Intrinsic::thread_pointer: {
4843 NewCall = Builder.CreateCall(NewFn, {});
4844 break;
4845 }
4846
4847 case Intrinsic::memcpy:
4848 case Intrinsic::memmove:
4849 case Intrinsic::memset: {
4850 // We have to make sure that the call signature is what we're expecting.
4851 // We only want to change the old signatures by removing the alignment arg:
4852 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
4853 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
4854 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
4855 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
4856 // Note: i8*'s in the above can be any pointer type
4857 if (CI->arg_size() != 5) {
4858 DefaultCase();
4859 return;
4860 }
4861 // Remove alignment argument (3), and add alignment attributes to the
4862 // dest/src pointers.
4863 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
4864 CI->getArgOperand(2), CI->getArgOperand(4)};
4865 NewCall = Builder.CreateCall(NewFn, Args);
4866 AttributeList OldAttrs = CI->getAttributes();
4868 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
4869 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
4870 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
4871 NewCall->setAttributes(NewAttrs);
4872 auto *MemCI = cast<MemIntrinsic>(NewCall);
4873 // All mem intrinsics support dest alignment.
4874 const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
4875 MemCI->setDestAlignment(Align->getMaybeAlignValue());
4876 // Memcpy/Memmove also support source alignment.
4877 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
4878 MTI->setSourceAlignment(Align->getMaybeAlignValue());
4879 break;
4880 }
4881 }
4882 assert(NewCall && "Should have either set this variable or returned through "
4883 "the default case");
4884 NewCall->takeName(CI);
4885 CI->replaceAllUsesWith(NewCall);
4886 CI->eraseFromParent();
4887}
4888
4890 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
4891
4892 // Check if this function should be upgraded and get the replacement function
4893 // if there is one.
4894 Function *NewFn;
4895 if (UpgradeIntrinsicFunction(F, NewFn)) {
4896 // Replace all users of the old function with the new function or new
4897 // instructions. This is not a range loop because the call is deleted.
4898 for (User *U : make_early_inc_range(F->users()))
4899 if (CallBase *CB = dyn_cast<CallBase>(U))
4900 UpgradeIntrinsicCall(CB, NewFn);
4901
4902 // Remove old function, no longer used, from the module.
4903 F->eraseFromParent();
4904 }
4905}
4906
4908 const unsigned NumOperands = MD.getNumOperands();
4909 if (NumOperands == 0)
4910 return &MD; // Invalid, punt to a verifier error.
4911
4912 // Check if the tag uses struct-path aware TBAA format.
4913 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
4914 return &MD;
4915
4916 auto &Context = MD.getContext();
4917 if (NumOperands == 3) {
4918 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
4919 MDNode *ScalarType = MDNode::get(Context, Elts);
4920 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
4921 Metadata *Elts2[] = {ScalarType, ScalarType,
4924 MD.getOperand(2)};
4925 return MDNode::get(Context, Elts2);
4926 }
4927 // Create a MDNode <MD, MD, offset 0>
4929 Type::getInt64Ty(Context)))};
4930 return MDNode::get(Context, Elts);
4931}
4932
4934 Instruction *&Temp) {
4935 if (Opc != Instruction::BitCast)
4936 return nullptr;
4937
4938 Temp = nullptr;
4939 Type *SrcTy = V->getType();
4940 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4941 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4942 LLVMContext &Context = V->getContext();
4943
4944 // We have no information about target data layout, so we assume that
4945 // the maximum pointer size is 64bit.
4946 Type *MidTy = Type::getInt64Ty(Context);
4947 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
4948
4949 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
4950 }
4951
4952 return nullptr;
4953}
4954
4956 if (Opc != Instruction::BitCast)
4957 return nullptr;
4958
4959 Type *SrcTy = C->getType();
4960 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4961 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4962 LLVMContext &Context = C->getContext();
4963
4964 // We have no information about target data layout, so we assume that
4965 // the maximum pointer size is 64bit.
4966 Type *MidTy = Type::getInt64Ty(Context);
4967
4969 DestTy);
4970 }
4971
4972 return nullptr;
4973}
4974
4975/// Check the debug info version number, if it is out-dated, drop the debug
4976/// info. Return true if module is modified.
4979 return false;
4980
4981 // We need to get metadata before the module is verified (i.e., getModuleFlag
4982 // makes assumptions that we haven't verified yet). Carefully extract the flag
4983 // from the metadata.
4984 unsigned Version = 0;
4985 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
4986 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
4987 if (Flag->getNumOperands() < 3)
4988 return false;
4989 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
4990 return K->getString() == "Debug Info Version";
4991 return false;
4992 });
4993 if (OpIt != ModFlags->op_end()) {
4994 const MDOperand &ValOp = (*OpIt)->getOperand(2);
4995 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
4996 Version = CI->getZExtValue();
4997 }
4998 }
4999
5000 if (Version == DEBUG_METADATA_VERSION) {
5001 bool BrokenDebugInfo = false;
5002 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
5003 report_fatal_error("Broken module found, compilation aborted!");
5004 if (!BrokenDebugInfo)
5005 // Everything is ok.
5006 return false;
5007 else {
5008 // Diagnose malformed debug info.
5010 M.getContext().diagnose(Diag);
5011 }
5012 }
5013 bool Modified = StripDebugInfo(M);
5014 if (Modified && Version != DEBUG_METADATA_VERSION) {
5015 // Diagnose a version mismatch.
5016 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
5017 M.getContext().diagnose(DiagVersion);
5018 }
5019 return Modified;
5020}
5021
5022/// This checks for objc retain release marker which should be upgraded. It
5023/// returns true if module is modified.
5025 bool Changed = false;
5026 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
5027 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
5028 if (ModRetainReleaseMarker) {
5029 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
5030 if (Op) {
5031 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
5032 if (ID) {
5033 SmallVector<StringRef, 4> ValueComp;
5034 ID->getString().split(ValueComp, "#");
5035 if (ValueComp.size() == 2) {
5036 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
5037 ID = MDString::get(M.getContext(), NewValue);
5038 }
5039 M.addModuleFlag(Module::Error, MarkerKey, ID);
5040 M.eraseNamedMetadata(ModRetainReleaseMarker);
5041 Changed = true;
5042 }
5043 }
5044 }
5045 return Changed;
5046}
5047
5049 // This lambda converts normal function calls to ARC runtime functions to
5050 // intrinsic calls.
5051 auto UpgradeToIntrinsic = [&](const char *OldFunc,
5052 llvm::Intrinsic::ID IntrinsicFunc) {
5053 Function *Fn = M.getFunction(OldFunc);
5054
5055 if (!Fn)
5056 return;
5057
5058 Function *NewFn =
5059 llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);
5060
5061 for (User *U : make_early_inc_range(Fn->users())) {
5062 CallInst *CI = dyn_cast<CallInst>(U);
5063 if (!CI || CI->getCalledFunction() != Fn)
5064 continue;
5065
5066 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
5067 FunctionType *NewFuncTy = NewFn->getFunctionType();
5069
5070 // Don't upgrade the intrinsic if it's not valid to bitcast the return
5071 // value to the return type of the old function.
5072 if (NewFuncTy->getReturnType() != CI->getType() &&
5073 !CastInst::castIsValid(Instruction::BitCast, CI,
5074 NewFuncTy->getReturnType()))
5075 continue;
5076
5077 bool InvalidCast = false;
5078
5079 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
5080 Value *Arg = CI->getArgOperand(I);
5081
5082 // Bitcast argument to the parameter type of the new function if it's
5083 // not a variadic argument.
5084 if (I < NewFuncTy->getNumParams()) {
5085 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
5086 // to the parameter type of the new function.
5087 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
5088 NewFuncTy->getParamType(I))) {
5089 InvalidCast = true;
5090 break;
5091 }
5092 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
5093 }
5094 Args.push_back(Arg);
5095 }
5096
5097 if (InvalidCast)
5098 continue;
5099
5100 // Create a call instruction that calls the new function.
5101 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
5102 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5103 NewCall->takeName(CI);
5104
5105 // Bitcast the return value back to the type of the old call.
5106 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
5107
5108 if (!CI->use_empty())
5109 CI->replaceAllUsesWith(NewRetVal);
5110 CI->eraseFromParent();
5111 }
5112
5113 if (Fn->use_empty())
5114 Fn->eraseFromParent();
5115 };
5116
5117 // Unconditionally convert a call to "clang.arc.use" to a call to
5118 // "llvm.objc.clang.arc.use".
5119 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
5120
5121 // Upgrade the retain release marker. If there is no need to upgrade
5122 // the marker, that means either the module is already new enough to contain
5123 // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
5125 return;
5126
5127 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
5128 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
5129 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
5130 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
5131 {"objc_autoreleaseReturnValue",
5132 llvm::Intrinsic::objc_autoreleaseReturnValue},
5133 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
5134 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
5135 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
5136 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
5137 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
5138 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
5139 {"objc_release", llvm::Intrinsic::objc_release},
5140 {"objc_retain", llvm::Intrinsic::objc_retain},
5141 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
5142 {"objc_retainAutoreleaseReturnValue",
5143 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
5144 {"objc_retainAutoreleasedReturnValue",
5145 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
5146 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
5147 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
5148 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
5149 {"objc_unsafeClaimAutoreleasedReturnValue",
5150 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
5151 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
5152 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
5153 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
5154 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
5155 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
5156 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
5157 {"objc_arc_annotation_topdown_bbstart",
5158 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
5159 {"objc_arc_annotation_topdown_bbend",
5160 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
5161 {"objc_arc_annotation_bottomup_bbstart",
5162 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
5163 {"objc_arc_annotation_bottomup_bbend",
5164 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
5165
5166 for (auto &I : RuntimeFuncs)
5167 UpgradeToIntrinsic(I.first, I.second);
5168}
5169
5171 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
5172 if (!ModFlags)
5173 return false;
5174
5175 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
5176 bool HasSwiftVersionFlag = false;
5177 uint8_t SwiftMajorVersion, SwiftMinorVersion;
5178 uint32_t SwiftABIVersion;
5179 auto Int8Ty = Type::getInt8Ty(M.getContext());
5180 auto Int32Ty = Type::getInt32Ty(M.getContext());
5181
5182 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
5183 MDNode *Op = ModFlags->getOperand(I);
5184 if (Op->getNumOperands() != 3)
5185 continue;
5186 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
5187 if (!ID)
5188 continue;
5189 auto SetBehavior = [&](Module::ModFlagBehavior B) {
5190 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
5191 Type::getInt32Ty(M.getContext()), B)),
5192 MDString::get(M.getContext(), ID->getString()),
5193 Op->getOperand(2)};
5194 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5195 Changed = true;
5196 };
5197
5198 if (ID->getString() == "Objective-C Image Info Version")
5199 HasObjCFlag = true;
5200 if (ID->getString() == "Objective-C Class Properties")
5201 HasClassProperties = true;
5202 // Upgrade PIC from Error/Max to Min.
5203 if (ID->getString() == "PIC Level") {
5204 if (auto *Behavior =
5205 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5206 uint64_t V = Behavior->getLimitedValue();
5207 if (V == Module::Error || V == Module::Max)
5208 SetBehavior(Module::Min);
5209 }
5210 }
5211 // Upgrade "PIE Level" from Error to Max.
5212 if (ID->getString() == "PIE Level")
5213 if (auto *Behavior =
5214 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
5215 if (Behavior->getLimitedValue() == Module::Error)
5216 SetBehavior(Module::Max);
5217
5218 // Upgrade branch protection and return address signing module flags. The
5219 // module flag behavior for these fields were Error and now they are Min.
5220 if (ID->getString() == "branch-target-enforcement" ||
5221 ID->getString().starts_with("sign-return-address")) {
5222 if (auto *Behavior =
5223 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5224 if (Behavior->getLimitedValue() == Module::Error) {
5225 Type *Int32Ty = Type::getInt32Ty(M.getContext());
5226 Metadata *Ops[3] = {
5227 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
5228 Op->getOperand(1), Op->getOperand(2)};
5229 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5230 Changed = true;
5231 }
5232 }
5233 }
5234
5235 // Upgrade Objective-C Image Info Section. Removed the whitespce in the
5236 // section name so that llvm-lto will not complain about mismatching
5237 // module flags that is functionally the same.
5238 if (ID->getString() == "Objective-C Image Info Section") {
5239 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
5240 SmallVector<StringRef, 4> ValueComp;
5241 Value->getString().split(ValueComp, " ");
5242 if (ValueComp.size() != 1) {
5243 std::string NewValue;
5244 for (auto &S : ValueComp)
5245 NewValue += S.str();
5246 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
5247 MDString::get(M.getContext(), NewValue)};
5248 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5249 Changed = true;
5250 }
5251 }
5252 }
5253
5254 // IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8 value.
5255 // If the higher bits are set, it adds new module flag for swift info.
5256 if (ID->getString() == "Objective-C Garbage Collection") {
5257 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
5258 if (Md) {
5259 assert(Md->getValue() && "Expected non-empty metadata");
5260 auto Type = Md->getValue()->getType();
5261 if (Type == Int8Ty)
5262 continue;
5263 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
5264 if ((Val & 0xff) != Val) {
5265 HasSwiftVersionFlag = true;
5266 SwiftABIVersion = (Val & 0xff00) >> 8;
5267 SwiftMajorVersion = (Val & 0xff000000) >> 24;
5268 SwiftMinorVersion = (Val & 0xff0000) >> 16;
5269 }
5270 Metadata *Ops[3] = {
5271 ConstantAsMetadata::get(ConstantInt::get(Int32Ty,Module::Error)),
5272 Op->getOperand(1),
5273 ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
5274 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5275 Changed = true;
5276 }
5277 }
5278
5279 if (ID->getString() == "amdgpu_code_object_version") {
5280 Metadata *Ops[3] = {
5281 Op->getOperand(0),
5282 MDString::get(M.getContext(), "amdhsa_code_object_version"),
5283 Op->getOperand(2)};
5284 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5285 Changed = true;
5286 }
5287 }
5288
5289 // "Objective-C Class Properties" is recently added for Objective-C. We
5290 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
5291 // flag of value 0, so we can correclty downgrade this flag when trying to
5292 // link an ObjC bitcode without this module flag with an ObjC bitcode with
5293 // this module flag.
5294 if (HasObjCFlag && !HasClassProperties) {
5295 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
5296 (uint32_t)0);
5297 Changed = true;
5298 }
5299
5300 if (HasSwiftVersionFlag) {
5301 M.addModuleFlag(Module::Error, "Swift ABI Version",
5302 SwiftABIVersion);
5303 M.addModuleFlag(Module::Error, "Swift Major Version",
5304 ConstantInt::get(Int8Ty, SwiftMajorVersion));
5305 M.addModuleFlag(Module::Error, "Swift Minor Version",
5306 ConstantInt::get(Int8Ty, SwiftMinorVersion));
5307 Changed = true;
5308 }
5309
5310 return Changed;
5311}
5312
5314 auto TrimSpaces = [](StringRef Section) -> std::string {
5315 SmallVector<StringRef, 5> Components;
5316 Section.split(Components, ',');
5317
5318 SmallString<32> Buffer;
5319 raw_svector_ostream OS(Buffer);
5320
5321 for (auto Component : Components)
5322 OS << ',' << Component.trim();
5323
5324 return std::string(OS.str().substr(1));
5325 };
5326
5327 for (auto &GV : M.globals()) {
5328 if (!GV.hasSection())
5329 continue;
5330
5331 StringRef Section = GV.getSection();
5332
5333 if (!Section.starts_with("__DATA, __objc_catlist"))
5334 continue;
5335
5336 // __DATA, __objc_catlist, regular, no_dead_strip
5337 // __DATA,__objc_catlist,regular,no_dead_strip
5338 GV.setSection(TrimSpaces(Section));
5339 }
5340}
5341
5342namespace {
5343// Prior to LLVM 10.0, the strictfp attribute could be used on individual
5344// callsites within a function that did not also have the strictfp attribute.
5345// Since 10.0, if strict FP semantics are needed within a function, the
5346// function must have the strictfp attribute and all calls within the function
5347// must also have the strictfp attribute. This latter restriction is
5348// necessary to prevent unwanted libcall simplification when a function is
5349// being cloned (such as for inlining).
5350//
5351// The "dangling" strictfp attribute usage was only used to prevent constant
5352// folding and other libcall simplification. The nobuiltin attribute on the
5353// callsite has the same effect.
5354struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
5355 StrictFPUpgradeVisitor() = default;
5356
5357 void visitCallBase(CallBase &Call) {
5358 if (!Call.isStrictFP())
5359 return;
5360 if (isa<ConstrainedFPIntrinsic>(&Call))
5361 return;
5362 // If we get here, the caller doesn't have the strictfp attribute
5363 // but this callsite does. Replace the strictfp attribute with nobuiltin.
5364 Call.removeFnAttr(Attribute::StrictFP);
5365 Call.addFnAttr(Attribute::NoBuiltin);
5366 }
5367};
5368
5369/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
5370struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
5371 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
5372 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
5373
5375 if (!RMW.isFloatingPointOperation())
5376 return;
5377
5378 MDNode *Empty = MDNode::get(RMW.getContext(), {});
5379 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
5380 RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
5381 RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
5382 }
5383};
5384} // namespace
5385
5387 // If a function definition doesn't have the strictfp attribute,
5388 // convert any callsite strictfp attributes to nobuiltin.
5389 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
5390 StrictFPUpgradeVisitor SFPV;
5391 SFPV.visit(F);
5392 }
5393
5394 // Remove all incompatibile attributes from function.
5395 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
5396 F.getReturnType(), F.getAttributes().getRetAttrs()));
5397 for (auto &Arg : F.args())
5398 Arg.removeAttrs(
5399 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
5400
5401 // Older versions of LLVM treated an "implicit-section-name" attribute
5402 // similarly to directly setting the section on a Function.
5403 if (Attribute A = F.getFnAttribute("implicit-section-name");
5404 A.isValid() && A.isStringAttribute()) {
5405 F.setSection(A.getValueAsString());
5406 F.removeFnAttr("implicit-section-name");
5407 }
5408
5409 if (!F.empty()) {
5410 // For some reason this is called twice, and the first time is before any
5411 // instructions are loaded into the body.
5412
5413 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
5414 A.isValid()) {
5415
5416 if (A.getValueAsBool()) {
5417 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
5418 Visitor.visit(F);
5419 }
5420
5421 // We will leave behind dead attribute uses on external declarations, but
5422 // clang never added these to declarations anyway.
5423 F.removeFnAttr("amdgpu-unsafe-fp-atomics");
5424 }
5425 }
5426}
5427
5428static bool isOldLoopArgument(Metadata *MD) {
5429 auto *T = dyn_cast_or_null<MDTuple>(MD);
5430 if (!T)
5431 return false;
5432 if (T->getNumOperands() < 1)
5433 return false;
5434 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
5435 if (!S)
5436 return false;
5437 return S->getString().starts_with("llvm.vectorizer.");
5438}
5439
5441 StringRef OldPrefix = "llvm.vectorizer.";
5442 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
5443
5444 if (OldTag == "llvm.vectorizer.unroll")
5445 return MDString::get(C, "llvm.loop.interleave.count");
5446
5447 return MDString::get(
5448 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
5449 .str());
5450}
5451
5453 auto *T = dyn_cast_or_null<MDTuple>(MD);
5454 if (!T)
5455 return MD;
5456 if (T->getNumOperands() < 1)
5457 return MD;
5458 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
5459 if (!OldTag)
5460 return MD;
5461 if (!OldTag->getString().starts_with("llvm.vectorizer."))
5462 return MD;
5463
5464 // This has an old tag. Upgrade it.
5466 Ops.reserve(T->getNumOperands());
5467 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
5468 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
5469 Ops.push_back(T->getOperand(I));
5470
5471 return MDTuple::get(T->getContext(), Ops);
5472}
5473
5475 auto *T = dyn_cast<MDTuple>(&N);
5476 if (!T)
5477 return &N;
5478
5479 if (none_of(T->operands(), isOldLoopArgument))
5480 return &N;
5481
5483 Ops.reserve(T->getNumOperands());
5484 for (Metadata *MD : T->operands())
5486
5487 return MDTuple::get(T->getContext(), Ops);
5488}
5489
5491 Triple T(TT);
5492 // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
5493 // the address space of globals to 1. This does not apply to SPIRV Logical.
5494 if (((T.isAMDGPU() && !T.isAMDGCN()) ||
5495 (T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical()))) &&
5496 !DL.contains("-G") && !DL.starts_with("G")) {
5497 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
5498 }
5499
5500 if (T.isLoongArch64() || T.isRISCV64()) {
5501 // Make i32 a native type for 64-bit LoongArch and RISC-V.
5502 auto I = DL.find("-n64-");
5503 if (I != StringRef::npos)
5504 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
5505 return DL.str();
5506 }
5507
5508 std::string Res = DL.str();
5509 // AMDGCN data layout upgrades.
5510 if (T.isAMDGCN()) {
5511 // Define address spaces for constants.
5512 if (!DL.contains("-G") && !DL.starts_with("G"))
5513 Res.append(Res.empty() ? "G1" : "-G1");
5514
5515 // Add missing non-integral declarations.
5516 // This goes before adding new address spaces to prevent incoherent string
5517 // values.
5518 if (!DL.contains("-ni") && !DL.starts_with("ni"))
5519 Res.append("-ni:7:8:9");
5520 // Update ni:7 to ni:7:8:9.
5521 if (DL.ends_with("ni:7"))
5522 Res.append(":8:9");
5523 if (DL.ends_with("ni:7:8"))
5524 Res.append(":9");
5525
5526 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
5527 // resources) An empty data layout has already been upgraded to G1 by now.
5528 if (!DL.contains("-p7") && !DL.starts_with("p7"))
5529 Res.append("-p7:160:256:256:32");
5530 if (!DL.contains("-p8") && !DL.starts_with("p8"))
5531 Res.append("-p8:128:128");
5532 if (!DL.contains("-p9") && !DL.starts_with("p9"))
5533 Res.append("-p9:192:256:256:32");
5534
5535 return Res;
5536 }
5537
5538 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
5539 // If the datalayout matches the expected format, add pointer size address
5540 // spaces to the datalayout.
5541 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
5542 if (!DL.contains(AddrSpaces)) {
5544 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
5545 if (R.match(Res, &Groups))
5546 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
5547 }
5548 };
5549
5550 // AArch64 data layout upgrades.
5551 if (T.isAArch64()) {
5552 // Add "-Fn32"
5553 if (!DL.empty() && !DL.contains("-Fn32"))
5554 Res.append("-Fn32");
5555 AddPtr32Ptr64AddrSpaces();
5556 return Res;
5557 }
5558
5559 if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
5560 T.isWasm()) {
5561 // Mips64 with o32 ABI did not add "-i128:128".
5562 // Add "-i128:128"
5563 std::string I64 = "-i64:64";
5564 std::string I128 = "-i128:128";
5565 if (!StringRef(Res).contains(I128)) {
5566 size_t Pos = Res.find(I64);
5567 if (Pos != size_t(-1))
5568 Res.insert(Pos + I64.size(), I128);
5569 }
5570 return Res;
5571 }
5572
5573 if (!T.isX86())
5574 return Res;
5575
5576 AddPtr32Ptr64AddrSpaces();
5577
5578 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
5579 // for i128 operations prior to this being reflected in the data layout, and
5580 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
5581 // boundaries, so although this is a breaking change, the upgrade is expected
5582 // to fix more IR than it breaks.
5583 // Intel MCU is an exception and uses 4-byte-alignment.
5584 if (!T.isOSIAMCU()) {
5585 std::string I128 = "-i128:128";
5586 if (StringRef Ref = Res; !Ref.contains(I128)) {
5588 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
5589 if (R.match(Res, &Groups))
5590 Res = (Groups[1] + I128 + Groups[3]).str();
5591 }
5592 }
5593
5594 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
5595 // Raising the alignment is safe because Clang did not produce f80 values in
5596 // the MSVC environment before this upgrade was added.
5597 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
5598 StringRef Ref = Res;
5599 auto I = Ref.find("-f80:32-");
5600 if (I != StringRef::npos)
5601 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
5602 }
5603
5604 return Res;
5605}
5606
5608 StringRef FramePointer;
5609 Attribute A = B.getAttribute("no-frame-pointer-elim");
5610 if (A.isValid()) {
5611 // The value can be "true" or "false".
5612 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
5613 B.removeAttribute("no-frame-pointer-elim");
5614 }
5615 if (B.contains("no-frame-pointer-elim-non-leaf")) {
5616 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
5617 if (FramePointer != "all")
5618 FramePointer = "non-leaf";
5619 B.removeAttribute("no-frame-pointer-elim-non-leaf");
5620 }
5621 if (!FramePointer.empty())
5622 B.addAttribute("frame-pointer", FramePointer);
5623
5624 A = B.getAttribute("null-pointer-is-valid");
5625 if (A.isValid()) {
5626 // The value can be "true" or "false".
5627 bool NullPointerIsValid = A.getValueAsString() == "true";
5628 B.removeAttribute("null-pointer-is-valid");
5629 if (NullPointerIsValid)
5630 B.addAttribute(Attribute::NullPointerIsValid);
5631 }
5632}
5633
5634void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
5635 // clang.arc.attachedcall bundles are now required to have an operand.
5636 // If they don't, it's okay to drop them entirely: when there is an operand,
5637 // the "attachedcall" is meaningful and required, but without an operand,
5638 // it's just a marker NOP. Dropping it merely prevents an optimization.
5639 erase_if(Bundles, [&](OperandBundleDef &OBD) {
5640 return OBD.getTag() == "clang.arc.attachedcall" &&
5641 OBD.inputs().empty();
5642 });
5643}
AMDGPU address space definition.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition: AutoUpgrade.cpp:92
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition: AutoUpgrade.cpp:76
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static MDType * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
Definition: AutoUpgrade.cpp:56
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition: AutoUpgrade.cpp:60
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
@ Default
Definition: DwarfDebug.cpp:87
This file contains constants used for implementing Dwarf debug support.
std::string Name
uint64_t Size
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define R2(n)
This file contains the declarations for metadata subclasses.
uint64_t High
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned getNumElements(Type *Ty)
raw_pwrite_stream & OS
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:39
static const X86InstrFMA3Group Groups[]
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Class to represent array types.
Definition: DerivedTypes.h:395
Type * getElementType() const
Definition: DerivedTypes.h:408
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:704
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
Definition: Instructions.h:841
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:716
@ FAdd
*p = old + v
Definition: Instructions.h:741
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:756
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
Definition: Instructions.h:752
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
Definition: Instructions.h:748
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:760
bool isFloatingPointOperation() const
Definition: Instructions.h:882
AttributeSet getFnAttrs() const
The function attributes are returned.
static AttributeList get(LLVMContext &C, ArrayRef< std::pair< unsigned, Attribute > > Attrs)
Create an AttributeList with the specified parameters in it.
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1112
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1341
Value * getCalledOperand() const
Definition: InstrTypes.h:1334
void setAttributes(AttributeList A)
Set the attributes for this call.
Definition: InstrTypes.h:1420
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1286
FunctionType * getFunctionType() const
Definition: InstrTypes.h:1199
Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
Definition: InstrTypes.h:1277
void setCalledOperand(Value *V)
Definition: InstrTypes.h:1377
unsigned arg_size() const
Definition: InstrTypes.h:1284
AttributeList getAttributes() const
Return the attributes for this call.
Definition: InstrTypes.h:1417
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
Definition: InstrTypes.h:1380
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
static ConstantAggregateZero * get(Type *Ty)
Definition: Constants.cpp:1672
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1312
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:532
static Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2307
static Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
Definition: Constants.cpp:2253
static Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2293
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:208
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
static Constant * get(StructType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1378
static ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
Definition: Constants.cpp:1522
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:420
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:373
DWARF expression.
static DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
This class represents an Operation in the Expression.
Records a position in IR for a source label (DILabel).
Base class for non-instruction debug metadata records that have positions within IR.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Diagnostic information for debug metadata version reporting.
Diagnostic information for stripping invalid debug metadata.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:791
Class to represent function types.
Definition: DerivedTypes.h:105
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:137
Type * getReturnType() const
Definition: DerivedTypes.h:126
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:173
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:216
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:251
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition: Function.cpp:458
size_t arg_size() const
Definition: Function.h:901
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:221
Argument * getArg(unsigned i) const
Definition: Function.h:886
LinkageTypes getLinkage() const
Definition: GlobalValue.h:547
Type * getValueType() const
Definition: GlobalValue.h:297
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
Definition: IRBuilder.h:480
Value * CreateFSub(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1595
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2511
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Definition: IRBuilder.h:530
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2562
Value * CreateFDiv(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1633
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
Definition: IRBuilder.h:1080
Value * CreateSIToFP(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2106
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2499
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Definition: IRBuilder.h:558
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition: IRBuilder.h:1815
Value * CreateFAdd(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1576
Value * CreateFPTrunc(Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2113
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.cpp:1163
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2555
CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Definition: IRBuilder.cpp:546
Value * CreateICmpSGE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2298
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1053
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2045
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:545
ConstantInt * getInt8(uint8_t C)
Get a constant 8-bit value.
Definition: IRBuilder.h:495
Value * CreateUIToFP(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2093
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:550
IntegerType * getInt16Ty()
Fetch the type representing a 16-bit integer.
Definition: IRBuilder.h:540
Value * CreateNeg(Value *V, const Twine &Name="", bool HasNSW=false)
Definition: IRBuilder.h:1733
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:900
Value * CreateICmpSLE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2306
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1757
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2270
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1387
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2152
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1798
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1459
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2033
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2533
LLVMContext & getContext() const
Definition: IRBuilder.h:195
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1518
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Definition: IRBuilder.cpp:566
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1370
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition: IRBuilder.h:490
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Definition: IRBuilder.h:573
Value * CreateIsNotNull(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg != 0.
Definition: IRBuilder.h:2588
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2449
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1862
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2019
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1540
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition: IRBuilder.h:588
Value * CreateICmpUGE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2282
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2225
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:199
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1834
Value * CreateFPExt(Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2128
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1499
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1562
Value * CreateICmpULE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2290
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2380
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1614
Value * CreateFNeg(Value *V, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1742
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:535
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2157
Type * getBFloatTy()
Fetch the type representing a 16-bit brain floating point value.
Definition: IRBuilder.h:568
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1404
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2705
Base class for instruction visitors.
Definition: InstVisitor.h:78
RetTy visitCallBase(CallBase &I)
Definition: InstVisitor.h:270
RetTy visitAtomicRMWInst(AtomicRMWInst &I)
Definition: InstVisitor.h:172
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:475
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:68
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1679
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:74
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
SyncScope::ID getOrInsertSyncScopeID(StringRef SSN)
getOrInsertSyncScopeID - Maps synchronization scope name to synchronization scope ID.
An instruction for reading from memory.
Definition: Instructions.h:176
MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
Definition: MDBuilder.cpp:95
Metadata node.
Definition: Metadata.h:1073
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1434
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1549
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1440
LLVMContext & getContext() const
Definition: Metadata.h:1237
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:895
A single uniqued string.
Definition: Metadata.h:724
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:606
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1506
Metadata wrapper in the Value hierarchy.
Definition: Metadata.h:180
static MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:103
Root of the metadata hierarchy.
Definition: Metadata.h:62
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
Definition: Module.h:115
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
Definition: Module.h:136
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
Definition: Module.h:118
@ Min
Takes the min of the two values, which are required to be integers.
Definition: Module.h:150
@ Max
Takes the max of the two values, which are required to be integers.
Definition: Module.h:147
bool IsNewDbgInfoFormat
Is this Module using intrinsics to record the position of debugging information, or non-intrinsic rec...
Definition: Module.h:217
A tuple of MDNodes.
Definition: Metadata.h:1737
void setOperand(unsigned I, MDNode *New)
Definition: Metadata.cpp:1433
MDNode * getOperand(unsigned i) const
Definition: Metadata.cpp:1425
unsigned getNumOperands() const
Definition: Metadata.cpp:1421
A container for an operand bundle being viewed as a set of values rather than a set of uses.
Definition: InstrTypes.h:1065
ArrayRef< InputTy > inputs() const
Definition: InstrTypes.h:1080
StringRef getTag() const
Definition: InstrTypes.h:1088
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1878
bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
Definition: Regex.cpp:83
static ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
Definition: Type.cpp:812
ArrayRef< int > getShuffleMask() const
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
size_t size() const
Definition: SmallVector.h:78
void reserve(size_type N)
Definition: SmallVector.h:663
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:265
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:147
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition: StringRef.h:609
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:150
static constexpr size_t npos
Definition: StringRef.h:53
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
StringSwitch & StartsWith(StringLiteral S, T Value)
Definition: StringSwitch.h:83
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition: StringSwitch.h:90
Class to represent struct types.
Definition: DerivedTypes.h:218
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:406
unsigned getNumElements() const
Random access to the elements.
Definition: DerivedTypes.h:365
Type * getElementType(unsigned N) const
Definition: DerivedTypes.h:366
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getHalfTy(LLVMContext &C)
static Type * getBFloatTy(LLVMContext &C)
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition: Type.h:145
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition: Type.h:267
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:225
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
Value * getOperand(unsigned i) const
Definition: User.h:228
unsigned getNumOperands() const
Definition: User.h:250
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
bool hasName() const
Definition: Value.h:261
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
Base class of all SIMD vector types.
Definition: DerivedTypes.h:427
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:691
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
AttributeMask typeIncompatible(Type *Ty, AttributeSet AS, AttributeSafetyKind ASK=ASK_ALL)
Which attributes cannot be applied to a type.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:731
void getIntrinsicInfoTableEntries(ID id, SmallVectorImpl< IITDescriptor > &T)
Return the IIT table descriptor for the specified intrinsic into an array of IITDescriptors.
Definition: Intrinsics.cpp:446
std::optional< Function * > remangleIntrinsicFunction(Function *F)
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Intrinsics.cpp:46
AttributeList getAttributes(LLVMContext &C, ID id)
Return the attributes for an intrinsic.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call t...
void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1697
void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
bool isValidAtomicOrdering(Int I)
bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading,...
MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the func...
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:657
bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
Op::Description Desc
void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address space...
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:293
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1753
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
Definition: DebugInfo.cpp:608
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction i...
DWARFExpression::Operation Op
@ Dynamic
Denotes mode unknown at compile time.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1766
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2099
bool UpgradeDebugInfo(Module &M)
Check the debug info version number, if it is out-dated, drop the debug info.
void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to ...
void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker t...
@ DEBUG_METADATA_VERSION
Definition: Metadata.h:52
bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
Definition: Verifier.cpp:7308
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Description of the encoding of one expression Op.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117