#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
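// The per-target Intrinsics*.h headers supply the Intrinsic::<target>_*
// enumerators referenced by the upgrade tables below.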
    cl::desc("Disable autoupgrade of debug info"));

  Type *Arg0Type = F->getFunctionType()->getParamType(0);

  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);

  if (F->getReturnType()->isVectorTy())

  Type *Arg1Type = F->getFunctionType()->getParamType(1);
  Type *Arg2Type = F->getFunctionType()->getParamType(2);

  Type *Arg1Type = F->getFunctionType()->getParamType(1);
  Type *Arg2Type = F->getFunctionType()->getParamType(2);

  if (F->getReturnType()->getScalarType()->isBFloatTy())

  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") || Name == "cvt.ps2.pd.256" ||
            Name == "cvtdq2.pd.256" || Name == "cvtdq2.ps.256" ||
            Name.starts_with("movnt.") || Name.starts_with("sqrt.p") ||
            Name.starts_with("storeu.") || Name.starts_with("vbroadcast.s") ||
            Name.starts_with("vbroadcastf128") ||
            Name.starts_with("vextractf128.") ||
            Name.starts_with("vinsertf128.") ||
            Name.starts_with("vperm2f128.") || Name.starts_with("vpermil."));
  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" || Name.starts_with("pabs.") ||
            Name.starts_with("padds.") || Name.starts_with("paddus.") ||
            Name.starts_with("pblendd.") || Name.starts_with("pbroadcast") ||
            Name.starts_with("pcmpeq.") || Name.starts_with("pcmpgt.") ||
            Name.starts_with("pmax") || Name.starts_with("pmin") ||
            Name.starts_with("pmovsx") || Name.starts_with("pmovzx") ||
            Name == "pmulu.dq" || Name.starts_with("psll.dq") ||
            Name.starts_with("psrl.dq") || Name.starts_with("psubs.") ||
            Name.starts_with("psubus.") || Name.starts_with("vbroadcast") ||
            Name == "vbroadcasti128" || Name == "vextracti128" ||
            Name == "vinserti128" || Name == "vperm2i128");
  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      return (Name.starts_with("add.p") || Name.starts_with("and.") ||
              Name.starts_with("andn.") || Name.starts_with("broadcast.s") ||
              Name.starts_with("broadcastf32x4.") ||
              Name.starts_with("broadcastf32x8.") ||
              Name.starts_with("broadcastf64x2.") ||
              Name.starts_with("broadcastf64x4.") ||
              Name.starts_with("broadcasti32x4.") ||
              Name.starts_with("broadcasti32x8.") ||
              Name.starts_with("broadcasti64x2.") ||
              Name.starts_with("broadcasti64x4.") ||
              Name.starts_with("cmp.b") || Name.starts_with("cmp.d") ||
              Name.starts_with("cmp.q") || Name.starts_with("cmp.w") ||
              Name.starts_with("compress.b") ||
              Name.starts_with("compress.d") ||
              Name.starts_with("compress.p") ||
              Name.starts_with("compress.q") ||
              Name.starts_with("compress.store.") ||
              Name.starts_with("compress.w") ||
              Name.starts_with("conflict.") ||
              Name.starts_with("cvtdq2pd.") || Name.starts_with("cvtdq2ps.") ||
              Name == "cvtpd2dq.256" || Name == "cvtpd2ps.256" ||
              Name == "cvtps2pd.128" || Name == "cvtps2pd.256" ||
              Name.starts_with("cvtqq2pd.") || Name == "cvtqq2ps.256" ||
              Name == "cvtqq2ps.512" || Name == "cvttpd2dq.256" ||
              Name == "cvttps2dq.128" || Name == "cvttps2dq.256" ||
              Name.starts_with("cvtudq2pd.") ||
              Name.starts_with("cvtudq2ps.") ||
              Name.starts_with("cvtuqq2pd.") || Name == "cvtuqq2ps.256" ||
              Name == "cvtuqq2ps.512" || Name.starts_with("dbpsadbw.") ||
              Name.starts_with("div.p") || Name.starts_with("expand.b") ||
              Name.starts_with("expand.d") ||
              Name.starts_with("expand.load.") ||
              Name.starts_with("expand.p") || Name.starts_with("expand.q") ||
              Name.starts_with("expand.w") || Name.starts_with("fpclass.p") ||
              Name.starts_with("insert") || Name.starts_with("load.") ||
              Name.starts_with("loadu.") || Name.starts_with("lzcnt.") ||
              Name.starts_with("max.p") || Name.starts_with("min.p") ||
              Name.starts_with("movddup") || Name.starts_with("move.s") ||
              Name.starts_with("movshdup") || Name.starts_with("movsldup") ||
              Name.starts_with("mul.p") || Name.starts_with("or.") ||
              Name.starts_with("pabs.") || Name.starts_with("packssdw.") ||
              Name.starts_with("packsswb.") || Name.starts_with("packusdw.") ||
              Name.starts_with("packuswb.") || Name.starts_with("padd.") ||
              Name.starts_with("padds.") || Name.starts_with("paddus.") ||
              Name.starts_with("palignr.") || Name.starts_with("pand.") ||
              Name.starts_with("pandn.") || Name.starts_with("pavg") ||
              Name.starts_with("pbroadcast") || Name.starts_with("pcmpeq.") ||
              Name.starts_with("pcmpgt.") || Name.starts_with("perm.df.") ||
              Name.starts_with("perm.di.") || Name.starts_with("permvar.") ||
              Name.starts_with("pmaddubs.w.") ||
              Name.starts_with("pmaddw.d.") ||
              Name.starts_with("pmax") || Name.starts_with("pmin") ||
              Name == "pmov.qd.256" || Name == "pmov.qd.512" ||
              Name == "pmov.wb.256" || Name == "pmov.wb.512" ||
              Name.starts_with("pmovsx") || Name.starts_with("pmovzx") ||
              Name.starts_with("pmul.dq.") ||
              Name.starts_with("pmul.hr.sw.") ||
              Name.starts_with("pmulh.w.") || Name.starts_with("pmulhu.w.") ||
              Name.starts_with("pmull.") ||
              Name.starts_with("pmultishift.qb.") ||
              Name.starts_with("pmulu.dq.") || Name.starts_with("por.") ||
              Name.starts_with("prol.") || Name.starts_with("prolv.") ||
              Name.starts_with("pror.") || Name.starts_with("prorv.") ||
              Name.starts_with("pshuf.b.") || Name.starts_with("pshuf.d.") ||
              Name.starts_with("pshufh.w.") || Name.starts_with("pshufl.w.") ||
              Name.starts_with("psll.d") || Name.starts_with("psll.q") ||
              Name.starts_with("psll.w") || Name.starts_with("pslli") ||
              Name.starts_with("psllv") || Name.starts_with("psra.d") ||
              Name.starts_with("psra.q") || Name.starts_with("psra.w") ||
              Name.starts_with("psrai") || Name.starts_with("psrav") ||
              Name.starts_with("psrl.d") || Name.starts_with("psrl.q") ||
              Name.starts_with("psrl.w") || Name.starts_with("psrli") ||
              Name.starts_with("psrlv") || Name.starts_with("psub.") ||
              Name.starts_with("psubs.") || Name.starts_with("psubus.") ||
              Name.starts_with("pternlog.") || Name.starts_with("punpckh") ||
              Name.starts_with("punpckl") || Name.starts_with("pxor.") ||
              Name.starts_with("shuf.f") || Name.starts_with("shuf.i") ||
              Name.starts_with("shuf.p") || Name.starts_with("sqrt.p") ||
              Name.starts_with("store.b.") || Name.starts_with("store.d.") ||
              Name.starts_with("store.p") || Name.starts_with("store.q.") ||
              Name.starts_with("store.w.") || Name == "store.ss" ||
              Name.starts_with("storeu.") || Name.starts_with("sub.p") ||
              Name.starts_with("ucmp.") || Name.starts_with("unpckh.") ||
              Name.starts_with("unpckl.") || Name.starts_with("valign.") ||
              Name == "vcvtph2ps.128" || Name == "vcvtph2ps.256" ||
              Name.starts_with("vextract") || Name.starts_with("vfmadd.") ||
              Name.starts_with("vfmaddsub.") || Name.starts_with("vfnmadd.") ||
              Name.starts_with("vfnmsub.") || Name.starts_with("vpdpbusd.") ||
              Name.starts_with("vpdpbusds.") ||
              Name.starts_with("vpdpwssd.") ||
              Name.starts_with("vpdpwssds.") ||
              Name.starts_with("vpermi2var.") ||
              Name.starts_with("vpermil.p") ||
              Name.starts_with("vpermilvar.") ||
              Name.starts_with("vpermt2var.") ||
              Name.starts_with("vpmadd52") ||
              Name.starts_with("vpshld.") || Name.starts_with("vpshldv.") ||
              Name.starts_with("vpshrd.") || Name.starts_with("vpshrdv.") ||
              Name.starts_with("vpshufbitqmb.") || Name.starts_with("xor."));
    if (Name.consume_front("mask3."))
      return (Name.starts_with("vfmadd.") || Name.starts_with("vfmaddsub.") ||
              Name.starts_with("vfmsub.") || Name.starts_with("vfmsubadd.") ||
              Name.starts_with("vfnmsub."));
    if (Name.consume_front("maskz."))
      return (Name.starts_with("pternlog.") || Name.starts_with("vfmadd.") ||
              Name.starts_with("vfmaddsub.") ||
              Name.starts_with("vpdpbusd.") ||
              Name.starts_with("vpdpbusds.") ||
              Name.starts_with("vpdpwssd.") ||
              Name.starts_with("vpdpwssds.") ||
              Name.starts_with("vpermt2var.") ||
              Name.starts_with("vpmadd52") ||
              Name.starts_with("vpshldv.") || Name.starts_with("vpshrdv."));
    return (Name == "movntdqa" || Name == "pmul.dq.512" ||
            Name == "pmulu.dq.512" || Name.starts_with("broadcastm") ||
            Name.starts_with("cmp.p") || Name.starts_with("cvtb2mask.") ||
            Name.starts_with("cvtd2mask.") || Name.starts_with("cvtmask2") ||
            Name.starts_with("cvtq2mask.") || Name == "cvtusi2sd" ||
            Name.starts_with("cvtw2mask.") ||
            Name == "kortestc.w" || Name == "kortestz.w" ||
            Name.starts_with("kunpck") ||
            Name.starts_with("padds.") || Name.starts_with("pbroadcast") ||
            Name.starts_with("prol") || Name.starts_with("pror") ||
            Name.starts_with("psll.dq") || Name.starts_with("psrl.dq") ||
            Name.starts_with("psubs.") || Name.starts_with("ptestm") ||
            Name.starts_with("ptestnm") || Name.starts_with("storent.") ||
            Name.starts_with("vbroadcast.s") || Name.starts_with("vpshld.") ||
            Name.starts_with("vpshrd."));
  }
  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") || Name.starts_with("vfmsub.") ||
            Name.starts_with("vfmsubadd.") || Name.starts_with("vfnmadd.") ||
            Name.starts_with("vfnmsub."));

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s");

  if (Name.consume_front("sse."))
    return (Name == "add.ss" || Name == "cvtsi2ss" ||
            Name == "cvtsi642ss" ||
            Name.starts_with("sqrt.p") ||
            Name.starts_with("storeu."));

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" || Name == "cvtdq2pd" || Name == "cvtdq2ps" ||
            Name == "cvtps2pd" || Name == "cvtsi2sd" ||
            Name == "cvtsi642sd" || Name == "cvtss2sd" ||
            Name.starts_with("padds.") || Name.starts_with("paddus.") ||
            Name.starts_with("pcmpeq.") || Name.starts_with("pcmpgt.") ||
            Name == "pmulu.dq" || Name.starts_with("pshuf") ||
            Name.starts_with("psll.dq") || Name.starts_with("psrl.dq") ||
            Name.starts_with("psubs.") || Name.starts_with("psubus.") ||
            Name.starts_with("sqrt.p") ||
            Name == "storel.dq" ||
            Name.starts_with("storeu."));

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") || Name == "movntdqa" ||
            Name.starts_with("pmovsx") || Name.starts_with("pmovzx"));

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8";

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt.");

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" || Name == "pabs.d.128" ||
            Name == "pabs.w.128");

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" || Name == "vpcmov.256" ||
            Name.starts_with("vpcom") || Name.starts_with("vprot"));

  return (Name == "addcarry.u32" || Name == "addcarry.u64" ||
          Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
          Name == "subborrow.u32" || Name == "subborrow.u64" ||
          Name.starts_with("vcvtph2ps."));
  if (!Name.consume_front("x86."))

  if (Name == "rdtscp") {
    if (F->getFunctionType()->getNumParams() == 0)

                                        Intrinsic::x86_rdtscp);

  if (Name.consume_front("sse41.ptest")) {
          .Case("c", Intrinsic::x86_sse41_ptestc)
          .Case("z", Intrinsic::x86_sse41_ptestz)
          .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
          .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
          .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
          .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
          .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
          .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
          .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask.cmp.")) {
              .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
              .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
              .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
              .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
              .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
              .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
    } else if (Name.starts_with("vpdpbusd.") ||
               Name.starts_with("vpdpbusds.")) {
              .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
              .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
              .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
              .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
              .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
              .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
    } else if (Name.starts_with("vpdpwssd.") ||
               Name.starts_with("vpdpwssds.")) {
              .Case("vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
              .Case("vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
              .Case("vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
              .Case("vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
              .Case("vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
              .Case("vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
  if (Name.consume_front("avx2.")) {
    if (Name.consume_front("vpdpb")) {
              .Case("ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
              .Case("ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
              .Case("ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
              .Case("ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
              .Case("sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
              .Case("sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
              .Case("suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
              .Case("suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
              .Case("uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
              .Case("uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
              .Case("uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
              .Case("uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
    } else if (Name.consume_front("vpdpw")) {
              .Case("sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
              .Case("sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
              .Case("suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
              .Case("suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
              .Case("usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
              .Case("usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
              .Case("usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
              .Case("usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
              .Case("uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
              .Case("uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
              .Case("uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
              .Case("uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
  if (Name.consume_front("avx10.")) {
    if (Name.consume_front("vpdpb")) {
              .Case("ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
              .Case("ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
              .Case("sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
              .Case("suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
              .Case("uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
              .Case("uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
    } else if (Name.consume_front("vpdpw")) {
              .Case("sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
              .Case("suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
              .Case("usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
              .Case("usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
              .Case("uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
              .Case("uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
  if (Name.consume_front("avx512bf16.")) {
          .Case("cvtne2ps2bf16.128",
                Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
          .Case("cvtne2ps2bf16.256",
                Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
          .Case("cvtne2ps2bf16.512",
                Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
          .Case("mask.cvtneps2bf16.128",
                Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
          .Case("cvtneps2bf16.256",
                Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
          .Case("cvtneps2bf16.512",
                Intrinsic::x86_avx512bf16_cvtneps2bf16_512)

          .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
          .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
          .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
  if (Name.consume_front("xop.")) {
    if (Name.starts_with("vpermil2")) {
      auto Idx = F->getFunctionType()->getParamType(2);
      if (Idx->isFPOrFPVectorTy()) {
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        if (EltSize == 64 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          ID = Intrinsic::x86_xop_vpermil2ps_256;
      }
    } else if (F->arg_size() == 2)
          .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
          .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
  if (Name == "seh.recoverfp") {
                                        Intrinsic::eh_recoverfp);

  if (Name.starts_with("rbit")) {
        F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());

  if (Name == "thread.pointer") {
        F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
  bool Neon = Name.consume_front("neon.");

  if (Name.consume_front("bfdot.")) {
            .Cases({"v2f32.v8i8", "v4f32.v16i8"},
      size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
      assert((OperandWidth == 64 || OperandWidth == 128) &&
             "Unexpected operand width");
      std::array<Type *, 2> Tys{

  if (Name.consume_front("bfm")) {
    if (Name.consume_back(".v4f32.v16i8")) {

                                        F->arg_begin()->getType());

  if (Name.consume_front("vst")) {
    static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
          Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
          Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
          Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
          Intrinsic::arm_neon_vst4lane};
      auto fArgs = F->getFunctionType()->params();
      Type *Tys[] = {fArgs[0], fArgs[1]};
          F->getParent(), StoreInts[fArgs.size() - 3], Tys);
          F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
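      // The arity picks the table entry: a plain vstN call takes a pointer,
      // N vectors, and an alignment (N + 2 args), so fArgs.size() - 3
      // indexes StoreInts (vst1 -> 0, ... vst4 -> 3); the lane variants add
      // a lane operand on top, hence fArgs.size() - 5 for StoreLaneInts.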
  if (Name.consume_front("mve.")) {
    if (Name == "vctp64") {
    if (Name.starts_with("vrintn.v")) {
          F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
    if (Name.consume_back(".v4i1")) {
      if (Name.consume_back(".predicated.v2i64.v4i32"))
        return Name == "mull.int" || Name == "vqdmull";

      if (Name.consume_back(".v2i64")) {
        bool IsGather = Name.consume_front("vldr.gather.");
        if (IsGather || Name.consume_front("vstr.scatter.")) {
          if (Name.consume_front("base.")) {
            Name.consume_front("wb.");
            return Name == "predicated.v2i64";

          if (Name.consume_front("offset.predicated."))
            return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                   Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

  if (Name.consume_front("cde.vcx")) {
    if (Name.consume_back(".predicated.v2i64.v4i1"))
      return Name == "1q" || Name == "1qa" || Name == "2q" ||
             Name == "2qa" || Name == "3q" || Name == "3qa";
                                        F->arg_begin()->getType());

  if (Name.starts_with("addp")) {
    if (F->arg_size() != 2)
    if (Ty && Ty->getElementType()->isFloatingPointTy()) {
          F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);

  if (Name.starts_with("bfcvt")) {

  if (Name.consume_front("sve.")) {
    if (Name.consume_front("bf")) {
      if (Name.consume_back(".lane")) {
                .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
                .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)

    if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {

    if (Name.consume_front("addqv")) {
      if (!F->getReturnType()->isFPOrFPVectorTy())
      auto Args = F->getFunctionType()->params();
      Type *Tys[] = {F->getReturnType(), Args[1]};
          F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);

    if (Name.consume_front("ld")) {
      static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
      if (LdRegex.match(Name)) {
          Intrinsic::aarch64_sve_ld2_sret,
          Intrinsic::aarch64_sve_ld3_sret,
          Intrinsic::aarch64_sve_ld4_sret,
          LoadIDs[Name[0] - '2'], Ty);
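      // After the "ld" prefix is consumed, Name[0] is the structure count
      // digit, so Name[0] - '2' indexes the *_sret table: e.g. "3.nxv4i32"
      // selects LoadIDs[1] == aarch64_sve_ld3_sret.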
    if (Name.consume_front("tuple.")) {
      if (Name.starts_with("get")) {
        Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
            F->getParent(), Intrinsic::vector_extract, Tys);

      if (Name.starts_with("set")) {
        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {Args[0], Args[2], Args[1]};
            F->getParent(), Intrinsic::vector_insert, Tys);

      static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
      if (CreateTupleRegex.match(Name)) {
        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getReturnType(), Args[1]};
            F->getParent(), Intrinsic::vector_insert, Tys);

    if (Name.starts_with("rev.nxv")) {
          F->getParent(), Intrinsic::vector_reverse, F->getReturnType());
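    // The sve.rev.* and tuple get/set/create forms above all map onto the
    // generic vector intrinsics, e.g. llvm.aarch64.sve.rev.nxv4i32 becomes
    // llvm.vector.reverse.nxv4i32.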
  if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
              Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
              Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
              Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
        .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
        .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
        .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
        .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
        .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)

    if (F->getArg(0)->getType()->getPointerAddressSpace() ==

    size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
    Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);

  if (Name.consume_front("mapa.shared.cluster"))
    if (F->getReturnType()->getPointerAddressSpace() ==
      return Intrinsic::nvvm_mapa_shared_cluster;

  if (Name.consume_front("cp.async.bulk.")) {
        .Case("global.to.shared.cluster",
              Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
        .Case("shared.cta.to.cluster",
              Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)

    if (F->getArg(0)->getType()->getPointerAddressSpace() ==
  if (Name.consume_front("fma.rn."))
              .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
              .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
              .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
              .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
              .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
              .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
              .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
              .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
              .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
              .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
              .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
              .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)

  if (Name.consume_front("fmax."))
              .Case("bf16", Intrinsic::nvvm_fmax_bf16)
              .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
              .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
              .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
              .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
              .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
              .Case("ftz.nan.xorsign.abs.bf16",
                    Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
              .Case("ftz.nan.xorsign.abs.bf16x2",
                    Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
              .Case("ftz.xorsign.abs.bf16",
                    Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
              .Case("ftz.xorsign.abs.bf16x2",
                    Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
              .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
              .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
              .Case("nan.xorsign.abs.bf16",
                    Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
              .Case("nan.xorsign.abs.bf16x2",
                    Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
              .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
              .Case("xorsign.abs.bf16x2",
                    Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)

  if (Name.consume_front("fmin."))
              .Case("bf16", Intrinsic::nvvm_fmin_bf16)
              .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
              .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
              .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
              .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
              .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
              .Case("ftz.nan.xorsign.abs.bf16",
                    Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
              .Case("ftz.nan.xorsign.abs.bf16x2",
                    Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
              .Case("ftz.xorsign.abs.bf16",
                    Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
              .Case("ftz.xorsign.abs.bf16x2",
                    Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
              .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
              .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
              .Case("nan.xorsign.abs.bf16",
                    Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
              .Case("nan.xorsign.abs.bf16x2",
                    Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
              .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
              .Case("xorsign.abs.bf16x2",
                    Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)

  if (Name.consume_front("neg."))
              .Case("bf16", Intrinsic::nvvm_neg_bf16)
              .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)

  return Name.consume_front("local") || Name.consume_front("shared") ||
         Name.consume_front("global") || Name.consume_front("constant") ||
         Name.consume_front("param");
                               bool CanUpgradeDebugIntrinsicsToRecords) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  if (!Name.consume_front("llvm.") || Name.empty())

  bool IsArm = Name.consume_front("arm.");
  if (IsArm || Name.consume_front("aarch64.")) {

  if (Name.consume_front("amdgcn.")) {
    if (Name == "alignbit") {
          F->getParent(), Intrinsic::fshr, {F->getReturnType()});

    if (Name.consume_front("atomic.")) {
      if (Name.starts_with("inc") || Name.starts_with("dec") ||
          Name.starts_with("cond.sub") || Name.starts_with("csub")) {

    if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8 &&
        F->arg_size() == 7) {

    if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8 &&
        F->arg_size() == 8) {

    if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
        Name.consume_front("flat.atomic.")) {
      if (Name.starts_with("fadd") ||
          (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
          (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {

    if (Name.starts_with("ldexp.")) {
          F->getParent(), Intrinsic::ldexp,
          {F->getReturnType(), F->getArg(1)->getType()});

    if (F->arg_size() == 1) {
                                        F->arg_begin()->getType());

    if (F->arg_size() == 2 && Name == "coro.end") {
                                        Intrinsic::coro_end);

  if (Name.consume_front("dbg.")) {
    if (CanUpgradeDebugIntrinsicsToRecords) {
      if (Name == "addr" || Name == "value" || Name == "assign" ||
          Name == "declare" || Name == "label") {

    if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
                                        Intrinsic::dbg_value);

  if (Name.consume_front("experimental.vector.")) {
            .StartsWith("extract.", Intrinsic::vector_extract)
            .StartsWith("insert.", Intrinsic::vector_insert)
            .StartsWith("reverse.", Intrinsic::vector_reverse)
            .StartsWith("interleave2.", Intrinsic::vector_interleave2)
            .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
                        Intrinsic::vector_partial_reduce_add)
      const auto *FT = F->getFunctionType();
      if (ID == Intrinsic::vector_extract ||
          ID == Intrinsic::vector_interleave2)
      if (ID != Intrinsic::vector_interleave2)
      if (ID == Intrinsic::vector_insert ||
          ID == Intrinsic::vector_partial_reduce_add)
    if (Name.consume_front("reduce.")) {
      static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
      if (R.match(Name, &Groups))
                 .Case("add", Intrinsic::vector_reduce_add)
                 .Case("mul", Intrinsic::vector_reduce_mul)
                 .Case("and", Intrinsic::vector_reduce_and)
                 .Case("or", Intrinsic::vector_reduce_or)
                 .Case("xor", Intrinsic::vector_reduce_xor)
                 .Case("smax", Intrinsic::vector_reduce_smax)
                 .Case("smin", Intrinsic::vector_reduce_smin)
                 .Case("umax", Intrinsic::vector_reduce_umax)
                 .Case("umin", Intrinsic::vector_reduce_umin)
                 .Case("fmax", Intrinsic::vector_reduce_fmax)
                 .Case("fmin", Intrinsic::vector_reduce_fmin)

      static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
                 .Case("fadd", Intrinsic::vector_reduce_fadd)
                 .Case("fmul", Intrinsic::vector_reduce_fmul)

        auto Args = F->getFunctionType()->params();
                                        {Args[V2 ? 1 : 0]});
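      // For the "v2" fadd/fmul forms, operand 0 is the scalar start value
      // and the vector is Args[1]; the plain forms take the vector as
      // operand 0. Hence Args[V2 ? 1 : 0] when picking the overload type.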
    if (Name.consume_front("splice"))

  if (Name.consume_front("experimental.stepvector.")) {
        F->getParent(), ID, F->getFunctionType()->getReturnType());
  if (Name.starts_with("flt.rounds")) {
                                        Intrinsic::get_rounding);

  if (Name.starts_with("invariant.group.barrier")) {
    auto Args = F->getFunctionType()->params();
    Type *ObjectPtr[1] = {Args[0]};
        F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);

  if ((Name.starts_with("lifetime.start") ||
       Name.starts_with("lifetime.end")) &&
      F->arg_size() == 2) {
                       ? Intrinsic::lifetime_start
                       : Intrinsic::lifetime_end;
                                        F->getArg(0)->getType());

          .StartsWith("memcpy.", Intrinsic::memcpy)
          .StartsWith("memmove.", Intrinsic::memmove)
    if (F->arg_size() == 5) {
          F->getFunctionType()->params().slice(0, 3);

  if (Name.starts_with("memset.") && F->arg_size() == 5) {
    const auto *FT = F->getFunctionType();
    Type *ParamTypes[2] = {
        FT->getParamType(0),
                                        Intrinsic::memset, ParamTypes);

          .StartsWith("masked.load", Intrinsic::masked_load)
          .StartsWith("masked.gather", Intrinsic::masked_gather)
          .StartsWith("masked.store", Intrinsic::masked_store)
          .StartsWith("masked.scatter", Intrinsic::masked_scatter)
  if (MaskedID && F->arg_size() == 4) {
    if (MaskedID == Intrinsic::masked_load ||
        MaskedID == Intrinsic::masked_gather) {
          F->getParent(), MaskedID,
          {F->getReturnType(), F->getArg(0)->getType()});
          F->getParent(), MaskedID,
          {F->getArg(0)->getType(), F->getArg(1)->getType()});
  if (Name.consume_front("nvvm.")) {
    if (F->arg_size() == 1) {
              .Cases({"brev32", "brev64"}, Intrinsic::bitreverse)
              .Case("clz.i", Intrinsic::ctlz)
              .Case("popc.i", Intrinsic::ctpop)
                                        {F->getReturnType()});

    if (!F->getReturnType()->getScalarType()->isBFloatTy()) {

    bool Expand = false;
    if (Name.consume_front("abs."))
      Expand =
          Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
    else if (Name.consume_front("fabs."))
      Expand = Name == "f" || Name == "ftz.f" || Name == "d";
    else if (Name.consume_front("ex2.approx."))
      Expand =
          Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";
    else if (Name.consume_front("max.") || Name.consume_front("min."))
      Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
               Name == "ui" || Name == "ull";
    else if (Name.consume_front("atomic.load."))
    else if (Name.consume_front("bitcast."))
      Expand =
          Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
    else if (Name.consume_front("rotate."))
      Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
    else if (Name.consume_front("ptr.gen.to."))
    else if (Name.consume_front("ptr."))
    else if (Name.consume_front("ldg.global."))
      Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
                Name.starts_with("p."));

                 .Case("barrier0", true)
                 .Case("barrier.n", true)
                 .Case("barrier.sync.cnt", true)
                 .Case("barrier.sync", true)
                 .Case("barrier", true)
                 .Case("bar.sync", true)
                 .Case("barrier0.popc", true)
                 .Case("barrier0.and", true)
                 .Case("barrier0.or", true)
                 .Case("clz.ll", true)
                 .Case("popc.ll", true)
                 .Case("swap.lo.hi.b64", true)
                 .Case("tanh.approx.f32", true)
  if (Name.starts_with("objectsize.")) {
    Type *Tys[2] = {F->getReturnType(), F->arg_begin()->getType()};
    if (F->arg_size() == 2 || F->arg_size() == 3) {
                                        Intrinsic::objectsize, Tys);

  if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
        F->getParent(), Intrinsic::ptr_annotation,
        {F->arg_begin()->getType(), F->getArg(1)->getType()});

  if (Name.consume_front("riscv.")) {
             .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
             .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
             .Case("aes32esi", Intrinsic::riscv_aes32esi)
             .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
      if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
      if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
          F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
             .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
             .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
             .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
             .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
      if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {

  if (Name == "stackprotectorcheck") {

  if (Name == "thread.pointer") {
        F->getParent(), Intrinsic::thread_pointer, F->getReturnType());

  if (Name == "var.annotation" && F->arg_size() == 4) {
        F->getParent(), Intrinsic::var_annotation,
        {{F->arg_begin()->getType(), F->getArg(1)->getType()}});

  if (Name.consume_front("vector.splice")) {
    if (Name.starts_with(".left") || Name.starts_with(".right"))

  if (Name.consume_front("wasm.")) {
             .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
             .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
             .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
                                        F->getReturnType());

    if (Name.consume_front("dot.i8x16.i7x16.")) {
               .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
                     Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)

  if (ST && (!ST->isLiteral() || ST->isPacked()) &&

  auto *FT = F->getFunctionType();

  std::string Name = F->getName().str();
                          Name, F->getParent());

  if (Result != std::nullopt) {
                              bool CanUpgradeDebugIntrinsicsToRecords) {

         GV->getName() == "llvm.global_dtors")) ||

  unsigned N = Init->getNumOperands();
  std::vector<Constant *> NewCtors(N);
  for (unsigned i = 0; i != N; ++i) {
                             Ctor->getAggregateElement(1),

  unsigned NumElts = ResultTy->getNumElements() * 8;
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  for (unsigned l = 0; l != NumElts; l += 16)
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = NumElts + i - Shift;
        Idx -= NumElts - 16;
      Idxs[l + i] = Idx + l;

  Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));

  return Builder.CreateBitCast(Res, ResultTy, "cast");
  unsigned NumElts = ResultTy->getNumElements() * 8;
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  for (unsigned l = 0; l != NumElts; l += 16)
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = i + Shift;
        Idx += NumElts - 16;
      Idxs[l + i] = Idx + l;

  Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));

  return Builder.CreateBitCast(Res, ResultTy, "cast");

  Mask = Builder.CreateBitCast(Mask, MaskTy);

  for (unsigned i = 0; i != NumElts; ++i)
  Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),

    if (C->isAllOnesValue())

  return Builder.CreateSelect(Mask, Op0, Op1);

    if (C->isAllOnesValue())
                                  Mask->getType()->getIntegerBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);
  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
  return Builder.CreateSelect(Mask, Op0, Op1);
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");

    ShiftVal &= (NumElts - 1);

  if (ShiftVal > 16) {

  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16)
        Idx += NumElts - 16;
      Indices[l + i] = Idx + l;

      Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
                                bool ZeroMask, bool IndexForm) {
  unsigned EltWidth = Ty->getScalarSizeInBits();
  bool IsFloat = Ty->isFPOrFPVectorTy();
  if (VecWidth == 128 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
  else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_128;
  else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
  else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_128;
  else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_256;
  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_256;
  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_512;
  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_512;
  else if (VecWidth == 128 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
  else if (VecWidth == 256 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
  else if (VecWidth == 512 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
  else if (VecWidth == 128 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
  else if (VecWidth == 256 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
  else if (VecWidth == 512 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_512;

  Value *V = Builder.CreateIntrinsic(IID, Args);

  Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});

                               bool IsRotateRight) {
  Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
  Amt = Builder.CreateVectorSplat(NumElts, Amt);

  Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
  Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
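  // A rotate is a funnel shift with both inputs equal; e.g. a left-rotate of
  // <16 x i32> by a splatted amount becomes roughly
  //   %r = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %s, <16 x i32> %s,
  //                                          <16 x i32> %amt)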
  Value *Ext = Builder.CreateSExt(Cmp, Ty);

                              bool IsShiftRight, bool ZeroMask) {
  Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
  Amt = Builder.CreateVectorSplat(NumElts, Amt);

  Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
  Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});

  const Align Alignment =
          ? Align(
                Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)

    if (C->isAllOnesValue())
      return Builder.CreateAlignedStore(Data, Ptr, Alignment);

  return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);

  const Align Alignment =

    if (C->isAllOnesValue())
      return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);

  return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
  Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
                                       {Op0, Builder.getInt1(false)});

  Constant *ShiftAmt = ConstantInt::get(Ty, 32);
  LHS = Builder.CreateShl(LHS, ShiftAmt);
  LHS = Builder.CreateAShr(LHS, ShiftAmt);
  RHS = Builder.CreateShl(RHS, ShiftAmt);
  RHS = Builder.CreateAShr(RHS, ShiftAmt);

  Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
  LHS = Builder.CreateAnd(LHS, Mask);
  RHS = Builder.CreateAnd(RHS, Mask);

  if (!C || !C->isAllOnesValue())
    Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));

    for (unsigned i = 0; i != NumElts; ++i)
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = NumElts + i % NumElts;
    Vec = Builder.CreateShuffleVector(Vec,

  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
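  // Masks narrower than 8 lanes are widened before the final bitcast: the
  // shuffle above pads a <4 x i1> (or <2 x i1>) compare result out to 8
  // lanes, with the extra lanes apparently drawn from a zero vector, so the
  // result can be returned as an i8.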
                          unsigned CC, bool Signed) {
  } else if (CC == 7) {

  Value *AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
  Value *Cmp = Builder.CreateIsNotNull(AndNode);
  Value *Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
  Value *Select = Builder.CreateSelect(Cmp, Extract1, Extract2);

  return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
  Name = Name.substr(12); // Strip off "avx512.mask."

  if (Name.starts_with("max.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_max_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_max_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_max_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_max_pd_256;
  } else if (Name.starts_with("min.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_min_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_min_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_min_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_min_pd_256;
  } else if (Name.starts_with("pshuf.b.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pshuf_b_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pshuf_b;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pshuf_b_512;
  } else if (Name.starts_with("pmul.hr.sw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmul_hr_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
  } else if (Name.starts_with("pmulh.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulh_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulh_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulh_w_512;
  } else if (Name.starts_with("pmulhu.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulhu_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulhu_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulhu_w_512;
  } else if (Name.starts_with("pmaddw.d.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmadd_wd;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_wd;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddw_d_512;
  } else if (Name.starts_with("pmaddubs.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_ub_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddubs_w_512;
  } else if (Name.starts_with("packsswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packsswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packsswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packsswb_512;
  } else if (Name.starts_with("packssdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packssdw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packssdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packssdw_512;
  } else if (Name.starts_with("packuswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packuswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packuswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packuswb_512;
  } else if (Name.starts_with("packusdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse41_packusdw;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packusdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packusdw_512;
  } else if (Name.starts_with("vpermilvar.")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
  } else if (Name == "cvtpd2dq.256") {
    IID = Intrinsic::x86_avx_cvt_pd2dq_256;
  } else if (Name == "cvtpd2ps.256") {
    IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
  } else if (Name == "cvttpd2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
  } else if (Name == "cvttps2dq.128") {
    IID = Intrinsic::x86_sse2_cvttps2dq;
  } else if (Name == "cvttps2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
  } else if (Name.starts_with("permvar.")) {
    if (VecWidth == 256 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx2_permps;
    else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx2_permd;
    else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_256;
    else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_256;
    else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_sf_512;
    else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_si_512;
    else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_512;
    else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_512;
    else if (VecWidth == 128 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_128;
    else if (VecWidth == 256 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_256;
    else if (VecWidth == 512 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_512;
    else if (VecWidth == 128 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_128;
    else if (VecWidth == 256 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_256;
    else if (VecWidth == 512 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_512;
  } else if (Name.starts_with("dbpsadbw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_dbpsadbw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_dbpsadbw_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_dbpsadbw_512;
  } else if (Name.starts_with("pmultishift.qb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_pmultishift_qb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_pmultishift_qb_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmultishift_qb_512;
  } else if (Name.starts_with("conflict.")) {
    if (Name[9] == 'd' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_d_128;
    else if (Name[9] == 'd' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_d_256;
    else if (Name[9] == 'd' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_d_512;
    else if (Name[9] == 'q' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_q_128;
    else if (Name[9] == 'q' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_q_256;
    else if (Name[9] == 'q' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_q_512;
  } else if (Name.starts_with("pavg.")) {
    if (Name[5] == 'b' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_b_512;
    else if (Name[5] == 'w' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_w_512;

  Rep = Builder.CreateIntrinsic(IID, Args);
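  // Once the width-specific IID is chosen, the call is re-emitted unmasked
  // and, for the masked forms, merged with the passthru via select; roughly:
  //   %v = call <32 x i16> @llvm.x86.avx512.pavg.w.512(<32 x i16> %a,
  //                                                    <32 x i16> %b)
  //   %r = select <32 x i1> %m, <32 x i16> %v, <32 x i16> %passthru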
  if (AsmStr->find("mov\tfp") == 0 &&
      AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
      (Pos = AsmStr->find("# marker")) != std::string::npos) {
    AsmStr->replace(Pos, 1, ";");

  Value *Rep = nullptr;

  if (Name == "abs.i" || Name == "abs.ll") {
    Value *Neg = Builder.CreateNeg(Arg, "neg");
    Value *Cmp = Builder.CreateICmpSGE(
    Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
  } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
    Type *Ty = (Name == "abs.bf16")
    Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
    Rep = Builder.CreateBitCast(Abs, CI->getType());
  } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
    Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
                                               : Intrinsic::nvvm_fabs;
    Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
  } else if (Name.consume_front("ex2.approx.")) {
    Intrinsic::ID IID = Name.starts_with("ftz")
                            ? Intrinsic::nvvm_ex2_approx_ftz
                            : Intrinsic::nvvm_ex2_approx;
    Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
  } else if (Name.starts_with("atomic.load.add.f32.p") ||
             Name.starts_with("atomic.load.add.f64.p")) {
  } else if (Name.starts_with("atomic.load.inc.32.p") ||
             Name.starts_with("atomic.load.dec.32.p")) {
    Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
  } else if (Name.consume_front("max.") &&
             (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
              Name == "ui" || Name == "ull")) {
    Value *Cmp = Name.starts_with("u")
                     ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
                     : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
    Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
  } else if (Name.consume_front("min.") &&
             (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
              Name == "ui" || Name == "ull")) {
    Value *Cmp = Name.starts_with("u")
                     ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
                     : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
    Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
  } else if (Name == "clz.ll") {
    Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
                                          {Arg, Builder.getFalse()},
    Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
  } else if (Name == "popc.ll") {
    Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
                                          Arg, nullptr, "ctpop");
    Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
  } else if (Name == "h2f") {
    Rep = Builder.CreateIntrinsic(Intrinsic::convert_from_fp16,
  } else if (Name.consume_front("bitcast.") &&
             (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
  } else if (Name == "rotate.b32") {
    Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
                                  {Arg, Arg, ShiftAmt});
  } else if (Name == "rotate.b64") {
    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
                                  {Arg, Arg, ZExtShiftAmt});
  } else if (Name == "rotate.right.b64") {
    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
                                  {Arg, Arg, ZExtShiftAmt});
  } else if (Name == "swap.lo.hi.b64") {
    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
                                  {Arg, Arg, Builder.getInt64(32)});
  } else if ((Name.consume_front("ptr.gen.to.") &&
              Name.starts_with(".to.gen"))) {
  } else if (Name.consume_front("ldg.global")) {
    Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
    LD->setMetadata(LLVMContext::MD_invariant_load, MD);
  } else if (Name == "tanh.approx.f32") {
    Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),
  } else if (Name == "barrier0" || Name == "barrier.n" ||
             Name == "bar.sync") {
        Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
    Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
  } else if (Name == "barrier") {
    Rep = Builder.CreateIntrinsic(
        Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
  } else if (Name == "barrier.sync") {
    Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
  } else if (Name == "barrier.sync.cnt") {
    Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
  } else if (Name == "barrier0.popc" || Name == "barrier0.and" ||
             Name == "barrier0.or") {
    C = Builder.CreateICmpNE(C, Builder.getInt32(0));
            .Case("barrier0.popc",
                  Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
            .Case("barrier0.and",
                  Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
            .Case("barrier0.or",
                  Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
    Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0), C});
    Rep = Builder.CreateZExt(Bar, CI->getType());
             !F->getReturnType()->getScalarType()->isBFloatTy()) {
               ? Builder.CreateBitCast(Arg, NewType)
    Rep = Builder.CreateCall(NewFn, Args);
    if (F->getReturnType()->isIntegerTy())
      Rep = Builder.CreateBitCast(Rep, F->getReturnType());
  Value *Rep = nullptr;

  if (Name.starts_with("sse4a.movnt.")) {
        Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
    SI->setMetadata(LLVMContext::MD_nontemporal, Node);
  } else if (Name.starts_with("avx.movnt.") ||
             Name.starts_with("avx512.storent.")) {
    SI->setMetadata(LLVMContext::MD_nontemporal, Node);
  } else if (Name == "sse2.storel.dq") {
    Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
    Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
    Builder.CreateAlignedStore(Elt, Arg0, Align(1));
  } else if (Name.starts_with("sse.storeu.") ||
             Name.starts_with("sse2.storeu.") ||
             Name.starts_with("avx.storeu.")) {
    Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
  } else if (Name == "avx512.mask.store.ss") {
  } else if (Name.starts_with("avx512.mask.store")) {
    // "avx512.mask.storeu" vs. "avx512.mask.store".
    bool Aligned = Name[17] != 'u';
  } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
    // "pcmpeq" vs. "pcmpgt".
    bool CmpEq = Name[9] == 'e';
    Rep = Builder.CreateSExt(Rep, CI->getType(), "");
  } else if (Name.starts_with("avx512.broadcastm")) {
    Rep = Builder.CreateVectorSplat(NumElts, Rep);
  } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
    Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
    Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
    Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
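    // Lane-0-only semantics: e.g. for sse.sqrt.ss this is roughly
    //   %e = extractelement <4 x float> %v, i64 0
    //   %s = call float @llvm.sqrt.f32(float %e)
    //   %r = insertelement <4 x float> %v, float %s, i64 0
    // leaving the upper lanes of the source vector untouched.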
2887 }
else if (Name.starts_with(
"avx.sqrt.p") ||
2888 Name.starts_with(
"sse2.sqrt.p") ||
2889 Name.starts_with(
"sse.sqrt.p")) {
2890 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->
getType(),
2891 {CI->getArgOperand(0)});
2892 }
else if (Name.starts_with(
"avx512.mask.sqrt.p")) {
2896 Intrinsic::ID IID = Name[18] ==
's' ? Intrinsic::x86_avx512_sqrt_ps_512
2897 : Intrinsic::x86_avx512_sqrt_pd_512;
2900 Rep = Builder.CreateIntrinsic(IID, Args);
2902 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->
getType(),
2903 {CI->getArgOperand(0)});
2907 }
else if (Name.starts_with(
"avx512.ptestm") ||
2908 Name.starts_with(
"avx512.ptestnm")) {
2912 Rep = Builder.CreateAnd(Op0, Op1);
2918 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2920 }
else if (Name.starts_with(
"avx512.mask.pbroadcast")) {
2923 Rep = Builder.CreateVectorSplat(NumElts, CI->
getArgOperand(0));
2926 }
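  // The avx512 k-register (mask) operations below have no dedicated IR form;
  // they are upgraded by bitcasting the i16 mask to a <16 x i1> vector,
  // performing the logic as ordinary bit operations, and bitcasting back.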
  else if (Name.starts_with("avx512.kunpck")) {
    /*...*/
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kand.w") {
    Rep = Builder.CreateAnd(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kandn.w") {
    LHS = Builder.CreateNot(LHS);
    Rep = Builder.CreateAnd(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kor.w") {
    Rep = Builder.CreateOr(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kxor.w") {
    Rep = Builder.CreateXor(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kxnor.w") {
    LHS = Builder.CreateNot(LHS);
    Rep = Builder.CreateXor(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.knot.w") {
    Rep = Builder.CreateNot(Rep);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
    Rep = Builder.CreateOr(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
    // kortestc compares against all-ones, kortestz against zero.
    Value *C;
    if (Name[14] == 'c')
      C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
    else
      C = ConstantInt::getNullValue(Builder.getInt16Ty());
    Rep = Builder.CreateICmpEQ(Rep, C);
    Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
  }
  else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
           Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
           Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
           Name == "sse.div.ss" || Name == "sse2.div.sd") {
    // These scalar ops only touch element 0: do the arithmetic on the
    // extracted scalars and insert the result back into operand 0.
    Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
                                               ConstantInt::get(I32Ty, 0));
    Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
                                               ConstantInt::get(I32Ty, 0));
    Value *EltOp;
    if (Name.contains(".add."))
      EltOp = Builder.CreateFAdd(Elt0, Elt1);
    else if (Name.contains(".sub."))
      EltOp = Builder.CreateFSub(Elt0, Elt1);
    else if (Name.contains(".mul."))
      EltOp = Builder.CreateFMul(Elt0, Elt1);
    else
      EltOp = Builder.CreateFDiv(Elt0, Elt1);
    Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
                                      ConstantInt::get(I32Ty, 0));
  }
  else if (Name.starts_with("avx512.mask.pcmp")) {
    // "avx512.mask.pcmp" is 16 chars: 'e' selects pcmpeq, otherwise pcmpgt.
    bool CmpEq = Name[16] == 'e';
    /*...*/
  } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
    switch (VecWidth) {
    case 128:
      IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
      break;
    case 256:
      IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
      break;
    case 512:
      IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
      break;
    }
    /*...*/
  } else if (Name.starts_with("avx512.mask.fpclass.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_fpclass_ps_128;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_fpclass_ps_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_fpclass_ps_512;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_fpclass_pd_128;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_fpclass_pd_256;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_fpclass_pd_512;
    /*...*/
  }
  else if (Name.starts_with("avx512.cmp.p")) {
    Type *OpTy = Args[0]->getType();
    /*...*/
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
    /*...*/
    if (VecWidth == 512)
      /*...*/;
    Args.push_back(Mask);

    Rep = Builder.CreateIntrinsic(IID, Args);
  }
  else if (Name.starts_with("avx512.mask.cmp.")) {
    /*...*/
  } else if (Name.starts_with("avx512.mask.ucmp.")) {
    /*...*/
  } else if (Name.starts_with("avx512.cvtb2mask.") ||
             Name.starts_with("avx512.cvtw2mask.") ||
             Name.starts_with("avx512.cvtd2mask.") ||
             Name.starts_with("avx512.cvtq2mask.")) {
    /*...*/
  }
  else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
           Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
           Name.starts_with("avx512.mask.pabs")) {
    /*...*/
  } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
             Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
             Name.starts_with("avx512.mask.pmaxs")) {
    /*...*/
  } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
             Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
             Name.starts_with("avx512.mask.pmaxu")) {
    /*...*/
  } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
             Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
             Name.starts_with("avx512.mask.pmins")) {
    /*...*/
  } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
             Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
             Name.starts_with("avx512.mask.pminu")) {
    /*...*/
  } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
             Name == "avx512.pmulu.dq.512" ||
             Name.starts_with("avx512.mask.pmulu.dq.")) {
    /*...*/
  } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
             Name == "avx512.pmul.dq.512" ||
             Name.starts_with("avx512.mask.pmul.dq.")) {
    /*...*/
  } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
             Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
    /*...*/
  } else if (Name == "avx512.cvtusi2sd") {
    /*...*/
  } else if (Name == "sse2.cvtss2sd") {
    Rep = Builder.CreateFPExt(/*...*/);
    /*...*/
  }
  else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
           Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
           Name.starts_with("avx512.mask.cvtdq2pd.") ||
           Name.starts_with("avx512.mask.cvtudq2pd.") ||
           Name.starts_with("avx512.mask.cvtdq2ps.") ||
           Name.starts_with("avx512.mask.cvtudq2ps.") ||
           Name.starts_with("avx512.mask.cvtqq2pd.") ||
           Name.starts_with("avx512.mask.cvtuqq2pd.") ||
           Name == "avx512.mask.cvtqq2ps.256" ||
           Name == "avx512.mask.cvtqq2ps.512" ||
           Name == "avx512.mask.cvtuqq2ps.256" ||
           Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
           Name == "avx.cvt.ps2.pd.256" ||
           Name == "avx512.mask.cvtps2pd.128" ||
           Name == "avx512.mask.cvtps2pd.256") {
    unsigned NumDstElts = DstTy->getNumElements();
    // If the destination is narrower, shuffle down to the low elements first.
    if (NumDstElts < SrcTy->getNumElements()) {
      assert(NumDstElts == 2 && "Unexpected vector size");
      Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
    }

    bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
    bool IsUnsigned = Name.contains("cvtu");
    if (IsPS2PD)
      Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
    else if (/*...*/) {
      Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
                                     : Intrinsic::x86_avx512_sitofp_round;
      Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
                                    /*...*/);
    } else {
      Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
                       : Builder.CreateSIToFP(Rep, DstTy, "cvt");
    }
    /*...*/
  }
  else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
           Name.starts_with("vcvtph2ps.")) {
    unsigned NumDstElts = DstTy->getNumElements();
    if (NumDstElts != SrcTy->getNumElements()) {
      assert(NumDstElts == 4 && "Unexpected vector size");
      Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
    }
    Rep = Builder.CreateBitCast(/*...*/);
    Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
    /*...*/
  } else if (Name.starts_with("avx512.mask.load")) {
    // 16 == strlen("avx512.mask.load"); a trailing 'u' marks the unaligned
    // form.
    bool Aligned = Name[16] != 'u';
    /*...*/
  } else if (Name.starts_with("avx512.mask.expand.load.")) {
    Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
                                   ResultTy->getNumElements());

    Rep = Builder.CreateIntrinsic(Intrinsic::masked_expandload, ResultTy,
                                  /*...*/);
  } else if (Name.starts_with("avx512.mask.compress.store.")) {
    /*...*/
    Rep = Builder.CreateIntrinsic(Intrinsic::masked_compressstore, ResultTy,
                                  /*...*/);
  } else if (Name.starts_with("avx512.mask.compress.") ||
             Name.starts_with("avx512.mask.expand.")) {
    Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
                                   ResultTy->getNumElements());

    bool IsCompress = Name[12] == 'c';
    Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
                                   : Intrinsic::x86_avx512_mask_expand;
    Rep = Builder.CreateIntrinsic(/*...*/);
  }
  else if (Name.starts_with("xop.vpcom")) {
    bool IsSigned;
    if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
        Name.ends_with("uq"))
      IsSigned = false;
    else if (Name.ends_with("b") || Name.ends_with("w") ||
             Name.ends_with("d") || Name.ends_with("q"))
      IsSigned = true;
    /*...*/
    Name = Name.substr(9); // strip off "xop.vpcom"
    if (Name.starts_with("lt"))
      Imm = 0;
    else if (Name.starts_with("le"))
      Imm = 1;
    else if (Name.starts_with("gt"))
      Imm = 2;
    else if (Name.starts_with("ge"))
      Imm = 3;
    else if (Name.starts_with("eq"))
      Imm = 4;
    else if (Name.starts_with("ne"))
      Imm = 5;
    else if (Name.starts_with("false"))
      Imm = 6;
    else if (Name.starts_with("true"))
      Imm = 7;
    /*...*/
  }
  else if (Name.starts_with("xop.vpcmov")) {
    Value *NotSel = Builder.CreateNot(Sel);
    /*...*/
    Rep = Builder.CreateOr(Sel0, Sel1);
  } else if (Name.starts_with("xop.vprot") ||
             Name.starts_with("avx512.prol") ||
             Name.starts_with("avx512.mask.prol")) {
    /*...*/
  } else if (Name.starts_with("avx512.pror") ||
             Name.starts_with("avx512.mask.pror")) {
    /*...*/
  } else if (Name.starts_with("avx512.vpshld.") ||
             Name.starts_with("avx512.mask.vpshld") ||
             Name.starts_with("avx512.maskz.vpshld")) {
    bool ZeroMask = Name[11] == 'z';
    /*...*/
  } else if (Name.starts_with("avx512.vpshrd.") ||
             Name.starts_with("avx512.mask.vpshrd") ||
             Name.starts_with("avx512.maskz.vpshrd")) {
    bool ZeroMask = Name[11] == 'z';
    /*...*/
  }
  else if (Name == "sse42.crc32.64.8") {
    Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
                                  /*...*/);
    Rep = Builder.CreateZExt(Rep, CI->getType(), "");
  } else if (Name.starts_with("avx.vbroadcast.s") ||
             Name.starts_with("avx512.vbroadcast.s")) {
    Type *EltTy = VecTy->getElementType();
    unsigned EltNum = VecTy->getNumElements();
    /*...*/
    for (unsigned I = 0; I < EltNum; ++I)
      Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
  }
  else if (Name.starts_with("sse41.pmovsx") ||
           Name.starts_with("sse41.pmovzx") ||
           Name.starts_with("avx2.pmovsx") ||
           Name.starts_with("avx2.pmovzx") ||
           Name.starts_with("avx512.mask.pmovsx") ||
           Name.starts_with("avx512.mask.pmovzx")) {
    unsigned NumDstElts = DstTy->getNumElements();
    // Shuffle the low elements into place, then sign/zero-extend them to the
    // destination element type.
    for (unsigned i = 0; i != NumDstElts; ++i)
      ShuffleMask[i] = i;
    /*...*/
    bool DoSext = Name.contains("pmovsx");
    Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
                 : Builder.CreateZExt(SV, DstTy);
    /*...*/
  } else if (Name == "avx512.mask.pmov.qd.256" ||
             Name == "avx512.mask.pmov.qd.512" ||
             Name == "avx512.mask.pmov.wb.256" ||
             Name == "avx512.mask.pmov.wb.512") {
    /*...*/
  } else if (Name.starts_with("avx.vbroadcastf128") ||
             Name == "avx2.vbroadcasti128") {
    // Replicate the 128-bit source across the wider destination.
    /*...*/
    if (NumSrcElts == 2)
      Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
    else
      Rep = Builder.CreateShuffleVector(Load,
                                        ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
  }
  else if (Name.starts_with("avx512.mask.shuf.i") ||
           Name.starts_with("avx512.mask.shuf.f")) {
    /*...*/
    unsigned ControlBitsMask = NumLanes - 1;
    unsigned NumControlBits = NumLanes / 2;
    /*...*/
    for (unsigned l = 0; l != NumLanes; ++l) {
      unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
      /*...*/
      if (l >= NumLanes / 2)
        LaneMask += NumLanes;
      for (unsigned i = 0; i != NumElementsInLane; ++i)
        ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
    }
    /*...*/
  } else if (Name.starts_with("avx512.mask.broadcastf") ||
             Name.starts_with("avx512.mask.broadcasti")) {
    unsigned NumDstElts = /*...*/;
    for (unsigned i = 0; i != NumDstElts; ++i)
      ShuffleMask[i] = i % NumSrcElts;
    /*...*/
  } else if (Name.starts_with("avx2.pbroadcast") ||
             Name.starts_with("avx2.vbroadcast") ||
             Name.starts_with("avx512.pbroadcast") ||
             Name.starts_with("avx512.mask.broadcast.s")) {
    /*...*/
    Rep = Builder.CreateShuffleVector(Op, M);
    /*...*/
  }
  else if (Name.starts_with("sse2.padds.") ||
           Name.starts_with("avx2.padds.") ||
           Name.starts_with("avx512.padds.") ||
           Name.starts_with("avx512.mask.padds.")) {
    /*...*/
  } else if (Name.starts_with("sse2.psubs.") ||
             Name.starts_with("avx2.psubs.") ||
             Name.starts_with("avx512.psubs.") ||
             Name.starts_with("avx512.mask.psubs.")) {
    /*...*/
  } else if (Name.starts_with("sse2.paddus.") ||
             Name.starts_with("avx2.paddus.") ||
             Name.starts_with("avx512.mask.paddus.")) {
    /*...*/
  } else if (Name.starts_with("sse2.psubus.") ||
             Name.starts_with("avx2.psubus.") ||
             Name.starts_with("avx512.mask.psubus.")) {
    /*...*/
  } else if (Name.starts_with("avx512.mask.palignr.")) {
    /*...*/
  } else if (Name.starts_with("avx512.mask.valign.")) {
    /*...*/
  }
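  // psll.dq/psrl.dq shift the entire vector register by a byte amount; they
  // are rebuilt as shuffles against a zero vector rather than kept as target
  // intrinsics.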
  else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
    /*...*/
  } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
    /*...*/
  } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
             Name == "avx512.psll.dq.512") {
    /*...*/
  } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
             Name == "avx512.psrl.dq.512") {
    /*...*/
  } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
             Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
             Name.starts_with("avx2.pblendd.")) {
    /*...*/
    unsigned NumElts = VecTy->getNumElements();
    // Each immediate bit selects between the two sources, repeating every
    // eight elements.
    for (unsigned i = 0; i != NumElts; ++i)
      Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;

    Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
  }
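  // vinsertf128/vextractf128 (and their avx512 descendants) are pure lane
  // moves, so both directions reduce to shuffles: widen or select the
  // 128-bit subvector, then blend it into place by index.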
  else if (Name.starts_with("avx.vinsertf128.") ||
           Name == "avx2.vinserti128" ||
           Name.starts_with("avx512.mask.insert")) {
    unsigned DstNumElts = /*...*/;
    unsigned SrcNumElts = /*...*/;
    unsigned Scale = DstNumElts / SrcNumElts;
    /*...*/
    // Widen the smaller operand so both shuffle inputs have the destination
    // length.
    for (unsigned i = 0; i != SrcNumElts; ++i)
      Idxs[i] = i;
    for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
      Idxs[i] = SrcNumElts;
    Rep = Builder.CreateShuffleVector(Op1, Idxs);
    /*...*/
    for (unsigned i = 0; i != DstNumElts; ++i)
      Idxs[i] = i;
    for (unsigned i = 0; i != SrcNumElts; ++i)
      Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
    Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
    /*...*/
  } else if (Name.starts_with("avx.vextractf128.") ||
             Name == "avx2.vextracti128" ||
             Name.starts_with("avx512.mask.vextract")) {
    unsigned DstNumElts = /*...*/;
    unsigned SrcNumElts = /*...*/;
    unsigned Scale = SrcNumElts / DstNumElts;
    /*...*/
    for (unsigned i = 0; i != DstNumElts; ++i) {
      Idxs[i] = i + (Imm * DstNumElts);
    }
    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
    /*...*/
  }
  else if (Name.starts_with("avx512.mask.perm.df.") ||
           Name.starts_with("avx512.mask.perm.di.")) {
    /*...*/
    unsigned NumElts = VecTy->getNumElements();
    /*...*/
    for (unsigned i = 0; i != NumElts; ++i)
      Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);

    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
    /*...*/
  } else if (Name.starts_with("avx.vperm2f128.") ||
             Name == "avx2.vperm2i128") {
    /*...*/
    unsigned HalfSize = NumElts / 2;
    /*...*/
    unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
    for (unsigned i = 0; i < HalfSize; ++i)
      ShuffleMask[i] = StartIndex + i;

    StartIndex = (Imm & 0x10) ? HalfSize : 0;
    for (unsigned i = 0; i < HalfSize; ++i)
      ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;

    Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
  } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
             Name.starts_with("avx512.mask.vpermil.p") ||
             Name.starts_with("avx512.mask.pshuf.d.")) {
    /*...*/
    unsigned NumElts = VecTy->getNumElements();
    // Calculate the size of each index in the immediate.
    unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
    unsigned IdxMask = ((1 << IdxSize) - 1);
    /*...*/
    for (unsigned i = 0; i != NumElts; ++i)
      Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);

    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
    /*...*/
  }
  else if (Name == "sse2.pshufl.w" ||
           Name.starts_with("avx512.mask.pshufl.w.")) {
    /*...*/
    for (unsigned l = 0; l != NumElts; l += 8) {
      for (unsigned i = 0; i != 4; ++i)
        Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
      for (unsigned i = 4; i != 8; ++i)
        Idxs[i + l] = i + l;
    }

    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
    /*...*/
  } else if (Name == "sse2.pshufh.w" ||
             Name.starts_with("avx512.mask.pshufh.w.")) {
    /*...*/
    for (unsigned l = 0; l != NumElts; l += 8) {
      for (unsigned i = 0; i != 4; ++i)
        Idxs[i + l] = i + l;
      for (unsigned i = 0; i != 4; ++i)
        Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
    }

    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
    /*...*/
  } else if (Name.starts_with("avx512.mask.shuf.p")) {
    /*...*/
    unsigned HalfLaneElts = NumLaneElts / 2;
    /*...*/
    for (unsigned i = 0; i != NumElts; ++i) {
      // Base index is the starting element of the lane.
      Idxs[i] = i - (i % NumLaneElts);
      // If we are half way through the lane switch to the other source.
      if ((i % NumLaneElts) >= HalfLaneElts)
        Idxs[i] += NumElts;
      // Select the specific element from the immediate.
      Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
    }

    Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
    /*...*/
  }
  else if (Name.starts_with("avx512.mask.movddup") ||
           Name.starts_with("avx512.mask.movshdup") ||
           Name.starts_with("avx512.mask.movsldup")) {
    /*...*/
    unsigned Offset = 0;
    if (Name.starts_with("avx512.mask.movshdup."))
      Offset = 1;

    for (unsigned l = 0; l != NumElts; l += NumLaneElts)
      for (unsigned i = 0; i != NumLaneElts; i += 2) {
        Idxs[i + l + 0] = i + l + Offset;
        Idxs[i + l + 1] = i + l + Offset;
      }

    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
    /*...*/
  } else if (Name.starts_with("avx512.mask.punpckl") ||
             Name.starts_with("avx512.mask.unpckl.")) {
    /*...*/
    for (int l = 0; l != NumElts; l += NumLaneElts)
      for (int i = 0; i != NumLaneElts; ++i)
        Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);

    Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
    /*...*/
  } else if (Name.starts_with("avx512.mask.punpckh") ||
             Name.starts_with("avx512.mask.unpckh.")) {
    /*...*/
    for (int l = 0; l != NumElts; l += NumLaneElts)
      for (int i = 0; i != NumLaneElts; ++i)
        Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);

    Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
    /*...*/
  }
  else if (Name.starts_with("avx512.mask.and.") ||
           Name.starts_with("avx512.mask.pand.")) {
    /*...*/
    Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                            Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    Rep = Builder.CreateBitCast(Rep, FTy);
    /*...*/
  } else if (Name.starts_with("avx512.mask.andn.") ||
             Name.starts_with("avx512.mask.pandn.")) {
    /*...*/
    Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
    Rep = Builder.CreateAnd(Rep,
                            Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    Rep = Builder.CreateBitCast(Rep, FTy);
    /*...*/
  } else if (Name.starts_with("avx512.mask.or.") ||
             Name.starts_with("avx512.mask.por.")) {
    /*...*/
    Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                           Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    Rep = Builder.CreateBitCast(Rep, FTy);
    /*...*/
  } else if (Name.starts_with("avx512.mask.xor.") ||
             Name.starts_with("avx512.mask.pxor.")) {
    /*...*/
    Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                            Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    Rep = Builder.CreateBitCast(Rep, FTy);
    /*...*/
  } else if (Name.starts_with("avx512.mask.padd.")) {
    /*...*/
  } else if (Name.starts_with("avx512.mask.psub.")) {
    /*...*/
  } else if (Name.starts_with("avx512.mask.pmull.")) {
    /*...*/
  }
  else if (Name.starts_with("avx512.mask.add.p")) {
    if (Name.ends_with(".512")) {
      Intrinsic::ID IID;
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_add_ps_512;
      else
        IID = Intrinsic::x86_avx512_add_pd_512;

      Rep = Builder.CreateIntrinsic(/*...*/);
    }
    /*...*/
  } else if (Name.starts_with("avx512.mask.div.p")) {
    if (Name.ends_with(".512")) {
      Intrinsic::ID IID;
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_div_ps_512;
      else
        IID = Intrinsic::x86_avx512_div_pd_512;

      Rep = Builder.CreateIntrinsic(/*...*/);
    }
    /*...*/
  } else if (Name.starts_with("avx512.mask.mul.p")) {
    if (Name.ends_with(".512")) {
      Intrinsic::ID IID;
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_mul_ps_512;
      else
        IID = Intrinsic::x86_avx512_mul_pd_512;

      Rep = Builder.CreateIntrinsic(/*...*/);
    }
    /*...*/
  } else if (Name.starts_with("avx512.mask.sub.p")) {
    if (Name.ends_with(".512")) {
      Intrinsic::ID IID;
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_sub_ps_512;
      else
        IID = Intrinsic::x86_avx512_sub_pd_512;

      Rep = Builder.CreateIntrinsic(/*...*/);
    }
    /*...*/
  }
  else if ((Name.starts_with("avx512.mask.max.p") ||
            Name.starts_with("avx512.mask.min.p")) &&
           Name.drop_front(18) == ".512") {
    bool IsDouble = Name[17] == 'd';
    bool IsMin = Name[13] == 'i';
    static const Intrinsic::ID MinMaxTbl[2][2] = {
        {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
        {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
    Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];

    Rep = Builder.CreateIntrinsic(/*...*/);
    /*...*/
  } else if (Name.starts_with("avx512.mask.lzcnt.")) {
    Rep =
        Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
                                {CI->getArgOperand(0), Builder.getInt1(false)});
    /*...*/
  }
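  // The vector shift upgrades below decode everything from the name itself:
  // the character after "avx512.mask.psll"/"psrl"/"psra" distinguishes the
  // immediate ('i'), variable ('v'), and plain forms, and the character
  // before the width suffix gives the element size; together they select the
  // concrete SSE2/AVX2/AVX512 intrinsic.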
  else if (Name.starts_with("avx512.mask.psll")) {
    bool IsImmediate =
        Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
    bool IsVariable = Name[16] == 'v';
    char Size = Name[16] == '.'   ? Name[17]
                : Name[17] == '.' ? Name[18]
                : Name[18] == '.' ? Name[19]
                                  : /*...*/;

    Intrinsic::ID IID;
    if (IsVariable && Name[17] != '.') {
      if (Size == 'd' && Name[17] == '2')
        IID = Intrinsic::x86_avx2_psllv_q;
      else if (Size == 'd' && Name[17] == '4')
        IID = Intrinsic::x86_avx2_psllv_q_256;
      else if (Size == 's' && Name[17] == '4')
        IID = Intrinsic::x86_avx2_psllv_d;
      else if (Size == 's' && Name[17] == '8')
        IID = Intrinsic::x86_avx2_psllv_d_256;
      else if (Size == 'h' && Name[17] == '8')
        IID = Intrinsic::x86_avx512_psllv_w_128;
      else if (Size == 'h' && Name[17] == '1')
        IID = Intrinsic::x86_avx512_psllv_w_256;
      else if (Name[17] == '3' && Name[18] == '2')
        IID = Intrinsic::x86_avx512_psllv_w_512;
      /*...*/
    } else if (Name.ends_with(".128")) {
      if (Size == 'd')
        IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
                          : Intrinsic::x86_sse2_psll_d;
      else if (Size == 'q')
        IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
                          : Intrinsic::x86_sse2_psll_q;
      else if (Size == 'w')
        IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
                          : Intrinsic::x86_sse2_psll_w;
      /*...*/
    } else if (Name.ends_with(".256")) {
      if (Size == 'd')
        IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
                          : Intrinsic::x86_avx2_psll_d;
      else if (Size == 'q')
        IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
                          : Intrinsic::x86_avx2_psll_q;
      else if (Size == 'w')
        IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
                          : Intrinsic::x86_avx2_psll_w;
      /*...*/
    } else {
      if (Size == 'd')
        IID = IsImmediate  ? Intrinsic::x86_avx512_pslli_d_512
              : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
                           : Intrinsic::x86_avx512_psll_d_512;
      else if (Size == 'q')
        IID = IsImmediate  ? Intrinsic::x86_avx512_pslli_q_512
              : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
                           : Intrinsic::x86_avx512_psll_q_512;
      else if (Size == 'w')
        IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
                          : Intrinsic::x86_avx512_psll_w_512;
      /*...*/
    }

    Rep = upgradeX86MaskedShift(Builder, *CI, IID);
  }
  else if (Name.starts_with("avx512.mask.psrl")) {
    bool IsImmediate =
        Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
    bool IsVariable = Name[16] == 'v';
    char Size = Name[16] == '.'   ? Name[17]
                : Name[17] == '.' ? Name[18]
                : Name[18] == '.' ? Name[19]
                                  : /*...*/;

    Intrinsic::ID IID;
    if (IsVariable && Name[17] != '.') {
      if (Size == 'd' && Name[17] == '2')
        IID = Intrinsic::x86_avx2_psrlv_q;
      else if (Size == 'd' && Name[17] == '4')
        IID = Intrinsic::x86_avx2_psrlv_q_256;
      else if (Size == 's' && Name[17] == '4')
        IID = Intrinsic::x86_avx2_psrlv_d;
      else if (Size == 's' && Name[17] == '8')
        IID = Intrinsic::x86_avx2_psrlv_d_256;
      else if (Size == 'h' && Name[17] == '8')
        IID = Intrinsic::x86_avx512_psrlv_w_128;
      else if (Size == 'h' && Name[17] == '1')
        IID = Intrinsic::x86_avx512_psrlv_w_256;
      else if (Name[17] == '3' && Name[18] == '2')
        IID = Intrinsic::x86_avx512_psrlv_w_512;
      /*...*/
    } else if (Name.ends_with(".128")) {
      if (Size == 'd')
        IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
                          : Intrinsic::x86_sse2_psrl_d;
      else if (Size == 'q')
        IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
                          : Intrinsic::x86_sse2_psrl_q;
      else if (Size == 'w')
        IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
                          : Intrinsic::x86_sse2_psrl_w;
      /*...*/
    } else if (Name.ends_with(".256")) {
      if (Size == 'd')
        IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
                          : Intrinsic::x86_avx2_psrl_d;
      else if (Size == 'q')
        IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
                          : Intrinsic::x86_avx2_psrl_q;
      else if (Size == 'w')
        IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
                          : Intrinsic::x86_avx2_psrl_w;
      /*...*/
    } else {
      if (Size == 'd')
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrli_d_512
              : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
                           : Intrinsic::x86_avx512_psrl_d_512;
      else if (Size == 'q')
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrli_q_512
              : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
                           : Intrinsic::x86_avx512_psrl_q_512;
      else if (Size == 'w')
        IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
                          : Intrinsic::x86_avx512_psrl_w_512;
      /*...*/
    }

    Rep = upgradeX86MaskedShift(Builder, *CI, IID);
  }
  else if (Name.starts_with("avx512.mask.psra")) {
    bool IsImmediate =
        Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
    bool IsVariable = Name[16] == 'v';
    char Size = Name[16] == '.'   ? Name[17]
                : Name[17] == '.' ? Name[18]
                : Name[18] == '.' ? Name[19]
                                  : /*...*/;

    Intrinsic::ID IID;
    if (IsVariable && Name[17] != '.') {
      if (Size == 's' && Name[17] == '4')
        IID = Intrinsic::x86_avx2_psrav_d;
      else if (Size == 's' && Name[17] == '8')
        IID = Intrinsic::x86_avx2_psrav_d_256;
      else if (Size == 'h' && Name[17] == '8')
        IID = Intrinsic::x86_avx512_psrav_w_128;
      else if (Size == 'h' && Name[17] == '1')
        IID = Intrinsic::x86_avx512_psrav_w_256;
      else if (Name[17] == '3' && Name[18] == '2')
        IID = Intrinsic::x86_avx512_psrav_w_512;
      /*...*/
    } else if (Name.ends_with(".128")) {
      if (Size == 'd')
        IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
                          : Intrinsic::x86_sse2_psra_d;
      else if (Size == 'q')
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_q_128
              : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
                           : Intrinsic::x86_avx512_psra_q_128;
      else if (Size == 'w')
        IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
                          : Intrinsic::x86_sse2_psra_w;
      /*...*/
    } else if (Name.ends_with(".256")) {
      if (Size == 'd')
        IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
                          : Intrinsic::x86_avx2_psra_d;
      else if (Size == 'q')
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_q_256
              : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
                           : Intrinsic::x86_avx512_psra_q_256;
      else if (Size == 'w')
        IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
                          : Intrinsic::x86_avx2_psra_w;
      /*...*/
    } else {
      if (Size == 'd')
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_d_512
              : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
                           : Intrinsic::x86_avx512_psra_d_512;
      else if (Size == 'q')
        IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_q_512
              : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
                           : Intrinsic::x86_avx512_psra_q_512;
      else if (Size == 'w')
        IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
                          : Intrinsic::x86_avx512_psra_w_512;
      /*...*/
    }

    Rep = upgradeX86MaskedShift(Builder, *CI, IID);
  }
  else if (Name.starts_with("avx512.mask.move.s")) {
    /*...*/
  } else if (Name.starts_with("avx512.cvtmask2")) {
    /*...*/
  } else if (Name.ends_with(".movntdqa")) {
    /*...*/
    LoadInst *LI = Builder.CreateAlignedLoad(/*...*/);
    /*...*/
  }
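  // FMA upgrades read the operation out of the name: an 'n' selects the
  // negated-multiply forms (vfnmadd/vfnmsub), the following 's' selects
  // subtract-from-accumulator, and a trailing 's' in the type suffix marks
  // the scalar variants, which only operate on element 0.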
  else if (Name.starts_with("fma.vfmadd.") ||
           Name.starts_with("fma.vfmsub.") ||
           Name.starts_with("fma.vfnmadd.") ||
           Name.starts_with("fma.vfnmsub.")) {
    bool NegMul = Name[6] == 'n';
    bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
    bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
    /*...*/
    if (NegMul && !IsScalar)
      Ops[0] = Builder.CreateFNeg(Ops[0]);
    if (NegMul && IsScalar)
      Ops[1] = Builder.CreateFNeg(Ops[1]);
    if (NegAcc)
      Ops[2] = Builder.CreateFNeg(Ops[2]);

    Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
    /*...*/
  } else if (Name.starts_with("fma4.vfmadd.s")) {
    /*...*/
    Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
    /*...*/
  }
  else if (Name.starts_with("avx512.mask.vfmadd.s") ||
           Name.starts_with("avx512.maskz.vfmadd.s") ||
           Name.starts_with("avx512.mask3.vfmadd.s") ||
           Name.starts_with("avx512.mask3.vfmsub.s") ||
           Name.starts_with("avx512.mask3.vfnmsub.s")) {
    bool IsMask3 = Name[11] == '3';
    bool IsMaskZ = Name[11] == 'z';
    // Drop the "avx512.mask|maskz|mask3." prefix to make matching simpler.
    Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
    bool NegMul = Name[2] == 'n';
    bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
    /*...*/
    if (NegMul && (IsMask3 || IsMaskZ))
      A = Builder.CreateFNeg(A);
    if (NegMul && !(IsMask3 || IsMaskZ))
      B = Builder.CreateFNeg(B);
    if (NegAcc)
      C = Builder.CreateFNeg(C);

    A = Builder.CreateExtractElement(A, (uint64_t)0);
    B = Builder.CreateExtractElement(B, (uint64_t)0);
    C = Builder.CreateExtractElement(C, (uint64_t)0);

    if (/*...*/) {
      Intrinsic::ID IID;
      if (Name.back() == 'd')
        IID = Intrinsic::x86_avx512_vfmadd_f64;
      else
        IID = Intrinsic::x86_avx512_vfmadd_f32;
      Rep = Builder.CreateIntrinsic(IID, Ops);
    } else {
      Rep = Builder.CreateFMA(A, B, C);
    }
    /*...*/
    if (NegAcc && IsMask3)
      /*...*/;

    Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
                                      /*...*/);
  }
  else if (Name.starts_with("avx512.mask.vfmadd.p") ||
           Name.starts_with("avx512.mask.vfnmadd.p") ||
           Name.starts_with("avx512.mask.vfnmsub.p") ||
           Name.starts_with("avx512.mask3.vfmadd.p") ||
           Name.starts_with("avx512.mask3.vfmsub.p") ||
           Name.starts_with("avx512.mask3.vfnmsub.p") ||
           Name.starts_with("avx512.maskz.vfmadd.p")) {
    bool IsMask3 = Name[11] == '3';
    bool IsMaskZ = Name[11] == 'z';
    Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
    bool NegMul = Name[2] == 'n';
    bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
    /*...*/
    if (NegMul && (IsMask3 || IsMaskZ))
      A = Builder.CreateFNeg(A);
    if (NegMul && !(IsMask3 || IsMaskZ))
      B = Builder.CreateFNeg(B);
    if (NegAcc)
      C = Builder.CreateFNeg(C);

    if (/*...*/) {
      Intrinsic::ID IID;
      if (Name[Name.size() - 5] == 's')
        IID = Intrinsic::x86_avx512_vfmadd_ps_512;
      else
        IID = Intrinsic::x86_avx512_vfmadd_pd_512;

      Rep = Builder.CreateIntrinsic(/*...*/);
    } else {
      Rep = Builder.CreateFMA(A, B, C);
    }
    /*...*/
  }
  else if (Name.starts_with("fma.vfmsubadd.p")) {
    Intrinsic::ID IID;
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_fma_vfmaddsub_ps;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_fma_vfmaddsub_pd;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
    /*...*/
    Ops[2] = Builder.CreateFNeg(Ops[2]);
    Rep = Builder.CreateIntrinsic(IID, Ops);
  }
  else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
           Name.starts_with("avx512.mask3.vfmaddsub.p") ||
           Name.starts_with("avx512.maskz.vfmaddsub.p") ||
           Name.starts_with("avx512.mask3.vfmsubadd.p")) {
    bool IsMask3 = Name[11] == '3';
    bool IsMaskZ = Name[11] == 'z';
    Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
    bool IsSubAdd = Name[3] == 's';
    if (/*...*/) {
      Intrinsic::ID IID;
      if (Name[Name.size() - 5] == 's')
        IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
      else
        IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
      /*...*/
      if (IsSubAdd)
        Ops[2] = Builder.CreateFNeg(Ops[2]);

      Rep = Builder.CreateIntrinsic(IID, Ops);
    } else {
      /*...*/
      Value *Odd = Builder.CreateCall(FMA, Ops);
      Ops[2] = Builder.CreateFNeg(Ops[2]);
      Value *Even = Builder.CreateCall(FMA, Ops);

      if (IsSubAdd)
        std::swap(Even, Odd);

      for (int i = 0; i != NumElts; ++i)
        Idxs[i] = i + (i % 2) * NumElts;

      Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
    }
    /*...*/
  }
  else if (Name.starts_with("avx512.mask.pternlog.") ||
           Name.starts_with("avx512.maskz.pternlog.")) {
    bool ZeroMask = Name[11] == 'z';
    Intrinsic::ID IID;
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_pternlog_d_128;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_pternlog_d_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_pternlog_d_512;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_pternlog_q_128;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_pternlog_q_256;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_pternlog_q_512;
    /*...*/
    Rep = Builder.CreateIntrinsic(IID, Args);
    /*...*/
  }
  else if (Name.starts_with("avx512.mask.vpmadd52") ||
           Name.starts_with("avx512.maskz.vpmadd52")) {
    bool ZeroMask = Name[11] == 'z';
    bool High = Name[20] == 'h' || Name[21] == 'h';
    Intrinsic::ID IID;
    if (VecWidth == 128 && !High)
      IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
    else if (VecWidth == 256 && !High)
      IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
    else if (VecWidth == 512 && !High)
      IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
    else if (VecWidth == 128 && High)
      IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
    else if (VecWidth == 256 && High)
      IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
    else if (VecWidth == 512 && High)
      IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
    /*...*/
    Rep = Builder.CreateIntrinsic(IID, Args);
    /*...*/
  }
  else if (Name.starts_with("avx512.mask.vpermi2var.") ||
           Name.starts_with("avx512.mask.vpermt2var.") ||
           Name.starts_with("avx512.maskz.vpermt2var.")) {
    bool ZeroMask = Name[11] == 'z';
    bool IndexForm = Name[17] == 'i';
    /*...*/
  }
  else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
           Name.starts_with("avx512.maskz.vpdpbusd.") ||
           Name.starts_with("avx512.mask.vpdpbusds.") ||
           Name.starts_with("avx512.maskz.vpdpbusds.")) {
    bool ZeroMask = Name[11] == 'z';
    bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
    Intrinsic::ID IID;
    if (VecWidth == 128 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusd_128;
    else if (VecWidth == 256 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusd_256;
    else if (VecWidth == 512 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusd_512;
    else if (VecWidth == 128 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusds_128;
    else if (VecWidth == 256 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusds_256;
    else if (VecWidth == 512 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpbusds_512;
    /*...*/
    // Bitcast the two multiplicand operands if they still have the legacy
    // i32-element type.
    if (Args[1]->getType()->isVectorTy() &&
        cast<VectorType>(Args[1]->getType())
            ->getElementType()
            ->isIntegerTy(32) &&
        Args[2]->getType()->isVectorTy() &&
        cast<VectorType>(Args[2]->getType())
            ->getElementType()
            ->isIntegerTy(32)) {
      Type *NewArgType = nullptr;
      if (VecWidth == 128)
        NewArgType = FixedVectorType::get(Builder.getInt8Ty(), 16);
      else if (VecWidth == 256)
        NewArgType = FixedVectorType::get(Builder.getInt8Ty(), 32);
      else if (VecWidth == 512)
        NewArgType = FixedVectorType::get(Builder.getInt8Ty(), 64);
      /*...*/
      Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
      Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
    }

    Rep = Builder.CreateIntrinsic(IID, Args);
    /*...*/
  }
  else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
           Name.starts_with("avx512.maskz.vpdpwssd.") ||
           Name.starts_with("avx512.mask.vpdpwssds.") ||
           Name.starts_with("avx512.maskz.vpdpwssds.")) {
    bool ZeroMask = Name[11] == 'z';
    bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
    Intrinsic::ID IID;
    if (VecWidth == 128 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssd_128;
    else if (VecWidth == 256 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssd_256;
    else if (VecWidth == 512 && !IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssd_512;
    else if (VecWidth == 128 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssds_128;
    else if (VecWidth == 256 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssds_256;
    else if (VecWidth == 512 && IsSaturating)
      IID = Intrinsic::x86_avx512_vpdpwssds_512;
    /*...*/
    if (Args[1]->getType()->isVectorTy() &&
        cast<VectorType>(Args[1]->getType())
            ->getElementType()
            ->isIntegerTy(32) &&
        Args[2]->getType()->isVectorTy() &&
        cast<VectorType>(Args[2]->getType())
            ->getElementType()
            ->isIntegerTy(32)) {
      Type *NewArgType = nullptr;
      if (VecWidth == 128)
        NewArgType = FixedVectorType::get(Builder.getInt16Ty(), 8);
      else if (VecWidth == 256)
        NewArgType = FixedVectorType::get(Builder.getInt16Ty(), 16);
      else if (VecWidth == 512)
        NewArgType = FixedVectorType::get(Builder.getInt16Ty(), 32);
      /*...*/
      Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
      Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
    }

    Rep = Builder.CreateIntrinsic(IID, Args);
    /*...*/
  }
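  // addcarry/subborrow now return a {carry-out, result} struct where the
  // legacy builtins stored the result through a pointer operand, so the
  // upgrade extracts both struct fields and stores the data half explicitly.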
  else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
           Name == "addcarry.u32" || Name == "addcarry.u64" ||
           Name == "subborrow.u32" || Name == "subborrow.u64") {
    Intrinsic::ID IID;
    if (Name[0] == 'a' && Name.back() == '2')
      IID = Intrinsic::x86_addcarry_32;
    else if (Name[0] == 'a' && Name.back() == '4')
      IID = Intrinsic::x86_addcarry_64;
    else if (Name[0] == 's' && Name.back() == '2')
      IID = Intrinsic::x86_subborrow_32;
    else if (Name[0] == 's' && Name.back() == '4')
      IID = Intrinsic::x86_subborrow_64;
    /*...*/
    Value *NewCall = Builder.CreateIntrinsic(IID, Args);

    // Extract the second result and store it.
    Value *Data = Builder.CreateExtractValue(NewCall, 1);
    /*...*/
    // Replace the original call result with the first result of the new call.
    Value *CF = Builder.CreateExtractValue(NewCall, 0);
    /*...*/
  }
  else if (Name.starts_with("avx512.mask.") &&
           /*...*/) {
    /*...*/
  }

  if (Name.starts_with("neon.bfcvt")) {
    if (Name.starts_with("neon.bfcvtn2")) {
      SmallVector<int, 32> LoMask(4);
      std::iota(LoMask.begin(), LoMask.end(), 0);
      SmallVector<int, 32> ConcatMask(8);
      std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
      Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
      Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(1), V4BF16);
      return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
    } else if (Name.starts_with("neon.bfcvtn")) {
      SmallVector<int, 32> ConcatMask(8);
      std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
      Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
      return Builder.CreateShuffleVector(
          Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
    } else {
      return Builder.CreateFPTrunc(CI->getOperand(0),
                                   Type::getBFloatTy(F->getContext()));
    }
  }
  else if (Name.starts_with("sve.fcvt")) {
    Intrinsic::ID NewID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
            .Case("sve.fcvtnt.bf16f32",
                  Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
            .Default(/*...*/);
    /*...*/
    // The predicate operand of these intrinsics changed type; retype the old
    // svbool argument through the to/from-svbool conversion intrinsics.
    if (Args[1]->getType() != BadPredTy)
      /*...*/;

    Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
                                      BadPredTy, Args[1]);
    Args[1] = Builder.CreateIntrinsic(
        Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);

    return Builder.CreateIntrinsic(NewID, Args, nullptr, CI->getName());
  }
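  // ARM MVE predicates for 64-bit lanes were remodelled from v4i1 to v2i1;
  // the upgrades below round-trip predicate values through the
  // arm.mve.pred.{v2i,i2v} conversions to retype them.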
  if (Name == "mve.vctp64.old") {
    // Replace the old v4i1 vctp64 with a v2i1 vctp wrapped in predicate
    // conversions.
    Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
                                          CI->getArgOperand(0));
    Value *C1 = Builder.CreateIntrinsic(
        Intrinsic::arm_mve_pred_v2i,
        {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
    return Builder.CreateIntrinsic(
        Intrinsic::arm_mve_pred_i2v,
        {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
  }
  else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
           Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
           Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
           Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
           Name ==
               "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
           Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
           Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
           Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
           Name ==
               "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
           Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
           Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
           Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
           Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
           Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
           Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
           Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
    std::vector<Type *> Tys;
    /*...*/
    switch (ID) {
    case Intrinsic::arm_mve_mull_int_predicated:
    case Intrinsic::arm_mve_vqdmull_predicated:
    case Intrinsic::arm_mve_vldr_gather_base_predicated:
      /*...*/
      break;
    case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
      /*...*/
      break;
    case Intrinsic::arm_mve_vldr_gather_offset_predicated:
      /*...*/
      break;
    case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
      /*...*/
      break;
    case Intrinsic::arm_cde_vcx1q_predicated:
    case Intrinsic::arm_cde_vcx1qa_predicated:
    case Intrinsic::arm_cde_vcx2q_predicated:
    case Intrinsic::arm_cde_vcx2qa_predicated:
    case Intrinsic::arm_cde_vcx3q_predicated:
    case Intrinsic::arm_cde_vcx3qa_predicated:
      /*...*/
      break;
    }

    std::vector<Value *> Ops;
    for (Value *Op : CI->args()) {
      Type *Ty = Op->getType();
      if (Ty->getScalarSizeInBits() == 1) {
        Value *C1 = Builder.CreateIntrinsic(
            Intrinsic::arm_mve_pred_v2i,
            {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
        Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
      }
      Ops.push_back(Op);
    }

    return Builder.CreateIntrinsic(ID, Tys, Ops, nullptr, CI->getName());
  }
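  // Legacy AMDGCN WMMA/SWMMAC iu8 calls are re-emitted against the current
  // declarations: the overload types are recomputed from the old argument
  // list and a trailing false flag is appended for the operand the new
  // definition added.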
  auto UpgradeLegacyWMMAIUIntrinsicCall =
      [](Function *F, CallBase *CI, IRBuilder<> &Builder,
         ArrayRef<Type *> OverloadTys) -> Value * {
    SmallVector<Value *> Args(CI->args());
    Args.push_back(Builder.getFalse());
    Function *NewDecl = Intrinsic::getOrInsertDeclaration(
        F->getParent(), F->getIntrinsicID(), OverloadTys);
    SmallVector<OperandBundleDef, 1> Bundles;
    CI->getOperandBundlesAsDefs(Bundles);
    auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
    NewCall->copyMetadata(*CI);
    return NewCall;
  };

  if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
    assert(CI->arg_size() == 7 && "Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
                                  "intrinsic should have 7 arguments");
    /*...*/
    return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2});
  }
  if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
    assert(CI->arg_size() == 8 && "Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
                                  "intrinsic should have 8 arguments");
    /*...*/
    return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2, T3, T4});
  }
  if (NumOperands < 3)
    return nullptr;
  /*...*/
  bool IsVolatile = false;
  /*...*/
  if (NumOperands > 3)
    /*...*/;
  if (NumOperands > 5) {
    /*...*/
    IsVolatile = !VolatileArg || !VolatileArg->isZero();
  }
  /*...*/
  if (VT->getElementType()->isIntegerTy(16)) {
    /*...*/
    Val = Builder.CreateBitCast(Val, AsBF16);
  }
  /*...*/
  AtomicRMWInst *RMW =
      Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);

  unsigned AddrSpace = PtrTy->getAddressSpace();
  /*...*/
  RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
  /*...*/
  RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
  /*...*/
  MDNode *RangeNotPrivate =
      /*...*/;
  RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
  /*...*/
  return Builder.CreateBitCast(RMW, RetTy);
    return MAV->getMetadata();

    return I->getDebugLoc().getAsMDNode();

  if (Name == "label") {
    /*...*/
  } else if (Name == "assign") {
    /*...*/
  } else if (Name == "declare") {
    /*...*/
  } else if (Name == "addr") {
    /*...*/ unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr,
    /*...*/
  } else if (Name == "value") {
    /*...*/
    unsigned ExprOp = 2;
    /*...*/
  }
  assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");

  int64_t OffsetVal = Offset->getSExtValue();
  return Builder.CreateIntrinsic(OffsetVal >= 0
                                     ? Intrinsic::vector_splice_left
                                     : Intrinsic::vector_splice_right,
                                 /*...*/,
                                 {CI->getArgOperand(0), CI->getArgOperand(1),
                                  Builder.getInt32(std::abs(OffsetVal))});
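  // Top-level dispatch for intrinsic call upgrades: the "llvm." prefix is
  // stripped, then one target namespace prefix is consumed and the remainder
  // of the name routed to the matching per-target upgrade path.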
  assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
  Name = Name.substr(5);

  bool IsX86 = Name.consume_front("x86.");
  bool IsNVVM = Name.consume_front("nvvm.");
  bool IsAArch64 = Name.consume_front("aarch64.");
  bool IsARM = Name.consume_front("arm.");
  bool IsAMDGCN = Name.consume_front("amdgcn.");
  bool IsDbg = Name.consume_front("dbg.");
  bool IsOldSplice =
      (Name.consume_front("experimental.vector.splice") ||
       Name.consume_front("vector.splice")) &&
      !(Name.starts_with(".left") || Name.starts_with(".right"));

  Value *Rep = nullptr;

  if (!IsX86 && Name == "stackprotectorcheck") {
    /*...*/
  } else if (IsNVVM) {
    /*...*/
  } else if (IsAArch64) {
    /*...*/
  } else if (IsAMDGCN) {
    /*...*/
  } else if (IsOldSplice) {
    /*...*/
  }

  const auto &DefaultCase = [&]() -> void {
    /*...*/
    assert(/*...*/
           "Unknown function for CallBase upgrade and isn't just a name change");
    /*...*/
  };

  assert(/*...*/ "Return type must have changed");
  assert(OldST->getNumElements() ==
             /*...*/ &&
         "Must have same number of elements");

  CallInst *NewCI = Builder.CreateCall(NewFn, Args);
  /*...*/
  for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
    Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
    Res = Builder.CreateInsertValue(Res, Elem, Idx);
  }
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    SmallVector<Value *, 4> Args(CI->args());
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
  case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
  case Intrinsic::aarch64_sve_bfdot_lane_v2: {
    // The '.v2' variants take the lane index as an i32 constant.
    SmallVector<Value *, 4> Args(CI->args());
    Args[3] = ConstantInt::get(Type::getInt32Ty(C),
                               cast<ConstantInt>(Args[3])->getZExtValue());
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::aarch64_sve_ld3_sret:
  case Intrinsic::aarch64_sve_ld4_sret:
  case Intrinsic::aarch64_sve_ld2_sret: {
    StringRef Name = F->getName();
    Name = Name.substr(5);
    unsigned N = /*...*/;
    /*...*/
    unsigned MinElts = RetTy->getMinNumElements() / N;
    /*...*/
    Value *NewLdCall = Builder.CreateCall(NewFn, Args);
    Value *Ret = llvm::PoisonValue::get(RetTy);
    for (unsigned I = 0; I < N; I++) {
      Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
      Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
    }
    /*...*/
    break;
  }
  case Intrinsic::coro_end: {
    /*...*/
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::vector_extract: {
    StringRef Name = F->getName();
    Name = Name.substr(5);
    if (!Name.starts_with("aarch64.sve.tuple.get")) {
      DefaultCase();
      return;
    }
    /*...*/
    unsigned MinElts = RetTy->getMinNumElements();
    /*...*/
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
    break;
  }

  case Intrinsic::vector_insert: {
    StringRef Name = F->getName();
    Name = Name.substr(5);
    if (!Name.starts_with("aarch64.sve.tuple")) {
      DefaultCase();
      return;
    }
    if (Name.starts_with("aarch64.sve.tuple.set")) {
      /*...*/
      NewCall = Builder.CreateCall(
          /*...*/);
      break;
    }
    if (Name.starts_with("aarch64.sve.tuple.create")) {
      unsigned N = /*...*/;
      assert(N > 1 && "Create is expected to be between 2-4");
      /*...*/
      unsigned MinElts = RetTy->getMinNumElements() / N;
      for (unsigned I = 0; I < N; I++) {
        /*...*/
        Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
      }
      /*...*/
    }
    break;
  }

  case Intrinsic::arm_neon_bfdot:
  case Intrinsic::arm_neon_bfmmla:
  case Intrinsic::arm_neon_bfmlalb:
  case Intrinsic::arm_neon_bfmlalt:
  case Intrinsic::aarch64_neon_bfdot:
  case Intrinsic::aarch64_neon_bfmmla:
  case Intrinsic::aarch64_neon_bfmlalb:
  case Intrinsic::aarch64_neon_bfmlalt: {
    SmallVector<Value *, 3> Args;
    assert(CI->arg_size() == 3 &&
           "Mismatch between function args and call args");
    size_t OperandWidth =
        CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
    assert((OperandWidth == 64 || OperandWidth == 128) &&
           "Unexpected operand width");
    /*...*/
    auto Iter = CI->args().begin();
    Args.push_back(*Iter++);
    Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
    Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::bitreverse:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::ctlz:
  case Intrinsic::cttz: {
    /*...*/
    NewCall =
        Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
    break;
  }

  case Intrinsic::objectsize: {
    Value *NullIsUnknownSize =
        /*...*/;
    NewCall = Builder.CreateCall(
        /*...*/);
    break;
  }

  case Intrinsic::ctpop:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::convert_from_fp16:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::dbg_value: {
    StringRef Name = F->getName();
    Name = Name.substr(5); // Strip llvm.
    if (Name.starts_with("dbg.addr")) {
      /*...*/
    }
    /*...*/
    if (Offset->isZeroValue()) {
      NewCall = Builder.CreateCall(
          /*...*/);
      break;
    }
    /*...*/
  }

  case Intrinsic::ptr_annotation:
    /*...*/
    NewCall = Builder.CreateCall(
        /*...*/);
    break;

  case Intrinsic::var_annotation:
    /*...*/
    NewCall = Builder.CreateCall(
        /*...*/);
    break;

  case Intrinsic::riscv_aes32dsi:
  case Intrinsic::riscv_aes32dsmi:
  case Intrinsic::riscv_aes32esi:
  case Intrinsic::riscv_aes32esmi:
  case Intrinsic::riscv_sm4ks:
  case Intrinsic::riscv_sm4ed: {
    // On RV64 the operands were widened; truncate back to i32 and
    // sign-extend the result.
    /*...*/
    Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
    Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
    /*...*/
    NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
    Value *Res = NewCall;
    /*...*/
    Res = Builder.CreateIntCast(NewCall, CI->getType(), true);
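    // The NVVM cluster/bulk-copy intrinsics below were retyped to take
    // shared-cluster pointers; upgraded calls insert addrspacecasts on the
    // affected pointer operands (and results) to preserve the old types.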
    break;
  }

  case Intrinsic::nvvm_mapa_shared_cluster: {
    // Create a new call with the correct address space, then cast the result
    // back to the pointer type the old intrinsic produced.
    /*...*/
    Value *Res = NewCall;
    Res = Builder.CreateAddrSpaceCast(
        Res, /*...*/);
    /*...*/
    break;
  }
  case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
  case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
    SmallVector<Value *, 8> Args(CI->args());
    Args[0] = Builder.CreateAddrSpaceCast(
        Args[0], /*...*/);
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
    SmallVector<Value *, 16> Args(CI->args());
    Args[0] = Builder.CreateAddrSpaceCast(
        Args[0], /*...*/);
    /*...*/
    Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::riscv_sha256sig0:
  case Intrinsic::riscv_sha256sig1:
  case Intrinsic::riscv_sha256sum0:
  case Intrinsic::riscv_sha256sum1:
  case Intrinsic::riscv_sm3p0:
  case Intrinsic::riscv_sm3p1: {
    // The type overload was removed: on RV64, truncate the operand to i32
    // and sign-extend the result back.
    Value *Arg =
        Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
    NewCall = Builder.CreateCall(NewFn, Arg);
    Value *Res =
        Builder.CreateIntCast(NewCall, CI->getType(), true);
    break;
  }

  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
    break;

  case Intrinsic::x86_xop_vpermil2pd:
  case Intrinsic::x86_xop_vpermil2ps:
  case Intrinsic::x86_xop_vpermil2pd_256:
  case Intrinsic::x86_xop_vpermil2ps_256: {
    SmallVector<Value *, 4> Args(CI->args());
    /*...*/
    Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::x86_sse41_ptestc:
  case Intrinsic::x86_sse41_ptestz:
  case Intrinsic::x86_sse41_ptestnzc: {
    // The arguments for these intrinsics used to be v4f32, and changed to
    // v2i64. This is purely a nop, since those are bitwise intrinsics, so
    // all that is required is a bitcast of both arguments.
    /*...*/
    Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
    Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
    NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
    break;
  }

  case Intrinsic::x86_rdtscp: {
    NewCall = Builder.CreateCall(NewFn);
    // Extract the second result and store it.
    Value *Data = Builder.CreateExtractValue(NewCall, 1);
    /*...*/
    // Replace the original call result with the first result of the new call.
    Value *TSC = Builder.CreateExtractValue(NewCall, 0);
    /*...*/
    break;
  }

  case Intrinsic::x86_sse41_insertps:
  case Intrinsic::x86_sse41_dppd:
  case Intrinsic::x86_sse41_dpps:
  case Intrinsic::x86_sse41_mpsadbw:
  case Intrinsic::x86_avx_dp_ps_256:
  case Intrinsic::x86_avx2_mpsadbw: {
    // The immediate operand of these intrinsics narrowed from i32 to i8.
    SmallVector<Value *, 4> Args(CI->args());
    Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C),
                                      "trunc");
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::x86_avx512_mask_cmp_pd_128:
  case Intrinsic::x86_avx512_mask_cmp_pd_256:
  case Intrinsic::x86_avx512_mask_cmp_pd_512:
  case Intrinsic::x86_avx512_mask_cmp_ps_128:
  case Intrinsic::x86_avx512_mask_cmp_ps_256:
  case Intrinsic::x86_avx512_mask_cmp_ps_512: {
    SmallVector<Value *, 4> Args(CI->args());
    /*...*/
    NewCall = Builder.CreateCall(NewFn, Args);
    /*...*/
    break;
  }

  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
  case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
  case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
  case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
    // The bf16 results were previously modelled as i16 vectors; bitcast the
    // operands and result across.
    SmallVector<Value *, 4> Args(CI->args());
    /*...*/
    if (NewFn->getIntrinsicID() ==
        Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
      Args[1] = Builder.CreateBitCast(
          Args[1], /*...*/);

    NewCall = Builder.CreateCall(NewFn, Args);
    Value *Res = Builder.CreateBitCast(
        NewCall, /*...*/);
    /*...*/
  }
  case Intrinsic::x86_avx512bf16_dpbf16ps_128:
  case Intrinsic::x86_avx512bf16_dpbf16ps_256:
  case Intrinsic::x86_avx512bf16_dpbf16ps_512: {
    SmallVector<Value *, 4> Args(CI->args());
    /*...*/
    Args[1] = Builder.CreateBitCast(
        Args[1], /*...*/);
    Args[2] = Builder.CreateBitCast(
        Args[2], /*...*/);
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::thread_pointer: {
    NewCall = Builder.CreateCall(NewFn, {});
    break;
  }

  case Intrinsic::memcpy:
  case Intrinsic::memmove:
  case Intrinsic::memset: {
    // The explicit alignment operand was dropped; it becomes parameter
    // attributes and alignment on the new mem intrinsic.
    /*...*/
    NewCall = Builder.CreateCall(NewFn, Args);
    /*...*/
    AttributeList NewAttrs = AttributeList::get(
        C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
        {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
         OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
    /*...*/
    // All mem intrinsics support dest alignment.
    MemCI->setDestAlignment(Align->getMaybeAlignValue());
    // Memcpy/memmove also support source alignment.
    if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
      MTI->setSourceAlignment(Align->getMaybeAlignValue());
    break;
  }

  case Intrinsic::masked_load:
  case Intrinsic::masked_gather:
  case Intrinsic::masked_store:
  case Intrinsic::masked_scatter: {
    // The explicit alignment operand moved into the builder calls.
    auto GetMaybeAlign = [](Value *Op) {
      /*...*/
    };
    auto GetAlign = [&](Value *Op) {
      /*...*/
    };
    switch (NewFn->getIntrinsicID()) {
    case Intrinsic::masked_load:
      NewCall = Builder.CreateMaskedLoad(
          /*...*/);
      break;
    case Intrinsic::masked_gather:
      NewCall = Builder.CreateMaskedGather(
          /*...*/);
      break;
    case Intrinsic::masked_store:
      NewCall = Builder.CreateMaskedStore(
          /*...*/);
      break;
    case Intrinsic::masked_scatter:
      NewCall = Builder.CreateMaskedScatter(
          /*...*/,
          DL.getValueOrABITypeAlignment(/*...*/),
          /*...*/);
      break;
    }
    break;
  }

  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end: {
    /*...*/
    if (/*...*/)
      NewCall = Builder.CreateLifetimeStart(Ptr);
    else
      NewCall = Builder.CreateLifetimeEnd(Ptr);
    break;
  }

  case Intrinsic::x86_avx512_vpdpbusd_128:
  case Intrinsic::x86_avx512_vpdpbusd_256:
  case Intrinsic::x86_avx512_vpdpbusd_512:
  case Intrinsic::x86_avx512_vpdpbusds_128:
  case Intrinsic::x86_avx512_vpdpbusds_256:
  case Intrinsic::x86_avx512_vpdpbusds_512:
  case Intrinsic::x86_avx2_vpdpbssd_128:
  case Intrinsic::x86_avx2_vpdpbssd_256:
  case Intrinsic::x86_avx10_vpdpbssd_512:
  case Intrinsic::x86_avx2_vpdpbssds_128:
  case Intrinsic::x86_avx2_vpdpbssds_256:
  case Intrinsic::x86_avx10_vpdpbssds_512:
  case Intrinsic::x86_avx2_vpdpbsud_128:
  case Intrinsic::x86_avx2_vpdpbsud_256:
  case Intrinsic::x86_avx10_vpdpbsud_512:
  case Intrinsic::x86_avx2_vpdpbsuds_128:
  case Intrinsic::x86_avx2_vpdpbsuds_256:
  case Intrinsic::x86_avx10_vpdpbsuds_512:
  case Intrinsic::x86_avx2_vpdpbuud_128:
  case Intrinsic::x86_avx2_vpdpbuud_256:
  case Intrinsic::x86_avx10_vpdpbuud_512:
  case Intrinsic::x86_avx2_vpdpbuuds_128:
  case Intrinsic::x86_avx2_vpdpbuuds_256:
  case Intrinsic::x86_avx10_vpdpbuuds_512: {
    // Bitcast the multiplicand operands to the narrower element type the new
    // definitions expect.
    SmallVector<Value *, 4> Args(CI->args());
    /*...*/
    Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
    Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::x86_avx512_vpdpwssd_128:
  case Intrinsic::x86_avx512_vpdpwssd_256:
  case Intrinsic::x86_avx512_vpdpwssd_512:
  case Intrinsic::x86_avx512_vpdpwssds_128:
  case Intrinsic::x86_avx512_vpdpwssds_256:
  case Intrinsic::x86_avx512_vpdpwssds_512:
  case Intrinsic::x86_avx2_vpdpwsud_128:
  case Intrinsic::x86_avx2_vpdpwsud_256:
  case Intrinsic::x86_avx10_vpdpwsud_512:
  case Intrinsic::x86_avx2_vpdpwsuds_128:
  case Intrinsic::x86_avx2_vpdpwsuds_256:
  case Intrinsic::x86_avx10_vpdpwsuds_512:
  case Intrinsic::x86_avx2_vpdpwusd_128:
  case Intrinsic::x86_avx2_vpdpwusd_256:
  case Intrinsic::x86_avx10_vpdpwusd_512:
  case Intrinsic::x86_avx2_vpdpwusds_128:
  case Intrinsic::x86_avx2_vpdpwusds_256:
  case Intrinsic::x86_avx10_vpdpwusds_512:
  case Intrinsic::x86_avx2_vpdpwuud_128:
  case Intrinsic::x86_avx2_vpdpwuud_256:
  case Intrinsic::x86_avx10_vpdpwuud_512:
  case Intrinsic::x86_avx2_vpdpwuuds_128:
  case Intrinsic::x86_avx2_vpdpwuuds_256:
  case Intrinsic::x86_avx10_vpdpwuuds_512: {
    SmallVector<Value *, 4> Args(CI->args());
    /*...*/
    Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
    Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  }

  assert(NewCall && "Should have either set this variable or returned through "
                    "the default case");
5633 assert(
F &&
"Illegal attempt to upgrade a non-existent intrinsic.");
5647 F->eraseFromParent();
5653 if (NumOperands == 0)
5661 if (NumOperands == 3) {
5665 Metadata *Elts2[] = {ScalarType, ScalarType,
5679 if (
Opc != Instruction::BitCast)
5683 Type *SrcTy = V->getType();
5700 if (
Opc != Instruction::BitCast)
5703 Type *SrcTy =
C->getType();
if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
  // Find the "Debug Info Version" module flag, if present.
  auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
    if (Flag->getNumOperands() < 3)
      return false;
    if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
      return K->getString() == "Debug Info Version";
    return false;
  });
  if (OpIt != ModFlags->op_end()) {
    const MDOperand &ValOp = (*OpIt)->getOperand(2);
    // ...
  }
}
// ...
bool BrokenDebugInfo = false;
// ... (module verification fills in BrokenDebugInfo) ...
if (!BrokenDebugInfo)
  // Everything is ok.
  return false;
// Diagnose malformed debug info before stripping it.
M.getContext().diagnose(Diag);
// ...
// Diagnose a debug metadata version mismatch.
M.getContext().diagnose(DiagVersion);
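// Illustrative module flag (hypothetical values): a module carrying
//   !llvm.module.flags = !{!0}
//   !0 = !{i32 2, !"Debug Info Version", i32 3}
// passes the version check; an out-of-date version instead causes the
// debug info to be dropped and a diagnostic to be emitted.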
// Seed all three dimensions with the default, then overlay any value the
// attribute already carries (comma-joined "x,y,z" form).
StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
if (F->hasFnAttribute(Attr)) {
  StringRef S = F->getFnAttribute(Attr).getValueAsString();
  auto [Part, Rest] = S.split(',');
  // ...
}
// Overwrite the dimension this annotation names ('x', 'y' or 'z').
const unsigned Dim = DimC - 'x';
assert(Dim < 3 && "Unexpected dim char");
// ...
F->addFnAttr(Attr, NewAttr);

// isXYZ(StringRef S):
return S == "x" || S == "y" || S == "z";
if (K == "kernel") {
  // ... (switch the function to the PTX kernel calling convention)
}
// Packed "align" annotations carry the argument index in the high 16 bits
// and the alignment value in the low 16 bits.
const unsigned Idx = (AlignIdxValuePair >> 16);
const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
// ...
if (K == "maxclusterrank" || K == "cluster_max_blocks") {
  // ...
}
if (K == "minctasm") {
  // ...
}
if (K == "maxnreg") {
  // ...
}
if (K.consume_front("maxntid") && isXYZ(K)) {
  // ... (merged into a comma-joined x,y,z attribute
  //      via upgradeNVVMFnVectorAttr)
}
if (K.consume_front("reqntid") && isXYZ(K)) {
  // ...
}
if (K.consume_front("cluster_dim_") && isXYZ(K)) {
  // ...
}
if (K == "grid_constant") {
  // ...
}
NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
// ...
if (!SeenNodes.insert(MD).second)
  continue; // Visit each annotation node only once.
// ...
assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
// Walk the (key, value) pairs that follow the annotated global value.
for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
  // ...
  const MDOperand &V = MD->getOperand(j + 1);
  // ... (pairs that were not upgraded to attributes are retained)
  NewOperands.append({K, V});
}
// Keep the node only if un-upgraded pairs remain beside the global.
if (NewOperands.size() > 1)
  // ...
const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
if (ModRetainReleaseMarker) {
  // ...
  // The legacy marker spelling separated the instruction from its
  // comment with '#'; the upgraded form joins the halves with ';'.
  SmallVector<StringRef, 4> ValueComp;
  ID->getString().split(ValueComp, "#");
  if (ValueComp.size() == 2) {
    std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
    // ...
  }
  M.eraseNamedMetadata(ModRetainReleaseMarker);
}
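// Illustrative marker upgrade (hypothetical string): a named-metadata value
// such as "mov fp, fp # marker" is re-emitted in the module with the two
// halves rejoined as "mov fp, fp ; marker".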
auto UpgradeToIntrinsic = [&](const char *OldFunc,
                              llvm::Intrinsic::ID IntrinsicFunc) {
  // ...
  bool InvalidCast = false;
  for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
    // ...
    // Bitcast each argument to the parameter type of the intrinsic.
    Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
    Args.push_back(Arg);
  }
  // ...
  CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
  // ...
  // Cast the result back to the type the old call produced.
  Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
  // ...
};
// ...
UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
    {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
    {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
    {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
    {"objc_autoreleaseReturnValue",
     llvm::Intrinsic::objc_autoreleaseReturnValue},
    {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
    {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
    {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
    {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
    {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
    {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
    {"objc_release", llvm::Intrinsic::objc_release},
    {"objc_retain", llvm::Intrinsic::objc_retain},
    {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
    {"objc_retainAutoreleaseReturnValue",
     llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
    {"objc_retainAutoreleasedReturnValue",
     llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
    {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
    {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
    {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
    {"objc_unsafeClaimAutoreleasedReturnValue",
     llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
    {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
    {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
    {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
    {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
    {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
    {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
    {"objc_arc_annotation_topdown_bbstart",
     llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
    {"objc_arc_annotation_topdown_bbend",
     llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
    {"objc_arc_annotation_bottomup_bbstart",
     llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
    {"objc_arc_annotation_bottomup_bbend",
     llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
for (auto &I : RuntimeFuncs)
  UpgradeToIntrinsic(I.first, I.second);
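// Illustrative call-site rewrite (hypothetical IR): after the upgrade,
//   %0 = call ptr @objc_retain(ptr %x)
// calls the intrinsic declaration instead:
//   %0 = call ptr @llvm.objc.retain(ptr %x)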
NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
// ...
bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
bool HasSwiftVersionFlag = false;
uint8_t SwiftMajorVersion, SwiftMinorVersion;
unsigned SwiftABIVersion;
// ...
if (Op->getNumOperands() != 3)
  continue;
// ...
if (ID->getString() == "Objective-C Image Info Version")
  HasObjCFlag = true;
if (ID->getString() == "Objective-C Class Properties")
  HasClassProperties = true;
// Upgrade the behavior of the PIC/PIE level flags, which were once
// emitted with the too-strict Error behavior.
if (ID->getString() == "PIC Level") {
  if (auto *Behavior =
          mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
    uint64_t V = Behavior->getLimitedValue();
    // ...
  }
}
if (ID->getString() == "PIE Level")
  if (auto *Behavior =
          mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
    // ...
// Branch protection flags moved to the Min merge behavior; rewrite the
// flag node, keeping the original key and value operands.
if (ID->getString() == "branch-target-enforcement" ||
    ID->getString().starts_with("sign-return-address")) {
  if (auto *Behavior =
          mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
    // ...
    Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
                            Type::getInt32Ty(M.getContext()), Module::Min)),
                        Op->getOperand(1), Op->getOperand(2)};
    // ...
  }
}
// Rewrite "Objective-C Image Info Section" values to remove whitespace
// between the comma-separated section components.
if (ID->getString() == "Objective-C Image Info Section") {
  // ...
  SmallVector<StringRef, 4> ValueComp;
  Value->getString().split(ValueComp, " ");
  if (ValueComp.size() != 1) {
    std::string NewValue;
    for (auto &S : ValueComp)
      NewValue += S.str();
    // ...
  }
}
// The "Objective-C Garbage Collection" flag once packed the Swift version
// into its upper bytes; unpack those into dedicated flags.
if (ID->getString() == "Objective-C Garbage Collection") {
  // ...
  assert(Md->getValue() && "Expected non-empty metadata");
  auto Type = Md->getValue()->getType();
  // ...
  unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
  if ((Val & 0xff) != Val) {
    HasSwiftVersionFlag = true;
    SwiftABIVersion = (Val & 0xff00) >> 8;
    SwiftMajorVersion = (Val & 0xff000000) >> 24;
    SwiftMinorVersion = (Val & 0xff0000) >> 16;
    // ...
  }
}
// Rename the legacy AMDGPU code-object-version flag.
if (ID->getString() == "amdgpu_code_object_version") {
  Metadata *Ops[3] = {
      Op->getOperand(0),
      MDString::get(M.getContext(), "amdhsa_code_object_version"),
      Op->getOperand(2)};
  // ...
}
if (HasObjCFlag && !HasClassProperties) {
  // Mark pre-class-properties ObjC modules explicitly.
  // ...
}
if (HasSwiftVersionFlag) {
  // Emit the unpacked Swift version components as their own module flags.
  M.addModuleFlag(Module::Error, "Swift Major Version",
                  ConstantInt::get(Int8Ty, SwiftMajorVersion));
  M.addModuleFlag(Module::Error, "Swift Minor Version",
                  ConstantInt::get(Int8Ty, SwiftMinorVersion));
}
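// Worked example (hypothetical packed value): Val = 0x05020600 unpacks as
//   SwiftMajorVersion = (Val & 0xff000000) >> 24 = 5
//   SwiftMinorVersion = (Val & 0xff0000)   >> 16 = 2
//   SwiftABIVersion   = (Val & 0xff00)     >>  8 = 6
// with the low GC byte (0x00) left behind in the original flag.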
// Canonicalize a section string by trimming whitespace around each
// comma-separated component.
auto TrimSpaces = [](StringRef Section) -> std::string {
  SmallVector<StringRef, 5> Components;
  Section.split(Components, ',');

  SmallString<32> Buffer;
  raw_svector_ostream OS(Buffer);

  for (auto Component : Components)
    OS << ',' << Component.trim();

  return std::string(OS.str().substr(1));
};

for (auto &GV : M.globals()) {
  if (!GV.hasSection())
    continue;

  StringRef Section = GV.getSection();

  if (!Section.starts_with("__DATA, __objc_catlist"))
    continue;

  GV.setSection(TrimSpaces(Section));
}
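// Example effect (hypothetical global): a global placed in section
// "__DATA, __objc_catlist, regular" is rewritten to
// "__DATA,__objc_catlist,regular".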
struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
  StrictFPUpgradeVisitor() = default;

  void visitCallBase(CallBase &Call) {
    if (!Call.isStrictFP())
      return;
    // ...
    // The caller does not have the strictfp attribute but this call site
    // does; replace the strictfp attribute with nobuiltin.
    Call.removeFnAttr(Attribute::StrictFP);
    Call.addFnAttr(Attribute::NoBuiltin);
  }
};
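// Illustrative rewrite (hypothetical IR): inside a non-strictfp function,
//   call double @sin(double %x) #0        ; #0 = { strictfp }
// loses the strictfp call-site attribute and gains nobuiltin instead.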
struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
    : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
  AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;

  void visitAtomicRMWInst(AtomicRMWInst &RMW) {
    // ...
  }
};
// If a function definition does not carry strictfp itself, downgrade any
// strictfp call sites inside it.
if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
  StrictFPUpgradeVisitor SFPV;
  SFPV.visit(F);
}

// Remove attributes that are incompatible with the return and argument
// types (e.g. integer-only attributes on pointers).
F.removeRetAttrs(AttributeFuncs::typeIncompatible(
    F.getReturnType(), F.getAttributes().getRetAttrs()));
for (auto &Arg : F.args())
  Arg.removeAttrs(
      AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));

// Older versions of LLVM used an "implicit-section-name" attribute
// instead of setting the section on the function directly.
if (Attribute A = F.getFnAttribute("implicit-section-name");
    A.isValid() && A.isStringAttribute()) {
  F.setSection(A.getValueAsString());
  F.removeFnAttr("implicit-section-name");
}

if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
    A.isValid()) {
  // Only walk the body when the attribute is actually enabled.
  if (A.getValueAsBool()) {
    AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
    Visitor.visit(F);
  }
  F.removeFnAttr("amdgpu-unsafe-fp-atomics");
}
if (!F.hasFnAttribute(FnAttrName))
  F.addFnAttr(FnAttrName, Value);
if (!F.hasFnAttribute(FnAttrName)) {
  if (Set)
    F.addFnAttr(FnAttrName);
} else {
  // Normalize an existing string-valued attribute to presence/absence form.
  auto A = F.getFnAttribute(FnAttrName);
  if ("false" == A.getValueAsString())
    F.removeFnAttr(FnAttrName);
  else if ("true" == A.getValueAsString()) {
    F.removeFnAttr(FnAttrName);
    F.addFnAttr(FnAttrName);
  }
}
Triple T(M.getTargetTriple());
// Only Arm/AArch64 branch protection flags need copying to functions.
if (!T.isThumb() && !T.isARM() && !T.isAArch64())
  return;
// ...
NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
// ...
if (Op->getNumOperands() != 3)
  continue;
// ...
// Map each branch-protection module flag to the variable it populates.
uint64_t *ValPtr = IDStr == "branch-target-enforcement"    ? &BTEValue
                   : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
                   : IDStr == "guarded-control-stack"      ? &GCSValue
                   : IDStr == "sign-return-address"        ? &SRAValue
                   : IDStr == "sign-return-address-all"    ? &SRAALLValue
                   : IDStr == "sign-return-address-with-bkey"
                       ? &SRABKeyValue
                       : nullptr;
if (!ValPtr)
  continue;
*ValPtr = CI->getZExtValue();
bool BTE = BTEValue == 1;
bool BPPLR = BPPLRValue == 1;
bool GCS = GCSValue == 1;
bool SRA = SRAValue == 1;
// Translate the module-level settings into attribute values.
StringRef SignTypeValue = "non-leaf";
if (SRA && SRAALLValue == 1)
  SignTypeValue = "all";
StringRef SignKeyValue = "a_key";
if (SRA && SRABKeyValue == 1)
  SignKeyValue = "b_key";
for (Function &F : M.getFunctionList()) {
  if (F.isDeclaration())
    continue;
  // ...
  // A function-level "sign-return-address"="none" overrides the module
  // flags; drop the attribute pair rather than copying over it.
  if (auto A = F.getFnAttribute("sign-return-address");
      A.isValid() && "none" == A.getValueAsString()) {
    F.removeFnAttr("sign-return-address");
    F.removeFnAttr("sign-return-address-key");
    // ...
  }
  // ...
  if (SRAALLValue == 1)
    // ... (sign all returns, not just non-leaf functions)
  if (SRABKeyValue == 1)
    // ... (select the B signing key)
}
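// Illustrative propagation (hypothetical flags): with module flags
//   !{i32 8, !"branch-target-enforcement", i32 1}
//   !{i32 8, !"sign-return-address", i32 1}
//   !{i32 8, !"sign-return-address-all", i32 1}
// each defined function that does not opt out ends up carrying
//   "branch-target-enforcement" "sign-return-address"="all"
//   "sign-return-address-key"="a_key".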
// isOldLoopArgument: does this tuple use the legacy vectorizer prefix?
if (T->getNumOperands() < 1)
  return false;
// ...
return S->getString().starts_with("llvm.vectorizer.");

// upgradeLoopTag: rename legacy "llvm.vectorizer.*" loop hints.
StringRef OldPrefix = "llvm.vectorizer.";
if (OldTag == "llvm.vectorizer.unroll")
  return MDString::get(C, "llvm.loop.interleave.count");
// Otherwise swap the prefix: llvm.vectorizer.* -> llvm.loop.vectorize.*
// ...

// upgradeLoopArgument: rebuild a tuple that uses an old tag.
if (T->getNumOperands() < 1)
  return MD;
// ...
if (!OldTag->getString().starts_with("llvm.vectorizer."))
  return MD;
// Keep every operand except the tag, which gets upgraded.
SmallVector<Metadata *, 8> Ops;
Ops.reserve(T->getNumOperands());
Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
  Ops.push_back(T->getOperand(I));
Ops.reserve(T->getNumOperands());
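// Illustrative metadata upgrade (hypothetical tuples):
//   !{!"llvm.vectorizer.width", i32 4}  ==> !{!"llvm.loop.vectorize.width", i32 4}
//   !{!"llvm.vectorizer.unroll", i32 2} ==> !{!"llvm.loop.interleave.count", i32 2}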
// SPIR(-V): make the default global address space explicit.
if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
    !DL.contains("-G") && !DL.starts_with("G")) {
  return DL.empty() ? std::string("G1") : (DL + "-G1").str();
}
// Make i32 a native type for 64-bit LoongArch and RISC-V.
if (T.isLoongArch64() || T.isRISCV64()) {
  auto I = DL.find("-n64-");
  if (I != StringRef::npos)
    return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
  return DL.str();
}

std::string Res = DL.str();
// AMDGPU: make the default global address space explicit.
if (!DL.contains("-G") && !DL.starts_with("G"))
  Res.append(Res.empty() ? "G1" : "-G1");
// ...
// Add the non-integral address space numbers, extending older strings
// that only listed a prefix of them.
if (!DL.contains("-ni") && !DL.starts_with("ni"))
  Res.append("-ni:7:8:9");
if (DL.ends_with("ni:7"))
  Res.append(":8:9");
if (DL.ends_with("ni:7:8"))
  Res.append(":9");
// Add sizing for the buffer address spaces (p7/p8/p9) where missing, and
// widen the old p8 entry to the current index width.
if (!DL.contains("-p7") && !DL.starts_with("p7"))
  Res.append("-p7:160:256:256:32");
if (!DL.contains("-p8") && !DL.starts_with("p8"))
  Res.append("-p8:128:128:128:48");
constexpr StringRef OldP8("-p8:128:128-");
if (DL.contains(OldP8))
  Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
if (!DL.contains("-p9") && !DL.starts_with("p9"))
  Res.append("-p9:192:256:256:32");
// ...
// Make the "m:e" mangling mode explicit where older strings omitted it.
if (!DL.contains("m:e"))
  Res = Res.empty() ? "m:e" : "m:e-" + Res;
auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
  // If the datalayout matches the expected format, splice in the 32/64-bit
  // pointer address space entries (p270/p271/p272).
  StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
  if (!DL.contains(AddrSpaces)) {
    SmallVector<StringRef, 4> Groups;
    Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
    if (R.match(Res, &Groups))
      Res = (Groups[1] + AddrSpaces + Groups[3]).str();
  }
};

if (T.isAArch64()) {
  // Add "-Fn32" (32-bit aligned function pointers) if missing.
  if (!DL.empty() && !DL.contains("-Fn32"))
    Res.append("-Fn32");
  AddPtr32Ptr64AddrSpaces();
  return Res;
}
// These targets gained 16-byte-aligned i128; splice "-i128:128" in right
// after the i64 entry if it is missing.
if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
    /* ...other 64-bit targets... */) {
  std::string I64 = "-i64:64";
  std::string I128 = "-i128:128";
  if (!StringRef(Res).contains(I128)) {
    size_t Pos = Res.find(I64);
    if (Pos != size_t(-1))
      Res.insert(Pos + I64.size(), I128);
  }
  return Res;
}
// On AIX, 32-bit PowerPC doubles are 4-byte aligned (8 preferred); make
// that explicit by inserting "-f64:32:64" before the stack alignment.
if (T.isPPC() && T.isOSAIX() && !DL.contains("f64:32:64") && !DL.empty()) {
  size_t Pos = Res.find("-S128");
  // ... (falls back to appending when "-S128" is absent)
  Res.insert(Pos, "-f64:32:64");
}
AddPtr32Ptr64AddrSpaces();
// ...
// i128 became 16-byte aligned on x86 (except IAMCU); splice "-i128:128"
// into the integer-alignment group if missing.
if (!T.isOSIAMCU()) {
  std::string I128 = "-i128:128";
  SmallVector<StringRef, 4> Groups;
  Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
  if (R.match(Res, &Groups))
    Res = (Groups[1] + I128 + Groups[3]).str();
}
// Raw x87 80-bit floats were once 4-byte aligned on 32-bit MSVC targets;
// the alignment is now 16 bytes (f80:128).
if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
  StringRef Ref = Res;
  auto I = Ref.find("-f80:32-");
  if (I != StringRef::npos)
    Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
}
return Res;
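// Worked example for 32-bit MSVC x86 (hypothetical input): the i128 and
// f80 rewrites turn
//   "...-i64:64-f80:32-..."  into  "...-i64:64-i128:128-f80:128-..."
// and AddPtr32Ptr64AddrSpaces() splices "-p270:32:32-p271:32:32-p272:64:64"
// in right after the leading "[Ee]-m:<c>[-p:32:32]" group.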
// "no-frame-pointer-elim" and "no-frame-pointer-elim-non-leaf" were folded
// into the single "frame-pointer" attribute.
Attribute A = B.getAttribute("no-frame-pointer-elim");
StringRef FramePointer;
if (A.isValid()) {
  // The value can be "true" or "false".
  FramePointer = A.getValueAsString() == "true" ? "all" : "none";
  B.removeAttribute("no-frame-pointer-elim");
}
if (B.contains("no-frame-pointer-elim-non-leaf")) {
  // The value is ignored; "no-frame-pointer-elim"="true" takes priority.
  if (FramePointer != "all")
    FramePointer = "non-leaf";
  B.removeAttribute("no-frame-pointer-elim-non-leaf");
}
if (!FramePointer.empty())
  B.addAttribute("frame-pointer", FramePointer);

A = B.getAttribute("null-pointer-is-valid");
if (A.isValid()) {
  // The string attribute becomes the NullPointerIsValid enum attribute.
  bool NullPointerIsValid = A.getValueAsString() == "true";
  B.removeAttribute("null-pointer-is-valid");
  if (NullPointerIsValid)
    B.addAttribute(Attribute::NullPointerIsValid);
}
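// Example rewrites (hypothetical attribute sets):
//   "no-frame-pointer-elim"="true"    ==> "frame-pointer"="all"
//   "no-frame-pointer-elim-non-leaf"  ==> "frame-pointer"="non-leaf"
//   "null-pointer-is-valid"="true"    ==> null_pointer_is_valid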
// Drop "clang.arc.attachedcall" operand bundles that carry no operand;
// the operand is now required.
return OBD.getTag() == "clang.arc.attachedcall" &&
       OBD.inputs().empty();