File: bf16-convert-intrinsics.ll

package info (click to toggle)

llvm-toolchain-20 1%3A20.1.6-1~exp1

links: PTS, VCS
area: main
in suites: experimental
size: 2,111,304 kB
sloc: cpp: 7,438,677; ansic: 1,393,822; asm: 1,012,926; python: 241,650; f90: 86,635; objc: 75,479; lisp: 42,144; pascal: 17,286; sh: 10,027; ml: 5,082; perl: 4,730; awk: 3,523; makefile: 3,349; javascript: 2,251; xml: 892; fortran: 672

file content (37 lines) | stat: -rw-r--r-- 1,221 bytes

parent folder | download | duplicates (6)

; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64 -mattr=+neon -mattr=+bf16 | FileCheck %s

; This test exercises the old neon.bfcvt intrinsics, which are now
; autoupgraded to fptrunc operations.

; Scalar form: takes one float and returns one bfloat.
declare bfloat @llvm.aarch64.neon.bfcvt(float)
; Vector form: takes <4 x float> and returns <8 x bfloat>.
declare <8 x bfloat> @llvm.aarch64.neon.bfcvtn(<4 x float>)
; Vector form with an extra <8 x bfloat> operand ahead of the <4 x float>
; source; returns <8 x bfloat>.
declare <8 x bfloat> @llvm.aarch64.neon.bfcvtn2(<8 x bfloat>, <4 x float>)

; CHECK-LABEL: test_vcvth_bf16_f32
; CHECK:      bfcvt h0, s0
; CHECK-NEXT: ret
; Scalar case: passes the float argument straight to the old
; @llvm.aarch64.neon.bfcvt intrinsic and returns the resulting bfloat.
define bfloat @test_vcvth_bf16_f32(float %a) {
  %narrowed = call bfloat @llvm.aarch64.neon.bfcvt(float %a)
  ret bfloat %narrowed
}

; CHECK-LABEL: test_vcvtq_low_bf16_f32
; CHECK:      bfcvtn v0.4h, v0.4s
; CHECK-NEXT: ret
; Vector case: passes the <4 x float> argument to the old
; @llvm.aarch64.neon.bfcvtn intrinsic and returns its <8 x bfloat> result.
define <8 x bfloat> @test_vcvtq_low_bf16_f32(<4 x float> %a) {
  %low = call <8 x bfloat> @llvm.aarch64.neon.bfcvtn(<4 x float> %a)
  ret <8 x bfloat> %low
}

; CHECK-LABEL: test_vcvtq_high_bf16_f32
; CHECK:      bfcvtn2 v1.8h, v0.4s
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: ret
; Vector case with an existing <8 x bfloat> operand: calls the old
; @llvm.aarch64.neon.bfcvtn2 intrinsic and returns its <8 x bfloat> result.
; The intrinsic takes %inactive first and %a second, the reverse of this
; function's parameter order, so the expected output above converts into v1
; and then copies v1 into v0.
define <8 x bfloat> @test_vcvtq_high_bf16_f32(<4 x float> %a, <8 x bfloat> %inactive) {
  %high = call <8 x bfloat> @llvm.aarch64.neon.bfcvtn2(<8 x bfloat> %inactive, <4 x float> %a)
  ret <8 x bfloat> %high
}