File: optimize-for-nvvm.mlir

package info (click to toggle)
swiftlang 6.1.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 2,791,604 kB
  • sloc: cpp: 9,901,740; ansic: 2,201,431; asm: 1,091,827; python: 308,252; objc: 82,166; f90: 80,126; lisp: 38,358; pascal: 25,559; sh: 20,429; ml: 5,058; perl: 4,745; makefile: 4,484; awk: 3,535; javascript: 3,018; xml: 918; fortran: 664; cs: 573; ruby: 396
file content (24 lines) | stat: -rw-r--r-- 1,488 bytes parent folder | download | duplicates (16)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
// RUN: mlir-opt %s -llvm-optimize-for-nvvm-target | FileCheck %s

// Test case: a single f16 `llvm.fdiv` is fed to the pass named on the lit
// invocation line, and the output is expected to contain the f32 sequence
// matched below: widen both operands, multiply by an approximate reciprocal,
// apply one FMA-based correction, pick between the raw and corrected value,
// and narrow back to f16.
// CHECK-LABEL: llvm.func @fdiv_fp16
llvm.func @fdiv_fp16(%arg0 : f16, %arg1 : f16) -> f16 {
  // The order-independent patterns that follow are tied together only through
  // the names they capture (c0, mask, lhs, ...), not through textual position.
  //
  // Two i32 constants used by the exponent test further down. 2139095040 is
  // 0x7F800000, the exponent-field bits of an IEEE-754 binary32 value.
  // CHECK-DAG: %[[c0:.*]]      = llvm.mlir.constant(0 : ui32) : i32
  // CHECK-DAG: %[[mask:.*]]    = llvm.mlir.constant(2139095040 : ui32) : i32
  // Both f16 arguments are extended to f32 before any arithmetic.
  // CHECK-DAG: %[[lhs:.*]]     = llvm.fpext %arg0 : f16 to f32
  // CHECK-DAG: %[[rhs:.*]]     = llvm.fpext %arg1 : f16 to f32
  // Initial quotient estimate: approx = lhs * rcp(rhs).
  // CHECK-DAG: %[[rcp:.*]]     = nvvm.rcp.approx.ftz.f %[[rhs]] : f32
  // CHECK-DAG: %[[approx:.*]]  = llvm.fmul %[[lhs]], %[[rcp]] : f32
  // One correction step:
  //   err     = fma(approx, -rhs, lhs)   i.e. lhs - approx * rhs
  //   refined = fma(err, rcp, approx)    i.e. approx + err * rcp
  // CHECK-DAG: %[[neg:.*]]     = llvm.fneg %[[rhs]] : f32
  // CHECK-DAG: %[[err:.*]]     = llvm.intr.fma(%[[approx]], %[[neg]], %[[lhs]]) : (f32, f32, f32) -> f32
  // CHECK-DAG: %[[refined:.*]] = llvm.intr.fma(%[[err]], %[[rcp]], %[[approx]]) : (f32, f32, f32) -> f32
  // The estimate's bits are masked with 0x7F800000 and compared against 0 and
  // against the mask itself; if either comparison holds, the uncorrected
  // estimate is selected instead of the refined value.
  // NOTE(review): for binary32 those two exponent patterns correspond to
  // zero/subnormal and inf/NaN -- presumably the cases where the correction
  // step is skipped on purpose; confirm against the pass implementation.
  // CHECK-DAG: %[[cast:.*]]    = llvm.bitcast %[[approx]] : f32 to i32
  // CHECK-DAG: %[[exp:.*]]     = llvm.and %[[cast]], %[[mask]] : i32
  // CHECK-DAG: %[[is_zero:.*]] = llvm.icmp "eq" %[[exp]], %[[c0]] : i32
  // CHECK-DAG: %[[is_mask:.*]] = llvm.icmp "eq" %[[exp]], %[[mask]] : i32
  // CHECK-DAG: %[[pred:.*]]    = llvm.or %[[is_zero]], %[[is_mask]] : i1
  // CHECK-DAG: %[[select:.*]]  = llvm.select %[[pred]], %[[approx]], %[[refined]] : i1, f32
  // The selected f32 value is truncated back to the original f16 type.
  // CHECK-DAG: %[[result:.*]]  = llvm.fptrunc %[[select]] : f32 to f16
  // Input IR: the one f16 division the pass is expected to rewrite.
  %result = llvm.fdiv %arg0, %arg1 : f16
  // The function must return the truncated value captured as `result` above.
  // CHECK: llvm.return %[[result]] : f16
  llvm.return %result : f16
}