File: flops.h

package info (click to toggle)
magma 2.9.0%2Bds-2
  • links: PTS, VCS
  • area: contrib
  • in suites: trixie
  • size: 83,212 kB
  • sloc: cpp: 709,115; fortran: 121,916; ansic: 32,343; python: 25,603; f90: 15,208; makefile: 942; xml: 253; csh: 232; sh: 203; perl: 104
file content (400 lines) | stat: -rw-r--r-- 26,915 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
/**
 *
 * @file flops.h
 *
 *  File provided by Univ. of Tennessee,
 *
 * @version 1.0.0
 * @author Mathieu Faverge
 * @date January 2025
 *
 **/
/*
 * This file provides the flop-count formulas for a selection of BLAS
 * (Level 1, 2 and 3) and LAPACK routines.  Each macro takes the same size
 * parameters as the routine it is named after.  The FMULS_* and FADDS_*
 * macros give the number of multiplications and additions respectively,
 * and the FLOPS_* macros combine the two into a total for each precision.
 * Example use of these macros:
 *
 *    FLOPS_ZGEMM( m, n, k )
 *
 * All the formulas are reported in LAPACK Working Note (LAWN) 41:
 *     http://www.netlib.org/lapack/lawns/lawn41.ps
 */
#ifndef MAGMA_FLOPS_H
#define MAGMA_FLOPS_H

/***************************************************************************//**
                 Generic formulas taken from LAWN 41
*******************************************************************************/
/*
 * Level 1 BLAS
 *
 * AXPY with size parameter n_: n_ multiplications and n_ additions.
 */
#define FMULS_AXPY(n_) (n_)
#define FADDS_AXPY(n_) (n_)

/*
 * Level 2 BLAS
 *
 * GEMV (m_, n_): m_*n_ + 2*m_ multiplications and m_*n_ additions.
 * SYMV (n_) is GEMV evaluated with both sizes equal to n_;
 * HEMV is an alias of SYMV.
 */
#define FMULS_GEMV(m_, n_) \
    ( (m_) * (n_) + 2.0 * (m_) )
#define FADDS_GEMV(m_, n_) \
    ( (m_) * (n_) )

#define FMULS_SYMV(n_) FMULS_GEMV( (n_), (n_) )
#define FADDS_SYMV(n_) FADDS_GEMV( (n_), (n_) )

#define FMULS_HEMV FMULS_SYMV
#define FADDS_HEMV FADDS_SYMV

/*
 * Level 3 BLAS
 *
 * GEMM (m_, n_, k_): m_*n_*k_ multiplications and as many additions.
 * SYMM (side_, m_, n_): the GEMM count m_*m_*n_ when side_ == MagmaLeft,
 * and m_*n_*n_ otherwise.  HEMM is an alias of SYMM.
 */
#define FMULS_GEMM(m_, n_, k_) ( (m_) * (n_) * (k_) )
#define FADDS_GEMM(m_, n_, k_) ( (m_) * (n_) * (k_) )

#define FMULS_SYMM(side_, m_, n_) \
    ( ((side_) == MagmaLeft) \
      ? FMULS_GEMM((m_), (m_), (n_)) \
      : FMULS_GEMM((m_), (n_), (n_)) )
#define FADDS_SYMM(side_, m_, n_) \
    ( ((side_) == MagmaLeft) \
      ? FADDS_GEMM((m_), (m_), (n_)) \
      : FADDS_GEMM((m_), (n_), (n_)) )

#define FMULS_HEMM FMULS_SYMM
#define FADDS_HEMM FADDS_SYMM

/*
 * SYRK (k_, n_): 0.5*k_*n_*(n_+1) multiplications and as many additions.
 * SYR2K (k_, n_): k_*n_*n_ multiplications, k_*n_*n_ + n_ additions.
 * HERK and HER2K are aliases of SYRK and SYR2K.
 */
#define FMULS_SYRK(k_, n_) ( 0.5 * (k_) * (n_) * ((n_) + 1) )
#define FADDS_SYRK(k_, n_) ( 0.5 * (k_) * (n_) * ((n_) + 1) )

#define FMULS_HERK FMULS_SYRK
#define FADDS_HERK FADDS_SYRK

#define FMULS_SYR2K(k_, n_) ( (k_) * (n_) * (n_) )
#define FADDS_SYR2K(k_, n_) ( (k_) * (n_) * (n_) + (n_) )

#define FMULS_HER2K FMULS_SYR2K
#define FADDS_HER2K FADDS_SYR2K

/*
 * TRMM_2 (m_, n_): 0.5*n_*m_*(m_+1) multiplications and
 * 0.5*n_*m_*(m_-1) additions.
 * TRMM (side_, m_, n_) evaluates TRMM_2(m_, n_) when side_ == MagmaLeft
 * and TRMM_2(n_, m_) otherwise.  TRSM is an alias of TRMM.
 */
#define FMULS_TRMM_2(m_, n_) ( 0.5 * (n_) * (m_) * ((m_) + 1) )
#define FADDS_TRMM_2(m_, n_) ( 0.5 * (n_) * (m_) * ((m_) - 1) )

#define FMULS_TRMM(side_, m_, n_) \
    ( ((side_) == MagmaLeft) \
      ? FMULS_TRMM_2((m_), (n_)) \
      : FMULS_TRMM_2((n_), (m_)) )
#define FADDS_TRMM(side_, m_, n_) \
    ( ((side_) == MagmaLeft) \
      ? FADDS_TRMM_2((m_), (n_)) \
      : FADDS_TRMM_2((n_), (m_)) )

#define FMULS_TRSM FMULS_TRMM
#define FADDS_TRSM FADDS_TRMM

/*
 * Lapack
 *
 * GETRF (m_, n_): both counts branch on (m_) < (n_); the two branches are
 * the same expression with m_ and n_ exchanged.
 * GETRI (n_) and GETRS (n_, nrhs_) are plain polynomials in their arguments.
 */
#define FMULS_GETRF(m_, n_) \
    ( ((m_) < (n_)) \
      ? ( 0.5 * (m_) * ( (m_) * ( (n_) - (1.0 / 3.0) * (m_) - 1.0 ) + (n_) ) \
          + (2.0 / 3.0) * (m_) ) \
      : ( 0.5 * (n_) * ( (n_) * ( (m_) - (1.0 / 3.0) * (n_) - 1.0 ) + (m_) ) \
          + (2.0 / 3.0) * (n_) ) )
#define FADDS_GETRF(m_, n_) \
    ( ((m_) < (n_)) \
      ? ( 0.5 * (m_) * ( (m_) * ( (n_) - (1.0 / 3.0) * (m_) ) - (n_) ) \
          + (1.0 / 6.0) * (m_) ) \
      : ( 0.5 * (n_) * ( (n_) * ( (m_) - (1.0 / 3.0) * (n_) ) - (m_) ) \
          + (1.0 / 6.0) * (n_) ) )

#define FMULS_GETRI(n_) \
    ( (n_) * ( (5.0 / 6.0) + (n_) * ( (2.0 / 3.0) * (n_) + 0.5 ) ) )
#define FADDS_GETRI(n_) \
    ( (n_) * ( (5.0 / 6.0) + (n_) * ( (2.0 / 3.0) * (n_) - 1.5 ) ) )

#define FMULS_GETRS(n_, nrhs_) ( (nrhs_) * (n_) * (n_) )
#define FADDS_GETRS(n_, nrhs_) ( (nrhs_) * (n_) * ((n_) - 1) )

/*
 * POTRF (n_), POTRI (n_) and POTRS (n_, nrhs_): multiplication and addition
 * counts as polynomials in the size arguments.
 */
#define FMULS_POTRF(n_) \
    ( (n_) * ( ( (1.0 / 6.0) * (n_) + 0.5 ) * (n_) + (1.0 / 3.0) ) )
#define FADDS_POTRF(n_) \
    ( (n_) * ( ( (1.0 / 6.0) * (n_) ) * (n_) - (1.0 / 6.0) ) )

#define FMULS_POTRI(n_) \
    ( (n_) * ( (2.0 / 3.0) + (n_) * ( (1.0 / 3.0) * (n_) + 1.0 ) ) )
#define FADDS_POTRI(n_) \
    ( (n_) * ( (1.0 / 6.0) + (n_) * ( (1.0 / 3.0) * (n_) - 0.5 ) ) )

#define FMULS_POTRS(n_, nrhs_) ( (nrhs_) * (n_) * ((n_) + 1) )
#define FADDS_POTRS(n_, nrhs_) ( (nrhs_) * (n_) * ((n_) - 1) )

//SPBTRF
//SPBTRS
//SSYTRF
//SSYTRI
//SSYTRS

/*
 * GEQRF (m_, n_): both counts branch on (m_) > (n_).
 * GEQRT (m_, n_): 0.5*m_*n_ multiplications and as many additions.
 * GEQLF forwards to the GEQRF counts.
 */
#define FMULS_GEQRF(m_, n_) \
    ( ((m_) > (n_)) \
      ? ( (n_) * ( (n_) * (  0.5 - (1.0 / 3.0) * (n_) + (m_) ) \
                   + (m_) + 23.0 / 6.0 ) ) \
      : ( (m_) * ( (m_) * ( -0.5 - (1.0 / 3.0) * (m_) + (n_) ) \
                   + 2.0 * (n_) + 23.0 / 6.0 ) ) )
#define FADDS_GEQRF(m_, n_) \
    ( ((m_) > (n_)) \
      ? ( (n_) * ( (n_) * (  0.5 - (1.0 / 3.0) * (n_) + (m_) ) \
                   + 5.0 / 6.0 ) ) \
      : ( (m_) * ( (m_) * ( -0.5 - (1.0 / 3.0) * (m_) + (n_) ) \
                   + (n_) + 5.0 / 6.0 ) ) )

#define FMULS_GEQRT(m_, n_) ( 0.5 * (m_) * (n_) )
#define FADDS_GEQRT(m_, n_) ( 0.5 * (m_) * (n_) )

#define FMULS_GEQLF(m_, n_) FMULS_GEQRF(m_, n_)
#define FADDS_GEQLF(m_, n_) FADDS_GEQRF(m_, n_)

/*
 * GERQF (m_, n_): both counts branch on (m_) > (n_).
 * GELQF forwards to the GERQF counts.
 */
#define FMULS_GERQF(m_, n_) \
    ( ((m_) > (n_)) \
      ? ( (n_) * ( (n_) * (  0.5 - (1.0 / 3.0) * (n_) + (m_) ) \
                   + (m_) + 29.0 / 6.0 ) ) \
      : ( (m_) * ( (m_) * ( -0.5 - (1.0 / 3.0) * (m_) + (n_) ) \
                   + 2.0 * (n_) + 29.0 / 6.0 ) ) )
#define FADDS_GERQF(m_, n_) \
    ( ((m_) > (n_)) \
      ? ( (n_) * ( (n_) * ( -0.5 - (1.0 / 3.0) * (n_) + (m_) ) \
                   + (m_) + 5.0 / 6.0 ) ) \
      : ( (m_) * ( (m_) * (  0.5 - (1.0 / 3.0) * (m_) + (n_) ) \
                   + 5.0 / 6.0 ) ) )

#define FMULS_GELQF(m_, n_) FMULS_GERQF(m_, n_)
#define FADDS_GELQF(m_, n_) FADDS_GERQF(m_, n_)

/*
 * UNGQR and UNGRQ (m_, n_, k_): multiplication and addition counts.
 * ORGQR, UNGQL and ORGQL are aliases of UNGQR;
 * ORGRQ, UNGLQ and ORGLQ are aliases of UNGRQ.
 */
#define FMULS_UNGQR(m_, n_, k_) \
    ( (k_) * ( 2.0 * (m_) * (n_) + 2.0 * (n_) - 5.0 / 3.0 \
               + (k_) * ( 2.0 / 3.0 * (k_) - ((m_) + (n_)) - 1.0 ) ) )
#define FADDS_UNGQR(m_, n_, k_) \
    ( (k_) * ( 2.0 * (m_) * (n_) + (n_) - (m_) + 1.0 / 3.0 \
               + (k_) * ( 2.0 / 3.0 * (k_) - ((m_) + (n_)) ) ) )
#define FMULS_ORGQR FMULS_UNGQR
#define FADDS_ORGQR FADDS_UNGQR

#define FMULS_UNGQL FMULS_UNGQR
#define FADDS_UNGQL FADDS_UNGQR
#define FMULS_ORGQL FMULS_UNGQR
#define FADDS_ORGQL FADDS_UNGQR

#define FMULS_UNGRQ(m_, n_, k_) \
    ( (k_) * ( 2.0 * (m_) * (n_) + (m_) + (n_) - 2.0 / 3.0 \
               + (k_) * ( 2.0 / 3.0 * (k_) - ((m_) + (n_)) - 1.0 ) ) )
#define FADDS_UNGRQ(m_, n_, k_) \
    ( (k_) * ( 2.0 * (m_) * (n_) + (m_) - (n_) + 1.0 / 3.0 \
               + (k_) * ( 2.0 / 3.0 * (k_) - ((m_) + (n_)) ) ) )
#define FMULS_ORGRQ FMULS_UNGRQ
#define FADDS_ORGRQ FADDS_UNGRQ

#define FMULS_UNGLQ FMULS_UNGRQ
#define FADDS_UNGLQ FADDS_UNGRQ
#define FMULS_ORGLQ FMULS_UNGRQ
#define FADDS_ORGLQ FADDS_UNGRQ

/*
 * GEQRS (m_, n_, nrhs_): nrhs_ * n_ * (2*m_ - 0.5*n_ + c), with c = 2.5 for
 * multiplications and c = 0.5 for additions.
 */
#define FMULS_GEQRS(m_, n_, nrhs_) \
    ( (nrhs_) * ( (n_) * ( 2.0 * (m_) - 0.5 * (n_) + 2.5 ) ) )
#define FADDS_GEQRS(m_, n_, nrhs_) \
    ( (nrhs_) * ( (n_) * ( 2.0 * (m_) - 0.5 * (n_) + 0.5 ) ) )

/*
 * UNMQR (m_, n_, k_, side_): the first expression is used when
 * side_ == MagmaLeft, the second otherwise.  Note that side_ is the LAST
 * parameter here.
 * ORMQR, UNMQL, ORMQL, UNMRQ, ORMRQ, UNMLQ and ORMLQ are aliases of UNMQR.
 */
#define FMULS_UNMQR(m_, n_, k_, side_) \
    ( ((side_) == MagmaLeft) \
      ? ( 2.0 * (n_) * (m_) * (k_) - (n_) * (k_) * (k_) + 2.0 * (n_) * (k_) ) \
      : ( 2.0 * (n_) * (m_) * (k_) - (m_) * (k_) * (k_) + (m_) * (k_) \
          + (n_) * (k_) - 0.5 * (k_) * (k_) + 0.5 * (k_) ) )
#define FADDS_UNMQR(m_, n_, k_, side_) \
    ( ((side_) == MagmaLeft) \
      ? ( 2.0 * (n_) * (m_) * (k_) - (n_) * (k_) * (k_) + (n_) * (k_) ) \
      : ( 2.0 * (n_) * (m_) * (k_) - (m_) * (k_) * (k_) + (m_) * (k_) ) )
#define FMULS_ORMQR FMULS_UNMQR
#define FADDS_ORMQR FADDS_UNMQR

#define FMULS_UNMQL FMULS_UNMQR
#define FADDS_UNMQL FADDS_UNMQR
#define FMULS_ORMQL FMULS_UNMQR
#define FADDS_ORMQL FADDS_UNMQR

#define FMULS_UNMRQ FMULS_UNMQR
#define FADDS_UNMRQ FADDS_UNMQR
#define FMULS_ORMRQ FMULS_UNMQR
#define FADDS_ORMRQ FADDS_UNMQR

#define FMULS_UNMLQ FMULS_UNMQR
#define FADDS_UNMLQ FADDS_UNMQR
#define FMULS_ORMLQ FMULS_UNMQR
#define FADDS_ORMLQ FADDS_UNMQR

/*
 * TRTRI, GEHRD and SYTRD (n_): cubic polynomials in n_ for the
 * multiplication and addition counts.  HETRD is an alias of SYTRD.
 */
#define FMULS_TRTRI(n_) \
    ( (n_) * ( (n_) * ( 1.0 / 6.0 * (n_) + 0.5 ) + 1.0 / 3.0 ) )
#define FADDS_TRTRI(n_) \
    ( (n_) * ( (n_) * ( 1.0 / 6.0 * (n_) - 0.5 ) + 1.0 / 3.0 ) )

#define FMULS_GEHRD(n_) \
    ( (n_) * ( (n_) * ( 5.0 / 3.0 * (n_) + 0.5 ) - 7.0 / 6.0 ) - 13.0 )
#define FADDS_GEHRD(n_) \
    ( (n_) * ( (n_) * ( 5.0 / 3.0 * (n_) - 1.0 ) - 2.0 / 3.0 ) - 8.0 )

#define FMULS_SYTRD(n_) \
    ( (n_) * ( (n_) * ( 2.0 / 3.0 * (n_) + 2.5 ) - 1.0 / 6.0 ) - 15.0 )
#define FADDS_SYTRD(n_) \
    ( (n_) * ( (n_) * ( 2.0 / 3.0 * (n_) + 1.0 ) - 8.0 / 3.0 ) - 4.0 )
#define FMULS_HETRD FMULS_SYTRD
#define FADDS_HETRD FADDS_SYTRD

/*
 * GEBRD (m_, n_): both counts branch on (m_) >= (n_); the two branches are
 * the same expression with m_ and n_ exchanged.
 */
#define FMULS_GEBRD(m_, n_) \
    ( ((m_) >= (n_)) \
      ? ( (n_) * ( (n_) * ( 2.0 * (m_) - 2.0 / 3.0 * (n_) + 2.0 ) \
                   + 20.0 / 3.0 ) ) \
      : ( (m_) * ( (m_) * ( 2.0 * (n_) - 2.0 / 3.0 * (m_) + 2.0 ) \
                   + 20.0 / 3.0 ) ) )
#define FADDS_GEBRD(m_, n_) \
    ( ((m_) >= (n_)) \
      ? ( (n_) * ( (n_) * ( 2.0 * (m_) - 2.0 / 3.0 * (n_) + 1.0 ) \
                   - (m_) + 5.0 / 3.0 ) ) \
      : ( (m_) * ( (m_) * ( 2.0 * (n_) - 2.0 / 3.0 * (m_) + 1.0 ) \
                   - (n_) + 5.0 / 3.0 ) ) )

/*
 * LARFG (n_): 2*n_ multiplications and n_ additions.
 *
 * The parameter is parenthesized so that an expression argument such as
 * FMULS_LARFG(n - 1) is scaled as a whole; without the parentheses it would
 * expand to (2*n - 1).
 */
#define FMULS_LARFG(n_) (2 * (n_))
#define FADDS_LARFG(n_) (    (n_))


/***************************************************************************//**
                 User-facing macros
*******************************************************************************/
/*
 * Level 1 BLAS
 *
 * Z/C totals are 6 * multiplications + 2 * additions; D/S totals are
 * multiplications + additions.  The size argument is cast to double before
 * it is handed to the FMULS_/FADDS_ macros.
 */
#define FLOPS_ZAXPY(n_) \
    ( 6.0 * FMULS_AXPY((double)(n_)) + 2.0 * FADDS_AXPY((double)(n_)) )
#define FLOPS_CAXPY(n_) FLOPS_ZAXPY(n_)
#define FLOPS_DAXPY(n_) \
    ( FMULS_AXPY((double)(n_)) + FADDS_AXPY((double)(n_)) )
#define FLOPS_SAXPY(n_) FLOPS_DAXPY(n_)

/*
 * Level 2 BLAS
 *
 * Z/C totals are 6 * multiplications + 2 * additions; D/S totals are
 * multiplications + additions.  Size arguments are cast to double first.
 */
#define FLOPS_ZGEMV(m_, n_) \
    ( 6.0 * FMULS_GEMV((double)(m_), (double)(n_)) \
    + 2.0 * FADDS_GEMV((double)(m_), (double)(n_)) )
#define FLOPS_CGEMV(m_, n_) FLOPS_ZGEMV(m_, n_)
#define FLOPS_DGEMV(m_, n_) \
    ( FMULS_GEMV((double)(m_), (double)(n_)) \
    + FADDS_GEMV((double)(m_), (double)(n_)) )
#define FLOPS_SGEMV(m_, n_) FLOPS_DGEMV(m_, n_)

#define FLOPS_ZHEMV(n_) \
    ( 6.0 * FMULS_HEMV((double)(n_)) + 2.0 * FADDS_HEMV((double)(n_)) )
#define FLOPS_CHEMV(n_) FLOPS_ZHEMV(n_)

#define FLOPS_ZSYMV(n_) \
    ( 6.0 * FMULS_SYMV((double)(n_)) + 2.0 * FADDS_SYMV((double)(n_)) )
#define FLOPS_CSYMV(n_) FLOPS_ZSYMV(n_)
#define FLOPS_DSYMV(n_) \
    ( FMULS_SYMV((double)(n_)) + FADDS_SYMV((double)(n_)) )
#define FLOPS_SSYMV(n_) FLOPS_DSYMV(n_)

/*
 * Level 3 BLAS
 *
 * Z/C totals are 6 * multiplications + 2 * additions; D/S totals are
 * multiplications + additions.  Size arguments are cast to double first;
 * side_ is forwarded unchanged.
 */
#define FLOPS_ZGEMM(m_, n_, k_) \
    ( 6.0 * FMULS_GEMM((double)(m_), (double)(n_), (double)(k_)) \
    + 2.0 * FADDS_GEMM((double)(m_), (double)(n_), (double)(k_)) )
#define FLOPS_CGEMM(m_, n_, k_) FLOPS_ZGEMM(m_, n_, k_)
#define FLOPS_DGEMM(m_, n_, k_) \
    ( FMULS_GEMM((double)(m_), (double)(n_), (double)(k_)) \
    + FADDS_GEMM((double)(m_), (double)(n_), (double)(k_)) )
#define FLOPS_SGEMM(m_, n_, k_) FLOPS_DGEMM(m_, n_, k_)

#define FLOPS_ZHEMM(side_, m_, n_) \
    ( 6.0 * FMULS_HEMM(side_, (double)(m_), (double)(n_)) \
    + 2.0 * FADDS_HEMM(side_, (double)(m_), (double)(n_)) )
#define FLOPS_CHEMM(side_, m_, n_) FLOPS_ZHEMM(side_, m_, n_)

#define FLOPS_ZSYMM(side_, m_, n_) \
    ( 6.0 * FMULS_SYMM(side_, (double)(m_), (double)(n_)) \
    + 2.0 * FADDS_SYMM(side_, (double)(m_), (double)(n_)) )
#define FLOPS_CSYMM(side_, m_, n_) FLOPS_ZSYMM(side_, m_, n_)
#define FLOPS_DSYMM(side_, m_, n_) \
    ( FMULS_SYMM(side_, (double)(m_), (double)(n_)) \
    + FADDS_SYMM(side_, (double)(m_), (double)(n_)) )
#define FLOPS_SSYMM(side_, m_, n_) FLOPS_DSYMM(side_, m_, n_)

/*
 * HERK / SYRK / HER2K / SYR2K totals (k_, n_).
 * Z/C: 6 * multiplications + 2 * additions; D/S: multiplications + additions.
 */
#define FLOPS_ZHERK(k_, n_) \
    ( 6.0 * FMULS_HERK((double)(k_), (double)(n_)) \
    + 2.0 * FADDS_HERK((double)(k_), (double)(n_)) )
#define FLOPS_CHERK(k_, n_) FLOPS_ZHERK(k_, n_)

#define FLOPS_ZSYRK(k_, n_) \
    ( 6.0 * FMULS_SYRK((double)(k_), (double)(n_)) \
    + 2.0 * FADDS_SYRK((double)(k_), (double)(n_)) )
#define FLOPS_CSYRK(k_, n_) FLOPS_ZSYRK(k_, n_)
#define FLOPS_DSYRK(k_, n_) \
    ( FMULS_SYRK((double)(k_), (double)(n_)) \
    + FADDS_SYRK((double)(k_), (double)(n_)) )
#define FLOPS_SSYRK(k_, n_) FLOPS_DSYRK(k_, n_)

#define FLOPS_ZHER2K(k_, n_) \
    ( 6.0 * FMULS_HER2K((double)(k_), (double)(n_)) \
    + 2.0 * FADDS_HER2K((double)(k_), (double)(n_)) )
#define FLOPS_CHER2K(k_, n_) FLOPS_ZHER2K(k_, n_)

#define FLOPS_ZSYR2K(k_, n_) \
    ( 6.0 * FMULS_SYR2K((double)(k_), (double)(n_)) \
    + 2.0 * FADDS_SYR2K((double)(k_), (double)(n_)) )
#define FLOPS_CSYR2K(k_, n_) FLOPS_ZSYR2K(k_, n_)
#define FLOPS_DSYR2K(k_, n_) \
    ( FMULS_SYR2K((double)(k_), (double)(n_)) \
    + FADDS_SYR2K((double)(k_), (double)(n_)) )
#define FLOPS_SSYR2K(k_, n_) FLOPS_DSYR2K(k_, n_)

/*
 * TRMM / TRSM totals (side_, m_, n_); side_ is forwarded unchanged.
 * Z/C: 6 * multiplications + 2 * additions; D/S: multiplications + additions.
 */
#define FLOPS_ZTRMM(side_, m_, n_) \
    ( 6.0 * FMULS_TRMM(side_, (double)(m_), (double)(n_)) \
    + 2.0 * FADDS_TRMM(side_, (double)(m_), (double)(n_)) )
#define FLOPS_CTRMM(side_, m_, n_) FLOPS_ZTRMM(side_, m_, n_)
#define FLOPS_DTRMM(side_, m_, n_) \
    ( FMULS_TRMM(side_, (double)(m_), (double)(n_)) \
    + FADDS_TRMM(side_, (double)(m_), (double)(n_)) )
#define FLOPS_STRMM(side_, m_, n_) FLOPS_DTRMM(side_, m_, n_)

#define FLOPS_ZTRSM(side_, m_, n_) \
    ( 6.0 * FMULS_TRSM(side_, (double)(m_), (double)(n_)) \
    + 2.0 * FADDS_TRSM(side_, (double)(m_), (double)(n_)) )
#define FLOPS_CTRSM(side_, m_, n_) FLOPS_ZTRSM(side_, m_, n_)
#define FLOPS_DTRSM(side_, m_, n_) \
    ( FMULS_TRSM(side_, (double)(m_), (double)(n_)) \
    + FADDS_TRSM(side_, (double)(m_), (double)(n_)) )
#define FLOPS_STRSM(side_, m_, n_) FLOPS_DTRSM(side_, m_, n_)

/*
 * Lapack
 *
 * Z/C totals are 6 * multiplications + 2 * additions; D/S totals are
 * multiplications + additions.  Size arguments are cast to double first.
 */
#define FLOPS_ZGETRF(m_, n_) \
    ( 6.0 * FMULS_GETRF((double)(m_), (double)(n_)) \
    + 2.0 * FADDS_GETRF((double)(m_), (double)(n_)) )
#define FLOPS_CGETRF(m_, n_) FLOPS_ZGETRF(m_, n_)
#define FLOPS_DGETRF(m_, n_) \
    ( FMULS_GETRF((double)(m_), (double)(n_)) \
    + FADDS_GETRF((double)(m_), (double)(n_)) )
#define FLOPS_SGETRF(m_, n_) FLOPS_DGETRF(m_, n_)

#define FLOPS_ZGETRI(n_) \
    ( 6.0 * FMULS_GETRI((double)(n_)) + 2.0 * FADDS_GETRI((double)(n_)) )
#define FLOPS_CGETRI(n_) FLOPS_ZGETRI(n_)
#define FLOPS_DGETRI(n_) \
    ( FMULS_GETRI((double)(n_)) + FADDS_GETRI((double)(n_)) )
#define FLOPS_SGETRI(n_) FLOPS_DGETRI(n_)

#define FLOPS_ZGETRS(n_, nrhs_) \
    ( 6.0 * FMULS_GETRS((double)(n_), (double)(nrhs_)) \
    + 2.0 * FADDS_GETRS((double)(n_), (double)(nrhs_)) )
#define FLOPS_CGETRS(n_, nrhs_) FLOPS_ZGETRS(n_, nrhs_)
#define FLOPS_DGETRS(n_, nrhs_) \
    ( FMULS_GETRS((double)(n_), (double)(nrhs_)) \
    + FADDS_GETRS((double)(n_), (double)(nrhs_)) )
#define FLOPS_SGETRS(n_, nrhs_) FLOPS_DGETRS(n_, nrhs_)

/*
 * POTRF / POTRI (n_) and POTRS (n_, nrhs_) totals.
 * Z/C: 6 * multiplications + 2 * additions; D/S: multiplications + additions.
 */
#define FLOPS_ZPOTRF(n_) \
    ( 6.0 * FMULS_POTRF((double)(n_)) + 2.0 * FADDS_POTRF((double)(n_)) )
#define FLOPS_CPOTRF(n_) FLOPS_ZPOTRF(n_)
#define FLOPS_DPOTRF(n_) \
    ( FMULS_POTRF((double)(n_)) + FADDS_POTRF((double)(n_)) )
#define FLOPS_SPOTRF(n_) FLOPS_DPOTRF(n_)

#define FLOPS_ZPOTRI(n_) \
    ( 6.0 * FMULS_POTRI((double)(n_)) + 2.0 * FADDS_POTRI((double)(n_)) )
#define FLOPS_CPOTRI(n_) FLOPS_ZPOTRI(n_)
#define FLOPS_DPOTRI(n_) \
    ( FMULS_POTRI((double)(n_)) + FADDS_POTRI((double)(n_)) )
#define FLOPS_SPOTRI(n_) FLOPS_DPOTRI(n_)

#define FLOPS_ZPOTRS(n_, nrhs_) \
    ( 6.0 * FMULS_POTRS((double)(n_), (double)(nrhs_)) \
    + 2.0 * FADDS_POTRS((double)(n_), (double)(nrhs_)) )
#define FLOPS_CPOTRS(n_, nrhs_) FLOPS_ZPOTRS(n_, nrhs_)
#define FLOPS_DPOTRS(n_, nrhs_) \
    ( FMULS_POTRS((double)(n_), (double)(nrhs_)) \
    + FADDS_POTRS((double)(n_), (double)(nrhs_)) )
#define FLOPS_SPOTRS(n_, nrhs_) FLOPS_DPOTRS(n_, nrhs_)

/*
 * GEQRF / GEQRT / GEQLF totals (m_, n_).
 * Z/C: 6 * multiplications + 2 * additions; D/S: multiplications + additions.
 */
#define FLOPS_ZGEQRF(m_, n_) \
    ( 6.0 * FMULS_GEQRF((double)(m_), (double)(n_)) \
    + 2.0 * FADDS_GEQRF((double)(m_), (double)(n_)) )
#define FLOPS_CGEQRF(m_, n_) FLOPS_ZGEQRF(m_, n_)
#define FLOPS_DGEQRF(m_, n_) \
    ( FMULS_GEQRF((double)(m_), (double)(n_)) \
    + FADDS_GEQRF((double)(m_), (double)(n_)) )
#define FLOPS_SGEQRF(m_, n_) FLOPS_DGEQRF(m_, n_)

#define FLOPS_ZGEQRT(m_, n_) \
    ( 6.0 * FMULS_GEQRT((double)(m_), (double)(n_)) \
    + 2.0 * FADDS_GEQRT((double)(m_), (double)(n_)) )
#define FLOPS_CGEQRT(m_, n_) FLOPS_ZGEQRT(m_, n_)
#define FLOPS_DGEQRT(m_, n_) \
    ( FMULS_GEQRT((double)(m_), (double)(n_)) \
    + FADDS_GEQRT((double)(m_), (double)(n_)) )
#define FLOPS_SGEQRT(m_, n_) FLOPS_DGEQRT(m_, n_)

#define FLOPS_ZGEQLF(m_, n_) \
    ( 6.0 * FMULS_GEQLF((double)(m_), (double)(n_)) \
    + 2.0 * FADDS_GEQLF((double)(m_), (double)(n_)) )
#define FLOPS_CGEQLF(m_, n_) FLOPS_ZGEQLF(m_, n_)
#define FLOPS_DGEQLF(m_, n_) \
    ( FMULS_GEQLF((double)(m_), (double)(n_)) \
    + FADDS_GEQLF((double)(m_), (double)(n_)) )
#define FLOPS_SGEQLF(m_, n_) FLOPS_DGEQLF(m_, n_)

/*
 * GERQF / GELQF totals (m_, n_).
 * Z/C: 6 * multiplications + 2 * additions; D/S: multiplications + additions.
 */
#define FLOPS_ZGERQF(m_, n_) \
    ( 6.0 * FMULS_GERQF((double)(m_), (double)(n_)) \
    + 2.0 * FADDS_GERQF((double)(m_), (double)(n_)) )
#define FLOPS_CGERQF(m_, n_) FLOPS_ZGERQF(m_, n_)
#define FLOPS_DGERQF(m_, n_) \
    ( FMULS_GERQF((double)(m_), (double)(n_)) \
    + FADDS_GERQF((double)(m_), (double)(n_)) )
#define FLOPS_SGERQF(m_, n_) FLOPS_DGERQF(m_, n_)

#define FLOPS_ZGELQF(m_, n_) \
    ( 6.0 * FMULS_GELQF((double)(m_), (double)(n_)) \
    + 2.0 * FADDS_GELQF((double)(m_), (double)(n_)) )
#define FLOPS_CGELQF(m_, n_) FLOPS_ZGELQF(m_, n_)
#define FLOPS_DGELQF(m_, n_) \
    ( FMULS_GELQF((double)(m_), (double)(n_)) \
    + FADDS_GELQF((double)(m_), (double)(n_)) )
#define FLOPS_SGELQF(m_, n_) FLOPS_DGELQF(m_, n_)

/*
 * UNGQR / UNGQL / UNGRQ / UNGLQ totals (m_, n_, k_).  The real-precision
 * names are DORGxx / SORGxx and use the same UNGxx counts.
 * Z/C: 6 * multiplications + 2 * additions; D/S: multiplications + additions.
 */
#define FLOPS_ZUNGQR(m_, n_, k_) \
    ( 6.0 * FMULS_UNGQR((double)(m_), (double)(n_), (double)(k_)) \
    + 2.0 * FADDS_UNGQR((double)(m_), (double)(n_), (double)(k_)) )
#define FLOPS_CUNGQR(m_, n_, k_) FLOPS_ZUNGQR(m_, n_, k_)
#define FLOPS_DORGQR(m_, n_, k_) \
    ( FMULS_UNGQR((double)(m_), (double)(n_), (double)(k_)) \
    + FADDS_UNGQR((double)(m_), (double)(n_), (double)(k_)) )
#define FLOPS_SORGQR(m_, n_, k_) FLOPS_DORGQR(m_, n_, k_)

#define FLOPS_ZUNGQL(m_, n_, k_) \
    ( 6.0 * FMULS_UNGQL((double)(m_), (double)(n_), (double)(k_)) \
    + 2.0 * FADDS_UNGQL((double)(m_), (double)(n_), (double)(k_)) )
#define FLOPS_CUNGQL(m_, n_, k_) FLOPS_ZUNGQL(m_, n_, k_)
#define FLOPS_DORGQL(m_, n_, k_) \
    ( FMULS_UNGQL((double)(m_), (double)(n_), (double)(k_)) \
    + FADDS_UNGQL((double)(m_), (double)(n_), (double)(k_)) )
#define FLOPS_SORGQL(m_, n_, k_) FLOPS_DORGQL(m_, n_, k_)

#define FLOPS_ZUNGRQ(m_, n_, k_) \
    ( 6.0 * FMULS_UNGRQ((double)(m_), (double)(n_), (double)(k_)) \
    + 2.0 * FADDS_UNGRQ((double)(m_), (double)(n_), (double)(k_)) )
#define FLOPS_CUNGRQ(m_, n_, k_) FLOPS_ZUNGRQ(m_, n_, k_)
#define FLOPS_DORGRQ(m_, n_, k_) \
    ( FMULS_UNGRQ((double)(m_), (double)(n_), (double)(k_)) \
    + FADDS_UNGRQ((double)(m_), (double)(n_), (double)(k_)) )
#define FLOPS_SORGRQ(m_, n_, k_) FLOPS_DORGRQ(m_, n_, k_)

#define FLOPS_ZUNGLQ(m_, n_, k_) \
    ( 6.0 * FMULS_UNGLQ((double)(m_), (double)(n_), (double)(k_)) \
    + 2.0 * FADDS_UNGLQ((double)(m_), (double)(n_), (double)(k_)) )
#define FLOPS_CUNGLQ(m_, n_, k_) FLOPS_ZUNGLQ(m_, n_, k_)
#define FLOPS_DORGLQ(m_, n_, k_) \
    ( FMULS_UNGLQ((double)(m_), (double)(n_), (double)(k_)) \
    + FADDS_UNGLQ((double)(m_), (double)(n_), (double)(k_)) )
#define FLOPS_SORGLQ(m_, n_, k_) FLOPS_DORGLQ(m_, n_, k_)

/*
 * UNMQR / UNMQL / UNMRQ / UNMLQ totals (m_, n_, k_, side_).  side_ is the
 * last parameter and is forwarded in parentheses; the sizes are cast to
 * double.  The real-precision names are DORMxx / SORMxx.
 * Z/C: 6 * multiplications + 2 * additions; D/S: multiplications + additions.
 */
#define FLOPS_ZUNMQR(m_, n_, k_, side_) \
    ( 6.0 * FMULS_UNMQR((double)(m_), (double)(n_), (double)(k_), (side_)) \
    + 2.0 * FADDS_UNMQR((double)(m_), (double)(n_), (double)(k_), (side_)) )
#define FLOPS_CUNMQR(m_, n_, k_, side_) FLOPS_ZUNMQR(m_, n_, k_, side_)
#define FLOPS_DORMQR(m_, n_, k_, side_) \
    ( FMULS_UNMQR((double)(m_), (double)(n_), (double)(k_), (side_)) \
    + FADDS_UNMQR((double)(m_), (double)(n_), (double)(k_), (side_)) )
#define FLOPS_SORMQR(m_, n_, k_, side_) FLOPS_DORMQR(m_, n_, k_, side_)

#define FLOPS_ZUNMQL(m_, n_, k_, side_) \
    ( 6.0 * FMULS_UNMQL((double)(m_), (double)(n_), (double)(k_), (side_)) \
    + 2.0 * FADDS_UNMQL((double)(m_), (double)(n_), (double)(k_), (side_)) )
#define FLOPS_CUNMQL(m_, n_, k_, side_) FLOPS_ZUNMQL(m_, n_, k_, side_)
#define FLOPS_DORMQL(m_, n_, k_, side_) \
    ( FMULS_UNMQL((double)(m_), (double)(n_), (double)(k_), (side_)) \
    + FADDS_UNMQL((double)(m_), (double)(n_), (double)(k_), (side_)) )
#define FLOPS_SORMQL(m_, n_, k_, side_) FLOPS_DORMQL(m_, n_, k_, side_)

#define FLOPS_ZUNMRQ(m_, n_, k_, side_) \
    ( 6.0 * FMULS_UNMRQ((double)(m_), (double)(n_), (double)(k_), (side_)) \
    + 2.0 * FADDS_UNMRQ((double)(m_), (double)(n_), (double)(k_), (side_)) )
#define FLOPS_CUNMRQ(m_, n_, k_, side_) FLOPS_ZUNMRQ(m_, n_, k_, side_)
#define FLOPS_DORMRQ(m_, n_, k_, side_) \
    ( FMULS_UNMRQ((double)(m_), (double)(n_), (double)(k_), (side_)) \
    + FADDS_UNMRQ((double)(m_), (double)(n_), (double)(k_), (side_)) )
#define FLOPS_SORMRQ(m_, n_, k_, side_) FLOPS_DORMRQ(m_, n_, k_, side_)

#define FLOPS_ZUNMLQ(m_, n_, k_, side_) \
    ( 6.0 * FMULS_UNMLQ((double)(m_), (double)(n_), (double)(k_), (side_)) \
    + 2.0 * FADDS_UNMLQ((double)(m_), (double)(n_), (double)(k_), (side_)) )
#define FLOPS_CUNMLQ(m_, n_, k_, side_) FLOPS_ZUNMLQ(m_, n_, k_, side_)
#define FLOPS_DORMLQ(m_, n_, k_, side_) \
    ( FMULS_UNMLQ((double)(m_), (double)(n_), (double)(k_), (side_)) \
    + FADDS_UNMLQ((double)(m_), (double)(n_), (double)(k_), (side_)) )
#define FLOPS_SORMLQ(m_, n_, k_, side_) FLOPS_DORMLQ(m_, n_, k_, side_)

/*
 * GEQRS totals (m_, n_, nrhs_).
 * Z/C: 6 * multiplications + 2 * additions; D/S: multiplications + additions.
 */
#define FLOPS_ZGEQRS(m_, n_, nrhs_) \
    ( 6.0 * FMULS_GEQRS((double)(m_), (double)(n_), (double)(nrhs_)) \
    + 2.0 * FADDS_GEQRS((double)(m_), (double)(n_), (double)(nrhs_)) )
#define FLOPS_CGEQRS(m_, n_, nrhs_) FLOPS_ZGEQRS(m_, n_, nrhs_)
#define FLOPS_DGEQRS(m_, n_, nrhs_) \
    ( FMULS_GEQRS((double)(m_), (double)(n_), (double)(nrhs_)) \
    + FADDS_GEQRS((double)(m_), (double)(n_), (double)(nrhs_)) )
#define FLOPS_SGEQRS(m_, n_, nrhs_) FLOPS_DGEQRS(m_, n_, nrhs_)

/*
 * TRTRI / GEHRD totals (n_).
 * Z/C: 6 * multiplications + 2 * additions; D/S: multiplications + additions.
 */
#define FLOPS_ZTRTRI(n_) \
    ( 6.0 * FMULS_TRTRI((double)(n_)) + 2.0 * FADDS_TRTRI((double)(n_)) )
#define FLOPS_CTRTRI(n_) FLOPS_ZTRTRI(n_)
#define FLOPS_DTRTRI(n_) \
    ( FMULS_TRTRI((double)(n_)) + FADDS_TRTRI((double)(n_)) )
#define FLOPS_STRTRI(n_) FLOPS_DTRTRI(n_)

#define FLOPS_ZGEHRD(n_) \
    ( 6.0 * FMULS_GEHRD((double)(n_)) + 2.0 * FADDS_GEHRD((double)(n_)) )
#define FLOPS_CGEHRD(n_) FLOPS_ZGEHRD(n_)
#define FLOPS_DGEHRD(n_) \
    ( FMULS_GEHRD((double)(n_)) + FADDS_GEHRD((double)(n_)) )
#define FLOPS_SGEHRD(n_) FLOPS_DGEHRD(n_)

/*
 * HETRD / SYTRD (n_) and GEBRD (m_, n_) totals.
 * Z/C: 6 * multiplications + 2 * additions; D/S: multiplications + additions.
 */
#define FLOPS_ZHETRD(n_) \
    ( 6.0 * FMULS_HETRD((double)(n_)) + 2.0 * FADDS_HETRD((double)(n_)) )
#define FLOPS_CHETRD(n_) FLOPS_ZHETRD(n_)

#define FLOPS_ZSYTRD(n_) \
    ( 6.0 * FMULS_SYTRD((double)(n_)) + 2.0 * FADDS_SYTRD((double)(n_)) )
#define FLOPS_CSYTRD(n_) FLOPS_ZSYTRD(n_)
#define FLOPS_DSYTRD(n_) \
    ( FMULS_SYTRD((double)(n_)) + FADDS_SYTRD((double)(n_)) )
#define FLOPS_SSYTRD(n_) FLOPS_DSYTRD(n_)

#define FLOPS_ZGEBRD(m_, n_) \
    ( 6.0 * FMULS_GEBRD((double)(m_), (double)(n_)) \
    + 2.0 * FADDS_GEBRD((double)(m_), (double)(n_)) )
#define FLOPS_CGEBRD(m_, n_) FLOPS_ZGEBRD(m_, n_)
#define FLOPS_DGEBRD(m_, n_) \
    ( FMULS_GEBRD((double)(m_), (double)(n_)) \
    + FADDS_GEBRD((double)(m_), (double)(n_)) )
#define FLOPS_SGEBRD(m_, n_) FLOPS_DGEBRD(m_, n_)

/*
 * LARFG totals (n_).
 * Z/C: 6 * multiplications + 2 * additions; D/S: multiplications + additions.
 *
 * The argument is parenthesized before the cast, as in every other FLOPS_
 * macro of this file, so that an expression argument such as
 * FLOPS_DLARFG(n - 1) converts the whole expression to double instead of
 * only its first operand.
 */
#define FLOPS_ZLARFG(n_) (6. * FMULS_LARFG((double)(n_)) + 2. * FADDS_LARFG((double)(n_)) )
#define FLOPS_CLARFG(n_) (6. * FMULS_LARFG((double)(n_)) + 2. * FADDS_LARFG((double)(n_)) )
#define FLOPS_DLARFG(n_) (     FMULS_LARFG((double)(n_)) +      FADDS_LARFG((double)(n_)) )
#define FLOPS_SLARFG(n_) (     FMULS_LARFG((double)(n_)) +      FADDS_LARFG((double)(n_)) )

#endif /* MAGMA_FLOPS_H */