File: testGemmR.cpp

package info (click to toggle)
dmrgpp 6.06-2
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 113,900 kB
  • sloc: cpp: 80,986; perl: 14,772; ansic: 2,923; makefile: 83; sh: 17
file content (156 lines) | stat: -rw-r--r-- 4,420 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#include "GemmR.h"
#include "Matrix.h"
#include <iostream>

typedef std::complex<double> zcomplex;

// make_val<T>(x, y) builds a scalar of type T from a (real, imaginary)
// pair of doubles, so the same test code can fill real and complex matrices.

// Generic fallback: converts x to T; the imaginary part y is ignored.
template <typename T>
T make_val(double const x, double const y)
{
	(void)y;
	return x;
}

// double: only the real part x is meaningful, y is dropped.
template <>
double make_val<double>(double const x, double const y)
{
	(void)y;
	return x;
}

// zcomplex: x becomes the real part and y the imaginary part.
template <>
zcomplex make_val<zcomplex>(double const x, double const y)
{
	return zcomplex(x, y);
}

// Compares PsimagLite::GemmR against psimag::BLAS::GEMM for scalar type T.
//
// For every (m, n, k) with m in 1..Mmax, n in 1..Nmax, k in 1..Kmax (each
// stepping by nb) and every combination of transA/transB in {'N','T','C'},
// the routine fills A, B and C with deterministic values, runs both
// implementations on identical inputs and compares the two result matrices
// entry by entry.
//
// Parameters:
//   Mmax, Nmax, Kmax  upper bounds (inclusive) for the m, n, k dimensions.
//   nb                loop stride for m, n and k; also forwarded as the second
//                     GemmR constructor argument (presumably its block size;
//                     confirm against GemmR.h). Must be >= 1, otherwise the
//                     loops below never advance.
//   needsPrinting     when true, one report line is printed for every case,
//                     not only for failing ones; also forwarded to GemmR.
//
// Returns the number of (transA, transB, m, n, k) cases whose maximum
// absolute entry-wise difference is not below the tolerance. A NaN
// difference counts as a failure.
template <typename T>
int test_GEMMR(int const Mmax, int const Nmax, int const Kmax, int const nb, bool needsPrinting)
{
	int nerrors = 0;

	char const trans_table[3] = { 'N', 'T', 'C' };

	// Absolute tolerance on the largest entry-wise difference.
	double const tol = 0.0000001;

	T const alpha = make_val<T>(1.1, 2.1);
	T const beta = make_val<T>(3.1, 4.1);

	PsimagLite::GemmR<T> gemmR(
	    needsPrinting, nb, PsimagLite::Concurrency::codeSectionParams.npthreads);

	for (int k = 1; k <= Kmax; k += nb) {
		for (int n = 1; n <= Nmax; n += nb) {
			for (int m = 1; m <= Mmax; m += nb) {
				for (int itransB = 0; itransB < 3; itransB++) {
					for (int itransA = 0; itransA < 3; itransA++) {

						char const transA = trans_table[itransA];
						char const transB = trans_table[itransB];

						bool const is_transA = (transA == 'T') || (transA == 't');
						bool const is_transB = (transB == 'T') || (transB == 't');
						bool const is_conjA = (transA == 'C') || (transA == 'c');
						bool const is_conjB = (transB == 'C') || (transB == 'c');
						bool const is_notransA = (!is_transA) && (!is_conjA);
						bool const is_notransB = (!is_transB) && (!is_conjB);

						// Stored shapes: op(A) is m x k and op(B) is k x n, so a
						// transposed/conjugated operand is stored with swapped dimensions.
						int const mC = m;
						int const nC = n;
						int const mA = (is_notransA) ? mC : k;
						int const nA = (is_notransA) ? k : mC;
						int const mB = (is_notransB) ? k : nC;
						int const nB = (is_notransB) ? nC : k;

						PsimagLite::Matrix<T> C(mC, nC);
						PsimagLite::Matrix<T> C_gemmr(mC, nC);
						PsimagLite::Matrix<T> A(mA, nA);
						PsimagLite::Matrix<T> B(mB, nB);

						// Leading dimensions equal the row counts.
						int const ldA = mA;
						int const ldB = mB;
						int const ldC = mC;

						// Both result matrices start from the same C because beta != 0.
						for (int j = 0; j < nC; j++) {
							for (int i = 0; i < mC; i++) {
								T cij = make_val<T>(1.0 * (i + j) / (mC + nC),
								    1.0 * i * j / (mC * nC));
								C(i, j) = cij;
								C_gemmr(i, j) = cij;
							}
						}

						for (int j = 0; j < nA; j++) {
							for (int i = 0; i < mA; i++) {
								T aij = make_val<T>(-1.0 * (i + j + 1), 1.0 * (j - i + 1));
								A(i, j) = aij;
							}
						}

						for (int j = 0; j < nB; j++) {
							for (int i = 0; i < mB; i++) {
								T bij = make_val<T>(1.0 * (i + j + 1) / (mB * nB),
								    -1.0 * (j - i + 1) / (mB * nB));
								B(i, j) = bij;
							}
						}

						// Implementation under test.
						gemmR(transA, transB, m, n, k, alpha, &(A(0, 0)), ldA, &(B(0, 0)), ldB, beta, &(C_gemmr(0, 0)), ldC);

						// Reference result.
						psimag::BLAS::GEMM(transA, transB, m, n, k, alpha, &(A(0, 0)), ldA, &(B(0, 0)), ldB, beta, &(C(0, 0)), ldC);

						double max_err = 0;
						double c_norm = 0;
						for (int j = 0; j < nC; j++) {
							for (int i = 0; i < mC; i++) {
								double const err = std::abs(C(i, j) - C_gemmr(i, j));
								// std::max(max_err, err) would silently drop a NaN err
								// (every comparison with NaN is false), letting a NaN
								// result pass. Make NaN sticky instead: once max_err is
								// NaN neither condition can hold for a finite err, and
								// the final "max_err < tol" check then fails.
								if ((err > max_err) || (err != err)) {
									max_err = err;
								}
								c_norm += std::abs(C(i, j));
							}
						}

						bool const isok = (max_err < tol);
						if (!isok) {
							nerrors++;
						}
						if ((!isok) || needsPrinting) {
							std::cout << " transA " << transA << " transB " << transB << " m "
								  << m << " n " << n << " k " << k << " max_err "
								  << max_err << " c_norm " << c_norm << "\n";
						}
					}
				}
			}
		}
	}

	return (nerrors);
}

// Test driver: runs test_GEMMR for double and, only if that reports no
// errors, for zcomplex.
//
// Command line: <program> nthreads [nb] [debug]
//   nthreads  passed to the PsimagLite::Concurrency constructor.
//   nb        stride/block argument handed to test_GEMMR (default 99);
//             must be a positive integer.
//   debug     a value > 0 turns on per-case printing.
//
// Throws PsimagLite::RuntimeError when nthreads is missing or nb is not
// positive. Returns 0 when every comparison passed and 1 otherwise, so that
// scripts can detect failures from the exit status.
int main(int argc, char** argv)
{
	int const Nmax = 300;
	int const Mmax = 301;
	int const Kmax = 302;
	int nerr_zcomplex = 0;

	if (argc < 2)
		throw PsimagLite::RuntimeError("USAGE: " + PsimagLite::String(argv[0]) + " nthreads [nb] [debug]\n");

	int nthreads = atoi(argv[1]);

	int const nb = (argc >= 3) ? atoi(argv[2]) : 99;
	// atoi yields 0 for non-numeric text; a stride of zero or less would make
	// the m/n/k loops in test_GEMMR spin forever, so reject it up front.
	if (nb < 1)
		throw PsimagLite::RuntimeError(PsimagLite::String(argv[0]) + ": nb must be a positive integer\n");

	// ">= 4" (not "== 4") so that extra trailing arguments do not silently
	// disable the debug flag; mirrors the ">= 3" test used for nb.
	const bool needsPrinting = (argc >= 4) ? atoi(argv[3]) > 0 : false;

	PsimagLite::Concurrency concurrency(&argc, &argv, nthreads);

	int nerr_double = test_GEMMR<double>(Mmax, Nmax, Kmax, nb, needsPrinting);
	if (nerr_double == 0) {
		nerr_zcomplex = test_GEMMR<zcomplex>(Mmax, Nmax, Kmax, nb, needsPrinting);
	}

	bool const all_passed = (nerr_double == 0) && (nerr_zcomplex == 0);
	if (all_passed) {
		std::cout << "ALL PASSED "
			  << "\n";
	} else {
		std::cout << " nerr_double = " << nerr_double
			  << " nerr_zcomplex = " << nerr_zcomplex << "\n";
	}

	return all_passed ? 0 : 1;
}