File: codegen.cpp

package info (click to toggle)
aqsis 1.8.2-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 18,340 kB
  • ctags: 19,671
  • sloc: cpp: 138,267; python: 12,880; ansic: 4,946; xml: 3,415; yacc: 1,887; sh: 406; makefile: 385; lex: 280
file content (282 lines) | stat: -rw-r--r-- 8,160 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
#include <cassert>

#include "Array.h"
#include "Binders.h"
#include "Functionals.h"
#include "uvselect.h"
#include "applyvarying.h"

// Template-based code generator prototype for the aqsis shader VM
//
// The code generator is designed to be as simple as possible, while hopefully
// having sufficient generality to allow any of the shadeops in the real
// shaderVM to be constructed with it.  There is one obvious caveat to this:
// the prototype only deals with SVars (ie, shader data) of a single type.
//
// Two things will need to be changed to fix this problem:
// 1) Parametrization of a lot of the basic templates based on argument & return types
// 2) Introduction of a superclass for all shader data arrays, and casting to
//    the appropriate subclass array type for particular shadeops.  This can be
//    done via a kind of specialized substitute for dynamic_cast, or simply
//    using a blatant static_cast, depending on the level of safety we want.


//------------------------------------------------------------------------------
// Some hand-coded shadeop functions to benchmark our template-based code
// generator against.

void SO_add_byhand(const SVarBase& a, const SVarBase& b, SVarBase& result)
{
	int uniformVaryingIndicator = (a.isUniform() << 2) + (b.isUniform() << 1) + result.isUniform();
	switch(uniformVaryingIndicator)
	{
		case VarInd_uuu:
			uniform(result).value() = uniform(a).value() + uniform(b).value();
			break;
		case VarInd_uuv:
			{
				double r0 = uniform(a).value() + uniform(b).value();
				SVar& resV = varying(result);
				for(int i = 0, end = resV.length(); i < end; ++i)
					resV[i] = r0;
			}
			break;
		case VarInd_uvv:
			{
				double a0 = uniform(a).value();
				const SVar& bV = varying(b);
				SVar& resV = varying(result);
				for(int i = 0, end = resV.length(); i < end; ++i)
					resV[i] = a0 + bV[i];
			}
			break;
		case VarInd_vuv:
			{
				const SVar& aV = varying(a);
				double b0 = uniform(b).value();
				SVar& resV = varying(result);
				for(int i = 0, end = resV.length(); i < end; ++i)
					resV[i] = aV[i] + b0;
			}
			break;
		case VarInd_vvv:
			{
				const SVar& aV = varying(a);
				const SVar& bV = varying(b);
				SVar& resV = varying(result);
				for(int i = 0, end = resV.length(); i < end; ++i)
					resV[i] = aV[i] + bV[i];
			}
			break;
		default:
			// Assiging varying to uniform doesn't make sense & should be a
			// compiler error.
			assert(0);
			break;
	}
}

// Hand-written division shadeop: result = a / b.
//
// Each argument is either uniform (one value, read via uniform(x).value())
// or varying (an array, read via varying(x)[i]).  The three isUniform() flags
// are packed into one integer and used to pick a specialised code path.
// Varying results are filled over result's own length.  Combinations that
// would store a varying operand into a uniform result hit the assert in the
// default branch.
void SO_div_byhand(const SVarBase& a, const SVarBase& b, SVarBase& result)
{
	int uniformVaryingIndicator = (a.isUniform() << 2) + (b.isUniform() << 1) + result.isUniform();
	switch(uniformVaryingIndicator)
	{
		case VarInd_uuu:
			// Everything uniform: a single scalar division.  (This branch
			// used to add the operands, copied from the add shadeop.)
			uniform(result).value() = uniform(a).value() / uniform(b).value();
			break;
		case VarInd_uuv:
			{
				// Uniform operands, varying result: divide once, then
				// broadcast the quotient.
				double r0 = uniform(a).value() / uniform(b).value();
				SVar& resV = varying(result);
				for(int i = 0, end = resV.length(); i < end; ++i)
					resV[i] = r0;
			}
			break;
		case VarInd_uvv:
			{
				double a0 = uniform(a).value();
				const SVar& bV = varying(b);
				SVar& resV = varying(result);
				for(int i = 0, end = resV.length(); i < end; ++i)
					resV[i] = a0 / bV[i];
			}
			break;
		case VarInd_vuv:
			{
				const SVar& aV = varying(a);
				// The divisor is constant over the whole loop, so take its
				// reciprocal once and multiply per element instead of
				// dividing per element.  (The reciprocal was previously
				// missing, which made this branch compute a*b.)
				double invb0 = 1.0 / uniform(b).value();
				SVar& resV = varying(result);
				for(int i = 0, end = resV.length(); i < end; ++i)
					resV[i] = aV[i] * invb0;
			}
			break;
		case VarInd_vvv:
			{
				const SVar& aV = varying(a);
				const SVar& bV = varying(b);
				SVar& resV = varying(result);
				for(int i = 0, end = resV.length(); i < end; ++i)
					resV[i] = aV[i] / bV[i];
			}
			break;
		default:
			// Assigning varying to uniform doesn't make sense & should be a
			// compiler error.
			assert(0);
			break;
	}
}

void SO_sin_byhand(const SVarBase& a, SVarBase& result)
{
	int uniformVaryingIndicator = (a.isUniform() << 1) + result.isUniform();
	switch(uniformVaryingIndicator)
	{
		case VarInd_uu:
			uniform(result).value() = std::sin(uniform(a).value());
			break;
		case VarInd_uv:
			{
				double r0 = std::sin(uniform(a).value());
				SVar& resV = varying(result);
				for(int i = 0, end = resV.length(); i < end; ++i)
					resV[i] = r0;
			}
			break;
		case VarInd_vv:
			{
				const SVar& aV = varying(a);
				SVar& resV = varying(result);
				for(int i = 0, end = resV.length(); i < end; ++i)
					resV[i] = std::sin(aV[i]);
			}
			break;
		default:
			// Assiging varying to uniform doesn't make sense & should be a
			// compiler error.
			assert(0);
			break;
	}
}

void SO_diff_byhand(const SVarBase& a, SVarBase& result)
{
	int uniformVaryingIndicator = (a.isUniform() << 1) + result.isUniform();
	switch(uniformVaryingIndicator)
	{
		case VarInd_uu:
			uniform(result).value() = 0;
			break;
		case VarInd_uv:
			{
				SVar& resV = varying(result);
				for(int i = 0, end = resV.length(); i < end; ++i)
					resV[i] = 0;
			}
			break;
		case VarInd_vv:
			{
				const SVar& aV = varying(a);
				SVar& resV = varying(result);
				resV[0] = aV[1] - aV[0];
				for(int i = 1, end = resV.length(); i < end; ++i)
					resV[i] = aV[i] - aV[i-1];
			}
			break;
		default:
			// Assiging varying to uniform doesn't make sense & should be a
			// compiler error.
			assert(0);
			break;
	}
}


//------------------------------------------------------------------------------
// The following are shadeops generated using the templated code generator
// instead of written out explicitly.
// Addition shadeop with the same signature as SO_add_byhand, implemented by
// forwarding all three arguments to SO_ApplyFxnl instantiated with Add.
void SO_add_template(const SVarBase& a, const SVarBase& b, SVarBase& result)
{
	SO_ApplyFxnl<Add>(a, b, result);
}

// Division shadeop with the same signature as SO_div_byhand, implemented by
// forwarding all three arguments to SO_ApplyFxnl instantiated with Div.
void SO_div_template(const SVarBase& a, const SVarBase& b, SVarBase& result)
{
	SO_ApplyFxnl<Div>(a, b, result);
}

// Sine shadeop with the same signature as SO_sin_byhand, implemented by
// forwarding both arguments to SO_ApplyFxnl instantiated with Sin.
void SO_sin_template(const SVarBase& a, SVarBase& result)
{
	SO_ApplyFxnl<Sin>(a, result);
}

// Difference shadeop with the same signature as SO_diff_byhand, implemented
// by forwarding both arguments to SO_ApplyFxnl instantiated with Diff.
void SO_diff_template(const SVarBase& a, SVarBase& result)
{
	SO_ApplyFxnl<Diff>(a, result);
}


//------------------------------------------------------------------------------
// Test main
// Benchmark driver.  Sets up three varying variables of arrayLength elements
// and three uniform variables, then runs a loop of numIters iterations.
// Every shadeop call in the loop body is commented out and tagged with a
// "//##bench <name>" marker, so as written the loop does no work.
// NOTE(review): presumably an external benchmark script enables one tagged
// line per build, driven by the ##description / ##CXXFLAGS tags at the end of
// this file - confirm against the harness before editing the tagged lines.
int main()
{
	const int arrayLength = 200;
	// a, b, r are varying.
	SVar a(arrayLength);
	SVar b(arrayLength);
	SVar r(arrayLength);

	// aU, bU, rU are uniform
	SVarUniform aU;
	SVarUniform bU;
	SVarUniform rU;

	const int numIters = 10000000;
	for(int j = 0; j < numIters; ++j)
	{
		// Benchmarks for addition.
		// SO_add_byhand(a,b,r);      //##bench SO_add_byhand
		// SO_add_template(a,b,r);    //##bench SO_add_template

		// Benchmarks for division, showing optimization.
		// (Varying a divided by uniform bU into varying r.)
		// SO_div_byhand(a,bU,r);      //##bench SO_div_byhand
		// SO_div_template(a,bU,r);    //##bench SO_div_template

		// Benchmarks for sin()
		// SO_sin_byhand(a, r);         //##bench SO_sin_byhand
		// SO_sin_template(a, r);       //##bench SO_sin_template

		// Benchmarks for a non-local operation (derivative)
		// NOTE(review): both calls below pass the uniform variables aU and
		// rU, so they would presumably exercise only the uniform branch
		// rather than the differencing loop - confirm whether (a, r) was
		// intended.
		// SO_diff_byhand(aU, rU);         //##bench SO_diff_byhand
		// SO_diff_template(aU, rU);       //##bench SO_diff_template
	}

	return 0;
}

//------------------------------------------------------------------------------
// Benchmark descriptions:

//##description SO_add_byhand hand-optimized version of SO_add
//##description SO_add_template SO_add version using templated code generation.

//##description SO_div_byhand hand-optimized version of SO_div
//##description SO_div_template SO_div version using templated code generation.

//##description SO_sin_byhand hand-optimized version of SO_sin
//##description SO_sin_template version of SO_sin using templated code generation.

//##description SO_diff_byhand hand optimized version of SO_diff.  This has an unfair advantage, since this version of the code won't work for shader running states.
//##description SO_diff_template version of SO_diff using templated code generation.

// Compiler options
//##CXXFLAGS -DNDEBUG -O3
//##//CXX g++-4.1.2


//------------------------------------------------------------------------------
// Running the benchmarks above on an athlon64 with g++-3.4.6 shows that the
// template code generator is working exactly as we'd like it to.  That is, all
// the versions using templates are just as fast as - ok, not quite, but very
// close to - the hand-coded versions at the top of this file.