File: anova_2.c

package info (click to toggle)
tamuanova 0.2-2
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 948 kB
  • sloc: sh: 7,281; ansic: 703; makefile: 31
file content (511 lines) | stat: -rw-r--r-- 15,498 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
#ifndef __GSL_TAMU_ANOVA_TWOWAY_H
#define __GSL_TAMU_ANOVA_TWOWAY_H

/*
Two way ANOVA
This package defines ANOVA for two factors, including:
	two-factor fixed effects
	two-factor random effects
	two-factor mixed model
This package was writen by:
Andrew Redd
Krista Rister
Elizabeth Young
*/


#include <gsl/gsl_cdf.h>
#include <gsl/gsl_errno.h>
#include <math.h>
#include <gsl/gsl_linalg.h>
#include <gsl/gsl_vector.h>
#include <stdio.h>

/*This structure contains all the data that will be included in a two-way ANOVA table.*/
struct tamu_anova_table_twoway
{
  long dfA, dfB, dfAB, dfE, dfT;	//Degrees of freedom for factor A, factor B, interaction, error, and total
  double SSA, MSA, FA, pA,	//For factor a, the sum of squares, mean squares, F statistic, and p value
    SSB, MSB, FB, pB,		//For factor b, the sum of squares, mean squares, F statistic, and p value
    SSAB, MSAB, FAB, pAB,	//For interaction, the sum of squares, mean squares, F statistic, and p value
    SSE, MSE,			//For error, the sum of squares and mean squares
    SST;			//The total sum of squares
};

//Definition of three different models
enum gsl_anova_twoway_types
{
  anova_fixed,
  anova_random,
  anova_mixed
};

/*This function runs two-way ANOVA after being passed the following arguments:
 -data:   An array of doubles representing all the observations to be included in the test.
 -factor: A two-dimensional matrix of long integers representing the group that the 
          corresponding data value belongs in. For example, if factor[i][0]=1 and factor[i][1]=2,
          then data[i] belongs in the group pertaining to the first treatment level of factor A
					and the second treatment level of factor B.
 -I:      The number of data values included in the test.
 -J:      An array containing the number of treatment levels of factor A (J[0]) and factor B (J[1]).
          For every i from 1 to I, factor[i][0] must be an integer value between 1 and J[0], and
					factor[i][1] must be an integer value between 1 and J[1], both inclusive.
 -type:   Can take one of three values (anova_fixed, anova_random, or anova_mixed) and defines
          the model of the study.
*/

struct tamu_anova_table_twoway
tamu_anova_twoway (double data[], long factor[][2], long I, long J[2],
		   enum gsl_anova_twoway_types type)
{
  //Error checking/////////////////////////////////////////////////////////////////////////////
  //This for loop checks to make sure each of member of the factor array is valid.
  long k;
  for (k = 0; k < I; k++)
    {
      if ((factor[k][0] < 1 || factor[k][0] > J[0])
	  || (factor[k][1] < 1 || factor[k][1] > J[1]))
	{
	  //Cannot use GSL_ERROR since it assumes an integer return value. but ay use this in the future.
	  //GSL_ERROR("Invalid factor passed to tamu_anova_twoway. All members of factor array must be between 1 and J[0] for factor A and between 1 and J[1] for factor B, both inclusive.",GSL_EINVAL);
	  gsl_error
	    ("Invalid factor passed to tamu_anova_twoway. All members of factor array must be between 1 and J[0] for factor A and between 1 and J[1] for factor B, both inclusive.",
	     __FILE__, __LINE__, GSL_EINVAL);
	  k = I;
	};
    };

  //Declarations///////////////////////////////////////////////////////////////////////////////
  struct tamu_anova_table_twoway rtntbl;	//Table to be returned by function
  int i, j0, j1;		//We will use these to iterate over I, J[0], and J[1], respectively.
  long n[J[0]][J[1]];		//Holds the number of data values belonging to each of the J[0]*J[1] groups
  long nrow[J[0]];		//Holds the number of data values belonging to each level of factor A
  long ncol[J[1]];		//Holds the number of data values belonging to each level of factor B

  //NOTE: Below averages are not the true arithmetic mean, but rather weighted means
  double motheravg = 0;		/*Mean of all data values - used only to calculate an approximation for 
				   the interaction sum of squares when an exact value cannot be found by
				   solving a system of linear equations. */
  double cellavg[J[0]][J[1]];	//Average of the data values in each group (treatment)
  double cellsum[J[0]][J[1]];	//Sum of the data values in each group
  double rowavg[J[0]];		//Average of the data values for each level of factor A
  double rowsum[J[0]];		//Sum of the data values for each level of factor A
  double colavg[J[1]];		//Average of the data values for each level of factor B
  double colsum[J[1]];		//Sum of the data values for each level of factor B
  double wa[J[0]], wb[J[1]];	//Weights used in calculating sum of squares
  gsl_vector *tauvec;		//Pointer to vector tau
  gsl_matrix *cmat;		//Pointer to the coefficient matrix
  gsl_vector *rvec;		//Pointer to the r vector
  gsl_vector *rltvec;		//Pointer to a vector to hold the results

  //Initialization of values//////////////////////////////////////////////////////////////////////
  for (j0 = 0; j0 < J[0]; j0++)
    for (j1 = 0; j1 < J[1]; j1++)
      {
	n[j0][j1] = 0;
      };
  for (j0 = 0; j0 < J[0]; j0++)
    for (j1 = 0; j1 < J[1]; j1++)
      {
	cellsum[j0][j1] = 0;
	cellavg[j0][j1] = 0;
      };
  for (j0 = 0; j0 < J[0]; j0++)
    {
      rowavg[j0] = 0;
      rowsum[j0] = 0;
      nrow[j0] = 0;
    };
  for (j1 = 0; j1 < J[1]; j1++)
    {
      colavg[j1] = 0;
      colsum[j1] = 0;
      ncol[j1] = 0;
    };
  rtntbl.SSA = 0;
  rtntbl.SSB = 0;
  rtntbl.SSAB = 0;
  rtntbl.SSE = 0;
  rtntbl.SST = 0;

  //Calculates sums and number of data values for each group (treatment)///////////////////////////
  for (i = 0; i < I; i++)
    {
      cellsum[factor[i][0] - 1][factor[i][1] - 1] += data[i];
      n[factor[i][0] - 1][factor[i][1] - 1]++;
    };

  //Calculates cell averages, row counts, and column counts////////////////////////////////////////
  for (j0 = 0; j0 < J[0]; j0++)
    {
      for (j1 = 0; j1 < J[1]; j1++)
	{
	  cellavg[j0][j1] = cellsum[j0][j1] / n[j0][j1];
	  nrow[j0] += n[j0][j1];
	  ncol[j1] += n[j0][j1];
	};
    };

#ifdef TAMU_ANOVA_DEBUG
  //Prints out cellsum and avg.
  printf ("cellavg:\n");
  for (j0 = 0; j0 < J[0]; j0++)
    {
      for (j1 = 0; j1 < J[1]; j1++)
	{
	  printf ("%f ", cellavg[j0][j1]);
	};
      printf ("\n");
    };
  printf ("cellsum\n");
  for (j0 = 0; j0 < J[0]; j0++)
    {
      for (j1 = 0; j1 < J[1]; j1++)
	{
	  printf ("%f ", cellsum[j0][j1]);
	};
      printf ("\n");
    };
#endif

  //Calculates row averages, row sums, column averages, and column sums/////////////////////////
  for (j0 = 0; j0 < J[0]; j0++)
    for (j1 = 0; j1 < J[1]; j1++)
      {
	rowavg[j0] += cellavg[j0][j1];
	rowsum[j0] += cellsum[j0][j1];
	colavg[j1] += cellavg[j0][j1];
	colsum[j1] += cellsum[j0][j1];
      };
  for (j0 = 0; j0 < J[0]; j0++)
    {
      rowavg[j0] /= J[1];
    };
  for (j1 = 0; j1 < J[1]; j1++)
    {
      colavg[j1] /= J[0];
    };

  //Calculates average of all data//////////////////////////////////////////////////////////////
  for (j0 = 0; j0 < J[0]; j0++)
    for (j1 = 0; j1 < J[1]; j1++)
      {
	motheravg += cellavg[j0][j1];
      };
  motheravg /= (J[0] * J[1]);

#ifdef TAMU_ANOVA_DEBUG
  //Prints out row avg and col avg.
  printf ("rowavg:\n");
  for (j0 = 0; j0 < J[0]; j0++)
    {
      printf ("%f  ", rowavg[j0]);
    };
  printf ("\n");
  printf ("colavg:\n");
  for (j1 = 0; j1 < J[1]; j1++)
    {
      printf ("%f  ", colavg[j1]);
    };
  printf ("\n");
  printf ("rowsum:\n");
  for (j0 = 0; j0 < J[0]; j0++)
    {
      printf ("%f  ", rowsum[j0]);
    };
  printf ("\n");
  printf ("colsum:\n");
  for (j1 = 0; j1 < J[1]; j1++)
    {
      printf ("%f  ", colsum[j1]);
    };
  printf ("\n");
  printf ("rowsum/nrow:\n");
  for (j0 = 0; j0 < J[0]; j0++)
    {
      printf ("%f  ", rowsum[j0] / nrow[j0]);
    };
  printf ("\n");
  printf ("colsum/ncol:\n");
  for (j1 = 0; j1 < J[1]; j1++)
    {
      printf ("%f  ", colsum[j1] / ncol[j1]);
    };
  printf ("\n");
#endif


  //Calculates sum of squares for factor A (SSA)///////////////////////////////////////////////
  //The calculations are according to formulas found in Searle's _Analysis of Unbalanced Data_
  double sw = 0;
  double y[J[0]];
  double c = 0;

  for (j0 = 0; j0 < J[0]; j0++)
    {
      wa[j0] = 0;
      y[j0] = 0;
      for (j1 = 0; j1 < J[1]; j1++)
	{
	  wa[j0] += 1.0 / (double) n[j0][j1];
	  y[j0] += cellavg[j0][j1];
	};
      wa[j0] = J[1] * J[1] / wa[j0];
      sw += wa[j0];
      y[j0] /= J[1];
    };

  for (j0 = 0; j0 < J[0]; j0++)
    {
      c += wa[j0] * y[j0] / sw;
    };
  for (j0 = 0; j0 < J[0]; j0++)
    {
      rtntbl.SSA += wa[j0] * pow (y[j0] - c, 2);
    };

  //Calculates sum of squares for factor B (SSB)///////////////////////////////////////////////////
  c = 0;
  sw = 0;
  double yb[J[1]];

  for (j1 = 0; j1 < J[1]; j1++)
    {				//finds wi and y~_i..
      wb[j1] = 0;
      yb[j1] = 0;
      for (j0 = 0; j0 < J[0]; j0++)
	{
	  wb[j1] += 1.0 / (double) n[j0][j1];
	  yb[j1] += cellavg[j0][j1];
	};
      wb[j1] = J[0] * J[0] / wb[j1];
      sw += wb[j1];
      yb[j1] /= J[0];
    };

  for (j1 = 0; j1 < J[1]; j1++)
    {
      c += wb[j1] * yb[j1] / sw;
    };
  for (j1 = 0; j1 < J[1]; j1++)
    {
      rtntbl.SSB += wb[j1] * pow (yb[j1] - c, 2);
    };

  //Calculates sum of squares for interaction (SSAB)///////////////////////////////////////////////////
  //Exact calculations, using a system of linear equations and GSL vectors/matrices////////////////////
  rvec = gsl_vector_alloc (J[1] - 1);
  tauvec = gsl_vector_alloc (J[1] - 1);
  cmat = gsl_matrix_alloc (J[1] - 1, J[1] - 1);

  for (j1 = 0; j1 < (J[1] - 1); j1++)
    {
      double x = 0;
      for (j0 = 0; j0 < J[0]; j0++)
	{
	  x += cellsum[j0][j1] - n[j0][j1] * rowsum[j0] / nrow[j0];	//rowavg[j0];
	};
      gsl_vector_set (rvec, j1, x);
    };
#ifdef TAMU_ANOVA_DEBUG
  printf ("rvec:\n");
  for (j1 = 0; j1 < J[1] - 1; j1++)
    {
      printf ("%f\n", gsl_vector_get (rvec, j1));
    };
#endif

  for (j0 = 0; j0 < J[1] - 1; j0++)
    for (j1 = 0; j1 < J[1] - 1; j1++)
      {
	double x = 0;
	if (j0 == j1)
	  {
	    for (i = 0; i < J[0]; i++)
	      {
		x += pow ((double) n[i][j1], 2) / (double) nrow[i];
	      };
	    gsl_matrix_set (cmat, j0, j1, (double) ncol[j1] - x);
	  }
	else
	  {
	    for (i = 0; i < J[0]; i++)
	      {
		x += (double) n[i][j0] * (double) n[i][j1] / (double) nrow[i];
	      };
	    gsl_matrix_set (cmat, j0, j1, -x);
	  };
      };

#ifdef TAMU_ANOVA_DEBUG
  printf ("cmat:\n");
  for (j0 = 0; j0 < J[1] - 1; j0++)
    {
      for (j1 = 0; j1 < J[1] - 1; j1++)
	{
	  printf ("%f  ", gsl_matrix_get (cmat, j0, j1));
	};
      printf ("\n");
    };
#endif

  //Factor C matrix for use in QR.
  int QR_rslt;
  rltvec = gsl_vector_alloc (J[1] - 1);
  QR_rslt = gsl_linalg_QR_decomp (cmat, rltvec);

  if (QR_rslt == 0)
    {
      QR_rslt = gsl_linalg_QR_solve (cmat, rltvec, rvec, tauvec);
#ifdef TAMU_ANOVA_DEBUG
      printf ("tauvec\n");
      for (j1 = 0; j1 < J[1] - 1; j1++)
	{
	  printf ("%f\n", gsl_vector_get (tauvec, j1));
	};
#endif

      //Finally we compute SSAB
      double R = 0;
      for (j0 = 0; j0 < J[0]; j0++)
	for (j1 = 0; j1 < J[1]; j1++)
	  {
	    rtntbl.SSAB += n[j0][j1] * pow (cellavg[j0][j1], 2);
	  };
      for (j0 = 0; j0 < J[0]; j0++)
	{
	  R += pow (rowsum[j0], 2) / nrow[j0];
	};
      for (j1 = 0; j1 < J[1] - 1; j1++)
	{
	  R += gsl_vector_get (tauvec, j1) * gsl_vector_get (rvec, j1);
	};
#ifdef TAMU_ANOVA_DEBUG
      printf ("R:  %f\n", R);
#endif
      rtntbl.SSAB -= R;
    }
  else
    {
      gsl_error
	("Unable to decompose matrix.  Approximate SS interaction being used.",
	 __FILE__, __LINE__, GSL_EFACTOR);
      //Approximating formula to be used if matrix cannot be decomposed
      for (j0 = 0; j0 < J[0]; j0++)
	for (j1 = 0; j1 < J[1]; j1++)
	  {
	    rtntbl.SSAB +=
	      n[j0][j1] * pow (cellavg[j0][j1] - rowavg[j0] - colavg[j1] +
			       motheravg, 2.0);
	  };

    };

  //Cleanup for vectors/////////////////////////////////////////////////////////////////////////////
  gsl_vector_free (rvec);
  gsl_vector_free (tauvec);
  gsl_matrix_free (cmat);
  gsl_vector_free (rltvec);

  //Calculates the error sum of squares (SSE)///////////////////////////////////////////////////////
  for (i = 0; i < I; i++)
    {
      rtntbl.SSE += pow (data[i], 2);
    };
  for (j0 = 0; j0 < J[0]; j0++)
    for (j1 = 0; j1 < J[1]; j1++)
      {
	rtntbl.SSE -= pow (cellavg[j0][j1], 2) * n[j0][j1];
      };

  //Calculates the total sum of squares (SST)///////////////////////////////////////////////////////
  for (i = 0; i < I; i++)
    {
      rtntbl.SST += (data[i]);
    };
  rtntbl.SST = -pow (rtntbl.SST, 2) / I;
  for (j0 = 0; j0 < J[0]; j0++)
    for (j1 = 0; j1 < J[1]; j1++)
      {
	rtntbl.SST += pow (cellavg[j0][j1], 2) * n[j0][j1];
      };
  rtntbl.SST += rtntbl.SSE;

  //Calculates the degrees of freedom////////////////////////////////////////////////////////////////
  rtntbl.dfA = J[0] - 1;
  rtntbl.dfB = J[1] - 1;
  rtntbl.dfT = I - 1;
  rtntbl.dfAB = (J[0] - 1) * (J[1] - 1);
  rtntbl.dfE = I - J[0] * J[1];

  //Calculates the mean squares/////////////////////////////////////////////////////////////////////
  rtntbl.MSA = rtntbl.SSA / rtntbl.dfA;
  rtntbl.MSB = rtntbl.SSB / rtntbl.dfB;
  rtntbl.MSAB = rtntbl.SSAB / rtntbl.dfAB;
  rtntbl.MSE = rtntbl.SSE / rtntbl.dfE;


  //Calculates the F statistic for each of the three different model types////////////////////////////
  switch (type)
    {
    case anova_fixed:
      rtntbl.FA = rtntbl.MSA / rtntbl.MSE;
      rtntbl.FB = rtntbl.MSB / rtntbl.MSE;
      rtntbl.FAB = rtntbl.MSAB / rtntbl.MSE;
      rtntbl.pA =
	gsl_cdf_fdist_Q (rtntbl.FA, (double) rtntbl.dfA, (double) rtntbl.dfE);
      rtntbl.pB =
	gsl_cdf_fdist_Q (rtntbl.FB, (double) rtntbl.dfB, (double) rtntbl.dfE);
      rtntbl.pAB =
	gsl_cdf_fdist_Q (rtntbl.FAB, (double) rtntbl.dfAB,
			 (double) rtntbl.dfE);
      break;
    case anova_random:
      rtntbl.FA = rtntbl.MSA / rtntbl.MSAB;
      rtntbl.FB = rtntbl.MSB / rtntbl.MSAB;
      rtntbl.FAB = rtntbl.MSAB / rtntbl.MSE;
      rtntbl.pA =
	1.0 - gsl_cdf_fdist_P (rtntbl.FA, (double) rtntbl.dfA,
			       (double) rtntbl.dfAB);
      rtntbl.pB =
	1.0 - gsl_cdf_fdist_P (rtntbl.FB, (double) rtntbl.dfB,
			       (double) rtntbl.dfAB);
      rtntbl.pAB =
	1.0 - gsl_cdf_fdist_P (rtntbl.FAB, (double) rtntbl.dfAB,
			       (double) rtntbl.dfE);
      break;
    case anova_mixed:
      rtntbl.FA = rtntbl.MSA / rtntbl.MSAB;
      rtntbl.FB = rtntbl.MSB / rtntbl.MSE;
      rtntbl.FAB = rtntbl.MSAB / rtntbl.MSE;
      rtntbl.pA =
	1.0 - gsl_cdf_fdist_P (rtntbl.FA, (double) rtntbl.dfA,
			       (double) rtntbl.dfAB);
      rtntbl.pB =
	1.0 - gsl_cdf_fdist_P (rtntbl.FB, (double) rtntbl.dfB,
			       (double) rtntbl.dfE);
      rtntbl.pAB =
	1.0 - gsl_cdf_fdist_P (rtntbl.FAB, (double) rtntbl.dfAB,
			       (double) rtntbl.dfE);
      break;
    }

  //Returns entire table
  return rtntbl;
};

/*This function prints a formatted version of the tamu_anova_table_twoway passed to it.*/
void
tamu_anova_printtable_twoway (struct tamu_anova_table_twoway t)
{
  printf ("results from anova twoway test on data\n");
  printf ("source  \tdf\tSS\t\tMS\t\tF \t\tP\n");
  printf ("Factor-A\t%ld\t%f\t%f\t%f\t%f\n", t.dfA, t.SSA, t.MSA, t.FA, t.pA);
  printf ("Factor-B\t%ld\t%f\t%f\t%f\t%f\n", t.dfB, t.SSB, t.MSB, t.FB, t.pB);
  printf ("Interact\t%ld\t%f\t%f\t%f\t%f\n", t.dfAB, t.SSAB, t.MSAB, t.FAB,
	  t.pAB);
  printf ("Error   \t%ld\t%f\t%f\t\n", t.dfE, t.SSE, t.MSE);
  printf ("Total   \t%ld\t%f\n", t.dfT, t.SST);
}

#endif