File: sweep.c

package info (click to toggle)
pspp 2.0.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 66,676 kB
  • sloc: ansic: 267,210; xml: 18,446; sh: 5,534; python: 2,881; makefile: 125; perl: 64
file content (152 lines) | stat: -rw-r--r-- 4,629 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
/* PSPP - a program for statistical analysis.
   Copyright (C) 2005, 2009, 2011 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>. */

/*
  Find the least-squares estimate of b for the linear model:

  Y = Xb + Z

  where Y is an n-by-1 column vector, X is an n-by-p matrix of
  independent variables, b is a p-by-1 vector of regression coefficients,
  and Z is an n-by-1 normally-distributed random vector with independent
  identically distributed components with mean 0.

  This estimate is found via the sweep operator, which is a modification
  of Gauss-Jordan pivoting.


  References:

  Matrix Computations, third edition. GH Golub and CF Van Loan.
  The Johns Hopkins University Press. 1996. ISBN 0-8018-5414-8.

  Numerical Analysis for Statisticians. K Lange. Springer. 1999.
  ISBN 0-387-94979-8.

  Numerical Linear Algebra for Applications in Statistics. JE Gentle.
  Springer. 1998. ISBN 0-387-98542-5.
*/

#include <config.h>

#include "sweep.h"

#include <assert.h>
/*
  Sweeps the symmetric matrix A in place; A is overwritten with the
  result.  In ordinary uses of the sweep operator, A will be the matrix

  __       __
  |X'X    X'Y|
  |          |
  |Y'X    Y'Y|
  --        --

  X refers to the design matrix and Y to the vector of dependent
  observations. reg_sweep sweeps on the diagonal elements of
  X'X.

  The matrix A is assumed to be symmetric, so the sweep operation is
  performed only for the upper triangle of A.  Before returning, the
  upper triangle is copied onto the lower triangle, so that A is still
  a complete symmetric matrix once LAST_COL has been moved back into
  place.

  LAST_COL is considered to be the final column in the augmented matrix,
  that is, the column to the right of the '=' sign of the system.  It
  must be a valid index into A.  Its diagonal element is never used as
  a pivot.

  A diagonal element whose magnitude does not exceed GSL_DBL_MIN is
  skipped rather than swept on.

  Returns GSL_EFAULT if A is null, GSL_ENOTSQR if A is not square, and
  GSL_SUCCESS otherwise.
*/

int
reg_sweep (gsl_matrix * A, int last_col)
{
  size_t n;
  size_t i;
  size_t j;
  size_t k;
  gsl_matrix *B;

  if (A == NULL)
    return GSL_EFAULT;

  if (A->size1 != A->size2)
    return GSL_ENOTSQR;

  n = A->size1;
  assert (last_col >= 0 && (size_t) last_col < n);

  /* Move LAST_COL to the final row and column, so that the pivots are
     exactly the first n - 1 diagonal elements. */
  gsl_matrix_swap_rows (A, n - 1, last_col);
  gsl_matrix_swap_columns (A, n - 1, last_col);

  /* B receives the result of each sweep.  It starts out as a copy of A
     so that it never holds indeterminate values: the sweep below writes
     only the upper triangle, and a skipped pivot writes nothing at all.

     NOTE(review): B is used without a null check; presumably the GSL
     error handler aborts on allocation failure -- confirm that the
     handler is not disabled by callers. */
  B = gsl_matrix_alloc (n, n);
  gsl_matrix_memcpy (B, A);

  /* "k + 1 < n" rather than "k < n - 1": the latter wraps around for an
     empty matrix when assertions are compiled out. */
  for (k = 0; k + 1 < n; k++)
    {
      const double sweep_element = gsl_matrix_get (A, k, k);

      /* Written as a negated '>' so that a NaN pivot is skipped too. */
      if (!(fabs (sweep_element) > GSL_DBL_MIN))
        continue;

      gsl_matrix_set (B, k, k, -1.0 / sweep_element);
      /*
        Rows before current row k.
      */
      for (i = 0; i < k; i++)
        {
          for (j = i; j < n; j++)
            {
              /* Use only the upper triangle of A. */
              double tmp;
              if (j < k)
                {
                  tmp = gsl_matrix_get (A, i, j) -
                    gsl_matrix_get (A, i, k)
                    * gsl_matrix_get (A, j, k) / sweep_element;
                }
              else if (j > k)
                {
                  tmp = gsl_matrix_get (A, i, j) -
                    gsl_matrix_get (A, i, k)
                    * gsl_matrix_get (A, k, j) / sweep_element;
                }
              else
                {
                  tmp = gsl_matrix_get (A, i, k) / sweep_element;
                }
              gsl_matrix_set (B, i, j, tmp);
            }
        }
      /*
        Current row k.
      */
      for (j = k + 1; j < n; j++)
        {
          double tmp = gsl_matrix_get (A, k, j) / sweep_element;
          gsl_matrix_set (B, k, j, tmp);
        }
      /*
        Rows after the current row k.
      */
      for (i = k + 1; i < n; i++)
        {
          for (j = i; j < n; j++)
            {
              double tmp = gsl_matrix_get (A, i, j) -
                gsl_matrix_get (A, k, i)
                * gsl_matrix_get (A, k, j) / sweep_element;
              gsl_matrix_set (B, i, j, tmp);
            }
        }

      /* Copy back only after an actual sweep; after a skipped pivot A
         and B are already identical. */
      gsl_matrix_memcpy (A, B);
    }
  gsl_matrix_free (B);

  /* Only the upper triangle was updated above.  Mirror it onto the
     lower triangle, because swapping LAST_COL back moves some
     lower-triangle elements into the upper triangle. */
  for (i = 0; i < n; i++)
    {
      for (j = i + 1; j < n; j++)
        gsl_matrix_set (A, j, i, gsl_matrix_get (A, i, j));
    }

  /* Undo the initial permutation. */
  gsl_matrix_swap_columns (A, n - 1, last_col);
  gsl_matrix_swap_rows (A, n - 1, last_col);

  return GSL_SUCCESS;
}