File: regress.lsp

package info (click to toggle)
xlispstat 3.52.0-3
  • links: PTS
  • area: main
  • in suites: hamm, slink
  • size: 7,472 kB
  • ctags: 12,480
  • sloc: ansic: 89,534; lisp: 21,690; sh: 1,525; makefile: 520; csh: 1
file content (512 lines) | stat: -rw-r--r-- 20,158 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
;;;;
;;;; regression.lsp XLISP-STAT regression model proto and methods
;;;; XLISP-STAT 2.1 Copyright (c) 1990, by Luke Tierney
;;;; Additions to Xlisp 2.1, Copyright (c) 1989 by David Michael Betz
;;;; You may give out copies of this software; for conditions see the file
;;;; COPYING included with this distribution.
;;;;
;;;;
;;;; Incorporates modifications suggested by Sandy Weisberg.
;;;;


(provide "regress")

(require "stats")
(require "graphics")
(require "help")

;;;;
;;;;
;;;; Regresion Model Prototype
;;;;
;;;;

(defproto regression-model-proto
          '(x y intercept sweep-matrix basis weights 
              included
              total-sum-of-squares
              residual-sum-of-squares
              predictor-names
              response-name
              case-labels)
          ()
          *object*
          "Normal Linear Regression Model")

;; The doc for this function string is at the limit of XLISP's string 
;; constant size - making it longer may cause problems
(defun regression-model (x y &key 
                           (intercept T) 
                           (print T) 
                           weights
                           (included (repeat t (length y)))
                           predictor-names
                           response-name
                           case-labels)
"Args: (x y &key (intercept T) (print T) weights 
          included predictor-names response-name case-labels)
X           - list of independent variables or X matrix
Y           - dependent variable.
INTERCEPT   - T to include (default), NIL for no intercept
PRINT       - if not NIL print summary information
WEIGHTS     - if supplied should be the same length as Y; error variances are
               assumed to be inversely proportional to WEIGHTS
PREDICTOR-NAMES
RESPONSE-NAME
CASE-LABELS - sequences of strings or symbols.
INCLUDED    - if supplied should be the same length as Y, with elements nil
              to skip a in computing estimates (but not in residual analysis). 
Returns a regression model object. To examine the model further assign the
result to a variable and send it messages.
Example (data are in file absorbtion.lsp in the sample data directory/folder):
  (def m (regression-model (list iron aluminum) absorbtion))
  (send m :help)
  (send m :plot-residuals)"
  (let ((x (cond 
             ((matrixp x) x)
             ((vectorp x) (list x))
             ((and (consp x) (numberp (car x))) (list x))
             (t x)))
        (m (send regression-model-proto :new)))
    (send m :x (if (matrixp x) x (apply #'bind-columns x)))
    (send m :y y)
    (send m :intercept intercept)
    (send m :weights weights)
    (send m :included included)
    (send m :predictor-names predictor-names)
    (send m :response-name response-name)
    (send m :case-labels case-labels)
    (if print (send m :display))
    m))

(defmeth regression-model-proto :isnew () (send self :needs-computing t))

(defmeth regression-model-proto :save ()
"Message args: ()
Returns an expression that will reconstruct the regression model."
  `(regression-model ',(send self :x)
                     ',(send self :y)
                     :intercept ',(send self :intercept)
                     :weights ',(send self :weights)
                     :included ',(send self :included)
                     :predictor-names ',(send self :predictor-names)
                     :response-name ',(send self :response-name)
                     :case-labels ',(send self :case-labels)))
                       
;;;
;;; Computing and Display Methods
;;;

#|
(defmeth regression-model-proto :compute ()
"Message args: ()
Recomputes the estimates. For internal use by other messages"
  (let* ((included (if-else (send self :included) 1 0))
         (x (send self :x))
         (y (send self :y))
         (intercept (send self :intercept))
         (weights (send self :weights))
         (w (if weights (* included weights) included))
         (m (make-sweep-matrix x y w))
         (n (array-dimension x 1))
         (p (- (array-dimension m 0) 1))
         (tss (aref m p p))
         (tol (* .0001 (mapcar #'standard-deviation (column-list x))))
         (sweep-result
          (if intercept
              (sweep-operator m (iseq 1 n) tol)
              (sweep-operator m (iseq 0 n) (cons 0.0 tol)))))
    (setf (slot-value 'sweep-matrix) (first sweep-result))
    (setf (slot-value 'total-sum-of-squares) tss)
    (setf (slot-value 'residual-sum-of-squares) 
          (aref (first sweep-result) p p))
    (setf (slot-value 'basis)
          (let ((b (remove 0 (second sweep-result))))
            (if b 
                (- (reverse b) 1)
                (error "no columns could be swept"))))))
|#
;; This should be overridden by a slot value.
(defparameter *regression-tolerance* 1.0e-9
  "Tolerance for including regression column")

;; Modified to use sweep diagonals in tolerance and internal
;; sweep methods on a matrix of C-DOUBLE elements type.
(defmeth regression-model-proto :compute ()
"Message args: ()
Recomputes the estimates. For internal use by other messages"
  (let* ((included (send self :included))
	 (x (coerce (send self :x) '(array c-double)))
         (y (coerce (send self :y) '(vector c-double)))
	 (intercept (send self :intercept))
         (weights (send self :weights))
         (w (coerce (if-else included (if weights weights 1) 0)
		    '(vector c-double)))
	 (n (array-dimension x 0))
	 (p (array-dimension x 1))
	 (p+1 (+ p 1))
	 (p+2 (+ p 2))
	 (m (make-array (list p+2 p+2) :element-type 'c-double))
	 (xmean (make-array p :element-type 'c-double)))
    (base-make-sweep-matrix n p x y w m xmean)
    (let* ((tss (aref m p+1 p+1))
	   (tol (* *regression-tolerance* (/ (butlast (rest (diagonal m))) n)))
	   (swept nil))
      (unless intercept (sweep-in-place p+2 p+2 m 0 0.0))
      (mapc #'(lambda (k tol)
		(if (sweep-in-place p+2 p+2 m k tol) (push k swept)))
	    (iseq 1 p)
	    tol)
      (setf (slot-value 'sweep-matrix) (coerce m '(array t)))
      (setf (slot-value 'total-sum-of-squares) tss)
      (setf (slot-value 'residual-sum-of-squares) (aref m p+1 p+1))
      (unless swept (error "no columns could be swept"))
      (setf (slot-value 'basis) (- (nreverse swept) 1)))))


(defmeth regression-model-proto :needs-computing (&optional set)
  (if set (setf (slot-value 'sweep-matrix) nil))
  (null (slot-value 'sweep-matrix)))
  
(defmeth regression-model-proto :display ()
"Message args: ()
Prints the least squares regression summary. Variables not used in the fit
are marked as aliased."
  (let ((coefs (coerce (send self :coef-estimates) 'list))
        (se-s (send self :coef-standard-errors))
        (x (send self :x))
        (p-names (send self :predictor-names)))
    (if (send self :weights) 
        (format t "~%Weighted Least Squares Estimates:~2%")
        (format t "~%Least Squares Estimates:~2%"))
    (when (send self :intercept)
          (format t "Constant~25t~13,6g~40t(~,6g)~%" (car coefs) (car se-s))
          (setf coefs (cdr coefs))
          (setf se-s (cdr se-s)))
    (dotimes (i (array-dimension x 1)) 
             (cond 
               ((member i (send self :basis))
                (format t "~a~25t~13,6g~40t(~,6g)~%"
                        (select p-names i) (car coefs) (car se-s))
                (setf coefs (cdr coefs) se-s (cdr se-s)))
               (t (format t "~a~25taliased~%" (select p-names i)))))
    (format t "~%")
    (format t "R Squared:~25t~13,6g~%" (send self :r-squared))
    (format t "Sigma hat:~25t~13,6g~%" (send self :sigma-hat))
    (format t "Number of cases:~25t~9d~%" (send self :num-cases))
    (if (/= (send self :num-cases) (send self :num-included))
        (format t "Number of cases used:~25t~9d~%" (send self :num-included)))
    (format t "Degrees of freedom:~25t~9d~%" (send self :df))
    (format t "~%")))


;;;
;;; Slot accessors and mutators
;;;

#|
(defmeth regression-model-proto :x (&optional new-x)
"Message args: (&optional new-x)
With no argument returns the x matrix as supplied to m. With an argument
NEW-X sets the x matrix to NEW-X and recomputes the estimates."
  (when (and new-x (matrixp new-x))
        (setf (slot-value 'x) new-x)
        (send self :needs-computing t))
  (slot-value 'x))
|#
;; Modified to store matrix as typed array with C-DOUBLE elements
(defmeth regression-model-proto :x (&optional new-x)
"Message args: (&optional new-x)
With no argument returns the x matrix as supplied to m. With an argument
NEW-X sets the x matrix to NEW-X and recomputes the estimates."
  (when (and new-x (matrixp new-x))
        (setf (slot-value 'x) (coerce new-x '(array c-double)))
        (send self :needs-computing t))
  (slot-value 'x))

(defmeth regression-model-proto :y (&optional new-y)
"Message args: (&optional new-y)
With no argument returns the y sequence as supplied to m. With an argument
NEW-Y sets the y sequence to NEW-Y and recomputes the estimates."
  (when (and new-y (or (matrixp new-y) (sequencep new-y)))
        (setf (slot-value 'y) new-y)
        (send self :needs-computing t))
  (slot-value 'y))

(defmeth regression-model-proto :intercept (&optional (val nil set))
"Message args: (&optional new-intercept)
With no argument returns T if the model includes an intercept term, nil if
not. With an argument NEW-INTERCEPT the model is changed to include or
exclude an intercept, according to the value of NEW-INTERCEPT."
  (when set 
        (setf (slot-value 'intercept) val)
        (send self :needs-computing t))
  (slot-value 'intercept))

(defmeth regression-model-proto :weights (&optional (new-w nil set))
"Message args: (&optional new-w)
With no argument returns the weight sequence as supplied to m; NIL means
an unweighted model. NEW-W sets the weights sequence to NEW-W and
recomputes the estimates."
  (when set 
        (setf (slot-value 'weights) new-w) 
        (send self :needs-computing t))
  (slot-value 'weights))

(defmeth regression-model-proto :total-sum-of-squares ()
"Message args: ()
Returns the total sum of squares around the mean."
  (if (send self :needs-computing) (send self :compute))
  (slot-value 'total-sum-of-squares))

(defmeth regression-model-proto :residual-sum-of-squares () 
"Message args: ()
Returns the residual sum of squares for the model."
  (if (send self :needs-computing) (send self :compute))
  (slot-value 'residual-sum-of-squares))

(defmeth regression-model-proto :basis ()
"Message args: ()
Returns the indices of the variables used in fitting the model."
  (if (send self :needs-computing) (send self :compute))
  (slot-value 'basis))

(defmeth regression-model-proto :sweep-matrix ()
"Message args: ()
Returns the swept sweep matrix. For internal use"
  (if (send self :needs-computing) (send self :compute))
  (slot-value 'sweep-matrix))

(defmeth regression-model-proto :included (&optional new-included)
"Message args: (&optional new-included)
With no argument,  NIL means a case is not used in calculating estimates, and non-nil means it is used.  NEW-INCLUDED is a sequence of length of y of nil and t to select cases.  Estimates are recomputed."
  (when (and new-included 
             (= (length new-included) (send self :num-cases)))
        (setf (slot-value 'included) (copy-seq new-included)) 
        (send self :needs-computing t))
  (if (slot-value 'included)
      (slot-value 'included)
      (repeat t (send self :num-cases))))

(defmeth regression-model-proto :predictor-names (&optional (names nil set))
"Message args: (&optional (names nil set))
With no argument returns the predictor names. NAMES sets the names."
  (if set (setf (slot-value 'predictor-names) (mapcar #'string names)))
  (let ((p (array-dimension (send self :x) 1))
        (p-names (slot-value 'predictor-names)))
    (if (not (and p-names (= (length p-names) p)))
        (setf (slot-value 'predictor-names)
              (mapcar #'(lambda (a) (format nil "Variable ~a" a)) 
                      (iseq 0 (- p 1))))))
  (slot-value 'predictor-names))

(defmeth regression-model-proto :response-name (&optional (name "Y" set))
"Message args: (&optional name)
With no argument returns the response name. NAME sets the name."
  (if set (setf (slot-value 'response-name) (if name (string name) "Y")))
  (slot-value 'response-name))

(defmeth regression-model-proto :case-labels (&optional (labels nil set))
"Message args: (&optional labels)
With no argument returns the case-labels. LABELS sets the labels."
  (if set (setf (slot-value 'case-labels) 
                (if labels 
                    (mapcar #'string labels)
                    (mapcar #'(lambda (x) (format nil "~d" x)) 
                            (iseq 0 (- (send self :num-cases) 1))))))
  (slot-value 'case-labels))

;;;
;;; Other Methods
;;; None of these methods access any slots directly.
;;;

(defmeth regression-model-proto :num-cases ()
"Message args: ()
Returns the number of cases in the model."
  (length (send self :y)))

(defmeth regression-model-proto :num-included ()
"Message args: ()
Returns the number of cases used in the computations."
  (sum (if-else (send self :included) 1 0)))

(defmeth regression-model-proto :num-coefs ()
"Message args: ()
Returns the number of coefficients in the fit model (including the
intercept if the model includes one)."
  (if (send self :intercept)
      (+ 1 (length (send self :basis)))
      (length (send self :basis))))

(defmeth regression-model-proto :df ()
"Message args: ()
Returns the number of degrees of freedom in the model."
  (- (send self :num-included) (send self :num-coefs)))
  
(defmeth regression-model-proto :x-matrix ()
"Message args: ()
Returns the X matrix for the model, including a column of 1's, if
appropriate. Columns of X matrix correspond to entries in basis."
  (let ((m (select (send self :x) 
                   (iseq 0 (- (send self :num-cases) 1)) 
                   (send self :basis))))
    (if (send self :intercept)
        (bind-columns (repeat 1 (send self :num-cases)) m)
        m)))

(defmeth regression-model-proto :leverages ()
"Message args: ()
Returns the diagonal elements of the hat matrix."
  (let* ((weights (send self :weights))
         (x (send self :x-matrix))
         (raw-levs 
          (matmult (* (matmult x (send self :xtxinv)) x)
                   (repeat 1 (send self :num-coefs)))))
    (if weights (* weights raw-levs) raw-levs)))

#|
(defmeth regression-model-proto :fit-values ()
"Message args: ()
Returns the fitted values for the model."
  (matmult (send self :x-matrix) (send self :coef-estimates)))
|#
;; modified to avoid creating a new matrix.
;; should be faster, especially if C storage is used for X
(defmeth regression-model-proto :fit-values ()
"Message args: ()
Returns the fitted values for the model."
  (let* ((x (send self :x))
	 (beta (send self :coef-estimates))
	 (basis (send self :basis))
	 (b (make-array (array-dimension x 1) :initial-element 0.0))
	 (intercept (send self :intercept)))
    (coerce (cond
	     (intercept
	      (setf (select b basis) (rest beta))
	      (+ (first beta) (matmult x b)))
	     (t
	      (setf (select b basis) beta)
	      (matmult x b)))
	    'list)))

(defmeth regression-model-proto :raw-residuals () 
"Message args: ()
Returns the raw residuals for a model."
  (- (send self :y) (send self :fit-values)))

(defmeth regression-model-proto :residuals () 
"Message args: ()
Returns the raw residuals for a model without weights. If the model
includes weights the raw residuals times the square roots of the weights
are returned."
  (let ((raw-residuals (send self :raw-residuals))
        (weights (send self :weights)))
    (if weights (* (sqrt weights) raw-residuals) raw-residuals)))

(defmeth regression-model-proto :sum-of-squares () 
"Message args: ()
Returns the error sum of squares for the model."
  (send self :residual-sum-of-squares))

(defmeth regression-model-proto :sigma-hat ()
"Message args: ()
Returns the estimated standard deviation of the deviations about the 
regression line."
  (let ((ss (send self :sum-of-squares))
        (df (send self :df)))
    (if (/= df 0) (sqrt (/ ss df)))))

;; for models without an intercept the 'usual' formula for R^2 can give
;; negative results; hence the max.
(defmeth regression-model-proto :r-squared ()
"Message args: ()
Returns the sample squared multiple correlation coefficient, R squared, for
the regression."
  (max (- 1 (/ (send self :sum-of-squares) (send self :total-sum-of-squares)))
       0))

(defmeth regression-model-proto :coef-estimates ()
"Message args: ()
Returns the OLS (ordinary least squares) estimates of the regression
coefficients. Entries beyond the intercept correspond to entries in basis."
  (let ((n (array-dimension (send self :x) 1))
        (indices (if (send self :intercept) 
                     (cons 0 (+ 1 (send self :basis)))
                     (+ 1 (send self :basis))))
        (m (send self :sweep-matrix)))
    (coerce (compound-data-seq (select m (+ 1 n) indices)) 'list)))

(defmeth regression-model-proto :xtxinv () 
"Message args: ()
Returns ((X^T) X)^(-1) or ((X^T) W X)^(-1)."
  (let ((indices (if (send self :intercept) 
                     (cons 0 (1+ (send self :basis))) 
                     (1+ (send self :basis)))))
    (select (send self :sweep-matrix) indices indices)))

(defmeth regression-model-proto :coef-standard-errors ()
"Message args: ()
Returns estimated standard errors of coefficients. Entries beyond the
intercept correspond to entries in basis."
  (let ((s (send self :sigma-hat)))
    (if s (* (send self :sigma-hat) (sqrt (diagonal (send self :xtxinv)))))))

(defmeth regression-model-proto :studentized-residuals ()
"Message args:  ()
Computes the internally studentized residuals for included cases and externally studentized residuals for excluded cases."
  (let ((res (send self :residuals))
        (lev (send self :leverages))
        (sig (send self :sigma-hat))
        (inc (send self :included)))
    (if-else inc
             (/ res (* sig (sqrt (pmax .00001 (- 1 lev)))))
             (/ res (* sig (sqrt (+ 1 lev)))))))

(defmeth regression-model-proto :externally-studentized-residuals ()
"Message args:  ()
Computes the externally studentized residuals."
  (let* ((res (send self :studentized-residuals))
         (df (send self :df)))
    (if-else (send self :included)
             (* res (sqrt (/ (- df 1) (- df (^ res 2)))))
             res)))

(defmeth regression-model-proto :cooks-distances ()
"Message args: ()
Computes Cook's distances."
  (let ((lev (send self :leverages))
        (res (/ (^ (send self :studentized-residuals) 2)
                (send self :num-coefs))))
    (if-else (send self :included) (* res (/ lev (- 1 lev) )) (* res lev))))

(defmeth regression-model-proto :plot-residuals (&optional x-values)
"Message args: (&optional x-values)
Opens a window with a plot of the residuals. If X-VALUES are not supplied 
the fitted values are used. The plot can be linked to other plots with the 
link-views function. Returns a plot object."
  (plot-points (if x-values x-values (send self :fit-values))
               (send self :residuals)
               :title "Residual Plot"
               :point-labels (send self :case-labels)))

(defmeth regression-model-proto :plot-bayes-residuals 
  (&optional x-values)
"Message args: (&optional x-values)
Opens a window with a plot of the standardized residuals and two standard
error bars for the posterior distribution of the actual deviations from the
line. See Chaloner and Brant. If X-VALUES are not supplied  the fitted values
are used. The plot can be linked to other plots with the link-views function.
Returns a plot object."
  (let* ((r (/ (send self :residuals) (send self :sigma-hat)))
         (d (* 2 (sqrt (send self :leverages))))
         (low (- r d))
         (high (+ r d))
         (x-values (if x-values x-values (send self :fit-values)))
         (p (plot-points x-values r :title "Bayes Residual Plot"
                         :point-labels (send self :case-labels))))
    (map 'list #'(lambda (a b c d) (send p :plotline a b c d nil))
               x-values low x-values high)
    (send p :adjust-to-data)
    p))