/*
* c++ implementation of Lowess weighted regression by
* Peter Glaus http://www.cs.man.ac.uk/~glausp/
*
*
* Based on fortran code by Cleveland downloaded from:
* http://netlib.org/go/lowess.f
* original author:
* wsc@research.bell-labs.com Mon Dec 30 16:55 EST 1985
* W. S. Cleveland
* Bell Laboratories
* Murray Hill NJ 07974
*
* See original documentation below the code for details.
*
*/
#include<algorithm>
#include<cmath>
#include<fstream>
#include<vector>

using namespace std;

#include "lowess.h"
#include "common.h"
void lowess(const vector<double> &x, const vector<double> &y, double f, long nsteps, vector<double> &ys){//{{{
   vector<double> rw, res;
   lowess(x, y, f, nsteps, 0., ys, rw, res);
}//}}}
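
/*
 * Example usage (a minimal sketch added for illustration, not part of the
 * original sources; the helper name "smooth_example" is hypothetical):
 *
 *   #include <vector>
 *   #include "lowess.h"
 *
 *   std::vector<double> smooth_example(const std::vector<double> &x,
 *                                      const std::vector<double> &y){
 *      // x must already be sorted in increasing order; see the sorting
 *      // sketch in the documentation below for unsorted data.
 *      std::vector<double> ys;
 *      // F = .5 and NSTEPS = 2 are the values the original
 *      // documentation suggests for general use.
 *      lowess(x, y, .5, 2, ys);
 *      return ys;
 *   }
 */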
void lowess(const vector<double> &x, const vector<double> &y, double f, long nsteps, double delta, vector<double> &ys, vector<double> &rw, vector<double> &res){ //{{{
   long n = (long)x.size();
   bool ok = false;
   long nleft, nright, i, j, iter, last, m1, m2, ns;
   double cut, cmad, r, d1, d2, c1, c9, alpha, denom;
   if((n == 0) || ((long)y.size() != n)) return;
   ys.resize(n);
   rw.resize(n);
   res.resize(n);
   if(n == 1){
      ys[0] = y[0];
      return;
   }
   // ns - number of points used for each local fit: at least 2, at most n
   ns = max(min((long)(f*n), n), (long)2);
   for(iter = 0; iter < nsteps+1; iter++){
      // robustness iterations
      nleft = 0;
      nright = ns-1;
      // index of last estimated point
      last = -1;
      // index of current point
      i = 0;
      do{
         while(nright < n-1){
            // move <nleft,nright> right, while radius decreases
            d1 = x[i] - x[nleft];
            d2 = x[nright+1] - x[i];
            if(d1 <= d2) break;
            nleft++;
            nright++;
         }
         // fit value at x[i]
         lowest(x, y, x[i], ys[i], nleft, nright, res, iter > 0, rw, ok);
         if(!ok) ys[i] = y[i];
         if(last < i-1){
            // interpolate skipped points
            if(last < 0){
               warning("Lowess: out of range.\n");
            }
            denom = x[i] - x[last];
            for(j = last+1; j < i; j++){
               alpha = (x[j] - x[last]) / denom;
               ys[j] = alpha*ys[i] + (1.0-alpha)*ys[last];
            }
         }
         last = i;
         cut = x[last] + delta;
         for(i = last+1; i < n; i++){
            if(x[i] > cut) break;
            if(x[i] == x[last]){
               ys[i] = ys[last];
               last = i;
            }
         }
         i = max(last+1, i-1);
      }while(last < n-1);
      for(i = 0; i < n; i++)
         res[i] = y[i] - ys[i];
      if(iter == nsteps) break;
      for(i = 0; i < n; i++)
         rw[i] = abs(res[i]);
      sort(rw.begin(), rw.end());
      // cmad is six times the median absolute residual: for even n the two
      // middle order statistics are summed, for odd n m1 and m2 coincide
      m1 = n/2 + 1;
      m2 = n - m1;
      m1--;
      cmad = 3.0 * (rw[m1] + rw[m2]);
      c9 = .999*cmad;
      c1 = .001*cmad;
      // bisquare robustness weights, clipped near 0 and 1 to avoid underflow
      for(i = 0; i < n; i++){
         r = abs(res[i]);
         if(r <= c1) rw[i] = 1;
         else if(r > c9) rw[i] = 0;
         else rw[i] = (1.0 - (r/cmad)*(r/cmad)) * (1.0 - (r/cmad)*(r/cmad));
      }
   }
}//}}}
void lowest(const vector<double> &x, const vector<double> &y, double xs, double &ys, long nleft, long nright, vector<double> &w, bool userw, vector<double> &rw, bool &ok){//{{{
   long n = (long)x.size();
   long nrt, j;
   double a, b, c, h, r, h1, h9, range;
   range = x[n-1] - x[0];
   h = max(xs - x[nleft], x[nright] - xs);
   h9 = 0.999*h;
   h1 = 0.001*h;
   // a - sum of weights
   a = 0;
   for(j = nleft; j < n; j++){
      // compute weights (pick up all ties on right)
      w[j] = 0.;
      r = abs(x[j] - xs);
      if(r <= h9){
         // small enough for non-zero weight
         if(r > h1) w[j] = (1.0-(r/h)*(r/h)*(r/h)) * (1.0-(r/h)*(r/h)*(r/h)) * (1.0-(r/h)*(r/h)*(r/h)); // tricube
         else w[j] = 1.;
         if(userw) w[j] *= rw[j];
         a += w[j];
      }else if(x[j] > xs) break; // get out at first zero wt on right
   }
   // rightmost point used (may be greater than nright because of ties)
   nrt = j-1;
   if(a <= 0.) ok = false;
   else{
      // weighted least squares
      ok = true;
      // normalize weights so they sum to 1
      for(j = nleft; j <= nrt; j++)
         w[j] /= a;
      if(h > 0.){
         // use linear fit
         a = 0.;
         for(j = nleft; j <= nrt; j++)
            a += w[j]*x[j]; // weighted centre of x values
         b = xs - a;
         c = 0;
         for(j = nleft; j <= nrt; j++)
            c += w[j]*(x[j]-a)*(x[j]-a);
         if(sqrt(c) > 0.001*range){
            // points are spread out enough to compute slope
            b /= c;
            for(j = nleft; j <= nrt; j++)
               w[j] *= (1.0 + b*(x[j]-a));
         }
      }
      ys = 0;
      for(j = nleft; j <= nrt; j++)
         ys += w[j]*y[j];
   }
}//}}}
/* {{{ Documentation
* wsc@research.bell-labs.com Mon Dec 30 16:55 EST 1985
* W. S. Cleveland
* Bell Laboratories
* Murray Hill NJ 07974
*
 * outline of the original file (lowess.f):
* lines 1-72 introduction
* 73-177 documentation for lowess
* 178-238 ratfor version of lowess
* 239-301 documentation for lowest
* 302-350 ratfor version of lowest
* 351-end test driver and fortran version of lowess and lowest
*
* a multivariate version is available by "send dloess from a"
*
* COMPUTER PROGRAMS FOR LOCALLY WEIGHTED REGRESSION
*
* This package consists of two FORTRAN programs for
* smoothing scatterplots by robust locally weighted
* regression, or lowess. The principal routine is LOWESS
* which computes the smoothed values using the method
* described in The Elements of Graphing Data, by William S.
* Cleveland (Wadsworth, 555 Morego Street, Monterey,
* California 93940).
*
* LOWESS calls a support routine, LOWEST, the code for
* which is included. LOWESS also calls a routine SORT, which
* the user must provide.
*
* To reduce the computations, LOWESS requires that the
* arrays X and Y, which are the horizontal and vertical
* coordinates, respectively, of the scatterplot, be such that
* X is sorted from smallest to largest. The user must
* therefore use another sort routine which will sort X and Y
* according to X.
* To summarize the scatterplot, YS, the fitted values,
* should be plotted against X. No graphics routines are
* available in the package and must be supplied by the user.
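 *
 * For the C++ port above, a minimal sketch of such a pre-sort follows
 * (an illustration only, not part of the original package; the helper
 * name "sort_by_x" is hypothetical):
 *
 *   #include <algorithm>
 *   #include <utility>
 *   #include <vector>
 *
 *   // Reorders x and y together so that x ends up in increasing order.
 *   void sort_by_x(std::vector<double> &x, std::vector<double> &y){
 *      std::vector<std::pair<double,double> > xy(x.size());
 *      for(size_t k = 0; k < xy.size(); k++)
 *         xy[k] = std::make_pair(x[k], y[k]);
 *      // pairs compare by .first (the x value), then .second
 *      std::sort(xy.begin(), xy.end());
 *      for(size_t k = 0; k < xy.size(); k++){
 *         x[k] = xy[k].first;
 *         y[k] = xy[k].second;
 *      }
 *   }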
*
* The FORTRAN code for the routines LOWESS and LOWEST has
* been generated from higher level RATFOR programs
 * (B. W. Kernighan, ``RATFOR: A Preprocessor for a Rational
 * Fortran,'' Software Practice and Experience, Vol. 5, 1975),
 * which are also included.
*
* The following are data and output from LOWESS that can
* be used to check your implementation of the routines. The
* notation (10)v means 10 values of v.
*
*
*
*
* X values:
* 1 2 3 4 5 (10)6 8 10 12 14 50
*
* Y values:
* 18 2 15 6 10 4 16 11 7 3 14 17 20 12 9 13 1 8 5 19
*
*
* YS values with F = .25, NSTEPS = 0, DELTA = 0.0
* 13.659 11.145 8.701 9.722 10.000 (10)11.300 13.000 6.440 5.596
* 5.456 18.998
*
* YS values with F = .25, NSTEPS = 0 , DELTA = 3.0
* 13.659 12.347 11.034 9.722 10.511 (10)11.300 13.000 6.440 5.596
* 5.456 18.998
*
* YS values with F = .25, NSTEPS = 2, DELTA = 0.0
* 14.811 12.115 8.984 9.676 10.000 (10)11.346 13.000 6.734 5.744
* 5.415 18.998
*
*
*
*
* LOWESS
*
*
*
* Calling sequence
*
* CALL LOWESS(X,Y,N,F,NSTEPS,DELTA,YS,RW,RES)
*
* Purpose
*
* LOWESS computes the smooth of a scatterplot of Y against X
* using robust locally weighted regression. Fitted values,
* YS, are computed at each of the values of the horizontal
* axis in X.
*
* Argument description
*
* X = Input; abscissas of the points on the
* scatterplot; the values in X must be ordered
* from smallest to largest.
* Y = Input; ordinates of the points on the
* scatterplot.
* N = Input; dimension of X,Y,YS,RW, and RES.
* F = Input; specifies the amount of smoothing; F is
* the fraction of points used to compute each
* fitted value; as F increases the smoothed values
* become smoother; choosing F in the range .2 to
* .8 usually results in a good fit; if you have no
* idea which value to use, try F = .5.
* NSTEPS = Input; the number of iterations in the robust
* fit; if NSTEPS = 0, the nonrobust fit is
* returned; setting NSTEPS equal to 2 should serve
* most purposes.
* DELTA = input; nonnegative parameter which may be used
* to save computations; if N is less than 100, set
* DELTA equal to 0.0; if N is greater than 100 you
* should find out how DELTA works by reading the
* additional instructions section.
* YS = Output; fitted values; YS(I) is the fitted value
* at X(I); to summarize the scatterplot, YS(I)
* should be plotted against X(I).
* RW = Output; robustness weights; RW(I) is the weight
* given to the point (X(I),Y(I)); if NSTEPS = 0,
* RW is not used.
* RES = Output; residuals; RES(I) = Y(I)-YS(I).
*
*
* Other programs called
*
* LOWEST
* SSORT
*
* Additional instructions
*
* DELTA can be used to save computations. Very roughly the
* algorithm is this: on the initial fit and on each of the
* NSTEPS iterations locally weighted regression fitted values
* are computed at points in X which are spaced, roughly, DELTA
* apart; then the fitted values at the remaining points are
* computed using linear interpolation. The first locally
* weighted regression (l.w.r.) computation is carried out at
* X(1) and the last is carried out at X(N). Suppose the
* l.w.r. computation is carried out at X(I). If X(I+1) is
* greater than or equal to X(I)+DELTA, the next l.w.r.
* computation is carried out at X(I+1). If X(I+1) is less
* than X(I)+DELTA, the next l.w.r. computation is carried out
* at the largest X(J) which is greater than or equal to X(I)
* but is not greater than X(I)+DELTA. Then the fitted values
* for X(K) between X(I) and X(J), if there are any, are
* computed by linear interpolation of the fitted values at
* X(I) and X(J). If N is less than 100 then DELTA can be set
* to 0.0 since the computation time will not be too great.
* For larger N it is typically not necessary to carry out the
* l.w.r. computation for all points, so that much computation
* time can be saved by taking DELTA to be greater than 0.0.
* If DELTA = Range (X)/k then, if the values in X were
* uniformly scattered over the range, the full l.w.r.
* computation would be carried out at approximately k points.
* Taking k to be 50 often works well.
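 *
 * In the C++ port above, the suggested choice k = 50 would read
 * (a sketch, assuming x is non-empty and sorted):
 *
 *   std::vector<double> ys, rw, res;
 *   double delta = (x.back() - x.front()) / 50.;
 *   lowess(x, y, .5, 2, delta, ys, rw, res);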
*
* Method
*
* The fitted values are computed by using the nearest neighbor
* routine and robust locally weighted regression of degree 1
* with the tricube weight function. A few additional features
* have been added. Suppose r is FN truncated to an integer.
* Let h be the distance to the r-th nearest neighbor
* from X(I). All points within h of X(I) are used. Thus if
* the r-th nearest neighbor is exactly the same distance as
* other points, more than r points can possibly be used for
* the smooth at X(I). There are two cases where robust
* locally weighted regression of degree 0 is actually used at
* X(I). One case occurs when h is 0.0. The second case
* occurs when the weighted standard error of the X(I) with
* respect to the weights w(j) is less than .001 times the
* range of the X(I), where w(j) is the weight assigned to the
* j-th point of X (the tricube weight times the robustness
* weight) divided by the sum of all of the weights. Finally,
* if the w(j) are all zero for the smooth at X(I), the fitted
* value is taken to be Y(I).
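 *
 * For reference, the two weight functions used above, in the notation
 * of the RATFOR code below (r is a distance, h the neighbourhood
 * radius, and cmad six times the median absolute residual):
 *
 *   tricube weight: w = (1-(r/h)**3)**3 for r < h, else 0
 *   bisquare robustness weight: rw = (1-(r/cmad)**2)**2 for r < cmad, else 0
 *
 * (in the code both are clipped near 0 and 1 to avoid underflow)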
*
*
*
*
* subroutine lowess(x,y,n,f,nsteps,delta,ys,rw,res)
* real x(n),y(n),ys(n),rw(n),res(n)
* logical ok
* if (n<2){ ys(1) = y(1); return }
* ns = max0(min0(ifix(f*float(n)),n),2) # at least two, at most n points
* for(iter=1; iter<=nsteps+1; iter=iter+1){ # robustness iterations
* nleft = 1; nright = ns
* last = 0 # index of prev estimated point
* i = 1 # index of current point
* repeat{
* while(nright<n){
* # move nleft, nright to right if radius decreases
* d1 = x(i)-x(nleft)
* d2 = x(nright+1)-x(i)
* # if d1<=d2 with x(nright+1)==x(nright), lowest fixes
* if (d1<=d2) break
* # radius will not decrease by move right
* nleft = nleft+1
* nright = nright+1
* }
* call lowest(x,y,n,x(i),ys(i),nleft,nright,res,iter>1,rw,ok)
* # fitted value at x(i)
* if (!ok) ys(i) = y(i)
* # all weights zero - copy over value (all rw==0)
* if (last<i-1) { # skipped points -- interpolate
* denom = x(i)-x(last) # non-zero - proof?
* for(j=last+1; j<i; j=j+1){
* alpha = (x(j)-x(last))/denom
* ys(j) = alpha*ys(i)+(1.0-alpha)*ys(last)
* }
* }
* last = i # last point actually estimated
* cut = x(last)+delta # x coord of close points
* for(i=last+1; i<=n; i=i+1){ # find close points
* if (x(i)>cut) break # i one beyond last pt within cut
* if(x(i)==x(last)){ # exact match in x
* ys(i) = ys(last)
* last = i
* }
* }
* i=max0(last+1,i-1)
* # back 1 point so interpolation within delta, but always go forward
* } until(last>=n)
* do i = 1,n # residuals
* res(i) = y(i)-ys(i)
* if (iter>nsteps) break # compute robustness weights except last time
* do i = 1,n
* rw(i) = abs(res(i))
* call sort(rw,n)
* m1 = 1+n/2; m2 = n-m1+1
* cmad = 3.0*(rw(m1)+rw(m2)) # 6 median abs resid
* c9 = .999*cmad; c1 = .001*cmad
* do i = 1,n {
* r = abs(res(i))
* if(r<=c1) rw(i)=1. # near 0, avoid underflow
* else if(r>c9) rw(i)=0. # near 1, avoid underflow
* else rw(i) = (1.0-(r/cmad)**2)**2
* }
* }
* return
* end
*
*
*
*
* LOWEST
*
*
*
* Calling sequence
*
* CALL LOWEST(X,Y,N,XS,YS,NLEFT,NRIGHT,W,USERW,RW,OK)
*
* Purpose
*
* LOWEST is a support routine for LOWESS and ordinarily will
* not be called by the user. The fitted value, YS, is
* computed at the value, XS, of the horizontal axis.
* Robustness weights, RW, can be employed in computing the
* fit.
*
* Argument description
*
*
* X = Input; abscissas of the points on the
* scatterplot; the values in X must be ordered
* from smallest to largest.
* Y = Input; ordinates of the points on the
* scatterplot.
* N = Input; dimension of X,Y,W, and RW.
* XS = Input; value of the horizontal axis at which the
* smooth is computed.
* YS = Output; fitted value at XS.
* NLEFT = Input; index of the first point which should be
* considered in computing the fitted value.
* NRIGHT = Input; index of the last point which should be
* considered in computing the fitted value.
* W = Output; W(I) is the weight for Y(I) used in the
* expression for YS, which is the sum from
* I = NLEFT to NRIGHT of W(I)*Y(I); W(I) is
* defined only at locations NLEFT to NRIGHT.
* USERW = Input; logical variable; if USERW is .TRUE., a
* robust fit is carried out using the weights in
* RW; if USERW is .FALSE., the values in RW are
* not used.
* RW = Input; robustness weights.
* OK = Output; logical variable; if the weights for the
* smooth are all 0.0, the fitted value, YS, is not
* computed and OK is set equal to .FALSE.; if the
 * fitted value is computed OK is set equal to
 * .TRUE..
 *
*
* Method
*
* The smooth at XS is computed using (robust) locally weighted
* regression of degree 1. The tricube weight function is used
* with h equal to the maximum of XS-X(NLEFT) and X(NRIGHT)-XS.
* Two cases where the program reverts to locally weighted
* regression of degree 0 are described in the documentation
* for LOWESS.
*
*
*
*
* subroutine lowest(x,y,n,xs,ys,nleft,nright,w,userw,rw,ok)
* real x(n),y(n),w(n),rw(n)
* logical userw,ok
* range = x(n)-x(1)
* h = amax1(xs-x(nleft),x(nright)-xs)
* h9 = .999*h
* h1 = .001*h
* a = 0.0 # sum of weights
* for(j=nleft; j<=n; j=j+1){ # compute weights (pick up all ties on right)
* w(j)=0.
* r = abs(x(j)-xs)
* if (r<=h9) { # small enough for non-zero weight
* if (r>h1) w(j) = (1.0-(r/h)**3)**3
* else w(j) = 1.
* if (userw) w(j) = rw(j)*w(j)
* a = a+w(j)
* }
* else if(x(j)>xs)break # get out at first zero wt on right
* }
* nrt=j-1 # rightmost pt (may be greater than nright because of ties)
* if (a<=0.0) ok = FALSE
* else { # weighted least squares
* ok = TRUE
* do j = nleft,nrt
* w(j) = w(j)/a # make sum of w(j) == 1
* if (h>0.) { # use linear fit
* a = 0.0
* do j = nleft,nrt
* a = a+w(j)*x(j) # weighted center of x values
* b = xs-a
* c = 0.0
* do j = nleft,nrt
* c = c+w(j)*(x(j)-a)**2
* if(sqrt(c)>.001*range) {
* # points are spread out enough to compute slope
* b = b/c
* do j = nleft,nrt
* w(j) = w(j)*(1.0+b*(x(j)-a))
* }
* }
* ys = 0.0
* do j = nleft,nrt
* ys = ys+w(j)*y(j)
* }
* return
* end
*
}}}*/
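
/*
 * Self-check driver (a sketch added for illustration, not part of the
 * original sources): it runs the first test case from the documentation
 * above and prints the fitted values, which can be compared against the
 * YS values listed there for F = .25, NSTEPS = 0, DELTA = 0.0.  The
 * guard macro LOWESS_SELF_TEST is hypothetical; compile with
 * -DLOWESS_SELF_TEST to enable it.
 */
#ifdef LOWESS_SELF_TEST
#include<cstdio>
int main(){
   double xv[] = {1,2,3,4,5,6,6,6,6,6,6,6,6,6,6,8,10,12,14,50};
   double yv[] = {18,2,15,6,10,4,16,11,7,3,14,17,20,12,9,13,1,8,5,19};
   vector<double> x(xv, xv+20), y(yv, yv+20), ys, rw, res;
   lowess(x, y, .25, 0, 0.0, ys, rw, res);
   // expected: 13.659 11.145 8.701 9.722 10.000 (10)11.300 13.000
   //           6.440 5.596 5.456 18.998
   for(long i = 0; i < (long)ys.size(); i++)
      printf("%.3f ", ys[i]);
   printf("\n");
   return 0;
}
#endif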