File: Fitting-Examples.html

package info (click to toggle)
gsl-ref-html 1.16-1
links: PTS
area: non-free
in suites: jessie, jessie-kfreebsd, stretch
size: 5,816 kB
ctags: 4,130
sloc: makefile: 35
file content (454 lines) | stat: -rw-r--r-- 14,076 bytes
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<!-- Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 The GSL Team.

Permission is granted to copy, distribute and/or modify this document
under the terms of the GNU Free Documentation License, Version 1.3 or
any later version published by the Free Software Foundation; with no
Invariant Sections and no cover texts.  A copy of the license is
included in the section entitled "GNU Free Documentation License". -->
<!-- Created by GNU Texinfo 5.1, http://www.gnu.org/software/texinfo/ -->
<head>
<title>GNU Scientific Library &ndash; Reference Manual: Fitting Examples</title>

<meta name="description" content="GNU Scientific Library &ndash; Reference Manual: Fitting Examples">
<meta name="keywords" content="GNU Scientific Library &ndash; Reference Manual: Fitting Examples">
<meta name="resource-type" content="document">
<meta name="distribution" content="global">
<meta name="Generator" content="makeinfo">
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<link href="index.html#Top" rel="start" title="Top">
<link href="Function-Index.html#Function-Index" rel="index" title="Function Index">
<link href="Least_002dSquares-Fitting.html#Least_002dSquares-Fitting" rel="up" title="Least-Squares Fitting">
<link href="Fitting-References-and-Further-Reading.html#Fitting-References-and-Further-Reading" rel="next" title="Fitting References and Further Reading">
<link href="Troubleshooting.html#Troubleshooting" rel="previous" title="Troubleshooting">
<style type="text/css">
<!--
a.summary-letter {text-decoration: none}
blockquote.smallquotation {font-size: smaller}
div.display {margin-left: 3.2em}
div.example {margin-left: 3.2em}
div.indentedblock {margin-left: 3.2em}
div.lisp {margin-left: 3.2em}
div.smalldisplay {margin-left: 3.2em}
div.smallexample {margin-left: 3.2em}
div.smallindentedblock {margin-left: 3.2em; font-size: smaller}
div.smalllisp {margin-left: 3.2em}
kbd {font-style:oblique}
pre.display {font-family: inherit}
pre.format {font-family: inherit}
pre.menu-comment {font-family: serif}
pre.menu-preformatted {font-family: serif}
pre.smalldisplay {font-family: inherit; font-size: smaller}
pre.smallexample {font-size: smaller}
pre.smallformat {font-family: inherit; font-size: smaller}
pre.smalllisp {font-size: smaller}
span.nocodebreak {white-space:nowrap}
span.nolinebreak {white-space:nowrap}
span.roman {font-family:serif; font-weight:normal}
span.sansserif {font-family:sans-serif; font-weight:normal}
ul.no-bullet {list-style: none}
-->
</style>


</head>

<body lang="en" bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#800080" alink="#FF0000">
<a name="Fitting-Examples"></a>
<div class="header">
<p>
Next: <a href="Fitting-References-and-Further-Reading.html#Fitting-References-and-Further-Reading" accesskey="n" rel="next">Fitting References and Further Reading</a>, Previous: <a href="Troubleshooting.html#Troubleshooting" accesskey="p" rel="previous">Troubleshooting</a>, Up: <a href="Least_002dSquares-Fitting.html#Least_002dSquares-Fitting" accesskey="u" rel="up">Least-Squares Fitting</a> &nbsp; [<a href="Function-Index.html#Function-Index" title="Index" rel="index">Index</a>]</p>
</div>
<hr>
<a name="Examples-28"></a>
<h3 class="section">37.7 Examples</h3>

<p>The following program computes a least squares straight-line fit to a
simple dataset, and outputs the best-fit line and its
associated one standard-deviation error bars.
</p>
<div class="example">
<pre class="verbatim">#include &lt;stdio.h&gt;
#include &lt;gsl/gsl_fit.h&gt;

int
main (void)
{
  int i, n = 4;
  double x[4] = { 1970, 1980, 1990, 2000 };
  double y[4] = {   12,   11,   14,   13 };
  double w[4] = {  0.1,  0.2,  0.3,  0.4 };

  double c0, c1, cov00, cov01, cov11, chisq;

  gsl_fit_wlinear (x, 1, w, 1, y, 1, n, 
                   &amp;c0, &amp;c1, &amp;cov00, &amp;cov01, &amp;cov11, 
                   &amp;chisq);

  printf (&quot;# best fit: Y = %g + %g X\n&quot;, c0, c1);
  printf (&quot;# covariance matrix:\n&quot;);
  printf (&quot;# [ %g, %g\n#   %g, %g]\n&quot;, 
          cov00, cov01, cov01, cov11);
  printf (&quot;# chisq = %g\n&quot;, chisq);

  for (i = 0; i &lt; n; i++)
    printf (&quot;data: %g %g %g\n&quot;, 
                   x[i], y[i], 1/sqrt(w[i]));

  printf (&quot;\n&quot;);

  for (i = -30; i &lt; 130; i++)
    {
      double xf = x[0] + (i/100.0) * (x[n-1] - x[0]);
      double yf, yf_err;

      gsl_fit_linear_est (xf, 
                          c0, c1, 
                          cov00, cov01, cov11, 
                          &amp;yf, &amp;yf_err);

      printf (&quot;fit: %g %g\n&quot;, xf, yf);
      printf (&quot;hi : %g %g\n&quot;, xf, yf + yf_err);
      printf (&quot;lo : %g %g\n&quot;, xf, yf - yf_err);
    }
  return 0;
}
</pre></div>

<p>The following commands extract the data from the output of the program
and display it using the <small>GNU</small> plotutils <code>graph</code> utility, 
</p>
<div class="example">
<pre class="example">$ ./demo &gt; tmp
$ more tmp
# best fit: Y = -106.6 + 0.06 X
# covariance matrix:
# [ 39602, -19.9
#   -19.9, 0.01]
# chisq = 0.8

$ for n in data fit hi lo ; 
   do 
     grep &quot;^$n&quot; tmp | cut -d: -f2 &gt; $n ; 
   done
$ graph -T X -X x -Y y -y 0 20 -m 0 -S 2 -Ie data 
     -S 0 -I a -m 1 fit -m 2 hi -m 2 lo
</pre></div>


<p>The next program performs a quadratic fit <em>y = c_0 + c_1 x + c_2
x^2</em> to a weighted dataset using the generalised linear fitting function
<code>gsl_multifit_wlinear</code>.  The model matrix <em>X</em> for a quadratic
fit is given by,
where the column of ones corresponds to the constant term <em>c_0</em>.
The two remaining columns corresponds to the terms <em>c_1 x</em> and
<em>c_2 x^2</em>.
</p>
<p>The program reads <var>n</var> lines of data in the format (<var>x</var>, <var>y</var>,
<var>err</var>) where <var>err</var> is the error (standard deviation) in the
value <var>y</var>.
</p>
<div class="example">
<pre class="verbatim">#include &lt;stdio.h&gt;
#include &lt;gsl/gsl_multifit.h&gt;

int
main (int argc, char **argv)
{
  int i, n;
  double xi, yi, ei, chisq;
  gsl_matrix *X, *cov;
  gsl_vector *y, *w, *c;

  if (argc != 2)
    {
      fprintf (stderr,&quot;usage: fit n &lt; data\n&quot;);
      exit (-1);
    }

  n = atoi (argv[1]);

  X = gsl_matrix_alloc (n, 3);
  y = gsl_vector_alloc (n);
  w = gsl_vector_alloc (n);

  c = gsl_vector_alloc (3);
  cov = gsl_matrix_alloc (3, 3);

  for (i = 0; i &lt; n; i++)
    {
      int count = fscanf (stdin, &quot;%lg %lg %lg&quot;,
                          &amp;xi, &amp;yi, &amp;ei);

      if (count != 3)
        {
          fprintf (stderr, &quot;error reading file\n&quot;);
          exit (-1);
        }

      printf (&quot;%g %g +/- %g\n&quot;, xi, yi, ei);
      
      gsl_matrix_set (X, i, 0, 1.0);
      gsl_matrix_set (X, i, 1, xi);
      gsl_matrix_set (X, i, 2, xi*xi);
      
      gsl_vector_set (y, i, yi);
      gsl_vector_set (w, i, 1.0/(ei*ei));
    }

  {
    gsl_multifit_linear_workspace * work 
      = gsl_multifit_linear_alloc (n, 3);
    gsl_multifit_wlinear (X, w, y, c, cov,
                          &amp;chisq, work);
    gsl_multifit_linear_free (work);
  }

#define C(i) (gsl_vector_get(c,(i)))
#define COV(i,j) (gsl_matrix_get(cov,(i),(j)))

  {
    printf (&quot;# best fit: Y = %g + %g X + %g X^2\n&quot;, 
            C(0), C(1), C(2));

    printf (&quot;# covariance matrix:\n&quot;);
    printf (&quot;[ %+.5e, %+.5e, %+.5e  \n&quot;,
               COV(0,0), COV(0,1), COV(0,2));
    printf (&quot;  %+.5e, %+.5e, %+.5e  \n&quot;, 
               COV(1,0), COV(1,1), COV(1,2));
    printf (&quot;  %+.5e, %+.5e, %+.5e ]\n&quot;, 
               COV(2,0), COV(2,1), COV(2,2));
    printf (&quot;# chisq = %g\n&quot;, chisq);
  }

  gsl_matrix_free (X);
  gsl_vector_free (y);
  gsl_vector_free (w);
  gsl_vector_free (c);
  gsl_matrix_free (cov);

  return 0;
}
</pre></div>

<p>A suitable set of data for fitting can be generated using the following
program.  It outputs a set of points with gaussian errors from the curve
<em>y = e^x</em> in the region <em>0 &lt; x &lt; 2</em>.
</p>
<div class="example">
<pre class="verbatim">#include &lt;stdio.h&gt;
#include &lt;math.h&gt;
#include &lt;gsl/gsl_randist.h&gt;

int
main (void)
{
  double x;
  const gsl_rng_type * T;
  gsl_rng * r;
  
  gsl_rng_env_setup ();
  
  T = gsl_rng_default;
  r = gsl_rng_alloc (T);

  for (x = 0.1; x &lt; 2; x+= 0.1)
    {
      double y0 = exp (x);
      double sigma = 0.1 * y0;
      double dy = gsl_ran_gaussian (r, sigma);

      printf (&quot;%g %g %g\n&quot;, x, y0 + dy, sigma);
    }

  gsl_rng_free(r);

  return 0;
}
</pre></div>

<p>The data can be prepared by running the resulting executable program,
</p>
<div class="example">
<pre class="example">$ GSL_RNG_TYPE=mt19937_1999 ./generate &gt; exp.dat
$ more exp.dat
0.1 0.97935 0.110517
0.2 1.3359 0.12214
0.3 1.52573 0.134986
0.4 1.60318 0.149182
0.5 1.81731 0.164872
0.6 1.92475 0.182212
....
</pre></div>

<p>To fit the data use the previous program, with the number of data points
given as the first argument.  In this case there are 19 data points.
</p>
<div class="example">
<pre class="example">$ ./fit 19 &lt; exp.dat
0.1 0.97935 +/- 0.110517
0.2 1.3359 +/- 0.12214
...
# best fit: Y = 1.02318 + 0.956201 X + 0.876796 X^2
# covariance matrix:
[ +1.25612e-02, -3.64387e-02, +1.94389e-02  
  -3.64387e-02, +1.42339e-01, -8.48761e-02  
  +1.94389e-02, -8.48761e-02, +5.60243e-02 ]
# chisq = 23.0987
</pre></div>

<p>The parameters of the quadratic fit match the coefficients of the
expansion of <em>e^x</em>, taking into account the errors on the
parameters and the <em>O(x^3)</em> difference between the exponential and
quadratic functions for the larger values of <em>x</em>.  The errors on
the parameters are given by the square-root of the corresponding
diagonal elements of the covariance matrix.  The chi-squared per degree
of freedom is 1.4, indicating a reasonable fit to the data.
</p>

<p>The next program demonstrates the advantage of robust least squares on
a dataset with outliers. The program generates linear <em>(x,y)</em>
data pairs on the line <em>y = 1.45 x + 3.88</em>, adds some random
noise, and inserts 3 outliers into the dataset. Both the robust
and ordinary least squares (OLS) coefficients are computed for
comparison.
</p>
<div class="example">
<pre class="verbatim">#include &lt;stdio.h&gt;
#include &lt;gsl/gsl_multifit.h&gt;
#include &lt;gsl/gsl_randist.h&gt;

int
dofit(const gsl_multifit_robust_type *T,
      const gsl_matrix *X, const gsl_vector *y,
      gsl_vector *c, gsl_matrix *cov)
{
  int s;
  gsl_multifit_robust_workspace * work 
    = gsl_multifit_robust_alloc (T, X-&gt;size1, X-&gt;size2);

  s = gsl_multifit_robust (X, y, c, cov, work);
  gsl_multifit_robust_free (work);

  return s;
}

int
main (int argc, char **argv)
{
  int i;
  size_t n;
  const size_t p = 2; /* linear fit */
  gsl_matrix *X, *cov;
  gsl_vector *x, *y, *c, *c_ols;
  const double a = 1.45; /* slope */
  const double b = 3.88; /* intercept */
  gsl_rng *r;

  if (argc != 2)
    {
      fprintf (stderr,&quot;usage: robfit n\n&quot;);
      exit (-1);
    }

  n = atoi (argv[1]);

  X = gsl_matrix_alloc (n, p);
  x = gsl_vector_alloc (n);
  y = gsl_vector_alloc (n);

  c = gsl_vector_alloc (p);
  c_ols = gsl_vector_alloc (p);
  cov = gsl_matrix_alloc (p, p);

  r = gsl_rng_alloc(gsl_rng_default);

  /* generate linear dataset */
  for (i = 0; i &lt; n - 3; i++)
    {
      double dx = 10.0 / (n - 1.0);
      double ei = gsl_rng_uniform(r);
      double xi = -5.0 + i * dx;
      double yi = a * xi + b;

      gsl_vector_set (x, i, xi);
      gsl_vector_set (y, i, yi + ei);
    }

  /* add a few outliers */
  gsl_vector_set(x, n - 3, 4.7);
  gsl_vector_set(y, n - 3, -8.3);

  gsl_vector_set(x, n - 2, 3.5);
  gsl_vector_set(y, n - 2, -6.7);

  gsl_vector_set(x, n - 1, 4.1);
  gsl_vector_set(y, n - 1, -6.0);

  /* construct design matrix X for linear fit */
  for (i = 0; i &lt; n; ++i)
    {
      double xi = gsl_vector_get(x, i);

      gsl_matrix_set (X, i, 0, 1.0);
      gsl_matrix_set (X, i, 1, xi);
    }

  /* perform robust and OLS fit */
  dofit(gsl_multifit_robust_ols, X, y, c_ols, cov);
  dofit(gsl_multifit_robust_bisquare, X, y, c, cov);

  /* output data and model */
  for (i = 0; i &lt; n; ++i)
    {
      double xi = gsl_vector_get(x, i);
      double yi = gsl_vector_get(y, i);
      gsl_vector_view v = gsl_matrix_row(X, i);
      double y_ols, y_rob, y_err;

      gsl_multifit_robust_est(&amp;v.vector, c, cov, &amp;y_rob, &amp;y_err);
      gsl_multifit_robust_est(&amp;v.vector, c_ols, cov, &amp;y_ols, &amp;y_err);

      printf(&quot;%g %g %g %g\n&quot;, xi, yi, y_rob, y_ols);
    }

#define C(i) (gsl_vector_get(c,(i)))
#define COV(i,j) (gsl_matrix_get(cov,(i),(j)))

  {
    printf (&quot;# best fit: Y = %g + %g X\n&quot;, 
            C(0), C(1));

    printf (&quot;# covariance matrix:\n&quot;);
    printf (&quot;# [ %+.5e, %+.5e\n&quot;,
               COV(0,0), COV(0,1));
    printf (&quot;#   %+.5e, %+.5e\n&quot;, 
               COV(1,0), COV(1,1));
  }

  gsl_matrix_free (X);
  gsl_vector_free (x);
  gsl_vector_free (y);
  gsl_vector_free (c);
  gsl_vector_free (c_ols);
  gsl_matrix_free (cov);
  gsl_rng_free(r);

  return 0;
}
</pre></div>

<p>The output from the program is shown in the following plot.
</p>

<hr>
<div class="header">
<p>
Next: <a href="Fitting-References-and-Further-Reading.html#Fitting-References-and-Further-Reading" accesskey="n" rel="next">Fitting References and Further Reading</a>, Previous: <a href="Troubleshooting.html#Troubleshooting" accesskey="p" rel="previous">Troubleshooting</a>, Up: <a href="Least_002dSquares-Fitting.html#Least_002dSquares-Fitting" accesskey="u" rel="up">Least-Squares Fitting</a> &nbsp; [<a href="Function-Index.html#Function-Index" title="Index" rel="index">Index</a>]</p>
</div>



</body>
</html>