File: kernel_example.m

package info (click to toggle)
octave-econometrics 1%3A1.0.8-6
links: PTS, VCS
area: main
in suites: wheezy
size: 476 kB
sloc: makefile: 69; cpp: 32; sh: 4
file content (140 lines) | stat: -rw-r--r-- 5,272 bytes
parent folder | download | duplicates (2)
# Copyright (C) 2007 Michael Creel <michael.creel@uab.es>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; If not, see <http://www.gnu.org/licenses/>.

# kernel_example: examples of how to use kernel density and regression functions
# requires the optim and plot packages from Octave Forge
#
# usage: kernel_example;

# sample size (default n = 500) - should get better fit (on average)
# as this increases, supposing that you allow the optimal window width
# to be found, by uncommenting the relevant lines
n = 500;

# set this to greater than 0 to try parallel computations (requires MPITB)
compute_nodes = 0;
nodes = compute_nodes + 1; # count master node

close all;
hold off;

############################################################
# kernel regression example
# uniformly spaced data points on [0,2]
x = 1:n;
x = x';
x = 2*x/n;
# generate dependent variable
trueline =  x + (x.^2)/2 - 3.1*(x.^3)/3 + 1.2*(x.^4)/4;
sig = 0.5;
y = trueline + sig*randn(n,1);
tic;
fit = kernel_regression(x, y, x); # use the default bandwidth
t1 = toc;
printf("\n");
printf("########################################################################\n");
printf("time for kernel regression example using %d data points and %d compute nodes: %f\n", n, nodes, t1);
plot(x, fit, ";fit;", x, trueline,";true;");
grid("on");
title("Example 1: Kernel regression fit");

############################################################
# kernel density example: univariate - fit to Chi^2(3) data
data = sumsq(randn(n,3),2);
# evaluation point are on a grid for plotting
stepsize = 0.2;
grid_x = (-1:stepsize:11)';
bandwidth = 0.55;
# get optimal bandwidth (time consuming, uncomment if you want to try it)
# bandwidth = kernel_optimal_bandwidth(data);
# get the fitted density and do a plot
tic;
dens = kernel_density(grid_x, data, bandwidth, "__kernel_normal", false, false, compute_nodes);
t1 = toc;
printf("\n");
printf("########################################################################\n");
printf("time for univariate kernel density example using %d data points and %d compute nodes: %f\n", n, nodes, t1);
printf("A rough integration under the fitted univariate density is %f\n", sum(dens)*stepsize);
figure();
plot(grid_x, dens, ";fitted density;", grid_x, chi2pdf(grid_x,3), ";true density;");
title("Example 2: Kernel density fit: Univariate Chi^2(3) data");

############################################################
# kernel density example: bivariate
# X ~ N(0,1)
# Y ~ Chi squared(3)
# X, Y are dependent
d = randn(n,3);
data = [d(:,1) sumsq(d,2)];
# evaluation points are on a grid for plotting
stepsize = 0.2;
a = (-5:stepsize:5)'; # for the N(0,1)
b = (-1:stepsize:9)';  # for the Chi squared(3)
gridsize = rows(a);
[grid_x, grid_y] = meshgrid(a, b);
eval_points = [vec(grid_x) vec(grid_y)];
bandwidth = 0.85;
# get optimal bandwidth (time consuming, uncomment if you want to try it)
# bandwidth = kernel_optimal_bandwidth(data);
# get the fitted density and do a plot
tic;
dens = kernel_density(eval_points, data, bandwidth, "__kernel_normal", false, false, compute_nodes);
t1 = toc;
printf("\n");
printf("########################################################################\n");
printf("time for multivariate kernel density example using %d data points and %d compute nodes: %f\n", n, nodes, t1);
dens = reshape(dens, gridsize, gridsize);
printf("A rough integration under the fitted bivariate density is %f\n", sum(sum(dens))*stepsize^2);
figure();
surf(grid_x, grid_y, dens);
title("Example 3: Kernel density fit: dependent bivariate data");
xlabel("true marginal density is N(0,1)");
ylabel("true marginal density is Chi^2(3)");
# more extensive test of parallel
if compute_nodes > 0 # only try this if parallel is available
	ns =[4000; 8000; 10000; 12000; 16000; 20000];
	printf("\n");
	printf("########################################################################\n");
	printf("kernel regression example with several sample sizes serial/parallel timings\n");
	figure();
	clg;
	title("Compute time versus nodes, kernel regression with different sample sizes");
	xlabel("nodes");
	ylabel("time (sec)");
	hold on;
	ts = zeros(rows(ns),4);
	for i = 1:rows(ns)
		for compute_nodes = 0:3
			nodes = compute_nodes + 1;
			n = ns(i,:);
			x = 1:n;
			x = x';
			x = 2*x/n;
			# generate dependent variable
			trueline =  x + (x.^2)/2 - 3.1*(x.^3)/3 + 1.2*(x.^4)/4;
			sig = 0.5;
			y = trueline + sig*randn(n,1);
			bandwidth = 0.45;
			tic;
			fit = kernel_regression(x, y, x, bandwidth, "__kernel_normal", false, false, compute_nodes);
			t1 = toc;
			ts(i, nodes) = t1;
			plot(nodes, t1, "*");
			printf(" %d data points and %d compute nodes: %f\n", n, nodes, t1);
		endfor
		plot(ts(i,:)');
	endfor
	hold off;
endif