#!/usr/bin/env python
# coding: utf-8
# DO NOT EDIT
# Autogenerated from the notebook gls.ipynb.
# Edit the notebook and then sync the output with this file.
#
# flake8: noqa
# DO NOT EDIT
# # Generalized Least Squares
import numpy as np
import statsmodels.api as sm
# The Longley dataset is a time series dataset:
data = sm.datasets.longley.load()
data.exog = sm.add_constant(data.exog)
print(data.exog.head())
#
# Let's assume that the errors are correlated and that we know the
# structure of the error covariance. We can then define `sigma` and use
# it to fit a GLS model.
#
# First we will obtain the residuals from an OLS fit
ols_resid = sm.OLS(data.endog, data.exog).fit().resid
# Assume that the error terms follow an AR(1) process with a trend:
#
# $\epsilon_i = \beta_0 + \rho\epsilon_{i-1} + \eta_i$
#
# where $\eta_i \sim N(0,\sigma^2)$
#
# and that $\rho$ is simply the lag-1 autocorrelation of the residuals. A
# consistent estimator for $\rho$ is obtained by regressing the residuals
# on the lagged residuals:
resid_fit = sm.OLS(
    np.asarray(ols_resid)[1:],
    sm.add_constant(np.asarray(ols_resid)[:-1])).fit()
print(resid_fit.tvalues[1])
print(resid_fit.pvalues[1])
# While we do not have strong evidence that the errors follow an AR(1)
# process, we continue:
rho = resid_fit.params[1]
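# As a quick cross-check (an illustrative addition, not part of the
# original notebook), the regression slope should be close to the sample
# lag-1 autocorrelation of the residuals:
resid_arr = np.asarray(ols_resid)
print(np.corrcoef(resid_arr[1:], resid_arr[:-1])[0, 1])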
# As we know, an AR(1) process implies that observations closer in time
# are more strongly correlated, so we can encode this structure with a
# Toeplitz matrix of lag distances:
from scipy.linalg import toeplitz
# toeplitz builds a symmetric matrix whose (i, j) entry is |i - j|:
print(toeplitz(range(5)))
order = toeplitz(range(len(ols_resid)))
# so that our error covariance structure is `rho**order`, which defines
# an AR(1) autocorrelation structure:
sigma = rho**order
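# To see the structure this implies (an illustrative sanity check, not in
# the original notebook), the top-left corner of `sigma` shows
# correlations decaying geometrically with the lag distance:
print(np.round(sigma[:4, :4], 3))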
gls_model = sm.GLS(data.endog, data.exog, sigma=sigma)
gls_results = gls_model.fit()
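# For comparison (an illustrative addition), refit plain OLS and print
# both sets of standard errors; GLS reweights the observations using the
# assumed AR(1) covariance:
ols_results = sm.OLS(data.endog, data.exog).fit()
print(ols_results.bse)
print(gls_results.bse)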
# Of course, the exact rho in this instance is not known, so it might
# make more sense to use feasible GLS, which currently only has
# experimental support.
#
# We can use the GLSAR model with one lag to get a similar result:
glsar_model = sm.GLSAR(data.endog, data.exog, 1)
glsar_results = glsar_model.iterative_fit(1)
print(glsar_results.summary())
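# As an illustrative extension (not part of the original notebook),
# letting GLSAR iterate longer updates the rho estimate until it
# stabilizes; the first argument of `iterative_fit` is the maximum number
# of iterations:
glsar_model2 = sm.GLSAR(data.endog, data.exog, 1)
glsar_results2 = glsar_model2.iterative_fit(10)
print(glsar_model2.rho)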
# Comparing the GLS and GLSAR results, we see some small differences in
# the parameter estimates and the resulting standard errors of the
# parameter estimates. This might be due to numerical differences in the
# algorithms, e.g. the treatment of initial conditions, given the small
# number of observations in the Longley dataset.
print(gls_results.params)
print(glsar_results.params)
print(gls_results.bse)
print(glsar_results.bse)
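# For a more readable side-by-side view (an illustrative addition;
# `comparison` is a name introduced here, and pandas is already a
# statsmodels dependency):
import pandas as pd

comparison = pd.DataFrame({
    "gls_params": np.asarray(gls_results.params),
    "glsar_params": np.asarray(glsar_results.params),
    "gls_bse": np.asarray(gls_results.bse),
    "glsar_bse": np.asarray(glsar_results.bse),
})
print(comparison)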