File: wls.py

package info (click to toggle)
statsmodels 0.14.6%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 49,956 kB
  • sloc: python: 254,365; f90: 612; sh: 560; javascript: 337; asm: 156; makefile: 145; ansic: 32; xml: 9
file content (121 lines) | stat: -rw-r--r-- 3,218 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#!/usr/bin/env python
# coding: utf-8

# DO NOT EDIT
# Autogenerated from the notebook wls.ipynb.
# Edit the notebook and then sync the output with this file.
#
# flake8: noqa
# DO NOT EDIT

# # Weighted Least Squares

import matplotlib.pyplot as plt
import numpy as np
import statsmodels.api as sm
from scipy import stats
from statsmodels.iolib.table import SimpleTable, default_txt_fmt

np.random.seed(1024)

# ## WLS Estimation
#
# ### Artificial data: Heteroscedasticity 2 groups
#
# Model assumptions:
#
#  * Misspecification: true model is quadratic, estimate only linear
#  * Independent noise/error term
#  * Two groups for error variance, low and high variance groups

nsample = 50
x = np.linspace(0, 20, nsample)
X = np.column_stack((x, (x - 5)**2))
X = sm.add_constant(X)
beta = [5.0, 0.5, -0.01]
sig = 0.5
w = np.ones(nsample)
w[nsample * 6 // 10:] = 3
y_true = np.dot(X, beta)
e = np.random.normal(size=nsample)
y = y_true + sig * w * e
X = X[:, [0, 1]]

# ### WLS knowing the true variance ratio of heteroscedasticity
#
# In this example, `w` is the standard deviation of the error.  `WLS`
# requires that the weights are proportional to the inverse of the error
# variance.

mod_wls = sm.WLS(y, X, weights=1.0 / (w**2))
res_wls = mod_wls.fit()
print(res_wls.summary())

# ## OLS vs. WLS
#
# Estimate an OLS model for comparison:

res_ols = sm.OLS(y, X).fit()
print(res_ols.params)
print(res_wls.params)

# Compare the WLS standard errors to  heteroscedasticity corrected OLS
# standard errors:

se = np.vstack([
    [res_wls.bse],
    [res_ols.bse],
    [res_ols.HC0_se],
    [res_ols.HC1_se],
    [res_ols.HC2_se],
    [res_ols.HC3_se],
])
se = np.round(se, 4)
colnames = ["x1", "const"]
rownames = ["WLS", "OLS", "OLS_HC0", "OLS_HC1", "OLS_HC3", "OLS_HC3"]
tabl = SimpleTable(se, colnames, rownames, txt_fmt=default_txt_fmt)
print(tabl)

# Calculate OLS prediction interval:

covb = res_ols.cov_params()
prediction_var = res_ols.mse_resid + (X * np.dot(covb, X.T).T).sum(1)
prediction_std = np.sqrt(prediction_var)
tppf = stats.t.ppf(0.975, res_ols.df_resid)

pred_ols = res_ols.get_prediction()
iv_l_ols = pred_ols.summary_frame()["obs_ci_lower"]
iv_u_ols = pred_ols.summary_frame()["obs_ci_upper"]

# Draw a plot to compare predicted values in WLS and OLS:

pred_wls = res_wls.get_prediction()
iv_l = pred_wls.summary_frame()["obs_ci_lower"]
iv_u = pred_wls.summary_frame()["obs_ci_upper"]

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(x, y, "o", label="Data")
ax.plot(x, y_true, "b-", label="True")
# OLS
ax.plot(x, res_ols.fittedvalues, "r--")
ax.plot(x, iv_u_ols, "r--", label="OLS")
ax.plot(x, iv_l_ols, "r--")
# WLS
ax.plot(x, res_wls.fittedvalues, "g--.")
ax.plot(x, iv_u, "g--", label="WLS")
ax.plot(x, iv_l, "g--")
ax.legend(loc="best")

# ## Feasible Weighted Least Squares (2-stage FWLS)
#
# Like `w`, `w_est` is proportional to the standard deviation, and so must
# be squared.

resid1 = res_ols.resid[w == 1.0]
var1 = resid1.var(ddof=int(res_ols.df_model) + 1)
resid2 = res_ols.resid[w != 1.0]
var2 = resid2.var(ddof=int(res_ols.df_model) + 1)
w_est = w.copy()
w_est[w != 1.0] = np.sqrt(var2) / np.sqrt(var1)
res_fwls = sm.WLS(y, X, 1.0 / ((w_est**2))).fit()
print(res_fwls.summary())