#!/usr/bin/env python
# coding: utf-8

# DO NOT EDIT
# Autogenerated from the notebook rolling_ls.ipynb.
# Edit the notebook and then sync the output with this file.
#
# flake8: noqa
# DO NOT EDIT

# # Rolling Regression
#
# Rolling OLS applies OLS across a fixed window of observations and then
# rolls
# (moves or slides) the window across the data set. The key parameter is
# `window`
# which determines the number of observations used in each OLS regression.
# By
# default, `RollingOLS` drops missing values in the window and so will
# estimate
# the model using the available data points.
#
# Estimated values are aligned so that models estimated using data points
# $i+1, i+2, ... i+window$ are stored in location $i+window$.
#
# Start by importing the modules that are used in this notebook.

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_datareader as pdr
import seaborn

import statsmodels.api as sm
from statsmodels.regression.rolling import RollingOLS

# Plot configuration: register pandas' converters with matplotlib and
# select seaborn's "darkgrid" style. The two calls are independent.
pd.plotting.register_matplotlib_converters()
seaborn.set_style("darkgrid")

# `pandas-datareader` is used to download data from
# [Ken French's website](https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html).
# The two data sets downloaded are the 3 Fama-French factors and the 10
# industry portfolios.
# Data is available from 1926.
#
# The data are monthly returns for the factors or industry portfolios.

# Fetch each data set, then keep entry 0 of what the reader returns.
factor_tables = pdr.get_data_famafrench(
    "F-F_Research_Data_Factors", start="1-1-1926"
)
factors = factor_tables[0]
factors.head()

industry_tables = pdr.get_data_famafrench(
    "10_Industry_Portfolios", start="1-1-1926"
)
industries = industry_tables[0]
industries.head()

# The first model estimated is a rolling version of the CAPM that
# regresses
# the excess return of Technology sector firms on the excess return of the
# market.
#
# The window is 60 months, and so results are available after the first 60
# (`window`)
# months. The first 59 (`window - 1`) estimates are all `nan` filled.

# Dependent variable: HiTec return minus the risk-free rate. `.values`
# strips the index from RF, so the subtraction is positional.
risk_free = factors["RF"].values
endog = industries["HiTec"] - risk_free

# Regressors: the market excess return plus a constant column.
exog = sm.add_constant(factors["Mkt-RF"])

# Rolling fit with a 60-observation window.
rols = RollingOLS(endog, exog, window=60)
rres = rols.fit()

# Work on a copy of the estimates relabelled 1, 2, ..., n so that the
# rows can be inspected by observation count.
params = rres.params.copy()
params.index = np.arange(len(params)) + 1
params.head()

# Rows around the point where the first full window becomes available.
params.iloc[57:62]

params.tail()

# We next plot the market loading along with a 95% point-wise confidence
# interval.
# Passing `variables=["Mkt-RF"]` restricts the plot to the market loading
# and so omits the constant column.

# Plot only the coefficient on the market factor.
market_only = ["Mkt-RF"]
fig = rres.plot_recursive_coefficient(
    variables=market_only,
    figsize=(14, 6),
)

# Next, the model is expanded to include all three factors, the excess
# market, the size factor
# and the value factor.

# Regressors for the three-factor model, plus a constant column.
exog_vars = ["Mkt-RF", "SMB", "HML"]
three_factors = factors[exog_vars]
exog = sm.add_constant(three_factors)

# Same 60-observation rolling window, now with three slopes.
rols = RollingOLS(endog, exog, window=60)
rres = rols.fit()

# One panel per factor; the constant is not in `exog_vars`, so it is not
# plotted.
fig = rres.plot_recursive_coefficient(
    variables=exog_vars,
    figsize=(14, 18),
)

# ## Formulas
#
# `RollingOLS` and `RollingWLS` both support model specification using the
# formula interface. The example below uses the same three factors as the
# model estimated previously, but the dependent variable is the raw `HiTec`
# return rather than the excess return used above. Note that one variable
# is renamed to have a valid Python variable name.

# Put factors and industry returns side by side in one frame, and add a
# copy of "Mkt-RF" under a name that is usable inside a formula.
joined = pd.concat([factors, industries], axis="columns")
joined = joined.assign(Mkt_RF=joined["Mkt-RF"])

formula = "HiTec ~ Mkt_RF + SMB + HML"
mod = RollingOLS.from_formula(formula, data=joined, window=60)
rres = mod.fit()
rres.params.tail()

# ## `RollingWLS`: Rolling Weighted Least Squares
#
# The `rolling` module also provides `RollingWLS` which takes an optional
# `weights` input to perform rolling weighted least squares.  It produces
# results that match `WLS` when applied to rolling windows of data.

# ## Fit Options
#
# Fit accepts other optional keywords to set the covariance estimator.
# Only two estimators are supported, `'nonrobust'` (the classic OLS
# estimator) and `'HC0'` which is White's heteroskedasticity robust
# estimator.
#
# You can set `params_only=True` to only estimate the model parameters.
# This is substantially faster than computing the full set of values
# required to perform inference.
#
# Finally, the parameter `reset` can be set to a positive integer to
# control estimation error in very long samples. `RollingOLS` avoids the
# full matrix product when rolling by only adding the most recent
# observation and removing the dropped observation as it rolls through the
# sample. Setting `reset` uses the full inner product every `reset` periods.
# In most applications this parameter can be omitted.

# ## Expanding Sample
# It is possible to expand the sample until sufficient observations are
# available for the full window length.  In this example, we start once we
# have 12 observations available, and then increase the sample until we have
# 60 observations available. The first non-`nan` value is computed using 12
# observations, the second 13, and so on. All other estimates are computed
# using 60 observations.

# Expanding start: estimation requires at least `min_nobs` observations
# and the window is capped at 60.
expanding_mod = RollingOLS(
    endog,
    exog,
    window=60,
    min_nobs=12,
    expanding=True,
)
res = expanding_mod.fit()

# Inspect the same rows of the estimates and of the observation counts.
rows = slice(10, 15)
res.params.iloc[rows]

res.nobs[rows]
