1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116
|
#!/usr/bin/env python
# DO NOT EDIT
# Autogenerated from the notebook mediation_survival.ipynb.
# Edit the notebook and then sync the output with this file.
#
# flake8: noqa
# DO NOT EDIT
# ## Mediation analysis with duration data
# This notebook demonstrates mediation analysis when the
# mediator and outcome are duration variables, modeled
# using proportional hazards regression. These examples
# are based on simulated data.
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.stats.mediation import Mediation
# Make the notebook reproducible.
np.random.seed(3424)
# Specify a sample size.
n = 1000
# Generate an exposure variable.
exp = np.random.normal(size=n)
# Generate a mediator variable.
def gen_mediator():
mn = np.exp(exp)
mtime0 = -mn * np.log(np.random.uniform(size=n))
ctime = -2 * mn * np.log(np.random.uniform(size=n))
mstatus = (ctime >= mtime0).astype(int)
mtime = np.where(mtime0 <= ctime, mtime0, ctime)
return mtime0, mtime, mstatus
# Generate an outcome variable.
def gen_outcome(otype, mtime0):
if otype == "full":
lp = 0.5 * mtime0
elif otype == "no":
lp = exp
else:
lp = exp + mtime0
mn = np.exp(-lp)
ytime0 = -mn * np.log(np.random.uniform(size=n))
ctime = -2 * mn * np.log(np.random.uniform(size=n))
ystatus = (ctime >= ytime0).astype(int)
ytime = np.where(ytime0 <= ctime, ytime0, ctime)
return ytime, ystatus
# Build a dataframe containing all the relevant variables.
def build_df(ytime, ystatus, mtime0, mtime, mstatus):
df = pd.DataFrame({
"ytime": ytime,
"ystatus": ystatus,
"mtime": mtime,
"mstatus": mstatus,
"exp": exp,
})
return df
# Run the full simulation and analysis, under a particular
# population structure of mediation.
def run(otype):
mtime0, mtime, mstatus = gen_mediator()
ytime, ystatus = gen_outcome(otype, mtime0)
df = build_df(ytime, ystatus, mtime0, mtime, mstatus)
outcome_model = sm.PHReg.from_formula("ytime ~ exp + mtime",
status="ystatus",
data=df)
mediator_model = sm.PHReg.from_formula("mtime ~ exp",
status="mstatus",
data=df)
med = Mediation(
outcome_model,
mediator_model,
"exp",
"mtime",
outcome_predict_kwargs={"pred_only": True},
)
med_result = med.fit(n_rep=20)
print(med_result.summary())
# Run the example with full mediation
run("full")
# Run the example with partial mediation
run("partial")
# Run the example with no mediation
run("no")
|