File: mediation_survival.py

package info (click to toggle)
statsmodels 0.14.6%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 49,956 kB
  • sloc: python: 254,365; f90: 612; sh: 560; javascript: 337; asm: 156; makefile: 145; ansic: 32; xml: 9
file content (116 lines) | stat: -rw-r--r-- 2,762 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/usr/bin/env python

# DO NOT EDIT
# Autogenerated from the notebook mediation_survival.ipynb.
# Edit the notebook and then sync the output with this file.
#
# flake8: noqa
# DO NOT EDIT

# ## Mediation analysis with duration data

# This notebook demonstrates mediation analysis when the
# mediator and outcome are duration variables, modeled
# using proportional hazards regression.  These examples
# are based on simulated data.

import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.stats.mediation import Mediation

# Make the notebook reproducible.

np.random.seed(3424)

# Specify a sample size.

n = 1000

# Generate an exposure variable.

exp = np.random.normal(size=n)

# Generate a mediator variable.


def gen_mediator():
    mn = np.exp(exp)
    mtime0 = -mn * np.log(np.random.uniform(size=n))
    ctime = -2 * mn * np.log(np.random.uniform(size=n))
    mstatus = (ctime >= mtime0).astype(int)
    mtime = np.where(mtime0 <= ctime, mtime0, ctime)
    return mtime0, mtime, mstatus


# Generate an outcome variable.


def gen_outcome(otype, mtime0):
    if otype == "full":
        lp = 0.5 * mtime0
    elif otype == "no":
        lp = exp
    else:
        lp = exp + mtime0
    mn = np.exp(-lp)
    ytime0 = -mn * np.log(np.random.uniform(size=n))
    ctime = -2 * mn * np.log(np.random.uniform(size=n))
    ystatus = (ctime >= ytime0).astype(int)
    ytime = np.where(ytime0 <= ctime, ytime0, ctime)
    return ytime, ystatus


# Build a dataframe containing all the relevant variables.


def build_df(ytime, ystatus, mtime0, mtime, mstatus):
    df = pd.DataFrame({
        "ytime": ytime,
        "ystatus": ystatus,
        "mtime": mtime,
        "mstatus": mstatus,
        "exp": exp,
    })
    return df


# Run the full simulation and analysis, under a particular
# population structure of mediation.


def run(otype):

    mtime0, mtime, mstatus = gen_mediator()
    ytime, ystatus = gen_outcome(otype, mtime0)
    df = build_df(ytime, ystatus, mtime0, mtime, mstatus)

    outcome_model = sm.PHReg.from_formula("ytime ~ exp + mtime",
                                          status="ystatus",
                                          data=df)
    mediator_model = sm.PHReg.from_formula("mtime ~ exp",
                                           status="mstatus",
                                           data=df)

    med = Mediation(
        outcome_model,
        mediator_model,
        "exp",
        "mtime",
        outcome_predict_kwargs={"pred_only": True},
    )
    med_result = med.fit(n_rep=20)
    print(med_result.summary())


# Run the example with full mediation

run("full")

# Run the example with partial mediation

run("partial")

# Run the example with no mediation

run("no")