File: mediation_survival.py

package info (click to toggle)

statsmodels 0.14.6%2Bdfsg-1

links: PTS, VCS
area: main
in suites: sid
size: 49,956 kB
sloc: python: 254,365; f90: 612; sh: 560; javascript: 337; asm: 156; makefile: 145; ansic: 32; xml: 9

file content (116 lines) | stat: -rw-r--r-- 2,762 bytes

parent folder | download | duplicates (3)

#!/usr/bin/env python

# DO NOT EDIT
# Autogenerated from the notebook mediation_survival.ipynb.
# Edit the notebook and then sync the output with this file.
#
# flake8: noqa
# DO NOT EDIT

# ## Mediation analysis with duration data

# This notebook demonstrates mediation analysis when the
# mediator and outcome are duration variables, modeled
# using proportional hazards regression.  These examples
# are based on simulated data.

import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.stats.mediation import Mediation

# Make the notebook reproducible.

np.random.seed(3424)

# Specify a sample size.

n = 1000

# Generate an exposure variable.

exp = np.random.normal(size=n)

# Generate a mediator variable.


def gen_mediator():
    mn = np.exp(exp)
    mtime0 = -mn * np.log(np.random.uniform(size=n))
    ctime = -2 * mn * np.log(np.random.uniform(size=n))
    mstatus = (ctime >= mtime0).astype(int)
    mtime = np.where(mtime0 <= ctime, mtime0, ctime)
    return mtime0, mtime, mstatus


# Generate an outcome variable.


def gen_outcome(otype, mtime0):
    if otype == "full":
        lp = 0.5 * mtime0
    elif otype == "no":
        lp = exp
    else:
        lp = exp + mtime0
    mn = np.exp(-lp)
    ytime0 = -mn * np.log(np.random.uniform(size=n))
    ctime = -2 * mn * np.log(np.random.uniform(size=n))
    ystatus = (ctime >= ytime0).astype(int)
    ytime = np.where(ytime0 <= ctime, ytime0, ctime)
    return ytime, ystatus


# Build a dataframe containing all the relevant variables.


def build_df(ytime, ystatus, mtime0, mtime, mstatus):
    df = pd.DataFrame({
        "ytime": ytime,
        "ystatus": ystatus,
        "mtime": mtime,
        "mstatus": mstatus,
        "exp": exp,
    })
    return df


# Run the full simulation and analysis, under a particular
# population structure of mediation.


def run(otype):

    mtime0, mtime, mstatus = gen_mediator()
    ytime, ystatus = gen_outcome(otype, mtime0)
    df = build_df(ytime, ystatus, mtime0, mtime, mstatus)

    outcome_model = sm.PHReg.from_formula("ytime ~ exp + mtime",
                                          status="ystatus",
                                          data=df)
    mediator_model = sm.PHReg.from_formula("mtime ~ exp",
                                           status="mstatus",
                                           data=df)

    med = Mediation(
        outcome_model,
        mediator_model,
        "exp",
        "mtime",
        outcome_predict_kwargs={"pred_only": True},
    )
    med_result = med.fit(n_rep=20)
    print(med_result.summary())


# Run the example with full mediation

run("full")

# Run the example with partial mediation

run("partial")

# Run the example with no mediation

run("no")