1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
|
# Basic LDA Enricher class
from math import sqrt
import math
import scipy.stats as s
import array as a
from scipy.optimize import fminbound
from scipy.special import gammaln as gamln
from numpy import log, pi, log10, e, log1p, exp
import numpy as np
from .MultiSiteCommon import MultiSiteCommon
from .MixtureEstimationMethods import MixtureEstimationMethods
class BasicLdaEnricher(MultiSiteCommon):
def __init__(self, gbmModel, sequence, rawKinetics,
identifyFlag, modsToCall=['H', 'J', 'K']):
MultiSiteCommon.__init__(self, gbmModel, sequence, rawKinetics)
# FIXME: For debugging LDA, load in parameters for forward and reverse
# strands:
self.fwd_model = np.genfromtxt(
"/home/UNIXHOME/obanerjee/nat_fwd_model_expanded.csv", delimiter=',')
self.rev_model = np.genfromtxt(
"/home/UNIXHOME/obanerjee/nat_rev_model_expanded.csv", delimiter=',')
if identifyFlag:
if 'K' in modsToCall:
self.fwd_model = np.genfromtxt(
"/home/UNIXHOME/obanerjee/tet_fwd_model_expanded.csv", delimiter=',')
self.rev_model = np.genfromtxt(
"/home/UNIXHOME/obanerjee/tet_rev_model_expanded.csv", delimiter=',')
# write a method to take perSiteResults dictionary in and add a column Ca5C
def useLDAmodel(self, kinetics, pos, model, up, down):
""" Test out LDA model """
res = np.zeros((up + down + 1, 5))
ind = 0
# range from -down to +up
for offset in range(-down, (up + 1)):
a = pos + offset
# res[ind,] = [kinetics[a]["tMean"], kinetics[a]["modelPrediction"], kinetics[a]["tErr"], kinetics[a]["coverage"]]
res[ind, ] = [kinetics[a]["tMean"], kinetics[a]["modelPrediction"], kinetics[a]
["tErr"], kinetics[a]["coverage"], np.exp(kinetics[a]["tStatistic"]) - 0.01]
ind += 1
apply = np.hstack(np.log(res + 0.01).transpose())
tmp = sum(np.multiply(apply, model[1:])) + model[0]
return tmp
def callLDAstrand(self, kinetics, strand, model, up, down):
tmp = [d for d in kinetics if d["strand"] == strand]
tmp.sort(key=lambda x: x["tpl"])
L = len(tmp)
for pos in range(down, (L - up)):
if tmp[pos]["base"] == 'C':
tmp[pos]["Ca5C"] = self.useLDAmodel(tmp, pos, model, up, down)
return tmp
def callEnricherFunction(self, kinetics, up=10, down=10):
fwd = self.callLDAstrand(kinetics, 0, self.fwd_model, up, down)
rev = self.callLDAstrand(kinetics, 1, self.rev_model, up, down)
res = fwd + rev
res.sort(key=lambda x: x["tpl"])
return res
|