File: TFFM.R

package info (click to toggle)
r-bioc-tfbstools 1.20.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 920 kB
  • sloc: xml: 1,137; ansic: 590; asm: 54; sh: 13; makefile: 2
file content (154 lines) | stat: -rw-r--r-- 5,592 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
### -----------------------------------------------------------------
### getTransition: get the transition probability from i to j. 
### i,j are characters.
### 
setMethod("getTransition", "TFFMFirst",
          function(tffm, i, j){
            tffm@transition[as.character(i),as.character(j)]
          })
setMethod("getTransition", "TFFMDetail",
          function(tffm, i, j){
            tffm@transition[as.character(i),as.character(j)]
          })

### -----------------------------------------------------------------
### ncol: get the length of TFFM, the number of nucleotides in the
### model excluding the background.
### Exported!
setMethod("ncol", "TFFMDetail",
          function(x){
            length(x@emission)/4L - 1L
          })
setMethod("ncol", "TFFMFirst",
          function(x){
            length(x@emission) - 2L
          })

### -----------------------------------------------------------------
### Get the background emission probability: bgEmissionProb
###
setMethod("bgEmissionProb", "TFFMFirst",
          function(tffm){
            # Retrieve emission proba for the first state which is not
            # 1st-order
            last_emissions <- tffm@emission[[1]]
            names(last_emissions) <- DNA_BASES
            # Compute emission proba for the background
            emissions <- matrix(tffm@emission[[2]], ncol=4) %*% last_emissions
            emissions <- as.numeric(emissions)
            names(emissions) <- DNA_BASES
            return(emissions)
          }
          )

setMethod("bgEmissionProb", "TFFMDetail",
          function(tffm){
            # Compute emission proba for the background
            emissions <- t(tffm@transition[as.character(0:3), 
                                           as.character(0:3)]) %*% 
              c(0.25, 0.25, 0.25, 0.25)
            emissions <- drop(emissions)
            names(emissions) <- DNA_BASES
            emissions <- emissions / sum(emissions)
            return(emissions)
          })

### -----------------------------------------------------------------
### Get the position start of emission: getPosStart
###
setMethod("getPosStart", "TFFMFirst",
          function(tffm){
            # Give the position of the first matching state.
            # Returns: The position of the first matching state of the TFFM.
            return(3L)
          })
setMethod("getPosStart", "TFFMDetail",
          function(tffm){
            return(1L)
          })

### -----------------------------------------------------------------
### Get the position probablity: getPosProb
### Exported!
setMethod("getPosProb", "TFFMFirst",
          ## verified!
          function(tffm){
            previous_position_proba <- bgEmissionProb(tffm)
            start <- getPosStart(tffm)
            ans <- list()
            i <- 1L
            for(position in start:length(tffm@emission)){
              ans[[i]] <- matrix(tffm@emission[[position]], ncol=4) %*% 
                previous_position_proba
              previous_position_proba <- as.numeric(ans[[i]])
              i <- i + 1L
            }
            ans <- do.call(cbind, ans)
            rownames(ans) <- DNA_BASES
            colnames(ans) <- 1:ncol(ans)
            return(ans)
          })

setMethod("getPosProb", "TFFMDetail",
          ## verified!
          function(tffm){
            previous_position_proba <- bgEmissionProb(tffm)
            start <- getPosStart(tffm)
            ans <- list()
            for(position in start:(start+ncol(tffm)-1L)){
              start_state <- 1:4 + (position - 1L) * 4L
              end_state <- 1:4 + position * 4L
              ans[[position]] <- t(tffm@transition[start_state, end_state]) %*%
                previous_position_proba
              previous_position_proba <- as.numeric(ans[[position]])
            }
            ans <- do.call(cbind, ans)
            rownames(ans) <- DNA_BASES
            colnames(ans) <- 1:ncol(ans)
            ans <- sweep(ans, MARGIN=2, colSums(ans), FUN="/")
            return(ans)
          })

### -----------------------------------------------------------------
### Get the emission distribution parameters at each position: getEmissionProb
### Exported!
setMethod("getEmissionProb", "TFFMFirst",
          function(tffm){
            start <- getPosStart(tffm)
            ans <- tffm@emission[start:length(tffm@emission)]
            ans <- do.call(cbind, ans)
            rownames(ans) <- rep(DNA_BASES, 4)
            colnames(ans) <- 1:ncol(ans)
            return(ans)
          })

setMethod("getEmissionProb", "TFFMDetail",
          function(tffm){
            start <- getPosStart(tffm)
            ans <- list()
            for(position in start:(start+ncol(tffm)-1L)){
              start_state <- 1:4 + (position - 1L) * 4L
              end_state <- 1:4 + position * 4L
              emissions <- tffm@transition[start_state, end_state]
              emissions <- sweep(emissions, MARGIN=1, rowSums(emissions), 
                                 FUN="/")
              ans[[position]] <- as.numeric(t(emissions))
            }
            ans <- do.call(cbind, ans)
            rownames(ans) <- rep(DNA_BASES, 4)
            colnames(ans) <- 1:ncol(ans)
            return(ans)
          })

### ----------------------------------------------------------------------
### Information content calculation at each position: totalIC 
### Exported!
setMethod("totalIC", "TFFM",
          function(x){
            pwm <- getPosProb(x)
            ans <- seqLogo:::pwm2ic(pwm)
            return(ans)
          }
          )