File: toICM-methods.Rd

package info (click to toggle)
r-bioc-tfbstools 1.28.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 940 kB
  • sloc: xml: 1,137; ansic: 590; asm: 54; sh: 13; makefile: 2
file content (104 lines) | stat: -rw-r--r-- 3,740 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
\name{toICM}
\docType{methods}
\alias{toICM}
\alias{toICM,character-method}
\alias{toICM,DNAStringSet-method}
\alias{toICM,matrix-method}
\alias{toICM,PFMatrix-method}
\alias{toICM,PFMatrixList-method}

\title{toICM method}
\description{
  Converts a raw frequency matrix (PFMatrix) to an information content matrix (ICMatrix).
  It takes the bases background frequencies, pseudocounts and 
  schneider as parameters.
}

\usage{
toICM(x, pseudocounts=0.8, schneider=FALSE, 
      bg=c(A=0.25, C=0.25, G=0.25, T=0.25))
}

\arguments{
  \item{x}{
    For \code{toICM}, a \link{PFMatrix},
    rectangular \link{DNAStringSet} object ("rectangular" means that 
    all elements have the same number of characters) with 
    no IUPAC ambiguity letters,
    a rectangular \link{character} vector or
    a \link{matrix} with rownames containing at least A, C, G and T,
    or a \link{PFMatrixList} object
  }
  \item{pseudocounts}{
    The pseudocounts used in the conversion. The default value is 0.8.
  }
  \item{schneider}{
    This logical parameter controls whether a Schneider correction will be done.
    See more details below.
  }
  \item{bg}{
    A vector of background frequencies of the four bases, with names containing A, C, G, T. When \code{toICM} is applied to a \code{PFMatrix}, if \code{bg} is not specified, it will use the bg information contained in the \code{PFMatrix}.
  }
}
\details{
  The information content matrix has a column sum between 0 (no base preference) and 2 (only 1 base used). Usually this information is used to plot a sequence logo.
  
  The information content at each position is computed as
  \deqn{D = \log_2(nrow(pfm)) + colSums(postProbs \times \log_2(postProbs))}{%
  D = log2(nrow(pfm)) + colSums(postProbs * log2(postProbs))}
  \deqn{icm = postProbs \times D}{%
  icm = postProbs * D}
  where D is the total information content for each position.
  For detailed procedure of computation, please refer to the vignette.

  A Schneider correction will be done if requested.
  Please see the reference below for more comprehensive explanation.
}

\value{
  An \code{ICMatrix} object which contains the background frequency, 
  pseudocounts and Schneider correction used.
}

\references{
Schneider, T. D., Stormo, G. D., Gold, L., & Ehrenfeucht, A. (1986). Information content of binding sites on nucleotide sequences. Journal of molecular biology, 188(3), 415-431.
}

\author{
  Ge Tan
}

\seealso{
  \code{\link{toPWM}},
  \code{\linkS4class{XMatrix}},
  \code{\link{seqLogo}}
}

\examples{
  ## Construct a PFMatrix
  pfm <- PFMatrix(ID="MA0004.1", name="Arnt", matrixClass="Zipper-Type", 
                  strand="+",
                  bg=c(A=0.25, C=0.25, G=0.25, T=0.25),
                  tags=list(family="Helix-Loop-Helix", 
                            species="10090", 
                            tax_group="vertebrates",
                            medline="7592839", type="SELEX", ACC="P53762", 
                            pazar_tf_id="TF0000003",
                            TFBSshape_ID="11", TFencyclopedia_ID="580"),
                  profileMatrix=matrix(c(4L,  19L, 0L,  0L,  0L,  0L,
                                         16L, 0L,  20L, 0L,  0L,  0L,
                                         0L,  1L,  0L,  20L, 0L,  20L,
                                         0L,  0L,  0L,  0L,  20L, 0L),
                                       byrow=TRUE, nrow=4, 
                                       dimnames=list(c("A", "C", "G", "T")))
                 )
   ## Convert it into an ICMatrix
   icm <- toICM(pfm, pseudocounts=0.8, schneider=TRUE)
   
   ## Conversion on PFMatrixList
   data(MA0003.2)
   data(MA0004.1)
   pfmList <- PFMatrixList(pfm1=MA0003.2, pfm2=MA0004.1, use.names=TRUE)
   icmList <- toICM(pfmList, pseudocounts=0.8, schneider=TRUE)
}

\keyword{methods}