File: uco.R

package info (click to toggle)
r-cran-seqinr 3.3-3-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 5,844 kB
  • ctags: 69
  • sloc: ansic: 1,955; makefile: 13
file content (121 lines) | stat: -rwxr-xr-x 3,370 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
uco <- function (seq, frame = 0, index = c("eff", "freq", "rscu"), 
as.data.frame = FALSE, NA.rscu = NA) 
{
    choice <- match.arg(index)
    
    if(any(seq%in%LETTERS)){
      seq <- tolower(seq)
    }
    sequence <- splitseq(seq = seq, frame = frame, word = 3)

    if( as.data.frame == FALSE ) {
      eff <- table(factor(sequence, levels = SEQINR.UTIL$CODON.AA$CODON))
      if(choice == "eff") return(eff)
      
      freq <- eff/(floor(length(seq)/3))
      if(choice == "freq") return(freq)
      
      T <- split(freq, SEQINR.UTIL$CODON.AA$AA)
      rscu <- lapply(T, function(x) {
        return(x/((1/length(x)) * sum(x)))
      })
      names(rscu) <- NULL
      rscu <- unlist(rscu)[as.character(SEQINR.UTIL$CODON.AA$CODON)]
      is.na(rscu[!is.finite(rscu)]) <- TRUE
      rscu[is.na(rscu)] <- NA.rscu
      return(rscu)
    } else { # return all indices in a data.frame
      eff <- table(factor(sequence, levels = SEQINR.UTIL$CODON.AA$CODON))
      freq <- eff/(floor(length(seq)/3))
      T <- split(freq, SEQINR.UTIL$CODON.AA$AA)
      rscu <- lapply(T, function(x) {
        return(x/((1/length(x)) * sum(x)))
      })
      names(rscu) <- NULL
      rscu <- unlist(rscu)[as.character(SEQINR.UTIL$CODON.AA$CODON)]
      is.na(rscu[!is.finite(rscu)]) <- TRUE
      rscu[is.na(rscu)] <- NA.rscu
      df <- data.frame(SEQINR.UTIL$CODON.AA$AA, eff = eff, freq = as.vector(freq), RSCU = rscu)
      names(df) = c("AA", "codon", "eff", "freq", "RSCU")
      return(df)
    }
}


dotchart.uco <- function(x, numcode = 1, aa3 = TRUE, cex = 0.7, 
  alphabet = s2c("tcag"), pch = 21, gpch = 20, bg = par("bg"), 
  color = par("fg"), gcolor = par("fg"), lcolor = "gray", xlim, ...)
{
  if( is.null(names(x)) ) names(x) <- words( alphabet = alphabet )
#
# General sorting 
#
  x <- sort(x)
  labels <- names(x)
  stringlabel = paste(labels, sep="", collapse="")
  groups <- as.factor(translate(s2c(stringlabel), numcode =  numcode))
  gdata <- sapply(split(x, groups), sum)
#
# Now, sorting by aa order
#
  gordered <- rank(gdata)
  xidx <- numeric(64)

  for( i in seq_len(64) )
  {
    xidx[i] <- -0.01*i + gordered[groups[i]]
  }

  x <- x[order(xidx)]
  labels <- names(x)
  stringlabel = paste(labels, sep="", collapse="")
  aa <- translate(s2c(stringlabel), numcode =  numcode)
  groups <- factor(aa, levels = unique(aa))
  gdata <- sapply(split(x, groups), sum)

  if( missing(xlim) ) xlim <- c(0, max(gdata))
  if( aa3 )
  {
    levels(groups) <- aaa(levels(groups))
  }
  dotchart(x = x, labels = labels, groups = groups, gdata = gdata,
   cex = cex, pch = pch, gpch = gpch, bg = bg, color = color,
   gcolor = gcolor, lcolor = lcolor, xlim, ...)
#
# Return invisibly for further plots
#
  result <- list(0)
  result$x <- x
  result$labels <- labels
  result$groups <- groups
  result$gdata <- gdata

  ypg <- numeric( length(levels(groups)) )
  i <- 1
  for( aa in levels(groups) )
  {
    ypg[i] <- length(which(groups == aa)) + 2
    i <- i + 1
  }
  ypg <- rev(cumsum(rev(ypg))) - 1
  names(ypg) <- levels(groups)
  result$ypg <- ypg

  ypi <- numeric( length(x) )
  for( i in seq_len(length(x)) )
  {
    ypi[i] <- ypg[groups[i]]
  }
  antirank <- function(x) 
  {
    return( seq(length(x),1,by=-1 ))
  }
  ypi <- ypi - unlist(sapply(split(x, groups),antirank))
  names(ypi) <- labels
  result$ypi <- ypi

  return( invisible(result) ) 
}