File: synonymous.R

package info (click to toggle)
r-cran-seqinr 3.3-3-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 5,844 kB
  • ctags: 69
  • sloc: ansic: 1,955; makefile: 13
file content (74 lines) | stat: -rw-r--r-- 2,074 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
syncodons <- function(codons, numcode = 1) 
{
  #
  # Check argument:
  #
  if( any(nchar(codons) != 3)) stop("vector of three character string elements expected")

  #
  # Force to lower case:
  #
  alph <- unlist(lapply(codons, s2c))
  if(any(alph %in% LETTERS)) codons <- tolower(codons)

  #
  # Get the genetic code map:
  #
  allaminos <- sapply(words(), function(x) translate(s2c(x), numcode = numcode))

  getsyn <- function(c) names(allaminos[allaminos == allaminos[[c]]])
  synonymous <- lapply(codons, getsyn)
  names(synonymous) <- codons
  return(synonymous)
}

synsequence <- function (sequence, numcode = 1, ucoweight = NULL) 
{
    tra = translate(sequence, numcode = numcode)
    cod = splitseq(sequence)
    if (is.null(ucoweight)) {
        for (a in unique(tra)) {
            pos = which(tra == a)
            if (length(pos) > 1) {
                newcod = sample(cod[pos])
                cod[pos] = newcod
            }
        }
    }
    else {
        for (a in unique(tra)) {
            pos = which(tra == a)
            urne = rep(names(ucoweight[[a]]), ucoweight[[a]] * 
                length(pos))
            if (length(urne) > 1) {
                newcod = sample(urne, length(pos))
            }
            else if (length(urne) == 1) {
                newcod = urne
            }
            else {
                print(a)
                stop("bad codon usage content")
            }
            cod[pos] = newcod
        }
    }
    newseq = s2c(c2s(cod))
    return(newseq)
}

ucoweight <- function (sequence, numcode = 1) 
{
    allaminos = s2c(c2s(SEQINR.UTIL$CODES.NCBI$CODES[numcode]))
    allcodons = splitseq(as.vector(t(cbind(rep(s2c("tcag"), each = 16), 
        rep(s2c("tcag"), each = 4), rep(s2c("tcag"), 4)))))
    syncodons = lapply(seq(21), function(a) {
        which(allaminos == unique(allaminos)[a])
    })
    usage = uco(sequence)[allcodons] #re-order according to NCBI
    weight = lapply(seq(21), function(b) {
        usage[syncodons[b][[1]]]
    })
    names(weight) = unique(allaminos)
    return(weight)
}