File: lcc.py

package info (click to toggle)
python-biopython 1.42-2
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 17,584 kB
  • ctags: 12,272
  • sloc: python: 80,461; xml: 13,834; ansic: 7,902; cpp: 1,855; sql: 1,144; makefile: 203
file content (135 lines) | stat: -rw-r--r-- 4,762 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# Copyright 2003 by Sebastian Bassi. sbassi@genesdigitales.com
# All rights reserved.  This code is part of the Biopython 
# distribution and governed by its license.
# Please see the LICENSE file that should have been included as part
# of this package.

import math
from string import count

# Module-level state.
# `crom` is not referenced by any function visible in this file.
crom = 0
# `compone` and `lccsal` are rebound by lcc_mult() on every call: the former
# holds the per-count lookup table, the latter the most recent result list.
compone = [0]
lccsal = [0]

def _lcc_from_counts(table, counts):
    """Return the LCC value for the given per-base counts using `table`."""
    # Summation order (A, C, T, G) is kept fixed so results are reproducible.
    return -(table[counts['A']] + table[counts['C']] +
             table[counts['T']] + table[counts['G']])


def lcc_mult(seq, wsize, start, end):
    """Return the list of LCC values for every window of seq[start:end].

    LCC is a complexity measure computed as -sum(p * log2(p)) over the
    frequencies p of the upper-case bases 'A', 'C', 'T' and 'G' inside a
    window, where p is (count of the base) / wsize.

    Arguments:
     - seq   - the sequence (anything sliceable that offers .count()).
     - wsize - window length, must be at least 1.
     - start - index of the first position of the analysed region.
     - end   - index one past the last position of the analysed region.

    Returns a list holding one value per window position, i.e.
    len(seq[start:end]) - wsize + 1 values when the region is at least
    wsize long, and a single value (for the truncated window) otherwise.
    Characters other than 'A', 'C', 'T', 'G' are not counted, but every
    window still yields a value so the result stays aligned with the
    window positions.

    Raises ValueError if wsize is smaller than 1.

    Side effect (kept for backward compatibility): the module-level names
    `compone` and `lccsal` are rebound to the lookup table and to the
    returned list.
    """
    global compone
    global lccsal
    if wsize < 1:
        raise ValueError("wsize must be a positive integer")
    l2 = math.log(2)
    # Work on the requested region only, like lcc_simp() does.
    region = seq[start:end]

    # table[k] == (k/wsize) * log2(k/wsize); table[0] is 0 so that a base
    # that is absent from the window never triggers log(0).
    table = [0]
    for k in range(1, wsize + 1):
        freq = k / float(wsize)
        table.append(freq * (math.log(freq) / l2))

    first_window = region[0:wsize]
    counts = {}
    for base in 'ACTG':
        counts[base] = first_window.count(base)
    results = [_lcc_from_counts(table, counts)]

    # Slide the window one position at a time, updating only the counts of
    # the character that leaves and the character that enters.
    for x in range(len(region) - wsize):
        outgoing = region[x]
        incoming = region[x + wsize]
        if outgoing == incoming:
            # Composition unchanged, so the value is unchanged too.
            results.append(results[-1])
            continue
        if outgoing in counts:
            counts[outgoing] -= 1
        if incoming in counts:
            counts[incoming] += 1
        results.append(_lcc_from_counts(table, counts))

    compone = table
    lccsal = results
    return lccsal

def lcc_simp(seq, start, end):
    """Return the LCC, a complexity measure, of the slice seq[start:end].

    The value is -sum(p * log2(p)) over the frequencies p of the upper-case
    bases 'A', 'C', 'T' and 'G' in the slice, where p is the number of
    occurrences divided by the length of the slice. Other characters are
    not counted.

    Arguments:
     - seq   - the sequence (anything sliceable that offers .count()).
     - start - index of the first position of the window.
     - end   - index one past the last position of the window.

    Returns 0 when none of the four bases occurs in the slice (including
    an empty slice).
    """
    window = seq[start:end]
    # Normalise by the real slice length so that out-of-range or negative
    # bounds cannot produce frequencies that are inconsistent with the data.
    wsize = len(window)
    l2 = math.log(2)
    total = 0
    for base in 'ACTG':
        occurrences = window.count(base)
        # Skipping absent bases avoids calculating the log of 0.
        if occurrences:
            freq = occurrences / float(wsize)
            total += freq * (math.log(freq) / l2)
    return -total