1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
|
"""
Pyrex/C extension for quickly finding potential CpG sites in pairs of
sequences.
"""
from cpython.version cimport PY_MAJOR_VERSION
cdef extern from "find_cpg.h":
int next_cpg( char * sp1, char * sp2, int start)
int next_cpg_restricted( char * sp1, char *sp2, int start)
int next_non_cpg( char * sp1, char * sp2, int start)
def find_cpg( sp1, sp2, start ):
cdef char* a
cdef char* b
cdef int pos
if PY_MAJOR_VERSION >= 3:
bytes_sp1, bytes_sp2 = sp1.encode(), sp2.encode()
else:
bytes_sp1, bytes_sp2 = sp1, sp2
a = bytes_sp1
b = bytes_sp2
pos = start
if pos > len(sp1): return -1
return next_cpg( a, b, pos )
def find_cpg_restricted( sp1, sp2, start ):
cdef char* a
cdef char* b
cdef int pos
if PY_MAJOR_VERSION >= 3:
bytes_sp1, bytes_sp2 = sp1.encode(), sp2.encode()
else:
bytes_sp1, bytes_sp2 = sp1, sp2
a = bytes_sp1
b = bytes_sp2
pos = start
if pos > len(sp1): return -1
return next_cpg_restricted( a, b, pos )
def find_non_cpg( sp1, sp2, start ):
cdef char* a
cdef char* b
cdef int pos
if PY_MAJOR_VERSION >= 3:
bytes_sp1, bytes_sp2 = sp1.encode(), sp2.encode()
else:
bytes_sp1, bytes_sp2 = sp1, sp2
a = bytes_sp1
b = bytes_sp2
pos = start
if pos > len(sp1): return -1
return next_non_cpg( a, b, pos )
def list_cpg( sp1, sp2 ):
cdef char * a
cdef char * b
cdef int start
if PY_MAJOR_VERSION >= 3:
bytes_sp1, bytes_sp2 = sp1.encode(), sp2.encode()
else:
bytes_sp1, bytes_sp2 = sp1, sp2
a = bytes_sp1
b = bytes_sp2
start = 0
cpglist = list()
while start > -1 and start < len(sp1):
start = next_cpg( a, b, start )
if start == -1: break
cpglist.append(start)
start = start + 1
return cpglist
def list_cpg_restricted( sp1, sp2 ):
cdef char * a
cdef char * b
cdef int start
if PY_MAJOR_VERSION >= 3:
bytes_sp1, bytes_sp2 = sp1.encode(), sp2.encode()
else:
bytes_sp1, bytes_sp2 = sp1, sp2
a = bytes_sp1
b = bytes_sp2
start = 0
cpglist = list()
while start > -1 and start < len(sp1):
start = next_cpg_restricted( a, b, start )
if start == -1: break
cpglist.append(start)
start = start + 1
return cpglist
def list_non_cpg( sp1, sp2 ):
cdef char * a
cdef char * b
cdef int start
if PY_MAJOR_VERSION >= 3:
bytes_sp1, bytes_sp2 = sp1.encode(), sp2.encode()
else:
bytes_sp1, bytes_sp2 = sp1, sp2
a = bytes_sp1
b = bytes_sp2
start = 0
cpglist = list()
while start > -1 and start < len(sp1):
start = next_non_cpg( a, b, start )
if start == -1: break
cpglist.append(start)
start = start + 1
return cpglist
def remove_gaps( sp, cpglist ):
for item in cpglist:
if sp[item] == '-':
cpglist.remove(item)
return cpglist
|