1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
|
"""
Support for masking potential CpG sites in *pairwise* alignments.
"""
from bx.align.sitemask import Masker
from ._cpg import (
list_cpg,
list_cpg_restricted,
list_non_cpg,
)
# Restricted. Only mask out sites that are definitely CpG.
class Restricted(Masker):
    """Masker that masks the columns reported by ``list_cpg_restricted``.

    Attributes are plain counters updated on every call:

    * ``mask``   -- replacement string written into masked columns.
    * ``masked`` -- running count of columns reported for masking.
    * ``total``  -- running count of columns seen (length of the first
      component's text, summed over all processed blocks).
    """

    def __init__(self, mask="?"):
        self.mask = mask
        self.masked = 0
        self.total = 0

    def __call__(self, block):
        """Mask the reported columns in every component of *block*.

        A falsy *block* is handed back untouched.  A block with fewer than
        two components yields ``None``, since the column list is computed
        from the first two components.  Otherwise the component texts are
        rewritten in place and the same block object is returned.
        """
        if not block:
            return block

        components = block.components
        if len(components) < 2:
            return None

        first_text = components[0].text
        second_text = components[1].text
        cpg_columns = list_cpg_restricted(first_text.upper(), second_text.upper())

        # Counters track reported columns, whether or not a given component
        # has a gap there.
        self.masked += len(cpg_columns)
        self.total += len(first_text)

        for component in components:
            component.text = mask_columns(cpg_columns, component.text, self.mask)
        return block
# Inclusive. Mask out all sites that are not non-CpG sites.
class Inclusive(Masker):
    """Masker that masks the columns reported by ``list_cpg``.

    Attributes are plain counters updated on every call:

    * ``mask``   -- replacement string written into masked columns.
    * ``masked`` -- running count of columns reported for masking.
    * ``total``  -- running count of columns seen (length of the first
      component's text, summed over all processed blocks).
    """

    def __init__(self, mask="?"):
        self.mask = mask
        self.masked = 0
        self.total = 0

    def __call__(self, block):
        """Mask the reported columns in every component of *block*.

        A falsy *block* is handed back untouched.  A block with fewer than
        two components yields ``None``, since the column list is computed
        from the first two components.  Otherwise the component texts are
        rewritten in place and the same block object is returned.
        """
        if not block:
            return block

        components = block.components
        if len(components) < 2:
            return None

        first_text = components[0].text
        second_text = components[1].text
        cpg_columns = list_cpg(first_text.upper(), second_text.upper())

        # Counters track reported columns, whether or not a given component
        # has a gap there.
        self.masked += len(cpg_columns)
        self.total += len(first_text)

        for component in components:
            component.text = mask_columns(cpg_columns, component.text, self.mask)
        return block
# Mask nonCpG sites.
class nonCpG(Masker):
    """Masker that masks the columns reported by ``list_non_cpg``.

    Attributes are plain counters updated on every call:

    * ``mask``   -- replacement string written into masked columns.
    * ``masked`` -- running count of columns reported for masking.
    * ``total``  -- running count of columns seen (length of the first
      component's text, summed over all processed blocks).
    """

    def __init__(self, mask="?"):
        self.mask = mask
        self.masked = 0
        self.total = 0

    def __call__(self, block):
        """Mask the reported columns in every component of *block*.

        A falsy *block* is handed back untouched.  A block with fewer than
        two components yields ``None``, since the column list is computed
        from the first two components.  Otherwise the component texts are
        rewritten in place and the same block object is returned.
        """
        if not block:
            return block

        components = block.components
        if len(components) < 2:
            return None

        first_text = components[0].text
        second_text = components[1].text
        non_cpg_columns = list_non_cpg(first_text.upper(), second_text.upper())

        # Counters track reported columns, whether or not a given component
        # has a gap there.
        self.masked += len(non_cpg_columns)
        self.total += len(first_text)

        for component in components:
            component.text = mask_columns(non_cpg_columns, component.text, self.mask)
        return block
def mask_columns(masklist, text, mask):
    """Return *text* with the listed columns replaced by *mask*.

    Columns holding a gap character (``"-"``) are left as they are, so gaps
    in the alignment survive masking.

    Parameters:
        masklist: iterable of integer column indices into *text*.  Indices
            may appear in any order and may repeat; each column is masked
            at most once.
        text: the aligned sequence text to mask.
        mask: replacement string substituted for each masked column
            (normally a single character such as ``"?"``).

    Returns:
        A new string; *text* itself is not modified.

    Raises:
        IndexError: if an index in *masklist* lies outside *text*.
    """
    # Work column-by-column instead of slicing between mask positions:
    # slicing only gives correct output for strictly ascending, unique
    # indices, while per-column assignment is order-independent.
    columns = list(text)
    for position in masklist:
        # Test against the original text so an already-masked column does
        # not affect the gap check on a repeated index.
        if text[position] != "-":
            columns[position] = mask
    return "".join(columns)
|