File: modify.py

package info (click to toggle)
pbsuite 15.8.24%2Bdfsg-8
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 15,508 kB
  • sloc: python: 10,988; sh: 152; xml: 21; makefile: 14
file content (35 lines) | stat: -rw-r--r-- 1,090 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import sys, random
from pbsuite.utils.FileHandlers import FastaFile, revComp, wrap

def getRandomSeq(length):
    """Return a random DNA string of ``length`` bases.

    Every base is drawn independently from A, T, C and G with
    ``random.choice``, i.e. from the module-level ``random`` generator;
    seed ``random`` beforehand if reproducible output is needed.

    length -- number of bases to generate. Zero or a negative value
              yields an empty string.
    """
    # range() instead of the Python-2-only xrange(): same result, and the
    # function also runs under Python 3.
    return "".join([random.choice(['A', 'T', 'C', 'G']) for _ in range(length)])
    
if __name__ == '__main__':
    # Usage: modify.py <reference.fasta>
    #
    # Loads the FASTA file named by the first argument, applies a fixed
    # list of edits to one of its sequences and prints the edited sequence
    # to stdout as a single FASTA record.
    #
    # The edits are applied in order to the same list, so every coordinate
    # below refers to the sequence as already modified by the preceding
    # steps (earlier length changes shift later positions). Slice
    # assignment clamps out-of-range indices, so a sequence shorter than
    # the coordinates used here is edited at its end without any error.
    if len(sys.argv) < 2:
        sys.exit("usage: %s <reference.fasta>" % sys.argv[0])

    fasta = FastaFile(sys.argv[1])
    # Only the first key is used. list() keeps the indexing valid whether
    # keys() returns a list or a view object.
    key = list(fasta.keys())[0]
    # Work on a list of characters so slices can be deleted/inserted in place.
    ref = list(fasta[key])

    # 800bp insertion in the sample (deletion in the reference):
    # remove 800 bases starting at position 5000.
    del ref[5000:5800]
    # 5000 - Insertion

    # Inversion in the sample (inversion in the reference) tails:
    # overwrite 9000-12000 with the reversed, revComp-translated bases
    # taken from 10000-13000.
    # NOTE(review): the replacement is built from 10000:13000, not from the
    # 9000:12000 span it overwrites, so this is not an in-place inversion.
    # Confirm the 1000bp offset is intended before changing it.
    ref[9000:12000] = list("".join(ref[10000:13000]).translate(revComp)[::-1])
    # 9000-12000 - Inversion

    # 1kb deletion in the sample (insert into the reference) tails:
    # insert 1000 random bases at position 20000.
    seq = getRandomSeq(1000)
    ref[20000:20000] = list(seq)
    # 20000-21000 - Deletion

    # 100bp insertion in the sample (deletion in the reference) spots:
    # remove 100 bases starting at position 30000.
    del ref[30000:30100]
    # 30000 - Insertion

    # 200bp deletion in the sample (insert into the reference) spots:
    # insert 200 random bases at position 35000.
    seq = getRandomSeq(200)
    ref[35000:35000] = list(seq)
    # 35000-35200 - Deletion

    # Single parenthesised argument: prints the same text whether print is
    # a statement (Python 2) or a function (Python 3).
    print(">%s\n%s" % (key, wrap("".join(ref))))