File: sv_simulator.py

package info (click to toggle)
spades 3.13.1+dfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, sid
  • size: 22,172 kB
  • sloc: cpp: 136,213; ansic: 48,218; python: 16,809; perl: 4,252; sh: 2,115; java: 890; makefile: 507; pascal: 348; xml: 303
file content (55 lines) | stat: -rw-r--r-- 1,775 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/python3

############################################################################
# Copyright (c) 2015 Saint Petersburg State University
# Copyright (c) 2011-2014 Saint Petersburg Academic University
# All Rights Reserved
# See file LICENSE for details.
############################################################################

import sys
import os
from Bio import SeqIO

#### MODE 1: coordinates of the simulated rearrangements ####
# deletion: (start, length)
del_start, del_len = 20000, 1500
# copy: (start, length)
copy_start, copy_len = 40000, 500
# relocation: (start, length, new start)
rel_start, rel_len, rel_new_start = 60000, 2000, 80000
####

#### MODE 2: coordinates of the translocated fragment ####
# trans_len bases starting at trans_from in the second reference are
# placed at position trans_to of the first reference
trans_to, trans_from, trans_len = 50000, 40000, 10000
####

def relocate_segment(seq, start, length, new_start):
    """Move ``seq[start:start + length]`` downstream to ``new_start``.

    The segment is cut out and re-inserted immediately before the base that
    was originally at index ``new_start``; the result has the same length as
    ``seq``.  Works on anything that supports slicing and ``+`` (``str``,
    ``Bio.Seq.Seq``).

    Raises:
        ValueError: unless ``0 <= start <= start + length <= new_start
            <= len(seq)``, i.e. the segment and its target must lie inside
            the sequence and the target must be downstream of the segment.
    """
    end = start + length
    if not 0 <= start <= end <= new_start <= len(seq):
        raise ValueError(
            'cannot relocate [%d, %d) to %d in a sequence of length %d'
            % (start, end, new_start, len(seq)))
    # The final slice must run to the end of the sequence; indexing a single
    # base here would silently truncate everything after new_start.
    return seq[:start] + seq[end:new_start] + seq[start:end] + seq[new_start:]


def duplicate_segment(seq, start, length):
    """Return ``seq`` with a second copy of ``seq[start:start + length]``
    inserted directly after the first one."""
    end = start + length
    return seq[:end] + seq[start:end] + seq[end:]


def delete_segment(seq, start, length):
    """Return ``seq`` with ``seq[start:start + length]`` removed."""
    return seq[:start] + seq[start + length:]


def insert_fragment(seq, position, fragment):
    """Return ``seq`` with ``fragment`` inserted before index ``position``."""
    return seq[:position] + fragment + seq[position:]


def build_output_path(in_fpath, tag):
    """Return ``in_fpath`` with ``tag`` placed in front of its extension,
    e.g. ``ref.fasta`` + ``.sv`` -> ``ref.sv.fasta``."""
    root, ext = os.path.splitext(in_fpath)
    return root + tag + ext


def main(argv):
    """Simulate structural variations in a single-record FASTA reference.

    ``argv`` has the layout of ``sys.argv``:

    * one file (MODE1): relocate, duplicate and delete fixed segments of the
      reference and write the result to ``<name>.sv<ext>``;
    * two files (MODE2): insert a fixed fragment of the second reference
      into the first one and write the result to ``<name>.trans<ext>``.

    Exits with status 1 after printing the usage line when no input file is
    given.
    """
    if len(argv) < 2:
        print('Usage: ' + argv[0] + ' reference.fasta [reference2.fasta]')
        sys.exit(1)

    in_fpath = argv[1]
    with open(in_fpath) as in_handle:
        record = SeqIO.read(in_handle, "fasta")

    if len(argv) == 2:  # MODE1
        out_fpath = build_output_path(in_fpath, '.sv')
        # Applied from the highest coordinate to the lowest so that each
        # edit leaves the coordinates of the following ones unchanged.
        new_seq = relocate_segment(record.seq, rel_start, rel_len, rel_new_start)
        new_seq = duplicate_segment(new_seq, copy_start, copy_len)
        new_seq = delete_segment(new_seq, del_start, del_len)
    else:  # MODE2
        out_fpath = build_output_path(in_fpath, '.trans')
        with open(argv[2]) as in2_handle:
            donor = SeqIO.read(in2_handle, "fasta")
        new_seq = insert_fragment(
            record.seq, trans_to, donor.seq[trans_from:trans_from + trans_len])

    record.seq = new_seq
    with open(out_fpath, "w") as output_handle:
        SeqIO.write([record], output_handle, "fasta")


if __name__ == "__main__":
    main(sys.argv)