1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
|
#!/usr/bin/env python3
"""
Creates a random graph for testing
"""
import argparse
import sys
import random
op = argparse.ArgumentParser(description=__doc__)
op.add_argument("--segments", "-s", type=int,
help="number of segments", required=True)
op.add_argument("--slen", "-l", type=int, default=100,
help="lenght of segments sequence")
op.add_argument("--with-sequence", "-w", action="store_true")
op.add_argument("--dovetails-per-segment", "-d",
help="average number of dovetail edges per segment",
default=2.0, type=float)
op.add_argument('--gfa-version', "-g", default="gfa1",
help="gfa version", choices=("gfa1", "gfa2"))
op.add_argument('--version', action='version', version='%(prog)s 1.0')
opts = op.parse_args()
if opts.segments < 0:
sys.stderr.write("Error: the number of segments must be "+
">= 0 ({})\n".format(opts.segments))
exit(1)
if opts.dovetails_per_segment < 0:
sys.stderr.write("Error: the average number of dovetails per segment must "+
"be >= 0 ({})\n".format(opts.dovetails_per_segment))
exit(1)
if opts.slen <= 0:
sys.stderr.write("Error: the length of segments sequence must be > 0"+
" ({})\n".format(opts.slen))
exit(1)
if opts.gfa_version == "gfa1":
print("H\tVN:Z:1.0")
else:
print("H\tVN:Z:2.0")
def random_sequence(slen):
sequence = []
for i in range(slen):
sequence.append(random.choice('ACGT'))
return "".join(sequence)
for i in range(opts.segments):
if opts.with_sequence:
sequence = random_sequence(opts.slen)
else:
sequence = "*"
if opts.gfa_version == "gfa1":
print("S\ts{}\t{}\tLN:i:{}".format(i, sequence, opts.slen))
else:
print("S\ts{}\t{}\t{}".format(i, opts.slen, sequence))
n_dovetails = int(opts.segments * opts.dovetails_per_segment)
edges = {}
for i in range(n_dovetails):
edge = False
while not edge:
s_from = random.randint(0, opts.segments-1)
s_from_or = random.choice('+-')
s_to = random.randint(0, opts.segments-1)
s_to_or = random.choice('+-')
if s_from not in edges:
edges[s_from] = {'+': {}, '-': {}}
if s_to not in edges[s_from][s_from_or]:
edges[s_from][s_from_or][s_to] = {'+': False, '-': False}
if not edges[s_from][s_from_or][s_to][s_to_or]:
edges[s_from][s_from_or][s_to][s_to_or] = True
edge = True
ovlen = opts.slen//10
if ovlen == 0: ovlen = 1
cigar = "{}M".format(ovlen)
if opts.gfa_version == "gfa1":
print("L\ts{}\t{}\ts{}\t{}\t{}\tID:Z:e{}".format(s_from, s_from_or, s_to,
s_to_or, cigar, i))
else:
s_from_begin = opts.slen - ovlen if s_from_or == "+" else 0
s_from_end = "{}$".format(opts.slen) if s_from_or == "+" else ovlen
s_to_begin = opts.slen - ovlen if s_to_or == "-" else 0
s_to_end = "{}$".format(opts.slen) if s_to_or == "-" else ovlen
print("E\te{}\ts{}{}\ts{}{}\t{}\t{}\t{}\t{}\t{}".format(
i, s_from, s_from_or, s_to, s_to_or, s_from_begin, s_from_end,
s_to_begin, s_to_end, cigar))
|