File: fqGen.py

package info (click to toggle)
python-screed 1.1.3-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 820 kB
  • sloc: python: 3,356; makefile: 169; sh: 32; javascript: 16
file content (82 lines) | stat: -rwxr-xr-x 2,276 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env python
# Copyright (c) 2016, The Regents of the University of California.

import sys, os
import random

# Number of characters in every generated sequence and quality string.
seqLength = 37

class collectionOFiles(object):
    """
    A group of output files that all receive the same stream of records.

    File number i (counting from 0) is named "<baseName>_<i>" and is closed
    once it holds totalSize // 2**i records, so each file is roughly half as
    long as the previous one.  The record count is checked after writing,
    so every file receives at least one record even when its limit is 0.
    """
    def __init__(self, baseName, divisions, totalSize):
        """
        Opens one output file per division.

        baseName  -- path prefix; "_<i>" is appended for each file
        divisions -- number of files to create
        totalSize -- record limit of the first (largest) file
        """
        self.baseName = baseName
        self.divisions = divisions
        self.totalSize = totalSize

        # filename -> (open file handle, record limit, records written)
        self.fileHandles = {}
        try:
            for i in range(0, divisions):
                filename = self.baseName + "_%d" % i
                fh = open(filename, "wb")
                # Floor division keeps the limit an integer on Python 3,
                # matching what "/" did for ints on Python 2.
                limit = self.totalSize // (2 ** i)
                self.fileHandles[filename] = (fh, limit, 0)
        except (IOError, OSError):
            # Do not leak the handles opened before the failure.
            for fh, _, _ in self.fileHandles.values():
                fh.close()
            self.fileHandles = {}
            raise

    def writeRecord(self, name, sequence, quality):
        """
        Appends one four-line record to every file that is still open and
        closes (and forgets) each file that has reached its limit.
        """
        record = "%s\n%s\n+\n%s\n" % (name, sequence, quality)
        # The files are opened in binary mode, so text must be encoded
        # before writing (a Python 2 str is already bytes).
        if not isinstance(record, bytes):
            record = record.encode("utf-8")

        # Walk a snapshot so finished files can be dropped while looping.
        for filename, (fh, limit, count) in list(self.fileHandles.items()):
            fh.write(record)
            count += 1
            if count >= limit:
                fh.close()
                del self.fileHandles[filename]
            else:
                self.fileHandles[filename] = (fh, limit, count)

    def finished(self):
        """
        Returns True once every file has been filled and closed
        """
        return len(self.fileHandles) == 0


def genSeq(length):
    """
    Returns a random string of length characters drawn from A, T, C and G
    """
    bases = ['A','T','C','G']
    # One random.choice call per output character, in order.
    return "".join([random.choice(bases) for _ in range(0, length)])

def genAcc(length):
    """
    Returns a random quality string of length characters
    """
    symbols = ['A','1','7','3','.',';','*','<']
    picked = [random.choice(symbols) for _ in range(0, length)]
    return "".join(picked)

def createFastqFiles(filename, size, divisions):
    """
    Writes randomly generated records to a collectionOFiles built from
    filename, divisions and size until the collection reports that it
    is finished.
    """
    outputs = collectionOFiles(filename, divisions, size)
    recordIndex = 0
    while True:
        if outputs.finished():
            break
        # Arguments are evaluated left to right: name, sequence, quality.
        outputs.writeRecord(
            "@HWI-EAS_4_PE-F%d" % recordIndex,
            genSeq(seqLength),
            genAcc(seqLength),
        )
        recordIndex += 1

if __name__ == '__main__':
    # Exactly three arguments are expected after the script name.
    if len(sys.argv) != 4:
        # A parenthesized single string prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print("Usage: <filename> <size> <divisions>")
        # sys.exit does not depend on the site module, unlike bare exit().
        sys.exit(1)

    filename = sys.argv[1]
    size = int(sys.argv[2])
    divisions = int(sys.argv[3])

    createFastqFiles(filename, size, divisions)