File: readlengths.py

package info (click to toggle)
python-bx 0.13.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,000 kB
  • sloc: python: 17,136; ansic: 2,326; makefile: 24; sh: 8
file content (31 lines) | stat: -rw-r--r-- 917 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
"""
Read sequence lengths from a file.  Each line is of the form <name> <length>
where <name> is typically a chromosome name (e.g. chr12) and <length> is the
number of bases in the sequence.
"""


def read_lengths_file(name):
    """
    Returns a hash from sequence name to length.

    Blank lines and lines whose first non-whitespace character is "#" are
    skipped.  Every other line must consist of exactly two
    whitespace-separated fields: the sequence name and its integer length.

    ``name`` is the path of the file to read.

    Raises ValueError if a line does not have exactly two fields, if the
    length field is not an integer, or if the same sequence name appears
    with two different lengths.  Repeating a name with the same length is
    accepted.
    """

    chrom_to_length = {}
    # The context manager guarantees the file is closed even when one of the
    # ValueErrors below propagates out of the loop.
    with open(name) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            fields = line.split()
            if len(fields) != 2:
                raise ValueError(f"bad length file line: {line}")
            chrom, length_text = fields
            try:
                length = int(length_text)
            except ValueError as err:
                raise ValueError(f"bad length file line: {line}") from err
            if chrom in chrom_to_length and length != chrom_to_length[chrom]:
                raise ValueError(f"{chrom} has more than one length!")
            chrom_to_length[chrom] = length
    return chrom_to_length