1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
|
"""
Read sequence lengths from a file. Each line is of the form <name> <length>
where <name> is typically a chromosome name (e.g. chr12) and <length> is the
number of bases in the sequence.
"""
def read_lengths_file(name):
    """
    Read a sequence-lengths file and return a dict from sequence name to length.

    Each non-blank line that does not start with "#" must consist of exactly
    two whitespace-separated fields: a sequence name and an integer length.
    Blank lines and lines whose first non-whitespace character is "#" are
    skipped.

    name: path of the lengths file; passed directly to open().

    Returns a dict mapping each sequence name (str) to its length (int).

    Raises ValueError if a line does not have exactly two fields, if the
    length field is not an integer, or if the same sequence name appears
    with two different lengths. Repeating a name with the same length is
    accepted.
    """
    chrom_to_length = {}
    # The context manager guarantees the file is closed even when a
    # malformed line makes us raise part-way through the file.
    with open(name) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            fields = line.split()
            if len(fields) != 2:
                raise ValueError(f"bad length file line: {line}")
            chrom, length_text = fields
            try:
                length = int(length_text)
            except ValueError as err:
                raise ValueError(f"bad length file line: {line}") from err
            # setdefault stores the length on first sight and otherwise
            # returns the previously recorded one for comparison.
            if chrom_to_length.setdefault(chrom, length) != length:
                raise ValueError(f"{chrom} has more than one length!")
    return chrom_to_length
|