File: choose_bins.py

package info (click to toggle)
spades 3.13.1+dfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, sid
  • size: 22,172 kB
  • sloc: cpp: 136,213; ansic: 48,218; python: 16,809; perl: 4,252; sh: 2,115; java: 890; makefile: 507; pascal: 348; xml: 303
file content (20 lines) | stat: -rwxr-xr-x 622 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
#!/usr/bin/python3

import re
import sys

from common import contig_length
import numpy
import pandas
from pandas import DataFrame

# Usage: choose_bins.py <min_len> <table>
#
# Reads a header-less table with two columns (contig name, bin), adds up the
# contig lengths per (bin, group) pair, takes for every bin the largest of
# its per-group totals, and prints the bins whose largest total is strictly
# greater than <min_len> to stdout as tab-separated "bin<TAB>length" rows.
min_len = int(sys.argv[1])
in_fn = sys.argv[2]

# Both columns are read as text so that bin identifiers are kept verbatim.
d = pandas.read_table(in_fn, names=["name", "bin"], dtype=str)

# The group label is the first run of word characters that ends in a digit.
# The pattern is compiled once rather than handed to `re` for every row.
# A name with no such run still fails with IndexError, as before.
_group_re = re.compile(r"\w+\d+")

# Both derived columns depend on the name only, so map over that single
# column instead of building a full row object per record via apply(axis=1).
d["group"] = d["name"].map(lambda name: _group_re.findall(name)[0])
# NOTE(review): contig_length comes from the project's `common` module;
# presumably it returns a numeric length derived from the name - confirm.
d["length"] = d["name"].map(contig_length)
del d["name"]

# Total length per (bin, group); select the column explicitly so that the
# aggregation never depends on which other columns happen to be present.
info = d.groupby(["bin", "group"], as_index=False)["length"].sum()
# For each bin keep the largest per-group total.
info = info.groupby("bin", as_index=False)["length"].max()
# Strict comparison: a bin whose total equals min_len is dropped.
info = info[info["length"] > min_len]
info.to_csv(sys.stdout, sep="\t", header=False, index=False)