File: predictions.py

package info (click to toggle)
python-seqcluster 1.2.9%2Bds-3
  • links: PTS, VCS
  • area: contrib
  • in suites: bookworm
  • size: 113,624 kB
  • sloc: python: 5,308; makefile: 184; sh: 122; javascript: 55
file content (90 lines) | stat: -rw-r--r-- 2,961 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
"""Implementation of open source tools to predict small RNA functions"""

from os import path as op
import os
import shutil

from seqcluster.libs.utils import chdir, safe_dirs
from seqcluster.libs import utils, logger as mylog
# import seqcluster.libs.logger as mylog
from seqcluster.libs.read import get_loci_fasta, make_temp_directory
from seqcluster.libs.do import run
from seqcluster.function import coral

logger = mylog.getLogger(__name__)


def run_coral(clus_obj, out_dir, args):
    """
    Run some CoRaL modules to predict small RNA function
    """
    if not args.bed:
        raise ValueError("This module needs the bed file output from cluster subcmd.")
    workdir = op.abspath(op.join(args.out, 'coral'))
    safe_dirs(workdir)
    bam_in = op.abspath(args.bam)
    bed_in = op.abspath(args.bed)
    reference = op.abspath(args.ref)
    with chdir(workdir):
        bam_clean = coral.prepare_bam(bam_in, bed_in)
        out_dir = op.join(workdir, "regions")
        safe_dirs(out_dir)
        prefix = "seqcluster"
        loci_file = coral.detect_regions(bam_clean, bed_in, out_dir, prefix)
        coral.create_features(bam_clean, loci_file, reference, out_dir)


def is_tRNA(clus_obj, out_dir, args):
    """
    Iterates through cluster precursors to predict sRNA types
    """
    ref = os.path.abspath(args.reference)
    utils.safe_dirs(out_dir)
    for nc in clus_obj[0]:
        c = clus_obj[0][nc]
        loci = c['loci']
        out_fa = "cluster_" + nc
        if loci[0][3] - loci[0][2] < 500:
            with make_temp_directory() as tmpdir:
                os.chdir(tmpdir)
                get_loci_fasta({loci[0][0]: [loci[0][0:5]]}, out_fa, ref)
                summary_file, str_file = _run_tRNA_scan(out_fa)
                if "predictions" not in c:
                    c['predictions'] = {}
                c['predictions']['tRNA'] = _read_tRNA_scan(summary_file)
                score = _read_tRNA_scan(summary_file)
                logger.debug(score)
                shutil.move(summary_file, op.join(out_dir, summary_file))
                shutil.move(str_file, op.join(out_dir, str_file))
        else:
            c['errors'].add("precursor too long")
        clus_obj[0][nc] = c

    return clus_obj


def _read_tRNA_scan(summary_file):
    """
    Parse output from tRNA_Scan
    """
    score = 0
    if os.path.getsize(summary_file) == 0:
        return 0
    with open(summary_file) as in_handle:
        # header = in_handle.next().strip().split()
        for line in in_handle:
            if not line.startswith("--"):
                pre = line.strip().split()
                score = pre[-1]
    return score


def _run_tRNA_scan(fasta_file):
    """
    Run tRNA-scan-SE to predict tRNA
    """
    out_file = fasta_file + "_trnascan"
    se_file = fasta_file + "_second_str"
    cmd = "tRNAscan-SE -q -o {out_file} -f {se_file} {fasta_file}"
    run(cmd.format(**locals()))
    return out_file, se_file