1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
|
import subprocess
import ost
from ost import settings
def createdb(infasta, resultDB, exe_path=None):
    """
    Build a mmseqs2 sequence database from a fasta file.

    Runs ``mmseqs2 createdb <infasta> <resultDB>`` as a subprocess. The same
    call is used for query and target sequences alike.

    :param infasta: The fasta file from which the mmseqs2 database will be
                    created.
    :type infasta: :class:`string`

    :param resultDB: The output location for the mmseqs2 database.
    :type resultDB: :class:`string`

    :param exe_path: The path where the mmseqs2 executable is located. Passed
                     to :func:`ost.settings.Locate` as explicit file name.
    :type exe_path: :class:`string`
    """
    # The executable is resolved first so a missing binary is reported before
    # anything gets logged or started.
    args = [
        settings.Locate('mmseqs2', explicit_file_name=exe_path),
        'createdb',
        infasta,
        resultDB,
    ]
    ost.LogInfo(f"running MMseqs2 {' '.join(args)}")
    # NOTE: the exit status of the subprocess is not inspected.
    subprocess.run(args)
def create_index(trg_db, exe_path=None, directory=None):
    """
    Compute an index file of the target database for a fast read-in.

    Runs ``mmseqs2 createindex <trg_db> <directory>`` as a subprocess.
    It is recommended to compute the index if the target database is reused
    for several searches.

    :param trg_db: The target database mmseqs2 file.
                   (You need to initially convert a fasta file into a mmseqs2
                   database using :func:`createdb`).
    :type trg_db: :class:`string`

    :param exe_path: The path where the mmseqs2 executable is located.
    :type exe_path: :class:`string`

    :param directory: The directory for temporary files. It is recommended to
                      place this folder on a local drive. If None, a
                      temporary directory is created for the duration of the
                      call and removed afterwards.
    :type directory: :class:`string`
    """
    # Function-scope import: only needed when no directory is supplied.
    import tempfile

    mmseqs2_exe = settings.Locate('mmseqs2', explicit_file_name=exe_path)

    def _run(tmp_dir):
        # Log and launch createindex using *tmp_dir* for temporary files.
        args = [mmseqs2_exe, 'createindex', trg_db, tmp_dir]
        ost.LogInfo(f"running MMseqs2 {' '.join(args)}")
        # NOTE: the exit status of the subprocess is not inspected.
        subprocess.run(args)

    if directory is None:
        # Previously a None directory ended up in the argument list and
        # raised a TypeError while building the log message.
        with tempfile.TemporaryDirectory() as tmp_dir:
            _run(tmp_dir)
    else:
        _run(directory)
def alignment(query_db, trg_db, resultDB, directory, resultDB_m8, sen=None,
              exe_path=None, start_sens=None, sens_steps=None, fmt=None):
    """
    Search a query database against a target database and export the hits.

    Two mmseqs2 commands are run one after the other:

    1. ``mmseqs2 search <query_db> <trg_db> <resultDB> <directory> -a``,
       which consists of the prefilter and alignment steps.
    2. ``mmseqs2 convertalis <query_db> <trg_db> <resultDB> <resultDB_m8>``,
       which converts the result database into a BLAST tab formatted file.

    By default the converted file is a tab-separated list with 12 columns:
    (1,2) identifiers for query and target sequences/profiles,
    (3) sequence identity,
    (4) alignment length,
    (5) number of mismatches,
    (6) number of gap openings,
    (7-8, 9-10) domain start and end-position in query and in target,
    (11) E-value,
    and (12) bit score.

    The option ``--format-output`` (see *fmt*) defines a custom output format.
    The fields that are supported can be found in the following link:
    https://github.com/soedinglab/mmseqs2/wiki#custom-alignment-format-with-convertalis

    :param query_db: The query database mmseqs2 file.
                     (You need to initially convert a fasta file into a
                     mmseqs2 database using :func:`createdb`).
    :type query_db: :class:`string`

    :param trg_db: The target database mmseqs2 file.
                   (You need to initially convert a fasta file into a
                   mmseqs2 database using :func:`createdb`).
    :type trg_db: :class:`string`

    :param resultDB: The output location of the search result database. It is
                     the input of the convertalis step.
    :type resultDB: :class:`string`

    :param directory: The directory for temporary files.
    :type directory: :class:`string`

    :param resultDB_m8: The output location of the converted, tab formatted
                        result file.
    :type resultDB_m8: :class:`string`

    :param sen: It controls the speed and sensitivity of the search
                (passed as ``-s``). A very fast search would use a
                sensitivity of 1.0, while a very sensitive search would use a
                sensitivity of up to 7.0.
    :type sen: :class:`float`

    :param exe_path: The path where the mmseqs2 executable is located.
    :type exe_path: :class:`string`

    :param start_sens: Best hit fast. The lowest sensitivity is defined with
                       ``--start-sens``. Only applied if *sens_steps* is
                       given as well.
    :type start_sens: :class:`int`

    :param sens_steps: Best hit fast. The number of steps to reach the highest
                       sensitivity can be defined with ``--sens-steps``. Only
                       applied if *start_sens* is given as well.
    :type sens_steps: :class:`int`

    :param fmt: Format output type passed to ``--format-output``, if the
                default is not used.
    :type fmt: :class:`string`
    """
    mmseqs2_exe = settings.Locate('mmseqs2', explicit_file_name=exe_path)

    # Step 1: search (prefilter + alignment).
    command = [mmseqs2_exe, 'search', query_db, trg_db, resultDB, directory,
               '-a']
    if sen:
        command.extend(['-s', str(sen)])
    # The iterative sensitivity options are only added as a pair.
    if start_sens and sens_steps:
        command.extend(['--start-sens', str(start_sens),
                        '--sens-steps', str(sens_steps)])
    ost.LogInfo(f"running MMseqs2 {' '.join(command)}")
    # NOTE: the exit status is not inspected; convertalis runs regardless.
    subprocess.run(command)

    # Step 2: convert the result database into a tab formatted file.
    args = [mmseqs2_exe, 'convertalis', query_db, trg_db, resultDB,
            resultDB_m8]
    if fmt:
        args.extend(['--format-output', fmt])
    # Braces are required here: with parentheses the f-string logged the
    # literal expression text instead of the actual command line.
    ost.LogInfo(f"running MMseqs2 {' '.join(args)}")
    subprocess.run(args)
|