1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172
|
#include "dialign.h"
#include "config.h"
#include "Common/Options.h"
#include "FastaReader.h"
#include "IOUtil.h"
#include "Uncompress.h"
#include "alignGlobal.h"
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <getopt.h>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
using namespace std;
#define PROGRAM "abyss-align"
static const char VERSION_MESSAGE[] =
PROGRAM " (" PACKAGE_NAME ") " VERSION "\n"
"Written by Shaun Jackman.\n"
"\n"
"Copyright 2014 Canada's Michael Smith Genome Sciences Centre\n";
static const char USAGE_MESSAGE[] =
"Usage: " PROGRAM " [OPTION]... [FASTA]...\n"
"Align multiple sequences globally using either Needleman-Wunsch\n"
"or DIALIGN-TX. Groups of sequences may be separated using `#.'\n"
"\n"
" Arguments:\n"
"\n"
" FASTA sequences in FASTA format\n"
"\n"
" Options:\n"
"\n"
" -v, --verbose display verbose output\n"
" --help display this help and exit\n"
" --version output version information and exit\n"
"\n"
" DIALIGN-TX options:\n"
"\n"
" -D, --dialign-d=N dialign debug level, default: 0\n"
" -M, --dialign-m=FILE score matrix, default: dna_matrix.scr\n"
" -P, --dialign-p=FILE diagonal length probability distribution\n"
" default: dna_diag_prob_100_exp_550000\n"
"\n"
"Report bugs to <" PACKAGE_BUGREPORT ">.\n";
namespace opt {
static int dialign_debug;
static string dialign_score;
static string dialign_prob;
}
static const char shortopts[] = "v";
enum { OPT_HELP = 1, OPT_VERSION };
static const struct option longopts[] = {
{ "verbose", no_argument, NULL, 'v' },
{ "help", no_argument, NULL, OPT_HELP },
{ "version", no_argument, NULL, OPT_VERSION },
{ "dialign-d", required_argument, NULL, 'D' },
{ "dialign-m", required_argument, NULL, 'M' },
{ "dialign-p", required_argument, NULL, 'P' },
{ NULL, 0, NULL, 0 }
};
/** Align two sequences using the Needlman-Wunsch algorithm.
*/
static void alignPair(const string& seq0, const string& seq1,
ostream& out)
{
NWAlignment align;
unsigned match = alignGlobal(seq0, seq1, align);
float identity = (float)match / align.size();
out << align << identity << "\n\n";
}
/** Align multiple sequences using DIALIGN-TX. */
static void alignMulti(const vector<string>& seq, ostream& out)
{
unsigned match;
string alignment;
string consensus = dialign(seq, alignment, match);
float identity = (float)match / consensus.size();
out << alignment << consensus << '\n' << identity << "\n\n";
}
/** Align the specified sequences. */
static void align(const vector<string>& seq, ostream& out)
{
switch (seq.size()) {
case 0:
return;
case 1:
out << seq.front() << '\n' << 1 << "\n\n";
return;
case 2:
return alignPair(seq[0], seq[1], out);
default:
return alignMulti(seq, out);
}
}
/** Align multiple sequences. */
static void alignFile(const char* path) {
if (opt::verbose > 0)
cerr << "Aligning `" << path << "'\n";
FastaReader in(path, FastaReader::NO_FOLD_CASE);
for (vector<string> seq; in;) {
seq.clear();
FastaRecord fa;
if (in >> fa)
seq.push_back(fa.seq);
while (in.peek() == '>' && in >> fa)
seq.push_back(fa.seq);
align(seq, cout);
}
assert(in.eof());
}
int main(int argc, char** argv)
{
bool die = false;
for (int c; (c = getopt_long(argc, argv,
shortopts, longopts, NULL)) != -1;) {
istringstream arg(optarg != NULL ? optarg : "");
switch (c) {
case '?': die = true; break;
case 'D': arg >> opt::dialign_debug; break;
case 'M': arg >> opt::dialign_score; break;
case 'P': arg >> opt::dialign_prob; break;
case 'v': opt::verbose++; break;
case OPT_HELP:
cout << USAGE_MESSAGE;
exit(EXIT_SUCCESS);
case OPT_VERSION:
cout << VERSION_MESSAGE;
exit(EXIT_SUCCESS);
}
if (optarg != NULL && !arg.eof()) {
cerr << PROGRAM ": invalid option: `-"
<< (char)c << optarg << "'\n";
exit(EXIT_FAILURE);
}
}
if (die) {
cerr << "Try `" << PROGRAM
<< " --help' for more information.\n";
exit(EXIT_FAILURE);
}
// Initialize dialign.
init_parameters();
set_parameters_dna();
para->DEBUG = opt::dialign_debug;
para->SCR_MATRIX_FILE_NAME = (char*)opt::dialign_score.c_str();
para->DIAG_PROB_FILE_NAME = (char*)opt::dialign_prob.c_str();
initDialign();
if (optind < argc)
for_each(&argv[optind], &argv[argc], alignFile);
else
alignFile("-");
return 0;
}
|