1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165
|
/*****************************************************************************
nucBedMain.cpp
(c) 2009 - Aaron Quinlan
Hall Laboratory
Department of Biochemistry and Molecular Genetics
University of Virginia
aaronquinlan@gmail.com
Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#include "nucBed.h"
#include "version.h"
using namespace std;
// define our program name
#define PROGRAM_NAME "bedtools nuc"
// define our parameter checking macro
#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)
// function declarations
void nuc_help(void);
int nuc_main(int argc, char* argv[]) {
// our configuration variables
bool showHelp = false;
// input files
string fastaDbFile;
string bedFile;
string pattern;
// checks for existence of parameters
bool haveFastaDb = false;
bool haveBed = false;
bool printSeq = false;
bool hasPattern = false;
bool forceStrand = false;
bool ignoreCase = false;
bool useFullHeader = false;
// check to see if we should print out some help
if(argc <= 1) showHelp = true;
for(int i = 1; i < argc; i++) {
int parameterLength = (int)strlen(argv[i]);
if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
(PARAMETER_CHECK("--help", 5, parameterLength))) {
showHelp = true;
}
}
if(showHelp) nuc_help();
// do some parsing (all of these parameters require 2 strings)
for(int i = 1; i < argc; i++) {
int parameterLength = (int)strlen(argv[i]);
if(PARAMETER_CHECK("-fi", 3, parameterLength)) {
if ((i+1) < argc) {
haveFastaDb = true;
fastaDbFile = argv[i + 1];
i++;
}
}
else if(PARAMETER_CHECK("-bed", 4, parameterLength)) {
if ((i+1) < argc) {
haveBed = true;
bedFile = argv[i + 1];
i++;
}
}
else if(PARAMETER_CHECK("-seq", 4, parameterLength)) {
printSeq = true;
}
else if(PARAMETER_CHECK("-s", 2, parameterLength)) {
forceStrand = true;
}
else if(PARAMETER_CHECK("-C", 2, parameterLength)) {
ignoreCase = true;
}
else if(PARAMETER_CHECK("-pattern", 8, parameterLength)) {
if ((i+1) < argc) {
hasPattern = true;
pattern = argv[i + 1];
i++;
}
}
else if(PARAMETER_CHECK("-fullHeader", 11, parameterLength)) {
useFullHeader = true;
}
else {
cerr << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
showHelp = true;
}
}
if (!haveFastaDb || !haveBed) {
showHelp = true;
}
if (!showHelp) {
NucBed *nuc = new NucBed(fastaDbFile, bedFile, printSeq,
hasPattern, pattern, forceStrand, ignoreCase,
useFullHeader);
delete nuc;
}
else {
nuc_help();
}
return 0;
}
void nuc_help(void) {
cerr << "\nTool: bedtools nuc (aka nucBed)" << endl;
cerr << "Version: " << VERSION << "\n";
cerr << "Summary: Profiles the nucleotide content of intervals in a fasta file." << endl << endl;
cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -fi <fasta> -bed <bed/gff/vcf>" << endl << endl;
cerr << "Options: " << endl;
cerr << "\t-fi\tInput FASTA file" << endl << endl;
cerr << "\t-bed\tBED/GFF/VCF file of ranges to extract from -fi" << endl << endl;
cerr << "\t-s\tProfile the sequence according to strand." << endl << endl;
cerr << "\t-seq\tPrint the extracted sequence" << endl << endl;
cerr << "\t-pattern\tReport the number of times a user-defined sequence" << endl;
cerr << "\t\t\tis observed (case-sensitive)." << endl << endl;
cerr << "\t-C\tIgnore case when matching -pattern. By defaulty, case matters." << endl << endl;
cerr << "\t-fullHeader\tUse full fasta header." << endl;
cerr << "\t\t- By default, only the word before the first space or tab "
<< "is used." << endl << endl;
cerr << "Output format: " << endl;
cerr << "\tThe following information will be reported after each BED entry:" << endl;
cerr << "\t 1) %AT content" << endl;
cerr << "\t 2) %GC content" << endl;
cerr << "\t 3) Number of As observed" << endl;
cerr << "\t 4) Number of Cs observed" << endl;
cerr << "\t 5) Number of Gs observed" << endl;
cerr << "\t 6) Number of Ts observed" << endl;
cerr << "\t 7) Number of Ns observed" << endl;
cerr << "\t 8) Number of other bases observed" << endl;
cerr << "\t 9) The length of the explored sequence/interval." << endl;
cerr << "\t 10) The seq. extracted from the FASTA file. (opt., if -seq is used)" << endl;
cerr << "\t 11) The number of times a user's pattern was observed." << endl;
cerr << "\t (opt., if -pattern is used.)" << endl << endl;
// end the program here
exit(1);
}
|