Description: Fix spelling errors in various generated binaries
Author: Lance Lin <lq27267@gmail.com>
Date: 2023-08-29

--- stacks.orig/src/kmer_filter.cc
+++ stacks/src/kmer_filter.cc
@@ -1011,7 +1011,7 @@
         parse_tsv(line, parts);
 
         if (parts.size() != 2) {
-            cerr << "kmer frequencies are not formated correctly: expecting two, tab separated columns, found " << parts.size() << ".\n";
+            cerr << "kmer frequencies are not formatted correctly: expecting two, tab separated columns, found " << parts.size() << ".\n";
             exit(1);
         }
 
@@ -1508,7 +1508,7 @@
         return 0;
     }
 
-    cerr << "Outputing details to log: '" << log_path << "'\n\n";
+    cerr << "Outputting details to log: '" << log_path << "'\n\n";
 
     log << "File\t"
         << "Retained Reads\t"
@@ -1816,14 +1816,14 @@
               << "  o: path to output the processed files.\n"
               << "  y: output type, either 'fastq' or 'fasta' (default fastq).\n"
               << "  D: capture discarded reads to a file.\n"
-              << "  h: display this help messsage.\n\n"
+              << "  h: display this help message.\n\n"
               << "  Filtering options:\n"
               << "    --rare: turn on filtering based on rare k-mers.\n"
               << "    --abundant: turn on filtering based on abundant k-mers.\n"
               << "    --k-len <len>: specify k-mer size (default 15).\n\n"
               << "  Advanced filtering options:\n"
               << "    --max-k-freq <value>: specify the number of times a kmer must occur to be considered abundant (default 20,000).\n"
-              << "    --min-lim <value>: specify number of rare kmers occuring in a row required to discard a read (default 80% of the k-mer length).\n"
+              << "    --min-lim <value>: specify number of rare kmers occurring in a row required to discard a read (default 80% of the k-mer length).\n"
               << "    --max-lim <value>: specify number of abundant kmers required to discard a read (default 80% of the k-mers in a read).\n\n"
               << "  Normalize data:\n"
               << "    --normalize <depth>: normalize read depth according to k-mer coverage.\n\n"
--- stacks.orig/src/process_shortreads.cc
+++ stacks/src/process_shortreads.cc
@@ -684,7 +684,7 @@
         return 0;
     }
 
-    cerr << "Outputing details to log: '" << log_path << "'\n\n";
+    cerr << "Outputting details to log: '" << log_path << "'\n\n";
 
     init_log(log, argc, argv);
 
@@ -1146,7 +1146,7 @@
               << "  D: capture discarded reads to a file.\n"
               << "  w: set the size of the sliding window as a fraction of the read length, between 0 and 1 (default 0.15).\n"
               << "  s: set the score limit. If the average score within the sliding window drops below this value, the read is discarded (default 10).\n"
-              << "  h: display this help messsage.\n\n"
+              << "  h: display this help message.\n\n"
               << "  Barcode options:\n"
               << "    --inline-null:   barcode is inline with sequence, occurs only on single-end read (default).\n"
               << "    --index-null:    barcode is provded in FASTQ header (Illumina i5 or i7 read).\n"
--- stacks.orig/src/sstacks.cc
+++ stacks/src/sstacks.cc
@@ -1247,7 +1247,7 @@
     uint         match_depth;
     stringstream sstr;
 
-    cerr << "Outputing to file " << out_file << "\n";
+    cerr << "Outputting to file " << out_file << "\n";
 
     for (i = sample.begin(); i != sample.end(); i++) {
         qloc = i->second;
--- stacks.orig/src/clone_filter.cc
+++ stacks/src/clone_filter.cc
@@ -1328,7 +1328,7 @@
               << "  o: path to output the processed files.\n"
               << "  y: output type, either 'fastq', 'fasta', 'gzfasta', or 'gzfastq' (default same as input type).\n"
               << "  D: capture discarded reads to a file.\n"
-              << "  h: display this help messsage.\n"
+              << "  h: display this help message.\n"
               << "  --oligo-len-1 len: length of the single-end oligo sequence in data set.\n"
               << "  --oligo-len-2 len: length of the paired-end oligo sequence in data set.\n"
               << "  --retain-oligo: do not trim off the random oligo sequence (if oligo is inline).\n\n"
--- stacks.orig/src/phasedstacks.cc
+++ stacks/src/phasedstacks.cc
@@ -1738,14 +1738,14 @@
         parse_tsv(line, parts);
 
         if (parts.size() != 2) {
-            cerr << "Population map is not formated correctly: expecting two, tab separated columns, found " << parts.size() << ".\n";
+            cerr << "Population map is not formatted correctly: expecting two, tab separated columns, found " << parts.size() << ".\n";
             return 0;
         }
 
         strncpy(pop_id_str, parts[1].c_str(), id_len);
         for (int i = 0; i < id_len && pop_id_str[i] != '\0'; i++)
             if (!isdigit(pop_id_str[i])) {
-                cerr << "Population map is not formated correctly: expecting numerical ID in second column, found '" << parts[1] << "'.\n";
+                cerr << "Population map is not formatted correctly: expecting numerical ID in second column, found '" << parts[1] << "'.\n";
                 return 0;
             }
 
@@ -1953,7 +1953,7 @@
               << "  p: number of processes to run in parallel sections of code.\n"
               << "  M: path to the population map, a tab separated file describing which individuals belong in which population.\n"
               << "  v: print program version." << "\n"
-              << "  h: display this help messsage." << "\n"
+              << "  h: display this help message." << "\n"
               << "  --haplotypes: data were phased as RAD locus haplotypes.\n"
               << "  --dprime-bin-size: size of buckets for binning SNPs at a particular distance to calculate the mean D' value.\n"
               << "  --dprime-threshold <val>: if D' values fall above <val>, set the D' to 1, otherwise set D' to 0.\n\n"
--- stacks.orig/src/populations.cc
+++ stacks/src/populations.cc
@@ -720,7 +720,7 @@
         do {
             int rv = this->_fasta_reader.next_seq(seq);
             if (rv == 0) {
-                cerr << "Error: catalog VCF and FASTA files are discordant, maybe trucated. rv: "
+                cerr << "Error: catalog VCF and FASTA files are discordant, maybe truncated. rv: "
                      << rv << "; cloc_id: " << cloc_id << "\n";
                 throw exception();
             }
@@ -4270,7 +4270,7 @@
     }
 
     if (merge_sites == true && enz.length() == 0) {
-        cerr << "Error: You must specify the restriction enzyme associated with this data set to merge overlaping cutsites.\n";
+        cerr << "Error: You must specify the restriction enzyme associated with this data set to merge overlapping cutsites.\n";
         help();
     }
 
@@ -4297,7 +4297,7 @@
          << "populations -P dir [-O dir] [-M popmap] (filters) [--fstats] [-k [--sigma=150000] [--bootstrap [-N 100]]] (output formats)\n"
          << "populations -V vcf -O dir [-M popmap] (filters) [--fstats] [-k [--sigma=150000] [--bootstrap [-N 100]]] (output formats)\n"
          << "\n"
-         << "  -P,--in-path: path to a directory containing Stacks ouput files.\n"
+         << "  -P,--in-path: path to a directory containing Stacks output files.\n"
          << "  -V,--in-vcf: path to a standalone input VCF file.\n"
          << "  -O,--out-path: path to a directory where to write the output files. (Required by -V; otherwise defaults to value of -P.)\n"
          << "  -M,--popmap: path to a population map. (Format is 'SAMPLE1 \\t POP1 \\n SAMPLE2 ...'.)\n"
@@ -4373,7 +4373,7 @@
          << "  --map-format: mapping program format to write, 'joinmap', 'onemap', and 'rqtl' are currently supported.\n"
          << "\n"
          << "Additional options:\n"
-         << "  -h,--help: display this help messsage.\n"
+         << "  -h,--help: display this help message.\n"
          << "  -v,--version: print program version.\n"
          << "  --verbose: turn on additional logging.\n"
          << ("  --log-fst-comp: log components of Fst/Phi_st calculations to a file.\n")
--- stacks.orig/src/gstacks.cc
+++ stacks/src/gstacks.cc
@@ -485,7 +485,7 @@
                << cs.mean_olapd_locus_length() << ";\n"
                << "  Out of " << cs.n_tot_reads << " paired-end reads in these loci (mean "
                << (double) cs.n_aln_reads / pe_ctg << " reads per locus),\n"
-               << "    " << cs.n_aln_reads << " were successfuly aligned ("
+               << "    " << cs.n_aln_reads << " were successfully aligned ("
                << as_percentage((double) cs.n_aln_reads / cs.n_tot_reads) << ");\n"
                << "  Mean insert length was " << cs.insert_length_olap_mv.mean() << ", stdev: "
                << cs.insert_length_olap_mv.sd_p() << " (based on aligned reads in overlapped loci).\n";
@@ -3090,7 +3090,7 @@
         "De novo mode:\n"
         "  gstacks -P stacks_dir -M popmap\n"
         "\n"
-        "  -P: input directory containg '*.matches.bam' files created by the\n"
+        "  -P: input directory containing '*.matches.bam' files created by the\n"
         "      de novo Stacks pipeline, ustacks-cstacks-sstacks-tsv2bam\n"
         "\n"
         "Reference-based mode:\n"
--- stacks.orig/src/ustacks.cc
+++ stacks/src/ustacks.cc
@@ -2981,7 +2981,7 @@
     if (in_file_type == FileT::unknown) {
         in_file_type = guess_file_type(in_file);
         if (in_file_type == FileT::unknown) {
-            cerr << "Unable to recongnize the extention of file '" << in_file << "'.\n";
+            cerr << "Unable to recognize the extension of file '" << in_file << "'.\n";
             help();
         }
     }
@@ -3033,7 +3033,7 @@
          << "  -H: disable calling haplotypes from secondary reads.\n"
          << "\n"
          << "  Stack assembly options:\n"
-         << "    --force-diff-len: allow raw input reads of different lengths, e.g. after trimming (default: ustacks perfers raw input reads of uniform length).\n"
+         << "    --force-diff-len: allow raw input reads of different lengths, e.g. after trimming (default: ustacks prefers raw input reads of uniform length).\n"
          << "    --keep-high-cov: disable the algorithm that removes highly-repetitive stacks and nearby errors.\n"
          << "    --high-cov-thres: highly-repetitive stacks threshold, in standard deviation units (default: 3.0).\n"
          << "    --max-locus-stacks <num>: maximum number of stacks at a single de novo locus (default 3).\n"
@@ -3053,7 +3053,7 @@
          << "    For the Fixed model:\n"
          << "      --bc-err-freq <num>: specify the barcode error frequency, between 0 and 1.0.\n"
          << "\n"
-         << "  h: display this help messsage.\n";
+         << "  h: display this help message.\n";
 
     exit(1);
 }