1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
|
/*
vcflib C++ library for parsing and manipulating VCF files
Copyright © 2010-2020 Erik Garrison
Copyright © 2020 Pjotr Prins
This software is published under the MIT License. See the LICENSE file.
*/
#include "Variant.h"
#include "split.h"
#include <string>
#include <iostream>
#include <set>
using namespace std;
using namespace vcflib;
// adds non-overlapping info fields from varB to varA
void addInfo(Variant& varA, Variant& varB) {
for (map<string, vector<string> >::iterator i = varB.info.begin(); i != varB.info.end(); ++i) {
if (varA.info.find(i->first) == varA.info.end()) {
varA.info[i->first] = i->second;
}
}
}
int main(int argc, char** argv) {
if (argc != 3) {
cerr << "usage: " << argv[0] << " <vcf file> <vcf file>" << endl << endl
<< "Adds info fields from the second file which are not present in the first vcf file." << endl;
cerr << endl << "Type: transformation" << endl << endl;
return 1;
}
string filenameA = argv[1];
string filenameB = argv[2];
if (filenameA == filenameB) {
cerr << "it won't help to add info data from the same file!" << endl;
return 1;
}
VariantCallFile variantFileA;
if (filenameA == "-") {
variantFileA.open(std::cin);
} else {
variantFileA.open(filenameA);
}
VariantCallFile variantFileB;
if (filenameB == "-") {
variantFileB.open(std::cin);
} else {
variantFileB.open(filenameB);
}
if (!variantFileA.is_open() || !variantFileB.is_open()) {
return 1;
}
Variant varA(variantFileA);
Variant varB(variantFileB);
// while the first file doesn't match the second positionally,
// step forward, annotating each genotype record with an empty genotype
// when the two match, iterate through the genotypes from the first file
// and get the genotypes reported in the second file
variantFileA.getNextVariant(varA);
variantFileB.getNextVariant(varB);
variantFileA.header = unionInfoHeaderLines(variantFileA.header, variantFileB.header);
cout << variantFileA.header << endl;
do {
while (!variantFileB.done()
&& (varB.sequenceName < varA.sequenceName
|| (varB.sequenceName == varA.sequenceName && varB.position < varA.position))
) {
variantFileB.getNextVariant(varB);
}
while (!variantFileA.done()
&& (varA.sequenceName < varB.sequenceName
|| (varA.sequenceName == varB.sequenceName && varA.position < varB.position))
) {
cout << varA << endl;
variantFileA.getNextVariant(varA);
}
while (!variantFileB.done()
&& (varB.sequenceName < varA.sequenceName
|| (varB.sequenceName == varA.sequenceName && varB.position < varA.position))
) {
variantFileB.getNextVariant(varB);
}
while (!variantFileA.done() && varA.sequenceName == varB.sequenceName && varA.position == varB.position) {
addInfo(varA, varB);
cout << varA << endl;
variantFileA.getNextVariant(varA);
variantFileB.getNextVariant(varB);
}
} while (!variantFileA.done() && !variantFileB.done());
if (!variantFileA.done()) {
cout << varA << endl;
while (variantFileA.getNextVariant(varA)) {
cout << varA << endl;
}
}
return 0;
}
|