File: vcfaddinfo.cpp

package info (click to toggle)
libvcflib 1.0.2%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 49,732 kB
  • sloc: cpp: 39,194; perl: 474; python: 321; ruby: 285; sh: 247; ansic: 198; makefile: 125; javascript: 94; lisp: 55
file content (120 lines) | stat: -rw-r--r-- 3,541 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
/*
    vcflib C++ library for parsing and manipulating VCF files

    Copyright © 2010-2020 Erik Garrison
    Copyright © 2020      Pjotr Prins

    This software is published under the MIT License. See the LICENSE file.
*/

#include "Variant.h"
#include "split.h"
#include <string>
#include <iostream>
#include <set>

using namespace std;
using namespace vcflib;

// adds non-overlapping info fields from varB to varA
void addInfo(Variant& varA, Variant& varB) {
    for (map<string, vector<string> >::iterator i = varB.info.begin(); i != varB.info.end(); ++i) {
        if (varA.info.find(i->first) == varA.info.end()) {
            varA.info[i->first] = i->second;
        }
    }
}

int main(int argc, char** argv) {

    if (argc != 3) {
      cerr << "usage: " << argv[0] << " <vcf file> <vcf file>" << endl << endl
           << "Adds info fields from the second file which are not present in the first vcf file." << endl;
      cerr << endl << "Type: transformation" << endl << endl;
      return 1;
    }

    string filenameA = argv[1];
    string filenameB = argv[2];

    if (filenameA == filenameB) {
        cerr << "it won't help to add info data from the same file!" << endl;
        return 1;
    }

    VariantCallFile variantFileA;
    if (filenameA == "-") {
        variantFileA.open(std::cin);
    } else {
        variantFileA.open(filenameA);
    }

    VariantCallFile variantFileB;
    if (filenameB == "-") {
        variantFileB.open(std::cin);
    } else {
        variantFileB.open(filenameB);
    }

    if (!variantFileA.is_open() || !variantFileB.is_open()) {
        return 1;
    }

    Variant varA(variantFileA);
    Variant varB(variantFileB);

    // while the first file doesn't match the second positionally,
    // step forward, annotating each genotype record with an empty genotype
    // when the two match, iterate through the genotypes from the first file
    // and get the genotypes reported in the second file

    variantFileA.getNextVariant(varA);
    variantFileB.getNextVariant(varB);

    variantFileA.header = unionInfoHeaderLines(variantFileA.header, variantFileB.header);

    cout << variantFileA.header << endl;

    do {

        while (!variantFileB.done()
               && (varB.sequenceName < varA.sequenceName
                   || (varB.sequenceName == varA.sequenceName && varB.position < varA.position))
            ) {
            variantFileB.getNextVariant(varB);
        }

        while (!variantFileA.done()
               && (varA.sequenceName < varB.sequenceName
                   || (varA.sequenceName == varB.sequenceName && varA.position < varB.position))
            ) {
            cout << varA << endl;
            variantFileA.getNextVariant(varA);
        }

        while (!variantFileB.done()
               && (varB.sequenceName < varA.sequenceName
                   || (varB.sequenceName == varA.sequenceName && varB.position < varA.position))
            ) {
            variantFileB.getNextVariant(varB);
        }

        while (!variantFileA.done() && varA.sequenceName == varB.sequenceName && varA.position == varB.position) {
            addInfo(varA, varB);
            cout << varA << endl;
            variantFileA.getNextVariant(varA);
            variantFileB.getNextVariant(varB);
        }

    } while (!variantFileA.done() && !variantFileB.done());

    if (!variantFileA.done()) {
        cout << varA << endl;
        while (variantFileA.getNextVariant(varA)) {
            cout << varA << endl;
        }
    }

    return 0;

}