File: vectorizer.cpp

package info (click to toggle)
vg 1.30.0%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 267,848 kB
  • sloc: cpp: 446,974; ansic: 116,148; python: 22,805; cs: 17,888; javascript: 11,031; sh: 5,866; makefile: 4,039; java: 1,415; perl: 1,303; xml: 442; lisp: 242
file content (163 lines) | stat: -rw-r--r-- 4,745 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#include "vectorizer.hpp"

#include <utility>

using namespace std;
using namespace vg;
using namespace sdsl;
Vectorizer::Vectorizer(const PathPositionHandleGraph* x) : my_xg(x){
    // Number the graph's handles 1, 2, 3, ... in the order for_each_handle
    // visits them, and record each node id's number in id_to_rank.
    size_t assigned = 0;
    my_xg->for_each_handle([&](handle_t h) {
            assigned += 1;
            id_to_rank[my_xg->get_id(h)] = assigned;
        });
}

// Nothing is released explicitly; in particular my_xg is not deleted here.
Vectorizer::~Vectorizer() = default;

string Vectorizer::output_wabbit_map(){
    // One line per wabbit_map entry, written as "<value>\t<key>\n" in the
    // map's own iteration order.
    stringstream buffer;
    for (const auto& entry : wabbit_map){
        buffer << entry.second << "\t" << entry.first << "\n";
    }
    return buffer.str();
}

/**
 * Write every stored vector to `out`, one per line, in the order they were added.
 *
 * @param out       Stream that receives the output.
 * @param r_format  When true each vector is passed through format() before
 *                  being written; when false it is streamed with operator<<.
 * @param annotate  When true each line is prefixed with the entry of my_names
 *                  at the same position, followed by a tab. Annotating also
 *                  forces r_format on.
 *
 * TODO: print a header row. An earlier draft (written against an older graph
 * API) emitted "Alignments" followed by one tab-separated label per column:
 * the node id for node columns and "edge" for edge columns.
 */
void Vectorizer::emit(ostream &out, bool r_format=false, bool annotate=false){
    if (annotate){
        // Annotated output always goes through format().
        r_format = true;
        // Names and vectors are paired by position, so the counts must agree.
        assert(my_names.size() == my_vectors.size());
    }

    size_t index = 0;
    // Iterate by const reference so that each bit_vector is not copied just
    // to be printed.
    for (const auto& v : my_vectors){
        if (annotate){
            out << my_names[index] << "\t";
        }
        if (r_format){
            out << format(v) << endl;
        }
        else{
            out << v << endl;
        }

        ++index;
    }
}

/// Append a vector to my_vectors, the list that emit() writes out.
/// `v` is already a private copy (taken by value), so it is moved into
/// storage instead of being copied a second time.
void Vectorizer::add_bv(bit_vector v){
    my_vectors.push_back(std::move(v));
}

/// Append a name to my_names; emit() pairs names with vectors by position
/// when annotating. `n` is taken by value, so it is moved into storage
/// instead of being copied a second time.
void Vectorizer::add_name(string n){
    my_names.push_back(std::move(n));
}

/**
 * Encode an alignment as one integer per graph node.
 *
 * The result has get_node_count() entries. A node's entry lives at index
 * (rank - 1), where rank is the node's value in id_to_rank. Each entry is:
 *   0 - no mapping of the alignment refers to the node,
 *   1 - a mapping refers to the node and for_each_step_on_handle reports no
 *       step on it,
 *   2 - a mapping refers to the node and at least one step is reported.
 *
 * Mappings without a position, with node id 0, or whose node id has no usable
 * rank in id_to_rank are skipped.
 */
vector<int> Vectorizer::alignment_to_a_hot(Alignment a){
    int64_t entity_size = my_xg->get_node_count();
    vector<int> ret(entity_size, 0);
    // Bind by reference: the protobuf accessors return references, and copying
    // the path and every mapping is unnecessary for read-only access.
    const Path& path = a.path();
    for (int i = 0; i < path.mapping_size(); i++){
        const Mapping& mapping = path.mapping(i);
        if (!mapping.has_position()){
            continue;
        }
        const int64_t node_id = mapping.position().node_id();
        if (!node_id) continue;

        // Use find() rather than operator[]: operator[] would insert rank 0
        // for an unknown id, and the write below would then hit ret[-1].
        const auto rank_it = id_to_rank.find(node_id);
        if (rank_it == id_to_rank.end()){
            continue;
        }
        const size_t rank = rank_it->second;
        if (rank == 0 || rank > ret.size()){
            continue;
        }

        // Only the existence of a step matters, so stop at the first one
        // instead of collecting them all.
        bool has_step = false;
        my_xg->for_each_step_on_handle(my_xg->get_handle(node_id), [&](const step_handle_t&) {
                has_step = true;
                return false;
            });
        ret[rank - 1] = has_step ? 2 : 1;
    }
    return ret;
}

/**
 * Encode an alignment as one identity fraction per graph node.
 *
 * The result has get_node_count() entries, initialised to 0.0. A node's entry
 * lives at index (rank - 1), where rank is the node's value in id_to_rank.
 * For each mapping the entry is set to
 *     (summed to_length of edits with from_length == to_length and an empty
 *      sequence) / (summed from_length of all edits),
 * or 0.0 when the summed from_length is zero. If several mappings refer to
 * the same node, the value from the last one is kept.
 *
 * Mappings without a position, with node id 0, or whose node id has no usable
 * rank in id_to_rank are skipped.
 */
vector<double> Vectorizer::alignment_to_identity_hot(Alignment a){
    int64_t entity_size = my_xg->get_node_count();
    vector<double> ret(entity_size, 0.0);
    // Bind by reference: the protobuf accessors return references, and copying
    // the path, mappings and edits is unnecessary for read-only access.
    const Path& path = a.path();
    for (int i = 0; i < path.mapping_size(); i++){
        const Mapping& mapping = path.mapping(i);
        if (!mapping.has_position()){
            continue;
        }
        const int64_t node_id = mapping.position().node_id();
        if (!node_id) continue;

        // Use find() rather than operator[]: operator[] would insert rank 0
        // for an unknown id, and the write below would then hit ret[-1].
        const auto rank_it = id_to_rank.find(node_id);
        if (rank_it == id_to_rank.end()){
            continue;
        }
        const size_t rank = rank_it->second;
        if (rank == 0 || rank > ret.size()){
            continue;
        }

        // Walk the edits, counting exact-match bases against the total
        // reference (from) length covered by the mapping.
        double match_len = 0.0;
        double total_len = 0.0;
        for (int j = 0; j < mapping.edit_size(); j++){
            const Edit& e = mapping.edit(j);
            total_len += e.from_length();
            if (e.from_length() == e.to_length() && e.sequence().empty()){
                match_len += (double) e.to_length();
            }
            // TODO if we map but don't match exactly (equal lengths with a
            // non-empty sequence), add half the length to match_len.
        }

        // match_len can only be non-zero when total_len is, so guarding on
        // total_len alone avoids the division by zero.
        ret[rank - 1] = (total_len == 0.0) ? 0.0 : (match_len / total_len);
    }
    return ret;
}

/**
 * Encode an alignment as one bit per graph node.
 *
 * The result has get_node_count() bits, all initially 0. The bit at index
 * (rank - 1), where rank is the node's value in id_to_rank, is set to 1 for
 * every node that a mapping of the alignment refers to.
 *
 * Mappings without a position, with node id 0, or whose node id has no usable
 * rank in id_to_rank are skipped.
 */
bit_vector Vectorizer::alignment_to_onehot(Alignment a){
    int64_t entity_size = my_xg->get_node_count();
    bit_vector ret(entity_size, 0);
    // Bind by reference: the protobuf accessors return references, and copying
    // the path and every mapping is unnecessary for read-only access.
    const Path& path = a.path();
    for (int i = 0; i < path.mapping_size(); i++){
        const Mapping& mapping = path.mapping(i);
        if (!mapping.has_position()){
            continue;
        }
        const int64_t node_id = mapping.position().node_id();
        if (!node_id) continue;

        // Use find() rather than operator[]: operator[] would insert rank 0
        // for an unknown id, and the write below would then hit ret[-1].
        const auto rank_it = id_to_rank.find(node_id);
        if (rank_it == id_to_rank.end()){
            continue;
        }
        const size_t rank = rank_it->second;
        if (rank == 0 || rank > ret.size()){
            continue;
        }

        // Ranks start at 1, vector indices at 0.
        ret[rank - 1] = 1;
    }
    return ret;
}

vector<double> Vectorizer::alignment_to_custom_score(Alignment a, std::function<double(Alignment)> lambda ){
    // Not implemented: neither the alignment nor the scoring callback is
    // consulted, and the result is always an empty vector.
    return vector<double>();
}