File: GraphOutput.h

package info (click to toggle)
mapsembler2 2.2.4%2Bdfsg1-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 7,288 kB
  • sloc: cpp: 51,204; ansic: 13,165; sh: 542; makefile: 394; asm: 271; python: 28
file content (192 lines) | stat: -rwxr-xr-x 5,680 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
#if __cplusplus > 199711L
//#if __clang__
#include <unordered_map>
#include <functional>
#else
#include <tr1/unordered_map>
#include <tr1/functional>
#endif

#include <set>
#include <stdlib.h> // for exit()
#include <iostream>
#include <fstream>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <regex.h>
#include <assert.h> 

#include "../minia/Kmer.h"
#include "../minia/Bank.h"

#ifndef _GRAPHOUTPUT_H
#define _GRAPHOUTPUT_H

using namespace std;

#if __cplusplus <= 199711L
//#if ! __clang__
using namespace tr1;
#endif

// hash functions for unordered_map with various kmer_type's
namespace std
{

   // Pair of identifiers -- one for nodes, one for edges -- used when
   // printing ids in the graph output.
   struct id_els
   {
       long node;   // node identifier
       long edge;   // edge identifier
   };

#if __cplusplus <= 199711L
//#if ! __clang__
namespace tr1
{
#endif

    #ifdef _ttmath
   // Hash specialization so that ttmath::UInt<KMER_PRECISION> can be used
   // as an unordered_map key.
   //
   // The value is consumed from its low end in 64-bit words, each word
   // assembled from two successive 32-bit reads. Every word is hashed with
   // hash<uint64_t> and the per-word hashes are XOR-ed together.
   // The loop runs KMER_PRECISION/2 times, i.e. 64 bits per iteration.
   template <>
   struct hash<ttmath::UInt<KMER_PRECISION> > : public unary_function<ttmath::UInt<KMER_PRECISION>, size_t>
   {
       size_t operator()(const ttmath::UInt<KMER_PRECISION>& elem) const
       {
           hash<uint64_t> word_hasher;
           uint32_t low_bits = ~0;                       // selects the low 32 bits
           ttmath::UInt<KMER_PRECISION> remaining = elem; // shifted right as words are consumed
           uint64_t digest = 0;

           for (int round = 0; round < KMER_PRECISION/2; round++)
           {
               uint32_t lower_half, upper_half;

               // low 32 bits of the current 64-bit word
               (remaining & low_bits).ToInt(lower_half);
               remaining >>= 32;

               // high 32 bits of the current 64-bit word
               (remaining & low_bits).ToInt(upper_half);
               remaining >>= 32;

               uint64_t word = lower_half;
               word |= ((uint64_t)upper_half) << 32;

               digest ^= word_hasher(word);
           }
           return digest;
       }
   };
    #endif

    #ifdef _largeint
   // Hash specialization so that LargeInt<KMER_PRECISION> can be used as an
   // unordered_map key.
   //
   // The value is consumed from its low end in 64-bit words, each word
   // assembled from two successive 32-bit reads. Every word is hashed with
   // hash<uint64_t> and the per-word hashes are XOR-ed together.
   // The loop runs KMER_PRECISION/2 times, i.e. 64 bits per iteration.
   template <>
       struct hash<LargeInt<KMER_PRECISION> > : public unary_function<LargeInt<KMER_PRECISION>, size_t>
       {
           size_t operator()(const LargeInt<KMER_PRECISION>& elem) const
           {
               hash<uint64_t> hash_func;

               // hash = XOR over i of hash(i-th chunk of 64 bits)
               uint64_t result = 0, to_hash;
               LargeInt<KMER_PRECISION> intermediate = elem;
               uint32_t mask=~0, chunk;   // mask selects the low 32 bits
               int i;
               for (i=0;i<KMER_PRECISION/2;i++)
               {
                   // retrieve a 64 bits part to hash: low half first ...
                   chunk = (intermediate & mask).toInt();
                   to_hash = chunk;
                   intermediate = intermediate >> 32;
                   // ... then the high half
                   chunk = (intermediate & mask).toInt();
                   to_hash |= ((uint64_t)chunk) << 32 ;
                   intermediate = intermediate >> 32;

                   // hash<uint64_t>::operator() takes exactly one argument;
                   // the former second argument (num_hash) was never declared
                   // and prevented this specialization from compiling.
                   result ^= hash_func(to_hash);
               }
               return result;
           }
       };
    #endif
    #if __cplusplus <= 199711L
    //#if ! __clang__
}
#endif
}

// Declares the state and operations used to emit a graph (nodes, edges,
// starters) to output files. Only declarations are given here; the member
// function bodies and the static suffix constants are defined elsewhere.
class GraphOutput {

public:

    // Name prefix and the individual output file names.
    string prefix;
    string graph_file_name;
    string nodes_file_name;
    string edges_file_name;
    string json_starters_file_name;
    string xml_file_name;
    string json_nodes_file_name;
    string json_edges_file_name;
    string json_file_name;

    int graph_format;
    id_els first_id_els;

    // Original note: "the json format needs an id on the nodes".
    // NOTE(review): the field is named edge_id -- confirm whether nodes or
    // edges was meant.
    long edge_id;

    // File-name suffixes (declared here, defined outside this header).
    static const string graph_file_suffix;
    static const string starters_file_suffix;
    static const string nodes_file_suffix;
    static const string edges_file_suffix;
    static const string xml_file_suffix;
    static const string json_starters_file_suffix;
    static const string json_nodes_file_suffix;
    static const string json_edges_file_suffix;
    static const string json_file_suffix;

    // true:  the extended kmer comes originally from the starter;
    // false: it is a degenerated kmer (one substitution or one indel).
    bool original;

    // C stdio handles for the output files.
    FILE *graph_file;
    FILE *nodes_file;
    FILE *edges_file;
    FILE *starters_file;

    GraphOutput(string prefix, int graph_format);
    GraphOutput(string prefix, int graph_format, id_els first_id_els); // PIERRE
    GraphOutput(string prefix);
    GraphOutput(string prefix, id_els first_id_els); // PIERRE
    void close();

    long sequence_length(string line);
    void print_node(long index, char *ascii_node);
    void print_edge(long index, long id, long id2, string label);
    void print_edge(long index, long id, long id2, string label, string comment);
    void print_starter_head(int index, char* sequence);
    void print_starter_end();

    enum LeftOrRight {
        LEFT  = 0,
        RIGHT = 1
    };

    enum Strand {
        FW = 0,
        RC = 1
    };

    // A node reference qualified by a strand and a side (left or right).
    // Ordered so that it can be stored in a std::set.
    struct node_strand {
        long node;
        Strand strand;
        LeftOrRight left_or_right;

        node_strand(long node, Strand strand, LeftOrRight left_or_right)
            : node(node),
              strand(strand),
              left_or_right(left_or_right)
        {}

        // Lexicographic order on (node, left_or_right, strand).
        bool operator<(const node_strand &other) const {
            if (node == other.node) {
                if (left_or_right == other.left_or_right)
                    return (strand < other.strand);
                return left_or_right < other.left_or_right;
            }
            return (node < other.node);
        }
    };

    // Maps a kmer to the set of node_strand entries linked to it.
    // The container comes from std or std::tr1 depending on the language level.
#if __cplusplus > 199711L
//#if __clang__
    std::unordered_map<kmer_type,set<node_strand> > kmer_links;
#else
    std::tr1::unordered_map<kmer_type,set<node_strand> > kmer_links;
#endif

    id_els construct_graph(string linear_seqs_name, const string direction); // PIERRE: added the return value
    id_els construct_graph(string linear_seqs_name); // PIERRE: added the return value
    void load_nodes_extremities(string linear_seqs_name);

 private:
    void init(bool erase); // PIERRE
};
#endif //_GRAPHOUTPUT_H