File: cc.cpp

package info (click to toggle)
combblas 2.0.0-6
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 190,476 kB
  • sloc: cpp: 55,912; ansic: 25,134; sh: 3,691; makefile: 548; csh: 66; python: 49; perl: 21
file content (156 lines) | stat: -rw-r--r-- 5,184 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#include <iostream>
#include <string>
#include <fstream>
#include <sstream>
#include <vector>
#include <algorithm>
#include <chrono>
#include <functional>
#include <cmath>
#include <map>
#include <tuple>
#include <cstdlib>
#include <cstdio>
#include <limits>
#include "MMConverter.h"
#include "DisjSets.h"

using namespace std;


// For reference, the callback type printfunc is cast to when handed to tommy_hashdyn_foreach_arg:
//     typedef void tommy_foreach_arg_func(void* arg, void* obj);

void* printfunc(void* arg, void* obj)
{
    pair<DisjSets*, ofstream*> * mypair = (pair<DisjSets*, ofstream*> *) arg;    // cast argument
    DisjSets * ds = mypair->first;
    ofstream * out = mypair->second;
    (*out) << ((tommy_object *) obj)->vname << "\t" << ds->find( (int) ((tommy_object *) obj)->vid) << "\n";
}

int main(int argc, char* argv[])
{
	if(argc < 3)
    {
        cout << "Usage: ./cc <vertices_file> <edges_file>" << endl;
        return 0;
    }
    
    ifstream inputvert(argv[1]);
    char vname[150];
    uint32_t vertexid = 0;
    
    tommy_hashdyn hashdyn;
    tommy_hashdyn_init(&hashdyn);
    while(inputvert >> vname)
    {
        string s_vname(vname);  // string version
        tommy_object* obj = new tommy_object(vertexid, s_vname);   // (vertexid,s_vname) pair is the payload (data)
        tommy_hashdyn_insert(&hashdyn, &(obj->node), obj, tommy_hash_u32(0, vname, strlen(vname))); // hashed string is key
        vertexid++;
    }
    cout << "vertex list read, there are " << vertexid << endl;
    DisjSets ds(vertexid);
    
    FILE *f;
    if ((f = fopen(argv[2], "r")) == NULL)
    {
        printf("file %s can not be found\n", argv[2]);
        exit(1);
    }
    
    // Use fseek again to go backwards two bytes and check that byte with fgetc
    struct stat st;     // get file size
    if (stat(argv[2], &st) == -1)
    {
        exit(1);
    }
    int64_t file_size = st.st_size;
    cout << "Edge file is " << file_size << " bytes" << endl;
    long int ffirst = ftell(f); // doesn't change
    long int fpos = ffirst;
    long int end_fpos = file_size;
    
    vector<string> lines;
    bool finished = FetchBatch(f, fpos, end_fpos, true, lines); // fpos will move
    int64_t entriesread = lines.size();
    
    
    char from[128];
    char to[128];
    double vv;
    for (vector<string>::iterator itr=lines.begin(); itr != lines.end(); ++itr)
    {
        // string::c_str() -> Returns a pointer to an array that contains a null-terminated sequence of characters (i.e., a C-string)
        sscanf(itr->c_str(), "%s %s %lg", from, to, &vv);
        string s_from = string(from);
        string s_to = string(to);
        
        tommy_object* obj1 = (tommy_object*) tommy_hashdyn_search(&hashdyn, compare, &s_from, tommy_hash_u32(0, from, strlen(from)));
        if(!obj1)
        {
            cout << "This doesn't make sense! " << s_from <<  " should exist" << endl;
        }
        
        tommy_object* obj2 = (tommy_object*) tommy_hashdyn_search(&hashdyn, compare, &s_to, tommy_hash_u32(0, to, strlen(to)));
        if(!obj2)
        {
            cout << "This doesn't make sense! " << s_to <<  " should exist" << endl;
        }
        int set1 = ds.find((int) obj1->vid);
        int set2 = ds.find((int) obj2->vid);
        if(set1 != set2)
        {
            ds.unionSets(set1, set2);
        }
    }
    vector<string>().swap(lines);
    
    while(!finished)
    {
        finished = FetchBatch(f, fpos, end_fpos, false, lines);
        entriesread += lines.size();
        cout << "entriesread: " << entriesread << ", current vertex id: " << vertexid << endl;
        
        // Process files
        char from[128];
        char to[128];
        double vv;
        for (vector<string>::iterator itr=lines.begin(); itr != lines.end(); ++itr)
        {
            // string::c_str() -> Returns a pointer to an array that contains a null-terminated sequence of characters (i.e., a C-string)
            sscanf(itr->c_str(), "%s %s %lg", from, to, &vv);
            
            string s_from = string(from);
            string s_to = string(to);
            
            tommy_object* obj1 = (tommy_object*) tommy_hashdyn_search(&hashdyn, compare, &s_from, tommy_hash_u32(0, from, strlen(from)));
            if(!obj1)
            {
                cout << "This doesn't make sense! " << s_from <<  " should exist" << endl;
            }
            
            tommy_object* obj2 = (tommy_object*) tommy_hashdyn_search(&hashdyn, compare, &s_to, tommy_hash_u32(0, to, strlen(to)));
            if(!obj2)
            {
                cout << "This doesn't make sense! " << s_to <<  " should exist" << endl;
            }
            int set1 = ds.find((int) obj1->vid);
            int set2 = ds.find((int) obj2->vid);
            if(set1 != set2)
            {
                ds.unionSets(set1, set2);
            }
        }
        vector<string>().swap(lines);
    }
    cout << "There are " << vertexid << " vertices and " << entriesread << " edges" << endl;
    
    string s_out(argv[1]);
    s_out += ".components";
    ofstream output(s_out);
    
    pair<DisjSets*, ofstream*> mypair(&ds, &output);
    tommy_hashdyn_foreach_arg(&hashdyn, (tommy_foreach_arg_func *) printfunc, &mypair);
    
}