File: dpagerank2.c

package info (click to toggle)
suitesparse 1%3A5.8.1%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 152,716 kB
  • sloc: ansic: 774,385; cpp: 24,213; makefile: 6,310; fortran: 1,927; java: 1,826; csh: 1,686; ruby: 725; sh: 535; perl: 225; python: 209; sed: 164; awk: 60
file content (445 lines) | stat: -rw-r--r-- 16,117 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
//------------------------------------------------------------------------------
// SuiteSparse/GraphBLAS/Demo/Source/dpagerank2: pagerank using a real semiring
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2020, All Rights Reserved.
// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.

//------------------------------------------------------------------------------

// PageRank via EXTREME GraphBLAS-ing!

// A is a square unsymmetric binary matrix of size n-by-n, where A(i,j) is the
// edge (i,j).  Self-edges are OK.  A can be of any built-in type.

// On output, P is pointer to an array of PageRank structs.  P[0] is the
// highest ranked page, with pagerank P[0].pagerank and the page corresponds to
// node number P[0].page in the graph.  P[1] is the next page, and so on, to
// the lowest-ranked page P[n-1].page with rank P[n-1].pagerank.

// This version operates on the original matrix A, without changing it.  The
// entire computation is done via a set of user-defined objects:  a type,
// several operators, a monoid, and a semiring.

// Acknowledgements:  this method was written with input from Richard Veras,
// Franz Franchetti, and Scott McMillan, Carnegie Mellon University.

#include "GraphBLAS.h"

//------------------------------------------------------------------------------
// helper macros
//------------------------------------------------------------------------------

// free all workspace
#define FREEWORK                                \
{                                               \
    GrB_Vector_free (&rdouble) ;                \
    GrB_Vector_free (&r) ;                      \
    GrB_Vector_free (&rnew) ;                   \
    GrB_Vector_free (&dout) ;                   \
    GrB_Vector_free (&rdiff) ;                  \
    GrB_Descriptor_free (&desc) ;               \
    if (I != NULL) free (I) ;                   \
    if (X != NULL) free (X) ;                   \
    GrB_BinaryOp_free (&PageRank_accum) ;       \
    GrB_BinaryOp_free (&PageRank_add) ;         \
    GrB_Monoid_free (&PageRank_monoid) ;        \
    GrB_BinaryOp_free (&PageRank_multiply) ;    \
    GrB_Semiring_free (&PageRank_semiring) ;    \
    GrB_BinaryOp_free (&PageRank_diff) ;        \
    GrB_Type_free (&PageRank_type) ;            \
    GrB_UnaryOp_free (&PageRank_div) ;          \
    GrB_UnaryOp_free (&PageRank_get) ;          \
    GrB_UnaryOp_free (&PageRank_init) ;         \
}

// error handler: free output P and all workspace (used by CHECK and OK macros)
#define FREE_ALL                \
{                               \
    if (P != NULL) free (P) ;   \
    FREEWORK ;                  \
}

#undef GB_PUBLIC
#define GB_LIBRARY
#include "graphblas_demos.h"

//------------------------------------------------------------------------------
// scalar types and operators
//------------------------------------------------------------------------------

// each node has a rank value, and a constant which is 1/outdegree
typedef struct
{
    double rank ;
    double invdegree ;
}
pagerank_type ;

// probability of walking to random neighbor
#define PAGERANK_DAMPING 0.85

// NOTE: these operators use global values.  dpagerank2 can be done in
// parallel, internally, but only one instance of dpagerank can be used.

// global values shared by all threads:
double pagerank_teleport, pagerank_init_rank, pagerank_rsum ;

// identity value for the pagerank_add monoid
pagerank_type pagerank_zero = { 0, 0 } ;

// unary operator to divide a double entry by the scalar pagerank_rsum
void pagerank_div (double *z, const double *x)
{
    (*z) = (*x) / pagerank_rsum ;
}

// unary operator that typecasts PageRank_type to double, extracting the rank
void pagerank_get_rank (double *z, const pagerank_type *x)
{
    (*z) = (x->rank) ;
}

// unary operator to initialize a node
void init_page (pagerank_type *z, const double *x)
{
    z->rank = pagerank_init_rank ;  // all nodes start with rank 1/n
    z->invdegree = 1. / (*x) ;      // set 1/outdegree of this node 
}

//------------------------------------------------------------------------------
// PageRank semiring
//------------------------------------------------------------------------------

// In MATLAB notation, the new rank is computed with:
// newrank = PAGERANK_DAMPING * (rank * D * A) + pagerank_teleport

// where A is a square binary matrix of the original graph, and A(i,j)=1 if
// page i has a link to page j.  rank is a row vector of size n.  The matrix D
// is diagonal, with D(i,i)=1/outdegree(i), where outdegree(i) = the outdegree
// of node i, or equivalently, outdegree(i) = sum (A (i,:)).

// That is, if newrank(j) were computed with a dot product:
//      newrank (j) = 0
//      for all i:
//          newrank (j) = newrank (j) + (rank (i) * D (i,i)) * A (i,j)

// To accomplish this computation in a single vector-matrix multiply, the value
// of D(i,i) is held as component of a combined data type, the pagerank_type,
// which has both the rank(i) and the entry D(i,i).

// binary multiplicative operator for the pagerank semiring
void pagerank_multiply
(
    pagerank_type *z,
    const pagerank_type *x,
    const bool *y
)
{
    // y is the boolean entry of the matrix, A(i,j)
    // x->rank is the rank of node i, and x->invdegree is 1/outdegree(i)
    // note that z->invdegree is left unchanged
    z->rank = (*y) ? ((x->rank) * (x->invdegree)) : 0 ;
}

// binary additive operator for the pagerank semiring
void pagerank_add
(
    pagerank_type *z,
    const pagerank_type *x,
    const pagerank_type *y
)
{
    // note that z->invdegree is left unchanged; it is unused
    z->rank = (x->rank) + (y->rank) ;
}

//------------------------------------------------------------------------------
// pagerank accumulator
//------------------------------------------------------------------------------

// The semiring computes the vector newrank = rank*D*A.  To complete the page
// rank computation, the new rank must be scaled by the
// PAGERANK_DAMPING, and the pagerank_teleport must be included, which is
// done in the page rank accumulator:

// newrank = PAGERANK_DAMPING * newrank + pagerank_teleport

// The PageRank_semiring does not construct the entire pagerank_type of
// rank*D*A, since the vector that holds newrank(i) must also keep the
// 1/invdegree(i), unchanged.  This is restored in the accumulator operator.

// binary operator to accumulate the new rank from the old
void pagerank_accum
(
    pagerank_type *z,
    const pagerank_type *x,
    const pagerank_type *y
)
{
    // note that this formula does not use the old rank:
    // new rank = PAGERANK_DAMPING * (rank*A ) + pagerank_teleport
    double rnew = PAGERANK_DAMPING * (y->rank) + pagerank_teleport ;

    // update the rank, and copy over the inverse degree from the old page info
    z->rank = rnew ;
    z->invdegree = x->invdegree ;
}

//------------------------------------------------------------------------------
// pagerank_diff: compute the change in the rank
//------------------------------------------------------------------------------

void pagerank_diff
(
    pagerank_type *z,
    const pagerank_type *x,
    const pagerank_type *y
)
{
    double delta = (x->rank) - (y->rank) ;
    z->rank = delta * delta ;
}

//------------------------------------------------------------------------------
// comparison function for qsort
//------------------------------------------------------------------------------

int pagerank_compar (const void *x, const void *y)
{
    PageRank *a = (PageRank *) x ;
    PageRank *b = (PageRank *) y ;

    // sort by pagerank in descending order
    if (a->pagerank > b->pagerank)
    {
        return (-1) ;
    }
    else if (a->pagerank == b->pagerank)
    {
        return (0) ;
    }
    else
    {
        return (1) ;
    }
}

//------------------------------------------------------------------------------
// dpagerank2: compute the PageRank of all nodes in a graph
//------------------------------------------------------------------------------

GB_PUBLIC
GrB_Info dpagerank2         // GrB_SUCCESS or error condition
(
    PageRank **Phandle,     // output: pointer to array of PageRank structs
    GrB_Matrix A,           // input graph, not modified
    int itermax,            // max number of iterations
    double tol,             // stop when norm (r-rnew,2) < tol
    int *iters,             // number of iterations taken
    GrB_Desc_Value method   // method to use for GrB_vxm (for testing only)
)
{

    GrB_Info info ;
    double *X = NULL ;
    GrB_Index n, *I = NULL ;
    PageRank *P = NULL ;
    GrB_Descriptor desc = NULL ;
    GrB_Vector r = NULL, dout = NULL, rdouble = NULL, rnew = NULL, rdiff = NULL;

    //--------------------------------------------------------------------------
    // create the new type, operators, monoid, and semiring
    //--------------------------------------------------------------------------

    GrB_Type PageRank_type = NULL ;
    GrB_UnaryOp PageRank_div = NULL, PageRank_get = NULL, PageRank_init = NULL ;
    GrB_BinaryOp PageRank_accum = NULL, PageRank_add = NULL,
        PageRank_multiply = NULL, PageRank_diff = NULL ;
    GrB_Monoid PageRank_monoid = NULL ;
    GrB_Semiring PageRank_semiring = NULL ;

    // create the new Page type
    OK (GrB_Type_new (&PageRank_type, sizeof (pagerank_type))) ;

    #define U (GxB_unary_function)
    #define B (GxB_binary_function)

    // create the unary operator to initialize the PageRank_type of each node
    OK (GrB_UnaryOp_new (&PageRank_init, U init_page, PageRank_type, GrB_FP64));

    // create PageRank_accum
    OK (GrB_BinaryOp_new (&PageRank_accum, B pagerank_accum,
        PageRank_type, PageRank_type, PageRank_type)) ;

    // create PageRank_add operator and monoid
    OK (GrB_BinaryOp_new (&PageRank_add, B pagerank_add,
        PageRank_type, PageRank_type, PageRank_type)) ;
    OK (GrB_Monoid_new_UDT (&PageRank_monoid, PageRank_add, &pagerank_zero)) ;

    // create PageRank_multiply operator
    OK (GrB_BinaryOp_new (&PageRank_multiply, B pagerank_multiply,
        PageRank_type, PageRank_type, GrB_BOOL)) ;

    // create PageRank_semiring
    OK (GrB_Semiring_new (&PageRank_semiring, PageRank_monoid,
        PageRank_multiply)) ;

    // create unary operator that typecasts the PageRank_type to double
    OK (GrB_UnaryOp_new (&PageRank_get, U pagerank_get_rank, GrB_FP64,
        PageRank_type)) ;

    // create unary operator that scales the rank by pagerank_rsum
    OK (GrB_UnaryOp_new (&PageRank_div, U pagerank_div, GrB_FP64, GrB_FP64)) ;

    // create PageRank_diff operator
    OK (GrB_BinaryOp_new (&PageRank_diff, B pagerank_diff,
        PageRank_type, PageRank_type, PageRank_type)) ;

    //--------------------------------------------------------------------------
    // initializations
    //--------------------------------------------------------------------------

    (*Phandle) = NULL ;

    // n = size (A,1) ;         // number of nodes
    OK (GrB_Matrix_nrows (&n, A)) ;

    // dout = sum (A,2) ;       // dout(i) is the out-degree of node i
    OK (GrB_Vector_new (&dout, GrB_FP64, n)) ;
    OK (GrB_Matrix_reduce_BinaryOp (dout, NULL, NULL, GrB_PLUS_FP64, A, NULL)) ;

    // all nodes start with rank 1/n
    pagerank_init_rank = 1.0 / ((double) n) ;

    // initialize the page rank and inverse degree of each node
    OK (GrB_Vector_new (&r, PageRank_type, n)) ;
    OK (GrB_Vector_apply (r, NULL, NULL, PageRank_init, dout, NULL)) ;

    // dout vector no longer needed
    OK (GrB_Vector_free (&dout)) ;

    // to jump to any random node in entire graph:
    pagerank_teleport = (1-PAGERANK_DAMPING) / n ;

    tol = tol*tol ;                 // use tol^2 so sqrt(...) not needed
    double pagerank_rdiff = 1 ;     // so first iteration is always done

    // create rdouble, a double vector of size n
    OK (GrB_Vector_new (&rdouble, GrB_FP64, n)) ;

    // Note that dup is needed, since the invdegree is copied by the
    // PageRank_accum.
    OK (GrB_Vector_dup (&rnew, r)) ;
    OK (GrB_Vector_new (&rdiff, PageRank_type, n)) ;

    // select method for GrB_vxm (for testing only; default is fine)
    if (method != GxB_DEFAULT)
    {
        OK (GrB_Descriptor_new (&desc)) ;
        OK (GxB_Desc_set (desc, GxB_AxB_METHOD, method)) ;
    }

    //--------------------------------------------------------------------------
    // iterate to compute the pagerank of each node
    //--------------------------------------------------------------------------

    for ((*iters) = 0 ; (*iters) < itermax && pagerank_rdiff > tol ; (*iters)++)
    {

        // rnew = PAGERANK_DAMPING * (r * D * A) + pagerank_teleport
        OK (GrB_vxm (rnew, NULL, PageRank_accum, PageRank_semiring, r, A,
            desc)) ;

        // compute pagerank_rdiff = sum ((r - rnew).^2)
        OK (GrB_Vector_eWiseAdd_BinaryOp (rdiff, NULL, NULL, PageRank_diff,
            r, rnew, NULL)) ;
        pagerank_type rsum ;
        OK (GrB_Vector_reduce_UDT (&rsum, NULL, PageRank_monoid, rdiff, NULL)) ;

        pagerank_rdiff = rsum.rank ;

        // r = rnew, using a swap, which is faster than assign or dup
        GrB_Vector rtemp = r ;
        r = rnew ;
        rnew = rtemp ;
    }

    //--------------------------------------------------------------------------
    // scale the result: rdouble = rank / sum(r)
    //--------------------------------------------------------------------------

    // rnew (for the safe version) is no longer needed
    GrB_Vector_free (&rnew) ;

    // rdouble = pagerank_get_rank (r)
    OK (GrB_Vector_apply (rdouble, NULL, NULL, PageRank_get, r, NULL)) ;

    // r no longer needed
    GrB_Vector_free (&r) ;

    // pagerank_rsum = sum (rdouble)
    OK (GrB_Vector_reduce_FP64 (&pagerank_rsum, NULL, GrB_PLUS_MONOID_FP64,
        rdouble, NULL)) ;

    // could also do this with GrB_vxm, with a 1-by-1 matrix
    // r = r / pagerank_rsum
    OK (GrB_Vector_apply (rdouble, NULL, NULL, PageRank_div, rdouble, NULL)) ;

    //--------------------------------------------------------------------------
    // sort the nodes by pagerank
    //--------------------------------------------------------------------------

    // GraphBLAS does not have a mechanism to sort the components of a vector,
    // so it must be done by extracting and then sorting the tuples from
    // the GrB_Vector rdouble.

    // [r,irank] = sort (r, 'descend') ;

    // [I,X] = find (r) ;
    X = (double *) malloc (n * sizeof (double)) ;
    I = (GrB_Index *) malloc (n * sizeof (GrB_Index)) ;
    CHECK (I != NULL && X != NULL, GrB_OUT_OF_MEMORY) ;
    GrB_Index nvals = n ;
    OK (GrB_Vector_extractTuples_FP64 (I, X, &nvals, rdouble)) ;

    // rdouble no longer needed
    GrB_Vector_free (&rdouble) ;

    // P = struct (X,I)
    P = (PageRank *) malloc (n * sizeof (PageRank)) ;
    CHECK (P != NULL, GrB_OUT_OF_MEMORY) ;
    int64_t k ;
    for (k = 0 ; k < nvals ; k++)
    {
        // The kth ranked page is P[k].page (with k=0 being the highest rank),
        // and its pagerank is P[k].pagerank.
        P [k].pagerank = X [k] ;
        // I [k] == k will be true for SuiteSparse:GraphBLAS but in general I
        // can be returned in any order, so use I [k] instead of k, for other
        // GraphBLAS implementations.
        P [k].page = I [k] ;
    }
    for ( ; k < n ; k++)
    {
        // If A has empty columns, then r will become sparse.  In this case,
        // pages with no incoming edges will be unranked.  The drowscale
        // function avoids this problem by adding a
        P [k].pagerank = 0 ;
        P [k].page = -1 ;
    }

    // free workspace
    FREEWORK ;

    // qsort (P) in descending order
    qsort (P, nvals, sizeof (PageRank), pagerank_compar) ;

    //--------------------------------------------------------------------------
    // return result
    //--------------------------------------------------------------------------

    (*Phandle) = P ;
    return (GrB_SUCCESS) ;
}