File: ptr2ind.py

package info (click to toggle)
pytorch-sparse 0.6.18-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 984 kB
  • sloc: python: 3,646; cpp: 2,444; sh: 54; makefile: 6
file content (82 lines) | stat: -rw-r--r-- 2,433 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import os
import wget
import time
import errno
import argparse
import os.path as osp

import torch
from scipy.io import loadmat

parser = argparse.ArgumentParser()
parser.add_argument('--root', type=str, default='/tmp/test_ptr2ind')
args = parser.parse_args()

matrices = [
    ('DIMACS10', 'citationCiteseer'),
    ('SNAP', 'web-Stanford'),
    ('Janna', 'StocF-1465'),
    ('GHS_psdef', 'ldoor'),
]


def get_torch_sparse_coo_tensor(root, group, name):
    path = osp.join(root, f'{name}.mat')
    if not osp.exists(path):
        try:
            os.makedirs(root)
        except OSError as e:
            if e.errno != errno.EEXIST and osp.isdir(path):
                raise e
        url = f'https://sparse.tamu.edu/mat/{group}/{name}.mat'
        print(f'Downloading {group}/{name}:')
        wget.download(url, path)
    matrix = loadmat(path)['Problem'][0][0][2].tocoo()
    row = torch.from_numpy(matrix.row).to(torch.long)
    col = torch.from_numpy(matrix.col).to(torch.long)
    index = torch.stack([row, col], dim=0)
    value = torch.from_numpy(matrix.data).to(torch.float)
    print(f'{name}.mat: shape={matrix.shape} nnz={row.numel()}')
    return torch.sparse_coo_tensor(index, value, matrix.shape).coalesce()


def time_func(matrix, op, duration=5.0, warmup=1.0):
    t = time.time()
    while (time.time() - t) < warmup:
        op(matrix)
    torch.cuda.synchronize()
    count = 0
    t = time.time()
    while (time.time() - t) < duration:
        op(matrix)
        count += 1
        torch.cuda.synchronize()
    return (time.time() - t) / count


def bucketize(matrix):
    row_indices = matrix.indices()[0]
    arange = torch.arange(matrix.size(0) + 1, device=row_indices.device)
    return torch.bucketize(arange, row_indices)


def convert_coo_to_csr(matrix):
    row_indices = matrix.indices()[0]
    return torch._convert_coo_to_csr(row_indices, matrix.size(0))


for device in ['cpu', 'cuda']:
    print('DEVICE:', device)
    for group, name in matrices:
        matrix = get_torch_sparse_coo_tensor(args.root, group, name)
        matrix = matrix.to(device)

        out1 = bucketize(matrix)
        out2 = convert_coo_to_csr(matrix)
        assert out1.tolist() == out2.tolist()

        t = time_func(matrix, bucketize, duration=5.0, warmup=1.0)
        print('old impl:', t)
        t = time_func(matrix, convert_coo_to_csr, duration=5.0, warmup=1.0)
        print('new impl:', t)
        print()