File: mul_threaded.c

package info (click to toggle)
flint-arb 1%3A2.19.0-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 13,028 kB
  • sloc: ansic: 177,109; sh: 553; makefile: 288; python: 268
file content (133 lines) | stat: -rw-r--r-- 3,035 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
/*
    Copyright (C) 2013 Fredrik Johansson

    This file is part of Arb.

    Arb is free software: you can redistribute it and/or modify it under
    the terms of the GNU Lesser General Public License (LGPL) as published
    by the Free Software Foundation; either version 2.1 of the License, or
    (at your option) any later version.  See <http://www.gnu.org/licenses/>.
*/

#include "acb_mat.h"
#include "pthread.h"

typedef struct
{
    acb_ptr * C;
    const acb_ptr * A;
    const acb_ptr * B;
    slong ar0;
    slong ar1;
    slong bc0;
    slong bc1;
    slong br;
    slong prec;
}
acb_mat_mul_arg_t;

void *
_acb_mat_mul_thread(void * arg_ptr)
{
    acb_mat_mul_arg_t arg = *((acb_mat_mul_arg_t *) arg_ptr);
    slong i, j, br, bc;
    acb_ptr tmp;
    TMP_INIT;

    br = arg.br;
    bc = arg.bc1 - arg.bc0;

    TMP_START;
    tmp = TMP_ALLOC(sizeof(acb_struct) * br * bc);

    for (i = 0; i < br; i++)
        for (j = 0; j < bc; j++)
            tmp[j * br + i] = arg.B[i][arg.bc0 + j];

    for (i = arg.ar0; i < arg.ar1; i++)
    {
        for (j = arg.bc0; j < arg.bc1; j++)
        {
            acb_dot(arg.C[i] + j, NULL, 0,
                arg.A[i], 1, tmp + (j - arg.bc0) * br, 1, br, arg.prec);
        }
    }

    TMP_END;
    flint_cleanup();
    return NULL;
}

void
acb_mat_mul_threaded(acb_mat_t C, const acb_mat_t A, const acb_mat_t B, slong prec)
{
    slong ar, ac, br, bc, i, num_threads;
    pthread_t * threads;
    acb_mat_mul_arg_t * args;

    ar = acb_mat_nrows(A);
    ac = acb_mat_ncols(A);
    br = acb_mat_nrows(B);
    bc = acb_mat_ncols(B);

    if (ac != br || ar != acb_mat_nrows(C) || bc != acb_mat_ncols(C))
    {
        flint_printf("acb_mat_mul_threaded: incompatible dimensions\n");
        flint_abort();
    }

    if (br == 0)
    {
        acb_mat_zero(C);
        return;
    }

    if (A == C || B == C)
    {
        acb_mat_t T;
        acb_mat_init(T, ar, bc);
        acb_mat_mul_threaded(T, A, B, prec);
        acb_mat_swap(T, C);
        acb_mat_clear(T);
        return;
    }

    num_threads = flint_get_num_threads();
    threads = flint_malloc(sizeof(pthread_t) * num_threads);
    args = flint_malloc(sizeof(acb_mat_mul_arg_t) * num_threads);

    for (i = 0; i < num_threads; i++)
    {
        args[i].C = C->rows;
        args[i].A = A->rows;
        args[i].B = B->rows;

        if (ar >= bc)
        {
            args[i].ar0 = (ar * i) / num_threads;
            args[i].ar1 = (ar * (i + 1)) / num_threads;
            args[i].bc0 = 0;
            args[i].bc1 = bc;
        }
        else
        {
            args[i].ar0 = 0;
            args[i].ar1 = ar;
            args[i].bc0 = (bc * i) / num_threads;
            args[i].bc1 = (bc * (i + 1)) / num_threads;
        }

        args[i].br = br;
        args[i].prec = prec;

        pthread_create(&threads[i], NULL, _acb_mat_mul_thread, &args[i]);
    }

    for (i = 0; i < num_threads; i++)
    {
        pthread_join(threads[i], NULL);
    }

    flint_free(threads);
    flint_free(args);
}