File: memory.py

package info (click to toggle)
haskell-futhark 0.25.32-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 18,236 kB
  • sloc: haskell: 100,484; ansic: 12,100; python: 3,440; yacc: 785; sh: 561; javascript: 558; lisp: 399; makefile: 277
file content (175 lines) | stat: -rw-r--r-- 4,575 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# Start of memory.py.

import ctypes as ct


def allocateMem(size):
    """Return a new, uninitialised Numpy array of `size` bytes.

    The array has dtype `np.byte`; its contents are whatever `np.empty`
    leaves in memory.
    """
    block = np.empty(shape=size, dtype=np.byte)
    return block


# Copy an array if its base is not None (and not the array itself).  This
# is important for treating Numpy arrays as flat memory, but has some
# overhead.
def normaliseArray(x):
    """Return `x` unchanged if it has no foreign base, else a copy of it.

    `x` is returned as-is when `x.base` is `None` or is `x` itself; in
    every other case the result is `x.copy()`.
    """
    base = x.base
    has_foreign_base = (base is not x) and (base is not None)
    if has_foreign_base:
        return x.copy()
    return x


def unwrapArray(x):
    """Flatten `x` and reinterpret the result as an array of `np.byte`."""
    flat = x.ravel()
    as_bytes = flat.view(np.byte)
    return as_bytes


def indexArray(x, offset, bt):
    """Read one element from `x` after reinterpreting it as type `bt`.

    `offset` indexes the reinterpreted view, so it counts elements of
    type `bt` rather than elements of `x`'s own dtype.
    """
    typed = x.view(bt)
    return typed[offset]


def writeScalarArray(x, offset, v):
    """Store scalar `v` into `x`, viewing `x` as an array of `type(v)`.

    `offset` indexes the reinterpreted view, so it counts elements of
    `type(v)`.  The write goes through a view, so `x` itself is modified.
    """
    typed = x.view(type(v))
    typed[offset] = v


# An opaque Futhark value.
class opaque(object):
    """Container pairing a type description with an arbitrary payload."""

    def __init__(self, desc, *payload):
        # `desc` is what __repr__ prints; `data` is the tuple of all
        # remaining positional arguments.
        self.desc = desc
        self.data = payload

    def __repr__(self):
        template = "<opaque Futhark value of type {}>"
        return template.format(self.desc)


# LMAD stuff


def lmad_contiguous_search(checked, expected, strides, shape, used):
    """Backtracking search for a contiguous ordering of the dimensions.

    Looks for a not-yet-used dimension whose stride equals `expected`,
    marks it used, and recurses with `expected` multiplied by that
    dimension's extent.  The search succeeds once `checked + 1` equals
    the number of dimensions, i.e. once every dimension has been placed.

    Parameters:
      checked  -- number of dimensions already placed by enclosing calls.
      expected -- the stride the next placed dimension must have.
      strides  -- per-dimension strides.
      shape    -- per-dimension extents (same length as `strides`).
      used     -- mutable list of booleans marking placed dimensions.

    Returns True if a full ordering was found, otherwise False.  On
    failure `used` is restored to the state it had on entry; on success
    the entries of the dimensions placed by this call remain True.  With
    zero dimensions the result is False.

    A failed scan over the candidates leaves `used` exactly as it found
    it, so one pass over the dimensions per call is sufficient; repeating
    the scan could never produce a different answer.
    """
    rank = len(strides)
    for j in range(rank):
        # Negative strides are never accepted as part of a contiguous layout.
        if not used[j] and strides[j] == expected and strides[j] >= 0:
            used[j] = True
            if checked + 1 == rank or lmad_contiguous_search(
                checked + 1, expected * shape[j], strides, shape, used
            ):
                return True
            # Backtrack: this dimension did not lead to a full ordering.
            used[j] = False
    return False


def lmad_contiguous(strides, shape):
    """Return whether `lmad_contiguous_search` finds a contiguous ordering.

    Starts the search with no dimension placed and an expected innermost
    stride of 1.
    """
    none_used = [False] * len(strides)
    return lmad_contiguous_search(0, 1, strides, shape, none_used)


def lmad_memcpyable(dst_strides, src_strides, shape):
    """Decide whether a copy can be done as one flat block transfer.

    Returns True when `dst_strides` is contiguous according to
    `lmad_contiguous` and, for every dimension, the source and
    destination strides agree or the dimension has extent 1.
    """
    if not lmad_contiguous(dst_strides, shape):
        return False
    # A stride mismatch is harmless on a dimension of extent 1.
    return all(
        dst_strides[d] == src_strides[d] or shape[d] == 1
        for d in range(len(dst_strides))
    )


def lmad_is_tr(strides, shape):
    """Detect whether `strides` describe a two-group transposed layout.

    For each split point `i` in `1 .. len(shape) - 1`, the dimensions
    before `i` are visited innermost-first (from `i - 1` down to 0),
    followed by the dimensions from `len(shape) - 1` down to `i`.  The
    split matches if each visited dimension's stride equals the product
    of the extents visited before it.

    Returns `(n, m)` for the first matching split, where `n` is the
    product of the extents before the split and `m` the product of the
    extents from the split onwards; returns None if no split matches
    (always the case for fewer than two dimensions).
    """
    rank = len(shape)
    for split in range(1, rank):
        matches = True
        want = 1
        sizes = [1, 1]
        groups = (
            (0, range(split - 1, -1, -1)),  # dimensions before the split
            (1, range(rank - 1, split - 1, -1)),  # dimensions from the split on
        )
        for part, dims in groups:
            for d in dims:
                # Once a mismatch is seen, strides are no longer inspected.
                if matches and strides[d] != want:
                    matches = False
                want *= shape[d]
                sizes[part] *= shape[d]
        if matches:
            return (sizes[0], sizes[1])
    return None


def lmad_map_tr(dst_strides, src_strides, shape):
    """Detect a copy that is a batch of identical 2-D transpositions.

    Leading dimensions whose source and destination strides both equal
    the row-major strides of `shape` are treated as "mapped" dimensions.
    For the remaining dimensions, one side must be row-major and the
    other must be recognised by `lmad_is_tr`.

    Returns `(k, n, m)` where `k` is the product of the mapped extents
    and `n`, `m` come from `lmad_is_tr` (swapped when the destination is
    the transposed side); returns None if the pattern does not apply.
    """
    rank = len(dst_strides)

    # Row-major strides for `shape`, built from the innermost dimension out.
    rowmajor_strides = [0] * rank
    rowmajor_strides[rank - 1] = 1
    for dim in reversed(range(rank - 1)):
        rowmajor_strides[dim] = rowmajor_strides[dim + 1] * shape[dim + 1]

    # map_r counts the mapped dimensions on top; k is the product of
    # their extents.
    map_r = 0
    k = 1
    while (
        map_r < rank
        and dst_strides[map_r] == rowmajor_strides[map_r]
        and src_strides[map_r] == rowmajor_strides[map_r]
    ):
        k *= shape[map_r]
        map_r += 1

    inner_rowmajor = rowmajor_strides[map_r:]
    if inner_rowmajor == dst_strides[map_r:]:
        # Destination is row-major; the source must be the transposed side.
        found = lmad_is_tr(src_strides[map_r:], shape[map_r:])
        if found is not None:
            n, m = found
            return (k, n, m)
    elif inner_rowmajor == src_strides[map_r:]:
        # Source is row-major; the destination must be the transposed side,
        # so the two extents are deliberately returned in swapped order.
        found = lmad_is_tr(dst_strides[map_r:], shape[map_r:])
        if found is not None:
            n, m = found
            return (k, m, n)
    return None


def lmad_copy_elements(
    pt, dst, dst_offset, dst_strides, src, src_offset, src_strides, shape
):
    """Copy elements one at a time between two strided layouts.

    Recurses over the leading dimension until a single dimension is
    left, then moves each element with `indexArray` / `writeScalarArray`.
    Offsets and strides are passed straight to those helpers, so they
    count elements of type `pt`.
    """
    if len(shape) != 1:
        # Peel off the outermost dimension and recurse on the remainder.
        inner_dst_strides = dst_strides[1:]
        inner_src_strides = src_strides[1:]
        inner_shape = shape[1:]
        for i in range(shape[0]):
            lmad_copy_elements(
                pt,
                dst,
                dst_offset + i * dst_strides[0],
                inner_dst_strides,
                src,
                src_offset + i * src_strides[0],
                inner_src_strides,
                inner_shape,
            )
        return

    # Innermost dimension: copy the individual scalars.
    for i in range(shape[0]):
        dst_index = dst_offset + i * dst_strides[0]
        value = indexArray(src, src_offset + i * src_strides[0], pt)
        writeScalarArray(dst, dst_index, value)


def lmad_copy(
    pt, dst, dst_offset, dst_strides, src, src_offset, src_strides, shape
):
    """Copy a strided region from `src` into `dst`.

    When `lmad_memcpyable` accepts the layouts, the region is moved with
    a single slice assignment whose bounds are the element offsets and
    element count scaled by `ct.sizeof(pt)`.  Otherwise the copy falls
    back to `lmad_copy_elements`.

    NOTE(review): the slice bounds are byte counts, so this presumably
    expects `dst` and `src` to be byte arrays -- confirm against callers.
    """
    if not lmad_memcpyable(dst_strides, src_strides, shape):
        lmad_copy_elements(
            pt,
            dst,
            dst_offset,
            dst_strides,
            src,
            src_offset,
            src_strides,
            shape,
        )
        return

    elem_size = ct.sizeof(pt)
    src_begin = src_offset * elem_size
    src_end = src_begin + np.prod(shape) * elem_size
    dst_begin = dst_offset * elem_size
    dst_end = dst_begin + np.prod(shape) * elem_size
    dst[dst_begin:dst_end] = src[src_begin:src_end]


# End of memory.py.