File: lesson_13_tuples.py

package info (click to toggle)
halide 21.0.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 55,420 kB
  • sloc: cpp: 289,327; ansic: 22,751; python: 7,486; makefile: 4,299; sh: 2,508; java: 1,549; javascript: 282; pascal: 207; xml: 127; asm: 9
file content (306 lines) | stat: -rw-r--r-- 11,770 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
#!/usr/bin/python3

# Halide tutorial lesson 13: Tuples

# This lesson describes how to write Funcs that evaluate to multiple
# values.

# This lesson can be built by invoking the command:
#    make test_tutorial_lesson_13_tuples
# in a shell with the current directory at python_bindings/

import math

import halide as hl
import numpy as np


def main():
    """Run Halide tutorial lesson 13 (Tuples) and return 0 on success.

    The lesson walks through the ways a Func can yield several values per
    point in its domain -- an extra dimension, a list of Funcs, and
    Tuple-valued Funcs -- and then uses Tuples for an argmax reduction and
    for a user-defined complex number type that drives a small Mandelbrot
    renderer. Intermediate results are checked with assertions, the
    Mandelbrot picture is printed as ASCII art, and "Success!" is printed
    at the end.
    """
    # So far Funcs (such as the one below) have evaluated to a single
    # scalar value for each point in their domain.
    single_valued = hl.Func()
    x, y = hl.Var("x"), hl.Var("y")
    single_valued[x, y] = x + y

    # One way to write a hl.Func that returns a collection of values is
    # to add an additional dimension which indexes that
    # collection. This is how we typically deal with color. For
    # example, the hl.Func below represents a collection of three values
    # for every x, y coordinate indexed by c.
    color_image = hl.Func()
    c = hl.Var("c")
    color_image[x, y, c] = hl.select(
        c == 0,
        245,  # Red value
        c == 1,
        42,  # Green value
        132,  # Blue value
    )

    # Since this pattern appears quite often, Halide provides
    # syntactic sugar to write the code above as the following,
    # using the "mux" function.
    # color_image[x, y, c] = hl.mux(c, [245, 42, 132])

    # This method is often convenient because it makes it easy to
    # operate on this hl.Func in a way that treats each item in the
    # collection equally:
    brighter = hl.Func()
    brighter[x, y, c] = color_image[x, y, c] + 10

    # However this method is also inconvenient for three reasons.
    #
    # 1) Funcs are defined over an infinite domain, so users of this
    # hl.Func can for example access color_image[x, y, -17], which is
    # not a meaningful value and is probably indicative of a bug.
    #
    # 2) It requires a hl.select, which can impact performance if not
    # bounded and unrolled:
    # brighter.bound(c, 0, 3).unroll(c)
    #
    # 3) With this method, all values in the collection must have the
    # same type. While the above two issues are merely inconvenient,
    # this one is a hard limitation that makes it impossible to
    # express certain things in this way.

    # It is also possible to represent a collection of values as a
    # collection of Funcs:
    func_array = [hl.Func() for _ in range(3)]
    func_array[0][x, y] = x + y
    func_array[1][x, y] = hl.sin(x)
    func_array[2][x, y] = hl.cos(y)

    # This method avoids the three problems above, but introduces a
    # new annoyance. Because these are separate Funcs, it is
    # difficult to schedule them so that they are all computed
    # together inside a single loop over x, y.

    # A third alternative is to define a hl.Func as evaluating to a
    # Tuple instead of an hl.Expr. A Tuple is a fixed-size collection of
    # Exprs which may have different types. The following function
    # evaluates to an integer value (x+y), and a floating point value
    # (hl.sin(x*y)).
    multi_valued = hl.Func("multi_valued")
    multi_valued[x, y] = (x + y, hl.sin(x * y))

    # Realizing a tuple-valued hl.Func returns a collection of
    # Buffers. We call this a Realization. Here it is unpacked like a
    # sequence holding one hl.Buffer per Tuple element:
    if True:
        im1, im2 = multi_valued.realize([80, 60])
        assert im1.type() == hl.Int(32)
        assert im2.type() == hl.Float(32)
        assert im1[30, 40] == 30 + 40
        assert np.isclose(im2[30, 40], math.sin(30 * 40))

    # You can also pass a tuple of pre-allocated buffers to realize()
    # rather than having new ones created. (The Buffers must have the correct
    # types and have identical sizes.)
    if True:
        im1, im2 = hl.Buffer(hl.Int(32), [80, 60]), hl.Buffer(hl.Float(32), [80, 60])
        multi_valued.realize((im1, im2))
        assert im1[30, 40] == 30 + 40
        assert np.isclose(im2[30, 40], math.sin(30 * 40))

    # All Tuple elements are evaluated together over the same domain
    # in the same loop nest, but stored in distinct allocations. A
    # hand-written equivalent of the above is:
    if True:
        # x spans 80 and y spans 60, matching realize([80, 60]).
        width, height = 80, 60
        multi_valued_0 = np.empty((width * height), dtype=np.int32)
        # The second Tuple element is a float, so its storage must be
        # floating point too (an integer array would truncate the sines).
        multi_valued_1 = np.empty((width * height), dtype=np.float32)

        for yy in range(height):
            for xx in range(width):
                multi_valued_0[xx + width * yy] = xx + yy
                multi_valued_1[xx + width * yy] = math.sin(xx * yy)

    # When compiling ahead-of-time, a Tuple-valued hl.Func evaluates
    # into multiple distinct output halide_buffer_t structs. These appear in
    # order at the end of the function signature:
    # int multi_valued(...input buffers and params..., halide_buffer_t
    # *output_1, halide_buffer_t *output_2)

    # In Python a Tuple is written as an ordinary Python tuple of
    # Exprs, exactly as we did above:
    multi_valued_2 = hl.Func("multi_valued_2")
    multi_valued_2[x, y] = (x + y, hl.sin(x * y))

    # Calls to a multi-valued hl.Func cannot be treated as Exprs. The
    # following is not valid:
    # consumer = hl.Func()
    # consumer[x, y] = multi_valued_2[x, y] + 10

    # Instead you must index the returned object with square brackets
    # to retrieve the individual Exprs:
    integer_part = multi_valued_2[x, y][0]
    floating_part = multi_valued_2[x, y][1]
    assert type(integer_part) is hl.FuncTupleElementRef
    assert type(floating_part) is hl.FuncTupleElementRef

    consumer = hl.Func()
    consumer[x, y] = (integer_part + 10, floating_part + 10.0)

    # Tuple reductions.
    if True:
        # Tuples are particularly useful in reductions, as they allow
        # the reduction to maintain complex state as it walks along
        # its domain. The simplest example is an argmax.

        # First we create an Image to take the argmax over.
        # (Named input_buf so the builtin input() is not shadowed.)
        input_func = hl.Func()
        input_func[x] = hl.sin(x)
        input_buf = input_func.realize([100])
        assert input_buf.type() == hl.Float(32)

        # Then we define a 2-valued Tuple which tracks the maximum value
        # and its index.
        arg_max = hl.Func()

        # Pure definition.
        # (using [()] for zero-dimensional Funcs is a convention of this python interface)
        arg_max[()] = (0, input_buf[0])

        # Update definition.
        r = hl.RDom([(1, 99)])
        old_index = arg_max[()][0]
        old_max = arg_max[()][1]
        # Move the index only when the new sample is strictly larger than
        # the running maximum, so ties keep the earliest index.
        new_index = hl.select(old_max < input_buf[r], r, old_index)
        new_max = hl.max(input_buf[r], old_max)
        arg_max[()] = (new_index, new_max)

        # The equivalent plain Python is:
        arg_max_0 = 0
        arg_max_1 = float(input_buf[0])
        for rr in range(1, 100):
            old_index = arg_max_0
            old_max = arg_max_1
            new_index = rr if (old_max < input_buf[rr]) else old_index
            new_max = max(input_buf[rr], old_max)
            # In a tuple update definition, all loads and computation
            # are done before any stores, so that all Tuple elements
            # are updated atomically with respect to recursive calls
            # to the same hl.Func.
            arg_max_0 = new_index
            arg_max_1 = new_max

        # Let's verify that Halide and the plain Python loop found the
        # same maximum value and index.
        if True:
            r0, r1 = arg_max.realize()

            assert r0.type() == hl.Int(32)
            assert r1.type() == hl.Float(32)
            assert arg_max_0 == r0[()]
            assert np.isclose(arg_max_1, r1[()])

        # Halide provides hl.argmax and hl.argmin as built-in reductions
        # similar to sum, product, maximum, and minimum. They return
        # a Tuple consisting of the point in the reduction domain
        # corresponding to that value, and the value itself. In the
        # case of ties they return the first value found. We'll use
        # one of these in the following section.

    # Tuples for user-defined types.
    if True:
        # Tuples can also be a convenient way to represent compound
        # objects such as complex numbers. Defining an object that
        # can be converted to and from a Tuple is one way to extend
        # Halide's type system with user-defined types.
        class Complex:
            """Complex number whose real and imaginary parts are Halide Exprs."""

            def __init__(self, r, i=None):
                """Build a Complex from one of three argument shapes.

                * two Python floats: each is wrapped in an hl.Expr;
                * any other (r, i) pair: stored unchanged;
                * a single indexable object: element 0 is the real part
                  and element 1 the imaginary part.
                """
                if isinstance(r, float) and isinstance(i, float):
                    self.real = hl.Expr(r)
                    self.imag = hl.Expr(i)
                elif i is not None:
                    self.real = r
                    self.imag = i
                else:
                    self.real = r[0]
                    self.imag = r[1]

            def as_tuple(self):
                """Convert to a Tuple."""
                return (self.real, self.imag)

            def __add__(self, other):
                """Complex addition."""
                return Complex(self.real + other.real, self.imag + other.imag)

            def __mul__(self, other):
                """Complex multiplication."""
                return Complex(
                    self.real * other.real - self.imag * other.imag,
                    self.real * other.imag + self.imag * other.real,
                )

            # __getitem__ and __len__ let a Complex be indexed and sized
            # like a 2-element sequence (real, imag).
            def __getitem__(self, idx):
                return (self.real, self.imag)[idx]

            def __len__(self):
                return 2

            def magnitude(self):
                """Squared complex magnitude (real^2 + imag^2); no square root is taken."""
                return (self.real * self.real) + (self.imag * self.imag)

            # Other complex operators would go here. The above are
            # sufficient for this example.

        # Let's use the Complex class to compute a Mandelbrot set.
        mandelbrot = hl.Func()

        # The initial complex value corresponding to an x, y coordinate
        # in our hl.Func.
        initial = Complex(x / 15.0 - 2.5, y / 6.0 - 2.0)

        # Pure definition.
        t = hl.Var("t")
        mandelbrot[x, y, t] = Complex(0.0, 0.0)

        # We'll use an update definition to take 12 steps.
        r = hl.RDom([(1, 12)])
        current = Complex(mandelbrot[x, y, r - 1])

        # The following line uses the complex multiplication and
        # addition we defined above.
        mandelbrot[x, y, r] = Complex(current * current) + initial

        # We'll use another tuple reduction to compute the iteration
        # number where the value first escapes a circle of radius 4
        # (hence the comparison of the squared magnitude against 16).
        # This can be expressed as an hl.argmin of a boolean - we want
        # the index of the first time the given boolean expression is
        # false (we consider false to be less than true).  The argmax
        # would return the index of the first time the expression is
        # true.

        escape_condition = Complex(mandelbrot[x, y, r]).magnitude() < 16.0
        first_escape = hl.argmin(escape_condition)
        assert type(first_escape) is tuple
        # We only want the index, not the value, but hl.argmin returns
        # both, so we'll index the hl.argmin Tuple expression using
        # square brackets to get the hl.Expr representing the index.
        escape = hl.Func()
        escape[x, y] = first_escape[0]

        # Realize the pipeline and print the result as ascii art.
        result = escape.realize([61, 25])
        assert result.type() == hl.Int(32)
        code = " .:-~*={&%#@"
        for yy in range(result.height()):
            for xx in range(result.width()):
                index = result[xx, yy]
                # The reduction domain starts at 1 with extent 12, so an
                # index equal to len(code) can occur; there is no glyph
                # for it, and nothing is printed for that pixel.
                if index < len(code):
                    print(f"{code[index]}", end="")
            print()

    print("Success!")

    return 0


if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status instead
    # of silently discarding it.
    raise SystemExit(main())