File: search.pyx

Package: python-thinc 8.1.7-1

# cython: profile=True, experimental_cpp_class_def=True, cdivision=True, infer_types=True
cimport cython
from libc.string cimport memset, memcpy
from libc.math cimport log, exp
import math

from cymem.cymem cimport Pool
from preshed.maps cimport PreshMap
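
# NOTE: names used below without an explicit cimport (weight_t, class_t,
# hash_t, uint64_t, _State, Entry, Queue, vector, and the *_func_t typedefs)
# are declared in the matching .pxd file, which Cython merges with this
# implementation module at compile time.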


cdef class Beam:
    def __init__(self, class_t nr_class, class_t width, weight_t min_density=0.0):
        assert nr_class != 0
        assert width != 0
        self.nr_class = nr_class
        self.width = width
        self.min_density = min_density
        self.size = 1
        self.t = 0
        self.mem = Pool()
        self._parents = <_State*>self.mem.alloc(self.width, sizeof(_State))
        self._states = <_State*>self.mem.alloc(self.width, sizeof(_State))
        cdef int i
        self.histories = [[] for i in range(self.width)]
        self._parent_histories = [[] for i in range(self.width)]

        self.scores = <weight_t**>self.mem.alloc(self.width, sizeof(weight_t*))
        self.is_valid = <int**>self.mem.alloc(self.width, sizeof(int*))
        self.costs = <weight_t**>self.mem.alloc(self.width, sizeof(weight_t*))
        for i in range(self.width):
            self.scores[i] = <weight_t*>self.mem.alloc(self.nr_class, sizeof(weight_t))
            self.is_valid[i] = <int*>self.mem.alloc(self.nr_class, sizeof(int))
            self.costs[i] = <weight_t*>self.mem.alloc(self.nr_class, sizeof(weight_t))

    def __len__(self):
        return self.size

    property score:
        def __get__(self):
            return self._states[0].score

    property min_score:
        def __get__(self):
            return self._states[self.size-1].score

    property loss:
        def __get__(self):
            return self._states[0].loss

    property probs:
        def __get__(self):
            return _softmax([self._states[i].score for i in range(self.size)])

    property scores:
        def __get__(self):
            return [self._states[i].score for i in range(self.size)]

    property histories:
        def __get__(self):
            return self.histories

    cdef int set_row(self, int i, const weight_t* scores, const int* is_valid,
                     const weight_t* costs) except -1:
        cdef int j
        for j in range(self.nr_class):
            self.scores[i][j] = scores[j]
            self.is_valid[i][j] = is_valid[j]
            self.costs[i][j] = costs[j]

    cdef int set_table(self, weight_t** scores, int** is_valid, weight_t** costs) except -1:
        cdef int i, j
        for i in range(self.width):
            memcpy(self.scores[i], scores[i], sizeof(weight_t) * self.nr_class)
            memcpy(self.is_valid[i], is_valid[i], sizeof(int) * self.nr_class)
            memcpy(self.costs[i], costs[i], sizeof(weight_t) * self.nr_class)

    cdef int initialize(self, init_func_t init_func, del_func_t del_func, int n, void* extra_args) except -1:
        for i in range(self.width):
            self._states[i].content = init_func(self.mem, n, extra_args)
            self._parents[i].content = init_func(self.mem, n, extra_args)
        self.del_func = del_func

    def __dealloc__(self):
        # Nothing to free if initialize() was never called.
        if self.del_func is NULL:
            return
        for i in range(self.width):
            self.del_func(self.mem, self._states[i].content, NULL)
            self.del_func(self.mem, self._parents[i].content, NULL)

    @cython.cdivision(True)
    cdef int advance(self, trans_func_t transition_func, hash_func_t hash_func,
                     void* extra_args) except -1:
        cdef weight_t** scores = self.scores
        cdef int** is_valid = self.is_valid
        cdef weight_t** costs = self.costs

        cdef Queue* q = new Queue()
        self._fill(q, scores, is_valid)
        # For a beam of width k, we only ever need 2k state objects. How?
        # Each transition takes a parent and a class and produces a new state.
        # So, we don't need the whole history --- just the parent. So at
        # each step, we take a parent, and apply one or more extensions to
        # it.
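        # Concretely: _fill() keys each queue entry by p_i * nr_class + clas,
        # so popping an entry recovers the parent row and the class with the
        # integer division/modulo below. Swapping the two buffers makes last
        # step's states this step's parents, and the freed buffer is then
        # overwritten in place.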
        self._parents, self._states = self._states, self._parents
        self._parent_histories, self.histories = self.histories, self._parent_histories
        cdef weight_t score
        cdef int p_i
        cdef int i = 0
        cdef class_t clas
        cdef _State* parent
        cdef _State* state
        cdef hash_t key
        cdef PreshMap seen_states = PreshMap(self.width)
        cdef uint64_t is_seen
        cdef uint64_t one = 1
        while i < self.width and not q.empty():
            data = q.top()
            p_i = data.second / self.nr_class
            clas = data.second % self.nr_class
            score = data.first
            q.pop()
            parent = &self._parents[p_i]
            # Indicates terminal state reached; i.e. state is done
            if parent.is_done:
                # Now the parent will not be changed, so we don't have to copy.
                # Once finished, a state is also unbranching: it contributes
                # only a single queue entry per step.
                self._states[i], parent[0] = parent[0], self._states[i]
                parent.i = self._states[i].i
                parent.t = self._states[i].t
                parent.is_done = self._states[i].is_done
                self._states[i].score = score
                self.histories[i] = list(self._parent_histories[p_i])
                i += 1
            else:
                state = &self._states[i]
                # The supplied transition function should adjust the destination
                # state to be the result of applying the class to the source state
                transition_func(state.content, parent.content, clas, extra_args)
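                # (Illustration: in a transition-based parser, the callback
                # would copy the parent's partial parse into state.content and
                # then apply action `clas` to it; the exact signature is fixed
                # by the trans_func_t typedef.)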
                key = hash_func(state.content, extra_args) if hash_func is not NULL else 0
                is_seen = <uint64_t>seen_states.get(key)
                if key == 0 or key == 1 or not is_seen:
                    if key != 0 and key != 1:
                        seen_states.set(key, <void*>one)
                    state.score = score
                    state.loss = parent.loss + costs[p_i][clas]
                    self.histories[i] = list(self._parent_histories[p_i])
                    self.histories[i].append(clas)
                    i += 1
        del q
        self.size = i
        assert self.size >= 1
        for i in range(self.width):
            memset(self.scores[i], 0, sizeof(weight_t) * self.nr_class)
            memset(self.costs[i], 0, sizeof(weight_t) * self.nr_class)
            memset(self.is_valid[i], 0, sizeof(int) * self.nr_class)
        self.t += 1

    cdef int check_done(self, finish_func_t finish_func, void* extra_args) except -1:
        cdef int i
        for i in range(self.size):
            if not self._states[i].is_done:
                self._states[i].is_done = finish_func(self._states[i].content, extra_args)
        for i in range(self.size):
            if not self._states[i].is_done:
                self.is_done = False
                break
        else:
            self.is_done = True

    @cython.cdivision(True)
    cdef int _fill(self, Queue* q, weight_t** scores, int** is_valid) except -1:
        """Populate the queue from a k * n matrix of scores, where k is the
        beam-width, and n is the number of classes.
        """
        cdef Entry entry
        cdef weight_t score
        cdef _State* s
        cdef int i, j, move_id
        assert self.size >= 1
        cdef vector[Entry] entries
        for i in range(self.size):
            s = &self._states[i]
            move_id = i * self.nr_class
            if s.is_done:
                # Update score by path average, following TACL '13 paper.
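                # A hypothesis finished after t steps competes as if it kept
                # earning its per-step average, i.e. score * (t + 1) / t,
                # which equals the score + score / t used below.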
                if self.histories[i]:
                    entry.first = s.score + (s.score / self.t)
                else:
                    entry.first = s.score
                entry.second = move_id
                entries.push_back(entry)
            else:
                for j in range(self.nr_class):
                    if is_valid[i][j]:
                        entry.first = s.score + scores[i][j]
                        entry.second = move_id + j
                        entries.push_back(entry)
        cdef double max_, Z, cutoff
        if self.min_density == 0.0:
            for i in range(entries.size()):
                q.push(entries[i])
        elif not entries.empty():
            max_ = entries[0].first
            Z = 0.
            cutoff = 0.
            # Softmax into probabilities, so we can prune
            for i in range(entries.size()):
                if entries[i].first > max_:
                    max_ = entries[i].first
            for i in range(entries.size()):
                Z += exp(entries[i].first-max_)
            cutoff = (1. / Z) * self.min_density
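            # Since prob = exp(first - max_) / Z and cutoff = min_density / Z,
            # the test below is equivalent to
            # exp(entries[i].first - max_) >= min_density.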
            for i in range(entries.size()):
                prob = exp(entries[i].first-max_) / Z
                if prob >= cutoff:
                    q.push(entries[i])


cdef class MaxViolation:
    def __init__(self):
        self.p_score = 0.0
        self.g_score = 0.0
        self.Z = 0.0
        self.gZ = 0.0
        self.delta = -1
        self.cost = 0
        self.p_hist = []
        self.g_hist = []
        self.p_probs = []
        self.g_probs = []

    cpdef int check(self, Beam pred, Beam gold) except -1:
        cdef _State* p = &pred._states[0]
        cdef _State* g = &gold._states[0]
        cdef weight_t d = p.score - g.score
        if p.loss >= 1 and (self.cost == 0 or d > self.delta):
            self.cost = p.loss
            self.delta = d
            self.p_hist = list(pred.histories[0])
            self.g_hist = list(gold.histories[0])
            self.p_score = p.score
            self.g_score = g.score
            self.Z = 1e-10
            self.gZ = 1e-10
            for i in range(pred.size):
                if pred._states[i].loss > 0:
                    self.Z += exp(pred._states[i].score)
            for i in range(gold.size):
                if gold._states[i].loss == 0:
                    prob = exp(gold._states[i].score)
                    self.Z += prob
                    self.gZ += prob

    cpdef int check_crf(self, Beam pred, Beam gold) except -1:
        d = pred.score - gold.score
        seen_golds = set([tuple(gold.histories[i]) for i in range(gold.size)])
        if pred.loss > 0 and (self.cost == 0 or d > self.delta):
            p_hist = []
            p_scores = []
            g_hist = []
            g_scores = []
            for i in range(pred.size):
                if pred._states[i].loss > 0:
                    p_scores.append(pred._states[i].score)
                    p_hist.append(list(pred.histories[i]))
                # This can happen from non-monotonic actions
                # If we find a better gold analysis this way, be sure to keep it.
                elif pred._states[i].loss <= 0 \
                and tuple(pred.histories[i]) not in seen_golds:
                    g_scores.append(pred._states[i].score)
                    g_hist.append(list(pred.histories[i]))
            for i in range(gold.size):
                if gold._states[i].loss == 0:
                    g_scores.append(gold._states[i].score)
                    g_hist.append(list(gold.histories[i]))

            all_probs = _softmax(p_scores + g_scores)
            p_probs = all_probs[:len(p_scores)]
            g_probs_all = all_probs[len(p_scores):]
            g_probs = _softmax(g_scores)

            self.cost = pred.loss
            self.delta = d
            self.p_hist = p_hist
            self.g_hist = g_hist
            # TODO: These variables are misnamed! These are the gradients of the loss.
            self.p_probs = p_probs
            # Intuition here:
            # The gradient of the loss is:
            # P(model) - P(truth)
            # Normally, P(truth) is 1 for the gold
            # But, if we want to do the "partial credit" scheme, we want
            # to create a distribution over the gold, proportional to the scores
            # awarded.
            self.g_probs = [x-y for x, y in zip(g_probs_all, g_probs)]
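            # Worked example (illustrative): two gold histories scoring
            # [2.0, 1.0] give g_probs ~= [0.73, 0.27]. g_probs_all softmaxes
            # the same scores together with the violating predictions, so each
            # entry is smaller; the difference g_probs_all - g_probs is
            # therefore negative, i.e. a loss gradient that raises the gold
            # scores in proportion to the credit each gold analysis earned.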


def _softmax(nums):
    if not nums:
        return []
    # Skip None placeholders when finding the max; they are preserved below.
    max_ = max(n for n in nums if n is not None)
    nums = [(exp(n-max_) if n is not None else None) for n in nums]
    Z = sum(n for n in nums if n is not None)
    return [(n/Z if n is not None else None) for n in nums]
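

# Example (illustrative): _softmax([1.0, 2.0]) ~= [0.2689, 0.7311]. None
# entries are passed through unchanged and excluded from the normalisation.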