File: wavtokenizer-dec.cpp

package info (click to toggle)
whisper.cpp 1.8.3%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 32,228 kB
  • sloc: cpp: 188,765; ansic: 121,729; lisp: 10,221; sh: 4,272; objc: 2,159; ruby: 1,682; python: 1,177; javascript: 594; makefile: 144
file content (149 lines) | stat: -rw-r--r-- 4,633 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#include "models.h"

llm_build_wavtokenizer_dec::llm_build_wavtokenizer_dec(const llama_model & model, const llm_graph_params & params) : llm_graph_context(params) {
    ggml_tensor * cur;
    ggml_tensor * inpL;

    inpL = build_inp_embd(model.tok_embd);

    cur = ggml_cont(ctx0, ggml_transpose(ctx0, inpL));

    cur = ggml_conv_1d_ph(ctx0, model.conv1d, cur, 1, 1);
    cur = ggml_add(ctx0, cur, model.conv1d_b);

    // posnet
    for (uint32_t il = 0; il < hparams.posnet.n_layer; ++il) {
        const auto & layer = model.layers[il].posnet;

        inpL = cur;

        switch (il) {
            case 0:
            case 1:
            case 3:
            case 4:
                {
                    cur = build_norm(cur,
                            layer.norm1,
                            layer.norm1_b,
                            LLM_NORM_GROUP, 0);

                    cur = ggml_mul(ctx0, ggml_sigmoid(ctx0, cur), cur);

                    cur = ggml_conv_1d_ph(ctx0, layer.conv1, cur, 1, 1);
                    cur = ggml_add(ctx0, cur, layer.conv1_b);

                    cur = build_norm(cur,
                            layer.norm2,
                            layer.norm2_b,
                            LLM_NORM_GROUP, 0);

                    cur = ggml_mul(ctx0, ggml_sigmoid(ctx0, cur), cur);

                    cur = ggml_conv_1d_ph(ctx0, layer.conv2, cur, 1, 1);
                    cur = ggml_add(ctx0, cur, layer.conv2_b);

                    cur = ggml_add(ctx0, cur, inpL);
                } break;
            case 2:
                {
                    cur = build_norm(cur,
                            layer.attn_norm,
                            layer.attn_norm_b,
                            LLM_NORM_GROUP, 0);

                    ggml_tensor * q;
                    ggml_tensor * k;
                    ggml_tensor * v;

                    q = ggml_conv_1d_ph(ctx0, layer.attn_q, cur, 1, 1);
                    k = ggml_conv_1d_ph(ctx0, layer.attn_k, cur, 1, 1);
                    v = ggml_conv_1d_ph(ctx0, layer.attn_v, cur, 1, 1);

                    q = ggml_add(ctx0, q, layer.attn_q_b);
                    k = ggml_add(ctx0, k, layer.attn_k_b);
                    v = ggml_add(ctx0, v, layer.attn_v_b);

                    q = ggml_cont(ctx0, ggml_transpose(ctx0, q));
                    k = ggml_cont(ctx0, ggml_transpose(ctx0, k));

                    ggml_tensor * kq = ggml_mul_mat(ctx0, k, q);

                    kq = ggml_soft_max_ext(ctx0, kq, nullptr, 1.0f/sqrtf(float(hparams.posnet.n_embd)), 0.0f);

                    cur = ggml_mul_mat(ctx0, kq, v);

                    cur = ggml_conv_1d_ph(ctx0, layer.attn_o, cur, 1, 1);
                    cur = ggml_add(ctx0, cur, layer.attn_o_b);

                    cur = ggml_add(ctx0, cur, inpL);
                } break;
            case 5:
                {
                    cur = build_norm(cur,
                            layer.norm,
                            layer.norm_b,
                            LLM_NORM_GROUP, 0);
                } break;
            default: GGML_ABORT("unknown posnet layer");
        };
    }
    cur = ggml_cont(ctx0, ggml_transpose(ctx0, cur));

    cur = build_norm(cur,
            model.tok_norm,
            model.tok_norm_b,
            LLM_NORM, -1);

    cur = ggml_cont(ctx0, ggml_transpose(ctx0, cur));

    inpL = cur;

    // convnext
    for (uint32_t il = 0; il < hparams.convnext.n_layer; ++il) {
        const auto & layer = model.layers[il].convnext;

        cur = inpL;

        cur = ggml_conv_1d_dw_ph(ctx0, layer.dw, cur, 1, 1);
        cur = ggml_add(ctx0, cur, layer.dw_b);

        cur = ggml_cont(ctx0, ggml_transpose(ctx0, cur));

        cur = build_norm(cur,
                layer.norm,
                layer.norm_b,
                LLM_NORM, -1);

        cur = build_ffn(cur,
                layer.pw1, layer.pw1_b, NULL,
                NULL,      NULL,        NULL,
                layer.pw2, layer.pw2_b, NULL,
                NULL,
                LLM_FFN_GELU, LLM_FFN_SEQ, il);

        cur = ggml_mul(ctx0, cur, layer.gamma);

        cur = ggml_cont(ctx0, ggml_transpose(ctx0, cur));

        inpL = ggml_add(ctx0, cur, inpL);
    }
    cur = inpL;

    cur = ggml_cont(ctx0, ggml_transpose(ctx0, cur));

    cur = build_norm(cur,
            model.output_norm,
            model.output_norm_b,
            LLM_NORM, -1);

    // lm_head
    cur = build_lora_mm(model.output, cur);

    cur = ggml_add(ctx0, cur, model.output_b);

    cb(cur, "result_embd", -1);
    res->t_embd = cur;

    ggml_build_forward_expand(gf, cur);
}