#include "models.h"
llm_build_wavtokenizer_dec::llm_build_wavtokenizer_dec(const llama_model & model, const llm_graph_params & params) : llm_graph_context(params) {
    ggml_tensor * cur;
    ggml_tensor * inpL;

    inpL = build_inp_embd(model.tok_embd);
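
    // transpose so that the 1D convolutions below operate along the token (time) axis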
    cur = ggml_cont(ctx0, ggml_transpose(ctx0, inpL));

    cur = ggml_conv_1d_ph(ctx0, model.conv1d, cur, 1, 1);
    cur = ggml_add(ctx0, cur, model.conv1d_b);

    // posnet: residual conv blocks (layers 0, 1, 3, 4), self-attention (layer 2),
    // and a final group norm (layer 5)
    for (uint32_t il = 0; il < hparams.posnet.n_layer; ++il) {
        const auto & layer = model.layers[il].posnet;

        inpL = cur;

        switch (il) {
            case 0:
            case 1:
            case 3:
            case 4:
                {
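                    // residual block: norm -> SiLU -> conv, twice, then add the input back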
                    cur = build_norm(cur,
                            layer.norm1,
                            layer.norm1_b,
                            LLM_NORM_GROUP, 0);

                    cur = ggml_mul(ctx0, ggml_sigmoid(ctx0, cur), cur);

                    cur = ggml_conv_1d_ph(ctx0, layer.conv1, cur, 1, 1);
                    cur = ggml_add(ctx0, cur, layer.conv1_b);

                    cur = build_norm(cur,
                            layer.norm2,
                            layer.norm2_b,
                            LLM_NORM_GROUP, 0);

                    cur = ggml_mul(ctx0, ggml_sigmoid(ctx0, cur), cur);

                    cur = ggml_conv_1d_ph(ctx0, layer.conv2, cur, 1, 1);
                    cur = ggml_add(ctx0, cur, layer.conv2_b);

                    cur = ggml_add(ctx0, cur, inpL);
                } break;
            case 2:
                {
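                    // single-head self-attention over the time axis, with 1D-conv q/k/v projections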
                    cur = build_norm(cur,
                            layer.attn_norm,
                            layer.attn_norm_b,
                            LLM_NORM_GROUP, 0);

                    ggml_tensor * q;
                    ggml_tensor * k;
                    ggml_tensor * v;

                    q = ggml_conv_1d_ph(ctx0, layer.attn_q, cur, 1, 1);
                    k = ggml_conv_1d_ph(ctx0, layer.attn_k, cur, 1, 1);
                    v = ggml_conv_1d_ph(ctx0, layer.attn_v, cur, 1, 1);

                    q = ggml_add(ctx0, q, layer.attn_q_b);
                    k = ggml_add(ctx0, k, layer.attn_k_b);
                    v = ggml_add(ctx0, v, layer.attn_v_b);

                    q = ggml_cont(ctx0, ggml_transpose(ctx0, q));
                    k = ggml_cont(ctx0, ggml_transpose(ctx0, k));
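
                    // attention scores, scaled by 1/sqrt(n_embd) inside the softmax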
                    ggml_tensor * kq = ggml_mul_mat(ctx0, k, q);

                    kq = ggml_soft_max_ext(ctx0, kq, nullptr, 1.0f/sqrtf(float(hparams.posnet.n_embd)), 0.0f);

                    cur = ggml_mul_mat(ctx0, kq, v);

                    cur = ggml_conv_1d_ph(ctx0, layer.attn_o, cur, 1, 1);
                    cur = ggml_add(ctx0, cur, layer.attn_o_b);

                    cur = ggml_add(ctx0, cur, inpL);
                } break;
            case 5:
                {
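                    // closing group norm of the posnet stack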
                    cur = build_norm(cur,
                            layer.norm,
                            layer.norm_b,
                            LLM_NORM_GROUP, 0);
                } break;
            default: GGML_ABORT("unknown posnet layer");
        }
    }
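
    // transpose back so the token-embedding norm is applied across the embedding dimension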
    cur = ggml_cont(ctx0, ggml_transpose(ctx0, cur));

    cur = build_norm(cur,
            model.tok_norm,
            model.tok_norm_b,
            LLM_NORM, -1);

    cur = ggml_cont(ctx0, ggml_transpose(ctx0, cur));

    inpL = cur;

    // convnext: blocks of depthwise conv -> norm -> pointwise GELU FFN,
    // each with layer scale and a residual connection
    for (uint32_t il = 0; il < hparams.convnext.n_layer; ++il) {
        const auto & layer = model.layers[il].convnext;

        cur = inpL;
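
        // depthwise conv along the time axis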
        cur = ggml_conv_1d_dw_ph(ctx0, layer.dw, cur, 1, 1);
        cur = ggml_add(ctx0, cur, layer.dw_b);

        cur = ggml_cont(ctx0, ggml_transpose(ctx0, cur));

        cur = build_norm(cur,
                layer.norm,
                layer.norm_b,
                LLM_NORM, -1);

        cur = build_ffn(cur,
                layer.pw1, layer.pw1_b, NULL,
                NULL,      NULL,        NULL,
                layer.pw2, layer.pw2_b, NULL,
                NULL,
                LLM_FFN_GELU, LLM_FFN_SEQ, il);
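
        // scale the block output by the learned per-channel gamma (ConvNeXt-style layer scale)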
        cur = ggml_mul(ctx0, cur, layer.gamma);

        cur = ggml_cont(ctx0, ggml_transpose(ctx0, cur));

        inpL = ggml_add(ctx0, cur, inpL);
    }

    cur = inpL;
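
    // transpose back and apply the final norm across the embedding dimension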
    cur = ggml_cont(ctx0, ggml_transpose(ctx0, cur));

    cur = build_norm(cur,
            model.output_norm,
            model.output_norm_b,
            LLM_NORM, -1);

    // lm_head
    cur = build_lora_mm(model.output, cur);
    cur = ggml_add(ctx0, cur, model.output_b);

    cb(cur, "result_embd", -1);
    res->t_embd = cur;

    ggml_build_forward_expand(gf, cur);
}