File: LlamaRuntimeLinker.h

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef LlamaRuntimeLinker_h__
#define LlamaRuntimeLinker_h__

#include "mozilla/Attributes.h"
#include "mozilla/Types.h"
#include "llama/llama.h"
#include "ggml.h"

struct PRLibrary;

namespace mozilla::llama {

// X-macro list of the llama.cpp/ggml entry points that are resolved at
// runtime. Format: X(return_type, name, params)
#define MOZINFERENCE_FUNCTION_LIST(X)                                       \
  X(void, llama_log_set,                                                    \
    (void (*callback)(enum ggml_log_level, const char*, void*),             \
     void* user_data))                                                      \
  X(struct llama_model_params, llama_model_default_params, (void))          \
  X(struct llama_model*, llama_model_load_from_file_handle,                 \
    (FILE * file, struct llama_model_params params))                        \
  X(int32_t, llama_model_meta_val_str,                                      \
    (const struct llama_model* model, const char* key, char* buf,           \
     size_t buf_size))                                                      \
  X(struct llama_context_params, llama_context_default_params, (void))      \
  X(struct llama_context*, llama_init_from_model,                           \
    (struct llama_model * model, struct llama_context_params params))       \
  X(void, llama_attach_threadpool,                                          \
    (struct llama_context * ctx, ggml_threadpool_t threadpool,              \
     ggml_threadpool_t threadpool_batch))                                   \
  X(const char*, llama_model_chat_template,                                 \
    (const struct llama_model* model, const char* name))                    \
  X(int32_t, llama_chat_apply_template,                                     \
    (const char* tmpl, const struct llama_chat_message* chat, size_t n_msg, \
     bool add_ass, char* buf, int32_t length))                              \
  X(struct llama_sampler_chain_params, llama_sampler_chain_default_params,  \
    (void))                                                                 \
  X(struct llama_sampler*, llama_sampler_chain_init,                        \
    (struct llama_sampler_chain_params params))                             \
  X(void, llama_sampler_chain_add,                                          \
    (struct llama_sampler * chain, struct llama_sampler * smpl))            \
  X(struct llama_sampler*, llama_sampler_init_greedy, (void))               \
  X(struct llama_sampler*, llama_sampler_init_temp, (float t))              \
  X(struct llama_sampler*, llama_sampler_init_dist, (uint32_t seed))        \
  X(struct llama_sampler*, llama_sampler_init_top_k, (int32_t k))           \
  X(struct llama_sampler*, llama_sampler_init_top_p,                        \
    (float p, size_t min_keep))                                             \
  X(struct llama_sampler*, llama_sampler_init_logit_bias,                   \
    (int32_t n_vocab, int32_t n_logit_bias,                                 \
     const llama_logit_bias* logit_bias))                                   \
  X(void, llama_memory_clear, (llama_memory_t mem, bool data))              \
  X(llama_memory_t, llama_get_memory, (const struct llama_context* ctx))    \
  X(const struct llama_vocab*, llama_model_get_vocab,                       \
    (const struct llama_model* model))                                      \
  X(int32_t, llama_vocab_n_tokens, (const struct llama_vocab* vocab))       \
  X(int32_t, llama_tokenize,                                                \
    (const struct llama_vocab* vocab, const char* text, int32_t text_len,   \
     llama_token* tokens, int32_t n_tokens_max, bool add_special,           \
     bool parse_special))                                                   \
  X(uint32_t, llama_n_ctx, (const struct llama_context* ctx))               \
  X(struct llama_batch, llama_batch_get_one,                                \
    (llama_token * tokens, int32_t n_tokens))                               \
  X(int32_t, llama_memory_seq_pos_max,                                      \
    (llama_memory_t mem, llama_seq_id seq_id))                              \
  X(int32_t, llama_decode,                                                  \
    (struct llama_context * ctx, struct llama_batch batch))                 \
  X(llama_token, llama_sampler_sample,                                      \
    (struct llama_sampler * smpl, struct llama_context * ctx, int32_t idx)) \
  X(bool, llama_vocab_is_eog,                                               \
    (const struct llama_vocab* vocab, llama_token token))                   \
  X(int32_t, llama_token_to_piece,                                          \
    (const struct llama_vocab* vocab, llama_token token, char* buf,         \
     int32_t length, int32_t lstrip, bool special))                         \
  X(void, llama_model_free, (struct llama_model * model))                   \
  X(void, llama_free, (struct llama_context * ctx))                         \
  X(void, llama_sampler_free, (struct llama_sampler * smpl))                \
  X(void, ggml_threadpool_params_init,                                      \
    (struct ggml_threadpool_params * p, int n_threads))                     \
  X(bool, ggml_threadpool_params_match,                                     \
    (const struct ggml_threadpool_params* p0,                               \
     const struct ggml_threadpool_params* p1))                              \
  X(ggml_threadpool_t, ggml_threadpool_new,                                 \
    (struct ggml_threadpool_params * params))                               \
  X(void, ggml_threadpool_free, (ggml_threadpool_t threadpool))

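// Thin wrapper around the dynamically loaded llama library: holds the
// PRLibrary handle plus one function pointer per entry point listed above.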
struct LlamaLibWrapper {
  LlamaLibWrapper() = default;
  ~LlamaLibWrapper() = default;

  enum class LinkResult {
    Success,          // Library loaded and every listed symbol resolved
    NoProvidedLib,    // No library was provided to link against
    MissingFunction,  // The library lacks at least one listed symbol
  };

  // Load the library and resolve every entry in MOZINFERENCE_FUNCTION_LIST
  // into the function pointers below
  LinkResult Link();
  // Unload the library; the function pointers must not be used afterwards
  void Unlink();

  // Handle to the dynamically loaded library; null until Link() succeeds
  PRLibrary* mLlamaLib = nullptr;

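  // Declares one member function pointer per listed entry point. For example,
  // the first entry expands to:
  //   void (*llama_log_set)(void (*callback)(enum ggml_log_level,
  //                                          const char*, void*),
  //                         void* user_data);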
#define DECLARE_FUNCTION_PTR(ret, name, params) ret(*name) params;
  MOZINFERENCE_FUNCTION_LIST(DECLARE_FUNCTION_PTR)
#undef DECLARE_FUNCTION_PTR
};

class LlamaRuntimeLinker {
 public:
  enum LinkStatus {
    LinkStatus_INIT = 0,
    LinkStatus_FAILED,
    LinkStatus_SUCCEEDED,
  };

  // Initialize the dynamic linker; returns true on success
  static bool Init();

  // Get the llama library wrapper, or nullptr if linking failed
  static LlamaLibWrapper* Get() {
    if (!Init()) {
      return nullptr;
    }
    return &sLlamaLib;
  }

  // Check if the library has been successfully linked
  static bool IsAvailable() { return sLinkStatus == LinkStatus_SUCCEEDED; }

 private:
  static LlamaLibWrapper sLlamaLib;
  static LinkStatus sLinkStatus;
};
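
// Illustrative usage (a sketch, not part of this header): fetch the wrapper
// once via the linker, then call llama.cpp through the bound pointers:
//
//   if (LlamaLibWrapper* lib = LlamaRuntimeLinker::Get()) {
//     struct llama_model_params params = lib->llama_model_default_params();
//     // ... load a model, create a context, decode and sample tokens ...
//   }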

}  // namespace mozilla::llama

#endif  // LlamaRuntimeLinker_h__