#include "ggml.h"
#include "ggml-backend.h"
#include <cassert>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <vector>
// Default log handler: mirror every ggml log message to stderr immediately.
static void ggml_log_callback_default(ggml_log_level level, const char * text, void * user_data) {
    // the level and user data are not used; all messages are forwarded as-is
    (void) user_data;
    (void) level;
    fprintf(stderr, "%s", text);
    fflush(stderr);
}
// This is a simple model with two tensors a and b
struct simple_model {
    // input matrices for the multiplication; created in build_graph()
    struct ggml_tensor * a {};
    struct ggml_tensor * b {};
    // the backend to perform the computation (CPU, CUDA, METAL)
    ggml_backend_t backend {};
    // CPU backend, passed to the scheduler as a fallback device
    ggml_backend_t cpu_backend {};
    // scheduler that assigns graph nodes to the backends above
    ggml_backend_sched_t sched {};
    // storage for the graph and tensors (metadata only; no_alloc = true)
    std::vector<uint8_t> buf;
};
// initialize data of matrices to perform matrix multiplication
// A is a 4x2 matrix (rows_A x cols_A), stored row-major
const int rows_A = 4, cols_A = 2;
float matrix_A[rows_A * cols_A] = {
    2, 8,
    5, 1,
    4, 2,
    8, 6
};
// B is stored pre-transposed as 3x2, i.e. the transpose of the 2x3 matrix
//   [ 10, 9, 5,
//      5, 9, 4 ]
// (the graph computes a*b^T — see build_graph)
const int rows_B = 3, cols_B = 2;
float matrix_B[rows_B * cols_B] = {
    10, 5,
    9, 9,
    5, 4
};
// initialize the backends and the scheduler for the model
// (the tensors themselves are created later, in build_graph)
void init_model(simple_model & model) {
    ggml_log_set(ggml_log_callback_default, nullptr);

    // discover all available backends (CPU, CUDA, Metal, ...)
    ggml_backend_load_all();

    // pick the best available device, and always keep a CPU backend so the
    // scheduler can fall back for ops the primary backend does not support
    model.backend     = ggml_backend_init_best();
    model.cpu_backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr);
    GGML_ASSERT(model.backend     != nullptr);
    GGML_ASSERT(model.cpu_backend != nullptr);

    ggml_backend_t backends[2] = { model.backend, model.cpu_backend };
    model.sched = ggml_backend_sched_new(backends, nullptr, 2, GGML_DEFAULT_GRAPH_SIZE, false, true);
    GGML_ASSERT(model.sched != nullptr);
}
// Build the compute graph for result = a*b^T.
// Tensor and graph metadata live in model.buf; the actual tensor data is
// allocated later by the scheduler (no_alloc = true).
struct ggml_cgraph * build_graph(simple_model& model) {
    // reserve enough metadata space for a full-size graph plus its tensors
    const size_t mem_size = ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead();
    model.buf.resize(mem_size);

    struct ggml_init_params params = {
        /*.mem_size   =*/ mem_size,
        /*.mem_buffer =*/ model.buf.data(),
        /*.no_alloc   =*/ true, // the tensors will be allocated later
    };

    // temporary context used only while constructing the graph
    struct ggml_context * ctx = ggml_init(params);
    struct ggml_cgraph  * gf  = ggml_new_graph(ctx);

    // create the two input tensors
    model.a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, cols_A, rows_A);
    model.b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, cols_B, rows_B);

    // result = a*b^T
    struct ggml_tensor * result = ggml_mul_mat(ctx, model.a, model.b);
    ggml_build_forward_expand(gf, result);

    // the metadata is owned by model.buf, so the context can be released here
    ggml_free(ctx);
    return gf;
}
// Allocate backend buffers for the graph, upload the inputs, and run it.
// Returns the output tensor (the last node added in build_graph).
struct ggml_tensor * compute(simple_model & model, struct ggml_cgraph * gf) {
    // assign every graph tensor to a backend buffer
    ggml_backend_sched_reset(model.sched);
    ggml_backend_sched_alloc_graph(model.sched, gf);

    // copy the host-side matrices into the backend buffers
    ggml_backend_tensor_set(model.b, matrix_B, 0, ggml_nbytes(model.b));
    ggml_backend_tensor_set(model.a, matrix_A, 0, ggml_nbytes(model.a));

    // execute the graph
    ggml_backend_sched_graph_compute(model.sched, gf);

    // in this case, the output tensor is the last one in the graph
    return ggml_graph_node(gf, -1);
}
int main(void) {
    ggml_time_init();

    simple_model model;
    init_model(model);

    struct ggml_cgraph * gf = build_graph(model);

    // perform computation
    struct ggml_tensor * result = compute(model, gf);

    // copy the result from backend memory into a host-side buffer
    std::vector<float> out_data(ggml_nelements(result));
    ggml_backend_tensor_get(result, out_data.data(), 0, ggml_nbytes(result));

    // expected result:
    // [ 60.00 55.00 50.00 110.00
    //   90.00 54.00 54.00 126.00
    //   42.00 29.00 28.00  64.00 ]
    const int n_cols = (int) result->ne[0];
    const int n_rows = (int) result->ne[1];
    printf("mul mat (%d x %d) (transposed result):\n[", n_cols, n_rows);
    for (int row = 0; row < n_rows; row++) {
        if (row > 0) {
            printf("\n");
        }
        for (int col = 0; col < n_cols; col++) {
            printf(" %.2f", out_data[row * n_cols + col]);
        }
    }
    printf(" ]\n");

    // release the scheduler and both backends
    ggml_backend_sched_free(model.sched);
    ggml_backend_free(model.backend);
    ggml_backend_free(model.cpu_backend);
    return 0;
}