File: test_bench.py

package info (click to toggle)
llama.cpp 7593%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 71,012 kB
  • sloc: cpp: 329,391; ansic: 48,249; python: 32,103; lisp: 10,053; sh: 6,070; objc: 1,349; javascript: 924; xml: 384; makefile: 233
file content (63 lines) | stat: -rw-r--r-- 1,806 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import pytest
import subprocess
import sys

# Staging directory on the Android device; everything is pushed here via adb.
# NOTE(review): the name coincides with pytest's built-in `tmp_path` fixture —
# harmless for a module constant, but renaming (e.g. DEVICE_TMP) would avoid confusion.
tmp_path='/data/local/tmp'
# On-device layout of the pushed llama.cpp tree.
pkg_path=f'{tmp_path}/llama.cpp'
lib_path=f'{pkg_path}/lib'
bin_path=f'{pkg_path}/bin'

# Model path is relative to pkg_path (commands below `cd` there first).
model='../gguf/Llama-3.2-1B-Instruct-Q4_0.gguf'
# Shell prefix for every on-device command: enter the package dir and point the
# dynamic loader (and Hexagon DSP loader, ADSP_LIBRARY_PATH) at the bundled libs.
cli_pref=f'cd {pkg_path} && LD_LIBRARY_PATH={lib_path} ADSP_LIBRARY_PATH={lib_path} {bin_path}'


def run_cmd(cmd):
    """Run *cmd*, echo its combined stdout/stderr, and fail the test on error.

    Parameters
    ----------
    cmd : list[str]
        Argv list passed to ``subprocess.run`` (``shell=False``).

    Raises
    ------
    AssertionError
        If the command exits with a non-zero status.
    """
    # stderr is folded into stdout so the full transcript appears in order.
    p = subprocess.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    sys.stdout.write(p.stdout)
    # Include the command and exit code so a failure is diagnosable from the log.
    assert p.returncode == 0, f'command failed (rc={p.returncode}): {cmd}'


@pytest.mark.dependency()
def test_install():
    """Push the local llama.cpp tree to the device and mark binaries executable.

    All other tests in this module depend on this one via pytest-dependency;
    if the push fails they are skipped rather than failing with confusing
    on-device errors.
    """
    # Plain name instead of the redundant f'{tmp_path}' wrapper.
    run_cmd(['adb', 'push', 'llama.cpp', tmp_path])
    run_cmd(['adb', 'shell', f'chmod 755 {bin_path}/*'])


## Basic cli tests
def run_llama_cli(dev, opts):
    """Run llama-cli on the device with a fixed prompt and seed.

    Parameters
    ----------
    dev : str
        Value for llama-cli's ``--device`` flag (e.g. ``none``, ``GPUOpenCL``, ``HTP0``).
    opts : str
        Extra command-line options appended after the common defaults.
    """
    prompt='what is the most popular cookie in the world?\nPlease provide a very brief bullet point summary.\nBegin your answer with **BEGIN**.'
    # Common deterministic defaults (fixed seed, no conversation mode) plus caller extras.
    full_opts = f'--batch-size 128 -n 128 -no-cnv --seed 42 {opts}'
    command = f'{cli_pref}/llama-cli -m {model} --device {dev} -ngl 99 -t 4 {full_opts} -p "{prompt}"'
    run_cmd(['adb', 'shell', command])


@pytest.mark.dependency(depends=['test_install'])
def test_llama_cli_cpu():
    # CPU path ('none' device) with quantized q8_0 KV cache and flash attention.
    run_llama_cli('none', '-ctk q8_0 -ctv q8_0 -fa on')


@pytest.mark.dependency(depends=['test_install'])
def test_llama_cli_gpu():
    # OpenCL GPU backend; default (unquantized) KV cache, flash attention on.
    run_llama_cli('GPUOpenCL', '-fa on')


@pytest.mark.dependency(depends=['test_install'])
def test_llama_cli_npu():
    # Hexagon NPU backend (HTP0) with quantized q8_0 KV cache and flash attention.
    run_llama_cli('HTP0', '-ctk q8_0 -ctv q8_0 -fa on')


## Basic bench tests
def run_llama_bench(dev):
    """Run a short llama-bench pass (pp128 / tg32) on the given device backend.

    Parameters
    ----------
    dev : str
        Value for llama-bench's ``--device`` flag.
    """
    bench_cmd = (f'{cli_pref}/llama-bench -m {model} --device {dev} '
                 f'-ngl 99 --batch-size 128 -t 4 -p 128 -n 32')
    run_cmd(['adb', 'shell', bench_cmd])


@pytest.mark.dependency(depends=['test_install'])
def test_llama_bench_cpu():
    # Benchmark the CPU path ('none' device).
    run_llama_bench('none')


@pytest.mark.dependency(depends=['test_install'])
def test_llama_bench_gpu():
    """Benchmark the OpenCL GPU backend.

    Fix: declare the dependency on test_install, matching every other
    device test — previously this ran even when the push to the device failed.
    """
    run_llama_bench('GPUOpenCL')


@pytest.mark.dependency(depends=['test_install'])
def test_llama_bench_npu():
    """Benchmark the Hexagon NPU backend (HTP0).

    Fix: declare the dependency on test_install, matching every other
    device test — previously this ran even when the push to the device failed.
    """
    run_llama_bench('HTP0')