1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
|
import pytest
import time
from utils import *
server = ServerPreset.tinyllama2()
@pytest.fixture(autouse=True)
def create_server():
global server
server = ServerPreset.tinyllama2()
def test_server_sleep():
global server
server.sleep_idle_seconds = 1
server.start()
# wait a bit so that server can go to sleep
time.sleep(2)
# make sure these endpoints are still responsive after sleep
res = server.make_request("GET", "/health")
assert res.status_code == 200
res = server.make_request("GET", "/props")
assert res.status_code == 200
assert res.body["is_sleeping"] == True
# make a generation request to wake up the server
res = server.make_request("POST", "/completion", data={
"n_predict": 1,
"prompt": "Hello",
})
assert res.status_code == 200
# it should no longer be sleeping
res = server.make_request("GET", "/props")
assert res.status_code == 200
assert res.body["is_sleeping"] == False
|