File: test_pickle.py

package info (click to toggle)
tiktoken 0.11.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 300 kB
  • sloc: python: 1,219; makefile: 17
file content (23 lines) | stat: -rw-r--r-- 715 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import tiktoken


def test_pickle():
    """Check that encodings produce the same tokens after a pickle round trip.

    Two encodings are exercised: the one returned by
    ``tiktoken.get_encoding("r50k_base")`` and a ``tiktoken.Encoding`` built
    by hand from that encoding's pattern and merge ranks plus one extra
    special token.
    """
    import pickle

    sample_text = "hello world"
    special_text = "<|pickle|>"
    special_id = 100_000

    # Encoding looked up by name.
    base = tiktoken.get_encoding("r50k_base")
    base_copy = pickle.loads(pickle.dumps(base))
    assert base.encode(sample_text) == base_copy.encode(sample_text)

    # Hand-built encoding that reuses the base pattern/ranks and adds a
    # special token, so the round trip must carry that token along too.
    custom = tiktoken.Encoding(
        name="custom_enc",
        pat_str=base._pat_str,
        mergeable_ranks=base._mergeable_ranks,
        special_tokens={special_text: special_id},
    )
    custom_copy = pickle.loads(pickle.dumps(custom))
    assert custom.encode(sample_text) == custom_copy.encode(sample_text)

    # The special token must map to its single id both before and after
    # the round trip.
    tokens_before = custom.encode(special_text, allowed_special="all")
    tokens_after = custom_copy.encode(special_text, allowed_special="all")
    assert tokens_before == tokens_after == [special_id]