File: test_tokenizer.py

package: python-rtf-tokenize 1.0.1-1
from textwrap import dedent

from rtf_tokenize import RtfTokenizer


TEST_RTF = dedent(
    r'''
    {\rtf1\ansi
    {\*\cxs TEFT}
    escaped newline: line\
    break
    \test1 ing\test2;
    }
    '''
).lstrip()

# Expected token stream for TEST_RTF.
TEST_RTF_TOKENS = (
    '{', r'\rtf1', r'\ansi',
    '{', r'\*', r'\cxs', 'TEFT', '}',
    'escaped newline: line', '\\\n',
    'break',
    r'\test1', 'ing', r'\test2', ';',
    '}',
)

# Expected (lnum, cnum) reported by the tokenizer after each token; the
# values correspond to the token's zero-based 'line:col' start position
# in TEST_RTF, parsed here from the compact strings below.
TEST_RTF_LOCATIONS = [
    tuple(map(int, loc.split(':')))
    for loc in '''
    0:0 0:1 0:6
    1:0 1:1 1:3 1:8 1:12
    2:0 2:21
    3:0
    4:0 4:7 4:10 4:16
    5:0
    '''.split()
]
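# For example, the r'\cxs' control word starts at column 3 of line 1 of
# TEST_RTF ('{\*\cxs TEFT}'), hence the '1:3' entry above.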


def test_tokenizer_next_token():
    tokenizer = RtfTokenizer(TEST_RTF)
    for n, (expected_token, expected_loc) in enumerate(zip(TEST_RTF_TOKENS, TEST_RTF_LOCATIONS)):
        token = tokenizer.next_token()
        loc = (tokenizer.lnum, tokenizer.cnum)
        msg = 'token %u at %u:%u' % (n, loc[0], loc[1])
        assert token == expected_token, msg
        assert loc == expected_loc, msg
    msg = 'token %u at end' % (n + 1)
    expected_loc = (expected_loc[0] + 1, 0)
    assert tokenizer.next_token() is None, msg
    assert (tokenizer.lnum, tokenizer.cnum) == expected_loc, msg


def test_tokenizer_rewind_token():
    tokenizer = RtfTokenizer(TEST_RTF)
    # Read first 2 tokens.
    assert tokenizer.next_token() == TEST_RTF_TOKENS[0]
    assert (tokenizer.lnum, tokenizer.cnum) == TEST_RTF_LOCATIONS[0]
    assert tokenizer.next_token() == TEST_RTF_TOKENS[1]
    assert (tokenizer.lnum, tokenizer.cnum) == TEST_RTF_LOCATIONS[1]
    # Rewind 2 unrelated tokens.
    tokenizer.rewind_token('re')
    tokenizer.rewind_token('wind')
    # Check the next 2 tokens are the rewound ones, last one first.
    assert tokenizer.next_token() == r'wind'
    assert (tokenizer.lnum, tokenizer.cnum) == TEST_RTF_LOCATIONS[1]
    assert tokenizer.next_token() == r're'
    assert (tokenizer.lnum, tokenizer.cnum) == TEST_RTF_LOCATIONS[1]
    # And that we continue where we left off.
    assert tokenizer.next_token() == TEST_RTF_TOKENS[2]
    assert (tokenizer.lnum, tokenizer.cnum) == TEST_RTF_LOCATIONS[2]
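

# Illustrative sketch (not part of the upstream tests): driving the
# RtfTokenizer members exercised above (next_token(), rewind_token(),
# lnum and cnum) by hand. Nothing beyond those members is assumed.
if __name__ == '__main__':
    tokenizer = RtfTokenizer(TEST_RTF)
    # rewind_token() pushes a token back so the next next_token() call
    # returns it first (last in, first out, as test_tokenizer_rewind_token
    # checks).
    first = tokenizer.next_token()
    tokenizer.rewind_token(first)
    assert tokenizer.next_token() == first
    # Walk the rest of the stream; next_token() returns None at end of input.
    while True:
        token = tokenizer.next_token()
        if token is None:
            break
        print('%u:%u %r' % (tokenizer.lnum, tokenizer.cnum, token))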