File: unique_table.py

package info (click to toggle)
firefox-esr 68.10.0esr-1~deb9u1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 3,143,932 kB
  • sloc: cpp: 5,227,879; javascript: 4,315,531; ansic: 2,467,042; python: 794,975; java: 349,993; asm: 232,034; xml: 228,320; sh: 82,008; lisp: 41,202; makefile: 22,347; perl: 15,555; objc: 5,277; cs: 4,725; yacc: 1,778; ada: 1,681; pascal: 1,673; lex: 1,417; exp: 527; php: 436; ruby: 225; awk: 162; sed: 53; csh: 44
file content (80 lines) | stat: -rw-r--r-- 2,256 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
"""
Generate a table of unique items.

The `UniqueTable` class collects items into an array, removing duplicates. Each
item is mapped to its offset in the final array.

This is a compression technique for compile-time generated tables.
"""

try:
    from typing import Any, List, Dict, Tuple, Sequence  # noqa
except ImportError:
    pass


class UniqueTable:
    """
    Collect items into the `table` list, removing duplicates.
    """
    def __init__(self):
        # type: () -> None
        # List of items added in order.
        self.table = list()  # type: List[Any]
        # Map item -> index.
        self.index = dict()  # type: Dict[Any, int]

    def add(self, item):
        # type: (Any) -> int
        """
        Add a single item to the table if it isn't already there.

        Return the offset into `self.table` of the item.
        """
        if item in self.index:
            return self.index[item]

        idx = len(self.table)
        self.index[item] = idx
        self.table.append(item)
        return idx


class UniqueSeqTable:
    """
    Collect sequences into the `table` list, removing duplicates.

    Sequences don't have to be of the same length.
    """
    def __init__(self):
        # type: () -> None
        self.table = list()  # type: List[Any]
        # Map seq -> index.
        self.index = dict()  # type: Dict[Tuple[Any, ...], int]

    def add(self, seq):
        # type: (Sequence[Any]) -> int
        """
        Add a sequence of items to the table. If the table already contains the
        items in `seq` in the same order, use those instead.

        Return the offset into `self.table` of the beginning of `seq`.
        """
        if len(seq) == 0:
            return 0
        tseq = tuple(seq)
        if tseq in self.index:
            return self.index[tseq]

        idx = len(self.table)
        self.table.extend(tseq)

        # Add seq and all sub-sequences to `index`.
        index = self.index  # type: Dict[Tuple[Any, ...], int]
        assert index is not None
        for length in range(1, len(tseq) + 1):
            for offset in range(len(tseq) - length + 1):
                key = tseq[offset:offset+length]
                index[key] = idx + offset

        return idx