File: get_vocab.py

package info (click to toggle)
fasttext 0.9.2%2Bds-9
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 4,952 kB
  • sloc: cpp: 5,459; python: 2,427; javascript: 635; sh: 621; makefile: 106; xml: 81; perl: 43
file content (48 lines) | stat: -rw-r--r-- 1,285 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/usr/bin/env python

# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division, absolute_import, print_function

from fasttext import load_model
import argparse
import errno

if __name__ == "__main__":
    # Script entry point: load a fastText model and print one
    # "<entry>\t<frequency>" line per dictionary entry (words by default,
    # labels with -l/--labels) to standard output.

    # Imported locally: only needed for the broken-pipe handling below.
    import os
    import sys

    parser = argparse.ArgumentParser(
        description=(
            "Print words or labels and frequency of a model's dictionary"
        )
    )
    parser.add_argument(
        "model",
        help="Model to use",
    )
    parser.add_argument(
        "-l",
        "--labels",
        help="Print labels instead of words",
        action='store_true',
        default=False,
    )
    args = parser.parse_args()

    model = load_model(args.model)
    if args.labels:
        entries, counts = model.get_labels(include_freq=True)
    else:
        entries, counts = model.get_words(include_freq=True)

    try:
        for entry, count in zip(entries, counts):
            print(entry + "\t" + str(count))
        # Flush inside the try block so that a pipe closed by the reader is
        # reported here instead of during interpreter shutdown.
        sys.stdout.flush()
    except IOError as e:
        # A closed pipe (e.g. `get_vocab.py model | head`) is the only
        # expected failure; any other I/O error must not be hidden.
        if e.errno != errno.EPIPE:
            raise
        # The reader is gone, so stop writing. The interpreter flushes
        # stdout once more on exit; point the descriptor at devnull so that
        # this final flush cannot raise a second broken-pipe error.
        devnull = os.open(os.devnull, os.O_WRONLY)
        os.dup2(devnull, sys.stdout.fileno())
        os.close(devnull)