File: fasttext_manager.py

package info (click to toggle)
dateparser 1.2.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky
  • size: 4,140 kB
  • sloc: python: 52,721; makefile: 155; sh: 15
file content (42 lines) | stat: -rw-r--r-- 1,495 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import logging
import os
import urllib.request
from pathlib import Path

from .exceptions import FastTextModelNotFoundException
from .utils import create_data_model_home, dateparser_model_home


def fasttext_downloader(model_name):
    """Download a fastText language-identification model into the model home.

    The model is stored as ``<model_name>.bin`` inside ``dateparser_model_home``
    regardless of the file extension used by the remote URL. If that file
    already exists, nothing is downloaded.

    The download is written to a temporary ``.part`` file next to the target
    and only moved into place once it has completed, so an interrupted or
    failed download never leaves a truncated file that a later call would
    mistake for an already-downloaded model.

    Args:
        model_name: Name of the model to fetch; one of ``"small"`` or
            ``"large"``.

    Raises:
        FastTextModelNotFoundException: If ``model_name`` is not a supported
            model name.
        Exception: If the download fails with an HTTP error (the original
            ``HTTPError`` is chained as the cause).
    """
    # Imported explicitly so that ``urllib.error`` is not available merely as
    # a side effect of importing ``urllib.request``.
    import urllib.error

    create_data_model_home()
    models = {
        "small": "https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.ftz",
        "large": "https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin",
    }
    if model_name not in models:
        message = 'dateparser-download: Couldn\'t find a model called "{}". Supported models are: {}'.format(
            model_name, ", ".join(models.keys())
        )
        raise FastTextModelNotFoundException(message)

    models_directory_path = os.path.join(dateparser_model_home, (model_name + ".bin"))

    if Path(models_directory_path).is_file():
        logging.info(
            'dateparser-download: The model "{}" is already downloaded'.format(
                model_name
            )
        )
        return

    model_url = models[model_name]
    logging.info(
        'dateparser-download: Downloading model "{}" from "{}"...'.format(
            model_name, model_url
        )
    )

    # Download next to the final location so the rename below stays on the
    # same filesystem and is therefore atomic.
    partial_path = models_directory_path + ".part"
    try:
        urllib.request.urlretrieve(model_url, partial_path)
        os.replace(partial_path, models_directory_path)
    except urllib.error.HTTPError as e:
        raise Exception(
            "dateparser-download: Fasttext model cannot be downloaded due to HTTP error"
        ) from e
    finally:
        # After a successful rename the partial file no longer exists; on any
        # failure this removes whatever was written so far.
        try:
            os.remove(partial_path)
        except FileNotFoundError:
            pass