File: embedder_utils.py

package info (click to toggle)
orange3 3.40.0-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 15,908 kB
  • sloc: python: 162,745; ansic: 622; makefile: 322; sh: 93; cpp: 77
file content (119 lines) | stat: -rw-r--r-- 3,349 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import logging
import hashlib
import pickle
from os import environ
from os.path import join, isfile
from typing import Optional, Dict

from Orange.canvas.config import cache_dir


# Module-level logger, named after this module.
log = logging.getLogger(__name__)


class EmbeddingCancelledException(Exception):
    """
    Raised when the embedding task is cancelled from another thread
    (i.e. the ImageEmbedder.cancelled attribute is set to True).
    """


class EmbeddingConnectionError(ConnectionError):
    """
    Common error raised when embedding is interrupted because of connection
    problems or server unavailability, i.e. the embedder does not respond.
    """


class EmbedderCache:
    """
    Dictionary-backed cache of embedding results, persisted as a pickle file.

    The file is placed in the directory returned by ``cache_dir()`` and is
    named ``<model>_embeddings.pickle``. Entries stored with :meth:`add` are
    kept in memory only; call :meth:`persist_cache` to write them to disk.
    """

    _cache_file_blueprint = '{:s}_embeddings.pickle'

    def __init__(self, model):
        """
        Load the existing cache for `model`, or start with an empty one.

        Parameters
        ----------
        model : str
            Model name; it is used to build the cache file name.
        """
        cache_file_path = self._cache_file_blueprint.format(model)
        self._cache_file_path = join(cache_dir(), cache_file_path)
        self._cache_dict = self._init_cache()

    def _init_cache(self):
        """Return the cache stored on disk, or an empty dict if none exists."""
        if isfile(self._cache_file_path):
            return self.load_pickle(self._cache_file_path)
        return {}

    @staticmethod
    def save_pickle(obj, file_name):
        """
        Pickle `obj` to `file_name`.

        Saving is best effort: when the file cannot be written because of
        missing permissions, a warning is logged and nothing is raised.
        """
        try:
            with open(file_name, 'wb') as f:
                pickle.dump(obj, f)
        except PermissionError as ex:
            # skip saving cache if no right permissions
            log.warning(
                "Can't save embedding to %s due to %s.",
                file_name,
                type(ex).__name__,
                exc_info=True,
            )

    @staticmethod
    def load_pickle(file_name):
        """
        Unpickle and return the content of `file_name`.

        Loading is best effort: if the file cannot be read or its content
        cannot be unpickled (truncated, corrupt or incompatible file), a
        warning is logged and an empty dict is returned so that the cache
        simply starts from scratch.

        NOTE: unpickling can execute arbitrary code; this must only be used
        on files written by :meth:`save_pickle`, never on untrusted data.
        """
        try:
            with open(file_name, 'rb') as f:
                return pickle.load(f)
        except (
            EOFError,
            PermissionError,
            pickle.UnpicklingError,
            # the pickle documentation lists these as other errors that may
            # be raised while unpickling damaged or incompatible data
            AttributeError,
            ImportError,
            IndexError,
        ) as ex:
            # load empty cache if the file is not readable or not loadable
            log.warning(
                "Can't load embedding from %s due to %s.",
                file_name,
                type(ex).__name__,
                exc_info=True,
            )
            return {}

    @staticmethod
    def md5_hash(bytes_):
        """Return the hexadecimal MD5 digest of `bytes_`."""
        return hashlib.md5(bytes_).hexdigest()

    def clear_cache(self):
        """Drop all cached entries, both in memory and in the cache file."""
        self._cache_dict = {}
        self.persist_cache()

    def persist_cache(self):
        """Write the in-memory cache to the cache file."""
        self.save_pickle(self._cache_dict, self._cache_file_path)

    def get_cached_result_or_none(self, cache_key):
        """Return the value cached under `cache_key`, or None if missing."""
        return self._cache_dict.get(cache_key)

    def add(self, cache_key, value):
        """Store `value` under `cache_key` in the in-memory cache."""
        self._cache_dict[cache_key] = value


def get_proxies() -> Optional[Dict[str, str]]:
    """
    Return dict with proxy addresses if they exist.

    The addresses are read from the ``http_proxy`` and ``https_proxy``
    environment variables. An address without a scheme gets ``http://``
    prepended. Variables that are unset or empty are ignored.

    Returns
    -------
    proxy_dict
        Dictionary with format {proxy type: proxy address}, keyed by
        ``"http://"`` and/or ``"https://"``, or None if no proxy is set.
    """
    def add_scheme(url: Optional[str]) -> Optional[str]:
        if not url:
            # an unset or empty variable means "no proxy"; without this guard
            # an empty value would be turned into the bogus address "http://"
            return None
        if "://" not in url:
            # if no scheme default to http - as other libraries do (e.g. requests)
            return f"http://{url}"
        return url

    http_proxy = add_scheme(environ.get("http_proxy"))
    https_proxy = add_scheme(environ.get("https_proxy"))
    proxy_dict = {}
    if http_proxy:
        proxy_dict["http://"] = http_proxy
    if https_proxy:
        proxy_dict["https://"] = https_proxy
    return proxy_dict or None