File: cache.py

package info (click to toggle)
python-datacache 1.4.1-2
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 284 kB
  • sloc: python: 935; sh: 14; makefile: 4
file content (134 lines) | stat: -rw-r--r-- 4,463 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from os.path import exists, join
from os import remove

from . import common
from . import download
from .database_helpers import db_from_dataframe


class Cache(object):
    """
    Keeps track of files downloaded into a single cache subdirectory.

    The cache directory is whatever `common.get_data_dir(subdir)` returns,
    and all downloading is delegated to the `download` module. Paths
    returned by `fetch` are remembered in memory, keyed by
    (URL, decompress), so repeated requests can skip the download module.
    """

    def __init__(self, subdir="datacache"):
        """
        Parameters
        ----------
        subdir : str
            Non-empty name handed to `common.get_data_dir` to obtain the
            cache directory path.
        """
        assert subdir
        self.subdir = subdir
        self.cache_directory_path = common.get_data_dir(subdir)

        # dictionary mapping from (URL, decompress) pair to local paths
        # TODO: handle decompression separately from download,
        # so we can use copies of compressed files we've already downloaded
        self._local_paths = {}

    @staticmethod
    def _remove_if_exists(path):
        """
        Delete the file at `path` if it is currently present; do nothing
        when it is already gone.
        """
        if exists(path):
            remove(path)

    def delete_url(self, url):
        """
        Delete local files downloaded from given URL.

        Both the compressed and the decompressed variants are handled.
        Remembered paths whose files have already disappeared from disk
        are simply forgotten instead of raising.
        """
        # file may exist locally in compressed and decompressed states
        # delete both
        for decompress in (False, True):
            # Forget the entry first so a stale path (file already removed
            # outside of this object) can never linger in the dictionary.
            remembered_path = self._local_paths.pop((url, decompress), None)
            if remembered_path is not None:
                self._remove_if_exists(remembered_path)

            # possible that file was downloaded via the download module without
            # using the Cache object, this wouldn't end up in the local_paths
            # but should still be deleted
            default_path = self.local_path(
                url, decompress=decompress, download=False)
            self._remove_if_exists(default_path)

    def delete_all(self):
        """
        Forget every remembered path, clear the cache directory via
        `common.clear_cache`, then call `common.ensure_dir` on it.
        """
        self._local_paths.clear()
        common.clear_cache(self.cache_directory_path)
        common.ensure_dir(self.cache_directory_path)

    def exists(self, url, filename=None, decompress=False):
        """
        Return True if a local file corresponding to these arguments
        exists.

        The check is delegated to `download.file_exists` using this
        cache's subdirectory.
        """
        return download.file_exists(
            url,
            filename=filename,
            decompress=decompress,
            subdir=self.subdir)

    def fetch(
            self,
            url,
            filename=None,
            decompress=False,
            force=False,
            timeout=None,
            use_wget_if_available=True):
        """
        Return the local path to the downloaded copy of a given URL.
        Don't download the file again if it's already present,
        unless `force` is True.

        A remembered path is only reused while its file still exists on
        disk; otherwise the entry is dropped and `download.fetch_file` is
        called again. The remaining arguments are forwarded unchanged to
        `download.fetch_file`.
        """
        # NOTE(review): the key does not include `filename`, so a second
        # fetch of the same URL under a different filename returns the
        # first remembered path -- confirm whether that is intended.
        key = (url, decompress)
        if not force and key in self._local_paths:
            cached_path = self._local_paths[key]
            if exists(cached_path):
                return cached_path
            # The file vanished since it was recorded; forget it and
            # fall through to a fresh download.
            del self._local_paths[key]

        path = download.fetch_file(
            url,
            filename=filename,
            decompress=decompress,
            subdir=self.subdir,
            force=force,
            timeout=timeout,
            use_wget_if_available=use_wget_if_available)

        self._local_paths[key] = path
        return path

    def local_filename(
            self,
            url=None,
            filename=None,
            decompress=False):
        """
        What local filename will we use within the cache directory
        for the given URL/filename/decompress options.

        Delegates to `common.build_local_filename`.
        """
        return common.build_local_filename(url, filename, decompress)

    def local_path(self, url, filename=None, decompress=False, download=False):
        """
        What will the full local path be if we download the given file?

        When `download` is True the file is actually fetched (through
        `fetch`, with its default `force`/`timeout` settings) and the
        resulting path is returned. Otherwise nothing is downloaded and
        the path is built by joining the cache directory with
        `local_filename(...)`.
        """
        # `download` here is the boolean parameter, which shadows the
        # module of the same name inside this method only.
        if download:
            return self.fetch(url=url, filename=filename, decompress=decompress)

        local_name = self.local_filename(url, filename, decompress)
        return join(self.cache_directory_path, local_name)

    def db_from_dataframe(
            self,
            db_filename,
            table_name,
            df,
            key_column_name=None):
        """
        Call the module-level `db_from_dataframe` helper for this cache's
        subdirectory, passing `key_column_name` as its `primary_key`
        argument, and return whatever that helper returns.
        """
        return db_from_dataframe(
            db_filename=db_filename,
            table_name=table_name,
            df=df,
            primary_key=key_column_name,
            subdir=self.subdir)