File: Cache.py

package info (click to toggle)
aap 1.072-1.1
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k, lenny
  • size: 4,976 kB
  • ctags: 2,160
  • sloc: python: 15,113; makefile: 62; sh: 13
file content (499 lines) | stat: -rw-r--r-- 19,158 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
# Part of the A-A-P recipe executive: Cache downloaded files

# Copyright (C) 2002-2003 Stichting NLnet Labs
# Permission to copy and use this file is specified in the file COPYING.
# If this file is missing you can find it here: http://www.a-a-p.org/COPYING

#
# This module handles the caching of remote files.
#

# There can be several directories with a cache.  They can be shared between
# users to minimize the number of downloads.  $CACHEPATH specifies the list of
# directories.
#
# Each cache directory contains:
#  - An index file, each line containing these fields:
#       URL<ESC>localname<ESC>remote-time<ESC>local-time<ESC>access-time<ESC>
#    "URL" is the url of the file being cached
#    "localname" is the file name of the cached file, excluding the path.
#    Timestamps are in seconds.
#    Always written in binary mode, to make them usable on all systems.
#  - The cache files, with an arbitrary file name (actually a random number).
#    Suffixes are kept, some tools require this.
#  - A lock file, only present while the index file is being updated.
#
# The index files are read into our internal cache lookup, so that we can
# access it quickly and update it for downloaded files.  The updated index
# files are written when we exit or when $CACHEPATH is changed.

import time
import os
import os.path
import errno

from Remote import is_url, url_download
from Dictlist import varname2dictlist
from Util import *
from Message import *
import Global


cache = {}                  # In-memory cache lookup: maps URL -> Cache object.
cache_indexes = {}          # Set (dict used as set) of index file names that
                            # have been read or need updating.
cache_val_read = {}         # Keys are "$CACHEPATH>cwd" strings for which the
                            # cache index files have already been read.

index_fname = "index"       # name of the file with the cache index


class Cache:
    def __init__(self, indexname, localname, rtime, ltime, atime):
        """One entry in the in-memory dictionary of cached files."""
        # Full path of the index file this entry belongs to.
        self.indexname = indexname
        # File name of the cached file, without a directory part.
        self.localname = localname
        # Timestamp of the remote file; zero when unknown.
        self.rtime = rtime
        # Timestamp of the local (cached) file.
        self.ltime = ltime
        # Timestamp of the most recent access.
        self.atime = atime
        # Non-zero when the cached file was updated this session.
        self.updated = 0
        # Non-zero when the cached file no longer exists on disk.
        self.gone = 0
        # Non-zero when this entry was written to an index file.
        self.written = 0

    def localname_path(self):
        """Return the full path of the cache file: the directory of the
           index file joined with the local file name."""
        cachedir = os.path.dirname(self.indexname)
        return os.path.join(cachedir, self.localname)

    def timestamp(self):
        """Return the last modified time: the remote timestamp when it is
           known (non-zero), otherwise the local timestamp."""
        return self.rtime or self.ltime


def cache_read(recdict, fname, rcache, lock):
    """Read cache index file "fname" and add entries to the cache "rcache".
       "fname" must have an absolute path."""
    # When "lock" is non-zero, wait for a lock in the index file to disappear.
    # We don't lock the cache index file, because it would make it impossible
    # to use a cache from a read-only directory.  There is a tiny risk that the
    # index file is truncated if some other program starts updating it just
    # after we checked the index file isn't locked.  Read the whole file at
    # once to minimize that.  The only risk is that we download a file that's
    # already cached.

    if lock:
        try:
            index_lock(recdict, fname, 0)
        except:
            # What to do when the lock sticks?  Let's just ignore it.
            pass

    # Read all the lines from the index file at once.
    # When there is an error silently ignore it: an unreadable or missing
    # index simply means nothing is cached here.
    try:
        f = open(fname, "rb")
        lines = f.readlines()
        f.close()
    except:
        return

    global cache_indexes
    cache_indexes[fname] = 1    # remember we have read this index file

    # Read entries into our cache.  Each line has five ESC-terminated fields:
    # URL, local file name, remote time, local time and access time.
    for line in lines:
        try:
            # Use the str.split() method: the "string" module is not
            # imported by this module, so string.split() was unreliable.
            url, lfname, rtime, ltime, atime, rest = line.split("\033")
            rtime = long(rtime)
            ltime = long(ltime)
            atime = long(atime)
        except:
            # Some error in this line, skip it.
            continue
        if url in rcache:
            # URL already exists.  Only use the new entry when it's newer.
            rc = rcache[url]
            if rtime > rc.rtime or ltime > rc.ltime:
                rc.indexname = fname
                rc.localname = lfname
                rc.rtime = rtime
                rc.ltime = ltime
                # Keep the most recent access time of the two entries.
                if atime > rc.atime:
                    rc.atime = atime
                rc.updated = 0
                rc.gone = 0
        else:
            # Add new entry.
            rcache[url] = Cache(fname, lfname, rtime, ltime, atime)


def index_write_newest(recdict, fname, f, use_cache, check_cache):
    """Write lines to index file "fname" opened as "f" from cache "use_cache",
       skipping entries for other index files and entries that are newer in
       "check_cache".

       Called twice by cache_update(): once with the freshly re-read index
       entries checked against our in-memory cache, once the other way
       around.  The "written" flag on entries prevents writing duplicates
       on the second pass."""
    for url in use_cache.keys():
        ce = use_cache[url]
        # Each cache entry records which index file it belongs to; only
        # entries for this index file are considered.
        if ce.indexname == fname:
            if ce.gone:
                pass            # skip entries with deleted files

            # Only write a line when the check_cache doesn't have this entry or
            # it does have an entry which is not written but our entry is
            # newer.
            else:
                if check_cache.has_key(url) and not check_cache[url].gone:
                    ck = check_cache[url]
                else:
                    ck = None
                # Write when there is no competing entry, or the competing
                # entry wasn't written yet and ours is at least as new
                # (equal timestamps also count, so one of the two duplicate
                # entries is kept).
                if (not ck
                        or (not ck.written
                                and (ce.rtime > ck.rtime
                                        or ce.ltime > ck.ltime
                                        or (ce.rtime == ck.rtime
                                                and ce.ltime == ck.ltime)))):
                    # Use the most recent access time of the two entries.
                    if ck and check_cache[url].atime > ce.atime:
                        atime = ck.atime
                    else:
                        atime = ce.atime
                    # Fields are separated and terminated by ESC, matching
                    # what cache_read() parses.
                    f.write("%s\033%s\033%d\033%d\033%d\033\n"
                              % (url, ce.localname, ce.rtime, ce.ltime, atime))
                    ce.updated = 0
                    ce.written = 1
                elif not (ck and ck.written) and ce.localname != ck.localname:
                    # An entry that is not written in the index file can be
                    # deleted.
                    # (Note: "ck" is never None here, the "if" above handles
                    # that case.)
                    fn = os.path.join(os.path.dirname(fname), ce.localname)
                    try:
                        os.remove(fn)
                        ce.gone = 1
                    except EnvironmentError, e:
                        msg_warning(recdict,
                                (_('Can\'t delete cached file "%s"') % fn)
                                                                      + str(e))


def cache_update(recdict, fname):
    """Update cache index file "fname" for the entries in our cache.

       Locks the index file, merges the on-disk entries (which other
       processes may have changed meanwhile) with our in-memory cache,
       rewrites the index file and unlocks it again."""
    # First check if there is anything to update for this index file.
    global cache
    foundone = 0
    for url in cache.keys():
        if cache[url].indexname == fname and (
                                        cache[url].updated or cache[url].gone):
            foundone = 1
            break
    if not foundone:
        return

    # If the cache is "AAPDIR/cache/", create the directory if it doesn't exist.
    dir = os.path.dirname(fname)
    if (os.path.basename(dir) == "cache"
            and os.path.dirname(dir) == Global.aap_dirname
            and not os.path.exists(dir)):
        try:
            assert_aap_dir(recdict)
            os.makedirs(dir)
        except:
            # Silently skip this cache when it doesn't exist and we can't
            # create it.
            return

    # Lock the index file.
    # If this fails we probably can't write to this cache directory.
    try:
        index_lock(recdict, fname, 1)
    except:
        msg_note(recdict, _('Can\'t lock cache index file "%s"') % fname)
        return

    # Read the entries from the index file into "tcache".
    # They may have been updated since the last time we read it.
    tcache = {}
    cache_read(recdict, fname, tcache, 0)

    # open index file for writing (truncates the old contents; the lock
    # protects against other writers)
    try:
        f = open(fname, "wb")
    except EnvironmentError, e:
        # Can't write the index file, even though we can lock it!?
        msg_warning(recdict,
                    (_('Can\'t write cache index file "%s"') % fname) + str(e))
    else:
        try:
            # Rewrite entries that were already in this index file and for
            # which we don't have a newer entry.
            index_write_newest(recdict, fname, f, tcache, cache)

            # Add new entries from our cache.
            index_write_newest(recdict, fname, f, cache, tcache)

            # close file
            f.close()

        except EnvironmentError, e:
            msg_warning(recdict,
                           _('Error writing index file "%s"') % fname + str(e))

    # unlock file (always, even when writing failed)
    try:
        index_unlock(fname)
    except:
        msg_warning(recdict, _('Can\'t unlock cache index file "%s"') % fname)


def get_lock_fname(fname):
    """Return the name of the lock file guarding index file "fname".
       The lock file lives in the same directory as the index file."""
    dirname = os.path.dirname(fname)
    return os.path.join(dirname, "indexlock")


def index_lock(recdict, fname, create):
    """Wait for index file "fname" to be unlocked.
       When "create" is non-zero: Lock index file "fname".
       Timeout after a while and delete the lock.

       Locking is done by creating a separate lock file with O_EXCL, which
       is atomic.  Raises an exception when the lock can't be obtained or
       an orphaned lock file can't be removed."""
    lname = get_lock_fname(fname)
    did_msg = 0

    # Try up to two hundred times.  Avoids hangup when something is wrong.
    trycount = 0
    while trycount < 200:
        trycount = trycount + 1

        try:
            if create:
                # Try creating the lock file, fail if it already exists.
                fd = os.open(lname, os.O_WRONLY + os.O_CREAT + os.O_EXCL)

                # Write our process number in it, so we know who created it.
                try:
                    pid = os.getpid()
                except:
                    pid = 1
                os.write(fd, "%d\n" % pid)
                os.close(fd)
            elif os.path.exists(lname):
                # Lock file exists.
                # Clumsy: throw an exception to go into the code below.
                raise IOError, (errno.EEXIST, "lock is there")
            break

        except EnvironmentError, (error, msg):
            # If creation failed for any other reason than the file already
            # existing, give up and propagate the error.
            if error != errno.EEXIST:
                raise

            # Couldn't create the lock file.  After trying for 10 seconds
            # (100 tries with a 0.1 second sleep), assume it's an orphan
            # and remove it; the loop then retries for up to another 10
            # seconds.
            # Note: this throws an IOError if we can't delete the lock file.
            if trycount == 100:
                os.remove(lname)
                if os.path.exists(lname):
                    raise IOError, 'Can\'t delete lock file "%s"' % lname
                msg_info(recdict, _('Deleted old lock file "%s"') % lname)
                did_msg = 0
                continue

        # Wait a tenth of a second before trying again.
        if not did_msg:
            msg_info(recdict,
                        _('Waiting 10 seconds for lock file "%s" to disappear')
                                                                       % lname)
            did_msg = 1
        time.sleep(0.1)

    if did_msg:
        msg_info(recdict, _("Lock file is gone now, continuing..."))


def index_unlock(fname):
    """Unlock index file "fname" by removing its lock file.
       Raises an EnvironmentError when the lock file can't be removed."""
    # The lock file sits next to the index file (see get_lock_fname()).
    lockfile = os.path.join(os.path.dirname(fname), "indexlock")
    os.remove(lockfile)


def cache_dirlist(recdict):
    """Return the value of the $CACHEPATH variable as a list of directory
       names (strings), with "~" expanded to the home directory."""
    if recdict["_no"]["CACHEPATH"] == '':
        return []
    dictlist = varname2dictlist(recdict, "_no", "CACHEPATH")
    return [os.path.expanduser(item["name"]) for item in dictlist]


def fill_cache(recdict):
    """Read all cache index files in $CACHEPATH."""
    # The cache only needs filling when it wasn't done yet for the current
    # combination of $CACHEPATH and the working directory.  This avoids
    # expanding the $CACHEPATH items to absolute paths over and over.
    check = get_var_val_int(recdict, "CACHEPATH") + '>' + os.getcwd()
    if check in cache_val_read:
        return
    cache_val_read[check] = 1

    # Read the index file of each $CACHEPATH entry not seen before.
    for dirname in cache_dirlist(recdict):
        index = os.path.join(os.path.abspath(dirname), index_fname)
        if index not in cache_indexes:
            cache_read(recdict, index, cache, 1)


def dump_cache(recdict):
    """Write all updated cache index files to disk and empty the in-memory
       cache.  Called just before $CACHEPATH is changed."""
    global cache, cache_indexes, cache_val_read
    # Flush every index file we read or touched this session.
    for idxname in cache_indexes.keys():
        cache_update(recdict, idxname)
    # Start over with an empty cache; the new $CACHEPATH will be re-read.
    cache = {}
    cache_indexes = {}
    cache_val_read = {}


def cache_lookup(recdict, name, cache_update_str = None):
    """Lookup URL "name" in the cache.  Return the Cache object if found
       and still considered valid, None otherwise."""
    if name not in cache:
        return None
    ent = cache[name]

    # if the entry was updated this session, it's always accepted.
    if ent.updated:
        return ent

    # Accept the cached file while it's younger than $CACHEUPDATE.
    if not cache_update_str:
        cache_update_str = get_var_val(0, recdict, "_no", "CACHEUPDATE")
    if time.time() < ent.ltime + date2secs(cache_update_str):
        return ent

    # When the old timestamp of the remote file is known, obtain the current
    # remote timestamp and accept the cached file when it hasn't changed.
    if ent.rtime != 0:
        from Remote import remote_time

        rt = remote_time(recdict, name)
        if rt != 0 and rt == ent.rtime:
            msg_depend(recdict, _('timestamp did not change for "%s"')
                                                                    % name)
            # Update ltime, so that we don't obtain the remote
            # timestamp too often.
            ent.ltime = time.time()
            ent.updated = 1
            return ent
        if rt == 0:
            msg_info(recdict, _('cannot get timestamp for "%s"') % name)
        else:
            msg_depend(recdict,
                    _('timestamp for "%s" changed from %d to %d')
                                                   % (name, ent.rtime, rt))

    return None


def local_name(recdict, name, cache_update_str = None):
    """Get the local file name for "name":
       If it's a local file "name" is returned.
       If it's a remote file and a cached copy is available, return the name of
       the cached copy.
       Otherwise try to download the file, cache it and return the name of the
       cached file.
       Returns the local name and a flag indicating the file is in the cache.
       Gives an error message and returns None if this fails."""
    from VersCont import separate_scheme

    # "file://" URLs and plain names are local files: no caching needed.
    scheme, fname = separate_scheme(name)
    if scheme == "file":
        return os.path.abspath(os.path.expanduser(fname)), 0
    if not is_url(name):
        return os.path.abspath(os.path.expanduser(name)), 0

    # Don't fill the cache when --nocache specified.
    if not Global.cmd_args.has_option("nocache"):
        fill_cache(recdict)

    msg_extra(recdict, "Looking up local name for %s" % name)
    cache_entry = cache_lookup(recdict, name, cache_update_str)
    if cache_entry:
        # Check if the file really exists, it may have been cleared since
        # we read the index file.
        p = cache_entry.localname_path()
        if os.path.exists(p):
            # Update the last-access time.
            cache_entry.atime = time.time()
            cache_entry.updated = 1
            return p, 1
        cache_entry.gone = 1

    # Skip when not actually building.
    if skip_commands():
        msg_info(recdict, _('skip downloading "%s"') % name)
        return None, 0

    # Isolate the suffixes, so the cache file keeps them (some tools
    # require the right suffix, e.g. for compressed files).
    # NOTE(review): "string" is not imported in this module; presumably it
    # arrives via "from Util import *" -- confirm.
    n = os.path.basename(name)
    i = string.find(n, ".")
    if i <= 0:
        suf = ''        # no suffix or starts with a dot
    else:
        suf = n[i:]     # suffix, can also be ".c.diff.gz"

    # Find a cache directory where we can write.
    # TODO: remember directories where we can't write and skip them.
    import random
    for cachedir in cache_dirlist(recdict):
        if not os.path.exists(cachedir):
            # If the name starts with "AAPDIR/" or $HOME may create the
            # directory.
            found = ''
            for dname in [ home_dir(), Global.aap_dirname ]:
                if dname:
                    l = len(dname)
                    if (len(cachedir) > l
                            and cachedir[:l] == dname
                            and cachedir[l] in "\\/"):
                        found = dname
                        break
            if not found:
                continue
            # Try creating the directory.  When this fails silently skip it.
            try:
                os.makedirs(cachedir)
            except:
                continue

        # Loop to try different random cache file names.
        while 1:
            fname = str(random.randint(1,99999999)) + suf
            path = os.path.join(cachedir, fname)
            try:
                # Try creating the cached file, fail if it already exists.
                # O_EXCL makes this atomic, so two processes can't pick the
                # same name.
                fd = os.open(path, os.O_WRONLY + os.O_CREAT + os.O_EXCL)
                os.close(fd)
                break
            except EnvironmentError, (error, msg):
                if error != errno.EEXIST:
                    # Can't create this file for some reason, try another
                    # directory.
                    path = ''
                    break

        if path:
            # Try downloading the file to the cache directory.
            # NOTE(review): the first value returned by url_download() is
            # unused here -- presumably the local file name; confirm.
            try:
                f, rtime = url_download(recdict, name, path)
            except EnvironmentError, e:
                msg_note(recdict, _('Cannot download "%s": %s')
                                                              % (name, str(e)))
                # Delete the empty file we created.
                try_delete(path)
                return None, 0

            # Downloading worked, add an entry to the cache index.
            ifname = os.path.join(cachedir, index_fname)
            cache[name] = Cache(ifname, fname, rtime, os.path.getmtime(path),
                                                                   time.time())
            cache[name].updated = 1
            # remember we need to update this index file
            cache_indexes[os.path.abspath(ifname)] = 1
            return path, 1

    # Get here when cannot write to any cache directory.
    msg_warning(recdict, _('Cannot write in any cache directory'))
    return None, 0

# vim: set sw=4 et sts=4 tw=79 fo+=l: