File: Sign.py

package info (click to toggle)
aap 1.072-1.1
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k, lenny
  • size: 4,976 kB
  • ctags: 2,160
  • sloc: python: 15,113; makefile: 62; sh: 13
file content (598 lines) | stat: -rw-r--r-- 21,649 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
# Part of the A-A-P recipe executive: Store signatures

# Copyright (C) 2002-2003 Stichting NLnet Labs
# Permission to copy and use this file is specified in the file COPYING.
# If this file is missing you can find it here: http://www.a-a-p.org/COPYING

#
# This module handles remembering signatures of targets and sources.
#

import os
import os.path
import string
import md5
import time

from Util import *
from Message import *
from Filetype import ft_detect
import Global

# Both "signatures" dictionaries are indexed by the name of the target Node
# (file or directory).
# For non-virtual nodes the absolute name is used.
# Each entry is a dictionary indexed by the source-name@check-name and has a
# string value.
# The "@buildcheck" entry is used for the build commands.
# The "signfile" entry is used to remember the sign file that stores the
# signatures for this target.
# The "lastupdate" entry (see "timekey" below) holds the time at which the
# entry was last updated.
# "old_signatures" is for the signatures when we started.
# "upd_signatures" is for the signatures of items for which the build commands
# were successfully executed and are to be stored for the next time.
# Example:
# {"/aa/bb/file.o" : {  "signfile" : "/aa/bb/AAPDIR/sign",
#                       "/aa/bb/file.c@md5" : "13a445e5",
#                       "@buildcheck" : "-O2"},
#  "/aa/bb/bar.o"  : {  "signfile" : "/aa/bb/mysign",
#                       "/aa/bb/bar-debug.c@time" : "143234",
#                       "/aa/bb/bar.h@time" : "423421"}}
old_signatures = {}
upd_signatures = {}

# "new_signatures" caches the signatures we computed this invocation.  It is a
# dictionary of dictionaries:
#   new_signatures["/path/file"]["md5"] = md5hex("/path/file")
# The key for the toplevel dictionary is the Node name.
# The key for the second level is the check name.  The target name isn't used
# here.
# A second-level "cleared" key marks an item whose signatures were cleared
# with sign_clear().
new_signatures = {}

# "chd_signatures" remembers which files were marked as changed with --changed
# or ":changed".  The value is the "recursive" flag that was given.
chd_signatures = {}

# Key used for the timestamp on the signature entry.  Used to find the last
# updated entry for published files.
timekey = "lastupdate"

# Name for the sign file relative to the directory of the target or the recipe.
sign_normal_fname = in_aap_dir("sign")
sign_normal_fname_len = len(sign_normal_fname)

# Remember which sign files have been read.
# Also when the file couldn't actually be read, so that we remember to write
# this file when signs have been updated.
# An entry exists when the file has been read.  Its value is non-zero when the
# file should be written back.
sign_files = {}

def get_sign_file(recdict, target, update):
    """Make sure the sign file used for "target" has been read.
       The file is read only the first time it is asked for.
       When "update" is non-zero the file is marked as needing to be written
       back."""
    fname = fname_fold(target.get_sign_fname())

    if sign_files.has_key(fname):
        # Already read before; only the "needs writing" flag may change.
        if update:
            sign_files[fname] = 1
        return

    # First time: register the file before reading it, so that it is
    # remembered even when reading fails.
    sign_files[fname] = update
    sign_read(recdict, fname)


def sign_file_dir(fname):
    """Return the directory that names stored in sign file "fname" are
       relative to, passed through fname_fold()."""
    # A sign file with the standard name lives one directory deeper (in the
    # A-A-P directory), thus two path components must be removed.  For any
    # other sign file only the file name is removed.
    uses_normal_name = (
            len(fname) >= sign_normal_fname_len
            and (fname_fold(fname[-sign_normal_fname_len:])
                                            == fname_fold(sign_normal_fname)))
    if uses_normal_name:
        fname = os.path.dirname(fname)
    return fname_fold(os.path.dirname(fname))


# In the sign files, file names are stored with a leading "-" for a virtual
# node and "=" for a file name.  Expand to an absolute name for non-virtual
# nodes.
def sign_expand_name(recdict, dir, name):
    """Turn "name", as stored in a sign file, into the name used as a
    dictionary key.  "dir" is the directory the sign file entries are relative
    to.  The first character of "name" is the type marker: '-' means the rest
    is used as-is, anything else is taken as a file name that is made
    absolute."""
    tail = name[1:]
    if name[0] == '-' or os.path.isabs(tail):
        return tail

    tail_len = len(tail)
    if not (tail_len > 3 and tail[:3] == "../"):
        # Nothing to strip: simply put the directory in front.
        return fname_fold(os.path.join(dir, tail))

    # Strip one trailing component from "dir" for every leading "../" of the
    # name.  Done by hand because os.path.normpath() does more than needed
    # and is slower.
    dir_end = len(dir)
    skip = 3
    while 1:
        dir_end = string.rfind(dir, "/", 0, dir_end)
        if dir_end < 0:
            # More "../" items than directory components; the sign file is
            # probably corrupted.  Give back the unresolved name, which has
            # the same effect as the signature not being found.
            msg_error(recdict, _('In sign file: Too many ".." in "%s" for directory "%s"') % (name, dir))
            return fname_fold(os.path.join(dir, tail))
        if skip + 3 >= tail_len or tail[skip:skip + 3] != "../":
            break
        skip = skip + 3

    return dir[:dir_end + 1] + tail[skip:]

def sign_reduce_name(dir, name):
    """Return "name" in the form that is stored in a sign file in "dir":
    '-' plus the name for a non-absolute name, '=' plus the name shortened
    relative to "dir" for an absolute name."""
    if not os.path.isabs(name):
        return '-' + fname_fold(name)
    shortened = shorten_name(name, dir)
    return '=' + fname_fold(shortened)


#
# A sign file stores the signatures for items (sources and targets) with the
# values they had when they were computed in the past.
# The format of each line is:
#       =foo.o<ESC>=foo.c@md5_c=012346<ESC>...<ESC>\n
# "md5_c" can be "md5", "time", etc.  Note that it's not always equal to
# the "check" attribute, both "time" and "older" use "time" here.

def sign_read(recdict, fname):
    """Read the signature file "fname" into our dictionary of signatures."""
    basedir = sign_file_dir(fname)
    try:
        f = open(fname, "rb")
        for line in f.readlines():
            e = string.find(line, "\033")
            if e > 0:   # Only use lines with an ESC
                name = sign_expand_name(recdict, basedir, line[:e])
                old_signatures[name] = {"signfile" : fname_fold(fname)}
                while 1:
                    s = e + 1
                    e = string.find(line, "\033", s)
                    if e < 1:
                        break
                    i = string.rfind(line, "=", s, e)
                    if i < 1:
                        break
                    old_signatures[name][sign_expand_name(recdict,
                                           basedir, line[s:i])] = line[i + 1:e]
        f.close()
    except StandardError, e:
        # TODO: handle errors?  It's not an error if the file does not exist.
        msg_note(recdict, (_('Cannot read sign file "%s": ')
                                               % shorten_name(fname)) + str(e))


def sign_write_all(recdict):
    """Write back every sign file that was flagged as needing an update."""

    # No locking is done: we are assumed to be the only one updating these
    # files.  Sharing them would not make sense, building would fail as well.
    for fname, needs_writing in sign_files.items():
        if needs_writing:
            sign_write(recdict, fname)

def sign_write(recdict, fname):
    """Write one updated signature file."""
    sign_dir = os.path.dirname(fname)
    if not os.path.exists(sign_dir):
        try:
            os.makedirs(sign_dir)
        except StandardError, e:
            msg_warning(recdict,
                        (_('Cannot create directory for signature file "%s": ')
                                                             % fname) + str(e))
    try:
        f = open(fname, "wb")
    except StandardError, e:
        msg_warning(recdict,
                          (_('Cannot open signature file for writing: "%s": ')
                              % fname) + str(e))
        return

    def write_sign_line(f, basedir, s, old, new):
        """Write a line to sign file "f" in directory "basedir" for item "s",
        with checks from "old", using checks from "new" if they are present."""
        f.write(sign_reduce_name(basedir, s) + "\033")

        # Go over all old checks, write all of them, using the new value
        # if it is available.
        for c in old.keys():
            if c != "signfile":
                if new and new.has_key(c):
                    val = new[c]
                else:
                    val = old[c]
                f.write("%s=%s\033" % (sign_reduce_name(basedir, c), val))

        # Go over all new checks, write the ones for which there is no old
        # value.
        if new:
            for c in new.keys():
                if c != "signfile" and not old.has_key(c):
                    f.write("%s=%s\033" % (sign_reduce_name(basedir, c),
                                                                       new[c]))

        f.write("\n")

    basedir = sign_file_dir(fname)
    try:
        # Go over all old signatures, write all of them, using checks from
        # upd_signatures when they are present.
        # When the item is in upd_signatures, use the directory specified
        # there, otherwise use the directory of old_signatures.
        for s in old_signatures.keys():
            if upd_signatures.has_key(s):
                if upd_signatures[s]["signfile"] != fname:
                    continue
                new = upd_signatures[s]
            else:
                if old_signatures[s]["signfile"] != fname:
                    continue
                new = None
            write_sign_line(f, basedir, s, old_signatures[s], new)


        # Go over all updated signatures, write only the ones for which there
        # is no old signature.
        for s in upd_signatures.keys():
            if (not old_signatures.has_key(s)
                                   and upd_signatures[s]["signfile"] == fname):
                write_sign_line(f, basedir, s, upd_signatures[s], None)

        f.close()
    except StandardError, e:
        msg_warning(recdict, (_('Write error for signature file "%s": '),
                                                               fname) + str(e))

def hexdigest(m):
    """Return the digest of md5 object "m" as a string of lowercase hex
       characters, two for each character of m.digest()."""
    # NOTE: md5 objects only have a hexdigest() method of their own since
    # Python 2.0, thus the conversion is done here.
    hexstr = ''
    for byte in m.digest():
        hexstr = hexstr + ("%02x" % ord(byte))
    return hexstr


def check_md5(recdict, fname, msg = 1):
    if not os.path.isfile(fname):
        # A non-existing file isn't that bad, could be a virtual target that
        # wasn't marked as being virtual.
        if msg:
            msg_note(recdict,
                  _('Cannot compute md5 checksum for "%s": it does not exist')
                  % fname)
        return "unknown"

    try:
        f = open(fname, "rb")
        m = md5.new()
        while 1:
            # Read big blocks at a time for speed, but don't read the whole
            # file at once to reduce memory usage.
            data = f.read(32768)
            if not data:
                break
            m.update(data)
        f.close()
        res = hexdigest(m)
    except StandardError, e:
        if msg:
            msg_warning(recdict, (_('Cannot compute md5 checksum for "%s": ')
                                                             % fname) + str(e))
        res = "unknown"
    return res


def check_c_md5(recdict, fname):
    """Compute an md5 signature after filtering out irrelevant items for C
       code (white space and comments)."""
    try:
        f = open(fname)
    except StandardError, e:
        # Can't open a URL here.
        msg_warning(recdict, (_('Cannot compute md5 checksum for "%s": ')
                                                             % fname) + str(e))
        return "unknown"

    m = md5.new()

    inquote = 0
    incomment = 0
    while 1:
        # Read one line at a time.
        try:
            data = f.readline()
        except StandardError, e:
            # Can't read the file.
            msg_warning(recdict, (_('Cannot read "%s": ') % fname) + str(e))
            return "unknown"

        if not data:
            break

        # Filter out irrelevant changes:
        # - Collapse sequences of white space into one space.
        # - Remove comments.
        # TODO: double-byte characters may have a backslash or double quote
        # as their second byte, how to know this?
        data_len = len(data) - 1
        s = 0
        skipwhite = 1
        i = 0
        while i < data_len:
            if inquote:
                # Only need to search for the endquote.
                while i < data_len:
                    c = data[i]
                    i = i + 1
                    if c == '"':
                        inquote = 0
                        break
                    elif c == '\\':
                        i = i + 1
                continue

            if incomment:
                # Only need to search for the comment end "*/".
                while i < data_len:
                    if data[i] == '*' and data[i + 1] == '/':
                        incomment = 0
                        i = i + 2
                        s = i
                        skipwhite = 1
                        break
                    i = i + 1
                continue

            c = data[i]
            if c == ' ' or c == '\t':
                # White space after non-white: dump text.
                if not skipwhite:
                    m.update(data[s:i] + ' ')

                # Skip white space
                while 1:
                    i = i + 1
                    if i == data_len:
                        break
                    c = data[i]
                    if c != ' ' and c != '\t':
                        break
                s = i
                skipwhite = 0
                if i == data_len:
                    break

            if c == '/' and (data[i + 1] == '/' or data[i + 1] == '*'):
                # Start of // or /* comment.
                if i > s:
                    m.update(data[s:i] + ' ')
                i = i + 1
                if data[i] == '/':
                    s = data_len
                    break
                incomment = 1
            else:
                skipwhite = 0
                if c == "'":
                    # skip '"' or '\'', not the start of a sting
                    if data[i + 1] == '\\':
                        i = i + 1
                    i = i + 2
                elif c == '"':
                    inquote = 1
            i = i + 1

        if not (incomment or skipwhite) and s < data_len:
            m.update(data[s:data_len] + ' ')

    try:
        f.close()
    except:
        # Error while closing a read file???
        pass

    return hexdigest(m)


def buildcheckstr2sign(str):
    """Return the buildcheck signature for string "str": its md5 checksum as
       a string of hex characters."""
    digest = md5.new()
    digest.update(str)
    return hexdigest(digest)


def _sign_lookup(signatures, name, key):
    """
    Get the "key" signature for item "name" from dictionary "signatures".
    "name" must have gone through fname_fold().
    """
    if not signatures.has_key(name):
        return ''
    s = signatures[name]
    if not s.has_key(key):
        return ''
    return s[key]


def sign_clear(name):
    """
    Throw away the signatures computed for item "name" in this invocation.
    Only a "cleared" marker is left behind, so that get_new_sign() can tell
    that this happened.
    Used when the item has been built.
    """
    new_signatures[name] = {"cleared": 1}


def get_new_sign(recdict, name, check, force = 0):
    """Return the current signature of item "name" for check "check".
       "name" is the absolute name for non-virtual nodes; it can be a URL.
       The result does not depend on the target.
       Supported checks are "time", "md5" and "c_md5"; for any other check
       "unknown" is returned.
       The signature is computed once and remembered in "new_signatures".
       When "force" is non-zero a cleared signature is computed again (for
       --touch).
       Always returns a string (also for timestamps)."""
    name = fname_fold(name)

    # When build commands are skipped and the target was pretended to be
    # built, its signature was cleared.  The file did not change, but the
    # signature must look different, thus don't compute it again.
    if not force and skip_commands():
        if (new_signatures.has_key(name)
                and new_signatures[name].has_key("cleared")):
            return "cleared"

    cached = _sign_lookup(new_signatures, fname_fold(name), check)
    if cached:
        return cached

    # Not computed before (or the result was empty): do it now.
    if check == "time":
        from Remote import url_time
        sign = str(url_time(recdict, name))
    elif check == "md5":
        sign = check_md5(recdict, name)
    elif check == "c_md5":
        sign = check_c_md5(recdict, name)
    # TODO: other checks, defined with actions
    else:
        sign = "unknown"

    # Remember the result to avoid computing it many times.
    if not new_signatures.has_key(name):
        new_signatures[name] = {}
    new_signatures[name][check] = sign

    return sign

def sign_clear_target(recdict, target):
    """Forget the old and updated signatures of node "target".  Called after
       successfully executing its build rules; sign_updated() should be called
       next for each source.
       The sign file of the target is marked as needing to be written."""
    get_sign_file(recdict, target, 1)
    target_name = fname_fold(target.get_name())
    for signatures in (old_signatures, upd_signatures):
        if signatures.has_key(target_name):
            del signatures[target_name]


def sign_clear_file(fname, recursive):
    """Mark file "fname" as changed and drop its updated signatures.
       "recursive" is stored as the value of the "changed" mark.
       Used for ":changed" and "--changed=FILE"."""
    chd_signatures[full_fname(fname)] = recursive
    # NOTE(review): the mark above is stored under full_fname(fname) while
    # "fname" is used as given here; other code uses fname_fold()ed names as
    # the keys of upd_signatures -- confirm this is intended.
    try:
        del upd_signatures[fname]
    except KeyError:
        pass


def sign_clear_all():
    """Reset all signature bookkeeping of this module: the old, updated, new
       and changed signatures and the list of sign files that were read.
       Used when starting to execute a toplevel recipe."""
    global sign_files
    global old_signatures, upd_signatures, new_signatures, chd_signatures

    # Rebind every table to a fresh, empty dictionary.
    sign_files = {}
    old_signatures = {}
    upd_signatures = {}
    new_signatures = {}
    chd_signatures = {}


def _sign_upd_sign(recdict, target, key, value):
    """Store "value" under "key" in the updated signatures of node "target".
       The entry is created when needed, its timestamp is set to the current
       time and the sign file of the target is marked as needing to be
       written."""
    get_sign_file(recdict, target, 1)
    target_name = fname_fold(target.get_name())

    try:
        entry = upd_signatures[target_name]
    except KeyError:
        # First update for this target: remember where it is to be stored.
        entry = {"signfile": fname_fold(target.get_sign_fname())}
        upd_signatures[target_name] = entry

    entry[key] = value
    # Update the timestamp on the target.
    entry[timekey] = str(time.time())


def sign_updated(recdict, source, dict, target):
    """Remember the current signature of node "source" for node "target".
    Called after successfully executing the build rules for "target".  The
    check to use is obtained from "dict" and the attributes of "source"."""
    src_name = source.get_name()
    check = check_name(recdict, src_name, dict, source.attributes)
    sign = get_new_sign(recdict, src_name, check, force = 1)
    _sign_upd_sign(recdict, target, src_name + '@' + check, sign)

    # When the source was marked as changed with a true (recursive) value the
    # target is to be considered changed as well.
    if not chd_signatures.get(fname_fold(src_name)):
        return
    chd_signatures[fname_fold(target.get_name())] = 1


def buildcheck_updated(recdict, target, value):
    """Remember "value" as the new buildcheck signature of node "target".
       Called after successfully executing the build rules for "target"."""
    buildcheck_key = '@buildcheck'
    _sign_upd_sign(recdict, target, buildcheck_key, value)


def get_old_sign(recdict, name, check, target, rootname = None):
    """Return the remembered "check" signature of item "name" for target node
       "target".
       "name" must be an absolute and normalized path.
       Returns "changed" when the item was marked as changed and an empty
       string when no signature is found.
       "rootname" is used for publishing and the "--contents" option: when
       given, all targets whose name starts with "rootname" are searched and
       the signature of the most recently updated one is returned."""
    name = fname_fold(name)
    if chd_signatures.has_key(name):
        # This file was marked as changed.
        return "changed"

    # May need to read the sign file for this target.
    get_sign_file(recdict, target, 0)

    key = name + '@' + check

    if rootname:
        # Go through all updated and old signatures to check if "rootname"
        # matches.  Find the entry that was updated most recently.
        rootname = fname_fold(rootname)
        rootname_len = len(rootname)
        found = ''
        newest = 0
        for sigdict in [upd_signatures, old_signatures]:
            for target_name in sigdict.keys():
                if (len(target_name) <= rootname_len
                        or target_name[:rootname_len] != rootname):
                    continue
                entry = sigdict[target_name]
                if not (entry.has_key(key) and entry.has_key(timekey)):
                    continue
                stamp = float(entry[timekey])
                if stamp > newest:
                    found = entry[key]
                    newest = stamp
        return found

    # Use the updated signature if it exists, otherwise use the old one.
    target_name = fname_fold(target.get_name())
    return (_sign_lookup(upd_signatures, target_name, key)
            or _sign_lookup(old_signatures, target_name, key))


def check_name(recdict, name, itemdict, altdict = None):
    """Return the name of the check to be used for item "name".
       A "check" entry in "itemdict" is used first, then one in "altdict"
       (the attributes of the node) when given.  Without either, a directory
       gets "none" and anything else the value of $DEFAULTCHECK."""
    if itemdict.has_key("check"):
        return itemdict["check"]
    if altdict and altdict.has_key("check"):
        return altdict["check"]

    # TODO: make mapping from name or filetype to check configurable, using
    # the "filetype" entry of "itemdict" or ft_detect().
    is_directory = (itemdict.get("directory")
                    or (altdict and altdict.get("directory"))
                    or os.path.isdir(name))
    if is_directory:
        # default check for directories: none
        return "none"

    # default check is given with $DEFAULTCHECK
    return get_var_val_int(recdict, "DEFAULTCHECK")


# vim: set sw=4 et sts=4 tw=79 fo+=l: