File: uniquely_projectb.py

package info (click to toggle)
debtorrent 0.1.9
  • links: PTS, VCS
  • area: main
  • in suites: lenny
  • size: 1,452 kB
  • ctags: 1,183
  • sloc: python: 13,526; sh: 274; makefile: 51
file content (439 lines) | stat: -rwxr-xr-x 15,856 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
#!/usr/bin/env python

"""Process a Release file, creating, finding and updating any torrent files."""

import sha
import sys
import gzip
import pgdb
from bz2 import BZ2File
from math import ceil
from os import remove, rename, system, symlink
from os.path import exists
from time import strftime, gmtime
from debian_bundle import deb822
from tempfile import mkstemp

# The piece size to use, in bytes
# (must match the '-extrapieces' file's piece size)
DEFAULT_PIECESIZE = 512*1024

# The extension of the Packages files to read from the Release file
EXTENSION = ".gz"

# The fields to hash to determine the torrent identifier
# (can not contain Date, Infohash, NextPiece or OriginalPieces)
DEFAULT_HASH_FIELDS = ["Codename", "Suite", "Component", "Architecture",
                       "PieceSize", "OriginalDate"]

# The tracker announce URL to use
DEFAULT_TRACKER = "http://dttracker.debian.net:6969/announce"

# The order to write the headers in (headers not listed won't be written)
HEADER_ORDER = ["Torrent", "Infohash", "InfohashArchs", "OriginalDate", "Date",
                "PieceSize", "NextPiece", "OriginalPieces", "Codename", "Suite",
                "Component", "Architecture", "Tracker", "TorrentHashFields"]

# The maximum increase in the size of the torrent before it is reset
# (a factor applied to the OriginalPieces header in update_headers)
MAX_SIZE_INCREASE = 2.0

class Torrent(deb822._multivalued):
    """For reading piece numbers from a unique piece number torrent file."""
    # Declaring 'piecenumbers' makes deb822 parse the PieceNumbers field as
    # a list of {'number': ..., 'file': ...} dicts, one per line of the
    # multi-line field value, instead of as a single string.
    _multivalued_fields = {
        "piecenumbers": [ "number", "file" ],
    }

def read_release(filename):
    """Read the headers and Packages file names from a Release file.
    
    @type filename: C{string}
    @param filename: the Release file to read
    @rtype: C{dictionary}, C{list} of C{string}
    @return: the headers and full file names of Packages files
    
    """

    # The Packages file names in the Release file are relative to its directory
    release_dir = filename.rsplit('/', 1)[0]
    headers = {}
    packages = []

    f = open(filename, 'r')
    try:
        rel = deb822.Release(f)

        # Read the headers from the file, skipping the checksum lists
        for header in rel:
            if header.lower() not in ["md5sum", "sha1", "sha256"]:
                headers[header] = rel[header]

        # Read the Packages file names; the same file may appear under
        # several checksum fields, so only record each one once
        for checksum_field in ("MD5Sum", "SHA1", "SHA256"):
            for entry in rel.get(checksum_field, []):
                if entry['name'].endswith("Packages" + EXTENSION):
                    full_name = release_dir + "/" + entry['name']
                    if full_name not in packages:
                        packages.append(full_name)
    finally:
        f.close()

    return headers, packages

def get_old(old_file):
    """Read the headers and piece ordering data from an old file.
    
    @type old_file: C{string}
    @param old_file: the old piece ordering file to open
    @rtype: C{dictionary}, C{dictionary}
    @return: the old piece ordering (keys are the file names, values are the
        starting piece number) and headers
    
    """

    pieces = {}
    headers = {}

    try:
        f = gzip.open(old_file, 'r')
        try:
            tor = Torrent(f)

            # Read the headers from the file
            for header in tor:
                if header.lower() != 'piecenumbers':
                    headers[header] = tor[header]

            # Read the piece ordering data from the file
            for piece in tor['PieceNumbers']:
                pieces[piece['file']] = int(piece['number'])
        finally:
            # Close the file even if parsing fails partway through
            f.close()
    except Exception:
        # Missing or corrupt file: delete it (if present) and return empty
        # variables so that a brand new torrent gets created. Reset the
        # dictionaries in case parsing failed after partially filling them.
        if exists(old_file):
            remove(old_file)
        pieces = {}
        headers = {}

    return pieces, headers

def _torrent_hash(headers):
    """Return the hex SHA-1 of the header values named in TorrentHashFields.

    @type headers: C{dictionary}
    @param headers: the headers from the piece ordering file
    @rtype: C{string}
    @return: the hex digest that identifies the torrent

    """

    sha1 = sha.new()
    for header in headers["TorrentHashFields"].split():
        sha1.update(headers[header])
    return sha1.hexdigest()

def update_headers(headers, release_headers, component, arch):
    """Update the headers with new fields from the Release file.
    
    @type headers: C{dictionary}
    @param headers: the headers from the piece ordering file
    @type release_headers: C{dictionary}
    @param release_headers: the headers from the Release file
    @type component: C{string}
    @param component: the component name (e.g. main, contrib, non-free)
    @type arch: C{string}
    @param arch: the architecture name (e.g. i386, amd64, all)
    @rtype: C{boolean}
    @return: whether a new torrent has been created
    
    """

    # Set any required Release headers
    if len(release_headers.get("Date", "")) == 0:
        # Use today's date
        release_headers["Date"] = strftime('%a, %d %b %Y %H:%M:%S +0000', gmtime())

    # Create/update the headers (setdefault preserves any values that were
    # carried over from an old piece ordering file)
    headers.setdefault("OriginalDate", release_headers["Date"])
    headers["Date"] = release_headers["Date"]
    headers.setdefault("PieceSize", str(DEFAULT_PIECESIZE))
    headers.setdefault("NextPiece", str(0))
    headers["Codename"] = release_headers.get("Codename", "")
    headers["Suite"] = release_headers.get("Suite", "")
    headers["Component"] = component
    headers["Architecture"] = arch
    headers.setdefault("Tracker", DEFAULT_TRACKER)
    headers.setdefault("TorrentHashFields", " ".join(DEFAULT_HASH_FIELDS))

    # Keep the existing torrent if its identifying hash is unchanged and it
    # has not grown past MAX_SIZE_INCREASE times its original piece count
    if (headers.get("Torrent", "") == _torrent_hash(headers) and
        int(headers.get("NextPiece")) < 
        int(headers.get("OriginalPieces", "0"))*MAX_SIZE_INCREASE):
        return False

    # Otherwise reset the torrent to create a new one. OriginalDate is one
    # of the hashed fields, so the identifier must be recomputed after it
    # (and the piece counters) are reset.
    headers["OriginalDate"] = release_headers["Date"]
    headers["NextPiece"] = str(0)
    headers.pop("OriginalPieces", "")
    headers["Torrent"] = _torrent_hash(headers)

    return True

def get_new(db, suite, codename, component, arch, old_files):
    """Read the new piece data from the projectb database.
    
    Queries the database for the binary packages in the given
    suite/component/architecture, finding old files and copying their data to
    the new ordering, and collecting any new files found for later
    processing. The old_files input is modified by removing the found files
    from it.
    
    @type db: C{DB-APIv2 connection}
    @param db: an open connection to the projectb database
    @type suite: C{string}
    @param suite: the suite name (e.g. testing, unstable)
    @type codename: C{string}
    @param codename: the codename of the suite (e.g. sid, lenny) (unused)
    @type component: C{string}
    @param component: the component name (e.g. main, contrib, non-free)
    @type arch: C{string}
    @param arch: the architecture name (e.g. i386, amd64, all)
    @type old_files: C{dictionary}
    @param old_files: the original piece ordering, keys are the file names,
        values are the starting piece number
    @rtype: C{dictionary}, C{list} of (C{string}, C{long})
    @return: the piece ordering of the found old files (keys are the starting
        piece numbers, values are the file names), and the (name, size) pairs
        of the new files that were found
    
    """

    c = db.cursor()
    # Pass suite/component/arch as query parameters instead of concatenating
    # them into the SQL, so all quoting is handled by the driver
    c.execute("select location.path, files.filename, files.size " + 
              "from binaries join files on binaries.file = files.id " + 
                            "join location on files.location = location.id " +
                            "join architecture on binaries.architecture = architecture.id " + 
                            "join bin_associations on binaries.id = bin_associations.bin " +
                            "join suite on bin_associations.suite = suite.id " +
                            "join component on location.component = component.id " +
              "where suite_name = %(suite)s and component.name = %(component)s and " +
                     "arch_string = %(arch)s " +
              "order by location.path, files.filename",
              {'suite': suite, 'component': component, 'arch': arch})

    pieces = {}
    new_pieces = []
    
    res = c.fetchone()
    while res:
        # Keep only the last two path components plus the file name, which
        # is the form the file names take in the old piece ordering
        filename = '/'.join(res[0].split('/')[-2:]) + res[1]
        size = res[2]
        if filename.endswith('.deb'):
            # Check which torrent to add the info to
            if filename in old_files:
                # Found old file, so it keeps its old starting piece number
                pieces[old_files[filename]] = filename
                del old_files[filename]
            else:
                # Found new file, save it for later processing by add_new
                new_pieces.append((filename, long(size)))
        res = c.fetchone()
        
    c.close()

    return pieces, new_pieces

def add_new(pieces, new_pieces, headers):
    """Add the new files to the end of the piece ordering.
    
    Adds new files to the end of the piece ordering. The 'pieces' input is 
    modified by having the new pieces added to it. The 'new_pieces' input
    list is sorted. The 'NextPiece' header in the input 'headers' is updated.
    
    @type pieces: C{dictionary}
    @param pieces: the current piece ordering, keys are the starting piece
        numbers, values are the file names
    @type new_pieces: C{list} of (C{string}, C{long})
    @param new_pieces: the file name and file size of the new files that have
        been found and are to be added to the piece ordering
    @type headers: C{dictionary}
    @param headers: the headers from the piece ordering file
    
    """

    # Get the needed header information
    next_piece = int(headers["NextPiece"])
    piece_size = int(headers["PieceSize"])
    
    # Sort by name so duplicate file names become adjacent (and the
    # resulting piece assignment is deterministic)
    new_pieces.sort()
    old_file = ""
    old_size = 0L
    for (file, size) in new_pieces:
        if file == old_file:
            # Duplicate entry: skip it, warning only if the sizes disagree
            if size != old_size:
                print "WARNING: multiple files with different size:", file
        else:
            pieces[next_piece] = file
            # Advance by the number of pieces this file occupies (rounded up)
            next_piece += int(ceil(size/float(piece_size)))
            
        old_file = file
        old_size = size

    # Set the final header values (OriginalPieces only on a fresh torrent,
    # since setdefault leaves an existing value alone)
    headers["NextPiece"] = str(next_piece)
    headers.setdefault("OriginalPieces", headers["NextPiece"])

def write_file(filename, pieces, headers):
    """Print the new data to the file.
    
    Writes to 'filename.new' first, then renames it into place so the old
    file is replaced atomically. The 'pieces' dictionary must be non-empty.
    
    @type filename: C{string}
    @param filename: the file to write to
    @type pieces: C{dictionary}
    @param pieces: the current piece ordering, keys are the starting piece
        numbers, values are the file names
    @type headers: C{dictionary}
    @param headers: the headers from the piece ordering file
    
    """

    f = gzip.open(filename + '.new', 'w')
    try:
        # Write the headers in the canonical order (unknown headers are
        # dropped)
        for header in HEADER_ORDER:
            if header in headers:
                f.write("%s: %s\n" % (header, headers[header]))
        f.write("PieceNumbers:\n")

        # Write the starting piece numbers, right-aligned to the width of
        # the largest one
        ps = sorted(pieces.keys())
        format_string = " %" + str(len(str(max(ps)))) + "d %s\n"
        for p in ps:
            f.write(format_string % (p, pieces[p]))
    finally:
        # Close the file even if a write fails (the stale '.new' file is
        # left behind in that case, and the old file is untouched)
        f.close()

    rename(filename + '.new', filename)

def process_file(db, torrent_file, release_headers, packages,
                  suite, codename, component, arch, link_file = None):
    """Create, update or remove the torrent file for one component/arch pair.
    
    @type db: C{DB-APIv2 connection}
    @param db: an open connection to the projectb database
    @type torrent_file: C{string}
    @param torrent_file: the unique pieces file to process
    @type release_headers: C{dictionary}
    @param release_headers: the headers from the Release file
    @type packages: C{list} of C{string}
    @param packages: the list of Packages files listed in the Release file
        (modified: the matching Packages file is removed from it)
    @type suite: C{string}
    @param suite: the suite name (e.g. testing, unstable)
    @type codename: C{string}
    @param codename: the codename of the suite (e.g. sid, lenny)
    @type component: C{string}
    @param component: the component name (e.g. main, contrib, non-free)
    @type arch: C{string}
    @param arch: the architecture name (e.g. i386, amd64, all)
    @type link_file: C{string}
    @param link_file: the symlink to create to the torrent_file
    
    """
    
    # Find the Packages file that will be parsed
    # ('all' is skipped: there is no separate binary-all Packages file)
    if arch != 'all':
        found = False
        for filename in packages:
            if (filename.find(component) >= 0 and 
                filename.find("binary-"+arch) >= 0):
                found = True
                break
        if not found:
            # No data for this component/arch: remove any stale torrent
            print "WARNING: no matching Packages file for component %s, arch %s" % (component, arch)
            if exists(torrent_file):
                remove(torrent_file)
            if link_file and exists(link_file):
                remove(link_file)
            return
        # Consume the matched file so the caller can report leftovers
        packages.pop(packages.index(filename))

    # Get the old data for this torrent, if any existed
    print torrent_file + ": reading ...",
    sys.stdout.flush()
    old_pieces, headers = get_old(torrent_file)

    # Update the headers from the Release file ones; a True return means
    # the torrent was reset, so the old piece ordering must be discarded
    if update_headers(headers, release_headers, component, arch):
        print "new torrent created ...",
        sys.stdout.flush()
        old_pieces = {}

    # Parse the database for the new data (found files are removed from
    # old_pieces, leaving only the files that have since disappeared)
    print "updating ...",
    sys.stdout.flush()
    pieces, new_pieces = get_new(db, suite, codename, component, arch,
                                    old_pieces)

    # Add the old removed pieces so out-of-date mirrors will work too
    for file in old_pieces:
        pieces[old_pieces[file]] = file

    if pieces or new_pieces:
        # Add any new pieces to the end of pieces
        add_new(pieces, new_pieces, headers)
        
    if len(pieces) > 2:
        # Write the headers
        print "writing ...",
        sys.stdout.flush()
        write_file(torrent_file, pieces, headers)
        if link_file:
            if exists(link_file):
                remove(link_file)
            symlink(torrent_file, link_file)
    else:
        # Torrents with 2 or fewer pieces are treated as empty and removed
        print "empty ...",
        if exists(torrent_file):
            remove(torrent_file)
        if link_file and exists(link_file):
            remove(link_file)
        
    print "done."

def run(db, releasefile):
    """Process a single Release file.
    
    @type db: C{DB-APIv2 connection}
    @param db: an open connection to the projectb database
    @type releasefile: C{string}
    @param releasefile: the Release file to process

    """
    
    # Process the Release file
    print "Processing: %s" % releasefile
    release_headers, packages = read_release(releasefile)
    
    # Extract the relevant headers
    suite = release_headers['Suite']
    codename = release_headers["Codename"]
    components = release_headers["Components"].split()
    archs = release_headers["Architectures"].split()
    archs.append('all')
    archs.sort()
    torrent_prefix = "dists_" + codename + "_"
    link_prefix = "dists_" + suite + "_"
    torrent_suffix = "_Packages-torrent.gz"
    
    # Process each components' architecture
    for component in components:
        for arch in archs:
            torrent_file = torrent_prefix + component + "_binary-" + arch + torrent_suffix
            link_file = None
            if suite != codename:
                link_file = link_prefix + component + "_binary-" + arch + torrent_suffix
            process_file(db, torrent_file, release_headers, packages,
                         suite, codename, component, arch, link_file)
    
    if packages:
        print "The following packages files were not used:"
        for package in packages:
            print "    %s" % package

if __name__ == '__main__':
    if len(sys.argv) >= 2:
        db = pgdb.connect(database = 'projectb')
        for file in sys.argv[1:]:
            run(db, file)
        db.close()
    else:
        print "Usage: " + sys.argv[0] + " Releasefile [Releasefile ...]"