1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561
|
#!/usr/bin/env python
#tooltool is a lookaside cache implemented in Python
#Copyright (C) 2011 John H. Ford <john@johnford.info>
#
#This program is free software; you can redistribute it and/or
#modify it under the terms of the GNU General Public License
#as published by the Free Software Foundation version 2
#
#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#GNU General Public License for more details.
#
#You should have received a copy of the GNU General Public License
#along with this program; if not, write to the Free Software
#Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# A manifest file specifies files in its directory that are stored
# elsewhere. It should only contain files from the directory in which
# the manifest file resides, and it should be called 'manifest.manifest'
__version__ = '1'
import json
import os
import optparse
import logging
import hashlib
import urllib2
import ConfigParser
log = logging.getLogger(__name__)
class FileRecordJSONEncoderException(Exception):
    """Raised by FileRecordJSONEncoder when asked to encode a non-FileRecord."""
class InvalidManifest(Exception):
    """Raised when a manifest file is absent or its contents cannot be parsed."""
class ExceptionWithFilename(Exception):
    """Base class for errors that concern one particular file.

    The offending file's name is available as the ``filename`` attribute.
    """

    def __init__(self, filename):
        # Hand the filename to Exception as well, so that str(exc) and
        # exc.args are not empty and the exception can be re-created from
        # its args (e.g. when it is pickled).
        Exception.__init__(self, filename)
        self.filename = filename
class DigestMismatchException(ExceptionWithFilename):
    """Error type named for a digest mismatch on a file.

    Nothing in the code visible here raises it.
    """
class MissingFileException(ExceptionWithFilename):
    """Raised when a file that should be validated is not present on disk."""
class FileRecord(object):
    """One file tracked by a manifest.

    A record holds the file's name, its expected size, its expected hex
    digest and the name of the hashing algorithm that produced the digest.
    It can check a local file of the same name against those expectations.
    """

    def __init__(self, filename, size, digest, algorithm):
        object.__init__(self)
        self.filename = filename
        self.size = size
        self.digest = digest
        self.algorithm = algorithm
        log.debug("creating %s 0x%x" % (self.__class__.__name__, id(self)))

    def __eq__(self, other):
        """Records are equal when all four of their fields are equal."""
        if self is other:
            return True
        return (self.filename == other.filename and
                self.size == other.size and
                self.digest == other.digest and
                self.algorithm == other.algorithm)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __str__(self):
        return repr(self)

    def __repr__(self):
        return "%s.%s(filename='%s', size='%s', digest='%s', algorithm='%s')" % (
            __name__, self.__class__.__name__,
            self.filename, self.size, self.digest, self.algorithm)

    def present(self):
        """Return True if a file named ``filename`` exists.

        Only existence is checked, not validity.
        """
        return os.path.exists(self.filename)

    def validate_size(self):
        """Return True if the local file has the recorded size.

        Raises MissingFileException if the file is not present.
        """
        if not self.present():
            log.debug("trying to validate size on a missing file, %s", self.filename)
            raise MissingFileException(filename=self.filename)
        return self.size == os.path.getsize(self.filename)

    def validate_digest(self):
        """Return True if the local file hashes to the recorded digest.

        Raises MissingFileException if the file is not present.
        """
        if not self.present():
            # The filename used to sit inside the string literal, so it was
            # never interpolated into the log message.
            log.debug("trying to validate digest on a missing file, %s", self.filename)
            raise MissingFileException(filename=self.filename)
        with open(self.filename, 'rb') as f:
            return self.digest == digest_file(f, self.algorithm)

    def validate(self):
        """Return True if both the size and the digest match.

        The size is compared first, so a file of the wrong size is never
        hashed.
        """
        if self.validate_size():
            if self.validate_digest():
                return True
        return False

    def describe(self):
        """Return a one-line, human readable status for this file."""
        if not self.present():
            return "'%s' is absent" % self.filename
        if self.validate():
            return "'%s' is present and valid" % self.filename
        return "'%s' is present and invalid" % self.filename
def create_file_record(filename, algorithm):
    """Build a FileRecord describing the existing file ``filename``.

    The record stores only the last path component of ``filename``,
    together with the file's current size and its digest computed with
    ``algorithm``.
    """
    stored_filename = os.path.split(filename)[1]
    # The with-block closes the file even if hashing raises.
    with open(filename, 'rb') as fo:
        digest = digest_file(fo, algorithm)
    return FileRecord(stored_filename, os.path.getsize(filename), digest, algorithm)
class FileRecordJSONEncoder(json.JSONEncoder):
    """JSON encoder that knows how to serialize FileRecords.

    A FileRecord becomes a dictionary with the keys 'filename', 'size',
    'algorithm' and 'digest'. Anything else is rejected.
    """

    def encode_file_record(self, obj):
        """Return the dictionary form of the FileRecord ``obj``.

        Raises FileRecordJSONEncoderException for any other kind of object.
        """
        if isinstance(obj, FileRecord):
            return {'filename': obj.filename, 'size': obj.size, 'algorithm': obj.algorithm, 'digest': obj.digest}
        err = "FileRecordJSONEncoder is only for FileRecord and lists of FileRecords, not %s" % obj.__class__.__name__
        log.warn(err)
        raise FileRecordJSONEncoderException(err)

    def default(self, f):
        """Encode a FileRecord, or every element of a list of them."""
        if isinstance(f, list):
            return [self.encode_file_record(item) for item in f]
        return self.encode_file_record(f)
class FileRecordJSONDecoder(json.JSONDecoder):
    """JSON decoder that turns decoded data back into FileRecords.

    It understands a single record and lists of records. Entries of a
    list that do not look like a record are currently dropped."""
    # TODO: make this more explicit in what it's looking for
    # and error out on unexpected things

    def process_file_records(self, obj):
        """Convert already-decoded JSON data into FileRecords.

        A list yields a list holding only the elements that turned into
        FileRecords. A dictionary with exactly the four record keys yields
        a FileRecord. Anything else is returned unchanged.
        """
        if isinstance(obj, list):
            converted = [self.process_file_records(entry) for entry in obj]
            return [entry for entry in converted if isinstance(entry, FileRecord)]
        record_keys = ('filename', 'size', 'algorithm', 'digest')
        if isinstance(obj, dict) and len(obj) == 4 and \
                all(key in obj for key in record_keys):
            rv = FileRecord(obj['filename'], obj['size'], obj['digest'], obj['algorithm'])
            log.debug("materialized %s" % rv)
            return rv
        return obj

    def decode(self, s):
        """Decode the JSON text ``s`` and materialize any FileRecords in it."""
        return self.process_file_records(json.JSONDecoder.decode(self, s))
class Manifest(object):
    """A list of FileRecords that can be read from and written to JSON."""

    valid_formats = ('json',)

    # NOTE(review): the mutable default means every Manifest() created
    # without an argument uses the very same list object. That is a hazard,
    # but callers in this file may rely on it (add_files creates two
    # argument-less manifests and only writes out the second one), so audit
    # them before switching to a None default.
    def __init__(self, file_records=[]):
        self.file_records = file_records

    def __eq__(self, other):
        """Manifests are equal when they hold equal records in the same order."""
        if self is other:
            return True
        if len(self.file_records) != len(other.file_records):
            log.debug('Manifests differ in number of files')
            return False
        #TODO: Lists in a different order should be equal
        for mine, theirs in zip(self.file_records, other.file_records):
            if mine != theirs:
                log.debug('FileRecords differ, %s vs %s' % (mine, theirs))
                return False
        return True

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__; without this method two
        # equal but distinct manifests would compare as "not equal".
        return not self.__eq__(other)

    def __deepcopy__(self, memo):
        # This is required for a deep copy.
        # Only the list is copied; the records themselves are shared.
        return Manifest(self.file_records[:])

    def __copy__(self):
        # The copy shares the list of records with the original.
        return Manifest(self.file_records)

    def copy(self):
        """Return a new Manifest holding a copy of the record list."""
        return Manifest(self.file_records[:])

    def present(self):
        """Return True if every recorded file exists."""
        return all(i.present() for i in self.file_records)

    def validate_sizes(self):
        """Return True if every recorded file has its recorded size."""
        return all(i.validate_size() for i in self.file_records)

    def validate_digests(self):
        """Return True if every recorded file has its recorded digest."""
        return all(i.validate_digest() for i in self.file_records)

    def validate(self):
        """Return True if every recorded file passes validation."""
        return all(i.validate() for i in self.file_records)

    def sort(self):
        """Sort the records in place, smallest file first."""
        #TODO: WRITE TESTS
        self.file_records.sort(key=lambda x: x.size)

    def load(self, data_file, fmt='json'):
        """Append the records read from the open file ``data_file``.

        Raises InvalidManifest if the data cannot be parsed.
        """
        assert fmt in self.valid_formats
        if fmt == 'json':
            try:
                self.file_records.extend(json.load(data_file, cls=FileRecordJSONDecoder))
                self.sort()
            except ValueError:
                raise InvalidManifest("trying to read invalid manifest file")

    def loads(self, data_string, fmt='json'):
        """Append the records read from the string ``data_string``.

        Raises InvalidManifest if the data cannot be parsed.
        """
        assert fmt in self.valid_formats
        if fmt == 'json':
            try:
                self.file_records.extend(json.loads(data_string, cls=FileRecordJSONDecoder))
                self.sort()
            except ValueError:
                raise InvalidManifest("trying to read invalid manifest file")

    def dump(self, output_file, fmt='json'):
        """Sort the records and write them, newline terminated, to ``output_file``."""
        assert fmt in self.valid_formats
        self.sort()
        if fmt == 'json':
            rv = json.dump(self.file_records, output_file, indent=0, cls=FileRecordJSONEncoder)
            # Finish the file with a newline.
            output_file.write('\n')
            return rv

    def dumps(self, fmt='json'):
        """Sort the records and return them serialized as a string."""
        assert fmt in self.valid_formats
        self.sort()
        if fmt == 'json':
            return json.dumps(self.file_records, cls=FileRecordJSONEncoder)
def digest_file(f, a):
    """Hash everything that can be read from the file-like object 'f'
    with the algorithm named 'a' and return the digest as a hex string."""
    hasher = hashlib.new(a)
    block_size = 1024*10
    # Read in blocks so that the whole file is never held in memory.
    while True:
        block = f.read(block_size)
        if not block:
            break
        hasher.update(block)
    hex_digest = hasher.hexdigest()
    if hasattr(f, 'name'):
        log.debug('hashed %s with %s to be %s', f.name, a, hex_digest)
    else:
        log.debug('hashed a file with %s to be %s', a, hex_digest)
    return hex_digest
# TODO: write tests for this function
def open_manifest(manifest_file):
    """Load the file named ``manifest_file`` into a new Manifest object.

    Raises InvalidManifest if the file does not exist; parse failures
    surface as whatever Manifest.load raises."""
    if not os.path.exists(manifest_file):
        log.debug("tried to load absent file '%s' as manifest" % manifest_file)
        raise InvalidManifest("manifest file '%s' does not exist" % manifest_file)
    loaded = Manifest()
    with open(manifest_file) as manifest_fh:
        loaded.load(manifest_fh)
    log.debug("loaded manifest from file '%s'" % manifest_file)
    return loaded
# TODO: write tests for this function
def list_manifest(manifest_file):
    """Print one tab separated status line per file in the manifest.

    The first column is 'P' when the file is present, the second is 'V'
    when it is present and valid, and the third is the file name; '-'
    marks a missing flag. Returns False if the manifest cannot be loaded
    and True otherwise."""
    try:
        manifest = open_manifest(manifest_file)
    except InvalidManifest:
        log.error("failed to load manifest file at '%s'" % manifest_file)
        return False
    for record in manifest.file_records:
        present_flag = "P" if record.present() else "-"
        valid_flag = "V" if record.present() and record.validate() else "-"
        print("%s\t%s\t%s" % (present_flag, valid_flag, record.filename))
    return True
def validate_manifest(manifest_file):
    """Check that every file in the manifest is present and valid.

    Nothing is fetched or deleted. Returns True only when the manifest
    loads and no file is absent or invalid."""
    try:
        manifest = open_manifest(manifest_file)
    except InvalidManifest:
        log.error("failed to load manifest file at '%s'" % manifest_file)
        return False
    missing = []
    corrupt = []
    # Every record is inspected, even after the first problem is found.
    for record in manifest.file_records:
        if record.present():
            if not record.validate():
                corrupt.append(record)
        else:
            missing.append(record)
    return not (corrupt or missing)
# TODO: write tests for this function
def add_files(manifest_file, algorithm, filenames):
    """Add records for ``filenames`` to the manifest at ``manifest_file``.

    The manifest is created if it does not exist yet. It is always
    rewritten and contains the records it already had plus one record,
    hashed with ``algorithm``, for each file that was added.

    Returns True if all files were successfully added and False if not.
    A file is not added when the manifest already tracks a file of the
    same name, so the return value is False in that case. Library
    exceptions are not caught.
    """
    all_files_added = True
    # Create a old_manifest object to add to
    if os.path.exists(manifest_file):
        old_manifest = open_manifest(manifest_file)
    else:
        old_manifest = Manifest()
        log.debug("creating a new manifest file")
    # Use a different manifest for the output and seed it with an explicit
    # copy of the existing records, so that keeping them does not depend on
    # two Manifest objects happening to share one list.
    new_manifest = Manifest(list(old_manifest.file_records))
    for filename in filenames:
        log.debug("adding %s" % filename)
        new_fr = create_file_record(filename, algorithm)
        log.debug("appending a new file record to manifest file")
        # Logged once per file rather than once per existing record.
        log.debug("manifest file has '%s'" % "', ".join([x.filename for x in new_manifest.file_records]))
        add = True
        # Check against everything that will be written, including files
        # added earlier in this same call.
        for fr in new_manifest.file_records:
            if new_fr == fr:
                # TODO: Decide if this case should really cause a False return
                # validate() hashes the file, so call it only once.
                if new_fr.validate():
                    log.info("file already in old_manifest file and matches")
                else:
                    log.error("file already in old_manifest file but is invalid")
                add = False
            elif new_fr.filename == fr.filename:
                # Records store only the last path component, so compare
                # the stored names rather than the path given by the caller.
                log.error("manifest already contains file named %s" % new_fr.filename)
                add = False
        if add:
            new_manifest.file_records.append(new_fr)
            log.debug("added '%s' to manifest" % filename)
        else:
            all_files_added = False
    with open(manifest_file, 'wb') as output:
        new_manifest.dump(output, fmt='json')
    return all_files_added
# TODO: write tests for this function
def fetch_file(base_url, file_record, overwrite=False, grabchunk=1024*4):
    """Download the file described by ``file_record`` from ``base_url``.

    A file which is requested to be fetched that exists locally will be
    hashed. If it is valid, nothing is downloaded. If it is invalid it is
    only replaced when ``overwrite`` is true; otherwise the mismatch is
    logged and the fetch fails. The data is read ``grabchunk`` bytes at a
    time from '<base_url>/<algorithm>/<digest>'.

    Returns True if a valid local copy already existed or if the download
    delivered the recorded number of bytes, False otherwise. The digest of
    downloaded data is not checked here.
    """
    if file_record.present():
        if file_record.validate():
            log.info("existing '%s' is valid, not fetching" % file_record.filename)
            return True
        if overwrite:
            log.info("overwriting '%s' as requested" % file_record.filename)
        else:
            # All of the following is for a useful error message
            with open(file_record.filename, 'rb') as f:
                d = digest_file(f, file_record.algorithm)
            log.error("digest mismatch between manifest(%s...) and local file(%s...)" % \
                    (file_record.digest[:8], d[:8]))
            log.debug("full digests: manifest (%s) local file (%s)" % (file_record.digest, d))
            # Let's bail!
            return False

    # Generate the URL for the file on the server side
    url = "%s/%s/%s" % (base_url, file_record.algorithm, file_record.digest)
    log.debug("fetching from '%s'" % url)

    # TODO: This should be abstracted to make generic retrieval protocol handling easy
    # Well, the file doesn't exist locally. Lets fetch it.
    try:
        f = urllib2.urlopen(url)
        log.debug("opened %s for reading" % url)
        try:
            with open(file_record.filename, 'wb') as out:
                size = 0
                while True:
                    # TODO: print statistics as file transfers happen both for info and to stop
                    # buildbot timeouts
                    indata = f.read(grabchunk)
                    if not indata:
                        break
                    out.write(indata)
                    size += len(indata)
        finally:
            # Release the connection whether or not the transfer worked.
            f.close()
        if size != file_record.size:
            log.error("transfer from %s to %s failed due to a difference of %d bytes" % (url,
                        file_record.filename, file_record.size - size))
            return False
        log.info("fetched %s" % file_record.filename)
    except (urllib2.URLError, urllib2.HTTPError) as e:
        log.error("failed to fetch '%s': %s" % (file_record.filename, e),
                  exc_info=True)
        return False
    except IOError:
        log.error("failed to write to '%s'" % file_record.filename,
                  exc_info=True)
        return False
    return True
# TODO: write tests for this function
def fetch_files(manifest_file, base_url, overwrite, filenames=None):
    """Fetch the files listed in ``manifest_file`` from ``base_url``.

    When ``filenames`` is empty or omitted every file in the manifest is
    fetched; otherwise only the files named in it are. ``overwrite`` is
    passed on to fetch_file.

    Returns True only if the manifest could be loaded and every selected
    file was fetched and then passed validation.
    """
    if filenames is None:
        filenames = []

    # Lets load the manifest file
    try:
        manifest = open_manifest(manifest_file)
    except InvalidManifest:
        log.error("failed to load manifest file at '%s'" % manifest_file)
        return False

    # We want to track files that fail to be fetched as well as
    # files that are fetched
    failed_files = []
    fetched_files = []

    # Lets go through the manifest and fetch the files that we want
    for f in manifest.file_records:
        if not filenames or f.filename in filenames:
            log.debug("fetching %s" % f.filename)
            if fetch_file(base_url, f, overwrite):
                fetched_files.append(f)
            else:
                failed_files.append(f.filename)
        else:
            log.debug("skipping %s" % f.filename)

    # Even if we get the file, lets ensure that it matches what the
    # manifest specified
    for localfile in fetched_files:
        if not localfile.validate():
            log.error("'%s'" % localfile.describe())
            # A file that does not validate counts as a failure too;
            # previously it was logged but the call still succeeded.
            failed_files.append(localfile.filename)

    # If we failed to fetch or validate a file, we need to fail
    if len(failed_files) > 0:
        log.error("The following files failed: '%s'" % "', '".join(failed_files))
        return False
    return True
# TODO: write tests for this function
def process_command(options, args):
    """Run the command named by the first entry of ``args``.

    The remaining entries are the command's own arguments and ``options``
    is the dictionary of parsed program options. Returns the result of
    the command, or False for an unknown command and for 'fetch' without
    a base url."""
    cmd, cmd_args = args[0], args[1:]
    log.debug("processing '%s' command with args '%s'" % (cmd, '", "'.join(cmd_args)))
    log.debug("using options: %s" % options)
    if cmd == 'list':
        return list_manifest(options['manifest'])
    if cmd == 'validate':
        return validate_manifest(options['manifest'])
    if cmd == 'add':
        return add_files(options['manifest'], options['algorithm'], cmd_args)
    if cmd == 'fetch':
        # A missing key and an explicit None are treated alike.
        if options.get('base_url') is None:
            log.critical('fetch command requires url option')
            return False
        return fetch_files(options['manifest'], options['base_url'], options['overwrite'], cmd_args)
    log.critical('command "%s" is not implemented' % cmd)
    return False
# fetching api:
# http://hostname/algorithm/hash
# example: http://people.mozilla.org/sha1/1234567890abcedf
# This will make it possible to have the server allow clients to
# use different algorithms than what was uploaded to the server
# TODO: Implement the following features:
# -optimization: do small files first, justification is that they are faster
# and cause a faster failure if they are invalid
# -store permissions
# -local renames i.e. call the file one thing on the server and
# something different locally
# -deal with the cases:
# -local data matches file requested with different filename
# -two different files with same name, different hash
# -?only ever locally to digest as filename, symlink to real name
# -?maybe deal with files as a dir of the filename with all files in that dir as the versions of that file
# - e.g. ./python-2.6.7.dmg/0123456789abcdef and ./python-2.6.7.dmg/abcdef0123456789
def main():
    """Command line entry point.

    Parses the program options, sets up console logging, fills in missing
    options from the configuration files and runs the requested command.
    The process exits with status 0 if the command succeeded and with 1
    if it did not.
    """
    # Set up logging, for now just to the console
    ch = logging.StreamHandler()
    cf = logging.Formatter("%(levelname)s - %(message)s")
    ch.setFormatter(cf)

    # Set up option parsing
    parser = optparse.OptionParser()
    # I wish there was a way to say "only allow args to be
    # sequential and at the end of the argv.
    # OH! i could step through sys.argv and check for things starting without -/-- before things starting with them
    parser.add_option('-q', '--quiet', default=False,
            dest='quiet', action='store_true')
    parser.add_option('-v', '--verbose', default=False,
            dest='verbose', action='store_true')
    parser.add_option('-m', '--manifest', default='manifest.tt',
            dest='manifest', action='store',
            help='specify the manifest file to be operated on')
    parser.add_option('-d', '--algorithm', default='sha512',
            dest='algorithm', action='store',
            help='openssl hashing algorithm to use')
    parser.add_option('-o', '--overwrite', default=False,
            dest='overwrite', action='store_true',
            help='if fetching, remote copy will overwrite a local copy that is different. ')
    parser.add_option('--url', dest='base_url', action='store',
            help='base url for fetching files')
    parser.add_option('--ignore-config-files', action='store_true', default=False,
            dest='ignore_cfg_files')
    (options_obj, args) = parser.parse_args()
    # Dictionaries are easier to work with
    options = vars(options_obj)

    # Use some of the option parser to figure out application
    # log level
    if options.get('verbose'):
        ch.setLevel(logging.DEBUG)
    elif options.get('quiet'):
        ch.setLevel(logging.ERROR)
    else:
        ch.setLevel(logging.INFO)
    # The module logger has no level of its own, so it would fall back to
    # the root logger's default threshold and discard INFO and DEBUG
    # records before the handler ever sees them. Let the logger pass
    # everything on and leave the filtering to the handler level set above.
    log.setLevel(logging.DEBUG)
    log.addHandler(ch)

    cfg_file = ConfigParser.SafeConfigParser()
    if not options.get("ignore_cfg_files"):
        read_files = cfg_file.read(['/etc/tooltool', os.path.expanduser('~/.tooltool'),
                       os.path.join(os.getcwd(), '.tooltool')])
        log.debug("read in the config files '%s'" % '", '.join(read_files))
    else:
        log.debug("skipping config files")

    # Fall back to the [general] section of the config files for options
    # that were not given a value on the command line.
    # NOTE(review): 'algorithm' has a non-empty default above, so this
    # fallback looks like it can only ever take effect for 'base_url' --
    # confirm whether the config file is meant to override that default.
    for option in ('base_url', 'algorithm'):
        if not options.get(option):
            try:
                options[option] = cfg_file.get('general', option)
                log.debug("read '%s' as '%s' from cfg_file" % (option, options[option]))
            except (ConfigParser.NoSectionError, ConfigParser.NoOptionError) as e:
                log.debug("%s in config file" % e, exc_info=True)

    if 'manifest' not in options:
        parser.error("no manifest file specified")

    if len(args) < 1:
        parser.error('You must specify a command')

    # Raise SystemExit directly; the exit() helper is only present when
    # the site module has been loaded.
    raise SystemExit(0 if process_command(options, args) else 1)
# When imported as a module, only attach a do-nothing handler to the module
# logger; when executed as a script, run the command line interface.
if __name__ != "__main__":
    log.addHandler(logging.NullHandler())
    #log.addHandler(logging.StreamHandler())
else:
    main()
|