1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118
|
#!/usr/bin/env python
from __future__ import print_function
from argparse import ArgumentParser
import os
import re
import subprocess
import tempfile
import shutil
import sys
'''Download all versions of the psidev OBO for mzML files'''
# CVS root used for anonymous pserver access to the psidev repository.
# The two adjacent string literals are concatenated by the parser, so nothing
# sits between 'anonymous:' and '@' (presumably an empty password field --
# confirm against the CVS pserver root syntax).
CVS_SERVER = ':pserver:anonymous:''@psidev.cvs.sourceforge.net:/cvsroot/psidev'
class CVSClient(object):
    """Run ``cvs`` commands against one repository root.

    Intended to be used as a context manager. On entry, a temporary working
    directory is created when ``client_dir`` is unset or is not an existing
    directory; that temporary directory is removed again on exit. A
    caller-supplied existing directory is never removed.

    Calling the instance runs ``cvs -d<root> <args...>`` inside the working
    directory and returns the command's standard output.
    """

    def __init__(self, root, client_dir=None, verbose=False):
        """Store the settings; no directory is created until ``__enter__``.

        root       -- CVS root passed to ``cvs`` via ``-d``.
        client_dir -- working directory for checkouts (optional).
        verbose    -- when true, echo each command to stderr and let ``cvs``
                      write its own diagnostics.
        """
        self.root = root
        self.client_dir = client_dir
        # Becomes True only when __enter__ had to create a temporary directory.
        self.cleanup = False
        self.verbose = verbose

    def __enter__(self):
        if not self.client_dir or not os.path.isdir(self.client_dir):
            self.client_dir = tempfile.mkdtemp(prefix='cvs')
            self.cleanup = True
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Only delete directories this object created itself.
        if self.cleanup:
            shutil.rmtree(self.client_dir, ignore_errors=True)

    def __call__(self, *args, **kwargs):
        """Run ``cvs`` with ``args`` and return its standard output.

        Extra keyword arguments are forwarded to ``subprocess.check_output``
        and take precedence over the defaults chosen here (``cwd`` and, in
        quiet mode, ``stderr``). Raises ``subprocess.CalledProcessError`` if
        the command exits with a non-zero status.
        """
        command = ['cvs', '-d' + self.root] + list(args)
        # Default args for subprocess
        processkwargs = dict(cwd=self.client_dir)

        if self.verbose:
            processkwargs.update(kwargs)
            print('>>> ' + ' '.join(command), file=sys.stderr)
            return subprocess.check_output(command, **processkwargs)

        command.insert(1, '-Q')
        # Override defaults with provided args
        processkwargs.update(kwargs)
        if 'stderr' in processkwargs:
            # The caller chose where stderr goes; no need to open devnull.
            return subprocess.check_output(command, **processkwargs)

        # Discard cvs diagnostics, closing the devnull handle once the
        # command has finished instead of leaking one handle per call.
        with open(os.devnull, 'w') as devnull:
            processkwargs['stderr'] = devnull
            return subprocess.check_output(command, **processkwargs)
def get_version_map(cvs):
    """Return a dict mapping each OBO version string to its file contents.

    ``cvs`` is a callable that runs a cvs command and returns its output.
    The OBO file is checked out, its log is read to list the revisions, and
    each revision is then fetched in turn. For every version string only the
    first revision encountered is kept.
    """
    obo_path = 'psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo'

    # A checkout is needed first so that the log command has something to
    # report on.
    cvs('-z3', 'co', obo_path)
    log_lines = cvs('-z3', 'log').split('\n')

    contents_by_version = {}
    for rev in parse_revisions(log_lines):
        contents = cvs('-z3', 'co', '-p', '-r', rev, obo_path)
        obo_version = parse_version(contents)
        # The log lists revisions newest first, so a version that is already
        # recorded came from a newer revision and must not be overwritten.
        if not obo_version or obo_version in contents_by_version:
            continue
        contents_by_version[obo_version] = contents
    return contents_by_version
def parse_revisions(revision_log):
    """Return the revision numbers found in the lines of a cvs log.

    ``revision_log`` is an iterable of lines. A line contributes a revision
    only when it starts with ``revision `` followed by a dotted number; the
    revisions are returned as strings in the order they appear.

    The whole dotted number is captured, so a branch or vendor revision such
    as ``1.1.1.1`` is returned intact instead of being cut down to ``1.1``.
    """
    revision_regexp = re.compile(r'revision (\d+(?:\.\d+)+)')
    matches = (revision_regexp.match(line) for line in revision_log)
    return [match.group(1) for match in matches if match]
def parse_version(file_string):
    """Return the version string declared in an OBO file's text, or None.

    A declaration is a line beginning with ``remark:``, whitespace, then
    ``version: `` and a three-part dotted number with an optional suffix.
    When several lines qualify, the last one in the file wins.
    """
    pattern = re.compile(r'remark:\s+version: (\d+\.\d+\.\d+\S*)')
    # Scanning from the end means the first hit is the last declaration.
    for text in reversed(file_string.split('\n')):
        found = pattern.match(text)
        if found is not None:
            return found.group(1)
    return None
def save_versions(version_map, destination):
    """Write every entry of ``version_map`` to its own file.

    version_map -- dict mapping a version string to the file contents.
    destination -- existing directory that receives the files.

    Each entry is written to ``psi-ms-<version>.obo`` inside ``destination``,
    replacing any file already there.
    """
    # items() works on both Python 2 and 3; iteritems() exists only on 2.
    for version, file_at_version in version_map.items():
        destination_path = os.path.join(
            destination, 'psi-ms-{0}.obo'.format(version))
        # On Python 3 subprocess output is bytes and needs a binary file; on
        # Python 2 bytes is str, so the text mode is kept there.
        is_binary = (isinstance(file_at_version, bytes)
                     and not isinstance(file_at_version, str))
        mode = 'wb' if is_binary else 'w'
        with open(destination_path, mode) as destination_file:
            destination_file.write(file_at_version)
if __name__ == '__main__':
    # __doc__ is None unless the description string is the very first
    # statement of the module, so fall back to an explicit description to
    # keep --help informative.
    parser = ArgumentParser(
        description=__doc__
        or 'Download all versions of the psidev OBO for mzML files')
    parser.add_argument('destination',
                        help='directory into which the OBO files go')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='show extra logging information')
    args = parser.parse_args()

    # Sanity checking. parser.error is used instead of assert because
    # assertions are stripped when Python runs with -O; it prints the usage
    # message and exits with status 2.
    if not os.path.isdir(args.destination):
        parser.error('destination must be a valid directory')

    with CVSClient(CVS_SERVER, verbose=args.verbose) as cvs:
        cvs('login')
        revision_map = get_version_map(cvs)
        save_versions(revision_map, args.destination)
# vim: ts=4:sw=4:sts=4
|