File: download_obo_database.py

package info (click to toggle)
python-pymzml 0.7.6-dfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 66,128 kB
  • ctags: 335
  • sloc: python: 2,428; makefile: 142; sh: 38
file content (118 lines) | stat: -rwxr-xr-x 3,635 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/env python

from __future__ import print_function
from argparse import ArgumentParser
import os
import re
import subprocess
import tempfile
import shutil
import sys

# Assigned explicitly: a bare string literal that follows the imports is not
# treated as the module docstring, so __doc__ would otherwise be None when it
# is read for the command-line description.
__doc__ = '''Download all versions of the psidev OBO for mzML files'''

# CVSROOT used for every cvs command. The password field (between the
# second ':' after "pserver" and the '@') is intentionally empty.
CVS_SERVER = ':pserver:anonymous:@psidev.cvs.sourceforge.net:/cvsroot/psidev'


class CVSClient(object):
    """Run ``cvs`` commands against a single repository root.

    Meant to be used as a context manager: on entry a scratch working
    directory is created when no usable ``client_dir`` was supplied, and on
    exit that scratch directory (and only a directory created here) is
    removed. Calling the instance runs one ``cvs`` command and returns what
    the command wrote to stdout.
    """

    def __init__(self, root, client_dir=None, verbose=False):
        """Store the settings; no command is run and no directory is created.

        root       -- CVSROOT string handed to ``cvs -d``
        client_dir -- directory in which commands are executed (optional)
        verbose    -- when true, echo each command to stderr and do not
                      silence cvs itself
        """
        self.root = root

        self.client_dir = client_dir
        # Becomes True only when __enter__ had to create client_dir itself.
        self.cleanup = False
        self.verbose = verbose

    def __enter__(self):
        """Ensure a working directory exists and return the client."""
        # A client_dir that is unset or does not exist is replaced by a
        # fresh temporary directory, which __exit__ is then responsible for.
        if not self.client_dir or not os.path.isdir(self.client_dir):
            self.client_dir = tempfile.mkdtemp(prefix='cvs')
            self.cleanup = True

        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Remove the working directory if it was created by __enter__."""
        if self.cleanup:
            # Best effort: failures while deleting are deliberately ignored.
            shutil.rmtree(self.client_dir, ignore_errors=True)

    def __call__(self, *args, **kwargs):
        """Run ``cvs -d<root> <args...>`` and return its stdout as text.

        args   -- command-line arguments appended after the ``-d`` option
        kwargs -- extra keyword arguments for ``subprocess.check_output``;
                  they override the defaults chosen here (``cwd``, ``stderr``)

        Raises ``subprocess.CalledProcessError`` when cvs exits with a
        non-zero status (behaviour of ``subprocess.check_output``).
        """
        command = ['cvs', '-d' + self.root] + list(args)

        # Default args for subprocess
        processkwargs = dict(cwd=self.client_dir)

        devnull = None
        if not self.verbose:
            command.insert(1, '-Q')
            # Only open the null device when it will really be used, and
            # remember it so it can be closed again after the command ran.
            if 'stderr' not in kwargs:
                devnull = open(os.devnull, 'w')
                processkwargs['stderr'] = devnull

        # Override defaults with provided args
        processkwargs.update(kwargs)

        if self.verbose:
            print('>>> ' + ' '.join(command), file=sys.stderr)

        try:
            output = subprocess.check_output(command, **processkwargs)
        finally:
            if devnull is not None:
                devnull.close()

        # On Python 3 check_output yields bytes unless the caller asked for
        # text; the rest of this script splits and matches on str. On
        # Python 2 the result already is str and is returned untouched.
        # NOTE(review): assumes cvs output is UTF-8 - confirm for old revisions.
        if not isinstance(output, str):
            output = output.decode('utf-8')

        return output


def get_version_map(cvs):
    """Return a dict mapping OBO version strings to file contents.

    ``cvs`` is a callable that runs a cvs command built from its positional
    arguments and returns the command output. For every revision listed in
    the log the file is fetched, its version is parsed, and the first
    revision seen for each version is kept.
    """
    obo_path = 'psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo'

    # Check the file out first, then ask for the revision log
    cvs('-z3', 'co', obo_path)
    log_lines = cvs('-z3', 'log').split('\n')

    contents_by_version = {}
    for rev in parse_revisions(log_lines):
        contents = cvs('-z3', 'co', '-p', '-r', rev, obo_path)
        version = parse_version(contents)
        if not version:
            continue

        # The log lists revisions newest first, so an entry that is already
        # present belongs to a newer revision and must not be overwritten.
        contents_by_version.setdefault(version, contents)

    return contents_by_version


def parse_revisions(revision_log):
    """Extract revision numbers from the lines of a cvs log.

    ``revision_log`` is an iterable of lines. Each line that begins with
    ``revision <major>.<minor>`` contributes ``<major>.<minor>`` to the
    returned list, in the order the lines appear.
    """
    pattern = re.compile(r'revision (\d+\.\d+)')
    # match() anchors at the start of the line, so indented or embedded
    # occurrences are ignored.
    hits = (pattern.match(entry) for entry in revision_log)
    return [hit.group(1) for hit in hits if hit]


def parse_version(file_string):
    """Return the version announced in an OBO file, or None if there is none.

    The text is scanned line by line for ``remark: version: X.Y.Z...`` at the
    start of a line; when several lines qualify, the last one wins.
    """
    pattern = re.compile(r'remark:\s+version: (\d+\.\d+\.\d+\S*)')
    candidates = [
        hit.group(1)
        for hit in map(pattern.match, file_string.split('\n'))
        if hit
    ]
    if not candidates:
        return None
    return candidates[-1]


def save_versions(version_map, destination):
    """Write each OBO version to ``destination`` as ``psi-ms-<version>.obo``.

    version_map -- mapping of version string to the text of that OBO file
    destination -- existing directory that receives one file per version;
                   files with the same name are overwritten
    """
    # items() exists on both Python 2 and 3, whereas iteritems() is
    # Python 2 only and raises AttributeError on Python 3.
    for version, file_at_version in version_map.items():
        destination_path = os.path.join(destination, 'psi-ms-{0}.obo'.format(version))
        # Plain write mode is enough; the file is never read back here.
        with open(destination_path, 'w') as destination_file:
            destination_file.write(file_at_version)

if __name__ == '__main__':
    # Command-line entry point: download every OBO version into a directory.
    parser = ArgumentParser(description=__doc__)
    parser.add_argument('destination', help='directory into which the OBO files go')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='show extra logging information')
    args = parser.parse_args()

    # Sanity checking. parser.error is used instead of assert because
    # asserts are stripped under `python -O`, and a usage message is more
    # helpful to the user than an AssertionError traceback.
    if not os.path.isdir(args.destination):
        parser.error('destination must be a valid directory')

    with CVSClient(CVS_SERVER, verbose=args.verbose) as cvs:
        # Log in before issuing any other cvs command
        cvs('login')
        revision_map = get_version_map(cvs)
        save_versions(revision_map, args.destination)

# vim: ts=4:sw=4:sts=4